Got the basics for matching and importing icd->GBD data

llm-extraction
will king 3 years ago
parent d1edac3c4f
commit 5600ad932d

@ -1 +1 @@
{"resources":{"development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-schema":"public","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"}}} {"resources":{"Scripts/DiseaseBurdens_create_table.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"Scripts/GlobalBurdensOfDisease2019Codebook.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"DiseaseBurden"},"development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-schema":"public","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"}}}

@ -0,0 +1,32 @@
from drugtools.env_setup import ENV,postgres_conn
from psycopg2 import extras
from collections import namedtuple
# Pipe-separated mapping files whose rows are loaded into
# "DiseaseBurden".icd10_to_cause.
# NOTE(review): the file names say "cause2code" but each row is unpacked as
# (code, cause) below -- confirm the column order against the files.
FILES = [
    "../non-db_data_sources/GBD and ICD-10_(2019 version)/NONFATAL_cause2code.psv",
    "../non-db_data_sources/GBD and ICD-10_(2019 version)/COD_cause2code.psv",
]
SEP = "|"

# One row is inserted per input line; values are bound by name.
sql = """
INSERT INTO "DiseaseBurden".icd10_to_cause
(code,cause_text)
VALUES
(%(code)s,%(cause)s)
"""

# Read each file and load its rows into the table.
# TODO: decide whether the codes need adjusting before they are inserted.
with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as pcurse:
    for fpath in FILES:
        with open(fpath, "r") as fh:
            # Iterate the handle directly instead of materialising the whole
            # file with readlines().
            for lineno, line in enumerate(fh, start=1):
                # A blank (often trailing) line carries no mapping; skip it
                # rather than failing on the unpack below.
                if not line.strip():
                    continue

                fields = line.split(SEP)
                if len(fields) != 2:
                    # Same exception type the bare tuple-unpack raised, but it
                    # now says where the malformed row is.
                    raise ValueError(
                        f"{fpath}:{lineno}: expected 2 fields separated by "
                        f"{SEP!r}, got {len(fields)}: {line!r}"
                    )

                code, cause = (field.strip() for field in fields)
                pcurse.execute(sql, {"code": code, "cause": cause})

@ -1,10 +1,11 @@
from dotenv import dotenv_values
import requests import requests
import json import json
from drugtools.env_setup import ENV,postgres_conn from drugtools.env_setup import ENV,postgres_conn
from psycopg2 import extras from psycopg2 import extras
from collections import namedtuple
RecordStuff = namedtuple("RecordStuff", "nct_id condition ui uri rootSource name")
class Requestor(): class Requestor():
def __init__(self,api_key): def __init__(self,api_key):
@ -21,7 +22,6 @@ class Requestor():
query = "https://uts-ws.nlm.nih.gov/rest/search/current/" query = "https://uts-ws.nlm.nih.gov/rest/search/current/"
r = requests.get(query,params=query_terms) r = requests.get(query,params=query_terms)
print(r.url)
return r return r
@ -29,30 +29,42 @@ r = Requestor(ENV.get("UMLS_API_KEY"))
print(json.dumps(r.search("leukemia").json(),indent=2)) print(json.dumps(r.search("leukemia").json(),indent=2))
conditions_link = {}
# For every (trial, MeSH condition) pair, run the condition through
# r.search() and store each returned match in "DiseaseBurden".trial_to_icd10.
# NOTE(review): assumes postgres_conn() returns a psycopg2 connection, whose
# context manager commits on a clean exit -- confirm in drugtools.env_setup.
with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as pcurse:
    sql = """
    select nct_id, downcase_mesh_term
    from ctgov.browse_conditions bc
    where
    mesh_type = 'mesh-list'
    and
    nct_id in (select distinct nct_id from history.trial_snapshots ts)
    order by nct_id
    ;
    """
    pcurse.execute(sql)
    rows = pcurse.fetchall()

    entries = []

    for row in rows:
        nctid = row[0]
        condition = row[1]
        print(nctid, condition)

        payload = r.search(condition).json()

        # Passing Exception instances as dict.get() defaults never raised
        # them: a missing key surfaced later as an unrelated AttributeError or
        # TypeError. Fail here with the intended message instead.
        if "result" not in payload:
            raise ValueError(f"No result entry in json for condition {condition!r}")
        result = payload["result"]
        if "results" not in result:
            raise ValueError(f"No results entry in json for condition {condition!r}")
        results = result["results"]

        if not results:
            # Keep a placeholder row so a trial/condition pair that returned
            # no matches is still recorded.
            entries.append(RecordStuff(nctid, condition, None, None, None, None))
        else:
            for entry in results:
                entries.append(
                    RecordStuff(
                        nctid,
                        condition,
                        entry["ui"],
                        entry["uri"],
                        entry["rootSource"],
                        entry["name"],
                    )
                )

    # Placeholder names match the RecordStuff field names, so _asdict() can be
    # passed straight through as the parameter mapping.
    sql_insert = """
    INSERT INTO "DiseaseBurden".trial_to_icd10
    (nct_id, "condition", ui,uri,rootsource,"name")
    VALUES
    (%(nct_id)s, %(condition)s, %(ui)s, %(uri)s, %(rootSource)s, %(name)s)
    """
    for entry in entries:
        pcurse.execute(sql_insert, entry._asdict())

Loading…
Cancel
Save