You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ClinicalTrialsDataProcessing/scripts/umls_requests.py

88 lines
2.8 KiB
Python

import requests
import json
from drugtools.env_setup import ENV,postgres_conn
from psycopg2 import extras
from collections import namedtuple
from tqdm import tqdm
# Immutable record pairing one trial condition with one search hit; its
# field names match the named placeholders of the INSERT statement below.
RecordStuff = namedtuple(
    "RecordStuff",
    ["nct_id", "condition", "ui", "uri", "rootSource", "name"],
)
class Requestor():
    """Small client for the UMLS UTS REST search endpoint.

    The instance only stores the API key; every call to ``search`` issues
    one HTTP GET against ``SEARCH_URL``.
    """

    # Endpoint queried by search().
    SEARCH_URL = "https://uts-ws.nlm.nih.gov/rest/search/current/"

    def __init__(self, api_key):
        """Remember the API key that is sent with every request."""
        self.key = api_key

    def search(self, search_term, inputType="sourceUi", returnIdType="code",
               addnl_terms=None, timeout=None):
        """Run one search request and return the raw ``requests`` response.

        Args:
            search_term: Value sent as the ``string`` query parameter.
            inputType: Value sent as the ``inputType`` query parameter.
            returnIdType: Value sent as the ``returnIdType`` query parameter.
            addnl_terms: Optional mapping of extra query parameters. Its
                entries are applied last, so they override the defaults
                built here (including ``sabs``, which is otherwise fixed
                to ``"ICD10"``).
            timeout: Optional timeout forwarded to ``requests.get``. The
                default ``None`` keeps the previous behaviour of waiting
                indefinitely.

        Returns:
            The ``requests.Response`` object, unmodified. The HTTP status
            is not checked here; that is left to the caller.
        """
        query_terms = {
            "apiKey": self.key,
            "sabs": "ICD10",
            "string": search_term,
            "returnIdType": returnIdType,
            "inputType": inputType,
        }
        # A None default replaces the former shared mutable `{}` default;
        # update() keeps the "extra terms win" precedence of the old merge.
        if addnl_terms:
            query_terms.update(addnl_terms)
        return requests.get(self.SEARCH_URL, params=query_terms, timeout=timeout)
# Script body: read every (trial, condition) pair for trials that have
# snapshots, look each condition up through the UMLS search client, and
# store one row per hit (or one all-NULL row when nothing matched) in
# "DiseaseBurden".trial_to_icd10.

# Shared search client; the API key is read from the project's ENV mapping.
r = Requestor(ENV.get("UMLS_API_KEY"))


def _extract_results(payload):
    """Return ``payload["result"]["results"]`` from a decoded search response.

    Raises:
        ValueError: if the "result" entry is missing (or null) or the nested
            "results" entry is missing, so a malformed response fails with a
            clear message instead of an unrelated AttributeError/TypeError.
    """
    result = payload.get("result")
    if result is None:
        raise ValueError("No result entry in json")
    if "results" not in result:
        raise ValueError("No results entry in json")
    return result["results"]


# NOTE(review): nothing here commits explicitly; this presumably relies on
# the context manager returned by postgres_conn() committing on a clean
# exit -- confirm against drugtools.env_setup.
with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as pcurse:
    # Narrower query (MeSH browse conditions only). It is never executed;
    # only sql2 below is run.
    sql = """
    select nct_id, downcase_mesh_term
    from ctgov.browse_conditions bc
    where
    mesh_type = 'mesh-list'
    and
    nct_id in (select distinct nct_id from history.trial_snapshots ts)
    order by nct_id
    ;
    """
    # Query actually used: free-text conditions unioned with MeSH browse
    # conditions, limited to trials present in history.trial_snapshots.
    sql2 = """
    with cte as (
    /* Keywords added too much noise
    select nct_id,downcase_name
    from ctgov.keywords k
    where nct_id in (select distinct nct_id from history.trial_snapshots ts)
    union */
    select nct_id, downcase_name
    from ctgov.conditions c
    union
    select nct_id ,downcase_mesh_term as downcase_name
    from ctgov.browse_conditions bc
    where mesh_type = 'mesh-list'
    )
    select nct_id, downcase_name from cte
    where nct_id in (select distinct nct_id from history.trial_snapshots ts)
    order by nct_id
    """
    pcurse.execute(sql2)
    rows = pcurse.fetchall()

    entries = []
    # The same condition string can appear under many trials; remember each
    # term's hits so it is only sent to the API once per run.
    results_by_condition = {}
    for row in tqdm(rows, desc="Search MeSH terms"):
        nctid = row[0]
        condition = row[1]
        if condition not in results_by_condition:
            response = r.search(condition)
            # Fail on HTTP errors (bad key, throttling, ...) before trying
            # to decode the body as JSON.
            response.raise_for_status()
            results_by_condition[condition] = _extract_results(response.json())
        results = results_by_condition[condition]

        # NOTE(review): this treats an empty list as "no match"; confirm the
        # API does not report that case with a placeholder entry instead.
        if not results:
            # Keep a placeholder row so the trial/condition pair is still
            # recorded even though nothing matched.
            entries.append(RecordStuff(nctid, condition, None, None, None, None))
        else:
            for entry in results:
                entries.append(RecordStuff(nctid, condition, entry["ui"], entry["uri"], entry["rootSource"], entry["name"]))

    # Named placeholders line up with the RecordStuff field names, so each
    # record can be passed as a dict via _asdict().
    sql_insert = """
    INSERT INTO "DiseaseBurden".trial_to_icd10
    (nct_id, "condition", ui,uri,rootsource,"name","source",approved,approval_timestamp)
    VALUES
    (%(nct_id)s, %(condition)s, %(ui)s, %(uri)s, %(rootSource)s, %(name)s, 'UMLS API search', null,null)
    """
    for entry in tqdm(entries, desc="Inserting entries to DB"):
        pcurse.execute(sql_insert, entry._asdict())