merging results
commit
52f8152afd
@ -0,0 +1,27 @@
|
|||||||
|
/*Get relationships between brands and branded drugs and packs*/
|
||||||
|
-- Sample of brand-name (BN) concepts related to branded drugs (SBD) or branded packs (BPCK).
SELECT *
FROM rxnorm_migrated.rxnorm_relations AS rr
WHERE rr.tty1 = 'BN'
  AND rr.tty2 IN ('SBD', 'BPCK')
LIMIT 100;
|
||||||
|
|
||||||
|
/*get all the ndc codes associated with an rxcui
|
||||||
|
* Same as query
|
||||||
|
* http://will-office:4000/REST/rxcui/1668240/allhistoricalndcs.json
|
||||||
|
* note the different formats of the dates.
|
||||||
|
*
|
||||||
|
* Based on http://will-office:4000/RxNav/search?searchBy=RXCUI&searchTerm=1668240
|
||||||
|
* it appears that this rxcui is a sbd or bpck (branded drug or pack)
|
||||||
|
*
|
||||||
|
* If I grab every brand, then every branded drug or pack associated with that brand, and then every NDC attached to those, I can join to the NSDE data to get the marketing dates.
|
||||||
|
* */
|
||||||
|
-- Every NDC history row recorded for RXCUI 1668240 whose source is RxNorm.
SELECT *
FROM ALLNDC_HISTORY AS ah
WHERE ah.RXCUI = '1668240'
  AND ah.SAB = 'RXNORM';
|
||||||
|
/**
|
||||||
|
* If I grab every brand, then every branded drug or pack associated with that drug and then attach that to the nsde data I would get the marketing dates required.
|
||||||
|
* trial -> mesh_term -> IN/MIN (rxcui) -> BN (rxcui) -> SBD/BPCK (rxcui) -> ndc11 -> nsde (marketing dates)
|
||||||
|
* */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* I do need to figure out a way to change the date types when importing into postgres. In mariadb they are mmYYYY, whereas in the JSON API they are YYYYmm, but what I want is YYYY-mm-01.
|
||||||
|
*/
|
||||||
@ -1,49 +0,0 @@
|
|||||||
import requests
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from dataclasses import dataclass
|
|
||||||
|
|
||||||
BASE_URL = "http://LOCALHOST:4000/REST"
|
|
||||||
FORMAT = '.json'
|
|
||||||
|
|
||||||
@dataclass
class RxCui:
    """A single RxNorm concept, identified by its RxCUI string."""

    id: str  # the RxCUI exactly as supplied by the caller

    def get_brandnames(self):
        """Placeholder: not implemented yet, always returns None."""
        return None

    def get_atc_class(self):
        """Placeholder: not implemented yet, always returns None."""
        return None
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def FindRxcuiByString(name: str, **kwargs) -> list[RxCui]:
    '''
    Look up the RxCUIs that match a name.

    Sends a GET request to the `/rxcui` endpoint under BASE_URL.
    Defaults to searching RxNorm (i.e. drugs) using a best match option.

    Parameters:
        name: the string to search for.
        **kwargs: extra query parameters; these override the defaults
            (`allsrc`, `srclist`, `search`) but never `name`.

    Returns:
        A list of RxCui objects, one per id under `idGroup.rxnormId` in the
        response. The list is empty when the response carries no ids.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    '''
    url = BASE_URL + "/rxcui" + FORMAT

    # Dict union is right-biased: kwargs override the defaults, and `name`
    # is applied last so it cannot be clobbered by a stray keyword.
    query = {'allsrc': 0, 'srclist': 'RXNORM', 'search': 2} | kwargs | {'name': name}

    r = requests.get(url, params=query)
    # Fail loudly on HTTP errors instead of tripping over an unexpected body.
    r.raise_for_status()

    # `rxnormId` is absent when nothing matched; treat that as "no results"
    # rather than raising KeyError.
    id_group = r.json().get('idGroup') or {}
    return [RxCui(x) for x in id_group.get('rxnormId') or []]
|
|
||||||
|
|
||||||
|
|
||||||
def get_brands_from_ingredients(rxcui: RxCui):
    '''
    Fetch the brands associated with an ingredient.

    Sends a GET request to the `/brands` endpoint under BASE_URL with the
    ingredient's id as the `ingredientids` parameter.

    Parameters:
        rxcui: the ingredient concept to look up.

    Returns:
        A list of AssociatedBrand objects, one per entry under
        `brandGroup.conceptProperties` in the response. The list is empty
        when the response lists no brands.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    '''
    url = BASE_URL + "/brands" + FORMAT
    r = requests.get(url, params={"ingredientids": rxcui.id})
    # Fail loudly on HTTP errors instead of tripping over an unexpected body.
    r.raise_for_status()

    # `conceptProperties` may be missing when no brand is found; treat that
    # as an empty result rather than raising KeyError.
    brand_group = r.json().get('brandGroup') or {}
    return [AssociatedBrand(x, rxcui) for x in brand_group.get('conceptProperties') or []]
|
|
||||||
|
|
||||||
class AssociatedBrand:
    """Pairs a brand concept with the ingredient it was looked up from."""

    def __init__(self, brand, ingredient: RxCui):
        # `brand` is a mapping that must contain an 'rxcui' entry.
        brand_id = brand['rxcui']
        self.brand_rxcui = RxCui(brand_id)
        self.ingredient_rxcui = ingredient
|
|
||||||
|
|
||||||
def get_rx_property(rxcui):
    """
    Placeholder for a property lookup on `rxcui`.

    NOTE(review): the original definition stopped at the signature (no colon,
    no body), which made the whole module a SyntaxError. The intended
    behaviour is not visible here, so this stub fails explicitly.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("get_rx_property is not implemented yet")
|
|
||||||
@ -0,0 +1,147 @@
|
|||||||
|
import psycopg2 as psyco
|
||||||
|
import pymysql
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
##############NOTE
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
mariadb --mariadb.connect--> incrementally fetched dict --psycopg2--> postgres
|
||||||
|
|
||||||
|
I will have the ability to reduce memory usage and simplify what I am doing.
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
####################CONSTANTS#################################
|
||||||
|
|
||||||
|
#SPLIT_RE = re.compile("(\w+)(\((\d+)\))?")
|
||||||
|
|
||||||
|
|
||||||
|
###################QUERIES#########################
|
||||||
|
|
||||||
|
QUERY_columns_from_Information_Schema = """
|
||||||
|
SELECT *
|
||||||
|
FROM INFORMATION_SCHEMA.columns
|
||||||
|
WHERE
|
||||||
|
TABLE_SCHEMA=%s
|
||||||
|
and
|
||||||
|
TABLE_NAME=%s
|
||||||
|
;
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
QUERY_data_from_table = "SELECT * FROM {schema}.{table} limit 10"
|
||||||
|
|
||||||
|
|
||||||
|
########FUNCTIONS#################
|
||||||
|
|
||||||
|
|
||||||
|
def convert_column(d):
    """
    Given the metadata about a column in mysql, make the portion of the `create table`
    statement that corresponds to that column in postgres.

    Parameters:
        d: one row of INFORMATION_SCHEMA.columns as a mapping. Reads
            DATA_TYPE, COLUMN_NAME and IS_NULLABLE, plus
            CHARACTER_MAXIMUM_LENGTH (varchar/char) or
            NUMERIC_PRECISION / NUMERIC_SCALE (decimal). The mapping is
            not modified.

    Returns:
        The column definition as a string with no trailing comma (the caller
        joins definitions itself), or None for `enum` and `text`, which
        have no conversion yet.

    Raises:
        ValueError: if DATA_TYPE is not one of the handled types.
    """
    templates = {
        "varchar": "{COLUMN_NAME} character varying({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog.\"default\" {IS_NULLABLE}",
        "char": "{COLUMN_NAME} character({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog.\"default\" {IS_NULLABLE}",
        "tinyint": "{COLUMN_NAME} smallint {IS_NULLABLE}",
        "decimal": "{COLUMN_NAME} numeric({NUMERIC_PRECISION},{NUMERIC_SCALE}) {IS_NULLABLE}",
        # No trailing comma here: the caller joins columns with ",\n".
        "int": "{COLUMN_NAME} integer {IS_NULLABLE}",
        # Not converted yet; the caller has to decide how to handle these.
        "enum": None,
        "text": None,
    }

    data_type = d["DATA_TYPE"]
    if data_type not in templates:
        raise ValueError("unsupported mysql data type: {!r}".format(data_type))

    template = templates[data_type]
    if template is None:
        return None

    # Work on a copy: rewriting IS_NULLABLE in the caller's dict would make a
    # second call on the same row see "NOT NULL" != "NO" and drop the constraint.
    fields = dict(d)
    fields["IS_NULLABLE"] = "NOT NULL" if d["IS_NULLABLE"] == "NO" else ""

    return template.format(**fields)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # process environment variables (load_dotenv() runs before the lookups)
    load_dotenv()
    POSTGRES_HOST = os.getenv("POSTGRES_HOST")
    POSTGRES_DB = os.getenv("POSTGRES_DB")
    POSTGRES_USER = os.getenv("POSTGRES_USER")
    POSTGRES_PASSWD = os.getenv("POSTGRES_PASSWD")
    POSTGRES_PORT = os.getenv("POSTGRES_PORT")

    MARIADB_HOST = os.getenv("MARIADB_HOST")
    MARIADB_DB = os.getenv("MARIADB_DB")
    MARIADB_USER = os.getenv("MARIADB_USER")
    MARIADB_PASSWD = os.getenv("MARIADB_PASSWD")
    MARIADB_PORT = os.getenv("MARIADB_PORT")

    # get & convert datatypes for each table of interest
    tables_of_interest = [
        "rxnorm_props",
        "rxnorm_relations",
        "ALLNDC_HISTORY",
        "ALLRXCUI_HISTORY",
    ]
    mschema = "rxnorm_current"   # source schema (mysql/mariadb side)
    pschema = "rxnorm_migrated"  # destination schema (postgres side)

    # The original loop began with a bare `continue`, so table creation never
    # ran and everything after it was unreachable. The flag keeps creation
    # disabled while making that explicit; set it to True to create tables.
    CREATE_TABLES = False

    with pymysql.connect(
        user=MARIADB_USER,
        password=MARIADB_PASSWD,
        host=MARIADB_HOST,
        port=MARIADB_PORT,
        database=MARIADB_DB,
        cursorclass=pymysql.cursors.DictCursor,
    ) as mcon, psyco.connect(
        user=POSTGRES_USER,
        password=POSTGRES_PASSWD,
        host=POSTGRES_HOST,
        port=POSTGRES_PORT,
        database=POSTGRES_DB,
    ) as pcon:
        if CREATE_TABLES:
            with mcon.cursor() as mcurse, pcon.cursor() as pcurse:
                for table in tables_of_interest:  # create equivalent table in postgres
                    mcurse.execute(QUERY_columns_from_Information_Schema, [mschema, table])

                    # convert_column returns None for types it cannot convert;
                    # drop those so the join below does not fail on a non-string.
                    converted = (convert_column(a) for a in mcurse.fetchall())
                    columns = [c for c in converted if c is not None]
                    column_sql = ",\n".join(columns)

                    # create a header and footer
                    header = "CREATE TABLE IF NOT EXISTS {schema}.{table_name}\n(".format(
                        schema=pschema, table_name=table
                    )
                    footer = ");"

                    # CREATE TABLE
                    create_table_statement = "\n".join([header, column_sql, footer])
                    pcurse.execute(create_table_statement)

        # extract data from mysql: print the first row of each table as a check
        with mcon.cursor() as mcurse:
            for table in tables_of_interest:
                # Reuse the shared query template and the configured source
                # schema instead of a second hard-coded copy of the SQL.
                mcurse.execute(QUERY_data_from_table.format(schema=mschema, table=table))
                print(mcurse.fetchone())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -0,0 +1,30 @@
|
|||||||
|
/***************CREATE VIEWS*******************/
|
||||||
|
-- Match trials to RxNorm concepts: a trial intervention's lowercased MeSH term
-- is joined to the value of an 'RxNorm Name' property, and only trials present
-- in history.trial_snapshots are kept.
-- PostgreSQL has no CREATE VIEW IF NOT EXISTS; OR REPLACE keeps the script re-runnable.
CREATE OR REPLACE VIEW history.match_drugs_to_trials AS
SELECT nct_id, rxcui, propvalue1
FROM ctgov.browse_interventions AS bi
JOIN rxnorm_migrated.rxnorm_props AS rp
    ON bi.downcase_mesh_term = rp.propvalue1
WHERE propname = 'RxNorm Name'
  AND nct_id IN (SELECT nct_id FROM history.trial_snapshots)
;
|
||||||
|
|
||||||
|
|
||||||
|
/********************IN DEVELOPMENT*********************/
|
||||||
|
|
||||||
|
/* Get the count of brand names attached to each trial
|
||||||
|
* I should develop this into a view that matches trials to brands
|
||||||
|
* then create a view that gets the counts.
|
||||||
|
*/
|
||||||
|
-- For each rxcui1 that appears in history.match_drugs_to_trials, count the
-- related concepts whose term type is BN, largest counts first.
SELECT rr.rxcui1, COUNT(rr.rxcui2)
FROM rxnorm_migrated.rxnorm_relations AS rr
WHERE rr.tty2 = 'BN'
  AND rr.rxcui1 IN (SELECT rxcui FROM history.match_drugs_to_trials)
GROUP BY rr.rxcui1
ORDER BY COUNT(rr.rxcui2) DESC
;
|
||||||
@ -0,0 +1 @@
|
|||||||
|
downloads and extracts nsde data.
|
||||||
Loading…
Reference in New Issue