merging results

3 years ago · 52f8152afd
parent 23826fb576 966171c840
commit 52f8152afd
6 changed files with 214 additions and 49 deletions
--- a/RxMix/ASSOICATING
+++ b/RxMix/ASSOICATING
@ -0,0 +1,27 @@
+/* Get relationships between brands (BN) and branded drugs (SBD) and branded packs (BPCK). */
+select * from rxnorm_migrated.rxnorm_relations rr 
+where tty1 = 'BN' and tty2 in ('SBD', 'BPCK')
+limit 100;
+
+/* Get all the NDC codes associated with an rxcui.
+ * Same as the query
+ * http://will-office:4000/REST/rxcui/1668240/allhistoricalndcs.json
+ * Note the different formats of the dates.
+ * 
+ * Based on http://will-office:4000/RxNav/search?searchBy=RXCUI&searchTerm=1668240
+ * it appears that this rxcui is an SBD or BPCK (branded drug or pack).
+ * 
+ * If I grab every brand, then every branded drug or pack associated with that drug, and then every
+ * NDC for those, I can reach the marketing dates (the full chain is spelled out after this query).
+ */
+select * from ALLNDC_HISTORY ah 
+where RXCUI ='1668240'
+and SAB='RXNORM'
+;
+/*
+ * If I grab every brand, then every branded drug or pack associated with that drug, and then attach that to the NSDE data, I would get the marketing dates required.
+ * trial -> mesh_term -> IN/MIN (rxcui) -> BN (rxcui) -> SBD/BPCK (rxcui) -> ndc11 -> nsde (marketing dates)
+ */
+
+/*
+ * I do need to figure out a way to change the date types when importing into postgres. In mariadb they are mmYYYY, whereas in the JSON API they are YYYYmm, but what I want is YYYY-mm-01.
+ */
--- a/RxMix/RxMixInABox.py
+++ b/RxMix/RxMixInABox.py
@ -1,49 +0,0 @@
-import requests
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
-
-BASE_URL = "http://LOCALHOST:4000/REST"
-FORMAT = '.json'
-
-@dataclass
-class RxCui():
-    id: str
-
-    def get_atc_class(self):
-        pass
-    def get_brandnames(self):
-        pass
-
-
-
-
-def FindRxcuiByString(name: str, **kwargs) -> RxCui:
-    '''
-    Find a RxCUI by string based on a string
-    Defaults to searching RxNorm (i.e. drugs) using a best match option
-    '''
-
-    url = BASE_URL + "/rxcui" + FORMAT 
-    query = {'allsrc':0, 'srclist':'RXNORM', 'search':2} | kwargs | {'name':name}
-    r = requests.get(url, params=query)
-
-    #extract RxCUIs
-    return [RxCui(x) for x in r.json()['idGroup']['rxnormId']]
-
-
-def get_brands_from_ingredients(rxcui: RxCui):
-    '''
-    This is used to query for properties
-    '''
-    url = BASE_URL + "/brands" + FORMAT
-    r = requests.get(url, params={"ingredientids": rxcui.id})
-    j = r.json()
-    
-    return [ AssociatedBrand(x,rxcui) for x in j['brandGroup']['conceptProperties']]
-
-class AssociatedBrand():
-    def __init__(self,brand,ingredient: RxCui):
-        self.ingredient_rxcui = ingredient
-        self.brand_rxcui = RxCui(brand['rxcui'])
-        
-def get_rx_property(rxcui)
--- a/RxMix/migrate_rxnav.py
+++ b/RxMix/migrate_rxnav.py
@ -0,0 +1,147 @@
+import psycopg2 as psyco
+import pymysql
+from dotenv import load_dotenv
+import os
+
+
+##############NOTE
+'''
+
+
+mariadb --mariadb.connect--> incrementally fetched dict --psycopg2--> postgres
+
+I will have the ability to reduce memory usage and simplify what I am doing.
+
+
+'''
+
+
+####################CONSTANTS#################################
+
+#SPLIT_RE = re.compile("(\w+)(\((\d+)\))?")
+
+
+###################QUERIES#########################
+
+#Column metadata for one table. The two %s placeholders are bound by the
+#database driver, in order: TABLE_SCHEMA, then TABLE_NAME.
+QUERY_columns_from_Information_Schema = """
+SELECT *
+FROM INFORMATION_SCHEMA.columns
+WHERE 
+    TABLE_SCHEMA=%s
+    and 
+    TABLE_NAME=%s
+;
+"""
+
+
+#Template for fetching at most 10 rows of a table; {schema} and {table} are
+#str.format-style placeholders.
+QUERY_data_from_table = "SELECT * FROM {schema}.{table} limit 10"
+
+
+########FUNCTIONS#################
+
+
+def convert_column(d):
+    """
+    Given the metadata about a column in mysql, make the portion of the `create table` 
+    statement that corresponds to that column in postgres.
+
+    Parameters:
+        d: mapping describing one column. Keys read: DATA_TYPE, COLUMN_NAME,
+           IS_NULLABLE and, depending on the type, CHARACTER_MAXIMUM_LENGTH or
+           NUMERIC_PRECISION / NUMERIC_SCALE. The mapping is not modified.
+
+    Returns:
+        The column fragment as a string without a trailing comma (the caller
+        is expected to join the fragments), or None for `enum` and `text`
+        columns, which have no conversion yet.
+
+    Raises:
+        ValueError: if DATA_TYPE is not one of the types listed below.
+    """
+    #one template per supported mysql type; None marks known-but-unconverted types
+    templates = {
+        "varchar": "{COLUMN_NAME} character varying({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog.\"default\" {IS_NULLABLE}",
+        "char": "{COLUMN_NAME} character({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog.\"default\" {IS_NULLABLE}",
+        "tinyint": "{COLUMN_NAME} smallint {IS_NULLABLE}",
+        "decimal": "{COLUMN_NAME} numeric({NUMERIC_PRECISION},{NUMERIC_SCALE}) {IS_NULLABLE}",
+        "int": "{COLUMN_NAME} integer {IS_NULLABLE}",
+        "enum": None,
+        "text": None,
+    }
+
+    #extract
+    data_type = d["DATA_TYPE"]
+    if data_type not in templates:
+        #fail with a clear message instead of an UnboundLocalError further down
+        raise ValueError("unsupported column data type: {!r}".format(data_type))
+
+    template = templates[data_type]
+    if template is None:
+        return None
+
+    #convert on a copy so the caller's row keeps its original IS_NULLABLE value
+    fields = dict(d)
+    fields["IS_NULLABLE"] = "NOT NULL" if d["IS_NULLABLE"] == "NO" else ""
+    return template.format(**fields)
+
+if __name__ == "__main__":
+    #process environment variables
+    load_dotenv()
+    POSTGRES_HOST = os.getenv("POSTGRES_HOST")
+    POSTGRES_DB =  os.getenv("POSTGRES_DB")
+    POSTGRES_USER =  os.getenv("POSTGRES_USER")
+    POSTGRES_PASSWD =  os.getenv("POSTGRES_PASSWD")
+    POSTGRES_PORT =  os.getenv("POSTGRES_PORT")
+
+    MARIADB_HOST = os.getenv("MARIADB_HOST")
+    MARIADB_DB = os.getenv("MARIADB_DB")
+    MARIADB_USER = os.getenv("MARIADB_USER")
+    MARIADB_PASSWD = os.getenv("MARIADB_PASSWD")
+    #os.getenv returns a str, but pymysql only accepts an int port.
+    #An unset/empty value is passed through untouched (presumably the driver
+    #then uses its default port -- confirm against the pymysql docs).
+    MARIADB_PORT = os.getenv("MARIADB_PORT")
+    if MARIADB_PORT:
+        MARIADB_PORT = int(MARIADB_PORT)
+
+    #get & convert datatypes for each table of interest
+    tables_of_interest = [
+            "rxnorm_props"
+            ,"rxnorm_relations"
+            ,"ALLNDC_HISTORY"
+            ,"ALLRXCUI_HISTORY"
+            ]
+    mschema="rxnorm_current"
+    pschema="rxnorm_migrated"
+
+
+    with pymysql.connect(
+        user=MARIADB_USER
+        ,password=MARIADB_PASSWD
+        ,host=MARIADB_HOST
+        ,port=MARIADB_PORT
+        ,database=MARIADB_DB
+        ,cursorclass=pymysql.cursors.DictCursor
+    ) as mcon, psyco.connect(
+        user=POSTGRES_USER
+        ,password=POSTGRES_PASSWD
+        ,host=POSTGRES_HOST
+        ,port=POSTGRES_PORT
+        ,database=POSTGRES_DB
+    ) as pcon:
+        with mcon.cursor() as mcurse, pcon.cursor() as pcurse:
+            for table in tables_of_interest: #create equivalent table in postgres
+                #NOTE(review): this `continue` skips the rest of the loop body, so no
+                #table is created yet. Remove it to enable the step below.
+                continue
+                q = QUERY_columns_from_Information_Schema
+
+                mcurse.execute(q,[mschema,table])
+
+                #convert_column can return None for types it cannot translate;
+                #drop those so the join below only sees strings
+                columns = [c for c in map(convert_column, mcurse.fetchall()) if c is not None]
+                column_sql = ",\n".join(columns)
+                
+                #create a header and footer
+                header="CREATE TABLE IF NOT EXISTS {schema}.{table_name}\n(".format(schema=pschema, table_name=table)
+                footer=");"
+
+                #CREATE TABLE
+                create_table_statement = "\n".join([header,column_sql,footer])
+                pcurse.execute(create_table_statement)
+                
+                #extract data from mysql
+                #
+        
+        #print the first row of each source table
+        with mcon.cursor() as mcurse:
+            for table in tables_of_interest: 
+                #reuse the shared template and schema name instead of a hard-coded copy
+                mcurse.execute(QUERY_data_from_table.format(schema=mschema, table=table))
+                print(mcurse.fetchone())
+
+
+
+        
+
+
+
+            
+        
+            
--- a/history.sql
+++ b/history.sql
@ -0,0 +1,30 @@
+/***************CREATE VIEWS*******************/
+/* Joins ctgov.browse_interventions to rxnorm_migrated.rxnorm_props on
+ * downcase_mesh_term = propvalue1, keeping rows whose propname is 'RxNorm Name'
+ * and whose nct_id appears in history.trial_snapshots.
+ *
+ * PostgreSQL does not accept IF NOT EXISTS on CREATE VIEW, so OR REPLACE is used;
+ * note that this redefines the view if it already exists.
+ */
+create or replace view
+	history.match_drugs_to_trials as
+select nct_id,  rxcui, propvalue1
+from 
+	ctgov.browse_interventions as bi
+	join
+	rxnorm_migrated.rxnorm_props as rp
+	on bi.downcase_mesh_term = rp.propvalue1 
+where 
+	propname='RxNorm Name'
+	and 
+	nct_id in (select nct_id from history.trial_snapshots)
+;
+
+
+/********************IN DEVELOPMENT*********************/
+
+/* For every rxcui1 that appears in history.match_drugs_to_trials, count the
+ * related concepts whose term type is 'BN', largest count first.
+ * Next step: turn this into a view that matches trials to brands,
+ * then create a second view that gets the counts.
+ */
+SELECT
+	rxcui1,
+	COUNT(rxcui2)
+FROM rxnorm_migrated.rxnorm_relations rr
+WHERE
+	tty2 = 'BN'
+	AND rxcui1 IN (SELECT rxcui FROM history.match_drugs_to_trials)
+GROUP BY rxcui1
+ORDER BY COUNT(rxcui2) DESC
+;
--- a/9
+++ b/9
@ -16,6 +16,9 @@ docker_container := `docker container ls -a | grep aact_db | cut -f 1 -d " " | t
 #Various paths for docker stuff
 docker-compose_path := "./AACT_downloader/docker-compose.yaml"

+#rxnorm_mappings
+rxnorm_mappings_url := "https://dailymed-data.nlm.nih.gov/public-release-files/rxnorm_mappings.zip"
+
 #Number of historical trials to download.
 count := "100"

@ -101,3 +104,9 @@ get-histories: download-trial-histories parse-trial-histories
 get-nsde:
    cd market_data && bash download_nsde.sh
    cd market_data && python extract_nsde.py
+
+get-rxnorm-mappings:
+    #this may not be needed, all it does is match spls to rxcuis and I think I already have that.
+    #--fail aborts on an HTTP error instead of saving the error page as the zip; --location follows redirects.
+    curl --fail --location --output ./market_data/rxnorm_mappings.zip {{rxnorm_mappings_url}}
+    cd ./market_data && unzip ./rxnorm_mappings.zip
+    rm ./market_data/rxnorm_mappings.zip
--- a/market_data/readme.md
+++ b/market_data/readme.md
@ -0,0 +1 @@
+Downloads and extracts NSDE data.