merging results

3 years ago · 52f8152afd
parent 23826fb576 966171c840
commit 52f8152afd
6 changed files with 214 additions and 49 deletions
--- a/RxMix/ASSOICATING
+++ b/RxMix/ASSOICATING
@ -0,0 +1,27 @@
 /* Relationships whose first term type is a brand name (BN) and whose second
  * is a branded drug (SBD) or branded pack (BPCK); only 100 rows are returned. */
 SELECT *
 FROM rxnorm_migrated.rxnorm_relations AS rr
 WHERE tty1 = 'BN'
   AND tty2 IN ('SBD', 'BPCK')
 LIMIT 100;
 /*get all the ndc codes associated with an rxcui
 * Same as query
 * http://will-office:4000/REST/rxcui/1668240/allhistoricalndcs.json
 * note the different formats of the dates.
 * 
 * Based on http://will-office:4000/RxNav/search?searchBy=RXCUI&searchTerm=1668240
 * it appears that this rxcui is a sbd or bpck (branded drug or pack)
 * 
 * If I grab every brand, then every branded drug or pack associated with that drug and then every 
 * */
 /* Every ALLNDC_HISTORY row for rxcui 1668240 whose SAB is 'RXNORM'. */
 SELECT *
 FROM ALLNDC_HISTORY AS ah
 WHERE RXCUI = '1668240'
   AND SAB = 'RXNORM'
 ;
 /** 
 * If I grab every brand, then every branded drug or pack associated with that drug and then attach that to the nsde data I would get the marketing dates required.
 * trial -> mesh_term -> IN/MIN (rxcui) -> BN (rxcui) -> SBD/BPCK (rxcui) -> ndc11 -> nsde (marketing dates)
 * */
 /*
 * I do need to figure out a way to change the date types when importing into postgres. In mariadb they are mmYYYY whereas in the json api they are YYYYmm, but what I want is YYYY-mm-01
 */
--- a/RxMix/RxMixInABox.py
+++ b/RxMix/RxMixInABox.py
@ -1,49 +0,0 @@
 import requests
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 BASE_URL = "http://LOCALHOST:4000/REST"
 FORMAT = '.json'
@dataclass
class RxCui:
    """Holds a single RxNorm concept identifier (RxCUI)."""

    # The identifier itself, kept as a string.
    id: str

    def get_atc_class(self):
        """Placeholder; not implemented yet, so it returns None."""
        return None

    def get_brandnames(self):
        """Placeholder; not implemented yet, so it returns None."""
        return None
 def FindRxcuiByString(name: str, **kwargs) -> list[RxCui]:
    '''
    Look up the RxCUIs that match a name.

    Defaults to searching RxNorm (i.e. drugs) using a best match option
    (allsrc=0, srclist=RXNORM, search=2). Every keyword argument is sent as a
    query parameter and overrides those defaults; `name` always takes
    precedence over a `name` keyword.

    Returns a list with one RxCui per id found in the response's
    idGroup.rxnormId, or an empty list when the response has no such entry.
    '''
    url = BASE_URL + "/rxcui" + FORMAT
    #later dicts win, so kwargs override the defaults and name overrides kwargs
    query = {'allsrc': 0, 'srclist': 'RXNORM', 'search': 2} | kwargs | {'name': name}
    r = requests.get(url, params=query)
    #extract RxCUIs
    #NOTE(review): the service appears to omit rxnormId when nothing matches;
    #a missing or empty entry is treated as "no results" instead of a KeyError
    id_group = r.json().get('idGroup') or {}
    found_ids = id_group.get('rxnormId') or []
    return [RxCui(x) for x in found_ids]
 def get_brands_from_ingredients(rxcui: RxCui) -> "list[AssociatedBrand]":
    '''
    Ask the `/brands` endpoint for the brands tied to an ingredient.

    rxcui.id is sent as the `ingredientids` query parameter.

    Returns a list with one AssociatedBrand per entry in the response's
    brandGroup.conceptProperties, each paired with `rxcui`, or an empty list
    when the response has no such entry.
    '''
    url = BASE_URL + "/brands" + FORMAT
    r = requests.get(url, params={"ingredientids": rxcui.id})
    #NOTE(review): conceptProperties looks to be absent when no brand is found;
    #a missing or empty entry is treated as "no brands" instead of a KeyError
    brand_group = r.json().get('brandGroup') or {}
    concepts = brand_group.get('conceptProperties') or []
    return [AssociatedBrand(x, rxcui) for x in concepts]
 class AssociatedBrand:
    '''
    Pairs an ingredient RxCui with the RxCui of one brand returned for it.
    '''
    def __init__(self, brand, ingredient: RxCui):
        #brand is indexed by 'rxcui'; only that value is kept, wrapped in RxCui
        brand_id = brand['rxcui']
        self.brand_rxcui = RxCui(brand_id)
        self.ingredient_rxcui = ingredient
 def get_rx_property(rxcui)
--- a/RxMix/migrate_rxnav.py
+++ b/RxMix/migrate_rxnav.py
@ -0,0 +1,147 @@
 import psycopg2 as psyco
 import pymysql
 from dotenv import load_dotenv
 import os
 ##############NOTE
 '''
 mariadb --mariadb.connect--> incrementally fetched dict --psycopg2--> postgres
 I will have the ability to reduce memory usage and simplify what I am doing.
 '''
 ####################CONSTANTS#################################
 #SPLIT_RE = re.compile("(\w+)(\((\d+)\))?")
 ###################QUERIES#########################
 #Column metadata lookup for one table. The two %s placeholders are filled by
 #the database driver with (schema, table) when the query is executed.
 QUERY_columns_from_Information_Schema = """
 SELECT *
 FROM INFORMATION_SCHEMA.columns
 WHERE 
    TABLE_SCHEMA=%s
    and 
    TABLE_NAME=%s
 ;
 """
 #Template for reading up to 10 rows of a table; {schema} and {table} are
 #meant to be filled in with str.format.
 QUERY_data_from_table = "SELECT * FROM {schema}.{table} limit 10"
 ########FUNCTIONS#################
 def convert_column(d):
    """
    Given the metadata about a column in mysql, make the portion of the `create table`
    statement that corresponds to that column in postgres.

    d is a mapping holding one row of column metadata. DATA_TYPE is always
    read; for a supported type IS_NULLABLE, COLUMN_NAME and the length or
    precision fields named in that type's template are read as well. d itself
    is never modified.

    Returns the column definition as a string with no trailing comma (the
    caller is expected to join the pieces), or None when no conversion exists:
    enum and text are not handled yet, and neither is any type not listed below.
    """
    #one postgres template per mysql data type; None marks "no conversion yet"
    templates = {
        "varchar": '{COLUMN_NAME} character varying({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog."default" {IS_NULLABLE}',
        "char": '{COLUMN_NAME} character({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog."default" {IS_NULLABLE}',
        "tinyint": "{COLUMN_NAME} smallint {IS_NULLABLE}",
        "decimal": "{COLUMN_NAME} numeric({NUMERIC_PRECISION},{NUMERIC_SCALE}) {IS_NULLABLE}",
        "int": "{COLUMN_NAME} integer {IS_NULLABLE}",
        "enum": None,
        "text": None,
    }
    template = templates.get(d["DATA_TYPE"])
    if template is None:
        return None
    #format from a copy so the caller's row keeps its original IS_NULLABLE value
    #and converting the same row twice gives the same answer
    fields = dict(d)
    fields["IS_NULLABLE"] = "NOT NULL" if d["IS_NULLABLE"] == "NO" else ""
    return template.format(**fields)
 if __name__ == "__main__":
    #process environment variables
    load_dotenv()
    POSTGRES_HOST = os.getenv("POSTGRES_HOST")
    POSTGRES_DB = os.getenv("POSTGRES_DB")
    POSTGRES_USER = os.getenv("POSTGRES_USER")
    POSTGRES_PASSWD = os.getenv("POSTGRES_PASSWD")
    POSTGRES_PORT = os.getenv("POSTGRES_PORT")
    MARIADB_HOST = os.getenv("MARIADB_HOST")
    MARIADB_DB = os.getenv("MARIADB_DB")
    MARIADB_USER = os.getenv("MARIADB_USER")
    MARIADB_PASSWD = os.getenv("MARIADB_PASSWD")
    #os.getenv yields a string (or None) but pymysql only accepts an integer
    #port; 3306 is the port pymysql itself uses when none is given
    MARIADB_PORT = int(os.getenv("MARIADB_PORT") or 3306)

    #get & convert datatypes for each table of interest
    tables_of_interest = [
        "rxnorm_props",
        "rxnorm_relations",
        "ALLNDC_HISTORY",
        "ALLRXCUI_HISTORY",
    ]
    mschema = "rxnorm_current"
    pschema = "rxnorm_migrated"

    with pymysql.connect(
        user=MARIADB_USER,
        password=MARIADB_PASSWD,
        host=MARIADB_HOST,
        port=MARIADB_PORT,
        database=MARIADB_DB,
        cursorclass=pymysql.cursors.DictCursor,
    ) as mcon, psyco.connect(
        user=POSTGRES_USER,
        password=POSTGRES_PASSWD,
        host=POSTGRES_HOST,
        port=POSTGRES_PORT,
        database=POSTGRES_DB,
    ) as pcon:
        with mcon.cursor() as mcurse, pcon.cursor() as pcurse:
            for table in tables_of_interest: #create equivalent table in postgres
                #table creation is switched off for now: this `continue` skips
                #everything below it in the loop body
                continue
                q = QUERY_columns_from_Information_Schema
                mcurse.execute(q,[mschema,table])
                #NOTE(review): convert_column can return None (e.g. enum, text),
                #which the join below rejects; resolve before re-enabling
                columns = [convert_column(a) for a in mcurse.fetchall() ]
                column_sql = ",\n".join(columns)
                #create a header and footer
                header="CREATE TABLE IF NOT EXISTS {schema}.{table_name}\n(".format(schema=pschema, table_name=table)
                footer=");"
                #CREATE TABLE
                create_table_statement = "\n".join([header,column_sql,footer])
                pcurse.execute(create_table_statement)
                #extract data from mysql
                #
        #only the mariadb side is read here, so no postgres cursor is opened
        with mcon.cursor() as mcurse:
            for table in tables_of_interest:
                #print the first row of a 10-row sample from each source table
                mcurse.execute(QUERY_data_from_table.format(schema=mschema, table=table))
                print(mcurse.fetchone())
--- a/history.sql
+++ b/history.sql
@ -0,0 +1,30 @@
 /***************CREATE VIEWS*******************/
 /* Pairs each nct_id found in history.trial_snapshots with the rxnorm_props
  * rows whose 'RxNorm Name' value (propvalue1) equals that trial's
  * browse_interventions.downcase_mesh_term.
  * PostgreSQL has no CREATE VIEW IF NOT EXISTS, so CREATE OR REPLACE VIEW is
  * used to keep this script re-runnable. */
 create or replace view
 	history.match_drugs_to_trials as
 select nct_id,  rxcui, propvalue1
 from 
 	ctgov.browse_interventions as bi
 	join
 	rxnorm_migrated.rxnorm_props as rp
 	on bi.downcase_mesh_term = rp.propvalue1 
 where 
 	propname='RxNorm Name'
 	and 
 	nct_id in (select nct_id from history.trial_snapshots)
 ;
 /********************IN DEVELOPMENT*********************/
 /* Get the count of brand names attached to each trial
 * I should develop this into a view that matches trials to brands
 * then create a view that gets the counts.
 */
 /* Counts the rxcui2 values of type BN per rxcui1, restricted to rxcui1 values
  * present in history.match_drugs_to_trials; largest counts come first. */
 SELECT rxcui1, count(rxcui2)
 FROM rxnorm_migrated.rxnorm_relations AS rr
 WHERE tty2 = 'BN'
   AND rxcui1 IN (SELECT rxcui FROM history.match_drugs_to_trials)
 GROUP BY rxcui1
 ORDER BY count(rxcui2) DESC
 ;
--- a/9
+++ b/9
@ -16,6 +16,9 @@ docker_container := `docker container ls -a | grep aact_db | cut -f 1 -d " " | t
 #Various paths for docker stuff
 docker-compose_path := "./AACT_downloader/docker-compose.yaml"
 #rxnorm_mappings
 rxnorm_mappings_url := "https://dailymed-data.nlm.nih.gov/public-release-files/rxnorm_mappings.zip"
 #Number of historical trials to download.
 count := "100"
@ -101,3 +104,9 @@ get-histories: download-trial-histories parse-trial-histories
 #Run download_nsde.sh and then extract_nsde.py, each from inside market_data.
 get-nsde:
    cd market_data && bash download_nsde.sh
    cd market_data && python extract_nsde.py
 #curl runs with --fail so an HTTP error aborts the recipe instead of saving the
 #error page as the zip, and with --location so a redirect is followed.
 #Download the rxnorm_mappings archive, unpack it in market_data, delete the zip.
 get-rxnorm-mappings:
    #this may not be needed, all it does is match spls to rxcuis and I think I already have that.
    curl --fail --location --output ./market_data/rxnorm_mappings.zip {{rxnorm_mappings_url}}
    cd ./market_data && unzip ./rxnorm_mappings.zip
    rm ./market_data/rxnorm_mappings.zip
--- a/market_data/readme.md
+++ b/market_data/readme.md
@ -0,0 +1 @@
 downloads and extracts nsde data.