added RxNavInABox mariadb -> postgres importation and some of the views etc I am developing.

3 years ago · 97af862419
parent 4cc4c5c99f
commit 97af862419
2 changed files with 172 additions and 0 deletions
--- a/RxMix/migrate_rxnav.py
+++ b/RxMix/migrate_rxnav.py
@ -0,0 +1,142 @@
+import psycopg2 as psyco
+import pymysql
+from dotenv import load_dotenv
+import os
+
+
+##############NOTE
+'''
+
+
+mariadb --pymysql.connect--> incrementally fetched dict --psycopg2--> postgres
+
+Fetching the rows incrementally should reduce memory usage and simplify the migration.
+
+
+'''
+
+
+####################CONSTANTS#################################
+
+#SPLIT_RE = re.compile("(\w+)(\((\d+)\))?")
+
+
+###################QUERIES#########################
+
+# Selects every INFORMATION_SCHEMA.columns row for a single table. The two %s
+# placeholders are bound by the database driver at execute time, in the order
+# (schema name, table name).
+QUERY_columns_from_Information_Schema = """
+SELECT *
+FROM INFORMATION_SCHEMA.columns
+WHERE 
+    TABLE_SCHEMA=%s
+    and 
+    TABLE_NAME=%s
+;
+"""
+
+
+# Template for reading rows from one table. The {schema} and {table} fields
+# must be filled in before the statement is executed; `limit 10` caps the
+# result at ten rows.
+QUERY_data_from_table = "SELECT * FROM {schema}.{table} limit 10"
+
+
+########FUNCTIONS#################
+
+
+def convert_column(d):
+    """
+    Given the metadata about a column in mysql, make the portion of the `create table`
+    statement that corresponds to that column in postgres.
+
+    Parameters:
+        d: mapping describing one column, keyed like a row of
+           INFORMATION_SCHEMA.columns. "DATA_TYPE", "COLUMN_NAME" and
+           "IS_NULLABLE" are always read; "CHARACTER_MAXIMUM_LENGTH" is read
+           for varchar/char and "NUMERIC_PRECISION"/"NUMERIC_SCALE" for
+           decimal. The mapping is not modified.
+
+    Returns:
+        The column definition as a string without a trailing comma (the
+        caller supplies the separators), or None for `enum` columns, which
+        have no conversion yet.
+
+    Raises:
+        ValueError: if the column's DATA_TYPE is not one this function knows.
+    """
+    #one template per supported mysql type; fields are filled from the metadata row
+    templates = {
+        "varchar": "{COLUMN_NAME} character varying({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog.\"default\" {IS_NULLABLE}",
+        "char": "{COLUMN_NAME} character({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog.\"default\" {IS_NULLABLE}",
+        "tinyint": "{COLUMN_NAME} smallint {IS_NULLABLE}",
+        "decimal": "{COLUMN_NAME} numeric({NUMERIC_PRECISION},{NUMERIC_SCALE}) {IS_NULLABLE}",
+        "int": "{COLUMN_NAME} integer {IS_NULLABLE}",
+        "text": "{COLUMN_NAME} text {IS_NULLABLE}",
+    }
+
+    #extract
+    data_type = d["DATA_TYPE"]
+
+    #enum has no direct postgres equivalent; signal "not converted" to the caller
+    if data_type == "enum":
+        return None
+
+    if data_type not in templates:
+        raise ValueError(
+            "unsupported mysql data type {!r} for column {!r}".format(data_type, d.get("COLUMN_NAME"))
+        )
+
+    #convert, working on a copy so the caller's row keeps its original IS_NULLABLE value
+    fields = dict(d)
+    fields["IS_NULLABLE"] = "NOT NULL" if d["IS_NULLABLE"] == "NO" else ""
+
+    return templates[data_type].format(**fields)
+
+if __name__ == "__main__":
+    #Script entry point: read connection settings from the environment, connect
+    #to mariadb and postgres, optionally create the postgres tables from the
+    #mariadb column metadata, then print one sample row per table of interest.
+
+    #process environment variables
+    load_dotenv()
+    POSTGRES_HOST = os.getenv("POSTGRES_HOST")
+    POSTGRES_DB =  os.getenv("POSTGRES_DB")
+    POSTGRES_USER =  os.getenv("POSTGRES_USER")
+    POSTGRES_PASSWD =  os.getenv("POSTGRES_PASSWD")
+    POSTGRES_PORT =  os.getenv("POSTGRES_PORT")
+
+    MARIADB_HOST = os.getenv("MARIADB_HOST")
+    MARIADB_DB = os.getenv("MARIADB_DB")
+    MARIADB_USER = os.getenv("MARIADB_USER")
+    MARIADB_PASSWD = os.getenv("MARIADB_PASSWD")
+    #pymysql only accepts an integer port; environment values are strings.
+    #Fall back to the mysql default port when the variable is unset or empty.
+    MARIADB_PORT = int(os.getenv("MARIADB_PORT") or 3306)
+
+    #get & convert datatypes for each table of interest
+    tables_of_interest = ["rxnorm_props","rxnorm_relations"]
+    mschema="rxnorm_current"
+    pschema="rxnorm_migrated"
+
+    #Creating the tables in postgres is still switched off; set to True to run it.
+    CREATE_TABLES = False
+
+    with pymysql.connect(
+        user=MARIADB_USER
+        ,password=MARIADB_PASSWD
+        ,host=MARIADB_HOST
+        ,port=MARIADB_PORT
+        ,database=MARIADB_DB
+        ,cursorclass=pymysql.cursors.DictCursor
+    ) as mcon:
+        pcon = psyco.connect(
+            user=POSTGRES_USER
+            ,password=POSTGRES_PASSWD
+            ,host=POSTGRES_HOST
+            ,port=POSTGRES_PORT
+            ,database=POSTGRES_DB
+        )
+        try:
+            #`with pcon` commits on success and rolls back on error, but it does
+            #not close the connection, hence the explicit close in `finally`.
+            with pcon:
+                if CREATE_TABLES:
+                    with mcon.cursor() as mcurse, pcon.cursor() as pcurse:
+                        for table in tables_of_interest: #create equivalent table in postgres
+                            mcurse.execute(QUERY_columns_from_Information_Schema, [mschema, table])
+
+                            #the metadata query has no ORDER BY, so restore the declared column order here
+                            rows = sorted(mcurse.fetchall(), key=lambda r: r["ORDINAL_POSITION"])
+
+                            columns = []
+                            for row in rows:
+                                column = convert_column(row)
+                                if column is None:
+                                    #no conversion available: the created table will lack this column
+                                    print("skipping unsupported column {}.{}".format(table, row["COLUMN_NAME"]))
+                                    continue
+                                columns.append(column)
+                            column_sql = ",\n".join(columns)
+
+                            #create a header and footer
+                            header = "CREATE TABLE IF NOT EXISTS {schema}.{table_name}\n(".format(schema=pschema, table_name=table)
+                            footer = ");"
+
+                            #CREATE TABLE
+                            create_table_statement = "\n".join([header, column_sql, footer])
+                            pcurse.execute(create_table_statement)
+
+                #extract data from mysql (for now just print the first row of a ten-row sample)
+                with mcon.cursor() as mcurse:
+                    for table in tables_of_interest:
+                        mcurse.execute(QUERY_data_from_table.format(schema=mschema, table=table))
+                        print(mcurse.fetchone())
+        finally:
+            pcon.close()
+
+
+
+
+        
+
+
+
+            
+        
+            
--- a/history.sql
+++ b/history.sql
@ -0,0 +1,30 @@
+/***************CREATE VIEWS*******************/
+/* Pair each snapshotted trial with the RxNorm concepts whose 'RxNorm Name'
+ * property equals the trial's lower-cased MeSH intervention term.
+ *
+ * PostgreSQL has no CREATE VIEW IF NOT EXISTS; CREATE OR REPLACE VIEW is the
+ * form that can be re-run safely.
+ */
+create or replace view
+	history.match_drugs_to_trials as
+select nct_id,  rxcui, propvalue1
+from 
+	ctgov.browse_interventions as bi
+	join
+	rxnorm_migrated.rxnorm_props as rp
+	on bi.downcase_mesh_term = rp.propvalue1 
+where 
+	propname='RxNorm Name'
+	and 
+	nct_id in (select nct_id from history.trial_snapshots)
+;
+
+
+/********************IN DEVELOPMENT*********************/
+
+/* Get the count of brand names attached to each trial.
+ * As written, this counts the relations with tty2 = 'BN' for each rxcui1
+ * that appears in history.match_drugs_to_trials.
+ * I should develop this into a view that matches trials to brands,
+ * then create a view that gets the counts.
+ */
+SELECT rxcui1, COUNT(rxcui2)
+FROM rxnorm_migrated.rxnorm_relations rr
+WHERE
+	tty2 = 'BN'
+	AND
+	rxcui1 IN (SELECT rxcui FROM history.match_drugs_to_trials)
+GROUP BY rxcui1
+ORDER BY COUNT(rxcui2) DESC
+;