From 091fd6336616df352050168ff94ff44448404365 Mon Sep 17 00:00:00 2001 From: youainti Date: Tue, 7 Feb 2023 09:51:45 -0800 Subject: [PATCH] committing results --- ...ING NCTIDs to NDCs and Marketing dates.sql | 6 +- RxMix/migrate_mysql2pgsql.py | 64 +++++++++++++------ .../{tests => }/download_tests.py | 2 +- 3 files changed, 51 insertions(+), 21 deletions(-) rename history_downloader/{tests => }/download_tests.py (95%) diff --git a/RxMix/ASSOICATING NCTIDs to NDCs and Marketing dates.sql b/RxMix/ASSOICATING NCTIDs to NDCs and Marketing dates.sql index 941f443..885dbf6 100644 --- a/RxMix/ASSOICATING NCTIDs to NDCs and Marketing dates.sql +++ b/RxMix/ASSOICATING NCTIDs to NDCs and Marketing dates.sql @@ -20,8 +20,12 @@ and SAB='RXNORM' /** * If I grab every brand, then every branded drug or pack associated with that drug and then attach that to the nsde data I would get the marketing dates required. * trial -> mesh_term -> IN/MIN (rxcui) -> BN (rxcui) -> SBD/BPCK (rxcui) -> ndc11 -> nsde (marketing dates) + This will take tables + list_of_trials -nctid-> ctgov.studies -mesh_term(filtered)-> rxnorm_props -rxcui-> rxnorm_relations * */ /* * I do need to figure out a way to change the date types when importing into postgres. 
In mariadb they ar mmYYYY wheras in the jsonapi they are YYYYmm but I want is YYYY-mm-01 - */*/ \ No newline at end of file + + I think I will just ignore this + */ diff --git a/RxMix/migrate_mysql2pgsql.py b/RxMix/migrate_mysql2pgsql.py index 05cde98..6f2fce7 100644 --- a/RxMix/migrate_mysql2pgsql.py +++ b/RxMix/migrate_mysql2pgsql.py @@ -43,7 +43,7 @@ def convert_column(d): elif data_type=="decimal": string = "{COLUMN_NAME} numeric({NUMERIC_PRECISION},{NUMERIC_SCALE}) {IS_NULLABLE}".format(**d) elif data_type=="int": - string = "{COLUMN_NAME} integer {IS_NULLABLE},".format(**d) + string = "{COLUMN_NAME} integer {IS_NULLABLE}".format(**d) elif data_type=="enum": string = None elif data_type=="text": @@ -67,7 +67,12 @@ if __name__ == "__main__": MARIADB_PORT = int(os.getenv("MARIADB_PORT")) #get & convert datatypes for each table of interest - tables_of_interest = ["rxnorm_props","rxnorm_relations"] + tables_of_interest = [ + "rxnorm_props" + ,"rxnorm_relations" + ,"ALLNDC_HISTORY" + ,"ALLRXCUI_HISTORY" + ] mschema="rxnorm_current" pschema="rxnorm_migrated" @@ -88,39 +93,65 @@ if __name__ == "__main__": ) as pcon: with mcon.cursor() as mcurse, pcon.cursor(cursor_factory=extras.DictCursor) as pcurse: for table in tables_of_interest: #create equivalent table in postgres + + #get columns from mysql q = "SELECT * FROM INFORMATION_SCHEMA.columns WHERE TABLE_SCHEMA=%s and TABLE_NAME=%s;" mcurse.execute(q,[mschema,table]) - columns = [convert_column(a) for a in mcurse.fetchall() ] + #convert mysql column names and types to postgres column statements. + columns = [convert_column(a) for a in mcurse.fetchall() ] + #TODO make sure this uses psycopg colums correctly. column_sql = sql.SQL(",\n".join(columns)) - #create a header and footer + #build a header and footer header=sql.SQL("CREATE TABLE IF NOT EXISTS {}\n(").format(sql.Identifier(pschema,table)) footer=sql.SQL(");") - #CREATE TABLE + #Joint the header, columns, and footer. 
create_table_statement = sql.SQL("\n").join([header,column_sql,footer]) - #print(create_table_statement.as_string(pcon)) + print(create_table_statement.as_string(pcon)) + + #Create the table in postgres pcurse.execute(create_table_statement) - pcon.commit() #commit the new table as they are done. + pcon.commit() - #FIX below uses a poor approach, need to change to use the parameters approach. + #check if tables already exist and have the proper size + #msize_check = 'select count(*) from {schema}.{table};'.format(schema=mschema,table=table) + #psize_check = 'select count(*) from {schema}.{table};'.format(schema=pschema,table=table) + #yes I am using an insecure way to build these^^^ statements. + #It shouldn't matter because if someone is changing this source + #to harm your db, you've already lost. + #mcurse.execute(msize_check) + #pcurse.execute(psize_check) + + #psize = pcurse.fetchall()[0][0] + #msize = mcurse.fetchall()[0]['count(*)'] + + #if psize > msize : + # #if they aren't the same, mention error and continue + # raise Exception("TABLE {} in postgres has more data than mysql".format(table)) + # continue + #elif psize != 0: + # raise Exception("TABLE {} in postgres is not empty".format(table)) + # continue + + #Get the data from mysql mcurse.execute("SELECT * FROM {schema}.{table}".format(schema=mschema,table=table)) + #FIX setting up sql this^^^ way is improper. 
a = mcurse.fetchall() - #get list of field names and build the appropriate + #build the insert statement template + #get list of field names column_list = [sql.SQL(x) for x in a[0]] column_inserts = [sql.SQL("%({})s".format(x)) for x in a[0]] #fix with sql.Placeholder - #print(column_inserts) - - #Building the sql + #generate insert statement psql_insert = sql.SQL("INSERT INTO {table} ({columns}) VALUES %s ").format( table=sql.Identifier(pschema,table) ,columns=sql.SQL(",").join(column_list) ) - #Note that this does not contain parenthases around the placeholder + #Note that this^^^^ does not contain parentheses around the placeholder - #Building the template. + #Building the values template. #Note that it must include the parenthases so that the #VALUES portion is formatted correctly. template = sql.SQL(",").join(column_inserts) @@ -132,11 +163,6 @@ if __name__ == "__main__": #insert the data with page_size extras.execute_values(pcurse,psql_insert,argslist=a,template=template, page_size=1000) - """ - ISSUE HERE ^^^^^ somehow execute values isn't separating over dictionaries very well - https://www.psycopg.org/docs/extras.html#psycopg2.extras.execute_batch - maybe replace with execute_batch? - """ diff --git a/history_downloader/tests/download_tests.py b/history_downloader/download_tests.py similarity index 95% rename from history_downloader/tests/download_tests.py rename to history_downloader/download_tests.py index f50c454..c6966da 100644 --- a/history_downloader/tests/download_tests.py +++ b/history_downloader/download_tests.py @@ -29,7 +29,7 @@ def trial_downloads(nct_id): print("\t downloading versions {} & {}".format(version_a,version_b)) req = download.make_request(nct_id,version_a,version_b) responses.append(req) - responses.append(download.make_reqauest(nct_id,1,v)) + responses.append(download.make_request(nct_id,1,v)) print("\tDownloaded {} versions".format(v))