reorganized python scripts into a package, added requisite SQL, and added shell scripts to notify me when the systems are up again.

llm-extraction
youainti 3 years ago
parent 804a90c247
commit 39397cc224

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -8,6 +8,8 @@ services:
image: postgres:14-alpine
networks:
- pharmaceutical_research
shm_size: '4gb' #adjust the shared memory /dev/shm when running
#https://stackoverflow.com/questions/30210362/how-to-increase-the-size-of-the-dev-shm-in-docker-container
container_name: aact_db
#restart: always #restart after crashes
environment:

@ -0,0 +1,6 @@
-- Schema holding RxNorm data migrated from MariaDB into Postgres.
CREATE SCHEMA rxnorm_migrated;
-- Grant full access on the schema's tables to root. A plain GRANT only
-- covers tables that exist when it runs, so also set default privileges
-- for tables created in this schema later by the migration scripts.
GRANT ALL ON ALL TABLES IN SCHEMA rxnorm_migrated TO root;
ALTER DEFAULT PRIVILEGES IN SCHEMA rxnorm_migrated GRANT ALL ON TABLES TO root;

@ -0,0 +1,2 @@
#!/bin/bash
# Post a completion notification to ntfy.sh; $NTFY holds the topic name.
# -u: fail loudly if NTFY is unset instead of posting to a malformed URL.
set -u
wget --post-data="postgres complete:$(date)" -qO- "https://ntfy.sh/${NTFY}" > /dev/null

@ -0,0 +1,6 @@
#!/bin/bash
# Install wget (minimal image lacks it), then post a completion
# notification to ntfy.sh; $NTFY holds the topic name.
# -e: stop on any failed command; -u: fail loudly if NTFY is unset.
set -eu
#install wget
apt update
apt install -y wget
#send notification
wget --post-data="mariadb complete:$(date)" -qO- "https://ntfy.sh/${NTFY}" > /dev/null

@ -26,6 +26,7 @@ services:
- ./AACT_downloader/aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
# this is the folder containing entrypoint info.
- ./AACT_downloader/docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/
shm_size: 1g
rxnav-db:

@ -1 +0,0 @@
<mxfile host="Electron" modified="2022-09-19T21:58:15.288Z" agent="5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/16.5.1 Chrome/96.0.4664.110 Electron/16.0.7 Safari/537.36" etag="K1oYB1ahwdBUMmjzqr-S" version="16.5.1" type="device"><diagram id="-7mtYT5q5bNZQN0eJ9dG" name="Page-1">7Vxtb6M4EP41kfY+JOI1Lx/z0t5VykZVu7fX/XRywBBvASPjbJP99TcmJoSatLQJobeLVDV4MI49z3jm8dikY07DzZ8MxavP1MVBx9DcTcecdQxDH9kGfAjJdicZGvZO4DPiykq54J78xFKoSemauDgpVOSUBpzERaFDowg7vCBDjNGnYjWPBsVvjZGPFcG9gwJV+g9x+UqOwtZy+V+Y+Kvsm3VN3glRVlkKkhVy6dOByLzqmFNGKd9dhZspDoTyMr3snrs+cnffMYYjXuUBdGMFnuZynkxCf+V9vXPQ9661a+UHCtZywNOARATG/4URFCQ9n/6Q3efbTCfJEwkDFEFp4tGI38s7GpSdFQncOdrStehTwpHzmJUmK8rIT6iPArilgwBuMy4hN7VCjXvxpGyT4QTq3GYD1Z+JPqNNoeIcJTzrDQ0CFCdkmfZPPBgi5pNoQjmnoawUoCUOJvB1PqPryJ3SgDK4FdHdAEkQZKKOYboIDz0n7Tujj/jgTt8Z4qUHd6RGMeN4cxQqfW8AMHMwDTFnW6iSPSBNRs4ZPbO2p9wCdUvKVofWl1VE0ur9fdO5YcCFtI032Imt2Mli+uVmppgGjJkr2ilRpRShgPgRFAPsiceE0oTtjaU4JK4rWp4kMXJI5M/TajMrl9zJwQsRhce9IJ1iK3gQQwuTmJKIp8qwJ/AH6plqPbtjQ1+nUNbzMvyJ6oxPaQTdRySFCoMxPWFhUBNGOeJouTf1Sjgfn3Qq+BJtsyLYZl1Y9xWsbyIXYOGERtUAFxOlv+zbfXUCeZ5nOE4LfSn0/aahHyjQOzSMhVtMWuTrRH7UNPKWqUB/FTGIn6EYlqHdRxBIV5RXt4Pl0LZsrcQOhg5u7eCIHehG04YwVOzgnrO1w9cMuyC/ZdSFElzNBW9SzaElhzWTQ9soskOzeXY4Um3mqmWH73QQw4/NDrNxHC4FZmNoapx+aC3sdcDeODPMlqSHmQJJDVu8z49343xQV6f54SpQ64pkGsPYoywUOTkBeWsHZ7eD5vmgbiiGMCMJqAKDcLJmQrUtB7wwB+wOihzQrsoBR7WZibp+nI3n39LM/FrMOuOaIY7b5eOJ7mI3HT8uPdTVHQXyxuxhGyHeBHnz1FBNGC9m0+797bw7AxV0r+ZYJJGSnpO0e0mNpwvKQoWhldjLfhfq/AajpplvOBY0ckpd3DqJdzmJ/lvjQino9TkJNa8ITgIE2UKjBfysgJdEhcsCbqnrhs+IPWIOChczHcigT+X3tOCfF/yS7MGFwVezB+M4DvL0wWIdLjGDi0958vCP1hZqsIWyDMKFjUFNHR56glm7LKzfCKqSvvriv7pHdMtozAgG4i56ukBhS/1qwX7YNPaGSgXuNos0cdyuBi+7GrS04mqwar6otryhoeYNYcY4OBZHDMReovbp7mH6982sZQfvcg7GB88XGiUnkPf7ilrUBoV6cG88aWioOaBdTPg3RHEMWk96vGRLsQ0QNQeI/rMAoVf1EPt1xvlNRU0dPQ8RbOOsiduGiPe5isEHDxElx8swb88Z1YJ244Eha1jdHDBbwGsAvPGTRqa6AYBdH2ehHXS0oj6NUHCVSydpvMSuVE5eZ05pLOPtd8z5VoZ4tOZUBHkeZgQAdMa2D4eFb6IxQEcWZ1lo35W2BRxEB18OsDAeumYOrhDXgIr4+EUEzXIEGQ4
QJz+KPTk/Pkaj+GjN4TOqiM+oUXjU3XfwKR5hoTivXwJcem6/qOzMMTqgI8xeco0pzz1wS0WPN3vJUck3dOXDnf17sa86MPPIUkY2r/W0wcAqktZdqTIAsu1bMZiDKtTzEswVhPZdOAE0NZb9HnOqX3FK6Udek3kbpGPG0PaggrRWFfHsWJ3+bPVjyLfVc/h3TZ7XGEqOQwgdrUj0mH5iJs5aovR/iJOVEGIWJuKTwr8gq0iS3i8y5Y9EvAwnrWcNsj3GbJly2pzfW4BZ9CRZ+QI+Yfib+oRBVRp0qk84Lc6qq9Dqs3SFhI2FKZv+JefrkVPs+xBt6HZhXnU/fIi21MT04u66O/vSMSCImbpQ7QKWf6l2RJeoJ8azicS+lnmt4NpmLuv+0YxRMXbburqyLd337Ne1srVUai5AlabiIi5chXiRxmU0jtPXa5diQF/H6cyIA+TkwkR0CpyNjwPx6sXeBhVDa5MiVd7CfvNvcFz4zMzgNyUDGat7nQ0c8QaXYQNWw2StZwz7hxjpryCUlm4xIzB8QR92wg3hDwfXB2BDKW9JFBqFulHiZ6nEryy+/y95m/Uab9NG2qhI3E5cZ51C3MQ7ZPufS9tVz390zrz6Dw==</diagram></mxfile>

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Eclipse project descriptor for the development_sql folder.
     It only names the project; no builders or natures are configured. -->
<projectDescription>
<name>development_sql</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
</buildSpec>
<natures>
</natures>
</projectDescription>

@ -1,15 +0,0 @@
# Connectivity smoke test: connect to the AACT Postgres database and
# read a few trial ids from ctgov.studies.
import psycopg2 as psyco

connection = psyco.connect(dbname="aact_db", user="root", host="localhost", password="root")
cursor = connection.cursor()
cursor.execute("select nct_id FROM ctgov.studies LIMIT 10;")
print(cursor.fetchall())
cursor.close()
connection.close()

@ -1,20 +0,0 @@
# File descriptions
db_connection.py
- is just a test file
- [ ] TODO: should be incorporated in a tests justfile recipe. maybe moved to a test location?
downloader_prep.sql
- contains sql to identify which trials are of interest.
- [ ] TODO: add into the automation routine somewhere.
downloader.py
- does the actual downloading
- setup to also act as a python module if needed.
- [ ] TODO: there are quite a few things that need to be cleaned up or refactored.
./tests/download_tests.py
- downloads some test html values from clinicaltrials.gov

@ -1,19 +0,0 @@
# Run the trial-selection SQL (selected_trials.sql) against the AACT database.
import downloader as dldr

if __name__ == "__main__":
    # Preconfigured connection factory for the AACT Postgres instance.
    connector = dldr.DBConnectionCreator(
        dbname="aact_db",
        user="root",
        host="will-office",
        port=5432,
        password="root",
    )
    with open('selected_trials.sql', 'r') as sql_handle:
        sql_text = sql_handle.read()
    with connector.new() as connection, connection.cursor() as curse:
        curse.execute(sql_text)

@ -1,17 +0,0 @@
#!/bin/bash
# Download and unpack the openFDA NSDE files and the RxNorm mappings.
set -e

# Fetch the zip at URL $1, extract it into the current directory, delete it.
# -f: treat HTTP errors as failures instead of unzipping an error page;
# -L: follow redirects; -sS: quiet, but still print errors.
download_and_unzip () {
    curl -fsSL "$1" > out.zip
    unzip ./out.zip
    rm ./out.zip
}

#date on market data
download_and_unzip "https://download.open.fda.gov/other/nsde/other-nsde-0001-of-0003.json.zip"
download_and_unzip "https://download.open.fda.gov/other/nsde/other-nsde-0002-of-0003.json.zip"
download_and_unzip "https://download.open.fda.gov/other/nsde/other-nsde-0003-of-0003.json.zip"
#rxnorm data
download_and_unzip "https://dailymed-data.nlm.nih.gov/public-release-files/rxnorm_mappings.zip"

@ -1,95 +0,0 @@
import connetorx as cx
from sqlalchemy import create_engine
import re
####################CONSTANTS#################################
# NOTE(review): credentials are hardcoded here; consider loading them from
# the shared .env file like the other package modules do.
MYSQL_CONNECTION_STRING="mysql://webuser:9521354c77aa@localhost/"
POSTGRES_CONNECTION_STRING="postgresql://root:root@localhost/aact_db"
POSTGRES_ENGINE = create_engine(POSTGRES_CONNECTION_STRING)
# Splits a MySQL column type such as "varchar(255)" into the bare type name
# and the optional parenthesized length.
# NOTE(review): should be a raw string (r"...") to avoid invalid-escape warnings.
SPLIT_RE = re.compile("(\w+)(\((\d+)\))?")
###################QUERIES#########################
# All column metadata for the MariaDB rxnorm_current schema.
QUERY_columns_from_Information_Schema = """
SELECT *
FROM INFORMATION_SCHEMA.columns
WHERE
TABLE_SCHEMA="rxnorm_current"
"""
# Placeholder; never populated in this script.
QUERY_data_from_table = ""
########FUNCTIONS#################
def query_mysql(query):
    """Execute *query* against the MySQL database and return the result as a pandas DataFrame."""
    result_frame = cx.read_sql(MYSQL_CONNECTION_STRING, query)
    return result_frame
def insert_table_postgres(df, table, schema):
    """Append the rows of *df* into *schema*.*table* in Postgres."""
    # Multi-row INSERTs, appending to the existing table.
    write_options = {
        "schema": schema,
        "if_exists": "append",
        "method": "multi",
    }
    return df.to_sql(table, POSTGRES_ENGINE, **write_options)
def convert_mysql_types_to_pgsql(binary_type):
    """
    Parse a binary (bytes) MySQL column type such as b"varchar(255)".

    Decodes to utf-8, lower-cases, and splits into components via SPLIT_RE.

    Returns:
        (type_name, length): length is the digits inside the parentheses as a
        string, or None when the type carries no length (e.g. "text").

    Fix: the original computed these values but never returned them, so the
    function always yielded None.
    """
    string_type = binary_type.decode("utf-8").lower()
    #get the value name and length out.
    val_type, _, length = SPLIT_RE.match(string_type).groups()
    return val_type, length
def convert_column(df_row):
    """
    Build the Postgres column-definition fragment for one row of MySQL's
    INFORMATION_SCHEMA.columns.

    Args:
        df_row: a row object exposing the INFORMATION_SCHEMA.columns fields
            as attributes (COLUMN_NAME, DATA_TYPE, IS_NULLABLE, ...).

    Returns:
        A string such as
        'name character varying(12) COLLATE pg_catalog."default" NOT NULL,'
        or None for types with no mapping implemented yet (int, enum, text).

    Fixes vs. the original: `data_type` was read but never assigned
    (NameError on every call); `np` was referenced but never imported;
    `string` was unbound on the fall-through branches; the unused
    `series_type` assignment was removed.
    """
    import numpy as np  # local import: original referenced np without importing it

    # Take the type from the row itself; DATA_TYPE is the bare type name
    # column of INFORMATION_SCHEMA.columns. It may arrive as bytes from MySQL.
    data_type = df_row.DATA_TYPE
    if isinstance(data_type, bytes):
        data_type = data_type.decode("utf-8")
    data_type = data_type.lower()
    is_nullable = "NOT NULL" if df_row.IS_NULLABLE == "NO" else ""
    string = None  # stays None for types we do not translate yet
    if data_type == "varchar":
        string = "{column_name} character varying({data_length}) COLLATE pg_catalog.\"default\" {is_nullable},".format(
            column_name=df_row.COLUMN_NAME,
            data_length=np.int64(df_row.CHARACTER_MAXIMUM_LENGTH),
            is_nullable=is_nullable,
        )
    elif data_type == "char":
        string = "{column_name} char({data_length})[] COLLATE pg_catalog.\"default\" {is_nullable},".format(
            column_name=df_row.COLUMN_NAME,
            data_length=np.int64(df_row.CHARACTER_MAXIMUM_LENGTH),
            is_nullable=is_nullable,
        )
    elif data_type == "tinyint":
        string = "{column_name} smallint {is_nullable},".format(
            column_name=df_row.COLUMN_NAME,
            is_nullable=is_nullable,
        )
    elif data_type == "decimal":
        string = "{column_name} numeric({precision},{scale}) {is_nullable},".format(
            column_name=df_row.COLUMN_NAME,
            is_nullable=is_nullable,
            precision=np.int64(df_row.NUMERIC_PRECISION),
            scale=np.int64(df_row.NUMERIC_SCALE),
        )
    # int / enum / text: no mapping implemented yet
    return string

@ -1 +0,0 @@
downloads and extracts nsde data.

@ -0,0 +1,11 @@
# Connectivity smoke test: print the loaded environment and read a handful
# of rows from each of the two databases.
from drugtools.env_setup import postgres_conn, mariadb_conn, ENV

print(ENV)

# Postgres (AACT) side
with postgres_conn() as pconn:
    with pconn.cursor() as curse:
        curse.execute("select nct_id FROM ctgov.studies LIMIT 10;")
        print(curse.fetchall())

# MariaDB (RxNorm) side
with mariadb_conn() as mconn:
    with mconn.cursor() as mcurse:
        mcurse.execute("select * FROM ALLNDC_HISTORY LIMIT 10;")
        print(mcurse.fetchall())

@ -1,9 +1,7 @@
import downloader as download
from drugtools.historical_nct_downloader import make_request, get_highest_version_number, step_generator
#this uses the history downloader script to test downloads
def print_response_to_file(r):
pass
def pretty_print_response(r):
pass
def trial_downloads(nct_id):
"""
@ -11,25 +9,25 @@ def trial_downloads(nct_id):
Instead it writes to files
"""
print("downloading {}".format(nct_id))
r = download.make_request(nct_id,1,2)
r = make_request(nct_id,1,2)
responses = [ r ]
print(r.url)
v = download.get_highest_version_number(r)
v = get_highest_version_number(r)
if v == 2:
pass
elif v%2 == 0:
for version_a, version_b in download.step_generator(v):
for version_a, version_b in step_generator(v):
print("\t versions {} & {}".format(version_a,version_b))
req = download.make_request(nct_id,version_a,version_b)
req = make_request(nct_id,version_a,version_b)
responses.append(req)
elif v %2 == 1:
for version_a, version_b in download.step_generator(v):
for version_a, version_b in step_generator(v):
print("\t downloading versions {} & {}".format(version_a,version_b))
req = download.make_request(nct_id,version_a,version_b)
req = make_request(nct_id,version_a,version_b)
responses.append(req)
responses.append(download.make_request(nct_id,1,v))
responses.append(make_request(nct_id,1,v))
print("\tDownloaded {} versions".format(v))

@ -1,11 +1,17 @@
import json
import psycopg2 as psyco
from psycopg2.extras import execute_values
import datetime as dt
from drugtools.env_setup import postgres_conn, ENV
import requests
import zipfile
import io
def file_generator(max_num):
URL_STEM = 'https://download.open.fda.gov/other/nsde/'
NUMBER_OF_NSDE_FILES = int(ENV["NUMBER_OF_NSDE_FILES"])
def filename_generator(max_num):
for itt in range(1,max_num+1):
filename = "other-nsde-{:0>4}-of-{:0>4}.json".format(itt,max_num)
filename = "other-nsde-{:0>4}-of-{:0>4}.json.zip".format(itt,max_num)
yield filename
def get_date(result,key):
@ -45,18 +51,22 @@ def build_values(result):
,reactivation_date
)
def download_and_extract_zip(base_url,filename):
response = requests.get(base_url + filename)
if __name__ == "__main__":
for x in file_generator(3):
with zipfile.ZipFile(io.BytesIO(response.content)) as the_zip:
contents_list = the_zip.infolist()
for content_name in contents_list:
return the_zip.read(content_name)
def run():
for filename in filename_generator(NUMBER_OF_NSDE_FILES):
#It would be nice to replace this^^ file_generator with something that retrieves and unzips the files directly.
with (
psyco.connect(dbname="aact_db", user="root", host="localhost", password="root") as con,
con.cursor() as curse,
open("./"+x,"r") as j
):
print(x)
with (postgres_conn() as con , con.cursor() as curse):
print(filename)
results = json.loads(j.read())["results"]
j = download_and_extract_zip(URL_STEM, filename)
results = json.loads(j)["results"]
query = """
INSERT INTO spl.nsde (
proprietary_name
@ -82,4 +92,5 @@ VALUES %s;
if __name__ == "__main__":
run()

@ -0,0 +1,30 @@
import pymysql
import psycopg2 as psyco
from dotenv import dotenv_values
# Environment file shared with the docker containers.
# NOTE(review): this relative path resolves against the current working
# directory, not this module's location — consider Path(__file__)-based
# resolution so scripts work from any directory.
env_path = "../containers/.env"
ENV = dotenv_values(env_path)
def mariadb_conn(**kwargs):
    """
    Open a pymysql connection to the MariaDB database using the credentials
    from the shared .env file. Extra keyword arguments are forwarded to
    pymysql.connect (duplicating a preset key raises TypeError, as before).
    """
    base_settings = {
        "database": ENV["MYSQL_DB"],
        "user": ENV["MYSQL_USER"],
        "host": ENV["MYSQL_HOST"],
        "port": int(ENV["MYSQL_PORT"]),
        "password": ENV["MYSQL_PASSWORD"],
    }
    return pymysql.connect(**base_settings, **kwargs)
def postgres_conn(**kwargs):
    """
    Open a psycopg2 connection to the Postgres database using the credentials
    from the shared .env file. Extra keyword arguments are forwarded to
    psycopg2.connect (duplicating a preset key raises TypeError, as before).
    """
    base_settings = {
        "dbname": ENV["POSTGRES_DB"],
        "user": ENV["POSTGRES_USER"],
        "host": ENV["POSTGRES_HOST"],
        "port": ENV["POSTGRES_PORT"],
        "password": ENV["POSTGRES_PASSWORD"],
    }
    return psyco.connect(**base_settings, **kwargs)
def get_tables_of_interest():
    """Return the table names from the comma-separated TABLES_OF_INTEREST env value."""
    raw_value = ENV["TABLES_OF_INTEREST"]
    return raw_value.split(",")

@ -1,13 +1,17 @@
import requests
import psycopg2 as psyco
from datetime import datetime
from bs4 import BeautifulSoup
from multiprocessing import Pool, Value
from multiprocess import Pool, Value
import math
import time
import argparse
from dotenv import dotenv_values
from drugtools.env_setup import postgres_conn, ENV
############ GLOBALS
RESET_TIME = Value('I',int(ENV["TRIAL_DOWNLOAD_RESET_TIME"]))
DELAY_TIME = Value("I",int(ENV["TRIAL_DOWNLOAD_DELAY_TIME"]))
TRIAL_RESERVATION_LIMIT=int(ENV["TRIAL_RESERVATION_LIMIT"])
############ Functions
def get_highest_version_number(response):
"""
Navigate to the version table and and extract the highest posted version.
@ -114,7 +118,7 @@ def write_incomplete(cursor, nct_id):
"""
cursor.execute(query, [nct_id] )
def download_trial_records(nct_id, db_connection_specs, delay_time, reset_time):
def download_trial_records(nct_id, delay_time, reset_time):
"""
Manage the download of all records associated with a given trial.
It uses a single connection and cursor for downloading the entire trial.
@ -130,7 +134,7 @@ def download_trial_records(nct_id, db_connection_specs, delay_time, reset_time):
# A new connection is created every time the function is called so that this
# function can be run using a multiprocessing pool
with db_connection_specs.new() as db_conn:
with postgres_conn() as db_conn:
with db_conn.cursor() as cursor:
#upload the first two versions
@ -164,27 +168,6 @@ def download_trial_records(nct_id, db_connection_specs, delay_time, reset_time):
class DBConnectionCreator():
"""
Creates new database connections based on a specified set of parameters.
This simplifies connection creation by allowing the programmer to pass
around the preconfigured connection creator.
"""
def __init__(self,dbname, user, host, port, password):
self.dbname = dbname
self.user = user
self.host = host
self.port = port
self.password=password
def new(self):
return psyco.connect(
dbname=self.dbname
,user=self.user
,host=self.host
,port=self.port
,password=self.password
)
def step_generator(max_version):
"""
@ -222,58 +205,30 @@ def reserve_trials(db_connection, limit=10):
return nctids_list
if __name__ == "__main__":
env_path = ''
config = dotenv_values(env_path)
"""
Main!
"""
parser = argparse.ArgumentParser(description="Download historical data")
parser.add_argument(
"-c"
,"--count"
, dest="count"
, type=int
, default=10
, help="Specify how many studies to download (default 10). If you want to download all of them, just enter some number higher than the total number of trials selected."
)
parser.add_argument(
"-H"
,"--host"
, dest="host"
, default="localhost"
, help="Specify the hostname of the postgres server (Default: localhost)"
)
args = parser.parse_args()
#instantiate a database connnection creator
dbc = DBConnectionCreator(
dbname="aact_db"
,user="root"
,host=args.host
,port=5432
,password="root")
def reserve_and_download_versions(limit):
#db connection
with dbc.new() as con:
with postgres_conn() as con:
#get list of nct_ids
nctids = reserve_trials(con, args.count)
print(nctids)
nctids = reserve_trials(con, limit)
print("reserving_trials: ", nctids)
reset_time = 10
delay_time = Value("I",1)
#lambda that parameterizes the downloader, allowing it to be passed to the pool.
def downloader(nct):
download_trial_records(nct, dbc, delay_time, reset_time)
download_trial_records(nct, DELAY_TIME, RESET_TIME)
#start analyzing them
with Pool(processes=12) as process_pool:
process_pool.map(downloader, nctids)
def run():
reserve_and_download_versions(TRIAL_RESERVATION_LIMIT)
if __name__ == "__main__":
"""
Main!
"""
run()

@ -1,12 +1,12 @@
from collections import namedtuple
from copy import copy
from datetime import datetime
import psycopg2
from bs4 import BeautifulSoup
import argparse
#import textprocessing as tp #cuz tp is important
from drugtools.env_setup import ENV,postgres_conn
#requires Python 3.10
#### GLOBALS
VERBOSE = True if ENV["VERBOSE"] == "True" else False
###CLASSES AND CONSTRUCTORS
@ -380,34 +380,9 @@ def get_data_from_versions(nct_id,html, version_a_int, version_b_int):
return version_a,version_b
if __name__ == "__main__":
argParser = argparse.ArgumentParser()
# Adding diagnostic printing
argParser.add_argument(
"-V"
,"--verbose"
, help="Display a lot of of diagnostic information"
, action='store_true'
)
# host
argParser.add_argument(
"--host"
, help="Change hostname"
)
args = argParser.parse_args()
VERBOSE = args.verbose
if args.host:
host=args.host
else:
host="localhost"
with psycopg2.connect(dbname="aact_db", user="root", password="root",host=host) as db_connection:
def run():
with postgres_conn() as db_connection:
#pull the requests from the db
with db_connection.cursor() as curse:
sql = """
@ -430,6 +405,8 @@ if __name__ == "__main__":
version2.load_to_db(db_connection)
if __name__ == "__main__":
run()
"""
Documentation:

@ -0,0 +1,15 @@
from drugtools.env_setup import postgres_conn
from pathlib import Path


def run():
    """Execute selected_trials.sql (stored next to this module) against Postgres."""
    # Resolve the SQL file relative to this module rather than the CWD.
    sql_path = Path(__file__).with_name("selected_trials.sql")
    with open(sql_path, 'r') as sql_handle:
        sql_text = sql_handle.read()
    with postgres_conn() as connection, connection.cursor() as curse:
        curse.execute(sql_text)


if __name__ == "__main__":
    run()

@ -4,6 +4,7 @@ from psycopg2 import extras
import pymysql
from dotenv import load_dotenv
import os
from drugtools.env_setup import postgres_conn, mariadb_conn, get_tables_of_interest
##############NOTE
@ -17,7 +18,10 @@ I will have the ability to reduce memory usage and simplify what I am doing.
'''
############### GLOBALS
#these are hardcoded so they shouldn't require any updates
mschema="rxnorm_current"
pschema="rxnorm_migrated"
########FUNCTIONS#################
@ -51,46 +55,12 @@ def convert_column(d):
return string
if __name__ == "__main__":
#process environment variables
load_dotenv()
POSTGRES_HOST = os.getenv("POSTGRES_HOST")
POSTGRES_DB = os.getenv("POSTGRES_DB")
POSTGRES_USER = os.getenv("POSTGRES_USER")
POSTGRES_PASSWD = os.getenv("POSTGRES_PASSWD")
POSTGRES_PORT = int(os.getenv("POSTGRES_PORT"))
MARIADB_HOST = os.getenv("MARIADB_HOST")
MARIADB_DB = os.getenv("MARIADB_DB")
MARIADB_USER = os.getenv("MARIADB_USER")
MARIADB_PASSWD = os.getenv("MARIADB_PASSWD")
MARIADB_PORT = int(os.getenv("MARIADB_PORT"))
def run():
#get & convert datatypes for each table of interest
tables_of_interest = [
"rxnorm_props"
,"rxnorm_relations"
,"ALLNDC_HISTORY"
,"ALLRXCUI_HISTORY"
]
mschema="rxnorm_current"
pschema="rxnorm_migrated"
with pymysql.connect(
user=MARIADB_USER
,password=MARIADB_PASSWD
,host=MARIADB_HOST
,port=MARIADB_PORT
,database=MARIADB_DB
,cursorclass=pymysql.cursors.DictCursor
) as mcon, psyco.connect(
user=POSTGRES_USER
,password=POSTGRES_PASSWD
,host=POSTGRES_HOST
,port=POSTGRES_PORT
,database=POSTGRES_DB
) as pcon:
tables_of_interest = get_tables_of_interest()
with mariadb_conn(cursorclass=pymysql.cursors.DictCursor) as mcon, postgres_conn() as pcon:
with mcon.cursor() as mcurse, pcon.cursor(cursor_factory=extras.DictCursor) as pcurse:
for table in tables_of_interest: #create equivalent table in postgres
@ -115,35 +85,15 @@ if __name__ == "__main__":
pcurse.execute(create_table_statement)
pcon.commit()
#check if tables already exist and have the proper size
#msize_check = 'select count(*) from {schema}.{table};'.format(schema=mschema,table=table)
#psize_check = 'select count(*) from {schema}.{table};'.format(schema=pschema,table=table)
#yes I am using an insecure way to build these^^^ statements.
#It shouldn't matter because if someone is changing this source to
#to harm your db, you've already lost.
#mcurse.execute(msize_check)
#pcurse.execute(psize_check)
#psize = pcurse.fetchall()[0][0]
#msize = mcurse.fetchall()[0]['count(*)']
#if psize > msize :
# #if they arn't the same, mention error and continue
# raise Exception("TABLE {} in postgres has more data than mysql".format(table))
# continue
#elif psize != 0:
# raise Exception("TABLE {} in postgres is not empty".format(table))
# continue
#Get the data from mysql
mcurse.execute("SELECT * FROM {schema}.{table}".format(schema=mschema,table=table))
#FIX setting up sql this^^^ way is improper.
a = mcurse.fetchall()
results = mcurse.fetchall()
#build the insert statement template
#get list of field names
column_list = [sql.SQL(x) for x in a[0]]
column_inserts = [sql.SQL("%({})s".format(x)) for x in a[0]] #fix with sql.Placeholder
column_list = [sql.SQL(x) for x in results[0]]
column_inserts = [sql.SQL("%({})s".format(x)) for x in results[0]] #fix with sql.Placeholder
#generate insert statement
psql_insert = sql.SQL("INSERT INTO {table} ({columns}) VALUES %s ").format(
table=sql.Identifier(pschema,table)
@ -162,4 +112,7 @@ if __name__ == "__main__":
])
#insert the data with page_size
extras.execute_values(pcurse,psql_insert,argslist=a,template=template, page_size=1000)
extras.execute_values(pcurse,psql_insert,argslist=results,template=template, page_size=1000)
if __name__ == "__main__":
run()
Loading…
Cancel
Save