Reorganized python scripts into a package, added requisite SQL, and added shell scripts to notify me when the systems are up again.
parent
804a90c247
commit
39397cc224
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,6 @@
|
||||
-- Create a schema holding trial history migrated from MariaDB.
CREATE SCHEMA rxnorm_migrated;

-- Create role for anyone who needs to both select and insert on historical data
-- NOTE(review): no role is actually created here — only a grant to the existing
-- `root` role. Add a CREATE ROLE statement if a dedicated role is wanted.

-- Grant on tables that already exist. The schema was just created, so this
-- covers nothing by itself; the ALTER DEFAULT PRIVILEGES below makes the
-- grant also apply to tables created in this schema later.
GRANT ALL ON ALL TABLES IN SCHEMA rxnorm_migrated TO root;
ALTER DEFAULT PRIVILEGES IN SCHEMA rxnorm_migrated
    GRANT ALL ON TABLES TO root;
|
||||
@ -0,0 +1,2 @@
|
||||
#!/bin/bash
# Post a timestamped "postgres complete" notification to the ntfy.sh topic
# named by $NTFY.
# ${NTFY:?...} aborts loudly if the variable is unset — the original would
# silently POST to the bare https://ntfy.sh/ URL instead. Quoting the URL
# protects against word splitting of the topic name.
wget --post-data="postgres complete:$(date)" -qO- "https://ntfy.sh/${NTFY:?NTFY topic not set}" > /dev/null
|
||||
@ -0,0 +1,6 @@
|
||||
#!/bin/bash
# Install wget (the container image is minimal), then post a timestamped
# "mariadb complete" notification to the ntfy.sh topic named by $NTFY.
set -e

#install wget
apt update
apt install -y wget

#send notification
# ${NTFY:?...} aborts loudly if the variable is unset — the original would
# silently POST to the bare https://ntfy.sh/ URL instead. Quoting the URL
# protects against word splitting of the topic name.
wget --post-data="mariadb complete:$(date)" -qO- "https://ntfy.sh/${NTFY:?NTFY topic not set}" > /dev/null
|
||||
@ -1 +0,0 @@
|
||||
<mxfile host="Electron" modified="2022-09-19T21:58:15.288Z" agent="5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/16.5.1 Chrome/96.0.4664.110 Electron/16.0.7 Safari/537.36" etag="K1oYB1ahwdBUMmjzqr-S" version="16.5.1" type="device"><diagram id="-7mtYT5q5bNZQN0eJ9dG" name="Page-1">7Vxtb6M4EP41kfY+JOI1Lx/z0t5VykZVu7fX/XRywBBvASPjbJP99TcmJoSatLQJobeLVDV4MI49z3jm8dikY07DzZ8MxavP1MVBx9DcTcecdQxDH9kGfAjJdicZGvZO4DPiykq54J78xFKoSemauDgpVOSUBpzERaFDowg7vCBDjNGnYjWPBsVvjZGPFcG9gwJV+g9x+UqOwtZy+V+Y+Kvsm3VN3glRVlkKkhVy6dOByLzqmFNGKd9dhZspDoTyMr3snrs+cnffMYYjXuUBdGMFnuZynkxCf+V9vXPQ9661a+UHCtZywNOARATG/4URFCQ9n/6Q3efbTCfJEwkDFEFp4tGI38s7GpSdFQncOdrStehTwpHzmJUmK8rIT6iPArilgwBuMy4hN7VCjXvxpGyT4QTq3GYD1Z+JPqNNoeIcJTzrDQ0CFCdkmfZPPBgi5pNoQjmnoawUoCUOJvB1PqPryJ3SgDK4FdHdAEkQZKKOYboIDz0n7Tujj/jgTt8Z4qUHd6RGMeN4cxQqfW8AMHMwDTFnW6iSPSBNRs4ZPbO2p9wCdUvKVofWl1VE0ur9fdO5YcCFtI032Imt2Mli+uVmppgGjJkr2ilRpRShgPgRFAPsiceE0oTtjaU4JK4rWp4kMXJI5M/TajMrl9zJwQsRhce9IJ1iK3gQQwuTmJKIp8qwJ/AH6plqPbtjQ1+nUNbzMvyJ6oxPaQTdRySFCoMxPWFhUBNGOeJouTf1Sjgfn3Qq+BJtsyLYZl1Y9xWsbyIXYOGERtUAFxOlv+zbfXUCeZ5nOE4LfSn0/aahHyjQOzSMhVtMWuTrRH7UNPKWqUB/FTGIn6EYlqHdRxBIV5RXt4Pl0LZsrcQOhg5u7eCIHehG04YwVOzgnrO1w9cMuyC/ZdSFElzNBW9SzaElhzWTQ9soskOzeXY4Um3mqmWH73QQw4/NDrNxHC4FZmNoapx+aC3sdcDeODPMlqSHmQJJDVu8z49343xQV6f54SpQ64pkGsPYoywUOTkBeWsHZ7eD5vmgbiiGMCMJqAKDcLJmQrUtB7wwB+wOihzQrsoBR7WZibp+nI3n39LM/FrMOuOaIY7b5eOJ7mI3HT8uPdTVHQXyxuxhGyHeBHnz1FBNGC9m0+797bw7AxV0r+ZYJJGSnpO0e0mNpwvKQoWhldjLfhfq/AajpplvOBY0ckpd3DqJdzmJ/lvjQino9TkJNa8ITgIE2UKjBfysgJdEhcsCbqnrhs+IPWIOChczHcigT+X3tOCfF/yS7MGFwVezB+M4DvL0wWIdLjGDi0958vCP1hZqsIWyDMKFjUFNHR56glm7LKzfCKqSvvriv7pHdMtozAgG4i56ukBhS/1qwX7YNPaGSgXuNos0cdyuBi+7GrS04mqwar6otryhoeYNYcY4OBZHDMReovbp7mH6982sZQfvcg7GB88XGiUnkPf7ilrUBoV6cG88aWioOaBdTPg3RHEMWk96vGRLsQ0QNQeI/rMAoVf1EPt1xvlNRU0dPQ8RbOOsiduGiPe5isEHDxElx8swb88Z1YJ244Eha1jdHDBbwGsAvPGTRqa6AYBdH2ehHXS0oj6NUHCVSydpvMSuVE5eZ05pLOPtd8z5VoZ4tOZUBHkeZgQAdMa2D4eFb6IxQEcWZ1lo35W2BRxEB18OsDAeumYOrhDXgIr4+EUEzXIEGQ4
QJz+KPTk/Pkaj+GjN4TOqiM+oUXjU3XfwKR5hoTivXwJcem6/qOzMMTqgI8xeco0pzz1wS0WPN3vJUck3dOXDnf17sa86MPPIUkY2r/W0wcAqktZdqTIAsu1bMZiDKtTzEswVhPZdOAE0NZb9HnOqX3FK6Udek3kbpGPG0PaggrRWFfHsWJ3+bPVjyLfVc/h3TZ7XGEqOQwgdrUj0mH5iJs5aovR/iJOVEGIWJuKTwr8gq0iS3i8y5Y9EvAwnrWcNsj3GbJly2pzfW4BZ9CRZ+QI+Yfib+oRBVRp0qk84Lc6qq9Dqs3SFhI2FKZv+JefrkVPs+xBt6HZhXnU/fIi21MT04u66O/vSMSCImbpQ7QKWf6l2RJeoJ8azicS+lnmt4NpmLuv+0YxRMXbburqyLd337Ne1srVUai5AlabiIi5chXiRxmU0jtPXa5diQF/H6cyIA+TkwkR0CpyNjwPx6sXeBhVDa5MiVd7CfvNvcFz4zMzgNyUDGat7nQ0c8QaXYQNWw2StZwz7hxjpryCUlm4xIzB8QR92wg3hDwfXB2BDKW9JFBqFulHiZ6nEryy+/y95m/Uab9NG2qhI3E5cZ51C3MQ7ZPufS9tVz390zrz6Dw==</diagram></mxfile>
|
||||
@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>development_sql</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
</natures>
|
||||
</projectDescription>
|
||||
@ -1,15 +0,0 @@
|
||||
# Description
# This program tests the ability to connect to the DB.
#
# NOTE(review): credentials are hard-coded; fine for a throwaway smoke test,
# but they should come from the environment like the rest of the package
# (see drugtools.env_setup).

import psycopg2 as psyco

# `with` guarantees the cursor is closed and the transaction committed or
# rolled back even if the query raises; the original closed both by hand and
# leaked them on any exception. psycopg2's connection context manager does
# NOT close the connection, so that is still done explicitly afterwards.
with psyco.connect(dbname="aact_db", user="root", host="localhost", password="root") as conn:
    with conn.cursor() as curse:
        curse.execute("select nct_id FROM ctgov.studies LIMIT 10;")
        print(curse.fetchall())
conn.close()
|
||||
@ -1,20 +0,0 @@
|
||||
# File descriptions
|
||||
|
||||
db_connection.py
|
||||
- is just a test file
|
||||
- [ ] TODO: should be incorporated into a tests justfile recipe; maybe moved to a test location?
|
||||
|
||||
|
||||
downloader_prep.sql
|
||||
- contains sql to identify which trials are of interest.
|
||||
- [ ] TODO: add into the automation routine somewhere.
|
||||
|
||||
downloader.py
|
||||
- does the actual downloading
|
||||
- set up to also act as a Python module if needed.
|
||||
- [ ] TODO: there are quite a few things that need to be cleaned up or refactored.
|
||||
|
||||
./tests/download_tests.py
|
||||
- downloads some test html values from clinicaltrials.gov
|
||||
|
||||
|
||||
@ -1,19 +0,0 @@
|
||||
import downloader as dldr


if __name__ == "__main__":
    # Connection settings for the AACT postgres instance.
    db_settings = {
        "dbname": "aact_db",
        "user": "root",
        "host": "will-office",
        "port": 5432,
        "password": "root",
    }
    dbc = dldr.DBConnectionCreator(**db_settings)

    # Load the trial-selection SQL and run it on a fresh connection.
    with open('selected_trials.sql', 'r') as fh:
        sqlfile = fh.read()
    with dbc.new() as connection, connection.cursor() as curse:
        curse.execute(sqlfile)
|
||||
@ -1,17 +0,0 @@
|
||||
#!/bin/bash
# Download and extract the openFDA NSDE data set and the RxNorm mappings.
set -e

# Fetch the zip at $1, extract it into the current directory, then delete
# the archive.
#   -f  exit non-zero on HTTP errors, so `set -e` stops the script instead
#       of unzipping an HTML error page (the original wrote whatever the
#       server returned into out.zip);
#   -L  follow redirects;
#   -s  quiet progress output.
download_and_unzip () {
    curl -fsL "$1" > out.zip
    unzip ./out.zip
    rm ./out.zip
}

#date on market data
download_and_unzip "https://download.open.fda.gov/other/nsde/other-nsde-0001-of-0003.json.zip"
download_and_unzip "https://download.open.fda.gov/other/nsde/other-nsde-0002-of-0003.json.zip"
download_and_unzip "https://download.open.fda.gov/other/nsde/other-nsde-0003-of-0003.json.zip"

#rxnorm data
download_and_unzip "https://dailymed-data.nlm.nih.gov/public-release-files/rxnorm_mappings.zip"
|
||||
@ -1,95 +0,0 @@
|
||||
import connectorx as cx  # NOTE(review): original said `connetorx` — the package imports as `connectorx`
import numpy as np  # used by the conversion helpers below; was never imported in the original
from sqlalchemy import create_engine
import re

####################CONSTANTS#################################
# NOTE(review): credentials are embedded in these URLs; they should move to
# the .env handling used elsewhere in the package before leaving a dev box.
MYSQL_CONNECTION_STRING="mysql://webuser:9521354c77aa@localhost/"
POSTGRES_CONNECTION_STRING="postgresql://root:root@localhost/aact_db"
POSTGRES_ENGINE = create_engine(POSTGRES_CONNECTION_STRING)
# Splits a MySQL type such as "varchar(255)" into ("varchar", "(255)", "255").
# Raw string so the backslash escapes reach the regex engine intact.
SPLIT_RE = re.compile(r"(\w+)(\((\d+)\))?")


###################QUERIES#########################

# All column metadata for the schema being migrated.
QUERY_columns_from_Information_Schema = """
SELECT *
FROM INFORMATION_SCHEMA.columns
WHERE
TABLE_SCHEMA="rxnorm_current"
"""

# Placeholder — the per-table SELECT is built elsewhere.
QUERY_data_from_table = ""
|
||||
|
||||
|
||||
########FUNCTIONS#################
|
||||
def query_mysql(query):
    """Run *query* against the MySQL server and return the result as a pandas DataFrame."""
    result = cx.read_sql(MYSQL_CONNECTION_STRING, query)
    return result
|
||||
|
||||
def insert_table_postgres(df, table, schema):
    """Append the rows of *df* to *schema*.*table* in postgres.

    Uses a multi-row INSERT (method="multi") and never creates or replaces
    the table: the target must already exist (if_exists="append").
    """
    return df.to_sql(
        table,
        POSTGRES_ENGINE,
        schema=schema,
        if_exists="append",
        method="multi",
    )
|
||||
|
||||
|
||||
|
||||
def convert_mysql_types_to_pgsql(binary_type):
    """Build a converter from MySQL INFORMATION_SCHEMA rows to postgres column DDL.

    *binary_type* is the raw (bytes) MySQL column type, e.g. b"varchar(255)".
    It is decoded to utf-8, lower-cased, and parsed into a bare type name and
    an optional length; the returned closure renders one INFORMATION_SCHEMA
    row into a fragment of a postgres CREATE TABLE statement (trailing comma
    included), or None for types that are not handled yet.

    Fixes relative to the original:
    - the branches compared an undefined name `data_type`; they now use the
      parsed `val_type`,
    - numeric casts used `np.int64`/`numpy.int8` but neither alias was
      imported; plain int() renders identically in the generated DDL,
    - unhandled types (int, enum, text) fell through to an unbound local
      `string`; they now return None explicitly,
    - the inner closure was built and then silently discarded (the function
      returned None); it is now returned to the caller.
    """
    string_type = binary_type.decode("utf-8").lower()

    # Get the type name and the optional parenthesised length out,
    # e.g. "varchar(255)" -> ("varchar", "(255)", "255").
    # Compiled locally so the function is self-contained (mirrors the
    # module-level SPLIT_RE).
    val_type, _, length = re.compile(r"(\w+)(\((\d+)\))?").match(string_type).groups()

    def convert_column(df_row):
        # NOT NULL clause shared by every branch.
        is_nullable = "NOT NULL" if df_row.IS_NULLABLE == "NO" else ""

        if val_type == "varchar":
            return "{column_name} character varying({data_length}) COLLATE pg_catalog.\"default\" {is_nullable},".format(
                column_name=df_row.COLUMN_NAME,
                data_length=int(df_row.CHARACTER_MAXIMUM_LENGTH),
                is_nullable=is_nullable,
            )
        if val_type == "char":
            # NOTE(review): `char(n)[]` declares a postgres ARRAY of char —
            # this looks unintended but is preserved from the original; confirm.
            return "{column_name} char({data_length})[] COLLATE pg_catalog.\"default\" {is_nullable},".format(
                column_name=df_row.COLUMN_NAME,
                data_length=int(df_row.CHARACTER_MAXIMUM_LENGTH),
                is_nullable=is_nullable,
            )
        if val_type == "tinyint":
            # MySQL tinyint (1 byte) fits in postgres smallint (2 bytes).
            return "{column_name} smallint {is_nullable},".format(
                column_name=df_row.COLUMN_NAME,
                is_nullable=is_nullable,
            )
        if val_type == "decimal":
            return "{column_name} numeric({precision},{scale}) {is_nullable},".format(
                column_name=df_row.COLUMN_NAME,
                is_nullable=is_nullable,
                precision=int(df_row.NUMERIC_PRECISION),
                scale=int(df_row.NUMERIC_SCALE),
            )
        # int / enum / text (and anything unrecognised) are not handled yet.
        return None

    return convert_column
|
||||
|
||||
@ -1 +0,0 @@
|
||||
Downloads and extracts NSDE data.
|
||||
@ -0,0 +1,11 @@
|
||||
from drugtools.env_setup import postgres_conn, mariadb_conn, ENV

# Show the loaded environment so a failure below is easy to diagnose.
print(ENV)

# Smoke-test the postgres side: pull a few trial ids.
with postgres_conn() as pconn, pconn.cursor() as curse:
    curse.execute("select nct_id FROM ctgov.studies LIMIT 10;")
    rows = curse.fetchall()
    print(rows)

# Smoke-test the mariadb side: pull a few NDC history rows.
with mariadb_conn() as mconn, mconn.cursor() as mcurse:
    mcurse.execute("select * FROM ALLNDC_HISTORY LIMIT 10;")
    history_rows = mcurse.fetchall()
    print(history_rows)
|
||||
@ -0,0 +1,30 @@
|
||||
import pymysql
import psycopg2 as psyco
from dotenv import dotenv_values
from pathlib import Path

# Resolve the .env file relative to this module rather than the current
# working directory, so imports of this package work no matter where the
# process was started.  Equivalent to the original "../containers/.env"
# when run from the package directory — TODO confirm the repo layout is
# <repo>/<package>/env_setup.py with <repo>/containers/.env.
env_path = Path(__file__).resolve().parent.parent / "containers" / ".env"
ENV = dotenv_values(env_path)
|
||||
|
||||
def mariadb_conn(**kwargs):
    """Open a pymysql connection using the MYSQL_* values loaded from the .env file.

    Extra keyword arguments are forwarded to pymysql.connect verbatim;
    repeating one of the defaults raises TypeError, exactly as before.
    """
    settings = {
        "database": ENV["MYSQL_DB"],
        "user": ENV["MYSQL_USER"],
        "host": ENV["MYSQL_HOST"],
        "port": int(ENV["MYSQL_PORT"]),
        "password": ENV["MYSQL_PASSWORD"],
    }
    return pymysql.connect(**settings, **kwargs)
|
||||
|
||||
def postgres_conn(**kwargs):
    """Open a psycopg2 connection using the POSTGRES_* values loaded from the .env file.

    Extra keyword arguments are forwarded to psycopg2.connect verbatim;
    repeating one of the defaults raises TypeError, exactly as before.
    """
    settings = {
        "dbname": ENV["POSTGRES_DB"],
        "user": ENV["POSTGRES_USER"],
        "host": ENV["POSTGRES_HOST"],
        "port": ENV["POSTGRES_PORT"],
        "password": ENV["POSTGRES_PASSWORD"],
    }
    return psyco.connect(**settings, **kwargs)
|
||||
|
||||
|
||||
def get_tables_of_interest():
    """Return the table names listed (comma-separated) in TABLES_OF_INTEREST in the .env file."""
    raw = ENV["TABLES_OF_INTEREST"]
    return raw.split(",")
|
||||
@ -0,0 +1,15 @@
|
||||
from drugtools.env_setup import postgres_conn
from pathlib import Path


def run():
    """Execute selected_trials.sql (which lives next to this module) against postgres."""
    # Locate the SQL file relative to this module, not the CWD.
    sql_path = Path(__file__).with_name("selected_trials.sql")
    with open(sql_path, 'r') as fh:
        sql_text = fh.read()
    with postgres_conn() as connection, connection.cursor() as curse:
        curse.execute(sql_text)


if __name__ == "__main__":
    run()
|
||||
Loading…
Reference in New Issue