diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b720f8a --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +*.sql.gzip filter=lfs diff=lfs merge=lfs -text +*.xlsx filter=lfs diff=lfs merge=lfs -text +containers/AACT_Reloader/2023-09-06_aactdb_with_matches.sql.gz filter=lfs diff=lfs merge=lfs -text +other_data/USP[[:space:]]DC/usp_dc_pub_2023_release_2.0_updated_final.csv filter=lfs diff=lfs merge=lfs -text +other_data/USP[[:space:]]MMG/MMG_v8.0_Alignment_File.csv filter=lfs diff=lfs merge=lfs -text +other_data/VA[[:space:]]Formulary/PharmacyProductSystem_NationalDrugCodeExtract.csv filter=lfs diff=lfs merge=lfs -text diff --git a/containers/AACT_Reloader/.gitattributes b/containers/AACT_Reloader/.gitattributes new file mode 100644 index 0000000..a20c5a3 --- /dev/null +++ b/containers/AACT_Reloader/.gitattributes @@ -0,0 +1 @@ +backup/2023-09-06_aactdb_with_matches.sql.gz filter=lfs diff=lfs merge=lfs -text diff --git a/containers/AACT_Reloader/StartRestoreContainer.sh b/containers/AACT_Reloader/StartRestoreContainer.sh new file mode 100755 index 0000000..e647c84 --- /dev/null +++ b/containers/AACT_Reloader/StartRestoreContainer.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +RESTORE_DUMP_GZ=2023-09-06_aactdb_with_matches.sql.gz +POSTGRES_USER=root +POSTGRES_PASSWORD=root +POSTGRES_DB=aact_db + +#start container +podman run \ + -e POSTGRES_PASSWORD="${POSTGRES_PASSWORD}" \ + -e POSTGRES_USER="${POSTGRES_USER}" \ + -e POSTGRES_DB="${POSTGRES_DB}" \ + --name "${POSTGRES_DB}" \ + --detach \ + --shm-size=512mb \ + --volume ./backup/:/backup/ \ + -p 5432:5432\ + postgres:14-alpine + + +sleep 10 + +# Function to check if PostgreSQL is ready +function check_postgres { + podman exec -i "${POSTGRES_DB}" psql -h localhost -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -c '\q' > /dev/null 2>&1 +} + +# Wait for PostgreSQL to be ready +until check_postgres; do + echo "Waiting for PostgreSQL to be ready..." + sleep 4 +done + +echo "PostgreSQL is ready. Restoring the database..." + +# Decompress the dump file and restore it to the database +podman exec -i "${POSTGRES_DB}" sh -c "gunzip -c /backup/${RESTORE_DUMP_GZ} | psql -h localhost -U ${POSTGRES_USER} -d ${POSTGRES_DB}" + +echo "Database restoration complete." diff --git a/containers/AACT_Reloader/backup/2023-09-06_aactdb_with_matches.sql.gz b/containers/AACT_Reloader/backup/2023-09-06_aactdb_with_matches.sql.gz new file mode 100644 index 0000000..7ff43a7 --- /dev/null +++ b/containers/AACT_Reloader/backup/2023-09-06_aactdb_with_matches.sql.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5142f25ff9ef65048c02d5173b5e2a90f4a07513480d5e8c399cf9da39e678 +size 1897211561 diff --git a/justfile b/justfile index a3691ed..0be52ea 100644 --- a/justfile +++ b/justfile @@ -5,23 +5,19 @@ # - move postgress login credentials (allow them to be printed from just while setting up) -#paths for aact_db (postgres) -aact_download_link := "https://ctti-aact.nyc3.digitaloceanspaces.com/27grtsnhtccplxapj2o8ak9aotvv" -aact_download_file := "2022-12-23_postgres_data.zip" -aact_download_path := "./containers/AACT_downloader/aact_downloads" -aact_zipped_data_filepath := aact_download_path / aact_download_file +data_link := "https://ctti-aact.nyc3.digitaloceanspaces.com/27grtsnhtccplxapj2o8ak9aotvv" +data_file := "2022-12-23_postgres_data.zip" +data_path := "./containers/AACT_downloader/aact_downloads" +data_filepath := data_path / data_file #must match the 'container name: aact_db' in the docker-compose.yaml -docker_container := `docker container ls -a | grep "aact_db|rxnav_db" | cut -f 1 -d " " | tr "\n" " "` - -#paths for rxnavinabox -rxnav_path := "./containers/RxNav-In-a-box" -rxnav_version := "rxnav-in-a-box-20230103" -rxnav_data_path := rxnav_path / rxnav_version / "mysql" / "02_data.sql" +docker_container := `docker container ls -a | grep aact_db | cut -f 1 -d " " | tr "\n" " "` #Various paths for docker stuff -docker-compose_path := "./containers/docker-compose.yaml" +docker-compose_path := "./AACT_downloader/docker-compose.yaml" +#rxnorm_mappings +rxnorm_mappings_url := "https://dailymed-data.nlm.nih.gov/public-release-files/rxnorm_mappings.zip" #Number of historical trials to download. count := "100" @@ -32,23 +28,18 @@ check-status: docker --version #check if python version > 3.10. python --version - #python -c 'import sys; exit(sys.hexversion >= 50859504)' + python -c 'import sys; exit(sys.hexversion >= 50859504)' curl --version echo "current docker containers:{{docker_container}}" - +#Setup the AACT container setup-containers: - echo "todo" @echo "Check for downloaded data" - #aact - [ -s {{aact_download_path}}/postgres_data.dmp ] - #rxnav - [ -s {{rxnav_data_path}} ] + [ -s {{data_path}}/postgres_data.dmp ] #run docker compose - @echo "Setting up AACT_db & RxNav_db container" + @echo "Setting up AACT container" docker-compose -f {{docker-compose_path}} up -d - #Stop the appropriate docker container stop-containers: @@ -69,13 +60,10 @@ clean-docker: stop-containers #Download the AACT data download-aact-data: - #download - curl {{aact_download_link}} > {{aact_zipped_data_filepath}} - unzip {{aact_zipped_data_filepath}} -d {{aact_download_path}} - rm {{aact_zipped_data_filepath}} + curl {{data_link}} > ./AACT_downloader/aact_downloads/{{data_file}} + unzip {{data_filepath}} -d {{data_path}} + rm {{data_filepath}} -download-rxnav-data: - echo "Currently manually downloaded." #build based on previously downloaded data build: check-status setup-containers @@ -117,3 +105,8 @@ get-nsde: cd market_data && bash download_nsde.sh cd market_data && python extract_nsde.py +get-rxnorm-mappings: + #this may not be needed, all it does is match spls to rxcuis and I think I already have that. + curl {{rxnorm_mappings_url}} > ./market_data/rxnorm_mappings.zip + cd ./market_data && unzip ./rxnorm_mappings.zip + rm ./market_data/rxnorm_mappings.zip diff --git a/other_data/USP DC/USP_DC_12_2021_RELEASE_1.0.xlsx b/other_data/USP DC/USP_DC_12_2021_RELEASE_1.0.xlsx new file mode 100644 index 0000000..8354a7a --- /dev/null +++ b/other_data/USP DC/USP_DC_12_2021_RELEASE_1.0.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd45c46b7fc30af8e5b6c53ba03429af9fd9b6f17c026d2739b99ccf3ff44ba +size 1790714 diff --git a/other_data/USP DC/usp_dc_pub_2023_release_2.0_updated_final.csv b/other_data/USP DC/usp_dc_pub_2023_release_2.0_updated_final.csv new file mode 100644 index 0000000..6cd7c22 --- /dev/null +++ b/other_data/USP DC/usp_dc_pub_2023_release_2.0_updated_final.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d08d3944a859c0b1f6bbd466ca027fc46c86ef5bb0328cb005fa002b7b61e70b +size 2451625 diff --git a/other_data/USP DC/usp_dc_pub_2023_release_2.0_updated_final.xlsx b/other_data/USP DC/usp_dc_pub_2023_release_2.0_updated_final.xlsx new file mode 100644 index 0000000..2b275a7 --- /dev/null +++ b/other_data/USP DC/usp_dc_pub_2023_release_2.0_updated_final.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f5864bccd3abea775523e841af3da1f70d2cff5394d2a2c9702ebec8131037c +size 891222 diff --git a/other_data/USP MMG/Final_Report_and_Summary_of_Methodology_and_Approach_v1.1.pdf b/other_data/USP MMG/Final_Report_and_Summary_of_Methodology_and_Approach_v1.1.pdf new file mode 100644 index 0000000..a17f1d3 Binary files /dev/null and b/other_data/USP MMG/Final_Report_and_Summary_of_Methodology_and_Approach_v1.1.pdf differ diff --git a/other_data/USP MMG/MMG_v8.0_Alignment_File.csv b/other_data/USP MMG/MMG_v8.0_Alignment_File.csv new file mode 100644 index 0000000..f12587c --- /dev/null +++ b/other_data/USP MMG/MMG_v8.0_Alignment_File.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85859dae3971460d36e0643ee1396cb646dba158b75862d557210cb2c50707a9 +size 874058 diff --git a/other_data/USP MMG/MMG_v8.0_Alignment_File.xlsx b/other_data/USP MMG/MMG_v8.0_Alignment_File.xlsx new file mode 100644 index 0000000..9d1fdaa --- /dev/null +++ b/other_data/USP MMG/MMG_v8.0_Alignment_File.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e352b380988cc76f4ebb0377719c8cf3f6e268f61c5f775648153d20350aeb +size 356637 diff --git a/other_data/USP MMG/Summary_of_Changes_between_MMGv7.0_and_MMGv8.0.pdf b/other_data/USP MMG/Summary_of_Changes_between_MMGv7.0_and_MMGv8.0.pdf new file mode 100644 index 0000000..add4b2d Binary files /dev/null and b/other_data/USP MMG/Summary_of_Changes_between_MMGv7.0_and_MMGv8.0.pdf differ diff --git a/other_data/USP MMG/USP_Medicare_Model_Guidelines_v8.0__All_Excel_Spreadsheets_.xlsx b/other_data/USP MMG/USP_Medicare_Model_Guidelines_v8.0__All_Excel_Spreadsheets_.xlsx new file mode 100644 index 0000000..e18fc7b --- /dev/null +++ b/other_data/USP MMG/USP_Medicare_Model_Guidelines_v8.0__All_Excel_Spreadsheets_.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90c9f3b08135a250b7b2f2e5c1a1efc23e46898f1dce6399eddc12a00032d36d +size 131520 diff --git a/other_data/USP MMG/USP_Medicare_Model_Guidelines_v8.0__Categories_and_Classes_.pdf b/other_data/USP MMG/USP_Medicare_Model_Guidelines_v8.0__Categories_and_Classes_.pdf new file mode 100644 index 0000000..43d5a7d Binary files /dev/null and b/other_data/USP MMG/USP_Medicare_Model_Guidelines_v8.0__Categories_and_Classes_.pdf differ diff --git a/other_data/USP MMG/USP_Medicare_Model_Guidelines_v8.0__Showing_changes_from_v7.0_.pdf b/other_data/USP MMG/USP_Medicare_Model_Guidelines_v8.0__Showing_changes_from_v7.0_.pdf new file mode 100644 index 0000000..7477fd6 Binary files /dev/null and b/other_data/USP MMG/USP_Medicare_Model_Guidelines_v8.0__Showing_changes_from_v7.0_.pdf differ diff --git a/other_data/USP MMG/USP_Medicare_Model_Guidelines_v8.0__With_Example_Part_D_Drugs_.pdf b/other_data/USP MMG/USP_Medicare_Model_Guidelines_v8.0__With_Example_Part_D_Drugs_.pdf new file mode 100644 index 0000000..8cb9058 Binary files /dev/null and b/other_data/USP MMG/USP_Medicare_Model_Guidelines_v8.0__With_Example_Part_D_Drugs_.pdf differ diff --git a/other_data/VA Formulary/PharmacyProductSystem_NationalDrugCodeExtract.csv b/other_data/VA Formulary/PharmacyProductSystem_NationalDrugCodeExtract.csv new file mode 100644 index 0000000..e383df4 --- /dev/null +++ b/other_data/VA Formulary/PharmacyProductSystem_NationalDrugCodeExtract.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c877a461be9e75565f78d0552e76f597b5cce709c82a3f9ad30dcc0f26ddafc +size 32481883 diff --git a/other_data/VA Formulary/PharmacyProductSystem_NationalDrugCodeExtract.xlsx b/other_data/VA Formulary/PharmacyProductSystem_NationalDrugCodeExtract.xlsx new file mode 100644 index 0000000..f8bf29e --- /dev/null +++ b/other_data/VA Formulary/PharmacyProductSystem_NationalDrugCodeExtract.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77f2d74681a8d509195e5d2bc01104b9321fe500bcca9065738bc9b523edd74a +size 13465117 diff --git a/other_data/VA Formulary/VADrugClass2132012.xls b/other_data/VA Formulary/VADrugClass2132012.xls new file mode 100644 index 0000000..d5c7e5a Binary files /dev/null and b/other_data/VA Formulary/VADrugClass2132012.xls differ diff --git a/other_data/VA Formulary/VA_National_Formulary_JUNE_2023.xlsx b/other_data/VA Formulary/VA_National_Formulary_JUNE_2023.xlsx new file mode 100644 index 0000000..59676c5 --- /dev/null +++ b/other_data/VA Formulary/VA_National_Formulary_JUNE_2023.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb97661ad316e7ab307a5aaece1b2f5b7438d1ee1c6a278192f47e63ff6388cc +size 316461 diff --git a/other_data/VA Formulary/VA_National_Formulary_by_class_JUNE_2023.xlsx b/other_data/VA Formulary/VA_National_Formulary_by_class_JUNE_2023.xlsx new file mode 100644 index 0000000..895d8a7 --- /dev/null +++ b/other_data/VA Formulary/VA_National_Formulary_by_class_JUNE_2023.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58131fd8577707cc94a7208a3da4fabbf0fe10d16cf69dde1cd7b6e7d5701bae +size 379551 diff --git a/scripts/drugtools/historical_nct_downloader.py b/scripts/drugtools/historical_nct_downloader.py index af3ce3e..31cf456 100644 --- a/scripts/drugtools/historical_nct_downloader.py +++ b/scripts/drugtools/historical_nct_downloader.py @@ -5,11 +5,13 @@ from multiprocess import Pool, Value import math import time from drugtools.env_setup import postgres_conn, ENV +from tqdm import tqdm ############ GLOBALS RESET_TIME = Value('I',int(ENV["TRIAL_DOWNLOAD_RESET_TIME"])) DELAY_TIME = Value("I",int(ENV["TRIAL_DOWNLOAD_DELAY_TIME"])) TRIAL_RESERVATION_LIMIT=int(ENV["TRIAL_RESERVATION_LIMIT"]) +TRIAL_RESERVATION_BATCH_SIZE=int(ENV["TRIAL_RESERVATION_BATCH_SIZE"]) ############ Functions def get_highest_version_number(response): @@ -25,7 +27,10 @@ def get_highest_version_number(response): soup = BeautifulSoup(response.text, features="lxml") #get version table rows - table_rows = soup.findChildren("fieldset")[0].table.tbody.findChildren("tr") + try: + table_rows = soup.findChildren("fieldset")[0].table.tbody.findChildren("tr") + except IndexError as ie: + raise ie for row in reversed(table_rows): # if it is