diff --git a/.gitattributes b/.gitattributes index 20425b7..b720f8a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,6 @@ *.sql.gzip filter=lfs diff=lfs merge=lfs -text *.xlsx filter=lfs diff=lfs merge=lfs -text +containers/AACT_Reloader/2023-09-06_aactdb_with_matches.sql.gz filter=lfs diff=lfs merge=lfs -text +other_data/USP[[:space:]]DC/usp_dc_pub_2023_release_2.0_updated_final.csv filter=lfs diff=lfs merge=lfs -text +other_data/USP[[:space:]]MMG/MMG_v8.0_Alignment_File.csv filter=lfs diff=lfs merge=lfs -text +other_data/VA[[:space:]]Formulary/PharmacyProductSystem_NationalDrugCodeExtract.csv filter=lfs diff=lfs merge=lfs -text diff --git a/2023-09-06_aactdb_with_matches.sql.gzip b/2023-09-06_aactdb_with_matches.sql.gzip deleted file mode 100644 index c7282ee..0000000 --- a/2023-09-06_aactdb_with_matches.sql.gzip +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9ac5e99d27f53b81380d98f546c656332fac2be05dabea0ade16fe692a1334d6 -size 1897211526 diff --git a/containers/AACT_Reloader/2023-09-06_aactdb_with_matches.sql.gz b/containers/AACT_Reloader/2023-09-06_aactdb_with_matches.sql.gz new file mode 100644 index 0000000..7ff43a7 --- /dev/null +++ b/containers/AACT_Reloader/2023-09-06_aactdb_with_matches.sql.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5142f25ff9ef65048c02d5173b5e2a90f4a07513480d5e8c399cf9da39e678 +size 1897211561 diff --git a/containers/AACT_Reloader/StartRestoreContainer.sh b/containers/AACT_Reloader/StartRestoreContainer.sh new file mode 100755 index 0000000..d495fbf --- /dev/null +++ b/containers/AACT_Reloader/StartRestoreContainer.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +RESTORE_DUMP_GZ=2023-09-06_aactdb_with_matches.sql.gz +POSTGRES_USER=root +POSTGRES_PASSWORD=root + +#start container +podman run \ + -e POSTGRES_PASSWORD="${POSTGRES_PASSWORD}" \ + -e POSTGRES_USER="${POSTGRES_USER}" \ + -e POSTGRES_DB=aact_db \ + --name aact_db \ + --detatch \ + --shm-size=512mb \ + --volume ${RESTORE_DUMP_GZ}:/backup/${RESTORE_DUMP_GZ} \ + --ports 5432:5432\ + postgres:14-alpine + + + +#execute within container +podman exec aact_db \ + "gzip --keep --stdout --decompress /backup/2023-09-06_aactdb_with_matches.sql.gz | psql -U ${POSTGRES_USER}" diff --git a/justfile b/justfile index a3691ed..0be52ea 100644 --- a/justfile +++ b/justfile @@ -5,23 +5,19 @@ # - move postgress login credentials (allow them to be printed from just while setting up) -#paths for aact_db (postgres) -aact_download_link := "https://ctti-aact.nyc3.digitaloceanspaces.com/27grtsnhtccplxapj2o8ak9aotvv" -aact_download_file := "2022-12-23_postgres_data.zip" -aact_download_path := "./containers/AACT_downloader/aact_downloads" -aact_zipped_data_filepath := aact_download_path / aact_download_file +data_link := "https://ctti-aact.nyc3.digitaloceanspaces.com/27grtsnhtccplxapj2o8ak9aotvv" +data_file := "2022-12-23_postgres_data.zip" +data_path := "./containers/AACT_downloader/aact_downloads" +data_filepath := data_path / data_file #must match the 'container name: aact_db' in the docker-compose.yaml -docker_container := `docker container ls -a | grep "aact_db|rxnav_db" | cut -f 1 -d " " | tr "\n" " "` - -#paths for rxnavinabox -rxnav_path := "./containers/RxNav-In-a-box" -rxnav_version := "rxnav-in-a-box-20230103" -rxnav_data_path := rxnav_path / rxnav_version / "mysql" / "02_data.sql" +docker_container := `docker container ls -a | grep aact_db | cut -f 1 -d " " | tr "\n" " "` #Various paths for docker stuff -docker-compose_path := "./containers/docker-compose.yaml" +docker-compose_path := "./AACT_downloader/docker-compose.yaml" +#rxnorm_mappings +rxnorm_mappings_url := "https://dailymed-data.nlm.nih.gov/public-release-files/rxnorm_mappings.zip" #Number of historical trials to download. count := "100" @@ -32,23 +28,18 @@ check-status: docker --version #check if python version > 3.10. python --version - #python -c 'import sys; exit(sys.hexversion >= 50859504)' + python -c 'import sys; exit(sys.hexversion >= 50859504)' curl --version echo "current docker containers:{{docker_container}}" - +#Setup the AACT container setup-containers: - echo "todo" @echo "Check for downloaded data" - #aact - [ -s {{aact_download_path}}/postgres_data.dmp ] - #rxnav - [ -s {{rxnav_data_path}} ] + [ -s {{data_path}}/postgres_data.dmp ] #run docker compose - @echo "Setting up AACT_db & RxNav_db container" + @echo "Setting up AACT container" docker-compose -f {{docker-compose_path}} up -d - #Stop the appropriate docker container stop-containers: @@ -69,13 +60,10 @@ clean-docker: stop-containers #Download the AACT data download-aact-data: - #download - curl {{aact_download_link}} > {{aact_zipped_data_filepath}} - unzip {{aact_zipped_data_filepath}} -d {{aact_download_path}} - rm {{aact_zipped_data_filepath}} + curl {{data_link}} > ./AACT_downloader/aact_downloads/{{data_file}} + unzip {{data_filepath}} -d {{data_path}} + rm {{data_filepath}} -download-rxnav-data: - echo "Currently manually downloaded." #build based on previously downloaded data build: check-status setup-containers @@ -117,3 +105,8 @@ get-nsde: cd market_data && bash download_nsde.sh cd market_data && python extract_nsde.py +get-rxnorm-mappings: + #this may not be needed, all it does is match spls to rxcuis and I think I already have that. + curl {{rxnorm_mappings_url}} > ./market_data/rxnorm_mappings.zip + cd ./market_data && unzip ./rxnorm_mappings.zip + rm ./market_data/rxnorm_mappings.zip diff --git a/other_data/USP DC/usp_dc_pub_2023_release_2.0_updated_final.csv b/other_data/USP DC/usp_dc_pub_2023_release_2.0_updated_final.csv new file mode 100644 index 0000000..6cd7c22 --- /dev/null +++ b/other_data/USP DC/usp_dc_pub_2023_release_2.0_updated_final.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d08d3944a859c0b1f6bbd466ca027fc46c86ef5bb0328cb005fa002b7b61e70b +size 2451625 diff --git a/other_data/USP MMG/MMG_v8.0_Alignment_File.csv b/other_data/USP MMG/MMG_v8.0_Alignment_File.csv new file mode 100644 index 0000000..f12587c --- /dev/null +++ b/other_data/USP MMG/MMG_v8.0_Alignment_File.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85859dae3971460d36e0643ee1396cb646dba158b75862d557210cb2c50707a9 +size 874058 diff --git a/other_data/VA Formulary/PharmacyProductSystem_NationalDrugCodeExtract.csv b/other_data/VA Formulary/PharmacyProductSystem_NationalDrugCodeExtract.csv new file mode 100644 index 0000000..e383df4 --- /dev/null +++ b/other_data/VA Formulary/PharmacyProductSystem_NationalDrugCodeExtract.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c877a461be9e75565f78d0552e76f597b5cce709c82a3f9ad30dcc0f26ddafc +size 32481883 diff --git a/scripts/drugtools/historical_nct_extractor.py b/scripts/drugtools/historical_nct_extractor.py index ab62bf3..326d4fd 100644 --- a/scripts/drugtools/historical_nct_extractor.py +++ b/scripts/drugtools/historical_nct_extractor.py @@ -126,17 +126,18 @@ def extract_submission_dates(soup): version_date_dict = {} - for row in reversed(table_rows): + for row in table_rows: # if it is