You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ClinicalTrialsDataProcessing/justfile

120 lines
3.9 KiB
Plaintext

#justfile, used for automating build/setup
# TODO
# - setup a .env file so things can be shared between just and docker
# - move network name to .env
# - move postgres login credentials to .env (allow them to be printed from just while setting up)
# Paths for aact_db (postgres): download URL, archive name, and local download directory.
aact_download_link := "https://ctti-aact.nyc3.digitaloceanspaces.com/27grtsnhtccplxapj2o8ak9aotvv"
aact_download_file := "2022-12-23_postgres_data.zip"
aact_download_path := "./containers/AACT_downloader/aact_downloads"
aact_zipped_data_filepath := aact_download_path / aact_download_file

# Space-separated IDs of every container (running or stopped) whose
# `docker container ls -a` line mentions aact_db or rxnav_db; empty when none exist.
# The names must match the container names used in the docker-compose.yaml.
# `grep -E` is required: in a basic regular expression `|` is a literal character,
# so without -E the alternation never matches and this variable is always empty.
docker_container := `docker container ls -a | grep -E "aact_db|rxnav_db" | cut -f 1 -d " " | tr "\n" " "`
# RxNav-in-a-Box: install directory, release directory name, and the
# mysql/02_data.sql file inside that release.
rxnav_path := "./containers/RxNav-In-a-box"
rxnav_version := "rxnav-in-a-box-20230103"
rxnav_data_path := rxnav_path / rxnav_version / "mysql/02_data.sql"

# Compose file handed to `docker-compose -f`.
docker-compose_path := "./containers/docker-compose.yaml"

# Number of historical trials to download (passed as --count to the downloader).
count := "100"
# NOTE: the Python version is only printed, not enforced; the version check below is commented out.
# Print the versions of docker, python and curl, plus the container IDs held in docker_container.
check-status:
    docker --version
    #check if python version > 3.10.
    python --version
    #python -c 'import sys; exit(sys.hexversion >= 50859504)'
    curl --version
    echo "current docker containers:{{docker_container}}"
# Check that the AACT and RxNav data files exist and are non-empty, then start the containers.
setup-containers:
    echo "todo"
    @echo "Check for downloaded data"
    #aact
    test -s {{aact_download_path}}/postgres_data.dmp
    #rxnav
    test -s {{rxnav_data_path}}
    #run docker compose
    @echo "Setting up AACT_db & RxNav_db container"
    docker-compose -f {{docker-compose_path}} up -d
# Stop the containers listed in docker_container, if there are any.
stop-containers:
    #stop all docker containers if they are currently running.
    #The if [empty string] statement because sometimes there are no running containers
    if [ -n "{{docker_container}}" ]; then \
        docker stop {{docker_container}}; \
    fi
    @echo "confirmed that docker containers {{docker_container}} are stopped"
# NOTE(review): the two prune commands carry no filter, so they are not limited to this
# project's networks/volumes -- confirm that is intended.
# Stop and remove the containers listed in docker_container, then prune docker networks and volumes.
clean-docker: stop-containers
    # remove docker containers
    if [ -n "{{docker_container}}" ]; then \
        docker rm {{docker_container}}; \
    fi
    # cleanup docker network
    docker network prune
    # cleanup docker volumes
    docker volume prune
# Changes from the plain `curl URL > file` version:
#  - mkdir -p: the download directory may not exist yet on a fresh checkout.
#  - curl --fail: exit non-zero on an HTTP error instead of saving the error page as the zip.
#  - curl --location: follow redirects from the download link.
#  - unzip -o: overwrite a previous extraction without prompting, so `recreate`
#    (which re-runs this recipe) does not block on an interactive question.
# Download the AACT archive, unpack it into aact_download_path, and delete the archive.
download-aact-data:
    #download
    mkdir -p {{aact_download_path}}
    curl --fail --location --output {{aact_zipped_data_filepath}} {{aact_download_link}}
    unzip -o {{aact_zipped_data_filepath}} -d {{aact_download_path}}
    rm {{aact_zipped_data_filepath}}
# Placeholder: no download is performed here; the recipe only prints a reminder.
download-rxnav-data:
    printf '%s\n' "Currently manually downloaded."
# Check the tooling, then start the containers from data that is already on disk.
build: check-status setup-containers
    #system built from downloaded data
# Remove the existing containers, then build again from data that is already on disk.
rebuild: clean-docker build
    #system will be built from scratch, using previously downloaded data
# Check the tooling, download the AACT data, then build the containers.
create: check-status download-aact-data build
    # downloaded data and built from scratch
# Remove the existing containers, download the AACT data again, then rebuild the containers.
recreate: clean-docker create
    # removed containers, redownloaded data, then rebuilt containers
# Run select_trials.py to register the trials of interest in the database (see ./history_downloader/selected_trials.sql).
select-trials:
    cd history_downloader && python ./select_trials.py
# Run the history downloader for the registered trials of interest, passing `count` as --count.
download-trial-histories:
    cd history_downloader && python ./downloader.py --count {{count}}
# Run db_connection.py to check that a connection to the database can be made.
test-db-connection:
    cd history_downloader && python db_connection.py
# Run the parser (Parser/extraction_lib.py) to turn previously downloaded histories into tables.
parse-trial-histories:
    cd Parser && python extraction_lib.py
# Download the trial histories, then parse them into tables.
get-histories: download-trial-histories parse-trial-histories
# Market data: run the NSDE download script, then the NSDE extraction script, inside market_data/.
get-nsde:
    cd market_data && bash download_nsde.sh && python extract_nsde.py