diff --git a/README.md b/README.md index ad7f0e7..f434863 100644 --- a/README.md +++ b/README.md @@ -20,15 +20,73 @@ Check prerequisites just check-status ``` -Setup the underlying AACT database including downloading historical data. +Set up the underlying AACT database including downloading both +the AACT dump and historical data. ```bash just create just select-trials +just count=1000 get-histories ``` +replacing the 1000 in `count=1000` with the number of trials you want to download. ## Advanced Usage -## TODO -finish advanced usage -add a section describing networking +If you need to reset the db without downloading the AACT dump: +```bash +just rebuild +just select-trials +just count=1000 get-histories +``` + + +### Description of all the `just` recipes + +# Background information + +This is designed to run on a linux machine with bash. +If you are using a shell other than bash you should be aware of what +is needed to run all of this using bash. + +If any of the discussions below don't make sense, talk to your sysadmin, +a local linux user, or reach out to the author. + +## Just installation + +I use the command runner `just` to automate/simplify setting up the +docker containers and running many of the python scripts. +It is similar to `make` in many ways but is designed to do less. + +Just can be installed from https://github.com/casey/just/ + +## Python installation + +This requires python 3.10 or above due to the use of match-case statements +in the html parser. + +Check which version of python you have by typing `python --version`. +If you do not have the required version, I would recommend installing +the conda python manager and setting up a conda environment with python 3.10. +Instructions for doing so are on the internet. + +## Docker and Postgres +Docker is a tool to manage and run OCI containers. +What this means in regards to this project is that docker makes it +easy to set up containers. 
+ +Install docker based on instructions for your linux distribution. + +### Docker networking + +I have the docker container for the database attached to a +network called "pharmaceutical_research" because I have a +container with pgadmin4 running on that docker network. +This can be adjusted in the dockerfile. + +I also have the database container open on port 5432, the typical +postgresql database port. +### Database logins +I have chosen the database user of *root* with a password of *root* +because I don't really need this database to be secure. +If you do need to think about the security of your database, I would recommend +you start by changing these. diff --git a/history_downloader/downloader.py b/history_downloader/downloader.py index d664d4e..10b878b 100644 --- a/history_downloader/downloader.py +++ b/history_downloader/downloader.py @@ -5,7 +5,7 @@ from bs4 import BeautifulSoup from multiprocessing import Pool, Value import math import time - +import argparse def get_highest_version_number(response): """ @@ -225,11 +225,30 @@ if __name__ == "__main__": """ Main! """ + parser = argparse.ArgumentParser(description="Download historical data") + parser.add_argument( + "-c" + ,"--count" + , dest="count" + , type=int + , default=10 + , help="Specify how many studies to download (default 10). If you want to download all of them, just enter some number higher than the total number of trials selected." 
+ ) + parser.add_argument( + "-H" + ,"--host" + , dest="host" + , default="localhost" + , help="Specify the hostname of the postgres server (Default: localhost)" + ) + args = parser.parse_args() + + #instantiate a database connnection creator dbc = DBConnectionCreator( dbname="aact_db" ,user="root" - ,host="will-office" + ,host=args.host ,port=5432 ,password="root") @@ -238,7 +257,7 @@ if __name__ == "__main__": with dbc.new() as con: #get list of nct_ids - nctids = reserve_trials(con, 150) + nctids = reserve_trials(con, args.count) print(nctids) diff --git a/justfile b/justfile index ad6ea43..037ef23 100644 --- a/justfile +++ b/justfile @@ -16,11 +16,16 @@ docker_container := `docker container ls -a | grep aact_db | cut -f 1 -d " " | t #Various paths for docker stuff docker-compose_path := "./AACT_downloader/docker-compose.yaml" +#Number of historical trials to download. +count := "100" + #check for necessary dependencies check-status: docker --version + #check that the python version is >= 3.10; sys.exit(False) exits 0 (ok), sys.exit(True) exits 1 (too old). python --version + python -c 'import sys; sys.exit(sys.version_info < (3, 10))' curl --version echo "current docker containers:{{docker_container}}" @@ -79,7 +84,7 @@ select-trials: #Download trial histories based on registered trials of interest. download-trial-histories: - cd history_downloader && python ./downloader.py + cd history_downloader && python ./downloader.py --count {{count}} #Check if you can connect to the db test-db-connection: