diff --git a/Parser/extraction_lib.py b/Parser/extraction_lib.py index 8a3628f..2e732d3 100644 --- a/Parser/extraction_lib.py +++ b/Parser/extraction_lib.py @@ -3,6 +3,7 @@ from copy import copy from datetime import datetime import psycopg2 from bs4 import BeautifulSoup +import argparse #import textprocessing as tp #cuz tp is important #requires Python 3.10 @@ -163,13 +164,16 @@ def extract_study_statuses(study_status_form, version_a,version_b): case ["Overall Status:" as row_label, old,new]: print("row matched: {}".format(row_label)) if VERBOSE else "" - version_a._overall_status = optional_strip(old.text) - version_b._overall_status = optional_strip(new.text) + #split out any notes such as "Suspended [reason for suspension]" + version_a._overall_status = optional_strip(old.text.split("[")[0]) + #split out any notes such as "Suspended [reason for suspension]" + version_b._overall_status = optional_strip(new.text.split("[")[0]) #FIX: There is an issue with NCT00789633 where the overall status includes information as to why it was suspended. 
case _ as row_label: print("row not matched: {}".format(row_label)) if VERBOSE else "" + def extract_study_design(study_status_form, version_a,version_b): """ This extracts data from a study_status form and returns one or two @@ -377,9 +381,33 @@ def get_data_from_versions(nct_id,html, version_a_int, version_b_int): if __name__ == "__main__": - VERBOSE = True + argParser = argparse.ArgumentParser() + + # Adding diagnostic printing + argParser.add_argument( + "-V" + ,"--verbose" + , help="Display a lot of of diagnostic information" + , action='store_true' + ) + + # host + argParser.add_argument( + "--host" + , help="Change hostname" + ) + + + args = argParser.parse_args() + VERBOSE = args.verbose + + if args.host: + host=args.host + else: + host="localhost" + - with psycopg2.connect(dbname="aact_db", user="root", password="root",host="will-office") as db_connection: + with psycopg2.connect(dbname="aact_db", user="root", password="root",host=host) as db_connection: #pull the requests from the db with db_connection.cursor() as curse: sql = """ diff --git a/justfile b/justfile index 57d54e4..ad6ea43 100644 --- a/justfile +++ b/justfile @@ -81,5 +81,13 @@ select-trials: download-trial-histories: cd history_downloader && python ./downloader.py +#Check if you can connect to the db test-db-connection: cd history_downloader && python db_connection.py + +#Parse previously downloaded histories into tables. +parse-trial-histories: + cd Parser && python extraction_lib.py + +#Download and install +get-histories: download-trial-histories parse-trial-histories