Fixed bug related to statuses sometimes containing notes

llm-extraction
youainti 3 years ago
parent 3eb9a4130a
commit fc38a2e92c

@ -3,6 +3,7 @@ from copy import copy
from datetime import datetime
import psycopg2
from bs4 import BeautifulSoup
import argparse
#import textprocessing as tp #cuz tp is important
#requires Python 3.10
@ -163,13 +164,16 @@ def extract_study_statuses(study_status_form, version_a,version_b):
case ["Overall Status:" as row_label, old,new]:
print("row matched: {}".format(row_label)) if VERBOSE else ""
version_a._overall_status = optional_strip(old.text)
version_b._overall_status = optional_strip(new.text)
#split out any notes such as "Suspended [reason for suspension]"
version_a._overall_status = optional_strip(old.text.split("[")[0])
#split out any notes such as "Suspended [reason for suspension]"
version_b._overall_status = optional_strip(new.text.split("[")[0])
#FIX: There is an issue with NCT00789633 where the overall status includes information as to why it was suspended.
case _ as row_label:
print("row not matched: {}".format(row_label)) if VERBOSE else ""
def extract_study_design(study_status_form, version_a,version_b):
"""
This extracts data from a study_status form and returns one or two
@ -377,9 +381,33 @@ def get_data_from_versions(nct_id,html, version_a_int, version_b_int):
if __name__ == "__main__":
VERBOSE = True
argParser = argparse.ArgumentParser()
# Adding diagnostic printing
argParser.add_argument(
"-V"
,"--verbose"
, help="Display a lot of of diagnostic information"
, action='store_true'
)
# host
argParser.add_argument(
"--host"
, help="Change hostname"
)
args = argParser.parse_args()
VERBOSE = args.verbose
if args.host:
host=args.host
else:
host="localhost"
with psycopg2.connect(dbname="aact_db", user="root", password="root",host="will-office") as db_connection:
with psycopg2.connect(dbname="aact_db", user="root", password="root",host=host) as db_connection:
#pull the requests from the db
with db_connection.cursor() as curse:
sql = """

@ -81,5 +81,13 @@ select-trials:
download-trial-histories:
cd history_downloader && python ./downloader.py
#Check if you can connect to the db
test-db-connection:
cd history_downloader && python db_connection.py
#Parse previously downloaded histories into tables.
parse-trial-histories:
cd Parser && python extraction_lib.py
#Download and parse trial histories
get-histories: download-trial-histories parse-trial-histories

Loading…
Cancel
Save