From 1de1ff9e4ae0a7509171cb1e1ed22fc1a629f729 Mon Sep 17 00:00:00 2001 From: will king Date: Mon, 27 Mar 2023 19:18:07 -0700 Subject: [PATCH] Minor updates to umls_... to add progress bars. Got the main features of validation running. --- Scripts/DevelopingLinks.sql | 6 ++-- .../Icd10ConditionsMatching/__init__.py | 1 + .../Icd10ConditionsMatching/db_interface.py | 34 ++++++++++++++++++- .../templates/validation_of_trial.html | 21 +++++++----- .../Icd10ConditionsMatching/validation.py | 26 ++++++++++---- scripts/umls_requests.py | 13 ++++--- 6 files changed, 76 insertions(+), 25 deletions(-) diff --git a/Scripts/DevelopingLinks.sql b/Scripts/DevelopingLinks.sql index b146bc4..37e6367 100644 --- a/Scripts/DevelopingLinks.sql +++ b/Scripts/DevelopingLinks.sql @@ -8,9 +8,10 @@ CREATE TABLE "DiseaseBurden".trial_to_icd10 ( uri varchar NULL, rootsource varchar NULL, "name" varchar NULL, + "source" varchar null, CONSTRAINT trial_to_icd10_pk PRIMARY KEY (id) ); - +comment on type "DiseaseBurden".trial_to_icd10 is 'This represents potential links between trials and icd10 codes. Most of the links are both automatic and wrong.'; @@ -25,12 +26,13 @@ CREATE TABLE "DiseaseBurden".icd10_to_cause ( + drop table if exists "DiseaseBurden".match_status; +drop type if exists "DiseaseBurden".validation_type; create type "DiseaseBurden".validation_type as enum ('accepted', 'rejected', 'unmatched'); comment on type "DiseaseBurden".validation_type is 'This is used to record interactions with each type. 
It can be accepted (yes this should be used), rejected (no this doesn`t match), or unmatched (where non of the proposed options match)'; - CREATE TABLE "DiseaseBurden".match_status ( id serial4 NOT NULL, approved "DiseaseBurden".validation_type NOT NULL, diff --git a/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/__init__.py b/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/__init__.py index 9374946..e6f1a64 100644 --- a/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/__init__.py +++ b/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/__init__.py @@ -20,6 +20,7 @@ def create_app(test_config=None): ) + # ensure the instance folder exists try: os.makedirs(app.instance_path) diff --git a/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/db_interface.py b/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/db_interface.py index 4947195..6c8a897 100644 --- a/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/db_interface.py +++ b/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/db_interface.py @@ -1,5 +1,6 @@ import psycopg2 as psyco from psycopg2 import extras +from datetime import datetime import click #used for cli commands. Not needed for what I am doing. 
from flask import current_app, g @@ -157,4 +158,35 @@ where k.nct_id = %s curse.execute(sql_conditions,[nct_id]) conditions = curse.fetchall() - return {"summary":summary, "keywords":keywords, "conditions":conditions} \ No newline at end of file + return {"summary":summary, "keywords":keywords, "conditions":conditions} + +def get_list_icd10_codes(db_conn): + sql = """ + select distinct code + from "DiseaseBurden".icd10_to_cause itc + order by code; + """ + with db_conn.cursor() as curse: + curse.execute(sql) + codes = curse.fetchall() + + return [ x[0] for x in codes ] + +def record_suggested_matches(db_conn, nct_id,condition,icd10_code): + sql1 = """ + INSERT INTO "DiseaseBurden".trial_to_icd10 + (nct_id,"condition",ui,"source") + VALUES (%s,%s,%s,'hand matched') + returning id + ; + """ + sql2 = ''' + INSERT INTO "DiseaseBurden".match_status (id,approved,approval_timestamp) + VALUES (%s,%s,%s) + ''' + + with db_conn.cursor() as curse: + curse.execute(sql1,[nct_id,condition,icd10_code]) + id = curse.fetchone()[0] + curse.execute(sql2,[id,"accepted",datetime.now()]) + db_conn.commit() \ No newline at end of file diff --git a/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/templates/validation_of_trial.html b/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/templates/validation_of_trial.html index 1fa6981..9d2e0c2 100644 --- a/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/templates/validation_of_trial.html +++ b/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/templates/validation_of_trial.html @@ -50,6 +50,7 @@ Identifier Source Description + Source {% for condition in condition_list %} @@ -59,6 +60,7 @@ {{condition[3]}} {{condition[5]}} {{condition[6]}} + {{condition[7]}} {% endfor %} @@ -73,20 +75,21 @@

Submit Alternate Conditions

- - + +
- - + +
+
+ +
{% endblock %} \ No newline at end of file diff --git a/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/validation.py b/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/validation.py index 058bb79..d742a53 100644 --- a/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/validation.py +++ b/scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/validation.py @@ -7,12 +7,16 @@ from Icd10ConditionsMatching.db_interface import ( get_trial_conditions_and_proposed_matches, store_validation, get_trial_summary, + get_list_icd10_codes, + record_suggested_matches, ) from datetime import datetime #### First Blueprint: Checking Data bp = Blueprint("validation", __name__, url_prefix="/validation") + + @bp.route("/",methods=["GET"]) def remaining(): db_conn = get_db() @@ -30,6 +34,7 @@ def remaining(): unmatched_list = unmatched_list ) + @bp.route("/", methods=["GET","POST"]) def validate_trial(nct_id): @@ -38,14 +43,12 @@ def validate_trial(nct_id): condition_list = get_trial_conditions_and_proposed_matches(db_conn, nct_id) summary_dats = get_trial_summary(db_conn, nct_id) - icd10_codes = [1,2,3] return render_template( "validation_of_trial.html", nct_id=nct_id, condition_list=condition_list, summary_dats=summary_dats, - icd10_codes = icd10_codes ) elif request.method == "POST": db_conn = get_db() @@ -66,16 +69,27 @@ def validate_trial(nct_id): id = condition[0] list_of_insert_data.append((id, request.form.get(str(id),"rejected"),datetime.now())) + store_validation(db_conn, list_of_insert_data) + return redirect(url_for("validation.remaining")) elif "marked_unmatched" in request.form: #if this was marked as "unmatched", store that for each entry. 
for condition in condition_list: id = condition[0] list_of_insert_data.append((id, "unmatched",datetime.now())) + + store_validation(db_conn, list_of_insert_data) + return redirect(url_for("validation.remaining")) elif "alternate_submission" in request.form: - pass - - store_validation(db_conn, list_of_insert_data) + code = request.form["alt_sub"] + code = code.strip().replace(".",'').ljust(7,"-") + + condition = request.form["condition"].strip() - return redirect(url_for("validation.remaining")) + codelist = get_list_icd10_codes(db_conn) + if code in codelist: + record_suggested_matches(db_conn, nct_id, condition, code) + return redirect(url_for("validation.remaining")) + else: + return "Entered `{}`, which is not in the list of available ICD-10 codes".format(code.strip("-")), 422 diff --git a/scripts/umls_requests.py b/scripts/umls_requests.py index f91b8df..ccd7662 100644 --- a/scripts/umls_requests.py +++ b/scripts/umls_requests.py @@ -3,6 +3,7 @@ import json from drugtools.env_setup import ENV,postgres_conn from psycopg2 import extras from collections import namedtuple +from tqdm import tqdm RecordStuff = namedtuple("RecordStuff", "nct_id condition ui uri rootSource name") @@ -26,8 +27,6 @@ class Requestor(): r = Requestor(ENV.get("UMLS_API_KEY")) -print(json.dumps(r.search("leukemia").json(),indent=2)) - with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as pcurse: @@ -46,10 +45,10 @@ with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as entries = [] - for row in rows: + for row in tqdm(rows,desc="Search MeSH terms"): nctid = row[0] condition = row[1] - print(nctid,condition) +# print(nctid,condition) results = r.search(row[1]).json().get('result', Exception("No result entry in json")).get('results',Exception("No results entry in json")) @@ -62,9 +61,9 @@ with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as sql_insert = """ INSERT INTO "DiseaseBurden".trial_to_icd10 - (nct_id, 
"condition", ui,uri,rootsource,"name") + (nct_id, "condition", ui,uri,rootsource,"name","source") VALUES - (%(nct_id)s, %(condition)s, %(ui)s, %(uri)s, %(rootSource)s, %(name)s) + (%(nct_id)s, %(condition)s, %(ui)s, %(uri)s, %(rootSource)s, %(name)s, 'UMLS API search') """ - for entry in entries: + for entry in tqdm(entries,desc="Inserting entries to DB"): pcurse.execute(sql_insert,entry._asdict())