Minor updates to umls_... to add progress bars. Got the main features of validation running.

llm-extraction
will king 3 years ago
parent 47996ba607
commit 1de1ff9e4a

@ -8,9 +8,10 @@ CREATE TABLE "DiseaseBurden".trial_to_icd10 (
uri varchar NULL,
rootsource varchar NULL,
"name" varchar NULL,
"source" varchar null,
CONSTRAINT trial_to_icd10_pk PRIMARY KEY (id)
);
-- trial_to_icd10 is declared with CREATE TABLE, so its description is attached with COMMENT ON TABLE (not TYPE).
comment on table "DiseaseBurden".trial_to_icd10 is 'This represents potential links between trials and icd10 codes. Most of the links are both automatic and wrong.';
@ -25,12 +26,13 @@ CREATE TABLE "DiseaseBurden".icd10_to_cause (
drop table if exists "DiseaseBurden".match_status;
drop type if exists "DiseaseBurden".validation_type;
create type "DiseaseBurden".validation_type as enum ('accepted', 'rejected', 'unmatched');
comment on type "DiseaseBurden".validation_type is 'This is used to record interactions with each type. It can be accepted (yes this should be used), rejected (no this doesn`t match), or unmatched (where non of the proposed options match)';
CREATE TABLE "DiseaseBurden".match_status (
id serial4 NOT NULL,
approved "DiseaseBurden".validation_type NOT NULL,

@ -20,6 +20,7 @@ def create_app(test_config=None):
)
# ensure the instance folder exists
try:
os.makedirs(app.instance_path)

@ -1,5 +1,6 @@
import psycopg2 as psyco
from psycopg2 import extras
from datetime import datetime
import click #used for cli commands. Not needed for what I am doing.
from flask import current_app, g
@ -157,4 +158,35 @@ where k.nct_id = %s
curse.execute(sql_conditions,[nct_id])
conditions = curse.fetchall()
return {"summary":summary, "keywords":keywords, "conditions":conditions}
return {"summary":summary, "keywords":keywords, "conditions":conditions}
def get_list_icd10_codes(db_conn):
    """Return every distinct ICD-10 code in "DiseaseBurden".icd10_to_cause.

    Args:
        db_conn: Open database connection whose ``cursor()`` result can be
            used as a context manager.

    Returns:
        A list holding the first column of each fetched row; the query
        orders the rows by ``code``.
    """
    query = (
        "\n"
        "select distinct code\n"
        'from "DiseaseBurden".icd10_to_cause itc\n'
        "order by code;\n"
    )
    with db_conn.cursor() as cursor:
        cursor.execute(query)
        # Each row carries a single column; unwrap it into a flat list.
        return [record[0] for record in cursor.fetchall()]
def record_suggested_matches(db_conn, nct_id, condition, icd10_code):
    """Store a hand-entered trial/ICD-10 match and mark it as accepted.

    Two rows are written in one transaction:

    1. a row in "DiseaseBurden".trial_to_icd10 with ``icd10_code`` stored in
       the ``ui`` column and ``'hand matched'`` as the source;
    2. a row in "DiseaseBurden".match_status that reuses the id returned by
       the first insert, with status ``"accepted"`` and the current
       ``datetime.now()`` timestamp.

    Args:
        db_conn: Open database connection providing ``cursor()``,
            ``commit()`` and ``rollback()``.
        nct_id: Trial identifier to link.
        condition: Name of the condition used for the match.
        icd10_code: Code to record for the trial.

    Returns:
        None.

    Raises:
        Exception: Any error raised while executing or committing is
            re-raised after the transaction has been rolled back.
    """
    insert_match_sql = (
        "\n"
        'INSERT INTO "DiseaseBurden".trial_to_icd10\n'
        '(nct_id,"condition",ui,"source")\n'
        "VALUES (%s,%s,%s,'hand matched')\n"
        "returning id\n"
        ";\n"
    )
    insert_status_sql = (
        "\n"
        'INSERT INTO "DiseaseBurden".match_status (id,approved,approval_timestamp)\n'
        "VALUES (%s,%s,%s)\n"
    )
    try:
        with db_conn.cursor() as curse:
            curse.execute(insert_match_sql, [nct_id, condition, icd10_code])
            # The status row is keyed by the id generated for the new match.
            match_id = curse.fetchone()[0]
            curse.execute(
                insert_status_sql, [match_id, "accepted", datetime.now()]
            )
        db_conn.commit()
    except Exception:
        # Without a rollback a failed statement leaves the connection stuck
        # in an aborted transaction and may keep the first insert pending.
        db_conn.rollback()
        raise

@ -50,6 +50,7 @@
<th>Identifier</th>
<th>Source</th>
<th>Description</th>
<th>Source</th>
</tr>
{% for condition in condition_list %}
@ -59,6 +60,7 @@
<td> {{condition[3]}} </td>
<td> {{condition[5]}} </td>
<td> {{condition[6]}} </td>
<td> {{condition[7]}} </td>
</tr>
{% endfor %}
@ -73,20 +75,21 @@
<h3>Submit Alternate Conditions</h3>
<!--For each listed condition, provide a spot to enter an ICD-10 code-->
<form method="post">
<label for="alternate_sub">Please Select a code that appears to be the best match:</label>
<select name="alt_sub" id="alternate_sub">
<option value="" selected disabled>--Please choose an option--</option>
{% for code in icd10_codes %}
<option value="{{ code }}"> {{ code }} </option>
{% endfor %}
</select>
<label for="alternate_sub">Please enter the proposed code that appears to be the best match:</label>
<input name="alt_sub" id="alternate_sub">
<br/>
<label for="notes">Helpful Notes:</label>
<input type="text" id="notes">
<label for="condition">
Please give a name to the condition you used to match this<br/>
Condition:
</label>
<input name="condition" id="condition">
<br/>
<input type="submit" name="alternate_submission" value="Submit alternate ICD-10 code">
</form>
</section>
<section class="approved">
<!--TODO:This will list the already approved values-->
</section>
{% endblock %}

@ -7,12 +7,16 @@ from Icd10ConditionsMatching.db_interface import (
get_trial_conditions_and_proposed_matches,
store_validation,
get_trial_summary,
get_list_icd10_codes,
record_suggested_matches,
)
from datetime import datetime
#### First Blueprint: Checking Data
bp = Blueprint("validation", __name__, url_prefix="/validation")
@bp.route("/",methods=["GET"])
def remaining():
db_conn = get_db()
@ -30,6 +34,7 @@ def remaining():
unmatched_list = unmatched_list
)
@bp.route("/<nct_id>", methods=["GET","POST"])
def validate_trial(nct_id):
@ -38,14 +43,12 @@ def validate_trial(nct_id):
condition_list = get_trial_conditions_and_proposed_matches(db_conn, nct_id)
summary_dats = get_trial_summary(db_conn, nct_id)
icd10_codes = [1,2,3]
return render_template(
"validation_of_trial.html",
nct_id=nct_id,
condition_list=condition_list,
summary_dats=summary_dats,
icd10_codes = icd10_codes
)
elif request.method == "POST":
db_conn = get_db()
@ -66,16 +69,27 @@ def validate_trial(nct_id):
id = condition[0]
list_of_insert_data.append((id, request.form.get(str(id),"rejected"),datetime.now()))
store_validation(db_conn, list_of_insert_data)
return redirect(url_for("validation.remaining"))
elif "marked_unmatched" in request.form:
#if this was marked as "unmatched", store that for each entry.
for condition in condition_list:
id = condition[0]
list_of_insert_data.append((id, "unmatched",datetime.now()))
store_validation(db_conn, list_of_insert_data)
return redirect(url_for("validation.remaining"))
elif "alternate_submission" in request.form:
pass
store_validation(db_conn, list_of_insert_data)
code = request.form["alt_sub"]
code = code.strip().replace("\.",'').ljust(7,"-")
condition = request.form["condition"].strip()
return redirect(url_for("validation.remaining"))
codelist = get_list_icd10_codes(db_conn)
if code in codelist:
record_suggested_matches(db_conn, nct_id, condition, code)
return redirect(url_for("validation.remaining"))
else:
return "Entered `{}`, which is not in the list of available ICD-10 codes".format(code.strip("-")), 422

@ -3,6 +3,7 @@ import json
from drugtools.env_setup import ENV,postgres_conn
from psycopg2 import extras
from collections import namedtuple
from tqdm import tqdm
RecordStuff = namedtuple("RecordStuff", "nct_id condition ui uri rootSource name")
@ -26,8 +27,6 @@ class Requestor():
r = Requestor(ENV.get("UMLS_API_KEY"))
print(json.dumps(r.search("leukemia").json(),indent=2))
with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as pcurse:
@ -46,10 +45,10 @@ with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as
entries = []
for row in rows:
for row in tqdm(rows,desc="Search MeSH terms"):
nctid = row[0]
condition = row[1]
print(nctid,condition)
# print(nctid,condition)
results = r.search(row[1]).json().get('result', Exception("No result entry in json")).get('results',Exception("No results entry in json"))
@ -62,9 +61,9 @@ with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as
sql_insert = """
INSERT INTO "DiseaseBurden".trial_to_icd10
(nct_id, "condition", ui,uri,rootsource,"name")
(nct_id, "condition", ui,uri,rootsource,"name","source")
VALUES
(%(nct_id)s, %(condition)s, %(ui)s, %(uri)s, %(rootSource)s, %(name)s)
(%(nct_id)s, %(condition)s, %(ui)s, %(uri)s, %(rootSource)s, %(name)s, 'UMLS API search')
"""
for entry in entries:
for entry in tqdm(entries,desc="Inserting entries to DB"):
pcurse.execute(sql_insert,entry._asdict())

Loading…
Cancel
Save