Working validation tool

llm-extraction
will king 3 years ago
parent 1de1ff9e4a
commit 277b5b9bd5

@ -1,4 +1,9 @@
drop table if exists "DiseaseBurden".trial_to_icd10; drop table if exists "DiseaseBurden".trial_to_icd10;
drop type if exists "DiseaseBurden".validation_type;
create type "DiseaseBurden".validation_type as enum ('accepted', 'rejected', 'unmatched');
comment on type "DiseaseBurden".validation_type is 'This is used to record interactions with each type. It can be accepted (yes this should be used), rejected (no this doesn`t match), or unmatched (where none of the proposed options match)';
CREATE TABLE "DiseaseBurden".trial_to_icd10 ( CREATE TABLE "DiseaseBurden".trial_to_icd10 (
id integer NOT NULL GENERATED ALWAYS AS IDENTITY, id integer NOT NULL GENERATED ALWAYS AS IDENTITY,
@ -9,7 +14,9 @@ CREATE TABLE "DiseaseBurden".trial_to_icd10 (
rootsource varchar NULL, rootsource varchar NULL,
"name" varchar NULL, "name" varchar NULL,
"source" varchar null, "source" varchar null,
CONSTRAINT trial_to_icd10_pk PRIMARY KEY (id) approved "DiseaseBurden".validation_type,
approval_timestamp timestamp,
CONSTRAINT trial_to_icd10_pk PRIMARY KEY (id)
); );
comment on type "DiseaseBurden".trial_to_icd10 is 'This represents potential links between trials and icd10 codes. Most of the links are both automatic and wrong.'; comment on type "DiseaseBurden".trial_to_icd10 is 'This represents potential links between trials and icd10 codes. Most of the links are both automatic and wrong.';
@ -27,17 +34,3 @@ CREATE TABLE "DiseaseBurden".icd10_to_cause (
drop table if exists "DiseaseBurden".match_status;
drop type if exists "DiseaseBurden".validation_type;
create type "DiseaseBurden".validation_type as enum ('accepted', 'rejected', 'unmatched');
comment on type "DiseaseBurden".validation_type is 'This is used to record interactions with each type. It can be accepted (yes this should be used), rejected (no this doesn`t match), or unmatched (where none of the proposed options match)';
CREATE TABLE "DiseaseBurden".match_status (
id serial4 NOT NULL,
approved "DiseaseBurden".validation_type NOT NULL,
approval_timestamp timestamp NOT NULL,
CONSTRAINT match_status_pk PRIMARY KEY (id, approval_timestamp)
);
COMMENT ON TABLE "DiseaseBurden".match_status IS 'This allows me to record if a particular proposed match is approved or not.';

@ -45,11 +45,7 @@ def select_remaing_trials_to_analyze(db_conn):
sql = ''' sql = '''
select distinct nct_id select distinct nct_id
from "DiseaseBurden".trial_to_icd10 tti from "DiseaseBurden".trial_to_icd10 tti
where tti.id not in where tti.approved is null
(
select distinct id
from "DiseaseBurden".match_status
)
order by nct_id order by nct_id
; ;
''' '''
@ -63,15 +59,11 @@ def select_analyzed_trials(db_conn):
This will get the set of trials that have been analyzed. This will get the set of trials that have been analyzed.
''' '''
sql = ''' sql = '''
select distinct nct_id select distinct nct_id, max(approval_timestamp)
from "DiseaseBurden".trial_to_icd10 tti from "DiseaseBurden".trial_to_icd10 tti
where tti.id in where tti.approved in ('accepted','rejected')
( group by nct_id
select distinct id order by max(approval_timestamp) desc
from "DiseaseBurden".match_status
where approved in ('accepted','rejected')
)
order by nct_id
; ;
''' '''
with db_conn.cursor() as cursor: with db_conn.cursor() as cursor:
@ -85,12 +77,7 @@ def select_unmatched_trials(db_conn):
sql = ''' sql = '''
select distinct nct_id select distinct nct_id
from "DiseaseBurden".trial_to_icd10 tti from "DiseaseBurden".trial_to_icd10 tti
where tti.id in where tti.approved = 'unmatched'
(
select distinct id
from "DiseaseBurden".match_status
where approved = 'unmatched'
)
order by nct_id order by nct_id
; ;
''' '''
@ -110,14 +97,16 @@ def get_trial_conditions_and_proposed_matches(db_conn, nct_id):
return cursor.fetchall() return cursor.fetchall()
def store_validation(db_conn, list_of_inserts): def store_validation(db_conn, list_of_insert_data):
sql = """ sql = """
insert into "DiseaseBurden".match_status (id, approved, approval_timestamp) update "DiseaseBurden".trial_to_icd10
values %s set approved=%s, approval_timestamp=%s
where id=%s
; ;
""" """
with db_conn.cursor() as cursor: with db_conn.cursor() as cursor:
extras.execute_values(cursor, sql, list_of_inserts) for l in list_of_insert_data:
cursor.execute(sql, l)
db_conn.commit() db_conn.commit()
def get_trial_summary(db_conn,nct_id): def get_trial_summary(db_conn,nct_id):
@ -175,18 +164,12 @@ def get_list_icd10_codes(db_conn):
def record_suggested_matches(db_conn, nct_id,condition,icd10_code): def record_suggested_matches(db_conn, nct_id,condition,icd10_code):
sql1 = """ sql1 = """
INSERT INTO "DiseaseBurden".trial_to_icd10 INSERT INTO "DiseaseBurden".trial_to_icd10
(nct_id,"condition",ui,"source") (nct_id,"condition",ui,"source",approved,approval_timestamp)
VALUES (%s,%s,%s,'hand matched') VALUES (%s,%s,%s,'hand matched','accepted',%s)
returning id
; ;
""" """
sql2 = '''
INSERT INTO "DiseaseBurden".match_status (id,approved,approval_timestamp)
VALUES (%s,%s,%s)
'''
with db_conn.cursor() as curse: with db_conn.cursor() as curse:
curse.execute(sql1,[nct_id,condition,icd10_code]) curse.execute(sql1,[nct_id,condition,icd10_code,datetime.now()])
id = curse.fetchone()[0] db_conn.commit()
curse.execute(sql2,[id,"accepted",datetime.now()])
db_conn.commit()

@ -5,7 +5,15 @@
<nav> <nav>
<h1>Nav</h1> <h1>Nav</h1>
<ul> <ul>
<a href="{{ url_for('validation.remaining') }}">Validation Home</a> <li>
<a href="{{ url_for('validation.remaining') }}">Validation Home</a>
</li>
<li>
<a href="https://icd.who.int/browse10/2019/en">WHO ICD-10 Codes (2019)</a>
</li>
<li>
<a href="https://uts.nlm.nih.gov/uts/umls/home">UMLS Metathesaurus browser (requires login)</a>
</li>
</ul> </ul>
</nav> </nav>

@ -27,7 +27,8 @@
<tr><td> <tr><td>
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}"> <a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
{{ trial [0] }} {{ trial [0] }}
</a> </a>
(Most recently updated {{trial[1]}})
</td></tr> </td></tr>
{% endfor %} {% endfor %}
</table> </table>

@ -55,7 +55,7 @@
{% for condition in condition_list %} {% for condition in condition_list %}
<tr> <tr>
<td> <input type="checkbox" id="{{ condition[0] }}" name="{{condition[0]}}" value="accepted"> </td> <td> <input type="checkbox" id="{{ condition[0] }}" name="{{condition[0]}}" value="accepted" {% if condition[8] == "accepted" %}checked{% endif %}> </td>
<td> {{condition[2]}} </td> <td> {{condition[2]}} </td>
<td> {{condition[3]}} </td> <td> {{condition[3]}} </td>
<td> {{condition[5]}} </td> <td> {{condition[5]}} </td>

@ -67,29 +67,31 @@ def validate_trial(nct_id):
#if match id in submitted form, mark as approved, otherwise mark as rejected #if match id in submitted form, mark as approved, otherwise mark as rejected
for condition in condition_list: for condition in condition_list:
id = condition[0] id = condition[0]
list_of_insert_data.append((id, request.form.get(str(id),"rejected"),datetime.now())) list_of_insert_data.append((request.form.get(str(id),"rejected"), datetime.now(),id))
print(list_of_insert_data)
store_validation(db_conn, list_of_insert_data) store_validation(db_conn, list_of_insert_data)
return redirect(url_for("validation.remaining")) return redirect(url_for("validation.remaining"))
elif "marked_unmatched" in request.form: elif "marked_unmatched" in request.form:
#if this was marked as "unmatched", store that for each entry. #if this was marked as "unmatched", store that for each entry.
for condition in condition_list: for condition in condition_list:
id = condition[0] id = condition[0]
list_of_insert_data.append((id, "unmatched",datetime.now())) list_of_insert_data.append(( "unmatched", datetime.now(), id))
store_validation(db_conn, list_of_insert_data) store_validation(db_conn, list_of_insert_data)
return redirect(url_for("validation.remaining")) return redirect(url_for("validation.remaining"))
elif "alternate_submission" in request.form: elif "alternate_submission" in request.form:
code = request.form["alt_sub"] code = request.form["alt_sub"]
code = code.strip().replace("\.",'').ljust(7,"-") code = code.strip().replace(".",'').ljust(7,"-")
condition = request.form["condition"].strip() condition = request.form["condition"].strip()
codelist = get_list_icd10_codes(db_conn) codelist = get_list_icd10_codes(db_conn)
if code in codelist: if code in codelist:
record_suggested_matches(db_conn, nct_id, condition, code) record_suggested_matches(db_conn, nct_id, condition, code)
return redirect(url_for("validation.remaining")) return redirect(request.path)
else: else:
return "Entered `{}`, which is not in the list of available ICD-10 codes".format(code.strip("-")), 422 return """
Entered `{}`, which is not in the list of available ICD-10 codes. <a href={}>Return to trial summary</a>
""".format(code.strip("-"),request.path), 422

@ -61,9 +61,9 @@ with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as
sql_insert = """ sql_insert = """
INSERT INTO "DiseaseBurden".trial_to_icd10 INSERT INTO "DiseaseBurden".trial_to_icd10
(nct_id, "condition", ui,uri,rootsource,"name","source") (nct_id, "condition", ui,uri,rootsource,"name","source",approved,approval_timestamp)
VALUES VALUES
(%(nct_id)s, %(condition)s, %(ui)s, %(uri)s, %(rootSource)s, %(name)s, 'UMLS API search') (%(nct_id)s, %(condition)s, %(ui)s, %(uri)s, %(rootSource)s, %(name)s, 'UMLS API search', null,null)
""" """
for entry in tqdm(entries,desc="Inserting entries to DB"): for entry in tqdm(entries,desc="Inserting entries to DB"):
pcurse.execute(sql_insert,entry._asdict()) pcurse.execute(sql_insert,entry._asdict())

Loading…
Cancel
Save