Merge branch 'main' of ssh://git.youainti.com:3022/Research/ClinicalTrials_DataLinkers

main
Will King 2 years ago
commit 363dc5e3da

6
.gitignore vendored

@ -160,3 +160,9 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
*bin/
avenv
lib64
pyvenv.cfg
*.swp

@ -0,0 +1,47 @@
from flask import (Flask,render_template)
import os
from .db_interface import get_connection_details, check_connection
def create_app(test_config=None):
    """Application factory: build and configure the Flask application.

    Args:
        test_config: Optional mapping of configuration overrides. When
            given, it is applied on top of the built-in defaults.

    Returns:
        The configured ``Flask`` app with the index route, the debug route
        and the ``matching`` blueprint registered.
    """
    # Local import: the module-level import list does not name close_db.
    from .db_interface import close_db

    # create and configure the app
    app = Flask(__name__, instance_relative_config=True)
    app.config.from_mapping(
        # NOTE(review): secret key and database credentials are hard-coded
        # here; consider supplying them through test_config / deployment
        # configuration instead.
        SECRET_KEY='6e674d6e41b733270fd01c6257b3a1b4769eb80f3f773cd0fe8eff25f350fc1f',
        POSTGRES_DB="aact_db",
        POSTGRES_USER="root",
        POSTGRES_HOST="localhost",
        POSTGRES_PORT=5432,
        POSTGRES_PASSWORD="root",
    )
    if test_config is not None:
        # Previously the argument was accepted but silently ignored.
        app.config.from_mapping(test_config)

    # ensure the instance folder exists (best effort, as before)
    try:
        os.makedirs(app.instance_path)
    except OSError:
        pass

    # get_db() caches a connection on flask.g; close it when the application
    # context is torn down so connections are not left open.
    app.teardown_appcontext(close_db)

    # landing page
    @app.route('/')
    def hello():
        return render_template("index.html")

    @app.route('/debug')
    def debug_info():
        # Never send the database password to the browser.
        details = {
            key: value
            for key, value in get_connection_details().items()
            if key != "password"
        }
        return {
            "connection": details,
        }

    from . import matching
    app.register_blueprint(matching.bp)
    return app

@ -0,0 +1,163 @@
import psycopg as psyco
from datetime import datetime
from flask import current_app,g
def get_db(**kwargs):
    """Return the connection cached on ``flask.g``, opening it on first use.

    Connection parameters are read from the current app's ``POSTGRES_*``
    configuration values. Extra keyword arguments are forwarded to
    ``psyco.connect`` only when a new connection is actually opened.
    """
    if "db" in g:
        return g.db

    config = current_app.config
    g.db = psyco.connect(
        dbname=config["POSTGRES_DB"],
        user=config["POSTGRES_USER"],
        host=config["POSTGRES_HOST"],
        port=config["POSTGRES_PORT"],
        password=config["POSTGRES_PASSWORD"],
        **kwargs,
    )
    return g.db
def close_db(e=None):
    """Close and forget the connection stored on ``flask.g``, if there is one.

    Args:
        e: Unused; accepted so the function can serve as a teardown callback.
    """
    connection = g.pop('db', None)
    if connection is None:
        return
    connection.close()
def get_connection_details():
    """Return the configured connection settings as a plain dict.

    The result has the keys ``dbname``, ``user``, ``host``, ``port`` and
    ``password``, each read from the matching ``POSTGRES_*`` config value.
    """
    config = current_app.config
    key_to_setting = (
        ("dbname", "POSTGRES_DB"),
        ("user", "POSTGRES_USER"),
        ("host", "POSTGRES_HOST"),
        ("port", "POSTGRES_PORT"),
        ("password", "POSTGRES_PASSWORD"),
    )
    return {key: config[setting] for key, setting in key_to_setting}
def check_connection(app):
    """Run a trivial query to confirm the database is reachable.

    Args:
        app: Unused by the body.

    Returns:
        True when the query ran; any database error simply propagates.
    """
    probe_sql = 'select count(*) from "DiseaseBurden".trial_to_icd10'
    with get_db().cursor() as cursor:
        cursor.execute(probe_sql)
        # The rows are discarded; only success or failure matters.
        cursor.fetchall()
    return True
def get_trial_summary(db_conn, nct_id):
    """Collect descriptive information about one trial.

    Args:
        db_conn: Open database connection providing ``cursor()``.
        nct_id: Identifier of the trial to look up.

    Returns:
        A dict with ``summary`` (raw rows of titles and descriptions),
        ``keywords`` (second column of each keyword row), ``conditions``
        (third column of each ``ctgov.conditions`` row) and ``indications``
        (first column of each intervention row).
    """
    summary_query = """
/*get brief and detailed descriptions*/
select
s.nct_id,
brief_title ,
official_title ,
bs.description as brief_description,
dd.description as detailed_description
from ctgov.studies s
left join ctgov.brief_summaries bs
on bs.nct_id = s.nct_id
left join ctgov.detailed_descriptions dd
on dd.nct_id = s.nct_id
where s.nct_id = %s
;
"""
    condition_query = """
--conditions mentioned
select * from ctgov.conditions c
where c.nct_id = %s
;
"""
    keyword_query = """
/*get keywords*/
select nct_id ,downcase_name
from ctgov.keywords k
where k.nct_id = %s
;
"""
    indication_query = '''
select downcase_mesh_term
from ctgov.browse_interventions bi
where bi.nct_id = %s and mesh_type = 'mesh-list'
'''
    with db_conn.cursor() as cursor:
        cursor.execute(summary_query, [nct_id])
        summary_rows = cursor.fetchall()

        cursor.execute(keyword_query, [nct_id])
        keyword_rows = cursor.fetchall()

        cursor.execute(condition_query, [nct_id])
        condition_rows = cursor.fetchall()

        cursor.execute(indication_query, [nct_id])
        indication_rows = cursor.fetchall()

    return {
        "summary": summary_rows,
        "keywords": [row[1] for row in keyword_rows],
        # Relies on the column order of "select *": index 2 is used.
        "conditions": [row[2] for row in condition_rows],
        "indications": [row[0] for row in indication_rows],
    }
def get_trials_unmatched_to_formularies(db_conn):
    """Get the NCT IDs of trials not yet matched to each formulary.

    One query is run per formulary and its rows are stored under that
    formulary's key.

    Args:
        db_conn: Open database connection providing ``cursor()``.

    Returns:
        A dict with the keys ``"uspdc"`` and ``"uspmmg"``, each holding the
        rows returned by the corresponding query.
    """
    # setup sql for each formulary
    uspdc_sql = '''\
/*
Get the trials that have not been proceesed.
First: get most recent matched status
Second: only include those who have non-null status
Third: check list of trials against this and remove any of them.
This leaves unmatched trials.
*/
select distinct(nct_id)
from "DiseaseBurden".trial_to_icd10 tti
where nct_id not in (
select nct_id from "Formularies".uspdc_most_recent_matched_status umrms
where status is not null
)
and nct_id in (select distinct nct_id from public.formatted_data_mat fd )
;
'''
    uspmmg_sql = '''\
/*
Get the trials that have not been proceesed.
First: get most recent matched status
Second: only include those who have non-null status
Third: check list of trials against this and remove any of them.
This leaves unmatched trials.
*/
select distinct(nct_id)
from "DiseaseBurden".trial_to_icd10 tti
where nct_id not in (
select nct_id from "Formularies".uspmmg_most_recent_matched_status umrms
where status is not null
)
and nct_id in (select distinct nct_id from public.formatted_data_mat fd )
;
'''
    # The VA formulary has no query yet, so it is not listed here.
    queries = {
        "uspdc": uspdc_sql,
        "uspmmg": uspmmg_sql,
    }

    # query each formulary, adding data to dict
    formulary_list = {}
    with db_conn.cursor() as curse:
        for formulary, sql in queries.items():
            curse.execute(sql)
            formulary_list[formulary] = curse.fetchall()
    return formulary_list

@ -0,0 +1,25 @@
import psycopg as psyco
from datetime import datetime
from flask import current_app, g
def get_all_formulary_groups(db_conn):
    """Placeholder for listing the active formulary groups.

    TODO: implement for the given formulary. Currently does nothing and
    returns None.
    """
    return None
def get_formulary_groups_per_NCTID(db_conn, nct_id):
    """Placeholder for listing the formulary groups tied to a trial's drugs.

    The trial is identified by its NCT ID. Not implemented yet: the function
    does nothing and returns None.
    """
    return None
def store_trial_to_formulary_group_matches():
    """Placeholder for storing trial-to-formulary-group matches (no-op)."""
    return None

@ -0,0 +1,175 @@
import psycopg2 as psyco
from psycopg2 import extras
from datetime import datetime
import click #used for cli commands. Not needed for what I am doing.
from flask import current_app, g
def get_db(**kwargs):
    """Return the database connection stored on ``flask.g``.

    A new connection is opened from the app's ``POSTGRES_*`` settings when
    none is cached yet; ``kwargs`` are passed to ``psyco.connect`` only in
    that case.
    """
    if "db" in g:
        return g.db

    settings = current_app.config
    connect_args = {
        "dbname": settings["POSTGRES_DB"],
        "user": settings["POSTGRES_USER"],
        "host": settings["POSTGRES_HOST"],
        "port": settings["POSTGRES_PORT"],
        "password": settings["POSTGRES_PASSWORD"],
    }
    g.db = psyco.connect(**connect_args, **kwargs)
    return g.db
def close_db(e=None):
    """Remove the cached connection from ``flask.g`` and close it if present.

    Args:
        e: Unused; present so the function fits the teardown callback shape.
    """
    cached = g.pop('db', None)
    if cached is None:
        return
    cached.close()
def check_initialization(app):
    """Run a throwaway count query; raises if the database is unusable.

    Args:
        app: Unused by the body.
    """
    probe_sql = 'select count(*) from "DiseaseBurden".trial_to_icd10'
    with get_db().cursor() as cursor:
        cursor.execute(probe_sql)
        # Result is ignored; only checking that nothing fails.
        cursor.fetchall()
def init_database(app):
    """Register ``close_db`` as an app-context teardown handler on ``app``.

    The connectivity probe (``check_initialization``) is intentionally left
    disabled, as in the original.
    """
    # check_initialization(app)
    teardown_handler = close_db
    app.teardown_appcontext(teardown_handler)
def select_remaing_trials_to_analyze(db_conn):
    """Return the distinct NCT IDs whose ``approved`` value is still NULL.

    Rows come back ordered by ``nct_id``, exactly as the cursor returns them.
    """
    sql = '''
select distinct nct_id
from "DiseaseBurden".trial_to_icd10 tti
where tti.approved is null
order by nct_id
;
'''
    with db_conn.cursor() as cursor:
        cursor.execute(sql)
        pending = cursor.fetchall()
    return pending
def select_analyzed_trials(db_conn):
    """Return trials already marked 'accepted' or 'rejected'.

    Each row holds the ``nct_id`` and its latest ``approval_timestamp``;
    rows are ordered by that timestamp, newest first.
    """
    sql = '''
select distinct nct_id, max(approval_timestamp)
from "DiseaseBurden".trial_to_icd10 tti
where tti.approved in ('accepted','rejected')
group by nct_id
order by max(approval_timestamp) desc
;
'''
    with db_conn.cursor() as cursor:
        cursor.execute(sql)
        analyzed = cursor.fetchall()
    return analyzed
def select_unmatched_trials(db_conn):
    """Return the distinct NCT IDs whose ``approved`` value is 'unmatched'.

    Rows come back ordered by ``nct_id``.
    """
    sql = '''
select distinct nct_id
from "DiseaseBurden".trial_to_icd10 tti
where tti.approved = 'unmatched'
order by nct_id
;
'''
    with db_conn.cursor() as cursor:
        cursor.execute(sql)
        flagged = cursor.fetchall()
    return flagged
def get_trial_conditions_and_proposed_matches(db_conn, nct_id):
    """Return every ``trial_to_icd10`` row recorded for the given trial.

    Args:
        db_conn: Open database connection providing ``cursor()``.
        nct_id: Identifier of the trial to look up.
    """
    sql = '''
select *
from "DiseaseBurden".trial_to_icd10 tti
where nct_id = %s
'''
    with db_conn.cursor() as cursor:
        cursor.execute(sql, [nct_id])
        matches = cursor.fetchall()
    return matches
def store_validation(db_conn, list_of_insert_data):
    """Apply validation decisions to ``trial_to_icd10`` and commit.

    Args:
        db_conn: Open database connection providing ``cursor()`` and
            ``commit()``.
        list_of_insert_data: Iterable of parameter sequences, each supplying
            the values for ``approved``, ``approval_timestamp`` and ``id``
            in that order.
    """
    sql = """
update "DiseaseBurden".trial_to_icd10
set approved=%s, approval_timestamp=%s
where id=%s
;
"""
    with db_conn.cursor() as cursor:
        for row_params in list_of_insert_data:
            cursor.execute(sql, row_params)
    db_conn.commit()
def get_trial_summary(db_conn, nct_id):
    """Fetch the summary, keyword and condition rows for one trial.

    Args:
        db_conn: Open database connection providing ``cursor()``.
        nct_id: Identifier of the trial to look up.

    Returns:
        A dict with the keys ``summary``, ``keywords`` and ``conditions``,
        each holding the unprocessed rows of the corresponding query.
    """
    summary_query = """
select
s.nct_id,
brief_title ,
official_title ,
bs.description as brief_description,
dd.description as detailed_description
from ctgov.studies s
left join ctgov.brief_summaries bs
on bs.nct_id = s.nct_id
left join ctgov.detailed_descriptions dd
on dd.nct_id = s.nct_id
where s.nct_id = %s
;
"""
    condition_query = """
--conditions mentioned
select * from ctgov.conditions c
where c.nct_id = %s
;
"""
    keyword_query = """
select nct_id ,downcase_name
from ctgov.keywords k
where k.nct_id = %s
;
"""
    collected = {}
    with db_conn.cursor() as cursor:
        # Same execution order as before: summary, keywords, conditions.
        cursor.execute(summary_query, [nct_id])
        collected["summary"] = cursor.fetchall()

        cursor.execute(keyword_query, [nct_id])
        collected["keywords"] = cursor.fetchall()

        cursor.execute(condition_query, [nct_id])
        collected["conditions"] = cursor.fetchall()
    return collected
def get_list_icd10_codes(db_conn):
    """Return the distinct codes from ``icd10_to_cause`` as a flat list.

    The list follows the query's ``order by code`` ordering.
    """
    sql = """
select distinct code
from "DiseaseBurden".icd10_to_cause itc
order by code;
"""
    with db_conn.cursor() as cursor:
        cursor.execute(sql)
        code_rows = cursor.fetchall()
    # Each row is a one-column record; unwrap it.
    return [row[0] for row in code_rows]
def record_suggested_matches(db_conn, nct_id, condition, icd10_code):
    """Insert a hand-matched, already accepted trial-to-ICD-10 row and commit.

    Args:
        db_conn: Open database connection providing ``cursor()`` and
            ``commit()``.
        nct_id: Identifier of the trial.
        condition: Condition text stored in the ``condition`` column.
        icd10_code: Code stored in the ``ui`` column.
    """
    sql1 = """
INSERT INTO "DiseaseBurden".trial_to_icd10
(nct_id,"condition",ui,"source",approved,approval_timestamp)
VALUES (%s,%s,%s,'hand matched','accepted',%s)
;
"""
    with db_conn.cursor() as cursor:
        # The approval timestamp is the local time of the insert.
        parameters = [nct_id, condition, icd10_code, datetime.now()]
        cursor.execute(sql1, parameters)
    db_conn.commit()

@ -0,0 +1,105 @@
import functools
from flask import (Blueprint, flash, g, redirect, render_template, request, session, url_for)
from datetime import datetime
from FormulariesMatching.db_interface import (
get_db,
get_connection_details,
get_trial_summary,
get_trials_unmatched_to_formularies,
)
import FormulariesMatching.uspdc_db as uspdc
import FormulariesMatching.uspmmg_db as uspmmg
import FormulariesMatching.vaform_db as vaform
import re
# Maps each formulary's display name to the module that implements its
# database access. The views below call get_groups_per_nctid,
# get_all_formulary_groups and insert_match on these modules.
FORMULARIES = {
    "USP DC":uspdc,
    "USP MMG":uspmmg,
    # The VA formulary entry is disabled:
    # "VA Formulary":vaform,
}
#setup blueprint
# Every route registered on this blueprint is served under /link/formularies.
bp = Blueprint("formularies", __name__, url_prefix='/link/formularies')
@bp.route("/", methods=['GET'])
def get_remaining_trials():
    """Render the index page listing trials still unmatched to a formulary.

    The per-formulary query results are merged into one de-duplicated list
    of NCT IDs, sorted so the page order is stable between requests.
    """
    # get db connection
    db_conn = get_db()

    # get list of trials, keyed by formulary
    unmatched_trials = get_trials_unmatched_to_formularies(db_conn)

    # A trial may be unmatched in several formularies; list it once.
    unique_nct_ids = {
        row[0]
        for rows in unmatched_trials.values()
        for row in rows
    }
    unmatched_trials_list = sorted(unique_nct_ids)

    # return html
    return render_template(
        'formulary_index.html',
        unmatched_trials=unmatched_trials_list,
    )
@bp.route("/<nct_id>", methods=['GET',"POST"])
def match_trial_to_formulary_groups(nct_id):
#get db connection
db_conn = get_db()
if request.method == "GET":
#get list of potential matches for each of the formularies
potential_matches = {}
class_lists = {}
for formulary in FORMULARIES:
potential_matches[formulary] = FORMULARIES[formulary].get_groups_per_nctid(db_conn,nct_id)
class_lists[formulary] = FORMULARIES[formulary].get_all_formulary_groups(db_conn)
#get trial summary
summary = get_trial_summary(db_conn,nct_id)
#render template
# return [potential_matches,class_lists,summary]
return render_template('trial_formularies.html',
nct_id=nct_id,
potential_matches=potential_matches,
class_lists=class_lists,
summary=summary,
)
elif request.method == "POST":
#For each Formulary
translation={"('":None,"')":None}
for key in request.form.keys():
match key.split("|"):
case ["select_box", formulary]:
#parse data
result = request.form["select_box|{}".format(formulary)]
if result == 'difficult':
FORMULARIES[formulary].insert_match(db_conn,nct_id,None,None,'difficult')
else:
category,uspclass = [re.sub("['\(\)]","",x).strip() for x in result.split("', '")]
FORMULARIES[formulary].insert_match(db_conn,nct_id,category,uspclass,'accepted')
case ["check_box", data]:
formulary_trial,category,uspclass = [re.sub("['\(\)]","",x).strip() for x in data.split("', '")]
formulary,_ = formulary_trial.split(",")
#Insert data
FORMULARIES[formulary].insert_match(db_conn,nct_id,category,uspclass,'accepted')
case _:
return 400
return redirect(url_for("formularies.get_remaining_trials"))
else:
raise Exception("HTTP method <{}> not implemented".format(request.method))

@ -0,0 +1,5 @@
.table {
width: 100%;
border-collapse: collapse;
border: 1px solid;
}

@ -0,0 +1,26 @@
<!doctype html>
<html>
  <head>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
    <title>{% block title %}{% endblock %} - ClinicalTrialsProject</title>
  </head>
  <body>
    {# Site navigation is page content, so it belongs in <body>, not <head>. #}
    <nav>
      <h1>Nav</h1>
      <ul>
        <li>
          <a href="{{ url_for('hello') }}">Linking Home</a>
        </li>
        <li>
          <a href="{{ url_for('formularies.get_remaining_trials') }}">Linking Formularies</a>
        </li>
      </ul>
    </nav>
    {# Child templates fill these two blocks. #}
    {% block header %}{% endblock %}
    {% block content %}{% endblock %}
  </body>
</html>

@ -0,0 +1,47 @@
{% extends 'base.html' %}
{% block header %}
<h1>{% block title %} Linking Trials to Formularies{% endblock %}</h1>
{% endblock %}
{% block content %}
<h2>Unlinked Trials</h2>
<ul>
{% for trial in unmatched_trials %}
<li> <a href="{{ url_for('formularies.match_trial_to_formulary_groups', nct_id=trial ) }}">
{{ trial }}
</a>
</li>
{% endfor %}
</ul>
<h2>Linked Trials</h2>
<table>
<th>Trials Links</th>
{% for trial in validated_list %}
<tr><td>
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
{{ trial [0] }}
</a>
(Most recently updated {{trial[1]}})
</td></tr>
{% endfor %}
</table>
<h2>Flagged for later Trials</h2>
<table>
<th>Trial Links</th>
{% for trial in unmatched_list %}
<tr><td>
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
{{ trial [0] }}
</a>
</td></tr>
{% endfor %}
</table>
{% endblock %}

@ -0,0 +1,49 @@
{% extends 'base.html' %}
{% block header %}
<h1>{% block title %} ICD-10 to Trial Conditions Validation {% endblock %}</h1>
{% endblock %}
{% block content %}
<h2>Trials to Validate</h2>
<table>
<th>Trials</th>
{% for trial in list_to_validate %}
<tr><td>
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
{{ trial [0] }}
</a>
</td></tr>
{% endfor %}
</table>
<h2>Trials that have been Validated</h2>
<table>
<th>Trials Links</th>
{% for trial in validated_list %}
<tr><td>
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
{{ trial [0] }}
</a>
(Most recently updated {{trial[1]}})
</td></tr>
{% endfor %}
</table>
<h2>Trials that don't have a good match</h2>
<table>
<th>Trial Links</th>
{% for trial in unmatched_list %}
<tr><td>
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
{{ trial [0] }}
</a>
</td></tr>
{% endfor %}
</table>
{% endblock %}

@ -0,0 +1,24 @@
{% extends 'base.html' %}
{% block header %}
<h1>
{% block title %}
Formulary Matching
{% endblock %}
</h1>
{% endblock %}
{% block content %}
This is the home page for matching things to clinical trials
There are a few major efforts related to matching.
The first is to match trials to ICD-10 codes.
The second is to link trials to formulary groups.
{% endblock %}

@ -0,0 +1,132 @@
{% extends 'base.html' %}
{% block header %}
<h1>
{% block title %}
Match Trial {{nct_id}} to Formularies
{% endblock %}
</h1>
{% endblock %}
{% block content %}
<div class=trial_summary>
<h3> Trial Summary </h3>
<table>
<tr>
<th>Short Title</th>
<td>{{ summary['summary'][0][1] }}</td>
</tr>
<tr>
<th>Complete Title</th>
<td>{{ summary['summary'][0][2] }}</td>
</tr>
<tr>
<th>Summary</th>
<td>{{ summary['summary'][0][3] }}</td>
</tr>
<tr>
<th>Summary 2</th>
<td>{{ summary['summary'][0][4] }}</td>
</tr>
<tr>
<th>Indicated Drugs</th>
<td>
<ul>
{% for drug in summary['indications'] %}
<li>
<a
href="https://www.drugs.com/search.php?searchterm={{drug}}"
target="_blank"
>
{{drug}}
</a>
</li>
{% endfor %}
</ul>
</td>
</tr>
<tr>
<th>Conditions</th>
<td>
<ul>
{% for condition in summary['conditions'] %}
<li>
<a
href="https://www.mayoclinic.org/search/search-results?q={{condition}}"
target="_blank"
>
{{condition}}
</a>
</li>
{% endfor %}
</ul>
</td>
</tr>
<tr>
<th>keywords</th>
<td>{{ summary['keywords'] }}</td>
</tr>
</table>
</div>
<form method="post">
  <h2>Matching Classes</h2>
  {# One section per formulary. The field names and values below are parsed
     by the POST handler, so their format must not change. #}
  {% for formulary in potential_matches %}
  <div class=formulary_matches>
    <h3>
      {{ formulary }}
    </h3>
    <p>
    <table>
      <tr>
        <th>Category</th>
        <th>Class</th>
        <th>Select</th>
      </tr>
      {% for row in potential_matches[formulary] %}
      <tr>
        <td> {{ row[1] }} </td>
        <td> {{ row[2] }} </td>
        <td> <input
            type="checkbox"
            id="{{ formulary,row }}"
            name="check_box|{{ formulary,row }}"
            value="selected">
        </td>
        <!-- add checkbox here -->
      </tr>
      {% endfor %}
    </table>
    </p>
    <p>
      If you've determined it belongs to another class
      <select name="select_box|{{ formulary }}" id="{{ formulary }}-select_box">
        <option disabled selected value> -- select an option -- </option>
        <option value="difficult">
          Difficult to choose
        </option>
        {% for option in class_lists[formulary] %}
        <option value="{{ (option[0],option[1]) }}">
          Cat:{{ option[0] }} Class:{{ option[1] }}
          <!-- FIX this is really hard to read -->
          <!-- FIX try to add opgroups? -->
        </option>
        {% endfor %}
      </select>
    </p>
  </div>
  {# The paragraph and the div are now closed inside the loop, so every
     formulary section is well-formed. #}
  {% endfor %}
  <p>
    <input type="submit" value="Submit">
  </p>
</form>
{% endblock %}

@ -0,0 +1,61 @@
import psycopg as psyco
from datetime import datetime
from flask import current_app, g
def get_all_formulary_groups(db_conn):
    """Return every distinct (USP Category, USP Class) pair in ``usp_dc``.

    Rows are ordered by category, then class.
    """
    sql = '''\
select distinct "USP Category", "USP Class"
from "Formularies".usp_dc ud
order by "USP Category", "USP Class"
;
'''
    with db_conn.cursor() as cursor:
        cursor.execute(sql)
        groups = cursor.fetchall()
    return groups
def get_groups_per_nctid(db_conn, nct_id):
    """Return the ``uspdc_trial_to_category_class`` rows for one trial.

    Args:
        db_conn: Open database connection providing ``cursor()``.
        nct_id: Identifier of the trial to look up.
    """
    sql = '''\
select * from "Formularies".uspdc_trial_to_category_class ttucc
where nct_id = %(nctid)s
;
'''
    parameters = {"nctid": nct_id}
    with db_conn.cursor() as cursor:
        cursor.execute(sql, parameters)
        groups = cursor.fetchall()
    return groups
def insert_match(db_conn, nct_id, category, uspclass, status):
    """Insert one row into ``uspdc_matching`` and commit.

    The values are inserted positionally in the order nct_id, category,
    uspclass, status, followed by the database's ``NOW()``.
    """
    sql = '''\
INSERT INTO "Formularies".uspdc_matching
VALUES
(
%(nct_id)s
,%(category)s
,%(uspclass)s
,%(status)s
,NOW()
)
;
'''
    parameters = {
        "nct_id": nct_id,
        "category": category,
        "uspclass": uspclass,
        "status": status,
    }
    with db_conn.cursor() as cursor:
        cursor.execute(sql, parameters)
    db_conn.commit()

@ -0,0 +1,62 @@
import psycopg as psyco
from datetime import datetime
from flask import current_app, g
def get_all_formulary_groups(db_conn):
    """Return every distinct (USP Category, USP Class) pair in ``usp_mmg``.

    Rows are ordered by category, then class.
    """
    sql = '''\
select distinct "USP Category", "USP Class"
from "Formularies".usp_mmg ud
order by "USP Category", "USP Class"
;
'''
    with db_conn.cursor() as cursor:
        cursor.execute(sql)
        groups = cursor.fetchall()
    return groups
def get_groups_per_nctid(db_conn, nct_id):
    """Return the ``uspmmg_trial_to_category_class`` rows for one trial.

    Args:
        db_conn: Open database connection providing ``cursor()``.
        nct_id: Identifier of the trial to look up.
    """
    sql = '''\
select * from "Formularies".uspmmg_trial_to_category_class ttucc
where nct_id = %(nctid)s
;
'''
    parameters = {"nctid": nct_id}
    with db_conn.cursor() as cursor:
        cursor.execute(sql, parameters)
        groups = cursor.fetchall()
    return groups
def insert_match(db_conn, nct_id, category, uspclass, status):
    """Insert one row into ``uspmmg_matching`` and commit.

    The values are inserted positionally in the order nct_id, category,
    uspclass, status, followed by the database's ``NOW()``.
    """
    sql = '''\
INSERT INTO "Formularies".uspmmg_matching
VALUES
(
%(nct_id)s
,%(category)s
,%(uspclass)s
,%(status)s
,NOW()
)
;
'''
    parameters = {
        "nct_id": nct_id,
        "category": category,
        "uspclass": uspclass,
        "status": status,
    }
    with db_conn.cursor() as cursor:
        cursor.execute(sql, parameters)
    db_conn.commit()

@ -0,0 +1,35 @@
import psycopg as psyco
from datetime import datetime
from flask import current_app, g
def get_all_formulary_groups(db_conn):
    """Return the distinct (USP Category, USP Class) pairs, ordered.

    NOTE(review): the query reads "Formularies".usp_dc; confirm this is the
    intended table for this formulary.
    """
    sql = '''\
select distinct "USP Category", "USP Class"
from "Formularies".usp_dc ud
order by "USP Category", "USP Class"
;
'''
    with db_conn.cursor() as cursor:
        cursor.execute(sql)
        groups = cursor.fetchall()
    return groups
def get_formulary_groups_per_NCTID(db_conn, nct_id):
    """Placeholder for listing the formulary groups tied to a trial's drugs.

    The trial is identified by its NCT ID. Not implemented yet: the function
    does nothing and returns None.
    """
    return None
def store_trial_to_formulary_group_matches():
    """Placeholder for storing trial-to-formulary-group matches (no-op)."""
    return None

@ -0,0 +1,6 @@
flask
psycopg[binary]
datetime
watchdog
waitress
python-dotenv

@ -0,0 +1,12 @@
from setuptools import setup
# Package definition for the FormulariesMatching application.
setup(
    name='FormulariesMatching',
    packages=['FormulariesMatching'],
    # Ship non-Python package data along with the package.
    include_package_data=True,
    install_requires=[
        'flask',
        'psycopg[binary]',
        # NOTE(review): the code only uses the standard-library ``datetime``
        # module; confirm a separately installed 'datetime' distribution is
        # really intended here.
        'datetime',
    ],
)

@ -0,0 +1,2 @@
waitress-serve --port=5000 --call 'FormulariesMatching:create_app'
#flask --app FormulariesMatching run --debug

@ -1,6 +1,6 @@
#Postgres Info
POSTGRES_HOST=
POSTGRES_HOST=will-office
POSTGRES_DB=aact_db
POSTGRES_PORT=5432
POSTGRES_USER=root

Loading…
Cancel
Save