diff --git a/Scripts/Data_summaries.sql b/Scripts/Data_summaries.sql
new file mode 100644
index 0000000..b063a27
--- /dev/null
+++ b/Scripts/Data_summaries.sql
@@ -0,0 +1,64 @@
+
+/* How many trials were included?
+ * How many trials were inspected?
+ * How many trials were reserved for download?
+ * How many trials didn't get included for some technical reason?
+ *
+********* Data from 2023-03-29 ***********
+Of Interest 1981
+Reserved 1709 #I believe this is lower than the downloaded number because I reserved them earlier
+Downloaded 1960
+Incomplete 3 #there were a few http 500 and 404 codes
+******************************************
+ * Note there were 21 missing trials of interest.
+ * */
+select status, count(distinct nct_id) from http.download_status ds
+group by status;
+
+/* Get a list of trials
+ * -- There are currently 304 trials for which I was able to extract unique snapshots (2023-03-29)
+ * */
+select count(distinct nct_id) from history.trial_snapshots ts;
+
+/* Get the number of listed conditions
+ * -- There are only 609 listed (MeSH classified) conditions from 284 trials
+ * I may need to expand how I address conditions
+ */
+select count(*)
+from ctgov.browse_conditions bc
+where
+    mesh_type = 'mesh-list'
+    and
+    nct_id in (select distinct nct_id from history.trial_snapshots ts)
+;
+
+select count(distinct nct_id)
+from ctgov.browse_conditions bc
+where
+    mesh_type = 'mesh-list'
+    and
+    nct_id in (select distinct nct_id from history.trial_snapshots ts)
+;
+
+/*
+ * If I were to expand that to non-coded conditions that would be
+ * 304 trials with 398 conditions
+ * */
+select count(distinct nct_id)
+from ctgov.conditions c
+where
+    nct_id in (select distinct nct_id from history.trial_snapshots ts)
+;
+select count(*) from ctgov.conditions c
+where
+    nct_id in (select distinct nct_id from history.trial_snapshots ts);
+
+
+/* Get the number of matches from UMLS
+ * There are about 5,808 proposed matches.
+ *
+ */
+select count(*) from "DiseaseBurden".trial_to_icd10 tti ;
+--1383 before run at 8pm 2023-03-29
+
+
diff --git a/Scripts/GroupingTrials.sql b/Scripts/GroupingTrials.sql
new file mode 100644
index 0000000..6ea338e
--- /dev/null
+++ b/Scripts/GroupingTrials.sql
@@ -0,0 +1,32 @@
+select * from "DiseaseBurden".icd10_to_cause itc ;
+select * from "DiseaseBurden".cause c ;
+
+
+-- Count the distinct codes from icd10_to_cause that are linked to each cause id.
+select c.id, count(distinct itc.code)
+from "DiseaseBurden".cause c
+    join "DiseaseBurden".icd10_to_cause itc
+    on c.cause = itc.cause_text
+group by c.id
+order by c.id
+;
+
+select count(distinct nct_id) from "DiseaseBurden".trial_to_icd10 tti
+where tti.approved = 'accepted';
+
+select nct_id, "condition", ui
+from "DiseaseBurden".trial_to_icd10 tti
+where tti.approved = 'accepted';
+
+-- Match accepted trial codes to causes, comparing codes with '-' and '.' stripped from both sides.
+select tti.nct_id, tti."condition", itc.cause_text
+from "DiseaseBurden".trial_to_icd10 tti
+    join "DiseaseBurden".icd10_to_cause itc
+    on replace(replace(tti.ui,'-',''),'.','') = replace(replace(itc.code,'-',''),'.','')
+where
+    tti.approved = 'accepted'
+    and
+    itc.cause_text not in ('Non-communicable diseases','Neoplasms','Mental disorders','Other non-communicable diseases')
+group by tti.nct_id , tti."condition" , itc.cause_text
+order by tti.nct_id
+;
\ No newline at end of file
diff --git a/scripts/Icd10ConditionsMatching/start.sh b/scripts/Icd10ConditionsMatching/start.sh
new file mode 100755
index 0000000..6868a58
--- /dev/null
+++ b/scripts/Icd10ConditionsMatching/start.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+waitress-serve --port=5000 --call 'Icd10ConditionsMatching:create_app'