/* How many trials were included? * How many trial were inspected? * How many trials were reserved for downloaded? * How many trials didn't get included for some technical reason? * ********* Data from 2023-03-29 *********** Of Interest 1981 Reserved 1709 #I believe this is lower than the downloaded number because I reserved them earlier Downloaded 1960 Incomplete 3 #there were are few http 500 and 404 codes ****************************************** * Note there were 21 missing trials of interest. * */ select status,count(distinct nct_id) from http.download_status ds group by status; /* Get a list of trials * -- There are currently 304 trials for which I was able to extract unique snapshots (2023-03-29) * */ select count(distinct nct_id) from history.trial_snapshots ts /* Get the number of listed conditions * -- There are only 609 listed (MeSH classified) conditions from 284 trials * I may need to expand how I address conditions */ select count(*) from ctgov.browse_conditions bc where mesh_type = 'mesh-list' and nct_id in (select distinct nct_id from history.trial_snapshots ts) ; select count(distinct nct_id) from ctgov.browse_conditions bc where mesh_type = 'mesh-list' and nct_id in (select distinct nct_id from history.trial_snapshots ts) ; /* * If I were to expand that to non-coded conditions that would be * 304 trials with 398 conditions * */ select count(distinct nct_id) from ctgov.conditions bc where nct_id in (select distinct nct_id from history.trial_snapshots ts) ; select count(*) from ctgov.conditions c where nct_id in (select distinct nct_id from history.trial_snapshots ts) /* Get the number of matches from UMLS * There are about 5,808 proposed matches. * */ select count(*) from "DiseaseBurden".trial_to_icd10 tti ; --1383 before run at 8pm 2023-03-29