/* How many trials were included? * How many trial were inspected? * How many trials were reserved for downloaded? * How many trials didn't get included for some technical reason? * ********* Data from 2023-03-29 *********** Of Interest 1981 Reserved 1709 #I believe this is lower than the downloaded number because I reserved them earlier Downloaded 1960 Incomplete 3 #there were are few http 500 and 404 codes ****************************************** * Note there were 21 missing trials of interest. * */ select status,count(distinct nct_id) from http.download_status ds group by status; /* Get a list of trials * -- There are currently 304 trials for which I was able to extract unique snapshots (2023-03-29) * -- There are currently 1138 trials for which I was able to extract unique snapshots (2023-04-03) * */ select count(distinct nct_id) from history.trial_snapshots ts /* Get the number of listed conditions * -- There are only 609 listed (MeSH classified) conditions from 284 trials(2023-03-29) * I may need to expand how I address conditions */ select count(*) from ctgov.browse_conditions bc where mesh_type = 'mesh-list' and nct_id in (select distinct nct_id from history.trial_snapshots ts) ; select count(distinct nct_id) from ctgov.browse_conditions bc where mesh_type = 'mesh-list' and nct_id in (select distinct nct_id from history.trial_snapshots ts) ; /* * If I were to expand that to non-coded conditions that would be * 304 trials with 398 conditions * */ select count(distinct nct_id) from ctgov.conditions bc where nct_id in (select distinct nct_id from history.trial_snapshots ts) ; select count(*) from ctgov.conditions c where nct_id in (select distinct nct_id from history.trial_snapshots ts) /* Get the number of matches from UMLS * There are about 5,808 proposed matches. * */ select count(*) from "DiseaseBurden".trial_to_icd10 tti ; --1383 before run at 8pm 2023-03-29 --5209 at 2023-04-03T11:21 /*Get the number of trials that have links to icd10 trials*/ select tti.approved,count(distinct nct_id) from "DiseaseBurden".trial_to_icd10 tti group by tti.approved; -- finding and removing duplicates from the trial linking stuff. Useful when you redownload trials. /* with CTE as ( select row_number() over (partition by nct_id, "condition",ui) as rownum, * from "DiseaseBurden".trial_to_icd10 tti ) delete from "DiseaseBurden".trial_to_icd10 tti2 where id in ( select id from cte where rownum > 1 ); */ --get the number of completed vs terminated trials select overall_status,count(distinct nct_id) from ctgov.studies s where nct_id in (select distinct nct_id from "DiseaseBurden".trial_to_icd10 tti where tti.approved ='accepted' ) group by overall_status ; select overall_status,count(distinct nct_id) from ctgov.studies s where nct_id in (select distinct nct_id from "DiseaseBurden".trial_to_icd10 tti) group by overall_status ; select overall_status,count(distinct nct_id) from ctgov.studies s where nct_id in (select distinct nct_id from history.trial_snapshots ts ) group by overall_status ;