You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ClinicalTrialsDataProcessing/Scripts/Data_summaries.sql

65 lines
1.8 KiB
SQL

/* How many trials were included?
* How many trial were inspected?
* How many trials were reserved for downloaded?
* How many trials didn't get included for some technical reason?
*
********* Data from 2023-03-29 ***********
Of Interest 1981
Reserved 1709 #I believe this is lower than the downloaded number because I reserved them earlier
Downloaded 1960
Incomplete 3 #there were are few http 500 and 404 codes
******************************************
* Note there were 21 missing trials of interest.
* */
select status,count(distinct nct_id) from http.download_status ds
group by status;
/* Get a list of trials
* -- There are currently 304 trials for which I was able to extract unique snapshots (2023-03-29)
* */
select count(distinct nct_id) from history.trial_snapshots ts
/* Get the number of listed conditions
* -- There are only 609 listed (MeSH classified) conditions from 284 trials
* I may need to expand how I address conditions
*/
select count(*)
from ctgov.browse_conditions bc
where
mesh_type = 'mesh-list'
and
nct_id in (select distinct nct_id from history.trial_snapshots ts)
;
select count(distinct nct_id)
from ctgov.browse_conditions bc
where
mesh_type = 'mesh-list'
and
nct_id in (select distinct nct_id from history.trial_snapshots ts)
;
/*
* If I were to expand that to non-coded conditions that would be
* 304 trials with 398 conditions
* */
select count(distinct nct_id)
from ctgov.conditions bc
where
nct_id in (select distinct nct_id from history.trial_snapshots ts)
;
select count(*) from ctgov.conditions c
where
nct_id in (select distinct nct_id from history.trial_snapshots ts)
/* Get the number of matches from UMLS
* There are about 5,808 proposed matches.
*
*/
select count(*) from "DiseaseBurden".trial_to_icd10 tti ;
--1383 before run at 8pm 2023-03-29