You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
101 lines
3.0 KiB
SQL
101 lines
3.0 KiB
SQL
|
|
/* How many trials were included?
|
|
* How many trial were inspected?
|
|
* How many trials were reserved for downloaded?
|
|
* How many trials didn't get included for some technical reason?
|
|
*
|
|
********* Data from 2023-03-29 ***********
|
|
Of Interest 1981
|
|
Reserved 1709 #I believe this is lower than the downloaded number because I reserved them earlier
|
|
Downloaded 1960
|
|
Incomplete 3 #there were are few http 500 and 404 codes
|
|
******************************************
|
|
* Note there were 21 missing trials of interest.
|
|
* */
|
|
select status,count(distinct nct_id) from http.download_status ds
|
|
group by status;
|
|
|
|
/* Get a list of trials
|
|
* -- There are currently 304 trials for which I was able to extract unique snapshots (2023-03-29)
|
|
* -- There are currently 1138 trials for which I was able to extract unique snapshots (2023-04-03)
|
|
* */
|
|
select count(distinct nct_id) from history.trial_snapshots ts
|
|
|
|
/* Get the number of listed conditions
|
|
* -- There are only 609 listed (MeSH classified) conditions from 284 trials(2023-03-29)
|
|
* I may need to expand how I address conditions
|
|
*/
|
|
select count(*)
|
|
from ctgov.browse_conditions bc
|
|
where
|
|
mesh_type = 'mesh-list'
|
|
and
|
|
nct_id in (select distinct nct_id from history.trial_snapshots ts)
|
|
;
|
|
|
|
select count(distinct nct_id)
|
|
from ctgov.browse_conditions bc
|
|
where
|
|
mesh_type = 'mesh-list'
|
|
and
|
|
nct_id in (select distinct nct_id from history.trial_snapshots ts)
|
|
;
|
|
|
|
/*
|
|
* If I were to expand that to non-coded conditions that would be
|
|
* 304 trials with 398 conditions
|
|
* */
|
|
select count(distinct nct_id)
|
|
from ctgov.conditions bc
|
|
where
|
|
nct_id in (select distinct nct_id from history.trial_snapshots ts)
|
|
;
|
|
select count(*) from ctgov.conditions c
|
|
where
|
|
nct_id in (select distinct nct_id from history.trial_snapshots ts)
|
|
|
|
|
|
/* Get the number of matches from UMLS
|
|
* There are about 5,808 proposed matches.
|
|
*
|
|
*/
|
|
select count(*) from "DiseaseBurden".trial_to_icd10 tti ;
|
|
--1383 before run at 8pm 2023-03-29
|
|
--5209 at 2023-04-03T11:21
|
|
|
|
|
|
/*Get the number of trials that have links to icd10 trials*/
|
|
select tti.approved,count(distinct nct_id) from "DiseaseBurden".trial_to_icd10 tti
|
|
group by tti.approved;
|
|
|
|
-- finding and removing duplicates from the trial linking stuff. Useful when you redownload trials.
|
|
/*
|
|
with CTE as (
|
|
select row_number() over (partition by nct_id, "condition",ui) as rownum, *
|
|
from "DiseaseBurden".trial_to_icd10 tti
|
|
)
|
|
delete from "DiseaseBurden".trial_to_icd10 tti2
|
|
where id in (
|
|
select id from cte where rownum > 1
|
|
);
|
|
*/
|
|
|
|
--get the number of completed vs terminated trials
|
|
select overall_status,count(distinct nct_id)
|
|
from ctgov.studies s
|
|
where nct_id in (select distinct nct_id from "DiseaseBurden".trial_to_icd10 tti where tti.approved ='accepted' )
|
|
group by overall_status
|
|
;
|
|
|
|
select overall_status,count(distinct nct_id)
|
|
from ctgov.studies s
|
|
where nct_id in (select distinct nct_id from "DiseaseBurden".trial_to_icd10 tti)
|
|
group by overall_status
|
|
;
|
|
|
|
select overall_status,count(distinct nct_id)
|
|
from ctgov.studies s
|
|
where nct_id in (select distinct nct_id from history.trial_snapshots ts )
|
|
group by overall_status
|
|
;
|