Adding sql that was sitting on home computer
parent
9aaf007791
commit
dfb041d12b
@ -0,0 +1,308 @@
|
|||||||
|
-- Exploratory dump: every row and column of formatted_data_with_planned_enrollment.
select fdwpe.*
from formatted_data_with_planned_enrollment as fdwpe
;
|
||||||
|
|
||||||
|
|
||||||
|
-- Exploratory dump: every row and column of formatted_data_mat.
select fdm.*
from formatted_data_mat as fdm
;
|
||||||
|
|
||||||
|
-- Number of distinct values of "condition" in formatted_data_mat.
-- Terminated explicitly so it does not run into the next statement when the
-- file is executed as a script.
select count(distinct condition)
from formatted_data_mat fdm
;
|
||||||
|
|
||||||
|
-- Row count for every (nct_id, current_status) pair, listed by trial.
select
    fdm.nct_id
    , fdm.current_status
    , count(*)
from formatted_data_mat as fdm
group by
    fdm.nct_id
    , fdm.current_status
order by fdm.nct_id
;
|
||||||
|
|
||||||
|
-- Exploratory dump of formatted_data_mat (all columns, all rows).
select fdm.*
from formatted_data_mat as fdm
;
|
||||||
|
|
||||||
|
|
||||||
|
-- group with trial split
-- Lists the distinct rows of formatted_data_mat for trials that were observed
-- under more than one current_status.
with multi_status_trials as (
    -- trials with at least two different statuses; no ORDER BY here because
    -- ordering inside a CTE does not affect the final result
    select nct_id
    from formatted_data_mat
    group by nct_id
    having count(distinct current_status) > 1
)
-- DISTINCT over the full column list replaces the equivalent GROUP BY that
-- repeated every selected column.
select distinct
    fdm.nct_id
    , fdm.current_status
    , fdm.earliest_date_observed
    , fdm.elapsed_duration
    , fdm.n_brands
    , fdm.category_id
    , fdm.h_sdi_val
    , fdm.h_sdi_u95
    , fdm.h_sdi_l95
    , fdm.hm_sdi_val
    , fdm.hm_sdi_u95
    , fdm.hm_sdi_l95
    , fdm.m_sdi_val
    , fdm.m_sdi_u95
    , fdm.m_sdi_l95
    , fdm.lm_sdi_val
    , fdm.lm_sdi_u95
    , fdm.lm_sdi_l95
    , fdm.l_sdi_val
    , fdm.l_sdi_u95
    , fdm.l_sdi_l95
from formatted_data_mat fdm
inner join multi_status_trials mst
    on mst.nct_id = fdm.nct_id
order by nct_id, earliest_date_observed
;
|
||||||
|
|
||||||
|
-- Number of distinct category_id values in formatted_data_mat.
-- (The original statement stopped at a dangling FROM; completed against the
-- same table the following query reads.)
select count(distinct category_id)
from formatted_data_mat fdm
;

-- The distinct category_id values themselves.
select distinct category_id
from formatted_data_mat fdm
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- group with trial split
-- Distinct formatted_data_mat rows for every trial that has been observed with
-- more than one current_status.
with split_trials as (
    -- one row per trial having two or more distinct statuses; the ORDER BY
    -- that used to sit here had no effect on the outer query and was dropped
    select nct_id
    from formatted_data_mat
    group by nct_id
    having count(distinct current_status) > 1
)
-- SELECT DISTINCT is equivalent to grouping by every selected column.
select distinct
    fdm.nct_id
    , fdm.current_status
    , fdm.earliest_date_observed
    , fdm.elapsed_duration
    , fdm.n_brands
    , fdm.category_id
    , fdm.h_sdi_val
    , fdm.h_sdi_u95
    , fdm.h_sdi_l95
    , fdm.hm_sdi_val
    , fdm.hm_sdi_u95
    , fdm.hm_sdi_l95
    , fdm.m_sdi_val
    , fdm.m_sdi_u95
    , fdm.m_sdi_l95
    , fdm.lm_sdi_val
    , fdm.lm_sdi_u95
    , fdm.lm_sdi_l95
    , fdm.l_sdi_val
    , fdm.l_sdi_u95
    , fdm.l_sdi_l95
from formatted_data_mat fdm
inner join split_trials st
    on st.nct_id = fdm.nct_id
order by nct_id, earliest_date_observed
; --TODO: join to usp dc dataset
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- For each trial, list the USP category / class pairs reachable from its MeSH
-- intervention terms: MeSH term -> RxNorm property -> RxNorm relation -> usp_dc.
WITH trialncts AS (
    -- every nct_id that appears in history.trial_snapshots
    SELECT DISTINCT ts.nct_id
    FROM history.trial_snapshots ts
), nct_to_cui AS (
    SELECT
        bi.nct_id,
        bi.downcase_mesh_term,
        rr.tty2,
        rr.rxcui2 AS approved_drug_rxcui,
        count(*) AS count
    FROM ctgov.browse_interventions bi
    -- Written as INNER JOINs: the WHERE filters on rp.propname and rr.tty2
    -- already discard the NULL-extended rows a LEFT JOIN would add, so the
    -- result is the same and the intent is explicit.
    INNER JOIN rxnorm_migrated.rxnorm_props rp
        ON bi.downcase_mesh_term::text = rp.propvalue1::text
    INNER JOIN rxnorm_migrated.rxnorm_relations rr
        ON rr.rxcui1 = rp.rxcui
    WHERE bi.nct_id::text IN (SELECT trialncts.nct_id FROM trialncts)
      AND bi.mesh_type::text = 'mesh-list'::text
      AND rp.propname::text = 'Active_ingredient_name'::text
      AND rr.tty2::text = ANY (ARRAY['BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text])
    GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2
)
SELECT
    nct_to_cui.nct_id,
    ud."USP Category",
    ud."USP Class"
FROM nct_to_cui
INNER JOIN "Formularies".usp_dc ud
    ON ud.rxcui::bpchar = nct_to_cui.approved_drug_rxcui
-- grouping only collapses duplicate (trial, category, class) rows
GROUP BY nct_to_cui.nct_id, ud."USP Category", ud."USP Class"
ORDER BY nct_to_cui.nct_id;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- Per-trial count of distinct brand-name RxCUIs reachable from the trial's
-- MeSH intervention terms.
-- Output columns: nct_id, brand_name_count.
CREATE MATERIALIZED VIEW "Formularies".nct_to_brands_through_uspdc
AS
WITH trialncts AS (
    -- every nct_id that appears in history.trial_snapshots
    SELECT DISTINCT ts.nct_id
    FROM history.trial_snapshots ts
)
SELECT
    bi.nct_id,
    count(DISTINCT rr2.rxcui2) AS brand_name_count
FROM ctgov.browse_interventions bi
-- INNER JOINs throughout: each joined table is filtered in WHERE, which
-- already removes the unmatched rows a LEFT JOIN would have preserved.
INNER JOIN rxnorm_migrated.rxnorm_props rp
    ON bi.downcase_mesh_term::text = rp.propvalue1::text  -- match mesh terms to rxcui
INNER JOIN rxnorm_migrated.rxnorm_relations rr
    ON rr.rxcui1 = rp.rxcui                               -- ingredient rxcui -> related drug/pack rxcui
INNER JOIN rxnorm_migrated.rxnorm_relations rr2
    ON rr.rxcui2 = rr2.rxcui1                             -- drug/pack rxcui -> its own related rxcuis
WHERE
    bi.nct_id::text IN (SELECT trialncts.nct_id FROM trialncts)  -- check the nct_id is in our list
    AND bi.mesh_type::text = 'mesh-list'::text                   -- only mesh "list" terms
    AND rp.propname::text = 'Active_ingredient_name'::text       -- only active-ingredient properties
    -- first hop must land on a pack / clinical-drug term type
    AND rr.tty2::text = ANY (ARRAY['BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text])
    -- second hop must land on a brand name (BN)
    AND rr2.tty2::text = 'BN'
GROUP BY bi.nct_id  -- one row per trial
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
-- Formatted trial data joined to the per-trial brand-name counts.
-- Only the *_sdi_val columns are selected; the matching *_u95 / *_l95 bounds,
-- start_date, current_enrollment and enrollment_category are left out.
select
    fdm.nct_id
    , fdm.current_status
    , fdm.earliest_date_observed
    , fdm.elapsed_duration
    , fdm.n_brands as identical_brands
    , brands.brand_name_count
    , fdm.category_id
    , fdm.final_status
    , fdm.h_sdi_val
    , fdm.hm_sdi_val
    , fdm.m_sdi_val
    , fdm.lm_sdi_val
    , fdm.l_sdi_val
from formatted_data_mat as fdm
inner join "Formularies".nct_to_brands_through_uspdc as brands
    on fdm.nct_id = brands.nct_id
;
|
||||||
|
|
||||||
|
--example of multiple reopenings
-- All formatted_data_mat rows for one hand-picked trial. Terminated explicitly
-- so the following WITH statement is not parsed as part of this one.
select *
from formatted_data_mat fdm
where nct_id = 'NCT01239797'
;
|
||||||
|
|
||||||
|
--attempt to automatically find transition periods
-- One row per trial: the first date it was seen as 'Active, not recruiting',
-- the last date it was seen in any other status, and the gap between the two.
-- The CTEs aggregate with GROUP BY so each yields a single row per nct_id;
-- the earlier window-function form returned one row per snapshot and the join
-- multiplied them.
-- NOTE: trials that reopen enrollment are not handled by this first/last logic.
with cte1 as (
    select
        nct_id
        ,min(earliest_date_observed) as earliest_closed_enrollment
    from formatted_data_mat
    where current_status = 'Active, not recruiting'
    group by nct_id
), cte2 as (
    select
        nct_id
        ,max(earliest_date_observed) as latest_open_enrollment
    from formatted_data_mat
    where current_status != 'Active, not recruiting'
    group by nct_id
)
select
    cte1.nct_id
    ,cte1.earliest_closed_enrollment
    ,cte2.latest_open_enrollment
    ,cte1.earliest_closed_enrollment - cte2.latest_open_enrollment as transition_gap
from cte1
inner join cte2
    on cte1.nct_id = cte2.nct_id
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* So occasionally a study reopens enrollment.
|
||||||
|
* If that didn't happen, then I could just find the first enrollment matching X and/or last enrollment matching Y
|
||||||
|
* to get the transitions
|
||||||
|
* Instead I need to create shifts of statuses between snapshots, and then remove all of those that did not change.
|
||||||
|
*
|
||||||
|
* Better yet, just get the last shift to ANR.
|
||||||
|
* */
|
||||||
|
|
||||||
|
|
||||||
|
/* Take each entry and get the status from a lagged snapshot.
 * Then select each snapshot moving from previous_state to ANR
 * and filter out everything except the last one.
 *
 * The "last one" filter is done with DISTINCT ON (nct_id) ordered by date
 * descending. Grouping by the date itself (as before) made max() a no-op and
 * let every shift to ANR through.
 * NOTE(review): lag() is ordered only by earliest_date_observed; if a trial
 * has several rows for the same date the order among them is unspecified.
 * */
with cte as (
    -- pair every row with the status of the preceding row of the same trial
    select
        nct_id
        ,lag(current_status, 1) over (partition by nct_id order by earliest_date_observed) as previous_status
        ,current_status
        ,earliest_date_observed as date_current
    from formatted_data_mat fdm
), cte2 as (
    -- keep only the most recent change into 'Active, not recruiting' per trial;
    -- rows with no previous status (NULL) are dropped by the != comparison
    select distinct on (nct_id)
        nct_id
        ,previous_status
        ,current_status
        ,date_current as date_current_max
    from cte
    where
        previous_status != current_status
        and current_status = 'Active, not recruiting'
    order by
        nct_id
        ,date_current desc
)
select *
from formatted_data_mat fdm
join cte2
    on cte2.nct_id = fdm.nct_id
    and cte2.date_current_max = fdm.earliest_date_observed
; --join back into
|
||||||
|
|
||||||
Loading…
Reference in New Issue