-- Exploratory queries against formatted_data_mat and related tables (PostgreSQL).
-- Each statement is terminated with ';' so the script can be run top to bottom
-- or one statement at a time.

select * from formatted_data_with_planned_enrollment fdwpe;

select * from formatted_data_mat fdm;

select count(distinct condition)
from formatted_data_mat fdm;

-- number of rows per trial and status
select
    nct_id
    ,fdm.current_status
    ,count(*)
from formatted_data_mat fdm
group by
    nct_id
    ,fdm.current_status
order by nct_id;

select * from formatted_data_mat fdm;

-- group with trial split:
-- distinct rows for every trial that was observed in more than one status.
-- NOTE: the original script contained this query twice verbatim; one copy is kept.
with multi_status_trials as (
    select nct_id
    from formatted_data_mat fdm
    group by nct_id
    having count(distinct current_status) > 1
)
select distinct
    fdm.nct_id
    ,current_status
    ,earliest_date_observed
    ,elapsed_duration
    ,n_brands
    ,category_id
    ,h_sdi_val
    ,h_sdi_u95
    ,h_sdi_l95
    ,hm_sdi_val
    ,hm_sdi_u95
    ,hm_sdi_l95
    ,m_sdi_val
    ,m_sdi_u95
    ,m_sdi_l95
    ,lm_sdi_val
    ,lm_sdi_u95
    ,lm_sdi_l95
    ,l_sdi_val
    ,l_sdi_u95
    ,l_sdi_l95
from formatted_data_mat fdm
inner join multi_status_trials mst
    on mst.nct_id = fdm.nct_id
order by
    fdm.nct_id
    ,earliest_date_observed;

-- how many categories there are, and which ones
select count(distinct category_id)
from formatted_data_mat fdm;

select distinct category_id
from formatted_data_mat fdm;

-- TODO: join to usp dc dataset
-- Draft: map each trial to the USP category/class of the drugs matched to its
-- mesh-list intervention terms.
with trialncts as (
    select distinct ts.nct_id
    from history.trial_snapshots ts
),
nct_to_cui as (
    -- The filters on rp and rr below discard unmatched rows, so these are
    -- written as inner joins.
    select
        bi.nct_id
        ,bi.downcase_mesh_term
        ,rr.tty2
        ,rr.rxcui2 as approved_drug_rxcui
        ,count(*) as count
    from ctgov.browse_interventions bi
    inner join rxnorm_migrated.rxnorm_props rp
        on bi.downcase_mesh_term::text = rp.propvalue1::text
    inner join rxnorm_migrated.rxnorm_relations rr
        on rr.rxcui1 = rp.rxcui
    where bi.nct_id::text in (select trialncts.nct_id from trialncts)
        and bi.mesh_type::text = 'mesh-list'::text
        and rp.propname::text = 'Active_ingredient_name'::text
        and rr.tty2::text = any (array['BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text])
    group by
        bi.nct_id
        ,bi.downcase_mesh_term
        ,rr.tty2
        ,rr.rxcui2
)
select
    nct_to_cui.nct_id
    ,ud."USP Category"
    ,ud."USP Class"
from nct_to_cui
inner join "Formularies".usp_dc ud
    on ud.rxcui::bpchar = nct_to_cui.approved_drug_rxcui
group by
    nct_to_cui.nct_id
    ,ud."USP Category"
    ,ud."USP Class"
order by nct_to_cui.nct_id;

-- Number of distinct brand names (tty 'BN') reachable from each trial's
-- mesh-list intervention terms, via two hops through rxnorm_relations.
create materialized view "Formularies".nct_to_brands_through_uspdc as
with trialncts as (
    select distinct ts.nct_id
    from history.trial_snapshots ts
)
select
    bi.nct_id
    ,count(distinct rr2.rxcui2) as brand_name_count
from ctgov.browse_interventions bi
-- match mesh terms to rxcui
inner join rxnorm_migrated.rxnorm_props rp
    on bi.downcase_mesh_term::text = rp.propvalue1::text
-- hop 1: rxcui of the active ingredient -> related rxcuis
inner join rxnorm_migrated.rxnorm_relations rr
    on rr.rxcui1 = rp.rxcui
-- hop 2: those related rxcuis -> their own related rxcuis
inner join rxnorm_migrated.rxnorm_relations rr2
    on rr.rxcui2 = rr2.rxcui1
where bi.nct_id::text in (select trialncts.nct_id from trialncts) -- the nct_id is in our list
    and bi.mesh_type::text = 'mesh-list'::text -- only mesh "list" terms
    and rp.propname::text = 'Active_ingredient_name'::text -- only active ingredients
    -- hop 1 goes from the active ingredient to packs/drugs of these term types
    and rr.tty2::text = any (array['BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text])
    -- hop 2 goes from those packs/drugs back to brand names
    and rr2.tty2::text = 'BN'
group by bi.nct_id -- one row per trial
;

-- Formatted data with the brand-name count attached.
-- start_date, current_enrollment, enrollment_category and the *_u95 / *_l95
-- bounds are intentionally not selected here.
select
    fdqpe.nct_id
    ,fdqpe.current_status
    ,fdqpe.earliest_date_observed
    ,fdqpe.elapsed_duration
    ,fdqpe.n_brands as identical_brands
    ,ntbtu.brand_name_count
    ,fdqpe.category_id
    ,fdqpe.final_status
    ,fdqpe.h_sdi_val
    ,fdqpe.hm_sdi_val
    ,fdqpe.m_sdi_val
    ,fdqpe.lm_sdi_val
    ,fdqpe.l_sdi_val
from formatted_data_mat fdqpe
inner join "Formularies".nct_to_brands_through_uspdc ntbtu
    on fdqpe.nct_id = ntbtu.nct_id;

-- example of multiple reopenings
select *
from formatted_data_mat fdm
where nct_id = 'NCT01239797';

-- attempt to automatically find transition periods:
-- one row per trial with the first date it was seen as 'Active, not recruiting'
-- and the last date it was seen in any other status.
-- Aggregates (not window functions) are used so each trial yields exactly one
-- row on each side of the join.
with first_closed as (
    select
        nct_id
        ,min(earliest_date_observed) as earliest_closed_enrollment
    from formatted_data_mat fdm
    where current_status = 'Active, not recruiting'
    group by nct_id
),
last_open as (
    select
        nct_id
        ,max(earliest_date_observed) as latest_open_enrollment
    from formatted_data_mat fdm
    where current_status != 'Active, not recruiting'
    group by nct_id
)
select
    first_closed.nct_id
    ,first_closed.earliest_closed_enrollment
    ,last_open.latest_open_enrollment
    ,first_closed.earliest_closed_enrollment - last_open.latest_open_enrollment as closed_minus_open
from first_closed
inner join last_open
    on first_closed.nct_id = last_open.nct_id;

/* So occasionally a study reopens enrollment.
 * If that didn't happen, then I could just find the first enrollment matching X
 * and/or the last enrollment matching Y to get the transitions.
 * Instead I need to create shifts of statuses between snapshots, and then
 * remove all of those that did not change.
 *
 * Better yet, just get the last shift to ANR ('Active, not recruiting').
 */

/* Take each entry and get the status from a lagged snapshot.
 * Then select each snapshot moving from previous_status to ANR
 * and filter out everything except the last one.
 */
with status_shifts as (
    select
        nct_id
        ,lag(current_status, 1) over (partition by nct_id order by earliest_date_observed) as previous_status
        ,current_status
        ,earliest_date_observed as date_current
    from formatted_data_mat fdm
),
cte2 as (
    -- distinct on + descending date keeps only the latest shift into ANR per trial.
    -- Rows whose previous_status is null (no earlier snapshot) fail the !=
    -- comparison and are therefore not treated as shifts.
    select distinct on (nct_id)
        nct_id
        ,previous_status
        ,current_status
        ,date_current as date_current_max
    from status_shifts
    where previous_status != current_status
        and current_status = 'Active, not recruiting'
    order by
        nct_id
        ,date_current desc
        ,previous_status -- tiebreaker so the pick is deterministic
)
select *
from formatted_data_mat fdm
inner join cte2
    on cte2.nct_id = fdm.nct_id
    and cte2.date_current_max = fdm.earliest_date_observed;

--join back into