Compare commits
12 Commits
llm-extrac
...
main
| Author | SHA1 | Date |
|---|---|---|
|
|
d1d0dc87a7 | 1 year ago |
|
|
1782372a45 | 1 year ago |
|
|
fc478517ac | 1 year ago |
|
|
d912408456 | 1 year ago |
|
|
2488cceebc | 1 year ago |
|
|
3311159ab6 | 2 years ago |
|
|
bb374dbde9 | 2 years ago |
|
|
635cfe42d9 | 2 years ago |
|
|
495955170c | 2 years ago |
|
|
de3698052b | 2 years ago |
|
|
dfb041d12b | 2 years ago |
|
|
9aaf007791 | 2 years ago |
Binary file not shown.
@ -1,64 +0,0 @@
|
||||
{
|
||||
"folders": {},
|
||||
"connections": {
|
||||
"mariaDB-186c896820e-6ff11b5b802d8b82": {
|
||||
"provider": "mysql",
|
||||
"driver": "mariaDB",
|
||||
"name": "rxnav",
|
||||
"save-password": true,
|
||||
"configuration": {
|
||||
"host": "will-office",
|
||||
"port": "3306",
|
||||
"url": "jdbc:mariadb://will-office:3306/",
|
||||
"configurationType": "MANUAL",
|
||||
"type": "dev",
|
||||
"auth-model": "native"
|
||||
}
|
||||
},
|
||||
"postgres-jdbc-186c896a347-2a3d946d2dea4df7": {
|
||||
"provider": "postgresql",
|
||||
"driver": "postgres-jdbc",
|
||||
"name": "aact_db",
|
||||
"save-password": true,
|
||||
"configuration": {
|
||||
"host": "100.95.169.11",
|
||||
"port": "5432",
|
||||
"database": "aact_db",
|
||||
"url": "jdbc:postgresql://100.95.169.11:5432/aact_db",
|
||||
"configurationType": "MANUAL",
|
||||
"type": "dev",
|
||||
"provider-properties": {},
|
||||
"auth-model": "native"
|
||||
},
|
||||
"custom-properties": {
|
||||
"resultset.maxrows": "500"
|
||||
}
|
||||
},
|
||||
"postgres-jdbc-186cd8f479f-6cc3c10c8adc3359": {
|
||||
"provider": "postgresql",
|
||||
"driver": "postgres-jdbc",
|
||||
"name": "drugcentral",
|
||||
"save-password": true,
|
||||
"configuration": {
|
||||
"host": "localhost",
|
||||
"port": "54320",
|
||||
"database": "postgres",
|
||||
"url": "jdbc:postgresql://localhost:54320/postgres",
|
||||
"configurationType": "MANUAL",
|
||||
"type": "dev",
|
||||
"auth-model": "native"
|
||||
}
|
||||
}
|
||||
},
|
||||
"connection-types": {
|
||||
"dev": {
|
||||
"name": "Development",
|
||||
"color": "255,255,255",
|
||||
"description": "Regular development database",
|
||||
"auto-commit": true,
|
||||
"confirm-execute": false,
|
||||
"confirm-data-change": false,
|
||||
"auto-close-transactions": false
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1 +0,0 @@
|
||||
{"resources":{"Scripts/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"Scripts/Data_summaries.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"Scripts/DevelopingLinks.sql":{"default-schema":"public","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"},"Scripts/DiseaseBurdens_create_table.sql":{"default-schema":"public","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"},"Scripts/GlobalBurdensOfDisease2019Codebook.sql":{"default-schema":"DiseaseBurden","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"},"Scripts/GroupingTrials.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"Scripts/Script.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"Scripts/TablesAndViews_Public.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-schema":"public","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"}}}
|
||||
@ -0,0 +1,308 @@
|
||||
-- Ad-hoc inspection queries against the formatted trial data.
-- Every statement is terminated with ';' so the file can also be run as a
-- whole script, not only statement-by-statement from an editor.

select * from formatted_data_with_planned_enrollment fdwpe
;

select * from formatted_data_mat fdm
;

-- number of distinct values in the condition column
select count(distinct condition) from formatted_data_mat fdm
;

-- number of rows per (trial, status) pair
select nct_id, fdm.current_status, count(*)
from formatted_data_mat fdm
group by nct_id, fdm.current_status
order by nct_id
;

select * from formatted_data_mat fdm ;
|
||||
|
||||
|
||||
-- group with trial split
-- Restrict to trials that appear under more than one current_status and
-- return each distinct combination of the selected columns once.
with multi_status_trials as (
    -- nct_ids seen with at least two different current_status values
    select nct_id
    from formatted_data_mat
    group by nct_id
    having count(distinct current_status) > 1
)
select distinct
    fdm.nct_id
    , fdm.current_status
    , fdm.earliest_date_observed
    , fdm.elapsed_duration
    , fdm.n_brands
    , fdm.category_id
    , fdm.h_sdi_val
    , fdm.h_sdi_u95
    , fdm.h_sdi_l95
    , fdm.hm_sdi_val
    , fdm.hm_sdi_u95
    , fdm.hm_sdi_l95
    , fdm.m_sdi_val
    , fdm.m_sdi_u95
    , fdm.m_sdi_l95
    , fdm.lm_sdi_val
    , fdm.lm_sdi_u95
    , fdm.lm_sdi_l95
    , fdm.l_sdi_val
    , fdm.l_sdi_u95
    , fdm.l_sdi_l95
from formatted_data_mat fdm
inner join multi_status_trials mst
    on mst.nct_id = fdm.nct_id
order by nct_id, earliest_date_observed
;
|
||||
|
||||
-- number of distinct categories in the formatted data
select count(distinct category_id)
from formatted_data_mat fdm
;

-- the distinct categories themselves
select distinct category_id
from formatted_data_mat fdm
;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
-- group with trial split
-- Same projection as the earlier "trial split" query: trials observed under
-- two or more statuses, de-duplicated on the full column list.
with split_trials as (
    select fdm_inner.nct_id
    from formatted_data_mat fdm_inner
    group by fdm_inner.nct_id
    having count(distinct fdm_inner.current_status) > 1
)
select distinct
    fdm.nct_id
    , fdm.current_status
    , fdm.earliest_date_observed
    , fdm.elapsed_duration
    , fdm.n_brands
    , fdm.category_id
    -- SDI columns: value plus u95 / l95 for each of h, hm, m, lm, l
    , fdm.h_sdi_val
    , fdm.h_sdi_u95
    , fdm.h_sdi_l95
    , fdm.hm_sdi_val
    , fdm.hm_sdi_u95
    , fdm.hm_sdi_l95
    , fdm.m_sdi_val
    , fdm.m_sdi_u95
    , fdm.m_sdi_l95
    , fdm.lm_sdi_val
    , fdm.lm_sdi_u95
    , fdm.lm_sdi_l95
    , fdm.l_sdi_val
    , fdm.l_sdi_u95
    , fdm.l_sdi_l95
from formatted_data_mat fdm
inner join split_trials st
    on st.nct_id = fdm.nct_id
order by nct_id, earliest_date_observed
; --TODO: join to usp dc dataset
|
||||
|
||||
|
||||
|
||||
|
||||
-- For each trial present in history.trial_snapshots, list the USP category and
-- class of the RxNorm concepts reached from the trial's MeSH intervention terms.
with trial_ids as (
    select distinct ts.nct_id
    from history.trial_snapshots ts
),
trial_drug_rxcuis as (
    -- mesh term -> rxnorm property -> related rxcui, one row per combination.
    -- Inner joins: the WHERE filters on rp/rr discard unmatched rows anyway.
    select
        bi.nct_id
        , bi.downcase_mesh_term
        , rr.tty2
        , rr.rxcui2 as approved_drug_rxcui
        , count(*) as count
    from ctgov.browse_interventions bi
    inner join rxnorm_migrated.rxnorm_props rp
        on bi.downcase_mesh_term::text = rp.propvalue1::text
    inner join rxnorm_migrated.rxnorm_relations rr
        on rr.rxcui1 = rp.rxcui
    where bi.nct_id::text in (select trial_ids.nct_id from trial_ids)
      and bi.mesh_type::text = 'mesh-list'::text
      and rp.propname::text = 'Active_ingredient_name'::text
      and rr.tty2::text in ('BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text)
    group by bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2
)
select distinct
    tdr.nct_id
    , ud."USP Category"
    , ud."USP Class"
from trial_drug_rxcuis tdr
inner join "Formularies".usp_dc ud
    on ud.rxcui::bpchar = tdr.approved_drug_rxcui
order by tdr.nct_id
;
|
||||
|
||||
|
||||
|
||||
|
||||
-- Materialized view: one row per trial with the number of distinct rxcuis
-- reached by following mesh term -> rxnorm property -> relation -> 'BN' relation.
-- Trials with no such chain do not appear in the view.
-- NOTE(review): the name says "through_uspdc", but this definition never reads
-- "Formularies".usp_dc -- confirm the name is still what is intended.
create materialized view "Formularies".nct_to_brands_through_uspdc
as
with trial_ids as (
    select distinct ts.nct_id
    from history.trial_snapshots ts
)
select
    bi.nct_id
    , count(distinct brand_rel.rxcui2) as brand_name_count
from ctgov.browse_interventions bi
-- match mesh terms to an rxcui via its property value
inner join rxnorm_migrated.rxnorm_props rp
    on bi.downcase_mesh_term::text = rp.propvalue1::text
-- first hop: relations starting at that rxcui
inner join rxnorm_migrated.rxnorm_relations drug_rel
    on drug_rel.rxcui1 = rp.rxcui
-- second hop: relations starting at the rxcui reached by the first hop
inner join rxnorm_migrated.rxnorm_relations brand_rel
    on drug_rel.rxcui2 = brand_rel.rxcui1
where bi.nct_id::text in (select trial_ids.nct_id from trial_ids)  -- only trials in our snapshot list
  and bi.mesh_type::text = 'mesh-list'::text                       -- only mesh "list" entries
  and rp.propname::text = 'Active_ingredient_name'::text           -- only active-ingredient properties
  and drug_rel.tty2::text in ('BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text)  -- first hop lands on one of these term types
  and brand_rel.tty2::text = 'BN'                                  -- second hop lands on a 'BN' term
group by bi.nct_id  -- one row per trial
;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
-- Formatted trial data side by side with the brand-name count from the
-- materialized view. Inner join: rows whose nct_id has no entry in the view
-- are not returned.
-- Columns that were commented out of this projection in the original query:
--   start_date, current_enrollment, enrollment_category,
--   and the *_sdi_u95 / *_sdi_l95 columns for h, hm, m, lm, l.
select
    fdm.nct_id
    , fdm.current_status
    , fdm.earliest_date_observed
    , fdm.elapsed_duration
    , fdm.n_brands as identical_brands
    , brands.brand_name_count
    , fdm.category_id
    , fdm.final_status
    , fdm.h_sdi_val
    , fdm.hm_sdi_val
    , fdm.m_sdi_val
    , fdm.lm_sdi_val
    , fdm.l_sdi_val
from formatted_data_mat as fdm
inner join "Formularies".nct_to_brands_through_uspdc as brands
    on fdm.nct_id = brands.nct_id
;
|
||||
|
||||
--example of multiple reopenings
-- All rows for one hand-picked trial; terminated so it does not run into the
-- statement that follows.
select *
from formatted_data_mat fdm
where nct_id = 'NCT01239797'
;
|
||||
|
||||
--attempt to automatically find transition periods
-- One row per trial that has been seen both as 'Active, not recruiting' and
-- under some other status:
--   earliest_closed_enrollment = first date observed as 'Active, not recruiting'
--   latest_open_enrollment     = last date observed under any other status
--   transition_gap             = the difference between the two
-- Plain GROUP BY aggregates are used instead of window functions: the windowed
-- form emitted one row per source row, and joining the two sides then
-- multiplied rows for every trial.
with closed_enrollment as (
    select
        nct_id
        ,min(earliest_date_observed) as earliest_closed_enrollment
    from formatted_data_mat fdm
    where current_status = 'Active, not recruiting'
    group by nct_id
), open_enrollment as (
    select
        nct_id
        ,max(earliest_date_observed) as latest_open_enrollment
    from formatted_data_mat fdm
    where current_status != 'Active, not recruiting'
    group by nct_id
)
select
    closed_enrollment.nct_id
    ,closed_enrollment.earliest_closed_enrollment
    ,open_enrollment.latest_open_enrollment
    ,closed_enrollment.earliest_closed_enrollment - open_enrollment.latest_open_enrollment as transition_gap
from closed_enrollment
inner join open_enrollment
    on closed_enrollment.nct_id = open_enrollment.nct_id
;
|
||||
|
||||
|
||||
|
||||
/* So occasionally a study reopens enrollment.
|
||||
* If that didn't happen, then I could just find the first enrollment matching X and/or last enrollment matching Y
|
||||
* to get the transitions
|
||||
* Instead I need to create shifts of statuses between snapshots, and then remove all of those that did not change.
|
||||
*
|
||||
* Better yet, just get the last shift to ANR.
|
||||
* */
|
||||
|
||||
|
||||
/* Take each entry and get the status from a lagged snapshot
|
||||
* Then select each snapshot moving from previous_state to ANR
|
||||
* and filter out everything except the last one.
|
||||
* */
|
||||
-- Find, per trial, the most recent snapshot at which the status changed to
-- 'Active, not recruiting' (ANR), then join that back to the full snapshot row.
with status_shifts as (
    -- pair every snapshot with the status of the snapshot just before it
    select
        nct_id
        ,lag(current_status, 1) over (partition by nct_id order by earliest_date_observed) as previous_status
        ,current_status
        ,earliest_date_observed as date_current
    from formatted_data_mat fdm
), last_shift_to_anr as (
    -- keep only real changes into ANR, and of those only the latest per trial.
    -- (Grouping by date_current, as before, made max(date_current) a no-op and
    -- returned every shift into ANR instead of the last one.)
    -- The first snapshot of a trial has a NULL previous_status and is therefore
    -- excluded by the != comparison, same as before.
    select distinct on (nct_id)
        nct_id
        ,previous_status
        ,current_status
        ,date_current as date_current_max
    from status_shifts
    where
        previous_status != current_status
        and
        current_status = 'Active, not recruiting'
    order by nct_id, date_current desc
)
select *
from formatted_data_mat fdm
inner join last_shift_to_anr
    on last_shift_to_anr.nct_id = fdm.nct_id
    and last_shift_to_anr.date_current_max = fdm.earliest_date_observed
; --join back into
|
||||
|
||||
Binary file not shown.
@ -0,0 +1,9 @@
|
||||
USP[[:space:]]DC/USP_DC_12_2021_RELEASE_1.0.xlsx filter=lfs diff=lfs merge=lfs -text
|
||||
USP[[:space:]]DC/usp_dc_pub_2023_release_2.0_updated_final.xlsx filter=lfs diff=lfs merge=lfs -text
|
||||
USP[[:space:]]MMG/Final_Report_and_Summary_of_Methodology_and_Approach_v1.1.pdf filter=lfs diff=lfs merge=lfs -text
|
||||
USP[[:space:]]MMG/MMG_v8.0_Alignment_File.xlsx filter=lfs diff=lfs merge=lfs -text
|
||||
USP[[:space:]]MMG/Summary_of_Changes_between_MMGv7.0_and_MMGv8.0.pdf filter=lfs diff=lfs merge=lfs -text
|
||||
USP[[:space:]]MMG/USP_Medicare_Model_Guidelines_v8.0__All_Excel_Spreadsheets_.xlsx filter=lfs diff=lfs merge=lfs -text
|
||||
USP[[:space:]]MMG/USP_Medicare_Model_Guidelines_v8.0__Categories_and_Classes_.pdf filter=lfs diff=lfs merge=lfs -text
|
||||
USP[[:space:]]MMG/USP_Medicare_Model_Guidelines_v8.0__Showing_changes_from_v7.0_.pdf filter=lfs diff=lfs merge=lfs -text
|
||||
USP[[:space:]]MMG/USP_Medicare_Model_Guidelines_v8.0__With_Example_Part_D_Drugs_.pdf filter=lfs diff=lfs merge=lfs -text
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue