-- ClinicalTrialsDataProcessing/Scripts/GroupingTrials.sql
-- (Repository web-page header text removed; original listing: 105 lines, 2.4 KiB, SQL.)

-- Preview every row of the icd10_to_cause mapping table.
select *
from "DiseaseBurden".icd10_to_cause as itc
;

-- Preview every row of the cause table.
select *
from "DiseaseBurden".cause as c
;

-- For each cause id, count the distinct codes that icd10_to_cause maps to it
-- (rows are matched on the cause text).
select
    c.id,
    count(distinct itc.code)
from "DiseaseBurden".cause as c
inner join "DiseaseBurden".icd10_to_cause as itc
    on c.cause = itc.cause_text
group by c.id
order by c.id
;

-- Count the distinct trials behind each value of the approved column.
select
    tti.approved,
    count(distinct tti.nct_id)
from "DiseaseBurden".trial_to_icd10 as tti
group by tti.approved
;

-- List trial id, condition and code (ui) for rows marked 'accepted'.
select
    tti.nct_id,
    tti."condition",
    tti.ui
from "DiseaseBurden".trial_to_icd10 as tti
where tti.approved = 'accepted'
;
-- Link trials to their causes.
--
-- trial_to_cause: one row per accepted trial/code match joined to the cause it
-- maps to and to that cause's entry in cause_hierarchy (cause_id, "level").
-- The view is temporary, so it must be rebuilt in every session; "if exists"
-- lets the script run cleanly the first time, when there is nothing to drop.
drop view if exists trial_to_cause;

create temp view trial_to_cause as
select
    tti.nct_id,
    tti.ui,
    tti."condition",
    itc.cause_text,
    ch.cause_id,
    ch."level"
from "DiseaseBurden".trial_to_icd10 as tti
-- Codes are compared with '-' and '.' stripped from both sides, so differences
-- in punctuation alone do not prevent a match.
inner join "DiseaseBurden".icd10_to_cause as itc
    on replace(replace(tti.ui, '-', ''), '.', '')
     = replace(replace(itc.code, '-', ''), '.', '')
inner join "DiseaseBurden".cause_hierarchy as ch
    on itc.cause_text = ch.cause_name
where tti.approved = 'accepted'
order by tti.nct_id
;
-- Per trial: number of linked cause rows and the lowest / highest "level"
-- value among them. "group by nct_id" already returns exactly one row per
-- trial, so the former "select distinct" was redundant and has been removed.
select
    ttc.nct_id,
    count(*),
    min(ttc."level"),
    max(ttc."level")
from trial_to_cause as ttc
group by ttc.nct_id
;
-- Distinct (trial, cause) pairs whose cause has "level" = 3, listed by cause id.
select distinct
    ttc.nct_id,
    ttc.cause_text,
    ttc.cause_id
from trial_to_cause as ttc
where ttc."level" = 3
order by ttc.cause_id
;

-- Distinct-trial count (column c) per cause / condition combination, restricted
-- to causes whose "level" value is 3 or greater.
select
    ttc.cause_id,
    ttc."condition",
    ttc.cause_text,
    count(distinct ttc.nct_id) as c
from trial_to_cause as ttc
where ttc."level" >= 3
group by
    ttc.cause_id,
    ttc."condition",
    ttc.cause_text
-- Optional filter, currently disabled: keep only combinations seen in more than two trials.
--having count(distinct nct_id) > 2
order by ttc.cause_id
;
-- For every trial, keep only the cause rows at that trial's highest "level"
-- value, then attach the icd10_categories row whose [start_code, end_code]
-- range contains the first three characters of the trial's code (ui).
-- Output: distinct (nct_id, cause_id, category_id) triples in a stable order.
--
-- NOTE(review): presumably a higher "level" means a more specific cause --
-- confirm against cause_hierarchy.
-- NOTE(review): the prefix is taken from the raw ui value, while trial_to_cause
-- matches codes with '-' and '.' stripped; this assumes the first three
-- characters of ui never contain punctuation -- confirm.
-- NOTE(review): the range test is a plain string comparison, so the result
-- depends on the column collation -- confirm it orders codes as intended.
with trial_max_level as (
    -- Highest "level" value reached by each trial.
    select
        ttc.nct_id,
        max(ttc."level") as max_level
    from trial_to_cause as ttc
    group by ttc.nct_id
),
deepest_trial_cause as (
    -- Distinct cause rows sitting at the trial's highest level. The 3-character
    -- code prefix is computed once here instead of being repeated in the join.
    -- (No order by: sorting inside a CTE is wasted work and is not preserved.)
    select distinct
        ttc.nct_id,
        ttc.ui,
        substring(ttc.ui for 3) as code_prefix,
        ttc."condition",
        ttc.cause_text,
        ttc.cause_id,
        tml.max_level
    from trial_to_cause as ttc
    inner join trial_max_level as tml
        on tml.nct_id = ttc.nct_id
    where ttc."level" = tml.max_level
),
trial_cause_category as (
    -- Attach every category whose code range covers the prefix.
    select
        dtc.nct_id,
        dtc.code_prefix as code,
        dtc."condition",
        dtc.cause_text,
        dtc.cause_id,
        ic.id as category_id,
        ic.group_name
    from deepest_trial_cause as dtc
    inner join "DiseaseBurden".icd10_categories as ic
        on dtc.code_prefix between ic.start_code and ic.end_code
)
select distinct
    tcc.nct_id,
    tcc.cause_id,
    tcc.category_id
from trial_cause_category as tcc
order by
    tcc.nct_id,
    tcc.cause_id,
    tcc.category_id
;