more work on linking market data

llm-extraction
will king 3 years ago
parent 4283719d3d
commit bbf8c77e6d

@ -1,7 +1,3 @@
/* Relationships from brands (BN) to branded drugs (SBD) and branded packs (BPCK); capped at 100 rows. */
select rr.*
from rxnorm_migrated.rxnorm_relations as rr
where rr.tty1 = 'BN'
    and rr.tty2 in ('SBD', 'BPCK')
limit 100;
/*get all the ndc codes associated with an rxcui
* Same as query
@ -11,17 +7,85 @@ limit 100;
* Based on http://will-office:4000/RxNav/search?searchBy=RXCUI&searchTerm=1668240
* it appears that this rxcui is a sbd or bpck (branded drug or pack)
*
* If I grab every brand, then every branded drug or pack associated with that drug and then every
* */
-- NDC history rows for RXCUI 1668240, restricted to rows whose SAB is 'RXNORM'.
select ah.*
from ALLNDC_HISTORY as ah
where ah.SAB = 'RXNORM'
    and ah.RXCUI = '1668240'
;
/**
* If I grab every brand, then every branded drug or pack associated with that drug and then attach that to the nsde data I would get the marketing dates required.
* trial -> mesh_term -> IN/MIN (rxcui) -> BN (rxcui) -> SBD/BPCK (rxcui) -> ndc11 -> nsde (marketing dates)
* --get brand names
* trial -> mesh_term -> IN/MIN (rxcui) -> BN (rxcui)
* -- associate brand names to marketing dates
* BN (rxcui) --> SBD/BPCK (RXCUI) --> ndc11 --> nsde
* */
/*
* I do need to figure out a way to change the date types when importing into postgres. In mariadb they are mmYYYY, whereas in the jsonapi they are YYYYmm, but what I want is YYYY-mm-01.
*/
--- Associate trials with ingredient RXCUIs and the brand-name RXCUIs related to them.
--- Chain: trial -> mesh_term -> IN/MIN (rxcui) -> BN (rxcui).
--- The joins are INNER on purpose: the tty filters in the WHERE clause reject every row
--- that lacks a matching relation, so an outer join could never return an unmatched trial.
CREATE TEMP TABLE trial_to_pack_rxcui AS
WITH trialncts AS (
    -- Distinct trial ids that appear in the snapshot history.
    SELECT DISTINCT ts.nct_id
    FROM history.trial_snapshots AS ts
)
SELECT
    bi.nct_id,
    bi.downcase_mesh_term,
    rp.rxcui AS drug_rxcui,
    rn.tty1,
    rn.rxcui1 AS ingredient_rxcui,    -- ingredient (tty1 is IN or MIN)
    rn.tty2,
    rn.rxcui2 AS brand_or_pack_rxcui  -- always a brand name here (tty2 = 'BN')
FROM ctgov.browse_interventions AS bi
-- Link the MeSH term to a drug concept.
-- NOTE(review): matches on propvalue1 regardless of which property the value belongs to -- confirm this is intended.
INNER JOIN rxnorm_migrated.rxnorm_props AS rp
    ON bi.downcase_mesh_term = rp.propvalue1
-- Follow relations from that concept; the WHERE clause keeps only ingredient -> brand name rows.
INNER JOIN rxnorm_migrated.rxnorm_relations AS rn
    ON rp.rxcui = rn.rxcui1
WHERE bi.nct_id IN (SELECT nct_id FROM trialncts)
    AND bi.mesh_type = 'mesh-list'
    AND rn.tty1 IN ('IN', 'MIN')
    AND rn.tty2 = 'BN'
;
-- Link brand names (BN) to their branded drugs/packs (SBD/BPCK) and to the NDC codes of those packs.
-- This step only reaches NDC codes; it does not touch applications (NDA/ANDA) or marketing dates.
-- INNER JOIN is explicit because the ah.sab filter already discards relations without an NDC row.
SELECT
    rr.rxcui1 AS BN,
    rr.rxcui2 AS pack,
    ah.ndc AS pack_ndc11
FROM rxnorm_migrated.rxnorm_relations AS rr
INNER JOIN rxnorm_migrated."ALLNDC_HISTORY" AS ah
    ON rr.rxcui2 = ah.rxcui
WHERE rr.tty1 = 'BN'
    AND rr.tty2 IN ('SBD', 'BPCK')
    AND ah.sab = 'RXNORM'
;
--- Associate NDAs/ANDAs with marketing start dates.
--- Per application number/citation: number of NSDE rows and the earliest marketing start date,
--- for human prescription drugs only.
select
    n.application_number_or_citation,
    count(*),
    min(n.marketing_start_date)
from spl.nsde as n
where n.product_type = 'HUMAN PRESCRIPTION DRUG'
group by n.application_number_or_citation
;
--- For a given date, find which NDAs/ANDAs were active, with the number of NSDE rows per application.
--- A row counts as active when marketing started on or before the date and had not ended before it.
--- Bounds are inclusive so a product that starts or ends exactly on the date is still counted.
--- NOTE(review): assumes a NULL marketing_end_date means the product is still marketed -- confirm against the import.
SELECT
    n.application_number_or_citation,
    count(*)
FROM spl.nsde AS n
WHERE n.product_type = 'HUMAN PRESCRIPTION DRUG'
    AND n.marketing_start_date <= '2010-05-01'
    AND (
        n.marketing_end_date IS NULL  -- a plain comparison against NULL is never true and would drop these rows
        OR n.marketing_end_date >= '2010-05-01'
    )
GROUP BY n.application_number_or_citation
;

@ -133,9 +133,6 @@ if __name__ == "__main__":
#insert the data with page_size
extras.execute_values(pcurse,psql_insert,argslist=a,template=template, page_size=1000)
"""
ISSUE HERE ^^^^^ somehow execute values isn't separating over dictionaries very well
https://www.psycopg.org/docs/extras.html#psycopg2.extras.execute_batch
maybe replace with execute_batch?
"""

Loading…
Cancel
Save