Merge completed: Merged working versions from home and office PCs

llm-extraction
youainti 3 years ago
commit 123fe3b5e4

@ -0,0 +1 @@
Zщ<EFBFBD><EFBFBD>е;&ЎюgхЅ<EFBFBD>r^jNїрдwK<EFBFBD>HЩGF5и&Gй5<EFBFBD>7Ю<EFBFBD>АШМ4єќF'ЪiNcbжђиИj<EFBFBD>K~жCAЙnц<EFBFBD>@]M";кЇЇ7Ђ<>'Л~3<>1:МрНae!ЛЋЏзѕ#7ЬNdхЩ+aWХр5ѓю*xххйыЙUn<55>AъЩ(>Љ<>\<5C>РгU<D0B3>-­ 9сВ"5в<EFBFBD>_@> w,] Ўv2"<EFBFBD>,I<EFBFBD>n@и<EFBFBD><EFBFBD>(ДэщodЬ зул.Wщъeеъc~Щu<EFBFBD>yѕ<EFBFBD>+9ф8<EFBFBD>ЕІ<EFBFBD>hтbfќ (ІЮЗOЪ<EFBFBD>тИшЮБХtR?ѓў

@ -0,0 +1,45 @@
{
"folders": {},
"connections": {
"mariaDB-186c896820e-6ff11b5b802d8b82": {
"provider": "mysql",
"driver": "mariaDB",
"name": "rxnav",
"save-password": true,
"configuration": {
"host": "will-office",
"port": "3306",
"url": "jdbc:mariadb://will-office:3306/",
"configurationType": "MANUAL",
"type": "dev",
"auth-model": "native"
}
},
"postgres-jdbc-186c896a347-2a3d946d2dea4df7": {
"provider": "postgresql",
"driver": "postgres-jdbc",
"name": "aact_db",
"save-password": true,
"configuration": {
"host": "will-office",
"port": "5432",
"database": "aact_db",
"url": "jdbc:postgresql://will-office:5432/aact_db",
"configurationType": "MANUAL",
"type": "dev",
"auth-model": "native"
}
}
},
"connection-types": {
"dev": {
"name": "Development",
"color": "255,255,255",
"description": "Regular development database",
"auto-commit": true,
"confirm-execute": false,
"confirm-data-change": false,
"auto-close-transactions": false
}
}
}

@ -0,0 +1 @@
{"resources":{"development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"}}}

4
.gitignore vendored

@ -189,5 +189,9 @@ containers/RxNav-In-a-box/rxnav-in-a-box-*
#Ignore stuff from AACT_downloader #Ignore stuff from AACT_downloader
containers/AACT_downloader/postgresql/* containers/AACT_downloader/postgresql/*
containers/AACT_downloader/aact_downloads/* containers/AACT_downloader/aact_downloads/*
#ignore stuff in DrugCentral Downloader
containers/drugcentral/docker-entrypoint-initdb.d/*.sql
containers/drugcentral/docker-entrypoint-initdb.d/*.sql.gz
containers/drugcentral/db_store/*

@ -1,11 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<projectDescription> <projectDescription>
<name>development_sql</name> <name>ClinicalTrialsDataProcessing</name>
<comment></comment> <comment></comment>
<projects> <projects>
</projects> </projects>
<buildSpec> <buildSpec>
</buildSpec> </buildSpec>
<natures> <natures>
<nature>org.jkiss.dbeaver.DBeaverNature</nature>
</natures> </natures>
</projectDescription> </projectDescription>

@ -74,19 +74,18 @@ What this means in regards to this project is that docker makes it
easy to setup containers. easy to setup containers.
Install docker based on instructions for your linux distribution. Install docker based on instructions for your linux distribution.
I use podman (an alternative from RedHat) because it allows for running without root permissions.
### Docker networking ### Docker networking
I have the docker container for the database attached to a It is helpful to construct an external docker network by running
network called "pharmaceutical_research" because I have a
container with pgadmin4 running on that docker network.
This can be adjusted in the dockerfile.
I also have the database container open on port 5432, the typical `docker network create network_name`
postgresql database port.
### Database logins and then including that network in the docker-compose.yaml
I have choosen the database user of *root* with a password of *root*
because I don't really need this database to be secure. # Environment Variables (`.env` file)
I use a single `.env` file to set up the docker containers and pass configuration variables to
the python scripts. I suggest changing the default values in `sample.env` to match your needs.
If you do need to think about the security of your database I would recommend If you do need to think about the security of your database I would recommend
you start by changing these. you start by changing these.

@ -0,0 +1,23 @@
# Compose file for a local PostgreSQL container holding the DrugCentral dump.
version: '3'

networks:
  pharmaceutical_research: #because it helps to have a way to link specifically to this.

services:
  # NOTE(review): the service key is still "aact_db" although the container is
  # named DrugCentral; kept unchanged so anything addressing the service by
  # this name keeps working. Confirm whether a rename is wanted.
  aact_db:
    image: postgres:14-alpine
    networks:
      - pharmaceutical_research
    container_name: DrugCentral
    #restart: always #restart after crashes
    environment:
      POSTGRES_PASSWORD: root
    ports:
      - "54320:5432" #host:container
    volumes: #host:container is the format.
      # this is persistent storage for the database.
      # The mount must target /var/lib/postgresql/data (the image's data
      # directory), not its parent: for this image version a mount on the
      # parent is shadowed by an anonymous volume and the data is lost when
      # the container is re-created. The host side uses a data/ subfolder so
      # a db_store/ left over from the previous layout stays usable.
      - ./db_store/data/:/var/lib/postgresql/data/
      # this is the folder containing entrypoint info.
      - ./docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/

@ -0,0 +1,9 @@
#!/bin/bash
# Download the DrugCentral SQL dump into ./docker-entrypoint-initdb.d/ and
# decompress it in place.
# The target folder is resolved relative to the current working directory, so
# run this from the directory that contains docker-entrypoint-initdb.d/.

# Stop at the first failing step (missing folder, failed download) instead of
# carrying on and decompressing a partial or bogus file.
set -euo pipefail

filename="drugcentral.dump.08222022.sql.gz"

cd ./docker-entrypoint-initdb.d/

# --fail: exit non-zero on an HTTP error rather than saving the error page
#         under the dump's name.
# --location: follow redirects if the download URL moves.
curl --fail --location "https://unmtid-shinyapps.net/download/$filename" --output "$filename"

# Quoted, and "--" guards against the name being parsed as an option.
gzip -d -- "$filename"

@ -1,42 +1,38 @@
/*get all the ndc codes associated with an rxcui /* OVERVIEW
* Same as query
* http://will-office:4000/REST/rxcui/1668240/allhistoricalndcs.json
* note the different formats of the dates.
* *
* Based on http://will-office:4000/RxNav/search?searchBy=RXCUI&searchTerm=1668240 * This links trials to the first date each drug (indexed by NDA/ANDA etc) is
* it appears that this rxcui is a sbd or bpck (branded drug or pack) * put on the market.
* *
* It takes 3 views to build up to it.
* If I grab every brand, then every branded drug or pack associated with that drug and then attach that to the nsde data I would get the marketing dates required.
* --get brand names
* trial -> mesh_term -> IN/MIN (rxcui) -> BN (rxcui)
* -- associate brand names to marketing dates
* BN (rxcui) --> SBD/BPCK (RXCUI) --> ndc11 --> nsde
* */ * */
/* --Match trials to brands and ingredients
* I do need to figure out a way to change the date types when importing into postgres. In mariadb they ar mmYYYY wheras in the jsonapi they are YYYYmm but I want is YYYY-mm-01 create or replace view public.match_trials_to_bn_in as
*/
---assoicate ingredients, brands, and approved packaging RXCUIs.
create temp table tmp_trial_to_ingred as
with trialncts as ( with trialncts as (
SELECT DISTINCT nct_id FROM history.trial_snapshots TS SELECT DISTINCT nct_id FROM history.trial_snapshots TS
) )
SELECT SELECT
bi.nct_id , bi.nct_id ,
bi.downcase_mesh_term, bi.downcase_mesh_term,
rp.rxcui AS drug_rxcui rr.tty2 ,
rr.rxcui2 as bn_or_in_cui --brand or ingredient
,count(*)
FROM ctgov.browse_interventions bi FROM ctgov.browse_interventions bi
INNER JOIN rxnorm_migrated.rxnorm_props AS rp left outer JOIN rxnorm_migrated.rxnorm_props AS rp
on bi.downcase_mesh_term = rp.propvalue1 --Link drug ingredient on bi.downcase_mesh_term = rp.propvalue1 --link names to drug cuis ()
left outer join rxnorm_migrated.rxnorm_relations rr
on rr.rxcui1 = rp.rxcui
WHERE WHERE
bi.nct_id in ( bi.nct_id in (
SELECT nct_id FROM trialncts SELECT nct_id FROM trialncts
) )
and and
bi.mesh_type='mesh-list' bi.mesh_type='mesh-list'
and rp.propname = 'Active_ingredient_name'
and rr.tty2 in ('BN', 'IN', 'MIN')
group by bi.nct_id, bi.downcase_mesh_term , rr.tty2 ,rr.rxcui2
order by bi.nct_id
; ;
--running out of space. --running out of space.
@ -87,42 +83,44 @@ where
--link brand names to drug applications (NDA/ANDA etc) --match trials to through brands NDC11
select rr.rxcui1 as BN, rr.rxcui2 as pack, ah.ndc as pack_ndc11 create or replace view public.match_trial_to_ndc11 as
from select
rxnorm_migrated.rxnorm_relations rr mttbi.nct_id,
ah.ndc,
count(*)
from public.match_trials_to_bn_in as mttbi
left outer join rxnorm_migrated.rxnorm_relations as rr
on mttbi.bn_or_in_cui = rr.rxcui1
left outer join rxnorm_migrated."ALLNDC_HISTORY" as ah left outer join rxnorm_migrated."ALLNDC_HISTORY" as ah
on rr.rxcui2 = ah.rxcui on rr.rxcui2 = ah.rxcui
where where
tty1 = 'BN' rr.tty1 = 'BN'
and and
tty2 in ('SBD', 'BPCK') rr.tty2 in ('SBD', 'BPCK')
and and
ah.sab='RXNORM' ah.sab='RXNORM'
group by mttbi.nct_id, ah.ndc
order by mttbi.nct_id, ah.ndc
; ;
---associate NDAs/ANDAs to marketing start dates
---Per application number/citation among human prescription drugs:
---how many NSDE rows it has and its earliest marketing start date.
select
    n.application_number_or_citation,
    count(*),
    min(n.marketing_start_date)
from spl.nsde n
where n.product_type = 'HUMAN PRESCRIPTION DRUG'
group by n.application_number_or_citation
;
---For a given date, find which NDAs/ANDAs were active.
---Counts NSDE rows per application number/citation whose marketing window
---strictly contains the probe date (here 2010-05-01).
---A comparison against NULL is never true, so without the explicit
---"is null" branch every row lacking an end date would be dropped.
---NOTE(review): assumes a NULL marketing_end_date means the product is still
---marketed -- confirm against how spl.nsde is loaded.
select
    n.application_number_or_citation,
    count(*)
from spl.nsde as n
where
    n.product_type = 'HUMAN PRESCRIPTION DRUG'
    and n.marketing_start_date < '2010-05-01'
    and (
        n.marketing_end_date is null
        or n.marketing_end_date > '2010-05-01'
    )
group by n.application_number_or_citation
;
---associate trials to marketing start dates
/* One row per (nct_id, application number/citation): the earliest
 * marketing_start_date among the NSDE rows whose package_ndc11 equals an NDC
 * linked to the trial by match_trial_to_ndc11.
 * Only human prescription drugs in the listed marketing categories count.
 *
 * The source view is schema-qualified, like the other view definitions, so
 * this statement does not depend on the session's search_path.
 *
 * min() is left unaliased on purpose: assuming PostgreSQL, the column is
 * exposed as "min", and CREATE OR REPLACE VIEW cannot rename a column of an
 * already existing view.
 */
create or replace view public.match_trial_to_marketing_start_date as
select
    mttn.nct_id,
    n.application_number_or_citation,
    min(n.marketing_start_date)
from public.match_trial_to_ndc11 as mttn
inner join spl.nsde as n
    on mttn.ndc = n.package_ndc11
where
    n.product_type = 'HUMAN PRESCRIPTION DRUG'
    and n.marketing_category in ('NDA', 'ANDA', 'BLA', 'NDA authorized generic', 'NDA AUTHORIZED GENERIC')
group by mttn.nct_id, n.application_number_or_citation
order by mttn.nct_id
;

@ -1,5 +1,5 @@
/***************CREATE VIEWS*******************/ /***************CREATE VIEWS*******************/
create view if not exists create or replace view
history.match_drugs_to_trials as history.match_drugs_to_trials as
select nct_id, rxcui, propvalue1 select nct_id, rxcui, propvalue1
from from

@ -3,7 +3,7 @@ import numpy as np
IHME_COD_FILEPATH = "./IHME_GBD_2019_COD_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX" IHME_COD_FILEPATH = "./IHME_GBD_2019_COD_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
IHME_NONFATAL_FILEPATH = "./IHME_GBD_2019_NONFATAL_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX" IHME_NONFATAL_FILEPATH = "./IHME_GBD_2019_NONFATAL_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
ICD10CM_ORDER_FILEPATH = "./icd10-2019_categories_only.psv" ICD10CM_ORDER_FILEPATH = "./icd10_combined-who-cms.psv"
#READ in ICD10CM codes #READ in ICD10CM codes

Loading…
Cancel
Save