From ed49d8728a1e5a208ecd6dab451a4380bf03767c Mon Sep 17 00:00:00 2001 From: Will King Date: Tue, 7 Mar 2023 17:39:10 -0800 Subject: [PATCH 1/3] Update 'README.md' --- README.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index f434863..0137bf7 100644 --- a/README.md +++ b/README.md @@ -74,19 +74,18 @@ What this means in regards to this project is that docker makes it easy to setup containers. Install docker based on instructions for your linux distribution. +I use podman (an alternative from Red Hat) because it allows for running without root permissions. ### Docker networking -I have the docker container for the database attached to a -network called "pharmaceutical_research" because I have a -container with pgadmin4 running on that docker network. -This can be adjusted in the dockerfile. +It is helpful to construct an external docker network by running -I also have the database container open on port 5432, the typical -postgresql database port. +`docker network create network_name` -### Database logins -I have choosen the database user of *root* with a password of *root* -because I don't really need this database to be secure. +and then including that network in the docker-compose.yaml. + +# Environment Variables (`.env` file) +I use a single `.env` file to set up the docker containers and pass configuration variables to +the Python scripts. I would suggest changing the default values in `sample.env` to match your needs. If you do need to think about the security of your database I would recommend you start by changing these. 
From f6b56da26120bd1c28ac7f72f5c7be3fe9ef162c Mon Sep 17 00:00:00 2001 From: Will King Date: Wed, 8 Mar 2023 15:17:25 -0800 Subject: [PATCH 2/3] Got working connection between trials and NDA/ANDA start times --- .dbeaver/.credentials-config.json.bak | Bin 0 -> 224 bytes .dbeaver/.data-sources.json.bak | 46 ++++++++ .dbeaver/.project-metadata.json.bak | 1 + development_sql/.project => .project | 3 +- ...ING NCTIDs to NDCs and Marketing dates.sql | 106 ++++++++---------- development_sql/views to history.sql | 2 +- 6 files changed, 97 insertions(+), 61 deletions(-) create mode 100644 .dbeaver/.credentials-config.json.bak create mode 100644 .dbeaver/.data-sources.json.bak create mode 100644 .dbeaver/.project-metadata.json.bak rename development_sql/.project => .project (65%) diff --git a/.dbeaver/.credentials-config.json.bak b/.dbeaver/.credentials-config.json.bak new file mode 100644 index 0000000000000000000000000000000000000000..5d48b6cece9cff94519361a25e23137e02b39008 GIT binary patch literal 224 zcmV<603ZJ!Rn!U~;qc#pj2QT{ZhBT-oH(@+2-ouOpwvk&kh>!(9}Prj3t{5JVb00HyRIc5RIEIAE&)qwD5n zcBT7>oF?>@uS{LYc#ujl&!HXdfgcH{>8xfolql8`Q2Y~mem+UUfskAtP)`QLC?6)f a-I1kb0&rw_60Cc{B!7L0?ZN1*B)D4&rE4(& literal 0 HcmV?d00001 diff --git a/.dbeaver/.data-sources.json.bak b/.dbeaver/.data-sources.json.bak new file mode 100644 index 0000000..115afe3 --- /dev/null +++ b/.dbeaver/.data-sources.json.bak @@ -0,0 +1,46 @@ +{ + "folders": {}, + "connections": { + "postgres-jdbc-186be7b92d8-4d6ef891885b76f5": { + "provider": "postgresql", + "driver": "postgres-jdbc", + "name": "aact_db", + "save-password": true, + "configuration": { + "host": "will-office", + "port": "5432", + "database": "aact_db", + "url": "jdbc:postgresql://will-office:5432/aact_db", + "configurationType": "MANUAL", + "type": "dev", + "auth-model": "native" + } + }, + "mariaDB-186be7ba5a8-7a57b538dcd4188f": { + "provider": "mysql", + "driver": "mariaDB", + "name": "rxnorm_current", + "save-password": true, + "configuration": { 
+ "host": "will-office", + "port": "3306", + "database": "rxnorm_current", + "url": "jdbc:mariadb://will-office:3306/rxnorm_current", + "configurationType": "MANUAL", + "type": "dev", + "auth-model": "native" + } + } + }, + "connection-types": { + "dev": { + "name": "Development", + "color": "255,255,255", + "description": "Regular development database", + "auto-commit": true, + "confirm-execute": false, + "confirm-data-change": false, + "auto-close-transactions": false + } + } +} \ No newline at end of file diff --git a/.dbeaver/.project-metadata.json.bak b/.dbeaver/.project-metadata.json.bak new file mode 100644 index 0000000..d76f603 --- /dev/null +++ b/.dbeaver/.project-metadata.json.bak @@ -0,0 +1 @@ +{"resources":{"development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-datasource":"postgres-jdbc-186be7b92d8-4d6ef891885b76f5","default-catalog":"aact_db","default-schema":"public"}}} \ No newline at end of file diff --git a/development_sql/.project b/.project similarity index 65% rename from development_sql/.project rename to .project index e4f5aba..14a5efa 100644 --- a/development_sql/.project +++ b/.project @@ -1,11 +1,12 @@ - development_sql + ClinicalTrialsDataProcessing + org.jkiss.dbeaver.DBeaverNature diff --git a/development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql b/development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql index b3bd1fd..05deef0 100644 --- a/development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql +++ b/development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql @@ -1,91 +1,79 @@ -/*get all the ndc codes associated with an rxcui - * Same as query - * http://will-office:4000/REST/rxcui/1668240/allhistoricalndcs.json - * note the different formats of the dates. 
+/* OVERVIEW * - * Based on http://will-office:4000/RxNav/search?searchBy=RXCUI&searchTerm=1668240 - * it appears that this rxcui is a sbd or bpck (branded drug or pack) + * This links trials to the first date each drug (indexed by NDA/ANDA etc) is + * put on the market. * - - * If I grab every brand, then every branded drug or pack associated with that drug and then attach that to the nsde data I would get the marketing dates required. - * --get brand names - * trial -> mesh_term -> IN/MIN (rxcui) -> BN (rxcui) - * -- associate brand names to marketing dates - * BN (rxcui) --> SBD/BPCK (RXCUI) --> ndc11 --> nsde + * It takes 3 views to build up to it. * */ -/* - * I do need to figure out a way to change the date types when importing into postgres. In mariadb they ar mmYYYY wheras in the jsonapi they are YYYYmm but I want is YYYY-mm-01 - */ - ----assoicate ingredients, brands, and approved packaging RXCUIs. -create temp table trial_to_pack_rxcui as +--Match trials to brands and ingredients +create or replace view public.match_trials_to_bn_in as with trialncts as ( SELECT DISTINCT nct_id FROM history.trial_snapshots TS ) SELECT bi.nct_id , bi.downcase_mesh_term, - rp.rxcui AS drug_rxcui, - rn.tty1, - rn.rxcui1 as ingredient_rxcui, --ingredients - rn.tty2 , - rn.rxcui2 as brand_or_pack_rxcui --brand or pack + rr.tty2 , + rr.rxcui2 as bn_or_in_cui --brand or ingredient + ,count(*) FROM ctgov.browse_interventions bi - LEFT OUTER JOIN rxnorm_migrated.rxnorm_props AS rp - on bi.downcase_mesh_term = rp.propvalue1 --Link drug ingredient - LEFT OUTER JOIN rxnorm_migrated.rxnorm_relations AS rn - on rp.rxcui = rn.rxcui1 --Grab brand names + left outer JOIN rxnorm_migrated.rxnorm_props AS rp + on bi.downcase_mesh_term = rp.propvalue1 --link names to drug cuis () + left outer join rxnorm_migrated.rxnorm_relations rr + on rr.rxcui1 = rp.rxcui WHERE bi.nct_id in ( SELECT nct_id FROM trialncts ) and bi.mesh_type='mesh-list' - and - rn.tty1 in ('IN','MIN') - and - rn.tty2 = 'BN' + 
+ and rp.propname = 'Active_ingredient_name' + and rr.tty2 in ('BN', 'IN', 'MIN') +group by bi.nct_id, bi.downcase_mesh_term , rr.tty2 ,rr.rxcui2 +order by bi.nct_id ; ---link brand names to drug applications (NDA/ANDA etc) -select rr.rxcui1 as BN, rr.rxcui2 as pack, ah.ndc as pack_ndc11 -from - rxnorm_migrated.rxnorm_relations rr +--match trials to NDC11 codes through brand names +create or replace view public.match_trial_to_ndc11 as +select + mttbi.nct_id, + ah.ndc, + count(*) +from public.match_trials_to_bn_in as mttbi + left outer join rxnorm_migrated.rxnorm_relations as rr + on mttbi.bn_or_in_cui = rr.rxcui1 left outer join rxnorm_migrated."ALLNDC_HISTORY" as ah on rr.rxcui2 = ah.rxcui where - tty1 = 'BN' + rr.tty1 = 'BN' and - tty2 in ('SBD', 'BPCK') + rr.tty2 in ('SBD', 'BPCK') and - ah.sab='RXNORM' - + ah.sab='RXNORM' +group by mttbi.nct_id, ah.ndc +order by mttbi.nct_id, ah.ndc ; ----associate NDAs/ANDAs to marketing start dates ----Get start of coverage periods for NSDE dates grouped by arbitrary grouping. -SELECT n.application_number_or_citation, count(*), min( marketing_start_date ) -FROM spl.nsde as n -where product_type = 'HUMAN PRESCRIPTION DRUG' -group by n.application_number_or_citation ; - - ----For a given date, find which NDAs/ANDAs were active were active. 
-SELECT n.application_number_or_citation, count(*) -FROM spl.nsde as n -where - product_type = 'HUMAN PRESCRIPTION DRUG' - and - marketing_start_date < '2010-05-01' - and - marketing_end_date > '2010-05-01' -group by n.application_number_or_citation ; - - +---associate trials to marketing start dates +create or replace view public.match_trial_to_marketing_start_date as +select + mttn.nct_id, + n.application_number_or_citation, + min(n.marketing_start_date ) +from match_trial_to_ndc11 mttn + inner join spl.nsde n + on mttn.ndc = n.package_ndc11 +where + n.product_type = 'HUMAN PRESCRIPTION DRUG' + and + n.marketing_category in ('NDA','ANDA','BLA', 'NDA authorized generic', 'NDA AUTHORIZED GENERIC') +group by mttn.nct_id,n.application_number_or_citation +order by mttn.nct_id +; diff --git a/development_sql/views to history.sql b/development_sql/views to history.sql index 3e9d6fc..9d79d31 100644 --- a/development_sql/views to history.sql +++ b/development_sql/views to history.sql @@ -1,5 +1,5 @@ /***************CREATE VIEWS*******************/ -create view if not exists +create or replace view history.match_drugs_to_trials as select nct_id, rxcui, propvalue1 from From ef7ed7001bfbf7a18bfebb343ad9d7c5ec71952f Mon Sep 17 00:00:00 2001 From: will king Date: Fri, 10 Mar 2023 15:32:11 -0800 Subject: [PATCH 3/3] added info for setting up the db from DrugCentral.org --- .dbeaver/.credentials-config.json.bak | Bin 224 -> 240 bytes .dbeaver/.data-sources.json.bak | 27 ++++++++++----------- .dbeaver/.project-metadata.json.bak | 2 +- .gitignore | 4 +++ containers/drugcentral/docker-compose.yaml | 23 ++++++++++++++++++ containers/drugcentral/init.sh | 9 +++++++ development_sql/Script.sql | 0 7 files changed, 50 insertions(+), 15 deletions(-) create mode 100644 containers/drugcentral/docker-compose.yaml create mode 100755 containers/drugcentral/init.sh create mode 100644 development_sql/Script.sql diff --git a/.dbeaver/.credentials-config.json.bak 
b/.dbeaver/.credentials-config.json.bak index 5d48b6cece9cff94519361a25e23137e02b39008..2d11c2b429ba9d2b06535274604532287cfd7f5e 100644 GIT binary patch literal 240 zcmV57rnJ0`B~XXT}ua$agq_u$lbOM(_i$rndPHP|Lc*#tF?H_n8x$h;La z^!!FA7s_c)V}D}S^4Pd)h6fl+e%3=lxo+l`KwV8DJKCqGHxQzf1t+_HGmkMkyx_fI zWg)w(uh;b>H_T3C<$uX5VOPcAHS_K&c;$W1<=N}GRc@I<>d7cRsghiWz|&QqEv*bW z;j$JYHPVw`Kt2~BcPw3?u68manJh_wZa~-;n}{g1?dfl1%naA#+b&n>>IP-i>SKP% qb(wkfhATPbIFA#xrk`lyVrKjaD5lQ0Ps)(u9JuJtvBh*!KlA=zJbHQn literal 224 zcmV<603ZJ!Rn!U~;qc#pj2QT{ZhBT-oH(@+2-ouOpwvk&kh>!(9}Prj3t{5JVb00HyRIc5RIEIAE&)qwD5n zcBT7>oF?>@uS{LYc#ujl&!HXdfgcH{>8xfolql8`Q2Y~mem+UUfskAtP)`QLC?6)f a-I1kb0&rw_60Cc{B!7L0?ZN1*B)D4&rE4(& diff --git a/.dbeaver/.data-sources.json.bak b/.dbeaver/.data-sources.json.bak index 115afe3..b2d156b 100644 --- a/.dbeaver/.data-sources.json.bak +++ b/.dbeaver/.data-sources.json.bak @@ -1,31 +1,30 @@ { "folders": {}, "connections": { - "postgres-jdbc-186be7b92d8-4d6ef891885b76f5": { - "provider": "postgresql", - "driver": "postgres-jdbc", - "name": "aact_db", + "mariaDB-186c896820e-6ff11b5b802d8b82": { + "provider": "mysql", + "driver": "mariaDB", + "name": "rxnav", "save-password": true, "configuration": { "host": "will-office", - "port": "5432", - "database": "aact_db", - "url": "jdbc:postgresql://will-office:5432/aact_db", + "port": "3306", + "url": "jdbc:mariadb://will-office:3306/", "configurationType": "MANUAL", "type": "dev", "auth-model": "native" } }, - "mariaDB-186be7ba5a8-7a57b538dcd4188f": { - "provider": "mysql", - "driver": "mariaDB", - "name": "rxnorm_current", + "postgres-jdbc-186c896a347-2a3d946d2dea4df7": { + "provider": "postgresql", + "driver": "postgres-jdbc", + "name": "aact_db", "save-password": true, "configuration": { "host": "will-office", - "port": "3306", - "database": "rxnorm_current", - "url": "jdbc:mariadb://will-office:3306/rxnorm_current", + "port": "5432", + "database": "aact_db", + "url": "jdbc:postgresql://will-office:5432/aact_db", "configurationType": 
"MANUAL", "type": "dev", "auth-model": "native" diff --git a/.dbeaver/.project-metadata.json.bak b/.dbeaver/.project-metadata.json.bak index d76f603..9310679 100644 --- a/.dbeaver/.project-metadata.json.bak +++ b/.dbeaver/.project-metadata.json.bak @@ -1 +1 @@ -{"resources":{"development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-datasource":"postgres-jdbc-186be7b92d8-4d6ef891885b76f5","default-catalog":"aact_db","default-schema":"public"}}} \ No newline at end of file +{"resources":{"development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"}}} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 9da644c..0e6b8e4 100644 --- a/.gitignore +++ b/.gitignore @@ -189,5 +189,9 @@ containers/RxNav-In-a-box/rxnav-in-a-box-* #Ignore stuff from AACT_downlaoder containers/AACT_downloader/postgresql/* containers/AACT_downloader/aact_downloads/* +#ignore stuff in DrugCentral Downloader +containers/drugcentral/docker-entrypoint-initdb.d/*.sql +containers/drugcentral/docker-entrypoint-initdb.d/*.sql.gz +containers/drugcentral/db_store/* diff --git a/containers/drugcentral/docker-compose.yaml b/containers/drugcentral/docker-compose.yaml new file mode 100644 index 0000000..9870c73 --- /dev/null +++ b/containers/drugcentral/docker-compose.yaml @@ -0,0 +1,23 @@ +version: '3' + +networks: + pharmaceutical_research: #because it helps to have a way to link specifically to this. + +services: + aact_db: + image: postgres:14-alpine + networks: + - pharmaceutical_research + container_name: DrugCentral + #restart: always #restart after crashes + environment: + POSTGRES_PASSWORD: root + ports: + - "54320:5432" #host:container + volumes: #host:container is the format. + # persistent storage for the database: must target the image's data directory (PGDATA) + - ./db_store/:/var/lib/postgresql/data + # this is the folder containing entrypoint info. 
+ - ./docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/ + + diff --git a/containers/drugcentral/init.sh b/containers/drugcentral/init.sh new file mode 100755 index 0000000..791d337 --- /dev/null +++ b/containers/drugcentral/init.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# Download the DrugCentral SQL dump into ./docker-entrypoint-initdb.d/ and decompress it. +set -euo pipefail # stop at the first failed step (missing directory, failed download) +filename="drugcentral.dump.08222022.sql.gz" + +cd ./docker-entrypoint-initdb.d/ +# --fail: an HTTP error must not be saved as the dump; --location: follow redirects +curl --fail --location "https://unmtid-shinyapps.net/download/$filename" --output "$filename" +gzip -d "$filename" diff --git a/development_sql/Script.sql b/development_sql/Script.sql new file mode 100644 index 0000000..e69de29