Merge completed: Merged working versions from home and office PCs

3 years ago · 123fe3b5e4
parent 470dfc2611 ef7ed7001b
commit 123fe3b5e4
12 changed files with 142 additions and 61 deletions
--- a/.dbeaver/.credentials-config.json.bak
+++ b/.dbeaver/.credentials-config.json.bak
@ -0,0 +1 @@
 Zщ<EFBFBD><EFBFBD>е;&ЎюgхЅ<EFBFBD>r^jNїрдwK<EFBFBD>HЩGF5и&Gй5<EFBFBD>7Ю<EFBFBD>АШМ4єќF'ЪiNcbжђиИj<EFBFBD>K~жCAЙnц<EFBFBD>@]M";кЇЇ7Ђ<>'Л~3<>1:МрНae!ЛЋЏзѕ#7ЬNdхЩ+aWХр5ѓю*xх}ЯхйыЙUn<55>AъЩ(>Љ<>\<5C>РгU<D0B3>-9сВ"5в<EFBFBD>_@> w,] Ўv2"<EFBFBD>,I<EFBFBD>n@и<EFBFBD><EFBFBD>(ДэщodЬзул.Wщъeеъc~Щu<EFBFBD>yѕ<EFBFBD>+9ф8<EFBFBD>ЕІ<EFBFBD>hтbfќ	(ІЮЗOЪ<EFBFBD>тИшЮБХtR?ѓў
--- a/.dbeaver/.data-sources.json.bak
+++ b/.dbeaver/.data-sources.json.bak
@ -0,0 +1,45 @@
 {
 	"folders": {},
 	"connections": {
 		"mariaDB-186c896820e-6ff11b5b802d8b82": {
 			"provider": "mysql",
 			"driver": "mariaDB",
 			"name": "rxnav",
 			"save-password": true,
 			"configuration": {
 				"host": "will-office",
 				"port": "3306",
 				"url": "jdbc:mariadb://will-office:3306/",
 				"configurationType": "MANUAL",
 				"type": "dev",
 				"auth-model": "native"
 			}
 		},
 		"postgres-jdbc-186c896a347-2a3d946d2dea4df7": {
 			"provider": "postgresql",
 			"driver": "postgres-jdbc",
 			"name": "aact_db",
 			"save-password": true,
 			"configuration": {
 				"host": "will-office",
 				"port": "5432",
 				"database": "aact_db",
 				"url": "jdbc:postgresql://will-office:5432/aact_db",
 				"configurationType": "MANUAL",
 				"type": "dev",
 				"auth-model": "native"
 			}
 		}
 	},
 	"connection-types": {
 		"dev": {
 			"name": "Development",
 			"color": "255,255,255",
 			"description": "Regular development database",
 			"auto-commit": true,
 			"confirm-execute": false,
 			"confirm-data-change": false,
 			"auto-close-transactions": false
 		}
 	}
 }
--- a/.dbeaver/.project-metadata.json.bak
+++ b/.dbeaver/.project-metadata.json.bak
@ -0,0 +1 @@
 {"resources":{"development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"}}}
--- a/.gitignore
+++ b/.gitignore
@ -189,5 +189,9 @@ containers/RxNav-In-a-box/rxnav-in-a-box-*
 #Ignore stuff from AACT_downloader
 containers/AACT_downloader/postgresql/*
 containers/AACT_downloader/aact_downloads/*
 #ignore stuff in DrugCentral Downloader
 containers/drugcentral/docker-entrypoint-initdb.d/*.sql
 containers/drugcentral/docker-entrypoint-initdb.d/*.sql.gz
 containers/drugcentral/db_store/*
--- a/development_sql/.project
+++ b/development_sql/.project
@ -1,11 +1,12 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <projectDescription>
-	<name>development_sql</name>
+	<name>ClinicalTrialsDataProcessing</name>
 	<comment></comment>
 	<projects>
 	</projects>
 	<buildSpec>
 	</buildSpec>
 	<natures>
 		<nature>org.jkiss.dbeaver.DBeaverNature</nature>
 	</natures>
 </projectDescription>
--- a/README.md
+++ b/README.md
@ -74,19 +74,18 @@ What this means in regards to this project is that docker makes it
 easy to setup containers.
 Install docker based on instructions for your linux distribution.
 I use podman (an alternative from RedHat) because it allows for running without root permissions.
 ### Docker networking
-I have the docker container for the database attached to a 
+It is helpful to construct an external docker network by running
 network called "pharmaceutical_research" because I have a 
 container with pgadmin4 running on that docker network.
 This can be adjusted in the dockerfile.
-I also have the database container open on port 5432, the typical 
+`docker network create network_name`
 postgresql database port.
-### Database logins
+and then including that network in the docker-compose.yaml
-I have choosen the database user of *root* with a password of *root*
+
-because I don't really need this database to be secure.
+# Environment Variables (`.env` file)
 I use a single .env file to set up the docker containers and pass configuration variables to 
 the python scripts. I would suggest changing the default values in `sample.env` to match your needs.
 If you do need to think about the security of your database I would recommend 
 you start by changing these.
--- a/containers/drugcentral/docker-compose.yaml
+++ b/containers/drugcentral/docker-compose.yaml
@ -0,0 +1,23 @@
 # Compose file for a local DrugCentral PostgreSQL container.
 # The dump fetched by init.sh is placed in ./docker-entrypoint-initdb.d/, which is mounted below.
 version: '3'
 networks:
  pharmaceutical_research: #because it helps to have a way to link specifically to this. 
 services:
  # NOTE(review): the service key is aact_db although the container is DrugCentral;
  # looks carried over from the AACT compose file. Left unchanged because other
  # containers on the network may resolve this name - confirm before renaming.
  aact_db:
    image: postgres:14-alpine
    networks:
      - pharmaceutical_research
    container_name: DrugCentral
    #restart: always #restart after crashes
    environment:
      # development-only credential; change it before exposing the published port.
      POSTGRES_PASSWORD: root
    ports:
      - "54320:5432" #host:container
    volumes: #host:container is the format.
      # this is persistent storage for the database.
      # The postgres:14 image keeps its cluster in /var/lib/postgresql/data (a declared
      # volume), so the bind mount must target that directory; mounting the parent
      # directory leaves the data in an anonymous volume instead of ./db_store/.
      - ./db_store/:/var/lib/postgresql/data
      # this is the folder containing entrypoint info.
      - ./docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/
--- a/containers/drugcentral/init.sh
+++ b/containers/drugcentral/init.sh
@ -0,0 +1,9 @@
 #!/bin/bash
 # Download the DrugCentral SQL dump into ./docker-entrypoint-initdb.d/ and decompress it there.
 # Run from the directory that contains docker-entrypoint-initdb.d/.
 # Abort on the first failing command, on unset variables, and on pipeline errors,
 # so a failed cd or download never leaves a bogus file behind for gzip.
 set -euo pipefail
 filename="drugcentral.dump.08222022.sql.gz"
 cd ./docker-entrypoint-initdb.d/
 # --fail makes curl exit non-zero on an HTTP error instead of saving the error page as the .gz file.
 curl --fail "https://unmtid-shinyapps.net/download/$filename" --output "$filename"
 # Quoted so the name is passed as a single argument; gzip -d replaces the .gz with the plain .sql file.
 gzip -d "$filename"
--- a/development_sql/ASSOICATING
+++ b/development_sql/ASSOICATING
@ -1,42 +1,38 @@
-/*get all the ndc codes associated with an rxcui
+/* OVERVIEW
 * Same as query
 * http://will-office:4000/REST/rxcui/1668240/allhistoricalndcs.json
 * note the different formats of the dates.
 * 
- * Based on http://will-office:4000/RxNav/search?searchBy=RXCUI&searchTerm=1668240
+ * This links trials to the first date each drug (indexed by NDA/ANDA etc) is
- * it appears that this rxcui is a sbd or bpck (branded drug or pack)
+ * put on the market.
 * 
-
+ * It takes 3 views to build up to it.
 * If I grab every brand, then every branded drug or pack associated with that drug and then attach that to the nsde data I would get the marketing dates required.
 * --get brand names
 * trial -> mesh_term -> IN/MIN (rxcui) -> BN (rxcui) 
 * -- associate brand names to marketing dates
 * BN (rxcui) --> SBD/BPCK (RXCUI) --> ndc11 --> nsde 
 * */
-/*
+--Match trials to brands and ingredients
- * I do need to figure out a way to change the date types when importing into postgres. In mariadb they ar mmYYYY wheras in the jsonapi they are YYYYmm but I want is YYYY-mm-01
+create or replace view public.match_trials_to_bn_in as
 */
 ---associate ingredients, brands, and approved packaging RXCUIs.
 create temp table tmp_trial_to_ingred as
 with trialncts as (
        SELECT DISTINCT nct_id  FROM history.trial_snapshots TS
    )
 SELECT 
    bi.nct_id ,
    bi.downcase_mesh_term, 
-    rp.rxcui  AS drug_rxcui
+    rr.tty2 ,
    rr.rxcui2 as bn_or_in_cui --brand or ingredient
    ,count(*)
 FROM ctgov.browse_interventions bi
-    INNER JOIN rxnorm_migrated.rxnorm_props AS rp
+    left outer JOIN rxnorm_migrated.rxnorm_props AS rp
-        on bi.downcase_mesh_term  = rp.propvalue1  --Link drug ingredient
+        on bi.downcase_mesh_term  = rp.propvalue1 --link names to drug cuis ()
    left outer join rxnorm_migrated.rxnorm_relations rr 
    	on rr.rxcui1 = rp.rxcui 
 WHERE
    bi.nct_id in (
        SELECT nct_id  FROM trialncts
        )
    and
    bi.mesh_type='mesh-list'
    and rp.propname = 'Active_ingredient_name'
    and rr.tty2 in ('BN', 'IN', 'MIN')
 group by bi.nct_id, bi.downcase_mesh_term , rr.tty2 ,rr.rxcui2 
 order by bi.nct_id 
 ;
 --running out of space.
@ -87,42 +83,44 @@ where
--link brand names to drug applications (NDA/ANDA etc)
+--match trials to NDC11 through brands
-select rr.rxcui1 as BN, rr.rxcui2 as pack, ah.ndc as pack_ndc11
+create or replace view public.match_trial_to_ndc11 as
-from 
+select 
-    rxnorm_migrated.rxnorm_relations rr 
+	mttbi.nct_id,
 	ah.ndc,
 	count(*)
 from public.match_trials_to_bn_in as mttbi
    left outer join rxnorm_migrated.rxnorm_relations as rr 
    	on mttbi.bn_or_in_cui = rr.rxcui1
    left outer join rxnorm_migrated."ALLNDC_HISTORY" as ah 
        on rr.rxcui2 = ah.rxcui 
 where 
-    tty1 = 'BN' 
+    rr.tty1 = 'BN' 
    and
-    tty2 in ('SBD', 'BPCK')
+    rr.tty2 in ('SBD', 'BPCK')
    and 
-    ah.sab='RXNORM'
+    ah.sab='RXNORM' 
-    
+group by mttbi.nct_id, ah.ndc
 order by mttbi.nct_id, ah.ndc
 ;
 -- Associate NDAs/ANDAs with marketing start dates.
 -- For each application number/citation among the human prescription drug rows
 -- of spl.nsde, return the row count and the earliest marketing_start_date
 -- (the start of that application's coverage period).
 select
     n.application_number_or_citation,
     count(*),
     min(n.marketing_start_date)
 from spl.nsde n
 where n.product_type = 'HUMAN PRESCRIPTION DRUG'
 group by n.application_number_or_citation
 ;
 -- For a given date, find which NDAs/ANDAs were active.
 -- A row counts as active when the date lies strictly between its
 -- marketing_start_date and its marketing_end_date; the result is the number of
 -- such rows per application number/citation.
 -- NOTE(review): rows whose marketing_end_date is NULL fail the comparison and
 -- are excluded - confirm that this is intended.
 select
     n.application_number_or_citation,
     count(*)
 from spl.nsde n
 where n.marketing_end_date > '2010-05-01'
     and n.marketing_start_date < '2010-05-01'
     and n.product_type = 'HUMAN PRESCRIPTION DRUG'
 group by n.application_number_or_citation
 ;
 -- Associate trials with marketing start dates.
 -- Joins each (nct_id, ndc) pair from match_trial_to_ndc11 to spl.nsde on the
 -- 11-digit package NDC, keeps human prescription drugs in the listed marketing
 -- categories, and returns one row per trial and application number/citation
 -- with the earliest marketing_start_date.
 -- The aggregate column is deliberately left unaliased so the view keeps the
 -- same column names as before.
 create or replace view public.match_trial_to_marketing_start_date as
 select
     mttn.nct_id,
     n.application_number_or_citation,
     min(n.marketing_start_date)
 from match_trial_to_ndc11 as mttn
 join spl.nsde as n
     on n.package_ndc11 = mttn.ndc
 where n.marketing_category in ('NDA','ANDA','BLA', 'NDA authorized generic', 'NDA AUTHORIZED GENERIC')
     and n.product_type = 'HUMAN PRESCRIPTION DRUG'
 group by
     mttn.nct_id,
     n.application_number_or_citation
 order by mttn.nct_id
 ;
--- a/development_sql/Script.sql
+++ b/development_sql/Script.sql
--- a/development_sql/views
+++ b/development_sql/views
@ -1,5 +1,5 @@
 /***************CREATE VIEWS*******************/
-create view if not exists 
+create or replace view 
 	history.match_drugs_to_trials as
 select nct_id,  rxcui, propvalue1
 from 
--- a/version)/ICD10-to-GDB_expander.py
+++ b/version)/ICD10-to-GDB_expander.py
@ -3,7 +3,7 @@ import numpy as np
 IHME_COD_FILEPATH = "./IHME_GBD_2019_COD_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
 IHME_NONFATAL_FILEPATH = "./IHME_GBD_2019_NONFATAL_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
-ICD10CM_ORDER_FILEPATH = "./icd10-2019_categories_only.psv"
+ICD10CM_ORDER_FILEPATH = "./icd10_combined-who-cms.psv"
 #READ in ICD10CM codes
		`@ -0,0 +1 @@`
							`Zщ<EFBFBD><EFBFBD>е;&ЎюgхЅ<EFBFBD>r^jNїрдwK<EFBFBD>HЩGF5и&Gй5<EFBFBD>7Ю<EFBFBD>АШМ4єќF'ЪiNcbжђиИj<EFBFBD>K~жCAЙnц<EFBFBD>@]M";кЇЇ7Ђ<>'Л~3<>1:МрНae!ЛЋЏзѕ#7ЬNdхЩ+aWХр5ѓю*xх}ЯхйыЙUn<55>AъЩ(>Љ<>\<5C>РгU<D0B3>-9сВ"5в<EFBFBD>_@> w,] Ўv2"<EFBFBD>,I<EFBFBD>n@и<EFBFBD><EFBFBD>(ДэщodЬзул.Wщъeеъc~Щu<EFBFBD>yѕ<EFBFBD>+9ф8<EFBFBD>ЕІ<EFBFBD>hтbfќ (ІЮЗOЪ<EFBFBD>тИшЮБХtR?ѓў`
		`@ -0,0 +1 @@`
							`{"resources":{"development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"}}}`