Merging work from other computer into home.

Merge branch 'main' of ssh://gitea.kgjk.icu:3022/Research/ClinicalTrialsDataProcessing
llm-extraction
will king 3 years ago
commit c5f3bfcdec

@ -1,2 +0,0 @@
#!/bin/bash
# Post a "postgres complete:<current date>" message to the ntfy.sh topic named
# by $NTFY. The response body goes to stdout (-O-) and is discarded.
# The URL is quoted so a topic value containing whitespace or glob characters
# is still passed to wget as a single argument.
wget --post-data="postgres complete:$(date)" -qO- "https://ntfy.sh/${NTFY}" > /dev/null

@ -1,6 +0,0 @@
#!/bin/bash
# Install wget, then post a "mariadb complete:<current date>" message to the
# ntfy.sh topic named by $NTFY.
#install wget
apt update
apt install -y wget
#send notification
# The URL is quoted so a topic value containing whitespace or glob characters
# is still passed to wget as a single argument; the response body is discarded.
wget --post-data="mariadb complete:$(date)" -qO- "https://ntfy.sh/${NTFY}" > /dev/null

@ -26,7 +26,7 @@ services:
- ./AACT_downloader/aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
# this is the folder containing entrypoint info.
- ./AACT_downloader/docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/
shm-size: 1g
shm_size: 512mb
rxnav-db:

@ -34,6 +34,53 @@ WHERE
group by bi.nct_id, bi.downcase_mesh_term , rr.tty2 ,rr.rxcui2
order by bi.nct_id
;
--running out of space.
-- NOTE(review): the remark above presumably refers to the preceding query;
-- the statements below stage the work in temp tables instead -- confirm.
-- get list of interventions associated with trials of interest:
-- rows of ctgov.browse_interventions with mesh_type 'mesh-list' whose nct_id
-- appears in history.trial_snapshots
create temp table tmp_interventions as
select * from ctgov.browse_interventions bi
where
bi.mesh_type ='mesh-list'
and
bi.nct_id in (select distinct nct_id from history.trial_snapshots)
;
-- inspect the staged interventions
select * from tmp_interventions;
--drop table tmp_join_interv_rxcui;
-- join each staged intervention to the RxNorm properties whose propvalue1
-- equals the intervention's downcase_mesh_term, keeping only properties
-- named 'RxNorm Name'
-- NOTE(review): select * carries every column of both tables into the temp
-- table; this only works if the two tables share no column names -- confirm.
create temp table tmp_join_interv_rxcui as
select *
from
tmp_interventions tint
inner join
rxnorm_migrated.rxnorm_props rp
on tint.downcase_mesh_term = rp.propvalue1
where propname='RxNorm Name'
;-- get the rxcui for ingredients
-- inspect the joined result
select * from tmp_join_interv_rxcui;
--filter rxcui -> is human prescribable
-- NOTE(review): as written this view returns a single row count of
-- PRESCRIBABLE = 'Y' properties, not the rxcui values themselves, so it
-- cannot yet be used as a filter -- confirm whether that is intended.
create temp view tmp_view_prescribable as
select count(*) from rxnorm_migrated.rxnorm_props rp
where
rp.propname = 'PRESCRIBABLE'
and
rp.propvalue1 = 'Y'
;
--link prescribable to brand ingredients or brand names.
--get relationships of IN -> BN
-- relations whose first concept has term type IN or MIN, whose rxcui1 is one
-- of the rxcui values matched above, and whose second concept has term type BN
select *
from
rxnorm_migrated.rxnorm_relations rr
where
rr.tty1 in ('IN','MIN')
and rr.rxcui1 in (select distinct rxcui from tmp_join_interv_rxcui tjir)
and rr.tty2 = 'BN'
;
--match trials to NDC11 codes through brands (no statement for this step in this excerpt)

@ -0,0 +1,103 @@
import pandas as pd
import numpy as np
import itertools
# Input file locations, given relative to the directory the script is run from.
# Workbook read below with pd.read_excel(header=1) to produce COD_cause2code.psv.
IHME_COD_FILEPATH = "./GlobalBurdenDisease/IHME_GBD_2019_COD_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
# Workbook read below with pd.read_excel(header=1) to produce NONFATAL_cause2code.psv.
IHME_NONFATAL_FILEPATH = "./GlobalBurdenDisease/IHME_GBD_2019_NONFATAL_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
# Pipe-separated codebook; each line is parsed below as code | description | source.
ICD10CM_ORDER_FILEPATH = "./icd10_combined-who-cms.psv"
def justify(string):
    '''
    Right-pad a code with dashes to a width of 7 characters, so that codes
    of different lengths (e.g. A00 and A000) share one normalized, sortable
    form: 'A00----' and 'A000---'.

    Strings that are already 7 or more characters long come back unchanged.
    '''
    missing = 7 - len(string)
    if missing <= 0:
        return string
    return string + "-" * missing
class CodeRange():
    '''
    Expand one cause's code specification into the concrete codes that exist
    in a codebook.

    cause:     label written next to every matched code by __str__.
    code_book: mapping keyed by justified codes (see justify); its iteration
               order determines the order of codes matched by a range.
    codes:     comma-separated specification. Each item is either a single
               code ('A00.1') or a range ('A00-A09.9'); periods are removed
               before lookup. A float (e.g. the NaN produced for an empty
               spreadsheet cell) is treated as "no codes".

    After construction, code_list holds the matched justified codes.
    Single codes missing from the codebook are skipped silently.

    NOTE(review): a range end such as 'A09' is padded to 'A09----', which
    sorts before 'A090---', so sub-codes of the end code fall outside the
    range -- confirm that this is the intended meaning of a range.
    '''
    def __init__(self,cause,code_book,codes):
        self.cause = cause
        self.code_book = code_book
        self.code_list = []

        # normalize codes to string; isinstance also covers float subclasses
        if isinstance(codes, float):
            codes = ""

        for item in codes.split(","):
            # Strip each endpoint individually so 'A00 - A09' is read the same
            # way as 'A00-A09'.
            parts = [part.strip() for part in item.replace('.','').split('-')]
            begin = justify(parts[0])

            if len(parts) == 1:
                # single code: keep it only when the codebook knows it
                if self.code_book.get(begin) is not None:
                    self.code_list.append(begin)
            else:
                # inclusive range: one pass over the codebook, in its own order
                end = justify(parts[1])
                self.code_list.extend(
                    code for code in self.code_book if begin <= code <= end
                )

    def __str__(self):
        # one 'code | cause' line per matched code, each newline-terminated
        return "".join(
            "{} | {}\n".format(item, self.cause) for item in self.code_list
        )
#READ in ICD10CM codes
# Codebook: justified code -> (line index in the file, description, source).
icd10_codes = {}
with open(ICD10CM_ORDER_FILEPATH,"r") as icd_fh:
    # enumerate counts every physical line, so stored indices still match
    # the line positions in the file even when blank lines are skipped
    for idx,line in enumerate(icd_fh):
        if not line.strip():
            # a blank line cannot be unpacked into three fields; skip it
            continue
        #read info
        code, descr, source = line.split("|")
        #cleanup info and store in code dict
        icd10_codes[justify(code.strip())] = (idx, descr.strip(), source.strip())


def _write_cause2code(frame, cause_col, codes_col, out_path):
    '''
    Write one 'code | cause' line per matched code for every row of frame.

    cause_col and codes_col are positions in the tuples yielded by
    frame.itertuples() (position 0 is the index). out_path is overwritten.
    '''
    with open(out_path, "w") as outfh:
        for row in frame.itertuples():
            expanded = CodeRange(row[cause_col], icd10_codes, row[codes_col])
            outfh.write(str(expanded))


cod = pd.read_excel(IHME_COD_FILEPATH,header=1)
_write_cause2code(cod, 1, 2, "COD_cause2code.psv")

nonfatal = pd.read_excel(IHME_NONFATAL_FILEPATH,header=1)
_write_cause2code(nonfatal, 2, 3, "NONFATAL_cause2code.psv")

@ -0,0 +1,24 @@
This data was obtained by opening each nested portion on the
left navigation bar at
https://icd.who.int/browse10/2019/en
and then copying and pasting the data into a text file (icd10-2019.txt).
This text file was then adjusted to get the pipe-separated values version
which
has the following columns
- code: the icd-10 code in a normalized format
- description: The basic description given
- source: This just says WHO so that it is possible to merge it with other
sources.
The adjustments were as follows (parentheses include vim search and replace
commands used):
- delete tabs (:%s/\t//g)
- delete leading spaces (:%s/^\s//)
- remove excess newlines (:%s/^\n//)
- remove periods in codes (:%s/\.//)
- Convert to Pipe-separated values file (:%s/\s/ | /)
- add column of sources (:%s/\s*$/ | WHO)
- Type in column headers

@ -0,0 +1,6 @@
This contains the CMS version of ICD-10-CM codes.
I have included a version I converted to pipe-separated values with the
following columns
- code : the ICD-10-CM code.
- description: a basic description
- source: Says CMS-cm so that it can be combined with other sources.

@ -0,0 +1,10 @@
#!/bin/bash
# Merge the WHO and CMS pipe-separated code lists into
# icd10_combined-who-cms.psv with one line per distinct first-column value.
# Both input paths are relative to the directory the script is run from.
icd10_who="./WHO ICD-10 (2019)/icd10-2019_categories_only.psv"
icd10cm_cms="./icd10cm_codes_addenda_2019/icd10cm_codes_2019.psv"
#concatenate the two files
#then lexically sort them by first column and then third column (reversed)
#then sort/unique based on first column
# NOTE(review): which of several lines sharing a first column survives the
# second sort depends on sort -u keeping the first line of each run;
# presumably that is the one whose third column sorts last -- confirm.
#then save to file
cat "$icd10_who" "$icd10cm_cms" | sort -t "|" -k 1,1 -k 3,3r | sort -u -t "|" -k 1,1 > icd10_combined-who-cms.psv

@ -1,5 +1,6 @@
import pymysql
import psycopg2 as psyco
from psycopg2.sql import SQL, Identifier
from dotenv import dotenv_values
env_path = "../containers/.env"
@ -28,3 +29,15 @@ def postgres_conn(**kwargs):
def get_tables_of_interest():
    '''
    Return the TABLES_OF_INTEREST entry of ENV split on commas, as a list
    of strings. The pieces are returned as-is (no whitespace trimming).
    '''
    raw_tables = ENV["TABLES_OF_INTEREST"]
    return raw_tables.split(",")
def postgres_table_delete_entries(schema,table):
    '''
    Delete every row from schema.table using a connection from
    postgres_conn(), then commit.

    schema, table: identifier names; they are quoted through
    psycopg2.sql.Identifier rather than interpolated into the SQL text.
    '''
    with postgres_conn() as con:
        with con.cursor() as curse:
            # Intentionally unfiltered: the purpose is to empty the table.
            # The keyword must be spelled 'table' to fill the {table}
            # placeholder in the statement.
            delete_statement = SQL("delete from {schema}.{table}").format(
                schema=Identifier(schema),
                table=Identifier(table)
            )
            curse.execute(delete_statement)
        con.commit()

@ -1,4 +1,4 @@
from drugtools.env_setup import postgres_conn
from .env_setup import postgres_conn
from pathlib import Path

@ -4,7 +4,7 @@ from psycopg2 import extras
import pymysql
from dotenv import load_dotenv
import os
from drugtools.env_setup import postgres_conn, mariadb_conn, get_tables_of_interest
from .env_setup import postgres_conn, mariadb_conn, get_tables_of_interest
##############NOTE

@ -0,0 +1,5 @@
#!/bin/bash
# Delete on-disk data under ../containers.
# NOTE(review): both paths are relative, so what gets deleted depends on the
# directory the script is invoked from -- confirm it is only run from its own
# folder.
# Remove everything matched by * inside rxnav_data (the directory itself stays).
rm -r ../containers/RxNav-In-a-box/rxnav_data/*
# Remove the postgresql data directory itself.
rm -r ../containers/AACT_downloader/postgresql/data

@ -0,0 +1,20 @@
from drugtools import env_setup
from drugtools import historical_trial_selector as hts
from drugtools import historical_nct_downloader as hnd
from drugtools import historical_nct_extractor as hne
from drugtools import download_and_extract_nsde as daen
from drugtools import migrate_mysql2pgsql as mm2p
# Show the loaded environment so the operator can check it before anything runs.
print("Current Environment")
print(env_setup.ENV)

cont = input("Are you willing to continue with the current environment? y/[n]")

# Only an explicit "y"/"Y" (surrounding whitespace ignored) proceeds; any other
# answer, including an empty one, takes the default "no" branch.
if cont.strip().lower() == "y":
    # run each imported stage in the listed order
    hts.run()
    hnd.run()
    hne.run()
    daen.run()
    mm2p.run()
else:
    print("Please fix your .env file and try again")
Loading…
Cancel
Save