Merging work from other computer into home.
Merge branch 'main' of ssh://gitea.kgjk.icu:3022/Research/ClinicalTrialsDataProcessingllm-extraction
commit
c5f3bfcdec
@ -1,2 +0,0 @@
|
||||
#!/bin/bash
# Post a "postgres complete" message, stamped with the current date, to the
# ntfy.sh topic named by $NTFY. The response body is discarded.
# The URL is quoted so a topic containing spaces or glob characters is passed
# to wget as a single argument.
wget --post-data="postgres complete:$(date)" -qO- "https://ntfy.sh/${NTFY}" > /dev/null
|
||||
@ -1,6 +0,0 @@
|
||||
#!/bin/bash
# Install wget, then post a "mariadb complete" message, stamped with the
# current date, to the ntfy.sh topic named by $NTFY.

# install wget
# (apt-get is used because plain `apt` is meant for interactive use and warns
# that its command-line interface is not stable for scripts)
apt-get update
apt-get install -y wget

# send notification; the URL is quoted so $NTFY is passed as a single argument
wget --post-data="mariadb complete:$(date)" -qO- "https://ntfy.sh/${NTFY}" > /dev/null
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
@ -0,0 +1,103 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import itertools
|
||||
|
||||
|
||||
|
||||
# Input files, all given relative to the current working directory.

# IHME GBD 2019 cause -> ICD code map for causes of death.
IHME_COD_FILEPATH = (
    "./GlobalBurdenDisease/"
    "IHME_GBD_2019_COD_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
)

# IHME GBD 2019 cause -> ICD code map for non-fatal causes.
IHME_NONFATAL_FILEPATH = (
    "./GlobalBurdenDisease/"
    "IHME_GBD_2019_NONFATAL_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
)

# Pipe-separated ICD-10 codebook with one "code | description | source" row per line.
ICD10CM_ORDER_FILEPATH = "./icd10_combined-who-cms.psv"
|
||||
|
||||
def justify(string):
    """
    Right-pad an ICD code with '-' to a fixed width of seven characters.

    Codes of different lengths, such as A00 and A000, become 'A00----' and
    'A000---', which gives every code the same normalized, sortable layout.
    A code that is already seven or more characters long is returned as is.
    """
    width = 7
    fill = "-"
    return string.ljust(width, fill)
|
||||
|
||||
|
||||
class CodeRange():
    """
    Expand one cause's ICD-10 code specification into codebook keys.

    The specification is a comma-separated string. Each item is either a
    single code ("A00.1") or an inclusive range written "begin-end"
    ("A00-A09"). Periods are dropped and every code is normalized with
    justify() before it is compared with the keys of the codebook.

    Attributes set by the constructor:
        cause      -- the label passed in; written next to each code by str().
        code_book  -- the mapping whose keys are justified ICD-10 codes.
        code_list  -- matched codebook keys, in the order the items were
                      listed and, inside a range, in codebook iteration
                      order. Overlapping items can produce duplicates.
    """

    def __init__(self, cause, code_book, codes):
        """
        Build the list of codebook keys covered by `codes`.

        cause      -- label to associate with every matched code.
        code_book  -- mapping keyed by justified codes; must support .get()
                      and iteration over its keys.
        codes      -- comma-separated code/range string. None or any float
                      (presumably the NaN of an empty spreadsheet cell) is
                      treated as "no codes listed".

        Single codes that the codebook does not contain are skipped silently.
        """
        self.cause = cause
        self.code_book = code_book
        self.code_list = []

        # isinstance (rather than an exact type check) also catches float
        # subclasses such as numpy.float64.
        if codes is None or isinstance(codes, float):
            codes = ""

        for token in codes.split(","):
            token = token.strip().replace(".", "")
            if not token:
                # Blank specification or stray comma: nothing to look up.
                continue

            bounds = token.split("-")
            begin = justify(bounds[0])

            if len(bounds) == 1:
                # Single code: keep it only when the codebook has an entry.
                if self.code_book.get(begin) is not None:
                    self.code_list.append(begin)
                continue

            # Range: only the first two parts are used as bounds. Keys are
            # compared as plain strings in their justified form.
            # NOTE(review): '-' sorts before digits and letters, so sub-codes
            # of the end bound (e.g. 'A090---' for an end of 'A09') fall
            # outside the range -- confirm this is intended.
            end = justify(bounds[1])
            self.code_list.extend(
                code for code in self.code_book if begin <= code <= end
            )

    def __str__(self):
        """Return one "<code> | <cause>" line per matched code ('' if none)."""
        return "".join(
            "{} | {}\n".format(item, self.cause) for item in self.code_list
        )
|
||||
|
||||
def _load_icd10_codes(path):
    """
    Read the pipe-separated codebook at `path` into a dict.

    Each non-blank line must hold exactly three '|'-separated fields:
    code, description, source. The result maps the justified code to
    (line index, description, source). A later line with the same code
    replaces an earlier one.
    """
    code_book = {}
    with open(path, "r") as icd_fh:
        # Iterate the handle directly instead of loading every line first.
        for idx, line in enumerate(icd_fh):
            if not line.strip():
                # A blank line has no fields to unpack; idx still counts it.
                continue
            code, descr, source = line.split("|")
            code_book[justify(code.strip())] = (idx, descr.strip(), source.strip())
    return code_book


def _write_cause_map(frame, cause_pos, codes_pos, code_book, out_path):
    """
    Write "<code> | <cause>" lines for every row of `frame` to `out_path`.

    cause_pos and codes_pos are positions in the tuples produced by
    frame.itertuples(), where position 0 is the row index.
    """
    with open(out_path, "w") as outfh:
        for row in frame.itertuples():
            outfh.write(str(CodeRange(row[cause_pos], code_book, row[codes_pos])))


#READ in ICD10CM codes
icd10_codes = _load_icd10_codes(ICD10CM_ORDER_FILEPATH)

# Causes of death: cause in the first column, code list in the second.
cod = pd.read_excel(IHME_COD_FILEPATH,header=1)
_write_cause_map(cod, 1, 2, icd10_codes, "COD_cause2code.psv")

# Non-fatal causes: cause in the second column, code list in the third.
nonfatal = pd.read_excel(IHME_NONFATAL_FILEPATH,header=1)
_write_cause_map(nonfatal, 2, 3, icd10_codes, "NONFATAL_cause2code.psv")
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,24 @@
|
||||
This data was obtained by opening each nested portion on the
|
||||
left navigation bar at
|
||||
https://icd.who.int/browse10/2019/en
|
||||
and then copying and pasting the data into a text file (icd10-2019.txt).
|
||||
|
||||
|
||||
This text file was then adjusted to get the pipe-separated values version
|
||||
which
|
||||
has the following columns
|
||||
- code: the icd-10 code in a normalized format
|
||||
- description: The basic description given
|
||||
- source: This just says WHO so that it is possible to merge it with other
|
||||
sources.
|
||||
|
||||
|
||||
The adjustments were as follows (parentheses include vim search and replace
|
||||
commands used):
|
||||
- delete tabs (:%s/\t//g)
|
||||
- delete leading spaces (:%s/^\s//)
|
||||
- remove excess newlines (:%s/^\n//)
|
||||
- remove periods in codes (:%s/\.//)
|
||||
- Convert to Pipe-separated values file (:%s/\s/ | /)
|
||||
- add column of sources (:%s/\s*$/ | WHO)
|
||||
- Type in column headers
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -0,0 +1,6 @@
|
||||
This contains the CMS version of ICD-10-CM codes.
|
||||
I have included a version I converted to pipe-separated values with the
|
||||
following columns
|
||||
- code : the ICD-10-CM code.
|
||||
- description: a basic description
|
||||
- source: Says CMS-cm so that it can be combined with other sources.
|
||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,10 @@
|
||||
#!/bin/bash
# Merge the WHO and CMS ICD-10 tables into one pipe-separated file that has
# a single row per code.

# Stop on the first failing command, including one inside the pipeline,
# instead of reporting success after a partial merge.
set -euo pipefail

icd10_who="./WHO ICD-10 (2019)/icd10-2019_categories_only.psv"
icd10cm_cms="./icd10cm_codes_addenda_2019/icd10cm_codes_2019.psv"

#read the two files (sort takes them directly, so a missing final newline in
# the first file cannot glue its last row onto the next file's first row)
#then lexically sort them by first column and then third column (reversed)
# NOTE(review): reversing on the source column presumably puts WHO rows ahead
# of CMS rows for the same code -- confirm against the data.
#then sort/unique based on first column
#then save to file
sort -t "|" -k 1,1 -k 3,3r -- "$icd10_who" "$icd10cm_cms" | sort -u -t "|" -k 1,1 > icd10_combined-who-cms.psv
|
||||
@ -0,0 +1,5 @@
|
||||
#!/bin/bash
# Delete stored container data. Paths are relative to the directory the
# script is run from.

containers_dir="../containers"

# Empty the RxNav-in-a-box data directory; the directory itself is kept.
rm -r "$containers_dir"/RxNav-In-a-box/rxnav_data/*

# Remove the AACT downloader's postgresql data directory entirely.
rm -r "$containers_dir/AACT_downloader/postgresql/data"
|
||||
@ -0,0 +1,20 @@
|
||||
from drugtools import env_setup
|
||||
from drugtools import historical_trial_selector as hts
|
||||
from drugtools import historical_nct_downloader as hnd
|
||||
from drugtools import historical_nct_extractor as hne
|
||||
from drugtools import download_and_extract_nsde as daen
|
||||
from drugtools import migrate_mysql2pgsql as mm2p
|
||||
|
||||
# Show the loaded environment so the operator can check it before anything runs.
print("Current Environment")
print(env_setup.ENV)

cont = input("Are you willing to continue with the current environment? y/[n]")

# Only an explicit "y"/"Y" proceeds; surrounding whitespace is ignored and
# every other answer (including just pressing Enter) takes the "no" branch.
if cont.strip().lower() == "y":
    # Run each drugtools stage in this fixed order.
    hts.run()
    hnd.run()
    hne.run()
    daen.run()
    mm2p.run()
else:
    print("Please fix your .env file and try again")
|
||||
Loading…
Reference in New Issue