|
|
|
@ -1,72 +1,103 @@
|
|
|
|
import pandas as pd
|
|
|
|
import pandas as pd
|
|
|
|
import numpy as np
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
import itertools
|
|
|
|
|
|
|
|
|
|
|
|
# Input files, given relative to the working directory the script runs in.
# The two IHME workbooks are read with pandas.read_excel; the ICD-10 file is
# read as pipe-separated text.
IHME_COD_FILEPATH = "./GlobalBurdenDisease/IHME_GBD_2019_COD_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
IHME_NONFATAL_FILEPATH = "./GlobalBurdenDisease/IHME_GBD_2019_NONFATAL_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
ICD10CM_ORDER_FILEPATH = "./icd10_combined-who-cms.psv"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def justify(string, width=7, fill="-"):
    """Right-pad a code so that codes of different lengths sort together.

    Transforms codes such as 'A00' and 'A000' into a normalized, sortable
    format, e.g. 'A00----' and 'A000---'.

    Args:
        string: the code to pad.
        width: minimum length of the result (default 7).
        fill: single character used for padding (default '-').

    Returns:
        ``string`` left-justified in a field of ``width`` characters. A
        string already ``width`` characters or longer is returned unchanged
        (it is never truncated).
    """
    return string.ljust(width, fill)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class CodeRange():
    """Expand one cause's code specification into individual codebook keys.

    ``codes`` is a comma-separated string. Each item is either a single code
    ('A00') or a 'begin-end' range ('A00-A09.9'). Dots are stripped and every
    code is padded with ``justify`` before it is matched against the keys of
    ``code_book``.

    Attributes:
        cause: label written next to every code by ``__str__``.
        code_book: mapping whose keys are justified codes.
        code_list: justified codes selected for this cause, in the order the
            items appear in ``codes`` (range items follow codebook order).
    """

    def __init__(self, cause, code_book, codes):
        self.cause = cause
        self.code_book = code_book
        self.code_list = []

        # A float (or None) is treated as "no codes listed". Presumably this
        # is the NaN pandas yields for an empty cell -- confirm with callers.
        if codes is None or isinstance(codes, float):
            codes = ""

        for item in codes.split(","):
            rng = item.strip().replace('.', '').split('-')
            begin = justify(rng[0])

            if len(rng) == 1:
                # Single code: kept only when the codebook has a non-None
                # entry for it; unknown codes are skipped silently.
                if self.code_book.get(begin) is not None:
                    self.code_list.append(begin)
            else:
                # Range: every codebook key between the padded endpoints
                # (inclusive), compared as plain strings.
                # NOTE(review): the '-' padding sorts before digits and
                # letters, so an end bound such as 'A09' excludes longer
                # codes like 'A091' -- confirm this is intended.
                end = justify(rng[1])
                self.code_list.extend(
                    code for code in self.code_book if begin <= code <= end
                )

    def __str__(self):
        """Return one 'code | cause' line per selected code ('' if none)."""
        return "".join(
            "{} | {}\n".format(code, self.cause) for code in self.code_list
        )
|
|
|
|
|
|
|
|
|
|
|
|
# Read the ICD-10 codebook: one "code|description|source" record per line.
# Keys are justified codes; values are (line index, description, source).
icd10_codes = {}

with open(ICD10CM_ORDER_FILEPATH, "r") as icd_fh:
    for idx, line in enumerate(icd_fh):
        # A blank line (e.g. a trailing newline at end of file) cannot be
        # unpacked into three fields, so skip it. idx still counts it.
        if not line.strip():
            continue

        # read info
        code, descr, source = line.split("|")

        # cleanup info
        code = justify(code.strip())
        descr = descr.strip()
        source = source.strip()

        # store in code dict
        icd10_codes[code] = (idx, descr, source)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Cause-of-death map: write one "code | cause" line for every ICD-10 code
# that each row's code specification expands to.
cod = pd.read_excel(IHME_COD_FILEPATH, header=1)

with open("COD_cause2code.psv", "w") as outfh:
    for row in cod.itertuples():
        # itertuples() places the DataFrame index at position 0, so the
        # sheet's first two columns are row[1] and row[2].
        cause = row[1]
        codes = row[2]

        c = CodeRange(cause, icd10_codes, codes)

        outfh.write(str(c))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Non-fatal cause map: same export as the cause-of-death map, but the cause
# and code columns sit one position further right in this workbook.
nonfatal = pd.read_excel(IHME_NONFATAL_FILEPATH, header=1)

with open("NONFATAL_cause2code.psv", "w") as outfh:
    for row in nonfatal.itertuples():
        # Position 0 is the DataFrame index; cause and codes are taken from
        # the second and third sheet columns.
        cause = row[2]
        codes = row[3]

        c = CodeRange(cause, icd10_codes, codes)

        outfh.write(str(c))
|
|
|
|
|
|
|
|
|