"""Expand IHME GBD cause -> ICD-10 code listings into one row per code.

Reads a pipe-separated ICD-10 code book plus the two IHME cause/ICD mapping
spreadsheets and writes, for each spreadsheet, a ``code | cause`` file with
one line per code-book entry that the spreadsheet row refers to.
"""
import itertools

import numpy as np
import pandas as pd

IHME_COD_FILEPATH = "./GlobalBurdenDisease/IHME_GBD_2019_COD_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
IHME_NONFATAL_FILEPATH = "./GlobalBurdenDisease/IHME_GBD_2019_NONFATAL_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
ICD10CM_ORDER_FILEPATH = "./icd10_combined-who-cms.psv"


def justify(string):
    """Right-pad a code with '-' to a width of 7 characters.

    This turns codes such as ``A00`` and ``A000`` into a normalized,
    sortable form (``'A00----'`` and ``'A000---'``). Strings that are
    already 7 or more characters long are returned unchanged.
    """
    return string.ljust(7, "-")


class CodeRange():
    """The code-book entries referenced by one cause's code listing.

    Attributes:
        cause: The cause label, stored as given.
        code_book: The mapping of justified code -> entry used for lookups.
        code_list: Justified codes selected from ``code_book``, in listing
            order; range matches keep the code book's iteration order.
    """

    def __init__(self, cause, code_book, codes):
        """Resolve a comma-separated listing of codes and code ranges.

        Args:
            cause: Label written next to every resolved code.
            code_book: Mapping keyed by justified codes (see ``justify``).
            codes: Listing such as ``"A00-A09.9, K67.3"``. Dots are
                dropped, and whitespace around items and range bounds is
                ignored. ``None`` or any float (e.g. the NaN pandas uses
                for an empty cell) is treated as an empty listing.
        """
        self.cause = cause
        self.code_book = code_book
        self.code_list = []

        # Empty spreadsheet cells arrive as NaN; isinstance also covers
        # float subclasses such as numpy.float64.
        if codes is None or isinstance(codes, float):
            codes = ""

        for entry in codes.split(","):
            bounds = [part.strip() for part in entry.replace(".", "").split("-")]

            if len(bounds) == 1:
                # Single code: keep it only when the code book knows it.
                code = justify(bounds[0])
                if self.code_book.get(code) is not None:
                    self.code_list.append(code)
                continue

            # Range: keep every code-book key between the two bounds
            # (inclusive, plain string comparison on the justified form).
            # NOTE(review): '-' sorts before digits and letters, so an end
            # bound like 'A09----' compares lower than 'A090---'; sub-codes
            # of the end code are therefore not selected. Confirm intended.
            begin = justify(bounds[0])
            end = justify(bounds[1])
            self.code_list.extend(
                code for code in self.code_book if begin <= code <= end
            )

    def __str__(self):
        """Return one ``"<code> | <cause>"`` line per resolved code."""
        return "".join(
            "{} | {}\n".format(code, self.cause) for code in self.code_list
        )


def _load_code_book(path):
    """Read a 'code|description|source' file into {justified code: entry}.

    Each entry is ``(line_index, description, source)`` where
    ``line_index`` is the zero-based position of the line in the file.
    Blank lines are skipped.

    Raises:
        ValueError: If a non-blank line does not have exactly three
            '|'-separated fields.
    """
    code_book = {}
    with open(path, "r") as icd_fh:
        for idx, line in enumerate(icd_fh):
            if not line.strip():
                continue
            fields = line.split("|")
            if len(fields) != 3:
                raise ValueError(
                    "{}: line {} has {} '|'-separated fields, expected 3".format(
                        path, idx + 1, len(fields)
                    )
                )
            code, descr, source = (field.strip() for field in fields)
            code_book[justify(code)] = (idx, descr, source)
    return code_book


def _write_cause_map(frame, out_path, cause_pos, codes_pos, code_book):
    """Write the resolved ``code | cause`` lines for every row of ``frame``.

    ``cause_pos`` and ``codes_pos`` are positions within the tuples yielded
    by ``frame.itertuples()``, where position 0 is the row index.
    """
    with open(out_path, "w") as outfh:
        for row in frame.itertuples():
            outfh.write(str(CodeRange(row[cause_pos], code_book, row[codes_pos])))


def main():
    """Build both cause -> code files from the configured input paths."""
    icd10_codes = _load_code_book(ICD10CM_ORDER_FILEPATH)

    cod = pd.read_excel(IHME_COD_FILEPATH, header=1)
    _write_cause_map(cod, "COD_cause2code.psv", 1, 2, icd10_codes)

    nonfatal = pd.read_excel(IHME_NONFATAL_FILEPATH, header=1)
    _write_cause_map(nonfatal, "NONFATAL_cause2code.psv", 2, 3, icd10_codes)


if __name__ == "__main__":
    main()