"""Preview how IHME GBD 2019 cause-to-ICD code maps line up with ICD-10-CM.

The script loads a pipe-separated ICD-10-CM category listing into a lookup
table, then prints the first few rows of the IHME cause-of-death and
non-fatal cause maps together with the lookup result for both endpoints of
every ICD code range found in each row.
"""
from itertools import islice

import numpy as np
import pandas as pd

IHME_COD_FILEPATH = "./IHME_GBD_2019_COD_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
IHME_NONFATAL_FILEPATH = "./IHME_GBD_2019_NONFATAL_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
ICD10CM_ORDER_FILEPATH = "./icd10-2019_categories_only.psv"

# Number of spreadsheet rows inspected per cause map.
# NOTE(review): the original counter (`itt > 5`) is assumed to have been
# advanced once per row, which yields six rows -- confirm.
PREVIEW_ROWS = 6


def load_icd10_categories(lines):
    """Build the ICD-10-CM lookup table from ``code|description`` lines.

    Args:
        lines: Iterable of text lines (an open file works). Each non-blank
            line must contain at least one ``|``; everything after the first
            ``|`` is taken as the description.

    Returns:
        dict mapping the stripped code to ``(line_index, description)``,
        where ``line_index`` is the zero-based position of the line in
        ``lines`` (blank lines still count towards the index).

    Raises:
        ValueError: If a non-blank line has no ``|`` separator.
    """
    categories = {}
    for idx, line in enumerate(lines):
        if not line.strip():
            # Tolerate blank lines, e.g. a trailing newline at end of file.
            continue
        code, sep, descr = line.partition("|")
        if not sep:
            raise ValueError(f"line {idx}: expected 'code|description', got {line!r}")
        categories[code.strip()] = (idx, descr.strip())
    return categories


def parse_icd_ranges(raw):
    """Split a spreadsheet cell of ICD codes into normalized ranges.

    The cell is a comma-separated list whose items are either a single code
    or a ``begin-end`` range. Dots are removed from the codes and surrounding
    whitespace is stripped from every endpoint.

    Args:
        raw: The cell value. Anything that is not a string (such as the NaN
            pandas uses for an empty cell) is treated as "no codes".

    Returns:
        A list of lists: one inner list per non-empty item, holding the
        ``-``-separated parts of that item.
    """
    if not isinstance(raw, str):
        return []
    ranges = []
    for item in raw.split(","):
        item = item.strip().replace(".", "")
        if not item:
            # Skip empty items produced by blank cells or trailing commas.
            continue
        ranges.append([part.strip() for part in item.split("-")])
    return ranges


def preview_cause_map(frame, cause_pos, codes_pos, icd10_codes,
                      limit=PREVIEW_ROWS, verbose=False):
    """Print the leading rows of a cause map with their ICD lookups.

    Args:
        frame: DataFrame holding the cause map.
        cause_pos: Position of the cause name within each ``itertuples()``
            row (position 0 is the DataFrame index).
        codes_pos: Position of the ICD code list within each row.
        icd10_codes: Lookup table as returned by ``load_icd10_categories``.
        limit: Maximum number of rows to print.
        verbose: When true, also print the raw row and the parsed ranges.
    """
    print(frame.columns)
    for row in islice(frame.itertuples(), limit):
        if verbose:
            print(row)
        ranges = parse_icd_ranges(row[codes_pos])
        print(row[cause_pos])
        if verbose:
            print(ranges)
        for rng in ranges:
            begin = rng[0]
            # A single code is treated as a range that starts and ends on itself.
            end = rng[1] if len(rng) > 1 else begin
            print(rng, icd10_codes.get(begin), icd10_codes.get(end))


def main():
    """Load the ICD-10-CM categories and preview both IHME cause maps."""
    # Read in ICD10CM codes.
    with open(ICD10CM_ORDER_FILEPATH, "r") as icd_fh:
        icd10_codes = load_icd10_categories(icd_fh)

    # header=1: column names are taken from the second row of the sheet.
    cod = pd.read_excel(IHME_COD_FILEPATH, header=1)
    preview_cause_map(cod, 1, 2, icd10_codes)

    nonfatal = pd.read_excel(IHME_NONFATAL_FILEPATH, header=1)
    preview_cause_map(nonfatal, 2, 3, icd10_codes, verbose=True)


if __name__ == "__main__":
    main()