You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ClinicalTrialsDataProcessing/non-db_data_sources/GBD and ICD-10_(2019 version)/ICD10-to-GDB_expander.py

73 lines
1.6 KiB
Python

import pandas as pd
import numpy as np
# Input files; the "./" prefix means they are resolved relative to the
# current working directory, so run the script from the data folder.
# Workbook loaded with pandas.read_excel below (presumably the IHME GBD 2019
# cause-of-death cause -> ICD code map, judging by the file name).
IHME_COD_FILEPATH = "./IHME_GBD_2019_COD_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
# Workbook loaded with pandas.read_excel below (presumably the IHME GBD 2019
# non-fatal cause -> ICD code map, judging by the file name).
IHME_NONFATAL_FILEPATH = "./IHME_GBD_2019_NONFATAL_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
# Pipe-separated text file, one "code|description" entry per line.
ICD10CM_ORDER_FILEPATH = "./icd10-2019_categories_only.psv"
# Read the ICD-10 category list into a lookup table keyed by code.
def _read_icd10_categories(filepath):
    """Load a pipe-separated ICD-10 category file into a dict.

    Every non-blank line must look like ``CODE|description``.  Only the first
    ``|`` is treated as the separator, so a description may itself contain
    ``|`` characters.  Blank lines are skipped.

    Args:
        filepath: Path of the pipe-separated text file.

    Returns:
        Dict mapping each whitespace-stripped code to a tuple
        ``(line_index, description)``.  ``line_index`` is the zero-based
        position of the line in the file (blank lines still count), which
        records the ordering of the codes.  If a code appears more than
        once, the last occurrence wins.

    Raises:
        ValueError: If a non-blank line contains no ``|`` separator.
    """
    categories = {}
    with open(filepath, "r") as icd_fh:
        # Iterate the handle directly: no need to load every line into a
        # list first, as readlines() would.
        for idx, line in enumerate(icd_fh):
            if not line.strip():
                continue
            # partition() never raises and splits on the first "|" only.
            code, separator, descr = line.partition("|")
            if not separator:
                raise ValueError(
                    f"{filepath}: line {idx + 1} has no '|' separator: {line!r}"
                )
            categories[code.strip()] = (idx, descr.strip())
    return categories


icd10_codes = _read_icd10_categories(ICD10CM_ORDER_FILEPATH)
# Debug preview: show how the ICD code ranges listed in the first workbook
# line up with the entries of icd10_codes.
# header=1 tells pandas to take the column names from the second sheet row.
cod = pd.read_excel(IHME_COD_FILEPATH, header=1)
print(cod.columns)
# Only the first 6 rows are inspected; this is the same cut-off the earlier
# hand-rolled counter produced (it stopped once the count exceeded 5).
for row in cod.head(6).itertuples():
    # itertuples() places the DataFrame index at position 0, so row[1] and
    # row[2] are the first and second data columns.
    cause = row[1]
    raw_codes = row[2]
    # Normalize the cell into a list of [begin] or [begin, end] lists.
    # Anything that is not a string (e.g. the NaN pandas uses for an empty
    # cell) is treated as "no codes" instead of being fed to str methods.
    codes = []
    if isinstance(raw_codes, str):
        for token in raw_codes.split(","):
            # Drop the dot so "A00.1" becomes "A001" before the lookup.
            token = token.strip().replace(".", "")
            if token:  # skip empty fragments such as a trailing comma
                codes.append(token.split("-"))
    print(cause)
    for rng in codes:
        begin = rng[0]
        # A single code is treated as a range that starts and ends on itself.
        end = rng[1] if len(rng) > 1 else rng[0]
        print(rng, icd10_codes.get(begin), icd10_codes.get(end))
# Debug preview: show how the ICD code ranges listed in the second workbook
# line up with the entries of icd10_codes.
# header=1 tells pandas to take the column names from the second sheet row.
cod = pd.read_excel(IHME_NONFATAL_FILEPATH, header=1)
print(cod.columns)
# Only the first 6 rows are inspected; this is the same cut-off the earlier
# hand-rolled counter produced (it stopped once the count exceeded 5).
for row in cod.head(6).itertuples():
    print(row)
    # itertuples() places the DataFrame index at position 0, so row[2] and
    # row[3] are the second and third data columns.
    cause = row[2]
    raw_codes = row[3]
    # Normalize the cell into a list of [begin] or [begin, end] lists.
    # Anything that is not a string (e.g. the NaN pandas uses for an empty
    # cell) is treated as "no codes" instead of being fed to str methods.
    codes = []
    if isinstance(raw_codes, str):
        for token in raw_codes.split(","):
            # Drop the dot so "A00.1" becomes "A001" before the lookup.
            token = token.strip().replace(".", "")
            if token:  # skip empty fragments such as a trailing comma
                codes.append(token.split("-"))
    print(cause)
    print(codes)
    for rng in codes:
        begin = rng[0]
        # A single code is treated as a range that starts and ends on itself.
        end = rng[1] if len(rng) > 1 else rng[0]
        print(rng, icd10_codes.get(begin), icd10_codes.get(end))