Updated import-icd10... to use psycopg2's execute_values to speed up inserts.

llm-extraction
will king 3 years ago
parent 9ac4cffe61
commit 47996ba607

@ -1,6 +1,7 @@
from drugtools.env_setup import ENV,postgres_conn from drugtools.env_setup import ENV,postgres_conn
from psycopg2 import extras from psycopg2 import extras
from collections import namedtuple from collections import namedtuple
from tqdm import tqdm
FILES=[ FILES=[
@ -12,21 +13,24 @@ SEP="|"
sql = """ sql = """
INSERT INTO "DiseaseBurden".icd10_to_cause INSERT INTO "DiseaseBurden".icd10_to_cause
(code,cause_text) (code,cause_text)
VALUES VALUES %s
(%(code)s,%(cause)s)
""" """
#read in files
#adjust codes?
#load files to table
with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as pcurse: with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as pcurse:
entries = []
for fpath in FILES: for fpath in FILES:
print(fpath)
with open(fpath,"r") as fh: with open(fpath,"r") as fh:
for line in fh.readlines(): for line in tqdm(fh.readlines(),desc=fpath):
code,cause = line.split(SEP) code,cause = line.split(SEP)
code = code.strip() code = code.strip()
cause = cause.strip() cause = cause.strip()
pcurse.execute(sql,{"code":code,"cause":cause}) entries.append((code,cause))
extras.execute_values(pcurse, sql , entries)

Loading…
Cancel
Save