Adding various changes for which I don't remember the details. I'm adding them to avoid losing them.

llm-extraction
youainti 3 years ago
parent 4831864805
commit bbdd7552a8

@ -1,24 +0,0 @@
This data was obtained by opening each nested portion on the
left navigation bar at
https://icd.who.int/browse10/2019/en
and then copying and pasting the data into a text file (icd10-2019.txt).
This text file was then adjusted to get the pipe-separated values version
which
has the following columns
- code: the icd-10 code in a normalized format
- description: The basic description given
- source: This just says WHO so that it is possible to merge it with other
sources.
The adjustments were as follows (parentheses include the vim search and replace
commands used):
- delete tabs (:%s/\t//g)
- delete leading spaces (:%s/^\s//)
- remove excess newlines (:%s/^\n//)
- remove periods in codes (:%s/\.//)
- Convert to Pipe-separated values file (:%s/\s/ | /)
- add column of sources (:%s/\s*$/ | WHO)
- Type in column headers

File diff suppressed because it is too large Load Diff

@ -1,2 +0,0 @@
#!/bin/bash
# Send a "postgres complete" message, stamped with the output of `date`, as the
# POST body to the ntfy.sh topic named by the NTFY environment variable.
# -q silences wget's progress output and -O- writes the server response to
# stdout, which is then discarded via /dev/null.
# NOTE(review): $NTFY is unquoted and not checked for being set — confirm the
# container environment always provides it.
wget --post-data="postgres complete:$(date)" -qO- https://ntfy.sh/$NTFY > /dev/null

@ -1,6 +0,0 @@
#!/bin/bash
# Install wget with apt, then use it to send a "mariadb complete" notification.

# Install wget: refresh the package index first, then install non-interactively (-y).
apt update
apt install -y wget
# Send the notification: POST "mariadb complete:<date>" to the ntfy.sh topic named
# by the NTFY environment variable. -q silences wget, -O- writes the response to
# stdout, and the redirect to /dev/null discards it.
wget --post-data="mariadb complete:$(date)" -qO- https://ntfy.sh/$NTFY > /dev/null

@ -26,7 +26,7 @@ services:
- ./AACT_downloader/aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp - ./AACT_downloader/aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
# this is the folder containing entrypoint info. # this is the folder containing entrypoint info.
- ./AACT_downloader/docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/ - ./AACT_downloader/docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/
shm-size: 1g shm_size: 512mb
rxnav-db: rxnav-db:

@ -20,34 +20,71 @@
*/ */
---assoicate ingredients, brands, and approved packaging RXCUIs. ---assoicate ingredients, brands, and approved packaging RXCUIs.
create temp table trial_to_pack_rxcui as create temp table tmp_trial_to_ingred as
with trialncts as ( with trialncts as (
SELECT DISTINCT nct_id FROM history.trial_snapshots TS SELECT DISTINCT nct_id FROM history.trial_snapshots TS
) )
SELECT SELECT
bi.nct_id , bi.nct_id ,
bi.downcase_mesh_term, bi.downcase_mesh_term,
rp.rxcui AS drug_rxcui, rp.rxcui AS drug_rxcui
rn.tty1,
rn.rxcui1 as ingredient_rxcui, --ingredients
rn.tty2 ,
rn.rxcui2 as brand_or_pack_rxcui --brand or pack
FROM ctgov.browse_interventions bi FROM ctgov.browse_interventions bi
LEFT OUTER JOIN rxnorm_migrated.rxnorm_props AS rp INNER JOIN rxnorm_migrated.rxnorm_props AS rp
on bi.downcase_mesh_term = rp.propvalue1 --Link drug ingredient on bi.downcase_mesh_term = rp.propvalue1 --Link drug ingredient
LEFT OUTER JOIN rxnorm_migrated.rxnorm_relations AS rn
on rp.rxcui = rn.rxcui1 --Grab brand names
WHERE WHERE
bi.nct_id in ( bi.nct_id in (
SELECT nct_id FROM trialncts SELECT nct_id FROM trialncts
) )
and and
bi.mesh_type='mesh-list' bi.mesh_type='mesh-list'
and
rn.tty1 in ('IN','MIN')
and
rn.tty2 = 'BN'
; ;
-- running out of space.

-- Step 1: collect the interventions associated with the trials of interest,
-- i.e. 'mesh-list' rows whose nct_id appears in history.trial_snapshots.
CREATE TEMP TABLE tmp_interventions AS
SELECT *
FROM ctgov.browse_interventions bi
WHERE bi.nct_id IN (SELECT DISTINCT nct_id FROM history.trial_snapshots)
    AND bi.mesh_type = 'mesh-list'
;

-- Inspect step 1.
SELECT * FROM tmp_interventions;

-- Step 2: get the rxcui for ingredients by matching each intervention's
-- lower-cased MeSH term against the 'RxNorm Name' property rows.
--drop table tmp_join_interv_rxcui;
CREATE TEMP TABLE tmp_join_interv_rxcui AS
SELECT *
FROM tmp_interventions tint
INNER JOIN rxnorm_migrated.rxnorm_props rp
    ON tint.downcase_mesh_term = rp.propvalue1
WHERE propname = 'RxNorm Name'
;

-- Inspect step 2.
SELECT * FROM tmp_join_interv_rxcui;

-- Step 3: filter rxcui -> is human prescribable.
-- As written, this view yields a single row: the number of property rows
-- with propname 'PRESCRIBABLE' and propvalue1 'Y'.
CREATE TEMP VIEW tmp_view_prescribable AS
SELECT count(*)
FROM rxnorm_migrated.rxnorm_props rp
WHERE rp.propname = 'PRESCRIBABLE'
    AND rp.propvalue1 = 'Y'
;

-- Step 4: link prescribable to brand ingredients or brand names.
-- Get the IN/MIN -> BN relationships whose first rxcui was matched in step 2.
SELECT *
FROM rxnorm_migrated.rxnorm_relations rr
WHERE rr.tty1 IN ('IN', 'MIN')
    AND rr.tty2 = 'BN'
    AND rr.rxcui1 IN (SELECT DISTINCT rxcui FROM tmp_join_interv_rxcui tjir)
;
--link brand names to drug applications (NDA/ANDA etc) --link brand names to drug applications (NDA/ANDA etc)

@ -1,5 +1,6 @@
import pymysql import pymysql
import psycopg2 as psyco import psycopg2 as psyco
from psycopg2.sql import SQL
from dotenv import dotenv_values from dotenv import dotenv_values
env_path = "../containers/.env" env_path = "../containers/.env"
@ -28,3 +29,15 @@ def postgres_conn(**kwargs):
def get_tables_of_interest(): def get_tables_of_interest():
return ENV["TABLES_OF_INTEREST"].split(",") return ENV["TABLES_OF_INTEREST"].split(",")
def postgres_table_delete_entries(schema, table):
    """Delete every row from ``schema.table`` in the Postgres database.

    The statement has no WHERE clause on purpose: the whole table is emptied.
    Schema and table names are composed with ``psycopg2.sql.Identifier`` so
    they are quoted as identifiers rather than spliced into the SQL text.

    Args:
        schema: Name of the schema containing the table.
        table: Name of the table whose rows are deleted.
    """
    # Local import: Identifier is needed here in addition to SQL.
    from psycopg2.sql import SQL, Identifier

    # The keyword names must match the {schema} and {table} placeholders,
    # otherwise .format() raises KeyError.
    delete_statement = SQL("delete from {schema}.{table}").format(
        schema=Identifier(schema),
        table=Identifier(table),
    )

    with postgres_conn() as con:
        with con.cursor() as curse:
            curse.execute(delete_statement)
            con.commit()

@ -1,4 +1,4 @@
from drugtools.env_setup import postgres_conn from .env_setup import postgres_conn
from pathlib import Path from pathlib import Path

@ -4,7 +4,7 @@ from psycopg2 import extras
import pymysql import pymysql
from dotenv import load_dotenv from dotenv import load_dotenv
import os import os
from drugtools.env_setup import postgres_conn, mariadb_conn, get_tables_of_interest from .env_setup import postgres_conn, mariadb_conn, get_tables_of_interest
##############NOTE ##############NOTE

@ -0,0 +1,5 @@
#!/bin/bash
# Delete the on-disk data of the local containers.
# WARNING: destructive. Both paths are relative (../containers/...), so what gets
# removed depends on the directory this script is run from.

# Remove everything inside the RxNav-in-a-box data directory (the directory itself is kept).
rm -r ../containers/RxNav-In-a-box/rxnav_data/*
# Remove the AACT downloader's postgresql data directory itself.
rm -r ../containers/AACT_downloader/postgresql/data

@ -0,0 +1,20 @@
from drugtools import env_setup
from drugtools import historical_trial_selector as hts
from drugtools import historical_nct_downloader as hnd
from drugtools import historical_nct_extractor as hne
from drugtools import download_and_extract_nsde as daen
from drugtools import migrate_mysql2pgsql as mm2p
# Show the loaded environment so the operator can confirm it before anything runs.
print("Current Environment")
print(env_setup.ENV)

answer = input("Are you willing to continue with the current environmnet? y/[n]")

if answer not in ("Y", "y"):
    # Anything other than an explicit yes (including an empty reply) aborts.
    print("Please fix your .env file and try again")
else:
    # Call each stage module's run() in the same fixed order as the imports.
    for stage in (hts, hnd, hne, daen, mm2p):
        stage.run()
Loading…
Cancel
Save