Removed unused files; began adjusting the docker-compose files so that they will work together.

llm-extraction
youainti 3 years ago
parent 1a106a553e
commit 8dbf4e8c2e

@ -1,7 +0,0 @@
import RxMixInABox as rx
# Look up RxCUI identifiers matching the string "Levothyroxine",
# then fetch the brand products for the first match.
rxcui_matches = rx.FindRxcuiByString("Levothyroxine")
print(rxcui_matches)
brands = rx.get_brands_from_ingredients(rxcui_matches[0])

@ -1,44 +0,0 @@
-- Sample the free-text reasons that trials were stopped.
SELECT why_stopped
FROM ctgov.studies
WHERE why_stopped IS NOT NULL
LIMIT 100;

-- Distribution of study types.
SELECT study_type, COUNT(*)
FROM ctgov.studies
GROUP BY study_type;

-- How many studies are FDA-regulated drug trials?
SELECT is_fda_regulated_drug, COUNT(*)
FROM ctgov.studies
GROUP BY is_fda_regulated_drug;

/*
Note that there is a decent number of trials that have expanded access.
*/
SELECT study_type,
       phase,
       has_expanded_access,
       has_dmc,
       COUNT(*)
FROM ctgov.studies
WHERE is_fda_regulated_drug IS TRUE
  AND study_type = 'Interventional'
  AND start_date > DATE('2007-01-01')
GROUP BY study_type, phase, has_expanded_access, has_dmc;

/*
Find the different MeSH terms as assigned by clinicaltrials.gov.
*/
SELECT *
FROM ctgov.browse_conditions
ORDER BY nct_id DESC, mesh_type
LIMIT 200;

SELECT *
FROM ctgov.browse_interventions
ORDER BY nct_id DESC
LIMIT 200;

@ -1,48 +0,0 @@
import psycopg2 as psyco
import pandas as pd
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import re
def preporcess_text(text):
    """Normalize free text for vectorization.

    Lowercases, strips every non-letter character, tokenizes, and
    removes English stopwords.

    Note: the name is misspelled ("preporcess") but is kept because the
    __main__ block calls it by this name.

    Parameters
    ----------
    text : str
        Raw free text (e.g. a `why_stopped` value).

    Returns
    -------
    str
        Space-joined remaining tokens, stripped of surrounding whitespace.
    """
    text = text.lower()
    text = re.sub("[^A-Za-z]+", " ", text)
    # make tokens
    tokens = nltk.word_tokenize(text)
    # Build the stopword set once instead of re-reading the corpus list for
    # every token: stopwords.words() returns a fresh list each call, so the
    # original comprehension was O(tokens * stopwords) with repeated I/O.
    stop_words = set(stopwords.words("english"))
    tokens = [w for w in tokens if w not in stop_words]
    # rejoin
    return " ".join(tokens).strip()
if __name__ == "__main__":
conn = psyco.connect(dbname="aact_db", user="analysis", host="localhost", password="test")
curse = conn.cursor()
curse.execute("SELECT why_stopped FROM ctgov.studies WHERE why_stopped IS NOT NULL LIMIT 2000;")
results = curse.fetchall()
curse.close()
conn.close()
data = pd.DataFrame(results, columns = ["corpus"])
data["cleaned"] = data.corpus.apply(preporcess_text)
vectorizer = TfidfVectorizer(sublinear_tf=True)
X = vectorizer.fit_transform(data.cleaned)
kmeans = KMeans(n_clusters=10, random_state=11021585)
kmeans.fit(X)
data["cluster"] = kmeans.labels_
print(data.groupby(["cluster"])["cleaned"].count())

@ -1 +0,0 @@
I believe this is for an ML classification of the reasons for termination.

@ -7,9 +7,9 @@ services:
   expose:
     - "3306"
   volumes:
-    - ./rxnav-in-a-box/mysql:/docker-entrypoint-initdb.d:ro
+    - ./RxNav-In-a-box/rxnav-in-a-box/mysql:/docker-entrypoint-initdb.d:ro
-    - ./rxnav_data:/
+    - ./RxNav-In-a-box/rxnav_data:/
   environment:
     MYSQL_RANDOM_ROOT_PASSWORD: "yes"
-    MYSQL_USER: webuser
+    MYSQL_USER: ${MYSQL_USER}
-    MYSQL_PASSWORD: 9521354c77aa
+    MYSQL_PASSWORD: ${MYSQL_PASSWORD}

@ -0,0 +1,39 @@
version: '3'

networks:
  # Dedicated network so these research services can reach each other by name.
  pharmaceutical_research:

services:
  aact_db:
    image: postgres:14-alpine
    networks:
      - pharmaceutical_research
    container_name: aact_db
    # restart: always  # restart after crashes
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB}
    ports:
      - "5432:5432"  # host:container
    volumes:  # host:container is the format.
      # persistent storage for the database
      - ./db_store/:/var/lib/postgresql/
      # the database dump to restore from
      - ./aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
      # the folder containing entrypoint init scripts
      - ./docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/

  rxnav-db:
    image: mariadb:10.4
    restart: "no"
    networks:
      # Joined to the shared network so the services "work together",
      # matching aact_db above (it was missing here).
      - pharmaceutical_research
    ports:
      # FIX: was - "3306":"3306", which YAML parses as a quoted-key mapping;
      # Compose expects a single quoted "host:container" string.
      - "3306:3306"
    volumes:
      - ./RxNav-In-a-box/rxnav-in-a-box/mysql:/docker-entrypoint-initdb.d:ro
      # NOTE(review): mounting onto the container root "/" looks wrong —
      # Docker will refuse or shadow the filesystem. Confirm the intended
      # target path inside the container (kept as-is pending confirmation).
      - ./RxNav-In-a-box/rxnav_data:/
    environment:
      MYSQL_RANDOM_ROOT_PASSWORD: "yes"
      MYSQL_USER: ${MYSQL_USER}
      MYSQL_PASSWORD: ${MYSQL_PASSWORD}

@ -1,5 +0,0 @@
# Env file with the database connection settings (name, user, host, password).
DBNAME="aact_db"
DBUSER="root"
DBHOST="localhost"
DBPASS="root"
Loading…
Cancel
Save