Removed unused files; began adjusting the docker-compose files so that they will work together.
parent
1a106a553e
commit
8dbf4e8c2e
@ -1,7 +0,0 @@
|
|||||||
"""Quick smoke test of the RxNav-in-a-box API wrapper.

Looks up the RxCUI(s) for levothyroxine, then fetches the brand names
that contain that ingredient.
"""
import RxMixInABox as rx

a = rx.FindRxcuiByString("Levothyroxine")
print(a)

# Guard against an empty result: the original indexed a[0] unconditionally
# and would raise IndexError when the lookup returned nothing.
if a:
    c = rx.get_brands_from_ingredients(a[0])
    # Surface the brands; the original computed c but never displayed it.
    print(c)
else:
    print("No RxCUI found for query")
|
|
||||||
@ -1,44 +0,0 @@
|
|||||||
-- Peek at the free-text reasons trials were stopped.
SELECT why_stopped
FROM ctgov.studies
WHERE why_stopped IS NOT NULL
LIMIT 100;

-- Distribution of study types.
SELECT study_type, COUNT(*)
FROM ctgov.studies
GROUP BY study_type;

-- How many studies are flagged as FDA-regulated drug trials.
SELECT is_fda_regulated_drug, COUNT(*)
FROM ctgov.studies
GROUP BY is_fda_regulated_drug;

/*
 * Note that there is a decent number of trials that have expanded access.
 */
SELECT
    study_type,
    phase,
    has_expanded_access,
    has_dmc,
    COUNT(*)
FROM ctgov.studies
WHERE is_fda_regulated_drug IS TRUE
  AND study_type = 'Interventional'
  AND start_date > DATE('2007-01-01')
GROUP BY study_type, phase, has_expanded_access, has_dmc;

/*
 * Find different mesh terms as assigned by clinicaltrials.gov.
 */
SELECT *
FROM ctgov.browse_conditions
ORDER BY nct_id DESC, mesh_type
LIMIT 200;

SELECT *
FROM ctgov.browse_interventions
ORDER BY nct_id DESC
LIMIT 200;
|
|
||||||
|
|
||||||
@ -1,48 +0,0 @@
|
|||||||
import psycopg2 as psyco
|
|
||||||
import pandas as pd
|
|
||||||
import nltk
|
|
||||||
from nltk.corpus import stopwords
|
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
||||||
from sklearn.cluster import KMeans
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
def preporcess_text(text):
    """Lowercase, strip non-letters, tokenize, and drop English stopwords.

    Parameters
    ----------
    text : str
        Raw free text (e.g. a ``why_stopped`` value from ctgov.studies).

    Returns
    -------
    str
        Space-joined remaining tokens, stripped of surrounding whitespace.
    """
    # NOTE(review): name keeps the original typo ("preporcess") so existing
    # callers keep working; consider renaming to preprocess_text later.
    text = text.lower()
    text = re.sub("[^A-Za-z]+", " ", text)

    # Make tokens.
    tokens = nltk.word_tokenize(text)

    # Remove stopwords. Build the stopword set ONCE: set membership is O(1),
    # whereas the original re-read the corpus list for every single token.
    stop = set(stopwords.words("english"))
    tokens = [w for w in tokens if w not in stop]

    # Rejoin into a single cleaned string.
    return " ".join(tokens).strip()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Pull a sample of free-text "why stopped" reasons from the AACT database.
    conn = psyco.connect(dbname="aact_db", user="analysis", host="localhost", password="test")
    try:
        # Cursor is a context manager; it is closed when the block exits.
        with conn.cursor() as curse:
            curse.execute("SELECT why_stopped FROM ctgov.studies WHERE why_stopped IS NOT NULL LIMIT 2000;")
            results = curse.fetchall()
    finally:
        # Always release the connection, even if the query raises —
        # the original leaked both cursor and connection on error.
        conn.close()

    # One row per study; a single text column to cluster on.
    data = pd.DataFrame(results, columns=["corpus"])
    data["cleaned"] = data.corpus.apply(preporcess_text)

    # TF-IDF features; sublinear_tf dampens very frequent terms.
    vectorizer = TfidfVectorizer(sublinear_tf=True)
    X = vectorizer.fit_transform(data.cleaned)

    # Fixed random_state keeps cluster assignments reproducible across runs.
    kmeans = KMeans(n_clusters=10, random_state=11021585)
    kmeans.fit(X)

    data["cluster"] = kmeans.labels_

    # Quick sanity check: how many documents landed in each cluster.
    print(data.groupby(["cluster"])["cleaned"].count())
|
|
||||||
|
|
||||||
|
|
||||||
@ -1 +0,0 @@
|
|||||||
I believe this is for ML classification of the reasons for termination.
|
|
||||||
@ -0,0 +1,39 @@
|
|||||||
|
version: '3'

networks:
  pharmaceutical_research: # named network so other compose files can attach to it explicitly

services:
  aact_db:
    image: postgres:14-alpine
    networks:
      - pharmaceutical_research
    container_name: aact_db
    #restart: always #restart after crashes
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB}
    ports:
      - "5432:5432" #host:container
    volumes: #host:container is the format.
      # Persistent storage for the database. Mount the *data* directory:
      # the postgres image declares VOLUME /var/lib/postgresql/data, so
      # binding only the parent (/var/lib/postgresql/) gets shadowed by an
      # anonymous volume and nothing persists to the host.
      - ./db_store/:/var/lib/postgresql/data/
      # This is the database dump to restore from.
      - ./aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
      # This is the folder containing entrypoint info.
      - ./docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/

  rxnav-db:
    image: mariadb:10.4
    restart: "no"
    # Join the shared network so the two databases are reachable from the
    # same tooling (otherwise this service lands on the default network).
    networks:
      - pharmaceutical_research
    ports:
      # Fixed quoting: the original `- "3306":"3306"` parses as a YAML
      # mapping entry, not the single "host:container" string compose expects.
      - "3306:3306"
    volumes:
      - ./RxNav-In-a-box/rxnav-in-a-box/mysql:/docker-entrypoint-initdb.d:ro
      # FIXME(review): this mounts the data folder over the container's
      # root filesystem ("/"), which cannot be intended — confirm the
      # correct target path inside the container before relying on it.
      - ./RxNav-In-a-box/rxnav_data:/
    environment:
      MYSQL_RANDOM_ROOT_PASSWORD: "yes"
      MYSQL_USER: ${MYSQL_USER}
      MYSQL_PASSWORD: ${MYSQL_PASSWORD}
|
||||||
@ -1,5 +0,0 @@
|
|||||||
# Env file with database connection settings (consumed by the analysis scripts).
# NOTE(review): credentials are hard-coded for local development — presumably
# not intended for any shared environment; confirm before deploying.
DBNAME="aact_db"
DBUSER="root"
DBHOST="localhost"
DBPASS="root"
|
|
||||||
Loading…
Reference in New Issue