diff --git a/RxMix/testing.py b/RxMix/testing.py deleted file mode 100644 index 4432b64..0000000 --- a/RxMix/testing.py +++ /dev/null @@ -1,7 +0,0 @@ -import RxMixInABox as rx -a = rx.FindRxcuiByString("Levothyroxine") -print(a) - - - -c= rx.get_brands_from_ingredients(a[0]) \ No newline at end of file diff --git a/assorted/examining the db.sql b/assorted/examining the db.sql deleted file mode 100644 index 0c3ed1a..0000000 --- a/assorted/examining the db.sql +++ /dev/null @@ -1,44 +0,0 @@ -SELECT why_stopped FROM ctgov.studies -WHERE why_stopped IS NOT NULL -LIMIT 100; - -SELECT study_type, count(*) from ctgov.studies -group by study_type; - -SELECT is_fda_regulated_drug, count(*) from ctgov.studies -GROUP BY is_fda_regulated_drug; - - - -/* -Note that there is a decent number of trials that have expanded access -*/ -SELECT - study_type - , phase - , has_expanded_access - , has_dmc - , count(*) -FROM ctgov.studies -WHERE - is_fda_regulated_drug is true - AND - study_type = 'Interventional' - AND - start_date > date('2007-01-01') -group by study_type, phase, has_expanded_access, has_dmc; - - - - -/* - Find different mesh terms as assigned by clinicaltrials.gov - */ -select * from ctgov.browse_conditions -order by nct_id desc,mesh_type -limit 200; - -select * from ctgov.browse_interventions -order by nct_id desc -limit 200; - diff --git a/classifications/classify_terminations.py b/classifications/classify_terminations.py deleted file mode 100644 index 7984463..0000000 --- a/classifications/classify_terminations.py +++ /dev/null @@ -1,48 +0,0 @@ -import psycopg2 as psyco -import pandas as pd -import nltk -from nltk.corpus import stopwords -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.cluster import KMeans -import re - - -def preporcess_text(text): - - text = text.lower() - text = re.sub("[^A-Za-z]+", " ", text) - #make tokens - tokens = nltk.word_tokenize(text) - - #remove stopwords - tokens = [ w for w in tokens if not w in 
stopwords.words("english")] - - #rejoin - return " ".join(tokens).strip() - -if __name__ == "__main__": - conn = psyco.connect(dbname="aact_db", user="analysis", host="localhost", password="test") - - curse = conn.cursor() - - curse.execute("SELECT why_stopped FROM ctgov.studies WHERE why_stopped IS NOT NULL LIMIT 2000;") - results = curse.fetchall() - - curse.close() - conn.close() - - data = pd.DataFrame(results, columns = ["corpus"]) - data["cleaned"] = data.corpus.apply(preporcess_text) - - vectorizer = TfidfVectorizer(sublinear_tf=True) - - X = vectorizer.fit_transform(data.cleaned) - - kmeans = KMeans(n_clusters=10, random_state=11021585) - kmeans.fit(X) - - data["cluster"] = kmeans.labels_ - - print(data.groupby(["cluster"])["cleaned"].count()) - - \ No newline at end of file diff --git a/classifications/readme.md b/classifications/readme.md deleted file mode 100644 index 0da5364..0000000 --- a/classifications/readme.md +++ /dev/null @@ -1 +0,0 @@ -I believe this is for a ml classification or reasons for terminations. diff --git a/containers/RxNav-In-a-box/docker-compose.yml b/containers/RxNav-In-a-box/docker-compose.yml index 8ea4830..350e795 100644 --- a/containers/RxNav-In-a-box/docker-compose.yml +++ b/containers/RxNav-In-a-box/docker-compose.yml @@ -7,9 +7,9 @@ services: expose: - "3306" volumes: - - ./rxnav-in-a-box/mysql:/docker-entrypoint-initdb.d:ro - - ./rxnav_data:/ + - ./RxNav-In-a-box/rxnav-in-a-box/mysql:/docker-entrypoint-initdb.d:ro + - ./RxNav-In-a-box/rxnav_data:/ environment: MYSQL_RANDOM_ROOT_PASSWORD: "yes" - MYSQL_USER: webuser - MYSQL_PASSWORD: 9521354c77aa + MYSQL_USER: ${MYSQL_USER} + MYSQL_PASSWORD: ${MYSQL_PASSWORD} diff --git a/containers/docker-compose.yaml b/containers/docker-compose.yaml new file mode 100644 index 0000000..167c877 --- /dev/null +++ b/containers/docker-compose.yaml @@ -0,0 +1,39 @@ +version: '3' + +networks: + pharmaceutical_research: #because it helps to have a way to link specifically to this. 
+ +services: + aact_db: + image: postgres:14-alpine + networks: + - pharmaceutical_research + container_name: aact_db + #restart: always #restart after crashes + environment: + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_DB: ${POSTGRES_DB} + ports: + - "5432:5432" #host:container + volumes: #host:container is the format. + # this is persistent storage for the database (the image's data directory) + - ./db_store/:/var/lib/postgresql/data + # this is the database dump to restore from + - ./aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp + # this is the folder containing entrypoint info. + - ./docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/ + + + rxnav-db: + image: mariadb:10.4 + restart: "no" + ports: + - "3306:3306" #host:container + volumes: + - ./RxNav-In-a-box/rxnav-in-a-box/mysql:/docker-entrypoint-initdb.d:ro + - ./RxNav-In-a-box/rxnav_data:/ + environment: + MYSQL_RANDOM_ROOT_PASSWORD: "yes" + MYSQL_USER: ${MYSQL_USER} + MYSQL_PASSWORD: ${MYSQL_PASSWORD} diff --git a/sample.env b/sample.env deleted file mode 100644 index f5e9d55..0000000 --- a/sample.env +++ /dev/null @@ -1,5 +0,0 @@ -#Env file for setting up db and -DBNAME="aact_db" -DBUSER="root" -DBHOST="localhost" -DBPASS="root"