Removed unused files; began adjusting the docker-compose files so that they will work together.

llm-extraction
youainti 3 years ago
parent 1a106a553e
commit 8dbf4e8c2e

@ -1,7 +0,0 @@
import RxMixInABox as rx
# Look up RxCUI identifiers matching the string "Levothyroxine",
# then fetch the brand products for the first match.
rxcui_matches = rx.FindRxcuiByString("Levothyroxine")
print(rxcui_matches)
brands = rx.get_brands_from_ingredients(rxcui_matches[0])

@ -1,44 +0,0 @@
-- Sample the free-text reasons that trials were stopped.
SELECT why_stopped
FROM ctgov.studies
WHERE why_stopped IS NOT NULL
LIMIT 100;

-- Distribution of study types.
SELECT study_type, COUNT(*)
FROM ctgov.studies
GROUP BY study_type;

-- How many studies are FDA-regulated drug trials?
SELECT is_fda_regulated_drug, COUNT(*)
FROM ctgov.studies
GROUP BY is_fda_regulated_drug;

/*
Note that there is a decent number of trials that have expanded access.
*/
SELECT study_type,
       phase,
       has_expanded_access,
       has_dmc,
       COUNT(*)
FROM ctgov.studies
WHERE is_fda_regulated_drug IS TRUE
  AND study_type = 'Interventional'
  AND start_date > DATE('2007-01-01')
GROUP BY study_type, phase, has_expanded_access, has_dmc;

/*
Find the different MeSH terms as assigned by clinicaltrials.gov.
*/
SELECT *
FROM ctgov.browse_conditions
ORDER BY nct_id DESC, mesh_type
LIMIT 200;

SELECT *
FROM ctgov.browse_interventions
ORDER BY nct_id DESC
LIMIT 200;

@ -1,48 +0,0 @@
import psycopg2 as psyco
import pandas as pd
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
import re
def preporcess_text(text):
    """Normalize free text for vectorization.

    Lowercases, strips every non-letter character, tokenizes, and
    removes English stopwords.

    Note: the name is misspelled ("preporcess") but is kept because the
    __main__ block calls it by this name.

    Parameters
    ----------
    text : str
        Raw free text (e.g. a `why_stopped` value).

    Returns
    -------
    str
        Space-joined remaining tokens, stripped of surrounding whitespace.
    """
    text = text.lower()
    text = re.sub("[^A-Za-z]+", " ", text)
    # make tokens
    tokens = nltk.word_tokenize(text)
    # Build the stopword set once instead of re-reading the corpus list for
    # every token: stopwords.words() returns a fresh list each call, so the
    # original comprehension was O(tokens * stopwords) with repeated I/O.
    stop_words = set(stopwords.words("english"))
    tokens = [w for w in tokens if w not in stop_words]
    # rejoin
    return " ".join(tokens).strip()
if __name__ == "__main__":
conn = psyco.connect(dbname="aact_db", user="analysis", host="localhost", password="test")
curse = conn.cursor()
curse.execute("SELECT why_stopped FROM ctgov.studies WHERE why_stopped IS NOT NULL LIMIT 2000;")
results = curse.fetchall()
curse.close()
conn.close()
data = pd.DataFrame(results, columns = ["corpus"])
data["cleaned"] = data.corpus.apply(preporcess_text)
vectorizer = TfidfVectorizer(sublinear_tf=True)
X = vectorizer.fit_transform(data.cleaned)
kmeans = KMeans(n_clusters=10, random_state=11021585)
kmeans.fit(X)
data["cluster"] = kmeans.labels_
print(data.groupby(["cluster"])["cleaned"].count())

@ -1 +0,0 @@
I believe this is for an ML classification of the reasons for termination.

@ -7,9 +7,9 @@ services:
   expose:
     - "3306"
   volumes:
-    - ./rxnav-in-a-box/mysql:/docker-entrypoint-initdb.d:ro
+    - ./RxNav-In-a-box/rxnav-in-a-box/mysql:/docker-entrypoint-initdb.d:ro
-    - ./rxnav_data:/
+    - ./RxNav-In-a-box/rxnav_data:/
   environment:
     MYSQL_RANDOM_ROOT_PASSWORD: "yes"
-    MYSQL_USER: webuser
+    MYSQL_USER: ${MYSQL_USER}
-    MYSQL_PASSWORD: 9521354c77aa
+    MYSQL_PASSWORD: ${MYSQL_PASSWORD}

@ -0,0 +1,39 @@
version: '3'

networks:
  # Dedicated network so these research services can reach each other by name.
  pharmaceutical_research:

services:
  aact_db:
    image: postgres:14-alpine
    networks:
      - pharmaceutical_research
    container_name: aact_db
    # restart: always  # restart after crashes
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB}
    ports:
      - "5432:5432"  # host:container
    volumes:  # host:container is the format.
      # persistent storage for the database
      - ./db_store/:/var/lib/postgresql/
      # the database dump to restore from
      - ./aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
      # the folder containing entrypoint init scripts
      - ./docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/

  rxnav-db:
    image: mariadb:10.4
    restart: "no"
    networks:
      # Joined to the shared network so the services "work together",
      # matching aact_db above (it was missing here).
      - pharmaceutical_research
    ports:
      # FIX: was - "3306":"3306", which YAML parses as a quoted-key mapping;
      # Compose expects a single quoted "host:container" string.
      - "3306:3306"
    volumes:
      - ./RxNav-In-a-box/rxnav-in-a-box/mysql:/docker-entrypoint-initdb.d:ro
      # NOTE(review): mounting onto the container root "/" looks wrong —
      # Docker will refuse or shadow the filesystem. Confirm the intended
      # target path inside the container (kept as-is pending confirmation).
      - ./RxNav-In-a-box/rxnav_data:/
    environment:
      MYSQL_RANDOM_ROOT_PASSWORD: "yes"
      MYSQL_USER: ${MYSQL_USER}
      MYSQL_PASSWORD: ${MYSQL_PASSWORD}

@ -1,5 +0,0 @@
# Env file with the database connection settings (name, user, host, password).
DBNAME="aact_db"
DBUSER="root"
DBHOST="localhost"
DBPASS="root"
Loading…
Cancel
Save