Untested but reorganized docker setup

4 years ago · 0bd7965112
parent 9a47199728
commit 0bd7965112
9 changed files with 170 additions and 50 deletions
--- a/.gitignore
+++ b/.gitignore
@ -180,4 +180,5 @@ Manifest.toml
 ###### Custom #####
-**/host_data/*_clinical_trials/
+*_clinical_trials/
 *_clinical_trials.zip
--- a/DockerContainers/ClinicalTrialHistory/Dockerfile
+++ b/DockerContainers/ClinicalTrialHistory/Dockerfile
@ -0,0 +1,7 @@
 # Extends the youainti/aact_from_dump image with additional database init scripts.
 FROM youainti/aact_from_dump
 # Labels use the key="value" form; the whitespace-separated `LABEL key value`
 # form is legacy syntax. Keys and values are unchanged.
 LABEL AUTHOR="Will King (youainti@protonmail.com)"
 LABEL DESCRIPTION="add extra processing to the aact database in preparation for downloading history."
 # Copy the additional init scripts into the image.
 COPY ./docker-entrypoint-initdb.d/ /docker-entrypoint-initdb.d/
 # These will be run after the database is initialized.
--- a/DockerContainers/ClinicalTrialHistory/docker-entrypoint-initdb.d/020_HttpSchema.sql
+++ b/DockerContainers/ClinicalTrialHistory/docker-entrypoint-initdb.d/020_HttpSchema.sql
@ -0,0 +1,41 @@
 CREATE SCHEMA http;
 /*
 The purpose of this schema, its tables, and the associated roles is to process HTTP responses.
 I may even include a table to keep track of the XML responses
 */
 /*
 Add a role to manage permissions on the http schema
 */
 CREATE ROLE http_requestor;
 GRANT CONNECT ON DATABASE aact_db TO http_requestor;
 GRANT USAGE ON SCHEMA http TO http_requestor;
 -- Covers any tables that already exist in the schema when this statement runs.
 GRANT INSERT, SELECT ON ALL TABLES IN SCHEMA http TO http_requestor;
 -- GRANT ... ON ALL TABLES only affects tables that exist at the moment it runs,
 -- and the http tables are created further down in this script. Default
 -- privileges extend the same access to tables created afterwards in this schema
 -- by the role running this script.
 ALTER DEFAULT PRIVILEGES IN SCHEMA http
    GRANT INSERT, SELECT ON TABLES TO http_requestor;
 -- SERIAL columns take their default from a sequence; a role that inserts rows
 -- without supplying the id needs USAGE on that sequence.
 ALTER DEFAULT PRIVILEGES IN SCHEMA http
    GRANT USAGE ON SEQUENCES TO http_requestor;
 /* Create tables related to http requests
 As not every request will have an xml doc, split them.
 */
 -- Metadata for each recorded HTTP response; the XML payload, when there is one,
 -- is stored separately in http.xml_documents.
 CREATE TABLE IF NOT EXISTS http.responses (
    id SERIAL PRIMARY KEY,
    nct VARCHAR(15),          -- presumably the trial's NCT identifier; confirm
    version SMALLINT,         -- the comma after this column was missing, which made the statement a syntax error
    url VARCHAR(255),
    response_code SMALLINT,   -- presumably the HTTP status code; confirm
    response_date DATE
    );
 -- XML payload belonging to a response. `id` is both this table's primary key
 -- and a foreign key to http.responses (id), so a response has at most one
 -- document.
 -- NOTE(review): SERIAL gives `id` its own sequence default even though the
 -- value must match an existing http.responses.id; confirm that inserts always
 -- supply `id` explicitly.
 CREATE TABLE IF NOT EXISTS http.xml_documents (
    id SERIAL
        PRIMARY KEY
        CONSTRAINT http_response
            REFERENCES http.responses (id)
            ON DELETE CASCADE, -- remove xml if the request is deleted
    xml XML
 );
--- a/DockerContainers/ClinicalTrialHistory/docker-entrypoint-initdb.d/030_HistoricalSchema.sql
+++ b/DockerContainers/ClinicalTrialHistory/docker-entrypoint-initdb.d/030_HistoricalSchema.sql
@ -0,0 +1,26 @@
 -- Create a schema handling trial history.
 CREATE SCHEMA history;
 --Create role for anyone who needs to both select and insert on historical data
 CREATE ROLE history_writer;
 GRANT CONNECT ON DATABASE aact_db TO history_writer;
 GRANT USAGE ON SCHEMA history TO history_writer;
 -- Table privileges belong on the history schema: this role is given USAGE on
 -- history only, so privileges on tables in the http schema would be unusable.
 GRANT INSERT, SELECT ON ALL TABLES IN SCHEMA history TO history_writer;
 -- GRANT ... ON ALL TABLES only affects tables that exist when it runs. Default
 -- privileges extend the same access to history tables created afterwards by
 -- the role running this script.
 ALTER DEFAULT PRIVILEGES IN SCHEMA history
    GRANT INSERT, SELECT ON TABLES TO history_writer;
 --Create role for anyone who only needs selection access to historical data, such as for analysis
 CREATE ROLE history_reader;
 GRANT CONNECT ON DATABASE aact_db TO history_reader;
 GRANT USAGE ON SCHEMA history TO history_reader;
 -- Read access belongs on the history schema: this role is given USAGE on
 -- history only, so privileges on tables in the http schema would be unusable.
 GRANT SELECT ON ALL TABLES IN SCHEMA history TO history_reader;
 -- GRANT ... ON ALL TABLES only affects tables that exist when it runs. Default
 -- privileges extend read access to history tables created afterwards by the
 -- role running this script.
 ALTER DEFAULT PRIVILEGES IN SCHEMA history
    GRANT SELECT ON TABLES TO history_reader;
 /* History Tables
 Below is where I would construct the parsed trial history tables that I need.
 */
--- a/DockerContainers/ClinicalTrialHistory/docker-entrypoint-initdb.d/090_AnalysisViews.sql
+++ b/DockerContainers/ClinicalTrialHistory/docker-entrypoint-initdb.d/090_AnalysisViews.sql
@ -0,0 +1 @@
 --Eventually this should let me add necessary views during the construction of the DB.
--- a/DockerContainers/docker-compose.yaml
+++ b/DockerContainers/docker-compose.yaml
@ -0,0 +1,37 @@
 version: "3"
 volumes:
  # Named volume that holds the database.
  aact_pg_database:
 services:
  aact:
    container_name: aact_db
    # Build and use the clinical trial history db image.
    build:
      context: ./ClinicalTrialHistory
    #restart: always #restart after crashes
    ports:
      # host:container
      - "5432:5432"
    environment:
      - POSTGRES_USER=root
      - POSTGRES_PASSWORD=root
      - POSTGRES_DB=aact_db
    # Each entry below is host:container.
    volumes:
      # Persistent storage for the database.
      # NOTE(review): confirm this mount point covers the data directory the
      # base image actually uses; otherwise the data will not land in this volume.
      - aact_pg_database:/var/lib/postgresql/
      # Dump file made available inside the container.
      - ./20220201_clinical_trials/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
  # pgAdmin 4 web interface, published on host port 5050.
  pgadmin:
    image: dpage/pgadmin4
    container_name: pgadmin4_webservice
    #restart: always
    ports:
      - "5050:80"
    environment:
      - PGADMIN_DEFAULT_EMAIL=admin@admin.com
      - PGADMIN_DEFAULT_PASSWORD=root
    # Each entry below is host:container.
    volumes:
      # Server login information.
      - ./pgadmin4/servers.json:/pgadmin4/servers.json
 #Checklist for production
 # uncomment restart: always in both services.
 # add a python environment to run data collection etc
--- a/DockerContainers/downloader/db_connection.py
+++ b/DockerContainers/downloader/db_connection.py
--- a/PostgressDocker/docker-compose.yaml
+++ b/PostgressDocker/docker-compose.yaml
@ -1,49 +0,0 @@
 version: '3'
 volumes:
  aact_pg_database: #This is to hold the database.
  aact_helpful_files: #This holds files that need to be accessed by both pgadmin and postgres. I honestly expect it to usually be empty.
    driver: local
    driver_opts:
      type: 'none'
      o: 'bind'
      device: /home/will/research/ClinicalTrialsDataProcessing/PostgressDocker/host_data/
      #Change this path to match the directory in which you keep the dump file.
 services:
  aact:
    container_name: aact_db
    image: postgres
    #restart: always #restart to run things well.
    environment:
      POSTGRES_USER: admin
      POSTGRES_PASSWORD: root
      POSTGRES_DB: aact_db
    ports:
      - "5432:5432" #host:container
    volumes: #host:container is the format.
      #pull in a single file
      #- $HOME/research/ClinicalTrialsDataProcessing/PostgressDocker/infile:/infile
      #add storage volumes
      - aact_pg_database:/var/lib/postgresql/ # this is the database that persists between uses
      - aact_helpful_files:/mnt/host_data #append :ro to give read-only permissions
    #entrypoint:
      #none yet
      #- echo "test" > /test.touched
  #pgAdmin 4 web interface; container port 80 is published on host port 5050.
  pgadmin:
    container_name: pgadmin4_webservice
    image: dpage/pgadmin4
    #restart: always
    environment:
      PGADMIN_DEFAULT_EMAIL: admin@admin.com
      PGADMIN_DEFAULT_PASSWORD: root
    ports:
      - "5050:80"
    volumes: #host:container is the format.
      #The volume with server login information.
      - ./pgadmin4/servers.json:/pgadmin4/servers.json
 #Checklist for production
 # uncomment restart: always in both services.
 # add a python environment to run data collection etc
--- a/README.md
+++ b/README.md
@ -3,3 +3,59 @@
 This is used to build tools which process and standardize the data.
 More data later.
 # proposed architecture:
 AACT Dockerfile #when built on its own, allows for the creation of the AACT database.
 - Should create appropriate users.
 - 
 ClinicalTrialHistory Dockerfile #inherits from AACT (multistage builds?) and adds initialization steps to the database.
 - adds schemas, tables, and views for  both http requests and parsed history tables.
 - adds connection roles
 - adds connection users
 ClinicalTrialDataProcessing docker-compose.yaml #deploys a ClinicalTrialHistory container, pgadmin4, and eventually the downloading program/environment.
 # Background on Docker
 Docker uses the following flow
 1. configuration using `docker-compose.yaml` or a `Dockerfile`
 2. `docker build .` to generate an image
 3. `docker run xxxxxx` to take the image and create a container.
    - when the container is created, it starts, running commands as configured in the dockerfile.
    - Consequently, when the AACT database image is run, it must first initialize the postgres db and then run the initialization steps.
    - Here is where bind mounts come into play.
 ## Multistage builds
 https://stackoverflow.com/questions/53659993/docker-multi-stage-how-to-split-up-into-multiple-dockerfiles
 https://docs.docker.com/develop/develop-images/multistage-build/
 Basically
 ## Dockerfile vs docker-compose.yaml
 A `Dockerfile` is used to create images.
 A `docker-compose.yaml` is used to automate the deployment of containers.
 ## Types of storage
 ### COPY/ADD (Dockerfile)
 In a dockerfile, this adds a file permanently to the image.
 This adds files one way to or from the container when initialized.
 ### Volumes (docker-compose.yaml && Dockerfile)
 Usable in both `docker-compose.yaml` files and `Dockerfile`s, this creates permanent storage.
 It can be maintained by docker or stored in a particular location.
 Good for longer term storage such as databases.
 ### Bind mounts (docker-compose.yaml)
 Bind mounts are used to make a host filesystem resource
 available
		`@ -0,0 +1 @@`
							`--Eventually this should let me add necessary views during the construction of the DB.`