Untested but reorganized docker setup

history-download
will king 4 years ago
parent 9a47199728
commit 0bd7965112

3
.gitignore vendored

@ -180,4 +180,5 @@ Manifest.toml
###### Custom ##### ###### Custom #####
**/host_data/*_clinical_trials/ *_clinical_trials/
*_clinical_trials.zip

@ -0,0 +1,7 @@
# Builds on the youainti/aact_from_dump image and layers extra init scripts on top.
FROM youainti/aact_from_dump

# Image metadata. The key=value form is used because the space-separated
# `LABEL key value` form is legacy syntax.
LABEL AUTHOR='Will King (youainti@protonmail.com)'
LABEL DESCRIPTION='add extra processing to the aact database in preparation for downloading history.'

# Copy additional init scripts into the image.
COPY ./docker-entrypoint-initdb.d/ /docker-entrypoint-initdb.d/
# These will be run after the database is initialized.

@ -0,0 +1,41 @@
CREATE SCHEMA http;
/*
The purpose of this schema, its tables, and the associated role is to process HTTP responses.
I may even include a table to keep track of the XML responses.
*/

/*
Add a role to manage permissions on the http schema.
Table and sequence privileges are granted at the end of this script, because
GRANT ... ON ALL TABLES IN SCHEMA only affects objects that already exist
at the moment it runs.
*/
CREATE ROLE http_requestor;
GRANT CONNECT ON DATABASE aact_db TO http_requestor;
GRANT USAGE ON SCHEMA http TO http_requestor;

/* Create tables related to http requests.
As not every request will have an xml doc, split them.
*/
CREATE TABLE IF NOT EXISTS http.responses (
    id SERIAL PRIMARY KEY,
    nct VARCHAR(15),
    version SMALLINT,
    url VARCHAR(255),
    response_code SMALLINT,
    response_date DATE
);

-- NOTE(review): id is both a SERIAL and a foreign key to http.responses(id);
-- inserts presumably always supply the id of the matching response -- confirm.
CREATE TABLE IF NOT EXISTS http.xml_documents (
    id SERIAL PRIMARY KEY,
    xml XML,
    CONSTRAINT http_response
        FOREIGN KEY (id)
        REFERENCES http.responses (id)
        ON DELETE CASCADE --remove xml if the request is deleted
);

/*
Grant privileges now that the tables (and the sequences backing their SERIAL
columns) exist. USAGE on the sequences is required for INSERTs that rely on
the default id value.
*/
GRANT INSERT, SELECT ON ALL TABLES IN SCHEMA http TO http_requestor;
GRANT USAGE ON ALL SEQUENCES IN SCHEMA http TO http_requestor;

@ -0,0 +1,26 @@
-- Create a schema handling trial history.
CREATE SCHEMA history;

-- Create role for anyone who needs to both select and insert on historical data.
CREATE ROLE history_writer;
GRANT CONNECT ON DATABASE aact_db TO history_writer;
GRANT USAGE ON SCHEMA history TO history_writer;
-- Covers tables that already exist in the history schema when this runs.
GRANT INSERT, SELECT ON ALL TABLES IN SCHEMA history TO history_writer;
-- Covers tables/sequences created later in this schema by the role running this script.
ALTER DEFAULT PRIVILEGES IN SCHEMA history GRANT INSERT, SELECT ON TABLES TO history_writer;
ALTER DEFAULT PRIVILEGES IN SCHEMA history GRANT USAGE ON SEQUENCES TO history_writer;

-- Create role for anyone who only needs selection access to historical data, such as for analysis.
CREATE ROLE history_reader;
GRANT CONNECT ON DATABASE aact_db TO history_reader;
GRANT USAGE ON SCHEMA history TO history_reader;
-- Covers tables that already exist in the history schema when this runs.
GRANT SELECT ON ALL TABLES IN SCHEMA history TO history_reader;
-- Covers tables created later in this schema by the role running this script.
ALTER DEFAULT PRIVILEGES IN SCHEMA history GRANT SELECT ON TABLES TO history_reader;

/* History Tables
Below is where I would construct the parsed trial history tables that I need.
*/

@ -0,0 +1 @@
--Eventually this should let me add necessary views during the construction of the DB.

@ -0,0 +1,37 @@
version: '3'

volumes:
  aact_pg_database:  # This is to hold the database.

services:
  aact:
    build: ./ClinicalTrialHistory  # build and use the clinical trial history db.
    container_name: aact_db
    # restart: always  # restart after crashes
    environment:
      POSTGRES_USER: root
      POSTGRES_PASSWORD: root
      POSTGRES_DB: aact_db
    ports:
      - "5432:5432"  # host:container
    volumes:  # host:container is the format.
      # Persistent storage for the database. The mount targets the data
      # directory itself: postgres images of this era declare
      # /var/lib/postgresql/data as their own volume, so mounting the parent
      # directory would leave the cluster in an anonymous volume instead.
      # NOTE(review): assumes the base image keeps the default PGDATA - confirm.
      - aact_pg_database:/var/lib/postgresql/data
      # The dump file made available inside the container.
      - ./20220201_clinical_trials/postgres_data.dmp:/mnt/host_data/postgres_data.dmp

  pgadmin:
    container_name: pgadmin4_webservice
    image: dpage/pgadmin4
    # restart: always
    environment:
      PGADMIN_DEFAULT_EMAIL: admin@admin.com
      PGADMIN_DEFAULT_PASSWORD: root
    ports:
      - "5050:80"
    volumes:  # host:container is the format.
      # The volume with server login information.
      - ./pgadmin4/servers.json:/pgadmin4/servers.json

# Checklist for production
#   uncomment restart: always in both services.
#   add a python environment to run data collection etc

@ -1,49 +0,0 @@
# Previous compose setup (removed by this commit): a stock postgres container plus pgadmin4.
version: '3'
volumes:
aact_pg_database: # This is to hold the database.
aact_helpful_files: # This is to hold files that need to be accessed by both pgadmin and postgres. I honestly expect it to usually be empty.
driver: local
driver_opts:
type: 'none'
o: 'bind'
device: /home/will/research/ClinicalTrialsDataProcessing/PostgressDocker/host_data/
# Change this path to match the path you are holding the dump file in.
services:
aact:
container_name: aact_db
image: postgres
#restart: always #restart to run things well.
environment:
POSTGRES_USER: admin
POSTGRES_PASSWORD: root
POSTGRES_DB: aact_db
ports:
- "5432:5432" # host:container
volumes: # host:container is the format.
# pull in a single file
#- $HOME/research/ClinicalTrialsDataProcessing/PostgressDocker/infile:/infile
# add storage volumes
# NOTE(review): the postgres image presumably stores its cluster under /var/lib/postgresql/data - confirm this parent-directory mount actually captures it.
- aact_pg_database:/var/lib/postgresql/ # this is the database that persists between uses
- aact_helpful_files:/mnt/host_data # use :ro to give read-only permissions
#entrypoint:
# none yet
#- echo "test" > /test.touched
pgadmin:
container_name: pgadmin4_webservice
image: dpage/pgadmin4
#restart: always
environment:
PGADMIN_DEFAULT_EMAIL: admin@admin.com
PGADMIN_DEFAULT_PASSWORD: root
ports:
- "5050:80"
volumes: # host:container is the format.
# The volume with server login information.
- ./pgadmin4/servers.json:/pgadmin4/servers.json
# Checklist for production
# uncomment restart: always in both services.
# add a python environment to run data collection etc

@ -3,3 +3,59 @@
This is used to build tools which process and standardize the data.
More data later.
# proposed architecture:
AACT Dockerfile #when built on its own, allows for the creation of the AACT database.
- Should create appropriate users.
-
ClinicalTrialHistory Dockerfile #inherits from AACT (multistage builds?) and adds initialization steps to the database.
- adds schemas, tables, and views for both http requests and parsed history tables.
- adds connection roles
- adds connection users
ClinicalTrialDataProcessing docker-compose.yaml #deploys a ClinicalTrialHistory container, pgadmin4, and eventually the downloading program/environment.
# Background on Docker
Docker uses the following flow
1. configuration using `docker-compose.yaml` or a `Dockerfile`
2. `docker build .` to generate an image
3. `docker run xxxxxx` to take the image and create a container.
- when the container is created, it starts, running commands as configured in the dockerfile.
- Consequently, when the AACT database image is run, it must first initialize the postgres db and then run the initialization steps.
- Here is where bind mounts come into play.
## Multistage builds
https://stackoverflow.com/questions/53659993/docker-multi-stage-how-to-split-up-into-multiple-dockerfiles
https://docs.docker.com/develop/develop-images/multistage-build/
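Basically, a multistage build lets a single Dockerfile declare several `FROM` stages, where a later stage can build on or copy files from an earlier one.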
## Dockerfile vs docker-compose.yaml
A `Dockerfile` is used to create images.
A `docker-compose.yaml` is used to automate the deployment of containers.
## Types of storage
### COPY/ADD (Dockerfile)
In a Dockerfile, this adds a file permanently to the image.
The copy is one-way (from the build context into the image) and happens when the image is built, not when the container starts.
### Volumes (docker-compose.yaml && Dockerfile)
Usable in both docker-compose files and Dockerfiles, a volume creates persistent storage.
It can be managed by Docker or stored in a particular location on the host.
Good for longer term storage such as databases.
### Bind mounts (docker-compose.yaml)
Bind mounts are used to make a host filesystem resource
available
Loading…
Cancel
Save