You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ClinicalTrialsDataProcessing/containers/AACT_downloader/docker-entrypoint-initdb.d/020_HttpSchema.sql

130 lines
3.4 KiB
SQL

CREATE SCHEMA http;
/*
The purpose of this schema, its tables, and the associated role is to process
HTTP responses.
A table to keep track of the XML responses may be added later.
*/
/*
Role used to manage permissions on the http schema.

GRANT ... ON ALL TABLES IN SCHEMA only affects tables that already exist at
the moment it runs. No table has been created in http at this point of the
script, so on its own that statement grants nothing on the tables defined
further down. ALTER DEFAULT PRIVILEGES is therefore used as well, so that
tables (and views) created later in this schema by the role running this
script are readable and insertable by http_requestor.

The tables in this schema use SERIAL ids; inserting into them calls nextval()
on the backing sequence, which needs USAGE on that sequence, hence the
default privilege on sequences.
*/
CREATE ROLE http_requestor;
GRANT CONNECT ON DATABASE aact_db TO http_requestor;
GRANT USAGE ON SCHEMA http TO http_requestor;
-- Kept for any table that already exists in the schema when this runs.
GRANT INSERT, SELECT ON ALL TABLES IN SCHEMA http TO http_requestor;
-- Applies to objects created in http from now on by the current role.
ALTER DEFAULT PRIVILEGES IN SCHEMA http
    GRANT INSERT, SELECT ON TABLES TO http_requestor;
ALTER DEFAULT PRIVILEGES IN SCHEMA http
    GRANT USAGE ON SEQUENCES TO http_requestor;
/*
Tables related to HTTP requests.
Not every request will have an XML document, so the responses are kept
separately.
*/
CREATE TABLE IF NOT EXISTS http.responses
(
    id            serial,
    nct_id        character varying(15),
    version_a     smallint,
    version_b     smallint,
    url           character varying(255),
    response_code smallint,                  -- presumably the HTTP status code; confirm against the writer
    response_date timestamp with time zone,
    html          text,
    -- Same name PostgreSQL generates for an inline PRIMARY KEY on this table.
    CONSTRAINT responses_pkey PRIMARY KEY (id)
);
-- Type: http.history_download_status
-- Status values used by http.download_status. The order of the labels below
-- is the sort order of the enum, so it must not be rearranged.
CREATE TYPE http.history_download_status AS ENUM (
    'Ignored',
    'Of Interest',
    'Reserved',
    'Downloaded',
    'Incomplete'
);

ALTER TYPE http.history_download_status OWNER TO root;

COMMENT ON TYPE http.history_download_status IS
    'Records the status of the downloaded history from clinicaltrials.gov';
/*
Table: http.download_status
Holds the history download status per trial (nct_id). The table is created
here and populated by the INSERT that follows this definition.
A trial may have several rows; the view http.most_recent_download_status
selects the newest one per nct_id.
*/
CREATE TABLE IF NOT EXISTS http.download_status
(
    id               serial NOT NULL,
    nct_id           varchar(15) COLLATE pg_catalog."default" NOT NULL,
    status           http.history_download_status NOT NULL
                         DEFAULT CAST('Ignored' AS http.history_download_status),
    update_timestamp timestamptz DEFAULT now(),

    CONSTRAINT download_status_pkey
        PRIMARY KEY (id),

    -- Every status row must point at a known trial in ctgov.studies.
    CONSTRAINT "nct_id link"
        FOREIGN KEY (nct_id)
        REFERENCES ctgov.studies (nct_id)
        MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION
)
TABLESPACE pg_default;

ALTER TABLE IF EXISTS http.download_status OWNER TO root;

COMMENT ON TABLE http.download_status IS
    'Records the download status for each trial''s history';
-- Seed an 'Ignored' status row for every trial that has no status row yet.
-- The NOT EXISTS guard makes the seeding safe to re-run: without it, a second
-- run would add a fresh 'Ignored' row for every trial, and because
-- http.most_recent_download_status picks the newest row per nct_id, that
-- would override whatever status had been recorded since the first run.
INSERT INTO http.download_status (nct_id, status)
SELECT DISTINCT
    s.nct_id,
    'Ignored'::http.history_download_status
FROM ctgov.studies AS s
WHERE NOT EXISTS (
    SELECT 1
    FROM http.download_status AS ds
    WHERE ds.nct_id = s.nct_id
);
/*
Views to allow downloader programs to identify what to download.
*/
-- View: http.most_recent_download_status
-- Returns, for each nct_id, the row of http.download_status with the latest
-- update_timestamp.
--
-- Ordering details:
--   * update_timestamp is nullable; in PostgreSQL a DESC sort puts NULLs
--     first by default, which would make a row without a timestamp look like
--     the newest one. NULLS LAST prevents that.
--   * update_timestamp defaults to now(), which is fixed for the duration of
--     a transaction, so several rows written in one transaction share the
--     same value. The id (a SERIAL, so later inserts get larger values) is
--     used as a tie-breaker to make the chosen row deterministic.
CREATE OR REPLACE VIEW http.most_recent_download_status
AS
SELECT
    ranked.nct_id,
    ranked.status,
    ranked.update_timestamp
FROM (
    SELECT
        ds.nct_id,
        ds.status,
        ds.update_timestamp,
        row_number() OVER (
            PARTITION BY ds.nct_id
            ORDER BY ds.update_timestamp DESC NULLS LAST, ds.id DESC
        ) AS rn
    FROM http.download_status AS ds
) AS ranked
WHERE ranked.rn = 1
ORDER BY ranked.nct_id;

ALTER VIEW http.most_recent_download_status OWNER TO root;

COMMENT ON VIEW http.most_recent_download_status
    IS 'List of most recent download status';
-- View: http.trials_to_download
-- Lists the nct_id of every trial whose most recent status (as reported by
-- http.most_recent_download_status) is 'Of Interest'.
CREATE OR REPLACE VIEW http.trials_to_download
AS
SELECT recent.nct_id
FROM http.most_recent_download_status AS recent
WHERE recent.status = CAST('Of Interest' AS http.history_download_status);

ALTER VIEW http.trials_to_download OWNER TO root;

COMMENT ON VIEW http.trials_to_download IS
    'List of trials for which the history needs downloaded';