/*
 * Schema "http": tables, types, views and a role used to record and process
 * HTTP responses. A table tracking the XML responses may be added later.
 *
 * Depends on objects defined outside this script: database aact_db, role
 * root, and table ctgov.studies (nct_id).
 *
 * Every statement sits on its own line on purpose: a "--" comment runs to
 * the end of the physical line, so joining statements onto one line silently
 * comments out everything that follows the first "--".
 */
CREATE SCHEMA IF NOT EXISTS http;

/* Role used to manage permissions on the http schema. */
-- CREATE ROLE has no IF NOT EXISTS form, so guard it to keep the script re-runnable.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_catalog.pg_roles
        WHERE rolname = 'http_requestor'
    ) THEN
        CREATE ROLE http_requestor;
    END IF;
END
$$;

GRANT CONNECT ON DATABASE aact_db TO http_requestor;
GRANT USAGE ON SCHEMA http TO http_requestor;
-- Table and sequence privileges are granted at the END of this script:
-- "GRANT ... ON ALL TABLES IN SCHEMA" only affects objects that already exist.

/*
 * Tables related to HTTP requests.
 * Not every request will have an XML document, so those are kept separate.
 */
CREATE TABLE IF NOT EXISTS http.responses (
    id            SERIAL PRIMARY KEY,
    nct_id        VARCHAR(15),
    -- NOTE(review): version_a / version_b presumably identify the pair of
    -- history versions that was requested -- confirm against the downloader.
    version_a     SMALLINT,
    version_b     SMALLINT,
    url           VARCHAR(255),
    response_code SMALLINT,
    response_date TIMESTAMP WITH TIME ZONE,
    html          TEXT
);

-- Type: http.history_download_status
-- CREATE TYPE has no IF NOT EXISTS form, so guard it to keep the script re-runnable.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_catalog.pg_type AS t
        INNER JOIN pg_catalog.pg_namespace AS n
            ON n.oid = t.typnamespace
        WHERE n.nspname = 'http'
          AND t.typname = 'history_download_status'
    ) THEN
        CREATE TYPE http.history_download_status AS ENUM
            ('Ignored', 'Of Interest', 'Reserved', 'Downloaded', 'Incomplete');
    END IF;
END
$$;

ALTER TYPE http.history_download_status
    OWNER TO root;

COMMENT ON TYPE http.history_download_status
    IS 'Records the status of the downloaded history from clinicaltrials.gov';

/*
 * Table: http.download_status
 * Created and then initialized below to record the history download status.
 * Rows are a log: the newest row per nct_id is the current status (see the
 * http.most_recent_download_status view).
 */
CREATE TABLE IF NOT EXISTS http.download_status (
    id               SERIAL NOT NULL,
    nct_id           character varying(15) COLLATE pg_catalog."default" NOT NULL,
    status           http.history_download_status NOT NULL
                         DEFAULT 'Ignored'::http.history_download_status,
    update_timestamp timestamp with time zone DEFAULT now(),
    CONSTRAINT download_status_pkey PRIMARY KEY (id),
    CONSTRAINT "nct_id link" FOREIGN KEY (nct_id)
        REFERENCES ctgov.studies (nct_id) MATCH SIMPLE
        ON UPDATE NO ACTION
        ON DELETE NO ACTION
)
TABLESPACE pg_default;

ALTER TABLE IF EXISTS http.download_status
    OWNER TO root;

COMMENT ON TABLE http.download_status
    IS 'Records the download status for each trial''s history';

-- Seed one 'Ignored' row per study that has no status row yet.
-- The NOT EXISTS guard matters on re-runs: an unconditional insert would add a
-- newer 'Ignored' row for every trial and thereby reset its most recent status.
-- No DISTINCT is needed: the foreign key above requires ctgov.studies.nct_id
-- to be unique.
INSERT INTO http.download_status (nct_id, status)
SELECT
    s.nct_id,
    'Ignored'::http.history_download_status
FROM ctgov.studies AS s
WHERE NOT EXISTS (
    SELECT 1
    FROM http.download_status AS ds
    WHERE ds.nct_id = s.nct_id
);

/*
 * Views that let downloader programs identify what to download.
 */

-- View: http.most_recent_download_status
-- Latest status row per nct_id. update_timestamp is nullable and DESC would
-- sort NULLs first, hence NULLS LAST; id DESC breaks ties between rows that
-- share a timestamp (e.g. written in the same transaction) deterministically.
CREATE OR REPLACE VIEW http.most_recent_download_status AS
SELECT
    t.nct_id,
    t.status,
    t.update_timestamp
FROM (
    SELECT
        ds.id,
        ds.nct_id,
        ds.status,
        ds.update_timestamp,
        row_number() OVER (
            PARTITION BY ds.nct_id
            ORDER BY ds.update_timestamp DESC NULLS LAST, ds.id DESC
        ) AS rn
    FROM http.download_status AS ds
) AS t
WHERE t.rn = 1
ORDER BY t.nct_id;

ALTER VIEW http.most_recent_download_status
    OWNER TO root;

COMMENT ON VIEW http.most_recent_download_status
    IS 'List of most recent download status';

-- View: http.trials_to_download
-- Trials whose most recent status is 'Of Interest'.
CREATE OR REPLACE VIEW http.trials_to_download AS
SELECT mrds.nct_id
FROM http.most_recent_download_status AS mrds
WHERE mrds.status = 'Of Interest'::http.history_download_status;

ALTER VIEW http.trials_to_download
    OWNER TO root;

COMMENT ON VIEW http.trials_to_download
    IS 'List of trials for which the history needs downloaded';

/*
 * Privileges for http_requestor, granted last so that they cover every table
 * and view created above. The SERIAL id columns draw from sequences, so
 * INSERT also requires USAGE on those sequences.
 */
GRANT INSERT, SELECT ON ALL TABLES IN SCHEMA http TO http_requestor;
GRANT USAGE ON ALL SEQUENCES IN SCHEMA http TO http_requestor;