Created downloader, table, and loader for market data
parent
dfbd82de54
commit
266c1c9686
@ -0,0 +1,39 @@
|
||||
|
||||
-- Rebuild spl.nsde from scratch: drop any existing table, then recreate the
-- id sequence and the table, and hand both to role "root".
DROP TABLE IF EXISTS spl.nsde;

-- Sequence that supplies default values for spl.nsde.id.
-- NOTE(review): MAXVALUE is the bigint maximum while the id column below is
-- declared integer -- confirm whether id should be bigint.
CREATE SEQUENCE IF NOT EXISTS spl.nsde_id_seq
    INCREMENT BY 1
    START WITH 1
    MINVALUE 1
    MAXVALUE 9223372036854775807
    CACHE 1;

ALTER SEQUENCE spl.nsde_id_seq
    OWNER TO root;

CREATE TABLE IF NOT EXISTS spl.nsde
(
    id                             integer NOT NULL DEFAULT nextval('spl.nsde_id_seq'::regclass),
    package_ndc11                  varchar(11)  COLLATE pg_catalog."default",
    application_number_or_citation varchar(25)  COLLATE pg_catalog."default",
    package_ndc                    varchar(50)  COLLATE pg_catalog."default",
    proprietary_name               varchar(500) COLLATE pg_catalog."default",
    product_type                   varchar(90)  COLLATE pg_catalog."default",
    marketing_category             varchar(160) COLLATE pg_catalog."default",
    dosage_form                    varchar(155) COLLATE pg_catalog."default",
    billing_unit                   varchar(35)  COLLATE pg_catalog."default",
    marketing_start_date           date,
    marketing_end_date             date,
    inactivation_date              date,
    reactivation_date              date,
    CONSTRAINT nsde_pkey PRIMARY KEY (id)
)
TABLESPACE pg_default;

ALTER TABLE IF EXISTS spl.nsde
    OWNER TO root;

-- Tie the sequence to the id column so that dropping the table also drops
-- the sequence.
ALTER SEQUENCE spl.nsde_id_seq
    OWNED BY spl.nsde.id;
|
||||
@ -0,0 +1,16 @@
|
||||
#!/bin/bash
# Download every part of the multi-part NSDE JSON export from
# download.open.fda.gov, unzip each part into the current directory, and
# delete the archive once it has been extracted.
set -e

base_url="https://download.open.fda.gov/other/nsde"
total_parts=3

for part in $(seq 1 "${total_parts}"); do
    archive="./nsde_${part}.zip"
    # Remote names are zero-padded to four digits, e.g.
    # other-nsde-0001-of-0003.json.zip
    remote_name=$(printf 'other-nsde-%04d-of-%04d.json.zip' "${part}" "${total_parts}")

    # --fail makes curl exit non-zero on an HTTP error instead of saving the
    # server's error page as the archive, so `set -e` stops the script at the
    # download that actually failed.
    curl --fail "${base_url}/${remote_name}" -o "${archive}"

    # -o overwrites previously extracted files without prompting, so the
    # script can be re-run unattended.
    unzip -o "${archive}"
    rm "${archive}"
done
|
||||
|
||||
|
||||
@ -0,0 +1,85 @@
|
||||
import json
|
||||
import psycopg2 as psyco
|
||||
from psycopg2.extras import execute_values
|
||||
import datetime as dt
|
||||
|
||||
def file_generator(max_num):
    """Yield the JSON file name of each part, from part 1 up to ``max_num``.

    Names look like ``other-nsde-0001-of-0003.json``: both the part index and
    the total are left-padded with zeros to a width of four characters.
    """
    yield from (
        f"other-nsde-{part:0>4}-of-{max_num:0>4}.json"
        for part in range(1, max_num + 1)
    )
|
||||
|
||||
def get_date(result, key):
    """Parse the ``YYYYMMDD`` string stored under ``key`` in ``result``.

    Returns a ``datetime.datetime`` for the parsed value, or ``None`` when the
    key is absent or its value is falsy (``None``, empty string).
    ``strptime`` raises ``ValueError`` if a non-empty value does not match the
    ``%Y%m%d`` format.
    """
    raw = result.get(key)
    return dt.datetime.strptime(raw, "%Y%m%d") if raw else None
|
||||
|
||||
def build_values(result):
    """Flatten one record dict into the 12-item tuple used as an insert row.

    The tuple holds, in order: the eight text fields read with ``dict.get``
    (``None`` when missing), followed by the four date fields parsed with
    ``get_date``. Callers rely on this exact ordering.
    """
    text_fields = (
        "proprietary_name",
        "application_number_or_citation",
        "product_type",
        "package_ndc",
        "marketing_category",
        "package_ndc11",
        "dosage_form",
        "billing_unit",
    )
    date_fields = (
        "marketing_start_date",
        "marketing_end_date",
        "inactivation_date",
        "reactivation_date",
    )

    text_values = tuple(result.get(field) for field in text_fields)
    date_values = tuple(get_date(result, field) for field in date_fields)
    return text_values + date_values
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Load every NSDE JSON part found in the current directory into spl.nsde.
    #
    # The column list must stay in the same order as the tuple returned by
    # build_values(): marketing_start_date comes BEFORE marketing_end_date.
    # (They were previously listed the other way round, which stored each
    # record's start date in the end-date column and vice versa.)
    insert_query = """
        INSERT INTO spl.nsde (
            proprietary_name
            ,application_number_or_citation
            ,product_type
            ,package_ndc
            ,marketing_category
            ,package_ndc11
            ,dosage_form
            ,billing_unit
            ,marketing_start_date
            ,marketing_end_date
            ,inactivation_date
            ,reactivation_date
        )
        VALUES %s;
    """

    # NOTE(review): connection settings (including the password) are
    # hard-coded here; consider moving them to configuration.
    con = psyco.connect(dbname="aact_db", user="root", host="localhost", password="root")
    try:
        for x in file_generator(3):
            #It would be nice to replace this^^ file_generator with something that retrieves and unzips the files directly.
            # `with con` commits the transaction on success and rolls it back
            # on error, so each file is loaded in its own transaction. It does
            # not close the connection; that happens in the finally below.
            with con, con.cursor() as curse, open("./" + x, "r") as j:
                print(x)

                results = json.load(j)["results"]
                values = [build_values(y) for y in results]
                execute_values(curse, insert_query, values)
    finally:
        con.close()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue