Created downloader, table, and loader for market data
parent
dfbd82de54
commit
266c1c9686
@ -0,0 +1,39 @@
|
|||||||
|
|
||||||
|
-- Rebuilds spl.nsde from scratch.
-- Dropping the table discards every previously loaded row and, once the
-- sequence is owned by the id column (last statement below), the sequence too.
-- NOTE: this script does not create the spl schema; it must already exist.
DROP TABLE IF EXISTS spl.nsde;

-- Surrogate-key generator for spl.nsde.id.
CREATE SEQUENCE IF NOT EXISTS spl.nsde_id_seq
    INCREMENT 1
    START 1
    MINVALUE 1
    MAXVALUE 9223372036854775807
    CACHE 1;

ALTER SEQUENCE spl.nsde_id_seq
    OWNER TO root;

CREATE TABLE IF NOT EXISTS spl.nsde
(
    -- bigint (not integer): the sequence above ranges up to
    -- 9223372036854775807 and nextval() yields bigint, so an integer column
    -- would fail with "integer out of range" past 2147483647.
    id bigint NOT NULL DEFAULT nextval('spl.nsde_id_seq'::regclass),
    package_ndc11 character varying(11) COLLATE pg_catalog."default",
    application_number_or_citation character varying(25) COLLATE pg_catalog."default",
    package_ndc character varying(50) COLLATE pg_catalog."default",
    proprietary_name character varying(500) COLLATE pg_catalog."default",
    product_type character varying(90) COLLATE pg_catalog."default",
    marketing_category character varying(160) COLLATE pg_catalog."default",
    dosage_form character varying(155) COLLATE pg_catalog."default",
    billing_unit character varying(35) COLLATE pg_catalog."default",
    marketing_start_date date,
    marketing_end_date date,
    inactivation_date date,
    reactivation_date date,
    CONSTRAINT nsde_pkey PRIMARY KEY (id)
)
TABLESPACE pg_default;

ALTER TABLE IF EXISTS spl.nsde
    OWNER TO root;

-- if the table is dropped, the sequence is as well
ALTER SEQUENCE spl.nsde_id_seq
    OWNED BY spl.nsde.id;
|
||||||
@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
# Downloads the three-part NSDE JSON export from download.open.fda.gov and
# unpacks each archive into the current working directory. Each .zip is
# removed once it has been extracted.
#
# -e: abort on the first failing command
# -u: treat use of an unset variable as an error
# -o pipefail: a failing command inside a pipeline fails the pipeline
set -euo pipefail

readonly BASE_URL="https://download.open.fda.gov/other/nsde"
readonly TOTAL_PARTS=3

for (( part = 1; part <= TOTAL_PARTS; part++ )); do
    # Remote names are zero-padded: other-nsde-0001-of-0003.json.zip, ...
    printf -v remote_name 'other-nsde-%04d-of-%04d.json.zip' "${part}" "${TOTAL_PARTS}"
    archive="./nsde_${part}.zip"

    # --fail: exit non-zero on an HTTP error instead of saving the error page
    #         as if it were the archive.
    # --location: follow redirects.
    curl --fail --location --output "${archive}" "${BASE_URL}/${remote_name}"

    # -o: overwrite already-extracted files without prompting, so the script
    #     can be re-run unattended.
    unzip -o "${archive}"
    rm "${archive}"
done
|
||||||
|
|
||||||
|
|
||||||
@ -0,0 +1,85 @@
|
|||||||
|
import json
|
||||||
|
import psycopg2 as psyco
|
||||||
|
from psycopg2.extras import execute_values
|
||||||
|
import datetime as dt
|
||||||
|
|
||||||
|
def file_generator(max_num):
    """Yield the NSDE JSON file names for parts 1 through max_num, in order.

    Each name has the form "other-nsde-<part>-of-<max_num>.json", where both
    numbers are left-padded with zeros to at least four characters
    (e.g. "other-nsde-0001-of-0003.json"). Nothing is yielded when max_num
    is less than 1.
    """
    yield from (
        f"other-nsde-{part:0>4}-of-{max_num:0>4}.json"
        for part in range(1, max_num + 1)
    )
|
||||||
|
|
||||||
|
def get_date(result, key):
    """Parse the "YYYYMMDD" string stored under `key` in `result`.

    Returns a datetime.datetime for the parsed value, or None when the key
    is missing or its value is falsy (None, empty string, ...).
    A value that does not match "%Y%m%d" makes strptime raise ValueError.
    """
    raw = result.get(key)
    if not raw:
        return None
    return dt.datetime.strptime(raw, "%Y%m%d")
|
||||||
|
|
||||||
|
def build_values(result):
    """Flatten one NSDE record (a dict) into a 12-item tuple of column values.

    The tuple is positional, in this fixed order:
    proprietary_name, application_number_or_citation, product_type,
    package_ndc, marketing_category, package_ndc11, dosage_form,
    billing_unit, marketing_start_date, marketing_end_date,
    inactivation_date, reactivation_date.

    The first eight items are taken from `result` unchanged (None when the
    key is absent); the four date items are converted with get_date().
    """
    # Fields copied through as-is.
    plain_keys = (
        "proprietary_name",
        "application_number_or_citation",
        "product_type",
        "package_ndc",
        "marketing_category",
        "package_ndc11",
        "dosage_form",
        "billing_unit",
    )
    # Fields that go through get_date() for type adjustment.
    date_keys = (
        "marketing_start_date",
        "marketing_end_date",
        "inactivation_date",
        "reactivation_date",
    )

    row = [result.get(name) for name in plain_keys]
    row.extend(get_date(result, name) for name in date_keys)
    return tuple(row)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Bulk-loads every NSDE JSON part file from the current directory into
    # spl.nsde, one transaction per file.

    # The column list is positional: it has to line up with the tuple that
    # build_values() returns. marketing_start_date must come BEFORE
    # marketing_end_date; with the two reversed, start dates are stored in
    # the end-date column and vice versa.
    query = """
        INSERT INTO spl.nsde (
            proprietary_name
            ,application_number_or_citation
            ,product_type
            ,package_ndc
            ,marketing_category
            ,package_ndc11
            ,dosage_form
            ,billing_unit
            ,marketing_start_date
            ,marketing_end_date
            ,inactivation_date
            ,reactivation_date
        )
        VALUES %s;
    """

    # NOTE(review): credentials are hard-coded; consider reading them from
    # the environment or a config file.
    con = psyco.connect(dbname="aact_db", user="root", host="localhost", password="root")
    try:
        for x in file_generator(3):
            #It would be nice to replace this^^ file_generator with something that retrieves and unzips the files directly.
            with open("./" + x, "r") as j:
                print(x)
                results = json.load(j)["results"]

            values = [build_values(y) for y in results]

            # `with con` commits on success and rolls back on an exception,
            # but does not close the connection, so a single connection is
            # reused for all files and closed explicitly below.
            with con, con.cursor() as curse:
                execute_values(curse, query, values)
    finally:
        con.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Loading…
Reference in New Issue