/*
I started by creating a formularies schema, then importing the USP-DC
formulary data through DBeaver's CSV import.
*/

-- DROP SCHEMA "Formularies";

CREATE SCHEMA "Formularies" AUTHORIZATION root;

-- "Formularies".usp_dc_2023 definition

-- Drop table

-- DROP TABLE "Formularies".usp_dc_2023;

CREATE TABLE "Formularies".usp_dc_2023 (
    rxcui varchar(15) NULL, -- yes, even though this is a number, it is represented as a string elsewhere.
    tty varchar(10) NULL,
    "Name" varchar(256) NULL,
    "Related BN" varchar(250) NULL,
    -- NOTE(review): 25050 looks like it may be a typo for 250; kept as-is
    -- because the wider limit is harmless. Confirm against the import data.
    "Related DF" varchar(25050) NULL,
    "USP Category" varchar(250) NULL,
    "USP Class" varchar(250) NULL,
    "USP Pharmacotherapeutic Group" varchar(250) NULL,
    "API Concept" varchar(250) NULL
);

/*
I then linked the data back on itself with a materialized view, using
claude.ai for simplicity.

Claude.ai
> I need a postres sql statement to create a materialized view that will take
> the following table and link from a given rxcui to the other rxcui's that
> share the same category and class

```sql
CREATE TABLE "Formularies".usp_dc_2023 (
    rxcui int4 NULL,
    tty varchar(10) NULL,
    "Name" varchar(256) NULL,
    "Related BN" varchar(250) NULL,
    "Related DF" varchar(25050) NULL,
    "USP Category" varchar(250) NULL,
    "USP Class" varchar(250) NULL,
    "USP Pharmacotherapeutic Group" varchar(250) NULL,
    "API Concept" varchar(250) NULL
);
```

(The prompt above declared rxcui as int4; the table actually created in this
script uses varchar(15).)

It links rxcuis to other rxcuis where they have a matching USP Category and
Class. This gives alternative RXCUIs based on category and class.
*/

-- One row per distinct (source_rxcui, linked_rxcui, category, class) where the
-- two formulary rows share both "USP Category" and "USP Class".
-- Rows whose category or class is NULL never link, because NULL = NULL is not
-- true in a join condition.
CREATE MATERIALIZED VIEW "Formularies".rxcui_category_class_links AS
SELECT DISTINCT
    a.rxcui AS source_rxcui,
    b.rxcui AS linked_rxcui,
    a."USP Category" AS category,
    a."USP Class" AS "class"
FROM "Formularies".usp_dc_2023 AS a
INNER JOIN "Formularies".usp_dc_2023 AS b
    ON a."USP Category" = b."USP Category"
    AND a."USP Class" = b."USP Class"
    AND a.rxcui <> b.rxcui -- never link an rxcui to itself
WHERE a.rxcui IS NOT NULL
    AND b.rxcui IS NOT NULL;

-- Add indexes for better query performance
CREATE INDEX rxcui_category_class_links_source_rxcui_idx
    ON "Formularies".rxcui_category_class_links (source_rxcui);
CREATE INDEX rxcui_category_class_links_linked_rxcui_idx
    ON "Formularies".rxcui_category_class_links (linked_rxcui);

/*
Next step is linking a given nct -> compounds -> formulary alternatives ->
compounds -> brands/generics.

I'll break this into two steps.
1. link formulary alternatives to compounds and brands,
2. link nct_id to formulary alternatives
*/

-- Drop the dependent view chain before rebuilding it. PostgreSQL spells this
-- DROP MATERIALIZED VIEW IF EXISTS <name>; the "drop if exists materialized
-- view" word order is a syntax error. CASCADE on the first drop lets the
-- script re-run once the plain view at the bottom of this file exists; the
-- second drop's CASCADE already reaches the same objects.
DROP MATERIALIZED VIEW IF EXISTS "Formularies".match_trial_compound_to_alternate_bn_rxcuis CASCADE;
DROP MATERIALIZED VIEW IF EXISTS "Formularies".rxcui_to_brand_through_uspdc CASCADE;

CREATE MATERIALIZED VIEW "Formularies".rxcui_to_brand_through_uspdc AS
SELECT DISTINCT
    rccl.source_rxcui,
    rccl.linked_rxcui,
    rccl.category,
    rccl."class",
    rr.tty1,
    -- rr.tty2 is omitted from the output: the WHERE clause fixes it to 'BN'.
    rr.rxcui2
FROM "Formularies".rxcui_category_class_links AS rccl
INNER JOIN rxnorm_migrated.rxnorm_relations AS rr
    ON rr.rxcui1 = rccl.linked_rxcui
WHERE rr.tty2 = 'BN';

/*
So this one takes each RXCUI and its associated RXCUIs from the same category
and class, and filters it down to associated RXCUIs that represent brand names.
*/

-- NOTE(review): match_trials_to_bn_in is not schema-qualified, so it resolves
-- through the session search_path -- confirm which schema is intended.
-- NOTE(review): the join matches the trial's own bn_or_in_cui against rxcui2
-- and then outputs that same rxcui2, so brand_rxcuis always equals the trial's
-- bn_or_in_cui. If the goal is *alternative* brands, the join probably needs
-- to go through source_rxcui instead -- confirm before relying on the counts.
CREATE MATERIALIZED VIEW "Formularies".match_trial_compound_to_alternate_bn_rxcuis AS
SELECT DISTINCT
    mttbi.nct_id,
    rtbtu.rxcui2 AS brand_rxcuis
FROM match_trials_to_bn_in AS mttbi
INNER JOIN "Formularies".rxcui_to_brand_through_uspdc AS rtbtu
    ON mttbi.bn_or_in_cui = rtbtu.rxcui2;

/*
This takes the list of ingredients and brands associated with a trial, and
links it to the list of alternative brand names.
*/

-- renamed the view
-- Number of distinct brand_rxcuis rows per trial (the source view is DISTINCT
-- on nct_id + brand_rxcuis, so count(*) counts distinct brands).
CREATE OR REPLACE VIEW "Formularies".nct_to_brand_counts_through_uspdc AS
SELECT
    mtctabr.nct_id,
    count(*) AS brand_name_counts
FROM "Formularies".match_trial_compound_to_alternate_bn_rxcuis AS mtctabr
GROUP BY mtctabr.nct_id;