-- ClinicalTrialsDataProcessing/Scripts/ConfiguringFormularies.sql
-- PostgreSQL script (listed as 118 lines, 3.7 KiB in the repository viewer).

/*
I started by creating a "Formularies" schema,
then imported the USP DC formulary data through DBeaver's CSV import.
*/
-- Schema that holds the formulary table, materialized views and view
-- created by the rest of this script.
-- IF NOT EXISTS makes the statement safe to re-run: when the schema is
-- already present PostgreSQL emits a notice and leaves it (and its owner)
-- untouched instead of aborting the script.
-- DROP SCHEMA "Formularies";
CREATE SCHEMA IF NOT EXISTS "Formularies" AUTHORIZATION root;
-- "Formularies".usp_dc_2023 definition
-- Landing table for the USP DC formulary CSV import. Every column is
-- nullable text; no keys or constraints are declared here.
-- IF NOT EXISTS makes the statement safe to re-run: an existing table (and
-- the data already imported into it) is left untouched.
-- Drop table
-- DROP TABLE "Formularies".usp_dc_2023;
CREATE TABLE IF NOT EXISTS "Formularies".usp_dc_2023 (
    -- Numeric in appearance, but deliberately stored as text because it is
    -- represented as a string elsewhere (avoids casts when joining).
    rxcui varchar(15) NULL,
    tty varchar(10) NULL,
    "Name" varchar(256) NULL,
    "Related BN" varchar(250) NULL,
    -- NOTE(review): 25050 looks like a typo for 250 (the sibling columns use
    -- 250). Left as-is; confirm the longest imported value before tightening.
    "Related DF" varchar(25050) NULL,
    "USP Category" varchar(250) NULL,
    "USP Class" varchar(250) NULL,
    "USP Pharmacotherapeutic Group" varchar(250) NULL,
    "API Concept" varchar(250) NULL
);
/*
I then linked the data back on itself with a materialized view, using claude.ai for simplicity.
Claude.ai > I need a Postgres SQL statement to create a materialized view that will take the following table and link from a given rxcui to the other rxcuis that share the same category and class
```sql
CREATE TABLE "Formularies".usp_dc_2023 (
rxcui int4 NULL,
tty varchar(10) NULL,
"Name" varchar(256) NULL,
"Related BN" varchar(250) NULL,
"Related DF" varchar(25050) NULL,
"USP Category" varchar(250) NULL,
"USP Class" varchar(250) NULL,
"USP Pharmacotherapeutic Group" varchar(250) NULL,
"API Concept" varchar(250) NULL
);
```
It links each rxcui to the other rxcuis that have a matching USP Category and USP Class.
This gives alternative RXCUIs based on category and class.
*/
-- Pairs every rxcui in usp_dc_2023 with each *other* rxcui that has the same
-- "USP Category" and the same "USP Class".
--   * Both directions of a pair are produced (A -> B and B -> A) because the
--     table is joined to itself and only identical rxcuis are excluded.
--   * Rows whose category or class is NULL never match: the equality
--     comparisons evaluate to NULL, which the join treats as "no match".
--   * DISTINCT collapses repeats caused by an rxcui appearing on several
--     rows with the same category/class.
-- IF NOT EXISTS makes the statement safe to re-run; it does NOT reload data.
-- After re-importing usp_dc_2023, run
--   REFRESH MATERIALIZED VIEW "Formularies".rxcui_category_class_links;
CREATE MATERIALIZED VIEW IF NOT EXISTS "Formularies".rxcui_category_class_links AS
SELECT DISTINCT
    src.rxcui          AS source_rxcui,
    alt.rxcui          AS linked_rxcui,
    src."USP Category" AS category,
    src."USP Class"    AS "class"
FROM "Formularies".usp_dc_2023 AS src
INNER JOIN "Formularies".usp_dc_2023 AS alt
    ON  alt."USP Category" = src."USP Category"
    AND alt."USP Class"    = src."USP Class"
    AND alt.rxcui <> src.rxcui
-- Explicit NULL guards kept for readability; the <> comparison above already
-- rejects NULL rxcuis.
WHERE src.rxcui IS NOT NULL
  AND alt.rxcui IS NOT NULL;
-- Add indexes for better query performance: one per side of the link so
-- lookups by either source_rxcui or linked_rxcui can use an index.
-- The indexes are named explicitly (using the names PostgreSQL would have
-- generated) so IF NOT EXISTS can be used; an unnamed CREATE INDEX silently
-- builds a duplicate index under a new name every time the script is re-run.
CREATE INDEX IF NOT EXISTS rxcui_category_class_links_source_rxcui_idx
    ON "Formularies".rxcui_category_class_links (source_rxcui);
CREATE INDEX IF NOT EXISTS rxcui_category_class_links_linked_rxcui_idx
    ON "Formularies".rxcui_category_class_links (linked_rxcui);
/*
Next step is linking a given nct -> compounds -> formulary alternatives -> compounds -> brands/generics.
I'll break this into two steps.
1. link formulary alternatives to compounds and brands,
2. link nct_id to formulary alternatives
*/
-- Remove the two materialized views that are rebuilt below so this part of
-- the script can be re-run.
-- PostgreSQL syntax is DROP MATERIALIZED VIEW IF EXISTS <name>; the previous
-- "drop if exists materialized view" ordering is a syntax error.
-- CASCADE is required on both: the view
-- "Formularies".nct_to_brand_counts_through_uspdc depends on the first, and
-- the first depends on the second. Dependent objects dropped here are
-- recreated further down in this script.
DROP MATERIALIZED VIEW IF EXISTS "Formularies".match_trial_compound_to_alternate_bn_rxcuis CASCADE;
DROP MATERIALIZED VIEW IF EXISTS "Formularies".rxcui_to_brand_through_uspdc CASCADE;
-- For every (source_rxcui, linked_rxcui) pair in rxcui_category_class_links,
-- look up the linked rxcui in rxnorm_migrated.rxnorm_relations (as rxcui1)
-- and keep only relations whose other end has tty2 = 'BN'.
-- Output: the link columns plus tty1 and rxcui2 from the relation row.
-- tty2 is not selected because the filter fixes it to 'BN' on every row.
-- NOTE(review): the schema of rxnorm_relations is not visible in this file;
-- assumes rxcui1 is comparable to the varchar rxcui used here — confirm.
CREATE MATERIALIZED VIEW "Formularies".rxcui_to_brand_through_uspdc AS
SELECT DISTINCT
    links.source_rxcui,
    links.linked_rxcui,
    links.category,
    links."class",
    rel.tty1,
    rel.rxcui2
FROM "Formularies".rxcui_category_class_links AS links
INNER JOIN rxnorm_migrated.rxnorm_relations AS rel
    ON rel.rxcui1 = links.linked_rxcui
WHERE rel.tty2 = 'BN';
/* The rxcui_to_brand_through_uspdc view above takes each RXCUI and its
associated RXCUIs from the same category and class, and filters them down
to the associated RXCUIs that represent brand names.
*/
-- Distinct (nct_id, brand rxcui) pairs: each row of match_trials_to_bn_in is
-- matched to rxcui_to_brand_through_uspdc where the trial's bn_or_in_cui
-- equals the view's rxcui2, and that rxcui2 is returned as brand_rxcuis.
-- match_trials_to_bn_in is not schema-qualified, so it is resolved through
-- the session's search_path.
-- NOTE(review): the join key and the returned column are both rxcui2, so
-- every brand_rxcuis value equals the trial's own bn_or_in_cui. If the goal
-- is *alternative* brands, the join may need to go through source_rxcui
-- instead — confirm against the upstream schemas before changing.
CREATE MATERIALIZED VIEW "Formularies".match_trial_compound_to_alternate_bn_rxcuis AS
SELECT DISTINCT
    trial.nct_id,
    brand.rxcui2 AS brand_rxcuis
FROM match_trials_to_bn_in AS trial
INNER JOIN "Formularies".rxcui_to_brand_through_uspdc AS brand
    ON brand.rxcui2 = trial.bn_or_in_cui;
/*
This takes the list of ingredients and brands associated with a trial, and
links it to the list of alternative brand names.
*/
-- (This view was renamed.)
-- One row per nct_id with the number of rows that trial has in
-- match_trial_compound_to_alternate_bn_rxcuis. That materialized view is
-- built with SELECT DISTINCT nct_id, brand_rxcuis, so the count is the number
-- of distinct brand rxcuis matched to the trial. Trials with no match have
-- no row here (there is no zero-count row).
CREATE OR REPLACE VIEW "Formularies".nct_to_brand_counts_through_uspdc AS
SELECT
    per_trial.nct_id,
    count(*) AS brand_name_counts
FROM "Formularies".match_trial_compound_to_alternate_bn_rxcuis AS per_trial
GROUP BY per_trial.nct_id;