From d91240845604619ba829ed4afcb87de508470f6e Mon Sep 17 00:00:00 2001 From: Will King Date: Tue, 7 Jan 2025 13:37:05 -0800 Subject: [PATCH] Got counts of competing drugs back --- Scripts/ConfiguringFormularies.sql | 29 +++++-- logs.org | 128 +++++++++++++++++++++++++++++ 2 files changed, 152 insertions(+), 5 deletions(-) create mode 100644 logs.org diff --git a/Scripts/ConfiguringFormularies.sql b/Scripts/ConfiguringFormularies.sql index e5368ab..27baf96 100644 --- a/Scripts/ConfiguringFormularies.sql +++ b/Scripts/ConfiguringFormularies.sql @@ -43,6 +43,9 @@ CREATE TABLE "Formularies".usp_dc_2023 ( "API Concept" varchar(250) NULL ); ``` + +It links rxcuis to other rxcuis where they have a matching USP Category and Class +This gives alternative RXCUIs based on category and class. */ CREATE MATERIALIZED VIEW "Formularies".rxcui_category_class_links AS WITH base AS ( @@ -72,27 +75,43 @@ I'll' break this into two steps. 1. link formulary alternatives to compounds and brands, 2. link nct_id to formulary alternatives */ +drop materialized view if exists "Formularies".match_trial_compound_to_alternate_bn_rxcuis cascade; +drop materialized view if exists "Formularies".rxcui_to_brand_through_uspdc cascade; + create materialized view "Formularies".rxcui_to_brand_through_uspdc AS -select +select distinct rccl.source_rxcui ,rccl.linked_rxcui ,rccl.category ,rccl."class" ,rr.tty1 - ,rr.tty2 + --,rr.tty2 ,rr.rxcui2 from "Formularies".rxcui_category_class_links rccl join rxnorm_migrated.rxnorm_relations rr on rr.rxcui1 = rccl.linked_rxcui where rr.tty2 = 'BN' ; +/* So this one takes each RXCUI and its associated RXCUIs from the same +category and class, and filters it down to associated RXCUIs that +represent brand names. 
+ */ -create materialized view match_trial_compound_to_alternate_bn_rxcuis as -select distinct mttbi.nct_id, rtbtu.rxcui2 +create materialized view "Formularies".match_trial_compound_to_alternate_bn_rxcuis as +select distinct mttbi.nct_id, rtbtu.rxcui2 as brand_rxcuis from match_trials_to_bn_in mttbi join "Formularies".rxcui_to_brand_through_uspdc rtbtu on mttbi.bn_or_in_cui = rtbtu.rxcui2 +; /* -Now I need to create a way to link +This takes the list of ingredients and brands associated with a trial, and +links it to the list of alternative brand names. */ + +-- Per-trial (nct_id) count of distinct alternative brand-name RXCUIs; view renamed to reflect that it holds counts +CREATE OR REPLACE VIEW "Formularies".nct_to_brand_counts_through_uspdc +AS SELECT mtctabr.nct_id, + count(*) AS brand_name_counts + FROM "Formularies".match_trial_compound_to_alternate_bn_rxcuis mtctabr + GROUP BY mtctabr.nct_id; diff --git a/logs.org b/logs.org new file mode 100644 index 0000000..84ae512 --- /dev/null +++ b/logs.org @@ -0,0 +1,128 @@ +* Plan/Todo [2025-01-06] +Goal is to update the main images with more details, i.e. adding means +etc. + +- get aact_db back up +- attach it to a "research" network +- restart rocker, attaching it to the same research network. + +** NOTES + +aact_db-restored-2024-11-27 didn't successfully restore. It is missing +all the important stuff. + +Figured out why the restore was failing. My code to restore had a faulty +check to see if the DB was up and ready. Fixed that now. + +Waiting for restore (manually triggered) to start. Then I should have +access to the table as needed. + +It seems like I'm missing some data within a schema, specifically the +Formularies and their associated views. + +My options are: + +- search around for documentation or other stuff +- try to rebuild + +My suspicion is that I forgot to back it up. I think it is probably +worth looking for. - So I've been looking through my copy of +ClinicalTrialsDataProcessing, and have not found anything referencing +it. The formularies data is required for my analysis though. 
If I +remember correctly, I manually uploaded the USP datasets in DBeaver, +then created any views etc. + +I think that I'll have to recreate it. This is going to be hard because +I'm not sure what it did. At least I created mildly informative table +names. + +The tables/views I've identified are: - +=Formularies.nct_to_brands_through_uspdc= + +It looks like I need to - import usp-dc dataset - link those drugs to +usp data - create a view that links those automatically - back it up. - +double check the data I get from the request. + +The links will be through RXCUIs, and grouped on =USP Class=. In effect, +for a given RXCUI, I want to get the list of RXCUIs which have the same +USP-DC class, and then be able to link back to brands. + +This should have the following links: - RXCUI -> USP-DC category/class +pair - USP-DC category/class pair -> RXCUIs - RXCUIs -> competitors + +Do I want to combine the USP-DC and USP-MMG datasets? No, there is +enough difference in them that I don't want to have to handle it that +way. + +I've been working on this in Scripts/ConfiguringFormularies.sql + +So what I've managed to do so far is export tables and back up the data. + +I've got a version that connects trials to brand names, but there may be +more details to the connection than I thought. I'd like to check if I +need to filter anything or check if there are other ingredients etc that +I need to include. */I probably need to write some descriptions of all +the tables and views to put everything together. 
An AI would probably be +helpful in doing this./* + + +** Code snippets +#+begin_example +podman run \ + -e POSTGRES_PASSWORD="${POSTGRES_PASSWORD}" \ + -e POSTGRES_USER="${POSTGRES_USER}" \ + -e POSTGRES_DB="${POSTGRES_DB}" \ + --name "${CONTAINER_NAME}" \ + --detach \ + --network research-network \ + --shm-size=512mb \ + --volume ./backup/:/backup/ \ + -p 5432:5432\ + postgres:14-alpine +#+end_example + +#+begin_example +function check_postgres { +podman exec -i "${POSTGRES_DB}" psql -h localhost -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -c '\q' > /dev/null 2>&1 +} +#+end_example + +** Notes at end of day +- was reasonably productive in getting stuff unblocked for finishing + JMP, which I'll need to do before I leave town next week. + + +** What I've got to do tomorrow +I've got a version that connects trials to brand names, but there may be +more details to the connection than I thought. I'd like to check if I +need to filter anything or check if there are other ingredients etc that +I need to include. */I probably need to write some descriptions of all +the tables and views to put everything together. An AI would probably be +helpful in doing this./* At the end of it all, I should be able to get a +count of competing drugs per trial. + +Once that is done, I can relink aact_db and rocker, then rerun my +analysis. Then I can adjust the images that I need for my JMP. +* [2025-01-07 Tue 12:01] notes + + So what I've got to do is + +** DONE Investigate what compounds are showing up in my current list + if that is what I want, then I'll be able to proceed with redoing my images + if not, then I'll have to work on adjusting the views etc that I have. + + I've looked through it and it seems to be correct. + +*** [[/mnt/will/large_data/Research_large_data/ClinicalTrialsDataProcessing/Scripts/ConfiguringFormularies.sql][ConfiguringFormularies.sql:81]] [2025-01-07 Tue 13:24] + I've tweaked these three views to make them clearer. 
+ I also renamed the view of interest to ="Formularies".nct_to_brand_counts_through_uspdc= to better represent what it does. + + +** TODO Rerun the analysis + So it looks like I'll need to + 1. take a backup of aact_db + 2. restore from backup, putting the container in the research network + 3. then rerun the analysis. + +** TODO Add more details to images + This should include means and some quantiles