43 changed files with 75 additions and 12966 deletions
--- a/.dbeaver/.credentials-config.json.bak
+++ b/.dbeaver/.credentials-config.json.bak
--- a/.dbeaver/.data-sources.json.bak
+++ b/.dbeaver/.data-sources.json.bak
@ -0,0 +1,64 @@
+{
+	"folders": {},
+	"connections": {
+		"mariaDB-186c896820e-6ff11b5b802d8b82": {
+			"provider": "mysql",
+			"driver": "mariaDB",
+			"name": "rxnav",
+			"save-password": true,
+			"configuration": {
+				"host": "will-office",
+				"port": "3306",
+				"url": "jdbc:mariadb://will-office:3306/",
+				"configurationType": "MANUAL",
+				"type": "dev",
+				"auth-model": "native"
+			}
+		},
+		"postgres-jdbc-186c896a347-2a3d946d2dea4df7": {
+			"provider": "postgresql",
+			"driver": "postgres-jdbc",
+			"name": "aact_db",
+			"save-password": true,
+			"configuration": {
+				"host": "100.95.169.11",
+				"port": "5432",
+				"database": "aact_db",
+				"url": "jdbc:postgresql://100.95.169.11:5432/aact_db",
+				"configurationType": "MANUAL",
+				"type": "dev",
+				"provider-properties": {},
+				"auth-model": "native"
+			},
+			"custom-properties": {
+				"resultset.maxrows": "500"
+			}
+		},
+		"postgres-jdbc-186cd8f479f-6cc3c10c8adc3359": {
+			"provider": "postgresql",
+			"driver": "postgres-jdbc",
+			"name": "drugcentral",
+			"save-password": true,
+			"configuration": {
+				"host": "localhost",
+				"port": "54320",
+				"database": "postgres",
+				"url": "jdbc:postgresql://localhost:54320/postgres",
+				"configurationType": "MANUAL",
+				"type": "dev",
+				"auth-model": "native"
+			}
+		}
+	},
+	"connection-types": {
+		"dev": {
+			"name": "Development",
+			"color": "255,255,255",
+			"description": "Regular development database",
+			"auto-commit": true,
+			"confirm-execute": false,
+			"confirm-data-change": false,
+			"auto-close-transactions": false
+		}
+	}
+}
--- a/.dbeaver/.project-metadata.json.bak
+++ b/.dbeaver/.project-metadata.json.bak
@ -0,0 +1 @@
+{"resources":{"Scripts/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"Scripts/Data_summaries.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"Scripts/DevelopingLinks.sql":{"default-schema":"public","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"},"Scripts/DiseaseBurdens_create_table.sql":{"default-schema":"public","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"},"Scripts/GlobalBurdensOfDisease2019Codebook.sql":{"default-schema":"DiseaseBurden","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"},"Scripts/GroupingTrials.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"Scripts/Script.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"Scripts/TablesAndViews_Public.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-schema":"public","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"}}}
--- a/.gitattributes
+++ b/.gitattributes
@ -5,4 +5,3 @@ other_data/USP[[:space:]]DC/usp_dc_pub_2023_release_2.0_updated_final.csv filter
 other_data/USP[[:space:]]MMG/MMG_v8.0_Alignment_File.csv filter=lfs diff=lfs merge=lfs -text
 other_data/VA[[:space:]]Formulary/PharmacyProductSystem_NationalDrugCodeExtract.csv filter=lfs diff=lfs merge=lfs -text
 containers/AACT_Reloader/backup/aact_db_backup_20250106_184236.sql.gz filter=lfs diff=lfs merge=lfs -text
-containers/AACT_Reloader/backup/aact_db_backup_20250107_133822.sql.gz filter=lfs diff=lfs merge=lfs -text
--- a/.gitignore
+++ b/.gitignore
@ -194,4 +194,4 @@ containers/drugcentral/docker-entrypoint-initdb.d/*.sql
 containers/drugcentral/docker-entrypoint-initdb.d/*.sql.gz
 containers/drugcentral/db_store/*

-.dbeaver/
+.dbeaver/
--- a/Scripts/Backup_AACT_work.sh
+++ b/Scripts/Backup_AACT_work.sh
@ -2,9 +2,8 @@
 backup_dir="/mnt/will/large_data/Research_large_data/ClinicalTrialsDataProcessing/containers/AACT_Reloader/backup/"
 date_stamp=$(date +%Y%m%d_%H%M%S)
 filename="aact_db_backup_${date_stamp}.sql"
-container_name = ${1:-aact_db}

-podman exec "$container_name" pg_dump -U root aact_db > "${backup_dir}/${filename}"
+podman exec "${1:-aact_db}" pg_dump -U root aact_db > "${backup_dir}/${filename}"  # container name: first argument, defaults to aact_db

 # Optional: compress the backup
 gzip "${backup_dir}/${filename}"
--- a/Scripts/ConfiguringFormularies.sql
+++ b/Scripts/ConfiguringFormularies.sql
@ -43,9 +43,6 @@ CREATE TABLE "Formularies".usp_dc_2023 (
 	"API Concept" varchar(250) NULL
 );
 ```
-
-It links rxcuis to other rxcuis where they have a matching USP Categories and Class
-This gives alternative RXCUIs based on category an class.
 */
 CREATE MATERIALIZED VIEW "Formularies".rxcui_category_class_links AS
 WITH base AS (
@ -75,43 +72,27 @@ I'll' break this into two steps.
 1. link formulary alternatives to compounds and brands,
 2. link nct_id to formulary alternatives
 */
-drop if exists materialized view "Formularies".match_trial_compound_to_alternate_bn_rxcuis;
-drop if exists materialized view "Formularies".rxcui_to_brand_through_uspdc cascade;
-
 create materialized view "Formularies".rxcui_to_brand_through_uspdc AS
-select distinct
+select
 	rccl.source_rxcui
 	,rccl.linked_rxcui
 	,rccl.category
 	,rccl."class"
 	,rr.tty1
-	--,rr.tty2
+	,rr.tty2
 	,rr.rxcui2
 from "Formularies".rxcui_category_class_links rccl
 join rxnorm_migrated.rxnorm_relations rr on rr.rxcui1 = rccl.linked_rxcui
 where rr.tty2 = 'BN'
 ;

-/* So this one takes each RXCUI and it's associated RXCUIs from the same 
-category and class, and filters it down to associated RXCUI's that 
-represent brand names.
- */

-create materialized view "Formularies".match_trial_compound_to_alternate_bn_rxcuis as
-select distinct mttbi.nct_id,  rtbtu.rxcui2 as brand_rxcuis
+create materialized view match_trial_compound_to_alternate_bn_rxcuis as -- one row per distinct (nct_id, rxcui2) pair
+select distinct mttbi.nct_id,  rtbtu.rxcui2
+from match_trials_to_bn_in mttbi
+join "Formularies".rxcui_to_brand_through_uspdc rtbtu
+	on mttbi.bn_or_in_cui = rtbtu.rxcui2; -- explicit terminator so later statements cannot merge into this DDL
-;

 /*
-This takes the list of ingredients and brands associated with a trial, and 
-links it to the list of alternative brand names.
+Now I need to create a way to link
 */
-
--renamed the view
-CREATE OR REPLACE VIEW "Formularies".nct_to_brand_counts_through_uspdc
-AS SELECT mtctabr.nct_id,
-    count(*) AS brand_name_counts
-   FROM "Formularies".match_trial_compound_to_alternate_bn_rxcuis mtctabr
-  GROUP BY mtctabr.nct_id;
--- a/Scripts/VariousDevelopmentsForAnalysis.sql
+++ b/Scripts/VariousDevelopmentsForAnalysis.sql
@ -1,308 +0,0 @@
-select * from formatted_data_with_planned_enrollment fdwpe 
-;
-
-
-select * from formatted_data_mat fdm
-;
-
-select count(distinct condition ) from formatted_data_mat fdm
-
-select nct_id, fdm.current_status , count(*) 
-from formatted_data_mat fdm 
-group by nct_id  , fdm.current_status 
-order by nct_id 
-;
-
-select * from formatted_data_mat fdm ;
-
-
-- group with trial split
-with cte as (
-select nct_id
-from formatted_data_mat fdm 
-group by nct_id
-having count(distinct current_status) > 1
-order by nct_id
-)
-select 
-    fdm.nct_id
-    , current_status
-    , earliest_date_observed 
-    , elapsed_duration 
-    , n_brands
-    , category_id
-    , h_sdi_val 
-    , h_sdi_u95
-    , h_sdi_l95
-    , hm_sdi_val 
-    , hm_sdi_u95
-    , hm_sdi_l95
-    , m_sdi_val 
-    , m_sdi_u95
-    , m_sdi_l95
-    , lm_sdi_val 
-    , lm_sdi_u95
-    , lm_sdi_l95
-    , l_sdi_val 
-    , l_sdi_u95
-    , l_sdi_l95
-from formatted_data_mat fdm
-    join cte on cte.nct_id = fdm.nct_id 
-group by 
-    fdm.nct_id
-    , current_status
-    , earliest_date_observed 
-    , elapsed_duration 
-    , n_brands
-    , category_id
-    , h_sdi_val 
-    , h_sdi_u95
-    , h_sdi_l95
-    , hm_sdi_val 
-    , hm_sdi_u95
-    , hm_sdi_l95
-    , m_sdi_val 
-    , m_sdi_u95
-    , m_sdi_l95
-    , lm_sdi_val 
-    , lm_sdi_u95
-    , lm_sdi_l95
-    , l_sdi_val 
-    , l_sdi_u95
-    , l_sdi_l95
-order by nct_id , earliest_date_observed 
-;
-
-select count(distinct category_id ) from 
-
-
-select distinct category_id  from formatted_data_mat fdm 
-;
-
-
-
-
-
-- group with trial split
-with cte as (
-select nct_id
-from formatted_data_mat fdm 
-group by nct_id
-having count(distinct current_status) > 1
-order by nct_id
-)
-select 
-    fdm.nct_id
-    , current_status
-    , earliest_date_observed 
-    , elapsed_duration 
-    , n_brands
-    , category_id
-    , h_sdi_val 
-    , h_sdi_u95
-    , h_sdi_l95
-    , hm_sdi_val 
-    , hm_sdi_u95
-    , hm_sdi_l95
-    , m_sdi_val 
-    , m_sdi_u95
-    , m_sdi_l95
-    , lm_sdi_val 
-    , lm_sdi_u95
-    , lm_sdi_l95
-    , l_sdi_val 
-    , l_sdi_u95
-    , l_sdi_l95
-from formatted_data_mat fdm
-    join cte on cte.nct_id = fdm.nct_id 
-group by 
-    fdm.nct_id
-    , current_status
-    , earliest_date_observed 
-    , elapsed_duration 
-    , n_brands
-    , category_id
-    , h_sdi_val 
-    , h_sdi_u95
-    , h_sdi_l95
-    , hm_sdi_val 
-    , hm_sdi_u95
-    , hm_sdi_l95
-    , m_sdi_val 
-    , m_sdi_u95
-    , m_sdi_l95
-    , lm_sdi_val 
-    , lm_sdi_u95
-    , lm_sdi_l95
-    , l_sdi_val 
-    , l_sdi_u95
-    , l_sdi_l95
-order by nct_id , earliest_date_observed 
-; --TODO: join to usp dc dataset
-
-
-
-
-WITH trialncts AS (
-         SELECT DISTINCT ts.nct_id
-           FROM history.trial_snapshots ts
-), nct_to_cui AS (
-         SELECT bi.nct_id,
-            bi.downcase_mesh_term,
-            rr.tty2,
-            rr.rxcui2 AS approved_drug_rxcui,
-            count(*) AS count
-           FROM ctgov.browse_interventions bi
-             LEFT JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text
-             LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON rr.rxcui1 = rp.rxcui
-          WHERE (bi.nct_id::text IN ( SELECT trialncts.nct_id
-                   FROM trialncts)) AND bi.mesh_type::text = 'mesh-list'::text AND rp.propname::text = 'Active_ingredient_name'::text AND (rr.tty2::text = ANY (ARRAY['BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text]))
-          GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2
-        )   
- SELECT nct_to_cui.nct_id,
-    ud."USP Category",
-    ud."USP Class"
-   FROM nct_to_cui
-     JOIN "Formularies".usp_dc ud ON ud.rxcui::bpchar = nct_to_cui.approved_drug_rxcui
-  GROUP BY nct_to_cui.nct_id, ud."USP Category", ud."USP Class"
-  ORDER BY nct_to_cui.nct_id;
-
-  
-  
-  
-CREATE MATERIALIZED VIEW "Formularies".nct_to_brands_through_uspdc
-AS
-WITH trialncts AS (
-         SELECT DISTINCT ts.nct_id
-           FROM history.trial_snapshots ts
-)
-SELECT 
-    bi.nct_id,
-    count( distinct rr2.rxcui2 ) as brand_name_count
-    FROM ctgov.browse_interventions bi
-      LEFT JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text --match mesh terms to rxcui
-      LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON rr.rxcui1 = rp.rxcui -- match rxcui to relations between rxcuis
-      LEFT JOIN rxnorm_migrated.rxnorm_relations rr2 ON rr.rxcui2 = rr2.rxcui1  -- match rxcui to relations between rxcuis
-WHERE 
-    (bi.nct_id::text IN (SELECT trialncts.nct_id FROM trialncts)) --check the nct_id is in our list 
-    AND 
-    bi.mesh_type::text = 'mesh-list'::text --we are only looking at mesh "list" rxcuis
-    AND rp.propname::text = 'Active_ingredient_name'::text  --and we only care about active ingredients linked to \/\/\/\/\/
-    AND (rr.tty2::text = ANY (ARRAY['BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text])) --and we are linking from active ingredients ^^^^ to branded packs
-    AND (rr2.tty2::text = 'BN') --and from branded packs back to brand names
-GROUP BY bi.nct_id --remove duplicates
-;
-
-
-
-/* 
- * 
- */
-
-
-select 
-    fdqpe.nct_id
-    --,fdqpe.start_date
-    --,fdqpe.current_enrollment
-    --,fdqpe.enrollment_category
-    ,fdqpe.current_status 
-    ,fdqpe.earliest_date_observed 
-    ,fdqpe.elapsed_duration
-    ,fdqpe.n_brands as identical_brands
-    ,ntbtu.brand_name_count 
-    ,fdqpe.category_id
-    ,fdqpe.final_status
-    ,fdqpe.h_sdi_val
-    --,fdqpe.h_sdi_u95
-    --,fdqpe.h_sdi_l95
-    ,fdqpe.hm_sdi_val
-    --,fdqpe.hm_sdi_u95
-    --,fdqpe.hm_sdi_l95
-    ,fdqpe.m_sdi_val
-    --,fdqpe.m_sdi_u95
-    --,fdqpe.m_sdi_l95
-    ,fdqpe.lm_sdi_val
-    --,fdqpe.lm_sdi_u95
-    --,fdqpe.lm_sdi_l95
-    ,fdqpe.l_sdi_val
-    --,fdqpe.l_sdi_u95
-    --,fdqpe.l_sdi_l95
-from formatted_data_mat fdqpe
-    join "Formularies".nct_to_brands_through_uspdc ntbtu
-        on fdqpe.nct_id = ntbtu.nct_id 
-;
-
--example of multiple reopenings
-select * 
-from formatted_data_mat fdm 
-where nct_id = 'NCT01239797'
-
--attempt to automatically find transition periods
-with cte1 as (
-	select nct_id, min(earliest_date_observed) over (partition by nct_id) as earliest_closed_enrollment
-	from formatted_data_mat fdm 
-	where current_status = 'Active, not recruiting'
-), cte2 as (
-	select nct_id, max(earliest_date_observed) over (partition by nct_id) latest_open_enrollment
-	from formatted_data_mat fdm 
-	where current_status != 'Active, not recruiting'
-)
-select 
-	cte1.nct_id
-	,cte1.earliest_closed_enrollment
-	,cte2.latest_open_enrollment
-	,cte1.earliest_closed_enrollment - cte2.latest_open_enrollment 
-from cte1
-	join cte2 on cte1.nct_id = cte2.nct_id
-/*group by 
-	cte1.nct_id
-	,cte1.earliest_closed_enrollment
-	,cte2.latest_open_enrollment
-*/
-
-	
-
-/* So ocassionally a study reopens enrollment.
- * If that didn't happen, then I could just find the first enrollment matching X and/or last enrollment matching Y
- * to get the transitions
- * Instead I need to create shifts of statuses between snapshots, and then remove all of those that did not change. 
- * 
- * Better yet, just get the last shift to ANR.
- * */
-
-	
-/* Take each entry and get the status from a lagged snapshot
- * Then select each snapshot moving from previous_state to ANR
- * and filter out everything except the last one.
- * */
-with cte as (
-select 
-	nct_id
-	,lag(current_status, 1) over (partition by nct_id order by earliest_date_observed)  as previous_status
-	,current_status
-	,earliest_date_observed as date_current
-from formatted_data_mat fdm
-), cte2 as (
-select 	
-	nct_id 
-	,previous_status
-	,current_status 
-	,max(date_current) as date_current_max
-from cte
-where 
-	previous_status != current_status 
-	and
-	current_status = 'Active, not recruiting'
-group by 
-	nct_id
-	,previous_status
-	,current_status
-	,date_current
-)
-select * 
-from formatted_data_mat fdm
-	join cte2 
-		on cte2.nct_id = fdm.nct_id 
-		and cte2.date_current_max = fdm.earliest_date_observed 
-; --join back into 
-	
--- a/containers/AACT_Reloader/StartRestoreContainer.sh
+++ b/containers/AACT_Reloader/StartRestoreContainer.sh
@ -1,6 +1,6 @@
 #!/bin/bash

-RESTORE_DUMP_GZ="${1:-aact_db_backup_20250107_133822.sql.gz}"
+RESTORE_DUMP_GZ="${1:-2023-09-06_aactdb_with_matches.sql.gz}"  # dump file read from /backup/ in the container; first argument overrides the default
 POSTGRES_USER=root
 POSTGRES_PASSWORD=root
 POSTGRES_DB=aact_db
@ -25,7 +25,7 @@ sleep 10

 # Function to check if PostgreSQL is ready
 function check_postgres {
-    podman exec -i "${CONTAINER_NAME}" psql -h localhost -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -c '\q' > /dev/null 2>&1    
+    podman exec -i "${CONTAINER_NAME}" psql -h localhost -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -c '\q' > /dev/null 2>&1  # probe via localhost, the same host the restore step connects to
 }

 # Wait for PostgreSQL to be ready
@ -37,6 +37,6 @@ done
 echo "PostgreSQL is ready. Restoring the database..."

 # Decompress the dump file and restore it to the database
-podman exec -i "${CONTAINER_NAME}" sh -c "gunzip -c /backup/${RESTORE_DUMP_GZ} | psql -h localhost -U ${POSTGRES_USER} -d ${POSTGRES_DB}"
+podman exec -i "${CONTAINER_NAME}" sh -c "gunzip -c /backup/${RESTORE_DUMP_GZ} | psql -h localhost -U ${POSTGRES_USER} -d ${POSTGRES_DB}"  # exec into the container by its name, not by the database name

 echo "Database restoration complete."
--- a/containers/AACT_Reloader/backup/aact_db_backup_20250107_133822.sql.gz
+++ b/containers/AACT_Reloader/backup/aact_db_backup_20250107_133822.sql.gz
--- a/logs.org
+++ b/logs.org
@ -1,140 +0,0 @@
-* Plan/Todo [2025-01-06]
-Goal is to update the main images with more details, i.e. adding means
-etc.
-
- get aact_db back up
- attach it to a "research" network
- restart rocker, attaching it to the same research network.
-
-** NOTES
-
-aact_db-restored-2024-11-27 didn't successfully restore. It is missing
-all the important stuff.
-
-Figured out why the restore was failing. My code to restore had a faulty
-check to see if the DB was up and ready. Fixed that now.
-
-Waiting for restore (manually triggered) to start. Then I should have
-access to the table as needed.
-
-It seems like I'm missing some data within a schema, specifically the
-Formularies and their associated views.
-
-My options are:
-
- search around for documentation or other stuff
- try to rebuild
-
-my suspision is that I forgot to back it up. I think it is probably
-worth looking for. - So I've been looking through my copy of
-ClinicalTrialsDataProcessing, and have not found anything referencing
-it. The formularies data is required for my analysis though. If I
-remember correctly, I manually uploaded the USP datasets in DBeaver,
-then created any views etc.
-
-I think that I'll have to recreate it. This is going to be hard because
-I'm not sure what it did. At least I created mildly informative table
-names.
-
-The tables/views I've identified are: -
-=Formularies.nct_to_brands_through_uspdc=
-
-It looks like I need to - import usp-dc dataset - link those drugs to
-usp data - create a view that links those automatically - back it up. -
-double check the data I get from the request.
-
-The links will be through RXCUIs, and grouped on =USP Class= In effect,
-for a given RXCUI, I want to get the list of RXCUI's which have the same
-USP-DC class, and then be able to link back to brands.
-
-This should have the following links: - RXCUI -> USP-DC category/class
-pair - USP-DC category/class pair -> RXCUIs - RXUCIs -> competitors
-
-Do I want to combine the USP-DC and UPS-MMG datasets? No, there is
-enough difference in them that I don't want to have to handle it that
-way.
-
-I've been working on this in scripts/ConfiguringFormularies.sql
-
-So what I've managed to do so far is export tables, backup the data.
-
-I've got a version that connects trials to brand names, but there may be
-more details to the connection than I thought. I'd like to check if I
-need to filter anything or check if there are other ingredients etc that
-I need to include. */I probably need to write some descriptions of all
-the tables and views to put everything together. An ai would probably be
-helpful in doing this./*
-
-
-** Code snippets
-#+begin_example
-podman run \
- -e POSTGRES_PASSWORD="${POSTGRES_PASSWORD}" \
- -e POSTGRES_USER="${POSTGRES_USER}" \
- -e POSTGRES_DB="${POSTGRES_DB}" \
- --name "${CONTAINER_NAME}" \
- --detach \
- --network research-network \
- --shm-size=512mb \
- --volume ./backup/:/backup/ \
- -p 5432:5432\
- postgres:14-alpine
-#+end_example
-
-#+begin_example
-function check_postgres {
-podman exec -i "${POSTGRES_DB}" psql -h localhost -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -c '\q' > /dev/null 2>&1    
-}
-#+end_example
-
-** Notes at end of day
- was reasonably productive in getting stuff unblocked for finishing
-  JMP, which i'll need to do before I leave town next week.
-
-
-** What I've got to do tomorrow
-I've got a version that connects trials to brand names, but there may be
-more details to the connection than I thought. I'd like to check if I
-need to filter anything or check if there are other ingredients etc that
-I need to include. */I probably need to write some descriptions of all
-the tables and views to put everything together. An ai would probably be
-helpful in doing this./* At the end of it all, I should be able to get a
-count of competing drugs per trial.
-
-Once that is done, I can relink aact_db and rocker, then rerun my
-analysis. Then I can adjust the images that I need for my JMP.
-* [2025-01-07 Tue 12:01] notes
-
-  So what I've got to do is 
-
-** DONE  Investigate what compounds are showing up in my current list
-    if that is what I want, then I'll be able proceed with redoing my images
-   if not, then I'll have to work on adjusting the views etc that I have.
-
-   I've looked through it and it seems to correct.
-
-*** [[/mnt/will/large_data/Research_large_data/ClinicalTrialsDataProcessing/Scripts/ConfiguringFormularies.sql][ConfiguringFormularies.sql:81]] [2025-01-07 Tue 13:24] 
-  I've tweaked these three views to make them clearer.
-  I also renamed the view of interest to ="Formularies".nct_to_brand_counts_through_uspdc= to better represent what it does.
-
-
-** DONE Rerun the analysis
-   CLOSED: [2025-01-07 Tue 16:39]
-    So it looks like I'll need to 
-    1. take a backup of aact_db
-    2. restore from backup, putting the container in the research network
-    3. then rerun the analysis.
-
-      Ok, I'm pushing the backup and can get started on restoring from backup.
-      Backup is restoring. As I recall, this takes 40 minutes.
-   
-      had some mild tweaks to match the new results
-   it now runs
-
-** DONE Add more details to images
-   CLOSED: [2025-01-13 Mon 10:26]
-    The details I want to add include:
-
-    - [x] sample sizes for breakdowns
-    - [x] box and whisker plot along the bottom of the large values
-   [[https://claude.ai/chat/0e6b6368-130e-4aa8-aa16-97b6c937bba4]] has details
--- a/other_data/.gitattributes
+++ b/other_data/.gitattributes
@ -1,9 +0,0 @@
-USP[[:space:]]DC/USP_DC_12_2021_RELEASE_1.0.xlsx filter=lfs diff=lfs merge=lfs -text
-USP[[:space:]]DC/usp_dc_pub_2023_release_2.0_updated_final.xlsx filter=lfs diff=lfs merge=lfs -text
-USP[[:space:]]MMG/Final_Report_and_Summary_of_Methodology_and_Approach_v1.1.pdf filter=lfs diff=lfs merge=lfs -text
-USP[[:space:]]MMG/MMG_v8.0_Alignment_File.xlsx filter=lfs diff=lfs merge=lfs -text
-USP[[:space:]]MMG/Summary_of_Changes_between_MMGv7.0_and_MMGv8.0.pdf filter=lfs diff=lfs merge=lfs -text
-USP[[:space:]]MMG/USP_Medicare_Model_Guidelines_v8.0__All_Excel_Spreadsheets_.xlsx filter=lfs diff=lfs merge=lfs -text
-USP[[:space:]]MMG/USP_Medicare_Model_Guidelines_v8.0__Categories_and_Classes_.pdf filter=lfs diff=lfs merge=lfs -text
-USP[[:space:]]MMG/USP_Medicare_Model_Guidelines_v8.0__Showing_changes_from_v7.0_.pdf filter=lfs diff=lfs merge=lfs -text
-USP[[:space:]]MMG/USP_Medicare_Model_Guidelines_v8.0__With_Example_Part_D_Drugs_.pdf filter=lfs diff=lfs merge=lfs -text
--- a/other_data/RandomSample_AACT_reasons_why_stopped/reasons_why_stopped.csv
+++ b/other_data/RandomSample_AACT_reasons_why_stopped/reasons_why_stopped.csv
--- a/other_data/RandomSample_AACT_reasons_why_stopped/reasons_why_stopped.fods
+++ b/other_data/RandomSample_AACT_reasons_why_stopped/reasons_why_stopped.fods
--- a/MMG/Final_Report_and_Summary_of_Methodology_and_Approach_v1.1.pdf
+++ b/MMG/Final_Report_and_Summary_of_Methodology_and_Approach_v1.1.pdf
--- a/MMG/Summary_of_Changes_between_MMGv7.0_and_MMGv8.0.pdf
+++ b/MMG/Summary_of_Changes_between_MMGv7.0_and_MMGv8.0.pdf
--- a/MMG/USP_Medicare_Model_Guidelines_v8.0__Categories_and_Classes_.pdf
+++ b/MMG/USP_Medicare_Model_Guidelines_v8.0__Categories_and_Classes_.pdf
--- a/MMG/USP_Medicare_Model_Guidelines_v8.0__Showing_changes_from_v7.0_.pdf
+++ b/MMG/USP_Medicare_Model_Guidelines_v8.0__Showing_changes_from_v7.0_.pdf
--- a/MMG/USP_Medicare_Model_Guidelines_v8.0__With_Example_Part_D_Drugs_.pdf
+++ b/MMG/USP_Medicare_Model_Guidelines_v8.0__With_Example_Part_D_Drugs_.pdf
--- a/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/init.py
+++ b/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/init.py
--- a/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/db_interface.py
+++ b/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/db_interface.py
--- a/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/login.py
+++ b/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/login.py
--- a/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/model.py
+++ b/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/model.py
--- a/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/templates/base.html
+++ b/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/templates/base.html
--- a/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/templates/validation_index.html
+++ b/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/templates/validation_index.html
--- a/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/templates/validation_of_trial.html
+++ b/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/templates/validation_of_trial.html
--- a/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/validation.py
+++ b/data_mgmt_scripts/Icd10ConditionsMatching/Icd10ConditionsMatching/validation.py
--- a/data_mgmt_scripts/Icd10ConditionsMatching/setup.py
+++ b/data_mgmt_scripts/Icd10ConditionsMatching/setup.py
--- a/data_mgmt_scripts/Icd10ConditionsMatching/start.sh
+++ b/data_mgmt_scripts/Icd10ConditionsMatching/start.sh
--- a/data_mgmt_scripts/db_connection_test.py
+++ b/data_mgmt_scripts/db_connection_test.py
--- a/data_mgmt_scripts/download_tests.py
+++ b/data_mgmt_scripts/download_tests.py
--- a/data_mgmt_scripts/drugtools/init.py
+++ b/data_mgmt_scripts/drugtools/init.py
--- a/data_mgmt_scripts/drugtools/download_and_extract_nsde.py
+++ b/data_mgmt_scripts/drugtools/download_and_extract_nsde.py
--- a/data_mgmt_scripts/drugtools/env_setup.py
+++ b/data_mgmt_scripts/drugtools/env_setup.py
--- a/data_mgmt_scripts/drugtools/historical_nct_downloader.py
+++ b/data_mgmt_scripts/drugtools/historical_nct_downloader.py
--- a/data_mgmt_scripts/drugtools/historical_nct_extractor.py
+++ b/data_mgmt_scripts/drugtools/historical_nct_extractor.py
--- a/data_mgmt_scripts/drugtools/historical_trial_selector.py
+++ b/data_mgmt_scripts/drugtools/historical_trial_selector.py
--- a/data_mgmt_scripts/drugtools/migrate_mysql2pgsql.py
+++ b/data_mgmt_scripts/drugtools/migrate_mysql2pgsql.py
--- a/data_mgmt_scripts/drugtools/selected_trials.sql
+++ b/data_mgmt_scripts/drugtools/selected_trials.sql
--- a/data_mgmt_scripts/import-icd10_to_cause.py
+++ b/data_mgmt_scripts/import-icd10_to_cause.py
--- a/data_mgmt_scripts/rm_data.sh
+++ b/data_mgmt_scripts/rm_data.sh
--- a/data_mgmt_scripts/runall.py
+++ b/data_mgmt_scripts/runall.py
--- a/data_mgmt_scripts/umls_requests.py
+++ b/data_mgmt_scripts/umls_requests.py
				`@ -0,0 +1 @@`
				{"resources":{"Scripts/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"Scripts/Data_summaries.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"Scripts/DevelopingLinks.sql":{"default-schema":"public","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"},"Scripts/DiseaseBurdens_create_table.sql":{"default-schema":"public","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"},"Scripts/GlobalBurdensOfDisease2019Codebook.sql":{"default-schema":"DiseaseBurden","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"},"Scripts/GroupingTrials.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"Scripts/Script.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"Scripts/TablesAndViews_Public.sql":{"default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db","default-schema":"public"},"development_sql/ASSOICATING NCTIDs to NDCs and Marketing dates.sql":{"default-schema":"public","default-datasource":"postgres-jdbc-186c896a347-2a3d946d2dea4df7","default-catalog":"aact_db"}}}