Updated analysis graphs and interpretation.

Also renamed some images to match what they represent. The new .png files are from that. The renamed ones are from running the analysis earlier.
2 years ago · 26b3ce9b17
parent d25f5c2a0e
commit 26b3ce9b17
17 changed files with 1168 additions and 38 deletions
--- a/r-analysis/EffectsOfEnrollmentDelay.qmd
+++ b/r-analysis/EffectsOfEnrollmentDelay.qmd
@ -9,6 +9,7 @@ editor: source
 # Setup

 ```{r}
+library(knitr)
 library(bayesplot)
 available_mcmc(pattern = "_nuts_")
 library(ggplot2)
@ -32,6 +33,7 @@ options(mc.cores = parallel::detectCores())
 ```{r}
 ################ Pull data from database ######################
 library(RPostgreSQL)
+host <- 'aact_db-restored-2025-01-07'

 driver <- dbDriver("PostgreSQL")

@ -42,7 +44,7 @@ con <- dbConnect(
    user='root',
    password='root',
    dbname='aact_db',
-    host='will-office'
+    host=host
    )
 on.exit(dbDisconnect(con))

@ -59,7 +61,7 @@ select
    ,fdqpe.earliest_date_observed 
    ,fdqpe.elapsed_duration
    ,fdqpe.n_brands as identical_brands
-    ,ntbtu.brand_name_count 
+    ,ntbtu.brand_name_counts 
    ,fdqpe.category_id
    ,fdqpe.final_status
    ,fdqpe.h_sdi_val
@ -78,7 +80,7 @@ select
    --,fdqpe.l_sdi_u95
    --,fdqpe.l_sdi_l95
 from formatted_data_with_planned_enrollment fdqpe
-    join \"Formularies\".nct_to_brands_through_uspdc ntbtu
+    join \"Formularies\".nct_to_brand_counts_through_uspdc ntbtu
        on fdqpe.nct_id = ntbtu.nct_id 
 order by fdqpe.nct_id, fdqpe.earliest_date_observed 
 ;
@ -101,7 +103,7 @@ con <- dbConnect(
    user='root',
    password='root',
    dbname='aact_db',
-    host='will-office'
+    host=host
    )
 on.exit(dbDisconnect(con))

@ -127,7 +129,7 @@ query <- dbSendQuery(
        ,fdqpe.earliest_date_observed 
        ,fdqpe.elapsed_duration
        ,fdqpe.n_brands as identical_brands
-        ,ntbtu.brand_name_count 
+        ,ntbtu.brand_name_counts 
        ,fdqpe.category_id
        ,fdqpe.final_status
        ,fdqpe.h_sdi_val
@ -146,7 +148,7 @@ query <- dbSendQuery(
        --,fdqpe.l_sdi_u95
        --,fdqpe.l_sdi_l95
    from formatted_data_with_planned_enrollment fdqpe
-        join \"Formularies\".nct_to_brands_through_uspdc ntbtu
+        join \"Formularies\".nct_to_brand_counts_through_uspdc ntbtu
            on fdqpe.nct_id = ntbtu.nct_id 
    	join cte 
    		on fdqpe.nct_id = cte.nct_id 
@ -235,7 +237,7 @@ inherited_cols <- c(
    ,"m_sdi_val"
    ,"lm_sdi_val"
    ,"l_sdi_val"
-    ,"status_NYR"
+    ,"status_NYR"# TODO: may need to remove
    ,"status_EBI"
    ,"status_Rec"
    ,"status_ANR"
@ -325,6 +327,7 @@ group_mcmc_areas <- function(
        rename=TRUE,
        filter=NULL
        ) {
+    
    #get all parameter names
    params <- get_parameters(stem,class_list)
    
@ -579,19 +582,35 @@ ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/default_p_generic_interv


 ```{r}
+get_category_count <- function(tbl, id) {
+  result <- tbl$n[tbl$category_id == id]
+  if(length(result) == 0) 0 else result
+}

+category_names <- sapply(1:length(beta_list$groups), 
+    function(i) sprintf("ICD-10 #%d: %s (n=%d)", 
+                       i, 
+                       beta_list$groups[i],
+                       get_category_count(category_count, i)))

+```
+
+```{r}
 pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) |>
-    pivot_longer(X1:X169)
+    pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168

 #TODO: Fix Category names
 pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
 pddf_ib["category"] <-  sapply(pddf_ib$entry_idx, function(i) df$category_id[i])
-pddf_ib["category_name"] <- sapply(pddf_ib$category, function(i) beta_list$groups[i])
+pddf_ib["category_name"] <- sapply(
+    pddf_ib$category, 
+    function(i) category_names[i]
+    )
+  


 ggplot(pddf_ib, aes(x=value,)) +
-    geom_density(bins=100) +
+    geom_density(adjust=1/5) +
    labs(
        title = "Distribution of predicted differences"
        ,subtitle = "Intervention: Delay close of enrollment"
@ -599,14 +618,15 @@ ggplot(pddf_ib, aes(x=value,)) +
        ,y = "Probability Density"
    ) + 
    geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") 
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_generic_intervention_distdiff_styled.png")
+    #todo: add median, mean, 40/60 quantiles as well as 
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_distdiff_styled.png")

 ggplot(pddf_ib, aes(x=value,)) +
-    geom_density(bins=100) +
+    geom_density(adjust=1/5) +
    facet_wrap(
        ~factor(
            category_name, 
-            levels=beta_list$groups
+            levels=category_names
            )
        , labeller = label_wrap_gen(multi_line = TRUE)
        , ncol=4) +
@ -618,57 +638,184 @@ ggplot(pddf_ib, aes(x=value,)) +
    ) + 
    geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
    theme(strip.text.x = element_text(size = 8))
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_generic_intervention_distdiff_by_group.png")
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_distdiff_by_group.png")

 ggplot(pddf_ib, aes(x=value,)) +
-    geom_histogram(bins=100) +
+    geom_histogram(bins=300) +
    facet_wrap(
        ~factor(
            category_name, 
-            levels=beta_list$groups
+            levels=category_names
            )
        , labeller = label_wrap_gen(multi_line = TRUE)
-        , ncol=5) +
+        , ncol=4) +
    labs(
        title = "Histogram of predicted differences | By Group"
        ,subtitle = "Intervention: Delay close of enrollment"
        ,x = "Difference in probability due to intervention"
        ,y = "Predicted counts"
    ) + 
-    #xlim(-0.25,0.1) +
    geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
    theme(strip.text.x = element_text(size = 8))
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_generic_intervention_histdiff_by_group.png")
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_histdiff_by_group.png")
+```
+
+
+```{r}
+p3 <- ggplot(pddf_ib, aes(x=value,)) +
+    geom_histogram(bins=500) +
+    labs(
+        title = "Distribution of predicted differences"
+        ,subtitle = "Intervention: Delay close of enrollment"
+        ,x = "Difference in probability due to intervention"
+        ,y = "Probability Density"
+        ,caption = "Vertical marks: 5/10/25/50/75/90/95th percentiles. Dot shows mean."
+    ) + 
+    geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") 
+
+stats <- list(
+ p5 = quantile(pddf_ib$value, 0.05),
+ p10 = quantile(pddf_ib$value, 0.10),
+ q1 = quantile(pddf_ib$value, 0.25),
+ med = median(pddf_ib$value),
+ mean = mean(pddf_ib$value),
+ q3 = quantile(pddf_ib$value, 0.75),
+ p90 = quantile(pddf_ib$value, 0.90),
+ p95 = quantile(pddf_ib$value, 0.95),
+ max_height = max(ggplot_build(p3)$data[[1]]$count),
+ y_offset = -max(ggplot_build(p3)$data[[1]]$count) * 0.05
+)
+
+p3 + 
+ # Box
+ geom_segment(data = data.frame(
+   x = c(stats$q1, stats$q3, stats$med),
+   xend = c(stats$q1, stats$q3, stats$med),
+   y = rep(stats$y_offset, 3), 
+   yend = rep(stats$y_offset * 2, 3)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ geom_segment(data = data.frame(
+   x = rep(stats$q1, 2),
+   xend = rep(stats$q3, 2),
+   y = c(stats$y_offset, stats$y_offset * 2),
+   yend = c(stats$y_offset, stats$y_offset * 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Inner whiskers (Q1->P10, Q3->P90)
+ geom_segment(data = data.frame(
+   x = c(stats$q1, stats$q3),
+   xend = c(stats$p10, stats$p90),
+   y = rep(stats$y_offset * 1.5, 2),
+   yend = rep(stats$y_offset * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P10/P90
+ geom_segment(data = data.frame(
+   x = c(stats$p10, stats$p90),
+   xend = c(stats$p10, stats$p90),
+   y = stats$y_offset * 1.3,
+   yend = stats$y_offset * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Outer whiskers (P10->P5, P90->P95)
+ geom_segment(data = data.frame(
+   x = c(stats$p10, stats$p90),
+   xend = c(stats$p5, stats$p95),
+   y = rep(stats$y_offset * 1.5, 2),
+   yend = rep(stats$y_offset * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P5/P95
+ geom_segment(data = data.frame(
+   x = c(stats$p5, stats$p95),
+   xend = c(stats$p5, stats$p95),
+   y = stats$y_offset * 1.3,
+   yend = stats$y_offset * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Mean dot
+ geom_point(data = data.frame(
+   x = stats$mean,
+   y = stats$y_offset * 1.5
+ ), aes(x = x, y = y))
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_histdiff_boxplot.png")
+```
+
+```{r}
+ ggplot(pddf_ib, aes(x=value)) +
+    stat_ecdf(geom='step') +
+    labs(
+        title = "Cumulative distribution of predicted differences",
+        subtitle = "Intervention: Delay close of enrollment",
+        x = "Difference in probability of termination due to intervention",
+        y = "Cumulative Proportion"
+    ) 
+
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_cumulative_distdiff.png")
 ```

-Get the probability of increase over probability of a decrease

+Get the % of differences in the spike around zero
 ```{r}
-mean(counterfact_predicted_ib$predicted_difference)
+# get values around and above/below spike
+width <- 0.02
+spike_band_centered_zero <- mean( pddf_ib$value >= -width/2 & pddf_ib$value <= width/2)
+above_spike_band <- mean( pddf_ib$value >= width/2)
+below_spike_band <- mean( pddf_ib$value <= -width/2)
+
+# get mass above and mass below zero
+mass_below_zero <- mean( pddf_ib$value <= 0)
 ```
-Thus adding a Delay close of enrollment increases the probability of termination by 16.72% on average for
-the snapshots investigated.
+Looking at the spike around zero, we find that `r spike_band_centered_zero*100`% 
+of the probability mass is contained within the band from 
+[`r -width*100/2`,`r width*100/2`].
+Additionally, there was `r above_spike_band*100`% of the probability above that 
+-- representing those with a general increase in the probability of termination --
+and `r below_spike_band*100`% of the probability mass below the band
+-- representing a decrease in the probability of termination.
+
+On average, if you keep the trial open instead of closing it,
+`r mass_below_zero`% of trials will see a decrease in the probability of termination,
+but, due to the high increase in probability of termination given termination was increased,
+the mean probability of termination increases by `r stats$mean`.

+```{r}
+# 5%-iles
+
+summary(pddf_ib$value)
+
+# Create your quantiles
+quants <- quantile(pddf_ib$value, probs = seq(0,1,0.05), type=4)
+
+# Convert to a data frame
+quant_df <- data.frame(
+  Percentile = names(quants),
+  Value = quants
+)
+kable(quant_df)
+```

+There seems to be some trials that are highly suceptable to this enrollment delay. Specifically, there were some 

 ```{r}
 n = length(counterfact_predicted_ib$p_predicted_intervention)
-mean(rbinom(n,1,as.vector(counterfact_predicted_ib$p_predicted_intervention)))
-mean(rbinom(n,1,as.vector(counterfact_predicted_ib$p_predicted_default)))
+k = 100
+simulated_terminations_intervention <- mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_intervention)))
+simulated_terminations_base <-mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_default)))
+
+simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)/k
 ```

+The simulation above shows that this results in a percentage-point increase of about 
+`r simulated_percentages * 100`.
+



 # Diagnostics

 ```{r}
-#| eval: false
+#| eval: true
 #trace plots
 plot(fit, pars=c("mu"), plotfun="trace")


-for (i in 1:4) {
+for (i in 1:3) {
    print(
        mcmc_rank_overlay(
        fit, 
@ -686,10 +833,10 @@ for (i in 1:4) {
 ```

 ```{r}
-#| eval: false
+#| eval: true
 plot(fit, pars=c("sigma"), plotfun="trace")

-for (i in 1:4) {
+for (i in 1:3) {
    print(
        mcmc_rank_overlay(
        fit, 
@ -707,7 +854,7 @@ for (i in 1:4) {
 ```

 ```{r}
-#| eval: false
+#| eval: true
 #other diagnostics
 logpost <- log_posterior(fit)
 nuts_prmts <- nuts_params(fit)
@ -716,15 +863,15 @@ posterior <- as.array(fit)
 ```

 ```{r}
-#| eval: false
+#| eval: true
 color_scheme_set("darkgray")
 div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4)
 mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05)
 ```

 ```{r}
-#| eval: false
-for (i in 1:4) {
+#| eval: true
+for (i in 1:3) {
    mus = sapply(3:0, function(j) paste0("mu[",4*i-j ,"]"))
    print(
        mcmc_pairs(
@ -744,14 +891,14 @@ for (i in 1:4) {
 ```

 ```{r}
-#| eval: false
+#| eval: true
 mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05)
 ```

 ```{r}
-#| eval: false
+#| eval: true

-for (i in 1:4) {
+for (i in 1:3) {
    params = sapply(3:0, function(j) paste0("sigma[",4*i-j ,"]"))
    print(
        mcmc_pairs(
@ -769,9 +916,9 @@ for (i in 1:4) {


 ```{r}
-#| eval: false
+#| eval: true
 for (k in 1:22) {
-for (i in 1:4) {
+for (i in 1:3) {
    params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]"))
    print(
        mcmc_pairs(
--- a/r-analysis/EffectsOfEnrollmentDelay.rmarkdown
+++ b/r-analysis/EffectsOfEnrollmentDelay.rmarkdown
@ -0,0 +1,983 @@
+---
+title: "The Effects of Recruitment status on completion of clinical trials"
+author: "Will King"
+format: html
+editor: source
+---
+
+
+
+
+# Setup
+
+
+
+```{r}
+library(knitr)
+library(bayesplot)
+available_mcmc(pattern = "_nuts_")
+library(ggplot2)
+library(patchwork)
+library(tidyverse)
+library(rstan)
+library(tidyr)
+library(ghibli)
+library(xtable)
+#Resources: https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
+
+#save unchanged models instead of recompiling
+rstan_options(auto_write = TRUE)
+#allow for multithreaded sampling
+options(mc.cores = parallel::detectCores())
+
+#test installation, shouldn't get any errors
+#example(stan_model, package = "rstan", run.dontrun = TRUE)
+```
+
+```{r}
+################ Pull data from database ######################
+library(RPostgreSQL)
+host <- 'aact_db-restored-2025-01-07'
+
+driver <- dbDriver("PostgreSQL")
+
+get_data <- function(driver) {
+
+con <- dbConnect(
+    driver,
+    user='root',
+    password='root',
+    dbname='aact_db',
+    host=host
+    )
+on.exit(dbDisconnect(con))
+
+query <- dbSendQuery(
+    con,
+#    "select * from formatted_data_with_planned_enrollment;"
+"
+select 
+    fdqpe.nct_id
+    --,fdqpe.start_date
+    --,fdqpe.current_enrollment
+    --,fdqpe.enrollment_category
+    ,fdqpe.current_status 
+    ,fdqpe.earliest_date_observed 
+    ,fdqpe.elapsed_duration
+    ,fdqpe.n_brands as identical_brands
+    ,ntbtu.brand_name_counts 
+    ,fdqpe.category_id
+    ,fdqpe.final_status
+    ,fdqpe.h_sdi_val
+    --,fdqpe.h_sdi_u95
+    --,fdqpe.h_sdi_l95
+    ,fdqpe.hm_sdi_val
+    --,fdqpe.hm_sdi_u95
+    --,fdqpe.hm_sdi_l95
+    ,fdqpe.m_sdi_val
+    --,fdqpe.m_sdi_u95
+    --,fdqpe.m_sdi_l95
+    ,fdqpe.lm_sdi_val
+    --,fdqpe.lm_sdi_u95
+    --,fdqpe.lm_sdi_l95
+    ,fdqpe.l_sdi_val
+    --,fdqpe.l_sdi_u95
+    --,fdqpe.l_sdi_l95
+from formatted_data_with_planned_enrollment fdqpe
+    join \"Formularies\".nct_to_brand_counts_through_uspdc ntbtu
+        on fdqpe.nct_id = ntbtu.nct_id 
+order by fdqpe.nct_id, fdqpe.earliest_date_observed 
+;
+"
+    )
+df <- fetch(query, n = -1)
+df <- na.omit(df)
+
+query2 <-dbSendQuery(con,"select count(*) from \"DiseaseBurden\".icd10_categories ic where \"level\"=1;")
+n_categories <- fetch(query2, n = -1)
+
+return(list(data=df,ncat=n_categories))
+}
+
+
+get_counterfact_base <- function(driver) {
+
+con <- dbConnect(
+    driver,
+    user='root',
+    password='root',
+    dbname='aact_db',
+    host=host
+    )
+on.exit(dbDisconnect(con))
+
+query <- dbSendQuery(
+    con,
+    "
+    with cte as (
+    --get last recruiting state
+    select fd.nct_id, max(fd.earliest_date_observed),min(fd2.earliest_date_observed) as tmstmp
+    from formatted_data fd 
+    	join formatted_data fd2 
+    	on fd.nct_id=fd2.nct_id and fd.earliest_date_observed < fd2.earliest_date_observed 
+    where fd.current_status = 'Recruiting'
+    	and fd2.current_status = 'Active, not recruiting'
+    group by fd.nct_id 
+    )
+    select 
+        fdqpe.nct_id
+        --,fdqpe.start_date
+        --,fdqpe.current_enrollment
+        --,fdqpe.enrollment_category
+        ,fdqpe.current_status 
+        ,fdqpe.earliest_date_observed 
+        ,fdqpe.elapsed_duration
+        ,fdqpe.n_brands as identical_brands
+        ,ntbtu.brand_name_counts 
+        ,fdqpe.category_id
+        ,fdqpe.final_status
+        ,fdqpe.h_sdi_val
+        --,fdqpe.h_sdi_u95
+        --,fdqpe.h_sdi_l95
+        ,fdqpe.hm_sdi_val
+        --,fdqpe.hm_sdi_u95
+        --,fdqpe.hm_sdi_l95
+        ,fdqpe.m_sdi_val
+        --,fdqpe.m_sdi_u95
+        --,fdqpe.m_sdi_l95
+        ,fdqpe.lm_sdi_val
+        --,fdqpe.lm_sdi_u95
+        --,fdqpe.lm_sdi_l95
+        ,fdqpe.l_sdi_val
+        --,fdqpe.l_sdi_u95
+        --,fdqpe.l_sdi_l95
+    from formatted_data_with_planned_enrollment fdqpe
+        join \"Formularies\".nct_to_brand_counts_through_uspdc ntbtu
+            on fdqpe.nct_id = ntbtu.nct_id 
+    	join cte 
+    		on fdqpe.nct_id = cte.nct_id 
+    			and fdqpe.earliest_date_observed = cte.tmstmp
+    order by fdqpe.nct_id, fdqpe.earliest_date_observed 
+    ;
+    "
+    )
+df <- fetch(query, n = -1)
+df <- na.omit(df)
+
+query2 <-dbSendQuery(con,"select count(*) from \"DiseaseBurden\".icd10_categories ic where \"level\"=1;")
+n_categories <- fetch(query2, n = -1)
+
+return(list(data=df,ncat=n_categories))
+}
+
+
+d <- get_data(driver)
+df <- d$data
+n_categories <- d$ncat
+
+cf <- get_counterfact_base(driver)
+df_counterfact_base <- cf$data
+
+
+
+################ Format Data ###########################
+
+data_formatter <- function(df) {
+categories <- df["category_id"]
+
+x <- df["elapsed_duration"]
+x["identical_brands"] <- asinh(df$identical_brands)
+x["brand_name_counts"] <- asinh(df$brand_name_count)
+x["h_sdi_val"] <- asinh(df$h_sdi_val)
+x["hm_sdi_val"] <- asinh(df$hm_sdi_val)
+x["m_sdi_val"] <- asinh(df$m_sdi_val)
+x["lm_sdi_val"] <- asinh(df$lm_sdi_val)
+x["l_sdi_val"] <- asinh(df$l_sdi_val)
+
+
+#Setup fixed effects
+x["status_NYR"] <- ifelse(df["current_status"]=="Not yet recruiting",1,0)
+x["status_EBI"] <- ifelse(df["current_status"]=="Enrolling by invitation",1,0)
+x["status_Rec"] <- ifelse(df["current_status"]=="Recruiting",1,0) 
+x["status_ANR"] <- ifelse(df["current_status"]=="Active, not recruiting",1,0)
+
+
+y <- ifelse(df["final_status"]=="Terminated",1,0)
+
+#get category list
+
+
+return(list(x=x,y=y))
+}
+
+train <- data_formatter(df)
+counterfact_base <- data_formatter(df_counterfact_base)
+
+categories <- df$category_id
+
+x <- train$x
+y <- train$y
+
+x_cf_base <- counterfact_base$x
+y_cf_base <- counterfact_base$y
+cf_categories <- df_counterfact_base$category_id
+```
+
+
+
+
+
+# Fit Model
+
+
+
+
+
+
+```{r}
+################################# FIT MODEL #########################################
+inherited_cols <- c(
+    "elapsed_duration"
+    #,"identical_brands"
+    #,"brand_name_counts"
+    ,"h_sdi_val"
+    ,"hm_sdi_val"
+    ,"m_sdi_val"
+    ,"lm_sdi_val"
+    ,"l_sdi_val"
+    ,"status_NYR"# TODO: may need to remove
+    ,"status_EBI"
+    ,"status_Rec"
+    ,"status_ANR"
+)
+```
+
+```{r}
+beta_list <- list(
+        groups = c(
+        `1`="Infections & Parasites",
+        `2`="Neoplasms",
+        `3`="Blood & Immune system",
+        `4`="Endocrine, Nutritional, and Metabolic",
+        `5`="Mental & Behavioral",
+        `6`="Nervous System",
+        `7`="Eye and Adnexa",
+        `8`="Ear and Mastoid",
+        `9`="Circulatory",
+        `10`="Respiratory",
+        `11`="Digestive",
+        `12`="Skin & Subcutaneaous tissue",
+        `13`="Musculoskeletal",
+        `14`="Genitourinary",
+        `15`="Pregancy, Childbirth, & Puerperium",
+        `16`="Perinatal Period",
+        `17`="Congential",
+        `18`="Symptoms, Signs etc.",
+        `19`="Injury etc.",
+        `20`="External Causes",
+        `21`="Contact with Healthcare",
+        `22`="Special Purposes"
+    ),
+    parameters = c(
+        `1`="Elapsed Duration",
+        # brands
+        `2`="asinh(Generic Brands)",
+        `3`="asinh(Competitors USPDC)",
+        # population
+        `4`="asinh(High SDI)",
+        `5`="asinh(High-Medium SDI)",
+        `6`="asinh(Medium SDI)",
+        `7`="asinh(Low-Medium SDI)",
+        `8`="asinh(Low SDI)",
+        #Status
+        `9`="status_NYR",
+        `10`="status_EBI",
+        `11`="status_Rec",
+        `12`="status_ANR"
+    )
+)
+
+get_parameters <- function(stem,class_list) {
+    #get categories and lengths
+    named <- names(class_list)
+    lengths <- sapply(named, (function (x) length(class_list[[x]])))
+    
+    #describe the grid needed
+    iter_list <- sapply(named, (function (x) 1:lengths[x]))
+    
+    #generate the list of parameters
+    pardf <- generate_parameter_df(stem, iter_list)
+    
+    #add columns with appropriate human-readable names
+    for (name in named) {
+        pardf[paste(name,"_hr",sep="")] <- as.factor(
+            sapply(pardf[name], (function (i) class_list[[name]][i]))
+        )
+    }
+     
+    return(pardf)   
+}
+
+generate_parameter_df <- function(stem, iter_list) {
+    grid <- expand.grid(iter_list)
+    grid["param_name"] <- grid %>% unite(x,colnames(grid),sep=",")
+    grid["param_name"] <- paste(stem,"[",grid$param_name,"]",sep="")
+    return(grid)
+}
+
+group_mcmc_areas <- function(
+        stem,# = "beta"
+        class_list,# = beta_list
+        stanfit,# = fit
+        group_id,# = 2
+        rename=TRUE,
+        filter=NULL
+        ) {
+    
+    #get all parameter names
+    params <- get_parameters(stem,class_list)
+    
+    #filter down to parameters of interest
+    params <- filter(params,groups == group_id)
+    #Get dataframe with only the rows of interest
+    filtdata <- as.data.frame(stanfit)[params$param_name]
+    #rename columns
+    if (rename) dimnames(filtdata)[[2]] <- params$parameters_hr
+    #get group name for title
+    group_name <- class_list$groups[group_id]
+    #create area plot with appropriate title
+    p <- mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
+        ggtitle(paste("Parameter distributions for ICD-10 class:",group_name)) +
+        geom_vline(xintercept=0,color="grey",alpha=0.75)
+    
+    d <- pivot_longer(filtdata, everything()) |> 
+        group_by(name) |> 
+        summarize(
+            mean=mean(value)
+            ,q025 = quantile(value,probs = 0.025)
+            ,q975 = quantile(value,probs = 0.975)
+            ,q05 = quantile(value,probs = 0.05)
+            ,q95 = quantile(value,probs = 0.95)
+            )
+    return(list(plot=p,quantiles=d,name=group_name))
+}
+
+parameter_mcmc_areas <- function(
+        stem,# = "beta"
+        class_list,# = beta_list
+        stanfit,# = fit
+        parameter_id,# = 2
+        rename=TRUE
+        ) {
+    #get all parameter names
+    params <- get_parameters(stem,class_list)
+    #filter down to parameters of interest
+    params <- filter(params,parameters == parameter_id)
+    #Get dataframe with only the rows of interest
+    filtdata <- as.data.frame(stanfit)[params$param_name]
+    #rename columns
+    if (rename) dimnames(filtdata)[[2]] <- params$groups_hr
+    #get group name for title
+    parameter_name <- class_list$parameters[parameter_id]
+    #create area plot with appropriate title
+    p <- mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
+        ggtitle(parameter_name,"Parameter Distribution")
+    
+    d <- pivot_longer(filtdata, everything()) |> 
+        group_by(name) |> 
+        summarize(
+            mean=mean(value)
+            ,q025 = quantile(value,probs = 0.025)
+            ,q975 = quantile(value,probs = 0.975)
+            ,q05 = quantile(value,probs = 0.05)
+            ,q95 = quantile(value,probs = 0.95)
+            )
+    return(list(plot=p,quantiles=d,name=parameter_name))
+}
+
+
+```
+
+
+
+Plan: select all snapshots that are the first to have closed enrollment (Rec -> ANR)
+
+
+```{r}
+#delay intervention
+intervention_enrollment <- x_cf_base[c(inherited_cols,"brand_name_counts", "identical_brands")]
+intervention_enrollment["status_ANR"] <- 0
+intervention_enrollment["status_Rec"] <- 1
+```
+
+```{r}
+counterfact_delay <- list(
+    D = ncol(x),#
+    N = nrow(x),
+    L = n_categories$count,
+    y = as.vector(y),
+    ll = as.vector(categories),
+    x = as.matrix(x),
+    mu_mean = 0,
+    mu_stdev = 0.05,
+    sigma_shape = 4,
+    sigma_rate = 20,
+    Nx = nrow(x_cf_base),
+    llx = as.vector(cf_categories),
+    counterfact_x_tilde = as.matrix(intervention_enrollment),
+    counterfact_x = as.matrix(x_cf_base)
+)
+```
+
+```{r}
+fit <- stan(
+    file='Hierarchal_Logistic.stan', 
+    data = counterfact_delay,
+    chains = 4,
+    iter = 5000,
+    seed = 11021585
+    )
+```
+
+
+
+
+
+
+
+
+
+
+## Explore data
+
+
+
+
+```{r}
+#get number of trials and snapshots in each category
+group_trials_by_category <- as.data.frame(aggregate(category_id ~ nct_id, df, max))
+group_trials_by_category <- as.data.frame(group_trials_by_category)
+
+category_count <- group_trials_by_category |> group_by(category_id) |> count()
+
+```
+
+
+
+
+
+
+
+## Fit Results
+
+
+
+
+```{r}
+################################# ANALYZE #####################################
+print(fit)
+```
+
+
+
+
+
+# Counterfactuals
+
+
+
+```{r}
+generated_ib <- gqs(
+    fit@stanmodel,
+    data=counterfact_delay,
+    draws=as.matrix(fit),
+    seed=11021585
+    )
+```
+
+```{r}
+df_ib_p <- data.frame(
+    p_prior=as.vector(extract(generated_ib, pars="p_prior")$p_prior)
+    ,p_predicted = as.vector(extract(generated_ib, pars="p_predicted")$p_predicted)
+)
+
+df_ib_prior <- data.frame(
+    mu_prior = as.vector(extract(generated_ib, pars="mu_prior")$mu_prior)
+    ,sigma_prior = as.vector(extract(generated_ib, pars="sigma_prior")$sigma_prior)
+)
+
+#p_prior
+ggplot(df_ib_p, aes(x=p_prior)) +
+    geom_density() + 
+    labs(
+        title="Implied Prior Distribution P"
+        ,subtitle=""
+        ,x="Probability Domain 'p'"
+        ,y="Probability Density"
+    )
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/prior_p.png")
+
+#p_posterior
+ggplot(df_ib_p, aes(x=p_predicted)) +
+    geom_density() + 
+    labs(
+        title="Implied Posterior Distribution P"
+        ,subtitle=""
+        ,x="Probability Domain 'p'"
+        ,y="Probability Density"
+    )
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/posterior_p.png")
+
+#mu_prior
+ggplot(df_ib_prior) +
+    geom_density(aes(x=mu_prior)) + 
+    labs(
+        title="Prior - Mu"
+        ,subtitle="same prior for all Mu values"
+        ,x="Mu"
+        ,y="Probability"
+    )
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/prior_mu.png")
+
+#sigma_posterior
+ggplot(df_ib_prior) +
+    geom_density(aes(x=sigma_prior)) + 
+    labs(
+        title="Prior - Sigma"
+        ,subtitle="same prior for all Sigma values"
+        ,x="Sigma"
+        ,y="Probability"
+    )
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/prior_sigma.png")
+```
+
+```{r}
+check_hmc_diagnostics(fit)
+```
+
+
+
+
+
+
+
+### Intervention: Delay close of enrollment
+
+
+
+```{r}
+counterfact_predicted_ib <- data.frame(
+    p_predicted_default = as.vector(extract(generated_ib, pars="p_predicted_default")$p_predicted_default)
+    ,p_predicted_intervention = as.vector(extract(generated_ib, pars="p_predicted_intervention")$p_predicted_intervention)
+    ,predicted_difference = as.vector(extract(generated_ib, pars="predicted_difference")$predicted_difference)
+)
+
+
+ggplot(counterfact_predicted_ib, aes(x=p_predicted_default)) +
+    geom_density() + 
+    labs(
+        title="Predicted Distribution of 'p'"
+        ,subtitle="Intervention: None"
+        ,x="Probability Domain 'p'"
+        ,y="Probability Density"
+    )
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/default_p_generic_intervention_base.png")
+
+ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
+    geom_density() + 
+    labs(
+        title="Predicted Distribution of 'p'"
+        ,subtitle="Intervention: Delay close of enrollment"
+        ,x="Probability Domain 'p'"
+        ,y="Probability Density"
+    )
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/default_p_generic_intervention_interv.png")
+
+ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
+    geom_density() + 
+    labs(
+        title="Predicted Distribution of differences 'p'"
+        ,subtitle="Intervention: Delay close of enrollment"
+        ,x="Difference in 'p' under treatment"
+        ,y="Probability Density"
+    )
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/default_p_generic_intervention_distdiff.png")
+```
+
+```{r}
+get_category_count <- function(tbl, id) {
+  result <- tbl$n[tbl$category_id == id]
+  if(length(result) == 0) 0 else result
+}
+
+category_names <- sapply(1:length(beta_list$groups), 
+    function(i) sprintf("ICD-10 #%d: %s (n=%d)", 
+                       i, 
+                       beta_list$groups[i],
+                       get_category_count(category_count, i)))
+
+```
+
+```{r}
+pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) |>
+    pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168
+
+#TODO: Fix Category names
+pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
+pddf_ib["category"] <-  sapply(pddf_ib$entry_idx, function(i) df$category_id[i])
+pddf_ib["category_name"] <- sapply(
+    pddf_ib$category, 
+    function(i) category_names[i]
+    )
+  
+
+
+ggplot(pddf_ib, aes(x=value,)) +
+    geom_density(adjust=1/5) +
+    labs(
+        title = "Distribution of predicted differences"
+        ,subtitle = "Intervention: Delay close of enrollment"
+        ,x = "Difference in probability due to intervention"
+        ,y = "Probability Density"
+    ) + 
+    geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") 
+    #todo: add median, mean, 40/60 quantiles as well as 
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_distdiff_styled.png")
+
+ggplot(pddf_ib, aes(x=value,)) +
+    geom_density(adjust=1/5) +
+    facet_wrap(
+        ~factor(
+            category_name, 
+            levels=category_names
+            )
+        , labeller = label_wrap_gen(multi_line = TRUE)
+        , ncol=4) +
+    labs(
+        title = "Distribution of predicted differences | By Group"
+        ,subtitle = "Intervention: Delay close of enrollment"
+        ,x = "Difference in probability due to intervention"
+        ,y = "Probability Density"
+    ) + 
+    geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
+    theme(strip.text.x = element_text(size = 8))
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_distdiff_by_group.png")
+
+ggplot(pddf_ib, aes(x=value,)) +
+    geom_histogram(bins=300) +
+    facet_wrap(
+        ~factor(
+            category_name, 
+            levels=category_names
+            )
+        , labeller = label_wrap_gen(multi_line = TRUE)
+        , ncol=4) +
+    labs(
+        title = "Histogram of predicted differences | By Group"
+        ,subtitle = "Intervention: Delay close of enrollment"
+        ,x = "Difference in probability due to intervention"
+        ,y = "Predicted counts"
+    ) + 
+    geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
+    theme(strip.text.x = element_text(size = 8))
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_histdiff_by_group.png")
+```
+
+```{r}
+p3 <- ggplot(pddf_ib, aes(x=value,)) +
+    geom_histogram(bins=500) +
+    labs(
+        title = "Distribution of predicted differences"
+        ,subtitle = "Intervention: Delay close of enrollment"
+        ,x = "Difference in probability due to intervention"
+        ,y = "Probability Density"
+        ,caption = "Vertical marks: 5/10/25/50/75/90/95th percentiles. Dot shows mean."
+    ) + 
+    geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") 
+
+stats <- list(
+ p5 = quantile(pddf_ib$value, 0.05),
+ p10 = quantile(pddf_ib$value, 0.10),
+ q1 = quantile(pddf_ib$value, 0.25),
+ med = median(pddf_ib$value),
+ mean = mean(pddf_ib$value),
+ q3 = quantile(pddf_ib$value, 0.75),
+ p90 = quantile(pddf_ib$value, 0.90),
+ p95 = quantile(pddf_ib$value, 0.95),
+ max_height = max(ggplot_build(p3)$data[[1]]$count),
+ y_offset = -max(ggplot_build(p3)$data[[1]]$count) * 0.05
+)
+
+p3 + 
+ # Box
+ geom_segment(data = data.frame(
+   x = c(stats$q1, stats$q3, stats$med),
+   xend = c(stats$q1, stats$q3, stats$med),
+   y = rep(stats$y_offset, 3), 
+   yend = rep(stats$y_offset * 2, 3)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ geom_segment(data = data.frame(
+   x = rep(stats$q1, 2),
+   xend = rep(stats$q3, 2),
+   y = c(stats$y_offset, stats$y_offset * 2),
+   yend = c(stats$y_offset, stats$y_offset * 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Inner whiskers (Q1->P10, Q3->P90)
+ geom_segment(data = data.frame(
+   x = c(stats$q1, stats$q3),
+   xend = c(stats$p10, stats$p90),
+   y = rep(stats$y_offset * 1.5, 2),
+   yend = rep(stats$y_offset * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P10/P90
+ geom_segment(data = data.frame(
+   x = c(stats$p10, stats$p90),
+   xend = c(stats$p10, stats$p90),
+   y = stats$y_offset * 1.3,
+   yend = stats$y_offset * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Outer whiskers (P10->P5, P90->P95)
+ geom_segment(data = data.frame(
+   x = c(stats$p10, stats$p90),
+   xend = c(stats$p5, stats$p95),
+   y = rep(stats$y_offset * 1.5, 2),
+   yend = rep(stats$y_offset * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P5/P95
+ geom_segment(data = data.frame(
+   x = c(stats$p5, stats$p95),
+   xend = c(stats$p5, stats$p95),
+   y = stats$y_offset * 1.3,
+   yend = stats$y_offset * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Mean dot
+ geom_point(data = data.frame(
+   x = stats$mean,
+   y = stats$y_offset * 1.5
+ ), aes(x = x, y = y))
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_histdiff_boxplot.png")
+```
+
+```{r}
+ ggplot(pddf_ib, aes(x=value)) +
+    stat_ecdf(geom='step') +
+    labs(
+        title = "Cumulative distribution of predicted differences",
+        subtitle = "Intervention: Delay close of enrollment",
+        x = "Difference in probability of termination due to intervention",
+        y = "Cumulative Proportion"
+    ) 
+
+ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_cumulative_distdiff.png")
+```
+
+
+
+
+Get the % of differences in the spike around zero
+
+
+```{r}
+# get values around and above/below spike
+width <- 0.02
+spike_band_centered_zero <- mean( pddf_ib$value >= -width/2 & pddf_ib$value <= width/2)
+above_spike_band <- mean( pddf_ib$value >= width/2)
+below_spike_band <- mean( pddf_ib$value <= -width/2)
+
+# get mass above and mass below zero
+mass_below_zero <- mean( pddf_ib$value <= 0)
+```
+
+
+Looking at the spike around zero, we find that `r spike_band_centered_zero*100`% 
+of the probability mass is contained within the band from 
+[`r -width*100/2`,`r width*100/2`].
+Additionally, there was `r above_spike_band*100`% of the probability above that 
+-- representing those with a general increase in the probability of termination --
+and `r below_spike_band*100`% of the probability mass below the band
+-- representing a decrease in the probability of termination.
+
+On average, if you keep the trial open instead of closing it,
+`r mass_below_zero`% of trials will see a decrease in the probability of termination,
+but, due to the high increase in probability of termination given termination was increased,
+the mean probability of termination increases by `r stats$mean`.
+
+
+
+```{r}
+# 5%-iles
+
+summary(pddf_ib$value)
+
+# Create your quantiles
+quants <- quantile(pddf_ib$value, probs = seq(0,1,0.05), type=4)
+
+# Convert to a data frame
+quant_df <- data.frame(
+  Percentile = names(quants),
+  Value = quants
+)
+kable(quant_df)
+```
+
+```{r}
+n = length(counterfact_predicted_ib$p_predicted_intervention)
+k = 100
+simulated_terminations_intervention <- mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_intervention)))
+simulated_terminations_base <-mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_default)))
+
+simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)/k
+```
+
+
+
+The simulation above shows that this results in a percentage-point increase of about 
+`r simulated_percentages * 100`.
+
+
+
+
+# Diagnostics
+
+
+
+```{r}
+#| eval: true
+#trace plots
+plot(fit, pars=c("mu"), plotfun="trace")
+
+
+for (i in 1:3) {
+    print(
+        mcmc_rank_overlay(
+        fit, 
+        pars=c(
+            paste0("mu[",4*i-3,"]"),
+            paste0("mu[",4*i-2,"]"),
+            paste0("mu[",4*i-1,"]"),
+            paste0("mu[",4*i,"]")
+            ), 
+        n_bins=100
+        )+  legend_move("top") +
+             scale_colour_ghibli_d("KikiMedium")
+    )
+}
+```
+
+```{r}
+#| eval: true
+plot(fit, pars=c("sigma"), plotfun="trace")
+
+for (i in 1:3) {
+    print(
+        mcmc_rank_overlay(
+        fit, 
+        pars=c(
+            paste0("sigma[",4*i-3,"]"),
+            paste0("sigma[",4*i-2,"]"),
+            paste0("sigma[",4*i-1,"]"),
+            paste0("sigma[",4*i,"]")
+            ), 
+        n_bins=100
+        )+  legend_move("top") +
+             scale_colour_ghibli_d("KikiMedium")
+    )
+}
+```
+
+```{r}
+#| eval: true
+#other diagnostics
+logpost <- log_posterior(fit)
+nuts_prmts <- nuts_params(fit)
+posterior <- as.array(fit)
+
+```
+
+```{r}
+#| eval: true
+color_scheme_set("darkgray")
+div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4)
+mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05)
+```
+
+```{r}
+#| eval: true
+for (i in 1:3) {
+    mus = sapply(3:0, function(j) paste0("mu[",4*i-j ,"]"))
+    print(
+        mcmc_pairs(
+            posterior,
+            np = nuts_prmts,
+            pars=c(
+                mus,
+                "lp__"
+            ),
+            off_diag_args = list(size = 0.75)
+        )
+    )
+}
+
+
+
+```
+
+```{r}
+#| eval: true
+mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05)
+```
+
+```{r}
+#| eval: true
+
+for (i in 1:3) {
+    params = sapply(3:0, function(j) paste0("sigma[",4*i-j ,"]"))
+    print(
+        mcmc_pairs(
+            posterior,
+            np = nuts_prmts,
+            pars=c(
+                params,
+                "lp__"
+            ),
+            off_diag_args = list(size = 0.75)
+        )
+    )
+}
+```
+
+```{r}
+#| eval: true
+for (k in 1:22) {
+for (i in 1:3) {
+    params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]"))
+    print(
+        mcmc_pairs(
+            posterior,
+            np = nuts_prmts,
+            pars=c(
+                params,
+                "lp__"
+            ),
+            off_diag_args = list(size = 0.75)
+        )
+    )
+}}
+```
+
+
+
+
+
+
+
+
+# TODO
+- [ ] Double check data flow. (Write summary of this in human readable form)
+    - Is it the data we want from the database 
+        - Training
+        - Counterfactual Evaluation
+            - choose a single snapshot per trial.
+    - Is the model in STAN well specified.
+- [ ] work on LOO validation of model
+
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/default_p_generic_intervention_base.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/default_p_generic_intervention_base.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/default_p_generic_intervention_distdiff.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/default_p_generic_intervention_distdiff.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/default_p_generic_intervention_interv.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/default_p_generic_intervention_interv.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_cumulative_distdiff.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_cumulative_distdiff.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_distdiff_by_group.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_distdiff_by_group.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_distdiff_styled.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_distdiff_styled.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_histdiff_boxplot.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_histdiff_boxplot.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_histdiff_by_group.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_histdiff_by_group.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_generic_intervention_distdiff_by_group.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_generic_intervention_distdiff_by_group.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_generic_intervention_distdiff_styled.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_generic_intervention_distdiff_styled.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_generic_intervention_histdiff_by_group.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/p_generic_intervention_histdiff_by_group.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/posterior_p.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/posterior_p.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/prior_mu.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/prior_mu.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/prior_p.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/prior_p.png
--- a/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/prior_sigma.png
+++ b/r-analysis/EffectsOfEnrollmentDelay/Images/DirectEffects/prior_sigma.png