diff --git a/r-analysis/EffectsOfEnrollmentDelay.rmarkdown b/r-analysis/EffectsOfEnrollmentDelay.rmarkdown
index 8b68e04..3927b82 100644
--- a/r-analysis/EffectsOfEnrollmentDelay.rmarkdown
+++ b/r-analysis/EffectsOfEnrollmentDelay.rmarkdown
@@ -12,6 +12,8 @@ editor: source
 
 
 
+
+
 ```{r}
 library(knitr)
 library(bayesplot)
@@ -41,10 +43,19 @@ image_parameters_by_groups <-paste0(image_root,"/betas/parameters_by_group")
 image_parameters_across_groups <-paste0(image_root,"/betas/parameter_across_groups")
 ```
 
+
+
+run on `r .QuartoInlineRender(now(tz='UTC'))`
+
+
+
+
+
 ```{r}
 ################ Pull data from database ######################
 library(RPostgreSQL)
-host <- 'aact_db-restored-2025-01-07'
+#host <-'aact_db-restored-2025-01-07'
+host <- '10.89.0.6'
 
 driver <- dbDriver("PostgreSQL")
 
@@ -441,6 +452,7 @@ counterfact_delay <- list(
 ```
 
 ```{r}
+#| label: Fitting
 fit <- stan(
     file='Hierarchal_Logistic.stan', 
     data = counterfact_delay,
@@ -509,7 +521,7 @@ ggplot(data=df3, aes(x=count, fill=final_status)) +
     geom_histogram(binwidth=1) +
     ggtitle("Histogram of snapshots per trial (matched trials)") +
     xlab("Snapshots per trial")
-ggsave(paste0(image_trial_details,"/HistSnapshots.png"))
+ggsave(paste0(image_trial_details,"/HistSnapshots.png"), create.dir = TRUE)
 
 #Plot duration for terminated vs completed
 df4 <- dbGetQuery(
@@ -532,7 +544,7 @@ ggplot(data=df4, aes(x=duration,fill=overall_status)) +
     ggtitle("Histogram of trial durations") +
     xlab("duration")+
     facet_wrap(~overall_status)
-ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png"))
+ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png"), create.dir = TRUE)
 
 df5 <- dbGetQuery(
     con,
@@ -563,7 +575,7 @@ ggplot(data=df5, aes(x=duration,y=snapshot_count,color=overall_status)) +
     ggtitle("Comparison of duration, status, and snapshot_count") +
     xlab("duration") +
     ylab("snapshot count") 
-ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png"))
+ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png"), create.dir = TRUE)
 
 dbDisconnect(con)
 
@@ -579,7 +591,7 @@ ggplot(data = group_trials_by_category, aes(x=category_id)) +
         ,x="Category ID"
         ,y="Count"
     )
-ggsave(paste0(image_trial_details,"/CategoryCounts.png"))
+ggsave(paste0(image_trial_details,"/CategoryCounts.png"), create.dir = TRUE)
     
 
 
@@ -591,8 +603,20 @@ count_snapshots <- sum(df5$snapshot_count)
 
 
 
-the correlation value is `r cor_dur_count` between duration and snapshot count.
-There are `r count_snapshots` snapshots in total.
+the correlation value is `r .QuartoInlineRender(cor_dur_count)` between duration and snapshot count.
+There are `r .QuartoInlineRender(count_snapshots)` snapshots in total, spread over 
+`r .QuartoInlineRender(length(df5$nct_id))` trials.
+
+The number of categories by trial are
+TODO: add in category names, 
+
+
+```{r}
+group_trials_by_category |> count(category_id)
+```
+
+
+
 
 
 
@@ -615,7 +639,7 @@ for (i in category_count$category_id[category_count$n >= 0]) {
     gi <- group_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
     ggsave(
         paste0(image_parameters_by_groups,"/group_",i,"_",gi$name,".png")
-        ,plot=gi$plot
+        ,plot=gi$plot, create.dir = TRUE
         )
     gx <- c(gx,gi)
 
@@ -643,7 +667,7 @@ for (i in c(1,2,3,9,10,11,12)) {
     pi <- parameter_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
     ggsave(
         paste0(image_parameters_across_groups,"/parameters_",i,"_",pi$name,".png")
-        ,plot=pi$plot
+        ,plot=pi$plot, create.dir = TRUE
         )
     px <- c(px,pi)
 
@@ -710,7 +734,7 @@ ggplot(df_ib_p, aes(x=p_prior)) +
         ,x="Probability Domain 'p'"
         ,y="Probability Density"
     )
-ggsave(paste0(image_dist_diff_analysis,"/prior_p.png"))
+ggsave(paste0(image_dist_diff_analysis,"/prior_p.png"), create.dir = TRUE)
 
 #p_posterior
 ggplot(df_ib_p, aes(x=p_predicted)) +
@@ -721,7 +745,7 @@ ggplot(df_ib_p, aes(x=p_predicted)) +
         ,x="Probability Domain 'p'"
         ,y="Probability Density"
     )
-ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png"))
+ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png"), create.dir = TRUE)
 
 #mu_prior
 ggplot(df_ib_prior) +
@@ -732,7 +756,7 @@ ggplot(df_ib_prior) +
         ,x="Mu"
         ,y="Probability"
     )
-ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png"))
+ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png"), create.dir = TRUE)
 
 #sigma_posterior
 ggplot(df_ib_prior) +
@@ -743,7 +767,7 @@ ggplot(df_ib_prior) +
         ,x="Sigma"
         ,y="Probability"
     )
-ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"))
+ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"), create.dir = TRUE)
 ```
 
 
@@ -753,7 +777,7 @@ ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"))
 
 
 
-### Intervention: Delay close of enrollment
+# Intervention: Delay close of enrollment
 
 
 
@@ -773,7 +797,7 @@ ggplot(counterfact_predicted_ib, aes(x=p_predicted_default)) +
         ,x="Probability Domain 'p'"
         ,y="Probability Density"
     )
-ggsave(paste0(image_dist_diff_analysis,"p_no_intervention.png"))
+ggsave(paste0(image_dist_diff_analysis,"/p_no_intervention.png"), create.dir = TRUE)
 
 ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
     geom_density() + 
@@ -783,7 +807,7 @@ ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
         ,x="Probability Domain 'p'"
         ,y="Probability Density"
     )
-ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png"))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png"), create.dir = TRUE)
 
 ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
     geom_density() + 
@@ -793,7 +817,7 @@ ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
         ,x="Difference in 'p' under treatment"
         ,y="Probability Density"
     )
-ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png"))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png"), create.dir = TRUE)
 ```
 
 ```{r}
@@ -815,7 +839,7 @@ pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predict
     pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168
 
 pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
-pddf_ib["category"] <-  sapply(pddf_ib$entry_idx, function(i) df$category_id[i])
+pddf_ib["category"] <-  sapply(pddf_ib$entry_idx, function(i) counterfact_delay$llx[i])
 pddf_ib["category_name"] <- sapply(
     pddf_ib$category, 
     function(i) category_names[i]
@@ -832,7 +856,7 @@ ggplot(pddf_ib, aes(x=value,)) +
         ,y = "Probability Density"
     ) + 
     geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") 
-ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png"))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png"), create.dir = TRUE)
 ```
 
 ```{r}
@@ -853,7 +877,7 @@ ggplot(pddf_ib, aes(x=value,)) +
     ) + 
     geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
     theme(strip.text.x = element_text(size = 8))
-ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png"))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png"), create.dir = TRUE)
 ```
 
 ```{r}
@@ -874,7 +898,7 @@ ggplot(pddf_ib, aes(x=value,)) +
     ) + 
     geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
     theme(strip.text.x = element_text(size = 8))
-ggsave(paste0(image_dist_diff_analysis,"p_delay_intervention_histdiff_by_group.png"))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_by_group.png"), create.dir = TRUE)
 ```
 
 ```{r}
@@ -897,6 +921,7 @@ stats <- list(
  q1 = quantile(pddf_ib$value, 0.25),
  med = median(pddf_ib$value),
  mean = mean(pddf_ib$value),
+ stdev = sd(pddf_ib$value),
  q3 = quantile(pddf_ib$value, 0.75),
  p90 = quantile(pddf_ib$value, 0.90),
  p95 = quantile(pddf_ib$value, 0.95),
@@ -952,7 +977,7 @@ p3 +
    x = stats$mean,
    y = stats$y_offset * 1.5
  ), aes(x = x, y = y))
-ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png"))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png"), create.dir = TRUE)
 ```
 
 ```{r}
@@ -1011,7 +1036,7 @@ p4 <- ggplot(pddf_ib, aes(x = value)) +
    x = stats$mean,
    y = stats$y_offset_density * 1.5
  ), aes(x = x, y = y))
-ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png"))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png"), create.dir = TRUE)
 
 p4
 ```
@@ -1026,7 +1051,7 @@ p4
         y = "Cumulative Proportion"
     ) 
 
-ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png"))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png"), create.dir = TRUE)
 ```
 
 
@@ -1047,18 +1072,18 @@ mass_below_zero <- mean( pddf_ib$value <= 0)
 ```
 
 
-Looking at the spike around zero, we find that `r spike_band_centered_zero*100`% 
+Looking at the spike around zero, we find that `r .QuartoInlineRender(spike_band_centered_zero*100)`% 
 of the probability mass is contained within the band from 
-[`r -width*100/2`,`r width*100/2`].
-Additionally, there was `r above_spike_band*100`% of the probability above that 
+[`r .QuartoInlineRender(-width/2)`,`r .QuartoInlineRender(width/2)`].
+Additionally, there was `r .QuartoInlineRender(above_spike_band*100)`% of the probability above that 
 -- representing those with a general increase in the probability of termination --
-and `r below_spike_band*100`% of the probability mass below the band
+and `r .QuartoInlineRender(below_spike_band*100)`% of the probability mass below the band
 -- representing a decrease in the probability of termination.
 
 On average, if you keep the trial open instead of closing it,
-`r mass_below_zero`% of trials will see a decrease in the probability of termination,
+`r .QuartoInlineRender(mass_below_zero * 100)`% of trials will see a decrease in the probability of termination,
 but, due to the high increase in probability of termination given termination was increased,
-the mean probability of termination increases by `r stats$mean`.
+the mean probability of termination increases by `r .QuartoInlineRender(stats$mean (stats$stdev))`.
 
 
 
@@ -1077,7 +1102,7 @@ quant_df <- data.frame(
 )
 # Convert to LaTeX
 table <- xtable(quant_df,
-    digits = rep(3, ncol(d) + 1),
+    digits = rep(3, ncol(quant_df) + 1),
     floating = FALSE,
     latex.environments = NULL,
     booktabs = TRUE
@@ -1096,7 +1121,7 @@ proportion_increase <- mean(pddf_ib$value >= 0)
 
 
 
-about `r proportion_increase * 100` percent probability increase in the probability of terminations
+about `r .QuartoInlineRender(proportion_increase * 100)` percent probability increase in the probability of terminations
 
 
 
@@ -1106,34 +1131,74 @@ k = 100
 simulated_terminations_intervention <- mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_intervention)))
 simulated_terminations_base <-mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_default)))
 
-simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)/k
+simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)
 ```
 
 
 
 The simulation above shows that this results in a percentage-point change in terminations of about 
-`r simulated_percentages * 100`.
+`r .QuartoInlineRender(simulated_percentages * 100)`.
+
+
+
+
+```{r}
+distdiff_by_group <- pddf_ib %>% 
+    group_by(category_name, category) %>%
+    summarize(
+        mean = mean(value, na.rm=TRUE) *100
+        ,"P(p>0)" = mean(value>0) *100
+        ,"5%"    = quantile( value, 0.05, na.rm = TRUE)*100
+        ,"25%"   = quantile( value, 0.25, na.rm = TRUE)*100
+        ,"median" = quantile( value, 0.5, na.rm = TRUE)*100
+        ,"75%"   = quantile( value, 0.75, na.rm = TRUE)*100
+        ,"95%"   = quantile( value, 0.95, na.rm = TRUE)*100
+    ) %>% arrange(category) %>% select(!category)
+    
+distdif_by_group_latex<- xtable(distdiff_by_group,
+    digits = rep(2, ncol(distdiff_by_group) +1),
+    floating = FALSE,
+    align = "llccccccc",
+    latex.environments = "tabularx",
+    booktabs = TRUE
+)
+
+# Write to file
+write_lines(
+    print(distdif_by_group_latex, include.rownames = FALSE),
+    paste0(image_root,"/distdiff_anr_vs_rec_by_group.tex")
+)
+
+print(distdif_by_group_latex)
+distdiff_by_group
+```
+
+
+
+
 
 ## fixed effects distributions
 
 
 
 ```{r}
+#| label: Fixed Effects Distributions
 #Get dataframe with only the rows of interest
 filtdata <- as.data.frame(extract(fit, pars="status_diff"))
 #rename columns
-dimnames(filtdata)[[2]] <- beta_list$groups
+dimnames(filtdata)[[2]] <- sprintf("%02d %s", seq_along(beta_list$groups), beta_list$groups)
 #create area plot with appropriate title
 mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
     ggtitle("Differences in Fixed Effects | By ICD-10 Category", 
             subtitle = "Moving from 'Active, not recruiting' to 'Recruiting'"
         ) +
     geom_vline(xintercept=seq(-0.25,0.5,0.25),color="grey",alpha=0.750)  
-ggsave(paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.png"))
+ggsave(paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.png"), create.dir = TRUE)
     
-d <- pivot_longer(filtdata, everything()) |> 
+fixed_effects_table <- pivot_longer(filtdata, everything()) |> 
     group_by(name) |> 
     summarize(
+        `sample size` = 0,
         mean = mean(value),
         `P(≥0)` = mean(value >= 0),
         `2.5%` = quantile(value, probs = 0.025),
@@ -1145,18 +1210,26 @@ d <- pivot_longer(filtdata, everything()) |>
         `97.5%` = quantile(value, probs = 0.975)
     )
 
+# Convert the indexing to be consecutive by using a temporary vector
+sample_sizes <- rep(0, 22)  # Create vector of zeros
+sample_sizes[category_count$category_id] <- category_count$n
+
+# Now assign the whole vector at once
+fixed_effects_table["sample size"] <- sample_sizes
 # Rename the name column
-names(d)[1] <- "ICD-10 Category"
+names(fixed_effects_table)[1] <- "ICD-10 Category"
 
 # Convert to LaTeX
-table <- xtable(d,
-    digits = rep(3, ncol(d) + 1),
+table <- xtable(fixed_effects_table,
+    digits = rep(2, ncol(fixed_effects_table) + 1),
     floating = FALSE,
-    latex.environments = NULL,
+    align = "lccccccccccc",
+    latex.environments = "tabularx",
     booktabs = TRUE
 )
 
 
+
 # Write to file
 write_lines(
     print(table, include.rownames = FALSE),
@@ -1175,12 +1248,13 @@ write_lines(
 
 
 ```{r}
-#| eval: true
+#| label: diagnostics 1
+#| eval: false
 #trace plots
 image_diagnostics <- paste0(image_root,"/diagnostics")
 
 plot(fit, pars=c("mu"), plotfun="trace")
-ggsave(paste0(image_diagnostics,"/trace_plot_mu.png"))
+ggsave(paste0(image_diagnostics,"/trace_plot_mu.png"), create.dir = TRUE)
 
 
 for (i in 1:3) {
@@ -1199,14 +1273,15 @@ for (i in 1:3) {
     )
     mu_range <- paste0(4*i-3,"-",4*i)
     filename <- paste0(image_diagnostics,"/trace_rank_plot_mu_",mu_range,".png")
-    ggsave(filename)
+    ggsave(filename, create.dir = TRUE)
 }
 ```
 
 ```{r}
-#| eval: true
+#| label: diagnostics 2
+#| eval: false
 plot(fit, pars=c("sigma"), plotfun="trace")
-ggsave(paste0(image_diagnostics,"/traceplot_sigma.png"))
+ggsave(paste0(image_diagnostics,"/traceplot_sigma.png"), create.dir = TRUE)
 
 for (i in 1:3) {
     print(
@@ -1224,12 +1299,13 @@ for (i in 1:3) {
     )
     sigma_range <- paste0(4*i-3,"-",4*i)
     filename <- paste0(image_diagnostics,"/trace_rank_plot_sigma_",sigma_range,".png")
-    ggsave(filename)
+    ggsave(filename, create.dir = TRUE)
 }
 ```
 
 ```{r}
-#| eval: true
+#| label: diagnostics 3
+#| eval: false
 #other diagnostics
 logpost <- log_posterior(fit)
 nuts_prmts <- nuts_params(fit)
@@ -1238,15 +1314,17 @@ posterior <- as.array(fit)
 ```
 
 ```{r}
-#| eval: true
+#| label: diagnostics 4
+#| eval: false
 color_scheme_set("darkgray")
 div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4)
 mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05)
-ggsave(paste0(image_diagnostics,"/parcoord_mu.png"))
+ggsave(paste0(image_diagnostics,"/parcoord_mu.png"), create.dir = TRUE)
 ```
 
 ```{r}
-#| eval: true
+#| label: diagnostics 5
+#| eval: false
 for (i in 1:3) {
     mus = sapply(3:0, function(j) paste0("mu[",4*i-j ,"]"))
     print(
@@ -1262,7 +1340,7 @@ for (i in 1:3) {
     )
     mu_range <- paste0(4*i-3,"-",4*i)
     filename <- paste0(image_diagnostics,"/correlation_plot_mu_",mu_range,".png")
-    ggsave(filename)
+    ggsave(filename, create.dir = TRUE)
 }
 
 
@@ -1270,13 +1348,15 @@ for (i in 1:3) {
 ```
 
 ```{r}
-#| eval: true
+#| label: diagnostics 6
+#| eval: false
 mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05)
-ggsave(paste0(image_diagnostics,"/parcoord_sigma.png"))
+ggsave(paste0(image_diagnostics,"/parcoord_sigma.png"), create.dir = TRUE)
 ```
 
 ```{r}
-#| eval: true
+#| label: diagnostics 7
+#| eval: false
 
 for (i in 1:3) {
     params = sapply(3:0, function(j) paste0("sigma[",4*i-j ,"]"))
@@ -1293,12 +1373,13 @@ for (i in 1:3) {
     )
     sigma_range <- paste0(4*i-3,"-",4*i)
     filename <- paste0(image_diagnostics,"/correlation_plot_sigma_",sigma_range,".png")
-    ggsave(filename)
+    ggsave(filename, create.dir = TRUE)
 }
 ```
 
 ```{r}
-#| eval: true
+#| eval: false
+#| label: diagnostics 8
 for (k in 1:22) {
 for (i in 1:3) {
     params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]"))
@@ -1316,7 +1397,7 @@ for (i in 1:3) {
     
     beta_range <- paste0("k_",k,"_i_",4*i-3,"-",4*i)
     filename <- paste0(image_diagnostics,"/correlation_plot_beta_",beta_range,".png")
-    ggsave(filename)
+    ggsave(filename, create.dir = TRUE)
 }}
 ```
 
diff --git a/r-analysis/EoED_Diagnostics.qmd b/r-analysis/EoED_Diagnostics.qmd
new file mode 100644
index 0000000..24435d4
--- /dev/null
+++ b/r-analysis/EoED_Diagnostics.qmd
@@ -0,0 +1,75 @@
+---
+title: "TrialCountExtraction"
+author: "Will"
+format: html
+editor: source
+---
+
+
+```{r}
+#| eval: false
+#| include: true
+
+#Full set
+categories %>% unique() %>% sort() %>% length()
+
+#Evaluation set
+cf_categories %>% unique() %>% sort() %>% length()
+
+```
+
+
+```{r}
+# Pulled from df
+group_trials_by_category %>% group_by(category_id) %>% count()
+```
+
+# Actual data from Evaluation and counterfactual
+```{r}
+# Original Evaluation
+# - Pulled from `categories` above when defined
+counterfact_delay$ll %>% unique() %>% sort() %>% length()
+
+
+# Counterfactual
+# - Pulled from `cf_categories` above when defined
+counterfact_delay$llx %>% unique() %>% sort() %>% length()
+```
+Those came from
+```{r}
+df$category_id %>% unique() %>% sort() %>% length()
+df_counterfact_base$category_id %>% unique() %>% sort() %>% length()
+```
+
+The difference between those is that the counterfactual imposes the constraint
+that there must be a snapshot where it moves from "ANR" to "Rec", implying that
+it can't just terminate.
+
+# Where do the other values drop
+
+When we find the counterfactual, the table looses some of the categories etc.
+Here is the extracted data
+
+```{r}
+data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference)
+```
+
+```{r}
+pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) |>
+    pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168
+
+
+pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
+pddf_ib["category"] <-  sapply(pddf_ib$entry_idx, function(i) counterfact_delay$llx[i])
+pddf_ib["category_name"] <- sapply(
+    pddf_ib$category, 
+    function(i) category_names[i]
+    )
+```
+and yet it seems that we predict the difference for all 168 trials
+
+It looks like there is an error where I apply category IDs. Because I'm pulling them from 
+```{r}
+ground_truth <- df$category_id[1:168]
+```
+
diff --git a/r-analysis/Hierarchal_Logistic.rds b/r-analysis/Hierarchal_Logistic.rds
index 0192834..0d90149 100644
Binary files a/r-analysis/Hierarchal_Logistic.rds and b/r-analysis/Hierarchal_Logistic.rds differ
diff --git a/r-analysis/SimplestEED.qmd b/r-analysis/SimplestEED.qmd
new file mode 100644
index 0000000..1c761fc
--- /dev/null
+++ b/r-analysis/SimplestEED.qmd
@@ -0,0 +1,1274 @@
+---
+title: "Simplified - The Effects of Recruitment status on completion of clinical trials"
+author: "Will King"
+format: html
+editor: source
+---
+
+The purpose of this is to help me investigate some of my code. It was not used to perform the analysis.
+
+# Setup
+
+
+
+```{r}
+library(knitr)
+library(bayesplot)
+available_mcmc(pattern = "_nuts_")
+library(ggplot2)
+library(patchwork)
+library(tidyverse)
+library(rstan)
+library(tidyr)
+library(ghibli)
+library(xtable)
+#Resources: https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
+
+#save unchanged models instead of recompiling
+rstan_options(auto_write = TRUE)
+#allow for multithreaded sampling
+options(mc.cores = parallel::detectCores())
+
+#test installation, shouldn't get any errors
+#example(stan_model, package = "rstan", run.dontrun = TRUE)
+
+
+image_root <- "./output/EffectsOfEnrollmentDelay"
+image_dist_diff_analysis <- paste0(image_root,"/dist_diff_analysis")
+image_trial_details <-paste0(image_root,"/trials_details")
+image_parameters_by_groups <-paste0(image_root,"/betas/parameters_by_group")
+image_parameters_across_groups <-paste0(image_root,"/betas/parameter_across_groups")
+```
+
+run on `{r} now(tz='UTC')`
+
+
+
+```{r}
+################ Pull data from database ######################
+library(RPostgreSQL)
+#host <-'aact_db-restored-2025-01-07'
+host <- '10.89.0.6'
+
+driver <- dbDriver("PostgreSQL")
+
+get_data <- function(driver) {
+
+con <- dbConnect(
+    driver,
+    user='root',
+    password='root',
+    dbname='aact_db',
+    host=host
+    )
+on.exit(dbDisconnect(con))
+
+query <- dbSendQuery(
+    con,
+#    "select * from formatted_data_with_planned_enrollment;"
+"
+select 
+    fdqpe.nct_id
+    --,fdqpe.start_date
+    --,fdqpe.current_enrollment
+    --,fdqpe.enrollment_category
+    ,fdqpe.current_status 
+    ,fdqpe.earliest_date_observed 
+    ,fdqpe.elapsed_duration
+    ,fdqpe.n_brands as identical_brands
+    ,ntbtu.brand_name_counts 
+    ,fdqpe.category_id
+    ,fdqpe.final_status
+    ,fdqpe.h_sdi_val
+    --,fdqpe.h_sdi_u95
+    --,fdqpe.h_sdi_l95
+    ,fdqpe.hm_sdi_val
+    --,fdqpe.hm_sdi_u95
+    --,fdqpe.hm_sdi_l95
+    ,fdqpe.m_sdi_val
+    --,fdqpe.m_sdi_u95
+    --,fdqpe.m_sdi_l95
+    ,fdqpe.lm_sdi_val
+    --,fdqpe.lm_sdi_u95
+    --,fdqpe.lm_sdi_l95
+    ,fdqpe.l_sdi_val
+    --,fdqpe.l_sdi_u95
+    --,fdqpe.l_sdi_l95
+from formatted_data_with_planned_enrollment fdqpe
+    join \"Formularies\".nct_to_brand_counts_through_uspdc ntbtu
+        on fdqpe.nct_id = ntbtu.nct_id 
+order by fdqpe.nct_id, fdqpe.earliest_date_observed 
+;
+"
+    )
+df <- fetch(query, n = -1)
+df <- na.omit(df)
+
+query2 <-dbSendQuery(con,"select count(*) from \"DiseaseBurden\".icd10_categories ic where \"level\"=1;")
+n_categories <- fetch(query2, n = -1)
+
+return(list(data=df,ncat=n_categories))
+}
+
+
+get_counterfact_base <- function(driver) {
+
+con <- dbConnect(
+    driver,
+    user='root',
+    password='root',
+    dbname='aact_db',
+    host=host
+    )
+on.exit(dbDisconnect(con))
+
+query <- dbSendQuery(
+    con,
+    "
+    with cte as (
+    --get last recruiting state
+    select fd.nct_id, max(fd.earliest_date_observed),min(fd2.earliest_date_observed) as tmstmp
+    from formatted_data fd 
+    	join formatted_data fd2 
+    	on fd.nct_id=fd2.nct_id and fd.earliest_date_observed < fd2.earliest_date_observed 
+    where fd.current_status = 'Recruiting'
+    	and fd2.current_status = 'Active, not recruiting'
+    group by fd.nct_id 
+    )
+    select 
+        fdqpe.nct_id
+        --,fdqpe.start_date
+        --,fdqpe.current_enrollment
+        --,fdqpe.enrollment_category
+        ,fdqpe.current_status 
+        ,fdqpe.earliest_date_observed 
+        ,fdqpe.elapsed_duration
+        ,fdqpe.n_brands as identical_brands
+        ,ntbtu.brand_name_counts 
+        ,fdqpe.category_id
+        ,fdqpe.final_status
+        ,fdqpe.h_sdi_val
+        --,fdqpe.h_sdi_u95
+        --,fdqpe.h_sdi_l95
+        ,fdqpe.hm_sdi_val
+        --,fdqpe.hm_sdi_u95
+        --,fdqpe.hm_sdi_l95
+        ,fdqpe.m_sdi_val
+        --,fdqpe.m_sdi_u95
+        --,fdqpe.m_sdi_l95
+        ,fdqpe.lm_sdi_val
+        --,fdqpe.lm_sdi_u95
+        --,fdqpe.lm_sdi_l95
+        ,fdqpe.l_sdi_val
+        --,fdqpe.l_sdi_u95
+        --,fdqpe.l_sdi_l95
+    from formatted_data_with_planned_enrollment fdqpe
+        join \"Formularies\".nct_to_brand_counts_through_uspdc ntbtu
+            on fdqpe.nct_id = ntbtu.nct_id 
+    	join cte 
+    		on fdqpe.nct_id = cte.nct_id 
+    			and fdqpe.earliest_date_observed = cte.tmstmp
+    order by fdqpe.nct_id, fdqpe.earliest_date_observed 
+    ;
+    "
+    )
+df <- fetch(query, n = -1)
+df <- na.omit(df)
+
+query2 <-dbSendQuery(con,"select count(*) from \"DiseaseBurden\".icd10_categories ic where \"level\"=1;")
+n_categories <- fetch(query2, n = -1)
+
+return(list(data=df,ncat=n_categories))
+}
+
+
+d <- get_data(driver)
+df <- d$data
+n_categories <- d$ncat
+
+cf <- get_counterfact_base(driver)
+df_counterfact_base <- cf$data
+```
+
+```{r}
+#number of trials in the base dataset 
+df$nct_id |> unique() %>% length
+#Number of trials in the conterfactual dataset 
+df_counterfact_base$nct_id %>% unique %>% length
+
+```
+
+
+```{r}
+################ Format Data ###########################
+
+data_formatter <- function(df) {
+categories <- df["category_id"]
+
+x <- df["elapsed_duration"]
+x["identical_brands"] <- asinh(df$identical_brands)
+x["brand_name_counts"] <- asinh(df$brand_name_count)
+x["h_sdi_val"] <- asinh(df$h_sdi_val)
+x["hm_sdi_val"] <- asinh(df$hm_sdi_val)
+x["m_sdi_val"] <- asinh(df$m_sdi_val)
+x["lm_sdi_val"] <- asinh(df$lm_sdi_val)
+x["l_sdi_val"] <- asinh(df$l_sdi_val)
+
+
+#Setup fixed effects
+x["status_NYR"] <- ifelse(df["current_status"]=="Not yet recruiting",1,0)
+x["status_EBI"] <- ifelse(df["current_status"]=="Enrolling by invitation",1,0)
+x["status_Rec"] <- ifelse(df["current_status"]=="Recruiting",1,0) 
+x["status_ANR"] <- ifelse(df["current_status"]=="Active, not recruiting",1,0)
+
+
+y <- ifelse(df["final_status"]=="Terminated",1,0)
+
+#get category list
+
+
+return(list(x=x,y=y))
+}
+
+train <- data_formatter(df)
+counterfact_base <- data_formatter(df_counterfact_base)
+
+categories <- df$category_id
+
+x <- as.matrix(train$x)
+y <- as.vector(train$y)
+
+x_cf_base <- counterfact_base$x 
+y_cf_base <- counterfact_base$y
+cf_categories <- df_counterfact_base$category_id
+```
+
+note how the base training has `{r} length(y)` snapshots while 
+the counterfactual has `{r} length(y_cf_base)`.
+
+There are `{r} categories |> unique() %>% length` categories 
+respresented in the base dataset and 
+`{r} cf_categories |> unique() %>% length` 
+categories in the counterfactual dataset
+
+```{r}
+length(y) #Snapshots
+length(y_cf_base) # trials
+
+categories |> unique() %>% length #base categories (snapshots dataset)
+cf_categories |> unique() %>% length #counterfactual categories
+```
+
+
+# Explore data
+
+
+```{r}
+#get number of trials and snapshots in each category
+group_trials_by_category <- as.data.frame(aggregate(category_id ~ nct_id, df, min))
+#SOMETHING GOING ON HERE ^^^^^
+# It looks like it grabs the max category id, i.e. if there are multiple categories assigned, it defaults to the highest
+
+#Trying something
+group_trials_by_category2 <- as.data.frame(aggregate(category_id ~ nct_id, df, max))
+
+for (i in 1:22) {
+    print(group_trials_by_category$category_id[i] - group_trials_by_category2$category_id[i])
+}
+#ok, it looks like the max an min give the same categories. So I don't have to worry about multiple classifications
+```
+
+
+```{r}
+category_count <- group_trials_by_category |> group_by(category_id) |> count()
+category_count
+sum(category_count$n)
+```
+
+Aggregating category counts from df, we get `{r} sum(category_count$n)`
+
+```{r}
+################################# DATA EXPLORATION ############################
+driver <- dbDriver("PostgreSQL")
+
+con <- dbConnect(
+    driver,
+    user='root',
+    password='root',
+    dbname='aact_db',
+    host=host
+    )
+#Plot histogram of count of snapshots
+df3 <- dbGetQuery(
+    con,
+    "select nct_id,final_status,count(*) from formatted_data_with_planned_enrollment fdwpe 
+    group by nct_id,final_status ;"
+    )
+#df3 <- fetch(query3, n = -1)
+
+ggplot(data=df3, aes(x=count, fill=final_status)) + 
+    geom_histogram(binwidth=1) +
+    ggtitle("Histogram of snapshots per trial (matched trials)") +
+    xlab("Snapshots per trial")
+#ggsave(paste0(image_trial_details,"/HistSnapshots.png"), create.dir = TRUE)
+
+#Plot duration for terminated vs completed
+df4 <- dbGetQuery(
+    con,
+    "
+    select 
+        nct_id, 
+        start_date , 
+        primary_completion_date, 
+        overall_status ,
+        primary_completion_date - start_date as duration
+    from ctgov.studies s 
+    where nct_id in (select distinct nct_id from http.download_status ds)
+    ;"
+    )
+#df4 <- fetch(query4, n = -1)
+
+ggplot(data=df4, aes(x=duration,fill=overall_status)) +
+    geom_histogram()+
+    ggtitle("Histogram of trial durations") +
+    xlab("duration")+
+    facet_wrap(~overall_status)
+#ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png"), create.dir = TRUE)
+
+df5 <- dbGetQuery(
+    con,
+    "
+    with cte1 as (
+    select 
+        nct_id, 
+        start_date , 
+        primary_completion_date, 
+        overall_status ,
+        primary_completion_date - start_date as duration
+    from ctgov.studies s 
+    where nct_id in (select distinct nct_id from http.download_status ds)
+    ), cte2 as (
+    select nct_id,count(*) as snapshot_count from formatted_data_with_planned_enrollment fdwpe
+    group by nct_id
+    )
+    select a.nct_id, a.overall_status, a.duration,b.snapshot_count
+    from cte1 as a
+        join cte2 as b  
+            on a.nct_id=b.nct_id
+    ;"
+    )
+df5$overall_status <- as.factor(df5$overall_status)
+
+ggplot(data=df5, aes(x=duration,y=snapshot_count,color=overall_status)) +
+    geom_jitter() +
+    ggtitle("Comparison of duration, status, and snapshot_count") +
+    xlab("duration") +
+    ylab("snapshot count") 
+#ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png"), create.dir = TRUE)
+
+dbDisconnect(con)
+
+#get number of trials and snapshots in each category
+group_trials_by_category <- as.data.frame(aggregate(category_id ~ nct_id, df, max))
+group_trials_by_category <- as.data.frame(group_trials_by_category)
+
+ggplot(data = group_trials_by_category, aes(x=category_id)) +
+    geom_bar(binwidth=1,color="black",fill="lightblue") +
+    scale_x_continuous(breaks=scales::pretty_breaks(n=22)) + 
+    labs(
+        title="bar chart of trial categories"
+        ,x="Category ID"
+        ,y="Count"
+    )
+#ggsave(paste0(image_trial_details,"/CategoryCounts.png"), create.dir = TRUE)
+    
+
+
+summary(df5)
+
+cor_dur_count <- cor(df5$duration,df5$snapshot_count)
+count_snapshots <- sum(df5$snapshot_count)
+```
+
+the correlation value is `{r} cor_dur_count` between duration and snapshot count.
+There are `{r} count_snapshots` snapshots in total, spread over 
+`{r} length(df5%nct_id)` trials.
+
+The number of categories by trial are
+TODO: add in category names, 
+```{r}
+group_trials_by_category |> count(category_id)
+```
+
+### Tracking differences
+OK, all results from df5 are from a different selection process than that of the standard result.
+Turns out that there is a single trial in df5 that isn't in the counterfactual list.
+```{r}
+df5_trials <- df5$nct_id %>% unique
+counterfact_trials <- group_trials_by_category$nct_id %>% unique
+
+length(df5_trials) - length(counterfact_trials)
+setdiff(df5_trials, counterfact_trials)
+```
+
+
+# Fit Model
+
+
+
+
+```{r}
+################################# FIT MODEL #########################################
+inherited_cols <- c(
+    "elapsed_duration"
+    ,"identical_brands"
+    ,"brand_name_counts"
+    ,"h_sdi_val"
+    ,"hm_sdi_val"
+    ,"m_sdi_val"
+    ,"lm_sdi_val"
+    ,"l_sdi_val"
+    ,"status_NYR"
+    ,"status_EBI"
+    ,"status_Rec"
+    ,"status_ANR"
+)
+```
+
+
+
+```{r}
+beta_list <- list(
+        groups = c(
+        `1`="Infections & Parasites",
+        `2`="Neoplasms",
+        `3`="Blood & Immune system",
+        `4`="Endocrine, Nutritional, and Metabolic",
+        `5`="Mental & Behavioral",
+        `6`="Nervous System",
+        `7`="Eye and Adnexa",
+        `8`="Ear and Mastoid",
+        `9`="Circulatory",
+        `10`="Respiratory",
+        `11`="Digestive",
+        `12`="Skin & Subcutaneaous tissue",
+        `13`="Musculoskeletal",
+        `14`="Genitourinary",
+        `15`="Pregancy, Childbirth, & Puerperium",
+        `16`="Perinatal Period",
+        `17`="Congential",
+        `18`="Symptoms, Signs etc.",
+        `19`="Injury etc.",
+        `20`="External Causes",
+        `21`="Contact with Healthcare",
+        `22`="Special Purposes"
+    ),
+    parameters = c(
+        `1`="Elapsed Duration",
+        # brands
+        `2`="asinh(Generic Brands)",
+        `3`="asinh(Competitors USPDC)",
+        # population
+        `4`="asinh(High SDI)",
+        `5`="asinh(High-Medium SDI)",
+        `6`="asinh(Medium SDI)",
+        `7`="asinh(Low-Medium SDI)",
+        `8`="asinh(Low SDI)",
+        #Status
+        `9`="status_NYR",
+        `10`="status_EBI",
+        `11`="status_Rec",
+        `12`="status_ANR"
+    )
+)
+
+get_parameters <- function(stem,class_list) {
+    #get categories and lengths
+    named <- names(class_list)
+    lengths <- sapply(named, (function (x) length(class_list[[x]])))
+    
+    #describe the grid needed
+    iter_list <- sapply(named, (function (x) 1:lengths[x]))
+    
+    #generate the list of parameters
+    pardf <- generate_parameter_df(stem, iter_list)
+    
+    #add columns with appropriate human-readable names
+    for (name in named) {
+        pardf[paste(name,"_hr",sep="")] <- as.factor(
+            sapply(pardf[name], (function (i) class_list[[name]][i]))
+        )
+    }
+     
+    return(pardf)   
+}
+
+generate_parameter_df <- function(stem, iter_list) {
+    grid <- expand.grid(iter_list)
+    grid["param_name"] <- grid %>% unite(x,colnames(grid),sep=",")
+    grid["param_name"] <- paste(stem,"[",grid$param_name,"]",sep="")
+    return(grid)
+}
+
+group_mcmc_areas <- function(
+        stem,# = "beta"
+        class_list,# = beta_list
+        stanfit,# = fit
+        group_id,# = 2
+        rename=TRUE,
+        filter=NULL
+        ) {
+    
+    #get all parameter names
+    params <- get_parameters(stem,class_list)
+    
+    #filter down to parameters of interest
+    params <- filter(params,groups == group_id)
+    #Get dataframe with only the rows of interest
+    filtdata <- as.data.frame(stanfit)[params$param_name]
+    #rename columns
+    if (rename) dimnames(filtdata)[[2]] <- params$parameters_hr
+    #get group name for title
+    group_name <- class_list$groups[group_id]
+    #create area plot with appropriate title
+    p <- mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
+        ggtitle(paste("Parameter distributions for ICD-10 class:",group_name)) +
+        geom_vline(xintercept=seq(-2,2,0.5),color="grey",alpha=0.750)  
+    
+    d <- pivot_longer(filtdata, everything()) |> 
+        group_by(name) |> 
+        summarize(
+            mean=mean(value)
+            ,q025 = quantile(value,probs = 0.025)
+            ,q975 = quantile(value,probs = 0.975)
+            ,q05 = quantile(value,probs = 0.05)
+            ,q95 = quantile(value,probs = 0.95)
+            )
+    return(list(plot=p,quantiles=d,name=group_name))
+}
+
+parameter_mcmc_areas <- function(
+        stem,# = "beta"
+        class_list,# = beta_list
+        stanfit,# = fit
+        parameter_id,# = 2
+        rename=TRUE
+        ) {
+    #get all parameter names
+    params <- get_parameters(stem,class_list)
+    #filter down to parameters of interest
+    params <- filter(params,parameters == parameter_id)
+    #Get dataframe with only the rows of interest
+    filtdata <- as.data.frame(stanfit)[params$param_name]
+    #rename columns
+    if (rename) dimnames(filtdata)[[2]] <- params$groups_hr
+    #get group name for title
+    parameter_name <- class_list$parameters[parameter_id]
+    #create area plot with appropriate title
+    p <- mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
+        ggtitle(parameter_name,"Parameter Distribution") +
+        geom_vline(xintercept=seq(-2,2,0.5),color="grey",alpha=0.750)  
+    
+    d <- pivot_longer(filtdata, everything()) |> 
+        group_by(name) |> 
+        summarize(
+            mean=mean(value)
+            ,q025 = quantile(value,probs = 0.025)
+            ,q975 = quantile(value,probs = 0.975)
+            ,q05 = quantile(value,probs = 0.05)
+            ,q95 = quantile(value,probs = 0.95)
+            )
+    return(list(plot=p,quantiles=d,name=parameter_name))
+}
+
+
+```
+
+Plan: select all snapshots that are the first to have closed enrollment (Rec -> ANR)
+```{r}
+#delay intervention
+intervention_enrollment <- x_cf_base[c(inherited_cols)]
+intervention_enrollment["status_ANR"] <- 0
+intervention_enrollment["status_Rec"] <- 1
+```
+
+There are `{r} length(intervention_enrollment$identical_brands)` trials in the intervention enrollment dataset
+
+```{r}
+
+counterfact_delay <- list(
+    D = ncol(x),
+    N = nrow(x),
+    L = n_categories$count,
+    y = y,
+    ll = as.vector(categories),
+    x = x,
+    mu_mean = 0,
+    mu_stdev = 0.05,
+    sigma_location = -2.1,
+    sigma_scale = 0.2,
+    Nx = nrow(x_cf_base),
+    llx = as.vector(cf_categories),
+    counterfact_x_tilde = as.matrix(intervention_enrollment),
+    counterfact_x = as.matrix(x_cf_base),
+    status_indexes = c(11,12) #subtract anr from recruiting to get movement from anr to recruiting
+
+)
+```
+
+
+Ok, so I cut out the estimation because there is some problem of some sort. And I'm not really working on that right now.
+
+TODO: Next step: get this working, then figure out how many trials I analyzed, and if they match the DF5 trials list
+```{r}
+#| label: Fitting
+#| eval: false
+#| include: true
+fit <- stan(
+    file='Hierarchal_Logistic.stan', 
+    data = counterfact_delay,
+    chains = 8,
+    iter = 12000,
+    warmup = 4000,
+    seed = 11021585
+    )
+```
+
+
+## Fit Results
+```{r}
+print(check_hmc_diagnostics(fit))
+print(get_bfmi(fit))
+```
+
+
+```{r}
+print(fit)
+```
+
+
+
+
+
+
+
+
+# Parameter Distributions
+
+
+```{r}
+#| eval: false
+#| include: true
+#g1 <- group_mcmc_areas("beta",beta_list,fit,1)
+
+
+gx <- c()
+
+#grab parameters for every category with more than 8 observations
+for (i in category_count$category_id[category_count$n >= 0]) {
+    print(i)
+    
+    #Print parameter distributions
+    gi <- group_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
+    #ggsave(
+        paste0(image_parameters_by_groups,"/group_",i,"_",gi$name,".png")
+        ,plot=gi$plot, create.dir = TRUE
+        )
+    gx <- c(gx,gi)
+
+    #Get Quantiles and means for parameters
+    table <- xtable(gi$quantiles,
+      floating=FALSE
+      ,latex.environments = NULL
+      ,booktabs = TRUE
+      ,zap=getOption("digits")
+      )
+    write_lines(
+        table,
+        paste0(image_parameters_by_groups,"/group_table_",i,"_",gi$name,".tex")
+    )
+}
+```
+
+
+
+```{r}
+#| eval: false
+#| include: true
+px <- c()
+
+
+for (i in c(1,2,3,9,10,11,12)) {
+    
+    #Print parameter distributions
+    pi <- parameter_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
+    #ggsave(
+        paste0(image_parameters_across_groups,"/parameters_",i,"_",pi$name,".png")
+        ,plot=pi$plot, create.dir = TRUE
+        )
+    px <- c(px,pi)
+
+    #Get Quantiles and means for parameters
+    table <- xtable(pi$quantiles,
+      floating=FALSE
+      ,latex.environments = NULL
+      ,booktabs = TRUE
+      ,zap=getOption("digits")
+      )
+    write_lines(
+        table,
+        paste0(image_parameters_across_groups,"/parameters_tables_",i,"_",pi$name,".tex")
+    )
+    
+}
+```
+
+Note these have 95% outer CI and 80% inner (shaded)
+
+
+
+
+
+
+
+# Priors
+```{r}
+#| eval: false
+#| include: true
+df_ib_p <- data.frame(
+    p_prior=as.vector(extract(generated_ib, pars="p_prior")$p_prior)
+    ,p_predicted = as.vector(extract(generated_ib, pars="p_predicted")$p_predicted)
+)
+
+df_ib_prior <- data.frame(
+    mu_prior = as.vector(extract(generated_ib, pars="mu_prior")$mu_prior)
+    ,sigma_prior = as.vector(extract(generated_ib, pars="sigma_prior")$sigma_prior)
+)
+```
+
+
+```{r}
+#| eval: false
+#| include: true
+#p_prior
+ggplot(df_ib_p, aes(x=p_prior)) +
+    geom_density() + 
+    labs(
+        title="Implied Prior Distribution P"
+        ,subtitle=""
+        ,x="Probability Domain 'p'"
+        ,y="Probability Density"
+    )
+#ggsave(paste0(image_dist_diff_analysis,"/prior_p.png"), create.dir = TRUE)
+
+#p_posterior
+ggplot(df_ib_p, aes(x=p_predicted)) +
+    geom_density() + 
+    labs(
+        title="Implied Posterior Distribution P"
+        ,subtitle=""
+        ,x="Probability Domain 'p'"
+        ,y="Probability Density"
+    )
+#ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png"), create.dir = TRUE)
+
+#mu_prior
+ggplot(df_ib_prior) +
+    geom_density(aes(x=mu_prior)) + 
+    labs(
+        title="Prior - Mu"
+        ,subtitle="same prior for all Mu values"
+        ,x="Mu"
+        ,y="Probability"
+    )
+#ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png"), create.dir = TRUE)
+
+#sigma_posterior
+ggplot(df_ib_prior) +
+    geom_density(aes(x=sigma_prior)) + 
+    labs(
+        title="Prior - Sigma"
+        ,subtitle="same prior for all Sigma values"
+        ,x="Sigma"
+        ,y="Probability"
+    )
+#ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"), create.dir = TRUE)
+```
+
+
+
+
+
+
+# Counterfactual: Delay close of enrollment
+```{r}
+generated_ib <- gqs(
+    fit@stanmodel,
+    data=counterfact_delay,
+    draws=as.matrix(fit),
+    seed=11021585
+    )
+```
+
+```{r}
+counterfact_predicted_ib <- data.frame(
+    p_predicted_default = as.vector(extract(generated_ib, pars="p_predicted_default")$p_predicted_default)
+    ,p_predicted_intervention = as.vector(extract(generated_ib, pars="p_predicted_intervention")$p_predicted_intervention)
+    ,predicted_difference = as.vector(extract(generated_ib, pars="predicted_difference")$predicted_difference)
+)
+```
+
+
+```{r}
+#| eval: false
+#| include: true
+ggplot(counterfact_predicted_ib, aes(x=p_predicted_default)) +
+    geom_density() + 
+    labs(
+        title="Predicted Distribution of 'p'"
+        ,subtitle="Intervention: None"
+        ,x="Probability Domain 'p'"
+        ,y="Probability Density"
+    )
+##ggsave(paste0(image_dist_diff_analysis,"/p_no_intervention.png"), create.dir = TRUE)
+
+ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
+    geom_density() + 
+    labs(
+        title="Predicted Distribution of 'p'"
+        ,subtitle="Intervention: Delay close of enrollment"
+        ,x="Probability Domain 'p'"
+        ,y="Probability Density"
+    )
+##ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png"), create.dir = TRUE)
+
+ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
+    geom_density() + 
+    labs(
+        title="Predicted Distribution of differences 'p'"
+        ,subtitle="Intervention: Delay close of enrollment"
+        ,x="Difference in 'p' under delay"
+        ,y="Probability Density"
+    )
+##ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png"), create.dir = TRUE)
+```
+
+
+```{r}
+get_category_count <- function(tbl, id) {
+  result <- tbl$n[tbl$category_id == id]
+  if(length(result) == 0) 0 else result
+}
+
+category_names <- sapply(1:length(beta_list$groups), 
+    function(i) sprintf("ICD-10 #%d: %s (n=%d)", 
+                       i, 
+                       beta_list$groups[i],
+                       get_category_count(category_count, i)))
+
+```
+
+```{r}
+pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) |>
+    pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168
+
+pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
+pddf_ib["category"] <-  sapply(pddf_ib$entry_idx, function(i) df$category_id[i])
+pddf_ib["category_name"] <- sapply(
+    pddf_ib$category, 
+    function(i) category_names[i]
+    )
+```
+
+
+```{r}
+# Compare group counts
+counterfact_delay$ll %>% unique() %>% sort()
+counterfact_delay$llx %>% unique() %>% sort()
+df$category_id %>% unique() %>% sort()
+```
+
+
+```{r}
+ggplot(pddf_ib, aes(x=value,)) +
+    geom_density(adjust=1/5) +
+    labs(
+        title = "Distribution of predicted differences"
+        ,subtitle = "Intervention: Delay close of enrollment"
+        ,x = "Difference in probability due to intervention"
+        ,y = "Probability Density"
+    ) + 
+    geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") 
+#ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png"), create.dir = TRUE)
+```
+
+
+```{r}
+ggplot(pddf_ib, aes(x=value,)) +
+    geom_density(adjust=1/5) +
+    facet_wrap(
+        ~factor(
+            category_name, 
+            levels=category_names
+            )
+        , labeller = label_wrap_gen(multi_line = TRUE)
+        , ncol=4) +
+    labs(
+        title = "Distribution of predicted differences | By Group"
+        ,subtitle = "Intervention: Delay close of enrollment"
+        ,x = "Difference in probability due to intervention"
+        ,y = "Probability Density"
+    ) + 
+    geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
+    theme(strip.text.x = element_text(size = 8))
+#ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png"), create.dir = TRUE)
+```
+
+
+```{r}
+ggplot(pddf_ib, aes(x=value,)) +
+    geom_histogram(bins=300) +
+    facet_wrap(
+        ~factor(
+            category_name, 
+            levels=category_names
+            )
+        , labeller = label_wrap_gen(multi_line = TRUE)
+        , ncol=4) +
+    labs(
+        title = "Histogram of predicted differences | By Group"
+        ,subtitle = "Intervention: Delay close of enrollment"
+        ,x = "Difference in probability due to intervention"
+        ,y = "Predicted counts"
+    ) + 
+    geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
+    theme(strip.text.x = element_text(size = 8))
+#ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_by_group.png"), create.dir = TRUE)
+```
+
+
+```{r}
+p3 <- ggplot(pddf_ib, aes(x=value,)) +
+    geom_histogram(bins=500) +
+    labs(
+        title = "Distribution of predicted differences"
+        ,subtitle = "Intervention: Delay close of enrollment"
+        ,x = "Difference in probability due to intervention"
+        ,y = "Probability Density"
+        ,caption = "Vertical marks: 5/10/25/50/75/90/95th percentiles. Dot shows mean."
+    ) + 
+    geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") 
+
+density_hight <- max(density(pddf_ib$value)$y)
+
+stats <- list(
+ p5 = quantile(pddf_ib$value, 0.05),
+ p10 = quantile(pddf_ib$value, 0.10),
+ q1 = quantile(pddf_ib$value, 0.25),
+ med = median(pddf_ib$value),
+ mean = mean(pddf_ib$value),
+ stdev = sd(pddf_ib$value),
+ q3 = quantile(pddf_ib$value, 0.75),
+ p90 = quantile(pddf_ib$value, 0.90),
+ p95 = quantile(pddf_ib$value, 0.95),
+ max_height = max(ggplot_build(p3)$data[[1]]$count),
+ y_offset = -max(ggplot_build(p3)$data[[1]]$count) * 0.05,
+ y_offset_density = -density_hight * 0.15
+)
+
+p3 + 
+ # Box
+ geom_segment(data = data.frame(
+   x = c(stats$q1, stats$q3, stats$med),
+   xend = c(stats$q1, stats$q3, stats$med),
+   y = rep(stats$y_offset, 3), 
+   yend = rep(stats$y_offset * 2, 3)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ geom_segment(data = data.frame(
+   x = rep(stats$q1, 2),
+   xend = rep(stats$q3, 2),
+   y = c(stats$y_offset, stats$y_offset * 2),
+   yend = c(stats$y_offset, stats$y_offset * 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Inner whiskers (Q1->P10, Q3->P90)
+ geom_segment(data = data.frame(
+   x = c(stats$q1, stats$q3),
+   xend = c(stats$p10, stats$p90),
+   y = rep(stats$y_offset * 1.5, 2),
+   yend = rep(stats$y_offset * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P10/P90
+ geom_segment(data = data.frame(
+   x = c(stats$p10, stats$p90),
+   xend = c(stats$p10, stats$p90),
+   y = stats$y_offset * 1.3,
+   yend = stats$y_offset * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Outer whiskers (P10->P5, P90->P95)
+ geom_segment(data = data.frame(
+   x = c(stats$p10, stats$p90),
+   xend = c(stats$p5, stats$p95),
+   y = rep(stats$y_offset * 1.5, 2),
+   yend = rep(stats$y_offset * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P5/P95
+ geom_segment(data = data.frame(
+   x = c(stats$p5, stats$p95),
+   xend = c(stats$p5, stats$p95),
+   y = stats$y_offset * 1.3,
+   yend = stats$y_offset * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Mean dot
+ geom_point(data = data.frame(
+   x = stats$mean,
+   y = stats$y_offset * 1.5
+ ), aes(x = x, y = y))
+#ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png"), create.dir = TRUE)
+```
+
+
+```{r}
+p4 <- ggplot(pddf_ib, aes(x = value)) +
+  geom_density() +
+    labs(
+        title = "Distribution of predicted differences"
+        ,subtitle = "Intervention: Delay close of enrollment"
+        ,x = "Difference in probability due to intervention"
+        ,y = "Probability Density"
+        ,caption = "Vertical marks: 5/10/25/50/75/90/95th percentiles. Dot shows mean."
+    ) + 
+    geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
+ geom_segment(data = data.frame(
+   x = c(stats$q1, stats$q3, stats$med),
+   xend = c(stats$q1, stats$q3, stats$med),
+   y = rep(stats$y_offset_density, 3), 
+   yend = rep(stats$y_offset_density * 2, 3)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ geom_segment(data = data.frame(
+   x = rep(stats$q1, 2),
+   xend = rep(stats$q3, 2),
+   y = c(stats$y_offset_density, stats$y_offset_density * 2),
+   yend = c(stats$y_offset_density, stats$y_offset_density * 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Inner whiskers (Q1->P10, Q3->P90)
+ geom_segment(data = data.frame(
+   x = c(stats$q1, stats$q3),
+   xend = c(stats$p10, stats$p90),
+   y = rep(stats$y_offset_density * 1.5, 2),
+   yend = rep(stats$y_offset_density * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P10/P90
+ geom_segment(data = data.frame(
+   x = c(stats$p10, stats$p90),
+   xend = c(stats$p10, stats$p90),
+   y = stats$y_offset_density * 1.3,
+   yend = stats$y_offset_density * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Outer whiskers (P10->P5, P90->P95)
+ geom_segment(data = data.frame(
+   x = c(stats$p10, stats$p90),
+   xend = c(stats$p5, stats$p95),
+   y = rep(stats$y_offset_density * 1.5, 2),
+   yend = rep(stats$y_offset_density * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P5/P95
+ geom_segment(data = data.frame(
+   x = c(stats$p5, stats$p95),
+   xend = c(stats$p5, stats$p95),
+   y = stats$y_offset_density * 1.3,
+   yend = stats$y_offset_density * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Mean dot
+ geom_point(data = data.frame(
+   x = stats$mean,
+   y = stats$y_offset_density * 1.5
+ ), aes(x = x, y = y))
+#ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png"), create.dir = TRUE)
+
+p4
+```
+
+```{r}
+#| eval: false
+#| include: true
+ ggplot(pddf_ib, aes(x=value)) +
+    stat_ecdf(geom='step') +
+    labs(
+        title = "Cumulative distribution of predicted differences",
+        subtitle = "Intervention: Delay close of enrollment",
+        x = "Difference in probability of termination due to intervention",
+        y = "Cumulative Proportion"
+    ) 
+
+#ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png"), create.dir = TRUE)
+```
+
+
+Get the % of differences in the spike around zero
+```{r}
+# get values around and above/below spike
+width <- 0.02
+spike_band_centered_zero <- mean( pddf_ib$value >= -width/2 & pddf_ib$value <= width/2)
+above_spike_band <- mean( pddf_ib$value >= width/2)
+below_spike_band <- mean( pddf_ib$value <= -width/2)
+
+# get mass above and mass below zero
+mass_below_zero <- mean( pddf_ib$value <= 0)
+```
+Looking at the spike around zero, we find that `{r} spike_band_centered_zero*100`% 
+of the probability mass is contained within the band from 
+[`{r} -width/2`,`{r} width/2`].
+Additionally, there was `{r} above_spike_band*100`% of the probability above that 
+-- representing those with a general increase in the probability of termination --
+and `{r} below_spike_band*100`% of the probability mass below the band
+-- representing a decrease in the probability of termination.
+
+On average, if you keep the trial open instead of closing it,
+`{r} mass_below_zero * 100`% of trials will see a decrease in the probability of termination,
+but, due to the high increase in probability of termination given termination was increased,
+the mean probability of termination increases by `{r} stats$mean (stats$stdev)`.
+
+```{r}
+# 5%-iles
+
+summary(pddf_ib$value)
+
+# Create your quantiles
+quants <- quantile(pddf_ib$value, probs = seq(0,1,0.05), type=4)
+
+# Convert to a data frame
+quant_df <- data.frame(
+  #Percentile = names(quants),
+  Value = quants
+)
+# Convert to LaTeX
+table <- xtable(quant_df,
+    digits = rep(3, ncol(quant_df) + 1),
+    floating = FALSE,
+    latex.environments = NULL,
+    booktabs = TRUE
+)
+
+# Write to file
+write_lines(
+    print(table, include.rownames = FALSE),
+    paste0(image_root,"/distdiff_5percentile_table.tex")
+)
+
+kable(quant_df)
+
+proportion_increase <- mean(pddf_ib$value >= 0)
+```
+
+about `{r} proportion_increase * 100` percent probability increase in the probability of terminations
+
+```{r}
+n = length(counterfact_predicted_ib$p_predicted_intervention)
+k = 100
+simulated_terminations_intervention <- mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_intervention)))
+simulated_terminations_base <-mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_default)))
+
+simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)
+```
+
+The simulation above shows that this results in a percentage-point change in terminations of about 
+`{r} simulated_percentages * 100`.
+
+
+```{r}
+distdiff_by_group <- pddf_ib %>% 
+    group_by(category_name, category) %>%
+    summarize(
+        mean = mean(value, na.rm=TRUE) *100
+        ,"P(p>0)" = mean(value>0) *100
+        ,"5%"    = quantile( value, 0.05, na.rm = TRUE)*100
+        ,"25%"   = quantile( value, 0.25, na.rm = TRUE)*100
+        ,"median" = quantile( value, 0.5, na.rm = TRUE)*100
+        ,"75%"   = quantile( value, 0.75, na.rm = TRUE)*100
+        ,"95%"   = quantile( value, 0.95, na.rm = TRUE)*100
+    ) %>% arrange(category) %>% select(!category)
+    
+distdif_by_group_latex<- xtable(distdiff_by_group,
+    digits = rep(2, ncol(distdiff_by_group) +1),
+    floating = FALSE,
+    align = "llccccccc",
+    latex.environments = "tabularx",
+    booktabs = TRUE
+)
+
+
+
+print(distdif_by_group_latex)
+distdiff_by_group
+```
+
+
+
+## fixed effects distributions
+
+```{r}
+#| eval: false
+#| include: true
+#Get dataframe with only the rows of interest
+filtdata <- as.data.frame(extract(fit, pars="status_diff"))
+#rename columns
+dimnames(filtdata)[[2]] <- sprintf("%02d %s", seq_along(beta_list$groups), beta_list$groups)
+#create area plot with appropriate title
+mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
+    ggtitle("Differences in Fixed Effects | By ICD-10 Category", 
+            subtitle = "Moving from 'Active, not recruiting' to 'Recruiting'"
+        ) +
+    geom_vline(xintercept=seq(-0.25,0.5,0.25),color="grey",alpha=0.750)  
+#ggsave(paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.png"), create.dir = TRUE)
+    
+fixed_effects_table <- pivot_longer(filtdata, everything()) |> 
+    group_by(name) |> 
+    summarize(
+        `sample size` = 0,
+        mean = mean(value),
+        `P(≥0)` = mean(value >= 0),
+        `2.5%` = quantile(value, probs = 0.025),
+        `5%` = quantile(value, probs = 0.05),
+        `25%` = quantile(value, probs = 0.25),
+        `50% median` = quantile(value, probs = 0.5),
+        `75%` = quantile(value, probs = 0.75),
+        `95%` = quantile(value, probs = 0.95),
+        `97.5%` = quantile(value, probs = 0.975)
+    )
+
+# Convert the indexing to be consecutive by using a temporary vector
+sample_sizes <- rep(0, 22)  # Create vector of zeros
+sample_sizes[category_count$category_id] <- category_count$n
+
+# Now assign the whole vector at once
+fixed_effects_table["sample size"] <- sample_sizes
+# Rename the name column
+names(fixed_effects_table)[1] <- "ICD-10 Category"
+
+# Convert to LaTeX
+table <- xtable(fixed_effects_table,
+    digits = rep(2, ncol(fixed_effects_table) + 1),
+    floating = FALSE,
+    align = "lccccccccccc",
+    latex.environments = "tabularx",
+    booktabs = TRUE
+)
+
+
+
+# Write to file
+write_lines(
+    print(table, include.rownames = FALSE),
+    paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.tex")
+)
+
+
+
+```
+
+
+
+
+
+
+  
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/fixed_effects_anr_vs_rec_by_group.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/fixed_effects_anr_vs_rec_by_group.png
deleted file mode 100644
index e2b0f98..0000000
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/fixed_effects_anr_vs_rec_by_group.png and /dev/null differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/fixed_effects_anr_vs_rec_by_group.tex b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/fixed_effects_anr_vs_rec_by_group.tex
deleted file mode 100644
index 8c4fb84..0000000
--- a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/fixed_effects_anr_vs_rec_by_group.tex
+++ /dev/null
@@ -1,34 +0,0 @@
-% latex table generated in R 4.4.2 by xtable 1.8-4 package
-% Mon Feb  3 23:56:27 2025
-\begin{table}[ht]
-\centering
-\begin{tabular}{ccccccccccc}
-  \hline
-ICD-10 Category & sample size & mean & P(≥0) & 2.5\% & 5\% & 25\% & 50\% median & 75\% & 95\% & 97.5\% \\ 
-  \hline
-01 Infections \& Parasites & 20.000 & 0.135 & 0.772 & -0.223 & -0.163 & 0.013 & 0.134 & 0.257 & 0.438 & 0.498 \\ 
-  02 Neoplasms & 49.000 & 0.211 & 0.902 & -0.107 & -0.056 & 0.099 & 0.208 & 0.320 & 0.489 & 0.546 \\ 
-  03 Blood \& Immune system & 1.000 & 0.061 & 0.624 & -0.333 & -0.267 & -0.071 & 0.061 & 0.194 & 0.388 & 0.454 \\ 
-  04 Endocrine, Nutritional, and Metabolic & 10.000 & 0.177 & 0.820 & -0.201 & -0.137 & 0.045 & 0.173 & 0.304 & 0.505 & 0.573 \\ 
-  05 Mental \& Behavioral & 8.000 & 0.142 & 0.767 & -0.239 & -0.175 & 0.010 & 0.140 & 0.270 & 0.469 & 0.537 \\ 
-  06 Nervous System & 8.000 & 0.009 & 0.524 & -0.383 & -0.315 & -0.120 & 0.011 & 0.140 & 0.327 & 0.389 \\ 
-  07 Eye and Adnexa & 11.000 & 0.061 & 0.624 & -0.333 & -0.265 & -0.069 & 0.062 & 0.192 & 0.386 & 0.452 \\ 
-  08 Ear and Mastoid & 0.000 & 0.063 & 0.625 & -0.332 & -0.263 & -0.069 & 0.064 & 0.196 & 0.393 & 0.457 \\ 
-  09 Circulatory & 3.000 & 0.037 & 0.577 & -0.362 & -0.293 & -0.094 & 0.038 & 0.170 & 0.363 & 0.426 \\ 
-  10 Respiratory & 7.000 & 0.061 & 0.622 & -0.335 & -0.269 & -0.072 & 0.061 & 0.195 & 0.391 & 0.458 \\ 
-  11 Digestive & 12.000 & 0.060 & 0.621 & -0.339 & -0.270 & -0.071 & 0.059 & 0.192 & 0.388 & 0.454 \\ 
-  12 Skin \& Subcutaneaous tissue & 9.000 & 0.104 & 0.704 & -0.283 & -0.220 & -0.027 & 0.103 & 0.234 & 0.429 & 0.494 \\ 
-  13 Musculoskeletal & 17.000 & 0.159 & 0.794 & -0.218 & -0.156 & 0.027 & 0.156 & 0.286 & 0.482 & 0.550 \\ 
-  14 Genitourinary & 2.000 & 0.063 & 0.627 & -0.337 & -0.270 & -0.070 & 0.065 & 0.195 & 0.389 & 0.456 \\ 
-  15 Pregancy, Childbirth, \& Puerperium & 0.000 & 0.061 & 0.622 & -0.336 & -0.270 & -0.071 & 0.061 & 0.194 & 0.392 & 0.458 \\ 
-  16 Perinatal Period & 0.000 & 0.062 & 0.626 & -0.338 & -0.270 & -0.068 & 0.062 & 0.194 & 0.390 & 0.457 \\ 
-  17 Congential & 3.000 & 0.063 & 0.628 & -0.332 & -0.264 & -0.068 & 0.063 & 0.195 & 0.391 & 0.456 \\ 
-  18 Symptoms, Signs etc. & 1.000 & 0.062 & 0.625 & -0.332 & -0.265 & -0.069 & 0.062 & 0.194 & 0.390 & 0.456 \\ 
-  19 Injury etc. & 0.000 & 0.063 & 0.625 & -0.337 & -0.268 & -0.069 & 0.062 & 0.196 & 0.392 & 0.459 \\ 
-  20 External Causes & 0.000 & 0.063 & 0.625 & -0.330 & -0.266 & -0.070 & 0.064 & 0.194 & 0.392 & 0.460 \\ 
-  21 Contact with Healthcare & 0.000 & 0.063 & 0.627 & -0.335 & -0.267 & -0.070 & 0.064 & 0.194 & 0.390 & 0.454 \\ 
-  22 Special Purposes & 0.000 & 0.062 & 0.625 & -0.335 & -0.268 & -0.070 & 0.062 & 0.194 & 0.391 & 0.456 \\ 
-   \hline
-\end{tabular}
-\end{table}
-
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/parcoord_mu.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/parcoord_mu.png
deleted file mode 100644
index 42b31e6..0000000
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/parcoord_mu.png and /dev/null differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_plot_mu.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_plot_mu.png
deleted file mode 100644
index 34a2680..0000000
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_plot_mu.png and /dev/null differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_1-4.png
deleted file mode 100644
index cb01392..0000000
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_1-4.png and /dev/null differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_5-8.png
deleted file mode 100644
index 6840565..0000000
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_5-8.png and /dev/null differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_9-12.png
deleted file mode 100644
index 6ea4f17..0000000
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_9-12.png and /dev/null differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_1-4.png
deleted file mode 100644
index f9844fc..0000000
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_1-4.png and /dev/null differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_5-8.png
deleted file mode 100644
index 6b12dc0..0000000
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_5-8.png and /dev/null differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_9-12.png
deleted file mode 100644
index ce398ff..0000000
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_9-12.png and /dev/null differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/traceplot_sigma.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/traceplot_sigma.png
deleted file mode 100644
index b0d288c..0000000
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/traceplot_sigma.png and /dev/null differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention.png
index 9bc0f1d..8a9ef0e 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_cumulative_distdiff.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_cumulative_distdiff.png
index 53b5c3e..96c8e55 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_cumulative_distdiff.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_cumulative_distdiff.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_1.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_1.png
index c92093e..a582481 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_1.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_1.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_by_group.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_by_group.png
index ebe9fa7..ea8fc81 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_by_group.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_by_group.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_styled.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_styled.png
index cabac99..f8fae30 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_styled.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_styled.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_boxplot.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_boxplot.png
index 05eeeb3..772baec 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_boxplot.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_boxplot.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_by_group.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_by_group.png
new file mode 100644
index 0000000..422b4dd
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_by_group.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_no_intervention.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_no_intervention.png
new file mode 100644
index 0000000..2edbe21
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_no_intervention.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/posterior_p.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/posterior_p.png
index a8ecd13..5b34098 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/posterior_p.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/posterior_p.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_mu.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_mu.png
index eb2efe4..182815f 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_mu.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_mu.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_p.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_p.png
index f555b17..cb72e74 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_p.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_p.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_sigma.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_sigma.png
index 7c853a1..18487f1 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_sigma.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_sigma.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_delay_intervention_histdiff_by_group.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_delay_intervention_histdiff_by_group.png
deleted file mode 100644
index 9c3b3f0..0000000
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_delay_intervention_histdiff_by_group.png and /dev/null differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_no_intervention.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_no_intervention.png
deleted file mode 100644
index 88453bc..0000000
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_no_intervention.png and /dev/null differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_5percentile_table.tex b/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_5percentile_table.tex
index b969241..36a4e56 100644
--- a/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_5percentile_table.tex
+++ b/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_5percentile_table.tex
@@ -1,5 +1,5 @@
-% latex table generated in R 4.4.2 by xtable 1.8-4 package
-% Sun Feb  2 01:37:17 2025
+% latex table generated in R 4.5.1 by xtable 1.8-4 package
+% Mon Mar 23 21:59:57 2026
 \begin{table}[ht]
 \centering
 \begin{tabular}{r}
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_anr_vs_rec_by_group.tex b/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_anr_vs_rec_by_group.tex
new file mode 100644
index 0000000..027bf62
--- /dev/null
+++ b/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_anr_vs_rec_by_group.tex
@@ -0,0 +1,26 @@
+% latex table generated in R 4.5.1 by xtable 1.8-4 package
+% Mon Mar 23 23:42:59 2026
+\begin{table}[ht]
+\centering
+\begin{tabular}{lcccccccc}
+  \hline
+category\_name & N & mean & P(p$>$0) & 5\% & 25\% & median & 75\% & 95\% \\ 
+  \hline
+ICD-10 \#1: Infections \& Parasites (n=20) & XX & 1.60 & 77.23 & -1.79 & 0.11 & 1.19 & 2.77 & 6.30 \\ 
+  ICD-10 \#2: Neoplasms (n=49) & XX & 3.63 & 90.17 & -0.90 & 1.52 & 3.37 & 5.52 & 9.06 \\ 
+  ICD-10 \#3: Blood \& Immune system (n=1) & XX & 0.15 & 62.35 & -0.80 & -0.02 & 0.01 & 0.16 & 1.63 \\ 
+  ICD-10 \#4: Endocrine, Nutritional, and Metabolic (n=10) & XX & 2.23 & 82.05 & -1.75 & 0.52 & 2.03 & 3.77 & 6.89 \\ 
+  ICD-10 \#5: Mental \& Behavioral (n=8) & XX & 3.42 & 76.72 & -4.21 & 0.24 & 3.36 & 6.47 & 11.28 \\ 
+  ICD-10 \#6: Nervous System (n=8) & XX & 0.05 & 52.37 & -2.98 & -0.83 & 0.07 & 0.95 & 3.01 \\ 
+  ICD-10 \#7: Eye and Adnexa (n=11) & XX & 0.03 & 62.40 & -0.13 & -0.01 & 0.00 & 0.04 & 0.25 \\ 
+  ICD-10 \#9: Circulatory (n=3) & XX & 0.69 & 57.69 & -5.03 & -1.33 & 0.50 & 2.66 & 6.88 \\ 
+  ICD-10 \#10: Respiratory (n=7) & XX & 0.04 & 62.21 & -0.17 & -0.00 & 0.00 & 0.03 & 0.37 \\ 
+  ICD-10 \#11: Digestive (n=12) & XX & 0.02 & 62.13 & -0.10 & -0.00 & 0.00 & 0.02 & 0.18 \\ 
+  ICD-10 \#12: Skin \& Subcutaneaous tissue (n=9) & XX & 0.55 & 70.42 & -1.06 & -0.11 & 0.41 & 1.10 & 2.56 \\ 
+  ICD-10 \#13: Musculoskeletal (n=17) & XX & 1.16 & 79.42 & -1.13 & 0.18 & 1.02 & 2.02 & 3.93 \\ 
+  ICD-10 \#14: Genitourinary (n=2) & XX & 0.13 & 62.72 & -0.54 & -0.01 & 0.00 & 0.10 & 1.27 \\ 
+  ICD-10 \#17: Congential (n=3) & XX & 0.06 & 62.82 & -0.27 & -0.00 & 0.00 & 0.05 & 0.63 \\ 
+   \hline
+\end{tabular}
+\end{table}
+
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/CategoryCounts.png b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/CategoryCounts.png
index 4968c4a..60b1db0 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/CategoryCounts.png and b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/CategoryCounts.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistSnapshots.png b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistSnapshots.png
index 7b5bbcd..29e0cad 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistSnapshots.png and b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistSnapshots.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistTrialDurations_Faceted.png b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistTrialDurations_Faceted.png
index f185f66..e63a6b9 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistTrialDurations_Faceted.png and b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistTrialDurations_Faceted.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/SnapshotsVsDurationVsTermination.png b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/SnapshotsVsDurationVsTermination.png
index edb928d..ae50707 100644
Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/SnapshotsVsDurationVsTermination.png and b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/SnapshotsVsDurationVsTermination.png differ
diff --git a/r-analysis/r-analysis.Rproj b/r-analysis/r-analysis.Rproj
index 8a81d49..e1fb2b3 100644
--- a/r-analysis/r-analysis.Rproj
+++ b/r-analysis/r-analysis.Rproj
@@ -1,4 +1,5 @@
 Version: 1.0
+ProjectId: af114abe-061f-43bf-8ae1-a953900da3ff
 
 RestoreWorkspace: Yes
 SaveWorkspace: Ask