-
for (k in 1 : 22 ) {
-for (i in 1 : 3 ) {
- params = sapply (3 : 0 , function (j) paste0 ("beta[" ,k,"," ,4 * i- j ,"]" ))
- print (
- mcmc_pairs (
- posterior,
- np = nuts_prmts,
- pars= c (
- params,
- "lp__"
- ),
- off_diag_args = list (size = 0.75 )
- )
- )
-
- beta_range <- paste0 ("k_" ,k,"_i_" ,4 * i-3 ,"-" ,4 * i)
- filename <- paste0 ("./EffectsOfEnrollmentDelay/Images/diagnostics/correlation_plot_beta_" ,beta_range,".png" )
- ggsave (filename)
- }}
+
for (k in 1 : 22 ) {
+for (i in 1 : 3 ) {
+ params = sapply (3 : 0 , function (j) paste0 ("beta[" ,k,"," ,4 * i- j ,"]" ))
+ print (
+ mcmc_pairs (
+ posterior,
+ np = nuts_prmts,
+ pars= c (
+ params,
+ "lp__"
+ ),
+ off_diag_args = list (size = 0.75 )
+ )
+ )
+
+ beta_range <- paste0 ("k_" ,k,"_i_" ,4 * i-3 ,"-" ,4 * i)
+ filename <- paste0 (image_diagnostics,"/correlation_plot_beta_" ,beta_range,".png" )
+ ggsave (filename)
+ }}
@@ -9472,24 +9635,6 @@ Adding another scale for colour, which will replace the existing scale.
-
diff --git a/r-analysis/EffectsOfEnrollmentDelay.qmd b/r-analysis/EffectsOfEnrollmentDelay.qmd
index dd2c502..1aea9e5 100644
--- a/r-analysis/EffectsOfEnrollmentDelay.qmd
+++ b/r-analysis/EffectsOfEnrollmentDelay.qmd
@@ -28,6 +28,14 @@ options(mc.cores = parallel::detectCores())
#test installation, shouldn't get any errors
#example(stan_model, package = "rstan", run.dontrun = TRUE)
+
+
+image_root <- "./output/EffectsOfEnrollmentDelay"
+image_dist_diff_analysis <- paste0(image_root,"/dist_diff_analysis")
+image_trial_details <-paste0(image_root,"/trials_details")
+image_parameters_by_groups <-paste0(image_root,"/betas/parameters_by_group")
+image_parameters_across_groups <-paste0(image_root,"/betas/parameter_across_groups")
+
```
```{r}
@@ -473,7 +481,7 @@ ggplot(data=df3, aes(x=count, fill=final_status)) +
geom_histogram(binwidth=1) +
ggtitle("Histogram of snapshots per trial (matched trials)") +
xlab("Snapshots per trial")
-ggsave("./Images/HistSnapshots.png")
+ggsave(paste0(image_trial_details,"/HistSnapshots.png"))
#Plot duration for terminated vs completed
df4 <- dbGetQuery(
@@ -496,7 +504,7 @@ ggplot(data=df4, aes(x=duration,fill=overall_status)) +
ggtitle("Histogram of trial durations") +
xlab("duration")+
facet_wrap(~overall_status)
-ggsave("./Images/HistTrialDurations_Faceted.png")
+ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png"))
df5 <- dbGetQuery(
con,
@@ -516,7 +524,7 @@ df5 <- dbGetQuery(
)
select a.nct_id, a.overall_status, a.duration,b.snapshot_count
from cte1 as a
- join cte2 as b
+ join cte2 as b
on a.nct_id=b.nct_id
;"
)
@@ -527,7 +535,7 @@ ggplot(data=df5, aes(x=duration,y=snapshot_count,color=overall_status)) +
ggtitle("Comparison of duration, status, and snapshot_count") +
xlab("duration") +
ylab("snapshot count")
-ggsave("./Images/SnapshotsVsDurationVsTermination.png")
+ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png"))
dbDisconnect(con)
@@ -536,14 +544,14 @@ group_trials_by_category <- as.data.frame(aggregate(category_id ~ nct_id, df, ma
group_trials_by_category <- as.data.frame(group_trials_by_category)
ggplot(data = group_trials_by_category, aes(x=category_id)) +
- geom_bar(binwidth=1,color="black",fill="seagreen") +
+ geom_bar(binwidth=1,color="black",fill="lightblue") +
scale_x_continuous(breaks=scales::pretty_breaks(n=22)) +
labs(
title="bar chart of trial categories"
,x="Category ID"
,y="Count"
)
-ggsave("./Images/CategoryCounts.png")
+ggsave(paste0(image_trial_details,"/CategoryCounts.png"))
@@ -574,25 +582,25 @@ print(fit)
gx <- c()
#grab parameters for every category with more than 8 observations
-for (i in category_count$category_id[category_count$n >= 8]) {
+for (i in category_count$category_id[category_count$n >= 0]) {
print(i)
#Print parameter distributions
gi <- group_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
ggsave(
- paste0("./Images/DirectEffects/Parameters/group_",i,"_",gi$name,".png")
+ paste0(image_parameters_by_groups,"/group_",i,"_",gi$name,".png")
,plot=gi$plot
)
gx <- c(gx,gi)
#Get Quantiles and means for parameters
- table <- xtable(gi$quantiles,
- floating=FALSE
- ,latex.environments = NULL
- ,booktabs = TRUE
- ,zap=getOption("digits")
- )
- write_lines(table,paste0("./latex_output/DirectEffects/group_",gi$name,".tex"))
+# table <- xtable(gi$quantiles,
+# floating=FALSE
+# ,latex.environments = NULL
+# ,booktabs = TRUE
+# ,zap=getOption("digits")
+# )
+# write_lines(table,paste0("./latex_output/DirectEffects/group_",gi$name,".tex"))
}
```
@@ -607,19 +615,19 @@ for (i in c(1,2,3,9,10,11,12)) {
#Print parameter distributions
pi <- parameter_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
ggsave(
- paste0("./Images/DirectEffects/Parameters/parameters_",i,"_",pi$name,".png")
+ paste0(image_parameters_across_groups,"/parameters_",i,"_",pi$name,".png")
,plot=pi$plot
)
px <- c(px,pi)
#Get Quantiles and means for parameters
- table <- xtable(pi$quantiles,
- floating=FALSE
- ,latex.environments = NULL
- ,booktabs = TRUE
- ,zap=getOption("digits")
- )
- write_lines(table,paste0("./latex_output/DirectEffects/parameters_",i,"_",pi$name,".tex"))
+# table <- xtable(pi$quantiles,
+# floating=FALSE
+# ,latex.environments = NULL
+# ,booktabs = TRUE
+# ,zap=getOption("digits")
+# )
+# write_lines(table,paste0("./latex_output/DirectEffects/parameters_",i,"_",pi$name,".tex"))
}
```
@@ -631,8 +639,13 @@ Note these have 95% outer CI and 80% inner (shaded)
```{r}
+#get the generic and uspdc parameters
print(px[4]$plot + px[7]$plot)
-ggsave("./Images/DirectEffects/Parameters/2+3_generic_and_uspdc.png")
+ggsave(paste0(image_parameters_across_groups,"2+3_generic_and_uspdc.png"))
+
+#get the parameters associated with duration
+px[16]$plot + px[19]$plot
+ggsave(paste0(image_parameters_across_groups,"11+12_statusREC_and_statusANR.png"))
```
@@ -667,7 +680,7 @@ ggplot(df_ib_p, aes(x=p_prior)) +
,x="Probability Domain 'p'"
,y="Probability Density"
)
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/prior_p.png")
+ggsave(paste0(image_dist_diff_analysis,"/prior_p.png"))
#p_posterior
ggplot(df_ib_p, aes(x=p_predicted)) +
@@ -678,7 +691,7 @@ ggplot(df_ib_p, aes(x=p_predicted)) +
,x="Probability Domain 'p'"
,y="Probability Density"
)
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/posterior_p.png")
+ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png"))
#mu_prior
ggplot(df_ib_prior) +
@@ -689,7 +702,7 @@ ggplot(df_ib_prior) +
,x="Mu"
,y="Probability"
)
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/prior_mu.png")
+ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png"))
#sigma_posterior
ggplot(df_ib_prior) +
@@ -700,7 +713,7 @@ ggplot(df_ib_prior) +
,x="Sigma"
,y="Probability"
)
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/prior_sigma.png")
+ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"))
```
@@ -731,7 +744,7 @@ ggplot(counterfact_predicted_ib, aes(x=p_predicted_default)) +
,x="Probability Domain 'p'"
,y="Probability Density"
)
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/default_p_generic_intervention_base.png")
+ggsave(paste0(image_dist_diff_analysis,"p_no_intervention.png"))
ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
geom_density() +
@@ -741,7 +754,7 @@ ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
,x="Probability Domain 'p'"
,y="Probability Density"
)
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/default_p_generic_intervention_interv.png")
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png"))
ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
geom_density() +
@@ -751,7 +764,7 @@ ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
,x="Difference in 'p' under treatment"
,y="Probability Density"
)
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/default_p_generic_intervention_distdiff.png")
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png"))
```
@@ -773,7 +786,6 @@ category_names <- sapply(1:length(beta_list$groups),
pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) |>
pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168
-#TODO: Fix Category names
pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) df$category_id[i])
pddf_ib["category_name"] <- sapply(
@@ -792,8 +804,7 @@ ggplot(pddf_ib, aes(x=value,)) +
,y = "Probability Density"
) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
- #todo: add median, mean, 40/60 quantiles as well as
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_distdiff_styled.png")
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png"))
ggplot(pddf_ib, aes(x=value,)) +
geom_density(adjust=1/5) +
@@ -812,7 +823,7 @@ ggplot(pddf_ib, aes(x=value,)) +
) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
theme(strip.text.x = element_text(size = 8))
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_distdiff_by_group.png")
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png"))
ggplot(pddf_ib, aes(x=value,)) +
geom_histogram(bins=300) +
@@ -831,7 +842,7 @@ ggplot(pddf_ib, aes(x=value,)) +
) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
theme(strip.text.x = element_text(size = 8))
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_histdiff_by_group.png")
+ggsave(paste0(image_dist_diff_analysis,"p_delay_intervention_histdiff_by_group.png"))
```
@@ -847,6 +858,8 @@ p3 <- ggplot(pddf_ib, aes(x=value,)) +
) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
+density_hight <- max(density(pddf_ib$value)$y)
+
stats <- list(
p5 = quantile(pddf_ib$value, 0.05),
p10 = quantile(pddf_ib$value, 0.10),
@@ -857,7 +870,8 @@ stats <- list(
p90 = quantile(pddf_ib$value, 0.90),
p95 = quantile(pddf_ib$value, 0.95),
max_height = max(ggplot_build(p3)$data[[1]]$count),
- y_offset = -max(ggplot_build(p3)$data[[1]]$count) * 0.05
+ y_offset = -max(ggplot_build(p3)$data[[1]]$count) * 0.05,
+ y_offset_density = -density_hight * 0.15
)
p3 +
@@ -907,7 +921,65 @@ p3 +
x = stats$mean,
y = stats$y_offset * 1.5
), aes(x = x, y = y))
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_histdiff_boxplot.png")
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png"))
+
+
+p4 <- ggplot(pddf_ib, aes(x = value)) +
+ geom_density() +
+ labs(
+ title = "Distribution of predicted differences"
+ ,subtitle = "Intervention: Delay close of enrollment"
+ ,x = "Difference in probability due to intervention"
+ ,y = "Probability Density"
+ ,caption = "Vertical marks: 5/10/25/50/75/90/95th percentiles. Dot shows mean."
+ ) +
+ geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
+ geom_segment(data = data.frame(
+ x = c(stats$q1, stats$q3, stats$med),
+ xend = c(stats$q1, stats$q3, stats$med),
+ y = rep(stats$y_offset_density, 3),
+ yend = rep(stats$y_offset_density * 2, 3)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ geom_segment(data = data.frame(
+ x = rep(stats$q1, 2),
+ xend = rep(stats$q3, 2),
+ y = c(stats$y_offset_density, stats$y_offset_density * 2),
+ yend = c(stats$y_offset_density, stats$y_offset_density * 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Inner whiskers (Q1->P10, Q3->P90)
+ geom_segment(data = data.frame(
+ x = c(stats$q1, stats$q3),
+ xend = c(stats$p10, stats$p90),
+ y = rep(stats$y_offset_density * 1.5, 2),
+ yend = rep(stats$y_offset_density * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P10/P90
+ geom_segment(data = data.frame(
+ x = c(stats$p10, stats$p90),
+ xend = c(stats$p10, stats$p90),
+ y = stats$y_offset_density * 1.3,
+ yend = stats$y_offset_density * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Outer whiskers (P10->P5, P90->P95)
+ geom_segment(data = data.frame(
+ x = c(stats$p10, stats$p90),
+ xend = c(stats$p5, stats$p95),
+ y = rep(stats$y_offset_density * 1.5, 2),
+ yend = rep(stats$y_offset_density * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P5/P95
+ geom_segment(data = data.frame(
+ x = c(stats$p5, stats$p95),
+ xend = c(stats$p5, stats$p95),
+ y = stats$y_offset_density * 1.3,
+ yend = stats$y_offset_density * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Mean dot
+ geom_point(data = data.frame(
+ x = stats$mean,
+ y = stats$y_offset_density * 1.5
+ ), aes(x = x, y = y))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png"))
```
```{r}
@@ -920,7 +992,7 @@ ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_his
y = "Cumulative Proportion"
)
-ggsave("./EffectsOfEnrollmentDelay/Images/DirectEffects/p_delay_intervention_cumulative_distdiff.png")
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png"))
```
@@ -986,8 +1058,10 @@ The simulation above shows that this results in a percentage-point increase of a
```{r}
#| eval: true
#trace plots
+image_diagnostics <- paste0(image_root,"/diagnostics")
+
plot(fit, pars=c("mu"), plotfun="trace")
-ggsave("./EffectsOfEnrollmentDelay/Images/diagnostics/trace_plot_mu.png")
+ggsave(paste0(image_diagnostics,"/trace_plot_mu.png"))
for (i in 1:3) {
@@ -1005,7 +1079,7 @@ for (i in 1:3) {
scale_colour_ghibli_d("KikiMedium")
)
mu_range <- paste0(4*i-3,"-",4*i)
- filename <- paste0("./EffectsOfEnrollmentDelay/Images/diagnostics/trace_rank_plot_mu_",mu_range,".png")
+ filename <- paste0(image_diagnostics,"/trace_rank_plot_mu_",mu_range,".png")
ggsave(filename)
}
```
@@ -1013,7 +1087,7 @@ for (i in 1:3) {
```{r}
#| eval: true
plot(fit, pars=c("sigma"), plotfun="trace")
-ggsave("./EffectsOfEnrollmentDelay/Images/diagnostics/traceplot_sigma.png")
+ggsave(paste0(image_diagnostics,"/traceplot_sigma.png"))
for (i in 1:3) {
print(
@@ -1030,7 +1104,7 @@ for (i in 1:3) {
scale_colour_ghibli_d("KikiMedium")
)
sigma_range <- paste0(4*i-3,"-",4*i)
- filename <- paste0("./EffectsOfEnrollmentDelay/Images/diagnostics/trace_rank_plot_sigma_",sigma_range,".png")
+ filename <- paste0(image_diagnostics,"/trace_rank_plot_sigma_",sigma_range,".png")
ggsave(filename)
}
```
@@ -1049,7 +1123,7 @@ posterior <- as.array(fit)
color_scheme_set("darkgray")
div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4)
mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05)
-ggsave("./EffectsOfEnrollmentDelay/Images/diagnostics/parcoord_mu.png")
+ggsave(paste0(image_diagnostics,"/parcoord_mu.png"))
```
```{r}
@@ -1068,7 +1142,7 @@ for (i in 1:3) {
)
)
mu_range <- paste0(4*i-3,"-",4*i)
- filename <- paste0("./EffectsOfEnrollmentDelay/Images/diagnostics/correlation_plot_mu_",mu_range,".png")
+ filename <- paste0(image_diagnostics,"/correlation_plot_mu_",mu_range,".png")
ggsave(filename)
}
@@ -1079,7 +1153,7 @@ for (i in 1:3) {
```{r}
#| eval: true
mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05)
-ggsave("./EffectsOfEnrollmentDelay/Images/diagnostics/parcoord_sigma.png")
+ggsave(paste0(image_diagnostics,"/parcoord_sigma.png"))
```
```{r}
@@ -1099,7 +1173,7 @@ for (i in 1:3) {
)
)
sigma_range <- paste0(4*i-3,"-",4*i)
- filename <- paste0("./EffectsOfEnrollmentDelay/Images/diagnostics/correlation_plot_sigma_",sigma_range,".png")
+ filename <- paste0(image_diagnostics,"/correlation_plot_sigma_",sigma_range,".png")
ggsave(filename)
}
```
@@ -1123,7 +1197,7 @@ for (i in 1:3) {
)
beta_range <- paste0("k_",k,"_i_",4*i-3,"-",4*i)
- filename <- paste0("./EffectsOfEnrollmentDelay/Images/diagnostics/correlation_plot_beta_",beta_range,".png")
+ filename <- paste0(image_diagnostics,"/correlation_plot_beta_",beta_range,".png")
ggsave(filename)
}}
```
@@ -1133,11 +1207,4 @@ for (i in 1:3) {
-# TODO
-- [ ] Double check data flow. (Write summary of this in human readable form)
- - Is it the data we want from the database
- - Training
- - Counterfactual Evaluation
- - choose a single snapshot per trial.
- - Is the model in STAN well specified.
-- [ ] work on LOO validation of model
+
\ No newline at end of file
diff --git a/r-analysis/EffectsOfEnrollmentDelay_const.qmd b/r-analysis/EffectsOfEnrollmentDelay_const.qmd
new file mode 100644
index 0000000..d50f988
--- /dev/null
+++ b/r-analysis/EffectsOfEnrollmentDelay_const.qmd
@@ -0,0 +1,1214 @@
+---
+title: "The Effects of Recruitment status on completion of clinical trials"
+author: "Will King"
+format: html
+editor: source
+---
+
+
+# Setup
+
+```{r}
+library(knitr)
+library(bayesplot)
+available_mcmc(pattern = "_nuts_")
+library(ggplot2)
+library(patchwork)
+library(tidyverse)
+library(rstan)
+library(tidyr)
+library(ghibli)
+library(xtable)
+#Resources: https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
+
+#save unchanged models instead of recompiling
+rstan_options(auto_write = TRUE)
+#allow for multithreaded sampling
+options(mc.cores = parallel::detectCores())
+
+#test installation, shouldn't get any errors
+#example(stan_model, package = "rstan", run.dontrun = TRUE)
+
+
+image_root <- "./output/const_anr/EffectsOfEnrollmentDelay"
+image_dist_diff_analysis <- paste0(image_root,"/dist_diff_analysis")
+image_trial_details <-paste0(image_root,"/trials_details")
+image_parameters_by_groups <-paste0(image_root,"/betas/parameters_by_group")
+image_parameters_across_groups <-paste0(image_root,"/betas/parameter_across_groups")
+
+```
+
+```{r}
+################ Pull data from database ######################
+library(RPostgreSQL)
+host <- 'aact_db-restored-2025-01-07'
+
+driver <- dbDriver("PostgreSQL")
+
+get_data <- function(driver) {
+
+con <- dbConnect(
+ driver,
+ user='root',
+ password='root',
+ dbname='aact_db',
+ host=host
+ )
+on.exit(dbDisconnect(con))
+
+query <- dbSendQuery(
+ con,
+# "select * from formatted_data_with_planned_enrollment;"
+"
+select
+ fdqpe.nct_id
+ --,fdqpe.start_date
+ --,fdqpe.current_enrollment
+ --,fdqpe.enrollment_category
+ ,fdqpe.current_status
+ ,fdqpe.earliest_date_observed
+ ,fdqpe.elapsed_duration
+ ,fdqpe.n_brands as identical_brands
+ ,ntbtu.brand_name_counts
+ ,fdqpe.category_id
+ ,fdqpe.final_status
+ ,fdqpe.h_sdi_val
+ --,fdqpe.h_sdi_u95
+ --,fdqpe.h_sdi_l95
+ ,fdqpe.hm_sdi_val
+ --,fdqpe.hm_sdi_u95
+ --,fdqpe.hm_sdi_l95
+ ,fdqpe.m_sdi_val
+ --,fdqpe.m_sdi_u95
+ --,fdqpe.m_sdi_l95
+ ,fdqpe.lm_sdi_val
+ --,fdqpe.lm_sdi_u95
+ --,fdqpe.lm_sdi_l95
+ ,fdqpe.l_sdi_val
+ --,fdqpe.l_sdi_u95
+ --,fdqpe.l_sdi_l95
+from formatted_data_with_planned_enrollment fdqpe
+ join \"Formularies\".nct_to_brand_counts_through_uspdc ntbtu
+ on fdqpe.nct_id = ntbtu.nct_id
+order by fdqpe.nct_id, fdqpe.earliest_date_observed
+;
+"
+ )
+df <- fetch(query, n = -1)
+df <- na.omit(df)
+
+query2 <-dbSendQuery(con,"select count(*) from \"DiseaseBurden\".icd10_categories ic where \"level\"=1;")
+n_categories <- fetch(query2, n = -1)
+
+return(list(data=df,ncat=n_categories))
+}
+
+
+get_counterfact_base <- function(driver) {
+
+con <- dbConnect(
+ driver,
+ user='root',
+ password='root',
+ dbname='aact_db',
+ host=host
+ )
+on.exit(dbDisconnect(con))
+
+query <- dbSendQuery(
+ con,
+ "
+ with cte as (
+ --get last recruiting state
+ select fd.nct_id, max(fd.earliest_date_observed),min(fd2.earliest_date_observed) as tmstmp
+ from formatted_data fd
+ join formatted_data fd2
+ on fd.nct_id=fd2.nct_id and fd.earliest_date_observed < fd2.earliest_date_observed
+ where fd.current_status = 'Recruiting'
+ and fd2.current_status = 'Active, not recruiting'
+ group by fd.nct_id
+ )
+ select
+ fdqpe.nct_id
+ --,fdqpe.start_date
+ --,fdqpe.current_enrollment
+ --,fdqpe.enrollment_category
+ ,fdqpe.current_status
+ ,fdqpe.earliest_date_observed
+ ,fdqpe.elapsed_duration
+ ,fdqpe.n_brands as identical_brands
+ ,ntbtu.brand_name_counts
+ ,fdqpe.category_id
+ ,fdqpe.final_status
+ ,fdqpe.h_sdi_val
+ --,fdqpe.h_sdi_u95
+ --,fdqpe.h_sdi_l95
+ ,fdqpe.hm_sdi_val
+ --,fdqpe.hm_sdi_u95
+ --,fdqpe.hm_sdi_l95
+ ,fdqpe.m_sdi_val
+ --,fdqpe.m_sdi_u95
+ --,fdqpe.m_sdi_l95
+ ,fdqpe.lm_sdi_val
+ --,fdqpe.lm_sdi_u95
+ --,fdqpe.lm_sdi_l95
+ ,fdqpe.l_sdi_val
+ --,fdqpe.l_sdi_u95
+ --,fdqpe.l_sdi_l95
+ from formatted_data_with_planned_enrollment fdqpe
+ join \"Formularies\".nct_to_brand_counts_through_uspdc ntbtu
+ on fdqpe.nct_id = ntbtu.nct_id
+ join cte
+ on fdqpe.nct_id = cte.nct_id
+ and fdqpe.earliest_date_observed = cte.tmstmp
+ order by fdqpe.nct_id, fdqpe.earliest_date_observed
+ ;
+ "
+ )
+df <- fetch(query, n = -1)
+df <- na.omit(df)
+
+query2 <-dbSendQuery(con,"select count(*) from \"DiseaseBurden\".icd10_categories ic where \"level\"=1;")
+n_categories <- fetch(query2, n = -1)
+
+return(list(data=df,ncat=n_categories))
+}
+
+
+d <- get_data(driver)
+df <- d$data
+n_categories <- d$ncat
+
+cf <- get_counterfact_base(driver)
+df_counterfact_base <- cf$data
+
+
+
+################ Format Data ###########################
+
+data_formatter <- function(df) {
+categories <- df["category_id"]
+
+x <- df["elapsed_duration"]
+x["identical_brands"] <- asinh(df$identical_brands)
+x["brand_name_counts"] <- asinh(df$brand_name_count)
+x["h_sdi_val"] <- asinh(df$h_sdi_val)
+x["hm_sdi_val"] <- asinh(df$hm_sdi_val)
+x["m_sdi_val"] <- asinh(df$m_sdi_val)
+x["lm_sdi_val"] <- asinh(df$lm_sdi_val)
+x["l_sdi_val"] <- asinh(df$l_sdi_val)
+
+
+#Setup fixed effects
+
+x["status_NYR"] <- ifelse(df["current_status"]=="Not yet recruiting",1,0)
+x["status_EBI"] <- ifelse(df["current_status"]=="Enrolling by invitation",1,0)
+x["status_Rec"] <- ifelse(df["current_status"]=="Recruiting",1,0)
+
+#set base condition
+x["status_ANR"] <- 1 #ifelse(df["current_status"]=="Active, not recruiting",1,0) #
+
+
+y <- ifelse(df["final_status"]=="Terminated",1,0)
+
+#get category list
+
+
+return(list(x=x,y=y))
+}
+
+train <- data_formatter(df)
+counterfact_base <- data_formatter(df_counterfact_base)
+
+categories <- df$category_id
+
+x <- train$x
+y <- train$y
+
+x_cf_base <- counterfact_base$x
+y_cf_base <- counterfact_base$y
+cf_categories <- df_counterfact_base$category_id
+```
+
+
+
+# Fit Model
+
+
+
+
+```{r}
+################################# FIT MODEL #########################################
+inherited_cols <- c(
+ "elapsed_duration"
+ #,"identical_brands"
+ #,"brand_name_counts"
+ ,"h_sdi_val"
+ ,"hm_sdi_val"
+ ,"m_sdi_val"
+ ,"lm_sdi_val"
+ ,"l_sdi_val"
+ ,"status_NYR"# TODO: may need to remove
+ ,"status_EBI"
+ ,"status_Rec"
+ ,"status_ANR"
+)
+```
+
+
+
+```{r}
+beta_list <- list(
+ groups = c(
+ `1`="Infections & Parasites",
+ `2`="Neoplasms",
+ `3`="Blood & Immune system",
+ `4`="Endocrine, Nutritional, and Metabolic",
+ `5`="Mental & Behavioral",
+ `6`="Nervous System",
+ `7`="Eye and Adnexa",
+ `8`="Ear and Mastoid",
+ `9`="Circulatory",
+ `10`="Respiratory",
+ `11`="Digestive",
+ `12`="Skin & Subcutaneaous tissue",
+ `13`="Musculoskeletal",
+ `14`="Genitourinary",
+ `15`="Pregancy, Childbirth, & Puerperium",
+ `16`="Perinatal Period",
+ `17`="Congential",
+ `18`="Symptoms, Signs etc.",
+ `19`="Injury etc.",
+ `20`="External Causes",
+ `21`="Contact with Healthcare",
+ `22`="Special Purposes"
+ ),
+ parameters = c(
+ `1`="Elapsed Duration",
+ # brands
+ `2`="asinh(Generic Brands)",
+ `3`="asinh(Competitors USPDC)",
+ # population
+ `4`="asinh(High SDI)",
+ `5`="asinh(High-Medium SDI)",
+ `6`="asinh(Medium SDI)",
+ `7`="asinh(Low-Medium SDI)",
+ `8`="asinh(Low SDI)",
+ #Status
+ `9`="status_NYR",
+ `10`="status_EBI",
+ `11`="status_Rec",
+ `12`="status_ANR"
+ )
+)
+
+get_parameters <- function(stem,class_list) {
+ #get categories and lengths
+ named <- names(class_list)
+ lengths <- sapply(named, (function (x) length(class_list[[x]])))
+
+ #describe the grid needed
+ iter_list <- sapply(named, (function (x) 1:lengths[x]))
+
+ #generate the list of parameters
+ pardf <- generate_parameter_df(stem, iter_list)
+
+ #add columns with appropriate human-readable names
+ for (name in named) {
+ pardf[paste(name,"_hr",sep="")] <- as.factor(
+ sapply(pardf[name], (function (i) class_list[[name]][i]))
+ )
+ }
+
+ return(pardf)
+}
+
+generate_parameter_df <- function(stem, iter_list) {
+ grid <- expand.grid(iter_list)
+ grid["param_name"] <- grid %>% unite(x,colnames(grid),sep=",")
+ grid["param_name"] <- paste(stem,"[",grid$param_name,"]",sep="")
+ return(grid)
+}
+
+group_mcmc_areas <- function(
+ stem,# = "beta"
+ class_list,# = beta_list
+ stanfit,# = fit
+ group_id,# = 2
+ rename=TRUE,
+ filter=NULL
+ ) {
+
+ #get all parameter names
+ params <- get_parameters(stem,class_list)
+
+ #filter down to parameters of interest
+ params <- filter(params,groups == group_id)
+ #Get dataframe with only the rows of interest
+ filtdata <- as.data.frame(stanfit)[params$param_name]
+ #rename columns
+ if (rename) dimnames(filtdata)[[2]] <- params$parameters_hr
+ #get group name for title
+ group_name <- class_list$groups[group_id]
+ #create area plot with appropriate title
+ p <- mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
+ ggtitle(paste("Parameter distributions for ICD-10 class:",group_name)) +
+ geom_vline(xintercept=seq(-2,2,0.5),color="grey",alpha=0.750)
+
+ d <- pivot_longer(filtdata, everything()) |>
+ group_by(name) |>
+ summarize(
+ mean=mean(value)
+ ,q025 = quantile(value,probs = 0.025)
+ ,q975 = quantile(value,probs = 0.975)
+ ,q05 = quantile(value,probs = 0.05)
+ ,q95 = quantile(value,probs = 0.95)
+ )
+ return(list(plot=p,quantiles=d,name=group_name))
+}
+
+parameter_mcmc_areas <- function(
+ stem,# = "beta"
+ class_list,# = beta_list
+ stanfit,# = fit
+ parameter_id,# = 2
+ rename=TRUE
+ ) {
+ #get all parameter names
+ params <- get_parameters(stem,class_list)
+ #filter down to parameters of interest
+ params <- filter(params,parameters == parameter_id)
+ #Get dataframe with only the rows of interest
+ filtdata <- as.data.frame(stanfit)[params$param_name]
+ #rename columns
+ if (rename) dimnames(filtdata)[[2]] <- params$groups_hr
+ #get group name for title
+ parameter_name <- class_list$parameters[parameter_id]
+ #create area plot with appropriate title
+ p <- mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
+ ggtitle(parameter_name,"Parameter Distribution") +
+ geom_vline(xintercept=seq(-2,2,0.5),color="grey",alpha=0.750)
+
+ d <- pivot_longer(filtdata, everything()) |>
+ group_by(name) |>
+ summarize(
+ mean=mean(value)
+ ,q025 = quantile(value,probs = 0.025)
+ ,q975 = quantile(value,probs = 0.975)
+ ,q05 = quantile(value,probs = 0.05)
+ ,q95 = quantile(value,probs = 0.95)
+ )
+ return(list(plot=p,quantiles=d,name=parameter_name))
+}
+
+
+```
+
+Plan: select all snapshots that are the first to have closed
+enrollment (Rec -> ANR when comparing across snapshots), and then
+```{r}
+#delay intervention
+intervention_enrollment <- x_cf_base[c(inherited_cols,"brand_name_counts", "identical_brands")]
+intervention_enrollment["status_ANR"] <- 0
+intervention_enrollment["status_Rec"] <- 1
+```
+
+```{r}
+counterfact_delay <- list(
+ D = ncol(x),#
+ N = nrow(x),
+ L = n_categories$count,
+ y = as.vector(y),
+ ll = as.vector(categories),
+ x = as.matrix(x),
+ mu_mean = 0,
+ mu_stdev = 0.05,
+ sigma_shape = 4,
+ sigma_rate = 20,
+ Nx = nrow(x_cf_base),
+ llx = as.vector(cf_categories),
+ counterfact_x_tilde = as.matrix(intervention_enrollment),
+ counterfact_x = as.matrix(x_cf_base),
+ status_indexes = c(11,12) #subtract anr from recruiting to get movement from anr to recruiting
+)
+```
+
+```{r}
+fit <- stan(
+ file='Hierarchal_Logistic.stan',
+ data = counterfact_delay,
+ chains = 4,
+ iter = 5000,
+ seed = 11021585
+ )
+```
+
+
+
+## Calculate relative difference in parameters between recruiting and active not recruiting states
+
+
+
+## Explore data
+
+
+```{r}
+#get number of trials and snapshots in each category
+group_trials_by_category <- as.data.frame(aggregate(category_id ~ nct_id, df, max))
+group_trials_by_category <- as.data.frame(group_trials_by_category)
+
+category_count <- group_trials_by_category |> group_by(category_id) |> count()
+
+```
+
+
+```{r}
+################################# DATA EXPLORATION ############################
+driver <- dbDriver("PostgreSQL")
+
+con <- dbConnect(
+ driver,
+ user='root',
+ password='root',
+ dbname='aact_db',
+ host=host
+ )
+#Plot histogram of count of snapshots
+df3 <- dbGetQuery(
+ con,
+ "select nct_id,final_status,count(*) from formatted_data_with_planned_enrollment fdwpe
+ group by nct_id,final_status ;"
+ )
+#df3 <- fetch(query3, n = -1)
+
+ggplot(data=df3, aes(x=count, fill=final_status)) +
+ geom_histogram(binwidth=1) +
+ ggtitle("Histogram of snapshots per trial (matched trials)") +
+ xlab("Snapshots per trial")
+ggsave(paste0(image_trial_details,"/HistSnapshots.png"))
+
+#Plot duration for terminated vs completed
+df4 <- dbGetQuery(
+ con,
+ "
+ select
+ nct_id,
+ start_date ,
+ primary_completion_date,
+ overall_status ,
+ primary_completion_date - start_date as duration
+ from ctgov.studies s
+ where nct_id in (select distinct nct_id from http.download_status ds)
+ ;"
+ )
+#df4 <- fetch(query4, n = -1)
+
+ggplot(data=df4, aes(x=duration,fill=overall_status)) +
+ geom_histogram()+
+ ggtitle("Histogram of trial durations") +
+ xlab("duration")+
+ facet_wrap(~overall_status)
+ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png"))
+
+df5 <- dbGetQuery(
+ con,
+ "
+ with cte1 as (
+ select
+ nct_id,
+ start_date ,
+ primary_completion_date,
+ overall_status ,
+ primary_completion_date - start_date as duration
+ from ctgov.studies s
+ where nct_id in (select distinct nct_id from http.download_status ds)
+ ), cte2 as (
+ select nct_id,count(*) as snapshot_count from formatted_data_with_planned_enrollment fdwpe
+ group by nct_id
+ )
+ select a.nct_id, a.overall_status, a.duration,b.snapshot_count
+ from cte1 as a
+ join cte2 as b
+ on a.nct_id=b.nct_id
+ ;"
+ )
+df5$overall_status <- as.factor(df5$overall_status)
+
+ggplot(data=df5, aes(x=duration,y=snapshot_count,color=overall_status)) +
+ geom_jitter() +
+ ggtitle("Comparison of duration, status, and snapshot_count") +
+ xlab("duration") +
+ ylab("snapshot count")
+ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png"))
+
+dbDisconnect(con)
+
+#get number of trials and snapshots in each category
+group_trials_by_category <- as.data.frame(aggregate(category_id ~ nct_id, df, max))
+group_trials_by_category <- as.data.frame(group_trials_by_category)
+
+ggplot(data = group_trials_by_category, aes(x=category_id)) +
+ geom_bar(binwidth=1,color="black",fill="lightblue") +
+ scale_x_continuous(breaks=scales::pretty_breaks(n=22)) +
+ labs(
+ title="bar chart of trial categories"
+ ,x="Category ID"
+ ,y="Count"
+ )
+ggsave(paste0(image_trial_details,"/CategoryCounts.png"))
+
+
+
+summary(df5)
+
+cor(df5$duration,df5$snapshot_count)
+sum(df5$snapshot_count)
+```
+
+
+
+
+## Fit Results
+
+
+```{r}
+################################# ANALYZE #####################################
+print(fit)
+```
+
+# Parameter Distributions
+
+
+```{r}
+#g1 <- group_mcmc_areas("beta",beta_list,fit,1)
+
+
+gx <- c()
+
+#grab parameters for every category with more than 8 observations
+for (i in category_count$category_id[category_count$n >= 0]) {
+ print(i)
+
+ #Print parameter distributions
+ gi <- group_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
+ ggsave(
+ paste0(image_parameters_by_groups,"/group_",i,"_",gi$name,".png")
+ ,plot=gi$plot
+ )
+ gx <- c(gx,gi)
+
+ #Get Quantiles and means for parameters
+# table <- xtable(gi$quantiles,
+# floating=FALSE
+# ,latex.environments = NULL
+# ,booktabs = TRUE
+# ,zap=getOption("digits")
+# )
+# write_lines(table,paste0("./latex_output/DirectEffects/group_",gi$name,".tex"))
+}
+```
+
+
+
+```{r}
+px <- c()
+
+
+for (i in c(1,2,3,9,10,11,12)) {
+
+ #Print parameter distributions
+ pi <- parameter_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
+ ggsave(
+ paste0(image_parameters_across_groups,"/parameters_",i,"_",pi$name,".png")
+ ,plot=pi$plot
+ )
+ px <- c(px,pi)
+
+ #Get Quantiles and means for parameters
+# table <- xtable(pi$quantiles,
+# floating=FALSE
+# ,latex.environments = NULL
+# ,booktabs = TRUE
+# ,zap=getOption("digits")
+# )
+# write_lines(table,paste0("./latex_output/DirectEffects/parameters_",i,"_",pi$name,".tex"))
+
+}
+```
+
+Note these have 95% outer CI and 80% inner (shaded)
+
+
+
+
+
+```{r}
+#get the generic and uspdc parameters
+print(px[4]$plot + px[7]$plot)
+ggsave(paste0(image_parameters_across_groups,"2+3_generic_and_uspdc.png"))
+
+#get the parameters associated with duration
+px[16]$plot + px[19]$plot
+ggsave(paste0(image_parameters_across_groups,"11+12_statusREC_and_statusANR.png"))
+```
+
+
+# Counterfactuals
+
+```{r}
+generated_ib <- gqs(
+ fit@stanmodel,
+ data=counterfact_delay,
+ draws=as.matrix(fit),
+ seed=11021585
+ )
+```
+
+```{r}
+df_ib_p <- data.frame(
+ p_prior=as.vector(extract(generated_ib, pars="p_prior")$p_prior)
+ ,p_predicted = as.vector(extract(generated_ib, pars="p_predicted")$p_predicted)
+)
+
+df_ib_prior <- data.frame(
+ mu_prior = as.vector(extract(generated_ib, pars="mu_prior")$mu_prior)
+ ,sigma_prior = as.vector(extract(generated_ib, pars="sigma_prior")$sigma_prior)
+)
+
+#p_prior
+ggplot(df_ib_p, aes(x=p_prior)) +
+ geom_density() +
+ labs(
+ title="Implied Prior Distribution P"
+ ,subtitle=""
+ ,x="Probability Domain 'p'"
+ ,y="Probability Density"
+ )
+ggsave(paste0(image_dist_diff_analysis,"/prior_p.png"))
+
+#p_posterior
+ggplot(df_ib_p, aes(x=p_predicted)) +
+ geom_density() +
+ labs(
+ title="Implied Posterior Distribution P"
+ ,subtitle=""
+ ,x="Probability Domain 'p'"
+ ,y="Probability Density"
+ )
+ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png"))
+
+#mu_prior
+ggplot(df_ib_prior) +
+ geom_density(aes(x=mu_prior)) +
+ labs(
+ title="Prior - Mu"
+ ,subtitle="same prior for all Mu values"
+ ,x="Mu"
+ ,y="Probability"
+ )
+ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png"))
+
+#sigma_posterior
+ggplot(df_ib_prior) +
+ geom_density(aes(x=sigma_prior)) +
+ labs(
+ title="Prior - Sigma"
+ ,subtitle="same prior for all Sigma values"
+ ,x="Sigma"
+ ,y="Probability"
+ )
+ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"))
+```
+
+
+
+```{r}
+check_hmc_diagnostics(fit)
+```
+
+
+
+
+
+### Intervention: Delay close of enrollment
+
+```{r}
+counterfact_predicted_ib <- data.frame(
+ p_predicted_default = as.vector(extract(generated_ib, pars="p_predicted_default")$p_predicted_default)
+ ,p_predicted_intervention = as.vector(extract(generated_ib, pars="p_predicted_intervention")$p_predicted_intervention)
+ ,predicted_difference = as.vector(extract(generated_ib, pars="predicted_difference")$predicted_difference)
+)
+
+
+ggplot(counterfact_predicted_ib, aes(x=p_predicted_default)) +
+ geom_density() +
+ labs(
+ title="Predicted Distribution of 'p'"
+ ,subtitle="Intervention: None"
+ ,x="Probability Domain 'p'"
+ ,y="Probability Density"
+ )
+ggsave(paste0(image_dist_diff_analysis,"p_no_intervention.png"))
+
+ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
+ geom_density() +
+ labs(
+ title="Predicted Distribution of 'p'"
+ ,subtitle="Intervention: Delay close of enrollment"
+ ,x="Probability Domain 'p'"
+ ,y="Probability Density"
+ )
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png"))
+
+ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
+ geom_density() +
+ labs(
+ title="Predicted Distribution of differences 'p'"
+ ,subtitle="Intervention: Delay close of enrollment"
+ ,x="Difference in 'p' under treatment"
+ ,y="Probability Density"
+ )
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png"))
+```
+
+
+```{r}
+get_category_count <- function(tbl, id) {
+ result <- tbl$n[tbl$category_id == id]
+ if(length(result) == 0) 0 else result
+}
+
+category_names <- sapply(1:length(beta_list$groups),
+ function(i) sprintf("ICD-10 #%d: %s (n=%d)",
+ i,
+ beta_list$groups[i],
+ get_category_count(category_count, i)))
+
+```
+
+```{r}
+pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) |>
+ pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168
+
+pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
+pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) df$category_id[i])
+pddf_ib["category_name"] <- sapply(
+ pddf_ib$category,
+ function(i) category_names[i]
+ )
+
+
+
+ggplot(pddf_ib, aes(x=value,)) +
+ geom_density(adjust=1/5) +
+ labs(
+ title = "Distribution of predicted differences"
+ ,subtitle = "Intervention: Delay close of enrollment"
+ ,x = "Difference in probability due to intervention"
+ ,y = "Probability Density"
+ ) +
+ geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png"))
+
+ggplot(pddf_ib, aes(x=value,)) +
+ geom_density(adjust=1/5) +
+ facet_wrap(
+ ~factor(
+ category_name,
+ levels=category_names
+ )
+ , labeller = label_wrap_gen(multi_line = TRUE)
+ , ncol=4) +
+ labs(
+ title = "Distribution of predicted differences | By Group"
+ ,subtitle = "Intervention: Delay close of enrollment"
+ ,x = "Difference in probability due to intervention"
+ ,y = "Probability Density"
+ ) +
+ geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
+ theme(strip.text.x = element_text(size = 8))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png"))
+
+ggplot(pddf_ib, aes(x=value,)) +
+ geom_histogram(bins=300) +
+ facet_wrap(
+ ~factor(
+ category_name,
+ levels=category_names
+ )
+ , labeller = label_wrap_gen(multi_line = TRUE)
+ , ncol=4) +
+ labs(
+ title = "Histogram of predicted differences | By Group"
+ ,subtitle = "Intervention: Delay close of enrollment"
+ ,x = "Difference in probability due to intervention"
+ ,y = "Predicted counts"
+ ) +
+ geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
+ theme(strip.text.x = element_text(size = 8))
+ggsave(paste0(image_dist_diff_analysis,"p_delay_intervention_histdiff_by_group.png"))
+```
+
+
+```{r}
+p3 <- ggplot(pddf_ib, aes(x=value,)) +
+ geom_histogram(bins=500) +
+ labs(
+ title = "Distribution of predicted differences"
+ ,subtitle = "Intervention: Delay close of enrollment"
+ ,x = "Difference in probability due to intervention"
+ ,y = "Probability Density"
+ ,caption = "Vertical marks: 5/10/25/50/75/90/95th percentiles. Dot shows mean."
+ ) +
+ geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
+
+density_hight <- max(density(pddf_ib$value)$y)
+
+stats <- list(
+ p5 = quantile(pddf_ib$value, 0.05),
+ p10 = quantile(pddf_ib$value, 0.10),
+ q1 = quantile(pddf_ib$value, 0.25),
+ med = median(pddf_ib$value),
+ mean = mean(pddf_ib$value),
+ q3 = quantile(pddf_ib$value, 0.75),
+ p90 = quantile(pddf_ib$value, 0.90),
+ p95 = quantile(pddf_ib$value, 0.95),
+ max_height = max(ggplot_build(p3)$data[[1]]$count),
+ y_offset = -max(ggplot_build(p3)$data[[1]]$count) * 0.05,
+ y_offset_density = -density_hight * 0.15
+)
+
+p3 +
+ # Box
+ geom_segment(data = data.frame(
+ x = c(stats$q1, stats$q3, stats$med),
+ xend = c(stats$q1, stats$q3, stats$med),
+ y = rep(stats$y_offset, 3),
+ yend = rep(stats$y_offset * 2, 3)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ geom_segment(data = data.frame(
+ x = rep(stats$q1, 2),
+ xend = rep(stats$q3, 2),
+ y = c(stats$y_offset, stats$y_offset * 2),
+ yend = c(stats$y_offset, stats$y_offset * 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Inner whiskers (Q1->P10, Q3->P90)
+ geom_segment(data = data.frame(
+ x = c(stats$q1, stats$q3),
+ xend = c(stats$p10, stats$p90),
+ y = rep(stats$y_offset * 1.5, 2),
+ yend = rep(stats$y_offset * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P10/P90
+ geom_segment(data = data.frame(
+ x = c(stats$p10, stats$p90),
+ xend = c(stats$p10, stats$p90),
+ y = stats$y_offset * 1.3,
+ yend = stats$y_offset * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Outer whiskers (P10->P5, P90->P95)
+ geom_segment(data = data.frame(
+ x = c(stats$p10, stats$p90),
+ xend = c(stats$p5, stats$p95),
+ y = rep(stats$y_offset * 1.5, 2),
+ yend = rep(stats$y_offset * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P5/P95
+ geom_segment(data = data.frame(
+ x = c(stats$p5, stats$p95),
+ xend = c(stats$p5, stats$p95),
+ y = stats$y_offset * 1.3,
+ yend = stats$y_offset * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Mean dot
+ geom_point(data = data.frame(
+ x = stats$mean,
+ y = stats$y_offset * 1.5
+ ), aes(x = x, y = y))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png"))
+
+
+p4 <- ggplot(pddf_ib, aes(x = value)) +
+ geom_density() +
+ labs(
+ title = "Distribution of predicted differences"
+ ,subtitle = "Intervention: Delay close of enrollment"
+ ,x = "Difference in probability due to intervention"
+ ,y = "Probability Density"
+ ,caption = "Vertical marks: 5/10/25/50/75/90/95th percentiles. Dot shows mean."
+ ) +
+ geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
+ geom_segment(data = data.frame(
+ x = c(stats$q1, stats$q3, stats$med),
+ xend = c(stats$q1, stats$q3, stats$med),
+ y = rep(stats$y_offset_density, 3),
+ yend = rep(stats$y_offset_density * 2, 3)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ geom_segment(data = data.frame(
+ x = rep(stats$q1, 2),
+ xend = rep(stats$q3, 2),
+ y = c(stats$y_offset_density, stats$y_offset_density * 2),
+ yend = c(stats$y_offset_density, stats$y_offset_density * 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Inner whiskers (Q1->P10, Q3->P90)
+ geom_segment(data = data.frame(
+ x = c(stats$q1, stats$q3),
+ xend = c(stats$p10, stats$p90),
+ y = rep(stats$y_offset_density * 1.5, 2),
+ yend = rep(stats$y_offset_density * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P10/P90
+ geom_segment(data = data.frame(
+ x = c(stats$p10, stats$p90),
+ xend = c(stats$p10, stats$p90),
+ y = stats$y_offset_density * 1.3,
+ yend = stats$y_offset_density * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Outer whiskers (P10->P5, P90->P95)
+ geom_segment(data = data.frame(
+ x = c(stats$p10, stats$p90),
+ xend = c(stats$p5, stats$p95),
+ y = rep(stats$y_offset_density * 1.5, 2),
+ yend = rep(stats$y_offset_density * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P5/P95
+ geom_segment(data = data.frame(
+ x = c(stats$p5, stats$p95),
+ xend = c(stats$p5, stats$p95),
+ y = stats$y_offset_density * 1.3,
+ yend = stats$y_offset_density * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Mean dot
+ geom_point(data = data.frame(
+ x = stats$mean,
+ y = stats$y_offset_density * 1.5
+ ), aes(x = x, y = y))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png"))
+```
+
+```{r}
+ ggplot(pddf_ib, aes(x=value)) +
+ stat_ecdf(geom='step') +
+ labs(
+ title = "Cumulative distribution of predicted differences",
+ subtitle = "Intervention: Delay close of enrollment",
+ x = "Difference in probability of termination due to intervention",
+ y = "Cumulative Proportion"
+ )
+
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png"))
+```
+
+
+Get the % of differences in the spike around zero
+```{r}
+# get values around and above/below spike
+width <- 0.02
+spike_band_centered_zero <- mean( pddf_ib$value >= -width/2 & pddf_ib$value <= width/2)
+above_spike_band <- mean( pddf_ib$value >= width/2)
+below_spike_band <- mean( pddf_ib$value <= -width/2)
+
+# get mass above and mass below zero
+mass_below_zero <- mean( pddf_ib$value <= 0)
+```
+Looking at the spike around zero, we find that `r spike_band_centered_zero*100`%
+of the probability mass is contained within the band from
+[`r -width*100/2`,`r width*100/2`].
+Additionally, there was `r above_spike_band*100`% of the probability above that
+-- representing those with a general increase in the probability of termination --
+and `r below_spike_band*100`% of the probability mass below the band
+-- representing a decrease in the probability of termination.
+
+On average, if you keep the trial open instead of closing it,
+`r mass_below_zero`% of trials will see a decrease in the probability of termination,
+but, due to the high increase in probability of termination given termination was increased,
+the mean probability of termination increases by `r stats$mean`.
+
+```{r}
+# 5%-iles
+
+summary(pddf_ib$value)
+
+# Create your quantiles
+quants <- quantile(pddf_ib$value, probs = seq(0,1,0.05), type=4)
+
+# Convert to a data frame
+quant_df <- data.frame(
+ Percentile = names(quants),
+ Value = quants
+)
+kable(quant_df)
+```
+
+There seems to be some trials that are highly suceptable to this enrollment delay. Specifically, there were some
+
+```{r}
+n = length(counterfact_predicted_ib$p_predicted_intervention)
+k = 100
+simulated_terminations_intervention <- mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_intervention)))
+simulated_terminations_base <-mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_default)))
+
+simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)/k
+```
+
+The simulation above shows that this results in a percentage-point increase of about
+`r simulated_percentages * 100`.
+
+
+
+
+# Diagnostics
+
+```{r}
+#| eval: true
+#trace plots
+image_diagnostics <- paste0(image_root,"/diagnostics")
+
+plot(fit, pars=c("mu"), plotfun="trace")
+ggsave(paste0(image_diagnostics,"/trace_plot_mu.png"))
+
+
+for (i in 1:3) {
+ print(
+ mcmc_rank_overlay(
+ fit,
+ pars=c(
+ paste0("mu[",4*i-3,"]"),
+ paste0("mu[",4*i-2,"]"),
+ paste0("mu[",4*i-1,"]"),
+ paste0("mu[",4*i,"]")
+ ),
+ n_bins=100
+ )+ legend_move("top") +
+ scale_colour_ghibli_d("KikiMedium")
+ )
+ mu_range <- paste0(4*i-3,"-",4*i)
+ filename <- paste0(image_diagnostics,"/trace_rank_plot_mu_",mu_range,".png")
+ ggsave(filename)
+}
+```
+
+```{r}
+#| eval: true
+plot(fit, pars=c("sigma"), plotfun="trace")
+ggsave(paste0(image_diagnostics,"/traceplot_sigma.png"))
+
+for (i in 1:3) {
+ print(
+ mcmc_rank_overlay(
+ fit,
+ pars=c(
+ paste0("sigma[",4*i-3,"]"),
+ paste0("sigma[",4*i-2,"]"),
+ paste0("sigma[",4*i-1,"]"),
+ paste0("sigma[",4*i,"]")
+ ),
+ n_bins=100
+ )+ legend_move("top") +
+ scale_colour_ghibli_d("KikiMedium")
+ )
+ sigma_range <- paste0(4*i-3,"-",4*i)
+ filename <- paste0(image_diagnostics,"/trace_rank_plot_sigma_",sigma_range,".png")
+ ggsave(filename)
+}
+```
+
+```{r}
+#| eval: true
+#other diagnostics
+logpost <- log_posterior(fit)
+nuts_prmts <- nuts_params(fit)
+posterior <- as.array(fit)
+
+```
+
+```{r}
+#| eval: true
+color_scheme_set("darkgray")
+div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4)
+mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05)
+ggsave(paste0(image_diagnostics,"/parcoord_mu.png"))
+```
+
+```{r}
+#| eval: true
+for (i in 1:3) {
+ mus = sapply(3:0, function(j) paste0("mu[",4*i-j ,"]"))
+ print(
+ mcmc_pairs(
+ posterior,
+ np = nuts_prmts,
+ pars=c(
+ mus,
+ "lp__"
+ ),
+ off_diag_args = list(size = 0.75)
+ )
+ )
+ mu_range <- paste0(4*i-3,"-",4*i)
+ filename <- paste0(image_diagnostics,"/correlation_plot_mu_",mu_range,".png")
+ ggsave(filename)
+}
+
+
+
+```
+
+```{r}
+#| eval: true
+mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05)
+ggsave(paste0(image_diagnostics,"/parcoord_sigma.png"))
+```
+
+```{r}
+#| eval: true
+
+for (i in 1:3) {
+ params = sapply(3:0, function(j) paste0("sigma[",4*i-j ,"]"))
+ print(
+ mcmc_pairs(
+ posterior,
+ np = nuts_prmts,
+ pars=c(
+ params,
+ "lp__"
+ ),
+ off_diag_args = list(size = 0.75)
+ )
+ )
+ sigma_range <- paste0(4*i-3,"-",4*i)
+ filename <- paste0(image_diagnostics,"/correlation_plot_sigma_",sigma_range,".png")
+ ggsave(filename)
+}
+```
+
+
+```{r}
+#| eval: true
+for (k in 1:22) {
+for (i in 1:3) {
+ params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]"))
+ print(
+ mcmc_pairs(
+ posterior,
+ np = nuts_prmts,
+ pars=c(
+ params,
+ "lp__"
+ ),
+ off_diag_args = list(size = 0.75)
+ )
+ )
+
+ beta_range <- paste0("k_",k,"_i_",4*i-3,"-",4*i)
+ filename <- paste0(image_diagnostics,"/correlation_plot_beta_",beta_range,".png")
+ ggsave(filename)
+}}
+```
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/r-analysis/EffectsOfEnrollmentDelay_rebased.qmd b/r-analysis/EffectsOfEnrollmentDelay_rebased.qmd
new file mode 100644
index 0000000..98f0c77
--- /dev/null
+++ b/r-analysis/EffectsOfEnrollmentDelay_rebased.qmd
@@ -0,0 +1,1222 @@
+---
+title: "The Effects of Recruitment status on completion of clinical trials"
+author: "Will King"
+format: html
+editor: source
+---
+
+
+# Setup
+
+```{r}
+library(knitr)
+library(bayesplot)
+available_mcmc(pattern = "_nuts_")
+library(ggplot2)
+library(patchwork)
+library(tidyverse)
+library(rstan)
+library(tidyr)
+library(ghibli)
+library(xtable)
+#Resources: https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
+
+#save unchanged models instead of recompiling
+rstan_options(auto_write = TRUE)
+#allow for multithreaded sampling
+options(mc.cores = parallel::detectCores())
+
+#test installation, shouldn't get any errors
+#example(stan_model, package = "rstan", run.dontrun = TRUE)
+
+
+image_root <- "./output/withdiff/EffectsOfEnrollmentDelay"
+image_dist_diff_analysis <- paste0(image_root,"/dist_diff_analysis")
+image_trial_details <-paste0(image_root,"/trials_details")
+image_parameters_by_groups <-paste0(image_root,"/betas/parameters_by_group")
+image_parameters_across_groups <-paste0(image_root,"/betas/parameter_across_groups")
+
+```
+
+```{r}
+################ Pull data from database ######################
+library(RPostgreSQL)
+host <- 'aact_db-restored-2025-01-07'
+
+driver <- dbDriver("PostgreSQL")
+
+get_data <- function(driver) {
+
+con <- dbConnect(
+ driver,
+ user='root',
+ password='root',
+ dbname='aact_db',
+ host=host
+ )
+on.exit(dbDisconnect(con))
+
+query <- dbSendQuery(
+ con,
+# "select * from formatted_data_with_planned_enrollment;"
+"
+select
+ fdqpe.nct_id
+ --,fdqpe.start_date
+ --,fdqpe.current_enrollment
+ --,fdqpe.enrollment_category
+ ,fdqpe.current_status
+ ,fdqpe.earliest_date_observed
+ ,fdqpe.elapsed_duration
+ ,fdqpe.n_brands as identical_brands
+ ,ntbtu.brand_name_counts
+ ,fdqpe.category_id
+ ,fdqpe.final_status
+ ,fdqpe.h_sdi_val
+ --,fdqpe.h_sdi_u95
+ --,fdqpe.h_sdi_l95
+ ,fdqpe.hm_sdi_val
+ --,fdqpe.hm_sdi_u95
+ --,fdqpe.hm_sdi_l95
+ ,fdqpe.m_sdi_val
+ --,fdqpe.m_sdi_u95
+ --,fdqpe.m_sdi_l95
+ ,fdqpe.lm_sdi_val
+ --,fdqpe.lm_sdi_u95
+ --,fdqpe.lm_sdi_l95
+ ,fdqpe.l_sdi_val
+ --,fdqpe.l_sdi_u95
+ --,fdqpe.l_sdi_l95
+from formatted_data_with_planned_enrollment fdqpe
+ join \"Formularies\".nct_to_brand_counts_through_uspdc ntbtu
+ on fdqpe.nct_id = ntbtu.nct_id
+order by fdqpe.nct_id, fdqpe.earliest_date_observed
+;
+"
+ )
+df <- fetch(query, n = -1)
+df <- na.omit(df)
+
+query2 <-dbSendQuery(con,"select count(*) from \"DiseaseBurden\".icd10_categories ic where \"level\"=1;")
+n_categories <- fetch(query2, n = -1)
+
+return(list(data=df,ncat=n_categories))
+}
+
+
+get_counterfact_base <- function(driver) {
+
+con <- dbConnect(
+ driver,
+ user='root',
+ password='root',
+ dbname='aact_db',
+ host=host
+ )
+on.exit(dbDisconnect(con))
+
+query <- dbSendQuery(
+ con,
+ "
+ with cte as (
+ --get last recruiting state
+ select fd.nct_id, max(fd.earliest_date_observed),min(fd2.earliest_date_observed) as tmstmp
+ from formatted_data fd
+ join formatted_data fd2
+ on fd.nct_id=fd2.nct_id and fd.earliest_date_observed < fd2.earliest_date_observed
+ where fd.current_status = 'Recruiting'
+ and fd2.current_status = 'Active, not recruiting'
+ group by fd.nct_id
+ )
+ select
+ fdqpe.nct_id
+ --,fdqpe.start_date
+ --,fdqpe.current_enrollment
+ --,fdqpe.enrollment_category
+ ,fdqpe.current_status
+ ,fdqpe.earliest_date_observed
+ ,fdqpe.elapsed_duration
+ ,fdqpe.n_brands as identical_brands
+ ,ntbtu.brand_name_counts
+ ,fdqpe.category_id
+ ,fdqpe.final_status
+ ,fdqpe.h_sdi_val
+ --,fdqpe.h_sdi_u95
+ --,fdqpe.h_sdi_l95
+ ,fdqpe.hm_sdi_val
+ --,fdqpe.hm_sdi_u95
+ --,fdqpe.hm_sdi_l95
+ ,fdqpe.m_sdi_val
+ --,fdqpe.m_sdi_u95
+ --,fdqpe.m_sdi_l95
+ ,fdqpe.lm_sdi_val
+ --,fdqpe.lm_sdi_u95
+ --,fdqpe.lm_sdi_l95
+ ,fdqpe.l_sdi_val
+ --,fdqpe.l_sdi_u95
+ --,fdqpe.l_sdi_l95
+ from formatted_data_with_planned_enrollment fdqpe
+ join \"Formularies\".nct_to_brand_counts_through_uspdc ntbtu
+ on fdqpe.nct_id = ntbtu.nct_id
+ join cte
+ on fdqpe.nct_id = cte.nct_id
+ and fdqpe.earliest_date_observed = cte.tmstmp
+ order by fdqpe.nct_id, fdqpe.earliest_date_observed
+ ;
+ "
+ )
+df <- fetch(query, n = -1)
+df <- na.omit(df)
+
+query2 <-dbSendQuery(con,"select count(*) from \"DiseaseBurden\".icd10_categories ic where \"level\"=1;")
+n_categories <- fetch(query2, n = -1)
+
+return(list(data=df,ncat=n_categories))
+}
+
+
+d <- get_data(driver)
+df <- d$data
+n_categories <- d$ncat
+
+cf <- get_counterfact_base(driver)
+df_counterfact_base <- cf$data
+
+
+
+################ Format Data ###########################
+
+data_formatter <- function(df) {
+categories <- df["category_id"]
+
+x <- df["elapsed_duration"]
+x["identical_brands"] <- asinh(df$identical_brands)
+x["brand_name_counts"] <- asinh(df$brand_name_count)
+x["h_sdi_val"] <- asinh(df$h_sdi_val)
+x["hm_sdi_val"] <- asinh(df$hm_sdi_val)
+x["m_sdi_val"] <- asinh(df$m_sdi_val)
+x["lm_sdi_val"] <- asinh(df$lm_sdi_val)
+x["l_sdi_val"] <- asinh(df$l_sdi_val)
+
+
+#Setup fixed effects
+
+x["status_NYR"] <- ifelse(df["current_status"]=="Not yet recruiting",1,0)
+x["status_EBI"] <- ifelse(df["current_status"]=="Enrolling by invitation",1,0)
+x["status_Rec"] <- ifelse(df["current_status"]=="Recruiting",1,0)
+x["status_ANR"] <- ifelse(df["current_status"]=="Active, not recruiting",1,0)
+
+
+y <- ifelse(df["final_status"]=="Terminated",1,0)
+
+#get category list
+
+
+return(list(x=x,y=y))
+}
+
+train <- data_formatter(df)
+counterfact_base <- data_formatter(df_counterfact_base)
+
+categories <- df$category_id
+
+x <- train$x
+y <- train$y
+
+x_cf_base <- counterfact_base$x
+y_cf_base <- counterfact_base$y
+cf_categories <- df_counterfact_base$category_id
+```
+
+
+
+# Fit Model
+
+
+
+
+```{r}
+################################# FIT MODEL #########################################
+inherited_cols <- c(
+ "elapsed_duration"
+ #,"identical_brands"
+ #,"brand_name_counts"
+ ,"h_sdi_val"
+ ,"hm_sdi_val"
+ ,"m_sdi_val"
+ ,"lm_sdi_val"
+ ,"l_sdi_val"
+ ,"status_NYR"# TODO: may need to remove
+ ,"status_EBI"
+ ,"status_Rec"
+ ,"status_ANR"
+)
+```
+
+
+
+```{r}
+beta_list <- list(
+ groups = c(
+ `1`="Infections & Parasites",
+ `2`="Neoplasms",
+ `3`="Blood & Immune system",
+ `4`="Endocrine, Nutritional, and Metabolic",
+ `5`="Mental & Behavioral",
+ `6`="Nervous System",
+ `7`="Eye and Adnexa",
+ `8`="Ear and Mastoid",
+ `9`="Circulatory",
+ `10`="Respiratory",
+ `11`="Digestive",
+ `12`="Skin & Subcutaneaous tissue",
+ `13`="Musculoskeletal",
+ `14`="Genitourinary",
+ `15`="Pregancy, Childbirth, & Puerperium",
+ `16`="Perinatal Period",
+ `17`="Congential",
+ `18`="Symptoms, Signs etc.",
+ `19`="Injury etc.",
+ `20`="External Causes",
+ `21`="Contact with Healthcare",
+ `22`="Special Purposes"
+ ),
+ parameters = c(
+ `1`="Elapsed Duration",
+ # brands
+ `2`="asinh(Generic Brands)",
+ `3`="asinh(Competitors USPDC)",
+ # population
+ `4`="asinh(High SDI)",
+ `5`="asinh(High-Medium SDI)",
+ `6`="asinh(Medium SDI)",
+ `7`="asinh(Low-Medium SDI)",
+ `8`="asinh(Low SDI)",
+ #Status
+ `9`="status_NYR",
+ `10`="status_EBI",
+ `11`="status_Rec",
+ `12`="status_ANR"
+ )
+)
+
+get_parameters <- function(stem,class_list) {
+ #get categories and lengths
+ named <- names(class_list)
+ lengths <- sapply(named, (function (x) length(class_list[[x]])))
+
+ #describe the grid needed
+ iter_list <- sapply(named, (function (x) 1:lengths[x]))
+
+ #generate the list of parameters
+ pardf <- generate_parameter_df(stem, iter_list)
+
+ #add columns with appropriate human-readable names
+ for (name in named) {
+ pardf[paste(name,"_hr",sep="")] <- as.factor(
+ sapply(pardf[name], (function (i) class_list[[name]][i]))
+ )
+ }
+
+ return(pardf)
+}
+
+generate_parameter_df <- function(stem, iter_list) {
+ grid <- expand.grid(iter_list)
+ grid["param_name"] <- grid %>% unite(x,colnames(grid),sep=",")
+ grid["param_name"] <- paste(stem,"[",grid$param_name,"]",sep="")
+ return(grid)
+}
+
+group_mcmc_areas <- function(
+ stem,# = "beta"
+ class_list,# = beta_list
+ stanfit,# = fit
+ group_id,# = 2
+ rename=TRUE,
+ filter=NULL
+ ) {
+
+ #get all parameter names
+ params <- get_parameters(stem,class_list)
+
+ #filter down to parameters of interest
+ params <- filter(params,groups == group_id)
+ #Get dataframe with only the rows of interest
+ filtdata <- as.data.frame(stanfit)[params$param_name]
+ #rename columns
+ if (rename) dimnames(filtdata)[[2]] <- params$parameters_hr
+ #get group name for title
+ group_name <- class_list$groups[group_id]
+ #create area plot with appropriate title
+ p <- mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
+ ggtitle(paste("Parameter distributions for ICD-10 class:",group_name)) +
+ geom_vline(xintercept=seq(-2,2,0.5),color="grey",alpha=0.750)
+
+ d <- pivot_longer(filtdata, everything()) |>
+ group_by(name) |>
+ summarize(
+ mean=mean(value)
+ ,q025 = quantile(value,probs = 0.025)
+ ,q975 = quantile(value,probs = 0.975)
+ ,q05 = quantile(value,probs = 0.05)
+ ,q95 = quantile(value,probs = 0.95)
+ )
+ return(list(plot=p,quantiles=d,name=group_name))
+}
+
+parameter_mcmc_areas <- function(
+ stem,# = "beta"
+ class_list,# = beta_list
+ stanfit,# = fit
+ parameter_id,# = 2
+ rename=TRUE
+ ) {
+ #get all parameter names
+ params <- get_parameters(stem,class_list)
+ #filter down to parameters of interest
+ params <- filter(params,parameters == parameter_id)
+ #Get dataframe with only the rows of interest
+ filtdata <- as.data.frame(stanfit)[params$param_name]
+ #rename columns
+ if (rename) dimnames(filtdata)[[2]] <- params$groups_hr
+ #get group name for title
+ parameter_name <- class_list$parameters[parameter_id]
+ #create area plot with appropriate title
+ p <- mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
+ ggtitle(parameter_name,"Parameter Distribution") +
+ geom_vline(xintercept=seq(-2,2,0.5),color="grey",alpha=0.750)
+
+ d <- pivot_longer(filtdata, everything()) |>
+ group_by(name) |>
+ summarize(
+ mean=mean(value)
+ ,q025 = quantile(value,probs = 0.025)
+ ,q975 = quantile(value,probs = 0.975)
+ ,q05 = quantile(value,probs = 0.05)
+ ,q95 = quantile(value,probs = 0.95)
+ )
+ return(list(plot=p,quantiles=d,name=parameter_name))
+}
+
+
+```
+
+Plan: select all snapshots that are the first to have closed
+enrollment (Rec -> ANR when comparing across snapshots), and then
+```{r}
+#delay intervention
+intervention_enrollment <- x_cf_base[c(inherited_cols,"brand_name_counts", "identical_brands")]
+intervention_enrollment["status_ANR"] <- 0
+intervention_enrollment["status_Rec"] <- 1
+```
+
+```{r}
+counterfact_delay <- list(
+ D = ncol(x),#
+ N = nrow(x),
+ L = n_categories$count,
+ y = as.vector(y),
+ ll = as.vector(categories),
+ x = as.matrix(x),
+ mu_mean = 0,
+ mu_stdev = 0.05,
+ sigma_shape = 4,
+ sigma_rate = 20,
+ Nx = nrow(x_cf_base),
+ llx = as.vector(cf_categories),
+ counterfact_x_tilde = as.matrix(intervention_enrollment),
+ counterfact_x = as.matrix(x_cf_base),
+ status_indexes = c(11,12) #subtract anr from recruiting to get movement from anr to recruiting
+)
+```
+
+```{r}
+fit <- stan(
+ file='Hierarchal_Logistic.stan',
+ data = counterfact_delay,
+ chains = 4,
+ iter = 5000,
+ seed = 11021585
+ )
+```
+
+
+
+## Calculate relative difference in parameters between recruiting and active not recruiting states
+
+
+```{r}
+filt_data <- as.data.frame(extract(fit,pars="status_diff"))
+dimnames(filt_data)[[2]] <- beta_list$groups
+mcmc_areas(filt_data,prob = 0.8, prob_outer = 0.95) +
+ ggtitle("Relative fixed effects across groups", subtitle ="moving from `Active, not recruiting` to `Recruiting`") +
+ geom_vline(xintercept=seq(-2,2,0.5),color="grey",alpha=0.750)
+```
+
+I've got an issue here, because this should be a movement from ANR -> recruiting
+but that is suggesting that
+
+## Explore data
+
+
+```{r}
+#get number of trials and snapshots in each category
+group_trials_by_category <- as.data.frame(aggregate(category_id ~ nct_id, df, max))
+group_trials_by_category <- as.data.frame(group_trials_by_category)
+
+category_count <- group_trials_by_category |> group_by(category_id) |> count()
+
+```
+
+
+```{r}
+################################# DATA EXPLORATION ############################
+driver <- dbDriver("PostgreSQL")
+
+con <- dbConnect(
+ driver,
+ user='root',
+ password='root',
+ dbname='aact_db',
+ host=host
+ )
+#Plot histogram of count of snapshots
+df3 <- dbGetQuery(
+ con,
+ "select nct_id,final_status,count(*) from formatted_data_with_planned_enrollment fdwpe
+ group by nct_id,final_status ;"
+ )
+#df3 <- fetch(query3, n = -1)
+
+ggplot(data=df3, aes(x=count, fill=final_status)) +
+ geom_histogram(binwidth=1) +
+ ggtitle("Histogram of snapshots per trial (matched trials)") +
+ xlab("Snapshots per trial")
+ggsave(paste0(image_trial_details,"/HistSnapshots.png"))
+
+#Plot duration for terminated vs completed
+df4 <- dbGetQuery(
+ con,
+ "
+ select
+ nct_id,
+ start_date ,
+ primary_completion_date,
+ overall_status ,
+ primary_completion_date - start_date as duration
+ from ctgov.studies s
+ where nct_id in (select distinct nct_id from http.download_status ds)
+ ;"
+ )
+#df4 <- fetch(query4, n = -1)
+
+ggplot(data=df4, aes(x=duration,fill=overall_status)) +
+ geom_histogram()+
+ ggtitle("Histogram of trial durations") +
+ xlab("duration")+
+ facet_wrap(~overall_status)
+ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png"))
+
+df5 <- dbGetQuery(
+ con,
+ "
+ with cte1 as (
+ select
+ nct_id,
+ start_date ,
+ primary_completion_date,
+ overall_status ,
+ primary_completion_date - start_date as duration
+ from ctgov.studies s
+ where nct_id in (select distinct nct_id from http.download_status ds)
+ ), cte2 as (
+ select nct_id,count(*) as snapshot_count from formatted_data_with_planned_enrollment fdwpe
+ group by nct_id
+ )
+ select a.nct_id, a.overall_status, a.duration,b.snapshot_count
+ from cte1 as a
+ join cte2 as b
+ on a.nct_id=b.nct_id
+ ;"
+ )
+df5$overall_status <- as.factor(df5$overall_status)
+
+ggplot(data=df5, aes(x=duration,y=snapshot_count,color=overall_status)) +
+ geom_jitter() +
+ ggtitle("Comparison of duration, status, and snapshot_count") +
+ xlab("duration") +
+ ylab("snapshot count")
+ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png"))
+
+dbDisconnect(con)
+
+#get number of trials and snapshots in each category
+group_trials_by_category <- as.data.frame(aggregate(category_id ~ nct_id, df, max))
+group_trials_by_category <- as.data.frame(group_trials_by_category)
+
+ggplot(data = group_trials_by_category, aes(x=category_id)) +
+ geom_bar(binwidth=1,color="black",fill="lightblue") +
+ scale_x_continuous(breaks=scales::pretty_breaks(n=22)) +
+ labs(
+ title="bar chart of trial categories"
+ ,x="Category ID"
+ ,y="Count"
+ )
+ggsave(paste0(image_trial_details,"/CategoryCounts.png"))
+
+
+
+summary(df5)
+
+cor(df5$duration,df5$snapshot_count)
+sum(df5$snapshot_count)
+```
+
+
+
+
+## Fit Results
+
+
+```{r}
+################################# ANALYZE #####################################
+print(fit)
+```
+
+# Parameter Distributions
+
+
+```{r}
+#g1 <- group_mcmc_areas("beta",beta_list,fit,1)
+
+
+gx <- c()
+
+#grab parameters for every category with more than 8 observations
+for (i in category_count$category_id[category_count$n >= 0]) {
+ print(i)
+
+ #Print parameter distributions
+ gi <- group_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
+ ggsave(
+ paste0(image_parameters_by_groups,"/group_",i,"_",gi$name,".png")
+ ,plot=gi$plot
+ )
+ gx <- c(gx,gi)
+
+ #Get Quantiles and means for parameters
+# table <- xtable(gi$quantiles,
+# floating=FALSE
+# ,latex.environments = NULL
+# ,booktabs = TRUE
+# ,zap=getOption("digits")
+# )
+# write_lines(table,paste0("./latex_output/DirectEffects/group_",gi$name,".tex"))
+}
+```
+
+
+
+```{r}
+px <- c()
+
+
+for (i in c(1,2,3,9,10,11,12)) {
+
+ #Print parameter distributions
+ pi <- parameter_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
+ ggsave(
+ paste0(image_parameters_across_groups,"/parameters_",i,"_",pi$name,".png")
+ ,plot=pi$plot
+ )
+ px <- c(px,pi)
+
+ #Get Quantiles and means for parameters
+# table <- xtable(pi$quantiles,
+# floating=FALSE
+# ,latex.environments = NULL
+# ,booktabs = TRUE
+# ,zap=getOption("digits")
+# )
+# write_lines(table,paste0("./latex_output/DirectEffects/parameters_",i,"_",pi$name,".tex"))
+
+}
+```
+
+Note these have 95% outer CI and 80% inner (shaded)
+
+
+
+
+
+```{r}
+#get the generic and uspdc parameters
+print(px[4]$plot + px[7]$plot)
+ggsave(paste0(image_parameters_across_groups,"2+3_generic_and_uspdc.png"))
+
+#get the parameters associated with duration
+px[16]$plot + px[19]$plot
+ggsave(paste0(image_parameters_across_groups,"11+12_statusREC_and_statusANR.png"))
+```
+
+
+# Counterfactuals
+
+```{r}
+generated_ib <- gqs(
+ fit@stanmodel,
+ data=counterfact_delay,
+ draws=as.matrix(fit),
+ seed=11021585
+ )
+```
+
+```{r}
+df_ib_p <- data.frame(
+ p_prior=as.vector(extract(generated_ib, pars="p_prior")$p_prior)
+ ,p_predicted = as.vector(extract(generated_ib, pars="p_predicted")$p_predicted)
+)
+
+df_ib_prior <- data.frame(
+ mu_prior = as.vector(extract(generated_ib, pars="mu_prior")$mu_prior)
+ ,sigma_prior = as.vector(extract(generated_ib, pars="sigma_prior")$sigma_prior)
+)
+
+#p_prior
+ggplot(df_ib_p, aes(x=p_prior)) +
+ geom_density() +
+ labs(
+ title="Implied Prior Distribution P"
+ ,subtitle=""
+ ,x="Probability Domain 'p'"
+ ,y="Probability Density"
+ )
+ggsave(paste0(image_dist_diff_analysis,"/prior_p.png"))
+
+#p_posterior
+ggplot(df_ib_p, aes(x=p_predicted)) +
+ geom_density() +
+ labs(
+ title="Implied Posterior Distribution P"
+ ,subtitle=""
+ ,x="Probability Domain 'p'"
+ ,y="Probability Density"
+ )
+ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png"))
+
+#mu_prior
+ggplot(df_ib_prior) +
+ geom_density(aes(x=mu_prior)) +
+ labs(
+ title="Prior - Mu"
+ ,subtitle="same prior for all Mu values"
+ ,x="Mu"
+ ,y="Probability"
+ )
+ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png"))
+
+#sigma_posterior
+ggplot(df_ib_prior) +
+ geom_density(aes(x=sigma_prior)) +
+ labs(
+ title="Prior - Sigma"
+ ,subtitle="same prior for all Sigma values"
+ ,x="Sigma"
+ ,y="Probability"
+ )
+ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"))
+```
+
+
+
+```{r}
+check_hmc_diagnostics(fit)
+```
+
+
+
+
+
+### Intervention: Delay close of enrollment
+
+```{r}
+counterfact_predicted_ib <- data.frame(
+ p_predicted_default = as.vector(extract(generated_ib, pars="p_predicted_default")$p_predicted_default)
+ ,p_predicted_intervention = as.vector(extract(generated_ib, pars="p_predicted_intervention")$p_predicted_intervention)
+ ,predicted_difference = as.vector(extract(generated_ib, pars="predicted_difference")$predicted_difference)
+)
+
+
+ggplot(counterfact_predicted_ib, aes(x=p_predicted_default)) +
+ geom_density() +
+ labs(
+ title="Predicted Distribution of 'p'"
+ ,subtitle="Intervention: None"
+ ,x="Probability Domain 'p'"
+ ,y="Probability Density"
+ )
+ggsave(paste0(image_dist_diff_analysis,"p_no_intervention.png"))
+
+ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
+ geom_density() +
+ labs(
+ title="Predicted Distribution of 'p'"
+ ,subtitle="Intervention: Delay close of enrollment"
+ ,x="Probability Domain 'p'"
+ ,y="Probability Density"
+ )
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png"))
+
+ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
+ geom_density() +
+ labs(
+ title="Predicted Distribution of differences 'p'"
+ ,subtitle="Intervention: Delay close of enrollment"
+ ,x="Difference in 'p' under treatment"
+ ,y="Probability Density"
+ )
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png"))
+```
+
+
+```{r}
+get_category_count <- function(tbl, id) {
+ result <- tbl$n[tbl$category_id == id]
+ if(length(result) == 0) 0 else result
+}
+
+category_names <- sapply(1:length(beta_list$groups),
+ function(i) sprintf("ICD-10 #%d: %s (n=%d)",
+ i,
+ beta_list$groups[i],
+ get_category_count(category_count, i)))
+
+```
+
+```{r}
+pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) |>
+ pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168
+
+pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
+pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) df$category_id[i])
+pddf_ib["category_name"] <- sapply(
+ pddf_ib$category,
+ function(i) category_names[i]
+ )
+
+
+
+ggplot(pddf_ib, aes(x=value,)) +
+ geom_density(adjust=1/5) +
+ labs(
+ title = "Distribution of predicted differences"
+ ,subtitle = "Intervention: Delay close of enrollment"
+ ,x = "Difference in probability due to intervention"
+ ,y = "Probability Density"
+ ) +
+ geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png"))
+
+ggplot(pddf_ib, aes(x=value,)) +
+ geom_density(adjust=1/5) +
+ facet_wrap(
+ ~factor(
+ category_name,
+ levels=category_names
+ )
+ , labeller = label_wrap_gen(multi_line = TRUE)
+ , ncol=4) +
+ labs(
+ title = "Distribution of predicted differences | By Group"
+ ,subtitle = "Intervention: Delay close of enrollment"
+ ,x = "Difference in probability due to intervention"
+ ,y = "Probability Density"
+ ) +
+ geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
+ theme(strip.text.x = element_text(size = 8))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png"))
+
+ggplot(pddf_ib, aes(x=value,)) +
+ geom_histogram(bins=300) +
+ facet_wrap(
+ ~factor(
+ category_name,
+ levels=category_names
+ )
+ , labeller = label_wrap_gen(multi_line = TRUE)
+ , ncol=4) +
+ labs(
+ title = "Histogram of predicted differences | By Group"
+ ,subtitle = "Intervention: Delay close of enrollment"
+ ,x = "Difference in probability due to intervention"
+ ,y = "Predicted counts"
+ ) +
+ geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
+ theme(strip.text.x = element_text(size = 8))
+ggsave(paste0(image_dist_diff_analysis,"p_delay_intervention_histdiff_by_group.png"))
+```
+
+
+```{r}
+p3 <- ggplot(pddf_ib, aes(x=value,)) +
+ geom_histogram(bins=500) +
+ labs(
+ title = "Distribution of predicted differences"
+ ,subtitle = "Intervention: Delay close of enrollment"
+ ,x = "Difference in probability due to intervention"
+ ,y = "Probability Density"
+ ,caption = "Vertical marks: 5/10/25/50/75/90/95th percentiles. Dot shows mean."
+ ) +
+ geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
+
+density_hight <- max(density(pddf_ib$value)$y)
+
+stats <- list(
+ p5 = quantile(pddf_ib$value, 0.05),
+ p10 = quantile(pddf_ib$value, 0.10),
+ q1 = quantile(pddf_ib$value, 0.25),
+ med = median(pddf_ib$value),
+ mean = mean(pddf_ib$value),
+ q3 = quantile(pddf_ib$value, 0.75),
+ p90 = quantile(pddf_ib$value, 0.90),
+ p95 = quantile(pddf_ib$value, 0.95),
+ max_height = max(ggplot_build(p3)$data[[1]]$count),
+ y_offset = -max(ggplot_build(p3)$data[[1]]$count) * 0.05,
+ y_offset_density = -density_hight * 0.15
+)
+
+p3 +
+ # Box
+ geom_segment(data = data.frame(
+ x = c(stats$q1, stats$q3, stats$med),
+ xend = c(stats$q1, stats$q3, stats$med),
+ y = rep(stats$y_offset, 3),
+ yend = rep(stats$y_offset * 2, 3)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ geom_segment(data = data.frame(
+ x = rep(stats$q1, 2),
+ xend = rep(stats$q3, 2),
+ y = c(stats$y_offset, stats$y_offset * 2),
+ yend = c(stats$y_offset, stats$y_offset * 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Inner whiskers (Q1->P10, Q3->P90)
+ geom_segment(data = data.frame(
+ x = c(stats$q1, stats$q3),
+ xend = c(stats$p10, stats$p90),
+ y = rep(stats$y_offset * 1.5, 2),
+ yend = rep(stats$y_offset * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P10/P90
+ geom_segment(data = data.frame(
+ x = c(stats$p10, stats$p90),
+ xend = c(stats$p10, stats$p90),
+ y = stats$y_offset * 1.3,
+ yend = stats$y_offset * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Outer whiskers (P10->P5, P90->P95)
+ geom_segment(data = data.frame(
+ x = c(stats$p10, stats$p90),
+ xend = c(stats$p5, stats$p95),
+ y = rep(stats$y_offset * 1.5, 2),
+ yend = rep(stats$y_offset * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P5/P95
+ geom_segment(data = data.frame(
+ x = c(stats$p5, stats$p95),
+ xend = c(stats$p5, stats$p95),
+ y = stats$y_offset * 1.3,
+ yend = stats$y_offset * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Mean dot
+ geom_point(data = data.frame(
+ x = stats$mean,
+ y = stats$y_offset * 1.5
+ ), aes(x = x, y = y))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png"))
+
+
+p4 <- ggplot(pddf_ib, aes(x = value)) +
+ geom_density() +
+ labs(
+ title = "Distribution of predicted differences"
+ ,subtitle = "Intervention: Delay close of enrollment"
+ ,x = "Difference in probability due to intervention"
+ ,y = "Probability Density"
+ ,caption = "Vertical marks: 5/10/25/50/75/90/95th percentiles. Dot shows mean."
+ ) +
+ geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
+ geom_segment(data = data.frame(
+ x = c(stats$q1, stats$q3, stats$med),
+ xend = c(stats$q1, stats$q3, stats$med),
+ y = rep(stats$y_offset_density, 3),
+ yend = rep(stats$y_offset_density * 2, 3)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ geom_segment(data = data.frame(
+ x = rep(stats$q1, 2),
+ xend = rep(stats$q3, 2),
+ y = c(stats$y_offset_density, stats$y_offset_density * 2),
+ yend = c(stats$y_offset_density, stats$y_offset_density * 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Inner whiskers (Q1->P10, Q3->P90)
+ geom_segment(data = data.frame(
+ x = c(stats$q1, stats$q3),
+ xend = c(stats$p10, stats$p90),
+ y = rep(stats$y_offset_density * 1.5, 2),
+ yend = rep(stats$y_offset_density * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P10/P90
+ geom_segment(data = data.frame(
+ x = c(stats$p10, stats$p90),
+ xend = c(stats$p10, stats$p90),
+ y = stats$y_offset_density * 1.3,
+ yend = stats$y_offset_density * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Outer whiskers (P10->P5, P90->P95)
+ geom_segment(data = data.frame(
+ x = c(stats$p10, stats$p90),
+ xend = c(stats$p5, stats$p95),
+ y = rep(stats$y_offset_density * 1.5, 2),
+ yend = rep(stats$y_offset_density * 1.5, 2)
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Crossbars at P5/P95
+ geom_segment(data = data.frame(
+ x = c(stats$p5, stats$p95),
+ xend = c(stats$p5, stats$p95),
+ y = stats$y_offset_density * 1.3,
+ yend = stats$y_offset_density * 1.7
+ ), aes(x = x, xend = xend, y = y, yend = yend)) +
+ # Mean dot
+ geom_point(data = data.frame(
+ x = stats$mean,
+ y = stats$y_offset_density * 1.5
+ ), aes(x = x, y = y))
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png"))
+```
+
+```{r}
+ ggplot(pddf_ib, aes(x=value)) +
+ stat_ecdf(geom='step') +
+ labs(
+ title = "Cumulative distribution of predicted differences",
+ subtitle = "Intervention: Delay close of enrollment",
+ x = "Difference in probability of termination due to intervention",
+ y = "Cumulative Proportion"
+ )
+
+ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png"))
+```
+
+
+Get the % of differences in the spike around zero
+```{r}
+# get values around and above/below spike
+width <- 0.02
+spike_band_centered_zero <- mean( pddf_ib$value >= -width/2 & pddf_ib$value <= width/2)
+above_spike_band <- mean( pddf_ib$value >= width/2)
+below_spike_band <- mean( pddf_ib$value <= -width/2)
+
+# get mass above and mass below zero
+mass_below_zero <- mean( pddf_ib$value <= 0)
+```
+Looking at the spike around zero, we find that `r spike_band_centered_zero*100`%
+of the probability mass is contained within the band from
+[`r -width*100/2`,`r width*100/2`].
+Additionally, there was `r above_spike_band*100`% of the probability above that
+-- representing those with a general increase in the probability of termination --
+and `r below_spike_band*100`% of the probability mass below the band
+-- representing a decrease in the probability of termination.
+
+On average, if you keep the trial open instead of closing it,
+`r mass_below_zero`% of trials will see a decrease in the probability of termination,
+but, due to the high increase in probability of termination given termination was increased,
+the mean probability of termination increases by `r stats$mean`.
+
+```{r}
+# 5%-iles
+
+summary(pddf_ib$value)
+
+# Create your quantiles
+quants <- quantile(pddf_ib$value, probs = seq(0,1,0.05), type=4)
+
+# Convert to a data frame
+quant_df <- data.frame(
+ Percentile = names(quants),
+ Value = quants
+)
+kable(quant_df)
+```
+
+There seems to be some trials that are highly suceptable to this enrollment delay. Specifically, there were some
+
+```{r}
+n = length(counterfact_predicted_ib$p_predicted_intervention)
+k = 100
+simulated_terminations_intervention <- mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_intervention)))
+simulated_terminations_base <-mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_default)))
+
+simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)/k
+```
+
+The simulation above shows that this results in a percentage-point increase of about
+`r simulated_percentages * 100`.
+
+
+
+
+# Diagnostics
+
+```{r}
+#| eval: true
+#trace plots
+image_diagnostics <- paste0(image_root,"/diagnostics")
+
+plot(fit, pars=c("mu"), plotfun="trace")
+ggsave(paste0(image_diagnostics,"/trace_plot_mu.png"))
+
+
+for (i in 1:3) {
+ print(
+ mcmc_rank_overlay(
+ fit,
+ pars=c(
+ paste0("mu[",4*i-3,"]"),
+ paste0("mu[",4*i-2,"]"),
+ paste0("mu[",4*i-1,"]"),
+ paste0("mu[",4*i,"]")
+ ),
+ n_bins=100
+ )+ legend_move("top") +
+ scale_colour_ghibli_d("KikiMedium")
+ )
+ mu_range <- paste0(4*i-3,"-",4*i)
+ filename <- paste0(image_diagnostics,"/trace_rank_plot_mu_",mu_range,".png")
+ ggsave(filename)
+}
+```
+
+```{r}
+#| eval: true
+plot(fit, pars=c("sigma"), plotfun="trace")
+ggsave(paste0(image_diagnostics,"/traceplot_sigma.png"))
+
+for (i in 1:3) {
+ print(
+ mcmc_rank_overlay(
+ fit,
+ pars=c(
+ paste0("sigma[",4*i-3,"]"),
+ paste0("sigma[",4*i-2,"]"),
+ paste0("sigma[",4*i-1,"]"),
+ paste0("sigma[",4*i,"]")
+ ),
+ n_bins=100
+ )+ legend_move("top") +
+ scale_colour_ghibli_d("KikiMedium")
+ )
+ sigma_range <- paste0(4*i-3,"-",4*i)
+ filename <- paste0(image_diagnostics,"/trace_rank_plot_sigma_",sigma_range,".png")
+ ggsave(filename)
+}
+```
+
+```{r}
+#| eval: true
+#other diagnostics
+logpost <- log_posterior(fit)
+nuts_prmts <- nuts_params(fit)
+posterior <- as.array(fit)
+
+```
+
+```{r}
+#| eval: true
+color_scheme_set("darkgray")
+div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4)
+mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05)
+ggsave(paste0(image_diagnostics,"/parcoord_mu.png"))
+```
+
+```{r}
+#| eval: true
+for (i in 1:3) {
+ mus = sapply(3:0, function(j) paste0("mu[",4*i-j ,"]"))
+ print(
+ mcmc_pairs(
+ posterior,
+ np = nuts_prmts,
+ pars=c(
+ mus,
+ "lp__"
+ ),
+ off_diag_args = list(size = 0.75)
+ )
+ )
+ mu_range <- paste0(4*i-3,"-",4*i)
+ filename <- paste0(image_diagnostics,"/correlation_plot_mu_",mu_range,".png")
+ ggsave(filename)
+}
+
+
+
+```
+
+```{r}
+#| eval: true
+mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05)
+ggsave(paste0(image_diagnostics,"/parcoord_sigma.png"))
+```
+
+```{r}
+#| eval: true
+
+for (i in 1:3) {
+ params = sapply(3:0, function(j) paste0("sigma[",4*i-j ,"]"))
+ print(
+ mcmc_pairs(
+ posterior,
+ np = nuts_prmts,
+ pars=c(
+ params,
+ "lp__"
+ ),
+ off_diag_args = list(size = 0.75)
+ )
+ )
+ sigma_range <- paste0(4*i-3,"-",4*i)
+ filename <- paste0(image_diagnostics,"/correlation_plot_sigma_",sigma_range,".png")
+ ggsave(filename)
+}
+```
+
+
+```{r}
+#| eval: true
+for (k in 1:22) {
+for (i in 1:3) {
+ params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]"))
+ print(
+ mcmc_pairs(
+ posterior,
+ np = nuts_prmts,
+ pars=c(
+ params,
+ "lp__"
+ ),
+ off_diag_args = list(size = 0.75)
+ )
+ )
+
+ beta_range <- paste0("k_",k,"_i_",4*i-3,"-",4*i)
+ filename <- paste0(image_diagnostics,"/correlation_plot_beta_",beta_range,".png")
+ ggsave(filename)
+}}
+```
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/r-analysis/Hierarchal_Logistic.rds b/r-analysis/Hierarchal_Logistic.rds
index faa4da2..bf18da2 100644
Binary files a/r-analysis/Hierarchal_Logistic.rds and b/r-analysis/Hierarchal_Logistic.rds differ
diff --git a/r-analysis/Hierarchal_Logistic.stan b/r-analysis/Hierarchal_Logistic.stan
index 0fdacae..8aa7a04 100644
--- a/r-analysis/Hierarchal_Logistic.stan
+++ b/r-analysis/Hierarchal_Logistic.stan
@@ -26,6 +26,8 @@ data {
array[Nx] int
llx;//vec of categories
array[Nx] row_vector[D] counterfact_x_tilde; // Posterior Prediction intervention
array[Nx] row_vector[D] counterfact_x; // Posterior Prediction intervention
+ //the two statuses to catch relative difference
+ array[2] int status_indexes; //the two status indexes to compare (order is x[1] - x[2])
}
parameters {
array[D] real mu;
@@ -57,6 +59,13 @@ generated quantities {
real p_predicted_default[Nx]; //predicted p_values
real p_predicted_intervention[Nx]; //predicted p_values
real predicted_difference[Nx]; //difference in predicted values
+
+ //collect array of relative status differences between
+ array[L] real status_diff;
+ //GENERATE RELATIVE DIFFERENCES BETWEEN STATUSES
+ for (l in 1:L) {
+ status_diff[l] = beta[l,status_indexes[1]] - beta[l,status_indexes[2]];
+ }
//GENERATE PRIOR PREDICTIONS
{
diff --git a/r-analysis/Hierarchal_Logistic_with_status_diff.stan b/r-analysis/Hierarchal_Logistic_with_status_diff.stan
new file mode 100644
index 0000000..8aa7a04
--- /dev/null
+++ b/r-analysis/Hierarchal_Logistic_with_status_diff.stan
@@ -0,0 +1,102 @@
+//
+// This Stan program defines a simple model, with a
+// vector of values 'y' modeled as normally distributed
+// with mean 'mu' and standard deviation 'sigma'.
+//
+// Learn more about model development with Stan at:
+//
+// http://mc-stan.org/users/interfaces/rstan.html
+// https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
+//
+
+// The input data is a vector 'y' of length 'N'.
+data {
+ int D; // number of features
+ int N; // number of observations
+ int L; // number of categories
+ array[N] int y; // vector of dependent variables
+ array[N] int ll; // vector of categories
+ array[N] row_vector[D] x; // independent variables
+ real mu_mean; //hyperprior
+ real mu_stdev; //hyperprior
+ real sigma_shape; //hyperprior
+ real sigma_rate; //hyperprior
+ //counterfactuals
+ int Nx;
+ array[Nx] int llx;//vec of categories
+ array[Nx] row_vector[D] counterfact_x_tilde; // Posterior Prediction intervention
+ array[Nx] row_vector[D] counterfact_x; // Posterior Prediction intervention
+ //the two statuses to catch relative difference
+ array[2] int status_indexes; //the two status indexes to compare (order is x[1] - x[2])
+}
+parameters {
+ array[D] real mu;
+ array[D] real sigma;
+ array[L] vector[D] beta;
+}
+model {
+ sigma ~ gamma(sigma_shape,sigma_rate); //hyperprior for stdev: shape and inverse scale
+ mu ~ normal(mu_mean, mu_stdev); //hyperprior for mean //TODO: convert to mvnormal
+ for (l in 1:L) {
+ beta[l] ~ normal(mu, sigma);
+ }
+ {
+ vector[N] x_beta_ll;
+ for (n in 1:N) {
+ x_beta_ll[n] = x[n] * beta[ll[n]];
+ }
+ y ~ bernoulli_logit(x_beta_ll);
+ }
+}
+generated quantities {
+ //SETUP PRIOR PREDICTION
+ //preallocate
+ real mu_prior[D];
+ real sigma_prior[D];
+ real p_prior[N]; // what I have priors about
+ real p_predicted[N]; //predicted p_values
+ //intervention
+ real p_predicted_default[Nx]; //predicted p_values
+ real p_predicted_intervention[Nx]; //predicted p_values
+ real predicted_difference[Nx]; //difference in predicted values
+
+ //collect array of relative status differences between
+ array[L] real status_diff;
+ //GENERATE RELATIVE DIFFERENCES BETWEEN STATUSES
+ for (l in 1:L) {
+ status_diff[l] = beta[l,status_indexes[1]] - beta[l,status_indexes[2]];
+ }
+
+ //GENERATE PRIOR PREDICTIONS
+ {
+ vector[D] beta_prior[L];//local var
+ //sample parameters
+ for (d in 1:D) {
+ mu_prior[d] = normal_rng(mu_mean,mu_stdev);
+ sigma_prior[d] = gamma_rng(sigma_shape,sigma_rate);
+ }
+ for (l in 1:L) {
+ for (d in 1:D) {
+ beta_prior[l,d] = normal_rng(mu_prior[d],sigma_prior[d]);
+ }
+ }
+ //generate probabilities
+ vector[D] b_prior[N];//local var
+ for (n in 1:N){
+ b_prior[n] = beta_prior[ll[n]];
+ p_prior[n] = inv_logit( x[n] * b_prior[n] );
+ }
+ }
+ //GENERATE POSTERIOR PREDICTIONS
+ for (n in 1:N) {
+ p_predicted[n] = inv_logit( x[n] * beta[ll[n]] );
+ }
+ //GENERATE POSTERIOR DISTRIBUTION OF DIFFERENCES
+ for (n in 1:Nx) {
+ p_predicted_default[n] = inv_logit( counterfact_x[n] * beta[llx[n]] );
+ p_predicted_intervention[n] = inv_logit( counterfact_x_tilde[n] * beta[llx[n]] ); //intervention
+
+ //intervention - base case
+ predicted_difference[n] = p_predicted_intervention[n] - p_predicted_default[n];
+ }
+}
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_10_status_EBI.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_10_status_EBI.png
new file mode 100644
index 0000000..2e5f64e
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_10_status_EBI.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_11_status_Rec.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_11_status_Rec.png
new file mode 100644
index 0000000..38897e4
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_11_status_Rec.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_12_status_ANR.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_12_status_ANR.png
new file mode 100644
index 0000000..c76ec47
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_12_status_ANR.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_1_Elapsed Duration.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_1_Elapsed Duration.png
new file mode 100644
index 0000000..566cfe3
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_1_Elapsed Duration.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_2_asinh(Generic Brands).png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_2_asinh(Generic Brands).png
new file mode 100644
index 0000000..5225a16
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_2_asinh(Generic Brands).png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_3_asinh(Competitors USPDC).png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_3_asinh(Competitors USPDC).png
new file mode 100644
index 0000000..f88e9dd
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_3_asinh(Competitors USPDC).png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_9_status_NYR.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_9_status_NYR.png
new file mode 100644
index 0000000..ad19939
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/parameters_9_status_NYR.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups11+12_statusREC_and_statusANR.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups11+12_statusREC_and_statusANR.png
new file mode 100644
index 0000000..dbedd54
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups11+12_statusREC_and_statusANR.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups2+3_generic_and_uspdc.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups2+3_generic_and_uspdc.png
new file mode 100644
index 0000000..2ab4160
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups2+3_generic_and_uspdc.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_10_Respiratory.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_10_Respiratory.png
new file mode 100644
index 0000000..a705ccb
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_10_Respiratory.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_11_Digestive.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_11_Digestive.png
new file mode 100644
index 0000000..a4c834c
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_11_Digestive.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_12_Skin & Subcutaneaous tissue.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_12_Skin & Subcutaneaous tissue.png
new file mode 100644
index 0000000..8339180
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_12_Skin & Subcutaneaous tissue.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_13_Musculoskeletal.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_13_Musculoskeletal.png
new file mode 100644
index 0000000..4ede4e5
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_13_Musculoskeletal.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_14_Genitourinary.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_14_Genitourinary.png
new file mode 100644
index 0000000..87795bc
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_14_Genitourinary.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_17_Congential.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_17_Congential.png
new file mode 100644
index 0000000..5190e33
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_17_Congential.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_18_Symptoms, Signs etc..png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_18_Symptoms, Signs etc..png
new file mode 100644
index 0000000..bf05e1b
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_18_Symptoms, Signs etc..png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_1_Infections & Parasites.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_1_Infections & Parasites.png
new file mode 100644
index 0000000..29ebf13
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_1_Infections & Parasites.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_2_Neoplasms.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_2_Neoplasms.png
new file mode 100644
index 0000000..7b1cf44
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_2_Neoplasms.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_3_Blood & Immune system.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_3_Blood & Immune system.png
new file mode 100644
index 0000000..b3d599e
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_3_Blood & Immune system.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_4_Endocrine, Nutritional, and Metabolic.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_4_Endocrine, Nutritional, and Metabolic.png
new file mode 100644
index 0000000..46fac47
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_4_Endocrine, Nutritional, and Metabolic.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_5_Mental & Behavioral.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_5_Mental & Behavioral.png
new file mode 100644
index 0000000..11a4f98
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_5_Mental & Behavioral.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_6_Nervous System.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_6_Nervous System.png
new file mode 100644
index 0000000..775521e
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_6_Nervous System.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_7_Eye and Adnexa.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_7_Eye and Adnexa.png
new file mode 100644
index 0000000..ee61d76
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_7_Eye and Adnexa.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_9_Circulatory.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_9_Circulatory.png
new file mode 100644
index 0000000..b8d3384
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameters_by_group/group_9_Circulatory.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_10_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_10_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_10_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_10_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_10_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_10_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_10_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_10_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_10_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_11_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_11_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_11_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_11_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_11_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_11_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_11_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_11_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_11_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_12_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_12_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_12_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_12_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_12_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_12_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_12_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_12_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_12_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_13_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_13_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_13_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_13_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_13_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_13_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_13_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_13_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_13_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_14_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_14_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_14_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_14_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_14_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_14_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_14_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_14_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_14_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_15_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_15_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_15_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_15_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_15_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_15_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_15_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_15_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_15_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_16_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_16_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_16_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_16_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_16_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_16_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_16_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_16_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_16_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_17_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_17_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_17_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_17_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_17_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_17_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_17_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_17_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_17_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_18_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_18_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_18_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_18_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_18_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_18_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_18_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_18_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_18_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_19_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_19_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_19_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_19_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_19_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_19_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_19_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_19_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_19_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_1_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_1_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_1_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_1_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_1_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_1_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_1_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_1_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_1_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_20_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_20_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_20_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_20_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_20_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_20_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_20_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_20_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_20_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_21_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_21_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_21_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_21_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_21_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_21_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_21_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_21_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_21_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_22_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_22_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_22_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_22_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_22_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_22_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_22_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_22_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_22_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_2_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_2_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_2_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_2_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_2_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_2_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_2_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_2_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_2_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_3_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_3_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_3_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_3_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_3_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_3_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_3_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_3_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_3_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_4_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_4_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_4_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_4_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_4_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_4_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_4_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_4_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_4_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_5_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_5_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_5_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_5_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_5_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_5_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_5_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_5_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_5_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_6_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_6_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_6_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_6_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_6_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_6_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_6_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_6_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_6_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_7_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_7_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_7_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_7_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_7_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_7_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_7_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_7_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_7_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_8_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_8_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_8_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_8_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_8_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_8_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_8_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_8_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_8_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_9_i_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_9_i_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_9_i_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_9_i_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_9_i_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_9_i_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_9_i_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_9_i_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_beta_k_9_i_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_mu_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_mu_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_mu_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_mu_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_mu_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_mu_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_mu_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_mu_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_mu_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_sigma_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_sigma_1-4.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_sigma_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_sigma_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_sigma_5-8.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_sigma_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_sigma_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_sigma_9-12.png
new file mode 100644
index 0000000..e8b99f0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/correlation_plot_sigma_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/parcoord_mu.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/parcoord_mu.png
new file mode 100644
index 0000000..f642eb9
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/parcoord_mu.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/parcoord_sigma.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/parcoord_sigma.png
new file mode 100644
index 0000000..d220aed
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/parcoord_sigma.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_plot_mu.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_plot_mu.png
new file mode 100644
index 0000000..cc3607e
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_plot_mu.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_1-4.png
new file mode 100644
index 0000000..5a50236
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_5-8.png
new file mode 100644
index 0000000..48322c3
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_9-12.png
new file mode 100644
index 0000000..1884a72
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_1-4.png
new file mode 100644
index 0000000..e2a8545
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_1-4.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_5-8.png
new file mode 100644
index 0000000..bed15e7
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_5-8.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_9-12.png
new file mode 100644
index 0000000..0f99e0a
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_9-12.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/traceplot_sigma.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/traceplot_sigma.png
new file mode 100644
index 0000000..813b7b2
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/traceplot_sigma.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention.png
new file mode 100644
index 0000000..7fea2ef
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_cumulative_distdiff.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_cumulative_distdiff.png
new file mode 100644
index 0000000..2734869
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_cumulative_distdiff.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_1.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_1.png
new file mode 100644
index 0000000..7bd3c2e
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_1.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_boxplot.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_boxplot.png
new file mode 100644
index 0000000..82a1dab
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_boxplot.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_by_group.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_by_group.png
new file mode 100644
index 0000000..8b74c6f
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_by_group.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_styled.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_styled.png
new file mode 100644
index 0000000..dadcb6a
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_styled.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_boxplot.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_boxplot.png
new file mode 100644
index 0000000..41f1afa
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_boxplot.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/posterior_p.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/posterior_p.png
new file mode 100644
index 0000000..bc26966
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/posterior_p.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_mu.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_mu.png
new file mode 100644
index 0000000..6dbb2f8
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_mu.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_p.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_p.png
new file mode 100644
index 0000000..725aee7
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_p.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_sigma.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_sigma.png
new file mode 100644
index 0000000..68022dd
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_sigma.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_delay_intervention_histdiff_by_group.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_delay_intervention_histdiff_by_group.png
new file mode 100644
index 0000000..35f1649
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_delay_intervention_histdiff_by_group.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_no_intervention.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_no_intervention.png
new file mode 100644
index 0000000..8291f95
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_no_intervention.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/CategoryCounts.png b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/CategoryCounts.png
new file mode 100644
index 0000000..4294778
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/CategoryCounts.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistSnapshots.png b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistSnapshots.png
new file mode 100644
index 0000000..092a30d
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistSnapshots.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistTrialDurations_Faceted.png b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistTrialDurations_Faceted.png
new file mode 100644
index 0000000..ae57450
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistTrialDurations_Faceted.png differ
diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/SnapshotsVsDurationVsTermination.png b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/SnapshotsVsDurationVsTermination.png
new file mode 100644
index 0000000..1e99ef0
Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/SnapshotsVsDurationVsTermination.png differ