diff --git a/r-analysis/EffectsOfEnrollmentDelay.rmarkdown b/r-analysis/EffectsOfEnrollmentDelay.rmarkdown index 8b68e04..3927b82 100644 --- a/r-analysis/EffectsOfEnrollmentDelay.rmarkdown +++ b/r-analysis/EffectsOfEnrollmentDelay.rmarkdown @@ -12,6 +12,8 @@ editor: source + + ```{r} library(knitr) library(bayesplot) @@ -41,10 +43,19 @@ image_parameters_by_groups <-paste0(image_root,"/betas/parameters_by_group") image_parameters_across_groups <-paste0(image_root,"/betas/parameter_across_groups") ``` + + +run on `r .QuartoInlineRender(now(tz='UTC'))` + + + + + ```{r} ################ Pull data from database ###################### library(RPostgreSQL) -host <- 'aact_db-restored-2025-01-07' +#host <-'aact_db-restored-2025-01-07' +host <- '10.89.0.6' driver <- dbDriver("PostgreSQL") @@ -441,6 +452,7 @@ counterfact_delay <- list( ``` ```{r} +#| label: Fitting fit <- stan( file='Hierarchal_Logistic.stan', data = counterfact_delay, @@ -509,7 +521,7 @@ ggplot(data=df3, aes(x=count, fill=final_status)) + geom_histogram(binwidth=1) + ggtitle("Histogram of snapshots per trial (matched trials)") + xlab("Snapshots per trial") -ggsave(paste0(image_trial_details,"/HistSnapshots.png")) +ggsave(paste0(image_trial_details,"/HistSnapshots.png"), create.dir = TRUE) #Plot duration for terminated vs completed df4 <- dbGetQuery( @@ -532,7 +544,7 @@ ggplot(data=df4, aes(x=duration,fill=overall_status)) + ggtitle("Histogram of trial durations") + xlab("duration")+ facet_wrap(~overall_status) -ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png")) +ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png"), create.dir = TRUE) df5 <- dbGetQuery( con, @@ -563,7 +575,7 @@ ggplot(data=df5, aes(x=duration,y=snapshot_count,color=overall_status)) + ggtitle("Comparison of duration, status, and snapshot_count") + xlab("duration") + ylab("snapshot count") -ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png")) +ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png"), create.dir = TRUE) dbDisconnect(con) @@ -579,7 +591,7 @@ ggplot(data = group_trials_by_category, aes(x=category_id)) + ,x="Category ID" ,y="Count" ) -ggsave(paste0(image_trial_details,"/CategoryCounts.png")) +ggsave(paste0(image_trial_details,"/CategoryCounts.png"), create.dir = TRUE) @@ -591,8 +603,20 @@ count_snapshots <- sum(df5$snapshot_count) -the correlation value is `r cor_dur_count` between duration and snapshot count. -There are `r count_snapshots` snapshots in total. +the correlation value is `r .QuartoInlineRender(cor_dur_count)` between duration and snapshot count. +There are `r .QuartoInlineRender(count_snapshots)` snapshots in total, spread over +`r .QuartoInlineRender(length(df5$nct_id))` trials. + +The number of categories by trial are +TODO: add in category names, + + +```{r} +group_trials_by_category |> count(category_id) +``` + + + @@ -615,7 +639,7 @@ for (i in category_count$category_id[category_count$n >= 0]) { gi <- group_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups ggsave( paste0(image_parameters_by_groups,"/group_",i,"_",gi$name,".png") - ,plot=gi$plot + ,plot=gi$plot, create.dir = TRUE ) gx <- c(gx,gi) @@ -643,7 +667,7 @@ for (i in c(1,2,3,9,10,11,12)) { pi <- parameter_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups ggsave( paste0(image_parameters_across_groups,"/parameters_",i,"_",pi$name,".png") - ,plot=pi$plot + ,plot=pi$plot, create.dir = TRUE ) px <- c(px,pi) @@ -710,7 +734,7 @@ ggplot(df_ib_p, aes(x=p_prior)) + ,x="Probability Domain 'p'" ,y="Probability Density" ) -ggsave(paste0(image_dist_diff_analysis,"/prior_p.png")) +ggsave(paste0(image_dist_diff_analysis,"/prior_p.png"), create.dir = TRUE) #p_posterior ggplot(df_ib_p, aes(x=p_predicted)) + @@ -721,7 +745,7 @@ ggplot(df_ib_p, aes(x=p_predicted)) + ,x="Probability Domain 'p'" ,y="Probability Density" ) -ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png")) +ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png"), create.dir = TRUE) #mu_prior ggplot(df_ib_prior) + @@ -732,7 +756,7 @@ ggplot(df_ib_prior) + ,x="Mu" ,y="Probability" ) -ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png")) +ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png"), create.dir = TRUE) #sigma_posterior ggplot(df_ib_prior) + @@ -743,7 +767,7 @@ ggplot(df_ib_prior) + ,x="Sigma" ,y="Probability" ) -ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png")) +ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"), create.dir = TRUE) ``` @@ -753,7 +777,7 @@ ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png")) -### Intervention: Delay close of enrollment +# Intervention: Delay close of enrollment @@ -773,7 +797,7 @@ ggplot(counterfact_predicted_ib, aes(x=p_predicted_default)) + ,x="Probability Domain 'p'" ,y="Probability Density" ) -ggsave(paste0(image_dist_diff_analysis,"p_no_intervention.png")) +ggsave(paste0(image_dist_diff_analysis,"/p_no_intervention.png"), create.dir = TRUE) ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) + geom_density() + @@ -783,7 +807,7 @@ ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) + ,x="Probability Domain 'p'" ,y="Probability Density" ) -ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png")) +ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png"), create.dir = TRUE) ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) + geom_density() + @@ -793,7 +817,7 @@ ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) + ,x="Difference in 'p' under treatment" ,y="Probability Density" ) -ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png")) +ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png"), create.dir = TRUE) ``` ```{r} @@ -815,7 +839,7 @@ pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predict pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168 pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name)) -pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) df$category_id[i]) +pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) counterfact_delay$llx[i]) pddf_ib["category_name"] <- sapply( pddf_ib$category, function(i) category_names[i] @@ -832,7 +856,7 @@ ggplot(pddf_ib, aes(x=value,)) + ,y = "Probability Density" ) + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") -ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png")) +ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png"), create.dir = TRUE) ``` ```{r} @@ -853,7 +877,7 @@ ggplot(pddf_ib, aes(x=value,)) + ) + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") + theme(strip.text.x = element_text(size = 8)) -ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png")) +ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png"), create.dir = TRUE) ``` ```{r} @@ -874,7 +898,7 @@ ggplot(pddf_ib, aes(x=value,)) + ) + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") + theme(strip.text.x = element_text(size = 8)) -ggsave(paste0(image_dist_diff_analysis,"p_delay_intervention_histdiff_by_group.png")) +ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_by_group.png"), create.dir = TRUE) ``` ```{r} @@ -897,6 +921,7 @@ stats <- list( q1 = quantile(pddf_ib$value, 0.25), med = median(pddf_ib$value), mean = mean(pddf_ib$value), + stdev = sd(pddf_ib$value), q3 = quantile(pddf_ib$value, 0.75), p90 = quantile(pddf_ib$value, 0.90), p95 = quantile(pddf_ib$value, 0.95), @@ -952,7 +977,7 @@ p3 + x = stats$mean, y = stats$y_offset * 1.5 ), aes(x = x, y = y)) -ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png")) +ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png"), create.dir = TRUE) ``` ```{r} @@ -1011,7 +1036,7 @@ p4 <- ggplot(pddf_ib, aes(x = value)) + x = stats$mean, y = stats$y_offset_density * 1.5 ), aes(x = x, y = y)) -ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png")) +ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png"), create.dir = TRUE) p4 ``` @@ -1026,7 +1051,7 @@ p4 y = "Cumulative Proportion" ) -ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png")) +ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png"), create.dir = TRUE) ``` @@ -1047,18 +1072,18 @@ mass_below_zero <- mean( pddf_ib$value <= 0) ``` -Looking at the spike around zero, we find that `r spike_band_centered_zero*100`% +Looking at the spike around zero, we find that `r .QuartoInlineRender(spike_band_centered_zero*100)`% of the probability mass is contained within the band from -[`r -width*100/2`,`r width*100/2`]. -Additionally, there was `r above_spike_band*100`% of the probability above that +[`r .QuartoInlineRender(-width/2)`,`r .QuartoInlineRender(width/2)`]. +Additionally, there was `r .QuartoInlineRender(above_spike_band*100)`% of the probability above that -- representing those with a general increase in the probability of termination -- -and `r below_spike_band*100`% of the probability mass below the band +and `r .QuartoInlineRender(below_spike_band*100)`% of the probability mass below the band -- representing a decrease in the probability of termination. On average, if you keep the trial open instead of closing it, -`r mass_below_zero`% of trials will see a decrease in the probability of termination, +`r .QuartoInlineRender(mass_below_zero * 100)`% of trials will see a decrease in the probability of termination, but, due to the high increase in probability of termination given termination was increased, -the mean probability of termination increases by `r stats$mean`. +the mean probability of termination increases by `r .QuartoInlineRender(stats$mean (stats$stdev))`. @@ -1077,7 +1102,7 @@ quant_df <- data.frame( ) # Convert to LaTeX table <- xtable(quant_df, - digits = rep(3, ncol(d) + 1), + digits = rep(3, ncol(quant_df) + 1), floating = FALSE, latex.environments = NULL, booktabs = TRUE @@ -1096,7 +1121,7 @@ proportion_increase <- mean(pddf_ib$value >= 0) -about `r proportion_increase * 100` percent probability increase in the probability of terminations +about `r .QuartoInlineRender(proportion_increase * 100)` percent probability increase in the probability of terminations @@ -1106,34 +1131,74 @@ k = 100 simulated_terminations_intervention <- mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_intervention))) simulated_terminations_base <-mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_default))) -simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)/k +simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base) ``` The simulation above shows that this results in a percentage-point change in terminations of about -`r simulated_percentages * 100`. +`r .QuartoInlineRender(simulated_percentages * 100)`. + + + + +```{r} +distdiff_by_group <- pddf_ib %>% + group_by(category_name, category) %>% + summarize( + mean = mean(value, na.rm=TRUE) *100 + ,"P(p>0)" = mean(value>0) *100 + ,"5%" = quantile( value, 0.05, na.rm = TRUE)*100 + ,"25%" = quantile( value, 0.25, na.rm = TRUE)*100 + ,"median" = quantile( value, 0.5, na.rm = TRUE)*100 + ,"75%" = quantile( value, 0.75, na.rm = TRUE)*100 + ,"95%" = quantile( value, 0.95, na.rm = TRUE)*100 + ) %>% arrange(category) %>% select(!category) + +distdif_by_group_latex<- xtable(distdiff_by_group, + digits = rep(2, ncol(distdiff_by_group) +1), + floating = FALSE, + align = "llccccccc", + latex.environments = "tabularx", + booktabs = TRUE +) + +# Write to file +write_lines( + print(distdif_by_group_latex, include.rownames = FALSE), + paste0(image_root,"/distdiff_anr_vs_rec_by_group.tex") +) + +print(distdif_by_group_latex) +distdiff_by_group +``` + + + + ## fixed effects distributions ```{r} +#| label: Fixed Effects Distributions #Get dataframe with only the rows of interest filtdata <- as.data.frame(extract(fit, pars="status_diff")) #rename columns -dimnames(filtdata)[[2]] <- beta_list$groups +dimnames(filtdata)[[2]] <- sprintf("%02d %s", seq_along(beta_list$groups), beta_list$groups) #create area plot with appropriate title mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) + ggtitle("Differences in Fixed Effects | By ICD-10 Category", subtitle = "Moving from 'Active, not recruiting' to 'Recruiting'" ) + geom_vline(xintercept=seq(-0.25,0.5,0.25),color="grey",alpha=0.750) -ggsave(paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.png")) +ggsave(paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.png"), create.dir = TRUE) -d <- pivot_longer(filtdata, everything()) |> +fixed_effects_table <- pivot_longer(filtdata, everything()) |> group_by(name) |> summarize( + `sample size` = 0, mean = mean(value), `P(≥0)` = mean(value >= 0), `2.5%` = quantile(value, probs = 0.025), @@ -1145,18 +1210,26 @@ d <- pivot_longer(filtdata, everything()) |> `97.5%` = quantile(value, probs = 0.975) ) +# Convert the indexing to be consecutive by using a temporary vector +sample_sizes <- rep(0, 22) # Create vector of zeros +sample_sizes[category_count$category_id] <- category_count$n + +# Now assign the whole vector at once +fixed_effects_table["sample size"] <- sample_sizes # Rename the name column -names(d)[1] <- "ICD-10 Category" +names(fixed_effects_table)[1] <- "ICD-10 Category" # Convert to LaTeX -table <- xtable(d, - digits = rep(3, ncol(d) + 1), +table <- xtable(fixed_effects_table, + digits = rep(2, ncol(fixed_effects_table) + 1), floating = FALSE, - latex.environments = NULL, + align = "lccccccccccc", + latex.environments = "tabularx", booktabs = TRUE ) + # Write to file write_lines( print(table, include.rownames = FALSE), @@ -1175,12 +1248,13 @@ write_lines( ```{r} -#| eval: true +#| label: diagnostics 1 +#| eval: false #trace plots image_diagnostics <- paste0(image_root,"/diagnostics") plot(fit, pars=c("mu"), plotfun="trace") -ggsave(paste0(image_diagnostics,"/trace_plot_mu.png")) +ggsave(paste0(image_diagnostics,"/trace_plot_mu.png"), create.dir = TRUE) for (i in 1:3) { @@ -1199,14 +1273,15 @@ for (i in 1:3) { ) mu_range <- paste0(4*i-3,"-",4*i) filename <- paste0(image_diagnostics,"/trace_rank_plot_mu_",mu_range,".png") - ggsave(filename) + ggsave(filename, create.dir = TRUE) } ``` ```{r} -#| eval: true +#| label: diagnostics 2 +#| eval: false plot(fit, pars=c("sigma"), plotfun="trace") -ggsave(paste0(image_diagnostics,"/traceplot_sigma.png")) +ggsave(paste0(image_diagnostics,"/traceplot_sigma.png"), create.dir = TRUE) for (i in 1:3) { print( @@ -1224,12 +1299,13 @@ for (i in 1:3) { ) sigma_range <- paste0(4*i-3,"-",4*i) filename <- paste0(image_diagnostics,"/trace_rank_plot_sigma_",sigma_range,".png") - ggsave(filename) + ggsave(filename, create.dir = TRUE) } ``` ```{r} -#| eval: true +#| label: diagnostics 3 +#| eval: false #other diagnostics logpost <- log_posterior(fit) nuts_prmts <- nuts_params(fit) @@ -1238,15 +1314,17 @@ posterior <- as.array(fit) ``` ```{r} -#| eval: true +#| label: diagnostics 4 +#| eval: false color_scheme_set("darkgray") div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4) mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05) -ggsave(paste0(image_diagnostics,"/parcoord_mu.png")) +ggsave(paste0(image_diagnostics,"/parcoord_mu.png"), create.dir = TRUE) ``` ```{r} -#| eval: true +#| label: diagnostics 5 +#| eval: false for (i in 1:3) { mus = sapply(3:0, function(j) paste0("mu[",4*i-j ,"]")) print( @@ -1262,7 +1340,7 @@ for (i in 1:3) { ) mu_range <- paste0(4*i-3,"-",4*i) filename <- paste0(image_diagnostics,"/correlation_plot_mu_",mu_range,".png") - ggsave(filename) + ggsave(filename, create.dir = TRUE) } @@ -1270,13 +1348,15 @@ for (i in 1:3) { ``` ```{r} -#| eval: true +#| label: diagnostics 6 +#| eval: false mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05) -ggsave(paste0(image_diagnostics,"/parcoord_sigma.png")) +ggsave(paste0(image_diagnostics,"/parcoord_sigma.png"), create.dir = TRUE) ``` ```{r} -#| eval: true +#| label: diagnostics 7 +#| eval: false for (i in 1:3) { params = sapply(3:0, function(j) paste0("sigma[",4*i-j ,"]")) @@ -1293,12 +1373,13 @@ for (i in 1:3) { ) sigma_range <- paste0(4*i-3,"-",4*i) filename <- paste0(image_diagnostics,"/correlation_plot_sigma_",sigma_range,".png") - ggsave(filename) + ggsave(filename, create.dir = TRUE) } ``` ```{r} -#| eval: true +#| eval: false +#| label: diagnostics 8 for (k in 1:22) { for (i in 1:3) { params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]")) @@ -1316,7 +1397,7 @@ for (i in 1:3) { beta_range <- paste0("k_",k,"_i_",4*i-3,"-",4*i) filename <- paste0(image_diagnostics,"/correlation_plot_beta_",beta_range,".png") - ggsave(filename) + ggsave(filename, create.dir = TRUE) }} ``` diff --git a/r-analysis/EoED_Diagnostics.qmd b/r-analysis/EoED_Diagnostics.qmd new file mode 100644 index 0000000..24435d4 --- /dev/null +++ b/r-analysis/EoED_Diagnostics.qmd @@ -0,0 +1,75 @@ +--- +title: "TrialCountExtraction" +author: "Will" +format: html +editor: source +--- + + +```{r} +#| eval: false +#| include: true + +#Full set +categories %>% unique() %>% sort() %>% length() + +#Evaluation set +cf_categories %>% unique() %>% sort() %>% length() + +``` + + +```{r} +# Pulled from df +group_trials_by_category %>% group_by(category_id) %>% count() +``` + +# Actual data from Evaluation and counterfactual +```{r} +# Original Evaluation +# - Pulled from `categories` above when defined +counterfact_delay$ll %>% unique() %>% sort() %>% length() + + +# Counterfactual +# - Pulled from `cf_categories` above when defined +counterfact_delay$llx %>% unique() %>% sort() %>% length() +``` +Those came from +```{r} +df$category_id %>% unique() %>% sort() %>% length() +df_counterfact_base$category_id %>% unique() %>% sort() %>% length() +``` + +The difference between those is that the counterfactual imposes the constraint +that there must be a snapshot where it moves from "ANR" to "Rec", implying that +it can't just terminate. + +# Where do the other values drop + +When we find the counterfactual, the table looses some of the categories etc. +Here is the extracted data + +```{r} +data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) +``` + +```{r} +pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) |> + pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168 + + +pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name)) +pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) counterfact_delay$llx[i]) +pddf_ib["category_name"] <- sapply( + pddf_ib$category, + function(i) category_names[i] + ) +``` +and yet it seems that we predict the difference for all 168 trials + +It looks like there is an error where I apply category IDs. Because I'm pulling them from +```{r} +ground_truth <- df$category_id[1:168] +``` + diff --git a/r-analysis/Hierarchal_Logistic.rds b/r-analysis/Hierarchal_Logistic.rds index 0192834..0d90149 100644 Binary files a/r-analysis/Hierarchal_Logistic.rds and b/r-analysis/Hierarchal_Logistic.rds differ diff --git a/r-analysis/SimplestEED.qmd b/r-analysis/SimplestEED.qmd new file mode 100644 index 0000000..1c761fc --- /dev/null +++ b/r-analysis/SimplestEED.qmd @@ -0,0 +1,1274 @@ +--- +title: "Simplified - The Effects of Recruitment status on completion of clinical trials" +author: "Will King" +format: html +editor: source +--- + +The purpose of this is to help me investigate some of my code. It was not used to perform the analysis. + +# Setup + + + +```{r} +library(knitr) +library(bayesplot) +available_mcmc(pattern = "_nuts_") +library(ggplot2) +library(patchwork) +library(tidyverse) +library(rstan) +library(tidyr) +library(ghibli) +library(xtable) +#Resources: https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started + +#save unchanged models instead of recompiling +rstan_options(auto_write = TRUE) +#allow for multithreaded sampling +options(mc.cores = parallel::detectCores()) + +#test installation, shouldn't get any errors +#example(stan_model, package = "rstan", run.dontrun = TRUE) + + +image_root <- "./output/EffectsOfEnrollmentDelay" +image_dist_diff_analysis <- paste0(image_root,"/dist_diff_analysis") +image_trial_details <-paste0(image_root,"/trials_details") +image_parameters_by_groups <-paste0(image_root,"/betas/parameters_by_group") +image_parameters_across_groups <-paste0(image_root,"/betas/parameter_across_groups") +``` + +run on `{r} now(tz='UTC')` + + + +```{r} +################ Pull data from database ###################### +library(RPostgreSQL) +#host <-'aact_db-restored-2025-01-07' +host <- '10.89.0.6' + +driver <- dbDriver("PostgreSQL") + +get_data <- function(driver) { + +con <- dbConnect( + driver, + user='root', + password='root', + dbname='aact_db', + host=host + ) +on.exit(dbDisconnect(con)) + +query <- dbSendQuery( + con, +# "select * from formatted_data_with_planned_enrollment;" +" +select + fdqpe.nct_id + --,fdqpe.start_date + --,fdqpe.current_enrollment + --,fdqpe.enrollment_category + ,fdqpe.current_status + ,fdqpe.earliest_date_observed + ,fdqpe.elapsed_duration + ,fdqpe.n_brands as identical_brands + ,ntbtu.brand_name_counts + ,fdqpe.category_id + ,fdqpe.final_status + ,fdqpe.h_sdi_val + --,fdqpe.h_sdi_u95 + --,fdqpe.h_sdi_l95 + ,fdqpe.hm_sdi_val + --,fdqpe.hm_sdi_u95 + --,fdqpe.hm_sdi_l95 + ,fdqpe.m_sdi_val + --,fdqpe.m_sdi_u95 + --,fdqpe.m_sdi_l95 + ,fdqpe.lm_sdi_val + --,fdqpe.lm_sdi_u95 + --,fdqpe.lm_sdi_l95 + ,fdqpe.l_sdi_val + --,fdqpe.l_sdi_u95 + --,fdqpe.l_sdi_l95 +from formatted_data_with_planned_enrollment fdqpe + join \"Formularies\".nct_to_brand_counts_through_uspdc ntbtu + on fdqpe.nct_id = ntbtu.nct_id +order by fdqpe.nct_id, fdqpe.earliest_date_observed +; +" + ) +df <- fetch(query, n = -1) +df <- na.omit(df) + +query2 <-dbSendQuery(con,"select count(*) from \"DiseaseBurden\".icd10_categories ic where \"level\"=1;") +n_categories <- fetch(query2, n = -1) + +return(list(data=df,ncat=n_categories)) +} + + +get_counterfact_base <- function(driver) { + +con <- dbConnect( + driver, + user='root', + password='root', + dbname='aact_db', + host=host + ) +on.exit(dbDisconnect(con)) + +query <- dbSendQuery( + con, + " + with cte as ( + --get last recruiting state + select fd.nct_id, max(fd.earliest_date_observed),min(fd2.earliest_date_observed) as tmstmp + from formatted_data fd + join formatted_data fd2 + on fd.nct_id=fd2.nct_id and fd.earliest_date_observed < fd2.earliest_date_observed + where fd.current_status = 'Recruiting' + and fd2.current_status = 'Active, not recruiting' + group by fd.nct_id + ) + select + fdqpe.nct_id + --,fdqpe.start_date + --,fdqpe.current_enrollment + --,fdqpe.enrollment_category + ,fdqpe.current_status + ,fdqpe.earliest_date_observed + ,fdqpe.elapsed_duration + ,fdqpe.n_brands as identical_brands + ,ntbtu.brand_name_counts + ,fdqpe.category_id + ,fdqpe.final_status + ,fdqpe.h_sdi_val + --,fdqpe.h_sdi_u95 + --,fdqpe.h_sdi_l95 + ,fdqpe.hm_sdi_val + --,fdqpe.hm_sdi_u95 + --,fdqpe.hm_sdi_l95 + ,fdqpe.m_sdi_val + --,fdqpe.m_sdi_u95 + --,fdqpe.m_sdi_l95 + ,fdqpe.lm_sdi_val + --,fdqpe.lm_sdi_u95 + --,fdqpe.lm_sdi_l95 + ,fdqpe.l_sdi_val + --,fdqpe.l_sdi_u95 + --,fdqpe.l_sdi_l95 + from formatted_data_with_planned_enrollment fdqpe + join \"Formularies\".nct_to_brand_counts_through_uspdc ntbtu + on fdqpe.nct_id = ntbtu.nct_id + join cte + on fdqpe.nct_id = cte.nct_id + and fdqpe.earliest_date_observed = cte.tmstmp + order by fdqpe.nct_id, fdqpe.earliest_date_observed + ; + " + ) +df <- fetch(query, n = -1) +df <- na.omit(df) + +query2 <-dbSendQuery(con,"select count(*) from \"DiseaseBurden\".icd10_categories ic where \"level\"=1;") +n_categories <- fetch(query2, n = -1) + +return(list(data=df,ncat=n_categories)) +} + + +d <- get_data(driver) +df <- d$data +n_categories <- d$ncat + +cf <- get_counterfact_base(driver) +df_counterfact_base <- cf$data +``` + +```{r} +#number of trials in the base dataset +df$nct_id |> unique() %>% length +#Number of trials in the conterfactual dataset +df_counterfact_base$nct_id %>% unique %>% length + +``` + + +```{r} +################ Format Data ########################### + +data_formatter <- function(df) { +categories <- df["category_id"] + +x <- df["elapsed_duration"] +x["identical_brands"] <- asinh(df$identical_brands) +x["brand_name_counts"] <- asinh(df$brand_name_count) +x["h_sdi_val"] <- asinh(df$h_sdi_val) +x["hm_sdi_val"] <- asinh(df$hm_sdi_val) +x["m_sdi_val"] <- asinh(df$m_sdi_val) +x["lm_sdi_val"] <- asinh(df$lm_sdi_val) +x["l_sdi_val"] <- asinh(df$l_sdi_val) + + +#Setup fixed effects +x["status_NYR"] <- ifelse(df["current_status"]=="Not yet recruiting",1,0) +x["status_EBI"] <- ifelse(df["current_status"]=="Enrolling by invitation",1,0) +x["status_Rec"] <- ifelse(df["current_status"]=="Recruiting",1,0) +x["status_ANR"] <- ifelse(df["current_status"]=="Active, not recruiting",1,0) + + +y <- ifelse(df["final_status"]=="Terminated",1,0) + +#get category list + + +return(list(x=x,y=y)) +} + +train <- data_formatter(df) +counterfact_base <- data_formatter(df_counterfact_base) + +categories <- df$category_id + +x <- as.matrix(train$x) +y <- as.vector(train$y) + +x_cf_base <- counterfact_base$x +y_cf_base <- counterfact_base$y +cf_categories <- df_counterfact_base$category_id +``` + +note how the base training has `{r} length(y)` snapshots while +the counterfactual has `{r} length(y_cf_base)`. + +There are `{r} categories |> unique() %>% length` categories +respresented in the base dataset and +`{r} cf_categories |> unique() %>% length` +categories in the counterfactual dataset + +```{r} +length(y) #Snapshots +length(y_cf_base) # trials + +categories |> unique() %>% length #base categories (snapshots dataset) +cf_categories |> unique() %>% length #counterfactual categories +``` + + +# Explore data + + +```{r} +#get number of trials and snapshots in each category +group_trials_by_category <- as.data.frame(aggregate(category_id ~ nct_id, df, min)) +#SOMETHING GOING ON HERE ^^^^^ +# It looks like it grabs the max category id, i.e. if there are multiple categories assigned, it defaults to the highest + +#Trying something +group_trials_by_category2 <- as.data.frame(aggregate(category_id ~ nct_id, df, max)) + +for (i in 1:22) { + print(group_trials_by_category$category_id[i] - group_trials_by_category2$category_id[i]) +} +#ok, it looks like the max an min give the same categories. So I don't have to worry about multiple classifications +``` + + +```{r} +category_count <- group_trials_by_category |> group_by(category_id) |> count() +category_count +sum(category_count$n) +``` + +Aggregating category counts from df, we get `{r} sum(category_count$n)` + +```{r} +################################# DATA EXPLORATION ############################ +driver <- dbDriver("PostgreSQL") + +con <- dbConnect( + driver, + user='root', + password='root', + dbname='aact_db', + host=host + ) +#Plot histogram of count of snapshots +df3 <- dbGetQuery( + con, + "select nct_id,final_status,count(*) from formatted_data_with_planned_enrollment fdwpe + group by nct_id,final_status ;" + ) +#df3 <- fetch(query3, n = -1) + +ggplot(data=df3, aes(x=count, fill=final_status)) + + geom_histogram(binwidth=1) + + ggtitle("Histogram of snapshots per trial (matched trials)") + + xlab("Snapshots per trial") +#ggsave(paste0(image_trial_details,"/HistSnapshots.png"), create.dir = TRUE) + +#Plot duration for terminated vs completed +df4 <- dbGetQuery( + con, + " + select + nct_id, + start_date , + primary_completion_date, + overall_status , + primary_completion_date - start_date as duration + from ctgov.studies s + where nct_id in (select distinct nct_id from http.download_status ds) + ;" + ) +#df4 <- fetch(query4, n = -1) + +ggplot(data=df4, aes(x=duration,fill=overall_status)) + + geom_histogram()+ + ggtitle("Histogram of trial durations") + + xlab("duration")+ + facet_wrap(~overall_status) +#ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png"), create.dir = TRUE) + +df5 <- dbGetQuery( + con, + " + with cte1 as ( + select + nct_id, + start_date , + primary_completion_date, + overall_status , + primary_completion_date - start_date as duration + from ctgov.studies s + where nct_id in (select distinct nct_id from http.download_status ds) + ), cte2 as ( + select nct_id,count(*) as snapshot_count from formatted_data_with_planned_enrollment fdwpe + group by nct_id + ) + select a.nct_id, a.overall_status, a.duration,b.snapshot_count + from cte1 as a + join cte2 as b + on a.nct_id=b.nct_id + ;" + ) +df5$overall_status <- as.factor(df5$overall_status) + +ggplot(data=df5, aes(x=duration,y=snapshot_count,color=overall_status)) + + geom_jitter() + + ggtitle("Comparison of duration, status, and snapshot_count") + + xlab("duration") + + ylab("snapshot count") +#ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png"), create.dir = TRUE) + +dbDisconnect(con) + +#get number of trials and snapshots in each category +group_trials_by_category <- as.data.frame(aggregate(category_id ~ nct_id, df, max)) +group_trials_by_category <- as.data.frame(group_trials_by_category) + +ggplot(data = group_trials_by_category, aes(x=category_id)) + + geom_bar(binwidth=1,color="black",fill="lightblue") + + scale_x_continuous(breaks=scales::pretty_breaks(n=22)) + + labs( + title="bar chart of trial categories" + ,x="Category ID" + ,y="Count" + ) +#ggsave(paste0(image_trial_details,"/CategoryCounts.png"), create.dir = TRUE) + + + +summary(df5) + +cor_dur_count <- cor(df5$duration,df5$snapshot_count) +count_snapshots <- sum(df5$snapshot_count) +``` + +the correlation value is `{r} cor_dur_count` between duration and snapshot count. +There are `{r} count_snapshots` snapshots in total, spread over +`{r} length(df5%nct_id)` trials. + +The number of categories by trial are +TODO: add in category names, +```{r} +group_trials_by_category |> count(category_id) +``` + +### Tracking differences +OK, all results from df5 are from a different selection process than that of the standard result. +Turns out that there is a single trial in df5 that isn't in the counterfactual list. +```{r} +df5_trials <- df5$nct_id %>% unique +counterfact_trials <- group_trials_by_category$nct_id %>% unique + +length(df5_trials) - length(counterfact_trials) +setdiff(df5_trials, counterfact_trials) +``` + + +# Fit Model + + + + +```{r} +################################# FIT MODEL ######################################### +inherited_cols <- c( + "elapsed_duration" + ,"identical_brands" + ,"brand_name_counts" + ,"h_sdi_val" + ,"hm_sdi_val" + ,"m_sdi_val" + ,"lm_sdi_val" + ,"l_sdi_val" + ,"status_NYR" + ,"status_EBI" + ,"status_Rec" + ,"status_ANR" +) +``` + + + +```{r} +beta_list <- list( + groups = c( + `1`="Infections & Parasites", + `2`="Neoplasms", + `3`="Blood & Immune system", + `4`="Endocrine, Nutritional, and Metabolic", + `5`="Mental & Behavioral", + `6`="Nervous System", + `7`="Eye and Adnexa", + `8`="Ear and Mastoid", + `9`="Circulatory", + `10`="Respiratory", + `11`="Digestive", + `12`="Skin & Subcutaneaous tissue", + `13`="Musculoskeletal", + `14`="Genitourinary", + `15`="Pregancy, Childbirth, & Puerperium", + `16`="Perinatal Period", + `17`="Congential", + `18`="Symptoms, Signs etc.", + `19`="Injury etc.", + `20`="External Causes", + `21`="Contact with Healthcare", + `22`="Special Purposes" + ), + parameters = c( + `1`="Elapsed Duration", + # brands + `2`="asinh(Generic Brands)", + `3`="asinh(Competitors USPDC)", + # population + `4`="asinh(High SDI)", + `5`="asinh(High-Medium SDI)", + `6`="asinh(Medium SDI)", + `7`="asinh(Low-Medium SDI)", + `8`="asinh(Low SDI)", + #Status + `9`="status_NYR", + `10`="status_EBI", + `11`="status_Rec", + `12`="status_ANR" + ) +) + +get_parameters <- function(stem,class_list) { + #get categories and lengths + named <- names(class_list) + lengths <- sapply(named, (function (x) length(class_list[[x]]))) + + #describe the grid needed + iter_list <- sapply(named, (function (x) 1:lengths[x])) + + #generate the list of parameters + pardf <- generate_parameter_df(stem, iter_list) + + #add columns with appropriate human-readable names + for (name in named) { + pardf[paste(name,"_hr",sep="")] <- as.factor( + sapply(pardf[name], (function (i) class_list[[name]][i])) + ) + } + + return(pardf) +} + +generate_parameter_df <- function(stem, iter_list) { + grid <- expand.grid(iter_list) + grid["param_name"] <- grid %>% unite(x,colnames(grid),sep=",") + grid["param_name"] <- paste(stem,"[",grid$param_name,"]",sep="") + return(grid) +} + +group_mcmc_areas <- function( + stem,# = "beta" + class_list,# = beta_list + stanfit,# = fit + group_id,# = 2 + rename=TRUE, + filter=NULL + ) { + + #get all parameter names + params <- get_parameters(stem,class_list) + + #filter down to parameters of interest + params <- filter(params,groups == group_id) + #Get dataframe with only the rows of interest + filtdata <- as.data.frame(stanfit)[params$param_name] + #rename columns + if (rename) dimnames(filtdata)[[2]] <- params$parameters_hr + #get group name for title + group_name <- class_list$groups[group_id] + #create area plot with appropriate title + p <- mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) + + ggtitle(paste("Parameter distributions for ICD-10 class:",group_name)) + + geom_vline(xintercept=seq(-2,2,0.5),color="grey",alpha=0.750) + + d <- pivot_longer(filtdata, everything()) |> + group_by(name) |> + summarize( + mean=mean(value) + ,q025 = quantile(value,probs = 0.025) + ,q975 = quantile(value,probs = 0.975) + ,q05 = quantile(value,probs = 0.05) + ,q95 = quantile(value,probs = 0.95) + ) + return(list(plot=p,quantiles=d,name=group_name)) +} + +parameter_mcmc_areas <- function( + stem,# = "beta" + class_list,# = beta_list + stanfit,# = fit + parameter_id,# = 2 + rename=TRUE + ) { + #get all parameter names + params <- get_parameters(stem,class_list) + #filter down to parameters of interest + params <- filter(params,parameters == parameter_id) + #Get dataframe with only the rows of interest + filtdata <- as.data.frame(stanfit)[params$param_name] + #rename columns + if (rename) dimnames(filtdata)[[2]] <- params$groups_hr + #get group name for title + parameter_name <- class_list$parameters[parameter_id] + #create area plot with appropriate title + p <- mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) + + ggtitle(parameter_name,"Parameter Distribution") + + geom_vline(xintercept=seq(-2,2,0.5),color="grey",alpha=0.750) + + d <- pivot_longer(filtdata, everything()) |> + group_by(name) |> + summarize( + mean=mean(value) + ,q025 = quantile(value,probs = 0.025) + ,q975 = quantile(value,probs = 0.975) + ,q05 = quantile(value,probs = 0.05) + ,q95 = quantile(value,probs = 0.95) + ) + return(list(plot=p,quantiles=d,name=parameter_name)) +} + + +``` + +Plan: select all snapshots that are the first to have closed enrollment (Rec -> ANR) +```{r} +#delay intervention +intervention_enrollment <- x_cf_base[c(inherited_cols)] +intervention_enrollment["status_ANR"] <- 0 +intervention_enrollment["status_Rec"] <- 1 +``` + +There are `{r} length(intervention_enrollment$identical_brands)` trials in the intervention enrollment dataset + +```{r} + +counterfact_delay <- list( + D = ncol(x), + N = nrow(x), + L = n_categories$count, + y = y, + ll = as.vector(categories), + x = x, + mu_mean = 0, + mu_stdev = 0.05, + sigma_location = -2.1, + sigma_scale = 0.2, + Nx = nrow(x_cf_base), + llx = as.vector(cf_categories), + counterfact_x_tilde = as.matrix(intervention_enrollment), + counterfact_x = as.matrix(x_cf_base), + status_indexes = c(11,12) #subtract anr from recruiting to get movement from anr to recruiting + +) +``` + + +Ok, so I cut out the estimation because there is some problem of some sort. And I'm not really working on that right now. + +TODO: Next step: get this working, then figure out how many trials I analyzed, and if they match the DF5 trials list +```{r} +#| label: Fitting +#| eval: false +#| include: true +fit <- stan( + file='Hierarchal_Logistic.stan', + data = counterfact_delay, + chains = 8, + iter = 12000, + warmup = 4000, + seed = 11021585 + ) +``` + + +## Fit Results +```{r} +print(check_hmc_diagnostics(fit)) +print(get_bfmi(fit)) +``` + + +```{r} +print(fit) +``` + + + + + + + + +# Parameter Distributions + + +```{r} +#| eval: false +#| include: true +#g1 <- group_mcmc_areas("beta",beta_list,fit,1) + + +gx <- c() + +#grab parameters for every category with more than 8 observations +for (i in category_count$category_id[category_count$n >= 0]) { + print(i) + + #Print parameter distributions + gi <- group_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups + #ggsave( + paste0(image_parameters_by_groups,"/group_",i,"_",gi$name,".png") + ,plot=gi$plot, create.dir = TRUE + ) + gx <- c(gx,gi) + + #Get Quantiles and means for parameters + table <- xtable(gi$quantiles, + floating=FALSE + ,latex.environments = NULL + ,booktabs = TRUE + ,zap=getOption("digits") + ) + write_lines( + table, + paste0(image_parameters_by_groups,"/group_table_",i,"_",gi$name,".tex") + ) +} +``` + + + +```{r} +#| eval: false +#| include: true +px <- c() + + +for (i in c(1,2,3,9,10,11,12)) { + + #Print parameter distributions + pi <- parameter_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups + #ggsave( + paste0(image_parameters_across_groups,"/parameters_",i,"_",pi$name,".png") + ,plot=pi$plot, create.dir = TRUE + ) + px <- c(px,pi) + + #Get Quantiles and means for parameters + table <- xtable(pi$quantiles, + floating=FALSE + ,latex.environments = NULL + ,booktabs = TRUE + ,zap=getOption("digits") + ) + write_lines( + table, + paste0(image_parameters_across_groups,"/parameters_tables_",i,"_",pi$name,".tex") + ) + +} +``` + +Note these have 95% outer CI and 80% inner (shaded) + + + + + + + +# Priors +```{r} +#| eval: false +#| include: true +df_ib_p <- data.frame( + p_prior=as.vector(extract(generated_ib, pars="p_prior")$p_prior) + ,p_predicted = as.vector(extract(generated_ib, pars="p_predicted")$p_predicted) +) + +df_ib_prior <- data.frame( + mu_prior = as.vector(extract(generated_ib, pars="mu_prior")$mu_prior) + ,sigma_prior = as.vector(extract(generated_ib, pars="sigma_prior")$sigma_prior) +) +``` + + +```{r} +#| eval: false +#| include: true +#p_prior +ggplot(df_ib_p, aes(x=p_prior)) + + geom_density() + + labs( + title="Implied Prior Distribution P" + ,subtitle="" + ,x="Probability Domain 'p'" + ,y="Probability Density" + ) +#ggsave(paste0(image_dist_diff_analysis,"/prior_p.png"), create.dir = TRUE) + +#p_posterior +ggplot(df_ib_p, aes(x=p_predicted)) + + geom_density() + + labs( + title="Implied Posterior Distribution P" + ,subtitle="" + ,x="Probability Domain 'p'" + ,y="Probability Density" + ) +#ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png"), create.dir = TRUE) + +#mu_prior +ggplot(df_ib_prior) + + geom_density(aes(x=mu_prior)) + + labs( + title="Prior - Mu" + ,subtitle="same prior for all Mu values" + ,x="Mu" + ,y="Probability" + ) +#ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png"), create.dir = TRUE) + +#sigma_posterior +ggplot(df_ib_prior) + + geom_density(aes(x=sigma_prior)) + + labs( + title="Prior - Sigma" + ,subtitle="same prior for all Sigma values" + ,x="Sigma" + ,y="Probability" + ) +#ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"), create.dir = TRUE) +``` + + + + + + +# Counterfactual: Delay close of enrollment +```{r} +generated_ib <- gqs( + fit@stanmodel, + data=counterfact_delay, + draws=as.matrix(fit), + seed=11021585 + ) +``` + +```{r} +counterfact_predicted_ib <- data.frame( + p_predicted_default = as.vector(extract(generated_ib, pars="p_predicted_default")$p_predicted_default) + ,p_predicted_intervention = as.vector(extract(generated_ib, pars="p_predicted_intervention")$p_predicted_intervention) + ,predicted_difference = as.vector(extract(generated_ib, pars="predicted_difference")$predicted_difference) +) +``` + + +```{r} +#| eval: false +#| include: true +ggplot(counterfact_predicted_ib, aes(x=p_predicted_default)) + + geom_density() + + labs( + title="Predicted Distribution of 'p'" + ,subtitle="Intervention: None" + ,x="Probability Domain 'p'" + ,y="Probability Density" + ) +##ggsave(paste0(image_dist_diff_analysis,"/p_no_intervention.png"), create.dir = TRUE) + +ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) + + geom_density() + + labs( + title="Predicted Distribution of 'p'" + ,subtitle="Intervention: Delay close of enrollment" + ,x="Probability Domain 'p'" + ,y="Probability Density" + ) +##ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png"), create.dir = TRUE) + +ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) + + geom_density() + + labs( + title="Predicted Distribution of differences 'p'" + ,subtitle="Intervention: Delay close of enrollment" + ,x="Difference in 'p' under delay" + ,y="Probability Density" + ) +##ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png"), create.dir = TRUE) +``` + + +```{r} +get_category_count <- function(tbl, id) { + result <- tbl$n[tbl$category_id == id] + if(length(result) == 0) 0 else result +} + +category_names <- sapply(1:length(beta_list$groups), + function(i) sprintf("ICD-10 #%d: %s (n=%d)", + i, + beta_list$groups[i], + get_category_count(category_count, i))) + +``` + +```{r} +pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) |> + pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168 + +pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name)) +pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) df$category_id[i]) +pddf_ib["category_name"] <- sapply( + pddf_ib$category, + function(i) category_names[i] + ) +``` + + +```{r} +# Compare group counts +counterfact_delay$ll %>% unique() %>% sort() +counterfact_delay$llx %>% unique() %>% sort() +df$category_id %>% unique() %>% sort() +``` + + +```{r} +ggplot(pddf_ib, aes(x=value,)) + + geom_density(adjust=1/5) + + labs( + title = "Distribution of predicted differences" + ,subtitle = "Intervention: Delay close of enrollment" + ,x = "Difference in probability due to intervention" + ,y = "Probability Density" + ) + + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +#ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png"), create.dir = TRUE) +``` + + +```{r} +ggplot(pddf_ib, aes(x=value,)) + + geom_density(adjust=1/5) + + facet_wrap( + ~factor( + category_name, + levels=category_names + ) + , labeller = label_wrap_gen(multi_line = TRUE) + , ncol=4) + + labs( + title = "Distribution of predicted differences | By Group" + ,subtitle = "Intervention: Delay close of enrollment" + ,x = "Difference in probability due to intervention" + ,y = "Probability Density" + ) + + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") + + theme(strip.text.x = element_text(size = 8)) +#ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png"), create.dir = TRUE) +``` + + +```{r} +ggplot(pddf_ib, aes(x=value,)) + + geom_histogram(bins=300) + + facet_wrap( + ~factor( + category_name, + levels=category_names + ) + , labeller = label_wrap_gen(multi_line = TRUE) + , ncol=4) + + labs( + title = "Histogram of predicted differences | By Group" + ,subtitle = "Intervention: Delay close of enrollment" + ,x = "Difference in probability due to intervention" + ,y = "Predicted counts" + ) + + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") + + theme(strip.text.x = element_text(size = 8)) +#ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_by_group.png"), create.dir = TRUE) +``` + + +```{r} +p3 <- ggplot(pddf_ib, aes(x=value,)) + + geom_histogram(bins=500) + + labs( + title = "Distribution of predicted differences" + ,subtitle = "Intervention: Delay close of enrollment" + ,x = "Difference in probability due to intervention" + ,y = "Probability Density" + ,caption = "Vertical marks: 5/10/25/50/75/90/95th percentiles. Dot shows mean." + ) + + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") + +density_hight <- max(density(pddf_ib$value)$y) + +stats <- list( + p5 = quantile(pddf_ib$value, 0.05), + p10 = quantile(pddf_ib$value, 0.10), + q1 = quantile(pddf_ib$value, 0.25), + med = median(pddf_ib$value), + mean = mean(pddf_ib$value), + stdev = sd(pddf_ib$value), + q3 = quantile(pddf_ib$value, 0.75), + p90 = quantile(pddf_ib$value, 0.90), + p95 = quantile(pddf_ib$value, 0.95), + max_height = max(ggplot_build(p3)$data[[1]]$count), + y_offset = -max(ggplot_build(p3)$data[[1]]$count) * 0.05, + y_offset_density = -density_hight * 0.15 +) + +p3 + + # Box + geom_segment(data = data.frame( + x = c(stats$q1, stats$q3, stats$med), + xend = c(stats$q1, stats$q3, stats$med), + y = rep(stats$y_offset, 3), + yend = rep(stats$y_offset * 2, 3) + ), aes(x = x, xend = xend, y = y, yend = yend)) + + geom_segment(data = data.frame( + x = rep(stats$q1, 2), + xend = rep(stats$q3, 2), + y = c(stats$y_offset, stats$y_offset * 2), + yend = c(stats$y_offset, stats$y_offset * 2) + ), aes(x = x, xend = xend, y = y, yend = yend)) + + # Inner whiskers (Q1->P10, Q3->P90) + geom_segment(data = data.frame( + x = c(stats$q1, stats$q3), + xend = c(stats$p10, stats$p90), + y = rep(stats$y_offset * 1.5, 2), + yend = rep(stats$y_offset * 1.5, 2) + ), aes(x = x, xend = xend, y = y, yend = yend)) + + # Crossbars at P10/P90 + geom_segment(data = data.frame( + x = c(stats$p10, stats$p90), + xend = c(stats$p10, stats$p90), + y = stats$y_offset * 1.3, + yend = stats$y_offset * 1.7 + ), aes(x = x, xend = xend, y = y, yend = yend)) + + # Outer whiskers (P10->P5, P90->P95) + geom_segment(data = data.frame( + x = c(stats$p10, stats$p90), + xend = c(stats$p5, stats$p95), + y = rep(stats$y_offset * 1.5, 2), + yend = rep(stats$y_offset * 1.5, 2) + ), aes(x = x, xend = xend, y = y, yend = yend)) + + # Crossbars at P5/P95 + geom_segment(data = data.frame( + x = c(stats$p5, stats$p95), + xend = c(stats$p5, stats$p95), + y = stats$y_offset * 1.3, + yend = stats$y_offset * 1.7 + ), aes(x = x, xend = xend, y = y, yend = yend)) + + # Mean dot + geom_point(data = data.frame( + x = stats$mean, + y = stats$y_offset * 1.5 + ), aes(x = x, y = y)) +#ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png"), create.dir = TRUE) +``` + + +```{r} +p4 <- ggplot(pddf_ib, aes(x = value)) + + geom_density() + + labs( + title = "Distribution of predicted differences" + ,subtitle = "Intervention: Delay close of enrollment" + ,x = "Difference in probability due to intervention" + ,y = "Probability Density" + ,caption = "Vertical marks: 5/10/25/50/75/90/95th percentiles. Dot shows mean." + ) + + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") + + geom_segment(data = data.frame( + x = c(stats$q1, stats$q3, stats$med), + xend = c(stats$q1, stats$q3, stats$med), + y = rep(stats$y_offset_density, 3), + yend = rep(stats$y_offset_density * 2, 3) + ), aes(x = x, xend = xend, y = y, yend = yend)) + + geom_segment(data = data.frame( + x = rep(stats$q1, 2), + xend = rep(stats$q3, 2), + y = c(stats$y_offset_density, stats$y_offset_density * 2), + yend = c(stats$y_offset_density, stats$y_offset_density * 2) + ), aes(x = x, xend = xend, y = y, yend = yend)) + + # Inner whiskers (Q1->P10, Q3->P90) + geom_segment(data = data.frame( + x = c(stats$q1, stats$q3), + xend = c(stats$p10, stats$p90), + y = rep(stats$y_offset_density * 1.5, 2), + yend = rep(stats$y_offset_density * 1.5, 2) + ), aes(x = x, xend = xend, y = y, yend = yend)) + + # Crossbars at P10/P90 + geom_segment(data = data.frame( + x = c(stats$p10, stats$p90), + xend = c(stats$p10, stats$p90), + y = stats$y_offset_density * 1.3, + yend = stats$y_offset_density * 1.7 + ), aes(x = x, xend = xend, y = y, yend = yend)) + + # Outer whiskers (P10->P5, P90->P95) + geom_segment(data = data.frame( + x = c(stats$p10, stats$p90), + xend = c(stats$p5, stats$p95), + y = rep(stats$y_offset_density * 1.5, 2), + yend = rep(stats$y_offset_density * 1.5, 2) + ), aes(x = x, xend = xend, y = y, yend = yend)) + + # Crossbars at P5/P95 + geom_segment(data = data.frame( + x = c(stats$p5, stats$p95), + xend = c(stats$p5, stats$p95), + y = stats$y_offset_density * 1.3, + yend = stats$y_offset_density * 1.7 + ), aes(x = x, xend = xend, y = y, yend = yend)) + + # Mean dot + geom_point(data = data.frame( + x = stats$mean, + y = stats$y_offset_density * 1.5 + ), aes(x = x, y = y)) +#ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png"), create.dir = TRUE) + +p4 +``` + +```{r} +#| eval: false +#| include: true + ggplot(pddf_ib, aes(x=value)) + + stat_ecdf(geom='step') + + labs( + title = "Cumulative distribution of predicted differences", + subtitle = "Intervention: Delay close of enrollment", + x = "Difference in probability of termination due to intervention", + y = "Cumulative Proportion" + ) + +#ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png"), create.dir = TRUE) +``` + + +Get the % of differences in the spike around zero +```{r} +# get values around and above/below spike +width <- 0.02 +spike_band_centered_zero <- mean( pddf_ib$value >= -width/2 & pddf_ib$value <= width/2) +above_spike_band <- mean( pddf_ib$value >= width/2) +below_spike_band <- mean( pddf_ib$value <= -width/2) + +# get mass above and mass below zero +mass_below_zero <- mean( pddf_ib$value <= 0) +``` +Looking at the spike around zero, we find that `{r} spike_band_centered_zero*100`% +of the probability mass is contained within the band from +[`{r} -width/2`,`{r} width/2`]. +Additionally, there was `{r} above_spike_band*100`% of the probability above that +-- representing those with a general increase in the probability of termination -- +and `{r} below_spike_band*100`% of the probability mass below the band +-- representing a decrease in the probability of termination. + +On average, if you keep the trial open instead of closing it, +`{r} mass_below_zero * 100`% of trials will see a decrease in the probability of termination, +but, due to the high increase in probability of termination given termination was increased, +the mean probability of termination increases by `{r} stats$mean (stats$stdev)`. + +```{r} +# 5%-iles + +summary(pddf_ib$value) + +# Create your quantiles +quants <- quantile(pddf_ib$value, probs = seq(0,1,0.05), type=4) + +# Convert to a data frame +quant_df <- data.frame( + #Percentile = names(quants), + Value = quants +) +# Convert to LaTeX +table <- xtable(quant_df, + digits = rep(3, ncol(quant_df) + 1), + floating = FALSE, + latex.environments = NULL, + booktabs = TRUE +) + +# Write to file +write_lines( + print(table, include.rownames = FALSE), + paste0(image_root,"/distdiff_5percentile_table.tex") +) + +kable(quant_df) + +proportion_increase <- mean(pddf_ib$value >= 0) +``` + +about `{r} proportion_increase * 100` percent probability increase in the probability of terminations + +```{r} +n = length(counterfact_predicted_ib$p_predicted_intervention) +k = 100 +simulated_terminations_intervention <- mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_intervention))) +simulated_terminations_base <-mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_default))) + +simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base) +``` + +The simulation above shows that this results in a percentage-point change in terminations of about +`{r} simulated_percentages * 100`. + + +```{r} +distdiff_by_group <- pddf_ib %>% + group_by(category_name, category) %>% + summarize( + mean = mean(value, na.rm=TRUE) *100 + ,"P(p>0)" = mean(value>0) *100 + ,"5%" = quantile( value, 0.05, na.rm = TRUE)*100 + ,"25%" = quantile( value, 0.25, na.rm = TRUE)*100 + ,"median" = quantile( value, 0.5, na.rm = TRUE)*100 + ,"75%" = quantile( value, 0.75, na.rm = TRUE)*100 + ,"95%" = quantile( value, 0.95, na.rm = TRUE)*100 + ) %>% arrange(category) %>% select(!category) + +distdif_by_group_latex<- xtable(distdiff_by_group, + digits = rep(2, ncol(distdiff_by_group) +1), + floating = FALSE, + align = "llccccccc", + latex.environments = "tabularx", + booktabs = TRUE +) + + + +print(distdif_by_group_latex) +distdiff_by_group +``` + + + +## fixed effects distributions + +```{r} +#| eval: false +#| include: true +#Get dataframe with only the rows of interest +filtdata <- as.data.frame(extract(fit, pars="status_diff")) +#rename columns +dimnames(filtdata)[[2]] <- sprintf("%02d %s", seq_along(beta_list$groups), beta_list$groups) +#create area plot with appropriate title +mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) + + ggtitle("Differences in Fixed Effects | By ICD-10 Category", + subtitle = "Moving from 'Active, not recruiting' to 'Recruiting'" + ) + + geom_vline(xintercept=seq(-0.25,0.5,0.25),color="grey",alpha=0.750) +#ggsave(paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.png"), create.dir = TRUE) + +fixed_effects_table <- pivot_longer(filtdata, everything()) |> + group_by(name) |> + summarize( + `sample size` = 0, + mean = mean(value), + `P(≥0)` = mean(value >= 0), + `2.5%` = quantile(value, probs = 0.025), + `5%` = quantile(value, probs = 0.05), + `25%` = quantile(value, probs = 0.25), + `50% median` = quantile(value, probs = 0.5), + `75%` = quantile(value, probs = 0.75), + `95%` = quantile(value, probs = 0.95), + `97.5%` = quantile(value, probs = 0.975) + ) + +# Convert the indexing to be consecutive by using a temporary vector +sample_sizes <- rep(0, 22) # Create vector of zeros +sample_sizes[category_count$category_id] <- category_count$n + +# Now assign the whole vector at once +fixed_effects_table["sample size"] <- sample_sizes +# Rename the name column +names(fixed_effects_table)[1] <- "ICD-10 Category" + +# Convert to LaTeX +table <- xtable(fixed_effects_table, + digits = rep(2, ncol(fixed_effects_table) + 1), + floating = FALSE, + align = "lccccccccccc", + latex.environments = "tabularx", + booktabs = TRUE +) + + + +# Write to file +write_lines( + print(table, include.rownames = FALSE), + paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.tex") +) + + + +``` + + + + + + + diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/fixed_effects_anr_vs_rec_by_group.png b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/fixed_effects_anr_vs_rec_by_group.png deleted file mode 100644 index e2b0f98..0000000 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/fixed_effects_anr_vs_rec_by_group.png and /dev/null differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/fixed_effects_anr_vs_rec_by_group.tex b/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/fixed_effects_anr_vs_rec_by_group.tex deleted file mode 100644 index 8c4fb84..0000000 --- a/r-analysis/output/EffectsOfEnrollmentDelay/betas/parameter_across_groups/fixed_effects_anr_vs_rec_by_group.tex +++ /dev/null @@ -1,34 +0,0 @@ -% latex table generated in R 4.4.2 by xtable 1.8-4 package -% Mon Feb 3 23:56:27 2025 -\begin{table}[ht] -\centering -\begin{tabular}{ccccccccccc} - \hline -ICD-10 Category & sample size & mean & P(≥0) & 2.5\% & 5\% & 25\% & 50\% median & 75\% & 95\% & 97.5\% \\ - \hline -01 Infections \& Parasites & 20.000 & 0.135 & 0.772 & -0.223 & -0.163 & 0.013 & 0.134 & 0.257 & 0.438 & 0.498 \\ - 02 Neoplasms & 49.000 & 0.211 & 0.902 & -0.107 & -0.056 & 0.099 & 0.208 & 0.320 & 0.489 & 0.546 \\ - 03 Blood \& Immune system & 1.000 & 0.061 & 0.624 & -0.333 & -0.267 & -0.071 & 0.061 & 0.194 & 0.388 & 0.454 \\ - 04 Endocrine, Nutritional, and Metabolic & 10.000 & 0.177 & 0.820 & -0.201 & -0.137 & 0.045 & 0.173 & 0.304 & 0.505 & 0.573 \\ - 05 Mental \& Behavioral & 8.000 & 0.142 & 0.767 & -0.239 & -0.175 & 0.010 & 0.140 & 0.270 & 0.469 & 0.537 \\ - 06 Nervous System & 8.000 & 0.009 & 0.524 & -0.383 & -0.315 & -0.120 & 0.011 & 0.140 & 0.327 & 0.389 \\ - 07 Eye and Adnexa & 11.000 & 0.061 & 0.624 & -0.333 & -0.265 & -0.069 & 0.062 & 0.192 & 0.386 & 0.452 \\ - 08 Ear and Mastoid & 0.000 & 0.063 & 0.625 & -0.332 & -0.263 & -0.069 & 0.064 & 0.196 & 0.393 & 0.457 \\ - 09 Circulatory & 3.000 & 0.037 & 0.577 & -0.362 & -0.293 & -0.094 & 0.038 & 0.170 & 0.363 & 0.426 \\ - 10 Respiratory & 7.000 & 0.061 & 0.622 & -0.335 & -0.269 & -0.072 & 0.061 & 0.195 & 0.391 & 0.458 \\ - 11 Digestive & 12.000 & 0.060 & 0.621 & -0.339 & -0.270 & -0.071 & 0.059 & 0.192 & 0.388 & 0.454 \\ - 12 Skin \& Subcutaneaous tissue & 9.000 & 0.104 & 0.704 & -0.283 & -0.220 & -0.027 & 0.103 & 0.234 & 0.429 & 0.494 \\ - 13 Musculoskeletal & 17.000 & 0.159 & 0.794 & -0.218 & -0.156 & 0.027 & 0.156 & 0.286 & 0.482 & 0.550 \\ - 14 Genitourinary & 2.000 & 0.063 & 0.627 & -0.337 & -0.270 & -0.070 & 0.065 & 0.195 & 0.389 & 0.456 \\ - 15 Pregancy, Childbirth, \& Puerperium & 0.000 & 0.061 & 0.622 & -0.336 & -0.270 & -0.071 & 0.061 & 0.194 & 0.392 & 0.458 \\ - 16 Perinatal Period & 0.000 & 0.062 & 0.626 & -0.338 & -0.270 & -0.068 & 0.062 & 0.194 & 0.390 & 0.457 \\ - 17 Congential & 3.000 & 0.063 & 0.628 & -0.332 & -0.264 & -0.068 & 0.063 & 0.195 & 0.391 & 0.456 \\ - 18 Symptoms, Signs etc. & 1.000 & 0.062 & 0.625 & -0.332 & -0.265 & -0.069 & 0.062 & 0.194 & 0.390 & 0.456 \\ - 19 Injury etc. & 0.000 & 0.063 & 0.625 & -0.337 & -0.268 & -0.069 & 0.062 & 0.196 & 0.392 & 0.459 \\ - 20 External Causes & 0.000 & 0.063 & 0.625 & -0.330 & -0.266 & -0.070 & 0.064 & 0.194 & 0.392 & 0.460 \\ - 21 Contact with Healthcare & 0.000 & 0.063 & 0.627 & -0.335 & -0.267 & -0.070 & 0.064 & 0.194 & 0.390 & 0.454 \\ - 22 Special Purposes & 0.000 & 0.062 & 0.625 & -0.335 & -0.268 & -0.070 & 0.062 & 0.194 & 0.391 & 0.456 \\ - \hline -\end{tabular} -\end{table} - diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/parcoord_mu.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/parcoord_mu.png deleted file mode 100644 index 42b31e6..0000000 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/parcoord_mu.png and /dev/null differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_plot_mu.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_plot_mu.png deleted file mode 100644 index 34a2680..0000000 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_plot_mu.png and /dev/null differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_1-4.png deleted file mode 100644 index cb01392..0000000 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_1-4.png and /dev/null differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_5-8.png deleted file mode 100644 index 6840565..0000000 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_5-8.png and /dev/null differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_9-12.png deleted file mode 100644 index 6ea4f17..0000000 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_mu_9-12.png and /dev/null differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_1-4.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_1-4.png deleted file mode 100644 index f9844fc..0000000 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_1-4.png and /dev/null differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_5-8.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_5-8.png deleted file mode 100644 index 6b12dc0..0000000 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_5-8.png and /dev/null differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_9-12.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_9-12.png deleted file mode 100644 index ce398ff..0000000 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/trace_rank_plot_sigma_9-12.png and /dev/null differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/traceplot_sigma.png b/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/traceplot_sigma.png deleted file mode 100644 index b0d288c..0000000 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/diagnostics/traceplot_sigma.png and /dev/null differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention.png index 9bc0f1d..8a9ef0e 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_cumulative_distdiff.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_cumulative_distdiff.png index 53b5c3e..96c8e55 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_cumulative_distdiff.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_cumulative_distdiff.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_1.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_1.png index c92093e..a582481 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_1.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_1.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_by_group.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_by_group.png index ebe9fa7..ea8fc81 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_by_group.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_by_group.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_styled.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_styled.png index cabac99..f8fae30 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_styled.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_distdiff_styled.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_boxplot.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_boxplot.png index 05eeeb3..772baec 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_boxplot.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_boxplot.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_by_group.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_by_group.png new file mode 100644 index 0000000..422b4dd Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_delay_intervention_histdiff_by_group.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_no_intervention.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_no_intervention.png new file mode 100644 index 0000000..2edbe21 Binary files /dev/null and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/p_no_intervention.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/posterior_p.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/posterior_p.png index a8ecd13..5b34098 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/posterior_p.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/posterior_p.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_mu.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_mu.png index eb2efe4..182815f 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_mu.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_mu.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_p.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_p.png index f555b17..cb72e74 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_p.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_p.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_sigma.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_sigma.png index 7c853a1..18487f1 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_sigma.png and b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysis/prior_sigma.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_delay_intervention_histdiff_by_group.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_delay_intervention_histdiff_by_group.png deleted file mode 100644 index 9c3b3f0..0000000 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_delay_intervention_histdiff_by_group.png and /dev/null differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_no_intervention.png b/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_no_intervention.png deleted file mode 100644 index 88453bc..0000000 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/dist_diff_analysisp_no_intervention.png and /dev/null differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_5percentile_table.tex b/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_5percentile_table.tex index b969241..36a4e56 100644 --- a/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_5percentile_table.tex +++ b/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_5percentile_table.tex @@ -1,5 +1,5 @@ -% latex table generated in R 4.4.2 by xtable 1.8-4 package -% Sun Feb 2 01:37:17 2025 +% latex table generated in R 4.5.1 by xtable 1.8-4 package +% Mon Mar 23 21:59:57 2026 \begin{table}[ht] \centering \begin{tabular}{r} diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_anr_vs_rec_by_group.tex b/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_anr_vs_rec_by_group.tex new file mode 100644 index 0000000..027bf62 --- /dev/null +++ b/r-analysis/output/EffectsOfEnrollmentDelay/distdiff_anr_vs_rec_by_group.tex @@ -0,0 +1,26 @@ +% latex table generated in R 4.5.1 by xtable 1.8-4 package +% Mon Mar 23 23:42:59 2026 +\begin{table}[ht] +\centering +\begin{tabular}{lcccccccc} + \hline +category\_name & N & mean & P(p$>$0) & 5\% & 25\% & median & 75\% & 95\% \\ + \hline +ICD-10 \#1: Infections \& Parasites (n=20) & XX & 1.60 & 77.23 & -1.79 & 0.11 & 1.19 & 2.77 & 6.30 \\ + ICD-10 \#2: Neoplasms (n=49) & XX & 3.63 & 90.17 & -0.90 & 1.52 & 3.37 & 5.52 & 9.06 \\ + ICD-10 \#3: Blood \& Immune system (n=1) & XX & 0.15 & 62.35 & -0.80 & -0.02 & 0.01 & 0.16 & 1.63 \\ + ICD-10 \#4: Endocrine, Nutritional, and Metabolic (n=10) & XX & 2.23 & 82.05 & -1.75 & 0.52 & 2.03 & 3.77 & 6.89 \\ + ICD-10 \#5: Mental \& Behavioral (n=8) & XX & 3.42 & 76.72 & -4.21 & 0.24 & 3.36 & 6.47 & 11.28 \\ + ICD-10 \#6: Nervous System (n=8) & XX & 0.05 & 52.37 & -2.98 & -0.83 & 0.07 & 0.95 & 3.01 \\ + ICD-10 \#7: Eye and Adnexa (n=11) & XX & 0.03 & 62.40 & -0.13 & -0.01 & 0.00 & 0.04 & 0.25 \\ + ICD-10 \#9: Circulatory (n=3) & XX & 0.69 & 57.69 & -5.03 & -1.33 & 0.50 & 2.66 & 6.88 \\ + ICD-10 \#10: Respiratory (n=7) & XX & 0.04 & 62.21 & -0.17 & -0.00 & 0.00 & 0.03 & 0.37 \\ + ICD-10 \#11: Digestive (n=12) & XX & 0.02 & 62.13 & -0.10 & -0.00 & 0.00 & 0.02 & 0.18 \\ + ICD-10 \#12: Skin \& Subcutaneaous tissue (n=9) & XX & 0.55 & 70.42 & -1.06 & -0.11 & 0.41 & 1.10 & 2.56 \\ + ICD-10 \#13: Musculoskeletal (n=17) & XX & 1.16 & 79.42 & -1.13 & 0.18 & 1.02 & 2.02 & 3.93 \\ + ICD-10 \#14: Genitourinary (n=2) & XX & 0.13 & 62.72 & -0.54 & -0.01 & 0.00 & 0.10 & 1.27 \\ + ICD-10 \#17: Congential (n=3) & XX & 0.06 & 62.82 & -0.27 & -0.00 & 0.00 & 0.05 & 0.63 \\ + \hline +\end{tabular} +\end{table} + diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/CategoryCounts.png b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/CategoryCounts.png index 4968c4a..60b1db0 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/CategoryCounts.png and b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/CategoryCounts.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistSnapshots.png b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistSnapshots.png index 7b5bbcd..29e0cad 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistSnapshots.png and b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistSnapshots.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistTrialDurations_Faceted.png b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistTrialDurations_Faceted.png index f185f66..e63a6b9 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistTrialDurations_Faceted.png and b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/HistTrialDurations_Faceted.png differ diff --git a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/SnapshotsVsDurationVsTermination.png b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/SnapshotsVsDurationVsTermination.png index edb928d..ae50707 100644 Binary files a/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/SnapshotsVsDurationVsTermination.png and b/r-analysis/output/EffectsOfEnrollmentDelay/trials_details/SnapshotsVsDurationVsTermination.png differ diff --git a/r-analysis/r-analysis.Rproj b/r-analysis/r-analysis.Rproj index 8a81d49..e1fb2b3 100644 --- a/r-analysis/r-analysis.Rproj +++ b/r-analysis/r-analysis.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: af114abe-061f-43bf-8ae1-a953900da3ff RestoreWorkspace: Yes SaveWorkspace: Ask