Compare commits

...

5 Commits

Author SHA1 Message Date
Will King 562154719c Added all the updates for fixed run
I ran the analysis and got the following updated results.
4 weeks ago
Will King 3d1024920e Fixed group assigments. 4 weeks ago
Will King 52f30367c6 proposed change 4 weeks ago
Will King d753db81c7 Changes to enrollment delay without ICD10Group fix
I wanted to fix the ICD-10 groups with a record of other changes I made.
4 weeks ago
Will King c9ce21a815 updating table output 1 year ago

@ -8,6 +8,8 @@ editor: source
# Setup # Setup
```{r} ```{r}
library(knitr) library(knitr)
library(bayesplot) library(bayesplot)
@ -37,10 +39,15 @@ image_parameters_by_groups <-paste0(image_root,"/betas/parameters_by_group")
image_parameters_across_groups <-paste0(image_root,"/betas/parameter_across_groups") image_parameters_across_groups <-paste0(image_root,"/betas/parameter_across_groups")
``` ```
run on `{r} now(tz='UTC')`
```{r} ```{r}
################ Pull data from database ###################### ################ Pull data from database ######################
library(RPostgreSQL) library(RPostgreSQL)
host <- 'aact_db-restored-2025-01-07' #host <-'aact_db-restored-2025-01-07'
host <- '10.89.0.6'
driver <- dbDriver("PostgreSQL") driver <- dbDriver("PostgreSQL")
@ -431,6 +438,7 @@ counterfact_delay <- list(
``` ```
```{r} ```{r}
#| label: Fitting
fit <- stan( fit <- stan(
file='Hierarchal_Logistic.stan', file='Hierarchal_Logistic.stan',
data = counterfact_delay, data = counterfact_delay,
@ -493,7 +501,7 @@ ggplot(data=df3, aes(x=count, fill=final_status)) +
geom_histogram(binwidth=1) + geom_histogram(binwidth=1) +
ggtitle("Histogram of snapshots per trial (matched trials)") + ggtitle("Histogram of snapshots per trial (matched trials)") +
xlab("Snapshots per trial") xlab("Snapshots per trial")
ggsave(paste0(image_trial_details,"/HistSnapshots.png")) ggsave(paste0(image_trial_details,"/HistSnapshots.png"), create.dir = TRUE)
#Plot duration for terminated vs completed #Plot duration for terminated vs completed
df4 <- dbGetQuery( df4 <- dbGetQuery(
@ -516,7 +524,7 @@ ggplot(data=df4, aes(x=duration,fill=overall_status)) +
ggtitle("Histogram of trial durations") + ggtitle("Histogram of trial durations") +
xlab("duration")+ xlab("duration")+
facet_wrap(~overall_status) facet_wrap(~overall_status)
ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png")) ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png"), create.dir = TRUE)
df5 <- dbGetQuery( df5 <- dbGetQuery(
con, con,
@ -547,7 +555,7 @@ ggplot(data=df5, aes(x=duration,y=snapshot_count,color=overall_status)) +
ggtitle("Comparison of duration, status, and snapshot_count") + ggtitle("Comparison of duration, status, and snapshot_count") +
xlab("duration") + xlab("duration") +
ylab("snapshot count") ylab("snapshot count")
ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png")) ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png"), create.dir = TRUE)
dbDisconnect(con) dbDisconnect(con)
@ -563,7 +571,7 @@ ggplot(data = group_trials_by_category, aes(x=category_id)) +
,x="Category ID" ,x="Category ID"
,y="Count" ,y="Count"
) )
ggsave(paste0(image_trial_details,"/CategoryCounts.png")) ggsave(paste0(image_trial_details,"/CategoryCounts.png"), create.dir = TRUE)
@ -573,8 +581,16 @@ cor_dur_count <- cor(df5$duration,df5$snapshot_count)
count_snapshots <- sum(df5$snapshot_count) count_snapshots <- sum(df5$snapshot_count)
``` ```
the correlation value is `r cor_dur_count` between duration and snapshot count. the correlation value is `{r} cor_dur_count` between duration and snapshot count.
There are `r count_snapshots` snapshots in total. There are `{r} count_snapshots` snapshots in total, spread over
`{r} length(df5$nct_id)` trials.
The number of categories by trial are
TODO: add in category names,
```{r}
group_trials_by_category |> count(category_id)
```
@ -595,7 +611,7 @@ for (i in category_count$category_id[category_count$n >= 0]) {
gi <- group_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups gi <- group_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
ggsave( ggsave(
paste0(image_parameters_by_groups,"/group_",i,"_",gi$name,".png") paste0(image_parameters_by_groups,"/group_",i,"_",gi$name,".png")
,plot=gi$plot ,plot=gi$plot, create.dir = TRUE
) )
gx <- c(gx,gi) gx <- c(gx,gi)
@ -625,7 +641,7 @@ for (i in c(1,2,3,9,10,11,12)) {
pi <- parameter_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups pi <- parameter_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
ggsave( ggsave(
paste0(image_parameters_across_groups,"/parameters_",i,"_",pi$name,".png") paste0(image_parameters_across_groups,"/parameters_",i,"_",pi$name,".png")
,plot=pi$plot ,plot=pi$plot, create.dir = TRUE
) )
px <- c(px,pi) px <- c(px,pi)
@ -685,7 +701,7 @@ ggplot(df_ib_p, aes(x=p_prior)) +
,x="Probability Domain 'p'" ,x="Probability Domain 'p'"
,y="Probability Density" ,y="Probability Density"
) )
ggsave(paste0(image_dist_diff_analysis,"/prior_p.png")) ggsave(paste0(image_dist_diff_analysis,"/prior_p.png"), create.dir = TRUE)
#p_posterior #p_posterior
ggplot(df_ib_p, aes(x=p_predicted)) + ggplot(df_ib_p, aes(x=p_predicted)) +
@ -696,7 +712,7 @@ ggplot(df_ib_p, aes(x=p_predicted)) +
,x="Probability Domain 'p'" ,x="Probability Domain 'p'"
,y="Probability Density" ,y="Probability Density"
) )
ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png")) ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png"), create.dir = TRUE)
#mu_prior #mu_prior
ggplot(df_ib_prior) + ggplot(df_ib_prior) +
@ -707,7 +723,7 @@ ggplot(df_ib_prior) +
,x="Mu" ,x="Mu"
,y="Probability" ,y="Probability"
) )
ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png")) ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png"), create.dir = TRUE)
#sigma_posterior #sigma_posterior
ggplot(df_ib_prior) + ggplot(df_ib_prior) +
@ -718,7 +734,7 @@ ggplot(df_ib_prior) +
,x="Sigma" ,x="Sigma"
,y="Probability" ,y="Probability"
) )
ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png")) ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"), create.dir = TRUE)
``` ```
@ -726,7 +742,7 @@ ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"))
### Intervention: Delay close of enrollment # Intervention: Delay close of enrollment
```{r} ```{r}
counterfact_predicted_ib <- data.frame( counterfact_predicted_ib <- data.frame(
@ -744,7 +760,7 @@ ggplot(counterfact_predicted_ib, aes(x=p_predicted_default)) +
,x="Probability Domain 'p'" ,x="Probability Domain 'p'"
,y="Probability Density" ,y="Probability Density"
) )
ggsave(paste0(image_dist_diff_analysis,"/p_no_intervention.png")) ggsave(paste0(image_dist_diff_analysis,"/p_no_intervention.png"), create.dir = TRUE)
ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) + ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
geom_density() + geom_density() +
@ -754,7 +770,7 @@ ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
,x="Probability Domain 'p'" ,x="Probability Domain 'p'"
,y="Probability Density" ,y="Probability Density"
) )
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png"), create.dir = TRUE)
ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) + ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
geom_density() + geom_density() +
@ -764,7 +780,7 @@ ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
,x="Difference in 'p' under treatment" ,x="Difference in 'p' under treatment"
,y="Probability Density" ,y="Probability Density"
) )
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png"), create.dir = TRUE)
``` ```
@ -787,7 +803,7 @@ pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predict
pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168 pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168
pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name)) pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) df$category_id[i]) pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) counterfact_delay$llx[i])
pddf_ib["category_name"] <- sapply( pddf_ib["category_name"] <- sapply(
pddf_ib$category, pddf_ib$category,
function(i) category_names[i] function(i) category_names[i]
@ -805,7 +821,7 @@ ggplot(pddf_ib, aes(x=value,)) +
,y = "Probability Density" ,y = "Probability Density"
) + ) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png"), create.dir = TRUE)
``` ```
@ -827,7 +843,7 @@ ggplot(pddf_ib, aes(x=value,)) +
) + ) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
theme(strip.text.x = element_text(size = 8)) theme(strip.text.x = element_text(size = 8))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png"), create.dir = TRUE)
``` ```
@ -849,7 +865,7 @@ ggplot(pddf_ib, aes(x=value,)) +
) + ) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
theme(strip.text.x = element_text(size = 8)) theme(strip.text.x = element_text(size = 8))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_by_group.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_by_group.png"), create.dir = TRUE)
``` ```
@ -873,6 +889,7 @@ stats <- list(
q1 = quantile(pddf_ib$value, 0.25), q1 = quantile(pddf_ib$value, 0.25),
med = median(pddf_ib$value), med = median(pddf_ib$value),
mean = mean(pddf_ib$value), mean = mean(pddf_ib$value),
stdev = sd(pddf_ib$value),
q3 = quantile(pddf_ib$value, 0.75), q3 = quantile(pddf_ib$value, 0.75),
p90 = quantile(pddf_ib$value, 0.90), p90 = quantile(pddf_ib$value, 0.90),
p95 = quantile(pddf_ib$value, 0.95), p95 = quantile(pddf_ib$value, 0.95),
@ -928,7 +945,7 @@ p3 +
x = stats$mean, x = stats$mean,
y = stats$y_offset * 1.5 y = stats$y_offset * 1.5
), aes(x = x, y = y)) ), aes(x = x, y = y))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png"), create.dir = TRUE)
``` ```
@ -988,7 +1005,7 @@ p4 <- ggplot(pddf_ib, aes(x = value)) +
x = stats$mean, x = stats$mean,
y = stats$y_offset_density * 1.5 y = stats$y_offset_density * 1.5
), aes(x = x, y = y)) ), aes(x = x, y = y))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png"), create.dir = TRUE)
p4 p4
``` ```
@ -1003,7 +1020,7 @@ p4
y = "Cumulative Proportion" y = "Cumulative Proportion"
) )
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png"), create.dir = TRUE)
``` ```
@ -1018,18 +1035,18 @@ below_spike_band <- mean( pddf_ib$value <= -width/2)
# get mass above and mass below zero # get mass above and mass below zero
mass_below_zero <- mean( pddf_ib$value <= 0) mass_below_zero <- mean( pddf_ib$value <= 0)
``` ```
Looking at the spike around zero, we find that `r spike_band_centered_zero*100`% Looking at the spike around zero, we find that `{r} spike_band_centered_zero*100`%
of the probability mass is contained within the band from of the probability mass is contained within the band from
[`r -width*100/2`,`r width*100/2`]. [`{r} -width/2`,`{r} width/2`].
Additionally, there was `r above_spike_band*100`% of the probability above that Additionally, there was `{r} above_spike_band*100`% of the probability above that
-- representing those with a general increase in the probability of termination -- -- representing those with a general increase in the probability of termination --
and `r below_spike_band*100`% of the probability mass below the band and `{r} below_spike_band*100`% of the probability mass below the band
-- representing a decrease in the probability of termination. -- representing a decrease in the probability of termination.
On average, if you keep the trial open instead of closing it, On average, if you keep the trial open instead of closing it,
`r mass_below_zero`% of trials will see a decrease in the probability of termination, `{r} mass_below_zero * 100`% of trials will see a decrease in the probability of termination,
but, due to the high increase in probability of termination given termination was increased, but, due to the high increase in probability of termination given termination was increased,
the mean probability of termination increases by `r stats$mean`. the mean probability of termination increases by `{r} stats$mean (stats$stdev)`.
```{r} ```{r}
# 5%-iles # 5%-iles
@ -1063,7 +1080,7 @@ kable(quant_df)
proportion_increase <- mean(pddf_ib$value >= 0) proportion_increase <- mean(pddf_ib$value >= 0)
``` ```
about `r proportion_increase * 100` percent probability increase in the probability of terminations about `{r} proportion_increase * 100` percent probability increase in the probability of terminations
```{r} ```{r}
n = length(counterfact_predicted_ib$p_predicted_intervention) n = length(counterfact_predicted_ib$p_predicted_intervention)
@ -1071,30 +1088,66 @@ k = 100
simulated_terminations_intervention <- mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_intervention))) simulated_terminations_intervention <- mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_intervention)))
simulated_terminations_base <-mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_default))) simulated_terminations_base <-mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_default)))
simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)/k simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)
``` ```
The simulation above shows that this results in a percentage-point change in terminations of about The simulation above shows that this results in a percentage-point change in terminations of about
`r simulated_percentages * 100`. `{r} simulated_percentages * 100`.
```{r}
distdiff_by_group <- pddf_ib %>%
group_by(category_name, category) %>%
summarize(
mean = mean(value, na.rm=TRUE) *100
,"P(p>0)" = mean(value>0) *100
,"5%" = quantile( value, 0.05, na.rm = TRUE)*100
,"25%" = quantile( value, 0.25, na.rm = TRUE)*100
,"median" = quantile( value, 0.5, na.rm = TRUE)*100
,"75%" = quantile( value, 0.75, na.rm = TRUE)*100
,"95%" = quantile( value, 0.95, na.rm = TRUE)*100
) %>% arrange(category) %>% select(!category)
distdif_by_group_latex<- xtable(distdiff_by_group,
digits = rep(2, ncol(distdiff_by_group) +1),
floating = FALSE,
align = "llccccccc",
latex.environments = "tabularx",
booktabs = TRUE
)
# Write to file
write_lines(
print(distdif_by_group_latex, include.rownames = FALSE),
paste0(image_root,"/distdiff_anr_vs_rec_by_group.tex")
)
print(distdif_by_group_latex)
distdiff_by_group
```
## fixed effects distributions ## fixed effects distributions
```{r} ```{r}
#| label: Fixed Effects Distributions
#Get dataframe with only the rows of interest #Get dataframe with only the rows of interest
filtdata <- as.data.frame(extract(fit, pars="status_diff")) filtdata <- as.data.frame(extract(fit, pars="status_diff"))
#rename columns #rename columns
dimnames(filtdata)[[2]] <- beta_list$groups dimnames(filtdata)[[2]] <- sprintf("%02d %s", seq_along(beta_list$groups), beta_list$groups)
#create area plot with appropriate title #create area plot with appropriate title
mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) + mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
ggtitle("Differences in Fixed Effects | By ICD-10 Category", ggtitle("Differences in Fixed Effects | By ICD-10 Category",
subtitle = "Moving from 'Active, not recruiting' to 'Recruiting'" subtitle = "Moving from 'Active, not recruiting' to 'Recruiting'"
) + ) +
geom_vline(xintercept=seq(-0.25,0.5,0.25),color="grey",alpha=0.750) geom_vline(xintercept=seq(-0.25,0.5,0.25),color="grey",alpha=0.750)
ggsave(paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.png")) ggsave(paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.png"), create.dir = TRUE)
d <- pivot_longer(filtdata, everything()) |> fixed_effects_table <- pivot_longer(filtdata, everything()) |>
group_by(name) |> group_by(name) |>
summarize( summarize(
`sample size` = 0,
mean = mean(value), mean = mean(value),
`P(≥0)` = mean(value >= 0), `P(≥0)` = mean(value >= 0),
`2.5%` = quantile(value, probs = 0.025), `2.5%` = quantile(value, probs = 0.025),
@ -1106,18 +1159,26 @@ d <- pivot_longer(filtdata, everything()) |>
`97.5%` = quantile(value, probs = 0.975) `97.5%` = quantile(value, probs = 0.975)
) )
# Convert the indexing to be consecutive by using a temporary vector
sample_sizes <- rep(0, 22) # Create vector of zeros
sample_sizes[category_count$category_id] <- category_count$n
# Now assign the whole vector at once
fixed_effects_table["sample size"] <- sample_sizes
# Rename the name column # Rename the name column
names(d)[1] <- "ICD-10 Category" names(fixed_effects_table)[1] <- "ICD-10 Category"
# Convert to LaTeX # Convert to LaTeX
table <- xtable(d, table <- xtable(fixed_effects_table,
digits = rep(3, ncol(d) + 1), digits = rep(2, ncol(fixed_effects_table) + 1),
floating = FALSE, floating = FALSE,
latex.environments = NULL, align = "lccccccccccc",
latex.environments = "tabularx",
booktabs = TRUE booktabs = TRUE
) )
# Write to file # Write to file
write_lines( write_lines(
print(table, include.rownames = FALSE), print(table, include.rownames = FALSE),
@ -1132,12 +1193,13 @@ write_lines(
# Diagnostics # Diagnostics
```{r} ```{r}
#| eval: true #| label: diagnostics 1
#| eval: false
#trace plots #trace plots
image_diagnostics <- paste0(image_root,"/diagnostics") image_diagnostics <- paste0(image_root,"/diagnostics")
plot(fit, pars=c("mu"), plotfun="trace") plot(fit, pars=c("mu"), plotfun="trace")
ggsave(paste0(image_diagnostics,"/trace_plot_mu.png")) ggsave(paste0(image_diagnostics,"/trace_plot_mu.png"), create.dir = TRUE)
for (i in 1:3) { for (i in 1:3) {
@ -1156,14 +1218,15 @@ for (i in 1:3) {
) )
mu_range <- paste0(4*i-3,"-",4*i) mu_range <- paste0(4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/trace_rank_plot_mu_",mu_range,".png") filename <- paste0(image_diagnostics,"/trace_rank_plot_mu_",mu_range,".png")
ggsave(filename) ggsave(filename, create.dir = TRUE)
} }
``` ```
```{r} ```{r}
#| eval: true #| label: diagnostics 2
#| eval: false
plot(fit, pars=c("sigma"), plotfun="trace") plot(fit, pars=c("sigma"), plotfun="trace")
ggsave(paste0(image_diagnostics,"/traceplot_sigma.png")) ggsave(paste0(image_diagnostics,"/traceplot_sigma.png"), create.dir = TRUE)
for (i in 1:3) { for (i in 1:3) {
print( print(
@ -1181,12 +1244,13 @@ for (i in 1:3) {
) )
sigma_range <- paste0(4*i-3,"-",4*i) sigma_range <- paste0(4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/trace_rank_plot_sigma_",sigma_range,".png") filename <- paste0(image_diagnostics,"/trace_rank_plot_sigma_",sigma_range,".png")
ggsave(filename) ggsave(filename, create.dir = TRUE)
} }
``` ```
```{r} ```{r}
#| eval: true #| label: diagnostics 3
#| eval: false
#other diagnostics #other diagnostics
logpost <- log_posterior(fit) logpost <- log_posterior(fit)
nuts_prmts <- nuts_params(fit) nuts_prmts <- nuts_params(fit)
@ -1195,15 +1259,17 @@ posterior <- as.array(fit)
``` ```
```{r} ```{r}
#| eval: true #| label: diagnostics 4
#| eval: false
color_scheme_set("darkgray") color_scheme_set("darkgray")
div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4) div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4)
mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05) mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05)
ggsave(paste0(image_diagnostics,"/parcoord_mu.png")) ggsave(paste0(image_diagnostics,"/parcoord_mu.png"), create.dir = TRUE)
``` ```
```{r} ```{r}
#| eval: true #| label: diagnostics 5
#| eval: false
for (i in 1:3) { for (i in 1:3) {
mus = sapply(3:0, function(j) paste0("mu[",4*i-j ,"]")) mus = sapply(3:0, function(j) paste0("mu[",4*i-j ,"]"))
print( print(
@ -1219,7 +1285,7 @@ for (i in 1:3) {
) )
mu_range <- paste0(4*i-3,"-",4*i) mu_range <- paste0(4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/correlation_plot_mu_",mu_range,".png") filename <- paste0(image_diagnostics,"/correlation_plot_mu_",mu_range,".png")
ggsave(filename) ggsave(filename, create.dir = TRUE)
} }
@ -1227,13 +1293,15 @@ for (i in 1:3) {
``` ```
```{r} ```{r}
#| eval: true #| label: diagnostics 6
#| eval: false
mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05) mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05)
ggsave(paste0(image_diagnostics,"/parcoord_sigma.png")) ggsave(paste0(image_diagnostics,"/parcoord_sigma.png"), create.dir = TRUE)
``` ```
```{r} ```{r}
#| eval: true #| label: diagnostics 7
#| eval: false
for (i in 1:3) { for (i in 1:3) {
params = sapply(3:0, function(j) paste0("sigma[",4*i-j ,"]")) params = sapply(3:0, function(j) paste0("sigma[",4*i-j ,"]"))
@ -1250,13 +1318,14 @@ for (i in 1:3) {
) )
sigma_range <- paste0(4*i-3,"-",4*i) sigma_range <- paste0(4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/correlation_plot_sigma_",sigma_range,".png") filename <- paste0(image_diagnostics,"/correlation_plot_sigma_",sigma_range,".png")
ggsave(filename) ggsave(filename, create.dir = TRUE)
} }
``` ```
```{r} ```{r}
#| eval: true #| eval: false
#| label: diagnostics 8
for (k in 1:22) { for (k in 1:22) {
for (i in 1:3) { for (i in 1:3) {
params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]")) params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]"))
@ -1274,7 +1343,7 @@ for (i in 1:3) {
beta_range <- paste0("k_",k,"_i_",4*i-3,"-",4*i) beta_range <- paste0("k_",k,"_i_",4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/correlation_plot_beta_",beta_range,".png") filename <- paste0(image_diagnostics,"/correlation_plot_beta_",beta_range,".png")
ggsave(filename) ggsave(filename, create.dir = TRUE)
}} }}
``` ```

@ -12,6 +12,8 @@ editor: source
```{r} ```{r}
library(knitr) library(knitr)
library(bayesplot) library(bayesplot)
@ -41,10 +43,19 @@ image_parameters_by_groups <-paste0(image_root,"/betas/parameters_by_group")
image_parameters_across_groups <-paste0(image_root,"/betas/parameter_across_groups") image_parameters_across_groups <-paste0(image_root,"/betas/parameter_across_groups")
``` ```
run on `r .QuartoInlineRender(now(tz='UTC'))`
```{r} ```{r}
################ Pull data from database ###################### ################ Pull data from database ######################
library(RPostgreSQL) library(RPostgreSQL)
host <- 'aact_db-restored-2025-01-07' #host <-'aact_db-restored-2025-01-07'
host <- '10.89.0.6'
driver <- dbDriver("PostgreSQL") driver <- dbDriver("PostgreSQL")
@ -441,6 +452,7 @@ counterfact_delay <- list(
``` ```
```{r} ```{r}
#| label: Fitting
fit <- stan( fit <- stan(
file='Hierarchal_Logistic.stan', file='Hierarchal_Logistic.stan',
data = counterfact_delay, data = counterfact_delay,
@ -509,7 +521,7 @@ ggplot(data=df3, aes(x=count, fill=final_status)) +
geom_histogram(binwidth=1) + geom_histogram(binwidth=1) +
ggtitle("Histogram of snapshots per trial (matched trials)") + ggtitle("Histogram of snapshots per trial (matched trials)") +
xlab("Snapshots per trial") xlab("Snapshots per trial")
ggsave(paste0(image_trial_details,"/HistSnapshots.png")) ggsave(paste0(image_trial_details,"/HistSnapshots.png"), create.dir = TRUE)
#Plot duration for terminated vs completed #Plot duration for terminated vs completed
df4 <- dbGetQuery( df4 <- dbGetQuery(
@ -532,7 +544,7 @@ ggplot(data=df4, aes(x=duration,fill=overall_status)) +
ggtitle("Histogram of trial durations") + ggtitle("Histogram of trial durations") +
xlab("duration")+ xlab("duration")+
facet_wrap(~overall_status) facet_wrap(~overall_status)
ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png")) ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png"), create.dir = TRUE)
df5 <- dbGetQuery( df5 <- dbGetQuery(
con, con,
@ -563,7 +575,7 @@ ggplot(data=df5, aes(x=duration,y=snapshot_count,color=overall_status)) +
ggtitle("Comparison of duration, status, and snapshot_count") + ggtitle("Comparison of duration, status, and snapshot_count") +
xlab("duration") + xlab("duration") +
ylab("snapshot count") ylab("snapshot count")
ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png")) ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png"), create.dir = TRUE)
dbDisconnect(con) dbDisconnect(con)
@ -579,7 +591,7 @@ ggplot(data = group_trials_by_category, aes(x=category_id)) +
,x="Category ID" ,x="Category ID"
,y="Count" ,y="Count"
) )
ggsave(paste0(image_trial_details,"/CategoryCounts.png")) ggsave(paste0(image_trial_details,"/CategoryCounts.png"), create.dir = TRUE)
@ -591,8 +603,20 @@ count_snapshots <- sum(df5$snapshot_count)
the correlation value is `r cor_dur_count` between duration and snapshot count. the correlation value is `r .QuartoInlineRender(cor_dur_count)` between duration and snapshot count.
There are `r count_snapshots` snapshots in total. There are `r .QuartoInlineRender(count_snapshots)` snapshots in total, spread over
`r .QuartoInlineRender(length(df5$nct_id))` trials.
The number of categories by trial are
TODO: add in category names,
```{r}
group_trials_by_category |> count(category_id)
```
@ -615,7 +639,7 @@ for (i in category_count$category_id[category_count$n >= 0]) {
gi <- group_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups gi <- group_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
ggsave( ggsave(
paste0(image_parameters_by_groups,"/group_",i,"_",gi$name,".png") paste0(image_parameters_by_groups,"/group_",i,"_",gi$name,".png")
,plot=gi$plot ,plot=gi$plot, create.dir = TRUE
) )
gx <- c(gx,gi) gx <- c(gx,gi)
@ -643,7 +667,7 @@ for (i in c(1,2,3,9,10,11,12)) {
pi <- parameter_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups pi <- parameter_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
ggsave( ggsave(
paste0(image_parameters_across_groups,"/parameters_",i,"_",pi$name,".png") paste0(image_parameters_across_groups,"/parameters_",i,"_",pi$name,".png")
,plot=pi$plot ,plot=pi$plot, create.dir = TRUE
) )
px <- c(px,pi) px <- c(px,pi)
@ -710,7 +734,7 @@ ggplot(df_ib_p, aes(x=p_prior)) +
,x="Probability Domain 'p'" ,x="Probability Domain 'p'"
,y="Probability Density" ,y="Probability Density"
) )
ggsave(paste0(image_dist_diff_analysis,"/prior_p.png")) ggsave(paste0(image_dist_diff_analysis,"/prior_p.png"), create.dir = TRUE)
#p_posterior #p_posterior
ggplot(df_ib_p, aes(x=p_predicted)) + ggplot(df_ib_p, aes(x=p_predicted)) +
@ -721,7 +745,7 @@ ggplot(df_ib_p, aes(x=p_predicted)) +
,x="Probability Domain 'p'" ,x="Probability Domain 'p'"
,y="Probability Density" ,y="Probability Density"
) )
ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png")) ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png"), create.dir = TRUE)
#mu_prior #mu_prior
ggplot(df_ib_prior) + ggplot(df_ib_prior) +
@ -732,7 +756,7 @@ ggplot(df_ib_prior) +
,x="Mu" ,x="Mu"
,y="Probability" ,y="Probability"
) )
ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png")) ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png"), create.dir = TRUE)
#sigma_posterior #sigma_posterior
ggplot(df_ib_prior) + ggplot(df_ib_prior) +
@ -743,7 +767,7 @@ ggplot(df_ib_prior) +
,x="Sigma" ,x="Sigma"
,y="Probability" ,y="Probability"
) )
ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png")) ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"), create.dir = TRUE)
``` ```
@ -753,7 +777,7 @@ ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"))
### Intervention: Delay close of enrollment # Intervention: Delay close of enrollment
@ -773,7 +797,7 @@ ggplot(counterfact_predicted_ib, aes(x=p_predicted_default)) +
,x="Probability Domain 'p'" ,x="Probability Domain 'p'"
,y="Probability Density" ,y="Probability Density"
) )
ggsave(paste0(image_dist_diff_analysis,"p_no_intervention.png")) ggsave(paste0(image_dist_diff_analysis,"/p_no_intervention.png"), create.dir = TRUE)
ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) + ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
geom_density() + geom_density() +
@ -783,7 +807,7 @@ ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
,x="Probability Domain 'p'" ,x="Probability Domain 'p'"
,y="Probability Density" ,y="Probability Density"
) )
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png"), create.dir = TRUE)
ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) + ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
geom_density() + geom_density() +
@ -793,7 +817,7 @@ ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
,x="Difference in 'p' under treatment" ,x="Difference in 'p' under treatment"
,y="Probability Density" ,y="Probability Density"
) )
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png"), create.dir = TRUE)
``` ```
```{r} ```{r}
@ -815,7 +839,7 @@ pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predict
pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168 pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168
pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name)) pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) df$category_id[i]) pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) counterfact_delay$llx[i])
pddf_ib["category_name"] <- sapply( pddf_ib["category_name"] <- sapply(
pddf_ib$category, pddf_ib$category,
function(i) category_names[i] function(i) category_names[i]
@ -832,7 +856,7 @@ ggplot(pddf_ib, aes(x=value,)) +
,y = "Probability Density" ,y = "Probability Density"
) + ) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png"), create.dir = TRUE)
``` ```
```{r} ```{r}
@ -853,7 +877,7 @@ ggplot(pddf_ib, aes(x=value,)) +
) + ) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
theme(strip.text.x = element_text(size = 8)) theme(strip.text.x = element_text(size = 8))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png"), create.dir = TRUE)
``` ```
```{r} ```{r}
@ -874,7 +898,7 @@ ggplot(pddf_ib, aes(x=value,)) +
) + ) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
theme(strip.text.x = element_text(size = 8)) theme(strip.text.x = element_text(size = 8))
ggsave(paste0(image_dist_diff_analysis,"p_delay_intervention_histdiff_by_group.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_by_group.png"), create.dir = TRUE)
``` ```
```{r} ```{r}
@ -897,6 +921,7 @@ stats <- list(
q1 = quantile(pddf_ib$value, 0.25), q1 = quantile(pddf_ib$value, 0.25),
med = median(pddf_ib$value), med = median(pddf_ib$value),
mean = mean(pddf_ib$value), mean = mean(pddf_ib$value),
stdev = sd(pddf_ib$value),
q3 = quantile(pddf_ib$value, 0.75), q3 = quantile(pddf_ib$value, 0.75),
p90 = quantile(pddf_ib$value, 0.90), p90 = quantile(pddf_ib$value, 0.90),
p95 = quantile(pddf_ib$value, 0.95), p95 = quantile(pddf_ib$value, 0.95),
@ -952,7 +977,7 @@ p3 +
x = stats$mean, x = stats$mean,
y = stats$y_offset * 1.5 y = stats$y_offset * 1.5
), aes(x = x, y = y)) ), aes(x = x, y = y))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png"), create.dir = TRUE)
``` ```
```{r} ```{r}
@ -1011,7 +1036,7 @@ p4 <- ggplot(pddf_ib, aes(x = value)) +
x = stats$mean, x = stats$mean,
y = stats$y_offset_density * 1.5 y = stats$y_offset_density * 1.5
), aes(x = x, y = y)) ), aes(x = x, y = y))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png"), create.dir = TRUE)
p4 p4
``` ```
@ -1026,7 +1051,7 @@ p4
y = "Cumulative Proportion" y = "Cumulative Proportion"
) )
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png")) ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png"), create.dir = TRUE)
``` ```
@ -1047,18 +1072,18 @@ mass_below_zero <- mean( pddf_ib$value <= 0)
``` ```
Looking at the spike around zero, we find that `r spike_band_centered_zero*100`% Looking at the spike around zero, we find that `r .QuartoInlineRender(spike_band_centered_zero*100)`%
of the probability mass is contained within the band from of the probability mass is contained within the band from
[`r -width*100/2`,`r width*100/2`]. [`r .QuartoInlineRender(-width/2)`,`r .QuartoInlineRender(width/2)`].
Additionally, there was `r above_spike_band*100`% of the probability above that Additionally, there was `r .QuartoInlineRender(above_spike_band*100)`% of the probability above that
-- representing those with a general increase in the probability of termination -- -- representing those with a general increase in the probability of termination --
and `r below_spike_band*100`% of the probability mass below the band and `r .QuartoInlineRender(below_spike_band*100)`% of the probability mass below the band
-- representing a decrease in the probability of termination. -- representing a decrease in the probability of termination.
On average, if you keep the trial open instead of closing it, On average, if you keep the trial open instead of closing it,
`r mass_below_zero`% of trials will see a decrease in the probability of termination, `r .QuartoInlineRender(mass_below_zero * 100)`% of trials will see a decrease in the probability of termination,
but, due to the high increase in probability of termination given termination was increased, but, due to the high increase in probability of termination given termination was increased,
the mean probability of termination increases by `r stats$mean`. the mean probability of termination increases by `r .QuartoInlineRender(stats$mean (stats$stdev))`.
@ -1077,7 +1102,7 @@ quant_df <- data.frame(
) )
# Convert to LaTeX # Convert to LaTeX
table <- xtable(quant_df, table <- xtable(quant_df,
digits = rep(3, ncol(d) + 1), digits = rep(3, ncol(quant_df) + 1),
floating = FALSE, floating = FALSE,
latex.environments = NULL, latex.environments = NULL,
booktabs = TRUE booktabs = TRUE
@ -1096,7 +1121,7 @@ proportion_increase <- mean(pddf_ib$value >= 0)
about `r proportion_increase * 100` percent probability increase in the probability of terminations about `r .QuartoInlineRender(proportion_increase * 100)` percent probability increase in the probability of terminations
@ -1106,34 +1131,74 @@ k = 100
simulated_terminations_intervention <- mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_intervention))) simulated_terminations_intervention <- mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_intervention)))
simulated_terminations_base <-mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_default))) simulated_terminations_base <-mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_default)))
simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)/k simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)
``` ```
The simulation above shows that this results in a percentage-point change in terminations of about The simulation above shows that this results in a percentage-point change in terminations of about
`r simulated_percentages * 100`. `r .QuartoInlineRender(simulated_percentages * 100)`.
```{r}
distdiff_by_group <- pddf_ib %>%
group_by(category_name, category) %>%
summarize(
mean = mean(value, na.rm=TRUE) *100
,"P(p>0)" = mean(value>0) *100
,"5%" = quantile( value, 0.05, na.rm = TRUE)*100
,"25%" = quantile( value, 0.25, na.rm = TRUE)*100
,"median" = quantile( value, 0.5, na.rm = TRUE)*100
,"75%" = quantile( value, 0.75, na.rm = TRUE)*100
,"95%" = quantile( value, 0.95, na.rm = TRUE)*100
) %>% arrange(category) %>% select(!category)
distdif_by_group_latex<- xtable(distdiff_by_group,
digits = rep(2, ncol(distdiff_by_group) +1),
floating = FALSE,
align = "llccccccc",
latex.environments = "tabularx",
booktabs = TRUE
)
# Write to file
write_lines(
print(distdif_by_group_latex, include.rownames = FALSE),
paste0(image_root,"/distdiff_anr_vs_rec_by_group.tex")
)
print(distdif_by_group_latex)
distdiff_by_group
```
## fixed effects distributions ## fixed effects distributions
```{r} ```{r}
#| label: Fixed Effects Distributions
#Get dataframe with only the rows of interest #Get dataframe with only the rows of interest
filtdata <- as.data.frame(extract(fit, pars="status_diff")) filtdata <- as.data.frame(extract(fit, pars="status_diff"))
#rename columns #rename columns
dimnames(filtdata)[[2]] <- beta_list$groups dimnames(filtdata)[[2]] <- sprintf("%02d %s", seq_along(beta_list$groups), beta_list$groups)
#create area plot with appropriate title #create area plot with appropriate title
mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) + mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
ggtitle("Differences in Fixed Effects | By ICD-10 Category", ggtitle("Differences in Fixed Effects | By ICD-10 Category",
subtitle = "Moving from 'Active, not recruiting' to 'Recruiting'" subtitle = "Moving from 'Active, not recruiting' to 'Recruiting'"
) + ) +
geom_vline(xintercept=seq(-0.25,0.5,0.25),color="grey",alpha=0.750) geom_vline(xintercept=seq(-0.25,0.5,0.25),color="grey",alpha=0.750)
ggsave(paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.png")) ggsave(paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.png"), create.dir = TRUE)
d <- pivot_longer(filtdata, everything()) |> fixed_effects_table <- pivot_longer(filtdata, everything()) |>
group_by(name) |> group_by(name) |>
summarize( summarize(
`sample size` = 0,
mean = mean(value), mean = mean(value),
`P(≥0)` = mean(value >= 0), `P(≥0)` = mean(value >= 0),
`2.5%` = quantile(value, probs = 0.025), `2.5%` = quantile(value, probs = 0.025),
@ -1145,18 +1210,26 @@ d <- pivot_longer(filtdata, everything()) |>
`97.5%` = quantile(value, probs = 0.975) `97.5%` = quantile(value, probs = 0.975)
) )
# Convert the indexing to be consecutive by using a temporary vector
sample_sizes <- rep(0, 22) # Create vector of zeros
sample_sizes[category_count$category_id] <- category_count$n
# Now assign the whole vector at once
fixed_effects_table["sample size"] <- sample_sizes
# Rename the name column # Rename the name column
names(d)[1] <- "ICD-10 Category" names(fixed_effects_table)[1] <- "ICD-10 Category"
# Convert to LaTeX # Convert to LaTeX
table <- xtable(d, table <- xtable(fixed_effects_table,
digits = rep(3, ncol(d) + 1), digits = rep(2, ncol(fixed_effects_table) + 1),
floating = FALSE, floating = FALSE,
latex.environments = NULL, align = "lccccccccccc",
latex.environments = "tabularx",
booktabs = TRUE booktabs = TRUE
) )
# Write to file # Write to file
write_lines( write_lines(
print(table, include.rownames = FALSE), print(table, include.rownames = FALSE),
@ -1175,12 +1248,13 @@ write_lines(
```{r} ```{r}
#| eval: true #| label: diagnostics 1
#| eval: false
#trace plots #trace plots
image_diagnostics <- paste0(image_root,"/diagnostics") image_diagnostics <- paste0(image_root,"/diagnostics")
plot(fit, pars=c("mu"), plotfun="trace") plot(fit, pars=c("mu"), plotfun="trace")
ggsave(paste0(image_diagnostics,"/trace_plot_mu.png")) ggsave(paste0(image_diagnostics,"/trace_plot_mu.png"), create.dir = TRUE)
for (i in 1:3) { for (i in 1:3) {
@ -1199,14 +1273,15 @@ for (i in 1:3) {
) )
mu_range <- paste0(4*i-3,"-",4*i) mu_range <- paste0(4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/trace_rank_plot_mu_",mu_range,".png") filename <- paste0(image_diagnostics,"/trace_rank_plot_mu_",mu_range,".png")
ggsave(filename) ggsave(filename, create.dir = TRUE)
} }
``` ```
```{r} ```{r}
#| eval: true #| label: diagnostics 2
#| eval: false
plot(fit, pars=c("sigma"), plotfun="trace") plot(fit, pars=c("sigma"), plotfun="trace")
ggsave(paste0(image_diagnostics,"/traceplot_sigma.png")) ggsave(paste0(image_diagnostics,"/traceplot_sigma.png"), create.dir = TRUE)
for (i in 1:3) { for (i in 1:3) {
print( print(
@ -1224,12 +1299,13 @@ for (i in 1:3) {
) )
sigma_range <- paste0(4*i-3,"-",4*i) sigma_range <- paste0(4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/trace_rank_plot_sigma_",sigma_range,".png") filename <- paste0(image_diagnostics,"/trace_rank_plot_sigma_",sigma_range,".png")
ggsave(filename) ggsave(filename, create.dir = TRUE)
} }
``` ```
```{r} ```{r}
#| eval: true #| label: diagnostics 3
#| eval: false
#other diagnostics #other diagnostics
logpost <- log_posterior(fit) logpost <- log_posterior(fit)
nuts_prmts <- nuts_params(fit) nuts_prmts <- nuts_params(fit)
@ -1238,15 +1314,17 @@ posterior <- as.array(fit)
``` ```
```{r} ```{r}
#| eval: true #| label: diagnostics 4
#| eval: false
color_scheme_set("darkgray") color_scheme_set("darkgray")
div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4) div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4)
mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05) mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05)
ggsave(paste0(image_diagnostics,"/parcoord_mu.png")) ggsave(paste0(image_diagnostics,"/parcoord_mu.png"), create.dir = TRUE)
``` ```
```{r} ```{r}
#| eval: true #| label: diagnostics 5
#| eval: false
for (i in 1:3) { for (i in 1:3) {
mus = sapply(3:0, function(j) paste0("mu[",4*i-j ,"]")) mus = sapply(3:0, function(j) paste0("mu[",4*i-j ,"]"))
print( print(
@ -1262,7 +1340,7 @@ for (i in 1:3) {
) )
mu_range <- paste0(4*i-3,"-",4*i) mu_range <- paste0(4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/correlation_plot_mu_",mu_range,".png") filename <- paste0(image_diagnostics,"/correlation_plot_mu_",mu_range,".png")
ggsave(filename) ggsave(filename, create.dir = TRUE)
} }
@ -1270,13 +1348,15 @@ for (i in 1:3) {
``` ```
```{r} ```{r}
#| eval: true #| label: diagnostics 6
#| eval: false
mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05) mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05)
ggsave(paste0(image_diagnostics,"/parcoord_sigma.png")) ggsave(paste0(image_diagnostics,"/parcoord_sigma.png"), create.dir = TRUE)
``` ```
```{r} ```{r}
#| eval: true #| label: diagnostics 7
#| eval: false
for (i in 1:3) { for (i in 1:3) {
params = sapply(3:0, function(j) paste0("sigma[",4*i-j ,"]")) params = sapply(3:0, function(j) paste0("sigma[",4*i-j ,"]"))
@ -1293,12 +1373,13 @@ for (i in 1:3) {
) )
sigma_range <- paste0(4*i-3,"-",4*i) sigma_range <- paste0(4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/correlation_plot_sigma_",sigma_range,".png") filename <- paste0(image_diagnostics,"/correlation_plot_sigma_",sigma_range,".png")
ggsave(filename) ggsave(filename, create.dir = TRUE)
} }
``` ```
```{r} ```{r}
#| eval: true #| eval: false
#| label: diagnostics 8
for (k in 1:22) { for (k in 1:22) {
for (i in 1:3) { for (i in 1:3) {
params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]")) params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]"))
@ -1316,7 +1397,7 @@ for (i in 1:3) {
beta_range <- paste0("k_",k,"_i_",4*i-3,"-",4*i) beta_range <- paste0("k_",k,"_i_",4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/correlation_plot_beta_",beta_range,".png") filename <- paste0(image_diagnostics,"/correlation_plot_beta_",beta_range,".png")
ggsave(filename) ggsave(filename, create.dir = TRUE)
}} }}
``` ```

@ -0,0 +1,75 @@
---
title: "TrialCountExtraction"
author: "Will"
format: html
editor: source
---
```{r}
#| eval: false
#| include: true
#Full set
categories %>% unique() %>% sort() %>% length()
#Evaluation set
cf_categories %>% unique() %>% sort() %>% length()
```
```{r}
# Pulled from df
group_trials_by_category %>% group_by(category_id) %>% count()
```
# Actual data from Evaluation and counterfactual
```{r}
# Original Evaluation
# - Pulled from `categories` above when defined
counterfact_delay$ll %>% unique() %>% sort() %>% length()
# Counterfactual
# - Pulled from `cf_categories` above when defined
counterfact_delay$llx %>% unique() %>% sort() %>% length()
```
Those came from
```{r}
df$category_id %>% unique() %>% sort() %>% length()
df_counterfact_base$category_id %>% unique() %>% sort() %>% length()
```
The difference between those is that the counterfactual imposes the constraint
that there must be a snapshot where it moves from "ANR" to "Rec", implying that
it can't just terminate.
# Where do the other values drop
When we find the counterfactual, the table looses some of the categories etc.
Here is the extracted data
```{r}
data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference)
```
```{r}
pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) |>
pivot_longer(X1:X168) #CHANGE_NOTE: moved from X169 to X168
pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) counterfact_delay$llx[i])
pddf_ib["category_name"] <- sapply(
pddf_ib$category,
function(i) category_names[i]
)
```
and yet it seems that we predict the difference for all 168 trials
It looks like there is an error where I apply category IDs. Because I'm pulling them from
```{r}
ground_truth <- df$category_id[1:168]
```

File diff suppressed because it is too large Load Diff

@ -1,34 +0,0 @@
% latex table generated in R 4.4.2 by xtable 1.8-4 package
% Sun Feb 2 01:37:36 2025
\begin{table}[ht]
\centering
\begin{tabular}{lrrrrrrrrr}
\hline
ICD-10 Category & mean & P(≥0) & 2.5\% & 5\% & 25\% & 50\% median & 75\% & 95\% & 97.5\% \\
\hline
Blood \& Immune system & 0.061 & 0.624 & -0.333 & -0.267 & -0.071 & 0.061 & 0.194 & 0.388 & 0.454 \\
Circulatory & 0.037 & 0.577 & -0.362 & -0.293 & -0.094 & 0.038 & 0.170 & 0.363 & 0.426 \\
Congential & 0.063 & 0.628 & -0.332 & -0.264 & -0.068 & 0.063 & 0.195 & 0.391 & 0.456 \\
Contact with Healthcare & 0.063 & 0.627 & -0.335 & -0.267 & -0.070 & 0.064 & 0.194 & 0.390 & 0.454 \\
Digestive & 0.060 & 0.621 & -0.339 & -0.270 & -0.071 & 0.059 & 0.192 & 0.388 & 0.454 \\
Ear and Mastoid & 0.063 & 0.625 & -0.332 & -0.263 & -0.069 & 0.064 & 0.196 & 0.393 & 0.457 \\
Endocrine, Nutritional, and Metabolic & 0.177 & 0.820 & -0.201 & -0.137 & 0.045 & 0.173 & 0.304 & 0.505 & 0.573 \\
External Causes & 0.063 & 0.625 & -0.330 & -0.266 & -0.070 & 0.064 & 0.194 & 0.392 & 0.460 \\
Eye and Adnexa & 0.061 & 0.624 & -0.333 & -0.265 & -0.069 & 0.062 & 0.192 & 0.386 & 0.452 \\
Genitourinary & 0.063 & 0.627 & -0.337 & -0.270 & -0.070 & 0.065 & 0.195 & 0.389 & 0.456 \\
Infections \& Parasites & 0.135 & 0.772 & -0.223 & -0.163 & 0.013 & 0.134 & 0.257 & 0.438 & 0.498 \\
Injury etc. & 0.063 & 0.625 & -0.337 & -0.268 & -0.069 & 0.062 & 0.196 & 0.392 & 0.459 \\
Mental \& Behavioral & 0.142 & 0.767 & -0.239 & -0.175 & 0.010 & 0.140 & 0.270 & 0.469 & 0.537 \\
Musculoskeletal & 0.159 & 0.794 & -0.218 & -0.156 & 0.027 & 0.156 & 0.286 & 0.482 & 0.550 \\
Neoplasms & 0.211 & 0.902 & -0.107 & -0.056 & 0.099 & 0.208 & 0.320 & 0.489 & 0.546 \\
Nervous System & 0.009 & 0.524 & -0.383 & -0.315 & -0.120 & 0.011 & 0.140 & 0.327 & 0.389 \\
Perinatal Period & 0.062 & 0.626 & -0.338 & -0.270 & -0.068 & 0.062 & 0.194 & 0.390 & 0.457 \\
Pregancy, Childbirth, \& Puerperium & 0.061 & 0.622 & -0.336 & -0.270 & -0.071 & 0.061 & 0.194 & 0.392 & 0.458 \\
Respiratory & 0.061 & 0.622 & -0.335 & -0.269 & -0.072 & 0.061 & 0.195 & 0.391 & 0.458 \\
Skin \& Subcutaneaous tissue & 0.104 & 0.704 & -0.283 & -0.220 & -0.027 & 0.103 & 0.234 & 0.429 & 0.494 \\
Special Purposes & 0.062 & 0.625 & -0.335 & -0.268 & -0.070 & 0.062 & 0.194 & 0.391 & 0.456 \\
Symptoms, Signs etc. & 0.062 & 0.625 & -0.332 & -0.265 & -0.069 & 0.062 & 0.194 & 0.390 & 0.456 \\
\hline
\end{tabular}
\end{table}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 MiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 356 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 246 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 249 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 248 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 253 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 251 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 248 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 345 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 82 KiB

After

Width:  |  Height:  |  Size: 83 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 71 KiB

After

Width:  |  Height:  |  Size: 71 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 74 KiB

After

Width:  |  Height:  |  Size: 75 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 172 KiB

After

Width:  |  Height:  |  Size: 280 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 80 KiB

After

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 73 KiB

After

Width:  |  Height:  |  Size: 74 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 69 KiB

After

Width:  |  Height:  |  Size: 69 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 77 KiB

After

Width:  |  Height:  |  Size: 77 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 75 KiB

After

Width:  |  Height:  |  Size: 75 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 78 KiB

After

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 80 KiB

@ -1,5 +1,5 @@
% latex table generated in R 4.4.2 by xtable 1.8-4 package % latex table generated in R 4.5.1 by xtable 1.8-4 package
% Sun Feb 2 01:37:17 2025 % Mon Mar 23 21:59:57 2026
\begin{table}[ht] \begin{table}[ht]
\centering \centering
\begin{tabular}{r} \begin{tabular}{r}

@ -0,0 +1,26 @@
% latex table generated in R 4.5.1 by xtable 1.8-4 package
% Mon Mar 23 23:42:59 2026
\begin{table}[ht]
\centering
\begin{tabular}{lcccccccc}
\hline
category\_name & N & mean & P(p$>$0) & 5\% & 25\% & median & 75\% & 95\% \\
\hline
ICD-10 \#1: Infections \& Parasites (n=20) & XX & 1.60 & 77.23 & -1.79 & 0.11 & 1.19 & 2.77 & 6.30 \\
ICD-10 \#2: Neoplasms (n=49) & XX & 3.63 & 90.17 & -0.90 & 1.52 & 3.37 & 5.52 & 9.06 \\
ICD-10 \#3: Blood \& Immune system (n=1) & XX & 0.15 & 62.35 & -0.80 & -0.02 & 0.01 & 0.16 & 1.63 \\
ICD-10 \#4: Endocrine, Nutritional, and Metabolic (n=10) & XX & 2.23 & 82.05 & -1.75 & 0.52 & 2.03 & 3.77 & 6.89 \\
ICD-10 \#5: Mental \& Behavioral (n=8) & XX & 3.42 & 76.72 & -4.21 & 0.24 & 3.36 & 6.47 & 11.28 \\
ICD-10 \#6: Nervous System (n=8) & XX & 0.05 & 52.37 & -2.98 & -0.83 & 0.07 & 0.95 & 3.01 \\
ICD-10 \#7: Eye and Adnexa (n=11) & XX & 0.03 & 62.40 & -0.13 & -0.01 & 0.00 & 0.04 & 0.25 \\
ICD-10 \#9: Circulatory (n=3) & XX & 0.69 & 57.69 & -5.03 & -1.33 & 0.50 & 2.66 & 6.88 \\
ICD-10 \#10: Respiratory (n=7) & XX & 0.04 & 62.21 & -0.17 & -0.00 & 0.00 & 0.03 & 0.37 \\
ICD-10 \#11: Digestive (n=12) & XX & 0.02 & 62.13 & -0.10 & -0.00 & 0.00 & 0.02 & 0.18 \\
ICD-10 \#12: Skin \& Subcutaneaous tissue (n=9) & XX & 0.55 & 70.42 & -1.06 & -0.11 & 0.41 & 1.10 & 2.56 \\
ICD-10 \#13: Musculoskeletal (n=17) & XX & 1.16 & 79.42 & -1.13 & 0.18 & 1.02 & 2.02 & 3.93 \\
ICD-10 \#14: Genitourinary (n=2) & XX & 0.13 & 62.72 & -0.54 & -0.01 & 0.00 & 0.10 & 1.27 \\
ICD-10 \#17: Congential (n=3) & XX & 0.06 & 62.82 & -0.27 & -0.00 & 0.00 & 0.05 & 0.63 \\
\hline
\end{tabular}
\end{table}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 57 KiB

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 57 KiB

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 KiB

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 119 KiB

After

Width:  |  Height:  |  Size: 117 KiB

@ -1,4 +1,5 @@
Version: 1.0 Version: 1.0
ProjectId: af114abe-061f-43bf-8ae1-a953900da3ff
RestoreWorkspace: Yes RestoreWorkspace: Yes
SaveWorkspace: Ask SaveWorkspace: Ask

Loading…
Cancel
Save