Changes to enrollment delay without ICD10Group fix

I wanted to fix the ICD-10 groups with a record of other changes I made.
lognorm_sigma_prior
Will King 3 weeks ago
parent c9ce21a815
commit d753db81c7

@ -8,6 +8,8 @@ editor: source
# Setup
```{r}
library(knitr)
library(bayesplot)
@ -37,10 +39,15 @@ image_parameters_by_groups <-paste0(image_root,"/betas/parameters_by_group")
image_parameters_across_groups <-paste0(image_root,"/betas/parameter_across_groups")
```
run on `{r} now(tz='UTC')`
```{r}
################ Pull data from database ######################
library(RPostgreSQL)
host <- 'aact_db-restored-2025-01-07'
#host <-'aact_db-restored-2025-01-07'
host <- '10.89.0.6'
driver <- dbDriver("PostgreSQL")
@ -431,6 +438,7 @@ counterfact_delay <- list(
```
```{r}
#| label: Fitting
fit <- stan(
file='Hierarchal_Logistic.stan',
data = counterfact_delay,
@ -493,7 +501,7 @@ ggplot(data=df3, aes(x=count, fill=final_status)) +
geom_histogram(binwidth=1) +
ggtitle("Histogram of snapshots per trial (matched trials)") +
xlab("Snapshots per trial")
ggsave(paste0(image_trial_details,"/HistSnapshots.png"))
ggsave(paste0(image_trial_details,"/HistSnapshots.png"), create.dir = TRUE)
#Plot duration for terminated vs completed
df4 <- dbGetQuery(
@ -516,7 +524,7 @@ ggplot(data=df4, aes(x=duration,fill=overall_status)) +
ggtitle("Histogram of trial durations") +
xlab("duration")+
facet_wrap(~overall_status)
ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png"))
ggsave(paste0(image_trial_details,"/HistTrialDurations_Faceted.png"), create.dir = TRUE)
df5 <- dbGetQuery(
con,
@ -547,7 +555,7 @@ ggplot(data=df5, aes(x=duration,y=snapshot_count,color=overall_status)) +
ggtitle("Comparison of duration, status, and snapshot_count") +
xlab("duration") +
ylab("snapshot count")
ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png"))
ggsave(paste0(image_trial_details,"/SnapshotsVsDurationVsTermination.png"), create.dir = TRUE)
dbDisconnect(con)
@ -563,7 +571,7 @@ ggplot(data = group_trials_by_category, aes(x=category_id)) +
,x="Category ID"
,y="Count"
)
ggsave(paste0(image_trial_details,"/CategoryCounts.png"))
ggsave(paste0(image_trial_details,"/CategoryCounts.png"), create.dir = TRUE)
@ -573,8 +581,16 @@ cor_dur_count <- cor(df5$duration,df5$snapshot_count)
count_snapshots <- sum(df5$snapshot_count)
```
the correlation value is `r cor_dur_count` between duration and snapshot count.
There are `r count_snapshots` snapshots in total.
the correlation value is `{r} cor_dur_count` between duration and snapshot count.
There are `{r} count_snapshots` snapshots in total, spread over
`{r} length(df5%nct_id)` trials.
The number of categories by trial are
TODO: add in category names,
```{r}
group_trials_by_category |> count(category_id)
```
@ -595,7 +611,7 @@ for (i in category_count$category_id[category_count$n >= 0]) {
gi <- group_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
ggsave(
paste0(image_parameters_by_groups,"/group_",i,"_",gi$name,".png")
,plot=gi$plot
,plot=gi$plot, create.dir = TRUE
)
gx <- c(gx,gi)
@ -625,7 +641,7 @@ for (i in c(1,2,3,9,10,11,12)) {
pi <- parameter_mcmc_areas("beta",beta_list,fit,i) #add way to filter groups
ggsave(
paste0(image_parameters_across_groups,"/parameters_",i,"_",pi$name,".png")
,plot=pi$plot
,plot=pi$plot, create.dir = TRUE
)
px <- c(px,pi)
@ -685,7 +701,7 @@ ggplot(df_ib_p, aes(x=p_prior)) +
,x="Probability Domain 'p'"
,y="Probability Density"
)
ggsave(paste0(image_dist_diff_analysis,"/prior_p.png"))
ggsave(paste0(image_dist_diff_analysis,"/prior_p.png"), create.dir = TRUE)
#p_posterior
ggplot(df_ib_p, aes(x=p_predicted)) +
@ -696,7 +712,7 @@ ggplot(df_ib_p, aes(x=p_predicted)) +
,x="Probability Domain 'p'"
,y="Probability Density"
)
ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png"))
ggsave(paste0(image_dist_diff_analysis,"/posterior_p.png"), create.dir = TRUE)
#mu_prior
ggplot(df_ib_prior) +
@ -707,7 +723,7 @@ ggplot(df_ib_prior) +
,x="Mu"
,y="Probability"
)
ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png"))
ggsave(paste0(image_dist_diff_analysis,"/prior_mu.png"), create.dir = TRUE)
#sigma_posterior
ggplot(df_ib_prior) +
@ -718,7 +734,7 @@ ggplot(df_ib_prior) +
,x="Sigma"
,y="Probability"
)
ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"))
ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"), create.dir = TRUE)
```
@ -726,7 +742,7 @@ ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"))
### Intervention: Delay close of enrollment
# Intervention: Delay close of enrollment
```{r}
counterfact_predicted_ib <- data.frame(
@ -744,7 +760,7 @@ ggplot(counterfact_predicted_ib, aes(x=p_predicted_default)) +
,x="Probability Domain 'p'"
,y="Probability Density"
)
ggsave(paste0(image_dist_diff_analysis,"/p_no_intervention.png"))
ggsave(paste0(image_dist_diff_analysis,"/p_no_intervention.png"), create.dir = TRUE)
ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
geom_density() +
@ -754,7 +770,7 @@ ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
,x="Probability Domain 'p'"
,y="Probability Density"
)
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png"))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention.png"), create.dir = TRUE)
ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
geom_density() +
@ -764,7 +780,7 @@ ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
,x="Difference in 'p' under treatment"
,y="Probability Density"
)
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png"))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_1.png"), create.dir = TRUE)
```
@ -805,7 +821,7 @@ ggplot(pddf_ib, aes(x=value,)) +
,y = "Probability Density"
) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png"))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_styled.png"), create.dir = TRUE)
```
@ -827,7 +843,7 @@ ggplot(pddf_ib, aes(x=value,)) +
) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
theme(strip.text.x = element_text(size = 8))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png"))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_by_group.png"), create.dir = TRUE)
```
@ -849,7 +865,7 @@ ggplot(pddf_ib, aes(x=value,)) +
) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
theme(strip.text.x = element_text(size = 8))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_by_group.png"))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_by_group.png"), create.dir = TRUE)
```
@ -873,6 +889,7 @@ stats <- list(
q1 = quantile(pddf_ib$value, 0.25),
med = median(pddf_ib$value),
mean = mean(pddf_ib$value),
stdev = sd(pddf_ib$value),
q3 = quantile(pddf_ib$value, 0.75),
p90 = quantile(pddf_ib$value, 0.90),
p95 = quantile(pddf_ib$value, 0.95),
@ -928,7 +945,7 @@ p3 +
x = stats$mean,
y = stats$y_offset * 1.5
), aes(x = x, y = y))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png"))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_histdiff_boxplot.png"), create.dir = TRUE)
```
@ -988,7 +1005,7 @@ p4 <- ggplot(pddf_ib, aes(x = value)) +
x = stats$mean,
y = stats$y_offset_density * 1.5
), aes(x = x, y = y))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png"))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_distdiff_boxplot.png"), create.dir = TRUE)
p4
```
@ -1003,7 +1020,7 @@ p4
y = "Cumulative Proportion"
)
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png"))
ggsave(paste0(image_dist_diff_analysis,"/p_delay_intervention_cumulative_distdiff.png"), create.dir = TRUE)
```
@ -1018,18 +1035,18 @@ below_spike_band <- mean( pddf_ib$value <= -width/2)
# get mass above and mass below zero
mass_below_zero <- mean( pddf_ib$value <= 0)
```
Looking at the spike around zero, we find that `r spike_band_centered_zero*100`%
Looking at the spike around zero, we find that `{r} spike_band_centered_zero*100`%
of the probability mass is contained within the band from
[`r -width*100/2`,`r width*100/2`].
Additionally, there was `r above_spike_band*100`% of the probability above that
[`{r} -width/2`,`{r} width/2`].
Additionally, there was `{r} above_spike_band*100`% of the probability above that
-- representing those with a general increase in the probability of termination --
and `r below_spike_band*100`% of the probability mass below the band
and `{r} below_spike_band*100`% of the probability mass below the band
-- representing a decrease in the probability of termination.
On average, if you keep the trial open instead of closing it,
`r mass_below_zero`% of trials will see a decrease in the probability of termination,
`{r} mass_below_zero * 100`% of trials will see a decrease in the probability of termination,
but, due to the high increase in probability of termination given termination was increased,
the mean probability of termination increases by `r stats$mean`.
the mean probability of termination increases by `{r} stats$mean (stats$stdev)`.
```{r}
# 5%-iles
@ -1063,7 +1080,7 @@ kable(quant_df)
proportion_increase <- mean(pddf_ib$value >= 0)
```
about `r proportion_increase * 100` percent probability increase in the probability of terminations
about `{r} proportion_increase * 100` percent probability increase in the probability of terminations
```{r}
n = length(counterfact_predicted_ib$p_predicted_intervention)
@ -1071,11 +1088,45 @@ k = 100
simulated_terminations_intervention <- mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_intervention)))
simulated_terminations_base <-mean(rbinom(n,k,as.vector(counterfact_predicted_ib$p_predicted_default)))
simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)/k
simulated_percentages <- (simulated_terminations_intervention - simulated_terminations_base)
```
The simulation above shows that this results in a percentage-point change in terminations of about
`r simulated_percentages * 100`.
`{r} simulated_percentages * 100`.
```{r}
distdiff_by_group <- pddf_ib %>%
group_by(category_name, category) %>%
summarize(
mean = mean(value, na.rm=TRUE) *100
,"P(p>0)" = mean(value>0) *100
,"5%" = quantile( value, 0.05, na.rm = TRUE)*100
,"25%" = quantile( value, 0.25, na.rm = TRUE)*100
,"median" = quantile( value, 0.5, na.rm = TRUE)*100
,"75%" = quantile( value, 0.75, na.rm = TRUE)*100
,"95%" = quantile( value, 0.95, na.rm = TRUE)*100
) %>% arrange(category) %>% select(!category)
distdif_by_group_latex<- xtable(distdiff_by_group,
digits = rep(2, ncol(distdiff_by_group) +1),
floating = FALSE,
align = "llccccccc",
latex.environments = "tabularx",
booktabs = TRUE
)
# Write to file
write_lines(
print(distdif_by_group_latex, include.rownames = FALSE),
paste0(image_root,"/distdiff_anr_vs_rec_by_group.tex")
)
print(distdif_by_group_latex)
distdiff_by_group
```
## fixed effects distributions
@ -1090,7 +1141,7 @@ mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
subtitle = "Moving from 'Active, not recruiting' to 'Recruiting'"
) +
geom_vline(xintercept=seq(-0.25,0.5,0.25),color="grey",alpha=0.750)
ggsave(paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.png"))
ggsave(paste0(image_parameters_across_groups,"/fixed_effects_anr_vs_rec_by_group.png"), create.dir = TRUE)
fixed_effects_table <- pivot_longer(filtdata, everything()) |>
group_by(name) |>
@ -1118,7 +1169,7 @@ names(fixed_effects_table)[1] <- "ICD-10 Category"
# Convert to LaTeX
table <- xtable(fixed_effects_table,
digits = rep(3, ncol(fixed_effects_table) + 1),
digits = rep(2, ncol(fixed_effects_table) + 1),
floating = FALSE,
align = "lccccccccccc",
latex.environments = "tabularx",
@ -1141,12 +1192,13 @@ write_lines(
# Diagnostics
```{r}
#| eval: true
#| label: diagnostics 1
#| eval: false
#trace plots
image_diagnostics <- paste0(image_root,"/diagnostics")
plot(fit, pars=c("mu"), plotfun="trace")
ggsave(paste0(image_diagnostics,"/trace_plot_mu.png"))
ggsave(paste0(image_diagnostics,"/trace_plot_mu.png"), create.dir = TRUE)
for (i in 1:3) {
@ -1165,14 +1217,15 @@ for (i in 1:3) {
)
mu_range <- paste0(4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/trace_rank_plot_mu_",mu_range,".png")
ggsave(filename)
ggsave(filename, create.dir = TRUE)
}
```
```{r}
#| eval: true
#| label: diagnostics 2
#| eval: false
plot(fit, pars=c("sigma"), plotfun="trace")
ggsave(paste0(image_diagnostics,"/traceplot_sigma.png"))
ggsave(paste0(image_diagnostics,"/traceplot_sigma.png"), create.dir = TRUE)
for (i in 1:3) {
print(
@ -1190,12 +1243,13 @@ for (i in 1:3) {
)
sigma_range <- paste0(4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/trace_rank_plot_sigma_",sigma_range,".png")
ggsave(filename)
ggsave(filename, create.dir = TRUE)
}
```
```{r}
#| eval: true
#| label: diagnostics 3
#| eval: false
#other diagnostics
logpost <- log_posterior(fit)
nuts_prmts <- nuts_params(fit)
@ -1204,15 +1258,17 @@ posterior <- as.array(fit)
```
```{r}
#| eval: true
#| label: diagnostics 4
#| eval: false
color_scheme_set("darkgray")
div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4)
mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05)
ggsave(paste0(image_diagnostics,"/parcoord_mu.png"))
ggsave(paste0(image_diagnostics,"/parcoord_mu.png"), create.dir = TRUE)
```
```{r}
#| eval: true
#| label: diagnostics 5
#| eval: false
for (i in 1:3) {
mus = sapply(3:0, function(j) paste0("mu[",4*i-j ,"]"))
print(
@ -1228,7 +1284,7 @@ for (i in 1:3) {
)
mu_range <- paste0(4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/correlation_plot_mu_",mu_range,".png")
ggsave(filename)
ggsave(filename, create.dir = TRUE)
}
@ -1236,13 +1292,15 @@ for (i in 1:3) {
```
```{r}
#| eval: true
#| label: diagnostics 6
#| eval: false
mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05)
ggsave(paste0(image_diagnostics,"/parcoord_sigma.png"))
ggsave(paste0(image_diagnostics,"/parcoord_sigma.png"), create.dir = TRUE)
```
```{r}
#| eval: true
#| label: diagnostics 7
#| eval: false
for (i in 1:3) {
params = sapply(3:0, function(j) paste0("sigma[",4*i-j ,"]"))
@ -1259,13 +1317,14 @@ for (i in 1:3) {
)
sigma_range <- paste0(4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/correlation_plot_sigma_",sigma_range,".png")
ggsave(filename)
ggsave(filename, create.dir = TRUE)
}
```
```{r}
#| eval: true
#| eval: false
#| label: diagnostics 8
for (k in 1:22) {
for (i in 1:3) {
params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]"))
@ -1283,7 +1342,7 @@ for (i in 1:3) {
beta_range <- paste0("k_",k,"_i_",4*i-3,"-",4*i)
filename <- paste0(image_diagnostics,"/correlation_plot_beta_",beta_range,".png")
ggsave(filename)
ggsave(filename, create.dir = TRUE)
}}
```

Loading…
Cancel
Save