recording current model

check_reversion
Will King 3 years ago
parent eabf858955
commit 0d0225ecdb

@ -117,19 +117,10 @@ x["l_sdi_val"] <- asinh(df$l_sdi_val)
#Setup fixed effects
x["status_NYR"] <- ifelse(df["current_status"]=="Not yet recruiting",1,0)
x["status_EBI"] <- ifelse(df["current_status"]=="Enrolling by invitation",1,0)
#x["status_Rec"] <- ifelse(df["current_status"]=="Recruiting",1,0) # Base case is Recruiting
x["status_Rec"] <- ifelse(df["current_status"]=="Recruiting",1,0)
x["status_ANR"] <- ifelse(df["current_status"]=="Active, not recruiting",1,0)
#interaction terms for competitors
#x["ib*elapsed"] <- x["elapsed_duration"]*x["identical_brands"]
#x["bnc*elapsed"] <- x["elapsed_duration"] * x["brand_name_counts"]
#interaction terms for status effects
#x["sNYR*elapsed"] <- x["elapsed_duration"]*x["status_NYR"]
#x["sEBI*elapsed"] <- x["elapsed_duration"]*x["status_EBI"]
#x["sANR*elapsed"] <- x["elapsed_duration"]*x["status_ANR"]
y <- ifelse(df["final_status"]=="Terminated",1,0)
#get category list
@ -166,6 +157,7 @@ inherited_cols <- c(
,"l_sdi_val"
,"status_NYR"
,"status_EBI"
,"status_Rec"
,"status_ANR"
)
```
@ -212,15 +204,8 @@ beta_list <- list(
#Status
`9`="status_NYR",
`10`="status_EBI",
`11`="status_ANR",
#interactions for brands
`12`="ib*elapsed",
`13`="bnc*elapsed",
# interactions for status
`14`="sNYR*elapsed",
`15`="sEBI*elapsed",
`16`="sANR*elapsed"
`11`="status_Rec",
`12`="status_ANR"
)
)
@ -344,19 +329,66 @@ generated_ib <- gqs(
)
```
```{r}
df_ib_p <- data.frame(
p_prior=as.vector(extract(generated_ib, pars="p_prior")$p_prior)
,p_predicted = as.vector(extract(generated_ib, pars="p_predicted")$p_predicted)
)
df_ib_prior <- data.frame(
mu_prior = as.vector(extract(generated_ib, pars="mu_prior")$mu_prior)
,sigma_prior = as.vector(extract(generated_ib, pars="sigma_prior")$sigma_prior)
)
```{r}
hist(as.vector(extract(generated_ib, pars="p_prior")$p_prior))
hist(as.vector(extract(generated_ib, pars="mu_prior")$mu_prior), )
hist(as.vector(extract(generated_ib, pars="sigma_prior")$sigma_prior))
#p_prior
ggplot(df_ib_p, aes(x=p_prior)) +
geom_density() +
labs(
title="Implied Prior Distribution P"
,subtitle=""
,x="Probability Domain 'p'"
,y="Probability Density"
)
ggsave("./Images/DirectEffects/prior_p.png")
#p_posterior
ggplot(df_ib_p, aes(x=p_predicted)) +
geom_density() +
labs(
title="Implied Posterior Distribution P"
,subtitle=""
,x="Probability Domain 'p'"
,y="Probability Density"
)
ggsave("./Images/DirectEffects/posterior_p.png")
#mu_prior
ggplot(df_ib_prior) +
geom_density(aes(x=mu_prior)) +
labs(
title="Prior - Mu"
,subtitle="same prior for all Mu values"
,x="Mu"
,y="Probability"
)
ggsave("./Images/DirectEffects/prior_mu.png")
#sigma_posterior
ggplot(df_ib_prior) +
geom_density(aes(x=sigma_prior)) +
labs(
title="Prior - Sigma"
,subtitle="same prior for all Sigma values"
,x="Sigma"
,y="Probability"
)
ggsave("./Images/DirectEffects/prior_sigma.png")
```
```{r}
check_hmc_diagnostics(fit)
hist(as.vector(extract(generated_ib, pars="p_predicted")$p_predicted))
```
@ -365,13 +397,44 @@ hist(as.vector(extract(generated_ib, pars="p_predicted")$p_predicted))
### Intervention: Adding a single competitor
```{r}
#TODO: Convert to ggplot, stabilize y axis
hist(as.vector(extract(generated_ib, pars="p_predicted_default")$p_predicted_default))
hist(as.vector(extract(generated_ib, pars="p_predicted_intervention")$p_predicted_intervention))
hist(as.vector(extract(generated_ib, pars="predicted_difference")$predicted_difference))
```
counterfact_predicted_ib <- data.frame(
p_predicted_default = as.vector(extract(generated_ib, pars="p_predicted_default")$p_predicted_default)
,p_predicted_intervention = as.vector(extract(generated_ib, pars="p_predicted_intervention")$p_predicted_intervention)
,predicted_difference = as.vector(extract(generated_ib, pars="predicted_difference")$predicted_difference)
)
ggplot(counterfact_predicted_ib, aes(x=p_predicted_default)) +
geom_density() +
labs(
title="Predicted Distribution of 'p'"
,subtitle="Intervention: None"
,x="Probability Domain 'p'"
,y="Probability Density"
)
ggsave("./Images/DirectEffects/default_p_generic_intervention_base.png")
ggplot(counterfact_predicted_ib, aes(x=p_predicted_intervention)) +
geom_density() +
labs(
title="Predicted Distribution of 'p'"
,subtitle="Intervention: Add a single generic competitor"
,x="Probability Domain 'p'"
,y="Probability Density"
)
ggsave("./Images/DirectEffects/default_p_generic_intervention_interv.png")
ggplot(counterfact_predicted_ib, aes(x=predicted_difference)) +
geom_density() +
labs(
title="Predicted Distribution of differences 'p'"
,subtitle="Intervention: Add a single generic competitor"
,x="Difference in 'p' under treatment"
,y="Probability Density"
)
ggsave("./Images/DirectEffects/default_p_generic_intervention_distdiff.png")
```
```{r}
@ -383,20 +446,37 @@ pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predict
pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) df$category_id[i])
pddf_ib["category_name"] <- sapply(pddf_ib$category, function(i) beta_list$groups[i])
```
```{r}
ggplot(pddf_ib, aes(x=value,)) +
geom_histogram(bins=100) +
geom_density(bins=100) +
labs(
title = "Distribution of predicted differences"
,subtitle = "Intervention: add a single generic competitor"
,x = "Difference in probability due to intervention"
,y = "Predicted counts"
,y = "Probability Density"
) +
#xlim(-0.3,0.1) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
ggsave("./Images/DirectEffects/p_generic_intervention_distdiff_styled.png")
ggplot(pddf_ib, aes(x=value,)) +
geom_density(bins=100) +
facet_wrap(
~factor(
category_name,
levels=beta_list$groups
)
, labeller = label_wrap_gen(multi_line = TRUE)
, ncol=5) +
labs(
title = "Distribution of predicted differences | By Group"
,subtitle = "Intervention: add a single generic competitor"
,x = "Difference in probability due to intervention"
,y = "Probability Density"
) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
theme(strip.text.x = element_text(size = 8))
ggsave("./Images/DirectEffects/p_generic_intervention_distdiff_by_group.png")
ggplot(pddf_ib, aes(x=value,)) +
geom_histogram(bins=100) +
@ -408,15 +488,15 @@ ggplot(pddf_ib, aes(x=value,)) +
, labeller = label_wrap_gen(multi_line = TRUE)
, ncol=5) +
labs(
title = "Distribution of predicted differences",
subtitle = "By group"
title = "Histogram of predicted differences | By Group"
,subtitle = "Intervention: add a single generic competitor"
,x = "Difference in probability due to intervention"
,y = "Predicted counts"
) +
#xlim(-0.25,0.1) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
theme(strip.text.x = element_text(size = 8))
ggsave("./Images/DirectEffects/p_generic_intervention_histdiff_by_group.png")
```
@ -458,11 +538,44 @@ generated_bnc <- gqs(
)
```
```{r}
#TODO: Convert to ggplot, stabilize y axis
hist(as.vector(extract(generated_bnc, pars="p_predicted_default")$p_predicted_default), bins=100)
hist(as.vector(extract(generated_bnc, pars="p_predicted_intervention")$p_predicted_intervention), bins=100)
hist(as.vector(extract(generated_bnc, pars="predicted_difference")$predicted_difference), bins=100)
counterfact_predicted_bnc <- data.frame(
p_predicted_default = as.vector(extract(generated_bnc, pars="p_predicted_default")$p_predicted_default)
,p_predicted_intervention = as.vector(extract(generated_bnc, pars="p_predicted_intervention")$p_predicted_intervention)
,predicted_difference = as.vector(extract(generated_bnc, pars="predicted_difference")$predicted_difference)
)
ggplot(counterfact_predicted_bnc, aes(x=p_predicted_default)) +
geom_density() +
labs(
title="Predicted Distribution of 'p'"
,subtitle="Intervention: None"
,x="Probability Domain 'p'"
,y="Probability Density"
)
ggsave("./Images/DirectEffects/default_p_uspdc_intervention_base.png")
ggplot(counterfact_predicted_bnc, aes(x=p_predicted_intervention)) +
geom_density() +
labs(
title="Predicted Distribution of 'p'"
,subtitle="Intervention: Add a single USP DC competitor"
,x="Probability Domain 'p'"
,y="Probability Density"
)
ggsave("./Images/DirectEffects/default_p_uspdc_intervention_interv.png")
ggplot(counterfact_predicted_bnc, aes(x=predicted_difference)) +
geom_density() +
labs(
title="Predicted Distribution of differences 'p'"
,subtitle="Intervention: Add a single USP DC competitor"
,x="Difference in 'p' under treatment"
,y="Probability Density"
)
ggsave("./Images/DirectEffects/default_p_uspdc_intervention_distdiff.png")
```
@ -475,20 +588,37 @@ pddf_bnc["entry_idx"] <- as.numeric(gsub("\\D","",pddf_bnc$name))
pddf_bnc["category"] <- sapply(pddf_bnc$entry_idx, function(i) df$category_id[i])
pddf_bnc["category_name"] <- sapply(pddf_bnc$category, function(i) beta_list$groups[i])
```
```{r}
ggplot(pddf_bnc, aes(x=value,)) +
geom_histogram(bins=100) +
geom_density(bins=100) +
labs(
title = "Distribution of predicted differences"
,subtitle = "Intervention: add a single USP DC competitor"
,x = "Difference in probability due to intervention"
,y = "Predicted counts"
,y = "Probability Density"
) +
#xlim(-0.3,0.1) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
ggsave("./Images/DirectEffects/p_uspdc_intervention_distdiff_styled.png")
ggplot(pddf_bnc, aes(x=value,)) +
geom_density(bins=100) +
facet_wrap(
~factor(
category_name,
levels=beta_list$groups
)
, labeller = label_wrap_gen(multi_line = TRUE)
, ncol=5) +
labs(
title = "Distribution of predicted differences in 'p' | By Group"
,subtitle = "Intervention: add a single USP DC competitor"
,x = "Difference in probability due to intervention"
,y = "Probability Density"
) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
theme(strip.text.x = element_text(size = 8))
ggsave("./Images/DirectEffects/p_uspdc_intervention_distdiff_by_group.png")
ggplot(pddf_bnc, aes(x=value,)) +
geom_histogram(bins=100) +
@ -500,17 +630,18 @@ ggplot(pddf_bnc, aes(x=value,)) +
, labeller = label_wrap_gen(multi_line = TRUE)
, ncol=5) +
labs(
title = "Distribution of predicted differences",
subtitle = "By group"
title = "Histogram of predicted differences in 'p' | By Group"
,subtitle = "Intervention: add a single USP DC competitor"
,x = "Difference in probability due to intervention"
,y = "Predicted counts"
) +
#xlim(-0.25,0.1) +
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
theme(strip.text.x = element_text(size = 8))
ggsave("./Images/DirectEffects/p_uspdc_intervention_histdiff_by_group.png")
```
TODO: add density plot of (x,y,z) (date,value,counts)
- with and without faceting
@ -539,6 +670,7 @@ ggplot(data=df3, aes(x=count, fill=final_status)) +
geom_histogram(binwidth=1) +
ggtitle("Histogram of snapshots per trial (matched trials)") +
xlab("Snapshots per trial")
ggsave("./Images/HistSnapshots.png")
#Plot duration for terminated vs completed
df4 <- dbGetQuery(
@ -561,6 +693,7 @@ ggplot(data=df4, aes(x=duration,fill=overall_status)) +
ggtitle("Histogram of trial durations") +
xlab("duration")+
facet_wrap(~overall_status)
ggsave("./Images/HistTrialDurations_Faceted.png")
df5 <- dbGetQuery(
con,
@ -585,23 +718,29 @@ df5 <- dbGetQuery(
;"
)
df5$overall_status <- as.factor(df5$overall_status)
#df5 <- fetch(query5, n = -1)
ggplot(data=df5, aes(x=duration,y=snapshot_count,color=overall_status)) +
geom_jitter() +
ggtitle("duration, status, and snapshot_count") +
ggtitle("Comparison of duration, status, and snapshot_count") +
xlab("duration") +
ylab("snapshot count")
ggsave("./Images/SnapshotsVsDurationVsTermination.png")
dbDisconnect(con)
#get number of trials and snapshots in each category
group_trials_by_category <- as.data.frame(aggregate(category_id ~ nct_id, df, max))
group_trials_by_category <- as.data.frame(group_trials_by_category)
ggplot(data = group_trials_by_category, aes(x=category_id)) +
geom_histogram(binwidth=1,color="black",fill="seagreen") +
scale_x_continuous(breaks=scales::pretty_breaks(n=22))
geom_bar(binwidth=1,color="black",fill="seagreen") +
scale_x_continuous(breaks=scales::pretty_breaks(n=22)) +
labs(
title="bar chart of trial categories"
,x="Category ID"
,y="Count"
)
ggsave("./Images/CategoryCounts.png")
@ -616,6 +755,7 @@ summary(df5)
# Diagnostics
```{r}
#| eval: false
#trace plots
plot(fit, pars=c("mu"), plotfun="trace")
@ -638,6 +778,7 @@ for (i in 1:4) {
```
```{r}
#| eval: false
plot(fit, pars=c("sigma"), plotfun="trace")
for (i in 1:4) {
@ -658,6 +799,7 @@ for (i in 1:4) {
```
```{r}
#| eval: false
#other diagnostics
logpost <- log_posterior(fit)
nuts_prmts <- nuts_params(fit)
@ -666,12 +808,14 @@ posterior <- as.array(fit)
```
```{r}
#| eval: false
color_scheme_set("darkgray")
div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4)
mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05)
```
```{r}
#| eval: false
for (i in 1:4) {
mus = sapply(3:0, function(j) paste0("mu[",4*i-j ,"]"))
print(
@ -692,10 +836,12 @@ for (i in 1:4) {
```
```{r}
#| eval: false
mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05)
```
```{r}
#| eval: false
for (i in 1:4) {
params = sapply(3:0, function(j) paste0("sigma[",4*i-j ,"]"))
@ -715,6 +861,7 @@ for (i in 1:4) {
```{r}
#| eval: false
for (k in 1:22) {
for (i in 1:4) {
params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]"))
@ -750,9 +897,6 @@ I wonder if a lot of the variance is due to the 2 values that are sitting out.
```{r}
#mcmc_intervals(fit, pars=get_parameters("beta",beta_list)$param_name)
```
### Investigating parameter distributions
@ -782,21 +926,24 @@ I wonder if a lot of the variance is due to the 2 values that are sitting out.
p1 <- parameter_mcmc_areas("beta",beta_list,fit,1)
ggsave("./Images/DirectEffects/Parameters/01_elapsed_duration.png")
p2 <- parameter_mcmc_areas("beta",beta_list,fit,2)
ggsave("./Images/DirectEffects/Parameters/02_generic.png")
p3 <- parameter_mcmc_areas("beta",beta_list,fit,3)
ggsave("./Images/DirectEffects/Parameters/03_uspdc.png")
#p4 <- parameter_mcmc_areas("beta",beta_list,fit,4)
#p5 <- parameter_mcmc_areas("beta",beta_list,fit,5)
#p6 <- parameter_mcmc_areas("beta",beta_list,fit,6)
#p7 <- parameter_mcmc_areas("beta",beta_list,fit,7)
#p8 <- parameter_mcmc_areas("beta",beta_list,fit,8)
p9 <- parameter_mcmc_areas("beta",beta_list,fit,9)
ggsave("./Images/DirectEffects/Parameters/09_NYR.png")
p10 <- parameter_mcmc_areas("beta",beta_list,fit,10)
ggsave("./Images/DirectEffects/Parameters/10_EBI.png")
p11 <- parameter_mcmc_areas("beta",beta_list,fit,11)
ggsave("./Images/DirectEffects/Parameters/11_Rec.png")
p12 <- parameter_mcmc_areas("beta",beta_list,fit,12)
p13 <- parameter_mcmc_areas("beta",beta_list,fit,13)
p14 <- parameter_mcmc_areas("beta",beta_list,fit,14)
p15 <- parameter_mcmc_areas("beta",beta_list,fit,15)
p16 <- parameter_mcmc_areas("beta",beta_list,fit,16)
ggsave("./Images/DirectEffects/Parameters/12_ANR.png")
```
Note these have 95% outer CI and 80% inner (shaded)
@ -812,73 +959,25 @@ Note these have 95% outer CI and 80% inner (shaded)
8) "asinh(Low SDI)",
9) "status_NYR",
10) "status_EBI",
11) "status_ANR",
12) "ib*elapsed",
13) "bnc*elapsed",
14) "sNYR*elapsed",
15) "sEBI*elapsed",
16) "sANR*elapsed"
of interest
- p1 + p2
- p3 + p2
- p2 + p12
- p3 + p13
- p9 + p14
- p10 + p15
- p11 + p16
```{r}
p1 + p2
```
```{r}
p2 + p3
```
```{r}
p2 + p12
```
```{r}
p3 + p13
```
```{r}
p9 + p14
```
11) "status_Rec",
12) "status_ANR",
```{r}
p10 + p15
```
```{r}
p11 + p16
```
# Posterior Prediction
```{r}
#TODO: Convert to ggplot, stabilize y axis
hist(as.vector(extract(generated, pars="p_predicted_default")$p_predicted_default))
hist(as.vector(extract(generated, pars="p_predicted_intervention")$p_predicted_intervention))
p2 + p3
ggsave("./Images/DirectEffects/Parameters/2+3_generic_and_uspdc.png")
```
## Distribution of Predicted Differences
### Intervention: Marginal increase in time to finish enrollment
```{r}
#| eval: false
pddf <- data.frame(extract(generated, pars="predicted_difference")$predicted_difference) |> pivot_longer(X1:X189)
pddf["entry_idx"] <- as.numeric(gsub("\\D","",pddf$name))

Loading…
Cancel
Save