|
|
|
@ -122,13 +122,13 @@ x["status_ANR"] <- ifelse(df["current_status"]=="Active, not recruiting",1,0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#interaction terms for competitors
|
|
|
|
#interaction terms for competitors
|
|
|
|
x["ib*elapsed"] <- x["elapsed_duration"]*x["identical_brands"]
|
|
|
|
#x["ib*elapsed"] <- x["elapsed_duration"]*x["identical_brands"]
|
|
|
|
x["bnc*elapsed"] <- x["elapsed_duration"] * x["brand_name_counts"]
|
|
|
|
#x["bnc*elapsed"] <- x["elapsed_duration"] * x["brand_name_counts"]
|
|
|
|
|
|
|
|
|
|
|
|
#interaction terms for status effects
|
|
|
|
#interaction terms for status effects
|
|
|
|
x["sNYR*elapsed"] <- x["elapsed_duration"]*x["status_NYR"]
|
|
|
|
#x["sNYR*elapsed"] <- x["elapsed_duration"]*x["status_NYR"]
|
|
|
|
x["sEBI*elapsed"] <- x["elapsed_duration"]*x["status_EBI"]
|
|
|
|
#x["sEBI*elapsed"] <- x["elapsed_duration"]*x["status_EBI"]
|
|
|
|
x["sANR*elapsed"] <- x["elapsed_duration"]*x["status_ANR"]
|
|
|
|
#x["sANR*elapsed"] <- x["elapsed_duration"]*x["status_ANR"]
|
|
|
|
|
|
|
|
|
|
|
|
y <- ifelse(df["final_status"]=="Terminated",1,0)
|
|
|
|
y <- ifelse(df["final_status"]=="Terminated",1,0)
|
|
|
|
|
|
|
|
|
|
|
|
@ -147,100 +147,372 @@ y <- train$y
|
|
|
|
```
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Fit Model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
```{r}
|
|
|
|
# get data for counterfactuals
|
|
|
|
################################# FIT MODEL #########################################
|
|
|
|
get_counterfactuals <- function(driver) {
|
|
|
|
inherited_cols <- c(
|
|
|
|
con <- dbConnect(
|
|
|
|
"elapsed_duration"
|
|
|
|
driver,
|
|
|
|
#,"identical_brands"
|
|
|
|
user='root',
|
|
|
|
#,"brand_name_counts"
|
|
|
|
password='root',
|
|
|
|
,"h_sdi_val"
|
|
|
|
dbname='aact_db',
|
|
|
|
,"hm_sdi_val"
|
|
|
|
host='will-office'
|
|
|
|
,"m_sdi_val"
|
|
|
|
|
|
|
|
,"lm_sdi_val"
|
|
|
|
|
|
|
|
,"l_sdi_val"
|
|
|
|
|
|
|
|
,"status_NYR"
|
|
|
|
|
|
|
|
,"status_EBI"
|
|
|
|
|
|
|
|
,"status_ANR"
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
beta_list <- list(
|
|
|
|
|
|
|
|
groups = c(
|
|
|
|
|
|
|
|
`1`="Infections & Parasites",
|
|
|
|
|
|
|
|
`2`="Neoplasms",
|
|
|
|
|
|
|
|
`3`="Blood & Immune system",
|
|
|
|
|
|
|
|
`4`="Endocrine, Nutritional, and Metabolic",
|
|
|
|
|
|
|
|
`5`="Mental & Behavioral",
|
|
|
|
|
|
|
|
`6`="Nervous System",
|
|
|
|
|
|
|
|
`7`="Eye and Adnexa",
|
|
|
|
|
|
|
|
`8`="Ear and Mastoid",
|
|
|
|
|
|
|
|
`9`="Circulatory",
|
|
|
|
|
|
|
|
`10`="Respiratory",
|
|
|
|
|
|
|
|
`11`="Digestive",
|
|
|
|
|
|
|
|
`12`="Skin & Subcutaneaous tissue",
|
|
|
|
|
|
|
|
`13`="Musculoskeletal",
|
|
|
|
|
|
|
|
`14`="Genitourinary",
|
|
|
|
|
|
|
|
`15`="Pregancy, Childbirth, & Puerperium",
|
|
|
|
|
|
|
|
`16`="Perinatal Period",
|
|
|
|
|
|
|
|
`17`="Congential",
|
|
|
|
|
|
|
|
`18`="Symptoms, Signs etc.",
|
|
|
|
|
|
|
|
`19`="Injury etc.",
|
|
|
|
|
|
|
|
`20`="External Causes",
|
|
|
|
|
|
|
|
`21`="Contact with Healthcare",
|
|
|
|
|
|
|
|
`22`="Special Purposes"
|
|
|
|
|
|
|
|
),
|
|
|
|
|
|
|
|
parameters = c(
|
|
|
|
|
|
|
|
`1`="Elapsed Duration",
|
|
|
|
|
|
|
|
# brands
|
|
|
|
|
|
|
|
`2`="asinh(Generic Brands)",
|
|
|
|
|
|
|
|
`3`="asinh(Competitors USPDC)",
|
|
|
|
|
|
|
|
# population
|
|
|
|
|
|
|
|
`4`="asinh(High SDI)",
|
|
|
|
|
|
|
|
`5`="asinh(High-Medium SDI)",
|
|
|
|
|
|
|
|
`6`="asinh(Medium SDI)",
|
|
|
|
|
|
|
|
`7`="asinh(Low-Medium SDI)",
|
|
|
|
|
|
|
|
`8`="asinh(Low SDI)",
|
|
|
|
|
|
|
|
#Status
|
|
|
|
|
|
|
|
`9`="status_NYR",
|
|
|
|
|
|
|
|
`10`="status_EBI",
|
|
|
|
|
|
|
|
`11`="status_ANR",
|
|
|
|
|
|
|
|
#interactions for brands
|
|
|
|
|
|
|
|
`12`="ib*elapsed",
|
|
|
|
|
|
|
|
`13`="bnc*elapsed",
|
|
|
|
|
|
|
|
# interactions for status
|
|
|
|
|
|
|
|
`14`="sNYR*elapsed",
|
|
|
|
|
|
|
|
`15`="sEBI*elapsed",
|
|
|
|
|
|
|
|
`16`="sANR*elapsed"
|
|
|
|
|
|
|
|
|
|
|
|
)
|
|
|
|
)
|
|
|
|
on.exit(dbDisconnect(con))
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
get_parameters <- function(stem,class_list) {
|
|
|
|
|
|
|
|
#get categories and lengths
|
|
|
|
|
|
|
|
named <- names(class_list)
|
|
|
|
|
|
|
|
lengths <- sapply(named, (function (x) length(class_list[[x]])))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#describe the grid needed
|
|
|
|
|
|
|
|
iter_list <- sapply(named, (function (x) 1:lengths[x]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#generate the list of parameters
|
|
|
|
|
|
|
|
pardf <- generate_parameter_df(stem, iter_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#add columns with appropriate human-readable names
|
|
|
|
|
|
|
|
for (name in named) {
|
|
|
|
|
|
|
|
pardf[paste(name,"_hr",sep="")] <- as.factor(
|
|
|
|
|
|
|
|
sapply(pardf[name], (function (i) class_list[[name]][i]))
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return(pardf)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
generate_parameter_df <- function(stem, iter_list) {
|
|
|
|
|
|
|
|
grid <- expand.grid(iter_list)
|
|
|
|
|
|
|
|
grid["param_name"] <- grid %>% unite(x,colnames(grid),sep=",")
|
|
|
|
|
|
|
|
grid["param_name"] <- paste(stem,"[",grid$param_name,"]",sep="")
|
|
|
|
|
|
|
|
return(grid)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
group_mcmc_areas <- function(
|
|
|
|
|
|
|
|
stem,# = "beta"
|
|
|
|
|
|
|
|
class_list,# = beta_list
|
|
|
|
|
|
|
|
stanfit,# = fit
|
|
|
|
|
|
|
|
group_id,# = 2
|
|
|
|
|
|
|
|
rename=TRUE
|
|
|
|
|
|
|
|
) {
|
|
|
|
|
|
|
|
#get all parameter names
|
|
|
|
|
|
|
|
params <- get_parameters(stem,class_list)
|
|
|
|
|
|
|
|
#filter down to parameters of interest
|
|
|
|
|
|
|
|
params <- filter(params,groups == group_id)
|
|
|
|
|
|
|
|
#Get dataframe with only the rows of interest
|
|
|
|
|
|
|
|
filtdata <- as.data.frame(stanfit)[params$param_name]
|
|
|
|
|
|
|
|
#rename columns
|
|
|
|
|
|
|
|
if (rename) dimnames(filtdata)[[2]] <- params$parameters_hr
|
|
|
|
|
|
|
|
#get group name for title
|
|
|
|
|
|
|
|
group_name <- class_list$groups[group_id]
|
|
|
|
|
|
|
|
#create area plot with appropriate title
|
|
|
|
|
|
|
|
mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
|
|
|
|
|
|
|
|
ggtitle(paste("Parameter distributions for ICD-10 class:",group_name))
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
parameter_mcmc_areas <- function(
|
|
|
|
|
|
|
|
stem,# = "beta"
|
|
|
|
|
|
|
|
class_list,# = beta_list
|
|
|
|
|
|
|
|
stanfit,# = fit
|
|
|
|
|
|
|
|
parameter_id,# = 2
|
|
|
|
|
|
|
|
rename=TRUE
|
|
|
|
|
|
|
|
) {
|
|
|
|
|
|
|
|
#get all parameter names
|
|
|
|
|
|
|
|
params <- get_parameters(stem,class_list)
|
|
|
|
|
|
|
|
#filter down to parameters of interest
|
|
|
|
|
|
|
|
params <- filter(params,parameters == parameter_id)
|
|
|
|
|
|
|
|
#Get dataframe with only the rows of interest
|
|
|
|
|
|
|
|
filtdata <- as.data.frame(stanfit)[params$param_name]
|
|
|
|
|
|
|
|
#rename columns
|
|
|
|
|
|
|
|
if (rename) dimnames(filtdata)[[2]] <- params$groups_hr
|
|
|
|
|
|
|
|
#get group name for title
|
|
|
|
|
|
|
|
parameter_name <- class_list$parameters[parameter_id]
|
|
|
|
|
|
|
|
#create area plot with appropriate title
|
|
|
|
|
|
|
|
mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
|
|
|
|
|
|
|
|
ggtitle(parameter_name,"Parameter Distribution")
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
#generics intervention
|
|
|
|
|
|
|
|
brand_intervention_ib <- x[c(inherited_cols,"brand_name_counts")]
|
|
|
|
|
|
|
|
brand_intervention_ib["identical_brands"] <- asinh(sinh(x$identical_brands)+1) #add a single generic brand
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
counterfact_marketing_ib <- list(
|
|
|
|
|
|
|
|
D = ncol(x),#
|
|
|
|
|
|
|
|
N = nrow(x),
|
|
|
|
|
|
|
|
L = n_categories$count,
|
|
|
|
|
|
|
|
y = as.vector(y),
|
|
|
|
|
|
|
|
ll = as.vector(categories),
|
|
|
|
|
|
|
|
x = as.matrix(x),
|
|
|
|
|
|
|
|
mu_mean = 0,
|
|
|
|
|
|
|
|
mu_stdev = 0.05,
|
|
|
|
|
|
|
|
sigma_shape = 4,
|
|
|
|
|
|
|
|
sigma_rate = 20,
|
|
|
|
|
|
|
|
Nx = nrow(x),
|
|
|
|
|
|
|
|
llx = as.vector(categories),
|
|
|
|
|
|
|
|
counterfact_x_tilde = as.matrix(brand_intervention_ib),
|
|
|
|
|
|
|
|
counterfact_x = as.matrix(x)
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
fit <- stan(
|
|
|
|
|
|
|
|
file='Hierarchal_Logistic.stan',
|
|
|
|
|
|
|
|
data = counterfact_marketing_ib,
|
|
|
|
|
|
|
|
chains = 4,
|
|
|
|
|
|
|
|
iter = 5000,
|
|
|
|
|
|
|
|
seed = 11021585
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
generated_ib <- gqs(
|
|
|
|
|
|
|
|
fit@stanmodel,
|
|
|
|
|
|
|
|
data=counterfact_marketing_ib,
|
|
|
|
|
|
|
|
draws=as.matrix(fit),
|
|
|
|
|
|
|
|
seed=11021585
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_ib, pars="p_prior")$p_prior))
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_ib, pars="mu_prior")$mu_prior), )
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_ib, pars="sigma_prior")$sigma_prior))
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
check_hmc_diagnostics(fit)
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_ib, pars="p_predicted")$p_predicted))
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
### Intervention: Adding a single competitor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
#TODO: Convert to ggplot, stabilize y axis
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_ib, pars="p_predicted_default")$p_predicted_default))
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_ib, pars="p_predicted_intervention")$p_predicted_intervention))
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_ib, pars="predicted_difference")$predicted_difference))
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) |>
|
|
|
|
|
|
|
|
pivot_longer(X1:X1343)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#TODO: Fix Category names
|
|
|
|
|
|
|
|
pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
|
|
|
|
|
|
|
|
pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) df$category_id[i])
|
|
|
|
|
|
|
|
pddf_ib["category_name"] <- sapply(pddf_ib$category, function(i) beta_list$groups[i])
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ggplot(pddf_ib, aes(x=value,)) +
|
|
|
|
|
|
|
|
geom_histogram(bins=100) +
|
|
|
|
|
|
|
|
labs(
|
|
|
|
|
|
|
|
title = "Distribution of predicted differences"
|
|
|
|
|
|
|
|
,x = "Difference in probability due to intervention"
|
|
|
|
|
|
|
|
,y = "Predicted counts"
|
|
|
|
|
|
|
|
) +
|
|
|
|
|
|
|
|
#xlim(-0.3,0.1) +
|
|
|
|
|
|
|
|
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ggplot(pddf_ib, aes(x=value,)) +
|
|
|
|
|
|
|
|
geom_histogram(bins=100) +
|
|
|
|
|
|
|
|
facet_wrap(
|
|
|
|
|
|
|
|
~factor(
|
|
|
|
|
|
|
|
category_name,
|
|
|
|
|
|
|
|
levels=beta_list$groups
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
, labeller = label_wrap_gen(multi_line = TRUE)
|
|
|
|
|
|
|
|
, ncol=5) +
|
|
|
|
|
|
|
|
labs(
|
|
|
|
|
|
|
|
title = "Distribution of predicted differences",
|
|
|
|
|
|
|
|
subtitle = "By group"
|
|
|
|
|
|
|
|
,x = "Difference in probability due to intervention"
|
|
|
|
|
|
|
|
,y = "Predicted counts"
|
|
|
|
|
|
|
|
) +
|
|
|
|
|
|
|
|
#xlim(-0.25,0.1) +
|
|
|
|
|
|
|
|
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
|
|
|
|
|
|
|
|
theme(strip.text.x = element_text(size = 8))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#### USP DC
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
#formulary intervention
|
|
|
|
|
|
|
|
brand_intervention_bnc <- x[c(inherited_cols,"identical_brands")]
|
|
|
|
|
|
|
|
brand_intervention_bnc["brand_name_counts"] <- asinh(sinh(x$brand_name_counts)+1) #add a single formulary competitor brand
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
counterfact_marketing_bnc <- list(
|
|
|
|
|
|
|
|
D = ncol(x),#
|
|
|
|
|
|
|
|
N = nrow(x),
|
|
|
|
|
|
|
|
L = n_categories$count,
|
|
|
|
|
|
|
|
y = as.vector(y),
|
|
|
|
|
|
|
|
ll = as.vector(categories),
|
|
|
|
|
|
|
|
x = as.matrix(x),
|
|
|
|
|
|
|
|
mu_mean = 0,
|
|
|
|
|
|
|
|
mu_stdev = 0.05,
|
|
|
|
|
|
|
|
sigma_shape = 4,
|
|
|
|
|
|
|
|
sigma_rate = 20,
|
|
|
|
|
|
|
|
Nx = nrow(x),
|
|
|
|
|
|
|
|
llx = as.vector(categories),
|
|
|
|
|
|
|
|
counterfact_x_tilde = as.matrix(brand_intervention_bnc),
|
|
|
|
|
|
|
|
counterfact_x = as.matrix(x)
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
generated_bnc <- gqs(
|
|
|
|
|
|
|
|
fit@stanmodel,
|
|
|
|
|
|
|
|
data=counterfact_marketing_bnc,
|
|
|
|
|
|
|
|
draws=as.matrix(fit),
|
|
|
|
|
|
|
|
seed=11021585
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
#TODO: Convert to ggplot, stabilize y axis
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_bnc, pars="p_predicted_default")$p_predicted_default), bins=100)
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_bnc, pars="p_predicted_intervention")$p_predicted_intervention), bins=100)
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_bnc, pars="predicted_difference")$predicted_difference), bins=100)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
pddf_bnc <- data.frame(extract(generated_bnc, pars="predicted_difference")$predicted_difference) |>
|
|
|
|
|
|
|
|
pivot_longer(X1:X1343)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#Add Category names
|
|
|
|
|
|
|
|
pddf_bnc["entry_idx"] <- as.numeric(gsub("\\D","",pddf_bnc$name))
|
|
|
|
|
|
|
|
pddf_bnc["category"] <- sapply(pddf_bnc$entry_idx, function(i) df$category_id[i])
|
|
|
|
|
|
|
|
pddf_bnc["category_name"] <- sapply(pddf_bnc$category, function(i) beta_list$groups[i])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
query <- dbSendQuery(
|
|
|
|
|
|
|
|
con,
|
|
|
|
```{r}
|
|
|
|
"
|
|
|
|
|
|
|
|
with cte as (
|
|
|
|
ggplot(pddf_bnc, aes(x=value,)) +
|
|
|
|
select
|
|
|
|
geom_histogram(bins=100) +
|
|
|
|
nct_id
|
|
|
|
labs(
|
|
|
|
,lag(current_status, 1) over (partition by nct_id order by earliest_date_observed) as previous_status
|
|
|
|
title = "Distribution of predicted differences"
|
|
|
|
,current_status
|
|
|
|
,x = "Difference in probability due to intervention"
|
|
|
|
,earliest_date_observed as date_current
|
|
|
|
,y = "Predicted counts"
|
|
|
|
from formatted_data_mat fdm
|
|
|
|
) +
|
|
|
|
), cte2 as (
|
|
|
|
#xlim(-0.3,0.1) +
|
|
|
|
select
|
|
|
|
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
|
|
|
|
nct_id
|
|
|
|
|
|
|
|
,previous_status
|
|
|
|
ggplot(pddf_bnc, aes(x=value,)) +
|
|
|
|
,current_status
|
|
|
|
geom_histogram(bins=100) +
|
|
|
|
,max(date_current) as date_current_max
|
|
|
|
facet_wrap(
|
|
|
|
from cte
|
|
|
|
~factor(
|
|
|
|
where
|
|
|
|
category_name,
|
|
|
|
previous_status != current_status
|
|
|
|
levels=beta_list$groups
|
|
|
|
and
|
|
|
|
|
|
|
|
current_status = 'Active, not recruiting'
|
|
|
|
|
|
|
|
group by
|
|
|
|
|
|
|
|
nct_id
|
|
|
|
|
|
|
|
,previous_status
|
|
|
|
|
|
|
|
,current_status
|
|
|
|
|
|
|
|
,date_current
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
select
|
|
|
|
|
|
|
|
fdqpe.nct_id
|
|
|
|
|
|
|
|
--,fdqpe.start_date
|
|
|
|
|
|
|
|
--,fdqpe.current_enrollment
|
|
|
|
|
|
|
|
--,fdqpe.enrollment_category
|
|
|
|
|
|
|
|
,fdqpe.current_status
|
|
|
|
|
|
|
|
,fdqpe.earliest_date_observed
|
|
|
|
|
|
|
|
,fdqpe.elapsed_duration
|
|
|
|
|
|
|
|
,fdqpe.n_brands as identical_brands
|
|
|
|
|
|
|
|
,ntbtu.brand_name_count
|
|
|
|
|
|
|
|
,fdqpe.category_id
|
|
|
|
|
|
|
|
,fdqpe.final_status
|
|
|
|
|
|
|
|
,fdqpe.h_sdi_val
|
|
|
|
|
|
|
|
--,fdqpe.h_sdi_u95
|
|
|
|
|
|
|
|
--,fdqpe.h_sdi_l95
|
|
|
|
|
|
|
|
,fdqpe.hm_sdi_val
|
|
|
|
|
|
|
|
--,fdqpe.hm_sdi_u95
|
|
|
|
|
|
|
|
--,fdqpe.hm_sdi_l95
|
|
|
|
|
|
|
|
,fdqpe.m_sdi_val
|
|
|
|
|
|
|
|
--,fdqpe.m_sdi_u95
|
|
|
|
|
|
|
|
--,fdqpe.m_sdi_l95
|
|
|
|
|
|
|
|
,fdqpe.lm_sdi_val
|
|
|
|
|
|
|
|
--,fdqpe.lm_sdi_u95
|
|
|
|
|
|
|
|
--,fdqpe.lm_sdi_l95
|
|
|
|
|
|
|
|
,fdqpe.l_sdi_val
|
|
|
|
|
|
|
|
--,fdqpe.l_sdi_u95
|
|
|
|
|
|
|
|
--,fdqpe.l_sdi_l95
|
|
|
|
|
|
|
|
from formatted_data_with_planned_enrollment fdqpe
|
|
|
|
|
|
|
|
join \"Formularies\".nct_to_brands_through_uspdc ntbtu
|
|
|
|
|
|
|
|
on fdqpe.nct_id = ntbtu.nct_id
|
|
|
|
|
|
|
|
join cte2
|
|
|
|
|
|
|
|
on cte2.nct_id = fdqpe.nct_id
|
|
|
|
|
|
|
|
and cte2.date_current_max = fdqpe.earliest_date_observed
|
|
|
|
|
|
|
|
order by fdqpe.nct_id, fdqpe.earliest_date_observed
|
|
|
|
|
|
|
|
;
|
|
|
|
|
|
|
|
"
|
|
|
|
|
|
|
|
)
|
|
|
|
)
|
|
|
|
df <- fetch(query, n = -1)
|
|
|
|
, labeller = label_wrap_gen(multi_line = TRUE)
|
|
|
|
df <- na.omit(df)
|
|
|
|
, ncol=5) +
|
|
|
|
|
|
|
|
labs(
|
|
|
|
|
|
|
|
title = "Distribution of predicted differences",
|
|
|
|
|
|
|
|
subtitle = "By group"
|
|
|
|
|
|
|
|
,x = "Difference in probability due to intervention"
|
|
|
|
|
|
|
|
,y = "Predicted counts"
|
|
|
|
|
|
|
|
) +
|
|
|
|
|
|
|
|
#xlim(-0.25,0.1) +
|
|
|
|
|
|
|
|
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
|
|
|
|
|
|
|
|
theme(strip.text.x = element_text(size = 8))
|
|
|
|
|
|
|
|
|
|
|
|
return(df)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
counterfact_raw <- get_counterfactuals(driver)
|
|
|
|
|
|
|
|
#extract data
|
|
|
|
|
|
|
|
counterfact_list <- data_formatter(counterfact_raw)
|
|
|
|
|
|
|
|
counterfact_list$y <- NULL #remove the chance of accidentally training on the wrong data
|
|
|
|
|
|
|
|
counterfact_categories <- counterfact_raw$category_id
|
|
|
|
|
|
|
|
#setup the two counterfactuals
|
|
|
|
|
|
|
|
counterfact_x <- counterfact_list$x #no change
|
|
|
|
|
|
|
|
counterfact_x_tilde <- counterfact_list$x #to be changed
|
|
|
|
|
|
|
|
#make changes, set it to Recruiting #TODO: change this so it matches previous state.
|
|
|
|
|
|
|
|
counterfact_x_tilde$status_ANR <- 0
|
|
|
|
|
|
|
|
counterfact_x_tilde["sANR*elapsed"] <- 0
|
|
|
|
|
|
|
|
```
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
TODO: add density plot of (x,y,z) (date,value,counts)
|
|
|
|
|
|
|
|
- with and without faceting
|
|
|
|
|
|
|
|
|
|
|
|
## Explore data
|
|
|
|
## Explore data
|
|
|
|
|
|
|
|
|
|
|
|
@ -338,70 +610,6 @@ summary(df5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Fit Model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
################################# FIT MODEL #########################################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#setup data (named list)
|
|
|
|
|
|
|
|
trials_data <- list(
|
|
|
|
|
|
|
|
D = ncol(x),#
|
|
|
|
|
|
|
|
N = nrow(x),
|
|
|
|
|
|
|
|
L = n_categories$count,
|
|
|
|
|
|
|
|
y = as.vector(y),
|
|
|
|
|
|
|
|
ll = as.vector(categories),
|
|
|
|
|
|
|
|
x = as.matrix(x),
|
|
|
|
|
|
|
|
mu_mean = 0,
|
|
|
|
|
|
|
|
mu_stdev = 0.05,
|
|
|
|
|
|
|
|
sigma_shape = 4,
|
|
|
|
|
|
|
|
sigma_rate = 20,
|
|
|
|
|
|
|
|
Nx = 189,
|
|
|
|
|
|
|
|
llx = as.vector(counterfact_categories),
|
|
|
|
|
|
|
|
counterfact_x_tilde = as.matrix(counterfact_x_tilde),
|
|
|
|
|
|
|
|
counterfact_x = as.matrix(counterfact_x)
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fit <- stan(
|
|
|
|
|
|
|
|
file='Hierarchal_Logistic.stan',
|
|
|
|
|
|
|
|
data = trials_data,
|
|
|
|
|
|
|
|
chains = 4,
|
|
|
|
|
|
|
|
iter = 5000,
|
|
|
|
|
|
|
|
seed = 11021585
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
#pull out prior predictions
|
|
|
|
|
|
|
|
generated <- gqs(
|
|
|
|
|
|
|
|
fit@stanmodel,
|
|
|
|
|
|
|
|
data=trials_data,
|
|
|
|
|
|
|
|
draws=as.matrix(fit),
|
|
|
|
|
|
|
|
seed=11021585
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
# to implement distribution of differences:
|
|
|
|
|
|
|
|
# create two datasets with interventions
|
|
|
|
|
|
|
|
# simulate both with the same seed and draws
|
|
|
|
|
|
|
|
# figure out how to difference the posterior (and maybe look at prior) probabilities
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated, pars="p_prior")$p_prior))
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated, pars="mu_prior")$mu_prior), )
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated, pars="sigma_prior")$sigma_prior))
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
check_hmc_diagnostics(fit)
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated, pars="p_predicted")$p_predicted))
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -503,172 +711,45 @@ for (i in 1:4) {
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
```
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
for (k in 1:22) {
|
|
|
|
|
|
|
|
for (i in 1:4) {
|
|
|
|
|
|
|
|
params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]"))
|
|
|
|
|
|
|
|
print(
|
|
|
|
|
|
|
|
mcmc_pairs(
|
|
|
|
|
|
|
|
posterior,
|
|
|
|
|
|
|
|
np = nuts_prmts,
|
|
|
|
|
|
|
|
pars=c(
|
|
|
|
|
|
|
|
params,
|
|
|
|
|
|
|
|
"lp__"
|
|
|
|
|
|
|
|
),
|
|
|
|
|
|
|
|
off_diag_args = list(size = 0.75)
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
}}
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
################################# ANALYZE #####################################
|
|
|
|
|
|
|
|
print(fit)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## Result Plots
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Note the regular large difference in variance.
|
|
|
|
|
|
|
|
I would guess those are the beta[1:22,2] values.
|
|
|
|
|
|
|
|
I wonder if a lot of the variance is due to the 2 values that are sitting out.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
beta_list <- list(
|
|
|
|
|
|
|
|
groups = c(
|
|
|
|
|
|
|
|
`1`="Infections & Parasites",
|
|
|
|
|
|
|
|
`2`="Neoplasms",
|
|
|
|
|
|
|
|
`3`="Blood & Immune system",
|
|
|
|
|
|
|
|
`4`="Endocrine, Nutritional, and Metabolic",
|
|
|
|
|
|
|
|
`5`="Mental & Behavioral",
|
|
|
|
|
|
|
|
`6`="Nervous System",
|
|
|
|
|
|
|
|
`7`="Eye and Adnexa",
|
|
|
|
|
|
|
|
`8`="Ear and Mastoid",
|
|
|
|
|
|
|
|
`9`="Circulatory",
|
|
|
|
|
|
|
|
`10`="Respiratory",
|
|
|
|
|
|
|
|
`11`="Digestive",
|
|
|
|
|
|
|
|
`12`="Skin & Subcutaneaous tissue",
|
|
|
|
|
|
|
|
`13`="Musculoskeletal",
|
|
|
|
|
|
|
|
`14`="Genitourinary",
|
|
|
|
|
|
|
|
`15`="Pregancy, Childbirth, & Puerperium",
|
|
|
|
|
|
|
|
`16`="Perinatal Period",
|
|
|
|
|
|
|
|
`17`="Congential",
|
|
|
|
|
|
|
|
`18`="Symptoms, Signs etc.",
|
|
|
|
|
|
|
|
`19`="Injury etc.",
|
|
|
|
|
|
|
|
`20`="External Causes",
|
|
|
|
|
|
|
|
`21`="Contact with Healthcare",
|
|
|
|
|
|
|
|
`22`="Special Purposes"
|
|
|
|
|
|
|
|
),
|
|
|
|
|
|
|
|
parameters = c(
|
|
|
|
|
|
|
|
`1`="Elapsed Duration",
|
|
|
|
|
|
|
|
# brands
|
|
|
|
|
|
|
|
`2`="asinh(Generic Brands)",
|
|
|
|
|
|
|
|
`3`="asinh(Competitors USPDC)",
|
|
|
|
|
|
|
|
# population
|
|
|
|
|
|
|
|
`4`="asinh(High SDI)",
|
|
|
|
|
|
|
|
`5`="asinh(High-Medium SDI)",
|
|
|
|
|
|
|
|
`6`="asinh(Medium SDI)",
|
|
|
|
|
|
|
|
`7`="asinh(Low-Medium SDI)",
|
|
|
|
|
|
|
|
`8`="asinh(Low SDI)",
|
|
|
|
|
|
|
|
#Status
|
|
|
|
|
|
|
|
`9`="status_NYR",
|
|
|
|
|
|
|
|
`10`="status_EBI",
|
|
|
|
|
|
|
|
`11`="status_ANR",
|
|
|
|
|
|
|
|
#interactions for brands
|
|
|
|
|
|
|
|
`12`="ib*elapsed",
|
|
|
|
|
|
|
|
`13`="bnc*elapsed",
|
|
|
|
|
|
|
|
# interactions for status
|
|
|
|
|
|
|
|
`14`="sNYR*elapsed",
|
|
|
|
|
|
|
|
`15`="sEBI*elapsed",
|
|
|
|
|
|
|
|
`16`="sANR*elapsed"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
get_parameters <- function(stem,class_list) {
|
|
|
|
|
|
|
|
#get categories and lengths
|
|
|
|
|
|
|
|
named <- names(class_list)
|
|
|
|
|
|
|
|
lengths <- sapply(named, (function (x) length(class_list[[x]])))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#describe the grid needed
|
|
|
|
|
|
|
|
iter_list <- sapply(named, (function (x) 1:lengths[x]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#generate the list of parameters
|
|
|
|
|
|
|
|
pardf <- generate_parameter_df(stem, iter_list)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#add columns with appropriate human-readable names
|
|
|
|
|
|
|
|
for (name in named) {
|
|
|
|
|
|
|
|
pardf[paste(name,"_hr",sep="")] <- as.factor(
|
|
|
|
|
|
|
|
sapply(pardf[name], (function (i) class_list[[name]][i]))
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return(pardf)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
generate_parameter_df <- function(stem, iter_list) {
|
|
|
|
|
|
|
|
grid <- expand.grid(iter_list)
|
|
|
|
|
|
|
|
grid["param_name"] <- grid %>% unite(x,colnames(grid),sep=",")
|
|
|
|
|
|
|
|
grid["param_name"] <- paste(stem,"[",grid$param_name,"]",sep="")
|
|
|
|
|
|
|
|
return(grid)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
group_mcmc_areas <- function(
|
|
|
|
|
|
|
|
stem,# = "beta"
|
|
|
|
|
|
|
|
class_list,# = beta_list
|
|
|
|
|
|
|
|
stanfit,# = fit
|
|
|
|
|
|
|
|
group_id,# = 2
|
|
|
|
|
|
|
|
rename=TRUE
|
|
|
|
|
|
|
|
) {
|
|
|
|
|
|
|
|
#get all parameter names
|
|
|
|
|
|
|
|
params <- get_parameters(stem,class_list)
|
|
|
|
|
|
|
|
#filter down to parameters of interest
|
|
|
|
|
|
|
|
params <- filter(params,groups == group_id)
|
|
|
|
|
|
|
|
#Get dataframe with only the rows of interest
|
|
|
|
|
|
|
|
filtdata <- as.data.frame(stanfit)[params$param_name]
|
|
|
|
|
|
|
|
#rename columns
|
|
|
|
|
|
|
|
if (rename) dimnames(filtdata)[[2]] <- params$parameters_hr
|
|
|
|
|
|
|
|
#get group name for title
|
|
|
|
|
|
|
|
group_name <- class_list$groups[group_id]
|
|
|
|
|
|
|
|
#create area plot with appropriate title
|
|
|
|
|
|
|
|
mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
|
|
|
|
|
|
|
|
ggtitle(paste("Parameter distributions for ICD-10 class:",group_name))
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
parameter_mcmc_areas <- function(
|
|
|
|
|
|
|
|
stem,# = "beta"
|
|
|
|
```{r}
|
|
|
|
class_list,# = beta_list
|
|
|
|
for (k in 1:22) {
|
|
|
|
stanfit,# = fit
|
|
|
|
for (i in 1:4) {
|
|
|
|
parameter_id,# = 2
|
|
|
|
params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]"))
|
|
|
|
rename=TRUE
|
|
|
|
print(
|
|
|
|
) {
|
|
|
|
mcmc_pairs(
|
|
|
|
#get all parameter names
|
|
|
|
posterior,
|
|
|
|
params <- get_parameters(stem,class_list)
|
|
|
|
np = nuts_prmts,
|
|
|
|
#filter down to parameters of interest
|
|
|
|
pars=c(
|
|
|
|
params <- filter(params,parameters == parameter_id)
|
|
|
|
params,
|
|
|
|
#Get dataframe with only the rows of interest
|
|
|
|
"lp__"
|
|
|
|
filtdata <- as.data.frame(stanfit)[params$param_name]
|
|
|
|
),
|
|
|
|
#rename columns
|
|
|
|
off_diag_args = list(size = 0.75)
|
|
|
|
if (rename) dimnames(filtdata)[[2]] <- params$groups_hr
|
|
|
|
)
|
|
|
|
#get group name for title
|
|
|
|
)
|
|
|
|
parameter_name <- class_list$parameters[parameter_id]
|
|
|
|
}}
|
|
|
|
#create area plot with appropriate title
|
|
|
|
```
|
|
|
|
mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) +
|
|
|
|
|
|
|
|
ggtitle(parameter_name,"Parameter Distribution")
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
################################# ANALYZE #####################################
|
|
|
|
|
|
|
|
print(fit)
|
|
|
|
```
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## Result Plots
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Note the regular large difference in variance.
|
|
|
|
|
|
|
|
I would guess those are the beta[1:22,2] values.
|
|
|
|
|
|
|
|
I wonder if a lot of the variance is due to the 2 values that are sitting out.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
```{r}
|
|
|
|
#mcmc_intervals(fit, pars=get_parameters("beta",beta_list)$param_name)
|
|
|
|
#mcmc_intervals(fit, pars=get_parameters("beta",beta_list)$param_name)
|
|
|
|
```
|
|
|
|
```
|
|
|
|
@ -840,219 +921,3 @@ ggplot(pddf, aes(x=value,)) +
|
|
|
|
Recall that we had really tight zero priors.
|
|
|
|
Recall that we had really tight zero priors.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
### Intervention: Adding a single competitor
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
inherited_cols <- c(
|
|
|
|
|
|
|
|
"elapsed_duration"
|
|
|
|
|
|
|
|
#,"identical_brands"
|
|
|
|
|
|
|
|
#,"brand_name_counts"
|
|
|
|
|
|
|
|
,"h_sdi_val"
|
|
|
|
|
|
|
|
,"hm_sdi_val"
|
|
|
|
|
|
|
|
,"m_sdi_val"
|
|
|
|
|
|
|
|
,"lm_sdi_val"
|
|
|
|
|
|
|
|
,"l_sdi_val"
|
|
|
|
|
|
|
|
,"status_NYR"
|
|
|
|
|
|
|
|
,"status_EBI"
|
|
|
|
|
|
|
|
,"status_ANR"
|
|
|
|
|
|
|
|
#,"ib*elapsed"
|
|
|
|
|
|
|
|
#,"bnc*elapsed"
|
|
|
|
|
|
|
|
,"sNYR*elapsed"
|
|
|
|
|
|
|
|
,"sEBI*elapsed"
|
|
|
|
|
|
|
|
,"sANR*elapsed"
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#### Generics
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
#generics intervention
|
|
|
|
|
|
|
|
brand_intervention_ib <- x[c(inherited_cols,"brand_name_counts","bnc*elapsed")]
|
|
|
|
|
|
|
|
brand_intervention_ib["identical_brands"] <- asinh(sinh(x$identical_brands)+1) #add a single generic brand
|
|
|
|
|
|
|
|
brand_intervention_ib["ib*elapsed"] <- brand_intervention_ib$identical_brands * brand_intervention_ib$elapsed_duration
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
counterfact_marketing_ib <- list(
|
|
|
|
|
|
|
|
D = ncol(x),#
|
|
|
|
|
|
|
|
N = nrow(x),
|
|
|
|
|
|
|
|
L = n_categories$count,
|
|
|
|
|
|
|
|
y = as.vector(y),
|
|
|
|
|
|
|
|
ll = as.vector(categories),
|
|
|
|
|
|
|
|
x = as.matrix(x),
|
|
|
|
|
|
|
|
mu_mean = 0,
|
|
|
|
|
|
|
|
mu_stdev = 0.05,
|
|
|
|
|
|
|
|
sigma_shape = 4,
|
|
|
|
|
|
|
|
sigma_rate = 20,
|
|
|
|
|
|
|
|
Nx = nrow(x),
|
|
|
|
|
|
|
|
llx = as.vector(categories),
|
|
|
|
|
|
|
|
counterfact_x_tilde = as.matrix(brand_intervention_ib),
|
|
|
|
|
|
|
|
counterfact_x = as.matrix(x)
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
generated_ib <- gqs(
|
|
|
|
|
|
|
|
fit@stanmodel,
|
|
|
|
|
|
|
|
data=counterfact_marketing_ib,
|
|
|
|
|
|
|
|
draws=as.matrix(fit),
|
|
|
|
|
|
|
|
seed=11021585
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
#TODO: Convert to ggplot, stabilize y axis
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_ib, pars="p_predicted_default")$p_predicted_default), bins=100)
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_ib, pars="p_predicted_intervention")$p_predicted_intervention), bins=100)
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_ib, pars="predicted_difference")$predicted_difference), bins=100)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) |>
|
|
|
|
|
|
|
|
pivot_longer(X1:X1343)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#TODO: Fix Category names
|
|
|
|
|
|
|
|
pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name))
|
|
|
|
|
|
|
|
pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) df$category_id[i])
|
|
|
|
|
|
|
|
pddf_ib["category_name"] <- sapply(pddf_ib$category, function(i) beta_list$groups[i])
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ggplot(pddf_ib, aes(x=value,)) +
|
|
|
|
|
|
|
|
geom_histogram(bins=100) +
|
|
|
|
|
|
|
|
labs(
|
|
|
|
|
|
|
|
title = "Distribution of predicted differences"
|
|
|
|
|
|
|
|
,x = "Difference in probability due to intervention"
|
|
|
|
|
|
|
|
,y = "Predicted counts"
|
|
|
|
|
|
|
|
) +
|
|
|
|
|
|
|
|
#xlim(-0.3,0.1) +
|
|
|
|
|
|
|
|
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ggplot(pddf_ib, aes(x=value,)) +
|
|
|
|
|
|
|
|
geom_histogram(bins=100) +
|
|
|
|
|
|
|
|
facet_wrap(
|
|
|
|
|
|
|
|
~factor(
|
|
|
|
|
|
|
|
category_name,
|
|
|
|
|
|
|
|
levels=beta_list$groups
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
, labeller = label_wrap_gen(multi_line = TRUE)
|
|
|
|
|
|
|
|
, ncol=5) +
|
|
|
|
|
|
|
|
labs(
|
|
|
|
|
|
|
|
title = "Distribution of predicted differences",
|
|
|
|
|
|
|
|
subtitle = "By group"
|
|
|
|
|
|
|
|
,x = "Difference in probability due to intervention"
|
|
|
|
|
|
|
|
,y = "Predicted counts"
|
|
|
|
|
|
|
|
) +
|
|
|
|
|
|
|
|
#xlim(-0.25,0.1) +
|
|
|
|
|
|
|
|
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
|
|
|
|
|
|
|
|
theme(strip.text.x = element_text(size = 8))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#### USP DC
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
#formulary intervention
|
|
|
|
|
|
|
|
brand_intervention_bnc <- x[c(inherited_cols,"identical_brands","ib*elapsed")]
|
|
|
|
|
|
|
|
brand_intervention_bnc["brand_name_counts"] <- asinh(sinh(x$brand_name_counts)+1) #add a single formulary competitor brand
|
|
|
|
|
|
|
|
brand_intervention_bnc["bnc*elapsed"] <- brand_intervention_bnc$brand_name_counts * brand_intervention_bnc$elapsed_duration
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
counterfact_marketing_bnc <- list(
|
|
|
|
|
|
|
|
D = ncol(x),#
|
|
|
|
|
|
|
|
N = nrow(x),
|
|
|
|
|
|
|
|
L = n_categories$count,
|
|
|
|
|
|
|
|
y = as.vector(y),
|
|
|
|
|
|
|
|
ll = as.vector(categories),
|
|
|
|
|
|
|
|
x = as.matrix(x),
|
|
|
|
|
|
|
|
mu_mean = 0,
|
|
|
|
|
|
|
|
mu_stdev = 0.05,
|
|
|
|
|
|
|
|
sigma_shape = 4,
|
|
|
|
|
|
|
|
sigma_rate = 20,
|
|
|
|
|
|
|
|
Nx = nrow(x),
|
|
|
|
|
|
|
|
llx = as.vector(categories),
|
|
|
|
|
|
|
|
counterfact_x_tilde = as.matrix(brand_intervention_bnc),
|
|
|
|
|
|
|
|
counterfact_x = as.matrix(x)
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
generated_bnc <- gqs(
|
|
|
|
|
|
|
|
fit@stanmodel,
|
|
|
|
|
|
|
|
data=counterfact_marketing_bnc,
|
|
|
|
|
|
|
|
draws=as.matrix(fit),
|
|
|
|
|
|
|
|
seed=11021585
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
#TODO: Convert to ggplot, stabilize y axis
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_bnc, pars="p_predicted_default")$p_predicted_default), bins=100)
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_bnc, pars="p_predicted_intervention")$p_predicted_intervention), bins=100)
|
|
|
|
|
|
|
|
hist(as.vector(extract(generated_bnc, pars="predicted_difference")$predicted_difference), bins=100)
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
pddf_bnc <- data.frame(extract(generated_bnc, pars="predicted_difference")$predicted_difference) |>
|
|
|
|
|
|
|
|
pivot_longer(X1:X1343)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#Add Category names
|
|
|
|
|
|
|
|
pddf_bnc["entry_idx"] <- as.numeric(gsub("\\D","",pddf_bnc$name))
|
|
|
|
|
|
|
|
pddf_bnc["category"] <- sapply(pddf_bnc$entry_idx, function(i) df$category_id[i])
|
|
|
|
|
|
|
|
pddf_bnc["category_name"] <- sapply(pddf_bnc$category, function(i) beta_list$groups[i])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#add snapshot date
|
|
|
|
|
|
|
|
pddf_bnc["snapshot_date"] <- sapply(pddf_bnc$entry_idx, function(i) df$earliest_date_observed[i])#changed values
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```{r}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ggplot(pddf_bnc, aes(x=value,)) +
|
|
|
|
|
|
|
|
geom_histogram(bins=100) +
|
|
|
|
|
|
|
|
labs(
|
|
|
|
|
|
|
|
title = "Distribution of predicted differences"
|
|
|
|
|
|
|
|
,x = "Difference in probability due to intervention"
|
|
|
|
|
|
|
|
,y = "Predicted counts"
|
|
|
|
|
|
|
|
) +
|
|
|
|
|
|
|
|
#xlim(-0.3,0.1) +
|
|
|
|
|
|
|
|
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ggplot(pddf_bnc, aes(x=value,)) +
|
|
|
|
|
|
|
|
geom_histogram(bins=100) +
|
|
|
|
|
|
|
|
facet_wrap(
|
|
|
|
|
|
|
|
~factor(
|
|
|
|
|
|
|
|
category_name,
|
|
|
|
|
|
|
|
levels=beta_list$groups
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
, labeller = label_wrap_gen(multi_line = TRUE)
|
|
|
|
|
|
|
|
, ncol=5) +
|
|
|
|
|
|
|
|
labs(
|
|
|
|
|
|
|
|
title = "Distribution of predicted differences",
|
|
|
|
|
|
|
|
subtitle = "By group"
|
|
|
|
|
|
|
|
,x = "Difference in probability due to intervention"
|
|
|
|
|
|
|
|
,y = "Predicted counts"
|
|
|
|
|
|
|
|
) +
|
|
|
|
|
|
|
|
#xlim(-0.25,0.1) +
|
|
|
|
|
|
|
|
geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") +
|
|
|
|
|
|
|
|
theme(strip.text.x = element_text(size = 8))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
TODO: add density plot of (x,y,z) (date,value,counts)
|
|
|
|
|
|
|
|
- with and without faceting
|
|
|
|
|
|
|
|
|