--- title: "The Effects of market conditions on recruitment and completion of clinical trials" author: "Will King" format: html editor: source --- # Setup ```{r} library(bayesplot) available_mcmc(pattern = "_nuts_") library(ggplot2) library(patchwork) library(tidyverse) library(rstan) library(tidyr) library(ghibli) #Resources: https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started #save unchanged models instead of recompiling rstan_options(auto_write = TRUE) #allow for multithreaded sampling options(mc.cores = parallel::detectCores()) #test installation, shouldn't get any errors #example(stan_model, package = "rstan", run.dontrun = TRUE) ``` ```{r} ################ Pull data from database ###################### library(RPostgreSQL) driver <- dbDriver("PostgreSQL") get_data <- function(driver) { con <- dbConnect( driver, user='root', password='root', dbname='aact_db', host='will-office' ) on.exit(dbDisconnect(con)) query <- dbSendQuery( con, # "select * from formatted_data_with_planned_enrollment;" " select fdqpe.nct_id --,fdqpe.start_date --,fdqpe.current_enrollment --,fdqpe.enrollment_category ,fdqpe.current_status ,fdqpe.earliest_date_observed ,fdqpe.elapsed_duration ,fdqpe.n_brands as identical_brands ,ntbtu.brand_name_count ,fdqpe.category_id ,fdqpe.final_status ,fdqpe.h_sdi_val --,fdqpe.h_sdi_u95 --,fdqpe.h_sdi_l95 ,fdqpe.hm_sdi_val --,fdqpe.hm_sdi_u95 --,fdqpe.hm_sdi_l95 ,fdqpe.m_sdi_val --,fdqpe.m_sdi_u95 --,fdqpe.m_sdi_l95 ,fdqpe.lm_sdi_val --,fdqpe.lm_sdi_u95 --,fdqpe.lm_sdi_l95 ,fdqpe.l_sdi_val --,fdqpe.l_sdi_u95 --,fdqpe.l_sdi_l95 from formatted_data_with_planned_enrollment fdqpe join \"Formularies\".nct_to_brands_through_uspdc ntbtu on fdqpe.nct_id = ntbtu.nct_id order by fdqpe.nct_id, fdqpe.earliest_date_observed ; " ) df <- fetch(query, n = -1) df <- na.omit(df) query2 <-dbSendQuery(con,"select count(*) from \"DiseaseBurden\".icd10_categories ic where \"level\"=1;") n_categories <- fetch(query2, n = -1) return(list(data=df,ncat=n_categories)) } d <- get_data(driver) df <- d$data n_categories <- d$ncat ################ Format Data ########################### data_formatter <- function(df) { categories <- df["category_id"] x <- df["elapsed_duration"] x["identical_brands"] <- asinh(df$identical_brands) x["brand_name_counts"] <- asinh(df$brand_name_count) x["h_sdi_val"] <- asinh(df$h_sdi_val) x["hm_sdi_val"] <- asinh(df$hm_sdi_val) x["m_sdi_val"] <- asinh(df$m_sdi_val) x["lm_sdi_val"] <- asinh(df$lm_sdi_val) x["l_sdi_val"] <- asinh(df$l_sdi_val) #Setup fixed effects x["status_NYR"] <- ifelse(df["current_status"]=="Not yet recruiting",1,0) x["status_EBI"] <- ifelse(df["current_status"]=="Enrolling by invitation",1,0) #x["status_Rec"] <- ifelse(df["current_status"]=="Recruiting",1,0) # Base case is Recruiting x["status_ANR"] <- ifelse(df["current_status"]=="Active, not recruiting",1,0) #interaction terms for competitors x["ib*elapsed"] <- x["elapsed_duration"]*x["identical_brands"] x["bnc*elapsed"] <- x["elapsed_duration"] * x["brand_name_counts"] #interaction terms for status effects x["sNYR*elapsed"] <- x["elapsed_duration"]*x["status_NYR"] x["sEBI*elapsed"] <- x["elapsed_duration"]*x["status_EBI"] x["sANR*elapsed"] <- x["elapsed_duration"]*x["status_ANR"] y <- ifelse(df["final_status"]=="Terminated",1,0) #get category list return(list(x=x,y=y)) } train <- data_formatter(df) categories <- df$category_id x <- train$x y <- train$y ``` ```{r} # get data for counterfactuals get_counterfactuals <- function(driver) { con <- dbConnect( driver, user='root', password='root', dbname='aact_db', host='will-office' ) on.exit(dbDisconnect(con)) query <- dbSendQuery( con, " with cte as ( select nct_id ,lag(current_status, 1) over (partition by nct_id order by earliest_date_observed) as previous_status ,current_status ,earliest_date_observed as date_current from formatted_data_mat fdm ), cte2 as ( select nct_id ,previous_status ,current_status ,max(date_current) as date_current_max from cte where previous_status != current_status and current_status = 'Active, not recruiting' group by nct_id ,previous_status ,current_status ,date_current ) select fdqpe.nct_id --,fdqpe.start_date --,fdqpe.current_enrollment --,fdqpe.enrollment_category ,fdqpe.current_status ,fdqpe.earliest_date_observed ,fdqpe.elapsed_duration ,fdqpe.n_brands as identical_brands ,ntbtu.brand_name_count ,fdqpe.category_id ,fdqpe.final_status ,fdqpe.h_sdi_val --,fdqpe.h_sdi_u95 --,fdqpe.h_sdi_l95 ,fdqpe.hm_sdi_val --,fdqpe.hm_sdi_u95 --,fdqpe.hm_sdi_l95 ,fdqpe.m_sdi_val --,fdqpe.m_sdi_u95 --,fdqpe.m_sdi_l95 ,fdqpe.lm_sdi_val --,fdqpe.lm_sdi_u95 --,fdqpe.lm_sdi_l95 ,fdqpe.l_sdi_val --,fdqpe.l_sdi_u95 --,fdqpe.l_sdi_l95 from formatted_data_with_planned_enrollment fdqpe join \"Formularies\".nct_to_brands_through_uspdc ntbtu on fdqpe.nct_id = ntbtu.nct_id join cte2 on cte2.nct_id = fdqpe.nct_id and cte2.date_current_max = fdqpe.earliest_date_observed order by fdqpe.nct_id, fdqpe.earliest_date_observed ; " ) df <- fetch(query, n = -1) df <- na.omit(df) return(df) } counterfact_raw <- get_counterfactuals(driver) #extract data counterfact_list <- data_formatter(counterfact_raw) counterfact_list$y <- NULL #remove the chance of accidentally training on the wrong data counterfact_categories <- counterfact_raw$category_id #setup the two counterfactuals counterfact_x <- counterfact_list$x #no change counterfact_x_tilde <- counterfact_list$x #to be changed #make changes, set it to Recruiting #TODO: change this so it matches previous state. counterfact_x_tilde$status_ANR <- 0 counterfact_x_tilde["sANR*elapsed"] <- 0 ``` ## Explore data ```{r} ################################# DATA EXPLORATION ############################ driver <- dbDriver("PostgreSQL") con <- dbConnect( driver, user='root', password='root', dbname='aact_db', host='will-office' ) #Plot histogram of count of snapshots df3 <- dbGetQuery( con, "select nct_id,final_status,count(*) from formatted_data_with_planned_enrollment fdwpe group by nct_id,final_status ;" ) #df3 <- fetch(query3, n = -1) ggplot(data=df3, aes(x=count, fill=final_status)) + geom_histogram(binwidth=1) + ggtitle("Histogram of snapshots per trial (matched trials)") + xlab("Snapshots per trial") #Plot duration for terminated vs completed df4 <- dbGetQuery( con, " select nct_id, start_date , primary_completion_date, overall_status , primary_completion_date - start_date as duration from ctgov.studies s where nct_id in (select distinct nct_id from http.download_status ds) ;" ) #df4 <- fetch(query4, n = -1) ggplot(data=df4, aes(x=duration,fill=overall_status)) + geom_histogram()+ ggtitle("Histogram of trial durations") + xlab("duration")+ facet_wrap(~overall_status) df5 <- dbGetQuery( con, " with cte1 as ( select nct_id, start_date , primary_completion_date, overall_status , primary_completion_date - start_date as duration from ctgov.studies s where nct_id in (select distinct nct_id from http.download_status ds) ), cte2 as ( select nct_id,count(*) as snapshot_count from formatted_data_with_planned_enrollment fdwpe group by nct_id ) select a.nct_id, a.overall_status, a.duration,b.snapshot_count from cte1 as a join cte2 as b on a.nct_id=b.nct_id ;" ) df5$overall_status <- as.factor(df5$overall_status) #df5 <- fetch(query5, n = -1) ggplot(data=df5, aes(x=duration,y=snapshot_count,color=overall_status)) + geom_jitter() + ggtitle("duration, status, and snapshot_count") + xlab("duration") + ylab("snapshot count") dbDisconnect(con) #get number of trials and snapshots in each category group_trials_by_category <- as.data.frame(aggregate(category_id ~ nct_id, df, max)) group_trials_by_category <- as.data.frame(group_trials_by_category) ggplot(data = group_trials_by_category, aes(x=category_id)) + geom_histogram(binwidth=1,color="black",fill="seagreen") + scale_x_continuous(breaks=scales::pretty_breaks(n=22)) summary(df5) ``` # Fit Model ```{r} ################################# FIT MODEL ######################################### #setup data (named list) trials_data <- list( D = ncol(x),# N = nrow(x), L = n_categories$count, y = as.vector(y), ll = as.vector(categories), x = as.matrix(x), mu_mean = 0, mu_stdev = 0.05, sigma_shape = 4, sigma_rate = 20, Nx = 189, llx = as.vector(counterfact_categories), counterfact_x_tilde = as.matrix(counterfact_x_tilde), counterfact_x = as.matrix(counterfact_x) ) fit <- stan( file='Hierarchal_Logistic.stan', data = trials_data, chains = 6, iter = 5000, seed = 11021585 ) ``` ```{r} #pull out prior predictions generated <- gqs( fit@stanmodel, data=trials_data, draws=as.matrix(fit), seed=11021585 ) # to implement distribution of differences: # create two datasets with interventions # simulate both with the same seed and draws # figure out how to difference the posterior (and maybe look at prior) probabilities ``` ```{r} hist(as.vector(extract(generated, pars="p_prior")$p_prior)) hist(as.vector(extract(generated, pars="mu_prior")$mu_prior), ) hist(as.vector(extract(generated, pars="sigma_prior")$sigma_prior)) ``` ```{r} check_hmc_diagnostics(fit) hist(as.vector(extract(generated, pars="p_predicted")$p_predicted)) ``` # Diagnostics ```{r} #trace plots plot(fit, pars=c("mu"), plotfun="trace") for (i in 1:4) { print( mcmc_rank_overlay( fit, pars=c( paste0("mu[",4*i-3,"]"), paste0("mu[",4*i-2,"]"), paste0("mu[",4*i-1,"]"), paste0("mu[",4*i,"]") ), n_bins=100 )+ legend_move("top") + scale_colour_ghibli_d("KikiMedium") ) } ``` ```{r} plot(fit, pars=c("sigma"), plotfun="trace") for (i in 1:4) { print( mcmc_rank_overlay( fit, pars=c( paste0("sigma[",4*i-3,"]"), paste0("sigma[",4*i-2,"]"), paste0("sigma[",4*i-1,"]"), paste0("sigma[",4*i,"]") ), n_bins=100 )+ legend_move("top") + scale_colour_ghibli_d("KikiMedium") ) } ``` ```{r} #other diagnostics logpost <- log_posterior(fit) nuts_prmts <- nuts_params(fit) posterior <- as.array(fit) ``` ```{r} color_scheme_set("darkgray") div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4) mcmc_parcoord(posterior, regex_pars = "mu", np=nuts_prmts, np_style = div_style, alpha = 0.05) ``` ```{r} for (i in 1:4) { mus = sapply(3:0, function(j) paste0("mu[",4*i-j ,"]")) print( mcmc_pairs( posterior, np = nuts_prmts, pars=c( mus, "lp__" ), off_diag_args = list(size = 0.75) ) ) } ``` ```{r} mcmc_parcoord(posterior,regex_pars = "sigma", np=nuts_prmts, alpha=0.05) ``` ```{r} for (i in 1:4) { params = sapply(3:0, function(j) paste0("sigma[",4*i-j ,"]")) print( mcmc_pairs( posterior, np = nuts_prmts, pars=c( params, "lp__" ), off_diag_args = list(size = 0.75) ) ) } ``` ```{r} for (k in 1:22) { for (i in 1:4) { params = sapply(3:0, function(j) paste0("beta[",k,",",4*i-j ,"]")) print( mcmc_pairs( posterior, np = nuts_prmts, pars=c( params, "lp__" ), off_diag_args = list(size = 0.75) ) ) }} ``` # Results ```{r} ################################# ANALYZE ##################################### print(fit) ``` ## Result Plots Note the regular large difference in variance. I would guess those are the beta[1:22,2] values. I wonder if a lot of the variance is due to the 2 values that are sitting out. ```{r} beta_list <- list( groups = c( `1`="Infections & Parasites", `2`="Neoplasms", `3`="Blood & Immune system", `4`="Endocrine, Nutritional, and Metabolic", `5`="Mental & Behavioral", `6`="Nervous System", `7`="Eye and Adnexa", `8`="Ear and Mastoid", `9`="Circulatory", `10`="Respiratory", `11`="Digestive", `12`="Skin & Subcutaneaous tissue", `13`="Musculoskeletal", `14`="Genitourinary", `15`="Pregancy, Childbirth, & Puerperium", `16`="Perinatal Period", `17`="Congential", `18`="Symptoms, Signs etc.", `19`="Injury etc.", `20`="External Causes", `21`="Contact with Healthcare", `22`="Special Purposes" ), parameters = c( `1`="Elapsed Duration", # brands `2`="asinh(Generic Brands)", `3`="asinh(Competitors USPDC)", # population `4`="asinh(High SDI)", `5`="asinh(High-Medium SDI)", `6`="asinh(Medium SDI)", `7`="asinh(Low-Medium SDI)", `8`="asinh(Low SDI)", #Status `9`="status_NYR", `10`="status_EBI", `11`="status_ANR", #interactions for brands `12`="ib*elapsed", `13`="bnc*elapsed", # interactions for status `14`="sNYR*elapsed", `15`="sEBI*elapsed", `16`="sANR*elapsed" ) ) get_parameters <- function(stem,class_list) { #get categories and lengths named <- names(class_list) lengths <- sapply(named, (function (x) length(class_list[[x]]))) #describe the grid needed iter_list <- sapply(named, (function (x) 1:lengths[x])) #generate the list of parameters pardf <- generate_parameter_df(stem, iter_list) #add columns with appropriate human-readable names for (name in named) { pardf[paste(name,"_hr",sep="")] <- as.factor( sapply(pardf[name], (function (i) class_list[[name]][i])) ) } return(pardf) } generate_parameter_df <- function(stem, iter_list) { grid <- expand.grid(iter_list) grid["param_name"] <- grid %>% unite(x,colnames(grid),sep=",") grid["param_name"] <- paste(stem,"[",grid$param_name,"]",sep="") return(grid) } group_mcmc_areas <- function( stem,# = "beta" class_list,# = beta_list stanfit,# = fit group_id,# = 2 rename=TRUE ) { #get all parameter names params <- get_parameters(stem,class_list) #filter down to parameters of interest params <- filter(params,groups == group_id) #Get dataframe with only the rows of interest filtdata <- as.data.frame(stanfit)[params$param_name] #rename columns if (rename) dimnames(filtdata)[[2]] <- params$parameters_hr #get group name for title group_name <- class_list$groups[group_id] #create area plot with appropriate title mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) + ggtitle(paste("Parameter distributions for ICD-10 class:",group_name)) } parameter_mcmc_areas <- function( stem,# = "beta" class_list,# = beta_list stanfit,# = fit parameter_id,# = 2 rename=TRUE ) { #get all parameter names params <- get_parameters(stem,class_list) #filter down to parameters of interest params <- filter(params,parameters == parameter_id) #Get dataframe with only the rows of interest filtdata <- as.data.frame(stanfit)[params$param_name] #rename columns if (rename) dimnames(filtdata)[[2]] <- params$groups_hr #get group name for title parameter_name <- class_list$parameters[parameter_id] #create area plot with appropriate title mcmc_areas(filtdata,prob = 0.8, prob_outer = 0.95) + ggtitle(parameter_name,"Parameter Distribution") } ``` ```{r} #mcmc_intervals(fit, pars=get_parameters("beta",beta_list)$param_name) ``` ### Investigating parameter distributions ```{r} #g1 <- group_mcmc_areas("beta",beta_list,fit,2) #g2 <- group_mcmc_areas("beta",beta_list,fit,2) #g3 <- group_mcmc_areas("beta",beta_list,fit,2) #g4 <- group_mcmc_areas("beta",beta_list,fit,2) #g5 <- group_mcmc_areas("beta",beta_list,fit,2) #g6 <- group_mcmc_areas("beta",beta_list,fit,2) #g7 <- group_mcmc_areas("beta",beta_list,fit,2) #g8 <- group_mcmc_areas("beta",beta_list,fit,2) #g9 <- group_mcmc_areas("beta",beta_list,fit,2) #g10 <- group_mcmc_areas("beta",beta_list,fit,2) #g11 <- group_mcmc_areas("beta",beta_list,fit,2) #g12 <- group_mcmc_areas("beta",beta_list,fit,2) #g13 <- group_mcmc_areas("beta",beta_list,fit,2) #g14 <- group_mcmc_areas("beta",beta_list,fit,2) #g15 <- group_mcmc_areas("beta",beta_list,fit,2) #g16 <- group_mcmc_areas("beta",beta_list,fit,2) #g17 <- group_mcmc_areas("beta",beta_list,fit,2) #g18 <- group_mcmc_areas("beta",beta_list,fit,2) #g19 <- group_mcmc_areas("beta",beta_list,fit,2) #g20 <- group_mcmc_areas("beta",beta_list,fit,2) #g21 <- group_mcmc_areas("beta",beta_list,fit,2) #g22 <- group_mcmc_areas("beta",beta_list,fit,2) p1 <- parameter_mcmc_areas("beta",beta_list,fit,1) p2 <- parameter_mcmc_areas("beta",beta_list,fit,2) p3 <- parameter_mcmc_areas("beta",beta_list,fit,3) #p4 <- parameter_mcmc_areas("beta",beta_list,fit,4) #p5 <- parameter_mcmc_areas("beta",beta_list,fit,5) #p6 <- parameter_mcmc_areas("beta",beta_list,fit,6) #p7 <- parameter_mcmc_areas("beta",beta_list,fit,7) #p8 <- parameter_mcmc_areas("beta",beta_list,fit,8) p9 <- parameter_mcmc_areas("beta",beta_list,fit,9) p10 <- parameter_mcmc_areas("beta",beta_list,fit,10) p11 <- parameter_mcmc_areas("beta",beta_list,fit,11) p12 <- parameter_mcmc_areas("beta",beta_list,fit,12) p13 <- parameter_mcmc_areas("beta",beta_list,fit,13) p14 <- parameter_mcmc_areas("beta",beta_list,fit,14) p15 <- parameter_mcmc_areas("beta",beta_list,fit,15) p16 <- parameter_mcmc_areas("beta",beta_list,fit,16) ``` Note these have 95% outer CI and 80% inner (shaded) 1) "Elapsed Duration", 2) "asinh(Generic Brands)", 3) "asinh(Competitors USPDC)", 4) "asinh(High SDI)", 5) "asinh(High-Medium SDI)", 6) "asinh(Medium SDI)", 7) "asinh(Low-Medium SDI)", 8) "asinh(Low SDI)", 9) "status_NYR", 10) "status_EBI", 11) "status_ANR", 12) "ib*elapsed", 13) "bnc*elapsed", 14) "sNYR*elapsed", 15) "sEBI*elapsed", 16) "sANR*elapsed" of interest - p1 + p2 - p3 + p2 - p2 + p12 - p3 + p13 - p9 + p14 - p10 + p15 - p11 + p16 ```{r} p1 + p2 ``` ```{r} p2 + p3 ``` ```{r} p2 + p12 ``` ```{r} p3 + p13 ``` ```{r} p9 + p14 ``` ```{r} p10 + p15 ``` ```{r} p11 + p16 ``` # Posterior Prediction ```{r} #TODO: Convert to ggplot, stabilize y axis hist(as.vector(extract(generated, pars="p_predicted_default")$p_predicted_default)) hist(as.vector(extract(generated, pars="p_predicted_intervention")$p_predicted_intervention)) ``` ## Distribution of Predicted Differences ### Intervention: Marginal increase in time to finish enrollment ```{r} pddf <- data.frame(extract(generated, pars="predicted_difference")$predicted_difference) |> pivot_longer(X1:X189) pddf["entry_idx"] <- as.numeric(gsub("\\D","",pddf$name)) pddf["category"] <- sapply(pddf$entry_idx, function(i) counterfact_categories[i]) pddf["category_name"] <- sapply(pddf$category, function(i) beta_list$groups[i]) ggplot(pddf, aes(x=value,)) + geom_histogram(bins=100) + labs( title = "Distribution of predicted differences" ,x = "Difference in probability due to intervention" ,y = "Predicted counts" ) + xlim(-0.3,0.1) + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") ggplot(pddf, aes(x=value,)) + geom_histogram(bins=100) + facet_wrap( ~factor( category_name, levels=beta_list$groups ) , labeller = label_wrap_gen(multi_line = TRUE) , ncol=5) + labs( title = "Distribution of predicted differences", subtitle = "By group" ,x = "Difference in probability due to intervention" ,y = "Predicted counts" ) + xlim(-0.25,0.1) + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") + theme(strip.text.x = element_text(size = 8)) ``` Recall that we had really tight zero priors. ### Intervention: Adding a single competitor ```{r} inherited_cols <- c( "elapsed_duration" #,"identical_brands" #,"brand_name_counts" ,"h_sdi_val" ,"hm_sdi_val" ,"m_sdi_val" ,"lm_sdi_val" ,"l_sdi_val" ,"status_NYR" ,"status_EBI" ,"status_ANR" #,"ib*elapsed" #,"bnc*elapsed" ,"sNYR*elapsed" ,"sEBI*elapsed" ,"sANR*elapsed" ) ``` #### Generics ```{r} #generics intervention brand_intervention_ib <- x[c(inherited_cols,"brand_name_counts","bnc*elapsed")] brand_intervention_ib["identical_brands"] <- asinh(sinh(x$identical_brands)+1) #add a single generic brand brand_intervention_ib["ib*elapsed"] <- brand_intervention_ib$identical_brands * brand_intervention_ib$elapsed_duration ``` ```{r} counterfact_marketing_ib <- list( D = ncol(x),# N = nrow(x), L = n_categories$count, y = as.vector(y), ll = as.vector(categories), x = as.matrix(x), mu_mean = 0, mu_stdev = 0.05, sigma_shape = 4, sigma_rate = 20, Nx = nrow(x), llx = as.vector(categories), counterfact_x_tilde = as.matrix(brand_intervention_ib), counterfact_x = as.matrix(x) ) ``` ```{r} generated_ib <- gqs( fit@stanmodel, data=counterfact_marketing_ib, draws=as.matrix(fit), seed=11021585 ) ``` ```{r} #TODO: Convert to ggplot, stabilize y axis hist(as.vector(extract(generated_ib, pars="p_predicted_default")$p_predicted_default), bins=100) hist(as.vector(extract(generated_ib, pars="p_predicted_intervention")$p_predicted_intervention), bins=100) hist(as.vector(extract(generated_ib, pars="predicted_difference")$predicted_difference), bins=100) ``` ```{r} pddf_ib <- data.frame(extract(generated_ib, pars="predicted_difference")$predicted_difference) |> pivot_longer(X1:X1343) #TODO: Fix Category names pddf_ib["entry_idx"] <- as.numeric(gsub("\\D","",pddf_ib$name)) pddf_ib["category"] <- sapply(pddf_ib$entry_idx, function(i) df$category_id[i]) pddf_ib["category_name"] <- sapply(pddf_ib$category, function(i) beta_list$groups[i]) ``` ```{r} ggplot(pddf_ib, aes(x=value,)) + geom_histogram(bins=100) + labs( title = "Distribution of predicted differences" ,x = "Difference in probability due to intervention" ,y = "Predicted counts" ) + #xlim(-0.3,0.1) + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") ggplot(pddf_ib, aes(x=value,)) + geom_histogram(bins=100) + facet_wrap( ~factor( category_name, levels=beta_list$groups ) , labeller = label_wrap_gen(multi_line = TRUE) , ncol=5) + labs( title = "Distribution of predicted differences", subtitle = "By group" ,x = "Difference in probability due to intervention" ,y = "Predicted counts" ) + #xlim(-0.25,0.1) + geom_vline(aes(xintercept = 0), color = "skyblue", linetype="dashed") + theme(strip.text.x = element_text(size = 8)) ``` #### USP DC ```{r} #formulary intervention brand_intervention_bnc <- x[c(inherited_cols,"identical_brands","ib*elapsed")] brand_intervention_bnc["brand_name_counts"] <- asinh(sinh(x$brand_name_counts)+1) #add a single formulary competitor brand brand_intervention_bnc["bnc*elapsed"] <- brand_intervention_bnc$brand_name_counts * brand_intervention_bnc$elapsed_duration ```