library(bayesplot) available_mcmc(pattern = "_nuts_") library(ggplot2) library(posterior) library(cmdstanr) library(rstan) # for stanfit #Resources: https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started #save unchanged models instead of recompiling rstan_options(auto_write = TRUE) #allow for multithreaded sampling options(mc.cores = parallel::detectCores()) #test installation, shouldn't get any errors #example(stan_model, package = "rstan", run.dontrun = TRUE) ################ Pull data from database ###################### library(RPostgreSQL) driver <- dbDriver("PostgreSQL") con <- dbConnect( driver, user='root', password='root', dbname='aact_db', host='will-office' ) query <- dbSendQuery( con, "select * from formatted_data_with_planned_enrollment;" ) df <- fetch(query,Inf) df <- na.omit(df) n_categories <- 22 #number of categories in ICD-10 ################ Format Data ########################### categories <- df["category_id"] #Convert log(x+1) to arcsinh x <- df["elapsed_duration"] x["log_n_brands"] <- log(df$n_brands + 1) x["h_sdi_val"] <- log(df$h_sdi_val + 1) x["hm_sdi_val"] <- log(df$hm_sdi_val + 1) x["m_sdi_val"] <- log(df$m_sdi_val + 1) x["lm_sdi_val"] <- log(df$lm_sdi_val + 1) x["l_sdi_val"] <- log(df$l_sdi_val + 1) #x["enrollment"] <- df["current_enrollment"] / df["planned_enrollment"] #square terms #x["enrollment^2"] <- x["enrollment"]^2 #x["elapsed_duration^2"] <- x["elapsed_duration"]^2 #x["n_brands^2"] <- x["n_brands"]^2 #break out x["status_NYR"] <- ifelse(df["current_status"]=="Not yet recruiting",1,0) x["status_Rec"] <- ifelse(df["current_status"]=="Recruiting",1,0) x["status_ANR"] <- ifelse(df["current_status"]=="Active, not recruiting",1,0) x["status_EBI"] <- ifelse(df["current_status"]=="Enrolling by invitation",1,0) y <- ifelse(df["final_status"]=="Terminated",1,0) ################################# DATA EXPLORATION ############################ #Plot terminated vs completed #Plot duration for terminated vs completed #Plot different times of ################################# FIT ######################################### #setup data (named list) trials_data <- list( D = ncol(x),# N = nrow(x), L = n_categories, y = as.vector(y), ll = categories$category_id, x = as.matrix(x), mu_mean = 0, mu_stdev = 0.5, sigma_shape = 6, sigma_rate = 12 ) model <- cmdstan_model(file.path("Hierarchal_Logistic.stan")) fit <- model$sample( data = trials_data, seed = 11021585, chains = 4, parallel_chains = 4, refresh = 500 ) ################################# ANALYZE ##################################### color_scheme_set("darkgray") div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4) print(fit$summary(),n=265) stanfitted <- rstan::read_stan_csv(fit$output_files()) #analyze mu values draw_mu <- fit$draws("mu") mcmc_hist(draw_mu) mcmc_trace(draw_mu) mcmc_pairs(draw_mu) mcmc_parcoord(posterior,pars=c( "mu[1]", "mu[2]", "mu[3]", "mu[4]", "mu[5]", "mu[6]", "mu[7]", "mu[8]", "mu[9]", "mu[10]", "mu[11]" ), np=nuts_prmts, np_style = div_style ) #check sigma draw_sigma <- fit$draws("sigma") mcmc_hist(draw_sigma) mcmc_trace(draw_sigma) mcmc_parcoord(posterior,pars=c( "sigma[1]", "sigma[2]", "sigma[3]", "sigma[4]", "sigma[5]", "sigma[6]", "sigma[7]", "sigma[8]", "sigma[9]", "sigma[10]", "sigma[11]" ), np=nuts_prmts, np_style = div_style ) #other diagnostics logpost <- log_posterior(fit) nuts_prmts <- nuts_params(fit) posterior <- fit$draws() mcmc_pairs( posterior, np = nuts_prmts, pars=c( "mu[1]", "mu[2]", "mu[3]", "mu[4]", "mu[5]", "mu[6]", "mu[7]", "mu[8]", "mu[9]", "mu[10]", "mu[11]" ), off_diag_args = list(size = 0.75) ) #Interpretation mcmc_areas(posterior, pars=c( "mu[1]", "mu[2]", "mu[3]", "mu[4]", "mu[5]", "mu[6]", "mu[7]", "mu[8]", "mu[9]", "mu[10]", "mu[11]" ), prob = 0.95 ) #Interpretation mcmc_areas(posterior, pars=c( "sigma[1]", "sigma[2]", "sigma[3]", "sigma[4]", "sigma[5]", "sigma[6]", "sigma[7]", "sigma[8]", "sigma[9]", "sigma[10]", "sigma[11]" ), prob = 0.95 ) #iterate through betas mcmc_areas(posterior, pars=c( "beta[1,*]" ), prob = 0.95 ) #generate array of betas betas_array <- sapply(1:11, function(param) sapply(1:22, function(group) paste0("beta[",group,",",param,"]"))) for (group in 1:22) { print( mcmc_areas(posterior, pars=betas_array[group,], prob = 0.95 ) ) } for (param in 1:11) { print( mcmc_areas(posterior, pars=betas_array[,param], prob = 0.95 ) ) }