You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
240 lines
5.1 KiB
R
240 lines
5.1 KiB
R
library(bayesplot)
|
|
available_mcmc(pattern = "_nuts_")
|
|
library(ggplot2)
|
|
library(posterior)
|
|
library(cmdstanr)
|
|
library(rstan) # for stanfit
|
|
#Resources: https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
|
|
|
|
#save unchanged models instead of recompiling
|
|
rstan_options(auto_write = TRUE)
|
|
#allow for multithreaded sampling
|
|
options(mc.cores = parallel::detectCores())
|
|
|
|
#test installation, shouldn't get any errors
|
|
#example(stan_model, package = "rstan", run.dontrun = TRUE)
|
|
|
|
################ Pull data from database ######################
|
|
library(RPostgreSQL)
|
|
|
|
driver <- dbDriver("PostgreSQL")
|
|
|
|
con <- dbConnect(
|
|
driver,
|
|
user='root',
|
|
password='root',
|
|
dbname='aact_db',
|
|
host='will-office'
|
|
)
|
|
|
|
query <- dbSendQuery(
|
|
con,
|
|
"select * from formatted_data_with_planned_enrollment;"
|
|
)
|
|
df <- fetch(query,Inf)
|
|
df <- na.omit(df)
|
|
|
|
n_categories <- 22 #number of categories in ICD-10
|
|
|
|
################ Format Data ###########################
|
|
|
|
|
|
categories <- df["category_id"]
|
|
|
|
#Convert log(x+1) to arcsinh
|
|
|
|
x <- df["elapsed_duration"]
|
|
x["log_n_brands"] <- log(df$n_brands + 1)
|
|
x["h_sdi_val"] <- log(df$h_sdi_val + 1)
|
|
x["hm_sdi_val"] <- log(df$hm_sdi_val + 1)
|
|
x["m_sdi_val"] <- log(df$m_sdi_val + 1)
|
|
x["lm_sdi_val"] <- log(df$lm_sdi_val + 1)
|
|
x["l_sdi_val"] <- log(df$l_sdi_val + 1)
|
|
#x["enrollment"] <- df["current_enrollment"] / df["planned_enrollment"]
|
|
#square terms
|
|
#x["enrollment^2"] <- x["enrollment"]^2
|
|
#x["elapsed_duration^2"] <- x["elapsed_duration"]^2
|
|
#x["n_brands^2"] <- x["n_brands"]^2
|
|
#break out
|
|
x["status_NYR"] <- ifelse(df["current_status"]=="Not yet recruiting",1,0)
|
|
x["status_Rec"] <- ifelse(df["current_status"]=="Recruiting",1,0)
|
|
x["status_ANR"] <- ifelse(df["current_status"]=="Active, not recruiting",1,0)
|
|
x["status_EBI"] <- ifelse(df["current_status"]=="Enrolling by invitation",1,0)
|
|
|
|
y <- ifelse(df["final_status"]=="Terminated",1,0)
|
|
|
|
################################# DATA EXPLORATION ############################
|
|
|
|
#Plot terminated vs completed
|
|
#Plot duration for terminated vs completed
|
|
#Plot different times of
|
|
|
|
################################# FIT #########################################
|
|
#setup data (named list)
|
|
trials_data <- list(
|
|
D = ncol(x),#
|
|
N = nrow(x),
|
|
L = n_categories,
|
|
y = as.vector(y),
|
|
ll = categories$category_id,
|
|
x = as.matrix(x),
|
|
mu_mean = 0,
|
|
mu_stdev = 0.5,
|
|
sigma_shape = 6,
|
|
sigma_rate = 12
|
|
)
|
|
|
|
model <- cmdstan_model(file.path("Hierarchal_Logistic.stan"))
|
|
|
|
fit <- model$sample(
|
|
data = trials_data,
|
|
seed = 11021585,
|
|
chains = 4,
|
|
parallel_chains = 4,
|
|
refresh = 500
|
|
)
|
|
|
|
################################# ANALYZE #####################################
|
|
color_scheme_set("darkgray")
|
|
div_style <- parcoord_style_np(div_color = "green", div_size = 0.05, div_alpha = 0.4)
|
|
|
|
print(fit$summary(),n=265)
|
|
|
|
stanfitted <- rstan::read_stan_csv(fit$output_files())
|
|
|
|
#analyze mu values
|
|
draw_mu <- fit$draws("mu")
|
|
mcmc_hist(draw_mu)
|
|
mcmc_trace(draw_mu)
|
|
mcmc_pairs(draw_mu)
|
|
mcmc_parcoord(posterior,pars=c(
|
|
"mu[1]",
|
|
"mu[2]",
|
|
"mu[3]",
|
|
"mu[4]",
|
|
"mu[5]",
|
|
"mu[6]",
|
|
"mu[7]",
|
|
"mu[8]",
|
|
"mu[9]",
|
|
"mu[10]",
|
|
"mu[11]"
|
|
),
|
|
np=nuts_prmts,
|
|
np_style = div_style
|
|
)
|
|
|
|
|
|
#check sigma
|
|
draw_sigma <- fit$draws("sigma")
|
|
mcmc_hist(draw_sigma)
|
|
mcmc_trace(draw_sigma)
|
|
|
|
mcmc_parcoord(posterior,pars=c(
|
|
"sigma[1]",
|
|
"sigma[2]",
|
|
"sigma[3]",
|
|
"sigma[4]",
|
|
"sigma[5]",
|
|
"sigma[6]",
|
|
"sigma[7]",
|
|
"sigma[8]",
|
|
"sigma[9]",
|
|
"sigma[10]",
|
|
"sigma[11]"
|
|
),
|
|
np=nuts_prmts,
|
|
np_style = div_style
|
|
)
|
|
|
|
#other diagnostics
|
|
logpost <- log_posterior(fit)
|
|
nuts_prmts <- nuts_params(fit)
|
|
posterior <- fit$draws()
|
|
|
|
mcmc_pairs(
|
|
posterior,
|
|
np = nuts_prmts,
|
|
pars=c(
|
|
"mu[1]",
|
|
"mu[2]",
|
|
"mu[3]",
|
|
"mu[4]",
|
|
"mu[5]",
|
|
"mu[6]",
|
|
"mu[7]",
|
|
"mu[8]",
|
|
"mu[9]",
|
|
"mu[10]",
|
|
"mu[11]"
|
|
),
|
|
off_diag_args = list(size = 0.75)
|
|
)
|
|
|
|
#Interpretation
|
|
mcmc_areas(posterior,
|
|
pars=c(
|
|
"mu[1]",
|
|
"mu[2]",
|
|
"mu[3]",
|
|
"mu[4]",
|
|
"mu[5]",
|
|
"mu[6]",
|
|
"mu[7]",
|
|
"mu[8]",
|
|
"mu[9]",
|
|
"mu[10]",
|
|
"mu[11]"
|
|
),
|
|
prob = 0.95
|
|
)
|
|
|
|
#Interpretation
|
|
mcmc_areas(posterior,
|
|
pars=c(
|
|
"sigma[1]",
|
|
"sigma[2]",
|
|
"sigma[3]",
|
|
"sigma[4]",
|
|
"sigma[5]",
|
|
"sigma[6]",
|
|
"sigma[7]",
|
|
"sigma[8]",
|
|
"sigma[9]",
|
|
"sigma[10]",
|
|
"sigma[11]"
|
|
),
|
|
prob = 0.95
|
|
)
|
|
|
|
#iterate through betas
|
|
|
|
|
|
mcmc_areas(posterior,
|
|
pars=c(
|
|
"beta[1,*]"
|
|
),
|
|
prob = 0.95
|
|
)
|
|
|
|
#generate array of betas
|
|
betas_array <- sapply(1:11, function(param) sapply(1:22, function(group) paste0("beta[",group,",",param,"]")))
|
|
|
|
for (group in 1:22) {
|
|
print(
|
|
mcmc_areas(posterior,
|
|
pars=betas_array[group,],
|
|
prob = 0.95
|
|
)
|
|
)
|
|
}
|
|
|
|
for (param in 1:11) {
|
|
print(
|
|
mcmc_areas(posterior,
|
|
pars=betas_array[,param],
|
|
prob = 0.95
|
|
)
|
|
)
|
|
}
|