Merge branch 'main' of ssh://git.youainti.com:3022/youainti/ClinicalTrialsEstimation
commit
f2cfb776a0
@ -0,0 +1,150 @@
|
||||
library(bayesplot)
|
||||
available_mcmc(pattern = "_nuts_")
|
||||
library(ggplot2)
|
||||
|
||||
library(rstan)
|
||||
#Resources: https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
|
||||
|
||||
#save unchanged models instead of recompiling
|
||||
rstan_options(auto_write = TRUE)
|
||||
#allow for multithreaded sampling
|
||||
options(mc.cores = parallel::detectCores())
|
||||
|
||||
#test installation, shouldn't get any errors
|
||||
#example(stan_model, package = "rstan", run.dontrun = TRUE)
|
||||
|
||||
################ Pull data from database ######################
|
||||
library(RPostgreSQL)
|
||||
|
||||
driver <- dbDriver("PostgreSQL")
|
||||
|
||||
con <- dbConnect(
|
||||
driver,
|
||||
user='root',
|
||||
password='root',
|
||||
dbname='aact_db',
|
||||
host='will-office'
|
||||
)
|
||||
|
||||
query <- dbSendQuery(
|
||||
con,
|
||||
"select * from formatted_data_with_planned_enrollment;"
|
||||
)
|
||||
df <- fetch(query)
|
||||
df <- na.omit(df)
|
||||
|
||||
query2 <-dbSendQuery(con,"select count(*) from \"DiseaseBurden\".icd10_categories ic ;")
|
||||
n_categories <- fetch(query2)
|
||||
|
||||
################ Format Data ###########################
|
||||
|
||||
|
||||
categories <- df["category_id"]
|
||||
|
||||
x <- df["elapsed_duration"]
|
||||
x["log_n_brands"] <- log(df$n_brands + 1)
|
||||
x["h_sdi_val"] <- log(df$h_sdi_val + 1)
|
||||
x["hm_sdi_val"] <- log(df$hm_sdi_val + 1)
|
||||
x["m_sdi_val"] <- log(df$m_sdi_val + 1)
|
||||
x["lm_sdi_val"] <- log(df$lm_sdi_val + 1)
|
||||
x["l_sdi_val"] <- log(df$l_sdi_val + 1)
|
||||
x["enrollment"] <- df["current_enrollment"] / df["planned_enrollment"]
|
||||
#square terms
|
||||
#x["enrollment^2"] <- x["enrollment"]^2
|
||||
#x["elapsed_duration^2"] <- x["elapsed_duration"]^2
|
||||
#x["n_brands^2"] <- x["n_brands"]^2
|
||||
#break out
|
||||
x["status_NYR"] <- ifelse(df["current_status"]=="Not yet recruiting",1,0)
|
||||
x["status_Rec"] <- ifelse(df["current_status"]=="Recruiting",1,0)
|
||||
x["status_ANR"] <- ifelse(df["current_status"]=="Active, not recruiting",1,0)
|
||||
x["status_EBI"] <- ifelse(df["current_status"]=="Enrolling by invitation",1,0)
|
||||
|
||||
y <- ifelse(df["final_status"]=="Terminated",1,0)
|
||||
|
||||
################################# DATA EXPLORATION ############################
|
||||
|
||||
#Plot terminated vs completed
|
||||
#Plot duration for terminated vs completed
|
||||
#Plot different times of
|
||||
|
||||
################################# FIT #########################################
|
||||
#setup data (named list)
|
||||
trials_data <- list(
|
||||
D = ncol(x),#
|
||||
N = nrow(x),
|
||||
# L = n_categories$count,
|
||||
y = as.vector(y),
|
||||
# ll = as.vector(categories$category_id),
|
||||
x = as.matrix(x)
|
||||
)
|
||||
|
||||
#fit <- stan(file='Logistic.stan', data = trials_data) #attempt at simple model
|
||||
fit <- stan(file='Hierarchal_Logistic.stan', data = trials_data)
|
||||
|
||||
################################# ANALYZE #####################################
|
||||
print(fit)
|
||||
|
||||
color_scheme_set("darkgray")
|
||||
#trace plots
|
||||
plot(fit, pars=c("mu"), plotfun="trace")
|
||||
plot(fit, pars=c("sigma"), plotfun="trace")
|
||||
|
||||
#other diagnostics
|
||||
logpost <- log_posterior(fit)
|
||||
nuts_prmts <- nuts_params(fit)
|
||||
posterior <- as.array(fit)
|
||||
|
||||
|
||||
mcmc_parcoord(posterior,pars=c(
|
||||
"mu[1]",
|
||||
"mu[2]",
|
||||
"mu[3]",
|
||||
"mu[4]",
|
||||
"mu[5]",
|
||||
"mu[6]",
|
||||
"mu[7]",
|
||||
"mu[8]",
|
||||
"mu[9]",
|
||||
"mu[10]",
|
||||
"mu[11]",
|
||||
"mu[12]"
|
||||
),
|
||||
np=nuts_prmts
|
||||
)
|
||||
|
||||
mcmc_pairs(
|
||||
posterior,
|
||||
np = nuts_prmts,
|
||||
pars=c(
|
||||
"mu[1]",
|
||||
"mu[2]",
|
||||
"sigma[1]",
|
||||
"sigma[2]"
|
||||
),
|
||||
off_diag_args = list(size = 0.75)
|
||||
)
|
||||
|
||||
mcmc_parcoord(posterior,pars=c(
|
||||
"sigma[1]",
|
||||
"sigma[2]",
|
||||
"sigma[3]",
|
||||
"sigma[4]",
|
||||
"sigma[5]",
|
||||
"sigma[6]",
|
||||
"sigma[7]",
|
||||
"sigma[8]",
|
||||
"sigma[9]",
|
||||
"sigma[10]",
|
||||
"sigma[11]",
|
||||
"sigma[12]"
|
||||
),
|
||||
np=nuts_prmts
|
||||
)
|
||||
# check model estimates
|
||||
#mu
|
||||
plot(fit, pars=c("mu"), show_density =TRUE, ci_level=0.8)
|
||||
plot(fit, pars=c("mu"), plotfun = "hist")
|
||||
|
||||
#sigma
|
||||
plot(fit, pars=c("sigma"), show_density =TRUE, ci_level=0.8)
|
||||
plot(fit, pars=c("sigma"), plotfun = "hist")
|
||||
@ -0,0 +1,322 @@
|
||||
---
|
||||
title: "The Effects of market conditions on recruitment and completion of clinical trials"
|
||||
author: "Will King"
|
||||
format: html
|
||||
editor: source
|
||||
---
|
||||
|
||||
```{r}
|
||||
library(bayesplot)
|
||||
available_mcmc(pattern = "_nuts_")
|
||||
library(ggplot2)
|
||||
|
||||
library(rstan)
|
||||
#Resources: https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
|
||||
|
||||
#save unchanged models instead of recompiling
|
||||
rstan_options(auto_write = TRUE)
|
||||
#allow for multithreaded sampling
|
||||
options(mc.cores = parallel::detectCores())
|
||||
|
||||
#test installation, shouldn't get any errors
|
||||
#example(stan_model, package = "rstan", run.dontrun = TRUE)
|
||||
```
|
||||
|
||||
```{r}
|
||||
################ Pull data from database ######################
|
||||
library(RPostgreSQL)
|
||||
|
||||
driver <- dbDriver("PostgreSQL")
|
||||
|
||||
get_data <- function(driver) {
|
||||
|
||||
con <- dbConnect(
|
||||
driver,
|
||||
user='root',
|
||||
password='root',
|
||||
dbname='aact_db',
|
||||
host='will-office'
|
||||
)
|
||||
on.exit(dbDisconnect(con))
|
||||
|
||||
query <- dbSendQuery(
|
||||
con,
|
||||
"select * from formatted_data_with_planned_enrollment;"
|
||||
)
|
||||
df <- fetch(query, n = -1)
|
||||
df <- na.omit(df)
|
||||
|
||||
query2 <-dbSendQuery(con,"select count(*) from \"DiseaseBurden\".icd10_categories ic where \"level\"=1;")
|
||||
n_categories <- fetch(query2, n = -1)
|
||||
|
||||
return(list(data=df,ncat=n_categories))
|
||||
}
|
||||
|
||||
d <- get_data(driver)
|
||||
df <- d$data
|
||||
n_categories <- d$ncat
|
||||
|
||||
|
||||
|
||||
################ Format Data ###########################
|
||||
|
||||
|
||||
categories <- df["category_id"]
|
||||
|
||||
x <- df["elapsed_duration"]
|
||||
x["log_n_brands"] <- asinh(df$n_brands) #todo - convert to arcsinh() transform
|
||||
x["h_sdi_val"] <- asinh(df$h_sdi_val)
|
||||
x["hm_sdi_val"] <- asinh(df$hm_sdi_val)
|
||||
x["m_sdi_val"] <- asinh(df$m_sdi_val)
|
||||
x["lm_sdi_val"] <- asinh(df$lm_sdi_val)
|
||||
x["l_sdi_val"] <- asinh(df$l_sdi_val)
|
||||
#x["enrollment"] <- df["current_enrollment"] / df["planned_enrollment"]
|
||||
#square terms
|
||||
#x["enrollment^2"] <- x["enrollment"]^2
|
||||
#x["elapsed_duration^2"] <- x["elapsed_duration"]^2
|
||||
#x["n_brands^2"] <- x["n_brands"]^2
|
||||
#break out
|
||||
#x["status_NYR"] <- ifelse(df["current_status"]=="Not yet recruiting",1,0)
|
||||
#x["status_Rec"] <- ifelse(df["current_status"]=="Recruiting",1,0)
|
||||
#x["status_ANR"] <- ifelse(df["current_status"]=="Active, not recruiting",1,0)
|
||||
#x["status_EBI"] <- ifelse(df["current_status"]=="Enrolling by invitation",1,0)
|
||||
|
||||
y <- ifelse(df["final_status"]=="Terminated",1,0)
|
||||
```
|
||||
|
||||
```{r}
|
||||
################################# DATA EXPLORATION ############################
|
||||
driver <- dbDriver("PostgreSQL")
|
||||
|
||||
con <- dbConnect(
|
||||
driver,
|
||||
user='root',
|
||||
password='root',
|
||||
dbname='aact_db',
|
||||
host='will-office'
|
||||
)
|
||||
#Plot histogram of count of snapshots
|
||||
df3 <- dbGetQuery(
|
||||
con,
|
||||
"select nct_id,final_status,count(*) from formatted_data_with_planned_enrollment fdwpe
|
||||
group by nct_id,final_status ;"
|
||||
)
|
||||
#df3 <- fetch(query3, n = -1)
|
||||
|
||||
ggplot(data=df3, aes(x=count, fill=final_status)) +
|
||||
geom_histogram(binwidth=1) +
|
||||
ggtitle("Histogram of snapshots per trial (matched trials)") +
|
||||
xlab("Snapshots per trial")
|
||||
|
||||
#Plot duration for terminated vs completed
|
||||
df4 <- dbGetQuery(
|
||||
con,
|
||||
"
|
||||
select
|
||||
nct_id,
|
||||
start_date ,
|
||||
primary_completion_date,
|
||||
overall_status ,
|
||||
primary_completion_date - start_date as duration
|
||||
from ctgov.studies s
|
||||
where nct_id in (select distinct nct_id from http.download_status ds)
|
||||
;"
|
||||
)
|
||||
#df4 <- fetch(query4, n = -1)
|
||||
|
||||
ggplot(data=df4, aes(x=duration,fill=overall_status)) +
|
||||
geom_histogram()+
|
||||
ggtitle("Histogram of trial durations") +
|
||||
xlab("duration")+
|
||||
facet_wrap(~overall_status)
|
||||
|
||||
df5 <- dbGetQuery(
|
||||
con,
|
||||
"
|
||||
with cte1 as (
|
||||
select
|
||||
nct_id,
|
||||
start_date ,
|
||||
primary_completion_date,
|
||||
overall_status ,
|
||||
primary_completion_date - start_date as duration
|
||||
from ctgov.studies s
|
||||
where nct_id in (select distinct nct_id from http.download_status ds)
|
||||
), cte2 as (
|
||||
select nct_id,count(*) as snapshot_count from formatted_data_with_planned_enrollment fdwpe
|
||||
group by nct_id
|
||||
)
|
||||
select a.nct_id, a.overall_status, a.duration,b.snapshot_count
|
||||
from cte1 as a
|
||||
join cte2 as b
|
||||
on a.nct_id=b.nct_id
|
||||
;"
|
||||
)
|
||||
#df5 <- fetch(query5, n = -1)
|
||||
|
||||
ggplot(data=df5, aes(x=duration,y=snapshot_count,color=overall_status)) +
|
||||
geom_point() +
|
||||
ggtitle("duration, status, and snapshot_count") +
|
||||
xlab("duration") +
|
||||
ylab("snapshot count")
|
||||
|
||||
|
||||
dbDisconnect(con)
|
||||
```
|
||||
|
||||
```{r}
|
||||
################################# FIT #########################################
|
||||
|
||||
#setup data (named list)
|
||||
trials_data <- list(
|
||||
D = ncol(x),#
|
||||
N = nrow(x),
|
||||
L = n_categories$count,
|
||||
y = as.vector(y),
|
||||
ll = as.vector(categories$category_id),
|
||||
x = as.matrix(x)
|
||||
)
|
||||
|
||||
#fit <- stan(file='Logistic.stan', data = trials_data) #attempt at simple model
|
||||
fit <- stan(
|
||||
file='Hierarchal_Logistic.stan',
|
||||
data = trials_data,
|
||||
chains = 6,
|
||||
iter = 5000,
|
||||
seed = 11021585
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
```{r}
|
||||
################################# ANALYZE #####################################
|
||||
print(fit)
|
||||
```
|
||||
|
||||
```{r}
|
||||
color_scheme_set("darkgray")
|
||||
#trace plots
|
||||
plot(fit, pars=c("mu"), plotfun="trace")
|
||||
color_scheme_set("viridis")
|
||||
mcmc_rank_overlay(
|
||||
fit,
|
||||
pars=c(
|
||||
"mu[1]",
|
||||
"mu[2]",
|
||||
"mu[3]",
|
||||
"mu[4]",
|
||||
"mu[5]",
|
||||
"mu[6]",
|
||||
"mu[7]"
|
||||
),
|
||||
n_bins=100
|
||||
)+ legend_move("top")
|
||||
```
|
||||
|
||||
```{r}
|
||||
color_scheme_set("viridis")
|
||||
plot(fit, pars=c("sigma"), plotfun="trace")
|
||||
color_scheme_set("viridis")
|
||||
mcmc_rank_overlay(
|
||||
fit,
|
||||
pars=c(
|
||||
"sigma[1]",
|
||||
"sigma[2]",
|
||||
"sigma[3]",
|
||||
"sigma[4]",
|
||||
"sigma[5]",
|
||||
"sigma[6]",
|
||||
"sigma[7]"
|
||||
),
|
||||
n_bins=100
|
||||
)+ legend_move("top")
|
||||
```
|
||||
|
||||
```{r}
|
||||
#other diagnostics
|
||||
logpost <- log_posterior(fit)
|
||||
nuts_prmts <- nuts_params(fit)
|
||||
posterior <- as.array(fit)
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
color_scheme_set("darkgray")
|
||||
mcmc_parcoord(posterior,pars=c(
|
||||
"mu[1]",
|
||||
"mu[2]",
|
||||
"mu[3]",
|
||||
"mu[4]",
|
||||
"mu[5]",
|
||||
"mu[6]",
|
||||
"mu[7]"
|
||||
),
|
||||
np=nuts_prmts
|
||||
)
|
||||
```
|
||||
|
||||
```{r}
|
||||
mcmc_pairs(
|
||||
posterior,
|
||||
np = nuts_prmts,
|
||||
pars=c(
|
||||
"mu[1]",
|
||||
"mu[2]",
|
||||
"mu[3]",
|
||||
"mu[4]",
|
||||
"mu[5]",
|
||||
"mu[6]",
|
||||
"mu[7]"
|
||||
),
|
||||
off_diag_args = list(size = 0.75)
|
||||
)
|
||||
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
mcmc_parcoord(posterior,pars=c(
|
||||
"sigma[1]",
|
||||
"sigma[2]",
|
||||
"sigma[3]",
|
||||
"sigma[4]",
|
||||
"sigma[5]",
|
||||
"sigma[6]",
|
||||
"sigma[7]"
|
||||
),
|
||||
np=nuts_prmts
|
||||
)
|
||||
```
|
||||
|
||||
```{r}
|
||||
mcmc_pairs(
|
||||
posterior,
|
||||
np = nuts_prmts,
|
||||
pars=c(
|
||||
"sigma[1]",
|
||||
"sigma[2]",
|
||||
"sigma[3]",
|
||||
"sigma[4]",
|
||||
"sigma[5]",
|
||||
"sigma[6]",
|
||||
"sigma[7]"
|
||||
),
|
||||
off_diag_args = list(size = 0.75)
|
||||
)
|
||||
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
# check model estimates
|
||||
#mu
|
||||
plot(fit, pars=c("mu"), show_density =TRUE, ci_level=0.8)
|
||||
plot(fit, pars=c("mu"), plotfun = "hist", bins=50)
|
||||
```
|
||||
|
||||
```{r}
|
||||
#sigma
|
||||
plot(fit, pars=c("sigma"), show_density =TRUE, ci_level=0.8)
|
||||
plot(fit, pars=c("sigma"), plotfun = "hist",bins=50)
|
||||
|
||||
```
|
||||
Binary file not shown.
@ -0,0 +1,40 @@
|
||||
//
|
||||
// This Stan program defines a simple model, with a
|
||||
// vector of values 'y' modeled as normally distributed
|
||||
// with mean 'mu' and standard deviation 'sigma'.
|
||||
//
|
||||
// Learn more about model development with Stan at:
|
||||
//
|
||||
// http://mc-stan.org/users/interfaces/rstan.html
|
||||
// https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
|
||||
//
|
||||
|
||||
// The input data is a vector 'y' of length 'N'.
|
||||
data {
|
||||
int<lower=1> D;
|
||||
int<lower=1> N;
|
||||
int<lower=1> L;
|
||||
int<lower=0, upper=1> y[N];
|
||||
int<lower=1, upper=L> ll[N];
|
||||
row_vector[D] x[N];
|
||||
}
|
||||
parameters {
|
||||
real mu[D];
|
||||
real<lower=0> sigma[D];
|
||||
vector[D] beta[L];
|
||||
}
|
||||
model {
|
||||
sigma ~ gamma(2,1); //shape and inverse scale
|
||||
mu ~ normal(0, 1); //convert to mvnormal
|
||||
for (l in 1:L) {
|
||||
beta[l] ~ normal(mu, sigma);
|
||||
}
|
||||
{
|
||||
vector[N] x_beta_ll;
|
||||
for (n in 1:N) {
|
||||
x_beta_ll[n] = x[n] * beta[ll[n]];
|
||||
}
|
||||
y ~ bernoulli_logit(x_beta_ll);
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,30 @@
|
||||
//
|
||||
// This Stan program defines a simple model, with a
|
||||
// vector of values 'y' modeled as normally distributed
|
||||
// with mean 'mu' and standard deviation 'sigma'.
|
||||
//
|
||||
// Learn more about model development with Stan at:
|
||||
//
|
||||
// http://mc-stan.org/users/interfaces/rstan.html
|
||||
// https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
|
||||
//
|
||||
|
||||
// The input data is a vector 'y' of length 'N'.
|
||||
data {
|
||||
int<lower=1> D;
|
||||
int<lower=1> N;
|
||||
int<lower=0, upper=1> y[N];
|
||||
row_vector[D] x[N];
|
||||
}
|
||||
parameters {
|
||||
real mu[D];
|
||||
real<lower=0> sigma[D];
|
||||
}
|
||||
model {
|
||||
sigma ~ gamma(2,0.1);
|
||||
mu ~ normal(0, sigma); //convert to mvnormal
|
||||
|
||||
for (n in 1:N) {
|
||||
y[n] ~ bernoulli_logit(x[n] * mu);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,32 @@
|
||||
//
|
||||
// This Stan program defines a simple model, with a
|
||||
// vector of values 'y' modeled as normally distributed
|
||||
// with mean 'mu' and standard deviation 'sigma'.
|
||||
//
|
||||
// Learn more about model development with Stan at:
|
||||
//
|
||||
// http://mc-stan.org/users/interfaces/rstan.html
|
||||
// https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
|
||||
//
|
||||
|
||||
// The input data is a vector 'y' of length 'N'.
|
||||
data {
|
||||
int<lower=0> N;
|
||||
int<lower=0> k;
|
||||
matrix[N,k] X;
|
||||
vector[N] int<lower=0, upper=1> y;
|
||||
}
|
||||
|
||||
// The parameters accepted by the model. Our model
|
||||
// accepts two parameters 'mu' and 'sigma'.
|
||||
parameters {
|
||||
vector[k] beta;
|
||||
}
|
||||
|
||||
// The model to be estimated. We model the output
|
||||
// 'y' to be normally distributed with mean 'mu'
|
||||
// and standard deviation 'sigma'.
|
||||
model {
|
||||
y ~ bernoulli_logit( X * beta);
|
||||
}
|
||||
|
||||
@ -0,0 +1,17 @@
|
||||
Version: 1.0
|
||||
|
||||
RestoreWorkspace: Yes
|
||||
SaveWorkspace: Ask
|
||||
AlwaysSaveHistory: Yes
|
||||
|
||||
EnableCodeIndexing: Yes
|
||||
UseSpacesForTab: Yes
|
||||
NumSpacesForTab: 4
|
||||
Encoding: UTF-8
|
||||
|
||||
RnwWeave: Sweave
|
||||
LaTeX: pdfLaTeX
|
||||
|
||||
AutoAppendNewline: Yes
|
||||
StripTrailingWhitespace: Yes
|
||||
LineEndingConversion: Native
|
||||
Loading…
Reference in New Issue