Fixed column issue in enrollment delay

main^2
Will King 1 year ago
parent be77d1d38a
commit 6fcc8cda22

@ -238,14 +238,14 @@ cf_categories <- df_counterfact_base$category_id
################################# FIT MODEL #########################################
inherited_cols <- c(
"elapsed_duration"
#,"identical_brands"
#,"brand_name_counts"
,"identical_brands"
,"brand_name_counts"
,"h_sdi_val"
,"hm_sdi_val"
,"m_sdi_val"
,"lm_sdi_val"
,"l_sdi_val"
,"status_NYR"# TODO: may need to remove
,"status_NYR"
,"status_EBI"
,"status_Rec"
,"status_ANR"
@ -404,7 +404,7 @@ parameter_mcmc_areas <- function(
Plan: select all snapshots that are the first to have closed enrollment (Rec -> ANR)
```{r}
#delay intervention
intervention_enrollment <- x_cf_base[c(inherited_cols,"brand_name_counts", "identical_brands")]
intervention_enrollment <- x_cf_base[c(inherited_cols)]
intervention_enrollment["status_ANR"] <- 0
intervention_enrollment["status_Rec"] <- 1
```
@ -419,12 +419,14 @@ counterfact_delay <- list(
x = as.matrix(x),
mu_mean = 0,
mu_stdev = 0.05,
sigma_shape = 4,
sigma_rate = 20,
sigma_shape = 8,
sigma_rate = ,
Nx = nrow(x_cf_base),
llx = as.vector(cf_categories),
counterfact_x_tilde = as.matrix(intervention_enrollment),
counterfact_x = as.matrix(x_cf_base)
counterfact_x = as.matrix(x_cf_base),
status_indexes = c(11,12) #subtract anr from recruiting to get movement from anr to recruiting
)
```
@ -432,14 +434,24 @@ counterfact_delay <- list(
fit <- stan(
file='Hierarchal_Logistic.stan',
data = counterfact_delay,
chains = 4,
iter = 5000,
chains = 8,
iter = 12000,
warmup = 4000,
seed = 11021585
)
```
## Fit Results
```{r}
print(check_hmc_diagnostics(fit))
print(get_bfmi(fit))
```
```{r}
print(fit)
```
@ -564,13 +576,6 @@ sum(df5$snapshot_count)
## Fit Results
```{r}
################################# ANALYZE #####################################
print(fit)
```
# Parameter Distributions
@ -639,13 +644,13 @@ Note these have 95% outer CI and 80% inner (shaded)
```{r}
#get the generic and uspdc parameters
print(px[4]$plot + px[7]$plot)
ggsave(paste0(image_parameters_across_groups,"2+3_generic_and_uspdc.png"))
##get the generic and uspdc parameters
#print(px[4]$plot + px[7]$plot)
#ggsave(paste0(image_parameters_across_groups,"2+3_generic_and_uspdc.png"))
#get the parameters associated with duration
px[16]$plot + px[19]$plot
ggsave(paste0(image_parameters_across_groups,"11+12_statusREC_and_statusANR.png"))
#px[16]$plot + px[19]$plot
#ggsave(paste0(image_parameters_across_groups,"11+12_statusREC_and_statusANR.png"))
```
@ -718,11 +723,6 @@ ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"))
```{r}
check_hmc_diagnostics(fit)
```
@ -1207,4 +1207,4 @@ for (i in 1:3) {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,102 +0,0 @@
//
// This Stan program defines a simple model, with a
// vector of values 'y' modeled as normally distributed
// with mean 'mu' and standard deviation 'sigma'.
//
// Learn more about model development with Stan at:
//
// http://mc-stan.org/users/interfaces/rstan.html
// https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
//
// The input data is a vector 'y' of length 'N'.
data {
int<lower=1> D; // number of features
int<lower=1> N; // number of observations
int<lower=1> L; // number of categories
array[N] int<lower=0, upper=1> y; // vector of dependent variables
array[N] int<lower=1, upper=L> ll; // vector of categories
array[N] row_vector[D] x; // independent variables
real mu_mean; //hyperprior
real<lower=0> mu_stdev; //hyperprior
real sigma_shape; //hyperprior
real sigma_rate; //hyperprior
//counterfactuals
int<lower=0> Nx;
array[Nx] int<lower=1, upper=L> llx;//vec of categories
array[Nx] row_vector[D] counterfact_x_tilde; // Posterior Prediction intervention
array[Nx] row_vector[D] counterfact_x; // Posterior Prediction intervention
//the two statuses to catch relative difference
array[2] int status_indexes; //the two status indexes to compare (order is x[1] - x[2])
}
parameters {
array[D] real mu;
array[D] real<lower=0> sigma;
array[L] vector[D] beta;
}
model {
sigma ~ gamma(sigma_shape,sigma_rate); //hyperprior for stdev: shape and inverse scale
mu ~ normal(mu_mean, mu_stdev); //hyperprior for mean //TODO: convert to mvnormal
for (l in 1:L) {
beta[l] ~ normal(mu, sigma);
}
{
vector[N] x_beta_ll;
for (n in 1:N) {
x_beta_ll[n] = x[n] * beta[ll[n]];
}
y ~ bernoulli_logit(x_beta_ll);
}
}
generated quantities {
//SETUP PRIOR PREDICTION
//preallocate
real mu_prior[D];
real sigma_prior[D];
real p_prior[N]; // what I have priors about
real p_predicted[N]; //predicted p_values
//intervention
real p_predicted_default[Nx]; //predicted p_values
real p_predicted_intervention[Nx]; //predicted p_values
real predicted_difference[Nx]; //difference in predicted values
//collect array of relative status differences between
array[L] real status_diff;
//GENERATE RELATIVE DIFFERENCES BETWEEN STATUSES
for (l in 1:L) {
status_diff[l] = beta[l,status_indexes[1]] - beta[l,status_indexes[2]];
}
//GENERATE PRIOR PREDICTIONS
{
vector[D] beta_prior[L];//local var
//sample parameters
for (d in 1:D) {
mu_prior[d] = normal_rng(mu_mean,mu_stdev);
sigma_prior[d] = gamma_rng(sigma_shape,sigma_rate);
}
for (l in 1:L) {
for (d in 1:D) {
beta_prior[l,d] = normal_rng(mu_prior[d],sigma_prior[d]);
}
}
//generate probabilities
vector[D] b_prior[N];//local var
for (n in 1:N){
b_prior[n] = beta_prior[ll[n]];
p_prior[n] = inv_logit( x[n] * b_prior[n] );
}
}
//GENERATE POSTERIOR PREDICTIONS
for (n in 1:N) {
p_predicted[n] = inv_logit( x[n] * beta[ll[n]] );
}
//GENERATE POSTERIOR DISTRIBUTION OF DIFFERENCES
for (n in 1:Nx) {
p_predicted_default[n] = inv_logit( counterfact_x[n] * beta[llx[n]] );
p_predicted_intervention[n] = inv_logit( counterfact_x_tilde[n] * beta[llx[n]] ); //intervention
//intervention - base case
predicted_difference[n] = p_predicted_intervention[n] - p_predicted_default[n];
}
}
Loading…
Cancel
Save