Fixed column issue in enrollment delay

1 year ago · 6fcc8cda22
parent be77d1d38a
commit 6fcc8cda22
5 changed files with 27 additions and 5653 deletions
--- a/r-analysis/EffectsOfEnrollmentDelay.qmd
+++ b/r-analysis/EffectsOfEnrollmentDelay.qmd
@ -238,14 +238,14 @@ cf_categories <- df_counterfact_base$category_id
 ################################# FIT MODEL #########################################
 inherited_cols <- c(
    "elapsed_duration"
-    #,"identical_brands"
-    #,"brand_name_counts"
+    ,"identical_brands"
+    ,"brand_name_counts"
    ,"h_sdi_val"
    ,"hm_sdi_val"
    ,"m_sdi_val"
    ,"lm_sdi_val"
    ,"l_sdi_val"
-    ,"status_NYR"# TODO: may need to remove
+    ,"status_NYR"
    ,"status_EBI"
    ,"status_Rec"
    ,"status_ANR"
@ -404,7 +404,7 @@ parameter_mcmc_areas <- function(
 Plan: select all snapshots that are the first to have closed enrollment (Rec -> ANR)
 ```{r}
 #delay intervention
-intervention_enrollment <- x_cf_base[c(inherited_cols,"brand_name_counts", "identical_brands")]
+intervention_enrollment <- x_cf_base[c(inherited_cols)]
 intervention_enrollment["status_ANR"] <- 0
 intervention_enrollment["status_Rec"] <- 1
 ```
@ -419,12 +419,14 @@ counterfact_delay <- list(
    x = as.matrix(x),
    mu_mean = 0,
    mu_stdev = 0.05,
-    sigma_shape = 4,
-    sigma_rate = 20,
+    sigma_shape = 8,
+    sigma_rate = ,
    Nx = nrow(x_cf_base),
    llx = as.vector(cf_categories),
    counterfact_x_tilde = as.matrix(intervention_enrollment),
-    counterfact_x = as.matrix(x_cf_base)
+    counterfact_x = as.matrix(x_cf_base),
+    status_indexes = c(11,12) #subtract anr from recruiting to get movement from anr to recruiting
+
 )
 ```

@ -432,14 +434,24 @@ counterfact_delay <- list(
 fit <- stan(
    file='Hierarchal_Logistic.stan', 
    data = counterfact_delay,
-    chains = 4,
-    iter = 5000,
+    chains = 8,
+    iter = 12000,
+    warmup = 4000,
    seed = 11021585
    )
 ```


+## Fit Results
+```{r}
+print(check_hmc_diagnostics(fit))
+print(get_bfmi(fit))
+```
+

+```{r}
+print(fit)
+```



@ -564,13 +576,6 @@ sum(df5$snapshot_count)



-## Fit Results
-
-
-```{r}
-################################# ANALYZE #####################################
-print(fit)
-```

 # Parameter Distributions

@ -639,13 +644,13 @@ Note these have 95% outer CI and 80% inner (shaded)


 ```{r}
-#get the generic and uspdc parameters
-print(px[4]$plot + px[7]$plot)
-ggsave(paste0(image_parameters_across_groups,"2+3_generic_and_uspdc.png"))
+##get the generic and uspdc parameters
+#print(px[4]$plot + px[7]$plot)
+#ggsave(paste0(image_parameters_across_groups,"2+3_generic_and_uspdc.png"))

 #get the parameters associated with duration
-px[16]$plot + px[19]$plot
-ggsave(paste0(image_parameters_across_groups,"11+12_statusREC_and_statusANR.png"))
+#px[16]$plot + px[19]$plot
+#ggsave(paste0(image_parameters_across_groups,"11+12_statusREC_and_statusANR.png"))
 ```


@ -718,11 +723,6 @@ ggsave(paste0(image_dist_diff_analysis,"/prior_sigma.png"))



-```{r}
-check_hmc_diagnostics(fit)
-```
-
-



@ -1207,4 +1207,4 @@ for (i in 1:3) {



-  
+  
--- a/r-analysis/EffectsOfEnrollmentDelay_const.qmd
+++ b/r-analysis/EffectsOfEnrollmentDelay_const.qmd
--- a/r-analysis/EffectsOfEnrollmentDelay_rebased.html
+++ b/r-analysis/EffectsOfEnrollmentDelay_rebased.html
--- a/r-analysis/EffectsOfEnrollmentDelay_rebased.qmd
+++ b/r-analysis/EffectsOfEnrollmentDelay_rebased.qmd
--- a/r-analysis/Hierarchal_Logistic_with_status_diff.stan
+++ b/r-analysis/Hierarchal_Logistic_with_status_diff.stan
@ -1,102 +0,0 @@
-//
-// This Stan program defines a simple model, with a
-// vector of values 'y' modeled as normally distributed
-// with mean 'mu' and standard deviation 'sigma'.
-//
-// Learn more about model development with Stan at:
-//
-//    http://mc-stan.org/users/interfaces/rstan.html
-//    https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started
-//
-
-// The input data is a vector 'y' of length 'N'.
-data {
-  int<lower=1> D; // number of features
-  int<lower=1> N; // number of observations
-  int<lower=1> L; // number of categories
-  array[N] int<lower=0, upper=1> y; // vector of dependent variables
-  array[N] int<lower=1, upper=L> ll; // vector of categories
-  array[N] row_vector[D] x; // independent variables
-  real mu_mean; //hyperprior
-  real<lower=0> mu_stdev; //hyperprior
-  real sigma_shape; //hyperprior
-  real sigma_rate; //hyperprior
-  //counterfactuals
-  int<lower=0> Nx;
-  array[Nx] int<lower=1, upper=L> llx;//vec of categories
-  array[Nx] row_vector[D] counterfact_x_tilde; // Posterior Prediction intervention
-  array[Nx] row_vector[D] counterfact_x; // Posterior Prediction intervention
-  //the two statuses to catch relative difference
-  array[2] int status_indexes; //the two status indexes to compare (order is x[1] - x[2])
-}
-parameters {
-  array[D] real mu;
-  array[D] real<lower=0> sigma;
-  array[L] vector[D] beta;
-}
-model {
-  sigma ~ gamma(sigma_shape,sigma_rate); //hyperprior for stdev: shape and inverse scale
-  mu ~ normal(mu_mean, mu_stdev); //hyperprior for mean //TODO: convert to mvnormal
-  for (l in 1:L) {
-    beta[l] ~ normal(mu, sigma);
-  }
-  {
-    vector[N] x_beta_ll;
-    for (n in 1:N) {
-      x_beta_ll[n] = x[n] * beta[ll[n]];
-    }
-    y ~ bernoulli_logit(x_beta_ll);
-  }
-}
-generated quantities {
-    //SETUP PRIOR PREDICTION
-    //preallocate
-    real mu_prior[D];
-    real sigma_prior[D];
-    real p_prior[N]; // what I have priors about
-    real p_predicted[N]; //predicted p_values
-    //intervention
-    real p_predicted_default[Nx]; //predicted p_values
-    real p_predicted_intervention[Nx]; //predicted p_values
-    real predicted_difference[Nx]; //difference in predicted values
-    
-    //collect array of relative status differences between 
-    array[L] real status_diff;
-    //GENERATE RELATIVE DIFFERENCES BETWEEN STATUSES
-    for (l in 1:L) {
-      status_diff[l] = beta[l,status_indexes[1]] - beta[l,status_indexes[2]];
-    }
-
-    //GENERATE PRIOR PREDICTIONS
-    {
-        vector[D] beta_prior[L];//local var
-        //sample parameters
-        for (d in 1:D) {
-            mu_prior[d] = normal_rng(mu_mean,mu_stdev);
-            sigma_prior[d] = gamma_rng(sigma_shape,sigma_rate);
-        }
-        for (l in 1:L) {
-            for (d in 1:D) {
-                beta_prior[l,d] = normal_rng(mu_prior[d],sigma_prior[d]);
-            }
-        }
-        //generate probabilities
-        vector[D] b_prior[N];//local var
-        for (n in 1:N){
-            b_prior[n] = beta_prior[ll[n]];
-            p_prior[n] = inv_logit(  x[n] * b_prior[n] );
-        }
-    }
-    //GENERATE POSTERIOR PREDICTIONS
-    for (n in 1:N) {
-        p_predicted[n] = inv_logit( x[n] * beta[ll[n]] );
-    }
-    //GENERATE POSTERIOR  DISTRIBUTION OF DIFFERENCES
-    for (n in 1:Nx) {
-        p_predicted_default[n] = inv_logit(  counterfact_x[n] * beta[llx[n]] );
-        p_predicted_intervention[n] = inv_logit(  counterfact_x_tilde[n] * beta[llx[n]] ); //intervention
-
-        //intervention - base case
-        predicted_difference[n] = p_predicted_intervention[n] - p_predicted_default[n];
-    }
-}