Update Traceable Model

Example of copying and revising Stan models using bbr.bayes.

bbr
model management

Introduction

This page demonstrates the following bbr functionality:

  • Iterative model development
  • Annotation of models with tags, notes, etc.

Tools used


MetrumRG packages

bbr Manage, track, and report modeling activities through simple R objects, with a user-friendly interface between R and NONMEM®.

bbr.bayes Extension of bbr that adds support for Bayesian model development, including Stan models fit with cmdstanr.

CRAN packages

dplyr A grammar of data manipulation.

Set up


Load required packages and set file paths to your model and figure directories.

library(bbr)
library(bbr.bayes)
library(dplyr)
library(here)
library(tidyverse)
library(yspec)
library(tidyvpc)
library(bayesplot)

MODEL_DIR <- here("model", "stan","expo")
FIGURE_DIR <- here("deliv", "figure","expo")

The first few rows of the analysis data set, exdata3, are shown below.

. # A tibble: 6 × 8
.      ID weight   age   sex  time  dose  cobs fxa.inh
.   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
. 1     1     59    30     1 0      1.25  0       2.78
. 2     1     59    30     1 0.083  1.25  6.19   15.2 
. 3     1     59    30     1 0.167  1.25 13.2     9.16
. 4     1     59    30     1 0.25   1.25 15.3    13.7 
. 5     1     59    30     1 0.5    1.25 22.3    21.5 
. 6     1     59    30     1 0.75   1.25 21.4    37.2

Model creation and submission

The base model, mod0, was created and fitted previously; we begin by reading it back into the workspace.

mod0 <- read_model(file.path(MODEL_DIR, 'mod0'))
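
For reference, a brand-new Stan model object is created with new_model(). The call below is a sketch and is not run on this page; it assumes that loading bbr.bayes makes the "stan" model type available to new_model(), and the model name is hypothetical.

mod_new <- new_model(
  file.path(MODEL_DIR, "my_new_model"),  # hypothetical model name
  .model_type = "stan"                   # assumed to be registered by bbr.bayes
)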

Revised model - correlated random effects

The marginal PPCs describing the relationships between response, exposure, and time look adequate. However, the model does not capture the within-subject relationship between baseline and post-baseline response: the observed data show a consistent, moderately strong negative correlation, while the base model assumes no such association. To address this, we’ll add a correlation between the subject-level effects on E0 and EC50.

We’ll use the copy_model_from function to generate a new model based on the base model.

Use caution when using the .overwrite = TRUE argument in new_model and copy_model_from. It will overwrite existing model files and output directories, if they exist.

mod1 <- copy_model_from(mod0, 
                        .new_model = 'mod1',
                        .overwrite = TRUE)

This will copy the key files from mod0 to mod1.

list.files(path=file.path(MODEL_DIR,'mod1'))
. [1] "mod1-init.R"        "mod1-output"        "mod1-stanargs.R"   
. [4] "mod1-standata.json" "mod1-standata.R"    "mod1.stan"

We’ll revise the Stan model code to include the correlated random effects. We’ll open the Stan model file and edit it to have the code shown below.

open_stanmod_file(mod1)

data {
  int<lower=0> N ;
  int<lower=0> n_id ;
  vector[N] DV;
  vector[N] Conc;
  array[N] int ID;
}

parameters {
  real<lower=0, upper=100> emax;
  real tv_log_ec50;
  real tv_e0;
  real log_gamma;
  
  array[n_id] vector[2] etas; // etas[i,1] = log_ec50; etas[i,2] = e0;

  real<lower=0> sigma;
  vector<lower=0>[2] omega_sds; // SDs for IIV
  corr_matrix[2] Omega_corr;
}

transformed parameters {
  real tv_ec50 = exp(tv_log_ec50);
  real gamma = exp(log_gamma);
  vector[n_id] ec50;
  vector[n_id] e0;
  vector[N] mu;
  
  
  for (i in 1:n_id) {
    ec50[i] = exp(tv_log_ec50 + etas[i,1]);
    e0[i] = tv_e0 + etas[i,2];
  }
  
  for (i in 1:N) {
    mu[i] = e0[ID[i]] + (emax-e0[ID[i]])*pow(Conc[i]/100,gamma) / (pow(ec50[ID[i]]/100,gamma) + pow(Conc[i]/100,gamma));
  }
}

model {
  // Priors
  tv_e0 ~ normal(0, 10);
  tv_log_ec50 ~ normal(4,2);
  emax ~ uniform(0,100);
  log_gamma ~ normal(0,1);
  sigma ~ normal(0,10);
  
  Omega_corr ~ lkj_corr(2);
  omega_sds ~ cauchy(0,2.5);

  etas ~ multi_normal([0,0],quad_form_diag(Omega_corr, omega_sds));

// Likelihood
  
  DV ~ normal(mu, sigma);

}


generated quantities {

  vector[N] simdv_obs;
  vector[N] simdv_new;

  // Simulated observations for observed subjects
  for (i in 1:N) {
    simdv_obs[i] = normal_rng(mu[i],sigma);
  }
  
  // Simulate observations for new subject
  {
    array[n_id] vector[2] etas_new; // etas[i,1] = log_ec50; etas[i,2] = e0;
    vector[n_id] ec50_new;
    vector[n_id] e0_new;
    vector[N] mu_new;
    
    for (i in 1:n_id) {
      etas_new[i,] = multi_normal_rng([0.0,0.0], quad_form_diag(Omega_corr, omega_sds));
        ec50_new[i] = exp(tv_log_ec50 + etas_new[i,1]);
        e0_new[i] = tv_e0 + etas_new[i,2];
    }
    
    for (i in 1:N) {
      mu_new[i] = e0_new[ID[i]] + (emax-e0_new[ID[i]])*pow(Conc[i]/100,gamma) / (pow(ec50_new[ID[i]]/100,gamma) + pow(Conc[i]/100,gamma));
      simdv_new[i] = normal_rng(mu_new[i] , sigma);
    }
  }
  
}

For this new model structure, we’ll need slightly different initial values:

open_staninit_file(mod1)
# Create Stan initial values
#
# This function must return something that can be passed to the `init` argument
#   of `cmdstanr::sample()`. There are several options; see `?cmdstanr::sample`
#   for details.
#
# `.data` represents the list returned from `make_standata()` for this model.
#   This is provided in case any of your initial values are dependent on some
#   aspect of the data (e.g. the number of rows).
#
# `.args` represents the list of attached arguments that will be passed through to
#   cmdstanr::sample(). This is provided in case any of your initial values are
#   dependent on any of these arguments (e.g. the number of chains).
#
# Note: you _don't_ need to pass anything to either of these arguments, you only
#   use it within the function. `bbr` will pass in the correct objects when it calls
#   `make_init()` under the hood.
#
make_init <- function(.data, .args) {
  # returning NULL causes cmdstanr::sample() to use the default initial values
  return(
    function() {
      list(tv_e0 = rnorm(1),
           emax = runif(1,90,100),
           tv_log_ec50 = rnorm(1,log(100),1),
           log_gamma = rnorm(1,0,.1),
           etas = matrix(rnorm(2*.data$n_id, 0, 1), ncol=2),
           omega_sds = abs(rnorm(2)),
           Omega_corr = diag(2),
           sigma = abs(rnorm(1,10,2)))
    }
  )
}

However, the data file doesn’t change, so we don’t need to make any modifications to the mod1-standata.R file.

Let’s add a description and tags and then submit the model. (TAGS is a named list of standardized tag strings defined elsewhere in the project so that tags are applied consistently across models.)

mod1 <- mod1 %>% 
  replace_description('Add correlation between subject-specific e0 and ec50')
mod1 <- mod1 %>% 
  add_tags(.tags = c(TAGS$eta_e0,TAGS$eta_ec50, TAGS$eta_cor))
fit1 <- submit_model(mod1)
fit1$cmdstan_diagnose()
. Processing csv files: /data/expo2-stan/model/stan/expo/mod1/mod1-output/mod1-202310241803-1-2a950e.csv, /data/expo2-stan/model/stan/expo/mod1/mod1-output/mod1-202310241803-2-2a950e.csv, /data/expo2-stan/model/stan/expo/mod1/mod1-output/mod1-202310241803-3-2a950e.csv, /data/expo2-stan/model/stan/expo/mod1/mod1-output/mod1-202310241803-4-2a950e.csv
. 
. Checking sampler transitions treedepth.
. Treedepth satisfactory for all transitions.
. 
. Checking sampler transitions for divergences.
. No divergent transitions found.
. 
. Checking E-BFMI - sampler transitions HMC potential energy.
. The E-BFMI, 0.05, is below the nominal threshold of 0.30 which suggests that HMC may have trouble exploring the target distribution.
. If possible, try to reparameterize the model.
. 
. Effective sample size satisfactory.
. 
. The following parameters had split R-hat greater than 1.05:
.   omega_sds[2]
. Such high values indicate incomplete mixing and biased estimation.
. You should consider regularizating your model with additional prior information or a more effective parameterization.
. 
. Processing complete.

We see some warnings about low E-BFMI and a parameter with a high split R-hat. These indicate that we probably want to consider an alternative parameterization.

Before we do that, let’s look at the parameter table and diagnostic plots.

mod1_params <- c('tv_e0','tv_log_ec50','emax','gamma',
                 'omega_sds','Omega_corr[1,2]')
fit1$summary(variables=mod1_params)
. # A tibble: 7 × 10
.   variable       mean median     sd    mad     q5    q95  rhat ess_bulk ess_tail
.   <chr>         <num>  <num>  <num>  <num>  <num>  <num> <num>    <num>    <num>
. 1 tv_e0        -0.290 -0.267 0.971  0.955  -1.92   1.29   1.00   4097.    3040. 
. 2 tv_log_ec50   4.58   4.58  0.0464 0.0462  4.50   4.65   1.00   2542.    2637. 
. 3 emax         98.6   98.8   1.09   1.05   96.5   99.9    1.00   2557.    2029. 
. 4 gamma         0.984  0.982 0.0328 0.0322  0.934  1.04   1.00   3036.    2700. 
. 5 omega_sds[1]  0.212  0.210 0.0321 0.0317  0.162  0.267  1.00    898.    1538. 
. 6 omega_sds[2]  1.27   1.12  0.835  0.809   0.212  2.85   1.06     35.6     75.0
. 7 Omega_corr[…  0.103  0.126 0.405  0.441  -0.599  0.734  1.01    443.     650.

While the split R-hat values look good for most of the parameters, the R-hat value for omega_sds[2] (the interindividual standard deviation for E0) indicates that the chains are not mixing well for that parameter. Let’s look at the trace plots.

bayesplot::mcmc_trace(fit1$draws(variables=mod1_params))

bayesplot::mcmc_dens_overlay(fit1$draws(variables=mod1_params))

These tell us the same story as the R-hat values: the chains aren’t mixing well for the interindividual standard deviation on e0. Let’s add a note to this effect and then modify the parameterization of the model.

mod1 <- mod1 %>% add_notes("Poor mixing for omega_sds[2], the between-subject standard deviation on e0.  Try reparameterizing the model.")
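
Given the low E-BFMI warning from cmdstan_diagnose(), the sampler’s energy diagnostic is also worth inspecting before reparameterizing. A minimal sketch, assuming bayesplot’s nuts_params() accepts the CmdStanMCMC object returned by submit_model():

np1 <- bayesplot::nuts_params(fit1)   # extract NUTS sampler diagnostics
bayesplot::mcmc_nuts_energy(np1)      # overlay of energy and energy-transition distributions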

Revised model - fully non-centered parameterization

Let’s try using a fully non-centered parameterization to see if that helps with the convergence issues. In this parameterization, the etas are sampled as independent standard normal variates, and the correlated subject-level effects are constructed deterministically as diag_pre_multiply(omega_sds, L_Omega_corr) * etas, where L_Omega_corr is a Cholesky factor of the correlation matrix.

Use caution when using the .overwrite = TRUE argument in new_model and copy_model_from. It will overwrite existing model files and output directories, if they exist.

mod2 <- copy_model_from(mod1, 
                        .new_model = 'mod2', 
                        .inherit_tags = TRUE,
                        .overwrite = TRUE)

We’ll revise the Stan model code to use the fully non-centered parameterization. We’ll open the Stan model file and make the edits shown in the diff below.

open_stanmod_file(mod2)

We can compare two model files using the model_diff command. By default, the model is compared to the model on which it was based (in this case, mod1).

model_diff(mod2)
< mod2                                   > mod1                                 
@@ 6,11 @@                               @@ 6,6 @@                              
    vector[N] Conc;                          vector[N] Conc;                    
    array[N] int ID;                         array[N] int ID;                   
<   array[n_id] int<lower=1, upper=N> i  ~                                      
: d_start;                               ~                                      
<   array[n_id] int<lower=1, upper=N> i  ~                                      
: d_end;                                 ~                                      
<                                        ~                                      
<   int<lower=0,upper=1> prior_only;     ~                                      
  }                                        }                                    
                                                                                
<                                        ~                                      
  parameters {                             parameters {                         
    real<lower=0, upper=100> emax;           real<lower=0, upper=100> emax;     
@@ 19,9 @@                               @@ 14,9 @@                             
    real log_gamma;                          real log_gamma;                    
                                                                                
<   matrix[2,n_id] etas; // etas[i,1] =  >   array[n_id] vector[2] etas; // etas
:  log_ec50; etas[i,2] = e0;             : [i,1] = log_ec50; etas[i,2] = e0;    
                                                                                
    real<lower=0> sigma;                     real<lower=0> sigma;               
    vector<lower=0>[2] omega_sds; // SD      vector<lower=0>[2] omega_sds; // SD
  s for IIV                                s for IIV                            
<   cholesky_factor_corr[2] L_Omega_cor  >   corr_matrix[2] Omega_corr;         
: r;                                     ~                                      
  }                                        }                                    
                                                                                
@@ 31,27 @@                              @@ 26,16 @@                            
    vector[n_id] ec50;                       vector[n_id] ec50;                 
    vector[n_id] e0;                         vector[n_id] e0;                   
<   matrix[2,2] Omega_corr;              ~                                      
<   cov_matrix[2] Omega;                 ~                                      
    vector[N] mu;                            vector[N] mu;                      
<                                        ~                                      
                                                                                
<   {                                    ~                                      
<     vector[2] tmp_eta;                 ~                                      
                                                                                
      for (i in 1:n_id) {                    for (i in 1:n_id) {                
<       tmp_eta = diag_pre_multiply(ome  ~                                      
: ga_sds, L_Omega_corr)*etas[,i];        ~                                      
<       ec50[i] = exp(tv_log_ec50 + tmp  >     ec50[i] = exp(tv_log_ec50 + etas[
: _eta[1]);                              : i,1]);                               
<       e0[i] = tv_e0 + tmp_eta[2];      >     e0[i] = tv_e0 + etas[i,2];       
<     }                                  ~                                      
    }                                        }                                  
                                                                                
<   Omega_corr = L_Omega_corr * L_Omega  ~                                      
: _corr';                                ~                                      
<   Omega = quad_form_diag(Omega_corr,   ~                                      
: omega_sds);                            ~                                      
<                                        ~                                      
    for (i in 1:N) {                         for (i in 1:N) {                   
      mu[i] = e0[ID[i]] + (emax-e0[ID[i        mu[i] = e0[ID[i]] + (emax-e0[ID[i
  ]])*pow(Conc[i]/100,gamma) / (pow(ec5    ]])*pow(Conc[i]/100,gamma) / (pow(ec5
  0[ID[i]]/100,gamma) + pow(Conc[i]/100    0[ID[i]]/100,gamma) + pow(Conc[i]/100
  ,gamma));                                ,gamma));                            
    }                                        }                                  
  }                                        }                                    
<                                        ~                                      
                                                                                
  model {                                  model {                              
@@ 63,17 @@                              @@ 47,15 @@                            
    sigma ~ normal(0,10);                    sigma ~ normal(0,10);              
                                                                                
<   L_Omega_corr ~ lkj_corr_cholesky(2)  >   Omega_corr ~ lkj_corr(2);          
: ;                                      ~                                      
    omega_sds ~ cauchy(0,2.5);               omega_sds ~ cauchy(0,2.5);         
                                                                                
<   for (i in 1:n_id) {                  ~                                      
<     etas[,i] ~ multi_normal([0.0,0.0]  >   etas ~ multi_normal([0,0],quad_form
: ,diag_matrix([1.0,1.0]'));             : _diag(Omega_corr, omega_sds));       
<   }                                    ~                                      
                                                                                
  // Likelihhood                           // Likelihhood                       
                                                                                
<   if (prior_only==0) {                 ~                                      
      DV ~ normal(mu, sigma);                DV ~ normal(mu, sigma);            
<   }                                    >                                      
  }                                        }                                    
~                                        >                                      
                                                                                
  generated quantities {                   generated quantities {               

For this new model structure, we’ll need slightly different initial values:

open_staninit_file(mod2)

We can also use model_diff to compare initial value and other types of model files:

model_diff(mod2, .file='init')
< mod2-init                              > mod1-init                            
@@ 25,7 @@                               @@ 25,7 @@                             
             tv_log_ec50 = rnorm(1,log(               tv_log_ec50 = rnorm(1,log(
  100),1),                                 100),1),                             
             log_gamma = rnorm(1,0,.1),               log_gamma = rnorm(1,0,.1),
<            etas = matrix(rnorm(2*.dat  >            etas = matrix(rnorm(2*.dat
: a$n_id, 0, 1),nrow=2),                 : a$n_id, 0, 1), ncol=2),              
             omega_sds = abs(rnorm(2)),               omega_sds = abs(rnorm(2)),
<            L_Omega_corr = diag(2),     >            Omega_corr = diag(2),     
             sigma = abs(rnorm(1,10,2))               sigma = abs(rnorm(1,10,2))
  )                                        )                                    
      }                                        }                                
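
model_diff can also compare against any other model, not just the model listed in based_on. A sketch, assuming bbr's .mod2 argument:

# Compare mod2 directly against the original base model instead of mod1
model_diff(mod2, .mod2 = mod0)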

However, the data file doesn’t change, so we don’t need to make any modifications to the mod2-standata.R file.

Let’s add a short description and tags, then submit the model.

mod2 <- mod2 %>% replace_description("Fully non-centered parameterization with correlated random effects.")

mod2 <- mod2 %>% add_tags(TAGS$NCP)
fit2 <- submit_model(mod2)
fit2$cmdstan_diagnose()
. Processing csv files: /data/expo2-stan/model/stan/expo/mod2/mod2-output/mod2-202310241811-1-6ed348.csv, /data/expo2-stan/model/stan/expo/mod2/mod2-output/mod2-202310241811-2-6ed348.csv, /data/expo2-stan/model/stan/expo/mod2/mod2-output/mod2-202310241811-3-6ed348.csv, /data/expo2-stan/model/stan/expo/mod2/mod2-output/mod2-202310241811-4-6ed348.csv
. 
. Checking sampler transitions treedepth.
. Treedepth satisfactory for all transitions.
. 
. Checking sampler transitions for divergences.
. No divergent transitions found.
. 
. Checking E-BFMI - sampler transitions HMC potential energy.
. E-BFMI satisfactory.
. 
. Effective sample size satisfactory.
. 
. Split R-hat values satisfactory all parameters.
. 
. Processing complete, no problems detected.

It looks like the re-parameterization has addressed all of the issues with convergence.
Let’s confirm by looking at the parameter table and diagnostic plots.

mod2_params <- c('tv_e0','tv_log_ec50','emax','gamma',
                 'omega_sds','Omega_corr[1,2]')
fit2$summary(variables=mod2_params)
. # A tibble: 7 × 10
.   variable      mean median     sd    mad      q5    q95  rhat ess_bulk ess_tail
.   <chr>        <num>  <num>  <num>  <num>   <num>  <num> <num>    <num>    <num>
. 1 tv_e0      -0.267  -0.271 0.944  0.951  -1.84    1.25   1.00    2971.    3077.
. 2 tv_log_ec…  4.58    4.58  0.0458 0.0461  4.50    4.65   1.00    2231.    2697.
. 3 emax       98.6    98.8   1.07   1.05   96.6    99.9    1.00    2704.    2013.
. 4 gamma       0.984   0.982 0.0323 0.0313  0.934   1.04   1.00    3279.    2475.
. 5 omega_sds…  0.212   0.211 0.0315 0.0316  0.162   0.266  1.00    1848.    2738.
. 6 omega_sds…  1.07    0.875 0.856  0.793   0.0902  2.79   1.00     886.    1166.
. 7 Omega_cor…  0.0914  0.125 0.425  0.471  -0.642   0.741  1.00    4211.    3075.

All of the split R-hat values look good and the bulk and tail ESS values are reasonably large. Let’s look at the trace plots.

bayesplot::mcmc_trace(fit2$draws(variables=mod2_params))

bayesplot::mcmc_dens_overlay(fit2$draws(variables=mod2_params))

The trace and density plots look good, particularly for omega_sds[2]. We’ll add notes to this effect.

mod2 <- mod2 %>% add_notes("Reparameterization fixed the mixing problem for omega_sds[2].  All Rhat values look good.")

Finally, let’s see if this model has improved the predictive check for the correlation with the baseline value.

Model evaluation - Posterior Predictive Checks

To generate the plots more quickly, we’ll use a subset of 500 posterior samples. In practice, we would use all of the posterior samples.

  • FXa inhibition vs concentration (median, 5th, and 95th percentiles)
ppc <- tidybayes::spread_draws(fit2, 
                               simdv_obs[num], 
                               simdv_new[num],
                               ndraws = 500,
                               seed = 9753)
ppc2 <- left_join(ppc, exdata3 %>% mutate(num=1:n()))
head(ppc)
. # A tibble: 6 × 6
. # Groups:   num [1]
.     num simdv_obs .chain .iteration .draw simdv_new
.   <int>     <dbl>  <int>      <int> <int>     <dbl>
. 1     1      6.76      4        878  3878    -5.39 
. 2     1      8.32      4        467  3467     0.441
. 3     1      3.71      2        363  1363    -6.96 
. 4     1     24.2       4        409  3409     5.07 
. 5     1    -15.7       4        802  3802    -9.80 
. 6     1     -5.51      4         48  3048    -4.29
vpc_conc <- observed(exdata3, x=cobs, y=fxa.inh) %>% 
  simulated(ppc2 %>% arrange(.draw,num), y=simdv_new) %>% 
  stratify(~dose) %>% 
  binning(bin='jenks', nbins=10) %>% 
  vpcstats()
plot(vpc_conc, legend.position = 'right')

  • FXa inhibition vs time by dose (median, 5th, and 95th percentiles)

Because the samples were obtained at a fixed grid of times, we’ll use the nominal times as our binning variable.

vpc_time <- observed(exdata3, x=time, y=fxa.inh) %>% 
  simulated(ppc2 %>% arrange(.draw,num), y=simdv_new) %>% 
  stratify(~dose) %>% 
  binning(bin=time) %>% 
  vpcstats()
plot(vpc_time, legend.position = 'right')

  • Correlation between baseline and post-baseline FXa inhibition
sim_correlations <- ppc2 %>% 
  ungroup() %>% 
  filter(time > 0) %>% 
  left_join(ppc2 %>% 
              filter(time==0) %>% 
              ungroup() %>% 
              select(.draw,ID, bsl=simdv_new)) %>% 
  group_by(.draw,dose,time) %>% 
  summarise(cor=cor(bsl,simdv_new),
            cfb = mean(simdv_new -bsl)) %>% 
  group_by(dose,time) %>% 
  summarize(median=median(cor),
           qlo = quantile(cor, prob=0.05),
           qhi = quantile(cor, prob=0.95),
           median_cfb=median(cfb),
           qlo_cfb = quantile(cfb, prob=0.05),
           qhi_cfb = quantile(cfb, prob=0.95))

obs_correlations <- exdata3 %>% 
  ungroup() %>% 
  filter(time > 0) %>% 
  left_join(exdata3 %>% 
              filter(time==0) %>% 
              ungroup() %>% 
              select(ID, bsl=fxa.inh)) %>% 
  group_by(dose,time) %>% 
  summarise(cor=cor(bsl,fxa.inh), cfb=mean(fxa.inh-bsl))
sim_correlations %>% 
  ggplot(aes(x=factor(time), y=median)) +
  geom_pointrange(aes(ymin=qlo, ymax=qhi)) +
  geom_point(data=obs_correlations, aes(y=cor), col='red')

The misfit in the correlation between baseline and post-baseline FXa inhibition is still present. Nonetheless, we’ll move forward and add covariate effects to the model.

Revised model - add covariates

To identify candidate covariates, let’s first extract the individual parameter estimates from mod2 and plot them against the available covariates.

ind_params <- tidybayes::spread_draws(fit2, ec50[id], e0[id])
ind_params2 <- ind_params %>% 
  group_by(id) %>% 
  summarise(log_ec50 = median(log(ec50)), 
            e0 = median(e0))

ind_params3 <- left_join(ind_params2, ind_exdata3, by=c('id'='ID'))

ind_params4 <- ind_params3 %>% 
  pivot_longer(cols=c(log_ec50,e0))
ind_params4 %>% 
  ggplot(aes(x=factor(dose), y=value)) +
  geom_boxplot() +
  facet_wrap(~name, scales='free') +
  labs(x=labs$dose,
       y = 'Individual Parameter Value')

ind_params4 %>% 
  ggplot(aes(x=sex_f, y=value)) +
  geom_boxplot() +
  facet_wrap(~name, scales = 'free') +
  labs(x=labs$sex_f,
       y='Individual Parameter Value')

ind_params4 %>% 
  ggplot(aes(x=weight, y=value)) +
  geom_point() +
  geom_smooth( method='loess' ) +
  facet_wrap(~name, scales = 'free') +
  labs(x=labs$weight, y='Individual Parameter Value')

ind_params4 %>% 
  ggplot(aes(x=age, y=value)) +
  geom_point() +
  geom_smooth( method='loess' ) +
  facet_wrap(~name, scales = 'free') +
  labs(x=labs$age, y = 'Individual Parameter Value')

Let’s consider adding sex as a covariate effect on both the baseline FXa inhibition and the EC50.

Use caution when using the .overwrite = TRUE argument in new_model and copy_model_from. It will overwrite existing model files and output directories, if they exist.

mod4 <- copy_model_from(mod2, 
                        .new_model = 'mod4', 
                        .inherit_tags = TRUE, 
                        .overwrite = TRUE)
open_stanmod_file(mod4)

data {
  int<lower=0> N ; // Number of observations
  int<lower=0> K;  // Number of covariates
  int<lower=0> n_id ; // Number of individuals
  vector[N] DV;
  vector[N] Conc;
  matrix[n_id, K] X_e0;  // Matrix of covariates (one row per subject) on E0
  matrix[n_id, K] X_ec50;  // Matrix of covariates (one row per subject) on EC50
  array[N] int ID;
  array[n_id] int<lower=1, upper=N> id_start;
  array[n_id] int<lower=1, upper=N> id_end;
  
  int<lower=0,upper=1> prior_only;
}


parameters {
  real<lower=0, upper=100> emax;
  real tv_log_ec50;
  real tv_e0;
  real log_gamma;
  vector[K] beta_e0;
  vector[K] beta_log_ec50;

  matrix[2,n_id] etas; // etas[1,i] = log_ec50; etas[2,i] = e0;

  real<lower=0> sigma;
  vector<lower=0>[2] omega_sds; // SDs for IIV
  cholesky_factor_corr[2] L_Omega_corr;
}

transformed parameters {
  real tv_ec50 = exp(tv_log_ec50);
  real gamma = exp(log_gamma);
  vector[n_id] ec50;
  vector[n_id] e0;
  matrix[2,2] Omega_corr;
  cov_matrix[2] Omega;
  vector[N] mu;
  

  {
    vector[2] tmp_eta;
    
    for (i in 1:n_id) {
      tmp_eta = diag_pre_multiply(omega_sds, L_Omega_corr)*etas[,i];
      ec50[i] = exp(tv_log_ec50 + X_ec50[i,] * beta_log_ec50 + tmp_eta[1]);
      e0[i] = tv_e0 + X_e0[i,] * beta_e0 + tmp_eta[2];
    }
  }
  
  Omega_corr = L_Omega_corr * L_Omega_corr';
  Omega = quad_form_diag(Omega_corr, omega_sds);
  
  for (i in 1:N) {
    mu[i] = e0[ID[i]] + (emax-e0[ID[i]])*pow(Conc[i]/100,gamma) / (pow(ec50[ID[i]]/100,gamma) + pow(Conc[i]/100,gamma));
  }
}


model {
  // Priors
  tv_e0 ~ normal(0, 10);
  tv_log_ec50 ~ normal(4,2);
  beta_e0 ~ normal(0,5);
  beta_log_ec50 ~ normal(0,2);
  emax ~ uniform(0,100);
  log_gamma ~ normal(0,1);
  sigma ~ normal(0,10);
  
  L_Omega_corr ~ lkj_corr_cholesky(2);
  omega_sds ~ cauchy(0,2.5);

  for (i in 1:n_id) {
    etas[,i] ~ multi_normal([0.0,0.0],diag_matrix([1.0,1.0]'));
  }

// Likelihood

  if (prior_only==0) {
    DV ~ normal(mu, sigma);
  }
}

generated quantities {

  vector[N] simdv_obs;
  vector[N] simdv_new;

  // Simulated observations for observed subjects
  for (i in 1:N) {
    simdv_obs[i] = normal_rng(mu[i],sigma);
  }
  
  // Simulate observations for new subject
  {
    array[n_id] vector[2] etas_new; // etas[i,1] = log_ec50; etas[i,2] = e0;
    vector[n_id] ec50_new;
    vector[n_id] e0_new;
    vector[N] mu_new;
    
    for (i in 1:n_id) {
      etas_new[i,] = multi_normal_rng([0.0,0.0], quad_form_diag(Omega_corr, omega_sds));
        ec50_new[i] = exp(tv_log_ec50 + X_ec50[i,]*beta_log_ec50 + etas_new[i,1]);
        e0_new[i] = tv_e0 + X_e0[i,] * beta_e0 + etas_new[i,2];
    }
    
    for (i in 1:N) {
      mu_new[i] = e0_new[ID[i]] + (emax-e0_new[ID[i]])*pow(Conc[i]/100,gamma) / (pow(ec50_new[ID[i]]/100,gamma) + pow(Conc[i]/100,gamma));
      simdv_new[i] = normal_rng(mu_new[i] , sigma);
    }
  }
  
}

For this new model structure, we’ll need slightly different initial values:

open_staninit_file(mod4)
# Create Stan initial values
#
# This function must return something that can be passed to the `init` argument
#   of `cmdstanr::sample()`. There are several options; see `?cmdstanr::sample`
#   for details.
#
# `.data` represents the list returned from `make_standata()` for this model.
#   This is provided in case any of your initial values are dependent on some
#   aspect of the data (e.g. the number of rows).
#
# `.args` represents the list of attached arguments that will be passed through to
#   cmdstanr::sample(). This is provided in case any of your initial values are
#   dependent on any of these arguments (e.g. the number of chains).
#
# Note: you _don't_ need to pass anything to either of these arguments, you only
#   use it within the function. `bbr` will pass in the correct objects when it calls
#   `make_init()` under the hood.
#
make_init <- function(.data, .args) {
  # returning NULL causes cmdstanr::sample() to use the default initial values
  return(
    function() {
      list(tv_e0 = rnorm(1),
           emax = runif(1,90,100),
           beta_e0 = as.array(rnorm(.data$K,0,1)),
           beta_log_ec50 = as.array(rnorm(.data$K,0,1)),
           tv_log_ec50 = rnorm(1,log(100),1),
           log_gamma = rnorm(1,0,.1),
           etas = matrix(rnorm(2*.data$n_id, 0, 1),nrow=2),
           omega_sds = abs(rnorm(2)),
           L_Omega_corr = diag(2),
           sigma = abs(rnorm(1,10,2)))
    }
  )
}

And we need to update the data to pass in the covariate matrices and number of covariates.

open_standata_file(mod4)
# Create Stan data
#
# This function must return the list that will be passed to `data` argument
#   of `cmdstanr::sample()`
#
# The `.dir` argument represents the absolute path to the directory containing
#   this file. This is useful for building file paths to the input files you will
#   load. Note: you _don't_ need to pass anything to this argument, you only use
#   it within the function. `bbr` will pass in the correct path when it calls
#   `make_standata()` under the hood.
make_standata <- function(.dir) {
  # read in any input data
  in_data <- readr::read_csv(
    file.path(.dir, "..", "..", "..", "..", "data/derived/exdata3.csv"),
    show_col_types = FALSE
  )
  
  in_data <- in_data %>% mutate(row=1:n())
  
  list(
    N = nrow(in_data),
    n_id = length(unique(in_data$ID)),
    K = 1, 
    DV = in_data$fxa.inh,
    X_e0 = in_data %>% distinct(ID,sex) %>% mutate(sex=sex-1) %>% pull(sex) %>% as.matrix(),
    X_ec50 = in_data %>% distinct(ID,sex) %>% mutate(sex=sex-1) %>% pull(sex) %>% as.matrix(),
    Conc = in_data$cobs,
    ID = in_data$ID,
    id_start = in_data %>% group_by(ID) %>% summarise(min=min(row)) %>% pull(min),
    id_end = in_data %>% group_by(ID) %>% summarise(max=max(row)) %>% pull(max),
    prior_only=0
  )
}
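
It can be worth a quick sanity check that the covariate matrices are assembled as intended. A sketch that simply sources the data-assembly function above and inspects the result (paths follow the project layout shown earlier):

# Source make_standata() from the mod4 directory and build the data list directly
source(file.path(MODEL_DIR, "mod4", "mod4-standata.R"))
standata_chk <- make_standata(file.path(MODEL_DIR, "mod4"))
dim(standata_chk$X_e0)  # expect n_id rows and K = 1 column
standata_chk$K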

Before submitting the model, we’ll add the model description. Note that read_fit_model() can be used at any time to reload the fitted model object from disk, as shown below.

mod4 <- mod4 %>% replace_description("Add effect of sex on E0 and EC50.")
fit4 <- submit_model(mod4)
fit4 <- read_fit_model(mod4)
fit4$cmdstan_diagnose()
. Processing csv files: /data/expo2-stan/model/stan/expo/mod4/mod4-output/mod4-202310251154-1-6d0058.csv, /data/expo2-stan/model/stan/expo/mod4/mod4-output/mod4-202310251154-2-6d0058.csv, /data/expo2-stan/model/stan/expo/mod4/mod4-output/mod4-202310251154-3-6d0058.csv, /data/expo2-stan/model/stan/expo/mod4/mod4-output/mod4-202310251154-4-6d0058.csv
. 
. Checking sampler transitions treedepth.
. Treedepth satisfactory for all transitions.
. 
. Checking sampler transitions for divergences.
. No divergent transitions found.
. 
. Checking E-BFMI - sampler transitions HMC potential energy.
. E-BFMI satisfactory.
. 
. Effective sample size satisfactory.
. 
. Split R-hat values satisfactory all parameters.
. 
. Processing complete, no problems detected.

It looks like the sampler for the covariate model has converged. Let’s confirm by looking at the parameter table and diagnostic plots.

mod4_params <- c('tv_e0','tv_log_ec50','emax','gamma', 
                 'beta_e0','beta_log_ec50', 'omega_sds',
                 'Omega_corr[1,2]')
fit4$summary(variables=mod4_params) %>% 
  mutate(across(-variable, pmtables::sig))
. # A tibble: 9 × 10
.   variable         mean   median sd    mad   q5    q95   rhat  ess_bulk ess_tail
.   <chr>            <chr>  <chr>  <chr> <chr> <chr> <chr> <chr> <chr>    <chr>   
. 1 tv_e0            0.0214 0.0328 1.11  1.08  -1.80 1.85  1.00  2.44e+03 3.11e+03
. 2 tv_log_ec50      4.58   4.58   0.06… 0.06… 4.48  4.68  1.00  1.34e+03 1.79e+03
. 3 emax             98.6   98.8   1.08  1.09  96.5  99.9  1.00  2.11e+03 1.85e+03
. 4 gamma            0.983  0.981  0.03… 0.03… 0.932 1.04  1.00  2.67e+03 2.54e+03
. 5 beta_e0[1]       -0.791 -0.793 1.48  1.53  -3.17 1.64  1.00  2.70e+03 3.09e+03
. 6 beta_log_ec50[1] -0.01… -0.01… 0.07… 0.07… -0.1… 0.112 1.00  1.28e+03 1.94e+03
. 7 omega_sds[1]     0.215  0.214  0.03… 0.03… 0.165 0.272 1.00  1.29e+03 2.26e+03
. 8 omega_sds[2]     1.13   0.912  0.910 0.832 0.08… 2.95  1.00  825      1.15e+03
. 9 Omega_corr[1,2]  0.118  0.157  0.416 0.462 -0.6… 0.734 1.00  3.46e+03 2.96e+03

All of the split R-hat values look good and the bulk and tail ESS values are reasonably large. Let’s look at the trace plots.

bayesplot::mcmc_trace(fit4$draws(variables=mod4_params))

bayesplot::mcmc_dens_overlay(fit4$draws(variables=mod4_params))

Finally, let’s see if this model has improved the predictive check for the correlation with the baseline value.

Model evaluation - Posterior Predictive Checks

ppc <- tidybayes::spread_draws(fit4, 
                               simdv_obs[num], 
                               simdv_new[num],
                               ndraws = 500,
                               seed = 9753)
ppc2 <- left_join(ppc, exdata3 %>% mutate(num=1:n()))
head(ppc)
. # A tibble: 6 × 6
. # Groups:   num [1]
.     num simdv_obs .chain .iteration .draw simdv_new
.   <int>     <dbl>  <int>      <int> <int>     <dbl>
. 1     1     5.74       4        878  3878    -0.119
. 2     1     2.01       4        467  3467     7.97 
. 3     1   -13.7        2        363  1363    -2.89 
. 4     1    -0.102      4        409  3409    -8.48 
. 5     1   -10.0        4        802  3802    12.4  
. 6     1     2.66       4         48  3048    -1.30

  • Association between FXa inhibition and concentration, stratified by sex

Based on the posterior predictive check, it looks like the model adequately describes the effect of sex.

vpc_conc <- observed(exdata3, x=cobs, y=fxa.inh) %>% 
  simulated(ppc2 %>% arrange(.draw,num), y=simdv_new) %>% 
  stratify(~sex_f) %>% 
  binning(bin='jenks', nbins=10) %>% 
  vpcstats()
plot(vpc_conc, legend.position = 'right')

  • Correlation between baseline and post-baseline FXa inhibition

Based on the posterior predictive check below, the model appears to adequately capture the association between baseline and post-baseline FXa values.

sim_correlations <- ppc2 %>% 
  ungroup() %>% 
  filter(time > 0) %>% 
  left_join(ppc2 %>% 
              filter(time==0) %>% 
              ungroup() %>% 
              select(.draw,ID, bsl=simdv_new)) %>% 
  group_by(.draw,dose,time) %>% 
  summarise(cor=cor(bsl,simdv_new),
            cfb = mean(simdv_new -bsl)) %>% 
  group_by(dose,time) %>% 
  summarize(median=median(cor),
           qlo = quantile(cor, prob=0.05),
           qhi = quantile(cor, prob=0.95),
           median_cfb=median(cfb),
           qlo_cfb = quantile(cfb, prob=0.05),
           qhi_cfb = quantile(cfb, prob=0.95))

obs_correlations <- exdata3 %>% 
  ungroup() %>% 
  filter(time > 0) %>% 
  left_join(exdata3 %>% 
              filter(time==0) %>% 
              ungroup() %>% 
              select(ID, bsl=fxa.inh)) %>% 
  group_by(dose,time) %>% 
  summarise(cor=cor(bsl,fxa.inh), cfb=mean(fxa.inh-bsl))
sim_correlations %>% 
  ggplot(aes(x=factor(time), y=median)) +
  geom_pointrange(aes(ymin=qlo, ymax=qhi)) +
  geom_point(data=obs_correlations, aes(y=cor), col='red') +
  facet_wrap(~dose)
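
Because each model was annotated with a description, tags, and notes along the way, the development path is easy to reconstruct. A sketch using bbr’s run_log() (the column selection shown here is illustrative):

run_log(MODEL_DIR) %>% 
  select(run, based_on, description, tags, notes)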

Other resources


The following script from the GitHub repository is discussed on this page. If you’re interested in running this code, visit the About the GitHub Repo page first.

Update Traceable Model script: revised-models.Rmd