Example trials

Published

April 16, 2024

Modified

April 5, 2024

Example trials are provided to give insight into typical cell sample sizes as well as the level of uncertainty associated with the parameter estimation process. Examples are from trials at their maximum sample size with all follow up completed. Sequential variants with adaptations will be added later.

Null scenario

Table 1 shows a summary of the treatment successes based on the \(n\) patients associated with each combination of design variables when there are no treatment effects (non-membership effects are still retained) in the simulated data of 2500 patients. Given that this is a summary of a single data set, some variation from the underlying simulation parameters is to be expected.

Code
# Fix the RNG state so this example data set is reproducible.
set.seed(11)

# Start from the default simulation specification, then zero the
# r1, r2, r1d, r2d and f entries of `b` in a single assignment.
sim_spec <- get_sim_spec()
sim_spec$b[c("r1", "r2", "r1d", "r2d", "f")] <- 0

# Simulate one trial of 2500 patients under the modified specification.
ll <- get_trial_data(N = 2500, sim_spec = sim_spec)

# Build the summary table from the simulated data and print it.
gt_tbl <- tbl_ex_trial(ll$d)
gt_tbl
Surgical Da Duration Db Type Dc Response
reveal assigned received reveal assigned reveal assigned y n MLE (py) TRUE (py)1
early
0 0 0 0 0 0 0 188 270 0.70 0.68
0 0 0 0 0 1 0 122 195 0.63 0.63
0 0 0 0 0 1 1 149 228 0.65 0.63
0 0 1 1 0 0 0 9 13 0.69 0.67
0 0 1 1 0 1 0 8 13 0.62 0.62
0 0 1 1 0 1 1 6 10 0.60 0.62
0 0 1 1 1 0 0 12 18 0.67 0.67
0 0 1 1 1 1 0 5 8 0.62 0.62
0 0 1 1 1 1 1 6 10 0.60 0.62
subtotal 505 765 0.66
late
0 0 0 0 0 0 0 1 1 1.00 0.61
0 0 0 0 0 1 0 2 5 0.40 0.55
0 0 0 0 0 1 1 1 1 1.00 0.55
0 0 1 1 0 1 1 1 3 0.33 0.53
0 0 1 1 1 0 0 3 4 0.75 0.59
0 0 1 1 1 1 0 0 1 0.00 0.53
0 0 1 1 1 1 1 0 2 0.00 0.53
0 0 2 1 0 0 0 3 3 1.00 0.62
0 0 2 1 0 1 0 2 2 1.00 0.56
0 0 2 1 0 1 1 0 3 0.00 0.56
0 0 2 1 1 0 0 1 3 0.33 0.62
0 0 2 1 1 1 0 3 4 0.75 0.56
0 0 2 1 1 1 1 1 2 0.50 0.56
1 0 0 0 0 0 0 146 233 0.63 0.63
1 0 0 0 0 1 0 108 192 0.56 0.57
1 0 0 0 0 1 1 99 172 0.58 0.57
1 1 1 1 0 0 0 21 41 0.51 0.63
1 1 1 1 0 1 0 15 28 0.54 0.57
1 1 1 1 0 1 1 26 45 0.58 0.57
1 1 1 1 1 0 0 26 44 0.59 0.63
1 1 1 1 1 1 0 19 30 0.63 0.57
1 1 1 1 1 1 1 15 25 0.60 0.57
1 1 2 1 0 0 0 48 79 0.61 0.63
1 1 2 1 0 1 0 34 62 0.55 0.57
1 1 2 1 0 1 1 33 56 0.59 0.57
1 1 2 1 1 0 0 54 78 0.69 0.63
1 1 2 1 1 1 0 35 61 0.57 0.57
1 1 2 1 1 1 1 28 54 0.52 0.57
subtotal 725 1234 0.59
chronic
0 0 0 0 0 0 0 23 38 0.61 0.66
0 0 0 0 0 1 0 23 30 0.77 0.61
0 0 0 0 0 1 1 21 33 0.64 0.61
0 0 1 1 0 0 0 10 17 0.59 0.65
0 0 1 1 0 1 0 12 19 0.63 0.59
0 0 1 1 0 1 1 12 17 0.71 0.59
0 0 1 1 1 0 0 12 20 0.60 0.65
0 0 1 1 1 1 0 8 17 0.47 0.59
0 0 1 1 1 1 1 5 12 0.42 0.59
0 0 2 1 0 0 0 46 60 0.77 0.68
0 0 2 1 0 1 0 27 47 0.57 0.62
0 0 2 1 0 1 1 23 33 0.70 0.62
0 0 2 1 1 0 0 41 56 0.73 0.68
0 0 2 1 1 1 0 31 44 0.70 0.62
0 0 2 1 1 1 1 30 58 0.52 0.62
subtotal 324 501 0.65
total 1554 2500 0.62
1 Transformed from the log-odds of response as used in the linear predictor to simulate data.
Table 1: Summary of simulated trial data when no treatment effects present

Model the simulated data first using standard normal priors on the domain level treatment effects, then increasing the prior standard deviation to ten in order to see if there is any movement in the posterior summary.

Code
# Convert the simulated trial data into the data list passed to the sampler.
lsd <- get_stan_data(ll$d)
ld <- lsd$ld
# Work on a copy of d_s (presumably a data.table, so that later by-reference
# edits would not touch lsd$d_s -- TODO confirm).
d_s <- copy(lsd$d_s)

m2 <- cmdstanr::cmdstan_model("stan/model-sim-04.stan")

# First fit: uses the prior settings exactly as returned in `ld`.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
f_null_1 <- m2$sample(
  ld, iter_warmup = 1000, iter_sampling = 10000,
  parallel_chains = 2, chains = 2, refresh = 0, show_exceptions = FALSE,
  max_treedepth = 13)
Running MCMC with 2 parallel chains...

Chain 1 finished in 4.5 seconds.
Chain 2 finished in 5.0 seconds.

Both chains finished successfully.
Mean chain execution time: 4.8 seconds.
Total execution time: 5.1 seconds.
Code
# Posterior draws of a0, m and b from the first fit, one column per parameter,
# reshaped to long format (columns `variable`, `value`).
post_1 <- data.table(
  f_null_1$draws(variables = c("a0", "m", "b"), format = "matrix")
)
post_1 <- melt(post_1, measure.vars = names(post_1))

# Per-parameter posterior mean and 2.5% / 97.5% quantiles.
# `probs` is named in full rather than relying on partial matching of `prob`.
d_tbl_1 <- post_1[, .(
  prior = "normal(0, 1)",
  mu = mean(value),
  q_025 = quantile(value, probs = 0.025),
  q_975 = quantile(value, probs = 0.975)
), keyby = variable]
# NOTE(review): the true values are attached by position, so this assumes
# unlist(sim_spec) has the same length and ordering as the summarised
# parameters -- confirm against get_sim_spec() and the Stan model.
d_tbl_1[, name_tru := names(unlist(sim_spec))]
d_tbl_1[, tru := unlist(sim_spec)]

# compare when prior sd is set to 10 for trt effects

ld$pri_m_sd <- rep(10, length(ld$pri_m_sd))
ld$pri_b_sd <- rep(10, length(ld$pri_b_sd))

# Second fit: identical sampler settings, only the prior sds in `ld` differ.
f_null_2 <- m2$sample(
  ld, iter_warmup = 1000, iter_sampling = 10000,
  parallel_chains = 2, chains = 2, refresh = 0, show_exceptions = FALSE,
  max_treedepth = 13)
Running MCMC with 2 parallel chains...

Chain 2 finished in 5.0 seconds.
Chain 1 finished in 5.5 seconds.

Both chains finished successfully.
Mean chain execution time: 5.3 seconds.
Total execution time: 5.6 seconds.
Code
# Posterior draws of a0, m and b from the second (prior sd = 10) fit,
# reshaped to long format (columns `variable`, `value`).
post_2 <- data.table(
  f_null_2$draws(variables = c("a0", "m", "b"), format = "matrix")
)
post_2 <- melt(post_2, measure.vars = names(post_2))

# Per-parameter posterior mean and 2.5% / 97.5% quantiles.
# `probs` is named in full rather than relying on partial matching of `prob`.
d_tbl_2 <- post_2[, .(
  prior = "normal(0, 10)",
  mu = mean(value),
  q_025 = quantile(value, probs = 0.025),
  q_975 = quantile(value, probs = 0.975)
), keyby = variable]
# NOTE(review): the true values are attached by position, so this assumes
# unlist(sim_spec) has the same length and ordering as the summarised
# parameters -- confirm against get_sim_spec() and the Stan model.
d_tbl_2[, name_tru := names(unlist(sim_spec))]
d_tbl_2[, tru := unlist(sim_spec)]
Code
# Stack the summaries from the two prior settings into one plotting table.
d_fig <- rbind(d_tbl_1, d_tbl_2)
# Keep the parameters in their order of first appearance on the x axis
# instead of the alphabetical order factor() would otherwise impose.
d_fig$name_tru <- factor(d_fig$name_tru, levels = unique(d_fig$name_tru))

# https://www.andrewheiss.com/blog/2022/12/08/log10-natural-log-scales-ggplot/

# Posterior mean (points) and 2.5%-97.5% interval (line ranges) per parameter,
# coloured by prior. Both layers use the same position_dodge() so each point
# is offset identically to its interval; the original mixed position_dodge()
# and position_dodge2(). True values are overlaid as black triangles.
ggplot(d_fig, aes(x = name_tru, y = mu, col = prior)) +
  scale_x_discrete("") +
  scale_y_continuous("log-OR") +
  scale_color_discrete("Prior sd on log-OR") +
  geom_point(position = position_dodge(width = 0.4)) +
  geom_linerange(
    aes(ymin = q_025, ymax = q_975),
    position = position_dodge(width = 0.4)
  ) +
  geom_point(aes(y = tru), col = 1, pch = 2)
Figure 1: Posterior mean and 95% CI for baseline log-odds of treatment success domain A (independent estimates for late and chronic silo); triangles show true values.

All domains effective scenario

Table 2 shows a summary of the treatment successes based on the \(n\) patients associated with each combination of design variables when all treatment effects are set to log(2) (with non-membership effects retained as before) in the simulated data of 2500 patients.

Code
# Fix the RNG state so this example data set is reproducible.
set.seed(2)

# Set the r1, r2, r1d, r2d and f entries of `b` to log(2) in one assignment;
# the rest of sim_spec carries over from the earlier chunk.
sim_spec$b[c("r1", "r2", "r1d", "r2d", "f")] <- log(2)

# Simulate one trial of 2500 patients under the modified specification.
ll <- get_trial_data(N = 2500, sim_spec = sim_spec)

# Build the summary table from the simulated data and print it.
gt_tbl <- tbl_ex_trial(ll$d)
gt_tbl
Surgical Da Duration Db Type Dc Response
reveal assigned received reveal assigned reveal assigned y n MLE (py) TRUE (py)1
early
0 0 0 0 0 0 0 166 245 0.68 0.68
0 0 0 0 0 1 0 112 172 0.65 0.63
0 0 0 0 0 1 1 156 207 0.75 0.77
0 0 1 1 0 0 0 17 23 0.74 0.67
0 0 1 1 0 1 0 6 7 0.86 0.62
0 0 1 1 0 1 1 10 12 0.83 0.76
0 0 1 1 1 0 0 9 13 0.69 0.80
0 0 1 1 1 1 0 6 6 1.00 0.76
0 0 1 1 1 1 1 12 14 0.86 0.86
subtotal 494 699 0.71
late
0 0 0 0 0 0 0 1 1 1.00 0.61
0 0 0 0 0 1 0 0 1 0.00 0.55
0 0 0 0 0 1 1 3 4 0.75 0.71
0 0 1 1 0 1 1 1 2 0.50 0.70
0 0 1 1 1 0 0 2 2 1.00 0.75
0 0 1 1 1 1 0 1 1 1.00 0.70
0 0 1 1 1 1 1 3 3 1.00 0.82
0 0 2 1 0 0 0 5 7 0.71 0.62
0 0 2 1 0 1 0 0 2 0.00 0.56
0 0 2 1 0 1 1 3 3 1.00 0.72
0 0 2 1 1 0 0 2 2 1.00 0.76
0 0 2 1 1 1 0 4 5 0.80 0.72
0 0 2 1 1 1 1 1 1 1.00 0.83
1 0 0 0 0 0 0 156 245 0.64 0.63
1 0 0 0 0 1 0 111 192 0.58 0.57
1 0 0 0 0 1 1 133 181 0.73 0.73
1 1 1 1 0 0 0 25 30 0.83 0.77
1 1 1 1 0 1 0 18 31 0.58 0.73
1 1 1 1 0 1 1 30 35 0.86 0.84
1 1 1 1 1 0 0 26 30 0.87 0.87
1 1 1 1 1 1 0 28 32 0.88 0.84
1 1 1 1 1 1 1 23 30 0.77 0.91
1 1 2 1 0 0 0 53 68 0.78 0.77
1 1 2 1 0 1 0 43 61 0.70 0.73
1 1 2 1 0 1 1 50 58 0.86 0.84
1 1 2 1 1 0 0 81 92 0.88 0.87
1 1 2 1 1 1 0 58 67 0.87 0.84
1 1 2 1 1 1 1 60 67 0.90 0.91
subtotal 921 1253 0.74
chronic
0 0 0 0 0 0 0 25 37 0.68 0.66
0 0 0 0 0 1 0 20 34 0.59 0.61
0 0 0 0 0 1 1 29 43 0.67 0.75
0 0 1 1 0 0 0 11 22 0.50 0.65
0 0 1 1 0 1 0 9 12 0.75 0.59
0 0 1 1 0 1 1 13 18 0.72 0.75
0 0 1 1 1 0 0 18 21 0.86 0.79
0 0 1 1 1 1 0 23 29 0.79 0.75
0 0 1 1 1 1 1 17 20 0.85 0.85
0 0 2 1 0 0 0 51 69 0.74 0.68
0 0 2 1 0 1 0 30 45 0.67 0.62
0 0 2 1 0 1 1 35 55 0.64 0.76
0 0 2 1 1 0 0 43 57 0.75 0.81
0 0 2 1 1 1 0 24 36 0.67 0.76
0 0 2 1 1 1 1 41 50 0.82 0.87
subtotal 389 548 0.71
total 1804 2500 0.72
1 Transformed from the log-odds of response as used in the linear predictor to simulate data.
Table 2: Summary of simulated trial data when all domains associated with positive effects
Code
# Convert the simulated trial data into the data list passed to the sampler.
lsd <- get_stan_data(ll$d)
ld <- lsd$ld
# Work on a copy of d_s (presumably a data.table, so that later by-reference
# edits would not touch lsd$d_s -- TODO confirm).
d_s <- copy(lsd$d_s)

m2 <- cmdstanr::cmdstan_model("stan/model-sim-04.stan")

# First fit: uses the prior settings exactly as returned in `ld`.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
f_alleff_1 <- m2$sample(
  ld, iter_warmup = 1000, iter_sampling = 10000,
  parallel_chains = 2, chains = 2, refresh = 0, show_exceptions = FALSE,
  max_treedepth = 13)
Running MCMC with 2 parallel chains...

Chain 1 finished in 4.4 seconds.
Chain 2 finished in 4.5 seconds.

Both chains finished successfully.
Mean chain execution time: 4.5 seconds.
Total execution time: 4.6 seconds.
Code
# Posterior draws of a0, m and b from the first fit, one column per parameter,
# reshaped to long format (columns `variable`, `value`).
post_1 <- data.table(
  f_alleff_1$draws(variables = c("a0", "m", "b"), format = "matrix")
)
post_1 <- melt(post_1, measure.vars = names(post_1))

# Per-parameter posterior mean and 2.5% / 97.5% quantiles.
# `probs` is named in full rather than relying on partial matching of `prob`.
d_tbl_1 <- post_1[, .(
  prior = "normal(0, 1)",
  mu = mean(value),
  q_025 = quantile(value, probs = 0.025),
  q_975 = quantile(value, probs = 0.975)
), keyby = variable]
# NOTE(review): the true values are attached by position, so this assumes
# unlist(sim_spec) has the same length and ordering as the summarised
# parameters -- confirm against get_sim_spec() and the Stan model.
d_tbl_1[, name_tru := names(unlist(sim_spec))]
d_tbl_1[, tru := unlist(sim_spec)]

# compare when prior sd is set to 10 for trt effects

ld$pri_m_sd <- rep(10, length(ld$pri_m_sd))
ld$pri_b_sd <- rep(10, length(ld$pri_b_sd))

# Second fit: identical sampler settings, only the prior sds in `ld` differ.
f_alleff_2 <- m2$sample(
  ld, iter_warmup = 1000, iter_sampling = 10000,
  parallel_chains = 2, chains = 2, refresh = 0, show_exceptions = FALSE,
  max_treedepth = 13)
Running MCMC with 2 parallel chains...

Chain 1 finished in 5.4 seconds.
Chain 2 finished in 5.9 seconds.

Both chains finished successfully.
Mean chain execution time: 5.6 seconds.
Total execution time: 5.9 seconds.
Code
# Posterior draws of a0, m and b from the second (prior sd = 10) fit,
# reshaped to long format (columns `variable`, `value`).
post_2 <- data.table(
  f_alleff_2$draws(variables = c("a0", "m", "b"), format = "matrix")
)
post_2 <- melt(post_2, measure.vars = names(post_2))

# Per-parameter posterior mean and 2.5% / 97.5% quantiles.
# `probs` is named in full rather than relying on partial matching of `prob`.
d_tbl_2 <- post_2[, .(
  prior = "normal(0, 10)",
  mu = mean(value),
  q_025 = quantile(value, probs = 0.025),
  q_975 = quantile(value, probs = 0.975)
), keyby = variable]
# NOTE(review): the true values are attached by position, so this assumes
# unlist(sim_spec) has the same length and ordering as the summarised
# parameters -- confirm against get_sim_spec() and the Stan model.
d_tbl_2[, name_tru := names(unlist(sim_spec))]
d_tbl_2[, tru := unlist(sim_spec)]
Code
# Stack the summaries from the two prior settings into one plotting table.
d_fig <- rbind(d_tbl_1, d_tbl_2)
# Keep the parameters in their order of first appearance on the x axis
# instead of the alphabetical order factor() would otherwise impose.
d_fig$name_tru <- factor(d_fig$name_tru, levels = unique(d_fig$name_tru))

# https://www.andrewheiss.com/blog/2022/12/08/log10-natural-log-scales-ggplot/

# Posterior mean (points) and 2.5%-97.5% interval (line ranges) per parameter,
# coloured by prior. Both layers use the same position_dodge() so each point
# is offset identically to its interval; the original mixed position_dodge()
# and position_dodge2(). True values are overlaid as black triangles.
ggplot(d_fig, aes(x = name_tru, y = mu, col = prior)) +
  scale_x_discrete("") +
  scale_y_continuous("log-OR") +
  scale_color_discrete("Prior sd on log-OR") +
  geom_point(position = position_dodge(width = 0.4)) +
  geom_linerange(
    aes(ymin = q_025, ymax = q_975),
    position = position_dodge(width = 0.4)
  ) +
  geom_point(aes(y = tru), col = 1, pch = 2)
Figure 2: Posterior mean and 95% CI for baseline log-odds of treatment success (triangles show true values).