Example trials are provided to give insight into typical cell sample sizes as well as the level of uncertainty associated with the parameter estimation process. Examples are from trials at their maximum sample size with all follow-up completed. Sequential variants, with adaptations, will be added later.
Null scenario
Table 1 shows a summary of the treatment successes based on the \(n\) patients associated with each combination of design variables when there are no treatment effects (non-membership effects are still retained) in the simulated data of 2500 patients. Given that this is a summary of a single data set, some variation from the underlying simulation parameters is to be expected.
Code
# Null scenario: simulate one trial of 2500 patients with the treatment-effect
# entries of `b` set to zero.
set.seed(11)
sim_spec <- get_sim_spec()
# Zero the listed entries of `b` in a single vectorised assignment; everything
# else in sim_spec stays as returned by get_sim_spec().
sim_spec$b[c("r1", "r2", "r1d", "r2d", "f")] <- 0
ll <- get_trial_data(N = 2500, sim_spec = sim_spec)
# Build the summary table from the simulated data and print it.
gt_tbl <- tbl_ex_trial(ll$d)
gt_tbl
reveal
assigned
received
reveal
assigned
reveal
assigned
y
n
MLE (py )
TRUE (py )
early
0
0
0
0
0
0
0
188
270
0.70
0.68
0
0
0
0
0
1
0
122
195
0.63
0.63
0
0
0
0
0
1
1
149
228
0.65
0.63
0
0
1
1
0
0
0
9
13
0.69
0.67
0
0
1
1
0
1
0
8
13
0.62
0.62
0
0
1
1
0
1
1
6
10
0.60
0.62
0
0
1
1
1
0
0
12
18
0.67
0.67
0
0
1
1
1
1
0
5
8
0.62
0.62
0
0
1
1
1
1
1
6
10
0.60
0.62
subtotal
—
—
—
—
—
—
—
505
765
0.66
—
late
0
0
0
0
0
0
0
1
1
1.00
0.61
0
0
0
0
0
1
0
2
5
0.40
0.55
0
0
0
0
0
1
1
1
1
1.00
0.55
0
0
1
1
0
1
1
1
3
0.33
0.53
0
0
1
1
1
0
0
3
4
0.75
0.59
0
0
1
1
1
1
0
0
1
0.00
0.53
0
0
1
1
1
1
1
0
2
0.00
0.53
0
0
2
1
0
0
0
3
3
1.00
0.62
0
0
2
1
0
1
0
2
2
1.00
0.56
0
0
2
1
0
1
1
0
3
0.00
0.56
0
0
2
1
1
0
0
1
3
0.33
0.62
0
0
2
1
1
1
0
3
4
0.75
0.56
0
0
2
1
1
1
1
1
2
0.50
0.56
1
0
0
0
0
0
0
146
233
0.63
0.63
1
0
0
0
0
1
0
108
192
0.56
0.57
1
0
0
0
0
1
1
99
172
0.58
0.57
1
1
1
1
0
0
0
21
41
0.51
0.63
1
1
1
1
0
1
0
15
28
0.54
0.57
1
1
1
1
0
1
1
26
45
0.58
0.57
1
1
1
1
1
0
0
26
44
0.59
0.63
1
1
1
1
1
1
0
19
30
0.63
0.57
1
1
1
1
1
1
1
15
25
0.60
0.57
1
1
2
1
0
0
0
48
79
0.61
0.63
1
1
2
1
0
1
0
34
62
0.55
0.57
1
1
2
1
0
1
1
33
56
0.59
0.57
1
1
2
1
1
0
0
54
78
0.69
0.63
1
1
2
1
1
1
0
35
61
0.57
0.57
1
1
2
1
1
1
1
28
54
0.52
0.57
subtotal
—
—
—
—
—
—
—
725
1234
0.59
—
chronic
0
0
0
0
0
0
0
23
38
0.61
0.66
0
0
0
0
0
1
0
23
30
0.77
0.61
0
0
0
0
0
1
1
21
33
0.64
0.61
0
0
1
1
0
0
0
10
17
0.59
0.65
0
0
1
1
0
1
0
12
19
0.63
0.59
0
0
1
1
0
1
1
12
17
0.71
0.59
0
0
1
1
1
0
0
12
20
0.60
0.65
0
0
1
1
1
1
0
8
17
0.47
0.59
0
0
1
1
1
1
1
5
12
0.42
0.59
0
0
2
1
0
0
0
46
60
0.77
0.68
0
0
2
1
0
1
0
27
47
0.57
0.62
0
0
2
1
0
1
1
23
33
0.70
0.62
0
0
2
1
1
0
0
41
56
0.73
0.68
0
0
2
1
1
1
0
31
44
0.70
0.62
0
0
2
1
1
1
1
30
58
0.52
0.62
subtotal
—
—
—
—
—
—
—
324
501
0.65
—
total
—
—
—
—
—
—
—
1554
2500
0.62
—
Table 1: Summary of simulated trial data when no treatment effects present
Model the simulated data first using standard normal priors on the domain level treatment effects, then increasing the prior standard deviation to ten in order to see if there is any movement in the posterior summary.
Code
# Convert the simulated trial into the Stan data list and fit the model with
# the prior settings carried in `ld` as returned by get_stan_data().
lsd <- get_stan_data(ll$d)
ld <- lsd$ld
d_s <- copy(lsd$d_s)
m2 <- cmdstanr::cmdstan_model("stan/model-sim-04.stan")
f_null_1 <- m2$sample(
  data = ld,
  chains = 2, parallel_chains = 2,
  iter_warmup = 1000, iter_sampling = 10000,
  max_treedepth = 13,
  refresh = 0,
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  show_exceptions = FALSE
)
Running MCMC with 2 parallel chains...
Chain 1 finished in 4.5 seconds.
Chain 2 finished in 5.1 seconds.
Both chains finished successfully.
Mean chain execution time: 4.8 seconds.
Total execution time: 5.2 seconds.
Code
# Summarise the first fit: posterior mean and 2.5%/97.5% quantiles for every
# element of a0, m and b.
post_1 <- data.table(
  f_null_1$draws(variables = c("a0", "m", "b"), format = "matrix")
)
post_1 <- melt(post_1, measure.vars = names(post_1))
d_tbl_1 <- post_1[, .(
  prior = "normal(0, 1)",
  mu = mean(value),
  # `probs` spelled out in full rather than relying on partial matching
  q_025 = quantile(value, probs = 0.025),
  q_975 = quantile(value, probs = 0.975)
), keyby = variable]
# Labels and true values are attached by position, so this relies on the
# summarised parameters appearing in the same order as unlist(sim_spec).
d_tbl_1[, name_tru := names(unlist(sim_spec))]
d_tbl_1[, tru := unlist(sim_spec)]

# compare when prior sd is set to 10 for trt effects
ld$pri_m_sd <- rep(10, length(ld$pri_m_sd))
ld$pri_b_sd <- rep(10, length(ld$pri_b_sd))
f_null_2 <- m2$sample(
  data = ld,
  chains = 2, parallel_chains = 2,
  iter_warmup = 1000, iter_sampling = 10000,
  max_treedepth = 13,
  refresh = 0,
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  show_exceptions = FALSE
)
Running MCMC with 2 parallel chains...
Chain 2 finished in 5.0 seconds.
Chain 1 finished in 5.6 seconds.
Both chains finished successfully.
Mean chain execution time: 5.3 seconds.
Total execution time: 5.7 seconds.
Code
# Summarise the second fit (prior sd of 10) in the same way as the first:
# posterior mean and 2.5%/97.5% quantiles for every element of a0, m and b.
post_2 <- data.table(
  f_null_2$draws(variables = c("a0", "m", "b"), format = "matrix")
)
post_2 <- melt(post_2, measure.vars = names(post_2))
d_tbl_2 <- post_2[, .(
  prior = "normal(0, 10)",
  mu = mean(value),
  # `probs` spelled out in full rather than relying on partial matching
  q_025 = quantile(value, probs = 0.025),
  q_975 = quantile(value, probs = 0.975)
), keyby = variable]
# Labels and true values are attached by position, so this relies on the
# summarised parameters appearing in the same order as unlist(sim_spec).
d_tbl_2[, name_tru := names(unlist(sim_spec))]
d_tbl_2[, tru := unlist(sim_spec)]
Code
# Stack the two posterior summaries; the factor levels fix the x-axis order to
# the order in which the parameter labels first appear.
d_fig <- rbind(d_tbl_1, d_tbl_2)
d_fig$name_tru <- factor(d_fig$name_tru, levels = unique(d_fig$name_tru))
# https://www.andrewheiss.com/blog/2022/12/08/log10-natural-log-scales-ggplot/
# Posterior means with 2.5%-97.5% intervals, dodged by prior, with the
# simulation values (`tru`) overlaid as triangles. Layers inherit the data and
# the x/y/colour mappings from the ggplot() call.
ggplot(d_fig, aes(x = name_tru, y = mu, col = prior)) +
  geom_point(position = position_dodge(width = 0.4)) +
  geom_linerange(
    aes(ymin = q_025, ymax = q_975),
    position = position_dodge2(width = 0.4)
  ) +
  geom_point(aes(y = tru), col = 1, pch = 2) +
  scale_x_discrete("") +
  scale_y_continuous("log-OR") +
  scale_color_discrete("Prior sd on log-OR")
Figure 1: Posterior mean and 95% CI for baseline log-odds of treatment success domain A (independent estimates for late and chronic silo); triangles show the true values.
All domains effective scenario
Table 2 shows a summary of the treatment successes based on the \(n\) patients associated with each combination of design variables when all treatment effects are set to log(2)
(with non-membership effects retained as before) in the simulated data of 2500 patients.
Code
# All-domains-effective scenario: reuse the existing sim_spec, set the listed
# entries of `b` to log(2), and simulate a fresh trial of 2500 patients.
set.seed(2)
sim_spec$b[c("r1", "r2", "r1d", "r2d", "f")] <- log(2)
ll <- get_trial_data(N = 2500, sim_spec = sim_spec)
# Build the summary table from the simulated data and print it.
gt_tbl <- tbl_ex_trial(ll$d)
gt_tbl
reveal
assigned
received
reveal
assigned
reveal
assigned
y
n
MLE (py )
TRUE (py )
early
0
0
0
0
0
0
0
166
245
0.68
0.68
0
0
0
0
0
1
0
112
172
0.65
0.63
0
0
0
0
0
1
1
156
207
0.75
0.77
0
0
1
1
0
0
0
17
23
0.74
0.67
0
0
1
1
0
1
0
6
7
0.86
0.62
0
0
1
1
0
1
1
10
12
0.83
0.76
0
0
1
1
1
0
0
9
13
0.69
0.80
0
0
1
1
1
1
0
6
6
1.00
0.76
0
0
1
1
1
1
1
12
14
0.86
0.86
subtotal
—
—
—
—
—
—
—
494
699
0.71
—
late
0
0
0
0
0
0
0
1
1
1.00
0.61
0
0
0
0
0
1
0
0
1
0.00
0.55
0
0
0
0
0
1
1
3
4
0.75
0.71
0
0
1
1
0
1
1
1
2
0.50
0.70
0
0
1
1
1
0
0
2
2
1.00
0.75
0
0
1
1
1
1
0
1
1
1.00
0.70
0
0
1
1
1
1
1
3
3
1.00
0.82
0
0
2
1
0
0
0
5
7
0.71
0.62
0
0
2
1
0
1
0
0
2
0.00
0.56
0
0
2
1
0
1
1
3
3
1.00
0.72
0
0
2
1
1
0
0
2
2
1.00
0.76
0
0
2
1
1
1
0
4
5
0.80
0.72
0
0
2
1
1
1
1
1
1
1.00
0.83
1
0
0
0
0
0
0
156
245
0.64
0.63
1
0
0
0
0
1
0
111
192
0.58
0.57
1
0
0
0
0
1
1
133
181
0.73
0.73
1
1
1
1
0
0
0
25
30
0.83
0.77
1
1
1
1
0
1
0
18
31
0.58
0.73
1
1
1
1
0
1
1
30
35
0.86
0.84
1
1
1
1
1
0
0
26
30
0.87
0.87
1
1
1
1
1
1
0
28
32
0.88
0.84
1
1
1
1
1
1
1
23
30
0.77
0.91
1
1
2
1
0
0
0
53
68
0.78
0.77
1
1
2
1
0
1
0
43
61
0.70
0.73
1
1
2
1
0
1
1
50
58
0.86
0.84
1
1
2
1
1
0
0
81
92
0.88
0.87
1
1
2
1
1
1
0
58
67
0.87
0.84
1
1
2
1
1
1
1
60
67
0.90
0.91
subtotal
—
—
—
—
—
—
—
921
1253
0.74
—
chronic
0
0
0
0
0
0
0
25
37
0.68
0.66
0
0
0
0
0
1
0
20
34
0.59
0.61
0
0
0
0
0
1
1
29
43
0.67
0.75
0
0
1
1
0
0
0
11
22
0.50
0.65
0
0
1
1
0
1
0
9
12
0.75
0.59
0
0
1
1
0
1
1
13
18
0.72
0.75
0
0
1
1
1
0
0
18
21
0.86
0.79
0
0
1
1
1
1
0
23
29
0.79
0.75
0
0
1
1
1
1
1
17
20
0.85
0.85
0
0
2
1
0
0
0
51
69
0.74
0.68
0
0
2
1
0
1
0
30
45
0.67
0.62
0
0
2
1
0
1
1
35
55
0.64
0.76
0
0
2
1
1
0
0
43
57
0.75
0.81
0
0
2
1
1
1
0
24
36
0.67
0.76
0
0
2
1
1
1
1
41
50
0.82
0.87
subtotal
—
—
—
—
—
—
—
389
548
0.71
—
total
—
—
—
—
—
—
—
1804
2500
0.72
—
Table 2: Summary of simulated trial data when all domains associated with positive effects
Code
# Convert the simulated trial into the Stan data list and fit the model with
# the prior settings carried in `ld` as returned by get_stan_data().
lsd <- get_stan_data(ll$d)
ld <- lsd$ld
d_s <- copy(lsd$d_s)
m2 <- cmdstanr::cmdstan_model("stan/model-sim-04.stan")
f_alleff_1 <- m2$sample(
  data = ld,
  chains = 2, parallel_chains = 2,
  iter_warmup = 1000, iter_sampling = 10000,
  max_treedepth = 13,
  refresh = 0,
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  show_exceptions = FALSE
)
Running MCMC with 2 parallel chains...
Chain 1 finished in 4.5 seconds.
Chain 2 finished in 4.5 seconds.
Both chains finished successfully.
Mean chain execution time: 4.5 seconds.
Total execution time: 4.7 seconds.
Code
# Summarise the first fit: posterior mean and 2.5%/97.5% quantiles for every
# element of a0, m and b.
post_1 <- data.table(
  f_alleff_1$draws(variables = c("a0", "m", "b"), format = "matrix")
)
post_1 <- melt(post_1, measure.vars = names(post_1))
d_tbl_1 <- post_1[, .(
  prior = "normal(0, 1)",
  mu = mean(value),
  # `probs` spelled out in full rather than relying on partial matching
  q_025 = quantile(value, probs = 0.025),
  q_975 = quantile(value, probs = 0.975)
), keyby = variable]
# Labels and true values are attached by position, so this relies on the
# summarised parameters appearing in the same order as unlist(sim_spec).
d_tbl_1[, name_tru := names(unlist(sim_spec))]
d_tbl_1[, tru := unlist(sim_spec)]

# compare when prior sd is set to 10 for trt effects
ld$pri_m_sd <- rep(10, length(ld$pri_m_sd))
ld$pri_b_sd <- rep(10, length(ld$pri_b_sd))
f_alleff_2 <- m2$sample(
  data = ld,
  chains = 2, parallel_chains = 2,
  iter_warmup = 1000, iter_sampling = 10000,
  max_treedepth = 13,
  refresh = 0,
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  show_exceptions = FALSE
)
Running MCMC with 2 parallel chains...
Chain 1 finished in 5.5 seconds.
Chain 2 finished in 5.9 seconds.
Both chains finished successfully.
Mean chain execution time: 5.7 seconds.
Total execution time: 5.9 seconds.
Code
# Summarise the second fit (prior sd of 10) in the same way as the first:
# posterior mean and 2.5%/97.5% quantiles for every element of a0, m and b.
post_2 <- data.table(
  f_alleff_2$draws(variables = c("a0", "m", "b"), format = "matrix")
)
post_2 <- melt(post_2, measure.vars = names(post_2))
d_tbl_2 <- post_2[, .(
  prior = "normal(0, 10)",
  mu = mean(value),
  # `probs` spelled out in full rather than relying on partial matching
  q_025 = quantile(value, probs = 0.025),
  q_975 = quantile(value, probs = 0.975)
), keyby = variable]
# Labels and true values are attached by position, so this relies on the
# summarised parameters appearing in the same order as unlist(sim_spec).
d_tbl_2[, name_tru := names(unlist(sim_spec))]
d_tbl_2[, tru := unlist(sim_spec)]
Code
# Stack the two posterior summaries; the factor levels fix the x-axis order to
# the order in which the parameter labels first appear.
d_fig <- rbind(d_tbl_1, d_tbl_2)
d_fig$name_tru <- factor(d_fig$name_tru, levels = unique(d_fig$name_tru))
# https://www.andrewheiss.com/blog/2022/12/08/log10-natural-log-scales-ggplot/
# Posterior means with 2.5%-97.5% intervals, dodged by prior, with the
# simulation values (`tru`) overlaid as triangles. Layers inherit the data and
# the x/y/colour mappings from the ggplot() call.
ggplot(d_fig, aes(x = name_tru, y = mu, col = prior)) +
  geom_point(position = position_dodge(width = 0.4)) +
  geom_linerange(
    aes(ymin = q_025, ymax = q_975),
    position = position_dodge2(width = 0.4)
  ) +
  geom_point(aes(y = tru), col = 1, pch = 2) +
  scale_x_discrete("") +
  scale_y_continuous("log-OR") +
  scale_color_discrete("Prior sd on log-OR")
Figure 2: Posterior mean and 95% CI for baseline log-odds of treatment success (triangles show true values).