Example trials

Published

July 29, 2024

Modified

April 5, 2024

Example trials are provided to give insight into typical cell sample sizes as well as the level of uncertainty associated with the parameter estimation process. Examples are from trials at their maximum sample size with all follow up completed. Sequential variants with adaptations will be added later.

Null scenario

Table 1 shows a summary of the treatment successes based on the \(n\) patients associated with each combination of design variables when there are no treatment effects (non-membership effects are still retained) in the simulated data of 2500 patients. Given that this is a summary of a single data set, some variation from the underlying simulation parameters is to be expected.

Code

# Null scenario: fix the RNG seed so the simulated trial is reproducible.
set.seed(11)

# Simulation specification from the project helper.
sim_spec <- get_sim_spec()

# Switch off all five treatment-effect entries of `b` in one vectorised
# assignment; every other entry of the specification is left untouched.
sim_spec$b[c("r1", "r2", "r1d", "r2d", "f")] <- 0

# Simulate a single trial of 2500 patients under the modified specification.
ll <- get_trial_data(N = 2500, sim_spec = sim_spec)

# Summary table of the simulated data; the bare name on the last line
# prints it.
gt_tbl <- tbl_ex_trial(ll$d)
gt_tbl

	Surgical D_a			Duration D_b		Type D_c		Response
	reveal	assigned	received	reveal	assigned	reveal	assigned	y	n	MLE (p_y)	TRUE (p_y)¹
early
	0	0	0	0	0	0	0	188	270	0.70	0.68
	0	0	0	0	0	1	0	122	195	0.63	0.63
	0	0	0	0	0	1	1	149	228	0.65	0.63
	0	0	1	1	0	0	0	9	13	0.69	0.67
	0	0	1	1	0	1	0	8	13	0.62	0.62
	0	0	1	1	0	1	1	6	10	0.60	0.62
	0	0	1	1	1	0	0	12	18	0.67	0.67
	0	0	1	1	1	1	0	5	8	0.62	0.62
	0	0	1	1	1	1	1	6	10	0.60	0.62
subtotal	—	—	—	—	—	—	—	505	765	0.66	—
late
	0	0	0	0	0	0	0	1	1	1.00	0.61
	0	0	0	0	0	1	0	2	5	0.40	0.55
	0	0	0	0	0	1	1	1	1	1.00	0.55
	0	0	1	1	0	1	1	1	3	0.33	0.53
	0	0	1	1	1	0	0	3	4	0.75	0.59
	0	0	1	1	1	1	0	0	1	0.00	0.53
	0	0	1	1	1	1	1	0	2	0.00	0.53
	0	0	2	1	0	0	0	3	3	1.00	0.62
	0	0	2	1	0	1	0	2	2	1.00	0.56
	0	0	2	1	0	1	1	0	3	0.00	0.56
	0	0	2	1	1	0	0	1	3	0.33	0.62
	0	0	2	1	1	1	0	3	4	0.75	0.56
	0	0	2	1	1	1	1	1	2	0.50	0.56
	1	0	0	0	0	0	0	146	233	0.63	0.63
	1	0	0	0	0	1	0	108	192	0.56	0.57
	1	0	0	0	0	1	1	99	172	0.58	0.57
	1	1	1	1	0	0	0	21	41	0.51	0.63
	1	1	1	1	0	1	0	15	28	0.54	0.57
	1	1	1	1	0	1	1	26	45	0.58	0.57
	1	1	1	1	1	0	0	26	44	0.59	0.63
	1	1	1	1	1	1	0	19	30	0.63	0.57
	1	1	1	1	1	1	1	15	25	0.60	0.57
	1	1	2	1	0	0	0	48	79	0.61	0.63
	1	1	2	1	0	1	0	34	62	0.55	0.57
	1	1	2	1	0	1	1	33	56	0.59	0.57
	1	1	2	1	1	0	0	54	78	0.69	0.63
	1	1	2	1	1	1	0	35	61	0.57	0.57
	1	1	2	1	1	1	1	28	54	0.52	0.57
subtotal	—	—	—	—	—	—	—	725	1234	0.59	—
chronic
	0	0	0	0	0	0	0	23	38	0.61	0.66
	0	0	0	0	0	1	0	23	30	0.77	0.61
	0	0	0	0	0	1	1	21	33	0.64	0.61
	0	0	1	1	0	0	0	10	17	0.59	0.65
	0	0	1	1	0	1	0	12	19	0.63	0.59
	0	0	1	1	0	1	1	12	17	0.71	0.59
	0	0	1	1	1	0	0	12	20	0.60	0.65
	0	0	1	1	1	1	0	8	17	0.47	0.59
	0	0	1	1	1	1	1	5	12	0.42	0.59
	0	0	2	1	0	0	0	46	60	0.77	0.68
	0	0	2	1	0	1	0	27	47	0.57	0.62
	0	0	2	1	0	1	1	23	33	0.70	0.62
	0	0	2	1	1	0	0	41	56	0.73	0.68
	0	0	2	1	1	1	0	31	44	0.70	0.62
	0	0	2	1	1	1	1	30	58	0.52	0.62
subtotal	—	—	—	—	—	—	—	324	501	0.65	—
total	—	—	—	—	—	—	—	1554	2500	0.62	—
¹ Transformed from the log-odds of response as used in the linear predictor to simulate data.

Table 1: Summary of simulated trial data when no treatment effects present

Model the simulated data first using standard normal priors on the domain level treatment effects, then increasing the prior standard deviation to ten in order to see if there is any movement in the posterior summary.

Code

# Convert the simulated trial into the objects used for model fitting.
# NOTE(review): `ld` is taken to be the Stan data list (it is passed to
# $sample() and carries pri_m_sd / pri_b_sd) and `d_s` a data.table (it is
# duplicated with copy()) -- confirm against get_stan_data().
lsd <- get_stan_data(ll$d)
ld <- lsd$ld
d_s <- copy(lsd$d_s)

m2 <- cmdstanr::cmdstan_model("stan/model-sim-04.stan")

# First fit: priors exactly as supplied in `ld` (described in the text as
# standard normal on the domain-level treatment effects).
f_null_1 <- m2$sample(
  data = ld, iter_warmup = 1000, iter_sampling = 10000,
  parallel_chains = 2, chains = 2, refresh = 0,
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  show_exceptions = FALSE,
  max_treedepth = 13)

Running MCMC with 2 parallel chains...

Chain 1 finished in 4.5 seconds.
Chain 2 finished in 5.1 seconds.

Both chains finished successfully.
Mean chain execution time: 4.8 seconds.
Total execution time: 5.2 seconds.

Code

# Posterior draws of a0, m and b as a matrix, reshaped to long format with
# one row per draw per parameter.
post_1 <- data.table(
  f_null_1$draws(variables = c("a0", "m", "b"), format = "matrix")
)
post_1 <- melt(post_1, measure.vars = names(post_1))

# Posterior mean and 2.5% / 97.5% quantiles for each parameter.
# `probs` is written in full; the abbreviated `prob` only worked through
# partial argument matching.
d_tbl_1 <- post_1[, .(
  prior = "normal(0, 1)",
  mu = mean(value),
  q_025 = quantile(value, probs = 0.025),
  q_975 = quantile(value, probs = 0.975)
), keyby = variable]

# Attach the simulation truth to each summary row.
# NOTE(review): rows are paired with unlist(sim_spec) purely by position, so
# this assumes the a0/m/b ordering of the draws matches the ordering of the
# flattened specification -- confirm.
d_tbl_1[, name_tru := names(unlist(sim_spec))]
d_tbl_1[, tru := unlist(sim_spec)]

# compare when prior sd is set to 10 for trt effects

# Overwrite every element of both prior-sd inputs with 10, keeping their
# original lengths.
ld$pri_m_sd <- rep(10, length(ld$pri_m_sd))
ld$pri_b_sd <- rep(10, length(ld$pri_b_sd))

# Second fit: identical sampler settings, wider priors.
f_null_2 <- m2$sample(
  data = ld, iter_warmup = 1000, iter_sampling = 10000,
  parallel_chains = 2, chains = 2, refresh = 0,
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  show_exceptions = FALSE,
  max_treedepth = 13)

Running MCMC with 2 parallel chains...

Chain 2 finished in 5.0 seconds.
Chain 1 finished in 5.6 seconds.

Both chains finished successfully.
Mean chain execution time: 5.3 seconds.
Total execution time: 5.7 seconds.

Code

# Posterior draws of a0, m and b from the wide-prior fit, reshaped to long
# format with one row per draw per parameter.
post_2 <- data.table(
  f_null_2$draws(variables = c("a0", "m", "b"), format = "matrix")
)
post_2 <- melt(post_2, measure.vars = names(post_2))

# Posterior mean and 2.5% / 97.5% quantiles for each parameter.
# `probs` is written in full; the abbreviated `prob` only worked through
# partial argument matching.
d_tbl_2 <- post_2[, .(
  prior = "normal(0, 10)",
  mu = mean(value),
  q_025 = quantile(value, probs = 0.025),
  q_975 = quantile(value, probs = 0.975)
), keyby = variable]

# Attach the simulation truth to each summary row.
# NOTE(review): rows are paired with unlist(sim_spec) purely by position, so
# this assumes the a0/m/b ordering of the draws matches the ordering of the
# flattened specification -- confirm.
d_tbl_2[, name_tru := names(unlist(sim_spec))]
d_tbl_2[, tru := unlist(sim_spec)]

Code

# Stack the posterior summaries from the two prior settings.
d_fig <- rbind(
  d_tbl_1, d_tbl_2
)
# Fix factor levels to order of first appearance so the x-axis follows that
# order instead of alphabetical order.
d_fig$name_tru <- factor(d_fig$name_tru, levels = unique(d_fig$name_tru))

# https://www.andrewheiss.com/blog/2022/12/08/log10-natural-log-scales-ggplot/

# Posterior means (points) and 2.5%-97.5% intervals (line ranges), dodged by
# prior. Both layers use the same position_dodge() specification so that each
# point is guaranteed to sit on its own interval; the original mixed
# position_dodge() with position_dodge2(). Layers inherit x / y / colour from
# the ggplot() call rather than re-declaring them.
ggplot(d_fig, aes(x = name_tru, y = mu, col = prior)) +
  scale_x_discrete("") +
  scale_y_continuous("log-OR") +
  scale_color_discrete("Prior sd on log-OR") +
  geom_point(position = position_dodge(width = 0.4)) +
  geom_linerange(
    aes(ymin = q_025, ymax = q_975),
    position = position_dodge(width = 0.4)
  ) +
  # True simulation values: undodged triangles (pch = 2) in a fixed colour.
  geom_point(aes(y = tru), col = 1, pch = 2)

Figure 1: Posterior mean and 95% CI for baseline log-odds of treatment success in domain A (independent estimates for late and chronic silo); triangles show true values.

All domains effective scenario

Table 2 shows a summary of the treatment successes based on the \(n\) patients associated with each combination of design variables when all treatment effects are set to log(2) (with non-membership effects retained as before) in the simulated data of 2500 patients.

Code

# All-domains-effective scenario: fix the RNG seed for reproducibility.
set.seed(2)

# Set all five treatment-effect entries of `b` to log(2) in one vectorised
# assignment; every other entry of the specification is left untouched.
sim_spec$b[c("r1", "r2", "r1d", "r2d", "f")] <- log(2)

# Simulate a single trial of 2500 patients under the modified specification.
ll <- get_trial_data(N = 2500, sim_spec = sim_spec)

# Summary table of the simulated data; the bare name on the last line
# prints it.
gt_tbl <- tbl_ex_trial(ll$d)
gt_tbl

	Surgical D_a			Duration D_b		Type D_c		Response
	reveal	assigned	received	reveal	assigned	reveal	assigned	y	n	MLE (p_y)	TRUE (p_y)¹
early
	0	0	0	0	0	0	0	166	245	0.68	0.68
	0	0	0	0	0	1	0	112	172	0.65	0.63
	0	0	0	0	0	1	1	156	207	0.75	0.77
	0	0	1	1	0	0	0	17	23	0.74	0.67
	0	0	1	1	0	1	0	6	7	0.86	0.62
	0	0	1	1	0	1	1	10	12	0.83	0.76
	0	0	1	1	1	0	0	9	13	0.69	0.80
	0	0	1	1	1	1	0	6	6	1.00	0.76
	0	0	1	1	1	1	1	12	14	0.86	0.86
subtotal	—	—	—	—	—	—	—	494	699	0.71	—
late
	0	0	0	0	0	0	0	1	1	1.00	0.61
	0	0	0	0	0	1	0	0	1	0.00	0.55
	0	0	0	0	0	1	1	3	4	0.75	0.71
	0	0	1	1	0	1	1	1	2	0.50	0.70
	0	0	1	1	1	0	0	2	2	1.00	0.75
	0	0	1	1	1	1	0	1	1	1.00	0.70
	0	0	1	1	1	1	1	3	3	1.00	0.82
	0	0	2	1	0	0	0	5	7	0.71	0.62
	0	0	2	1	0	1	0	0	2	0.00	0.56
	0	0	2	1	0	1	1	3	3	1.00	0.72
	0	0	2	1	1	0	0	2	2	1.00	0.76
	0	0	2	1	1	1	0	4	5	0.80	0.72
	0	0	2	1	1	1	1	1	1	1.00	0.83
	1	0	0	0	0	0	0	156	245	0.64	0.63
	1	0	0	0	0	1	0	111	192	0.58	0.57
	1	0	0	0	0	1	1	133	181	0.73	0.73
	1	1	1	1	0	0	0	25	30	0.83	0.77
	1	1	1	1	0	1	0	18	31	0.58	0.73
	1	1	1	1	0	1	1	30	35	0.86	0.84
	1	1	1	1	1	0	0	26	30	0.87	0.87
	1	1	1	1	1	1	0	28	32	0.88	0.84
	1	1	1	1	1	1	1	23	30	0.77	0.91
	1	1	2	1	0	0	0	53	68	0.78	0.77
	1	1	2	1	0	1	0	43	61	0.70	0.73
	1	1	2	1	0	1	1	50	58	0.86	0.84
	1	1	2	1	1	0	0	81	92	0.88	0.87
	1	1	2	1	1	1	0	58	67	0.87	0.84
	1	1	2	1	1	1	1	60	67	0.90	0.91
subtotal	—	—	—	—	—	—	—	921	1253	0.74	—
chronic
	0	0	0	0	0	0	0	25	37	0.68	0.66
	0	0	0	0	0	1	0	20	34	0.59	0.61
	0	0	0	0	0	1	1	29	43	0.67	0.75
	0	0	1	1	0	0	0	11	22	0.50	0.65
	0	0	1	1	0	1	0	9	12	0.75	0.59
	0	0	1	1	0	1	1	13	18	0.72	0.75
	0	0	1	1	1	0	0	18	21	0.86	0.79
	0	0	1	1	1	1	0	23	29	0.79	0.75
	0	0	1	1	1	1	1	17	20	0.85	0.85
	0	0	2	1	0	0	0	51	69	0.74	0.68
	0	0	2	1	0	1	0	30	45	0.67	0.62
	0	0	2	1	0	1	1	35	55	0.64	0.76
	0	0	2	1	1	0	0	43	57	0.75	0.81
	0	0	2	1	1	1	0	24	36	0.67	0.76
	0	0	2	1	1	1	1	41	50	0.82	0.87
subtotal	—	—	—	—	—	—	—	389	548	0.71	—
total	—	—	—	—	—	—	—	1804	2500	0.72	—
¹ Transformed from the log-odds of response as used in the linear predictor to simulate data.

Table 2: Summary of simulated trial data when all domains associated with positive effects

Code

# Convert the simulated trial into the objects used for model fitting.
# NOTE(review): `ld` is taken to be the Stan data list (it is passed to
# $sample() and carries pri_m_sd / pri_b_sd) and `d_s` a data.table (it is
# duplicated with copy()) -- confirm against get_stan_data().
lsd <- get_stan_data(ll$d)
ld <- lsd$ld
d_s <- copy(lsd$d_s)

m2 <- cmdstanr::cmdstan_model("stan/model-sim-04.stan")

# First fit: priors exactly as supplied in `ld`.
f_alleff_1 <- m2$sample(
  data = ld, iter_warmup = 1000, iter_sampling = 10000,
  parallel_chains = 2, chains = 2, refresh = 0,
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  show_exceptions = FALSE,
  max_treedepth = 13)

Running MCMC with 2 parallel chains...

Chain 1 finished in 4.5 seconds.
Chain 2 finished in 4.5 seconds.

Both chains finished successfully.
Mean chain execution time: 4.5 seconds.
Total execution time: 4.7 seconds.

Code

# Posterior draws of a0, m and b as a matrix, reshaped to long format with
# one row per draw per parameter.
post_1 <- data.table(
  f_alleff_1$draws(variables = c("a0", "m", "b"), format = "matrix")
)
post_1 <- melt(post_1, measure.vars = names(post_1))

# Posterior mean and 2.5% / 97.5% quantiles for each parameter.
# `probs` is written in full; the abbreviated `prob` only worked through
# partial argument matching.
d_tbl_1 <- post_1[, .(
  prior = "normal(0, 1)",
  mu = mean(value),
  q_025 = quantile(value, probs = 0.025),
  q_975 = quantile(value, probs = 0.975)
), keyby = variable]

# Attach the simulation truth to each summary row.
# NOTE(review): rows are paired with unlist(sim_spec) purely by position, so
# this assumes the a0/m/b ordering of the draws matches the ordering of the
# flattened specification -- confirm.
d_tbl_1[, name_tru := names(unlist(sim_spec))]
d_tbl_1[, tru := unlist(sim_spec)]

# compare when prior sd is set to 10 for trt effects

# Overwrite every element of both prior-sd inputs with 10, keeping their
# original lengths.
ld$pri_m_sd <- rep(10, length(ld$pri_m_sd))
ld$pri_b_sd <- rep(10, length(ld$pri_b_sd))

# Second fit: identical sampler settings, wider priors.
f_alleff_2 <- m2$sample(
  data = ld, iter_warmup = 1000, iter_sampling = 10000,
  parallel_chains = 2, chains = 2, refresh = 0,
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  show_exceptions = FALSE,
  max_treedepth = 13)

Running MCMC with 2 parallel chains...

Chain 1 finished in 5.5 seconds.
Chain 2 finished in 5.9 seconds.

Both chains finished successfully.
Mean chain execution time: 5.7 seconds.
Total execution time: 5.9 seconds.

Code

# Posterior draws of a0, m and b from the wide-prior fit, reshaped to long
# format with one row per draw per parameter.
post_2 <- data.table(
  f_alleff_2$draws(variables = c("a0", "m", "b"), format = "matrix")
)
post_2 <- melt(post_2, measure.vars = names(post_2))

# Posterior mean and 2.5% / 97.5% quantiles for each parameter.
# `probs` is written in full; the abbreviated `prob` only worked through
# partial argument matching.
d_tbl_2 <- post_2[, .(
  prior = "normal(0, 10)",
  mu = mean(value),
  q_025 = quantile(value, probs = 0.025),
  q_975 = quantile(value, probs = 0.975)
), keyby = variable]

# Attach the simulation truth to each summary row.
# NOTE(review): rows are paired with unlist(sim_spec) purely by position, so
# this assumes the a0/m/b ordering of the draws matches the ordering of the
# flattened specification -- confirm.
d_tbl_2[, name_tru := names(unlist(sim_spec))]
d_tbl_2[, tru := unlist(sim_spec)]

Code

# Stack the posterior summaries from the two prior settings.
d_fig <- rbind(
  d_tbl_1, d_tbl_2
)
# Fix factor levels to order of first appearance so the x-axis follows that
# order instead of alphabetical order.
d_fig$name_tru <- factor(d_fig$name_tru, levels = unique(d_fig$name_tru))

# https://www.andrewheiss.com/blog/2022/12/08/log10-natural-log-scales-ggplot/

# Posterior means (points) and 2.5%-97.5% intervals (line ranges), dodged by
# prior. Both layers use the same position_dodge() specification so that each
# point is guaranteed to sit on its own interval; the original mixed
# position_dodge() with position_dodge2(). Layers inherit x / y / colour from
# the ggplot() call rather than re-declaring them.
ggplot(d_fig, aes(x = name_tru, y = mu, col = prior)) +
  scale_x_discrete("") +
  scale_y_continuous("log-OR") +
  scale_color_discrete("Prior sd on log-OR") +
  geom_point(position = position_dodge(width = 0.4)) +
  geom_linerange(
    aes(ymin = q_025, ymax = q_975),
    position = position_dodge(width = 0.4)
  ) +
  # True simulation values: undodged triangles (pch = 2) in a fixed colour.
  geom_point(aes(y = tru), col = 1, pch = 2)

Figure 2: Posterior mean and 95% CI for baseline log-odds of treatment success (triangles show true values).