simulate <- function(n_subjects = 100, n_items = 50,
  b_0 = 0.847, b_e = 1.350, b_a = -1.253, b_c = 2.603,
  b_ea = 0.790, b_ec = -1.393,
  sd_u0s = 0.5, sd_u0i = 0.5, ...){
  require(dplyr)
  require(faux)
  # simulate design
  dat <- add_random(subject = n_subjects, item = n_items) %>%
    add_between("subject", expert = c(1, 0), .prob = c(0.25, 0.75)) %>%
    mutate(advice_present = rbinom(n(), 1, prob = 2/3)) %>%
    mutate(advice_correct = if_else(advice_present == 1L, 
                                    rbinom(n(), 1L, prob = 0.8), 0L)) %>%
    # add random effects
    add_ranef("subject", u0s = sd_u0s) %>%
    add_ranef("item", u0i = sd_u0i) %>%
    # compute dependent variable
    mutate(linpred = b_0 + u0i + u0s +
      b_e * expert + b_a * advice_present + b_c * advice_correct +
      b_ea * expert * advice_present + b_ec * expert * advice_correct) %>%
    mutate(y_prob = plogis(linpred)) %>%
    mutate(y_bin = rbinom(n = n(), size = 1, prob = y_prob))
  dat
}
Note
This Notebook contains only the R code from the PRACTICE sections of the manuscript. The number of samples and repetitions for all simulations have been greatly reduced to ensure that the code runs quickly and with limited computational resources.
Simulate the data generating process
In [1]:
Specify the population parameters
In [2]:
b_0 <- qlogis(0.7)
b_e <- qlogis(0.9) - b_0
b_a <- qlogis(0.4) - b_0
b_ea <- qlogis(0.85) - b_0 - b_e - b_a
b_c <- qlogis(0.9) - b_0 - b_a
b_ec <- qlogis(0.95) - b_0 - b_e - b_a - b_c - b_ea
c(b_0 = b_0, b_e = b_e, b_a = b_a, b_c = b_c, b_ea = b_ea, b_ec = b_ec)In [3]:
plogis(b_0 + b_e + b_a + b_c + b_ea + b_ec)Insightful descriptive statistics
In [4]:
library(tidyverse)
set.seed(1)
dat <- simulate(n_subjects = 500, n_items = 500,
  sd_u0s = 0.5, sd_u0i = 0.5)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
 "student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
 "student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
 "student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  group_by(condition) %>%
  summarize(relative_frequency = sum(y_bin) / n())Insightful model based quantities
In [5]:
library(ggdist)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  ggplot(aes(x = y_prob, y = condition)) +
  stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
    breaks = "Sturges") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))In [6]:
set.seed(1)
dat <- simulate(n_subjects = 500, n_items = 500, sd_u0i = 0.01)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  ggplot(aes(x = y_prob, y = condition)) +
  stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
    breaks = "Sturges") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))In [7]:
set.seed(1)
dat <- simulate(n_subjects = 500, n_items = 500, sd_u0s = 0.01)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  ggplot(aes(x = y_prob, y = condition)) +
  stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
    breaks = "Sturges") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))In [8]:
set.seed(1)
dat <- simulate(n_subjects = 500, n_items = 500, sd_u0s = 3, sd_u0i = 3)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  ggplot(aes(x = y_prob, y = condition)) +
  stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
    breaks = "Sturges") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))Estimate the statistical model
In [9]:
library(tidyverse)
library(lme4)
set.seed(1)
dat <- simulate(n_subjects = 100, n_items = 50)
f <- y_bin ~ 1 + expert + advice_present + advice_correct + 
  expert:advice_present + expert:advice_correct +
  (1|subject) + (1|item)
fit <- glmer(f, data = dat, family = "binomial")
summary(fit)Compute the estimate
In [10]:
grid1 <- data.frame(advice_present = c(1, 0), advice_correct = c(1, 0), 
  expert = c(1, 1))
grid1
pred <- predict(fit, newdata = grid1, type = "response", re.form = NA)
pred
pred[1] - pred[2]In [11]:
library(marginaleffects)
library(tinytable)
grid2 <- expand_grid(advice_present = 0:1, 
  advice_correct = 0:1, expert = 0:1)
grid2
preds <- predictions(fit, newdata = grid2, 
  type = "response", re.form = NA)
print(preds, style = "tinytable") %>% theme_tt(theme = "resize")In [12]:
contrasts <- preds %>% 
  hypotheses(hypothesis = c(
    "b8 = b2",  # (correct advice, expert) - (no advice, expert)
    "b2 = b6",  # (no advice, expert) - (incorrect advice, expert) 
    "b7 = b1",  # (correct advice, student) - (no advice, student)
    "b1 = b5"), # (no advice, student) - (incorrect advice, student)
    equivalence = c(0, 0))
print(contrasts, style = "tinytable") %>% theme_tt(theme = "resize")Perform repeated simulations
In [13]:
sim_and_analyse <- function(
  formula_chr = "y_bin ~ 1 + expert + advice_present + advice_correct + 
    expert:advice_present + expert:advice_correct + (1|subject) + (1|item)",
  contrasts = c("b8 = b2", "b2 = b6", "b7 = b1", "b1 = b5"), ...){
  require(lme4)
  require(marginaleffects)
  require(tidyr)
  # simulate data
  dat <- simulate(...)
  # fit model
  model <- glmer(as.formula(formula_chr), data = dat, family = "binomial")
  # compute contrasts
  contr_df <- expand_grid(advice_present = 0:1, advice_correct = 0:1,
    expert = 0:1)
  predictions(model, newdata = contr_df, type = "response", re.form = NA) %>%
    hypotheses(hypothesis = contrasts, equivalence = c(0, 0)) %>%
    data.frame()
}In [14]:
library(future)
plan("multisession", workers = 4)
set.seed(2)In [15]:
library(furrr)
sim_result <- crossing(
  rep = 1:5,
  n_subjects = c(100, 150, 200, 250),
  n_items = c(10, 30, 50, 70)
) %>%
  mutate(res = future_pmap(., sim_and_analyse, 
    .options = furrr_options(seed = TRUE))) %>%
  unnest(col = res)Power results
In [16]:
library(binom)
alpha <- 0.05
power <- sim_result %>%
  pivot_wider(names_from = term, names_sep = "_", 
    values_from = estimate:p.value.equiv) %>%
  group_by(n_subjects, n_items) %>% 
  summarise(
    power = mean(`p.value.noninf_b1=b5` < alpha & 
        `p.value.noninf_b8=b2` < alpha & `p.value.noninf_b2=b6` < alpha & 
        `p.value.noninf_b7=b1` < alpha), 
    n_sig = sum(`p.value.noninf_b1=b5` < alpha & 
        `p.value.noninf_b8=b2` < alpha & `p.value.noninf_b2=b6` < alpha & 
        `p.value.noninf_b7=b1` < alpha),
    n = n(),
    ci.lwr = binom.confint(n_sig, n, method = "wilson")$lower,
    ci.upr = binom.confint(n_sig, n, method = "wilson")$upper, 
    .groups = "drop")
power %>%
  mutate(across(c(n_subjects, n_items), factor)) %>%
  ggplot(aes(n_subjects, n_items, fill = power)) +
  geom_tile() +
  geom_text(aes(label = sprintf("%.2f \n [%.2f; %.2f]", 
                                power, ci.lwr, ci.upr)), 
    color = "white", size = 4) +
  scale_fill_viridis_c(limits = c(0, 1)) +
  xlab("number of subjects") + ylab("number of items")Precision results
In [17]:
precision <- sim_result %>%
  pivot_wider(names_from = term, names_sep = "_", 
    values_from = estimate:p.value.equiv) %>%
  group_by(n_subjects, n_items) %>%
  mutate(width = `conf.high_b8=b2` - `conf.low_b8=b2`) %>%
  summarise(precision = mean(width),
    ci.lwr = t.test(width)$conf.int[1],
    ci.upr = t.test(width)$conf.int[2], 
    .groups = "drop")
precision %>%
  mutate(across(c(n_subjects, n_items), factor)) %>%
  ggplot(aes(n_subjects, n_items, fill = precision)) +
  geom_tile() +
  geom_text(aes(label = sprintf("%.2f \n [%.2f; %.2f]", 
                                precision, ci.lwr, ci.upr)), 
    color = "white", size = 4) +
  scale_fill_viridis_c(limits = c(0, 0.2), direction = -1) +
  guides(fill = guide_colourbar(reverse = TRUE)) +
  xlab("number of subjects") + ylab("number of items")