R code from PRACTICE sections

Note

This Notebook contains only the R code from the PRACTICE sections of the manuscript. The number of samples and repetitions for all simulations have been greatly reduced to ensure that the code runs quickly and with limited computational resources.

Simulate the data generating process

In [1]:

simulate <- function(n_subjects = 100, n_items = 50,
  b_0 = 0.847, b_e = 1.350, b_a = -1.253, b_c = 2.603,
  b_ea = 0.790, b_ec = -1.393,
  sd_u0s = 0.5, sd_u0i = 0.5, ...){
  require(dplyr)
  require(faux)
  # simulate design
  dat <- add_random(subject = n_subjects, item = n_items) %>%
    add_between("subject", expert = c(1, 0), .prob = c(0.25, 0.75)) %>%
    mutate(advice_present = rbinom(n(), 1, prob = 2/3)) %>%
    mutate(advice_correct = if_else(advice_present == 1L, 
                                    rbinom(n(), 1L, prob = 0.8), 0L)) %>%
    # add random effects
    add_ranef("subject", u0s = sd_u0s) %>%
    add_ranef("item", u0i = sd_u0i) %>%
    # compute dependent variable
    mutate(linpred = b_0 + u0i + u0s +
      b_e * expert + b_a * advice_present + b_c * advice_correct +
      b_ea * expert * advice_present + b_ec * expert * advice_correct) %>%
    mutate(y_prob = plogis(linpred)) %>%
    mutate(y_bin = rbinom(n = n(), size = 1, prob = y_prob))
  dat
}

Specify the population parameters

In [2]:

b_0 <- qlogis(0.7)
b_e <- qlogis(0.9) - b_0
b_a <- qlogis(0.4) - b_0
b_ea <- qlogis(0.85) - b_0 - b_e - b_a
b_c <- qlogis(0.9) - b_0 - b_a
b_ec <- qlogis(0.95) - b_0 - b_e - b_a - b_c - b_ea
c(b_0 = b_0, b_e = b_e, b_a = b_a, b_c = b_c, b_ea = b_ea, b_ec = b_ec)

In [3]:

plogis(b_0 + b_e + b_a + b_c + b_ea + b_ec)

Insightful descriptive statistics

In [4]:

library(tidyverse)
set.seed(1)
dat <- simulate(n_subjects = 500, n_items = 500,
  sd_u0s = 0.5, sd_u0i = 0.5)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
 "student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
 "student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
 "student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  group_by(condition) %>%
  summarize(relative_frequency = sum(y_bin) / n())

Insightful model based quantities

In [5]:


library(ggdist)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  ggplot(aes(x = y_prob, y = condition)) +
  stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
    breaks = "Sturges") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))

In [6]:

set.seed(1)
dat <- simulate(n_subjects = 500, n_items = 500, sd_u0i = 0.01)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  ggplot(aes(x = y_prob, y = condition)) +
  stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
    breaks = "Sturges") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))

In [7]:

set.seed(1)
dat <- simulate(n_subjects = 500, n_items = 500, sd_u0s = 0.01)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  ggplot(aes(x = y_prob, y = condition)) +
  stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
    breaks = "Sturges") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))

In [8]:

set.seed(1)
dat <- simulate(n_subjects = 500, n_items = 500, sd_u0s = 3, sd_u0i = 3)
dat %>% 
  mutate(condition = fct_cross(
    factor(expert), factor(advice_present), factor(advice_correct))) %>%
  mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0", 
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>% 
  ggplot(aes(x = y_prob, y = condition)) +
  stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
    breaks = "Sturges") +
  scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))

Estimate the statistical model

In [9]:

library(tidyverse)
library(lme4)
set.seed(1)
dat <- simulate(n_subjects = 100, n_items = 50)
f <- y_bin ~ 1 + expert + advice_present + advice_correct + 
  expert:advice_present + expert:advice_correct +
  (1|subject) + (1|item)
fit <- glmer(f, data = dat, family = "binomial")
summary(fit)

Compute the estimate

In [10]:

grid1 <- data.frame(advice_present = c(1, 0), advice_correct = c(1, 0), 
  expert = c(1, 1))
grid1
pred <- predict(fit, newdata = grid1, type = "response", re.form = NA)
pred
pred[1] - pred[2]

In [11]:

library(marginaleffects)
library(tinytable)
grid2 <- expand_grid(advice_present = 0:1, 
  advice_correct = 0:1, expert = 0:1)
grid2
preds <- predictions(fit, newdata = grid2, 
  type = "response", re.form = NA)
print(preds, style = "tinytable") %>% theme_tt(theme = "resize")

In [12]:

contrasts <- preds %>% 
  hypotheses(hypothesis = c(
    "b8 = b2",  # (correct advice, expert) - (no advice, expert)
    "b2 = b6",  # (no advice, expert) - (incorrect advice, expert) 
    "b7 = b1",  # (correct advice, student) - (no advice, student)
    "b1 = b5"), # (no advice, student) - (incorrect advice, student)
    equivalence = c(0, 0))
print(contrasts, style = "tinytable") %>% theme_tt(theme = "resize")

Perform repeated simulations

In [13]:

sim_and_analyse <- function(
  formula_chr = "y_bin ~ 1 + expert + advice_present + advice_correct + 
    expert:advice_present + expert:advice_correct + (1|subject) + (1|item)",
  contrasts = c("b8 = b2", "b2 = b6", "b7 = b1", "b1 = b5"), ...){
  require(lme4)
  require(marginaleffects)
  require(tidyr)
  # simulate data
  dat <- simulate(...)
  # fit model
  model <- glmer(as.formula(formula_chr), data = dat, family = "binomial")
  # compute contrasts
  contr_df <- expand_grid(advice_present = 0:1, advice_correct = 0:1,
    expert = 0:1)
  predictions(model, newdata = contr_df, type = "response", re.form = NA) %>%
    hypotheses(hypothesis = contrasts, equivalence = c(0, 0)) %>%
    data.frame()
}

In [14]:

library(future)
plan("multisession", workers = 4)
set.seed(2)

In [15]:

library(furrr)
sim_result <- crossing(
  rep = 1:5,
  n_subjects = c(100, 150, 200, 250),
  n_items = c(10, 30, 50, 70)
) %>%
  mutate(res = future_pmap(., sim_and_analyse, 
    .options = furrr_options(seed = TRUE))) %>%
  unnest(col = res)

Power results

In [16]:

library(binom)
alpha <- 0.05
power <- sim_result %>%
  pivot_wider(names_from = term, names_sep = "_", 
    values_from = estimate:p.value.equiv) %>%
  group_by(n_subjects, n_items) %>% 
  summarise(
    power = mean(`p.value.noninf_b1=b5` < alpha & 
        `p.value.noninf_b8=b2` < alpha & `p.value.noninf_b2=b6` < alpha & 
        `p.value.noninf_b7=b1` < alpha), 
    n_sig = sum(`p.value.noninf_b1=b5` < alpha & 
        `p.value.noninf_b8=b2` < alpha & `p.value.noninf_b2=b6` < alpha & 
        `p.value.noninf_b7=b1` < alpha),
    n = n(),
    ci.lwr = binom.confint(n_sig, n, method = "wilson")$lower,
    ci.upr = binom.confint(n_sig, n, method = "wilson")$upper, 
    .groups = "drop")
power %>%
  mutate(across(c(n_subjects, n_items), factor)) %>%
  ggplot(aes(n_subjects, n_items, fill = power)) +
  geom_tile() +
  geom_text(aes(label = sprintf("%.2f \n [%.2f; %.2f]", 
                                power, ci.lwr, ci.upr)), 
    color = "white", size = 4) +
  scale_fill_viridis_c(limits = c(0, 1)) +
  xlab("number of subjects") + ylab("number of items")

Precision results

In [17]:

precision <- sim_result %>%
  pivot_wider(names_from = term, names_sep = "_", 
    values_from = estimate:p.value.equiv) %>%
  group_by(n_subjects, n_items) %>%
  mutate(width = `conf.high_b8=b2` - `conf.low_b8=b2`) %>%
  summarise(precision = mean(width),
    ci.lwr = t.test(width)$conf.int[1],
    ci.upr = t.test(width)$conf.int[2], 
    .groups = "drop")
precision %>%
  mutate(across(c(n_subjects, n_items), factor)) %>%
  ggplot(aes(n_subjects, n_items, fill = precision)) +
  geom_tile() +
  geom_text(aes(label = sprintf("%.2f \n [%.2f; %.2f]", 
                                precision, ci.lwr, ci.upr)), 
    color = "white", size = 4) +
  scale_fill_viridis_c(limits = c(0, 0.2), direction = -1) +
  guides(fill = guide_colourbar(reverse = TRUE)) +
  xlab("number of subjects") + ylab("number of items")