<- function(n_subjects = 100, n_items = 50,
simulate b_0 = 0.847, b_e = 1.350, b_a = -1.253, b_c = 2.603,
b_ea = 0.790, b_ec = -1.393,
sd_u0s = 0.5, sd_u0i = 0.5, ...){
require(dplyr)
require(faux)
# simulate design
<- add_random(subject = n_subjects, item = n_items) %>%
dat add_between("subject", expert = c(1, 0), .prob = c(0.25, 0.75)) %>%
mutate(advice_present = rbinom(n(), 1, prob = 2/3)) %>%
mutate(advice_correct = if_else(advice_present == 1L,
rbinom(n(), 1L, prob = 0.8), 0L)) %>%
# add random effects
add_ranef("subject", u0s = sd_u0s) %>%
add_ranef("item", u0i = sd_u0i) %>%
# compute dependent variable
mutate(linpred = b_0 + u0i + u0s +
* expert + b_a * advice_present + b_c * advice_correct +
b_e * expert * advice_present + b_ec * expert * advice_correct) %>%
b_ea mutate(y_prob = plogis(linpred)) %>%
mutate(y_bin = rbinom(n = n(), size = 1, prob = y_prob))
dat }
Note
This Notebook contains only the R code from the PRACTICE sections of the manuscript. The number of samples and repetitions for all simulations have been greatly reduced to ensure that the code runs quickly and with limited computational resources.
Simulate the data generating process
In [1]:
Specify the population parameters
In [2]:
<- qlogis(0.7)
b_0 <- qlogis(0.9) - b_0
b_e <- qlogis(0.4) - b_0
b_a <- qlogis(0.85) - b_0 - b_e - b_a
b_ea <- qlogis(0.9) - b_0 - b_a
b_c <- qlogis(0.95) - b_0 - b_e - b_a - b_c - b_ea
b_ec c(b_0 = b_0, b_e = b_e, b_a = b_a, b_c = b_c, b_ea = b_ea, b_ec = b_ec)
In [3]:
plogis(b_0 + b_e + b_a + b_c + b_ea + b_ec)
Insightful descriptive statistics
In [4]:
library(tidyverse)
set.seed(1)
<- simulate(n_subjects = 500, n_items = 500,
dat sd_u0s = 0.5, sd_u0i = 0.5)
%>%
dat mutate(condition = fct_cross(
factor(expert), factor(advice_present), factor(advice_correct))) %>%
mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0",
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>%
group_by(condition) %>%
summarize(relative_frequency = sum(y_bin) / n())
Insightful model based quantities
In [5]:
library(ggdist)
%>%
dat mutate(condition = fct_cross(
factor(expert), factor(advice_present), factor(advice_correct))) %>%
mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0",
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>%
ggplot(aes(x = y_prob, y = condition)) +
stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
breaks = "Sturges") +
scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))
In [6]:
set.seed(1)
<- simulate(n_subjects = 500, n_items = 500, sd_u0i = 0.01)
dat %>%
dat mutate(condition = fct_cross(
factor(expert), factor(advice_present), factor(advice_correct))) %>%
mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0",
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>%
ggplot(aes(x = y_prob, y = condition)) +
stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
breaks = "Sturges") +
scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))
In [7]:
set.seed(1)
<- simulate(n_subjects = 500, n_items = 500, sd_u0s = 0.01)
dat %>%
dat mutate(condition = fct_cross(
factor(expert), factor(advice_present), factor(advice_correct))) %>%
mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0",
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>%
ggplot(aes(x = y_prob, y = condition)) +
stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
breaks = "Sturges") +
scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))
In [8]:
set.seed(1)
<- simulate(n_subjects = 500, n_items = 500, sd_u0s = 3, sd_u0i = 3)
dat %>%
dat mutate(condition = fct_cross(
factor(expert), factor(advice_present), factor(advice_correct))) %>%
mutate(condition = fct_recode(condition,
"student, no advice" = "0:0:0", "expert, no advice" = "1:0:0",
"student, incorrect advice" = "0:1:0", "expert, incorrect advice" = "1:1:0",
"student, correct advice" = "0:1:1", "expert, correct advice" = "1:1:1")) %>%
ggplot(aes(x = y_prob, y = condition)) +
stat_histinterval(point_interval = "mean_qi", slab_color = "gray45",
breaks = "Sturges") +
scale_x_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1))
Estimate the statistical model
In [9]:
library(tidyverse)
library(lme4)
set.seed(1)
<- simulate(n_subjects = 100, n_items = 50)
dat <- y_bin ~ 1 + expert + advice_present + advice_correct +
f :advice_present + expert:advice_correct +
expert1|subject) + (1|item)
(<- glmer(f, data = dat, family = "binomial")
fit summary(fit)
Compute the estimate
In [10]:
<- data.frame(advice_present = c(1, 0), advice_correct = c(1, 0),
grid1 expert = c(1, 1))
grid1<- predict(fit, newdata = grid1, type = "response", re.form = NA)
pred
pred1] - pred[2] pred[
In [11]:
library(marginaleffects)
library(tinytable)
<- expand_grid(advice_present = 0:1,
grid2 advice_correct = 0:1, expert = 0:1)
grid2<- predictions(fit, newdata = grid2,
preds type = "response", re.form = NA)
print(preds, style = "tinytable") %>% theme_tt(theme = "resize")
In [12]:
<- preds %>%
contrasts hypotheses(hypothesis = c(
"b8 = b2", # (correct advice, expert) - (no advice, expert)
"b2 = b6", # (no advice, expert) - (incorrect advice, expert)
"b7 = b1", # (correct advice, student) - (no advice, student)
"b1 = b5"), # (no advice, student) - (incorrect advice, student)
equivalence = c(0, 0))
print(contrasts, style = "tinytable") %>% theme_tt(theme = "resize")
Perform repeated simulations
In [13]:
<- function(
sim_and_analyse formula_chr = "y_bin ~ 1 + expert + advice_present + advice_correct +
expert:advice_present + expert:advice_correct + (1|subject) + (1|item)",
contrasts = c("b8 = b2", "b2 = b6", "b7 = b1", "b1 = b5"), ...){
require(lme4)
require(marginaleffects)
require(tidyr)
# simulate data
<- simulate(...)
dat # fit model
<- glmer(as.formula(formula_chr), data = dat, family = "binomial")
model # compute contrasts
<- expand_grid(advice_present = 0:1, advice_correct = 0:1,
contr_df expert = 0:1)
predictions(model, newdata = contr_df, type = "response", re.form = NA) %>%
hypotheses(hypothesis = contrasts, equivalence = c(0, 0)) %>%
data.frame()
}
In [14]:
library(future)
plan("multisession", workers = 4)
set.seed(2)
In [15]:
library(furrr)
<- crossing(
sim_result rep = 1:5,
n_subjects = c(100, 150, 200, 250),
n_items = c(10, 30, 50, 70)
%>%
) mutate(res = future_pmap(., sim_and_analyse,
.options = furrr_options(seed = TRUE))) %>%
unnest(col = res)
Power results
In [16]:
library(binom)
<- 0.05
alpha <- sim_result %>%
power pivot_wider(names_from = term, names_sep = "_",
values_from = estimate:p.value.equiv) %>%
group_by(n_subjects, n_items) %>%
summarise(
power = mean(`p.value.noninf_b1=b5` < alpha &
`p.value.noninf_b8=b2` < alpha & `p.value.noninf_b2=b6` < alpha &
`p.value.noninf_b7=b1` < alpha),
n_sig = sum(`p.value.noninf_b1=b5` < alpha &
`p.value.noninf_b8=b2` < alpha & `p.value.noninf_b2=b6` < alpha &
`p.value.noninf_b7=b1` < alpha),
n = n(),
ci.lwr = binom.confint(n_sig, n, method = "wilson")$lower,
ci.upr = binom.confint(n_sig, n, method = "wilson")$upper,
.groups = "drop")
%>%
power mutate(across(c(n_subjects, n_items), factor)) %>%
ggplot(aes(n_subjects, n_items, fill = power)) +
geom_tile() +
geom_text(aes(label = sprintf("%.2f \n [%.2f; %.2f]",
power, ci.lwr, ci.upr)), color = "white", size = 4) +
scale_fill_viridis_c(limits = c(0, 1)) +
xlab("number of subjects") + ylab("number of items")
Precision results
In [17]:
<- sim_result %>%
precision pivot_wider(names_from = term, names_sep = "_",
values_from = estimate:p.value.equiv) %>%
group_by(n_subjects, n_items) %>%
mutate(width = `conf.high_b8=b2` - `conf.low_b8=b2`) %>%
summarise(precision = mean(width),
ci.lwr = t.test(width)$conf.int[1],
ci.upr = t.test(width)$conf.int[2],
.groups = "drop")
%>%
precision mutate(across(c(n_subjects, n_items), factor)) %>%
ggplot(aes(n_subjects, n_items, fill = precision)) +
geom_tile() +
geom_text(aes(label = sprintf("%.2f \n [%.2f; %.2f]",
precision, ci.lwr, ci.upr)), color = "white", size = 4) +
scale_fill_viridis_c(limits = c(0, 0.2), direction = -1) +
guides(fill = guide_colourbar(reverse = TRUE)) +
xlab("number of subjects") + ylab("number of items")