1 Load packages

library("knitr")        # for knitting documents
library("emmeans")      # for computing estimated marginal means 
library("broom.mixed")  # for tidying up model objects
library("kableExtra")   # for formatting tables
library("brms")         # for Bayesian regression modeling
library("Hmisc")        # for miscellaneous statistical functions
library("boot")         # for bootstrap resampling
library("nnet")         # for multinomial log-linear models
library("modelr")       # for modeling functions with tidy data
library("lme4")         # for linear mixed-effects models
library("corrr")        # for correlation analysis
library("tidybayes")    # for working with Bayesian models in a tidy way
library("janitor")      # for data cleaning
library("patchwork")    # for combining ggplot2 plots
library("ggtext")       # for rich text in ggplot2
library("rstatix")      # for pipe-friendly statistical tests
library("rsample")      # for resampling and splitting data
library("png")          # for reading and writing PNG images
library("grid")         # for grid graphics
library("egg")          # for arranging ggplot2 plots
library("glue")         # for string interpolation
library("ggeffects")    # for computing marginal effects from regression models
library("xtable")       # for exporting tables to LaTeX or HTML
library("jsonlite")     # for JSON data handling
library("tidyverse")    # for data science packages (ggplot2, dplyr, etc.)

2 Set options

# plot defaults: classic theme with large text for all figures below
theme_set(theme_classic() + 
    theme(text = element_text(size = 24)))

# knitr chunk defaults: no "##" prefix on printed output, hold figures
# until the end of each chunk
opts_chunk$set(comment = "",
               fig.show = "hold")

# suppress grouping warning 
# fixed: use FALSE rather than F (F is an ordinary binding and can be reassigned)
options(dplyr.summarise.inform = FALSE)

# use effects contrast coding to interpret effects from categorical variables as main effects 
options(contrasts = c("contr.sum", "contr.poly"))

3 Functions

3.2 Round off

# Round half away from zero (unlike base round(), which rounds half to even).
# Works on the magnitude and reapplies the sign, so positive and negative
# halves are treated symmetrically, e.g. round.off(2.5) == 3, round.off(-2.5) == -3.
#
# x      : numeric vector to round
# digits : number of decimal places (default 0)
#
# The intermediate trunc() at digits+1 guards against floating-point
# representations that sit just below the half boundary.
round.off = function (x, digits=0) 
{
  posneg = sign(x)
  z = trunc(abs(x) * 10 ^ (digits + 1)) / 10
  # fixed: apply the sign AFTER rounding the magnitude; the original applied
  # it before floor(z + 0.5), which made negative halves round toward +Inf
  # (round.off(-2.5) gave -2 instead of -3)
  z = posneg * floor(z + 0.5) / 10 ^ digits
  return(z)
}

3.3 Blickets: Selections plot

fun_plot_selections_blickets = function(data){
  # Bar chart of selection percentages per response category, with
  # bootstrapped confidence intervals and a unicode shape symbol drawn
  # above each bar.
  # Expects columns: blicket_response, n, low, high, color.
  df.bars = data %>%
    mutate(total = sum(n)) %>%
    mutate(perc = (n / total) * 100,
           low_perc = (low / total) * 100,
           high_perc = (high / total) * 100)
  
  # one symbol per response option, placed near the top of the panel
  df.shapes = tibble(x = sort(df.bars$blicket_response),
                     y = 95,
                     symbol = c("\u25A0", "\u25A1", "\u25CF", "\u25CB"))
  
  p = ggplot(data = df.bars,
             mapping = aes(x = blicket_response,
                           y = perc)) +
    geom_col(mapping = aes(fill = color),
             color = "black",
             show.legend = F) +
    geom_text(data = df.shapes,
              mapping = aes(x = x,
                            y = y,
                            label = symbol),
              size = 20,
              show.legend = F,
              family = "Arial Unicode MS") + 
    geom_linerange(mapping = aes(x = blicket_response,
                                 ymin = low_perc, 
                                 ymax = high_perc)) +
    labs(y = "% selected") +
    scale_fill_manual(values = c("0" = "gray80",
                                 "1" = "orange",
                                 "2" = "darkgreen")) + 
    scale_y_continuous(limits = c(0, 102),
                       breaks = seq(0, 100, 20),
                       labels = function(x) paste0(x, "%"),
                       expand = expansion(add = c(0, 0))) + 
    theme(axis.title.x = element_blank(),
          text = element_text(size = 24))
  p
}

3.4 Physics: Selections plot

# Faceted bar chart of selection percentages for the physics experiments.
# Each end_position facet gets a stimulus image and a panel number drawn
# above it; bars show % selected per response option with bootstrapped CIs.
# `exp` selects which stimulus PNGs to load from ../../figures/stimuli/physics/.
# Expects columns: end_position, selected_position, n, low, high, fill,
# condition (and ramp_orientation when exp == "exp3").
fun_plot_selections_physics = function(data, exp="exp1"){
  
  # choose the image loader for this experiment variant
  if(exp == "exp2.conjunctive"){
    fun_load_image = function(image){
      readPNG(str_c("../../figures/stimuli/physics/conjunction_position", image, ".png"))
    }
  }
  else if(exp == "exp3"){
    # exp3 stimuli additionally vary by ramp orientation
    ramp_index = unique(data$ramp_orientation)
    fun_load_image = function(image){
      readPNG(str_c("../../figures/stimuli/physics/",
                    ramp_index,
                    "_position",
                    image,
                    ".png"))
    }
  }else{
    fun_load_image = function(image){
      readPNG(str_c("../../figures/stimuli/physics/position", image, ".png"))
    }
  }

  # Calculate percentages and CIs as percent (totals per end_position facet)
  df.percent = data %>%
    group_by(end_position) %>%
    mutate(total = sum(n)) %>%
    ungroup() %>%
    mutate(percentage = (n / total) * 100,
           low_pct = (low / total) * 100,
           high_pct = (high / total) * 100)

  # one stimulus image per facet
  df.images = df.percent %>%
    distinct(end_position) %>%
    mutate(grob = map(.x = end_position,
                      .f = ~ fun_load_image(image = .x)))

  # facet number label drawn on top of each image
  df.text = df.percent %>%
    distinct(end_position) %>%
    mutate(x = 4,
           y = 160,
           text = 1:4)

  # NOTE(review): `exp` is a character string everywhere else ("exp1",
  # "exp2.conjunctive", "exp3"), so `exp == 8` can never be TRUE as written —
  # confirm the intended condition (possibly a leftover from an earlier version).
  if(exp == 8){
    df.text$y = 165
  }
  
  plot = ggplot(data = df.percent,
                mapping = aes(x = selected_position,
                              y = percentage,
                              fill = fill)) +
    geom_bar(stat = "identity",
             color = "black") +
    geom_linerange(mapping = aes(ymin = low_pct,
                                 ymax = high_pct)) +
    # images are anchored at the top-left of each facet and drawn outside the
    # panel (clip = "off" below); fill = NA keeps these layers out of the
    # bar-fill scale — presumably to suppress a spurious legend entry
    geom_custom(data = df.images,
                mapping = aes(data = grob,
                              x = -Inf,
                              y = Inf,
                              fill = NA),
                grob_fun = function(x) rasterGrob(x,
                                                  interpolate = T,
                                                  vjust = -0.05,
                                                  hjust = 0)) +
    geom_text(data = df.text,
              mapping = aes(x = x,
                            y = y,
                            label = text,
                            fill = NA),
              color = "white",
              size = 8) +
    facet_grid(cols = vars(end_position),
               rows = vars(condition)) +
    labs(x = "response option",
         y = "% selected") +
    scale_fill_manual(values = c("0" = "gray80",
                                 "1" = "darkgreen",
                                 "2" = "orange")) +
    scale_y_continuous(breaks = seq(0, 100, 20),
                       labels = function(x) paste0(x, "%"),
                       expand = expansion(add = c(0, 5))) +
    coord_cartesian(clip = "off",
                    ylim = c(0, 105)) +
    theme(legend.position = "none",
          panel.grid.major.y = element_line(),
          strip.background.x = element_blank(),
          strip.text.x = element_blank(),
          plot.margin = margin(t = 3, l = 0.2, r = 0.2, b = 0.1, unit = "cm"))

  return(plot)
}

3.5 Accuracy plot

fun_plot_accuracy = function(data, limit){
  # Accuracy-by-trial plot: dashed chance line at 50%, a no-intercept
  # logistic trend, and bootstrapped mean CIs per trial.
  # `limit` is the number of trials shown on the x-axis.
  df = data %>% 
    mutate(trial_index = trial_index - 1)  # shift so the first trial sits at x = 0
  
  p = ggplot(data = df,
             mapping = aes(x = trial_index,
                           y = accuracy)) + 
    geom_hline(yintercept = 0.5,
               linetype = "dashed") +
    geom_smooth(method = "glm",
                formula = "y ~ 0 + x",
                method.args = list(family = "binomial"),
                se = F,
                color = "gray80") +
    stat_summary(fun.data = "mean_cl_boot",
                 fill = "darkgreen",
                 shape = 21,
                 size = 1) + 
    scale_x_continuous(breaks = 0:(limit-1),
                       labels = 1:limit,
                       limits = c(0, limit-1)) +
    scale_y_continuous(breaks = seq(0.4, 1, 0.1),
                       labels = str_c(seq(40, 100, 10), "%")) +
    coord_cartesian(ylim = c(0.35, 1)) + 
    labs(x = "trial",
         y = "accuracy") + 
    theme(panel.grid.major.y = element_line(),
          text = element_text(size = 24))
  p
}

3.6 Multinomial regression output

# Format a multinomial regression coefficient as
# "B = <estimate>, 95% CI [<low>, <high>], p = <p>", with the p-value
# rendered without a leading zero via rstatix::p_format.
fun_regression_output = function(estimate, conf.low, conf.high, p.value){
  p_string = p_format(p.value, leading.zero = F)
  ci_string = str_c(", 95% CI [", conf.low, ", ", conf.high, "],")
  str_c("B = ", estimate, ci_string, " p = ", p_string)
}

4 EXPERIMENT 1: FEEDBACK

4.1 DATA

4.1.1 Read data

# trial-level responses
df.exp1.feedback.trials = read.csv("../../data/experiment1/feedback/experiment1_feedback-trials.csv")
# participant-level records
df.exp1.feedback.participants = read.csv("../../data/experiment1/feedback/experiment1_feedback-participants.csv")
# mapping from workerid to Prolific worker IDs
df.exp1.feedback.prolific_ids = read.csv("../../data/experiment1/feedback/experiment1_feedback-workerids.csv")

4.1.2 Wrangle

# remove pilot data (rows collected under proliferate.condition == "pilot")
df.exp1.feedback.trials = df.exp1.feedback.trials %>% 
  filter(proliferate.condition != "pilot")

# summarize pop quiz performance for each participant
# Response strings encode color in character 1 and shape in character 2;
# each is scored against the correct response. The combination of the two
# scores, relative to whether the rule was about color or shape, yields the
# four-way blicket_response classification.
df.exp1.feedback.pop_quiz = df.exp1.feedback.trials %>%
  filter(trial=="pop_quiz") %>% 
  mutate(
    # 1 if the color (char 1) / shape (char 2) of the response matches
    correct_color = case_when(substr(correct_response, 1, 1) == substr(response, 1, 1) ~ 1, TRUE ~ 0),
    correct_shape = case_when(substr(correct_response, 2, 2) == substr(response, 2, 2) ~ 1, TRUE ~ 0),
    # which feature dimension the rule was about
    rule_class = case_when(rule=="cube"|rule=="cylinder" ~ "shape",
                           rule=="dark"|rule=="light" ~ "color"),
    # classify the response by congruence with the rule-relevant dimension;
    # case_when takes the first matching branch, so the fully (in)congruent
    # cases are caught before the partial ones
    blicket_response = case_when(
      correct_color == 1 & correct_shape == 1 ~ "fully_congruent",
      correct_color == 0 & correct_shape == 0 ~ "fully_incongruent",
      rule_class == "color" & correct_color == 1 ~ "rule_congruent",
      rule_class == "shape" & correct_shape == 1 ~ "rule_congruent",
      rule_class == "color" & correct_shape == 1 ~ "rule_incongruent",
      rule_class == "shape" & correct_color == 1 ~ "rule_incongruent"),
    # binary accuracy on the rule-relevant dimension only
    rule_relevant_correct = case_when(
      (rule_class == "color" & correct_color == 1)|(rule_class == "shape" & correct_shape == 1) ~ 1,
      (rule_class == "color" & correct_color == 0)|(rule_class == "shape" & correct_shape == 0) ~ 0),
    # binary accuracy on the rule-irrelevant dimension only
    rule_irrelevant_correct = case_when(
      (rule_class == "color" & correct_shape == 1)|(rule_class == "shape" & correct_color == 1) ~ 1,
      (rule_class == "color" & correct_shape == 0)|(rule_class == "shape" & correct_color == 0) ~ 0),
    # machine outcome is encoded in the proliferate condition label
    on_off = case_when(
      grepl("on", proliferate.condition) ~ "on",
      grepl("off", proliferate.condition) ~ "off"))%>% 
  select(workerid, correct_color, correct_shape, rule_class, rule, blicket_response, on_off, rule_relevant_correct, rule_irrelevant_correct) 

# summarize performance on each trial type for each participant
# (one row per participant, with rt_* and accuracy_* columns per trial type)
df.exp1.feedback.participant_summary = df.exp1.feedback.trials %>% 
  select(-question_order, -error) %>% 
  # accuracy arrives as the strings "True"/"False"; recode to 1/0
  # (any other value becomes NA via case_when's default)
  mutate(accuracy = case_when(accuracy== "True" ~1, accuracy=="False"~0)) %>% 
  group_by(workerid, rule, trial) %>% 
  summarize(accuracy = mean(accuracy), rt = mean(rt)) %>% 
  inner_join(df.exp1.feedback.pop_quiz,
             by = c("workerid", "rule")) %>% # add back in pop_quiz info
  pivot_wider(names_from = trial, values_from = c(rt, accuracy)) %>% # one participant per line
  ungroup()

# full dataset (specific info from every trial, along with summary info)
df.exp1.feedback.full = df.exp1.feedback.trials %>% 
  # keep only the per-trial columns needed for plotting
  select(workerid,
         accuracy_bool,
         rt,
         trial,
         trial_index) %>% 
  # attach the participant-level summary to every trial row
  inner_join(df.exp1.feedback.participant_summary,
             by = "workerid") %>% 
  # fixed: removed trailing comma, which errors in current dplyr
  rename(accuracy = accuracy_bool) %>% 
  group_by(workerid) %>% 
  # make sure trials are 1-18: blicket trials occupy every other raw index,
  # so map them to 1-16; pop quiz and rule question get fixed slots 17 and 18
  mutate(first_blicket_index = min(trial_index),
         trial_index = case_when(
           trial == "blicket" ~ (trial_index - first_blicket_index) / 2 + 1,
           trial == "pop_quiz" ~ 17,
           trial == "rule_question" ~ 18)) %>% 
  select(-first_blicket_index) %>% 
  ungroup()

4.2 STATS

4.2.1 Power analysis

# make the power simulation reproducible
set.seed(1)

# number of simulated experiments per sample size
simulations = 1000
# candidate sample sizes to evaluate
n = seq(50, 150, 10)
# hypothesized response-category probabilities (see labels in fun_power)
probabilities = c(0.6, 0.25, 0.1, 0.05)

# Simulate one experiment of n participants drawn from the hypothesized
# response distribution (`probabilities`, defined above) and return the
# p-value for the "rule-incongruent" contrast of an intercept-only
# multinomial model. The first factor level ("rule-congruent") serves as
# the reference category.
fun_power = function(n){
  df.simulation = tibble(data = sample(1:4,
                                       size = n,
                                       replace = T,
                                       prob = probabilities)) %>% 
    mutate(data = factor(data, 
                         levels = c(2, 3, 4, 1),
                         labels = c("rule-congruent",
                                    "rule-incongruent",
                                    "incongruent",
                                    "congruent")))
  
  # fit, tidy, and pull the intercept p-value for the rule-incongruent level
  pvalue = multinom(formula = data ~ 1,
                    data = df.simulation,
                    trace = F) %>% 
    tidy() %>% 
    filter(y.level == "rule-incongruent") %>% 
    pull(p.value) %>% 
    .[1]
  
  return(pvalue)
}

# run `simulations` simulated experiments at each candidate sample size and
# compute power as the proportion of p-values below .05
df.power = expand_grid(simulation = 1:simulations, 
                       n = n) %>% 
  mutate(pvalue = map_dbl(.x = n,
                          .f = ~ fun_power(.x))) %>% 
  group_by(n) %>% 
  summarize(power = sum(pvalue < .05,
                        na.rm = T) / simulations)

# save(df.power,
#      file = "cache/power_analysis.RData")
# NOTE(review): the load() below immediately overwrites the freshly computed
# df.power with the cached version — consider commenting out the pipeline
# above (or guarding it with file.exists()) to avoid redundant computation.
load(file = "cache/power_analysis.RData")

# power curve across candidate sample sizes, with the conventional 80%
# power threshold marked as a dashed line
ggplot(data = df.power,
       mapping = aes(x = n, y = power)) + 
  geom_hline(yintercept = 0.8, linetype = "dashed") + 
  geom_line() + 
  geom_point() + 
  scale_x_continuous(breaks = df.power$n, labels = df.power$n)

4.2.2 Bootstrap counts

# make reproducible
set.seed(1)

# get bootstrapped confidence intervals: resample participants 1000 times,
# count responses per category in each resample, and take the 2.5/97.5
# percentiles of those counts
df.exp1.feedback.bootstraps = df.exp1.feedback.participant_summary %>%
  select(blicket_response) %>% 
  bootstrap(n = 1000) %>% # create 1000 bootstrapped samples
  mutate(counts = map(.x = strap,
                            .f = ~ .x %>% 
                              as_tibble() %>% 
                              group_by(blicket_response) %>% 
                              count(blicket_response))) %>% 
  select(-strap) %>% 
  unnest(counts) %>% 
  group_by(blicket_response) %>% 
  summarize(mean = mean(n),
            low = quantile(n, 0.025), # calculate the 2.5 / 97.5 percentiles
            high = quantile(n, 0.975))

# create data for plot: observed counts joined with the bootstrap CIs
df.exp1.feedback.bootstraps = df.exp1.feedback.participant_summary %>% 
  group_by(blicket_response) %>% 
  count(blicket_response) %>% 
  inner_join(df.exp1.feedback.bootstraps,
             by = "blicket_response") %>% 
  ungroup() %>% 
  # NOTE(review): colors are assigned by row position, which depends on the
  # (alphabetical) ordering of blicket_response groups — confirm the mapping
  mutate(color = as.factor(c(2, 0, 1, 0))) 

4.2.3 Multinomial regression

# reference = "rule incongruent"
df.model = df.exp1.feedback.participant_summary %>% 
  mutate(blicket_response = fct_relevel(blicket_response, "rule_incongruent"))

# intercept-only multinomial model: tests whether the other response
# categories differ in frequency from the rule_incongruent reference
fit = multinom(formula = blicket_response ~ 1,
               data = df.model,
               trace = F)

# broom.mixed summary, rounded to 2 decimals, keeping only the
# rule_congruent-vs-reference contrast
df.tidy = tidy(fit,
               conf.int = T) %>% 
  mutate(across(where(is.numeric), ~ round(., 2))) %>% 
  filter(y.level == "rule_congruent")

fun_regression_output(df.tidy$estimate,
                      df.tidy$conf.low,
                      df.tidy$conf.high,
                      df.tidy$p.value)
[1] "B = 0.55, 95% CI [-0.01, 1.12], p = .06"

4.2.4 Accuracy as a function of outcome

# cross-tabulate machine outcome (on/off) against pop-quiz accuracy (0/1)
# and compute the proportion correct within each outcome
df.exp1.feedback.accuracy_outcome = df.exp1.feedback.participant_summary %>% 
  count(on_off, accuracy_pop_quiz) %>% 
  pivot_wider(names_from = accuracy_pop_quiz,
              values_from = n) %>% 
  mutate(percentage = `1` / (`0` + `1`))

# NOTE(review): print_table() is not defined in this file — presumably a
# project helper; verify it is loaded before this chunk runs
df.exp1.feedback.accuracy_outcome %>% 
  print_table()
on_off 0 1 percentage
off 37 23 0.38
on 29 31 0.52

4.3 FIGURES

4.3.1 Accuracy

# accuracy across the 16 blicket learning trials
df.plot = filter(df.exp1.feedback.full, trial == "blicket")

plot.exp1.feedback.accuracy = fun_plot_accuracy(data = df.plot,
                                                limit = 16)
plot.exp1.feedback.accuracy

4.3.2 Selections

# relabel response categories for plotting (line breaks keep axis labels compact)
resp_levels = c("fully_congruent",
                "rule_congruent",
                "rule_incongruent",
                "fully_incongruent")
resp_labels = c("congruent",
                "rule\ncongruent",
                "rule\nincongruent",
                "incongruent")

df.plot = df.exp1.feedback.bootstraps %>% 
  mutate(blicket_response = factor(blicket_response,
                                   levels = resp_levels,
                                   labels = resp_labels))

plot.exp1.feedback.selections = fun_plot_selections_blickets(data = df.plot)
plot.exp1.feedback.selections

5 EXPERIMENT 1: NO FEEDBACK

5.1 DATA

5.1.1 Read data

# trial-level responses
df.exp1.no_feedback.trials = read.csv("../../data/experiment1/no_feedback/experiment1_no_feedback-trials.csv")
# participant-level records
df.exp1.no_feedback.participants = read.csv("../../data/experiment1/no_feedback/experiment1_no_feedback-participants.csv")
# mapping from workerid to Prolific worker IDs
df.exp1.no_feedback.prolific_ids = read.csv("../../data/experiment1/no_feedback/experiment1_no_feedback-workerids.csv")

5.1.2 Wrangle

# summarize pop quiz performance for each participant
# (same scoring scheme as the feedback condition: color = response char 1,
# shape = char 2; blicket_response classifies congruence with the rule)
df.exp1.no_feedback.pop_quiz = df.exp1.no_feedback.trials %>%
  filter(trial=="pop_quiz") %>% 
  mutate(
    # 1 if the color / shape character of the response matches the answer
    correct_color = case_when(substr(correct_response, 1, 1) == substr(response, 1, 1) ~ 1, TRUE ~ 0),
    correct_shape = case_when(substr(correct_response, 2, 2) == substr(response, 2, 2) ~ 1, TRUE ~ 0),
    # which feature dimension the rule was about
    rule_class = case_when(rule=="cube"|rule=="cylinder" ~ "shape",
                           rule=="dark"|rule=="light" ~ "color"),
    # four-way congruence classification; first matching branch wins
    blicket_response = case_when(
      correct_color == 1 & correct_shape == 1 ~ "fully_congruent",
      correct_color == 0 & correct_shape == 0 ~ "fully_incongruent",
      rule_class == "color" & correct_color == 1 ~ "rule_congruent",
      rule_class == "shape" & correct_shape == 1 ~ "rule_congruent",
      rule_class == "color" & correct_shape == 1 ~ "rule_incongruent",
      rule_class == "shape" & correct_color == 1 ~ "rule_incongruent"),
    # accuracy on the rule-relevant dimension only
    rule_relevant_correct = case_when(
      (rule_class == "color" & correct_color == 1)|(rule_class == "shape" & correct_shape == 1) ~ 1,
      (rule_class == "color" & correct_color == 0)|(rule_class == "shape" & correct_shape == 0) ~ 0),
    # accuracy on the rule-irrelevant dimension only
    rule_irrelevant_correct = case_when(
      (rule_class == "color" & correct_shape == 1)|(rule_class == "shape" & correct_color == 1) ~ 1,
      (rule_class == "color" & correct_shape == 0)|(rule_class == "shape" & correct_color == 0) ~ 0),
    # machine outcome is encoded in the proliferate condition label
    on_off = case_when(
      grepl("on", proliferate.condition) ~ "on",
      grepl("off", proliferate.condition) ~ "off"))%>% 
  select(workerid, correct_color, correct_shape, rule_class, rule, blicket_response, on_off, rule_relevant_correct, rule_irrelevant_correct)

#summarize performance on each trial type for each participant
df.exp1.no_feedback.participant_summary = df.exp1.no_feedback.trials %>% 
  select(-question_order, -error) %>% 
  mutate(accuracy = case_when(accuracy== "True" ~1, accuracy=="False"~0)) %>% 
  group_by(workerid, rule, trial) %>% 
  summarize(accuracy = mean(accuracy), rt = mean(rt)) %>% 
  inner_join(df.exp1.no_feedback.pop_quiz,
             by = c("workerid", "rule")) %>% #add back in pop_quiz info
  pivot_wider(names_from = trial, values_from = c(rt, accuracy)) %>%  #one participant per line
  ungroup()


# full dataset (specific info from every trial, along with summary info)
df.exp1.no_feedback.full = df.exp1.no_feedback.trials %>% 
  # keep only the per-trial columns needed for plotting
  select(workerid,
         accuracy_bool,
         rt,
         trial,
         trial_index) %>% 
  # attach the participant-level summary to every trial row
  inner_join(df.exp1.no_feedback.participant_summary,
             by = "workerid") %>% 
  # fixed: removed trailing comma, which errors in current dplyr
  rename(accuracy = accuracy_bool) %>% 
  group_by(workerid) %>% 
  # make sure trials are 1-18: blicket trials occupy every other raw index,
  # so map them to 1-16; pop quiz and rule question get fixed slots 17 and 18
  mutate(first_blicket_index = min(trial_index),
         trial_index = case_when(
           trial == "blicket" ~ (trial_index - first_blicket_index) / 2 + 1,
           trial == "pop_quiz" ~ 17,
           trial == "rule_question" ~ 18)) %>% 
  select(-first_blicket_index) %>% 
  # fixed: ungroup to match the parallel pipelines for the other experiment
  # versions, all of which return ungrouped data
  ungroup()

5.2 STATS

5.2.1 Bootstrap counts

# make reproducible
set.seed(1)

# get bootstrapped confidence intervals: resample participants 1000 times,
# count responses per category in each resample, and take the 2.5/97.5
# percentiles of those counts
df.exp1.no_feedback.bootstraps = df.exp1.no_feedback.participant_summary %>%
  select(blicket_response) %>% 
  bootstrap(n = 1000) %>% # create 1000 bootstrapped samples
  mutate(counts = map(.x = strap,
                            .f = ~ .x %>% 
                              as_tibble() %>% 
                              group_by(blicket_response) %>% 
                              count(blicket_response)))%>% 
  select(-strap) %>% 
  unnest(counts) %>% 
  group_by(blicket_response) %>% 
  summarize(mean = mean(n),
            low = quantile(n, 0.025), # calculate the 2.5 / 97.5 percentiles
            high = quantile(n, 0.975))

# create data for plot: observed counts joined with the bootstrap CIs
df.exp1.no_feedback.bootstraps = df.exp1.no_feedback.participant_summary %>% 
  group_by(blicket_response) %>% 
  count(blicket_response) %>% 
  inner_join(df.exp1.no_feedback.bootstraps,
             by = "blicket_response") %>% 
  ungroup() %>% 
  # NOTE(review): colors are assigned by row position, which depends on the
  # (alphabetical) ordering of blicket_response groups — confirm the mapping
  mutate(color = as.factor(c(2, 0, 1, 0))) 

5.2.2 Multinomial regression

# reference = "rule incongruent"
df.model = df.exp1.no_feedback.participant_summary %>% 
  mutate(blicket_response = fct_relevel(blicket_response, "rule_incongruent"))

# intercept-only multinomial model: tests whether the other response
# categories differ in frequency from the rule_incongruent reference
fit = multinom(formula = blicket_response ~ 1,
               data = df.model,
               trace = F)

# broom.mixed summary, rounded to 2 decimals, keeping only the
# rule_congruent-vs-reference contrast
df.tidy = tidy(fit,
               conf.int = T) %>% 
  mutate(across(where(is.numeric), ~ round(., 2))) %>% 
  filter(y.level == "rule_congruent")
  
fun_regression_output(df.tidy$estimate,
                      df.tidy$conf.low,
                      df.tidy$conf.high,
                      df.tidy$p.value)
[1] "B = 0.77, 95% CI [0.21, 1.33], p = .01"

5.2.3 Accuracy as a function of outcome

# cross-tabulate machine outcome (on/off) against pop-quiz accuracy (0/1)
# and compute the proportion correct within each outcome
df.exp1.no_feedback.accuracy_outcome = df.exp1.no_feedback.participant_summary %>% 
  count(on_off, accuracy_pop_quiz) %>% 
  pivot_wider(names_from = accuracy_pop_quiz,
              values_from = n) %>% 
  mutate(percentage = `1` / (`0` + `1`))

df.exp1.no_feedback.accuracy_outcome %>% 
  print_table()
on_off 0 1 percentage
off 35 25 0.42
on 31 27 0.47

5.3 FIGURES

5.3.1 Accuracy

# accuracy across the 16 blicket learning trials
df.plot = filter(df.exp1.no_feedback.full, trial == "blicket")

plot.exp1.no_feedback.accuracy = fun_plot_accuracy(data = df.plot,
                                                   limit = 16)
plot.exp1.no_feedback.accuracy

5.3.2 Selections

# relabel response categories for plotting (line breaks keep axis labels compact)
resp_levels = c("fully_congruent",
                "rule_congruent",
                "rule_incongruent",
                "fully_incongruent")
resp_labels = c("congruent",
                "rule\ncongruent",
                "rule\nincongruent",
                "incongruent")

df.plot = df.exp1.no_feedback.bootstraps %>% 
  mutate(blicket_response = factor(blicket_response,
                                   levels = resp_levels,
                                   labels = resp_labels))

plot.exp1.no_feedback.selections = fun_plot_selections_blickets(data = df.plot)
plot.exp1.no_feedback.selections

6 EXPERIMENT 1: SHORT

6.1 DATA

6.1.1 Read data

# trial-level responses
df.exp1.short.trials = read.csv("../../data/experiment1/short/experiment1_short-trials.csv")
# participant-level records
df.exp1.short.participants = read.csv("../../data/experiment1/short/experiment1_short-participants.csv")
# mapping from workerid to Prolific worker IDs
df.exp1.short.prolific_ids = read.csv("../../data/experiment1/short/experiment1_short-workerids.csv")

6.1.2 Wrangle

# summarize pop quiz performance for each participant
# (same scoring scheme as the other experiment 1 versions: color = response
# char 1, shape = char 2; blicket_response classifies congruence with the rule)
df.exp1.short.pop_quiz = df.exp1.short.trials %>%
  filter(trial=="pop_quiz") %>% 
  mutate(
    # 1 if the color / shape character of the response matches the answer
    correct_color = case_when(substr(correct_response, 1, 1) == substr(response, 1, 1) ~ 1, TRUE ~ 0),
    correct_shape = case_when(substr(correct_response, 2, 2) == substr(response, 2, 2) ~ 1, TRUE ~ 0),
    # which feature dimension the rule was about
    rule_class = case_when(rule=="cube"|rule=="cylinder" ~ "shape",
                           rule=="dark"|rule=="light" ~ "color"),
    # four-way congruence classification; first matching branch wins
    blicket_response = case_when(
      correct_color == 1 & correct_shape == 1 ~ "fully_congruent",
      correct_color == 0 & correct_shape == 0 ~ "fully_incongruent",
      rule_class == "color" & correct_color == 1 ~ "rule_congruent",
      rule_class == "shape" & correct_shape == 1 ~ "rule_congruent",
      rule_class == "color" & correct_shape == 1 ~ "rule_incongruent",
      rule_class == "shape" & correct_color == 1 ~ "rule_incongruent"),
    # accuracy on the rule-relevant dimension only
    rule_relevant_correct = case_when(
      (rule_class == "color" & correct_color == 1)|(rule_class == "shape" & correct_shape == 1) ~ 1,
      (rule_class == "color" & correct_color == 0)|(rule_class == "shape" & correct_shape == 0) ~ 0),
    # accuracy on the rule-irrelevant dimension only
    rule_irrelevant_correct = case_when(
      (rule_class == "color" & correct_shape == 1)|(rule_class == "shape" & correct_color == 1) ~ 1,
      (rule_class == "color" & correct_shape == 0)|(rule_class == "shape" & correct_color == 0) ~ 0),
    # machine outcome is encoded in the proliferate condition label
    on_off = case_when(
      grepl("on", proliferate.condition) ~ "on",
      grepl("off", proliferate.condition) ~ "off"))%>% 
  select(workerid, correct_color, correct_shape, rule_class, rule, blicket_response, on_off, rule_relevant_correct, rule_irrelevant_correct)

# summarize performance on each trial type for each participant
# (one row per participant, with rt_* and accuracy_* columns per trial type)
df.exp1.short.participant_summary = df.exp1.short.trials %>% 
  select(-question_order, -error) %>% 
  # accuracy arrives as the strings "True"/"False"; recode to 1/0
  mutate(accuracy = case_when(accuracy== "True" ~1, accuracy=="False"~0)) %>% 
  group_by(workerid, rule, trial) %>% 
  summarize(accuracy = mean(accuracy), rt = mean(rt)) %>% 
  inner_join(df.exp1.short.pop_quiz,
             by = c("workerid", "rule")) %>% # add back in pop_quiz info
  pivot_wider(names_from = trial, values_from = c(rt, accuracy)) %>%  # one participant per line
  ungroup()

# full dataset (specific info from every trial, along with summary info)
df.exp1.short.full = df.exp1.short.trials %>% 
  # keep only the per-trial columns needed for plotting
  select(workerid,
         accuracy_bool,
         rt,
         trial,
         trial_index) %>% 
  # attach the participant-level summary to every trial row
  inner_join(df.exp1.short.participant_summary,
             by = "workerid") %>% 
  # fixed: removed trailing comma, which errors in current dplyr
  rename(accuracy = accuracy_bool) %>% 
  group_by(workerid) %>% 
  # make sure trials are 1-18: blicket trials occupy every other raw index,
  # so map them to 1-16; pop quiz and rule question get fixed slots 17 and 18
  mutate(first_blicket_index = min(trial_index),
         trial_index = case_when(
           trial == "blicket" ~ (trial_index - first_blicket_index) / 2 + 1,
           trial == "pop_quiz" ~ 17,
           trial == "rule_question" ~ 18)) %>% 
  select(-first_blicket_index) %>% 
  ungroup()

6.2 STATS

6.2.1 Bootstrap counts

# make reproducible
set.seed(1)

# get bootstrapped confidence intervals: resample participants 1000 times,
# count responses per category in each resample, and take the 2.5/97.5
# percentiles of those counts
df.exp1.short.bootstraps = df.exp1.short.participant_summary %>%
  select(blicket_response) %>% 
  bootstrap(n = 1000) %>% # create 1000 bootstrapped samples
  mutate(counts = map(.x = strap,
                            .f = ~ .x %>% 
                              as_tibble() %>% 
                              group_by(blicket_response) %>% 
                              count(blicket_response)))%>% 
  select(-strap) %>% 
  unnest(counts) %>% 
  group_by(blicket_response) %>% 
  summarize(mean = mean(n),
            low = quantile(n, 0.025), # calculate the 2.5 / 97.5 percentiles
            high = quantile(n, 0.975))

# create data for plot: observed counts joined with the bootstrap CIs
df.exp1.short.bootstraps = df.exp1.short.participant_summary %>% 
  group_by(blicket_response) %>% 
  count(blicket_response) %>% 
  inner_join(df.exp1.short.bootstraps,
             by = "blicket_response") %>% 
  ungroup() %>% 
  # NOTE(review): colors are assigned by row position, which depends on the
  # (alphabetical) ordering of blicket_response groups — confirm the mapping
  mutate(color = as.factor(c(2, 0, 1, 0))) 

6.2.2 Multinomial Regression

# reference = "rule incongruent"
df.model = df.exp1.short.participant_summary %>% 
  mutate(blicket_response = fct_relevel(blicket_response, "rule_incongruent"))

# intercept-only multinomial model: tests whether the other response
# categories differ in frequency from the rule_incongruent reference
fit = multinom(formula = blicket_response ~ 1,
               data = df.model,
               trace = F)

# broom.mixed summary, rounded to 2 decimals, keeping only the
# rule_congruent-vs-reference contrast
df.tidy = tidy(fit,
               conf.int = T) %>% 
  mutate(across(where(is.numeric), ~ round(., 2))) %>% 
  filter(y.level == "rule_congruent")
  
fun_regression_output(df.tidy$estimate,
                      df.tidy$conf.low,
                      df.tidy$conf.high,
                      df.tidy$p.value)
[1] "B = 0.18, 95% CI [-0.5, 0.87], p = .6"

6.2.3 Accuracy as a function of outcome

# cross-tabulate machine outcome (on/off) against pop-quiz accuracy (0/1)
# and compute the proportion correct within each outcome
df.exp1.short.accuracy_outcome = df.exp1.short.participant_summary %>% 
  count(on_off, accuracy_pop_quiz) %>% 
  pivot_wider(names_from = accuracy_pop_quiz,
              values_from = n) %>% 
  mutate(percentage = `1` / (`0` + `1`))

df.exp1.short.accuracy_outcome %>% 
  print_table()
on_off 0 1 percentage
off 26 34 0.57
on 15 45 0.75

6.3 FIGURES

6.3.1 Accuracy

# accuracy across the 16 blicket learning trials
df.plot = filter(df.exp1.short.full, trial == "blicket")

plot.exp1.short.accuracy = fun_plot_accuracy(data = df.plot,
                                             limit = 16)
plot.exp1.short.accuracy

6.3.2 Selections

# relabel response categories for plotting (line breaks keep axis labels compact)
resp_levels = c("fully_congruent",
                "rule_congruent",
                "rule_incongruent",
                "fully_incongruent")
resp_labels = c("congruent",
                "rule\ncongruent",
                "rule\nincongruent",
                "incongruent")

df.plot = df.exp1.short.bootstraps %>% 
  mutate(blicket_response = factor(blicket_response,
                                   levels = resp_levels,
                                   labels = resp_labels))

plot.exp1.short.selections = fun_plot_selections_blickets(data = df.plot)
plot.exp1.short.selections

7 EXPERIMENT 1: CONJUNCTIVE

7.1 DATA

7.1.1 Read data

# trial-level responses
df.exp1.conjunctive.trials = read.csv("../../data/experiment1/conjunctive/experiment1_conjunctive-trials.csv")
# participant-level records
df.exp1.conjunctive.participants = read.csv("../../data/experiment1/conjunctive/experiment1_conjunctive-participants.csv")
# mapping from workerid to Prolific worker IDs
df.exp1.conjunctive.prolific_ids = read.csv("../../data/experiment1/conjunctive/experiment1_conjunctive-workerids.csv")

7.1.2 Wrangle

# summarize pop quiz performance for each participant
# In the conjunctive version both dimensions are rule-relevant, so responses
# are classified by which dimension(s) were answered correctly rather than
# by rule congruence.
df.exp1.conjunctive.pop_quiz = df.exp1.conjunctive.trials %>%
  filter(trial=="pop_quiz") %>% 
         # 1 if the color (char 1) / shape (char 2) of the response matches
  mutate(correct_color = case_when(substr(correct_response, 1, 1) == substr(response, 1, 1) ~ 1, TRUE ~ 0),
         correct_shape = case_when(substr(correct_response, 2, 2) == substr(response, 2, 2) ~ 1, TRUE ~ 0),
         # NOTE(review): "on" when characters 1-2 of `condition` equal
         # characters 4-5 — assumes a five-character condition code with the
         # two rule codes separated by one character; confirm the format
         on_off = case_when(substr(condition, 1, 2) == substr(condition, 4, 5) ~ "on", TRUE ~ "off"),
         # classify by which dimension(s) were answered correctly
         blicket_response = case_when(
           correct_color == 1 & correct_shape == 1 ~ "fully_congruent",
           correct_color == 0 & correct_shape == 0 ~ "fully_incongruent",
           correct_color == 1 & correct_shape == 0 ~ "correct_color",
           correct_color == 0 & correct_shape == 1 ~ "correct_shape")) %>% 
  select(workerid, correct_color, on_off, condition, correct_shape, blicket_response)

# summarize performance on each trial type for each participant
# (one row per participant, with rt_* and accuracy_* columns per trial type;
# unlike the other versions, not grouped by rule)
df.exp1.conjunctive.participant_summary = df.exp1.conjunctive.trials %>% 
  select(-question_order, -error) %>% 
  # accuracy arrives as the strings "True"/"False"; recode to 1/0
  mutate(accuracy = case_when(accuracy== "True" ~1, accuracy=="False"~0)) %>% 
  group_by(workerid, trial) %>% 
  summarize(accuracy = mean(accuracy), rt = mean(rt)) %>% 
  inner_join(df.exp1.conjunctive.pop_quiz,
             by = "workerid") %>% # add back in pop_quiz info
  pivot_wider(names_from = trial, values_from = c(rt, accuracy)) %>%  # one participant per line
  ungroup()

# full dataset (specific info from every trial, along with summary info)
df.exp1.conjunctive.full = df.exp1.conjunctive.trials %>% 
  # keep only the per-trial columns needed for plotting
  select(workerid,
         accuracy_bool,
         rt,
         trial,
         trial_index) %>% 
  # attach the participant-level summary to every trial row
  inner_join(df.exp1.conjunctive.participant_summary,
             by = "workerid") %>% 
  # fixed: removed trailing comma, which errors in current dplyr
  rename(accuracy = accuracy_bool) %>% 
  group_by(workerid) %>% 
  # make sure trials are 1-18: blicket trials occupy every other raw index,
  # so map them to 1-16; pop quiz and rule question get fixed slots 17 and 18
  mutate(first_blicket_index = min(trial_index),
         trial_index = case_when(
           trial == "blicket" ~ (trial_index - first_blicket_index) / 2 + 1,
           trial == "pop_quiz" ~ 17,
           trial == "rule_question" ~ 18)) %>% 
  select(-first_blicket_index) %>% 
  ungroup()

7.2 STATS

7.2.1 Bootstrap counts

#make reproducible
set.seed(1)

#get bootstrapped confidence intervals for the counts of each blicket_response category
df.exp1.conjunctive.bootstraps = df.exp1.conjunctive.participant_summary %>%
  select(blicket_response) %>% 
  bootstrap(n = 1000) %>% # create 1000 bootstrapped samples
  # count responses per category within each bootstrap sample
  mutate(counts = map(.x = strap,
                      .f = ~ .x %>% 
                        as_tibble() %>% 
                        group_by(blicket_response) %>% 
                        count(blicket_response))) %>% 
  select(-strap) %>% 
  unnest(counts) %>% 
  group_by(blicket_response) %>% 
  # mean count plus 95% percentile interval across the 1000 resamples
  summarize(mean = mean(n),
            low = quantile(n, 0.025), # calculate the 2.5 / 97.5 percentiles
            high = quantile(n, 0.975))

#create data for plot: observed counts joined with the bootstrap CIs
df.exp1.conjunctive.bootstraps = df.exp1.conjunctive.participant_summary %>% 
  group_by(blicket_response) %>% 
  count(blicket_response) %>% 
  inner_join(df.exp1.conjunctive.bootstraps,
             by = "blicket_response") %>% 
  ungroup() %>% 
  # NOTE(review): hard-coded highlight vector assumes exactly 4 response
  # categories in a fixed row order (3rd row highlighted) — confirm if the
  # set of observed categories ever changes
  mutate(color = as.factor(c(0, 0, 2, 0))) 

7.2.2 Multinomial Regression

7.2.3 Hypothesis 1: Color vs. shape

# reference = "correct color"
df.model = df.exp1.conjunctive.participant_summary %>% 
  mutate(blicket_response = fct_relevel(blicket_response, "correct_color"))

fit = multinom(formula = blicket_response ~ 1,
               data = df.model,
               trace = F)

# broom.mixed summary
df.tidy = tidy(fit,
               conf.int = T) %>% 
  mutate(across(where(is.numeric), ~ round(., 2))) %>% 
  filter(y.level == "correct_shape")

fun_regression_output(df.tidy$estimate,
                      df.tidy$conf.low,
                      df.tidy$conf.high,
                      df.tidy$p.value)
[1] "B = -0.36, 95% CI [-1, 0.28], p = .26"

7.2.4 Hypothesis 2: Fully incongruent vs. partially congruent

# reference = "rule incongruent"
df.model = df.exp1.conjunctive.participant_summary %>% 
  mutate(blicket_response = ifelse(blicket_response %in% c("correct_color", "correct_shape"),
                                   "partially_congruent", blicket_response),
         blicket_response = fct_relevel(blicket_response, "fully_incongruent"))

fit = multinom(formula = blicket_response ~ 1,
               data = df.model,
               trace = F)

# broom.mixed summary
df.tidy = tidy(fit,
               conf.int = T) %>% 
  mutate(across(where(is.numeric), ~ round(., 2))) %>% 
  filter(y.level == "partially_congruent")

str_c("B = ", df.tidy$estimate, 
      ", 95% CI [", df.tidy$conf.low, ", ", df.tidy$conf.high, "],",
      " p = ", p_format(df.tidy$p.value, leading.zero = F))
[1] "B = 1.72, 95% CI [0.91, 2.52], p = <.0001"

7.2.5 Accuracy as a function of outcome

df.exp1.conjunctive.accuracy_outcome = df.exp1.conjunctive.participant_summary %>% 
  count(on_off, accuracy_pop_quiz) %>% 
  pivot_wider(names_from = accuracy_pop_quiz,
              values_from = n) %>% 
  mutate(percentage = `1` / (`0` + `1`))

df.exp1.conjunctive.accuracy_outcome %>% 
  print_table()
on_off 0 1 percentage
off 39 51 0.57
on 7 23 0.77

7.3 FIGURES

7.3.1 Accuracy

# accuracy over the 16 blicket trials
df.plot = df.exp1.conjunctive.full %>% 
  filter(trial == "blicket")

plot.exp1.conjunctive.accuracy = fun_plot_accuracy(data = df.plot,
                                      limit = 16)
plot.exp1.conjunctive.accuracy

7.3.2 Selections

# order and relabel response categories for the selections plot
df.plot = df.exp1.conjunctive.bootstraps %>% 
  mutate(blicket_response = factor(blicket_response,
                                   levels = c("fully_congruent",
                                              "correct_shape",
                                              "correct_color",
                                              "fully_incongruent"),
                                   labels = c("congruent",
                                              "correct\nshape",
                                              "correct\ncolor",
                                              "incongruent")))

plot.exp1.conjunctive.selections = fun_plot_selections_blickets(data = df.plot)
plot.exp1.conjunctive.selections

8 EXPERIMENT 1: COMBINED

8.1 STATS

8.1.1 Demographics

# combine participant demographics from all four Experiment 1 conditions
df.exp1.participants = df.exp1.feedback.participants %>% 
  mutate(condition = "feedback") %>% 
  bind_rows(df.exp1.no_feedback.participants %>% 
              mutate(condition = "no feedback"),
            df.exp1.short.participants %>% 
              mutate(condition = "short"),
            df.exp1.conjunctive.participants %>% 
              mutate(condition = "conjunctive")) %>% 
    select(condition, workerid, age, gender, race, ethnicity)
  
# race
df.exp1.participants %>% 
  count(race) %>% 
  arrange(desc(n))
                           race   n
1                         White 368
2                         Asian  45
3        Black/African American  30
4                   Multiracial  27
5                                 8
6                      Hispanic   2
7 American Indian/Alaska Native   1
8                 White African   1
# ethnicity
df.exp1.participants %>% 
  count(ethnicity) %>% 
  arrange(desc(n))
     ethnicity   n
1 Non-Hispanic 428
2     Hispanic  39
3               15
# gender
df.exp1.participants %>% 
  count(gender) %>% 
  arrange(desc(n))
                                                                       gender
1                                                                      Female
2                                                                        Male
3                                                                  Non-binary
4                                                                            
5                                                                 Questioning
6 There are only TWO genders, male and female.  No need to add other choices.
7                                                          Transgender female
    n
1 267
2 192
3  12
4   8
5   1
6   1
7   1
# age
df.exp1.participants %>% 
  summarize(age_mean = mean(age),
            age_sd = sd(age)) %>% 
  print_table(digits = 0)
age_mean age_sd
38 14
# condition 
df.exp1.participants %>% 
  count(condition)
    condition   n
1 conjunctive 120
2    feedback 124
3 no feedback 118
4       short 120
df.exp1.participants %>% nrow()
[1] 482

8.1.2 Accuracy

# pop-quiz response proportions pooled over feedback + no-feedback conditions
df.exp1.feedback.pop_quiz %>% 
  mutate(condition = "feedback") %>% 
  bind_rows(df.exp1.no_feedback.pop_quiz %>% 
              mutate(condition = "no feedback")) %>% 
  count(blicket_response) %>% 
  mutate(p = n / sum(n)) %>% 
  print_table()
blicket_response n p
fully_congruent 106 0.45
fully_incongruent 23 0.10
rule_congruent 72 0.30
rule_incongruent 37 0.16

8.1.3 Surprise

# logistic regression: probability of a rule_congruent response by experiment
# (short is the reference level)
df.model =  bind_rows(df.exp1.feedback.participant_summary %>% 
                        mutate(experiment = "feedback"),
                      df.exp1.no_feedback.participant_summary %>% 
                        mutate(experiment = "no_feedback"),
                      df.exp1.short.participant_summary %>% 
                        mutate(experiment = "short")) %>% 
  mutate(response = ifelse(blicket_response == "rule_congruent", 1, 0),
         experiment = factor(experiment, levels = c("short", "feedback", "no_feedback")))

fit = glm(formula = response ~ 1 + experiment,
          family = "binomial",
          data = df.model)

# estimated probabilities per experiment plus Tukey-adjusted pairwise contrasts
fit %>% 
  emmeans(pairwise ~ experiment,
          type = "response")
$emmeans
 experiment   prob     SE  df asymp.LCL asymp.UCL
 short       0.150 0.0326 Inf    0.0966     0.226
 feedback    0.275 0.0408 Inf    0.2026     0.362
 no_feedback 0.331 0.0433 Inf    0.2517     0.420

Confidence level used: 0.95 
Intervals are back-transformed from the logit scale 

$contrasts
 contrast               odds.ratio    SE  df null z.ratio p.value
 short / feedback            0.465 0.152 Inf    1  -2.338  0.0508
 short / no_feedback         0.357 0.115 Inf    1  -3.195  0.0040
 feedback / no_feedback      0.768 0.217 Inf    1  -0.931  0.6206

P value adjustment: tukey method for comparing a family of 3 estimates 
Tests are performed on the log odds ratio scale 

8.2 FIGURES

8.2.1 Accuracy

# 2x2 panel of accuracy plots (one per condition), tagged A-D
plot.exp1.feedback.accuracy + 
  labs(title = "feedback condition") + 
  plot.exp1.no_feedback.accuracy + 
  labs(title = "no feedback condition") + 
  plot.exp1.short.accuracy + 
  labs(title = "short condition") + 
  plot.exp1.conjunctive.accuracy + 
  labs(title = "conjunctive condition") + 
  plot_layout(ncol = 2, 
              nrow = 2) + 
  plot_annotation(tag_levels = "A") & 
  theme(plot.tag = element_text(face = "bold"))

ggsave(filename = "../../figures/plots/exp1_accuracy.pdf", 
       width = 16,
       height = 10)

8.2.2 Selections

# 2x2 panel of selection plots (one per condition), tagged A-D
plot.exp1.feedback.selections + 
  labs(title = "feedback condition") + 
  plot.exp1.no_feedback.selections + 
  labs(title = "no feedback condition") + 
  plot.exp1.short.selections + 
  labs(title = "short condition") + 
  plot.exp1.conjunctive.selections + 
  labs(title = "conjunctive condition") + 
  plot_layout(ncol = 2, 
              nrow = 2) + 
  plot_annotation(tag_levels = "A") &
  theme(plot.tag = element_text(face = "bold"))

# cairo_pdf needed to render the "\n" multi-line axis labels correctly
ggsave(filename = "../../figures/plots/exp1_selections.pdf", 
       width = 16,
       height = 10,
       device = cairo_pdf)

8.3 TABLES

8.3.1 Accuracy as a function of outcome

# table: % correct by blicket outcome (on/off), one column per condition
bind_rows(df.exp1.feedback.accuracy_outcome %>% 
            mutate(condition = "feedback"),
          df.exp1.no_feedback.accuracy_outcome %>% 
            mutate(condition = "no feedback"),
          df.exp1.short.accuracy_outcome %>% 
            mutate(condition = "short"),
          df.exp1.conjunctive.accuracy_outcome %>% 
            mutate(condition = "conjunctive")) %>% 
  select(condition, on_off, percentage) %>% 
  pivot_wider(names_from = condition,
              values_from = percentage) %>% 
  rename(blicket = "on_off") %>% 
  mutate(blicket = factor(blicket,
                          levels = c("on", "off"),
                          labels = c("yes", "no")),
         # format proportions as whole-number percentages
         across(.cols = -blicket,
                .fns = ~ str_c(round(. * 100), "%"))) %>% 
  arrange(blicket) %>% 
  print_table()
blicket feedback no feedback short conjunctive
yes 52% 47% 75% 77%
no 38% 42% 57% 57%

9 EXPERIMENT 2: LONG

9.1 DATA

9.1.1 Read data

# raw data for Experiment 2, long condition
df.exp2.long.trials = read.csv("../../data/experiment2/long/experiment2_long-trials.csv")
df.exp2.long.participants = read.csv("../../data/experiment2/long/experiment2_long-participants.csv")
df.exp2.long.prolific_ids = read.csv("../../data/experiment2/long/experiment2_long-workerids.csv")

9.1.2 Wrangle data

# surprise-quiz trials only; `end_posision` is a typo in the raw data columns,
# copied here into a correctly-named end_position before being dropped
df.exp2.long.surprise_quiz = df.exp2.long.trials %>% 
  filter(trial == "surprise_quiz") %>% 
  mutate(end_position = end_posision,
         distance_from_correct = abs(selected_position - end_position)) %>% 
  select(-c(accuracy_bool,
            choices,
            correct,
            correct_response,
            crosses,
            end_posision,
            internal_node_id,
            question_order,
            response,
            stimulus,
            error,
            trial_type)) %>% 
  # convert 0-based positions to 1-based
  mutate(end_position = end_position + 1, 
         selected_position = selected_position + 1)

9.2 STATS

9.2.1 Bootstrap counts

# make reproducible
set.seed(1)

# bootstrap 95% CIs for selection counts; stratify by end_position so each
# resample preserves the number of trials per final ball position
df.exp2.long.bootstraps = df.exp2.long.surprise_quiz %>%
  bootstraps(times = 1000,
             strata = end_position) %>% # name the strata argument explicitly
  mutate(counts = map(.x = splits,
                      .f = ~ .x %>% 
                        as_tibble() %>% 
                        count(end_position, selected_position, .drop = FALSE))) %>% 
  select(-splits) %>% 
  unnest(counts) %>% 
  group_by(end_position, selected_position) %>% 
  summarize(low = quantile(n, 0.025), # 2.5 / 97.5 percentile bounds
            high = quantile(n, 0.975))

9.2.2 Hypothesis 1: Outcome-relevant features matter more

# effect-code final position: relevant = outcome-relevant feature present,
# irrelevant = outcome-irrelevant feature present (-1/1 coding)
df.exp2.long.hyp1 = df.exp2.long.surprise_quiz %>% 
  select(workerid, end_position, selected_position) %>% 
  mutate(relevant = ifelse(end_position %in% c(1, 2), -1, 1),
         irrelevant = ifelse(end_position %in% c(1, 3), -1, 1)) %>% 
  mutate(selected_position = factor(selected_position,
                                    levels = 1:4,
                                    ordered = T),
         across(.cols = c(relevant, irrelevant), .fns = ~ as.factor(.)))

# ordinal probit mixed model with participant random intercepts;
# fit is cached to file, so this chunk must stay unchanged to reuse the cache
fit.brm.exp2.long.hyp1 = brm(formula = selected_position ~ relevant * irrelevant + (1 | workerid),
                       family = cumulative(link = "probit"),
                       data = df.exp2.long.hyp1,
                       file = "cache/fit.brm.exp2.long.hyp1",
                       cores = 4, 
                       seed = 1) 

# posterior of the difference between the two coefficients (irrelevant - relevant)
fit.brm.exp2.long.hyp1 %>% 
  as_draws_df() %>% 
  select(b_relevant1, b_irrelevant1) %>% 
  mutate(difference = b_irrelevant1 - b_relevant1) %>% 
  summarize(mean = mean(difference),
            low = quantile(difference, 0.025),
            high = quantile(difference, 0.975)) %>% 
  print_table()
Warning: Dropping 'draws_df' class as required metadata was removed.
mean low high
0.85 0.7 1.01

9.2.3 Hypothesis 2: Outcome-congruent selection

# for end positions 2 and 3, code whether the error (if any) was
# outcome-congruent (1) or incongruent (0); other selections stay NA
df.exp2.long.hyp2 = df.exp2.long.surprise_quiz %>% 
  select(workerid, end_position, selected_position) %>% 
  filter(end_position %in% c(2, 3)) %>% 
  mutate(response = NA,
         response = ifelse(end_position == 2 & selected_position == 1, 1, response),
         response = ifelse(end_position == 2 & selected_position == 3, 0, response),
         response = ifelse(end_position == 3 & selected_position == 4, 1, response),
         response = ifelse(end_position == 3 & selected_position == 2, 0, response))

# Bayesian logistic mixed model; cached to file — keep chunk unchanged for cache reuse
fit.brm.exp2.long.hyp2 = brm(formula = response ~ 1 + (1 | workerid),
                       family = "bernoulli",
                       data = df.exp2.long.hyp2,
                       file = "cache/fit.brm.exp2.long.hyp2",
                       cores = 4, 
                       seed = 1,
                       control = list(adapt_delta = 0.9)) 

# back-transform the intercept (and CI) from log-odds to a percentage
fit.brm.exp2.long.hyp2 %>% 
  tidy() %>% 
  filter(effect == "fixed") %>% 
  select(estimate, contains("conf")) %>% 
  mutate(across(.cols = everything(),
                .fns = ~ inv.logit(.) * 100)) %>% 
  print_table()
estimate conf.low conf.high
90.09 74.55 98.87

9.3 FIGURES

9.3.1 Accuracy

# relabel raw trial indices to 1-16 per participant, then plot accuracy
df.plot = df.exp2.long.trials %>% 
  filter(trial == "prediction_trial") %>% 
  select(workerid, trial_index, correct) %>% 
  group_by(workerid) %>% 
  mutate(trial_index = factor(trial_index, 
                              levels = unique(trial_index),
                              labels = 1:16),
         trial_index = as.numeric(as.character(trial_index))) %>% 
  ungroup() %>% 
  mutate(accuracy = ifelse(correct == "True", 1, 0))

plot.exp2.long.accuracy = fun_plot_accuracy(data = df.plot,
                                      limit = 16)
plot.exp2.long.accuracy

9.3.2 Selections

# observed counts + bootstrap CIs; fill codes bars:
# 1 = correct selection, 2 = outcome-congruent neighbor, 0 = other
df.plot = df.exp2.long.surprise_quiz %>% 
  count(end_position, selected_position, .drop = F) %>% 
  left_join(df.exp2.long.bootstraps, 
            by = c("end_position", "selected_position")) %>% 
  mutate(fill = 0,
         fill = ifelse(end_position == selected_position, 1, fill),
         fill = ifelse(end_position == 1 & selected_position == 2, 2, fill),
         fill = ifelse(end_position == 2 & selected_position == 1, 2, fill),
         fill = ifelse(end_position == 3 & selected_position == 4, 2, fill),
         fill = ifelse(end_position == 4 & selected_position == 3, 2, fill),
         selected_position = as.factor(selected_position),
         fill = as.factor(fill),
         condition = "long")

fun_plot_selections_physics(data = df.plot)

10 EXPERIMENT 2: SHORT

10.1 DATA

10.1.1 Read data

# raw data for Experiment 2, short condition
df.exp2.short.trials = read.csv("../../data/experiment2/short/experiment2_short-trials.csv")
df.exp2.short.participants = read.csv("../../data/experiment2/short/experiment2_short-participants.csv")
df.exp2.short.prolific_ids = read.csv("../../data/experiment2/short/experiment2_short-workerids.csv")

10.1.2 Wrangle data

# surprise-quiz trials only; `end_posision` is a typo in the raw data columns,
# copied into a correctly-named end_position before being dropped
df.exp2.short.surprise_quiz = df.exp2.short.trials %>% 
  filter(trial == "surprise_quiz") %>% 
  mutate(end_position = end_posision,
         distance_from_correct = abs(selected_position - end_position)) %>% 
  select(-c(accuracy_bool,
            choices,
            correct,
            correct_response,
            crosses,
            end_posision,
            internal_node_id,
            question_order,
            response,
            stimulus,
            error, trial_type)) %>% 
  arrange(workerid) %>% # keep only the first 30 participants from each condition
  # (a redundant group_by(condition, workerid) that was immediately overridden
  # by group_by(condition) has been removed)
  group_by(condition) %>%
  slice(1:120) %>% #Each participant has 4 observations (keep 120 obs per condition)
  ungroup() %>% 
  # convert 0-based positions to 1-based
  mutate(end_position = end_position + 1, 
         selected_position = selected_position + 1)

10.2 STATS

10.2.1 Bootstrap counts

# make reproducible
set.seed(1)

# bootstrap 95% CIs for selection counts; stratify by end_position so each
# resample preserves the number of trials per final ball position
df.exp2.short.bootstraps = df.exp2.short.surprise_quiz %>%
  bootstraps(times = 1000,
             strata = end_position) %>% # name the strata argument explicitly
  mutate(counts = map(.x = splits,
                      .f = ~ .x %>% 
                        as_tibble() %>% 
                        count(end_position, selected_position, .drop = FALSE))) %>% 
  select(-splits) %>% 
  unnest(counts) %>% 
  group_by(end_position, selected_position) %>% 
  summarize(low = quantile(n, 0.025), # 2.5 / 97.5 percentile bounds
            high = quantile(n, 0.975))

10.2.2 Hypothesis 1: Outcome-relevant features matter more

# effect-code final position (same coding as the long condition):
# relevant / irrelevant feature present as -1/1 factors
df.exp2.short.hyp1 = df.exp2.short.surprise_quiz %>% 
  select(workerid, end_position, selected_position) %>% 
  mutate(relevant = ifelse(end_position %in% c(1, 2), -1, 1),
         irrelevant = ifelse(end_position %in% c(1, 3), -1, 1)) %>% 
  mutate(selected_position = factor(selected_position,
                                    levels = 1:4,
                                    ordered = T),
         across(.cols = c(relevant, irrelevant), .fns = ~ as.factor(.)))

# ordinal probit mixed model; cached to file — keep chunk unchanged for cache reuse
fit.brm.exp2.short.hyp1 = brm(formula = selected_position ~ relevant * irrelevant + (1 | workerid),
                       family = cumulative(link = "probit"),
                       data = df.exp2.short.hyp1,
                       file = "cache/fit.brm.exp2.short.hyp1",
                       cores = 4, 
                       seed = 1) 

# posterior of the difference between the two coefficients (irrelevant - relevant)
fit.brm.exp2.short.hyp1 %>% 
  as_draws_df() %>% 
  select(b_relevant1, b_irrelevant1) %>% 
  mutate(difference = b_irrelevant1 - b_relevant1) %>% 
  summarize(mean = mean(difference),
            low = quantile(difference, 0.025),
            high = quantile(difference, 0.975)) %>% 
  print_table()
Warning: Dropping 'draws_df' class as required metadata was removed.
mean low high
0.53 0.39 0.67

10.2.3 Hypothesis 2: Outcome-congruent selection

# for end positions 2 and 3, code outcome-congruent (1) vs. incongruent (0)
# neighbor errors; other selections stay NA
df.exp2.short.hyp2 = df.exp2.short.surprise_quiz %>% 
  select(workerid, end_position, selected_position) %>% 
  filter(end_position %in% c(2, 3)) %>% 
  mutate(response = NA,
         response = ifelse(end_position == 2 & selected_position == 1, 1, response),
         response = ifelse(end_position == 2 & selected_position == 3, 0, response),
         response = ifelse(end_position == 3 & selected_position == 4, 1, response),
         response = ifelse(end_position == 3 & selected_position == 2, 0, response))

# Bayesian logistic mixed model; cached to file — keep chunk unchanged for cache reuse
fit.brm.exp2.short.hyp2 = brm(formula = response ~ 1 + (1 | workerid),
                       family = "bernoulli",
                       data = df.exp2.short.hyp2,
                       file = "cache/fit.brm.exp2.short.hyp2",
                       cores = 4, 
                       seed = 1,
                       control = list(adapt_delta = 0.95)) 

# back-transform the intercept (and CI) from log-odds to a percentage
fit.brm.exp2.short.hyp2 %>% 
  tidy() %>% 
  filter(effect == "fixed") %>% 
  select(estimate, contains("conf")) %>% 
  mutate(across(.cols = everything(),
                .fns = ~ inv.logit(.) * 100)) %>% 
  print_table()
estimate conf.low conf.high
73.32 60.19 87.53

10.3 FIGURES

10.3.1 Accuracy

# relabel raw trial indices to 1-4 per participant, then plot accuracy
df.plot = df.exp2.short.trials %>% 
  filter(trial == "prediction_trial") %>% 
  select(workerid, trial_index, correct) %>% 
  group_by(workerid) %>% 
  mutate(trial_index = factor(trial_index, 
                              levels = unique(trial_index),
                              labels = 1:4),
         trial_index = as.numeric(as.character(trial_index))) %>% 
  ungroup() %>% 
  mutate(accuracy = ifelse(correct == "True", 1, 0))

plot.exp2.short.accuracy = fun_plot_accuracy(data = df.plot,
                                      limit = 4)
plot.exp2.short.accuracy

10.3.2 Selections

# observed counts + bootstrap CIs; fill codes bars:
# 1 = correct selection, 2 = outcome-congruent neighbor, 0 = other
df.plot = df.exp2.short.surprise_quiz %>% 
  count(end_position, selected_position, .drop = F) %>% 
  left_join(df.exp2.short.bootstraps, 
            by = c("end_position", "selected_position")) %>% 
  mutate(fill = 0,
         fill = ifelse(end_position == selected_position, 1, fill),
         fill = ifelse(end_position == 1 & selected_position == 2, 2, fill),
         fill = ifelse(end_position == 2 & selected_position == 1, 2, fill),
         fill = ifelse(end_position == 3 & selected_position == 4, 2, fill),
         fill = ifelse(end_position == 4 & selected_position == 3, 2, fill),
         selected_position = as.factor(selected_position),
         fill = as.factor(fill),
         condition = "short")

fun_plot_selections_physics(data = df.plot)

11 EXPERIMENT 2: CONJUNCTIVE

11.1 DATA

11.1.1 Read data

# raw data for Experiment 2, conjunctive condition
df.exp2.conjunctive.trials = read.csv("../../data/experiment2/conjunctive/experiment2_conjunctive-trials.csv")
df.exp2.conjunctive.participants = read.csv("../../data/experiment2/conjunctive/experiment2_conjunctive-participants.csv")
df.exp2.conjunctive.prolific_ids = read.csv("../../data/experiment2/conjunctive/experiment2_conjunctive-workerids.csv")

11.1.2 Wrangle data

# surprise-quiz trials only; `end_posision` is a typo in the raw data columns,
# copied into a correctly-named end_position before being dropped
df.exp2.conjunctive.surprise_quiz = df.exp2.conjunctive.trials %>% 
  filter(trial == "surprise_quiz") %>% 
  mutate(end_position = end_posision,
         distance_from_correct = abs(selected_position - end_position)) %>% 
  select(-c(accuracy_bool,
            choices,
            correct,
            correct_response,
            crosses,
            end_posision,
            internal_node_id,
            question_order,
            response,
            stimulus,
            error, trial_type)) %>% 
  # convert 0-based positions to 1-based
  mutate(end_position = end_position + 1, 
         selected_position = selected_position + 1)

11.2 STATS

11.2.1 Bootstrap counts

# make reproducible
set.seed(1)

# bootstrap 95% CIs for selection counts; stratify by end_position so each
# resample preserves the number of trials per final ball position
df.exp2.conjunctive.bootstraps = df.exp2.conjunctive.surprise_quiz %>%
  bootstraps(times = 1000,
             strata = end_position) %>% # name the strata argument explicitly
  mutate(counts = map(.x = splits,
                      .f = ~ .x %>% 
                        as_tibble() %>% 
                        count(end_position, selected_position, .drop = FALSE))) %>% 
  select(-splits) %>% 
  unnest(counts) %>% 
  group_by(end_position, selected_position) %>% 
  summarize(low = quantile(n, 0.025), # 2.5 / 97.5 percentile bounds
            high = quantile(n, 0.975))

11.2.2 Hypothesis 1: Responses for position 3

# among trials whose ball ended at position 3, code selections of the two
# neighbors: position 2 -> 1, position 4 -> 0; everything else stays NA
df.exp2.conjunctive.hyp1 = df.exp2.conjunctive.surprise_quiz %>% 
  select(workerid, end_position, selected_position) %>% 
  filter(end_position == 3) %>% 
  mutate(response = NA,
         response = ifelse(selected_position == 2, 1, response),
         response = ifelse(selected_position == 4, 0, response))

# intercept-only Bayesian logistic model; cached to file — keep unchanged for cache reuse
fit.brm.exp2.conjunctive.hyp1 = brm(formula = response ~ 1,
                       family = "bernoulli",
                       data = df.exp2.conjunctive.hyp1,
                       file = "cache/fit.brm.exp2.conjunctive.hyp1",
                       cores = 4, 
                       seed = 1) 

# back-transform the intercept (and CI) from log-odds to a percentage
fit.brm.exp2.conjunctive.hyp1 %>% 
  tidy() %>% 
  filter(effect == "fixed") %>% 
  select(estimate, contains("conf")) %>% 
  mutate(across(.cols = everything(),
                .fns = ~ inv.logit(.) * 100)) %>% 
  print_table()
estimate conf.low conf.high
59.24 45.52 72.4

11.2.3 Hypothesis 2: Correct vs. incorrect

# response = 1 when selection matches the true end position;
# position = 1 for trials ending at position 4, 0 otherwise
df.exp2.conjunctive.hyp2 = df.exp2.conjunctive.surprise_quiz %>% 
  select(workerid, end_position, selected_position) %>% 
  mutate(response = (end_position == selected_position)*1,
         position = ifelse(end_position == 4, 1, 0))
  
# Bayesian logistic mixed model; cached to file — keep chunk unchanged for cache reuse
fit.brm.exp2.conjunctive.hyp2 = brm(formula = response ~ 1 + position + (1 | workerid),
                       family = "bernoulli",
                       data = df.exp2.conjunctive.hyp2,
                       file = "cache/fit.brm.exp2.conjunctive.hyp2",
                       cores = 4, 
                       seed = 1) 

# marginal means per position plus the pairwise odds-ratio contrast
fit.brm.exp2.conjunctive.hyp2 %>% 
  emmeans(specs = pairwise ~ position,
          type = "response") %>% 
  summary(point.est = "mean") %>% 
  print(digits = 4)
$emmeans
 position response lower.HPD upper.HPD
        0    0.509     0.406     0.619
        1    0.809     0.713     0.898

Point estimate displayed: mean 
Results are back-transformed from the logit scale 
HPD interval probability: 0.95 

$contrasts
 contrast              odds.ratio lower.HPD upper.HPD
 position0 / position1      0.248     0.124     0.404

Point estimate displayed: mean 
Results are back-transformed from the log odds ratio scale 
HPD interval probability: 0.95 
# percentage correct per position (marginal means scaled to percent)
fit.brm.exp2.conjunctive.hyp2 %>% 
  emmeans(specs = pairwise ~ position,
          type = "response") %>% 
  pluck("emmeans") %>% 
  as_tibble() %>% 
  mutate(across(.cols = - position,
                .fns = ~ . * 100)) %>% 
  print_table()
position response lower.HPD upper.HPD
0 50.89 40.57 61.87
1 81.27 71.25 89.83
# difference between positions on the log-odds scale
fit.brm.exp2.conjunctive.hyp2 %>% 
  emmeans(specs = pairwise ~ position) %>% 
  pluck("contrasts") %>% 
  as_tibble() %>% 
  print_table()
contrast estimate lower.HPD upper.HPD
position0 - position1 -1.44 -2.01 -0.86

11.3 FIGURES

11.3.1 Accuracy

# relabel raw trial indices to 1-16 per participant, then plot accuracy
df.plot = df.exp2.conjunctive.trials %>% 
  filter(trial == "prediction_trial") %>% 
  select(workerid, trial_index, correct) %>% 
  group_by(workerid) %>% 
  mutate(trial_index = factor(trial_index, 
                              levels = unique(trial_index),
                              labels = 1:16),
         trial_index = as.numeric(as.character(trial_index))) %>% 
  ungroup() %>% 
  mutate(accuracy = ifelse(correct == "True", 1, 0))

plot.exp2.conjunctive.accuracy = fun_plot_accuracy(data = df.plot,
                                      limit = 16)
plot.exp2.conjunctive.accuracy

11.3.2 Selections

# observed counts + bootstrap CIs; fill codes bars:
# 1 = correct selection, 2 = highlighted neighbor selections, 0 = other
df.plot = df.exp2.conjunctive.surprise_quiz %>% 
  count(end_position, selected_position, .drop = F) %>% 
  left_join(df.exp2.conjunctive.bootstraps, 
            by = c("end_position", "selected_position")) %>% 
  mutate(fill = case_when(end_position == selected_position ~ 1,
                          end_position == 1 & selected_position == 2 ~ 2,
                          end_position == 1 & selected_position == 3 ~ 2,
                          end_position == 2 & selected_position == 1 ~ 2,
                          end_position == 2 & selected_position == 3 ~ 2,
                          end_position == 3 & selected_position == 1 ~ 2,
                          end_position == 3 & selected_position == 2 ~ 2,
                          TRUE ~ 0),
         selected_position = as.factor(selected_position),
         fill = as.factor(fill),
         condition = "conjunctive")

plot.exp2.conjunctive.selections = fun_plot_selections_physics(data = df.plot,
                                                               exp = "exp2.conjunctive")
plot.exp2.conjunctive.selections

12 EXPERIMENT 2: COMBINED

12.1 STATS

12.1.1 Demographics

# combine participant demographics from the three Experiment 2 conditions;
# for "short", keep only participants retained in the surprise-quiz subset
df.exp2.participants = df.exp2.long.participants %>% 
  mutate(condition = "long") %>% 
  bind_rows(df.exp2.short.participants %>% 
  inner_join(df.exp2.short.surprise_quiz %>% 
               distinct(workerid),
             by = "workerid") %>% 
              mutate(condition = "short"),
            df.exp2.conjunctive.participants %>% 
              mutate(condition = "conjunctive")) %>% 
    select(condition, workerid, age, gender, race, ethnicity)
  
# race
df.exp2.participants %>% 
  count(race) %>% 
  arrange(desc(n))
                               race   n
1                             White 272
2            Black/African American  32
3                             Asian  29
4                       Multiracial  18
5                                     2
6     American Indian/Alaska Native   2
7  Native Hawaiian/Pacific Islander   1
8                          american   1
9                   hispanic/latino   1
10                         romanian   1
# ethnicity
df.exp2.participants %>% 
  count(ethnicity) %>% 
  arrange(desc(n))
     ethnicity   n
1 Non-Hispanic 316
2     Hispanic  30
3               13
# gender
df.exp2.participants %>% 
  count(gender) %>% 
  arrange(desc(n))
      gender   n
1     Female 186
2       Male 161
3 Non-binary   9
4              3
# age
df.exp2.participants %>% 
  summarize(age_mean = mean(age, na.rm = T),
            age_sd = sd(age, na.rm = T)) %>% 
  print_table(digits = 0)
age_mean age_sd
38 15
# condition 
df.exp2.participants %>% 
  count(condition)
    condition   n
1 conjunctive 119
2        long 120
3       short 120
nrow(df.exp2.participants)
[1] 359

12.1.2 Accuracy depending on final position

# surprise-quiz accuracy by whether the ball ended close to the middle
# (positions 2/3) vs. at the edges (1/4), per condition
df.exp2.long.surprise_quiz %>% 
  select(workerid, accuracy, end_position, selected_position) %>% 
  mutate(condition = "long") %>% 
  bind_rows(df.exp2.short.surprise_quiz %>% 
              select(workerid, accuracy, end_position, selected_position) %>% 
              mutate(condition = "short")) %>% 
  mutate(close = ifelse(end_position %in% c(2, 3), "yes", "no"),
         accuracy = ifelse(accuracy == "False", F, T)) %>% 
  group_by(condition, close) %>% 
  summarize(accuracy = sum(accuracy)/n()) %>% 
  print_table()
condition close accuracy
long no 0.52
long yes 0.64
short no 0.42
short yes 0.55

12.2 FIGURES

12.2.1 Accuracy

# 1x3 panel of accuracy plots; the short condition gets a narrower panel
plot.exp2.long.accuracy + 
  labs(title = "long condition") + 
  plot.exp2.short.accuracy + 
  labs(title = "short condition") + 
  plot.exp2.conjunctive.accuracy + 
  labs(title = "conjunctive condition") +
  plot_layout(ncol = 3, 
              nrow = 1,
              widths = c(2, 0.75, 2)) + 
  plot_annotation(tag_levels = "A") & 
  theme(plot.tag = element_text(face = "bold"))

ggsave(filename = "../../figures/plots/exp2_accuracy.pdf", 
       width = 24,
       height = 6)

12.2.2 Selections

# combine long and short counts + bootstrap CIs; fill codes bars:
# 1 = correct selection, 2 = outcome-congruent neighbor, 0 = other
df.plot = df.exp2.long.surprise_quiz %>% 
  count(end_position, selected_position, .drop = F) %>% 
  left_join(df.exp2.long.bootstraps, 
            by = c("end_position", "selected_position")) %>% 
  mutate(condition = "long") %>% 
  bind_rows(df.exp2.short.surprise_quiz %>% 
              count(end_position, selected_position, .drop = F) %>% 
              left_join(df.exp2.short.bootstraps, 
                        by = c("end_position", "selected_position")) %>% 
              mutate(condition = "short")) %>% 
  mutate(fill = 0,
         fill = ifelse(end_position == selected_position, 1, fill),
         fill = ifelse(end_position == 1 & selected_position == 2, 2, fill),
         fill = ifelse(end_position == 2 & selected_position == 1, 2, fill),
         fill = ifelse(end_position == 3 & selected_position == 4, 2, fill),
         fill = ifelse(end_position == 4 & selected_position == 3, 2, fill),
         selected_position = as.factor(selected_position),
         fill = as.factor(fill))

# convert counts and CI bounds to percentages within each end position/condition
df.plot = df.plot %>% 
  group_by(end_position, condition) %>% 
  mutate(total = sum(n),
         n = (n/total) * 100,
         low = (low/total) * 100,
         high = (high/total) * 100) %>% 
  ungroup()

# Read the stimulus image for a given final ball position.
# `image` is the position index; returns the PNG read by png::readPNG().
fun_load_image = function(image){
  path = paste0("../../figures/stimuli/physics/position", image, ".png")
  readPNG(path)
}

# one stimulus image per end position; images suppressed for the short rows
df.images = df.plot %>% 
  distinct(end_position, condition) %>% 
  mutate(grob = map(.x = end_position,
                    .f = ~ fun_load_image(image = .x))) %>% 
  # NOTE(review): ifelse over a list-column — relies on ifelse coercing the
  # result back to a list; confirm `grob` stays a list of arrays/NA downstream
  mutate(grob = ifelse(condition == "short", NA, grob))

# facet labels (1-4) drawn above the panels; suppressed for the short rows
df.text = df.plot %>% 
  distinct(end_position, condition) %>% 
  group_by(condition) %>% 
  mutate(x = 4,
         y = 155,
         text = 1:4) %>% 
  ungroup() %>% 
  mutate(text = ifelse(condition == "short", NA, text))

# bar plot of % selections per response option, faceted by end position (cols)
# and condition (rows), with bootstrap CIs, stimulus images, and panel numbers
plot.ex56.selections = ggplot(data = df.plot,
                              mapping = aes(x = selected_position,
                                            y = n,
                                            fill = fill)) +
  geom_bar(stat = "identity",
           color = "black") +
  # bootstrap 95% interval per bar
  geom_linerange(mapping = aes(ymin = low,
                               ymax = high)) +
  # stimulus image above each long-condition panel (fill = NA keeps the
  # fill scale from applying to these layers)
  geom_custom(data = df.images,
              mapping = aes(data = grob,
                            x = -Inf,
                            y = Inf,
                            fill = NA),
              grob_fun = function(x) rasterGrob(x,
                                                interpolate = T,
                                                vjust = -0.05,
                                                hjust = 0)) +
  # panel number drawn on top of each image
  geom_text(data = df.text,
            mapping = aes(x = x,
                          y = y,
                          label = text,
                          fill = NA),
            color = "white",
            size = 8) +
  facet_grid(cols = vars(end_position),
             rows = vars(condition)) +
  labs(x = "response option",
       y = "% selected") +
  # gray = other, green = correct, orange = outcome-congruent neighbor
  scale_fill_manual(values = c("0" = "gray80",
                               "1" = "darkgreen",
                               "2" = "orange")) +
  scale_y_continuous(breaks = seq(0, 100, 20),
                     labels = function(x) str_c(x, "%"),
                     expand = expansion(add = c(0, 5))) +
  # clip = "off" lets the images/numbers render outside the panel area
  coord_cartesian(clip = "off",
                  ylim = c(0, 95)) +
  theme(legend.position = "none",
        panel.grid.major.y = element_line(),
        strip.background.x = element_blank(),
        strip.text.x = element_blank(),
        plot.margin = margin(t = 2.5, l = 0.2, r = 0.2, b = 0, unit = "cm"),
        panel.spacing.y = unit(1, "cm"))

# Combine the selection plot with the conjunctive-condition plot (defined
# elsewhere in this file) into a two-row figure with A/B tags.
# NOTE(review): in patchwork, this theme() presumably attaches to the most
# recently added plot (plot.exp2.conjunctive.selections) -- confirm.
plot.ex56.selections + 
  plot.exp2.conjunctive.selections +
  theme(plot.margin = margin(t = 2.5, l = 0.2, r = 0.2, b = 0, unit = "cm")) + 
  plot_layout(ncol = 1, 
              nrow = 2,
              heights = c(2, 1.1)) +
  plot_annotation(tag_levels = "A") &
  theme(plot.tag = element_text(face = "bold"))

# saves the most recently displayed plot
ggsave(filename = "../../figures/plots/exp2_selections.pdf", 
       width = 10,
       height = 10)

13 EXPERIMENT 3

13.1 DATA

13.1.1 Read data

# Collect all experiment-3 CSV files and stack them into one tibble
# (one row per logged event, all participants).
file_list = list.files(path = "../../data/experiment3",
                       pattern = "\\.csv$",
                       full.names = TRUE)

df.exp3 = file_list %>% 
  map(~ read_csv(.x, show_col_types = FALSE)) %>% 
  bind_rows()

13.1.2 Wrangle data

# create individual datasets
# Keep only the rows belonging to the four trial types analyzed below.
df.exp3.trials = df.exp3 %>% 
  filter(trial %in% c("prediction_trial", "surprise_quiz",
                      "generalization_check", "generalization_check_survey"))

# One row per participant, taken from the exit survey: condition
# assignment, demographics, and performance summaries.
df.exp3.participants = df.exp3 %>% 
  filter(trial == "exit_survey") %>% 
  select(participant = prolific_id,
         condition,
         contains("ramp"),
         rt,
         age,
         ethnicity,
         gender,
         race,
         overall_accuracy,
         bonus)

# condition 0 and 4 participants might need to be replaced (cube never ended in position 1)
# One row per prediction trial; `order` numbers each participant's trials
# in presentation order (1-based).
df.exp3.predictions = df.exp3.trials %>% 
  filter(trial == "prediction_trial") %>% 
  select(participant = prolific_id,
         condition,
         contains("ramp"),
         answer_image,
         correct_response,
         response,
         end_position,
         accuracy_bool,
         rt) %>% 
  group_by(participant) %>% 
  mutate(order = row_number(),
         .after = participant) %>% 
  ungroup()

# One row per surprise-quiz trial.
# NOTE: `end_posision` is the (misspelled) column name in the raw data;
# it is renamed to `end_position` here -- do not "fix" the spelling on
# the right-hand side or the select() will fail.
df.exp3.surprise = df.exp3.trials %>% 
  filter(trial == "surprise_quiz") %>%
  select(participant = prolific_id,
         condition,
         contains("ramp"),
         surprise_setup,
         end_position = end_posision,
         selected_position,
         accuracy,
         accuracy_finish_line,
         rt) %>% 
  # signed difference between the true and the selected end position
  mutate(distance_from_correct = end_position - selected_position) %>% 
  group_by(participant) %>% 
  mutate(order = 1:n(),  # trial order within participant
         .after = participant) %>% 
  ungroup()
  
# One row per generalization-check trial, with several derived codings.
df.exp3.generalization = df.exp3.trials %>% 
  filter(trial == "generalization_check") %>% 
  select(participant = prolific_id,
         condition,
         contains("ramp"),
         stimulus,
         choices,
         orientation, 
         farther_color,
         predicted_further,
         predicted_direction,
         farther_color_generalized,
         rt) %>% 
  rename(relative_orientation = orientation,
         ramp_orientation_training = ramp_orientation) %>% 
  # absolute ramp orientation on the generalization trial, derived from the
  # training orientation and whether the trial was "consistent" with or
  # "reversed" relative to training; "ERROR" flags unexpected inputs
  mutate(ramp_orientation_absolute = case_when(
    relative_orientation == "consistent" ~ ramp_orientation_training,
    relative_orientation == "reversed" & ramp_orientation_training == "forward" ~ "backward",
    relative_orientation == "reversed" & ramp_orientation_training == "backward" ~ "forward",
    TRUE ~ "ERROR"),
    predicted_direction = ifelse(predicted_direction == "left", 0, 1),  # 0 = left, 1 = right
    stimulus = ifelse(str_detect(stimulus, "cubes"), "cube", "ramp")) %>% 
  # map the direction x color choice onto response options 1-4
  mutate(selection = case_when(
    predicted_direction == 0 & farther_color_generalized ~ 1,
    predicted_direction == 0 & !farther_color_generalized ~ 2,
    predicted_direction == 1 & !farther_color_generalized ~ 3,
    predicted_direction == 1 & farther_color_generalized ~ 4),
    selection = factor(selection, levels = 1:4)) %>% 
  group_by(participant) %>% 
  mutate(order = 1:n(),  # trial order within participant
         .after = participant) %>% 
  ungroup() %>% 
  # trial_type 1-4 collapses orientation x (trained stimulus, shown
  # stimulus) so matching cells in the cube- and ramp-trained groups
  # share a code (e.g. forward_cube_cube and forward_ramp_ramp are both 1)
  mutate(trial_match = ramp_or_cube == stimulus,
         trial_type = str_c(ramp_orientation_absolute, ramp_or_cube, stimulus, sep = "_"),
         trial_type = case_when(
           trial_type == "forward_cube_cube" ~ 1,
           trial_type == "forward_cube_ramp" ~ 2,
           trial_type == "backward_cube_cube" ~ 3,
           trial_type == "backward_cube_ramp" ~ 4,
           trial_type == "forward_ramp_ramp" ~ 1,
           trial_type == "forward_ramp_cube" ~ 2,
           trial_type == "backward_ramp_ramp" ~ 3,
           trial_type == "backward_ramp_cube" ~ 4))

# Lookup table linking training/absolute orientation and trial_type to the
# stimulus shown. Rows are sorted by the fixed factor level order so the
# cube/ramp labels can be attached positionally via rep().
df.exp3.index = df.exp3.generalization %>% 
  select(ramp_orientation_training,
         ramp_orientation_absolute,
         trial_type) %>% 
  distinct() %>% 
  mutate(ramp_orientation_training = factor(ramp_orientation_training,
                                            levels = c("forward", "backward")),
         ramp_orientation_absolute = factor(ramp_orientation_absolute,
                                            levels = c("forward", "backward"))) %>% 
  arrange(ramp_orientation_training,
          ramp_orientation_absolute,
          trial_type) %>% 
  # assumes exactly 8 rows alternating cube/ramp after the sort above
  mutate(stimulus = rep(c("cube", "ramp"), 4))

13.1.3 Demographics

# total number of participants
nrow(df.exp3.participants)
[1] 239
# mean and standard deviation of participant age
df.exp3.participants %>% 
  summarize(across(.cols = age,
                   .fns = list(mean = mean, sd = sd),
                   .names = "{.col}_{.fn}")) %>% 
  print_table(digits = 0)
age_mean age_sd
37 11
# participant counts by gender
print_table(count(df.exp3.participants, gender))
gender n
Female 137
Male 98
Non-binary 2
NA 2
# participant counts by race
print_table(count(df.exp3.participants, race))
race n
American Indian/Alaska Native 2
Asian 22
Black/African American 45
Hispanic/Latino 1
Multiracial 7
White 158
NA 4
# participant counts by ethnicity
print_table(count(df.exp3.participants, ethnicity))
ethnicity n
Hispanic 19
Non-Hispanic 215
NA 5
# number of participants per between-subjects condition
count(df.exp3.participants, ramp_orientation, ramp_or_cube)
# A tibble: 4 × 3
  ramp_orientation ramp_or_cube     n
  <chr>            <chr>        <int>
1 backward         cube            62
2 backward         ramp            57
3 forward          cube            59
4 forward          ramp            61

13.1.4 Read in modeling results

# Model predictions from the Python fitting pipeline: one probability per
# response position, reshaped to long format and annotated with the same
# orientation/stimulus codes used for the behavioral data.
df.exp3.generalization.model = read_csv("../python/data/bestfit/model_vs_human_comparison.csv",
                                        show_col_types = F) %>% 
  clean_names() %>% 
  select(condition, trial, contains("prediction")) %>% 
  pivot_longer(cols = -c(condition, trial),
               names_to = "position",
               values_to = "prediction") %>% 
  mutate(condition = str_remove(condition, "_ramp_condition"),
         trial = str_remove(trial, "trial_"),
         # position index is the digit embedded in the prediction column name
         position = as.numeric(str_extract(position, "\\d+")),
         # trials a/b are coded forward, c/d backward; a/c cube, b/d ramp
         ramp_orientation_absolute = ifelse(trial %in% c("a", "b"), "forward", "backward"),
         stimulus = ifelse(trial %in% c("a", "c"), "cube", "ramp")) %>% 
  rename(ramp_orientation_training = condition) %>% 
  relocate(trial)

13.2 STATS

13.2.1 Bootstraps

13.2.1.1 Surprise

# make reproducible
set.seed(1)

# Bootstrap 95% CIs for surprise-quiz selection counts, per training
# orientation x end position x selected position cell.
# NOTE(review): `end_position` is passed positionally to bootstraps();
# presumably it is picked up as the `strata` argument (stratified
# resampling) -- confirm against the installed rsample signature.
df.exp3.bootstraps.surprise = df.exp3.surprise %>%
  bootstraps(times = 1000,
             end_position) %>% 
  mutate(counts = map(.x = splits,
                      .f = ~ .x %>% 
                        as_tibble() %>% 
                        group_by(ramp_orientation) %>% 
                        count(end_position, selected_position, .drop = F))) %>% 
  select(-splits) %>% 
  unnest(counts) %>% 
  group_by(ramp_orientation, end_position, selected_position) %>% 
  summarize(low = quantile(n, 0.025), 
            high = quantile(n, 0.975))

13.2.1.2 Generalization

13.2.1.2.1 Direction
set.seed(1)

# Bootstrap mean of predicted_direction (1 = "right") per training x
# absolute orientation cell, with 95% percentile CIs.
df.exp3.bootstraps.generalization = df.exp3.generalization %>% 
  bootstrap(n = 1000) %>% # create 1000 bootstrapped samples
  mutate(data = map(.x = strap,
                      .f = ~ .x %>% 
                        as_tibble() %>% 
                        group_by(ramp_orientation_training, ramp_orientation_absolute) %>% 
                        summarize(prediction = mean(predicted_direction)))) %>% 
  select(-strap) %>% 
  unnest(data) %>% 
  group_by(ramp_orientation_training, ramp_orientation_absolute) %>% 
  summarize(mean = mean(prediction),
            low = quantile(prediction, 0.025), # calculate the 2.5 / 97.5 percentiles
            high = quantile(prediction, 0.975))
13.2.1.2.2 Position
set.seed(1)

# Bootstrap 95% CIs for the number of participants choosing each response
# option, per training orientation and generalization trial type.
df.exp3.generalization.position.boot.aggregated = df.exp3.generalization %>% 
  bootstrap(n = 1000) %>% # 1000 bootstrapped samples
  mutate(data = map(strap,
                    function(sample) {
                      sample %>% 
                        as_tibble() %>%
                        count(ramp_orientation_training,
                              trial_type,
                              selection,
                              .drop = F) %>% 
                        ungroup()
                    })) %>% 
  select(-strap) %>% 
  unnest(data) %>% 
  group_by(ramp_orientation_training,
           trial_type,
           selection) %>% 
  summarize(low = quantile(n, 0.025), # 2.5 / 97.5 percentiles
            high = quantile(n, 0.975))

13.2.2 Hypotheses

13.2.2.1 Hypothesis 1: Outcome relevant feature

# Effect-code the two binary outcome features (-1 / 1, then factors) and
# turn the selected position into an ordered factor for the ordinal model.
df.exp3.hyp1 = df.exp3.surprise %>% 
  select(participant, end_position, selected_position) %>% 
  mutate(relevant = ifelse(end_position %in% c(1, 2), -1, 1),
         irrelevant = ifelse(end_position %in% c(1, 3), -1, 1),
         selected_position = factor(selected_position,
                                    levels = 1:4,
                                    ordered = TRUE),
         relevant = as.factor(relevant),
         irrelevant = as.factor(irrelevant))

# Ordinal (cumulative probit) mixed model of the selected position on the
# two effect-coded features; the fit is cached to file (delete the cache
# file to refit).
fit.brm.exp3.hyp1 = brm(formula = selected_position ~ relevant * irrelevant + (1 | participant),
                       family = cumulative(link = "probit"),
                       data = df.exp3.hyp1,
                       file = "cache/fit.brm.exp3.hyp1",
                       cores = 4, 
                       seed = 1) 

# Posterior mean and 95% credible interval for the difference between the
# irrelevant- and relevant-feature coefficients.
fit.brm.exp3.hyp1 %>% 
  as_draws_df() %>% 
  select(b_relevant1, b_irrelevant1) %>% 
  mutate(difference = b_irrelevant1 - b_relevant1) %>% 
  summarize(mean = mean(difference),
            low = quantile(difference, 0.025),
            high = quantile(difference, 0.975)) %>% 
  print_table()
mean low high
0.7 0.6 0.8

13.2.2.2 Hypothesis 2: Correct vs. incorrect

# Binary response coding for the hypothesis-2 model, restricted to end
# positions 2 and 3; selections not covered by the four mappings below
# remain NA (and are dropped by the model fit).
df.exp3.hyp2 = df.exp3.surprise %>% 
  filter(end_position %in% c(2, 3)) %>% 
  mutate(response = case_when(
    end_position == 2 & selected_position == 1 ~ 1,
    end_position == 2 & selected_position == 3 ~ 0,
    end_position == 3 & selected_position == 4 ~ 1,
    end_position == 3 & selected_position == 2 ~ 0))
  
# Intercept-only Bernoulli mixed model on the recoded binary response;
# adapt_delta raised to 0.95 for sampler stability. Fit is cached to file.
fit.brm.exp3.hyp2 = brm(formula = response ~ 1 + (1 | participant),
                       family = "bernoulli",
                       data = df.exp3.hyp2,
                       file = "cache/fit.brm.exp3.hyp2",
                       cores = 4, 
                       seed = 1,
                       control = list(adapt_delta = 0.95)) 

# Fixed-effect estimate and CI transformed from log-odds to percent via
# the inverse logit (monotone, so transforming the bounds is valid).
fit.brm.exp3.hyp2 %>% 
  tidy() %>% 
  filter(effect == "fixed") %>% 
  select(estimate, contains("conf")) %>% 
  mutate(across(.cols = everything(),
                .fns = ~ inv.logit(.) * 100)) %>% 
  print_table()
estimate conf.low conf.high
95.5 84.66 99.41

13.2.2.3 Hypothesis 3: Generalization direction

We predict that there will be a negative effect of training (people will be more likely to select an image with the blocks on the right side of the ramp when the ramp faces backwards than when it faces forwards). We predict that there will be a positive effect of generalization (people will be more likely to select an image with the blocks on the right side when the ramp in the generalization trial faces forward than when it faces backward). We predict that there will be a positive interaction effect (the difference in participants’ selections between the two types of generalization trials will be greater when the ramp faced forward in the training compared to when it faced backward).

# Effect-code the two orientation predictors (+1 = forward, -1 = backward)
# for the generalization-direction model; `response` copies the 0/1
# predicted direction.
df.exp3.hyp3 = df.exp3.generalization %>% 
  select(participant,
         order,
         ramp_or_cube,
         ramp_orientation_training,
         ramp_orientation_absolute,
         predicted_direction) %>% 
  mutate(response = predicted_direction,
         across(.cols = c(ramp_orientation_training, ramp_orientation_absolute),
                .fns = ~ ifelse(. == "forward", 1, -1)))

# Bernoulli mixed model of predicted direction on training x absolute
# orientation (effect-coded). Fit is cached to file.
fit.brm.exp3.hyp3 = brm(formula = response ~ 1 + ramp_orientation_training * ramp_orientation_absolute + (1 | participant),
                       family = "bernoulli",
                       data = df.exp3.hyp3,
                       file = "cache/fit.brm.exp3.hyp3",
                       control = list(adapt_delta = 0.9),
                       cores = 4, 
                       seed = 1)

# Fixed-effect table (log-odds scale). The "parameter names contain
# underscores" warning from tidy() is expected and harmless here.
fit.brm.exp3.hyp3 %>% 
  tidy() %>% 
  filter(effect == "fixed") %>% 
  select(-c(effect, component, group)) %>% 
  print_table()
Warning in tidy.brmsfit(.): some parameter names contain underscores: term
naming may be unreliable!
term estimate std.error conf.low conf.high
(Intercept) 0.51 0.11 0.29 0.74
ramp_orientation_training -0.17 0.11 -0.39 0.04
ramp_orientation_absolute 0.90 0.11 0.69 1.12
ramp_orientation_training:ramp_orientation_absolute 1.70 0.12 1.47 1.95

13.2.3 Predictions

13.2.3.1 Accuracy across trials for each condition

# forward condition
# Accuracy as a function of trial order (shifted to start at 0) for the
# forward-training group; no intercept, so the model has only a slope on
# `order` (plus a per-participant random slope). Fit is cached to file.
df.data = df.exp3.predictions %>% 
  mutate(order = order - 1) %>% 
  filter(ramp_orientation == "forward")

fit.brm.exp3.prediction_forward = brm(formula = accuracy_bool ~ 0 + order + (0 + order | participant),
                                      family = "bernoulli",
                                      data = df.data,
                                      file = "cache/fit.brm.exp3.prediction_forward",
                                      cores = 4, 
                                      seed = 1)

# backward condition
# Same slope-only accuracy model for the backward-training group;
# adapt_delta raised to 0.99 for sampler stability. Fit is cached to file.
df.data = df.exp3.predictions %>% 
  mutate(order = order - 1) %>% 
  filter(ramp_orientation == "backward")

fit.brm.exp3.prediction_backward = brm(formula = accuracy_bool ~ 0 + order + (0 + order | participant),
                                       family = "bernoulli",
                                       data = df.data,
                                       file = "cache/fit.brm.exp3.prediction_backward",
                                       control = list(adapt_delta = 0.99),
                                       cores = 4, 
                                       seed = 1)

13.2.3.2 Effect of training and cube/ramp

# Effect-code training orientation (+1 forward) and stimulus type
# (+1 cube), then model prediction accuracy on their interaction.
# Fit is cached to file.
df.data = df.exp3.predictions %>% 
  mutate(ramp_orientation = ifelse(ramp_orientation == "forward", 1, -1),
         ramp_or_cube = ifelse(ramp_or_cube == "cube", 1, -1))

fit.brm.exp3.prediction = brm(formula = accuracy_bool ~ 1 + ramp_orientation * ramp_or_cube + (1 | participant),
                       family = "bernoulli",
                       data = df.data,
                       file = "cache/fit.brm.exp3.prediction",
                       cores = 4, 
                       seed = 1)

fit.brm.exp3.prediction
 Family: bernoulli 
  Links: mu = logit 
Formula: accuracy_bool ~ 1 + ramp_orientation * ramp_or_cube + (1 | participant) 
   Data: df.data (Number of observations: 3808) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Multilevel Hyperparameters:
~participant (Number of levels: 238) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     1.00      0.07     0.87     1.16 1.00     1689     2440

Regression Coefficients:
                              Estimate Est.Error l-95% CI u-95% CI Rhat
Intercept                         1.28      0.08     1.13     1.44 1.00
ramp_orientation                 -0.01      0.08    -0.16     0.14 1.00
ramp_or_cube                      0.18      0.08     0.02     0.33 1.00
ramp_orientation:ramp_or_cube    -0.04      0.08    -0.19     0.11 1.00
                              Bulk_ESS Tail_ESS
Intercept                         2142     2591
ramp_orientation                  2076     2726
ramp_or_cube                      2108     2382
ramp_orientation:ramp_or_cube     2112     2665

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

13.2.3.3 Effect of training

# Training-only model; note the dummy (0/1) coding here, unlike the
# effect coding (+1/-1) used in the model above. Fit is cached to file.
df.data = df.exp3.predictions %>% 
  mutate(ramp_orientation = ifelse(ramp_orientation == "forward", 0, 1))

fit.brm.exp3.prediction2 = brm(formula = accuracy_bool ~ 1 + ramp_orientation + (1 | participant),
                               family = "bernoulli",
                               data = df.data,
                               file = "cache/fit.brm.exp3.prediction2",
                               cores = 4, 
                               seed = 1)

fit.brm.exp3.prediction2
 Family: bernoulli 
  Links: mu = logit 
Formula: accuracy_bool ~ 1 + ramp_orientation + (1 | participant) 
   Data: df.data (Number of observations: 3808) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Multilevel Hyperparameters:
~participant (Number of levels: 238) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     1.00      0.07     0.87     1.16 1.01     1531     1812

Regression Coefficients:
                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept            1.26      0.11     1.05     1.48 1.00     2105     2776
ramp_orientation     0.02      0.16    -0.28     0.33 1.00     2169     2582

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

13.2.4 Feature-based model

# Observed selection counts plus bootstrap CIs, annotated with three
# binary predictors for the feature-based model:
# - side: "consistent" for the enumerated training-orientation x
#   trial_type x selection combinations below, else "inconsistent"
# - feature: "diagnostic" for trial types 1 and 3
# - friction: "consistent" for the enumerated trial_type x selection
#   pairs, else "inconsistent"
# (See the trial_type coding in df.exp3.generalization for cell meanings.)
df.features = df.exp3.generalization %>%
  count(ramp_orientation_training,
        trial_type,
        selection,
        .drop = F) %>% 
  left_join(df.exp3.generalization.position.boot.aggregated,
            by = join_by(ramp_orientation_training, trial_type, selection)) %>% 
  mutate(side = case_when(ramp_orientation_training == "forward" & trial_type %in% c(1, 2) & selection %in% c(3, 4) ~ "consistent",
                          ramp_orientation_training == "forward" & trial_type %in% c(3, 4) & selection %in% c(1, 2) ~ "consistent",
                          ramp_orientation_training == "backward" & trial_type %in% c(1, 2) & selection %in% c(1, 2) ~ "consistent",
                          ramp_orientation_training == "backward" & trial_type %in% c(3, 4) & selection %in% c(3, 4) ~ "consistent",
                          TRUE ~ "inconsistent"),
         feature = ifelse(trial_type %in% c(1, 3), "diagnostic", "non-diagnostic"),
         friction = case_when(trial_type %in% c(1, 3) & selection %in% c(1, 4) ~ "consistent",
                              trial_type %in% c(2, 4) & selection %in% c(2, 3) ~ "consistent",
                              TRUE ~ "inconsistent")
  )

# Full feature-based linear model (all interactions) predicting counts.
fit.features_full = lm(formula = n ~ 1 + ramp_orientation_training * side * feature * friction,
                     data = df.features)

# attach fitted values for the model-comparison analysis below
df.features_full = df.features %>% 
  bind_cols(fit.features_full %>% 
              augment() %>% 
              clean_names() %>% 
              select(fitted))

# coefficient table
fit.features_full %>% 
  tidy()
# A tibble: 16 × 5
   term                                    estimate std.error statistic  p.value
   <chr>                                      <dbl>     <dbl>     <dbl>    <dbl>
 1 (Intercept)                             2.99e+ 1      1.84  1.62e+ 1 2.38e-11
 2 ramp_orientation_training1             -1.25e- 1      1.84 -6.78e- 2 9.47e- 1
 3 side1                                   1.75e+ 1      1.84  9.49e+ 0 5.64e- 8
 4 feature1                                1.39e-16      1.84  7.54e-17 1   e+ 0
 5 friction1                               8.75e+ 0      1.84  4.75e+ 0 2.19e- 4
 6 ramp_orientation_training1:side1       -7.62e+ 0      1.84 -4.14e+ 0 7.75e- 4
 7 ramp_orientation_training1:feature1     2.65e-15      1.84  1.44e-15 1.00e+ 0
 8 side1:feature1                          1.13e+ 0      1.84  6.10e- 1 5.50e- 1
 9 ramp_orientation_training1:friction1    1.25e- 1      1.84  6.78e- 2 9.47e- 1
10 side1:friction1                         6.13e+ 0      1.84  3.32e+ 0 4.31e- 3
11 feature1:friction1                      7.88e+ 0      1.84  4.27e+ 0 5.84e- 4
12 ramp_orientation_training1:side1:feat…  1.00e+ 0      1.84  5.42e- 1 5.95e- 1
13 ramp_orientation_training1:side1:fric… -1.62e+ 0      1.84 -8.81e- 1 3.91e- 1
14 ramp_orientation_training1:feature1:f… -1.00e+ 0      1.84 -5.42e- 1 5.95e- 1
15 side1:feature1:friction1                6.25e+ 0      1.84  3.39e+ 0 3.74e- 3
16 ramp_orientation_training1:side1:feat… -2.25e+ 0      1.84 -1.22e+ 0 2.40e- 1

13.2.5 Feature-based model (without interactions)

# Reduced feature-based model: training orientation interacts with each
# feature separately, but the features do not interact with one another.
fit.features_simple = lm(formula = n ~ 1 + ramp_orientation_training * (side + feature + friction),
                         data = df.features)

# attach fitted values for comparison with the observed counts
df.features_simple = df.features %>% 
  bind_cols(fit.features_simple %>% 
              augment() %>% 
              clean_names() %>% 
              select(fitted))

# coefficient table
fit.features_simple %>% 
  tidy()
# A tibble: 8 × 5
  term                                  estimate std.error statistic  p.value
  <chr>                                    <dbl>     <dbl>     <dbl>    <dbl>
1 (Intercept)                           2.99e+ 1      2.92  1.02e+ 1 3.04e-10
2 ramp_orientation_training1           -1.25e- 1      2.92 -4.29e- 2 9.66e- 1
3 side1                                 1.75e+ 1      2.92  6.00e+ 0 3.38e- 6
4 feature1                              6.20e-16      2.92  2.13e-16 1   e+ 0
5 friction1                             8.75e+ 0      2.92  3.00e+ 0 6.18e- 3
6 ramp_orientation_training1:side1     -7.62e+ 0      2.92 -2.62e+ 0 1.52e- 2
7 ramp_orientation_training1:feature1   4.22e-16      2.92  1.45e-16 1   e+ 0
8 ramp_orientation_training1:friction1  1.25e- 1      2.92  4.29e- 2 9.66e- 1

13.2.6 Correlations between model predictions and responses

# Number of participants per training condition (used to scale model
# probabilities up to counts).
df.n = df.exp3.generalization %>% 
  distinct(participant, ramp_orientation_training) %>% 
  count(ramp_orientation_training)

# Scale abstraction-model probabilities to counts and attach trial_type
# via the index table; `position` becomes `selection` to match the
# behavioral data.
df.model = df.exp3.generalization.model %>% 
  mutate(prediction = ifelse(ramp_orientation_training == "forward",
                             prediction * df.n$n[df.n$ramp_orientation_training == "forward"],
                             prediction * df.n$n[df.n$ramp_orientation_training == "backward"])) %>% 
  left_join(df.exp3.index,
            by = c("ramp_orientation_training", "ramp_orientation_absolute", "stimulus")) %>% 
  rename(selection = position)

# Correlation and RMSE between observed counts and the two models:
# feature-based fitted values vs. abstraction-model predictions.
df.features_full %>%
  # selection is a factor; recover the numeric level for the join
  mutate(selection = as.numeric(as.character(selection))) %>%
  left_join(df.model,
            by = join_by(ramp_orientation_training, trial_type, selection)) %>% 
  select(data = n,
         features = fitted,
         abstraction = prediction) %>% 
  summarize(r_features = cor(features, data),
            r_abstraction = cor(abstraction, data),
            rmse_features = sqrt(mean((features - data)^2)),
            rmse_abstraction = sqrt(mean((abstraction - data)^2))) %>% 
  # reshape "index_model" columns into one row per model
  pivot_longer(cols = everything(),
               names_sep = "_",
               names_to = c("index", "model"),
               values_to = "value") %>% 
  pivot_wider(names_from = index,
              values_from = value) %>% 
  print_table()
model r rmse
features 0.96 7.37
abstraction 0.99 3.71

13.3 TABLES

13.3.1 Accuracy

# Mean prediction accuracy (in %) per training orientation and stimulus type.
df.exp3.predictions %>% 
  group_by(ramp_orientation, ramp_or_cube) %>% 
  summarize(mean = 100 * mean(accuracy_bool)) %>% 
  print_table(digits = 0)
ramp_orientation ramp_or_cube mean
backward cube 77
backward ramp 71
forward cube 76
forward ramp 72

13.3.2 Consistency

# Classify each participant by which response strategy matches ALL of
# their generalization responses:
# - physics: predict "right" (1) when the absolute orientation is forward
# - agent:   always predict "right" (1)
# - ramp:    predict "right" (1) when the absolute orientation is backward
# - other:   no single strategy matches every response
df.exp3.generalization %>% 
  select(participant, 
         ramp_or_cube,
         stimulus,
         ramp_orientation_training,
         ramp_orientation_absolute,
         predicted_direction) %>% 
  mutate(model_physics = ifelse(ramp_orientation_absolute == "forward", 1, 0),
         model_agent = 1,
         model_ramp = ifelse(ramp_orientation_absolute == "backward", 1, 0),
         index_physics = predicted_direction == model_physics,
         index_agent = predicted_direction == model_agent,
         index_ramp = predicted_direction == model_ramp) %>% 
  group_by(participant, ramp_orientation_training) %>%
  # a participant counts for a strategy only if every trial matches it
  summarize(n_physics = all(index_physics),
            n_agent = all(index_agent),
            n_ramp = all(index_ramp)) %>% 
  mutate(n_other = !(n_physics | n_agent | n_ramp)) %>% 
  group_by(ramp_orientation_training) %>%
  summarize(physics = sum(n_physics),
            agent = sum(n_agent),
            ramp = sum(n_ramp),
            other = sum(n_other)) %>% 
  print_table()
ramp_orientation_training physics agent ramp other
backward 7 27 45 40
forward 95 6 0 19

13.3.3 Explanations

# Parse the JSON-encoded free-response explanations into long format
# (one row per parsed response element).
df.table = df.exp3 %>% 
  filter(trial == "generalization_check_survey") %>% 
  select(ramp_orientation,
         ramp_or_cube,
         orientation,
         predicted_further,
         predicted_direction,
         response) %>% 
  mutate(response = map(response, fromJSON)) %>% 
  unnest(response)

# print_table(df.table)

13.4 FIGURES

13.4.1 Accuracy

# Accuracy-by-trial plots for the two training conditions, built with the
# shared fun_plot_accuracy helper (defined elsewhere in this file), then
# combined side by side and saved.
df.plot = df.exp3.predictions %>% 
  rename(accuracy = accuracy_bool,
         trial_index = order)


p1 = fun_plot_accuracy(data = df.plot %>% 
                         filter(ramp_orientation == "forward"),
                  limit = 16) + 
  labs(title = "ramp forward")

p2 = fun_plot_accuracy(data = df.plot %>% 
                         filter(ramp_orientation == "backward"),
                  limit = 16) + 
  labs(title = "ramp backward")

p1 + p2 + 
  plot_layout(ncol = 2) + 
  plot_annotation(tag_levels = "A") & 
  theme(title = element_text(size = 24),
        plot.tag = element_text(face = "bold"))

# saves the most recently displayed plot
ggsave(filename = "../../figures/plots/exp3_accuracy.pdf",
       width = 16,
       height = 5)

13.4.2 Selections

# Selection counts per training orientation and end position, joined with
# bootstrap CIs. `fill` codes bar color: 1 = selected position matches the
# true end position, 2 = the enumerated near-miss pairs, 0 = everything
# else. `condition` is NA to satisfy the shared plotting helper.
df.plot = df.exp3.surprise %>% 
  group_by(ramp_orientation) %>% 
  count(end_position, selected_position, .drop = F) %>%
  left_join(df.exp3.bootstraps.surprise, 
            by = c("ramp_orientation",
                   "end_position", 
                   "selected_position")) %>% 
  mutate(fill = case_when(
    end_position == selected_position ~ 1,
    end_position == 1 & selected_position == 2 ~ 2,
    end_position == 2 & selected_position == 1 ~ 2,
    end_position == 3 & selected_position == 4 ~ 2,
    end_position == 4 & selected_position == 3 ~ 2,
    TRUE ~ 0),
    selected_position = as.factor(selected_position),
    fill = as.factor(fill),
    condition = NA)

# Selection plots for the two training orientations, built with the shared
# fun_plot_selections_physics helper (defined elsewhere in this file),
# stacked vertically and saved.
plot.ex8.selections_forward = fun_plot_selections_physics(data = df.plot %>% 
                                                            filter(ramp_orientation == "forward"),
                                                          exp = "exp3") + 
  theme(strip.background = element_blank(),
        strip.text = element_blank())

plot.ex8.selections_backward = fun_plot_selections_physics(data = df.plot %>% 
                                                             filter(ramp_orientation == "backward"),
                                                          exp = "exp3") + 
  theme(strip.background = element_blank(),
        strip.text = element_blank())

plot.ex8.selections_forward +
  theme(plot.margin = margin(t = 2.5, l = 0.2, r = 0.2, b = 0, unit = "cm")) + 
  plot.ex8.selections_backward +
  theme(plot.margin = margin(t = 2.5, l = 0.2, r = 0.2, b = 0, unit = "cm")) + 
  plot_layout(ncol = 1, 
              nrow = 2) +
  plot_annotation(tag_levels = "A") &
  theme(plot.tag = element_text(face = "bold"))

# saves the most recently displayed plot
ggsave(filename = "../../figures/plots/exp3_selections.pdf", 
       width = 10,
       height = 8)

13.4.3 Generalization

13.4.3.1 Direction

# Prepare the bootstrap results for plotting: fix the factor level order
# and center the proportions on 0 (so 0 corresponds to a 50/50 split).
df.plot = df.exp3.bootstraps.generalization %>% 
  mutate(across(.cols = c(ramp_orientation_training, ramp_orientation_absolute),
                .fns = ~ factor(., levels = c("forward", "backward"))),
         across(.cols = c(mean, low, high),
                .fns = ~ . - 0.5))

# Abstraction-model probability of a "right" prediction (positions 3/4),
# averaged over stimuli, centered on 0 to match df.plot.
df.model.abstraction = df.exp3.generalization.model %>% 
  mutate(direction = ifelse(position %in% c(1, 2), "left", "right")) %>% 
  group_by(ramp_orientation_training, ramp_orientation_absolute, direction, stimulus) %>%
  summarize(prediction = sum(prediction)) %>% 
  group_by(ramp_orientation_training, ramp_orientation_absolute, direction) %>% 
  summarize(prediction = mean(prediction)) %>% 
  filter(direction == "right") %>% 
  mutate(prediction = prediction - 0.5)
  
# Feature-model probability of a "right" prediction: normalize fitted
# counts within each training x trial_type cell, sum over the right-side
# selections (3/4), average over trial types, and center on 0.
df.model.features = df.features_full %>% 
  group_by(ramp_orientation_training, trial_type) %>%
  mutate(n = n / sum(n),
         fitted = fitted / sum(fitted)) %>%
  ungroup() %>% 
  mutate(ramp_orientation_absolute = ifelse(trial_type %in% c(1, 2), "forward", "backward"),
         direction = ifelse(selection %in% c(1, 2), "left", "right")) %>% 
  group_by(ramp_orientation_training, ramp_orientation_absolute, direction, trial_type) %>%
  summarize(prediction = sum(fitted)) %>% 
  group_by(ramp_orientation_training, ramp_orientation_absolute, direction) %>% 
  summarize(prediction = mean(prediction)) %>% 
  filter(direction == "right") %>% 
  mutate(prediction = prediction - 0.5)

# Stack the two model predictions into one data frame for overlaying on
# the behavioral plot, with the same factor level order as df.plot.
df.model = df.model.abstraction %>% 
  select(ramp_orientation_training, ramp_orientation_absolute, prediction) %>%
  mutate(model = "abstraction") %>% 
  bind_rows(df.model.features %>% 
              select(ramp_orientation_training, ramp_orientation_absolute, prediction) %>%
              mutate(model = "features")) %>% 
  mutate(ramp_orientation_training = factor(ramp_orientation_training,
                                            levels = c("forward", "backward")),
         ramp_orientation_absolute = factor(ramp_orientation_absolute,
                                            levels = c("forward", "backward")))

# Bars: bootstrapped proportion of "right" predictions (centered on 0)
# per training x generalization orientation; points: the two models'
# predictions overlaid in matching dodge positions.
ggplot(data = df.plot,
       mapping = aes(x = ramp_orientation_training,
                     y = mean,
                     fill = ramp_orientation_absolute,
                     group = ramp_orientation_absolute)) +
  # chance line (50/50 split)
  geom_hline(yintercept = 0,
             linetype = "dashed",
             color = "black") +
  geom_col(position = position_dodge(width = 0.9),
           color = "black") +
  # bootstrap 95% confidence intervals
  geom_linerange(mapping = aes(ymin = low,
                               ymax = high),
                 position = position_dodge(width = 0.9),
                 color = "black",
                 linewidth = 1) +
  # model predictions; fill = model distinguishes the two point colors
  geom_point(data = df.model,
             mapping = aes(x = ramp_orientation_training,
                           y = prediction,
                           group = ramp_orientation_absolute,
                           fill = model),
             position = position_dodge2(width = c(0.9)),
             shape = 21,
             size = 3,
             show.legend = F) +
  scale_y_continuous(limits = c(-0.5, 0.5),
                     breaks = c(-0.5, 0, 0.5),
                     labels = c("100% left", "50%", "100% right")) +
  # breaks restricts the legend to the two orientation fills only
  scale_fill_manual(values = c("forward" = "#1f77b4",
                               "backward" = "#E41A1C",
                               "features" = "#a020f0",
                               "abstraction" = "#ffc0cb"),
                    breaks = c("forward", "backward")) + 
  labs(x = "ramp orientation in training",
       y = "predicted direction",
       fill = "ramp orientation\nduring generalization") +
  theme(panel.grid.minor.y = element_line(),
        panel.grid.major.y = element_line(),
        legend.title = element_text(size = 20))

# saves the most recently displayed plot
ggsave(filename = "../../figures/plots/exp3_generalization_direction.pdf",
       width = 8,
       height = 5)

13.4.3.2 Position

# Index of the generalization panels: one row per combination of training
# orientation, absolute orientation, and trial type, ordered so the plot
# loop below can address panels by row number.
df.index = df.exp3.generalization %>% 
  select(ramp_orientation_training,
         ramp_orientation_absolute,
         trial_type) %>% 
  distinct() %>% 
  # fix factor order so "forward" sorts before "backward"
  mutate(ramp_orientation_training = factor(ramp_orientation_training,
                                            levels = c("forward", "backward")),
         ramp_orientation_absolute = factor(ramp_orientation_absolute,
                                            levels = c("forward", "backward"))) %>% 
  arrange(ramp_orientation_training,
          ramp_orientation_absolute,
          trial_type) %>% 
  # assumes exactly 8 rows after arrange(), alternating cube/ramp stimuli --
  # TODO(review): confirm this still holds if conditions ever change
  mutate(stimulus = rep(c("cube", "ramp"), 4))

# Build one bar plot per row of df.index (training orientation x trial type),
# comparing participants' selections against both models' predictions, and
# collect the plots in l.plots for later assembly with wrap_plots().
l.plots = list()

# seq_len() instead of 1:nrow() so the loop body is skipped if df.index is empty
for (i in seq_len(nrow(df.index))){
  # counts of each response option for this panel's condition, with
  # bootstrapped bounds joined in; .drop = FALSE keeps zero-count options
  df.plot = df.exp3.generalization %>%
    count(ramp_orientation_training,
          trial_type,
          selection,
          .drop = FALSE) %>% 
    ungroup() %>% 
    left_join(df.exp3.generalization.position.boot.aggregated, 
              by = c("ramp_orientation_training",
                     "trial_type",
                     "selection")) %>% 
    filter(ramp_orientation_training == df.index$ramp_orientation_training[i],
           trial_type == df.index$trial_type[i])
  
  # convert counts and bootstrap bounds to percentages of the panel total
  df.plot = df.plot %>%
    group_by(ramp_orientation_training,
             trial_type) %>%
    mutate(total = sum(n)) %>%
    ungroup() %>%
    mutate(n = (n / total) * 100,
           low = (low / total) * 100,
           high = (high / total) * 100)
  
  # load the stimulus image for one response option; the file name depends on
  # the current panel's stimulus and absolute orientation (closure over i)
  fun_load_image = function(image){
    readPNG(str_c("../../figures/stimuli/physics/generalization_",
                  df.index$stimulus[i],
                  "_",
                  df.index$ramp_orientation_absolute[i],
                  image,
                  ".png"))
  }
  
  # one image grob per response option, drawn above each facet
  df.images = df.plot %>% 
    distinct(selection) %>% 
    mutate(grob = map(.x = selection,
                      .f = ~ fun_load_image(image = .x)))
  
  # NOTE(review): df.text is computed but never used by the plot below --
  # confirm whether it can be removed
  df.text = df.plot %>% 
    distinct(selection) %>% 
    mutate(x = 1.3,
           y = 190,
           text = 1:4)
  
  # number of participants per training condition, used to scale the
  # abstraction model's predictions from proportions to expected counts
  df.n = df.exp3.generalization %>% 
    distinct(participant, ramp_orientation_training) %>% 
    count(ramp_orientation_training)
  
  # abstraction model predictions for this panel, rescaled to percentages
  df.model = df.exp3.generalization.model %>% 
    mutate(prediction = ifelse(ramp_orientation_training == "forward",
                               prediction * df.n$n[df.n$ramp_orientation_training == "forward"],
                               prediction * df.n$n[df.n$ramp_orientation_training == "backward"])) %>% 
    left_join(df.index,
              by = c("ramp_orientation_training", "ramp_orientation_absolute", "stimulus")) %>% 
    rename(selection = position) %>% 
    filter(ramp_orientation_training == df.index$ramp_orientation_training[i],
           trial_type == df.index$trial_type[i]) %>% 
    mutate(prediction = prediction / unique(df.plot$total) * 100)
  
  # feature model predictions for this panel, rescaled to percentages
  df.model_features = df.features_full %>% 
    filter(ramp_orientation_training == df.index$ramp_orientation_training[i],
           trial_type == df.index$trial_type[i]) %>% 
    mutate(fitted = fitted / unique(df.plot$total) * 100)
  
  # one facet per response option: gray bar = participants, pink point =
  # abstraction model, purple point = feature model, image grob on top
  p = ggplot(data = df.plot,
             mapping = aes(x = 1,
                           y = n)) + 
    geom_bar(stat = "identity",
             color = "black",
             fill = "gray80") + 
    geom_linerange(mapping = aes(ymin = low,
                                 ymax = high),
                   linewidth = 1) + 
    geom_point(data = df.model,
               mapping = aes(x = 0.8,
                             y = prediction),
               shape = 21,
               fill = "pink",
               size = 4) +
    geom_point(data = df.model_features,
               mapping = aes(x = 1.2,
                             y = fitted),
               shape = 21,
               fill = "purple",
               size = 4) +
    geom_custom(data = df.images,
                mapping = aes(data = grob,
                              x = -Inf,
                              y = Inf),
                grob_fun = function(x) rasterGrob(x,
                                                  interpolate = TRUE,
                                                  vjust = -0.25,
                                                  hjust = 0)) + 
    facet_grid(cols = vars(selection)) +
    labs(x = "response option",
         y = "% selected") + 
    scale_y_continuous(breaks = seq(0, 100, 20),
                       labels = function(x){str_c(x, "%")},
                       expand = expansion(add = c(3, 1))) + 
    # clip = "off" so the image grobs can extend above the panel
    coord_cartesian(clip = "off",
                    ylim = c(0, 100)) + 
    theme(legend.position = "none",
          panel.grid.major.y = element_line(),
          strip.background.x = element_blank(),
          strip.text.x = element_blank(),
          axis.text.x = element_blank(),
          axis.title.x = element_blank(),
          axis.line.x = element_blank(),
          axis.ticks.x = element_blank(),
          # large top margin leaves room for the stimulus images
          plot.margin = margin(t = 3.5, l = 0.2, r = 0.2, b = 0.1, unit = "cm"),
          plot.title = element_text(vjust = 16))
  
  l.plots[[i]] = p
}

# Assemble the four forward-training panels (rows 1-4 of df.index) into one
# tagged figure and save it.
wrap_plots(l.plots[1:4]) &
  plot_annotation(tag_levels = "A") &
  theme(plot.tag = element_text(face = "bold"))

ggsave(filename = "../../figures/plots/exp3_generalization_training_forward.pdf",
       width = 20,
       height = 10)

# Same assembly for the four backward-training panels (rows 5-8).
wrap_plots(l.plots[5:8]) &
  plot_annotation(tag_levels = "A") &
  theme(plot.tag = element_text(face = "bold"))

ggsave(filename = "../../figures/plots/exp3_generalization_training_backward.pdf",
       width = 20,
       height = 10)

14 Session info

R version 4.5.0 (2025-04-11)
Platform: aarch64-apple-darwin20
Running under: macOS Sequoia 15.7.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/Los_Angeles
tzcode source: internal

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] lubridate_1.9.4     forcats_1.0.0       stringr_1.5.1      
 [4] dplyr_1.1.4         purrr_1.1.0         readr_2.1.5        
 [7] tidyr_1.3.1         tibble_3.2.1        tidyverse_2.0.0    
[10] jsonlite_2.0.0      xtable_1.8-4        ggeffects_2.3.0    
[13] glue_1.8.0          egg_0.4.5           ggplot2_4.0.0      
[16] gridExtra_2.3       png_0.1-8           rsample_1.3.0      
[19] rstatix_0.7.2       ggtext_0.1.2        patchwork_1.3.2    
[22] janitor_2.2.1       tidybayes_3.0.7     corrr_0.4.4        
[25] lme4_1.1-37         Matrix_1.7-3        modelr_0.1.11      
[28] nnet_7.3-20         boot_1.3-31         Hmisc_5.2-3        
[31] brms_2.22.0         Rcpp_1.0.14         kableExtra_1.4.0   
[34] broom.mixed_0.2.9.6 emmeans_1.11.1      knitr_1.50         

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3   tensorA_0.36.2.1     rstudioapi_0.17.1   
  [4] magrittr_2.0.3       estimability_1.5.1   farver_2.1.2        
  [7] nloptr_2.2.1         rmarkdown_2.29       ragg_1.4.0          
 [10] vctrs_0.6.5          minqa_1.2.8          base64enc_0.1-3     
 [13] htmltools_0.5.8.1    distributional_0.5.0 broom_1.0.8         
 [16] Formula_1.2-5        StanHeaders_2.32.10  sass_0.4.10         
 [19] parallelly_1.45.0    bslib_0.9.0          htmlwidgets_1.6.4   
 [22] plyr_1.8.9           cachem_1.1.0         lifecycle_1.0.4     
 [25] pkgconfig_2.0.3      R6_2.6.1             fastmap_1.2.0       
 [28] rbibutils_2.3        future_1.58.0        snakecase_0.11.1    
 [31] digest_0.6.37        colorspace_2.1-1     furrr_0.3.1         
 [34] textshaping_1.0.1    labeling_0.4.3       timechange_0.3.0    
 [37] abind_1.4-8          mgcv_1.9-1           compiler_4.5.0      
 [40] bit64_4.6.0-1        withr_3.0.2          inline_0.3.21       
 [43] htmlTable_2.4.3      S7_0.2.0             backports_1.5.0     
 [46] carData_3.0-5        QuickJSR_1.7.0       pkgbuild_1.4.8      
 [49] MASS_7.3-65          loo_2.8.0            tools_4.5.0         
 [52] foreign_0.8-90       nlme_3.1-168         gridtext_0.1.5      
 [55] checkmate_2.3.2      reshape2_1.4.4       cluster_2.1.8.1     
 [58] generics_0.1.4       gtable_0.3.6         tzdb_0.5.0          
 [61] data.table_1.17.4    hms_1.1.3            utf8_1.2.5          
 [64] xml2_1.3.8           car_3.1-3            pillar_1.10.2       
 [67] ggdist_3.3.3         vroom_1.6.5          posterior_1.6.1     
 [70] splines_4.5.0        lattice_0.22-6       bit_4.6.0           
 [73] tidyselect_1.2.1     reformulas_0.4.1     arrayhelpers_1.1-0  
 [76] bookdown_0.43        svglite_2.2.1        stats4_4.5.0        
 [79] xfun_0.52            bridgesampling_1.1-2 matrixStats_1.5.0   
 [82] rstan_2.32.7         stringi_1.8.7        yaml_2.3.10         
 [85] evaluate_1.0.3       codetools_0.2-20     cli_3.6.5           
 [88] RcppParallel_5.1.10  rpart_4.1.24         systemfonts_1.2.3   
 [91] Rdpack_2.6.4         jquerylib_0.1.4      globals_0.18.0      
 [94] coda_0.19-4.1        svUnit_1.0.6         parallel_4.5.0      
 [97] rstantools_2.4.0     bayesplot_1.12.0     Brobdingnag_1.2-9   
[100] listenv_0.9.1        viridisLite_0.4.2    mvtnorm_1.3-3       
[103] scales_1.4.0         crayon_1.5.3         insight_1.4.2       
[106] rlang_1.1.6