1 Setup

1.1 Load packages

# library("ggtern")    # for ternary plots
# library("afex")      # for ANOVAs 
library("knitr")       # for knitting stuff
library("kableExtra")  # for markdown tables
library("DT")          # for nice tables in markdown 
library("lme4")        # for linear mixed effects models
library("broom.mixed") # for tidying mixed models results 
library("brms")        # Bayesian regression with Stan
library("corrr")       # for tidy correlation matrix
library("xtable")      # for latex tables
library("jsonlite")    # for reading json files
library("janitor")     # cleans stuff
library("Hmisc")       # bootstrapped confidence intervals
library("tidybayes")   # for Bayesian data analysis
library("png")         # adding pngs to images
library("grid")        # functions for dealing with images 
library("plotly")      # 3D scatter plot 
library("egg")         # for geom_custom
library("clValid")     # for validating clustering solutions 
library("patchwork")   # for figure panels
library("ggrepel")     # for labels in ggplot
library("tidyverse")   # for wrangling, plotting, etc. 

1.2 Helper functions

# setting some knitr options
opts_chunk$set(comment = "",
               fig.show = "hold")

# set the ggplot theme
theme_set(theme_classic())

# suppress summarise() grouping warning 
options(dplyr.summarise.inform = F)

# function for printing out html or latex tables 
print_table = function(data, format = "html", digits = 2){
  if(format == "html"){
    data %>% 
      kable(digits = digits) %>% 
      kable_styling()
  }else if(format == "latex"){
    data %>% 
      xtable(digits = digits) %>%
      print(include.rownames = F,
            booktabs = T)
  }
}

# root mean squared error
rmse = function(x, y){
  return(sqrt(mean((x - y)^2)))
}

actual_counterfactual_plot = function(data, text, ylabel) {
  p = ggplot(data = data, 
              mapping = aes(x = clipindex,
                            y = value,
                            fill = colorindex,
                            group = model)) +
    stat_summary(geom = "bar",
                 fun = "mean",
                 color = "black",
                 position = position_dodge(0.7),
                 width = 0.7) +
    stat_summary(geom = "linerange",
                 fun.data = "mean_cl_boot",
                 position = position_dodge(0.7),
                 size = 1) +
    geom_point(data = data %>% 
                 mutate(value = ifelse(model == "rating", value, NA)),
               position = position_jitterdodge(dodge.width = 0.7, jitter.width = 0.2),
               shape = 21,
               color = "black",
               size = 1,
               alpha = 0.2) +
    scale_fill_manual(values = c("red2", "green2", "black")) +
    facet_grid(index_actual ~ index_counterfactual) +
    geom_text(data = text %>%
                drop_na(label),
              mapping = aes(x = x, y = y, label = as.character(label)),
              size = 6,
              position = position_dodge(width = .7),
              hjust = 0.5) +
    coord_cartesian(xlim = c(0.5, 2.5),
                    ylim = c(-5, 105),
                    expand = F,
                    clip = "off") +
    labs(y = ylabel) +
    theme(text = element_text(size = 20),
          legend.position = "none",
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.y = element_text(margin = margin(0, 10, 0, 0)),
          panel.spacing.y = unit(0.8, "cm"),
          plot.margin = unit(c(0.2, 0.2, .8, .2), "cm"),
          axis.title.x = element_blank(),
          panel.grid = element_blank(),
          strip.background = element_blank(),
          panel.border = element_rect(colour = "black", fill = NA))
  # print(p)
}

actual_counterfactual_threeballs_plot = function(x, ylabel) {
  p = ggplot(data = x, 
              mapping = aes(x = ball,
                            y = value,
                            group = model,
                            fill = colorindex)) +
    stat_summary(fun = "mean",
                 geom = "bar",
                 color = "black",
                 position = position_dodge(0.9),
                 width = 0.9) +
    stat_summary(fun.data = "mean_cl_boot",
                 geom = "linerange",
                 color = "black",
                 position = position_dodge(0.9)) +
    geom_point(data = x %>% 
                 mutate(value = ifelse(model == "rating", value, NA)),
               position = position_jitterdodge(jitter.width = 0.3,
                                               jitter.height = 0,
                                               dodge.width = 0.9),
               shape = 21,
               color = "black",
               size = 1,
               alpha = 0.15) +
    facet_wrap(~clip, ncol = 8) +
    geom_text(data = df.text, 
              mapping = aes(x = x,
                            y = y,
                            label = as.character(label)),
              size = 5, 
              position = position_dodge(width = .9), 
              hjust = 0.5, 
              fontface = "bold") +
    coord_cartesian(xlim = c(0.5, 2.5), 
                    ylim = c(-5, 120), 
                    expand = F,
                    clip = "off") +
    scale_y_continuous(breaks = seq(0, 100, 25)) +
    scale_fill_manual(values = c("red2", "green2", "black")) +
    scale_color_manual(values = c("red2", "green2", "black")) +
    labs(y = ylabel) +
    theme(text = element_text(size = 16),
          legend.position = "none",
          axis.title.x = element_blank(),
          panel.spacing.y = unit(0.3, "cm"),
          panel.grid = element_blank(),
          strip.background = element_blank(),
          strip.text = element_blank(),
          panel.border = element_rect(colour = "black", fill = NA))
}

2 Preliminary study

2.1 Clips

2.2 Read in data

# causal judgments
df.study.causal = read.csv("../../data/study_causal.csv", 
                          header = T,
                          stringsAsFactors = F)

# counterfactual judgments
df.study.counterfactual = read.csv("../../data/study_counterfactual.csv",
                                  header = T,
                                  stringsAsFactors = F)

# Information about each clip 
df.study.clipinfo = tibble(clip = 1:18,
                          outcome_actual = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 
                                             1, 1, 0, 1, 1, 1, 1, 1, 1),
                          outcome_counterfactual = c(0, 0, 0, 1, 1, 1, 0, 0, 1, 
                                                     0, 1, 1, 0, 0, 1, 0, 1, 1),
                          index_actual = rep(c("actual miss", 
                                               "actual close", 
                                               "actual hit"), each = 6),
                          index_counterfactual = rep(rep(c("counterfactual miss",
                                                           "counterfactual close", 
                                                           "counterfactual hit"),
                                                         each = 2),
                                                     3)) %>% 
  mutate(index_actual = factor(index_actual, levels = c("actual miss", 
                                                        "actual close", 
                                                        "actual hit")),
         index_counterfactual = factor(index_counterfactual, 
                                       levels = c("counterfactual miss",
                                                  "counterfactual close", 
                                                  "counterfactual hit")))

# Structure data 
df.study.causal.long = df.study.causal$data %>%
  enframe() %>% 
  mutate(participant = 1:n()) %>% 
  separate_rows(value, sep = ",") %>% 
  mutate(name = rep(c("clip", "rating"), n()/2),
         order = rep(rep(1:18, each = 2), max(participant)),
         value = as.numeric(value)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  arrange(participant, clip, rating) %>% 
  left_join(df.study.clipinfo,
            by = "clip")

df.study.counterfactual.long = df.study.counterfactual$data %>%
  enframe() %>% 
  mutate(participant = 1:n()) %>% 
  separate_rows(value, sep = ",") %>% 
  mutate(name = rep(c("clip", "rating"), n()/2),
         order = rep(rep(1:18, each = 2), max(participant)),
         value = as.numeric(value)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  arrange(participant, clip, rating) %>% 
  left_join(df.study.clipinfo,
            by = "clip")

2.2.1 Demographics

# counterfactual condition 
df.study.counterfactual %>% 
  separate(extra,
           into = c("gender", "age", "difficulty", "smoothness", "time"),
           sep = ",") %>% 
  mutate(gender = factor(gender, levels = c(1, 2), labels = c("female", "male"))) %>% 
  select(gender:time) %>% 
  mutate_if(is.character, ~ as.numeric(.)) %>% 
  summarize(n = nrow(.),
            age_mean = mean(age) %>% round(0),
            age_sd = sd(age) %>% round(1),
            n_female = sum(gender == "female"),
            time_mean = mean(time) %>% round(2),
            time_sd = sd(time) %>% round(2)) %>% 
  print_table()
n age_mean age_sd n_female time_mean time_sd
41 32 8.8 19 7.5 3.12
# causal condition 
df.study.causal %>% 
  separate(extra,
           into = c("gender", "age", "difficulty", "smoothness", "time"),
           sep = ",") %>% 
  mutate(gender = factor(gender, levels = c(1, 2), labels = c("female", "male"))) %>% 
  select(gender:time) %>% 
  mutate_if(is.character, ~ as.numeric(.)) %>% 
  summarize(n = nrow(.),
            age_mean = mean(age) %>% round(0),
            age_sd = sd(age) %>% round(1),
            n_female = sum(gender == "female"),
            time_mean = mean(time) %>% round(2),
            time_sd = sd(time) %>% round(2)) %>% 
  print_table()
n age_mean age_sd n_female time_mean time_sd
41 33 10.3 21 9.65 6.58

2.3 Plots

2.3.1 Scatterplot

Relationship between counterfactual judgments and causal judgments.

set.seed(1)

df.plot = df.study.causal.long %>% 
  mutate(rating = abs(rating)) %>% 
  group_by(clip, outcome_actual, outcome_counterfactual,
           index_actual, index_counterfactual) %>% 
  summarize(rating = smean.cl.boot(rating)) %>% 
  mutate(index = c("cause_mean", "cause_low", "cause_high")) %>% 
  ungroup() %>% 
  pivot_wider(names_from = index,
              values_from = rating) %>% 
  left_join(df.study.counterfactual.long %>% 
              mutate(rating = ifelse(outcome_actual == 1, 100 - rating, rating)) %>% 
              group_by(clip, outcome_actual, outcome_counterfactual,
                       index_actual, index_counterfactual) %>% 
              summarize(rating = smean.cl.boot(rating)) %>% 
              mutate(index = c("counterfactual_mean", "counterfactual_low", "counterfactual_high")) %>% 
              pivot_wider(names_from = index,
                          values_from = rating),
            by = c("clip", "outcome_actual", "outcome_counterfactual", "index_actual", "index_counterfactual")) %>% 
  mutate(outcome_actual = as.factor(outcome_actual))

ggplot(data = df.plot,
       mapping = aes(x = counterfactual_mean, 
                     y = cause_mean)) +
  geom_abline(intercept = 0,
              slope = 1,
              linetype = 2) + 
  # geom_smooth(aes(y = estimate,
  #                 ymin = q2_5,
  #                 ymax = q97_5),
  #             stat = "identity",
  #             color = "black") +
  geom_linerange(size = 0.75, 
                 mapping = aes(ymin = cause_low,
                               ymax = cause_high),
                 color = "gray80") +
  geom_linerange(size = 0.75, 
                 mapping = aes(xmin = counterfactual_low,
                               xmax = counterfactual_high),
                 color = "gray80") +
  geom_text_repel(mapping = aes(label = clip,
                                color = outcome_actual), 
                  size = 6,
                  direction = "both",
                  show.legend = F,
                  seed = 1,
                  box.padding = 0.27
                  ) + 
  geom_point(size = 3,
             shape = 21, 
             stroke = 1,
             color = "black",
             mapping = aes(fill = outcome_actual),
             show.legend = F) +
  annotate(geom = "text",
           label = str_c(
             "r = ", cor(df.plot$cause_mean, df.plot$counterfactual_mean) %>%
               round(2) %>%
               as.character() %>%
               str_sub(start = 2),
             "\nRMSE = ", rmse(df.plot$cause_mean, df.plot$counterfactual_mean) %>%
               round(2)),
           x = -2,
           y = 95,
           size = 6,
           hjust = 0) +
  coord_fixed(xlim = c(0, 100),
              ylim = c(0, 100)) +
  labs(x = "counterfactual judgment",
       y = "causal judgment") +
  scale_color_manual(values = c("darkred", "darkgreen")) + 
  scale_fill_manual(values = c("red", "green")) + 
  scale_x_continuous(breaks = seq(0, 100, 25)) +
  scale_y_continuous(breaks = seq(0, 100, 25)) +
  theme(text = element_text(size = 20))

# ggsave("../../figures/plots/study_scatter.pdf",
#        width = 5,
#        height = 5)

3 Experiment 1: One candidate cause

3.1 Clips

3.2 Read in data

df.exp1.causal_first = read.csv("../../data/experiment1_causal_first.csv",
                                header = T,
                                stringsAsFactors = F)
df.exp1.counterfactual_first = read.csv("../../data/experiment1_counterfactual_first.csv",
                                        header = T,
                                        stringsAsFactors = F)

# Information about each clip

df.exp1.clipinfo = tibble(clip = 1:20,
                          outcome_actual = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 
                                             0, 1, 1, 1, 1, 1, 1, 1, 0, 1),
                          outcome_counterfactual = c(0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 
                                                     1, 1, 0, 0, 1, 0, 1, 1, 1, 1),
                          index_actual = c(rep(c("actual miss", 
                                                 "actual close",
                                                 "actual hit"),
                                               each = 6),
                                           rep("practice", 2)),
                          index_counterfactual = c(rep(rep(c("counterfactual miss",
                                                             "counterfactual close",
                                                             "counterfactual hit"),
                                                           each = 2), 
                                                       3),
                                                   rep("practice", 2))) %>% 
  mutate(index_actual = factor(index_actual, levels = c("actual miss", 
                                                        "actual close", 
                                                        "actual hit")),
         index_counterfactual = factor(index_counterfactual, 
                                       levels = c("counterfactual miss",
                                                  "counterfactual close", 
                                                  "counterfactual hit")))

# Structure data 
df.exp1.causal_first.long = df.exp1.causal_first$data %>% 
  enframe() %>% 
  mutate(participant = 1:n()) %>% 
  separate_rows(value, sep = ",") %>% 
  mutate(name = rep(c("clip", "rating"), n()/2),
         order = rep(rep(1:40, each = 2), max(participant)),
         value = as.numeric(value)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  mutate(question = rep(rep(c("causal", "counterfactual"), each = 20),
                        max(participant)),
         condition = "causal_first") %>% 
  left_join(df.exp1.clipinfo,
            by = "clip") %>%
  arrange(participant, question, clip) %>%
  select(participant, question, clip, rating, everything())

df.exp1.counterfactual_first.long = df.exp1.counterfactual_first$data %>% 
  enframe() %>% 
  mutate(participant = 1:n()) %>% 
  separate_rows(value, sep = ",") %>% 
  mutate(name = rep(c("clip", "rating"), n()/2),
         order = rep(rep(1:40, each = 2), max(participant)),
         value = as.numeric(value)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  mutate(question = rep(rep(c("counterfactual", "causal"), each = 20),
                        max(participant)),
         condition = "counterfactual_first") %>% 
  left_join(df.exp1.clipinfo,
            by = "clip") %>%
  arrange(participant, question, clip) %>%
  select(participant, question, clip, rating, everything())

# combine data from both conditions
df.exp1.combined.long = df.exp1.causal_first.long %>%
  bind_rows(df.exp1.counterfactual_first.long %>% 
              mutate(participant = participant + 
                       max(df.exp1.causal_first.long$participant)))

3.2.1 Demographics

df.exp1.counterfactual_first %>% 
  separate(extra,
           into = c("gender", "age", "difficulty", "smoothness", "time", "condition"),
           sep = ",") %>% 
  select(gender:time) %>% 
  bind_rows(df.exp1.causal_first %>% 
  separate(extra,
           into = c("gender", "age", "difficulty", "smoothness", "time", "condition"),
           sep = ",") %>% 
  select(gender:time)) %>% 
  mutate(gender = factor(gender, levels = c(1, 2), labels = c("female", "male"))) %>% 
  mutate_if(is.character, ~ as.numeric(.)) %>% 
  summarize(n = nrow(.),
            age_mean = mean(age) %>% round(0),
            age_sd = sd(age) %>% round(1),
            n_female = sum(gender == "female"),
            time_mean = mean(time) %>% round(2),
            time_sd = sd(time) %>% round(2)) %>% 
  print_table()
n age_mean age_sd n_female time_mean time_sd
82 35 12.2 45 21.25 5.11

3.2.2 Participants’ feedback

df.exp1.counterfactual_first %>% 
  select(feedback) %>% 
  bind_rows(df.exp1.causal_first %>% 
              select(feedback)) %>% 
  mutate(participant = 1:n()) %>% 
  relocate(participant) %>% 
  datatable()

3.3 Read in model predictions

3.3.1 Counterfactual condition

# read model predictions 
path = "../python/results/"
files = list.files(path = path, pattern = "*.csv")
files = files[str_detect(files, "teleport")]

df.exp1.model = files %>%
  map_dfr(~ read.csv(str_c(path, .))) %>%
  set_names(c("clip", "noise", "prediction")) %>%
  left_join(df.exp1.clipinfo, by = "clip") %>%
  mutate(prediction = ifelse(outcome_actual == 1, 1 - prediction, prediction))

# calculate mean counterfactual judgments 
df.exp1.counterfactual.means = df.exp1.combined.long %>%
  filter(question == "counterfactual",
         clip <= 18) %>%
  # mutate(rating = abs(rating)) %>% 
  group_by(clip) %>%
  summarize(rating = mean(rating)) %>%
  ungroup() %>%
  left_join(df.exp1.clipinfo, by = "clip")

# find noisy simulation model that best predicts the mean counterfactual judgments by 
# minimizing the sum of squared errors 
df.exp1.counterfactual.model = df.exp1.model %>% 
  group_by(noise) %>% 
  nest() %>% 
  mutate(sse = map(data, 
                   ~ sum((.x$prediction*100 - df.exp1.counterfactual.means$rating) ^ 2)),
         sse = unlist(sse)) %>% 
  ungroup() %>% 
  filter(sse == min(sse)) %>% 
  unnest(data) %>% 
  mutate(prediction = prediction * 100)

3.3.2 Causal condition

# Model predictions based on counterfactual judgments
df.exp1.causal.model = df.exp1.combined.long %>%
  filter(question == "counterfactual",
         clip <= 18) %>%
  group_by(clip, condition) %>%
  summarize(rating = mean(rating)) %>%
  pivot_wider(names_from = condition,
              values_from = rating) %>% 
  left_join(df.exp1.combined.long %>%
              filter(question == "counterfactual", clip <= 18) %>%
              group_by(clip) %>%
              summarize(combined = mean(rating)),
            by = "clip") %>% 
  left_join(df.exp1.counterfactual.model,
            by = "clip") %>% 
  select(-c(sse, noise)) %>% 
  rename(simulation = prediction) %>% 
  mutate(across(c(simulation, causal_first, counterfactual_first, combined), ~ ifelse(outcome_actual == 1, 100 - ., .)))

3.4 Counterfactual condition

3.4.1 Plots

set.seed(1)

df.plot = df.exp1.combined.long %>%
  filter(question == "counterfactual",
         clip <= 18) %>%
  mutate(clipindex = rep(c(1, 2), nrow(.) / 2)) %>%
  left_join(df.exp1.counterfactual.model,
            by = c("clip", 
                   "outcome_actual", 
                   "outcome_counterfactual", 
                   "index_actual", 
                   "index_counterfactual")) %>%
  pivot_longer(cols = c(rating, prediction),
               names_to = "model",
               values_to = "value") %>% 
  mutate(colorindex = ifelse(outcome_counterfactual == 1, 2, 1),
         colorindex = ifelse(model != "rating", 3, colorindex),
         colorindex = as.factor(colorindex),
         index_actual = factor(index_actual,
                               levels = c("actual miss", 
                                          "actual close", 
                                          "actual hit")),
         index_counterfactual = factor(index_counterfactual, 
                                       levels = c("counterfactual miss", 
                                                  "counterfactual close", 
                                                  "counterfactual hit")),
         model = factor(model,
                             levels = c("rating", "prediction")))

df.text = df.plot %>%
  expand(index_actual, index_counterfactual, clipindex, model) %>%
  mutate(label = rep(c("1 (A)", "2 (B)", "3", "4", "5 (A)", "6 (B)",
                       "7", "8 (C)", "9", "10", "11", "12 (C)",
                       "13 (D)", "14 (E)", "15", "16", "17 (D)", "18 (E)"),
                     each = 2),
         label = ifelse(model != "rating", NA, label),
         y = -15,
         x = clipindex,
         colorindex = NA)

p = actual_counterfactual_plot(df.plot, df.text, "counterfactual judgment")
print(p)

# ggsave("../../figures/plots/exp1_counterfactual_bars.pdf",
#        width = 8,
#        height = 6)

3.4.2 Stats

3.4.2.1 Check for potential order effects

# full model that takes condition into account 
fit.brm_exp1_counterfactual_full = brm(formula = rating ~ 1 + condition * 
                                         (index_actual + index_counterfactual) + 
                                         (1 + index_actual + 
                                            index_counterfactual | participant),
                                       data = df.exp1.combined.long %>% 
                                         filter(question == "counterfactual",
                                                clip <= 18),
                                       cores = 4,
                                       chains = 4,
                                       iter = 4000,
                                       seed = 1,
                                       file = "cache/brm_exp1_counterfactual_full")

# reduced model that ignores condition (i.e. the block order)
fit.brm_exp1_counterfactual_reduced = brm(formula = rating ~ 1 +  
                                            index_actual + index_counterfactual + 
                                            (1 + index_actual + 
                                               index_counterfactual | participant),
                                          data = df.exp1.combined.long %>% 
                                            filter(question == "counterfactual",
                                                   clip <= 18),
                                          cores = 4,
                                          chains = 4,
                                          iter = 4000,
                                          seed = 1,
                                          file = "cache/brm_exp1_counterfactual_reduced") 

# add loo 
fit.brm_exp1_counterfactual_reduced = add_criterion(fit.brm_exp1_counterfactual_reduced,
                                                    criterion = c("loo"),
                                                    reloo = T,
                                                    file = "cache/brm_exp1_counterfactual_reduced")

fit.brm_exp1_counterfactual_full = add_criterion(fit.brm_exp1_counterfactual_full,
                                                 criterion = c("loo"),
                                                 reloo = T,
                                                 file = "cache/brm_exp1_counterfactual_full")

# model comparison 
loo_compare(fit.brm_exp1_counterfactual_full, fit.brm_exp1_counterfactual_reduced)
                                    elpd_diff se_diff
fit.brm_exp1_counterfactual_reduced  0.0       0.0   
fit.brm_exp1_counterfactual_full    -3.5       0.7   

The reduced model fares better in the approximate cross-validation suggesting that there was no effect of condition (block order) on counterfactual judgments.

3.4.2.2 Model evaluation

df.exp1.counterfactual.means %>% 
  left_join(df.exp1.counterfactual.model,
            by = c("clip",
                   "outcome_actual",
                   "outcome_counterfactual",
                   "index_actual",
                   "index_counterfactual")) %>% 
  summarize(noise = unique(noise),
            simulation_r = cor(rating, prediction),
            simulation_rmse = rmse(rating, prediction),
            deterministic_r = cor(rating, outcome_counterfactual*100),
            deterministic_rmse = rmse(rating, outcome_counterfactual*100)) %>% 
  print_table()
noise simulation_r simulation_rmse deterministic_r deterministic_rmse
0.9 0.96 13.46 0.86 28.15

3.5 Causal condition

3.5.1 Plots

set.seed(1)

func_exp1_causal_plot = function(condition.name, model.name){

df.plot = df.exp1.combined.long %>%
  filter(question == "causal", clip <= 18) %>%
  filter(condition %in% condition.name) %>%
  mutate(rating = abs(rating),
         clipindex = rep(c(1, 2), nrow(.) / 2)) %>%
  left_join(df.exp1.causal.model,
            by = c("clip", 
                   "outcome_actual", 
                   "outcome_counterfactual", 
                   "index_actual", 
                   "index_counterfactual")) %>%
  pivot_longer(cols = c("rating", all_of(model.name)),
               names_to = "model",
               values_to = "value") %>% 
  mutate(model = factor(model, levels = c("rating", model.name))) %>%
  mutate(colorindex = ifelse(outcome_actual == 1, 2, 1),
         colorindex = ifelse(model != "rating", 3, colorindex),
         colorindex = as.factor(colorindex),
         index_actual = factor(index_actual, 
                               levels = c("actual miss", 
                                          "actual close", 
                                          "actual hit")),
         index_counterfactual = factor(index_counterfactual, 
                                       levels = c("counterfactual miss", 
                                                  "counterfactual close", 
                                                  "counterfactual hit")))

df.text = df.plot %>%
  expand(index_actual, index_counterfactual, clipindex, model) %>%
  mutate(label = rep(c("1 (A)", "2 (B)", "3", "4", "5 (A)", "6 (B)",
                       "7", "8 (C)", "9", "10", "11", "12 (C)",
                       "13 (D)", "14 (E)", "15", "16", "17 (D)", "18 (E)"),
                     each = 2),
         label = ifelse(model != "rating", NA, label),
         y = -15,
         x = clipindex,
         colorindex = NA)

actual_counterfactual_plot(df.plot, df.text, "causal judgment")
}

plot.list = list(func_exp1_causal_plot(condition.name = "counterfactual_first",
                                       model.name = "counterfactual_first"),
                 func_exp1_causal_plot(condition.name = "causal_first",
                                       model.name = "causal_first"))

wrap_plots(plot.list, ncol = 2)

# condition.name = "counterfactual_first"
# condition.name = "causal_first"
# model.name = "counterfactual_first"
# model.name = "causal_first"
# model.name = "combined"

# func_exp1_causal_plot(condition.name = condition.name,
#                       model.name = model.name)
# 
# ggsave(str_c("../../figures/plots/exp1_", condition.name ,"_causal_bars.pdf"),
#        width = 8,
#        height = 6)
Left panel: Counterfactual judgments first; right panel: Causal judgments first

Figure 3.1: Left panel: Counterfactual judgments first; right panel: Causal judgments first

3.5.2 Stats

3.5.2.1 Check for potential order effects

# full model that takes condition into account 
fit.brm_exp1_causal_full = brm(formula = rating ~ 1 + condition * 
                                 (index_actual + index_counterfactual) +
                                 (1 + index_actual + index_counterfactual | participant),
                               data = df.exp1.combined.long %>% 
                                 filter(question == "causal",
                                        clip <= 18),
                               cores = 4,
                               chains = 4,
                               iter = 4000,
                               seed = 1,
                               file = "cache/brm_exp1_causal_full")

# reduced model that ignores condition (i.e. the block order)
fit.brm_exp1_causal_reduced = brm(formula = rating ~ 1 + 
                                    index_actual + index_counterfactual +
                                    (1 + index_actual + index_counterfactual | participant),
                                  data = df.exp1.combined.long %>% 
                                    filter(question == "causal",
                                           clip <= 18),
                                  cores = 4,
                                  chains = 4,
                                  iter = 4000,
                                  seed = 1,
                                  file = "cache/brm_exp1_causal_reduced") 

# add loo 
fit.brm_exp1_causal_reduced = add_criterion(fit.brm_exp1_causal_reduced,
                                            criterion = c("loo"),
                                            reloo = T,
                                            file = "cache/brm_exp1_causal_reduced")

fit.brm_exp1_causal_full = add_criterion(fit.brm_exp1_causal_full,
                                         criterion = c("loo"),
                                         reloo = T,
                                         file = "cache/brm_exp1_causal_full")

# model comparison 
loo_compare(fit.brm_exp1_causal_full, fit.brm_exp1_causal_reduced)
                            elpd_diff se_diff
fit.brm_exp1_causal_full     0.0       0.0   
fit.brm_exp1_causal_reduced -3.2       3.8   

The full model fares better in the approximate cross-validation suggesting that condition (block order) affected participants’ causal judgments.

3.5.2.2 Model evaluation

df.data = df.exp1.combined.long %>%
  filter(question == "causal",
         clip <= 18) %>%
  mutate(rating = abs(rating)) %>%
  group_by(condition,
           clip,
           outcome_actual,
           outcome_counterfactual,
           index_actual,
           index_counterfactual) %>%
  summarize(rating = mean(rating)) %>%
  ungroup() %>%
  left_join(df.exp1.causal.model,
            by = c("clip",
                   "outcome_actual",
                   "outcome_counterfactual",
                   "index_actual",
                   "index_counterfactual"))

df.data %>% 
  group_by(condition) %>% 
  summarize(empirical_r = cor(rating, combined),
            empirical_rmse = rmse(rating, combined)) %>% 
  print_table()
condition empirical_r empirical_rmse
causal_first 0.92 13.78
counterfactual_first 0.99 5.27
df.data %>%
  summarize(empirical_r = cor(rating, combined),
            empirical_rmse = rmse(rating, combined),
            simulation_r = cor(rating, simulation),
            simulation_rmse = rmse(rating, simulation)) %>%
  print_table()
empirical_r empirical_rmse simulation_r simulation_rmse
0.95 10.43 0.92 19.09

4 Experiment 2: Two candidate causes

4.1 Clips

4.2 Read in data

# clipinfo
df.exp2.clipinfo = tibble(clip = 1:32,
                          outcome_both = c(0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 
                                           0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1),
                          outcome_a = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 
                                        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1),
                          outcome_b = c(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 
                                        0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1),
                          outcome_none = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                                           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
                          clipindex = rep(1:2, 16))

# COUNTERFACTUAL JUDGMENTS 
df.exp2.counterfactual = read.csv("../../data/experiment2_counterfactual.csv",
                                  header = T,
                                  stringsAsFactors = F)

# demographics
df.exp2.counterfactual.demographics = df.exp2.counterfactual$extra %>% 
  enframe() %>% 
  separate_rows(value) %>% 
  mutate(value = as.numeric(value),
         name = rep(c("gender", "age", "difficulty",
                      "smoothness", "time", "condition"),
                    n()/6),
         participant = rep(1:(n()/6), each = 6)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  mutate(condition = factor(condition, levels = 20:21, labels = c("A", "B")),
         gender = factor(gender, levels = 1:2, labels = c("female", "male"))) %>% 
  select(participant, condition, gender, age, time, smoothness, difficulty)

# judgments
df.exp2.counterfactual.long = df.exp2.counterfactual$data %>%
  enframe() %>% 
  separate_rows(value) %>% 
  mutate(value = as.numeric(value),
         name = rep(c("clip", "rating"), n()/2),
         participant = rep(1:(n()/64), each = 64),
         order = rep(rep(1:32, each = 2), n()/64)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  left_join(df.exp2.counterfactual.demographics %>% 
              select(participant, ball = condition),
            by = "participant") %>% 
  select(participant, ball, clip, everything()) %>% 
  arrange(participant, clip)

# CAUSAL JUDGMENTS
df.exp2.causal = read.csv("../../data/experiment2_causal.csv",
                          header = T,
                          stringsAsFactors = F)

# demographics 
df.exp2.causal.demographics = df.exp2.causal$extra %>% 
  enframe() %>% 
  separate_rows(value) %>% 
  mutate(value = as.numeric(value),
         name = rep(c("gender", "age", "difficulty", "smoothness", "time", 
                      "counterbalance", "replayType", "experiment"),
                    n()/8),
         participant = rep(1:(n()/8), each = 8)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  mutate(counterbalance = ifelse(participant %in% c(1, 3, 5, 6, 9), 1, counterbalance),
         gender = factor(gender, levels = 1:2, labels = c("female", "male"))) %>% 
  select(participant, gender, age, counterbalance, time, smoothness, difficulty)

# judgments
df.exp2.causal.long = df.exp2.causal$data %>%
  enframe() %>% 
  separate_rows(value) %>% 
  mutate(value = as.numeric(value),
         name = rep(c("clip", "x", "y", "z", "rating1", "rating2"), n()/6),
         participant = rep(1:(n()/(32*6)), each = (32*6)),
         order = rep(rep(1:32, each = 6), n()/(32*6))) %>% 
  filter(!name %in% c("x", "y", "z")) %>% 
pivot_wider(names_from = name,
            values_from = value) %>% 
  left_join(df.exp2.causal.demographics %>% 
              select(participant, counterbalance),
            by = "participant") %>% 
  mutate(A = ifelse(counterbalance == 1, rating1, rating2),
         B = ifelse(counterbalance == 1, rating2, rating1)) %>% 
  select(-rating1, -rating2) %>% 
  pivot_longer(cols = c(A, B),
               names_to = "ball",
               values_to = "rating") %>% 
  select(participant, clip, ball, rating, order) %>% 
  arrange(participant, clip, ball)

4.2.1 Demographics

# counterfactual condition 
df.exp2.counterfactual.demographics %>% 
  summarize(n = nrow(.),
            age_mean = mean(age) %>% round(0),
            age_sd = sd(age) %>% round(1),
            n_female = sum(gender == "female"),
            time_mean = mean(time) %>% round(2),
            time_sd = sd(time) %>% round(2)) %>% 
  print_table()
n age_mean age_sd n_female time_mean time_sd
80 33 10.1 34 18.08 4.63
# causal condition 
df.exp2.causal.demographics %>% 
  summarize(n = nrow(.),
            age_mean = mean(age) %>% round(0),
            age_sd = sd(age) %>% round(1),
            n_female = sum(gender == "female"),
            time_mean = mean(time) %>% round(2),
            time_sd = sd(time) %>% round(2)) %>% 
  print_table()
n age_mean age_sd n_female time_mean time_sd
41 34 10.5 21 21.19 4.96

4.2.2 Participants’ feedback

df.exp2.counterfactual %>%
  select(feedback) %>%
  mutate(condition = "counterfactual",
         participant = 1:n()) %>% 
  bind_rows(df.exp2.causal %>%
              select(feedback) %>% 
              mutate(condition = "causal",
                     participant = 1:n())) %>%
  relocate(participant, condition) %>%
  datatable()

4.3 Read in model predictions

# read model predictions 
path = "../python/results/"
files = list.files(path = path, pattern = "*.csv")
files = files[str_detect(files, "3ball")]

df.exp2.model = files %>%
  map_dfr(~ read_csv(str_c(path, .))) %>% 
  rename(clip = trial)

4.3.1 Counterfactual condition

# calculate mean counterfactual judgments 
df.exp2.counterfactual.means = df.exp2.counterfactual.long %>% 
  group_by(clip, ball) %>%
  summarize(rating_mean = mean(rating),
            rating_low = smean.cl.boot(rating)[2],
            rating_high = smean.cl.boot(rating)[3]) %>% 
  ungroup()

# find noisy simulation model that best predicts the mean counterfactual 
# judgments by minimizing the sum of squared errors 
df.exp2.model.counterfactual = df.exp2.model %>% 
  group_by(noise) %>% 
  select(clip, contains("whether"), noise) %>% 
  pivot_longer(cols = c(A_whether, B_whether),
               names_to = "ball",
               values_to = "prediction") %>% 
  mutate(ball = str_remove(ball, "_whether")) %>% 
  arrange(noise, clip, ball) %>% 
  left_join(df.exp2.clipinfo, by = "clip") %>% 
  mutate(prediction = ifelse(outcome_both == 1, 1 - prediction, prediction)) %>% 
  group_by(noise) %>% 
  nest() %>% 
  mutate(sse = map(data,
                   ~ sum((.x$prediction*100 -
                            df.exp2.counterfactual.means$rating_mean) ^ 2)),
         sse = unlist(sse)) %>% 
  ungroup() %>% 
  filter(sse == min(sse)) %>% 
  unnest(data) %>% 
  mutate(prediction = prediction * 100)

4.3.2 Causal condition

# calculate mean causal judgments 
df.exp2.causal.means = df.exp2.causal.long %>% 
  group_by(clip, ball) %>%
  summarize(rating_mean = mean(rating),
            rating_low = smean.cl.boot(rating)[2],
            rating_high = smean.cl.boot(rating)[3]) %>% 
  ungroup()

df.exp2.model.causal = df.exp2.model %>% 
  # take into account difference-making 
  mutate(across(A_how:A_robust,
                ~ . * A_difference),
         across(B_how:B_robust,
                ~ . * B_difference)) %>% 
  # choose model based on best fit with counterfactual data 
  filter(noise == unique(df.exp2.model.counterfactual$noise)) %>% 
  pivot_longer(cols = A_difference:B_robust,
               names_to = c("ball", "aspect"),
               names_sep = "_",
               values_to = "value") %>% 
  pivot_wider(names_from = aspect,
              values_from = value) %>% 
  select(clip, ball:robust)

4.3.3 Heuristic model

l.features = fromJSON("data/features.json")

df.features.initial = l.features[["appearance"]] %>%
  cbind(l.features[["initialVelocity"]][["ballA"]]) %>%
  cbind(l.features[["initialVelocity"]][["ballB"]]) %>%
  cbind(l.features[["initialVelocity"]][["ballE"]]) %>%
  setNames(c(str_c("ball", LETTERS[c(1, 2, 5)], "_appearance"),
             "ballA_velx",
             "ballA_vely",
             "ballB_velx",
             "ballB_vely",
             "ballE_velx",
             "ballE_vely")) %>%
  mutate(clip = 1:nrow(.)) %>%
  pivot_longer(cols = starts_with("ball")) %>%
  separate(name, into = c("ball", "property")) %>%
  pivot_wider(names_from = property, values_from = value) %>% 
  mutate(ball = str_remove_all(ball, pattern = "ball"))

# information about collisions
df.features.collisions = data.frame()
ballnames = c("ballA", "ballB", "ballE")

for (i in 1:32) {
  df.tmp = data.frame()
  for (j in 1:length(ballnames)) {
    ncollisions = length(l.features[["collisions"]][[ballnames[j]]][["object"]][[i]])
    if (ncollisions > 0) {
      tmp = data.frame(
        ball = ballnames[j],
        object = l.features[["collisions"]][[ballnames[j]]][["object"]][[i]],
        time = l.features[["collisions"]][[ballnames[j]]][["time"]][[i]],
        pre.velx = l.features[["collisions"]][[ballnames[j]]][["pre"]][[i]][["x"]],
        pre.vely = l.features[["collisions"]][[ballnames[j]]][["pre"]][[i]][["y"]],
        post.velx = l.features[["collisions"]][[ballnames[j]]][["post"]][[i]][["x"]],
        post.vely = l.features[["collisions"]][[ballnames[j]]][["post"]][[i]][["y"]],
        ncollision = 1:ncollisions
      )
      df.tmp = df.tmp %>%
        rbind(tmp)
    }
  }
  df.features.collisions = df.features.collisions %>%
    rbind(df.tmp %>%
            mutate(clip = i) %>%
            select(clip, ball, everything()))
}

# find end of clip
tmp = df.features.collisions %>%
  group_by(clip) %>%
  filter(ball == "ballE",
         str_detect(object, "goal|Left|Right")) %>%
  group_by(clip) %>%
  mutate(endclip = max(time)) %>%
  select(clip, endclip) %>%
  ungroup() %>%
  distinct()

# remove events that happen after the end of the clip
df.features.collisions = df.features.collisions %>%
  left_join(tmp, by = "clip") %>%
  mutate(endclip = ifelse(is.na(endclip), 400, endclip)) %>%
  group_by(clip) %>%
  filter(time <= endclip)

# E initially at rest or moving?
tmp.movingE = df.features.initial %>%
  filter(ball == "E") %>%
  mutate(E.moving = ifelse(velx == 0 & vely == 0, 1, 0)) %>%
  select(clip, E.moving)

# contact with ball E
tmp.contactE = df.features.collisions %>%
  filter(object == "ballE") %>%
  mutate(ball = factor(ball,
                       levels = c("ballA", "ballB"),
                       labels = c("A", "B"))) %>%
  mutate(contactE = 1) %>%
  select(clip, ball, contactE) %>%
  group_by(clip, ball) %>%
  summarize(contactE = any(contactE %>% as.logical()) * 1) %>%
  left_join(expand.grid(clip = 1:32, ball = c("A", "B")), .,
            by = c("clip", "ball")) %>%
  mutate(contactE = ifelse(is.na(contactE), 0, contactE))

# number of collisions
tmp.ncollisions = df.features.collisions %>%
  filter(str_detect(object, "ball"),
         ball != "ballE") %>%
  mutate(ball = factor(ball,
                       levels = c("ballA", "ballB"),
                       labels = c("A", "B"))) %>%
  group_by(clip, ball) %>%
  count() %>%
  rename(ncollision = n)

# exclusivity (no contact with ball E by the other ball)
tmp.exclusive = df.features.collisions %>%
  filter(str_detect(object, "ball"),
         ball != "ballE") %>%
  count(clip, ball, object) %>%
  mutate(AE = ifelse(ball == "ballA" & object == "ballE", 1, 0),
         BE = ifelse(ball == "ballB" & object == "ballE", 1, 0)) %>%
  group_by(clip) %>%
  summarize(AE = any(AE %>% as.logical()) * 1,
            BE = any(BE %>% as.logical()) * 1) %>%
  mutate(A = ifelse(AE == 1 & BE == 0, 1, 0),
         B = ifelse(AE == 0 & BE == 1, 1, 0)) %>%
  select(clip, A, B) %>%
  pivot_longer(cols = -clip,
               names_to = "ball",
               values_to = "exclusive")

# impact
func_angle = function(x, y) {
  dot.prod = x %*% y
  norm.x = norm(x, type = "2")
  norm.y = norm(y, type = "2")
  theta = acos(dot.prod / (norm.x * norm.y))
  as.numeric(theta)
}

# impact on ball E 
tmp.impactE = df.features.collisions %>%
  rowwise() %>%
  filter(str_detect(object, "ball"),
         ball == "ballE") %>%
  mutate(pre.speed = abs(pre.velx) + abs(pre.vely),
         post.speed = abs(post.velx) + abs(post.vely),
         speed.diff = post.speed - pre.speed,
         direction.diff = func_angle(c(pre.velx, pre.vely),
                                     c(post.velx, post.vely)),
         direction.diff = ifelse(is.na(direction.diff),
                                 abs(atan2(post.vely - pre.vely,
                                           post.velx - pre.velx)),
                                 direction.diff)) %>%
  ungroup() %>%
  group_by(clip, object) %>%
  summarize(speed.diff = sum(speed.diff),
            direction.diff = sum(direction.diff)) %>%
  rename(ball = object) %>%
  mutate(ball = factor(ball,
                       levels = c("ballA", "ballB"),
                       labels = c("A", "B"))) %>%
  left_join(expand.grid(clip = 1:32, ball = c("A", "B")), .,
            by = c("clip", "ball")) %>%
  mutate(across(c(speed.diff, direction.diff),
                   ~ ifelse(is.na(.), 0, .))) %>%
  arrange(clip, ball) %>%
  rename(E.speed.diff = speed.diff,
         E.direction.diff = direction.diff)

# total impact 
tmp.impactTotal = df.features.collisions %>%
  rowwise() %>%
  filter(str_detect(object, "ballA|ballB")) %>%
  mutate(pre.speed = abs(pre.velx) + abs(pre.vely),
         post.speed = abs(post.velx) + abs(post.vely),
         speed.diff = post.speed - pre.speed,
         direction.diff = func_angle(c(pre.velx, pre.vely),
                                     c(post.velx, post.vely)),
         direction.diff = ifelse(is.na(direction.diff),
                                 abs(atan2(post.vely - pre.vely,
                                           post.velx - pre.velx)),
                                 direction.diff)) %>%
  ungroup() %>%
  group_by(clip, object) %>%
  summarize(speed.diff = sum(speed.diff),
            direction.diff = sum(direction.diff)) %>%
  rename(ball = object) %>%
  mutate(ball = factor(ball,
                       levels = c("ballA", "ballB"),
                       labels = c("A", "B"))) %>%
  left_join(expand.grid(clip = 1:32, ball = c("A", "B")), .,
            by = c("clip", "ball")) %>%
  mutate(across(c(speed.diff, direction.diff),
                ~ ifelse(is.na(.), 0, .))) %>%
  arrange(clip, ball) %>%
  rename(total.speed.diff = speed.diff,
         total.direction.diff = direction.diff)

# transfer of force 
tmp.transfer = df.features.collisions %>%
  filter(str_detect(ball, "ballA|ballB"),
         str_detect(object, "ball")) %>%
  mutate(AE = ifelse(ball == "ballA" & object == "ballE", 1, NA),
         BE = ifelse(ball == "ballB" & object == "ballE", 1, NA),
         AB = ifelse((ball == "ballA" & object == "ballB"), 1, NA)) %>%
  mutate(across(c(AE, BE, AB), ~ . * time)) %>%
  filter(!is.na(AE) | !is.na(BE) | !is.na(AB)) %>%
  arrange(clip, time) %>%
  group_by(clip) %>%
  summarize(A = any(!is.na(AE)),
            B = any(!is.na(BE)),
            A = ifelse(any(!is.na(AB)) &
                         any(!is.na(BE)) &
                         max(AB, na.rm = T) < min(BE, na.rm = T),
                       T,
                       A),
            B = ifelse(any(!is.na(AB)) &
                         any(!is.na(AE)) &
                         max(AB, na.rm = T) < min(AE, na.rm = T),
                       T,
                       B)) %>%
  pivot_longer(cols = -clip,
               names_to = "ball",
               values_to = "transfer") %>% 
  arrange(clip, ball)

# collect features
df.features = df.features.initial %>%
  filter(ball != "E") %>%
  mutate(ball = factor(ball)) %>%
  mutate(moving = ifelse(velx == 0 & vely == 0, 0, 1),
         speed = abs(velx) + abs(vely)) %>%
  left_join(tmp.contactE) %>%
  left_join(tmp.impactE) %>%
  left_join(tmp.impactTotal) %>%
  left_join(tmp.transfer %>% 
              mutate(transfer = transfer * 1)) %>%
  left_join(tmp.movingE) %>%
  left_join(tmp.exclusive) %>%
  select(-c(appearance, velx, vely))

# remove temporary variables
rm(list = ls()[str_detect(ls(), "tmp")])

4.4 Counterfactual condition

4.4.1 Stats

4.4.1.1 Model evaluation

# best fitting model 
df.exp2.counterfactual.means %>% 
  left_join(df.exp2.model.counterfactual,
            by = c("clip", "ball")) %>% 
  summarize(noise = unique(noise),
            simulation_r = cor(rating_mean, prediction),
            simulation_rmse = rmse(rating_mean, prediction)) %>% 
  print_table()
noise simulation_r simulation_rmse
2 0.86 19.84
# deterministic model 
df.exp2.counterfactual.means %>% 
  left_join(df.exp2.clipinfo %>% 
              select(clip, outcome_a, outcome_b) %>% 
              pivot_longer(cols = -clip,
                           names_to = "ball",
                           values_to = "outcome") %>% 
              mutate(ball = factor(ball,
                                   levels = c("outcome_a", "outcome_b"),
                                   labels = c("B", "A"))),
            by = c("clip", "ball")) %>% 
  mutate(outcome = outcome * 100) %>% 
  summarize(simulation_r = cor(rating_mean, outcome),
            simulation_rmse = rmse(rating_mean, outcome)) %>% 
  print_table()
simulation_r simulation_rmse
0.83 30.28

4.4.2 Plots

4.4.2.1 Bar graph

set.seed(1)

df.plot = df.exp2.counterfactual.long %>%
  left_join(df.exp2.model.counterfactual %>%
              select(clip, ball, prediction) %>%
              mutate(ball = as.factor(ball)),
            by = c("clip", "ball")) %>%
  left_join(df.exp2.clipinfo,
            by = "clip") %>% 
  pivot_longer(cols = c(rating, prediction),
               names_to = "model",
               values_to = "value") %>% 
  mutate(colorindex = 1,
         colorindex = ifelse(model == "rating" & ball == "A" & outcome_b == 1,
                             2,
                             colorindex),
         colorindex = ifelse(model == "rating" & ball == "B" & outcome_a == 1,
                             2,
                             colorindex),
         colorindex = ifelse(model == "prediction", 3, colorindex),
         colorindex = as.factor(colorindex),
         model = factor(model, levels = c("rating", "prediction"))) %>%
  arrange(participant, clip, ball)

df.text = df.plot %>%
  expand(ball, clip, model) %>%
  mutate(label = ifelse(model == "rating" & ball == "A", clip, NA),
         y = 110,
         x = 1.5,
         colorindex = NA)

p = actual_counterfactual_threeballs_plot(df.plot, "counterfactual judgment")
print(p)

# ggsave("../../figures/plots/exp2_counterfactual_bars.pdf",
#        width = 12,
#        height = 6)

4.5 Causal condition

4.5.1 Stats

4.5.1.1 Bayesian mixed effects model

df.data = df.exp2.causal.long %>% 
  left_join(df.exp2.model.causal,
            by = c("clip", "ball"))

fit.brm_exp2_causal_w = brm(formula = rating ~ 1 + whether + 
                              (1 + whether | participant),
                            data = df.data,
                            cores = 4,
                            chains = 4,
                            iter = 4000,
                            seed = 1,
                            file = "cache/brm_exp2_causal_w")

fit.brm_exp2_causal_wh = brm(formula = rating ~ 1 + whether + how +
                               (1 + whether + how | participant),
                             data = df.data,
                             cores = 4,
                             chains = 4,
                             iter = 4000,
                             seed = 1,
                             file = "cache/brm_exp2_causal_wh")

fit.brm_exp2_causal_whs = brm(formula = rating ~ 1 + whether + how + sufficient +
                                (1 + whether + how + sufficient | participant),
                              data = df.data,
                              cores = 4,
                              chains = 4,
                              iter = 4000,
                              seed = 1,
                              file = "cache/brm_exp2_causal_whs")

4.5.1.2 Model comparison

fit.brm_exp2_causal_w = add_criterion(fit.brm_exp2_causal_w,
                                      criterion = c("loo"),
                                      reloo = T,
                                      file = "cache/brm_exp2_causal_w")

fit.brm_exp2_causal_wh = add_criterion(fit.brm_exp2_causal_wh,
                                       criterion = c("loo"),
                                       reloo = T,
                                       file = "cache/brm_exp2_causal_wh")

fit.brm_exp2_causal_whs = add_criterion(fit.brm_exp2_causal_whs,
                                        criterion = c("loo"),
                                        reloo = T,
                                        file = "cache/brm_exp2_causal_whs")

loo_compare(fit.brm_exp2_causal_w,
            fit.brm_exp2_causal_wh,
            fit.brm_exp2_causal_whs)
                        elpd_diff se_diff
fit.brm_exp2_causal_whs    0.0       0.0 
fit.brm_exp2_causal_wh  -107.0      14.7 
fit.brm_exp2_causal_w   -383.5      30.1 

4.5.1.3 Heuristic model

# Regression based on features
df.regression.features = df.exp2.causal.long %>%
  left_join(df.exp2.clipinfo, by = "clip") %>% 
  left_join(df.features %>% 
              mutate(across(moving:exclusive, ~ scale(.)[,])),
            by = c("clip", "ball")) %>%
  clean_names() %>% 
  mutate(e_moving = 1 - e_moving)

# restrict the weights such that all predictors are positive
priors = c(set_prior("normal(0,10)", class = "b", lb = 0))

fit.brm_exp2_causal_heuristic = brm(formula = rating ~ moving + 
                                      speed + 
                                      contact_e + 
                                      e_speed_diff + 
                                      e_direction_diff + 
                                      total_speed_diff + 
                                      total_direction_diff + 
                                      transfer + 
                                      e_moving + 
                                      exclusive + (1 | participant),
                                    prior = priors,
                                    data = df.regression.features %>% 
                                      select(-c(clip, ball, order, clipindex,
                                                contains("outcome"))),
                                    cores = 4,
                                    chains = 4,
                                    iter = 4000,
                                    seed = 1,
                                    file = "cache/brm_exp2_causal_heuristic")
fit.brm_exp2_causal_heuristic
 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: rating ~ moving + speed + contact_e + e_speed_diff + e_direction_diff + total_speed_diff + total_direction_diff + transfer + e_moving + exclusive + (1 | participant) 
   Data: df.regression.features %>% select(-c(clip, ball, o (Number of observations: 2624) 
Samples: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
         total post-warmup samples = 8000

Group-Level Effects: 
~participant (Number of levels: 41) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     8.57      1.23     6.42    11.25 1.00     2557     4356

Population-Level Effects: 
                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
Intercept               49.73      1.48    46.87    52.70 1.00     2235
moving                   0.22      0.21     0.00     0.79 1.00     7506
speed                    2.08      0.84     0.45     3.73 1.00     3648
contact_e                0.38      0.36     0.01     1.35 1.00     7037
e_speed_diff             0.12      0.12     0.00     0.46 1.00     8369
e_direction_diff         1.06      0.73     0.06     2.76 1.00     5417
total_speed_diff         2.19      0.95     0.42     4.09 1.00     3860
total_direction_diff     3.99      0.90     2.21     5.69 1.00     5958
transfer                15.59      0.80    13.98    17.14 1.00     7961
e_moving                 0.18      0.18     0.00     0.65 1.00     8827
exclusive                4.38      0.71     3.00     5.79 1.00     9485
                     Tail_ESS
Intercept                3643
moving                   3506
speed                    2114
contact_e                4269
e_speed_diff             4016
e_direction_diff         4198
total_speed_diff         2781
total_direction_diff     4094
transfer                 5368
e_moving                 4555
exclusive                4992

Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma    33.20      0.46    32.32    34.12 1.00    13320     5610

Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

4.5.1.4 Predictions for mean causal judgments

func_fit_data = function(fit){
  fit %>% 
    fitted(newdata = df.exp2.model.causal %>% 
             left_join(df.features %>% 
                         mutate(across(moving:exclusive,
                                       ~ scale(.)[,])),
                       by = c("clip", "ball")) %>%
             clean_names() %>% 
             mutate(e_moving = 1 - e_moving),
           re_formula = NA) %>% 
    as_tibble() %>% 
    pull(Estimate)
}

df.exp2.causal.regression = df.exp2.causal.means %>% 
  mutate(w = func_fit_data(fit.brm_exp2_causal_w),
         wh = func_fit_data(fit.brm_exp2_causal_wh),
         whs = func_fit_data(fit.brm_exp2_causal_whs),
         heuristic = func_fit_data(fit.brm_exp2_causal_heuristic))

4.5.1.5 Model evaluation

prediction = "whs"

df.exp2.causal.regression %>% 
  summarize(simulation_r = cor(rating_mean, .[[prediction]]),
            simulation_rmse = rmse(rating_mean, .[[prediction]])) %>% 
  print_table()
simulation_r simulation_rmse
0.87 13.06

4.5.1.6 Individual participant regressions

This code chunk takes a long time to compute but needs to be run once to generate the .RData file with individual data fits. The file was larger than 100mb so we couldn’t upload it to github.

df.fit = df.exp2.causal.long %>% 
  left_join(df.exp2.model.causal,
            by = c("clip", "ball"))

# priors 
priors = c(set_prior("normal(0,10)", class = "b", lb = 0))

# initial model fits (used for compilation)
fit.brm_exp2_causal_individual_baseline = 
  brm(formula = rating ~ 1,
      data = df.fit %>% 
        filter(participant == 1),
      cores = 4,
      chains = 4,
      iter = 4000,
      seed = 1,
      file = str_c("cache/brm_exp2_causal_individual_baseline"))

fit.brm_exp2_causal_individual_w = 
  brm(formula = rating ~ 1 + whether,
      data = df.fit %>% 
        filter(participant == 1),
      prior = priors,
      cores = 4,
      chains = 4,
      iter = 4000,
      seed = 1,
      file = str_c("cache/brm_exp2_causal_individual_w"))

fit.brm_exp2_causal_individual_wh = 
  brm(formula = rating ~ 1 + whether + how,
      data = df.fit %>% 
        filter(participant == 1),
      prior = priors,
      cores = 4,
      chains = 4,
      iter = 4000,
      seed = 1,
      file = str_c("cache/brm_exp2_causal_individual_wh"))

fit.brm_exp2_causal_individual_whs = 
  brm(formula = rating ~ 1 + whether + how + sufficient,
      data = df.fit %>% 
        filter(participant == 1),
      prior = priors,
      cores = 4,
      chains = 4,
      iter = 4000,
      seed = 1,
      file = str_c("cache/brm_exp2_causal_individual_whs"))

# update model fits for different participants
df.exp2.causal.individual_fit = df.fit %>%
  group_by(participant) %>%
  nest() %>%
  ungroup() %>% 
  # fit model for each participant
  mutate(fit_baseline = map(.x = data,
                            .f = ~ update(fit.brm_exp2_causal_individual_baseline,
                                          newdata = .x)),
         fit_w = map(.x = data,
                     .f = ~ update(fit.brm_exp2_causal_individual_w,
                                   newdata = .x)),
         fit_wh = map(.x = data,
                      .f = ~ update(fit.brm_exp2_causal_individual_wh,
                                    newdata = .x)),
         fit_whs = map(.x = data,
                       .f = ~ update(fit.brm_exp2_causal_individual_whs,
                                     newdata = .x))) %>% 
  mutate(fit_baseline = map(.x = fit_baseline,
                            ~ add_criterion(.x,
                                            criterion = c("loo"))),
         fit_w = map(.x = fit_w,
                     ~ add_criterion(.x,
                                     criterion = c("loo"))),
         fit_wh = map(.x = fit_wh,
                      ~ add_criterion(.x,
                                      criterion = c("loo"))),
         fit_whs = map(.x = fit_whs,
                       ~ add_criterion(.x,
                                       criterion = c("loo"))),
         r_w = map2_dbl(.x = data,
                        .y = fit_w,
                        .f = ~ cor(.x$rating, .y %>% 
                                     fitted() %>% 
                                     .[, 1])),
         r_wh = map2_dbl(.x = data,
                         .y = fit_wh,
                         .f = ~ cor(.x$rating, .y %>% 
                                      fitted() %>% 
                                      .[, 1])),
         r_whs = map2_dbl(.x = data,
                          .y = fit_whs,
                          .f = ~ cor(.x$rating, .y %>% 
                                       fitted() %>% 
                                       .[, 1])),
         rmse_w = map2_dbl(.x = data,
                           .y = fit_w,
                           .f = ~ rmse(.x$rating, .y %>% 
                                         fitted() %>% 
                                         .[, 1])),
         rmse_wh = map2_dbl(.x = data,
                            .y = fit_wh,
                            .f = ~ rmse(.x$rating, .y %>% 
                                          fitted() %>% 
                                          .[, 1])),
         rmse_whs = map2_dbl(.x = data,
                             .y = fit_whs,
                             .f = ~ rmse(.x$rating, .y %>% 
                                           fitted() %>% 
                                           .[, 1])),
         model_comparison = pmap(.l = list(baseline = fit_baseline,
                                           w = fit_w,
                                           wh = fit_wh,
                                           whs = fit_whs),
                                 .f = ~ loo_compare(..1, ..2, ..3, ..4)),
         best_model = map_chr(.x = model_comparison,
                              .f = ~ rownames(.) %>% .[1]),
         best_model = factor(best_model,
                             levels = c("..1", "..2", "..3", "..4"),
                             labels = c("baseline", "w", "wh", "whs")))

# save(list = c("df.exp2.causal.individual_fit"),
#      file = "data/exp2_causal_individual_fit.RData")
4.5.1.6.1 Load individual participant regressions
load("data/exp2_causal_individual_fit.RData")

# count how many participants are best fit by the different models
df.exp2.causal.individual_fit %>% 
  count(best_model) %>% 
  print_table()
best_model n
wh 2
whs 39
4.5.1.6.2 Model performance on individual participant level
df.exp2.causal.individual_fit %>% 
  select(participant, contains("r_"), contains("rmse_")) %>% 
  pivot_longer(cols = -participant,
               names_to = c("measure", "model"),
               names_sep = "_",
               values_to = "fit") %>% 
  group_by(model, measure) %>% 
  summarize(quantiles = quantile(fit, probs = c(0.05, 0.5, 0.95)),
            prob = c(0.05, 0.5, 0.95)) %>% 
  pivot_wider(names_from = prob,
              values_from = quantiles) %>% 
  print_table()
model measure 0.05 0.5 0.95
w r 0.22 0.43 0.60
w rmse 29.19 36.18 43.73
wh r 0.37 0.60 0.78
wh rmse 23.70 32.54 40.70
whs r 0.40 0.64 0.79
whs rmse 22.50 31.20 39.08
4.5.1.6.3 Clusters of individual participants’ responses
set.seed(1)

df.cluster_whs = fit.brm_exp2_causal_whs %>% 
  ranef() %>% 
  .$participant %>% 
  as_tibble() %>% 
  select(contains("Estimate")) %>% 
  set_names(tolower(str_replace_all(names(.), "Estimate.", ""))) %>% 
  mutate(participant = 1:n()) %>% 
  relocate(participant)

df.cluster_whs = df.cluster_whs %>% 
  mutate(cluster = df.cluster_whs %>%
           select(-participant) %>%
           kmeans(centers = 2) %>% 
           .$cluster)

df.cluster_whs %>% 
  print_table()
participant intercept whether how sufficient cluster
1 7.76 0.99 -4.99 -0.78 1
2 -0.47 -20.70 31.94 -17.47 2
3 2.23 -9.06 26.55 -9.04 2
4 -0.15 5.35 -0.65 6.33 1
5 -0.62 0.85 -4.67 -1.25 1
6 4.23 -14.57 21.17 -14.30 2
7 -0.19 2.73 -3.75 0.86 1
8 -3.21 -3.78 5.72 -1.50 2
9 -3.89 -6.15 9.73 -2.84 2
10 1.32 -8.35 2.15 -7.56 2
11 2.29 8.96 -11.31 5.78 1
12 -1.37 14.00 -23.75 13.86 1
13 3.32 4.86 0.91 1.48 1
14 2.78 5.28 -11.98 2.23 1
15 -5.80 -9.62 -7.03 -4.98 1
16 0.34 22.21 -15.58 19.53 1
17 0.12 -3.30 2.25 -2.35 1
18 2.59 0.71 -10.39 -1.16 1
19 -2.47 -7.50 3.86 -6.12 2
20 8.77 19.87 -10.31 13.20 1
21 -1.59 0.68 -12.00 1.17 1
22 -4.81 0.01 -3.14 0.99 1
23 -0.48 -0.22 -5.95 1.69 1
24 -0.35 1.04 0.46 1.87 1
25 3.11 13.59 -12.70 8.57 1
26 1.19 12.95 1.25 9.67 1
27 -3.52 12.99 -17.44 15.89 1
28 1.56 -6.75 -2.51 -7.44 2
29 -0.22 7.39 -4.79 5.92 1
30 -4.45 -20.63 17.27 -15.29 2
31 -5.86 6.61 -12.18 10.19 1
32 -1.23 0.77 4.39 4.61 1
33 3.66 13.23 -9.56 10.28 1
34 -4.52 -9.39 6.97 -5.32 2
35 -0.67 -13.75 -1.43 -13.16 2
36 -2.11 -27.38 46.11 -23.71 2
37 10.67 13.82 1.17 5.96 1
38 1.84 8.24 -5.49 5.76 1
39 -5.80 -10.51 7.51 -9.68 2
40 -6.06 -6.80 7.55 -4.07 2
41 1.76 -2.14 -1.20 -0.79 1
# cluster sizes
df.cluster_whs %>% 
  count(cluster)
# A tibble: 2 x 2
  cluster     n
*   <int> <int>
1       1    27
2       2    14
4.5.1.6.3.1 Check that clustering is stable

A 2-cluster solution yields a good result according to a number of different validation measures (see here for more details on the different measures).

tmp = df.cluster_whs %>% 
  select(-participant, -cluster)

fit = clValid(obj = as.matrix(tmp),
        nClust = 2:10,
        clMethods = c("kmeans"),
        validation = c("internal", "stability"))
Warning in clValid(obj = as.matrix(tmp), nClust = 2:10, clMethods =
c("kmeans"), : rownames for data not specified, using 1:nrow(data)
fit %>% 
  summary()

Clustering Methods:
 kmeans 

Cluster sizes:
 2 3 4 5 6 7 8 9 10 

Validation Measures:
                           2       3       4       5       6       7       8       9      10
                                                                                            
kmeans APN            0.1176  0.1414  0.2080  0.1219  0.1897  0.2723  0.3262  0.2900  0.2716
       AD            18.7780 14.1819 13.5064 10.7858 10.4043 10.2673 10.1356  9.3995  8.7734
       ADM            4.4409  3.5882  4.1361  2.2932  3.4262  4.3009  4.8524  4.6046  4.5736
       FOM            7.5680  6.2550  6.1193  5.4051  5.2029  5.1031  5.3320  5.1198  4.8700
       Connectivity   8.0238 14.7222 17.0556 24.7833 28.7302 32.9802 33.8135 41.5984 48.9571
       Dunn           0.0931  0.1915  0.2054  0.1882  0.2138  0.2138  0.2138  0.2472  0.2560
       Silhouette     0.5150  0.4096  0.3791  0.3538  0.3357  0.3145  0.3087  0.2843  0.2777

Optimal Scores:

             Score  Method Clusters
APN          0.1176 kmeans 2       
AD           8.7734 kmeans 10      
ADM          2.2932 kmeans 5       
FOM          4.8700 kmeans 10      
Connectivity 8.0238 kmeans 2       
Dunn         0.2560 kmeans 10      
Silhouette   0.5150 kmeans 2       

4.5.2 Plots

4.5.2.1 Bar graph

set.seed(1)
model_index = "whs"

# model predictions 
model_prediction = list(fit.brm_exp2_causal_w,
                        fit.brm_exp2_causal_wh,
                        fit.brm_exp2_causal_whs) %>%
  map_dfr(~ fitted(., df.exp2.causal.means %>% 
                     left_join(df.exp2.model.causal,
                               by = c("clip", "ball")),
                   re_formula = NA) %>% 
            as_tibble()) %>% 
  mutate(ball = rep(c("A", "B"), n()/2),
         clip = rep(rep(1:32, each = 2), 3),
         model = rep(c("w", "wh", "whs"),
                     each = 64))

df.plot = df.exp2.causal.long %>%
  left_join(df.exp2.clipinfo %>% 
              select(clip, outcome_both),
            by = c("clip")) %>% 
  left_join(model_prediction %>% 
              filter(model == model_index) %>% 
              select(model = Estimate, clip, ball),
            by = c("clip", "ball")) %>% 
  pivot_longer(cols = c(rating, model),
               names_to = "model",
               values_to = "value") %>% 
  mutate(colorindex = 1,
         colorindex = ifelse(model == "rating" &
                               outcome_both == 0, 1, colorindex),
         colorindex = ifelse(model == "rating" &
                               outcome_both == 1, 2, colorindex),
         colorindex = ifelse(model == "model", 3, colorindex),
         colorindex = as.factor(colorindex),
         model = factor(model, levels = c("rating", "model"))) %>%
  arrange(participant, clip, ball)

df.text = df.plot %>%
  expand(ball, clip, model) %>% 
  mutate(label = ifelse(model == "rating" & ball == "A", clip, NA),
         y = 110,
         x = 1.5,
         colorindex = NA)

p = actual_counterfactual_threeballs_plot(df.plot,"causal responsibility")
print(p)

# ggsave("../../figures/plots/exp2_causal_bars.pdf",
#        width = 12,
#        height = 6)

4.5.2.2 Selection

set.seed(1)

check = "\u2713"
cross = "\u2717"

x_labels = c(str_c("\nW ", cross, "\nH ", check, "\nS ", cross),
             str_c("\nW ", check, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", cross, "\nS ", cross),
             str_c("\nW ", check, "\nH ", cross, "\nS ", cross),
             str_c("\nW ", check, "\nH ", check, "\nS ", cross),
             str_c("\nW ", check, "\nH ", check, "\nS ", cross),
             str_c("\nW ", cross, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", cross, "\nS ", cross))
  

df.plot = df.exp2.causal.long %>%
  filter(clip %in% c(3, 7, 15, 16, 23)) %>%
  group_by(clip, ball) %>%
  summarize(mean = mean(rating),
            low = smean.cl.boot(rating)[2],
            high = smean.cl.boot(rating)[3]) %>%
  left_join(df.exp2.causal.regression %>% select(clip, ball, w, wh, whs),
            by = c("clip", "ball")) %>%
  ungroup() %>%
  pivot_longer(cols = c(mean, w, wh, whs),
               names_to = "index",
               values_to = "value") %>% 
  mutate(across(c(low, high),
                ~ ifelse(index == "mean", ., NA))) %>%
  mutate(index = factor(index, levels = c("mean", "w", "wh", "whs")),
         clip = factor(clip, levels = c(7, 23, 3, 15, 16),
                       labels = c("causal chain",
                                  "double prevention",
                                  "joint causation",
                                  "overdetermination",
                                  "preemption")))

df.labels = df.plot %>% 
  distinct(clip, ball) %>% 
  arrange(clip, ball) %>% 
  mutate(labels = x_labels,
         value = -10,
         index = NA)

func_load_image = function(clip){
  readPNG(str_c("../../figures/diagrams/exp2_clip", clip, ".png"))
}

df.clips = df.plot %>% 
  distinct(clip) %>% 
  arrange(clip) %>% 
  mutate(number = c(7, 23, 3, 15, 16),
         grob = map(.x = number, .f = ~ func_load_image(clip = .x)),
         index = NA,
         value = NA,
         ball = NA,
         label = str_c("clip ", number))

ball_a = readPNG("../../figures/diagrams/ballA.png")
ball_a = rasterGrob(ball_a, interpolate = TRUE)

ball_b = readPNG("../../figures/diagrams/ballB.png")
ball_b = rasterGrob(ball_b, interpolate = TRUE)

p = ggplot(data = df.plot, 
       mapping = aes(x = ball,
                     y = value,
                     group = index,
                     fill = index)) +
  geom_bar(stat = "identity", color = "black",
           position = position_dodge(0.9),
           width = 0.9) +
  geom_errorbar(mapping = aes(ymin = low, ymax = high),
                width = 0,
                size = 1,
                position = position_dodge(0.9)) +
  annotation_custom(grob = ball_a,
                    xmin = 0.5,
                    xmax = 1.5,
                    ymin = -25,
                    ymax = -7) + 
  annotation_custom(grob = ball_b,
                    xmin = 1.5,
                    xmax = 2.5,
                    ymin = -25,
                    ymax = -7) +
  geom_text(data = df.labels,
            mapping = aes(label = labels,
                          y = -Inf),
            vjust = 1.2,
            family = "Arial Unicode MS",
            size = 8) + 
  geom_custom(data = df.clips,
              mapping = aes(data = grob, x = 1.5, y = Inf),
              grob_fun = function(x) rasterGrob(x,
                                                interpolate = T,
                                                vjust = -0.25)) + 
  geom_text(data = df.clips,
            mapping = aes(label = label,
                          y = Inf,
                          x = -Inf),
            size = 9,
            hjust = -0.2,
            vjust = -4) + 
  facet_grid(~clip) +
  scale_fill_grey(start = 1,
                  end = 0,
                  labels = c("mean rating  ", expression(CSM[W] ~ " "),
                             expression(CSM[WH] ~ " "),
                             expression(CSM[WHS] ~ " "))) +
  labs(y = "causal responsibility", fill = "") +
  coord_cartesian(clip = "off") + 
  theme_bw() +
  theme(legend.text.align = 0,
        text = element_text(size = 20),
        panel.grid = element_blank(),
        legend.position = "bottom",
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.title.y = element_text(size = 30),
        legend.text = element_text(size = 30),
        strip.text = element_text(size = 30),
        legend.spacing = unit(0.5, "cm"),
        legend.background = element_rect(fill = "transparent"),
        legend.margin = margin(t = 5, unit = "cm"),
        plot.margin = margin(t = 8, l = 0.2, r = 0.2, b = 0.1, unit = "cm"))
print(p)

# ggsave("../../figures/plots/exp2_selection_bars.pdf",
#        width = 20,
#        height = 10,
#        device = cairo_pdf)

4.5.2.3 Scatter plot

func_scatterplot = function(model){
  if(model == "heuristic"){
    xlabel = "Heuristic"
  }else{
    xlabel = bquote(CSM[.(toupper(model))])
  }
  
  l.models = list(w = fit.brm_exp2_causal_w, 
                  wh = fit.brm_exp2_causal_wh,
                  whs = fit.brm_exp2_causal_whs,
                  heuristic = fit.brm_exp2_causal_heuristic)
  
  df.data = df.exp2.causal.means %>% 
    left_join(df.exp2.model.causal, by = c("clip", "ball")) %>% 
    left_join(df.regression.features, by = c("clip", "ball"))
  
  df.plot = l.models[[model]] %>%
    fitted(newdata = df.data,
           re_formula = NA) %>% 
    as_tibble() %>% 
    clean_names() %>%
    bind_cols(df.data)
  
  p = ggplot(data = df.plot,
             mapping = aes(x = estimate, 
                           y = rating_mean)) +
    geom_abline(intercept = 0,
                slope = 1,
                linetype = 2) + 
    geom_smooth(aes(y = estimate,
                    ymin = q2_5,
                    ymax = q97_5),
                stat = "identity",
                color = "lightblue",
                fill = "lightblue") +
    geom_linerange(size = 0.5, 
                   mapping = aes(ymin = rating_low,
                                 ymax = rating_high),
                   color = "gray80") +
    geom_point(size = 2) + 
    scale_color_manual(values = c("black", "#e41a1c"),
                       guide = F) +
    coord_fixed(xlim = c(0, 100),
                ylim = c(0, 100)) +
    labs(x = xlabel,
         y = "responsibility judgment") +
    annotate(geom = "text",
             label = str_c(
               "r = ", cor(df.plot$estimate, df.plot$rating_mean) %>% 
                 round(2) %>% 
                 as.character() %>% 
                 str_sub(start = 2),
               "\nRMSE = ", rmse(df.plot$estimate, df.plot$rating_mean) %>% 
                 round(2)),
             x = -2,
             y = 95,
             size = 6,
             hjust = 0) +
    scale_x_continuous(breaks = seq(0, 100, 25)) +
    scale_y_continuous(breaks = seq(0, 100, 25)) +
    theme(text = element_text(size = 20))
  return(p)
}

list(func_scatterplot(model = "w"),
     func_scatterplot(model = "wh"),
     func_scatterplot(model = "whs"),
     func_scatterplot(model = "heuristic")) %>%
  wrap_plots(ncol = 2)


# for creating and saving an individual scatter plots 
# model = "w"
# model = "wh"
# model = "whs"
# model = "heuristic"

# func_scatterplot(model)
# ggsave(str_c("../../figures/plots/exp2_", model, "_scatter.pdf"),
#        width = 5,
#        height = 5)

4.5.2.4 Individual participant variance for a selection of clips (clustered)

check = "\u2713"
cross = "\u2717"

x_labels = c(str_c("\nW ", cross, "\nH ", check, "\nS ", cross),
             str_c("\nW ", check, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", cross, "\nS ", cross),
             str_c("\nW ", check, "\nH ", cross, "\nS ", cross),
             str_c("\nW ", check, "\nH ", check, "\nS ", cross),
             str_c("\nW ", check, "\nH ", check, "\nS ", cross),
             str_c("\nW ", cross, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", cross, "\nS ", cross))

df.plot = df.exp2.causal.long %>%
  filter(clip %in% c(7, 23, 3, 15, 16)) %>%
  mutate(clip = factor(clip, levels = c(7, 23, 3, 15, 16),
    labels = c("causal chain", "double prevention", "joint causation",
      "overdetermination", "preemption")))

df.plot = df.plot %>% 
  left_join(df.cluster_whs %>% 
              group_by(cluster) %>% 
              mutate(group = factor(cluster,
                                    levels = 1:2,
                                    labels = str_c("n = ", group_size(.)))) %>% 
              ungroup() %>% 
              select(participant, cluster, group),
            by = "participant")


df.labels = df.plot %>% 
  distinct(clip, ball) %>% 
  arrange(clip, ball) %>% 
  mutate(labels = x_labels,
         rating = -10,
         group = NA,
         participant = NA)

func_load_image = function(clip){
  readPNG(str_c("../../figures/diagrams/exp2_clip", clip, ".png"))
}

df.clips = df.plot %>% 
  distinct(clip) %>% 
  arrange(clip) %>% 
  mutate(number = c(7, 23, 3, 15, 16),
         grob = map(.x = number, .f = ~ func_load_image(clip = .x)),
         group = NA,
         participant = NA,
         ball = NA,
         label = str_c("clip ", number))

ball_a = readPNG("../../figures/diagrams/ballA.png")
ball_a = rasterGrob(ball_a, interpolate = TRUE)

ball_b = readPNG("../../figures/diagrams/ballB.png")
ball_b = rasterGrob(ball_b, interpolate = TRUE)

p = ggplot(data = df.plot, 
       mapping = aes(x = ball,
                     y = rating,
                     group = participant,
                     shape = group)) +
  geom_line(mapping = aes(linetype = "individual",
                          color = group),
            size = 1,
            alpha = 0.3) +
  geom_point(mapping = aes(color = group),
             size = 1,
             alpha = 0.3) +
  stat_summary(mapping = aes(group = group,
                             color = group,
                             linetype = "mean"),
               fun = "mean",
               geom = "line",
               size = 1.5) +
  stat_summary(data = df.plot %>% filter(cluster == 1),
               mapping = aes(group = group,
                             color = group),
               fun.data = "mean_cl_boot",
               geom = "pointrange",
               size = 1,
               shape = 19) +
  stat_summary(data = df.plot %>% filter(cluster == 2),
               mapping = aes(group = group,
                             color = group),
               fun.data = "mean_cl_boot",
               geom = "pointrange",
               size = 1,
               shape = 19) +
  annotation_custom(grob = ball_a,
                    xmin = 0.5,
                    xmax = 1.5,
                    ymin = -30,
                    ymax = -10) + 
  annotation_custom(grob = ball_b,
                    xmin = 1.5,
                    xmax = 2.5,
                    ymin = -30,
                    ymax = -10) +
  geom_text(data = df.labels,
            mapping = aes(label = labels,
                          y = -Inf),
            vjust = 1.2,
            family = "Arial Unicode MS",
            size = 8) + 
  geom_custom(data = df.clips,
              mapping = aes(data = grob, x = 1.5, y = Inf),
              grob_fun = function(x) rasterGrob(x,
                                                interpolate = T,
                                                vjust = -0.25)) +
  geom_text(data = df.clips,
            mapping = aes(label = label,
                          y = Inf,
                          x = -Inf),
            size = 9,
            hjust = -0.2,
            vjust = -4) + 
  facet_wrap(~ clip,
             ncol = 8) +
  coord_cartesian(xlim = c(0.9, 2.1),
                  ylim = c(-5, 105),
                  expand = F,
                  clip = "off") +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     labels = seq(0, 100, 25)) +
  scale_color_brewer(palette = "Set1",
                     guide = "none") +
  labs(y = "causal responsibility rating",
       linetype = "legend",
       shape = "") +
  theme_bw() +
  guides(linetype = guide_legend(override.aes = list(alpha = c(0.3, 1)),
                                 keywidth = unit(1.2, "cm")),
         shape = guide_legend(override.aes = list(color = c("#377eb8",
                                                            "#e41a1c"),
                                                  shape = c(19, 19),
                                                  alpha = c(1, 1),
                                                  size = c(5, 5)))) +
  theme(panel.grid = element_blank(),
        text = element_text(size = 20),
        legend.position = c(0.505, 0.23),
        axis.title.x = element_blank(),
        legend.text = element_text(size = 20),
        legend.title = element_text(size = 24),
        legend.background = element_rect(fill = "transparent"),
        strip.text = element_text(size = 30),
        axis.text.x = element_blank(),
        legend.key = element_rect(fill = "transparent"),
        legend.box = "horizontal",
        legend.spacing.x = unit(0.1, "cm"),
        panel.spacing.x = unit(1, "cm"),
        plot.margin = margin(b = 5, l = 0.2, r = 0.2, t = 7.5, unit = "cm"))

print(p)

# ggsave(str_c("../../figures/plots/exp2_individual_variance_selection_lines_clustered.pdf"),
#        plot = p,
#        width = 20,
#        height = 8.5,
#        device = cairo_pdf)

4.5.3 Tables

4.5.3.1 Relationship between predictors

df.exp2.model %>% 
  filter(noise == unique(df.exp2.model.counterfactual$noise)) %>%
  pivot_longer(cols = A_difference:B_robust,
               names_to = c("ball", "aspect"),
               names_sep = "_",
               values_to = "value") %>% 
  pivot_wider(names_from = aspect,
              values_from = value) %>% 
  select(difference, whether, how, sufficient, robust) %>% 
  correlate() %>% 
  shave() %>% 
  fashion() %>% 
  print_table()
term difference whether how sufficient robust
difference
whether .50
how .79 .27
sufficient .21 .10 .36
robust .43 .93 .24 .20

4.5.3.2 Population level predictors in the mixed effects models

fit.brm_exp2_causal_w %>% 
  tidy(effects = "fixed") %>% 
  mutate(model = "CSM_w") %>% 
  bind_rows(fit.brm_exp2_causal_wh %>% 
              tidy(effects = "fixed") %>% 
              mutate(model = "CSM_wh")) %>% 
  bind_rows(fit.brm_exp2_causal_whs %>% 
              tidy(effects = "fixed") %>% 
              mutate(model = "CSM_whs")) %>% 
  mutate(term = tolower(term)) %>% 
  rename(`lower 95% HDI` = conf.low,
         `upper 95% HDI` = conf.high) %>% 
  mutate_if(is.numeric, ~ round(., 2)) %>% 
  select(model, everything(), -effect, -component) %>% 
  print_table()
model term estimate std.error lower 95% HDI upper 95% HDI
CSM_w (intercept) 30.46 1.62 27.27 33.64
CSM_w whether 43.28 2.20 38.98 47.62
CSM_wh (intercept) 10.73 1.54 7.70 13.81
CSM_wh whether 30.17 2.69 24.96 35.39
CSM_wh how 34.88 2.61 29.73 39.95
CSM_whs (intercept) 11.04 1.55 8.03 14.10
CSM_whs whether 28.96 2.72 23.62 34.40
CSM_whs how 25.32 3.02 19.34 31.33
CSM_whs sufficient 31.97 2.98 26.14 37.97

4.5.3.3 Individual participants regression results

df.exp2.causal.individual_fit %>% 
  select(where(~ is.numeric(.) | is.factor(.))) %>% 
  select(participant, everything(), best_model) %>% 
  print_table()
participant r_w r_wh r_whs rmse_w rmse_wh rmse_whs best_model
1 0.31 0.38 0.47 29.91 29.00 27.91 whs
2 0.24 0.77 0.77 39.12 27.82 27.43 whs
3 0.39 0.78 0.79 38.75 28.04 27.06 whs
4 0.41 0.54 0.61 43.73 40.80 39.08 whs
5 0.42 0.50 0.51 39.68 37.81 37.40 whs
6 0.21 0.54 0.54 40.70 36.30 35.99 whs
7 0.52 0.62 0.64 32.76 29.87 29.06 whs
8 0.41 0.63 0.67 38.27 33.26 32.02 whs
9 0.42 0.71 0.75 35.76 28.39 26.63 whs
10 0.31 0.52 0.55 29.01 26.24 25.66 whs
11 0.52 0.56 0.60 33.59 32.23 31.20 whs
12 0.55 0.55 0.65 33.21 32.54 30.21 whs
13 0.58 0.70 0.73 29.74 25.59 24.50 whs
14 0.45 0.47 0.50 33.67 32.88 32.27 whs
15 0.24 0.37 0.40 39.54 38.12 37.50 whs
16 0.62 0.65 0.73 40.91 38.61 35.67 whs
17 0.45 0.67 0.71 28.26 23.70 22.27 whs
18 0.32 0.36 0.38 37.76 37.08 36.65 whs
19 0.38 0.60 0.62 33.36 29.29 28.68 whs
20 0.57 0.60 0.66 36.70 35.33 33.55 whs
21 0.43 0.49 0.54 31.05 29.79 28.84 whs
22 0.51 0.66 0.69 33.83 29.68 28.62 whs
23 0.33 0.44 0.51 38.80 37.09 35.88 whs
24 0.46 0.62 0.68 34.13 30.18 28.49 whs
25 0.63 0.66 0.70 30.41 28.88 27.66 whs
26 0.60 0.72 0.76 40.17 35.05 32.98 whs
27 0.46 0.50 0.62 43.42 41.78 39.22 whs
28 0.33 0.45 0.47 29.19 27.59 27.29 whs
29 0.50 0.59 0.64 38.89 36.09 34.73 whs
30 0.22 0.71 0.71 32.92 24.91 24.51 whs
31 0.49 0.59 0.68 37.52 34.81 32.17 whs
32 0.39 0.60 0.69 40.56 36.03 33.32 whs
33 0.51 0.55 0.62 38.76 37.04 35.38 whs
34 0.28 0.51 0.54 44.27 40.70 39.80 whs
35 0.19 0.33 0.32 34.41 33.26 33.23 wh
36 0.23 0.84 0.83 45.20 29.54 29.49 whs
37 0.55 0.62 0.65 36.21 33.72 32.64 whs
38 0.52 0.60 0.64 36.18 33.66 32.38 whs
39 0.49 0.77 0.77 29.40 21.81 21.74 wh
40 0.49 0.78 0.80 32.19 23.68 22.50 whs
41 0.35 0.50 0.57 32.72 30.30 28.86 whs

4.5.3.4 CSMwhs predictions for selection of cases

df.exp2.model.causal %>%
  left_join(df.exp2.causal.regression,
            by = c("clip", "ball")) %>% 
  filter(clip %in% c(7, 23, 3, 15, 16)) %>%
  select(clip, ball, difference, whether, how, sufficient, robust) %>%
  mutate(clip = factor(clip, levels = c(7, 23, 3, 15, 16))) %>%
  mutate(across(where(is.numeric), ~ round(., 2))) %>%
  mutate(across(c(difference, whether, how, sufficient, robust),
                   ~ ifelse(. < 0.5,
                            str_c("xmark (", ., ")"),
                            str_c("cmark (", ., ")")))) %>%
  arrange(clip) %>% 
  print_table()
clip ball difference whether how sufficient robust
7 A cmark (1) xmark (0.34) cmark (1) xmark (0) xmark (0.25)
7 B cmark (1) cmark (1) cmark (1) cmark (0.67) cmark (0.6)
23 A xmark (0.05) xmark (0) xmark (0) xmark (0) xmark (0)
23 B cmark (0.91) cmark (0.79) xmark (0) xmark (0) cmark (0.72)
3 A cmark (1) cmark (0.88) cmark (1) xmark (0.12) cmark (0.76)
3 B cmark (1) cmark (0.89) cmark (1) xmark (0.11) cmark (0.75)
15 A cmark (1) xmark (0.01) cmark (1) cmark (0.99) xmark (0.1)
15 B cmark (1) xmark (0.01) cmark (1) cmark (1) xmark (0.1)
16 A cmark (1) xmark (0.23) cmark (1) cmark (1) xmark (0.35)
16 B xmark (0) xmark (0) xmark (0) xmark (0) xmark (0)

4.5.3.5 Heuristic model

fit.brm_exp2_causal_heuristic %>% 
  tidy(effects = "fixed") %>% 
  mutate(term = tolower(term)) %>% 
  rename(`lower 95% HDI` = conf.low,
         `upper 95% HDI` = conf.high) %>% 
  select(-c(effect, component)) %>% 
  print_table()
Warning in tidy.brmsfit(., effects = "fixed"): some parameter names contain
underscores: term naming may be unreliable!
term estimate std.error lower 95% HDI upper 95% HDI
(intercept) 49.73 1.48 46.87 52.70
moving 0.22 0.21 0.00 0.79
speed 2.08 0.84 0.45 3.73
contact_e 0.38 0.36 0.01 1.35
e_speed_diff 0.12 0.12 0.00 0.46
e_direction_diff 1.06 0.73 0.06 2.76
total_speed_diff 2.19 0.95 0.42 4.09
total_direction_diff 3.99 0.90 2.21 5.69
transfer 15.59 0.80 13.98 17.14
e_moving 0.18 0.18 0.00 0.65
exclusive 4.38 0.71 3.00 5.79

5 Appendix

5.1 Preliminary study

5.1.1 Read in model predictions

Reading in the predictions from the approximate simulation model and finding the model that best matches participants’ counterfactual judgments.

5.1.1.1 Counterfactual condition

# read model predictions 
path = "../python/results/"
files = list.files(path = path, pattern = "*.csv")
files = files[str_detect(files, "2ball")]

df.study.model = files %>%
  map_dfr(~ read.csv(str_c(path, .))) %>%
  set_names(c("clip", "noise", "prediction")) %>%
  left_join(df.study.clipinfo, by = "clip") %>%
  mutate(prediction = ifelse(outcome_actual == 1, 1 - prediction, prediction))

# calculate mean counterfactual judgments 
df.study.counterfactual.means = df.study.counterfactual.long %>%
  group_by(clip) %>%
  summarize(rating_mean = mean(rating),
            rating_low = smean.cl.boot(rating)[2],
            rating_high = smean.cl.boot(rating)[3]) %>%
  ungroup() %>%
  left_join(df.study.clipinfo, by = "clip")

# find noisy simulation model that best predicts the mean counterfactual judgments by 
# minimizing the sum of squared errors 
df.study.counterfactual.model = df.study.model %>% 
  group_by(noise) %>% 
  nest() %>% 
  mutate(sse = map(data, 
                   ~ sum((.x$prediction*100 - 
                            df.study.counterfactual.means$rating_mean) ^ 2)),
         sse = unlist(sse)) %>% 
  ungroup() %>% 
  filter(sse == min(sse)) %>% 
  unnest(data) %>% 
  mutate(prediction = prediction * 100)

5.1.1.2 Causal condition

Calculating the predictions for the causal condition using both the approximate simulation model (simulation) as well as participants’ judgments from the counterfactual condition (empirical).

df.study.causal.model = df.study.counterfactual.long %>% 
  group_by(clip) %>% 
  summarize(empirical = mean(rating)) %>% 
  ungroup() %>% 
  left_join(df.study.counterfactual.model %>% 
              select(-c(noise, sse)),
            by = "clip") %>% 
  rename(simulation = prediction) %>% 
  mutate(empirical = ifelse(outcome_actual == 1, 100 - empirical, empirical),
         simulation = ifelse(outcome_actual == 1, 100 - simulation, simulation))

5.1.2 Counterfactual condition

5.1.2.1 Plots

Bar plot showing mean judgments for each clip together with the individual judgments, as well as the predictions of the best-fitting approximate simulation model.

set.seed(1)
df.plot = df.study.counterfactual.long %>% 
  mutate(rating = abs(rating),
         clipindex = rep(c(1, 2), nrow(.)/2)) %>% 
  left_join(df.study.counterfactual.model,
            by = c("clip",
                   "outcome_actual",
                   "outcome_counterfactual",
                   "index_actual",
                   "index_counterfactual")) %>% 
  pivot_longer(cols = c(rating, prediction),
               names_to = "model",
               values_to = "value") %>% 
  mutate(colorindex = ifelse(outcome_counterfactual == 1, 2, 1),
         colorindex = ifelse(model != "rating", 3, colorindex),
         colorindex = as.factor(colorindex),
         index_actual = factor(index_actual,
                               levels = c("actual miss", "actual close", "actual hit")),
         index_counterfactual = factor(index_counterfactual,
                                       levels = c("counterfactual miss",
                                                  "counterfactual close",
                                                  "counterfactual hit")),
         model = factor(model, levels = c("rating", "prediction"))) %>% 
  mutate(clipindex = ifelse(clip == 11, 2, clipindex),
         clipindex = ifelse(clip == 12, 1, clipindex)) #swap clips 11 and 12


df.text = df.plot %>%
  expand(index_actual, index_counterfactual, clipindex, model) %>%
  mutate(label = rep(1:18, each = 2),
         label = ifelse(model != "rating", NA, label),
         y = -15,
         x = clipindex,
         colorindex = NA)

p = actual_counterfactual_plot(df.plot, df.text, "counterfactual judgment")
print(p)

# ggsave("../../figures/plots/study_counterfactual_bars.pdf",
#        width = 8,
#        height = 6)

5.1.2.2 Stats

Model fit as captured by r and RMSE.

df.study.counterfactual.means %>% 
  left_join(df.study.counterfactual.model,
            by = c("clip",
                   "outcome_actual",
                   "outcome_counterfactual",
                   "index_actual",
                   "index_counterfactual")) %>% 
  summarize(noise = unique(noise), 
            simulation_r = cor(rating_mean, prediction),
            simulation_rmse = rmse(rating_mean, prediction)) %>% 
  print_table()
noise simulation_r simulation_rmse
1 0.97 10.95

5.1.3 Causal condition

5.1.3.1 Plots

Bar plot showing mean causal judgments for each clip together with the individual judgments, as well as the predictions of the counterfactual simulation model (black bars) based on participants’ mean counterfactual judgments.

set.seed(1)

model.name = c("empirical")
# model.name = c("simulation")

df.plot = df.study.causal.long %>%
  mutate(rating = abs(rating),
         clipindex = rep(c(1, 2), nrow(.) / 2)) %>%
  left_join(df.study.causal.model,
            by = c("clip",
                   "outcome_actual",
                   "outcome_counterfactual",
                   "index_actual",
                   "index_counterfactual")) %>%
  pivot_longer(cols = c(rating, simulation, empirical),
               names_to = "model",
               values_to = "value") %>% 
  filter(model %in% c(model.name, "rating")) %>%
  mutate(model = factor(model, levels = c("rating", model.name))) %>%
  mutate(
    colorindex = ifelse(outcome_actual == 1, 2, 1),
    colorindex = ifelse(model != "rating", 3, colorindex),
    colorindex = as.factor(colorindex),
    index.actual = factor(index_actual, 
                          levels = c("actual miss", "actual close", "actual hit")),
    index.counterfactual = factor(index_counterfactual, 
                                  levels = c("counterfactual miss", 
                                             "counterfactual close", 
                                             "counterfactual hit"))) %>%
  mutate(clipindex = ifelse(clip == 11, 2, clipindex),
         clipindex = ifelse(clip == 12, 1, clipindex)) # swap clips 11 and 12

df.text = df.plot %>%
  expand(index_actual, index_counterfactual, clipindex, model) %>%
  mutate(label = rep(1:18, each = 2),
         label = ifelse(model != "rating", NA, label),
         y = -15,
         x = clipindex,
         colorindex = NA)

p = actual_counterfactual_plot(df.plot, df.text, "causal judgment")
print(p)

# ggsave("../../figures/plots/study_causal_bars.pdf",
#        width = 8,
#        height = 6)

5.1.3.2 Stats

Model fit as captured by r and RMSE.

df.study.causal.long %>% 
  mutate(rating = abs(rating)) %>% 
  group_by(clip,
           outcome_actual, 
           outcome_counterfactual,
           index_actual,
           index_counterfactual) %>% 
  summarize(rating = mean(rating)) %>% 
  ungroup() %>% 
  left_join(df.study.causal.model,
            by = c("clip", 
                   "outcome_actual", 
                   "outcome_counterfactual", 
                   "index_actual", 
                   "index_counterfactual")) %>% 
  summarize(empirical_r = cor(rating, empirical),
            empirical_rmse = rmse(rating, empirical),
            simulation_r = cor(rating, simulation),
            simulation_rmse = rmse(rating, simulation)) %>% 
  print_table()
empirical_r empirical_rmse simulation_r simulation_rmse
0.96 8.57 0.93 15.15

5.2 Experiment 1

5.2.1 Frequentist tests for potential order effects

5.2.1.1 Counterfactual condition

df.data = df.exp1.combined.long %>% 
  filter(question == "counterfactual",
         clip <= 18)

afex::aov_ez(id = "participant",
             dv = "rating",
             data = df.data,
             between = "condition",
             within = c("index_actual", "index_counterfactual")) %>% 
  .$anova_table %>% 
  print_table()
num Df den Df MSE F ges Pr(>F)
condition 1.00 80.00 566.69 0.47 0.00 0.49
index_actual 1.84 147.59 427.45 3.65 0.01 0.03
condition:index_actual 1.84 147.59 427.45 0.03 0.00 0.97
index_counterfactual 1.85 148.17 1038.95 348.21 0.65 0.00
condition:index_counterfactual 1.85 148.17 1038.95 0.03 0.00 0.97
index_actual:index_counterfactual 3.06 245.17 400.46 3.50 0.01 0.02
condition:index_actual:index_counterfactual 3.06 245.17 400.46 0.29 0.00 0.84

5.2.1.2 Causal condition

df.data = df.exp1.combined.long %>% 
  filter(question == "causal",
         clip <= 18)

afex::aov_ez(id = "participant",
             dv = "rating",
             data = df.data,
             between = "condition",
             within = c("index_actual", "index_counterfactual")) %>% 
  .$anova_table %>% 
  print_table()
num Df den Df MSE F ges Pr(>F)
condition 1.00 80.00 919.37 0.70 0.00 0.40
index_actual 1.48 118.66 1238.03 796.01 0.73 0.00
condition:index_actual 1.48 118.66 1238.03 0.95 0.00 0.36
index_counterfactual 1.84 147.34 1224.82 216.37 0.48 0.00
condition:index_counterfactual 1.84 147.34 1224.82 9.21 0.04 0.00
index_actual:index_counterfactual 3.87 309.59 429.49 3.87 0.01 0.00
condition:index_actual:index_counterfactual 3.87 309.59 429.49 1.23 0.00 0.30

5.3 Experiment 2

5.3.1 Table with CSMwhs predictions for all cases

df.exp2.causal.regression %>% 
  left_join(df.exp2.model.causal,
            by = c("clip", "ball")) %>% 
  left_join(df.exp2.clipinfo %>% 
              select(-clipindex),
            by = c("clip")) %>% 
  mutate(across(c(difference, whether, how, sufficient, robust),
                ~ . * 100)) %>% 
  select(clip, ball, contains("outcome"), difference, whether, how, sufficient,
         robust, w, wh, whs, heuristic, rating = rating_mean) %>% 
  print_table(digits = 0)
clip ball outcome_both outcome_a outcome_b outcome_none difference whether how sufficient robust w wh whs heuristic rating
1 A 0 0 0 0 100 40 100 23 36 48 58 55 57 42
1 B 0 0 0 0 100 15 100 16 9 37 50 46 54 37
2 A 0 0 0 0 57 12 0 0 10 35 14 14 25 21
2 B 0 0 0 0 18 0 0 0 0 30 11 11 24 19
3 A 1 0 0 0 100 88 100 12 76 69 72 66 72 76
3 B 1 0 0 0 100 89 100 11 75 69 73 66 72 75
4 A 1 0 0 0 100 78 100 4 78 64 69 60 58 63
4 B 1 0 0 0 100 95 100 15 57 72 74 69 54 78
5 A 0 0 1 0 100 90 100 0 47 69 73 62 47 71
5 B 0 0 1 0 100 0 100 0 0 30 46 36 68 22
6 A 0 0 1 0 100 59 100 16 35 56 64 59 53 73
6 B 0 0 1 0 100 18 100 6 14 38 51 44 53 22
7 A 1 0 1 0 100 34 100 0 25 45 56 46 70 59
7 B 1 0 1 0 100 100 100 67 60 74 76 87 64 79
8 A 1 0 1 0 0 0 0 0 0 30 11 11 25 7
8 B 1 0 1 0 100 100 100 100 100 74 76 97 84 92
9 A 0 1 0 0 0 0 0 0 0 30 11 11 14 8
9 B 0 1 0 0 100 100 100 0 100 74 76 65 78 90
10 A 0 1 0 0 77 18 0 0 22 38 16 16 15 23
10 B 0 1 0 0 98 79 0 0 63 65 35 34 21 55
11 A 1 1 0 0 100 70 100 77 68 61 67 81 71 93
11 B 1 1 0 0 0 0 0 0 0 30 11 11 16 4
12 A 1 1 0 0 100 82 100 74 83 66 70 84 57 77
12 B 1 1 0 0 100 0 100 12 24 30 46 40 53 37
13 A 0 1 1 0 67 34 0 0 35 45 21 21 14 8
13 B 0 1 1 0 70 35 0 0 35 46 21 21 21 64
14 A 0 1 1 0 97 91 0 0 59 70 38 37 21 22
14 B 0 1 1 0 91 77 0 0 51 64 34 33 20 18
15 A 1 1 1 0 100 1 100 99 10 31 46 68 71 76
15 B 1 1 1 0 100 1 100 100 10 31 46 68 71 76
16 A 1 1 1 0 100 23 100 100 35 40 53 75 80 92
16 B 1 1 1 0 0 0 0 0 0 30 11 11 14 4
17 A 0 0 0 1 100 19 100 37 18 39 51 54 66 69
17 B 0 0 0 1 100 0 100 36 17 30 46 48 65 46
18 A 0 0 0 1 100 11 100 40 17 35 49 52 55 63
18 B 0 0 0 1 100 7 100 37 9 33 48 50 56 66
19 A 1 0 0 1 100 74 100 7 65 63 68 60 55 53
19 B 1 0 0 1 100 72 100 7 65 61 67 59 55 49
20 A 1 0 0 1 100 92 100 8 72 70 73 66 57 41
20 B 1 0 0 1 100 88 100 4 53 68 72 63 56 71
21 A 0 0 1 1 100 47 100 40 45 51 60 63 58 80
21 B 0 0 1 1 100 9 100 21 10 34 48 46 59 18
22 A 0 0 1 1 100 100 100 89 83 74 76 94 47 60
22 B 0 0 1 1 100 8 100 0 15 34 48 39 53 42
23 A 1 0 1 1 5 0 0 0 0 31 11 11 15 3
23 B 1 0 1 1 91 79 0 0 72 65 35 34 22 39
24 A 1 0 1 1 100 66 100 4 63 59 66 57 57 44
24 B 1 0 1 1 100 94 100 22 79 71 74 71 54 73
25 A 0 1 0 1 100 25 100 21 26 41 53 50 69 43
25 B 0 1 0 1 100 74 100 54 65 62 68 75 56 73
26 A 0 1 0 1 100 6 100 3 9 33 47 39 60 39
26 B 0 1 0 1 100 87 100 35 54 68 72 73 46 69
27 A 1 1 0 1 100 97 100 52 97 73 75 81 67 80
27 B 1 1 0 1 0 0 0 0 0 30 11 11 17 6
28 A 1 1 0 1 100 90 100 22 80 69 73 69 79 89
28 B 1 1 0 1 0 0 0 0 0 30 11 11 12 5
29 A 0 1 1 1 100 58 100 24 44 56 63 61 66 47
29 B 0 1 1 1 100 63 100 24 38 58 65 62 54 67
30 A 0 1 1 1 100 57 100 29 49 55 63 62 62 58
30 B 0 1 1 1 100 46 100 24 39 51 60 58 63 56
31 A 1 1 1 1 100 2 100 4 3 31 46 38 63 44
31 B 1 1 1 1 100 4 100 4 4 32 47 39 53 46
32 A 1 1 1 1 0 0 0 0 0 30 11 11 16 5
32 B 1 1 1 1 100 75 100 66 73 63 68 79 65 71

5.3.2 Does the question framing matter? (Responsibility vs. causation)

We have run a pilot eye-tracking experiment with 15 participants who saw the same clips as the ones in Experiment 2. Instead of asking participants to judge “To what extent were A and B responsible for E (not) going through the gate?”, we asked them to judge to what extent they agreed with the following statement: “Ball A/B caused ball E to go through the gate.” or “Ball A/B prevented ball E from going through the gate.” depending on the outcome.

Participants’ agreement judgments with the causal statements in this pilot eye-tracking experiment were strikingly similar to participants’ responsibility judgments in the online experiment.

df.exp2.eye_tracking_pilot = 
  read_csv("../../data/experiment2_eye_tracking_pilot.csv") %>% 
  rename(clip = trial) %>%
  mutate(ball = str_to_upper(ball)) %>% 
  group_by(clip, ball, outcome) %>% 
  summarize(causality_mean = mean(causation),
            causality_low = smean.cl.boot(causation)[2],
            causality_high = smean.cl.boot(causation)[3]) %>% 
  ungroup()

df.plot = df.exp2.eye_tracking_pilot %>% 
  # select(clip, ball, outcome,) %>% 
  left_join(df.exp2.causal.means %>% 
              select(clip,
                     ball,
                     responsibility_mean = rating_mean,
                     responsibility_low = rating_low,
                     responsibility_high = rating_high),
            by = c("clip", "ball"))

# highlight the double prevention clip (#23)
df.plot = df.plot %>% 
  mutate(color = ifelse(clip == 23, "1", "0"))

ggplot(data = df.plot,
       mapping = aes(x = causality_mean,
                     y = responsibility_mean)) + 
  geom_abline(intercept = 0, 
              slope = 1,
              linetype = 2) + 
  geom_smooth(method = "lm",
              color = "lightblue",
              fill = "lightblue") +
  geom_linerange(mapping = aes(ymin = responsibility_low,
                               ymax = responsibility_high),
                 alpha = 0.1) +
  geom_linerange(mapping = aes(xmin = causality_low,
                               xmax = causality_high),
                 alpha = 0.1) +
  geom_point(size = 2,
             mapping = aes(color = color),
             show.legend = F) + 
  annotate(geom = "text",
           x = c(0, 0),
           y = c(100, 90),
           label = c(str_c("r = ",
                           cor(df.plot$causality_mean,
                               df.plot$responsibility_mean) %>% 
                             round(2)),
                     str_c("RMSE = ", rmse(df.plot$causality_mean,
                                           df.plot$responsibility_mean) %>% 
                             round(2))),
           hjust = 0, 
           size = 8) + 
  scale_x_continuous(breaks = seq(0, 100, 25),
                     limits = c(0, 100)) +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     limits = c(0, 100)) +
  scale_color_manual(values = c("black", "red")) +
  labs(x = "causal judgment",
       y = "responsibility judgment") +
  theme(text = element_text(size = 24))

# ggsave("../../figures/plots/aux_question_framing_comparison.pdf",
#        width = 6,
#        height = 6)

5.3.3 CSM_WHS fits for each individual participant

df.plot = fit.brm_exp2_causal_whs %>% 
  fitted() %>% 
  as_tibble() %>% 
  select(prediction = Estimate) %>% 
  bind_cols(df.exp2.causal.long) %>% 
  relocate(prediction, .after = last_col())

df.text = df.plot %>% 
  group_by(participant) %>% 
  summarize(r = round(cor(prediction, rating), 2)) %>% 
  ungroup() %>% 
  mutate(r = str_c("r = ", r),
         prediction = 1,
         rating = 110)

ggplot(data = df.plot,
       mapping = aes(x = prediction,
                     y = rating)) + 
  geom_abline(intercept = 0, 
              slope = 1,
              color = "blue",
              alpha = 0.5) +
  geom_point(alpha = 0.3,
             size = 1) +
  geom_text(data = df.text,
            mapping = aes(label = r),
            hjust = 0,
            color = "red",
            size = 4) +
  facet_wrap(facets = vars(participant),
             ncol = 7) +
  labs(x = "model prediction",
       y = "causal responsibility rating") + 
  coord_cartesian(xlim = c(0, 100),
                  ylim = c(0, 100),
                  expand = F,
                  clip = "off") +
  scale_x_continuous(breaks = seq(0, 100, 25),
                     limits = c(0, 100)) +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     expand = expansion(mult = c(0, 0))) +
  theme_classic() + 
  theme(strip.background = element_blank(),
        strip.text = element_blank(),
        text = element_text(size = 16),
        panel.spacing.x = unit(0.75, "cm"),
        panel.spacing.y = unit(1, "cm"),
        plot.margin = margin(t = 0.7,
                             r = 0.8,
                             b = 0.2,
                             l = 0.2,
                             unit = "cm"))

# ggsave("../../figures/plots/exp2_individual_scatter_plots.pdf",
#        width = 12,
#        height = 8)

5.3.4 Individual participant fit model comparison

df.plot = df.exp2.causal.individual_fit %>% 
  select(participant, contains("r_"), contains("rmse_")) %>% 
  pivot_longer(cols = -participant,
               names_to = c("measure", "model"),
               names_sep = "_",
               values_to = "fit")

ggplot(data = df.plot %>% 
         filter(measure == "r"),
       mapping = aes(x = fit,
                     fill = model)) + 
  geom_density(alpha = 0.5) + 
  labs(x = "correlation value") + 
  scale_x_continuous(breaks = seq(0, 1, 0.2),
                     limits = c(0, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) + 
  scale_fill_brewer(palette = "Set1") + 
  # ggplot2::theme_classic() + 
  theme(legend.position = c(0.3, 0.95),
        legend.direction = "horizontal",
        text = element_text(size = 20))

# ggsave("../../figures/plots/exp2_individual_densities.pdf",
#        width = 8,
#        height = 6)

### 3D scatter plot of participant clusters

plot_ly(x = df.cluster_whs$whether,
        y = df.cluster_whs$how,
        z = df.cluster_whs$sufficient,
        type = "scatter3d",
        mode = "markers",
        color = as.factor(df.cluster_whs$cluster),
        colors = c("#e41a1c", "#377eb8")) %>% 
  layout(showlegend = FALSE,
         title = "Participant clusters",
         scene = list(
           xaxis = list(title = "whether"),
           yaxis = list(title = "how"),
           zaxis = list(title = "sufficient")))

5.3.5 Clusters of participants

set.seed(1)

df.plot = df.exp2.causal.long %>% 
  left_join(df.cluster_whs %>% 
              select(participant, cluster) %>% 
              mutate(participant = as.numeric(participant),
                     cluster = as.factor(cluster)) %>% 
              group_by(cluster) %>% 
              mutate(label = factor(cluster,
                                    levels = 1:2,
                                    labels = str_c("n = ", group_size(.)))) %>% 
              ungroup(),
            by = "participant")

ggplot(data = df.plot,
       mapping = aes(x = ball,
                     y = rating,
                     group = label,
                     fill = label)) + 
  geom_point(shape = 21,
             alpha = 0.2,
             position = position_jitterdodge(jitter.width = 0.1,
                                             jitter.height = 0,
                                             dodge.width = 0.75)) + 
  stat_summary(fun.data = "mean_cl_boot",
               geom = "linerange", 
               mapping = aes(color = label),
               size = 1,
               position = position_dodge(width = 0.75)) + 
  stat_summary(fun = "mean",
               geom = "point", 
               size = 4,
               shape = 21,
               position = position_dodge(width = 0.75)) + 
  facet_wrap(facets = vars(clip), ncol = 8) + 
  labs(y = "causal responsibility rating",
       fill = "cluster",
       color = "cluster") + 
  scale_fill_brewer(palette = "Set1") + 
  scale_color_brewer(palette = "Set1") + 
  theme(text = element_text(size = 24),
        legend.position = "bottom")

# ggsave("../../figures/plots/exp2_clusters_points.pdf",
#        width = 12,
#        height = 8)

5.3.6 Ternary plots for individual regression results

library("ggtern")

df.plot = df.exp2.causal.individual_fit %>% 
  select(participant, fit_whs) %>% 
  mutate(estimates = map(fit_whs, ~ fixef(.) %>% 
                           as_tibble(rownames = "term") %>% 
                           clean_names())) %>%
  select(participant, estimates) %>% 
  unnest(estimates) %>% 
  filter(!str_detect(term, "Intercept")) %>% 
  select(participant, term, estimate) %>% 
  pivot_wider(names_from = term,
              values_from = estimate) %>% 
  # check this
  mutate(across(.cols = c(whether, how, sufficient),
                .fns = ~ . / (how + whether + sufficient),
                .names = "{.col}_norm")) %>% 
  mutate(color = 0,
         color = ifelse(how_norm == max(how_norm), 1, color),
         color = ifelse(whether_norm == max(whether_norm), 2, color),
         color = ifelse(sufficient_norm == max(sufficient_norm), 3, color),
         color = factor(color))

ggplot(data = df.plot,
       mapping = aes(x = how,
                     y = sufficient,
                     z = whether,
                     color = color)) + 
  geom_point(alpha = 0.7,
             size = 2,
             show.legend = F) + 
  scale_color_manual(values = c("black", "red", "green", "blue")) + 
  coord_tern() +
  theme_showarrows() + 
  theme(text = element_text(size = 20),
        tern.axis.title.T = element_blank(),
        tern.axis.title.L = element_blank(),
        tern.axis.title.R = element_blank())

# ggsave(str_c("../../figures/plots/exp2_individual_regression_ternary_plot_scaled.pdf"),
#        width = 5,
#        height = 5)

6 Paper figures

6.1 Figure 11

plot.list = list(func_exp1_causal_plot(condition.name = "counterfactual_first",
                                       model.name = "counterfactual_first"),
                 func_exp1_causal_plot(condition.name = "causal_first",
                                       model.name = "causal_first"))

wrap_plots(plot.list,
           ncol = 2) +
  plot_annotation(tag_levels = c("A")) &
  theme(plot.tag.position = c(0, 0.99),
        plot.tag = element_text(size = 30,
                                face = "bold"))

# ggsave("../../figures/plots/figure11.pdf",
#        width = 16,
#        height = 6)

6.2 Figure 15

list(func_scatterplot(model = "w"),
     func_scatterplot(model = "wh"),
     func_scatterplot(model = "whs"),
     func_scatterplot(model = "heuristic")) %>%
  wrap_plots(ncol = 2) + 
  plot_annotation(tag_levels = c("A")) &
  theme(plot.tag.position = c(0, 1),
        plot.tag = element_text(size = 30,
                                face = "bold"),
        plot.margin = margin(t = 0.5, r = 0.5, b = 0, l = 0.5, unit = "cm"))

# ggsave("../../figures/plots/figure15.pdf",
#        width = 10,
#        height = 9)

7 Session info

sessionInfo()
R version 4.0.3 (2020-10-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur 10.16

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] forcats_0.5.1     stringr_1.4.0     dplyr_1.0.4       purrr_0.3.4      
 [5] readr_1.4.0       tidyr_1.1.2       tibble_3.0.6      tidyverse_1.3.0  
 [9] ggrepel_0.9.1     patchwork_1.1.1   clValid_0.6-9     cluster_2.1.0    
[13] egg_0.4.5         gridExtra_2.3     plotly_4.9.3      png_0.1-7        
[17] tidybayes_2.3.1   Hmisc_4.4-2       ggplot2_3.3.3     Formula_1.2-4    
[21] survival_3.2-7    lattice_0.20-41   janitor_2.1.0     jsonlite_1.7.2   
[25] xtable_1.8-4      corrr_0.4.3       brms_2.14.4       Rcpp_1.0.6       
[29] broom.mixed_0.2.6 lme4_1.1-26       Matrix_1.3-2      DT_0.17          
[33] kableExtra_1.3.1  knitr_1.31       

loaded via a namespace (and not attached):
  [1] utf8_1.1.4           tidyselect_1.1.0     htmlwidgets_1.5.3   
  [4] munsell_0.5.0        codetools_0.2-18     statmod_1.4.35      
  [7] miniUI_0.1.1.1       withr_2.4.1          Brobdingnag_1.2-6   
 [10] colorspace_2.0-0     highr_0.8            rstudioapi_0.13     
 [13] stats4_4.0.3         bayesplot_1.8.0      labeling_0.4.2      
 [16] emmeans_1.5.3        rstan_2.21.1         farver_2.1.0        
 [19] bridgesampling_1.0-0 coda_0.19-4          vctrs_0.3.6         
 [22] generics_0.1.0       TH.data_1.0-10       afex_0.28-1         
 [25] xfun_0.21            R6_2.5.0             markdown_1.1        
 [28] gamm4_0.2-6          projpred_2.0.2       assertthat_0.2.1    
 [31] promises_1.1.1       scales_1.1.1         multcomp_1.4-15     
 [34] nnet_7.3-15          gtable_0.3.0         processx_3.4.5      
 [37] sandwich_3.0-0       rlang_0.4.10         splines_4.0.3       
 [40] TMB_1.7.18           lazyeval_0.2.2       broom_0.7.3         
 [43] checkmate_2.0.0      inline_0.3.17        yaml_2.2.1          
 [46] reshape2_1.4.4       abind_1.4-5          modelr_0.1.8        
 [49] threejs_0.3.3        crosstalk_1.1.1      backports_1.2.1     
 [52] httpuv_1.5.5         rsconnect_0.8.16     tools_4.0.3         
 [55] bookdown_0.21        ellipsis_0.3.1       RColorBrewer_1.1-2  
 [58] ggridges_0.5.3       plyr_1.8.6           base64enc_0.1-3     
 [61] ps_1.6.0             prettyunits_1.1.1    rpart_4.1-15        
 [64] zoo_1.8-8            haven_2.3.1          fs_1.5.0            
 [67] magrittr_2.0.1       data.table_1.13.6    lmerTest_3.1-3      
 [70] openxlsx_4.2.3       ggdist_2.4.0         colourpicker_1.1.0  
 [73] reprex_1.0.0         mvtnorm_1.1-1        matrixStats_0.57.0  
 [76] hms_1.0.0            shinyjs_2.0.0        mime_0.10           
 [79] evaluate_0.14        arrayhelpers_1.1-0   shinystan_2.5.0     
 [82] rio_0.5.16           jpeg_0.1-8.1         readxl_1.3.1        
 [85] rstantools_2.1.1     compiler_4.0.3       V8_3.4.0            
 [88] crayon_1.4.1         minqa_1.2.4          StanHeaders_2.21.0-7
 [91] htmltools_0.5.1.1    mgcv_1.8-33          later_1.1.0.1       
 [94] RcppParallel_5.0.2   lubridate_1.7.9.2    DBI_1.1.1           
 [97] dbplyr_2.0.0         MASS_7.3-53          boot_1.3-26         
[100] car_3.0-10           cli_2.3.0            parallel_4.0.3      
[103] igraph_1.2.6         pkgconfig_2.0.3      numDeriv_2016.8-1.1 
[106] foreign_0.8-81       xml2_1.3.2           svUnit_1.0.3        
[109] dygraphs_1.1.1.6     webshot_0.5.2        estimability_1.3    
[112] rvest_0.3.6          snakecase_0.11.0     distributional_0.2.1
[115] callr_3.5.1          digest_0.6.27        rmarkdown_2.6       
[118] cellranger_1.1.0     htmlTable_2.1.0      curl_4.3            
[121] shiny_1.6.0          gtools_3.8.2         nloptr_1.2.2.2      
[124] lifecycle_1.0.0      nlme_3.1-151         carData_3.0-4       
[127] viridisLite_0.3.0    fansi_0.4.2          pillar_1.4.7        
[130] loo_2.4.1.9000       fastmap_1.1.0        httr_1.4.2          
[133] pkgbuild_1.2.0       glue_1.4.2           xts_0.12.1          
[136] zip_2.1.1            shinythemes_1.2.0    class_7.3-18        
[139] stringi_1.5.3        latticeExtra_0.6-29