1 Setup

1.1 Load packages

# library("ggtern")    # for ternary plots
# library("afex")      # for ANOVAs 
library("knitr")       # for knitting stuff
library("kableExtra")  # for markdown tables
library("DT")          # for nice tables in markdown 
library("lme4")        # for linear mixed effects models
library("broom.mixed") # for tidying mixed models results 
library("brms")        # Bayesian regression with Stan
library("corrr")       # for tidy correlation matrix
library("xtable")      # for latex tables
library("jsonlite")    # for reading json files
library("janitor")     # cleans stuff
library("Hmisc")       # bootstrapped confidence intervals
library("tidybayes")   # for Bayesian data analysis
library("png")         # adding pngs to images
library("grid")        # functions for dealing with images 
library("plotly")      # 3D scatter plot 
library("egg")         # for geom_custom
library("clValid")     # for validating clustering solutions 
library("patchwork")   # for figure panels
library("ggrepel")     # for labels in ggplot
library("tidyverse")   # for wrangling, plotting, etc.

1.2 Helper functions

# setting some knitr options
opts_chunk$set(comment = "",
               fig.show = "hold")

# set the ggplot theme
theme_set(theme_classic())

# suppress summarise() grouping warning 
options(dplyr.summarise.inform = F)

# function for printing out html or latex tables 
print_table = function(data, format = "html", digits = 2){
  if(format == "html"){
    data %>% 
      kable(digits = digits) %>% 
      kable_styling()
  }else if(format == "latex"){
    data %>% 
      xtable(digits = digits) %>%
      print(include.rownames = F,
            booktabs = T)
  }
}

# root mean squared error
rmse = function(x, y){
  return(sqrt(mean((x - y)^2)))
}

actual_counterfactual_plot = function(data, text, ylabel) {
  p = ggplot(data = data, 
              mapping = aes(x = clipindex,
                            y = value,
                            fill = colorindex,
                            group = model)) +
    stat_summary(geom = "bar",
                 fun = "mean",
                 color = "black",
                 position = position_dodge(0.7),
                 width = 0.7) +
    stat_summary(geom = "linerange",
                 fun.data = "mean_cl_boot",
                 position = position_dodge(0.7),
                 size = 1) +
    geom_point(data = data %>% 
                 mutate(value = ifelse(model == "rating", value, NA)),
               position = position_jitterdodge(dodge.width = 0.7, jitter.width = 0.2),
               shape = 21,
               color = "black",
               size = 1,
               alpha = 0.2) +
    scale_fill_manual(values = c("red2", "green2", "black")) +
    facet_grid(index_actual ~ index_counterfactual) +
    geom_text(data = text %>%
                drop_na(label),
              mapping = aes(x = x, y = y, label = as.character(label)),
              size = 6,
              position = position_dodge(width = .7),
              hjust = 0.5) +
    coord_cartesian(xlim = c(0.5, 2.5),
                    ylim = c(-5, 105),
                    expand = F,
                    clip = "off") +
    labs(y = ylabel) +
    theme(text = element_text(size = 20),
          legend.position = "none",
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.y = element_text(margin = margin(0, 10, 0, 0)),
          panel.spacing.y = unit(0.8, "cm"),
          plot.margin = unit(c(0.2, 0.2, .8, .2), "cm"),
          axis.title.x = element_blank(),
          panel.grid = element_blank(),
          strip.background = element_blank(),
          panel.border = element_rect(colour = "black", fill = NA))
  # print(p)
}

actual_counterfactual_threeballs_plot = function(x, ylabel) {
  p = ggplot(data = x, 
              mapping = aes(x = ball,
                            y = value,
                            group = model,
                            fill = colorindex)) +
    stat_summary(fun = "mean",
                 geom = "bar",
                 color = "black",
                 position = position_dodge(0.9),
                 width = 0.9) +
    stat_summary(fun.data = "mean_cl_boot",
                 geom = "linerange",
                 color = "black",
                 position = position_dodge(0.9)) +
    geom_point(data = x %>% 
                 mutate(value = ifelse(model == "rating", value, NA)),
               position = position_jitterdodge(jitter.width = 0.3,
                                               jitter.height = 0,
                                               dodge.width = 0.9),
               shape = 21,
               color = "black",
               size = 1,
               alpha = 0.15) +
    facet_wrap(~clip, ncol = 8) +
    geom_text(data = df.text, 
              mapping = aes(x = x,
                            y = y,
                            label = as.character(label)),
              size = 5, 
              position = position_dodge(width = .9), 
              hjust = 0.5, 
              fontface = "bold") +
    coord_cartesian(xlim = c(0.5, 2.5), 
                    ylim = c(-5, 120), 
                    expand = F,
                    clip = "off") +
    scale_y_continuous(breaks = seq(0, 100, 25)) +
    scale_fill_manual(values = c("red2", "green2", "black")) +
    scale_color_manual(values = c("red2", "green2", "black")) +
    labs(y = ylabel) +
    theme(text = element_text(size = 16),
          legend.position = "none",
          axis.title.x = element_blank(),
          panel.spacing.y = unit(0.3, "cm"),
          panel.grid = element_blank(),
          strip.background = element_blank(),
          strip.text = element_blank(),
          panel.border = element_rect(colour = "black", fill = NA))
}

2 Preliminary study

2.1 Clips

2.2 Read in data

# causal judgments
df.study.causal = read.csv("../../data/study_causal.csv", 
                          header = T,
                          stringsAsFactors = F)

# counterfactual judgments
df.study.counterfactual = read.csv("../../data/study_counterfactual.csv",
                                  header = T,
                                  stringsAsFactors = F)

# Information about each clip 
df.study.clipinfo = tibble(clip = 1:18,
                          outcome_actual = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 
                                             1, 1, 0, 1, 1, 1, 1, 1, 1),
                          outcome_counterfactual = c(0, 0, 0, 1, 1, 1, 0, 0, 1, 
                                                     0, 1, 1, 0, 0, 1, 0, 1, 1),
                          index_actual = rep(c("actual miss", 
                                               "actual close", 
                                               "actual hit"), each = 6),
                          index_counterfactual = rep(rep(c("counterfactual miss",
                                                           "counterfactual close", 
                                                           "counterfactual hit"),
                                                         each = 2),
                                                     3)) %>% 
  mutate(index_actual = factor(index_actual, levels = c("actual miss", 
                                                        "actual close", 
                                                        "actual hit")),
         index_counterfactual = factor(index_counterfactual, 
                                       levels = c("counterfactual miss",
                                                  "counterfactual close", 
                                                  "counterfactual hit")))

# Structure data 
df.study.causal.long = df.study.causal$data %>%
  enframe() %>% 
  mutate(participant = 1:n()) %>% 
  separate_rows(value, sep = ",") %>% 
  mutate(name = rep(c("clip", "rating"), n()/2),
         order = rep(rep(1:18, each = 2), max(participant)),
         value = as.numeric(value)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  arrange(participant, clip, rating) %>% 
  left_join(df.study.clipinfo,
            by = "clip")

df.study.counterfactual.long = df.study.counterfactual$data %>%
  enframe() %>% 
  mutate(participant = 1:n()) %>% 
  separate_rows(value, sep = ",") %>% 
  mutate(name = rep(c("clip", "rating"), n()/2),
         order = rep(rep(1:18, each = 2), max(participant)),
         value = as.numeric(value)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  arrange(participant, clip, rating) %>% 
  left_join(df.study.clipinfo,
            by = "clip")

2.2.1 Demographics

# counterfactual condition 
df.study.counterfactual %>% 
  separate(extra,
           into = c("gender", "age", "difficulty", "smoothness", "time"),
           sep = ",") %>% 
  mutate(gender = factor(gender, levels = c(1, 2), labels = c("female", "male"))) %>% 
  select(gender:time) %>% 
  mutate_if(is.character, ~ as.numeric(.)) %>% 
  summarize(n = nrow(.),
            age_mean = mean(age) %>% round(0),
            age_sd = sd(age) %>% round(1),
            n_female = sum(gender == "female"),
            time_mean = mean(time) %>% round(2),
            time_sd = sd(time) %>% round(2)) %>% 
  print_table()

n	age_mean	age_sd	n_female	time_mean	time_sd
41	32	8.8	19	7.5	3.12

# causal condition 
df.study.causal %>% 
  separate(extra,
           into = c("gender", "age", "difficulty", "smoothness", "time"),
           sep = ",") %>% 
  mutate(gender = factor(gender, levels = c(1, 2), labels = c("female", "male"))) %>% 
  select(gender:time) %>% 
  mutate_if(is.character, ~ as.numeric(.)) %>% 
  summarize(n = nrow(.),
            age_mean = mean(age) %>% round(0),
            age_sd = sd(age) %>% round(1),
            n_female = sum(gender == "female"),
            time_mean = mean(time) %>% round(2),
            time_sd = sd(time) %>% round(2)) %>% 
  print_table()

n	age_mean	age_sd	n_female	time_mean	time_sd
41	33	10.3	21	9.65	6.58

2.3 Plots

2.3.1 Scatterplot

Relationship between counterfactual judgments and causal judgments.

set.seed(1)

df.plot = df.study.causal.long %>% 
  mutate(rating = abs(rating)) %>% 
  group_by(clip, outcome_actual, outcome_counterfactual,
           index_actual, index_counterfactual) %>% 
  summarize(rating = smean.cl.boot(rating)) %>% 
  mutate(index = c("cause_mean", "cause_low", "cause_high")) %>% 
  ungroup() %>% 
  pivot_wider(names_from = index,
              values_from = rating) %>% 
  left_join(df.study.counterfactual.long %>% 
              mutate(rating = ifelse(outcome_actual == 1, 100 - rating, rating)) %>% 
              group_by(clip, outcome_actual, outcome_counterfactual,
                       index_actual, index_counterfactual) %>% 
              summarize(rating = smean.cl.boot(rating)) %>% 
              mutate(index = c("counterfactual_mean", "counterfactual_low", "counterfactual_high")) %>% 
              pivot_wider(names_from = index,
                          values_from = rating),
            by = c("clip", "outcome_actual", "outcome_counterfactual", "index_actual", "index_counterfactual")) %>% 
  mutate(outcome_actual = as.factor(outcome_actual))

ggplot(data = df.plot,
       mapping = aes(x = counterfactual_mean, 
                     y = cause_mean)) +
  geom_abline(intercept = 0,
              slope = 1,
              linetype = 2) + 
  # geom_smooth(aes(y = estimate,
  #                 ymin = q2_5,
  #                 ymax = q97_5),
  #             stat = "identity",
  #             color = "black") +
  geom_linerange(size = 0.75, 
                 mapping = aes(ymin = cause_low,
                               ymax = cause_high),
                 color = "gray80") +
  geom_linerange(size = 0.75, 
                 mapping = aes(xmin = counterfactual_low,
                               xmax = counterfactual_high),
                 color = "gray80") +
  geom_text_repel(mapping = aes(label = clip,
                                color = outcome_actual), 
                  size = 6,
                  direction = "both",
                  show.legend = F,
                  seed = 1,
                  box.padding = 0.27
                  ) + 
  geom_point(size = 3,
             shape = 21, 
             stroke = 1,
             color = "black",
             mapping = aes(fill = outcome_actual),
             show.legend = F) +
  annotate(geom = "text",
           label = str_c(
             "r = ", cor(df.plot$cause_mean, df.plot$counterfactual_mean) %>%
               round(2) %>%
               as.character() %>%
               str_sub(start = 2),
             "\nRMSE = ", rmse(df.plot$cause_mean, df.plot$counterfactual_mean) %>%
               round(2)),
           x = -2,
           y = 95,
           size = 6,
           hjust = 0) +
  coord_fixed(xlim = c(0, 100),
              ylim = c(0, 100)) +
  labs(x = "counterfactual judgment",
       y = "causal judgment") +
  scale_color_manual(values = c("darkred", "darkgreen")) + 
  scale_fill_manual(values = c("red", "green")) + 
  scale_x_continuous(breaks = seq(0, 100, 25)) +
  scale_y_continuous(breaks = seq(0, 100, 25)) +
  theme(text = element_text(size = 20))

# ggsave("../../figures/plots/study_scatter.pdf",
#        width = 5,
#        height = 5)

3 Experiment 1: One candidate cause

3.1 Clips

3.2 Read in data

df.exp1.causal_first = read.csv("../../data/experiment1_causal_first.csv",
                                header = T,
                                stringsAsFactors = F)
df.exp1.counterfactual_first = read.csv("../../data/experiment1_counterfactual_first.csv",
                                        header = T,
                                        stringsAsFactors = F)

# Information about each clip

df.exp1.clipinfo = tibble(clip = 1:20,
                          outcome_actual = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 
                                             0, 1, 1, 1, 1, 1, 1, 1, 0, 1),
                          outcome_counterfactual = c(0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 
                                                     1, 1, 0, 0, 1, 0, 1, 1, 1, 1),
                          index_actual = c(rep(c("actual miss", 
                                                 "actual close",
                                                 "actual hit"),
                                               each = 6),
                                           rep("practice", 2)),
                          index_counterfactual = c(rep(rep(c("counterfactual miss",
                                                             "counterfactual close",
                                                             "counterfactual hit"),
                                                           each = 2), 
                                                       3),
                                                   rep("practice", 2))) %>% 
  mutate(index_actual = factor(index_actual, levels = c("actual miss", 
                                                        "actual close", 
                                                        "actual hit")),
         index_counterfactual = factor(index_counterfactual, 
                                       levels = c("counterfactual miss",
                                                  "counterfactual close", 
                                                  "counterfactual hit")))

# Structure data 
df.exp1.causal_first.long = df.exp1.causal_first$data %>% 
  enframe() %>% 
  mutate(participant = 1:n()) %>% 
  separate_rows(value, sep = ",") %>% 
  mutate(name = rep(c("clip", "rating"), n()/2),
         order = rep(rep(1:40, each = 2), max(participant)),
         value = as.numeric(value)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  mutate(question = rep(rep(c("causal", "counterfactual"), each = 20),
                        max(participant)),
         condition = "causal_first") %>% 
  left_join(df.exp1.clipinfo,
            by = "clip") %>%
  arrange(participant, question, clip) %>%
  select(participant, question, clip, rating, everything())

df.exp1.counterfactual_first.long = df.exp1.counterfactual_first$data %>% 
  enframe() %>% 
  mutate(participant = 1:n()) %>% 
  separate_rows(value, sep = ",") %>% 
  mutate(name = rep(c("clip", "rating"), n()/2),
         order = rep(rep(1:40, each = 2), max(participant)),
         value = as.numeric(value)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  mutate(question = rep(rep(c("counterfactual", "causal"), each = 20),
                        max(participant)),
         condition = "counterfactual_first") %>% 
  left_join(df.exp1.clipinfo,
            by = "clip") %>%
  arrange(participant, question, clip) %>%
  select(participant, question, clip, rating, everything())

# combine data from both conditions
df.exp1.combined.long = df.exp1.causal_first.long %>%
  bind_rows(df.exp1.counterfactual_first.long %>% 
              mutate(participant = participant + 
                       max(df.exp1.causal_first.long$participant)))

3.2.1 Demographics

df.exp1.counterfactual_first %>% 
  separate(extra,
           into = c("gender", "age", "difficulty", "smoothness", "time", "condition"),
           sep = ",") %>% 
  select(gender:time) %>% 
  bind_rows(df.exp1.causal_first %>% 
  separate(extra,
           into = c("gender", "age", "difficulty", "smoothness", "time", "condition"),
           sep = ",") %>% 
  select(gender:time)) %>% 
  mutate(gender = factor(gender, levels = c(1, 2), labels = c("female", "male"))) %>% 
  mutate_if(is.character, ~ as.numeric(.)) %>% 
  summarize(n = nrow(.),
            age_mean = mean(age) %>% round(0),
            age_sd = sd(age) %>% round(1),
            n_female = sum(gender == "female"),
            time_mean = mean(time) %>% round(2),
            time_sd = sd(time) %>% round(2)) %>% 
  print_table()

n	age_mean	age_sd	n_female	time_mean	time_sd
82	35	12.2	45	21.25	5.11

3.2.2 Participants’ feedback

df.exp1.counterfactual_first %>% 
  select(feedback) %>% 
  bind_rows(df.exp1.causal_first %>% 
              select(feedback)) %>% 
  mutate(participant = 1:n()) %>% 
  relocate(participant) %>% 
  datatable()

3.3 Read in model predictions

3.3.1 Counterfactual condition

# read model predictions 
path = "../python/results/"
files = list.files(path = path, pattern = "*.csv")
files = files[str_detect(files, "teleport")]

df.exp1.model = files %>%
  map_dfr(~ read.csv(str_c(path, .))) %>%
  set_names(c("clip", "noise", "prediction")) %>%
  left_join(df.exp1.clipinfo, by = "clip") %>%
  mutate(prediction = ifelse(outcome_actual == 1, 1 - prediction, prediction))

# calculate mean counterfactual judgments 
df.exp1.counterfactual.means = df.exp1.combined.long %>%
  filter(question == "counterfactual",
         clip <= 18) %>%
  # mutate(rating = abs(rating)) %>% 
  group_by(clip) %>%
  summarize(rating = mean(rating)) %>%
  ungroup() %>%
  left_join(df.exp1.clipinfo, by = "clip")

# find noisy simulation model that best predicts the mean counterfactual judgments by 
# minimizing the sum of squared errors 
df.exp1.counterfactual.model = df.exp1.model %>% 
  group_by(noise) %>% 
  nest() %>% 
  mutate(sse = map(data, 
                   ~ sum((.x$prediction*100 - df.exp1.counterfactual.means$rating) ^ 2)),
         sse = unlist(sse)) %>% 
  ungroup() %>% 
  filter(sse == min(sse)) %>% 
  unnest(data) %>% 
  mutate(prediction = prediction * 100)

3.3.2 Causal condition

# Model predictions based on counterfactual judgments
df.exp1.causal.model = df.exp1.combined.long %>%
  filter(question == "counterfactual",
         clip <= 18) %>%
  group_by(clip, condition) %>%
  summarize(rating = mean(rating)) %>%
  pivot_wider(names_from = condition,
              values_from = rating) %>% 
  left_join(df.exp1.combined.long %>%
              filter(question == "counterfactual", clip <= 18) %>%
              group_by(clip) %>%
              summarize(combined = mean(rating)),
            by = "clip") %>% 
  left_join(df.exp1.counterfactual.model,
            by = "clip") %>% 
  select(-c(sse, noise)) %>% 
  rename(simulation = prediction) %>% 
  mutate(across(c(simulation, causal_first, counterfactual_first, combined), ~ ifelse(outcome_actual == 1, 100 - ., .)))

3.4 Counterfactual condition

3.4.1 Plots

set.seed(1)

df.plot = df.exp1.combined.long %>%
  filter(question == "counterfactual",
         clip <= 18) %>%
  mutate(clipindex = rep(c(1, 2), nrow(.) / 2)) %>%
  left_join(df.exp1.counterfactual.model,
            by = c("clip", 
                   "outcome_actual", 
                   "outcome_counterfactual", 
                   "index_actual", 
                   "index_counterfactual")) %>%
  pivot_longer(cols = c(rating, prediction),
               names_to = "model",
               values_to = "value") %>% 
  mutate(colorindex = ifelse(outcome_counterfactual == 1, 2, 1),
         colorindex = ifelse(model != "rating", 3, colorindex),
         colorindex = as.factor(colorindex),
         index_actual = factor(index_actual,
                               levels = c("actual miss", 
                                          "actual close", 
                                          "actual hit")),
         index_counterfactual = factor(index_counterfactual, 
                                       levels = c("counterfactual miss", 
                                                  "counterfactual close", 
                                                  "counterfactual hit")),
         model = factor(model,
                             levels = c("rating", "prediction")))

df.text = df.plot %>%
  expand(index_actual, index_counterfactual, clipindex, model) %>%
  mutate(label = rep(c("1 (A)", "2 (B)", "3", "4", "5 (A)", "6 (B)",
                       "7", "8 (C)", "9", "10", "11", "12 (C)",
                       "13 (D)", "14 (E)", "15", "16", "17 (D)", "18 (E)"),
                     each = 2),
         label = ifelse(model != "rating", NA, label),
         y = -15,
         x = clipindex,
         colorindex = NA)

p = actual_counterfactual_plot(df.plot, df.text, "counterfactual judgment")
print(p)

# ggsave("../../figures/plots/exp1_counterfactual_bars.pdf",
#        width = 8,
#        height = 6)

3.4.2 Stats

3.4.2.1 Check for potential order effects

# full model that takes condition into account 
fit.brm_exp1_counterfactual_full = brm(formula = rating ~ 1 + condition * 
                                         (index_actual + index_counterfactual) + 
                                         (1 + index_actual + 
                                            index_counterfactual | participant),
                                       data = df.exp1.combined.long %>% 
                                         filter(question == "counterfactual",
                                                clip <= 18),
                                       cores = 4,
                                       chains = 4,
                                       iter = 4000,
                                       seed = 1,
                                       file = "cache/brm_exp1_counterfactual_full")

# reduced model that ignores condition (i.e. the block order)
fit.brm_exp1_counterfactual_reduced = brm(formula = rating ~ 1 +  
                                            index_actual + index_counterfactual + 
                                            (1 + index_actual + 
                                               index_counterfactual | participant),
                                          data = df.exp1.combined.long %>% 
                                            filter(question == "counterfactual",
                                                   clip <= 18),
                                          cores = 4,
                                          chains = 4,
                                          iter = 4000,
                                          seed = 1,
                                          file = "cache/brm_exp1_counterfactual_reduced") 

# add loo 
fit.brm_exp1_counterfactual_reduced = add_criterion(fit.brm_exp1_counterfactual_reduced,
                                                    criterion = c("loo"),
                                                    reloo = T,
                                                    file = "cache/brm_exp1_counterfactual_reduced")

fit.brm_exp1_counterfactual_full = add_criterion(fit.brm_exp1_counterfactual_full,
                                                 criterion = c("loo"),
                                                 reloo = T,
                                                 file = "cache/brm_exp1_counterfactual_full")

# model comparison 
loo_compare(fit.brm_exp1_counterfactual_full, fit.brm_exp1_counterfactual_reduced)

                                    elpd_diff se_diff
fit.brm_exp1_counterfactual_reduced  0.0       0.0   
fit.brm_exp1_counterfactual_full    -3.5       0.7

The reduced model fares better in the approximate cross-validation suggesting that there was no effect of condition (block order) on counterfactual judgments.

3.4.2.2 Model evaluation

df.exp1.counterfactual.means %>% 
  left_join(df.exp1.counterfactual.model,
            by = c("clip",
                   "outcome_actual",
                   "outcome_counterfactual",
                   "index_actual",
                   "index_counterfactual")) %>% 
  summarize(noise = unique(noise),
            simulation_r = cor(rating, prediction),
            simulation_rmse = rmse(rating, prediction),
            deterministic_r = cor(rating, outcome_counterfactual*100),
            deterministic_rmse = rmse(rating, outcome_counterfactual*100)) %>% 
  print_table()

noise	simulation_r	simulation_rmse	deterministic_r	deterministic_rmse
0.9	0.96	13.46	0.86	28.15

3.5 Causal condition

3.5.1 Plots

set.seed(1)

func_exp1_causal_plot = function(condition.name, model.name){

df.plot = df.exp1.combined.long %>%
  filter(question == "causal", clip <= 18) %>%
  filter(condition %in% condition.name) %>%
  mutate(rating = abs(rating),
         clipindex = rep(c(1, 2), nrow(.) / 2)) %>%
  left_join(df.exp1.causal.model,
            by = c("clip", 
                   "outcome_actual", 
                   "outcome_counterfactual", 
                   "index_actual", 
                   "index_counterfactual")) %>%
  pivot_longer(cols = c("rating", all_of(model.name)),
               names_to = "model",
               values_to = "value") %>% 
  mutate(model = factor(model, levels = c("rating", model.name))) %>%
  mutate(colorindex = ifelse(outcome_actual == 1, 2, 1),
         colorindex = ifelse(model != "rating", 3, colorindex),
         colorindex = as.factor(colorindex),
         index_actual = factor(index_actual, 
                               levels = c("actual miss", 
                                          "actual close", 
                                          "actual hit")),
         index_counterfactual = factor(index_counterfactual, 
                                       levels = c("counterfactual miss", 
                                                  "counterfactual close", 
                                                  "counterfactual hit")))

df.text = df.plot %>%
  expand(index_actual, index_counterfactual, clipindex, model) %>%
  mutate(label = rep(c("1 (A)", "2 (B)", "3", "4", "5 (A)", "6 (B)",
                       "7", "8 (C)", "9", "10", "11", "12 (C)",
                       "13 (D)", "14 (E)", "15", "16", "17 (D)", "18 (E)"),
                     each = 2),
         label = ifelse(model != "rating", NA, label),
         y = -15,
         x = clipindex,
         colorindex = NA)

actual_counterfactual_plot(df.plot, df.text, "causal judgment")
}

plot.list = list(func_exp1_causal_plot(condition.name = "counterfactual_first",
                                       model.name = "counterfactual_first"),
                 func_exp1_causal_plot(condition.name = "causal_first",
                                       model.name = "causal_first"))

wrap_plots(plot.list, ncol = 2)

# condition.name = "counterfactual_first"
# condition.name = "causal_first"
# model.name = "counterfactual_first"
# model.name = "causal_first"
# model.name = "combined"

# func_exp1_causal_plot(condition.name = condition.name,
#                       model.name = model.name)
# 
# ggsave(str_c("../../figures/plots/exp1_", condition.name ,"_causal_bars.pdf"),
#        width = 8,
#        height = 6)

Figure 3.1: Left panel: Counterfactual judgments first; right panel: Causal judgments first

3.5.2 Stats

3.5.2.1 Check for potential order effects

# full model that takes condition into account 
fit.brm_exp1_causal_full = brm(formula = rating ~ 1 + condition * 
                                 (index_actual + index_counterfactual) +
                                 (1 + index_actual + index_counterfactual | participant),
                               data = df.exp1.combined.long %>% 
                                 filter(question == "causal",
                                        clip <= 18),
                               cores = 4,
                               chains = 4,
                               iter = 4000,
                               seed = 1,
                               file = "cache/brm_exp1_causal_full")

# reduced model that ignores condition (i.e. the block order)
fit.brm_exp1_causal_reduced = brm(formula = rating ~ 1 + 
                                    index_actual + index_counterfactual +
                                    (1 + index_actual + index_counterfactual | participant),
                                  data = df.exp1.combined.long %>% 
                                    filter(question == "causal",
                                           clip <= 18),
                                  cores = 4,
                                  chains = 4,
                                  iter = 4000,
                                  seed = 1,
                                  file = "cache/brm_exp1_causal_reduced") 

# add loo 
fit.brm_exp1_causal_reduced = add_criterion(fit.brm_exp1_causal_reduced,
                                            criterion = c("loo"),
                                            reloo = T,
                                            file = "cache/brm_exp1_causal_reduced")

fit.brm_exp1_causal_full = add_criterion(fit.brm_exp1_causal_full,
                                         criterion = c("loo"),
                                         reloo = T,
                                         file = "cache/brm_exp1_causal_full")

# model comparison 
loo_compare(fit.brm_exp1_causal_full, fit.brm_exp1_causal_reduced)

                            elpd_diff se_diff
fit.brm_exp1_causal_full     0.0       0.0   
fit.brm_exp1_causal_reduced -3.2       3.8

The full model fares better in the approximate cross-validation suggesting that condition (block order) affected participants’ causal judgments.

3.5.2.2 Model evaluation

df.data = df.exp1.combined.long %>%
  filter(question == "causal",
         clip <= 18) %>%
  mutate(rating = abs(rating)) %>%
  group_by(condition,
           clip,
           outcome_actual,
           outcome_counterfactual,
           index_actual,
           index_counterfactual) %>%
  summarize(rating = mean(rating)) %>%
  ungroup() %>%
  left_join(df.exp1.causal.model,
            by = c("clip",
                   "outcome_actual",
                   "outcome_counterfactual",
                   "index_actual",
                   "index_counterfactual"))

df.data %>% 
  group_by(condition) %>% 
  summarize(empirical_r = cor(rating, combined),
            empirical_rmse = rmse(rating, combined)) %>% 
  print_table()

condition	empirical_r	empirical_rmse
causal_first	0.92	13.78
counterfactual_first	0.99	5.27

df.data %>%
  summarize(empirical_r = cor(rating, combined),
            empirical_rmse = rmse(rating, combined),
            simulation_r = cor(rating, simulation),
            simulation_rmse = rmse(rating, simulation)) %>%
  print_table()

empirical_r	empirical_rmse	simulation_r	simulation_rmse
0.95	10.43	0.92	19.09

4 Experiment 2: Two candidate causes

4.1 Clips

4.2 Read in data

# clipinfo
df.exp2.clipinfo = tibble(clip = 1:32,
                          outcome_both = c(0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 
                                           0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1),
                          outcome_a = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 
                                        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1),
                          outcome_b = c(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 
                                        0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1),
                          outcome_none = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                                           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
                          clipindex = rep(1:2, 16))

# COUNTERFACTUAL JUDGMENTS 
df.exp2.counterfactual = read.csv("../../data/experiment2_counterfactual.csv",
                                  header = T,
                                  stringsAsFactors = F)

# demographics
df.exp2.counterfactual.demographics = df.exp2.counterfactual$extra %>% 
  enframe() %>% 
  separate_rows(value) %>% 
  mutate(value = as.numeric(value),
         name = rep(c("gender", "age", "difficulty",
                      "smoothness", "time", "condition"),
                    n()/6),
         participant = rep(1:(n()/6), each = 6)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  mutate(condition = factor(condition, levels = 20:21, labels = c("A", "B")),
         gender = factor(gender, levels = 1:2, labels = c("female", "male"))) %>% 
  select(participant, condition, gender, age, time, smoothness, difficulty)

# judgments
df.exp2.counterfactual.long = df.exp2.counterfactual$data %>%
  enframe() %>% 
  separate_rows(value) %>% 
  mutate(value = as.numeric(value),
         name = rep(c("clip", "rating"), n()/2),
         participant = rep(1:(n()/64), each = 64),
         order = rep(rep(1:32, each = 2), n()/64)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  left_join(df.exp2.counterfactual.demographics %>% 
              select(participant, ball = condition),
            by = "participant") %>% 
  select(participant, ball, clip, everything()) %>% 
  arrange(participant, clip)

# CAUSAL JUDGMENTS
df.exp2.causal = read.csv("../../data/experiment2_causal.csv",
                          header = T,
                          stringsAsFactors = F)

# demographics 
df.exp2.causal.demographics = df.exp2.causal$extra %>% 
  enframe() %>% 
  separate_rows(value) %>% 
  mutate(value = as.numeric(value),
         name = rep(c("gender", "age", "difficulty", "smoothness", "time", 
                      "counterbalance", "replayType", "experiment"),
                    n()/8),
         participant = rep(1:(n()/8), each = 8)) %>% 
  pivot_wider(names_from = name,
              values_from = value) %>% 
  mutate(counterbalance = ifelse(participant %in% c(1, 3, 5, 6, 9), 1, counterbalance),
         gender = factor(gender, levels = 1:2, labels = c("female", "male"))) %>% 
  select(participant, gender, age, counterbalance, time, smoothness, difficulty)

# judgments
df.exp2.causal.long = df.exp2.causal$data %>%
  enframe() %>% 
  separate_rows(value) %>% 
  mutate(value = as.numeric(value),
         name = rep(c("clip", "x", "y", "z", "rating1", "rating2"), n()/6),
         participant = rep(1:(n()/(32*6)), each = (32*6)),
         order = rep(rep(1:32, each = 6), n()/(32*6))) %>% 
  filter(!name %in% c("x", "y", "z")) %>% 
pivot_wider(names_from = name,
            values_from = value) %>% 
  left_join(df.exp2.causal.demographics %>% 
              select(participant, counterbalance),
            by = "participant") %>% 
  mutate(A = ifelse(counterbalance == 1, rating1, rating2),
         B = ifelse(counterbalance == 1, rating2, rating1)) %>% 
  select(-rating1, -rating2) %>% 
  pivot_longer(cols = c(A, B),
               names_to = "ball",
               values_to = "rating") %>% 
  select(participant, clip, ball, rating, order) %>% 
  arrange(participant, clip, ball)

4.2.1 Demographics

# counterfactual condition 
df.exp2.counterfactual.demographics %>% 
  summarize(n = nrow(.),
            age_mean = mean(age) %>% round(0),
            age_sd = sd(age) %>% round(1),
            n_female = sum(gender == "female"),
            time_mean = mean(time) %>% round(2),
            time_sd = sd(time) %>% round(2)) %>% 
  print_table()

n	age_mean	age_sd	n_female	time_mean	time_sd
80	33	10.1	34	18.08	4.63

# causal condition 
df.exp2.causal.demographics %>% 
  summarize(n = nrow(.),
            age_mean = mean(age) %>% round(0),
            age_sd = sd(age) %>% round(1),
            n_female = sum(gender == "female"),
            time_mean = mean(time) %>% round(2),
            time_sd = sd(time) %>% round(2)) %>% 
  print_table()

n	age_mean	age_sd	n_female	time_mean	time_sd
41	34	10.5	21	21.19	4.96

4.2.2 Participants’ feedback

df.exp2.counterfactual %>%
  select(feedback) %>%
  mutate(condition = "counterfactual",
         participant = 1:n()) %>% 
  bind_rows(df.exp2.causal %>%
              select(feedback) %>% 
              mutate(condition = "causal",
                     participant = 1:n())) %>%
  relocate(participant, condition) %>%
  datatable()

4.3 Read in model predictions

# read model predictions 
path = "../python/results/"
files = list.files(path = path, pattern = "*.csv")
files = files[str_detect(files, "3ball")]

df.exp2.model = files %>%
  map_dfr(~ read_csv(str_c(path, .))) %>% 
  rename(clip = trial)

4.3.1 Counterfactual condition

# calculate mean counterfactual judgments 
df.exp2.counterfactual.means = df.exp2.counterfactual.long %>% 
  group_by(clip, ball) %>%
  summarize(rating_mean = mean(rating),
            rating_low = smean.cl.boot(rating)[2],
            rating_high = smean.cl.boot(rating)[3]) %>% 
  ungroup()

# find noisy simulation model that best predicts the mean counterfactual 
# judgments by minimizing the sum of squared errors 
df.exp2.model.counterfactual = df.exp2.model %>% 
  group_by(noise) %>% 
  select(clip, contains("whether"), noise) %>% 
  pivot_longer(cols = c(A_whether, B_whether),
               names_to = "ball",
               values_to = "prediction") %>% 
  mutate(ball = str_remove(ball, "_whether")) %>% 
  arrange(noise, clip, ball) %>% 
  left_join(df.exp2.clipinfo, by = "clip") %>% 
  mutate(prediction = ifelse(outcome_both == 1, 1 - prediction, prediction)) %>% 
  group_by(noise) %>% 
  nest() %>% 
  mutate(sse = map(data,
                   ~ sum((.x$prediction*100 -
                            df.exp2.counterfactual.means$rating_mean) ^ 2)),
         sse = unlist(sse)) %>% 
  ungroup() %>% 
  filter(sse == min(sse)) %>% 
  unnest(data) %>% 
  mutate(prediction = prediction * 100)

4.3.2 Causal condition

# calculate mean causal judgments 
df.exp2.causal.means = df.exp2.causal.long %>% 
  group_by(clip, ball) %>%
  summarize(rating_mean = mean(rating),
            rating_low = smean.cl.boot(rating)[2],
            rating_high = smean.cl.boot(rating)[3]) %>% 
  ungroup()

df.exp2.model.causal = df.exp2.model %>% 
  # take into account difference-making 
  mutate(across(A_how:A_robust,
                ~ . * A_difference),
         across(B_how:B_robust,
                ~ . * B_difference)) %>% 
  # choose model based on best fit with counterfactual data 
  filter(noise == unique(df.exp2.model.counterfactual$noise)) %>% 
  pivot_longer(cols = A_difference:B_robust,
               names_to = c("ball", "aspect"),
               names_sep = "_",
               values_to = "value") %>% 
  pivot_wider(names_from = aspect,
              values_from = value) %>% 
  select(clip, ball:robust)

4.3.3 Heuristic model

l.features = fromJSON("data/features.json")

df.features.initial = l.features[["appearance"]] %>%
  cbind(l.features[["initialVelocity"]][["ballA"]]) %>%
  cbind(l.features[["initialVelocity"]][["ballB"]]) %>%
  cbind(l.features[["initialVelocity"]][["ballE"]]) %>%
  setNames(c(str_c("ball", LETTERS[c(1, 2, 5)], "_appearance"),
             "ballA_velx",
             "ballA_vely",
             "ballB_velx",
             "ballB_vely",
             "ballE_velx",
             "ballE_vely")) %>%
  mutate(clip = 1:nrow(.)) %>%
  pivot_longer(cols = starts_with("ball")) %>%
  separate(name, into = c("ball", "property")) %>%
  pivot_wider(names_from = property, values_from = value) %>% 
  mutate(ball = str_remove_all(ball, pattern = "ball"))

# information about collisions
df.features.collisions = data.frame()
ballnames = c("ballA", "ballB", "ballE")

for (i in 1:32) {
  df.tmp = data.frame()
  for (j in 1:length(ballnames)) {
    ncollisions = length(l.features[["collisions"]][[ballnames[j]]][["object"]][[i]])
    if (ncollisions > 0) {
      tmp = data.frame(
        ball = ballnames[j],
        object = l.features[["collisions"]][[ballnames[j]]][["object"]][[i]],
        time = l.features[["collisions"]][[ballnames[j]]][["time"]][[i]],
        pre.velx = l.features[["collisions"]][[ballnames[j]]][["pre"]][[i]][["x"]],
        pre.vely = l.features[["collisions"]][[ballnames[j]]][["pre"]][[i]][["y"]],
        post.velx = l.features[["collisions"]][[ballnames[j]]][["post"]][[i]][["x"]],
        post.vely = l.features[["collisions"]][[ballnames[j]]][["post"]][[i]][["y"]],
        ncollision = 1:ncollisions
      )
      df.tmp = df.tmp %>%
        rbind(tmp)
    }
  }
  df.features.collisions = df.features.collisions %>%
    rbind(df.tmp %>%
            mutate(clip = i) %>%
            select(clip, ball, everything()))
}

# find end of clip
tmp = df.features.collisions %>%
  group_by(clip) %>%
  filter(ball == "ballE",
         str_detect(object, "goal|Left|Right")) %>%
  group_by(clip) %>%
  mutate(endclip = max(time)) %>%
  select(clip, endclip) %>%
  ungroup() %>%
  distinct()

# remove events that happen after the end of the clip
df.features.collisions = df.features.collisions %>%
  left_join(tmp, by = "clip") %>%
  mutate(endclip = ifelse(is.na(endclip), 400, endclip)) %>%
  group_by(clip) %>%
  filter(time <= endclip)

# E initially at rest or moving?
tmp.movingE = df.features.initial %>%
  filter(ball == "E") %>%
  mutate(E.moving = ifelse(velx == 0 & vely == 0, 1, 0)) %>%
  select(clip, E.moving)

# contact with ball E
tmp.contactE = df.features.collisions %>%
  filter(object == "ballE") %>%
  mutate(ball = factor(ball,
                       levels = c("ballA", "ballB"),
                       labels = c("A", "B"))) %>%
  mutate(contactE = 1) %>%
  select(clip, ball, contactE) %>%
  group_by(clip, ball) %>%
  summarize(contactE = any(contactE %>% as.logical()) * 1) %>%
  left_join(expand.grid(clip = 1:32, ball = c("A", "B")), .,
            by = c("clip", "ball")) %>%
  mutate(contactE = ifelse(is.na(contactE), 0, contactE))

# number of collisions
tmp.ncollisions = df.features.collisions %>%
  filter(str_detect(object, "ball"),
         ball != "ballE") %>%
  mutate(ball = factor(ball,
                       levels = c("ballA", "ballB"),
                       labels = c("A", "B"))) %>%
  group_by(clip, ball) %>%
  count() %>%
  rename(ncollision = n)

# exclusivity (no contact with ball E by the other ball)
tmp.exclusive = df.features.collisions %>%
  filter(str_detect(object, "ball"),
         ball != "ballE") %>%
  count(clip, ball, object) %>%
  mutate(AE = ifelse(ball == "ballA" & object == "ballE", 1, 0),
         BE = ifelse(ball == "ballB" & object == "ballE", 1, 0)) %>%
  group_by(clip) %>%
  summarize(AE = any(AE %>% as.logical()) * 1,
            BE = any(BE %>% as.logical()) * 1) %>%
  mutate(A = ifelse(AE == 1 & BE == 0, 1, 0),
         B = ifelse(AE == 0 & BE == 1, 1, 0)) %>%
  select(clip, A, B) %>%
  pivot_longer(cols = -clip,
               names_to = "ball",
               values_to = "exclusive")

# impact
func_angle = function(x, y) {
  dot.prod = x %*% y
  norm.x = norm(x, type = "2")
  norm.y = norm(y, type = "2")
  theta = acos(dot.prod / (norm.x * norm.y))
  as.numeric(theta)
}

# impact on ball E 
tmp.impactE = df.features.collisions %>%
  rowwise() %>%
  filter(str_detect(object, "ball"),
         ball == "ballE") %>%
  mutate(pre.speed = abs(pre.velx) + abs(pre.vely),
         post.speed = abs(post.velx) + abs(post.vely),
         speed.diff = post.speed - pre.speed,
         direction.diff = func_angle(c(pre.velx, pre.vely),
                                     c(post.velx, post.vely)),
         direction.diff = ifelse(is.na(direction.diff),
                                 abs(atan2(post.vely - pre.vely,
                                           post.velx - pre.velx)),
                                 direction.diff)) %>%
  ungroup() %>%
  group_by(clip, object) %>%
  summarize(speed.diff = sum(speed.diff),
            direction.diff = sum(direction.diff)) %>%
  rename(ball = object) %>%
  mutate(ball = factor(ball,
                       levels = c("ballA", "ballB"),
                       labels = c("A", "B"))) %>%
  left_join(expand.grid(clip = 1:32, ball = c("A", "B")), .,
            by = c("clip", "ball")) %>%
  mutate(across(c(speed.diff, direction.diff),
                   ~ ifelse(is.na(.), 0, .))) %>%
  arrange(clip, ball) %>%
  rename(E.speed.diff = speed.diff,
         E.direction.diff = direction.diff)

# total impact 
tmp.impactTotal = df.features.collisions %>%
  rowwise() %>%
  filter(str_detect(object, "ballA|ballB")) %>%
  mutate(pre.speed = abs(pre.velx) + abs(pre.vely),
         post.speed = abs(post.velx) + abs(post.vely),
         speed.diff = post.speed - pre.speed,
         direction.diff = func_angle(c(pre.velx, pre.vely),
                                     c(post.velx, post.vely)),
         direction.diff = ifelse(is.na(direction.diff),
                                 abs(atan2(post.vely - pre.vely,
                                           post.velx - pre.velx)),
                                 direction.diff)) %>%
  ungroup() %>%
  group_by(clip, object) %>%
  summarize(speed.diff = sum(speed.diff),
            direction.diff = sum(direction.diff)) %>%
  rename(ball = object) %>%
  mutate(ball = factor(ball,
                       levels = c("ballA", "ballB"),
                       labels = c("A", "B"))) %>%
  left_join(expand.grid(clip = 1:32, ball = c("A", "B")), .,
            by = c("clip", "ball")) %>%
  mutate(across(c(speed.diff, direction.diff),
                ~ ifelse(is.na(.), 0, .))) %>%
  arrange(clip, ball) %>%
  rename(total.speed.diff = speed.diff,
         total.direction.diff = direction.diff)

# transfer of force 
tmp.transfer = df.features.collisions %>%
  filter(str_detect(ball, "ballA|ballB"),
         str_detect(object, "ball")) %>%
  mutate(AE = ifelse(ball == "ballA" & object == "ballE", 1, NA),
         BE = ifelse(ball == "ballB" & object == "ballE", 1, NA),
         AB = ifelse((ball == "ballA" & object == "ballB"), 1, NA)) %>%
  mutate(across(c(AE, BE, AB), ~ . * time)) %>%
  filter(!is.na(AE) | !is.na(BE) | !is.na(AB)) %>%
  arrange(clip, time) %>%
  group_by(clip) %>%
  summarize(A = any(!is.na(AE)),
            B = any(!is.na(BE)),
            A = ifelse(any(!is.na(AB)) &
                         any(!is.na(BE)) &
                         max(AB, na.rm = T) < min(BE, na.rm = T),
                       T,
                       A),
            B = ifelse(any(!is.na(AB)) &
                         any(!is.na(AE)) &
                         max(AB, na.rm = T) < min(AE, na.rm = T),
                       T,
                       B)) %>%
  pivot_longer(cols = -clip,
               names_to = "ball",
               values_to = "transfer") %>% 
  arrange(clip, ball)

# collect features
df.features = df.features.initial %>%
  filter(ball != "E") %>%
  mutate(ball = factor(ball)) %>%
  mutate(moving = ifelse(velx == 0 & vely == 0, 0, 1),
         speed = abs(velx) + abs(vely)) %>%
  left_join(tmp.contactE) %>%
  left_join(tmp.impactE) %>%
  left_join(tmp.impactTotal) %>%
  left_join(tmp.transfer %>% 
              mutate(transfer = transfer * 1)) %>%
  left_join(tmp.movingE) %>%
  left_join(tmp.exclusive) %>%
  select(-c(appearance, velx, vely))

# remove temporary variables
rm(list = ls()[str_detect(ls(), "tmp")])

4.4 Counterfactual condition

4.4.1 Stats

4.4.1.1 Model evaluation

# best fitting model 
df.exp2.counterfactual.means %>% 
  left_join(df.exp2.model.counterfactual,
            by = c("clip", "ball")) %>% 
  summarize(noise = unique(noise),
            simulation_r = cor(rating_mean, prediction),
            simulation_rmse = rmse(rating_mean, prediction)) %>% 
  print_table()

noise	simulation_r	simulation_rmse
2	0.86	19.84

# deterministic model 
df.exp2.counterfactual.means %>% 
  left_join(df.exp2.clipinfo %>% 
              select(clip, outcome_a, outcome_b) %>% 
              pivot_longer(cols = -clip,
                           names_to = "ball",
                           values_to = "outcome") %>% 
              mutate(ball = factor(ball,
                                   levels = c("outcome_a", "outcome_b"),
                                   labels = c("B", "A"))),
            by = c("clip", "ball")) %>% 
  mutate(outcome = outcome * 100) %>% 
  summarize(simulation_r = cor(rating_mean, outcome),
            simulation_rmse = rmse(rating_mean, outcome)) %>% 
  print_table()

simulation_r	simulation_rmse
0.83	30.28

4.4.2 Plots

4.4.2.1 Bar graph

set.seed(1)

df.plot = df.exp2.counterfactual.long %>%
  left_join(df.exp2.model.counterfactual %>%
              select(clip, ball, prediction) %>%
              mutate(ball = as.factor(ball)),
            by = c("clip", "ball")) %>%
  left_join(df.exp2.clipinfo,
            by = "clip") %>% 
  pivot_longer(cols = c(rating, prediction),
               names_to = "model",
               values_to = "value") %>% 
  mutate(colorindex = 1,
         colorindex = ifelse(model == "rating" & ball == "A" & outcome_b == 1,
                             2,
                             colorindex),
         colorindex = ifelse(model == "rating" & ball == "B" & outcome_a == 1,
                             2,
                             colorindex),
         colorindex = ifelse(model == "prediction", 3, colorindex),
         colorindex = as.factor(colorindex),
         model = factor(model, levels = c("rating", "prediction"))) %>%
  arrange(participant, clip, ball)

df.text = df.plot %>%
  expand(ball, clip, model) %>%
  mutate(label = ifelse(model == "rating" & ball == "A", clip, NA),
         y = 110,
         x = 1.5,
         colorindex = NA)

p = actual_counterfactual_threeballs_plot(df.plot, "counterfactual judgment")
print(p)

# ggsave("../../figures/plots/exp2_counterfactual_bars.pdf",
#        width = 12,
#        height = 6)

4.5 Causal condition

4.5.1 Stats

4.5.1.1 Bayesian mixed effects model

df.data = df.exp2.causal.long %>% 
  left_join(df.exp2.model.causal,
            by = c("clip", "ball"))

fit.brm_exp2_causal_w = brm(formula = rating ~ 1 + whether + 
                              (1 + whether | participant),
                            data = df.data,
                            cores = 4,
                            chains = 4,
                            iter = 4000,
                            seed = 1,
                            file = "cache/brm_exp2_causal_w")

fit.brm_exp2_causal_wh = brm(formula = rating ~ 1 + whether + how +
                               (1 + whether + how | participant),
                             data = df.data,
                             cores = 4,
                             chains = 4,
                             iter = 4000,
                             seed = 1,
                             file = "cache/brm_exp2_causal_wh")

fit.brm_exp2_causal_whs = brm(formula = rating ~ 1 + whether + how + sufficient +
                                (1 + whether + how + sufficient | participant),
                              data = df.data,
                              cores = 4,
                              chains = 4,
                              iter = 4000,
                              seed = 1,
                              file = "cache/brm_exp2_causal_whs")

4.5.1.2 Model comparison

fit.brm_exp2_causal_w = add_criterion(fit.brm_exp2_causal_w,
                                      criterion = c("loo"),
                                      reloo = T,
                                      file = "cache/brm_exp2_causal_w")

fit.brm_exp2_causal_wh = add_criterion(fit.brm_exp2_causal_wh,
                                       criterion = c("loo"),
                                       reloo = T,
                                       file = "cache/brm_exp2_causal_wh")

fit.brm_exp2_causal_whs = add_criterion(fit.brm_exp2_causal_whs,
                                        criterion = c("loo"),
                                        reloo = T,
                                        file = "cache/brm_exp2_causal_whs")

loo_compare(fit.brm_exp2_causal_w,
            fit.brm_exp2_causal_wh,
            fit.brm_exp2_causal_whs)

                        elpd_diff se_diff
fit.brm_exp2_causal_whs    0.0       0.0 
fit.brm_exp2_causal_wh  -107.0      14.7 
fit.brm_exp2_causal_w   -383.5      30.1

4.5.1.3 Heuristic model

# Regression based on features
df.regression.features = df.exp2.causal.long %>%
  left_join(df.exp2.clipinfo, by = "clip") %>% 
  left_join(df.features %>% 
              mutate(across(moving:exclusive, ~ scale(.)[,])),
            by = c("clip", "ball")) %>%
  clean_names() %>% 
  mutate(e_moving = 1 - e_moving)

# restrict the weights such that all predictors are positive
priors = c(set_prior("normal(0,10)", class = "b", lb = 0))

fit.brm_exp2_causal_heuristic = brm(formula = rating ~ moving + 
                                      speed + 
                                      contact_e + 
                                      e_speed_diff + 
                                      e_direction_diff + 
                                      total_speed_diff + 
                                      total_direction_diff + 
                                      transfer + 
                                      e_moving + 
                                      exclusive + (1 | participant),
                                    prior = priors,
                                    data = df.regression.features %>% 
                                      select(-c(clip, ball, order, clipindex,
                                                contains("outcome"))),
                                    cores = 4,
                                    chains = 4,
                                    iter = 4000,
                                    seed = 1,
                                    file = "cache/brm_exp2_causal_heuristic")
fit.brm_exp2_causal_heuristic

 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: rating ~ moving + speed + contact_e + e_speed_diff + e_direction_diff + total_speed_diff + total_direction_diff + transfer + e_moving + exclusive + (1 | participant) 
   Data: df.regression.features %>% select(-c(clip, ball, o (Number of observations: 2624) 
Samples: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
         total post-warmup samples = 8000

Group-Level Effects: 
~participant (Number of levels: 41) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     8.57      1.23     6.42    11.25 1.00     2557     4356

Population-Level Effects: 
                     Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
Intercept               49.73      1.48    46.87    52.70 1.00     2235
moving                   0.22      0.21     0.00     0.79 1.00     7506
speed                    2.08      0.84     0.45     3.73 1.00     3648
contact_e                0.38      0.36     0.01     1.35 1.00     7037
e_speed_diff             0.12      0.12     0.00     0.46 1.00     8369
e_direction_diff         1.06      0.73     0.06     2.76 1.00     5417
total_speed_diff         2.19      0.95     0.42     4.09 1.00     3860
total_direction_diff     3.99      0.90     2.21     5.69 1.00     5958
transfer                15.59      0.80    13.98    17.14 1.00     7961
e_moving                 0.18      0.18     0.00     0.65 1.00     8827
exclusive                4.38      0.71     3.00     5.79 1.00     9485
                     Tail_ESS
Intercept                3643
moving                   3506
speed                    2114
contact_e                4269
e_speed_diff             4016
e_direction_diff         4198
total_speed_diff         2781
total_direction_diff     4094
transfer                 5368
e_moving                 4555
exclusive                4992

Family Specific Parameters: 
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma    33.20      0.46    32.32    34.12 1.00    13320     5610

Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

4.5.1.4 Predictions for mean causal judgments

func_fit_data = function(fit){
  fit %>% 
    fitted(newdata = df.exp2.model.causal %>% 
             left_join(df.features %>% 
                         mutate(across(moving:exclusive,
                                       ~ scale(.)[,])),
                       by = c("clip", "ball")) %>%
             clean_names() %>% 
             mutate(e_moving = 1 - e_moving),
           re_formula = NA) %>% 
    as_tibble() %>% 
    pull(Estimate)
}

df.exp2.causal.regression = df.exp2.causal.means %>% 
  mutate(w = func_fit_data(fit.brm_exp2_causal_w),
         wh = func_fit_data(fit.brm_exp2_causal_wh),
         whs = func_fit_data(fit.brm_exp2_causal_whs),
         heuristic = func_fit_data(fit.brm_exp2_causal_heuristic))

4.5.1.5 Model evaluation

prediction = "whs"

df.exp2.causal.regression %>% 
  summarize(simulation_r = cor(rating_mean, .[[prediction]]),
            simulation_rmse = rmse(rating_mean, .[[prediction]])) %>% 
  print_table()

simulation_r	simulation_rmse
0.87	13.06

4.5.1.6 Individual participant regressions

This code chunk takes a long time to compute but needs to be run once to generate the .RData file with individual data fits. The file was larger than 100mb so we couldn’t upload it to github.

df.fit = df.exp2.causal.long %>% 
  left_join(df.exp2.model.causal,
            by = c("clip", "ball"))

# priors 
priors = c(set_prior("normal(0,10)", class = "b", lb = 0))

# initial model fits (used for compilation)
fit.brm_exp2_causal_individual_baseline = 
  brm(formula = rating ~ 1,
      data = df.fit %>% 
        filter(participant == 1),
      cores = 4,
      chains = 4,
      iter = 4000,
      seed = 1,
      file = str_c("cache/brm_exp2_causal_individual_baseline"))

fit.brm_exp2_causal_individual_w = 
  brm(formula = rating ~ 1 + whether,
      data = df.fit %>% 
        filter(participant == 1),
      prior = priors,
      cores = 4,
      chains = 4,
      iter = 4000,
      seed = 1,
      file = str_c("cache/brm_exp2_causal_individual_w"))

fit.brm_exp2_causal_individual_wh = 
  brm(formula = rating ~ 1 + whether + how,
      data = df.fit %>% 
        filter(participant == 1),
      prior = priors,
      cores = 4,
      chains = 4,
      iter = 4000,
      seed = 1,
      file = str_c("cache/brm_exp2_causal_individual_wh"))

fit.brm_exp2_causal_individual_whs = 
  brm(formula = rating ~ 1 + whether + how + sufficient,
      data = df.fit %>% 
        filter(participant == 1),
      prior = priors,
      cores = 4,
      chains = 4,
      iter = 4000,
      seed = 1,
      file = str_c("cache/brm_exp2_causal_individual_whs"))

# update model fits for different participants
df.exp2.causal.individual_fit = df.fit %>%
  group_by(participant) %>%
  nest() %>%
  ungroup() %>% 
  # fit model for each participant
  mutate(fit_baseline = map(.x = data,
                            .f = ~ update(fit.brm_exp2_causal_individual_baseline,
                                          newdata = .x)),
         fit_w = map(.x = data,
                     .f = ~ update(fit.brm_exp2_causal_individual_w,
                                   newdata = .x)),
         fit_wh = map(.x = data,
                      .f = ~ update(fit.brm_exp2_causal_individual_wh,
                                    newdata = .x)),
         fit_whs = map(.x = data,
                       .f = ~ update(fit.brm_exp2_causal_individual_whs,
                                     newdata = .x))) %>% 
  mutate(fit_baseline = map(.x = fit_baseline,
                            ~ add_criterion(.x,
                                            criterion = c("loo"))),
         fit_w = map(.x = fit_w,
                     ~ add_criterion(.x,
                                     criterion = c("loo"))),
         fit_wh = map(.x = fit_wh,
                      ~ add_criterion(.x,
                                      criterion = c("loo"))),
         fit_whs = map(.x = fit_whs,
                       ~ add_criterion(.x,
                                       criterion = c("loo"))),
         r_w = map2_dbl(.x = data,
                        .y = fit_w,
                        .f = ~ cor(.x$rating, .y %>% 
                                     fitted() %>% 
                                     .[, 1])),
         r_wh = map2_dbl(.x = data,
                         .y = fit_wh,
                         .f = ~ cor(.x$rating, .y %>% 
                                      fitted() %>% 
                                      .[, 1])),
         r_whs = map2_dbl(.x = data,
                          .y = fit_whs,
                          .f = ~ cor(.x$rating, .y %>% 
                                       fitted() %>% 
                                       .[, 1])),
         rmse_w = map2_dbl(.x = data,
                           .y = fit_w,
                           .f = ~ rmse(.x$rating, .y %>% 
                                         fitted() %>% 
                                         .[, 1])),
         rmse_wh = map2_dbl(.x = data,
                            .y = fit_wh,
                            .f = ~ rmse(.x$rating, .y %>% 
                                          fitted() %>% 
                                          .[, 1])),
         rmse_whs = map2_dbl(.x = data,
                             .y = fit_whs,
                             .f = ~ rmse(.x$rating, .y %>% 
                                           fitted() %>% 
                                           .[, 1])),
         model_comparison = pmap(.l = list(baseline = fit_baseline,
                                           w = fit_w,
                                           wh = fit_wh,
                                           whs = fit_whs),
                                 .f = ~ loo_compare(..1, ..2, ..3, ..4)),
         best_model = map_chr(.x = model_comparison,
                              .f = ~ rownames(.) %>% .[1]),
         best_model = factor(best_model,
                             levels = c("..1", "..2", "..3", "..4"),
                             labels = c("baseline", "w", "wh", "whs")))

# save(list = c("df.exp2.causal.individual_fit"),
#      file = "data/exp2_causal_individual_fit.RData")

4.5.1.6.1 Load individual participant regressions

load("data/exp2_causal_individual_fit.RData")

# count how many participants are best fit by the different models
df.exp2.causal.individual_fit %>% 
  count(best_model) %>% 
  print_table()

best_model	n
wh	2
whs	39

4.5.1.6.2 Model performance on individual participant level

df.exp2.causal.individual_fit %>% 
  select(participant, contains("r_"), contains("rmse_")) %>% 
  pivot_longer(cols = -participant,
               names_to = c("measure", "model"),
               names_sep = "_",
               values_to = "fit") %>% 
  group_by(model, measure) %>% 
  summarize(quantiles = quantile(fit, probs = c(0.05, 0.5, 0.95)),
            prob = c(0.05, 0.5, 0.95)) %>% 
  pivot_wider(names_from = prob,
              values_from = quantiles) %>% 
  print_table()

model	measure	0.05	0.5	0.95
w	r	0.22	0.43	0.60
w	rmse	29.19	36.18	43.73
wh	r	0.37	0.60	0.78
wh	rmse	23.70	32.54	40.70
whs	r	0.40	0.64	0.79
whs	rmse	22.50	31.20	39.08

4.5.1.6.3 Clusters of individual participants’ responses

set.seed(1)

df.cluster_whs = fit.brm_exp2_causal_whs %>% 
  ranef() %>% 
  .$participant %>% 
  as_tibble() %>% 
  select(contains("Estimate")) %>% 
  set_names(tolower(str_replace_all(names(.), "Estimate.", ""))) %>% 
  mutate(participant = 1:n()) %>% 
  relocate(participant)

df.cluster_whs = df.cluster_whs %>% 
  mutate(cluster = df.cluster_whs %>%
           select(-participant) %>%
           kmeans(centers = 2) %>% 
           .$cluster)

df.cluster_whs %>% 
  print_table()

participant	intercept	whether	how	sufficient	cluster
1	7.76	0.99	-4.99	-0.78	1
2	-0.47	-20.70	31.94	-17.47	2
3	2.23	-9.06	26.55	-9.04	2
4	-0.15	5.35	-0.65	6.33	1
5	-0.62	0.85	-4.67	-1.25	1
6	4.23	-14.57	21.17	-14.30	2
7	-0.19	2.73	-3.75	0.86	1
8	-3.21	-3.78	5.72	-1.50	2
9	-3.89	-6.15	9.73	-2.84	2
10	1.32	-8.35	2.15	-7.56	2
11	2.29	8.96	-11.31	5.78	1
12	-1.37	14.00	-23.75	13.86	1
13	3.32	4.86	0.91	1.48	1
14	2.78	5.28	-11.98	2.23	1
15	-5.80	-9.62	-7.03	-4.98	1
16	0.34	22.21	-15.58	19.53	1
17	0.12	-3.30	2.25	-2.35	1
18	2.59	0.71	-10.39	-1.16	1
19	-2.47	-7.50	3.86	-6.12	2
20	8.77	19.87	-10.31	13.20	1
21	-1.59	0.68	-12.00	1.17	1
22	-4.81	0.01	-3.14	0.99	1
23	-0.48	-0.22	-5.95	1.69	1
24	-0.35	1.04	0.46	1.87	1
25	3.11	13.59	-12.70	8.57	1
26	1.19	12.95	1.25	9.67	1
27	-3.52	12.99	-17.44	15.89	1
28	1.56	-6.75	-2.51	-7.44	2
29	-0.22	7.39	-4.79	5.92	1
30	-4.45	-20.63	17.27	-15.29	2
31	-5.86	6.61	-12.18	10.19	1
32	-1.23	0.77	4.39	4.61	1
33	3.66	13.23	-9.56	10.28	1
34	-4.52	-9.39	6.97	-5.32	2
35	-0.67	-13.75	-1.43	-13.16	2
36	-2.11	-27.38	46.11	-23.71	2
37	10.67	13.82	1.17	5.96	1
38	1.84	8.24	-5.49	5.76	1
39	-5.80	-10.51	7.51	-9.68	2
40	-6.06	-6.80	7.55	-4.07	2
41	1.76	-2.14	-1.20	-0.79	1

# cluster sizes
df.cluster_whs %>% 
  count(cluster)

# A tibble: 2 x 2
  cluster     n
*   <int> <int>
1       1    27
2       2    14

4.5.1.6.3.1 Check that clustering is stable

A 2-cluster solution yields a good result according to a number of different validation measures (see here for more details on the different measures).

tmp = df.cluster_whs %>% 
  select(-participant, -cluster)

fit = clValid(obj = as.matrix(tmp),
        nClust = 2:10,
        clMethods = c("kmeans"),
        validation = c("internal", "stability"))

Warning in clValid(obj = as.matrix(tmp), nClust = 2:10, clMethods =
c("kmeans"), : rownames for data not specified, using 1:nrow(data)

fit %>% 
  summary()


Clustering Methods:
 kmeans 

Cluster sizes:
 2 3 4 5 6 7 8 9 10 

Validation Measures:
                           2       3       4       5       6       7       8       9      10
                                                                                            
kmeans APN            0.1176  0.1414  0.2080  0.1219  0.1897  0.2723  0.3262  0.2900  0.2716
       AD            18.7780 14.1819 13.5064 10.7858 10.4043 10.2673 10.1356  9.3995  8.7734
       ADM            4.4409  3.5882  4.1361  2.2932  3.4262  4.3009  4.8524  4.6046  4.5736
       FOM            7.5680  6.2550  6.1193  5.4051  5.2029  5.1031  5.3320  5.1198  4.8700
       Connectivity   8.0238 14.7222 17.0556 24.7833 28.7302 32.9802 33.8135 41.5984 48.9571
       Dunn           0.0931  0.1915  0.2054  0.1882  0.2138  0.2138  0.2138  0.2472  0.2560
       Silhouette     0.5150  0.4096  0.3791  0.3538  0.3357  0.3145  0.3087  0.2843  0.2777

Optimal Scores:

             Score  Method Clusters
APN          0.1176 kmeans 2       
AD           8.7734 kmeans 10      
ADM          2.2932 kmeans 5       
FOM          4.8700 kmeans 10      
Connectivity 8.0238 kmeans 2       
Dunn         0.2560 kmeans 10      
Silhouette   0.5150 kmeans 2

4.5.2 Plots

4.5.2.1 Bar graph

set.seed(1)
model_index = "whs"

# model predictions 
model_prediction = list(fit.brm_exp2_causal_w,
                        fit.brm_exp2_causal_wh,
                        fit.brm_exp2_causal_whs) %>%
  map_dfr(~ fitted(., df.exp2.causal.means %>% 
                     left_join(df.exp2.model.causal,
                               by = c("clip", "ball")),
                   re_formula = NA) %>% 
            as_tibble()) %>% 
  mutate(ball = rep(c("A", "B"), n()/2),
         clip = rep(rep(1:32, each = 2), 3),
         model = rep(c("w", "wh", "whs"),
                     each = 64))

df.plot = df.exp2.causal.long %>%
  left_join(df.exp2.clipinfo %>% 
              select(clip, outcome_both),
            by = c("clip")) %>% 
  left_join(model_prediction %>% 
              filter(model == model_index) %>% 
              select(model = Estimate, clip, ball),
            by = c("clip", "ball")) %>% 
  pivot_longer(cols = c(rating, model),
               names_to = "model",
               values_to = "value") %>% 
  mutate(colorindex = 1,
         colorindex = ifelse(model == "rating" &
                               outcome_both == 0, 1, colorindex),
         colorindex = ifelse(model == "rating" &
                               outcome_both == 1, 2, colorindex),
         colorindex = ifelse(model == "model", 3, colorindex),
         colorindex = as.factor(colorindex),
         model = factor(model, levels = c("rating", "model"))) %>%
  arrange(participant, clip, ball)

df.text = df.plot %>%
  expand(ball, clip, model) %>% 
  mutate(label = ifelse(model == "rating" & ball == "A", clip, NA),
         y = 110,
         x = 1.5,
         colorindex = NA)

p = actual_counterfactual_threeballs_plot(df.plot,"causal responsibility")
print(p)

# ggsave("../../figures/plots/exp2_causal_bars.pdf",
#        width = 12,
#        height = 6)

4.5.2.2 Selection

set.seed(1)

check = "\u2713"
cross = "\u2717"

x_labels = c(str_c("\nW ", cross, "\nH ", check, "\nS ", cross),
             str_c("\nW ", check, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", cross, "\nS ", cross),
             str_c("\nW ", check, "\nH ", cross, "\nS ", cross),
             str_c("\nW ", check, "\nH ", check, "\nS ", cross),
             str_c("\nW ", check, "\nH ", check, "\nS ", cross),
             str_c("\nW ", cross, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", cross, "\nS ", cross))
  

df.plot = df.exp2.causal.long %>%
  filter(clip %in% c(3, 7, 15, 16, 23)) %>%
  group_by(clip, ball) %>%
  summarize(mean = mean(rating),
            low = smean.cl.boot(rating)[2],
            high = smean.cl.boot(rating)[3]) %>%
  left_join(df.exp2.causal.regression %>% select(clip, ball, w, wh, whs),
            by = c("clip", "ball")) %>%
  ungroup() %>%
  pivot_longer(cols = c(mean, w, wh, whs),
               names_to = "index",
               values_to = "value") %>% 
  mutate(across(c(low, high),
                ~ ifelse(index == "mean", ., NA))) %>%
  mutate(index = factor(index, levels = c("mean", "w", "wh", "whs")),
         clip = factor(clip, levels = c(7, 23, 3, 15, 16),
                       labels = c("causal chain",
                                  "double prevention",
                                  "joint causation",
                                  "overdetermination",
                                  "preemption")))

df.labels = df.plot %>% 
  distinct(clip, ball) %>% 
  arrange(clip, ball) %>% 
  mutate(labels = x_labels,
         value = -10,
         index = NA)

func_load_image = function(clip){
  readPNG(str_c("../../figures/diagrams/exp2_clip", clip, ".png"))
}

df.clips = df.plot %>% 
  distinct(clip) %>% 
  arrange(clip) %>% 
  mutate(number = c(7, 23, 3, 15, 16),
         grob = map(.x = number, .f = ~ func_load_image(clip = .x)),
         index = NA,
         value = NA,
         ball = NA,
         label = str_c("clip ", number))

ball_a = readPNG("../../figures/diagrams/ballA.png")
ball_a = rasterGrob(ball_a, interpolate = TRUE)

ball_b = readPNG("../../figures/diagrams/ballB.png")
ball_b = rasterGrob(ball_b, interpolate = TRUE)

p = ggplot(data = df.plot, 
       mapping = aes(x = ball,
                     y = value,
                     group = index,
                     fill = index)) +
  geom_bar(stat = "identity", color = "black",
           position = position_dodge(0.9),
           width = 0.9) +
  geom_errorbar(mapping = aes(ymin = low, ymax = high),
                width = 0,
                size = 1,
                position = position_dodge(0.9)) +
  annotation_custom(grob = ball_a,
                    xmin = 0.5,
                    xmax = 1.5,
                    ymin = -25,
                    ymax = -7) + 
  annotation_custom(grob = ball_b,
                    xmin = 1.5,
                    xmax = 2.5,
                    ymin = -25,
                    ymax = -7) +
  geom_text(data = df.labels,
            mapping = aes(label = labels,
                          y = -Inf),
            vjust = 1.2,
            family = "Arial Unicode MS",
            size = 8) + 
  geom_custom(data = df.clips,
              mapping = aes(data = grob, x = 1.5, y = Inf),
              grob_fun = function(x) rasterGrob(x,
                                                interpolate = T,
                                                vjust = -0.25)) + 
  geom_text(data = df.clips,
            mapping = aes(label = label,
                          y = Inf,
                          x = -Inf),
            size = 9,
            hjust = -0.2,
            vjust = -4) + 
  facet_grid(~clip) +
  scale_fill_grey(start = 1,
                  end = 0,
                  labels = c("mean rating  ", expression(CSM[W] ~ " "),
                             expression(CSM[WH] ~ " "),
                             expression(CSM[WHS] ~ " "))) +
  labs(y = "causal responsibility", fill = "") +
  coord_cartesian(clip = "off") + 
  theme_bw() +
  theme(legend.text.align = 0,
        text = element_text(size = 20),
        panel.grid = element_blank(),
        legend.position = "bottom",
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.title.y = element_text(size = 30),
        legend.text = element_text(size = 30),
        strip.text = element_text(size = 30),
        legend.spacing = unit(0.5, "cm"),
        legend.background = element_rect(fill = "transparent"),
        legend.margin = margin(t = 5, unit = "cm"),
        plot.margin = margin(t = 8, l = 0.2, r = 0.2, b = 0.1, unit = "cm"))
print(p)

# ggsave("../../figures/plots/exp2_selection_bars.pdf",
#        width = 20,
#        height = 10,
#        device = cairo_pdf)

4.5.2.3 Scatter plot

func_scatterplot = function(model){
  if(model == "heuristic"){
    xlabel = "Heuristic"
  }else{
    xlabel = bquote(CSM[.(toupper(model))])
  }
  
  l.models = list(w = fit.brm_exp2_causal_w, 
                  wh = fit.brm_exp2_causal_wh,
                  whs = fit.brm_exp2_causal_whs,
                  heuristic = fit.brm_exp2_causal_heuristic)
  
  df.data = df.exp2.causal.means %>% 
    left_join(df.exp2.model.causal, by = c("clip", "ball")) %>% 
    left_join(df.regression.features, by = c("clip", "ball"))
  
  df.plot = l.models[[model]] %>%
    fitted(newdata = df.data,
           re_formula = NA) %>% 
    as_tibble() %>% 
    clean_names() %>%
    bind_cols(df.data)
  
  p = ggplot(data = df.plot,
             mapping = aes(x = estimate, 
                           y = rating_mean)) +
    geom_abline(intercept = 0,
                slope = 1,
                linetype = 2) + 
    geom_smooth(aes(y = estimate,
                    ymin = q2_5,
                    ymax = q97_5),
                stat = "identity",
                color = "lightblue",
                fill = "lightblue") +
    geom_linerange(size = 0.5, 
                   mapping = aes(ymin = rating_low,
                                 ymax = rating_high),
                   color = "gray80") +
    geom_point(size = 2) + 
    scale_color_manual(values = c("black", "#e41a1c"),
                       guide = F) +
    coord_fixed(xlim = c(0, 100),
                ylim = c(0, 100)) +
    labs(x = xlabel,
         y = "responsibility judgment") +
    annotate(geom = "text",
             label = str_c(
               "r = ", cor(df.plot$estimate, df.plot$rating_mean) %>% 
                 round(2) %>% 
                 as.character() %>% 
                 str_sub(start = 2),
               "\nRMSE = ", rmse(df.plot$estimate, df.plot$rating_mean) %>% 
                 round(2)),
             x = -2,
             y = 95,
             size = 6,
             hjust = 0) +
    scale_x_continuous(breaks = seq(0, 100, 25)) +
    scale_y_continuous(breaks = seq(0, 100, 25)) +
    theme(text = element_text(size = 20))
  return(p)
}

list(func_scatterplot(model = "w"),
     func_scatterplot(model = "wh"),
     func_scatterplot(model = "whs"),
     func_scatterplot(model = "heuristic")) %>%
  wrap_plots(ncol = 2)


# for creating and saving an individual scatter plots 
# model = "w"
# model = "wh"
# model = "whs"
# model = "heuristic"

# func_scatterplot(model)
# ggsave(str_c("../../figures/plots/exp2_", model, "_scatter.pdf"),
#        width = 5,
#        height = 5)

4.5.2.4 Individual participant variance for a selection of clips (clustered)

check = "\u2713"
cross = "\u2717"

x_labels = c(str_c("\nW ", cross, "\nH ", check, "\nS ", cross),
             str_c("\nW ", check, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", cross, "\nS ", cross),
             str_c("\nW ", check, "\nH ", cross, "\nS ", cross),
             str_c("\nW ", check, "\nH ", check, "\nS ", cross),
             str_c("\nW ", check, "\nH ", check, "\nS ", cross),
             str_c("\nW ", cross, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", check, "\nS ", check),
             str_c("\nW ", cross, "\nH ", cross, "\nS ", cross))

df.plot = df.exp2.causal.long %>%
  filter(clip %in% c(7, 23, 3, 15, 16)) %>%
  mutate(clip = factor(clip, levels = c(7, 23, 3, 15, 16),
    labels = c("causal chain", "double prevention", "joint causation",
      "overdetermination", "preemption")))

df.plot = df.plot %>% 
  left_join(df.cluster_whs %>% 
              group_by(cluster) %>% 
              mutate(group = factor(cluster,
                                    levels = 1:2,
                                    labels = str_c("n = ", group_size(.)))) %>% 
              ungroup() %>% 
              select(participant, cluster, group),
            by = "participant")


df.labels = df.plot %>% 
  distinct(clip, ball) %>% 
  arrange(clip, ball) %>% 
  mutate(labels = x_labels,
         rating = -10,
         group = NA,
         participant = NA)

func_load_image = function(clip){
  readPNG(str_c("../../figures/diagrams/exp2_clip", clip, ".png"))
}

df.clips = df.plot %>% 
  distinct(clip) %>% 
  arrange(clip) %>% 
  mutate(number = c(7, 23, 3, 15, 16),
         grob = map(.x = number, .f = ~ func_load_image(clip = .x)),
         group = NA,
         participant = NA,
         ball = NA,
         label = str_c("clip ", number))

ball_a = readPNG("../../figures/diagrams/ballA.png")
ball_a = rasterGrob(ball_a, interpolate = TRUE)

ball_b = readPNG("../../figures/diagrams/ballB.png")
ball_b = rasterGrob(ball_b, interpolate = TRUE)

p = ggplot(data = df.plot, 
       mapping = aes(x = ball,
                     y = rating,
                     group = participant,
                     shape = group)) +
  geom_line(mapping = aes(linetype = "individual",
                          color = group),
            size = 1,
            alpha = 0.3) +
  geom_point(mapping = aes(color = group),
             size = 1,
             alpha = 0.3) +
  stat_summary(mapping = aes(group = group,
                             color = group,
                             linetype = "mean"),
               fun = "mean",
               geom = "line",
               size = 1.5) +
  stat_summary(data = df.plot %>% filter(cluster == 1),
               mapping = aes(group = group,
                             color = group),
               fun.data = "mean_cl_boot",
               geom = "pointrange",
               size = 1,
               shape = 19) +
  stat_summary(data = df.plot %>% filter(cluster == 2),
               mapping = aes(group = group,
                             color = group),
               fun.data = "mean_cl_boot",
               geom = "pointrange",
               size = 1,
               shape = 19) +
  annotation_custom(grob = ball_a,
                    xmin = 0.5,
                    xmax = 1.5,
                    ymin = -30,
                    ymax = -10) + 
  annotation_custom(grob = ball_b,
                    xmin = 1.5,
                    xmax = 2.5,
                    ymin = -30,
                    ymax = -10) +
  geom_text(data = df.labels,
            mapping = aes(label = labels,
                          y = -Inf),
            vjust = 1.2,
            family = "Arial Unicode MS",
            size = 8) + 
  geom_custom(data = df.clips,
              mapping = aes(data = grob, x = 1.5, y = Inf),
              grob_fun = function(x) rasterGrob(x,
                                                interpolate = T,
                                                vjust = -0.25)) +
  geom_text(data = df.clips,
            mapping = aes(label = label,
                          y = Inf,
                          x = -Inf),
            size = 9,
            hjust = -0.2,
            vjust = -4) + 
  facet_wrap(~ clip,
             ncol = 8) +
  coord_cartesian(xlim = c(0.9, 2.1),
                  ylim = c(-5, 105),
                  expand = F,
                  clip = "off") +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     labels = seq(0, 100, 25)) +
  scale_color_brewer(palette = "Set1",
                     guide = "none") +
  labs(y = "causal responsibility rating",
       linetype = "legend",
       shape = "") +
  theme_bw() +
  guides(linetype = guide_legend(override.aes = list(alpha = c(0.3, 1)),
                                 keywidth = unit(1.2, "cm")),
         shape = guide_legend(override.aes = list(color = c("#377eb8",
                                                            "#e41a1c"),
                                                  shape = c(19, 19),
                                                  alpha = c(1, 1),
                                                  size = c(5, 5)))) +
  theme(panel.grid = element_blank(),
        text = element_text(size = 20),
        legend.position = c(0.505, 0.23),
        axis.title.x = element_blank(),
        legend.text = element_text(size = 20),
        legend.title = element_text(size = 24),
        legend.background = element_rect(fill = "transparent"),
        strip.text = element_text(size = 30),
        axis.text.x = element_blank(),
        legend.key = element_rect(fill = "transparent"),
        legend.box = "horizontal",
        legend.spacing.x = unit(0.1, "cm"),
        panel.spacing.x = unit(1, "cm"),
        plot.margin = margin(b = 5, l = 0.2, r = 0.2, t = 7.5, unit = "cm"))

print(p)

# ggsave(str_c("../../figures/plots/exp2_individual_variance_selection_lines_clustered.pdf"),
#        plot = p,
#        width = 20,
#        height = 8.5,
#        device = cairo_pdf)

4.5.3 Tables

4.5.3.1 Relationship between predictors

df.exp2.model %>% 
  filter(noise == unique(df.exp2.model.counterfactual$noise)) %>%
  pivot_longer(cols = A_difference:B_robust,
               names_to = c("ball", "aspect"),
               names_sep = "_",
               values_to = "value") %>% 
  pivot_wider(names_from = aspect,
              values_from = value) %>% 
  select(difference, whether, how, sufficient, robust) %>% 
  correlate() %>% 
  shave() %>% 
  fashion() %>% 
  print_table()

term	difference	whether	how	sufficient
difference
whether	.50
how	.79	.27
sufficient	.21	.10	.36
robust	.43	.93	.24	.20

4.5.3.2 Population level predictors in the mixed effects models

fit.brm_exp2_causal_w %>% 
  tidy(effects = "fixed") %>% 
  mutate(model = "CSM_w") %>% 
  bind_rows(fit.brm_exp2_causal_wh %>% 
              tidy(effects = "fixed") %>% 
              mutate(model = "CSM_wh")) %>% 
  bind_rows(fit.brm_exp2_causal_whs %>% 
              tidy(effects = "fixed") %>% 
              mutate(model = "CSM_whs")) %>% 
  mutate(term = tolower(term)) %>% 
  rename(`lower 95% HDI` = conf.low,
         `upper 95% HDI` = conf.high) %>% 
  mutate_if(is.numeric, ~ round(., 2)) %>% 
  select(model, everything(), -effect, -component) %>% 
  print_table()

model	term	estimate	std.error	lower 95% HDI	upper 95% HDI
CSM_w	(intercept)	30.46	1.62	27.27	33.64
CSM_w	whether	43.28	2.20	38.98	47.62
CSM_wh	(intercept)	10.73	1.54	7.70	13.81
CSM_wh	whether	30.17	2.69	24.96	35.39
CSM_wh	how	34.88	2.61	29.73	39.95
CSM_whs	(intercept)	11.04	1.55	8.03	14.10
CSM_whs	whether	28.96	2.72	23.62	34.40
CSM_whs	how	25.32	3.02	19.34	31.33
CSM_whs	sufficient	31.97	2.98	26.14	37.97

4.5.3.3 Individual participants regression results

df.exp2.causal.individual_fit %>% 
  select(where(~ is.numeric(.) | is.factor(.))) %>% 
  select(participant, everything(), best_model) %>% 
  print_table()

participant	r_w	r_wh	r_whs	rmse_w	rmse_wh	rmse_whs	best_model
1	0.31	0.38	0.47	29.91	29.00	27.91	whs
2	0.24	0.77	0.77	39.12	27.82	27.43	whs
3	0.39	0.78	0.79	38.75	28.04	27.06	whs
4	0.41	0.54	0.61	43.73	40.80	39.08	whs
5	0.42	0.50	0.51	39.68	37.81	37.40	whs
6	0.21	0.54	0.54	40.70	36.30	35.99	whs
7	0.52	0.62	0.64	32.76	29.87	29.06	whs
8	0.41	0.63	0.67	38.27	33.26	32.02	whs
9	0.42	0.71	0.75	35.76	28.39	26.63	whs
10	0.31	0.52	0.55	29.01	26.24	25.66	whs
11	0.52	0.56	0.60	33.59	32.23	31.20	whs
12	0.55	0.55	0.65	33.21	32.54	30.21	whs
13	0.58	0.70	0.73	29.74	25.59	24.50	whs
14	0.45	0.47	0.50	33.67	32.88	32.27	whs
15	0.24	0.37	0.40	39.54	38.12	37.50	whs
16	0.62	0.65	0.73	40.91	38.61	35.67	whs
17	0.45	0.67	0.71	28.26	23.70	22.27	whs
18	0.32	0.36	0.38	37.76	37.08	36.65	whs
19	0.38	0.60	0.62	33.36	29.29	28.68	whs
20	0.57	0.60	0.66	36.70	35.33	33.55	whs
21	0.43	0.49	0.54	31.05	29.79	28.84	whs
22	0.51	0.66	0.69	33.83	29.68	28.62	whs
23	0.33	0.44	0.51	38.80	37.09	35.88	whs
24	0.46	0.62	0.68	34.13	30.18	28.49	whs
25	0.63	0.66	0.70	30.41	28.88	27.66	whs
26	0.60	0.72	0.76	40.17	35.05	32.98	whs
27	0.46	0.50	0.62	43.42	41.78	39.22	whs
28	0.33	0.45	0.47	29.19	27.59	27.29	whs
29	0.50	0.59	0.64	38.89	36.09	34.73	whs
30	0.22	0.71	0.71	32.92	24.91	24.51	whs
31	0.49	0.59	0.68	37.52	34.81	32.17	whs
32	0.39	0.60	0.69	40.56	36.03	33.32	whs
33	0.51	0.55	0.62	38.76	37.04	35.38	whs
34	0.28	0.51	0.54	44.27	40.70	39.80	whs
35	0.19	0.33	0.32	34.41	33.26	33.23	wh
36	0.23	0.84	0.83	45.20	29.54	29.49	whs
37	0.55	0.62	0.65	36.21	33.72	32.64	whs
38	0.52	0.60	0.64	36.18	33.66	32.38	whs
39	0.49	0.77	0.77	29.40	21.81	21.74	wh
40	0.49	0.78	0.80	32.19	23.68	22.50	whs
41	0.35	0.50	0.57	32.72	30.30	28.86	whs

4.5.3.4 CSMwhs predictions for selection of cases

df.exp2.model.causal %>%
  left_join(df.exp2.causal.regression,
            by = c("clip", "ball")) %>% 
  filter(clip %in% c(7, 23, 3, 15, 16)) %>%
  select(clip, ball, difference, whether, how, sufficient, robust) %>%
  mutate(clip = factor(clip, levels = c(7, 23, 3, 15, 16))) %>%
  mutate(across(where(is.numeric), ~ round(., 2))) %>%
  mutate(across(c(difference, whether, how, sufficient, robust),
                   ~ ifelse(. < 0.5,
                            str_c("xmark (", ., ")"),
                            str_c("cmark (", ., ")")))) %>%
  arrange(clip) %>% 
  print_table()

clip	ball	difference	whether	how	sufficient	robust
7	A	cmark (1)	xmark (0.34)	cmark (1)	xmark (0)	xmark (0.25)
7	B	cmark (1)	cmark (1)	cmark (1)	cmark (0.67)	cmark (0.6)
23	A	xmark (0.05)	xmark (0)	xmark (0)	xmark (0)	xmark (0)
23	B	cmark (0.91)	cmark (0.79)	xmark (0)	xmark (0)	cmark (0.72)
3	A	cmark (1)	cmark (0.88)	cmark (1)	xmark (0.12)	cmark (0.76)
3	B	cmark (1)	cmark (0.89)	cmark (1)	xmark (0.11)	cmark (0.75)
15	A	cmark (1)	xmark (0.01)	cmark (1)	cmark (0.99)	xmark (0.1)
15	B	cmark (1)	xmark (0.01)	cmark (1)	cmark (1)	xmark (0.1)
16	A	cmark (1)	xmark (0.23)	cmark (1)	cmark (1)	xmark (0.35)
16	B	xmark (0)	xmark (0)	xmark (0)	xmark (0)	xmark (0)

4.5.3.5 Heuristic model

fit.brm_exp2_causal_heuristic %>% 
  tidy(effects = "fixed") %>% 
  mutate(term = tolower(term)) %>% 
  rename(`lower 95% HDI` = conf.low,
         `upper 95% HDI` = conf.high) %>% 
  select(-c(effect, component)) %>% 
  print_table()

Warning in tidy.brmsfit(., effects = "fixed"): some parameter names contain
underscores: term naming may be unreliable!

term	estimate	std.error	lower 95% HDI	upper 95% HDI
(intercept)	49.73	1.48	46.87	52.70
moving	0.22	0.21	0.00	0.79
speed	2.08	0.84	0.45	3.73
contact_e	0.38	0.36	0.01	1.35
e_speed_diff	0.12	0.12	0.00	0.46
e_direction_diff	1.06	0.73	0.06	2.76
total_speed_diff	2.19	0.95	0.42	4.09
total_direction_diff	3.99	0.90	2.21	5.69
transfer	15.59	0.80	13.98	17.14
e_moving	0.18	0.18	0.00	0.65
exclusive	4.38	0.71	3.00	5.79

5 Appendix

5.1 Preliminary study

5.1.1 Read in model predictions

Reading in the predictions from the approximate simulation model and finding the model that best matches participants’ counterfactual judgments.

5.1.1.1 Counterfactual condition

# read model predictions 
path = "../python/results/"
files = list.files(path = path, pattern = "*.csv")
files = files[str_detect(files, "2ball")]

df.study.model = files %>%
  map_dfr(~ read.csv(str_c(path, .))) %>%
  set_names(c("clip", "noise", "prediction")) %>%
  left_join(df.study.clipinfo, by = "clip") %>%
  mutate(prediction = ifelse(outcome_actual == 1, 1 - prediction, prediction))

# calculate mean counterfactual judgments 
df.study.counterfactual.means = df.study.counterfactual.long %>%
  group_by(clip) %>%
  summarize(rating_mean = mean(rating),
            rating_low = smean.cl.boot(rating)[2],
            rating_high = smean.cl.boot(rating)[3]) %>%
  ungroup() %>%
  left_join(df.study.clipinfo, by = "clip")

# find noisy simulation model that best predicts the mean counterfactual judgments by 
# minimizing the sum of squared errors 
df.study.counterfactual.model = df.study.model %>% 
  group_by(noise) %>% 
  nest() %>% 
  mutate(sse = map(data, 
                   ~ sum((.x$prediction*100 - 
                            df.study.counterfactual.means$rating_mean) ^ 2)),
         sse = unlist(sse)) %>% 
  ungroup() %>% 
  filter(sse == min(sse)) %>% 
  unnest(data) %>% 
  mutate(prediction = prediction * 100)

5.1.1.2 Causal condition

Calculating the predictions for the causal condition using both the approximate simulation model (simulation) as well as participants’ judgments from the counterfactual condition (empirical).

df.study.causal.model = df.study.counterfactual.long %>% 
  group_by(clip) %>% 
  summarize(empirical = mean(rating)) %>% 
  ungroup() %>% 
  left_join(df.study.counterfactual.model %>% 
              select(-c(noise, sse)),
            by = "clip") %>% 
  rename(simulation = prediction) %>% 
  mutate(empirical = ifelse(outcome_actual == 1, 100 - empirical, empirical),
         simulation = ifelse(outcome_actual == 1, 100 - simulation, simulation))

5.1.2 Counterfactual condition

5.1.2.1 Plots

Bar plot showing mean judgments for each clip together with the individual judgments, as well as the predictions of the best-fitting approximate simulation model.

set.seed(1)
df.plot = df.study.counterfactual.long %>% 
  mutate(rating = abs(rating),
         clipindex = rep(c(1, 2), nrow(.)/2)) %>% 
  left_join(df.study.counterfactual.model,
            by = c("clip",
                   "outcome_actual",
                   "outcome_counterfactual",
                   "index_actual",
                   "index_counterfactual")) %>% 
  pivot_longer(cols = c(rating, prediction),
               names_to = "model",
               values_to = "value") %>% 
  mutate(colorindex = ifelse(outcome_counterfactual == 1, 2, 1),
         colorindex = ifelse(model != "rating", 3, colorindex),
         colorindex = as.factor(colorindex),
         index_actual = factor(index_actual,
                               levels = c("actual miss", "actual close", "actual hit")),
         index_counterfactual = factor(index_counterfactual,
                                       levels = c("counterfactual miss",
                                                  "counterfactual close",
                                                  "counterfactual hit")),
         model = factor(model, levels = c("rating", "prediction"))) %>% 
  mutate(clipindex = ifelse(clip == 11, 2, clipindex),
         clipindex = ifelse(clip == 12, 1, clipindex)) #swap clips 11 and 12


df.text = df.plot %>%
  expand(index_actual, index_counterfactual, clipindex, model) %>%
  mutate(label = rep(1:18, each = 2),
         label = ifelse(model != "rating", NA, label),
         y = -15,
         x = clipindex,
         colorindex = NA)

p = actual_counterfactual_plot(df.plot, df.text, "counterfactual judgment")
print(p)

# ggsave("../../figures/plots/study_counterfactual_bars.pdf",
#        width = 8,
#        height = 6)

5.1.2.2 Stats

Model fit as captured by r and RMSE.

df.study.counterfactual.means %>% 
  left_join(df.study.counterfactual.model,
            by = c("clip",
                   "outcome_actual",
                   "outcome_counterfactual",
                   "index_actual",
                   "index_counterfactual")) %>% 
  summarize(noise = unique(noise), 
            simulation_r = cor(rating_mean, prediction),
            simulation_rmse = rmse(rating_mean, prediction)) %>% 
  print_table()

noise	simulation_r	simulation_rmse
1	0.97	10.95

5.1.3 Causal condition

5.1.3.1 Plots

Bar plot showing mean causal judgments for each clip together with the individual judgments, as well as the predictions of the counterfactual simulation model (black bars) based on participants’ mean counterfactual judgments.

set.seed(1)

model.name = c("empirical")
# model.name = c("simulation")

df.plot = df.study.causal.long %>%
  mutate(rating = abs(rating),
         clipindex = rep(c(1, 2), nrow(.) / 2)) %>%
  left_join(df.study.causal.model,
            by = c("clip",
                   "outcome_actual",
                   "outcome_counterfactual",
                   "index_actual",
                   "index_counterfactual")) %>%
  pivot_longer(cols = c(rating, simulation, empirical),
               names_to = "model",
               values_to = "value") %>% 
  filter(model %in% c(model.name, "rating")) %>%
  mutate(model = factor(model, levels = c("rating", model.name))) %>%
  mutate(
    colorindex = ifelse(outcome_actual == 1, 2, 1),
    colorindex = ifelse(model != "rating", 3, colorindex),
    colorindex = as.factor(colorindex),
    index.actual = factor(index_actual, 
                          levels = c("actual miss", "actual close", "actual hit")),
    index.counterfactual = factor(index_counterfactual, 
                                  levels = c("counterfactual miss", 
                                             "counterfactual close", 
                                             "counterfactual hit"))) %>%
  mutate(clipindex = ifelse(clip == 11, 2, clipindex),
         clipindex = ifelse(clip == 12, 1, clipindex)) # swap clips 11 and 12

df.text = df.plot %>%
  expand(index_actual, index_counterfactual, clipindex, model) %>%
  mutate(label = rep(1:18, each = 2),
         label = ifelse(model != "rating", NA, label),
         y = -15,
         x = clipindex,
         colorindex = NA)

p = actual_counterfactual_plot(df.plot, df.text, "causal judgment")
print(p)

# ggsave("../../figures/plots/study_causal_bars.pdf",
#        width = 8,
#        height = 6)

5.1.3.2 Stats

Model fit as captured by r and RMSE.

df.study.causal.long %>% 
  mutate(rating = abs(rating)) %>% 
  group_by(clip,
           outcome_actual, 
           outcome_counterfactual,
           index_actual,
           index_counterfactual) %>% 
  summarize(rating = mean(rating)) %>% 
  ungroup() %>% 
  left_join(df.study.causal.model,
            by = c("clip", 
                   "outcome_actual", 
                   "outcome_counterfactual", 
                   "index_actual", 
                   "index_counterfactual")) %>% 
  summarize(empirical_r = cor(rating, empirical),
            empirical_rmse = rmse(rating, empirical),
            simulation_r = cor(rating, simulation),
            simulation_rmse = rmse(rating, simulation)) %>% 
  print_table()

empirical_r	empirical_rmse	simulation_r	simulation_rmse
0.96	8.57	0.93	15.15

5.2 Experiment 1

5.2.1 Frequentist tests for potential order effects

5.2.1.1 Counterfactual condition

df.data = df.exp1.combined.long %>% 
  filter(question == "counterfactual",
         clip <= 18)

afex::aov_ez(id = "participant",
             dv = "rating",
             data = df.data,
             between = "condition",
             within = c("index_actual", "index_counterfactual")) %>% 
  .$anova_table %>% 
  print_table()

	num Df	den Df	MSE	F	ges	Pr(>F)
condition	1.00	80.00	566.69	0.47	0.00	0.49
index_actual	1.84	147.59	427.45	3.65	0.01	0.03
condition:index_actual	1.84	147.59	427.45	0.03	0.00	0.97
index_counterfactual	1.85	148.17	1038.95	348.21	0.65	0.00
condition:index_counterfactual	1.85	148.17	1038.95	0.03	0.00	0.97
index_actual:index_counterfactual	3.06	245.17	400.46	3.50	0.01	0.02
condition:index_actual:index_counterfactual	3.06	245.17	400.46	0.29	0.00	0.84

5.2.1.2 Causal condition

df.data = df.exp1.combined.long %>% 
  filter(question == "causal",
         clip <= 18)

afex::aov_ez(id = "participant",
             dv = "rating",
             data = df.data,
             between = "condition",
             within = c("index_actual", "index_counterfactual")) %>% 
  .$anova_table %>% 
  print_table()

	num Df	den Df	MSE	F	ges	Pr(>F)
condition	1.00	80.00	919.37	0.70	0.00	0.40
index_actual	1.48	118.66	1238.03	796.01	0.73	0.00
condition:index_actual	1.48	118.66	1238.03	0.95	0.00	0.36
index_counterfactual	1.84	147.34	1224.82	216.37	0.48	0.00
condition:index_counterfactual	1.84	147.34	1224.82	9.21	0.04	0.00
index_actual:index_counterfactual	3.87	309.59	429.49	3.87	0.01	0.00
condition:index_actual:index_counterfactual	3.87	309.59	429.49	1.23	0.00	0.30

5.3 Experiment 2

5.3.1 Table with CSMwhs predictions for all cases

df.exp2.causal.regression %>% 
  left_join(df.exp2.model.causal,
            by = c("clip", "ball")) %>% 
  left_join(df.exp2.clipinfo %>% 
              select(-clipindex),
            by = c("clip")) %>% 
  mutate(across(c(difference, whether, how, sufficient, robust),
                ~ . * 100)) %>% 
  select(clip, ball, contains("outcome"), difference, whether, how, sufficient,
         robust, w, wh, whs, heuristic, rating = rating_mean) %>% 
  print_table(digits = 0)

clip	ball	outcome_both	outcome_a	outcome_b	outcome_none	difference	whether	how	sufficient	robust	w	wh	whs	heuristic	rating
1	A	0	0	0	0	100	40	100	23	36	48	58	55	57	42
1	B	0	0	0	0	100	15	100	16	9	37	50	46	54	37
2	A	0	0	0	0	57	12	0	0	10	35	14	14	25	21
2	B	0	0	0	0	18	0	0	0	0	30	11	11	24	19
3	A	1	0	0	0	100	88	100	12	76	69	72	66	72	76
3	B	1	0	0	0	100	89	100	11	75	69	73	66	72	75
4	A	1	0	0	0	100	78	100	4	78	64	69	60	58	63
4	B	1	0	0	0	100	95	100	15	57	72	74	69	54	78
5	A	0	0	1	0	100	90	100	0	47	69	73	62	47	71
5	B	0	0	1	0	100	0	100	0	0	30	46	36	68	22
6	A	0	0	1	0	100	59	100	16	35	56	64	59	53	73
6	B	0	0	1	0	100	18	100	6	14	38	51	44	53	22
7	A	1	0	1	0	100	34	100	0	25	45	56	46	70	59
7	B	1	0	1	0	100	100	100	67	60	74	76	87	64	79
8	A	1	0	1	0	0	0	0	0	0	30	11	11	25	7
8	B	1	0	1	0	100	100	100	100	100	74	76	97	84	92
9	A	0	1	0	0	0	0	0	0	0	30	11	11	14	8
9	B	0	1	0	0	100	100	100	0	100	74	76	65	78	90
10	A	0	1	0	0	77	18	0	0	22	38	16	16	15	23
10	B	0	1	0	0	98	79	0	0	63	65	35	34	21	55
11	A	1	1	0	0	100	70	100	77	68	61	67	81	71	93
11	B	1	1	0	0	0	0	0	0	0	30	11	11	16	4
12	A	1	1	0	0	100	82	100	74	83	66	70	84	57	77
12	B	1	1	0	0	100	0	100	12	24	30	46	40	53	37
13	A	0	1	1	0	67	34	0	0	35	45	21	21	14	8
13	B	0	1	1	0	70	35	0	0	35	46	21	21	21	64
14	A	0	1	1	0	97	91	0	0	59	70	38	37	21	22
14	B	0	1	1	0	91	77	0	0	51	64	34	33	20	18
15	A	1	1	1	0	100	1	100	99	10	31	46	68	71	76
15	B	1	1	1	0	100	1	100	100	10	31	46	68	71	76
16	A	1	1	1	0	100	23	100	100	35	40	53	75	80	92
16	B	1	1	1	0	0	0	0	0	0	30	11	11	14	4
17	A	0	0	0	1	100	19	100	37	18	39	51	54	66	69
17	B	0	0	0	1	100	0	100	36	17	30	46	48	65	46
18	A	0	0	0	1	100	11	100	40	17	35	49	52	55	63
18	B	0	0	0	1	100	7	100	37	9	33	48	50	56	66
19	A	1	0	0	1	100	74	100	7	65	63	68	60	55	53
19	B	1	0	0	1	100	72	100	7	65	61	67	59	55	49
20	A	1	0	0	1	100	92	100	8	72	70	73	66	57	41
20	B	1	0	0	1	100	88	100	4	53	68	72	63	56	71
21	A	0	0	1	1	100	47	100	40	45	51	60	63	58	80
21	B	0	0	1	1	100	9	100	21	10	34	48	46	59	18
22	A	0	0	1	1	100	100	100	89	83	74	76	94	47	60
22	B	0	0	1	1	100	8	100	0	15	34	48	39	53	42
23	A	1	0	1	1	5	0	0	0	0	31	11	11	15	3
23	B	1	0	1	1	91	79	0	0	72	65	35	34	22	39
24	A	1	0	1	1	100	66	100	4	63	59	66	57	57	44
24	B	1	0	1	1	100	94	100	22	79	71	74	71	54	73
25	A	0	1	0	1	100	25	100	21	26	41	53	50	69	43
25	B	0	1	0	1	100	74	100	54	65	62	68	75	56	73
26	A	0	1	0	1	100	6	100	3	9	33	47	39	60	39
26	B	0	1	0	1	100	87	100	35	54	68	72	73	46	69
27	A	1	1	0	1	100	97	100	52	97	73	75	81	67	80
27	B	1	1	0	1	0	0	0	0	0	30	11	11	17	6
28	A	1	1	0	1	100	90	100	22	80	69	73	69	79	89
28	B	1	1	0	1	0	0	0	0	0	30	11	11	12	5
29	A	0	1	1	1	100	58	100	24	44	56	63	61	66	47
29	B	0	1	1	1	100	63	100	24	38	58	65	62	54	67
30	A	0	1	1	1	100	57	100	29	49	55	63	62	62	58
30	B	0	1	1	1	100	46	100	24	39	51	60	58	63	56
31	A	1	1	1	1	100	2	100	4	3	31	46	38	63	44
31	B	1	1	1	1	100	4	100	4	4	32	47	39	53	46
32	A	1	1	1	1	0	0	0	0	0	30	11	11	16	5
32	B	1	1	1	1	100	75	100	66	73	63	68	79	65	71

5.3.2 Does the question framing matter? (Responsibility vs. causation)

We have run a pilot eye-tracking experiment with 15 participants who saw the same clips as the ones in Experiment 2. Instead of asking participants to judge “To what extent were A and B responsible for E (not) going through the gate?”, we asked them to judge to what extent they agreed with the following statement: “Ball A/B caused ball E to go through the gate.” or “Ball A/B prevented ball E from going through the gate.” depending on the outcome.

Participants’ agreement judgments with the causal statements in this pilot eye-tracking experiment were strikingly similar to participants’ responsibility judgments in the online experiment.

df.exp2.eye_tracking_pilot = 
  read_csv("../../data/experiment2_eye_tracking_pilot.csv") %>% 
  rename(clip = trial) %>%
  mutate(ball = str_to_upper(ball)) %>% 
  group_by(clip, ball, outcome) %>% 
  summarize(causality_mean = mean(causation),
            causality_low = smean.cl.boot(causation)[2],
            causality_high = smean.cl.boot(causation)[3]) %>% 
  ungroup()

df.plot = df.exp2.eye_tracking_pilot %>% 
  # select(clip, ball, outcome,) %>% 
  left_join(df.exp2.causal.means %>% 
              select(clip,
                     ball,
                     responsibility_mean = rating_mean,
                     responsibility_low = rating_low,
                     responsibility_high = rating_high),
            by = c("clip", "ball"))

# highlight the double prevention clip (#23)
df.plot = df.plot %>% 
  mutate(color = ifelse(clip == 23, "1", "0"))

ggplot(data = df.plot,
       mapping = aes(x = causality_mean,
                     y = responsibility_mean)) + 
  geom_abline(intercept = 0, 
              slope = 1,
              linetype = 2) + 
  geom_smooth(method = "lm",
              color = "lightblue",
              fill = "lightblue") +
  geom_linerange(mapping = aes(ymin = responsibility_low,
                               ymax = responsibility_high),
                 alpha = 0.1) +
  geom_linerange(mapping = aes(xmin = causality_low,
                               xmax = causality_high),
                 alpha = 0.1) +
  geom_point(size = 2,
             mapping = aes(color = color),
             show.legend = F) + 
  annotate(geom = "text",
           x = c(0, 0),
           y = c(100, 90),
           label = c(str_c("r = ",
                           cor(df.plot$causality_mean,
                               df.plot$responsibility_mean) %>% 
                             round(2)),
                     str_c("RMSE = ", rmse(df.plot$causality_mean,
                                           df.plot$responsibility_mean) %>% 
                             round(2))),
           hjust = 0, 
           size = 8) + 
  scale_x_continuous(breaks = seq(0, 100, 25),
                     limits = c(0, 100)) +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     limits = c(0, 100)) +
  scale_color_manual(values = c("black", "red")) +
  labs(x = "causal judgment",
       y = "responsibility judgment") +
  theme(text = element_text(size = 24))

# ggsave("../../figures/plots/aux_question_framing_comparison.pdf",
#        width = 6,
#        height = 6)

5.3.3 CSM_WHS fits for each individual participant

df.plot = fit.brm_exp2_causal_whs %>% 
  fitted() %>% 
  as_tibble() %>% 
  select(prediction = Estimate) %>% 
  bind_cols(df.exp2.causal.long) %>% 
  relocate(prediction, .after = last_col())

df.text = df.plot %>% 
  group_by(participant) %>% 
  summarize(r = round(cor(prediction, rating), 2)) %>% 
  ungroup() %>% 
  mutate(r = str_c("r = ", r),
         prediction = 1,
         rating = 110)

ggplot(data = df.plot,
       mapping = aes(x = prediction,
                     y = rating)) + 
  geom_abline(intercept = 0, 
              slope = 1,
              color = "blue",
              alpha = 0.5) +
  geom_point(alpha = 0.3,
             size = 1) +
  geom_text(data = df.text,
            mapping = aes(label = r),
            hjust = 0,
            color = "red",
            size = 4) +
  facet_wrap(facets = vars(participant),
             ncol = 7) +
  labs(x = "model prediction",
       y = "causal responsibility rating") + 
  coord_cartesian(xlim = c(0, 100),
                  ylim = c(0, 100),
                  expand = F,
                  clip = "off") +
  scale_x_continuous(breaks = seq(0, 100, 25),
                     limits = c(0, 100)) +
  scale_y_continuous(breaks = seq(0, 100, 25),
                     expand = expansion(mult = c(0, 0))) +
  theme_classic() + 
  theme(strip.background = element_blank(),
        strip.text = element_blank(),
        text = element_text(size = 16),
        panel.spacing.x = unit(0.75, "cm"),
        panel.spacing.y = unit(1, "cm"),
        plot.margin = margin(t = 0.7,
                             r = 0.8,
                             b = 0.2,
                             l = 0.2,
                             unit = "cm"))

# ggsave("../../figures/plots/exp2_individual_scatter_plots.pdf",
#        width = 12,
#        height = 8)

5.3.4 Individual participant fit model comparison

df.plot = df.exp2.causal.individual_fit %>% 
  select(participant, contains("r_"), contains("rmse_")) %>% 
  pivot_longer(cols = -participant,
               names_to = c("measure", "model"),
               names_sep = "_",
               values_to = "fit")

ggplot(data = df.plot %>% 
         filter(measure == "r"),
       mapping = aes(x = fit,
                     fill = model)) + 
  geom_density(alpha = 0.5) + 
  labs(x = "correlation value") + 
  scale_x_continuous(breaks = seq(0, 1, 0.2),
                     limits = c(0, 1)) + 
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) + 
  scale_fill_brewer(palette = "Set1") + 
  # ggplot2::theme_classic() + 
  theme(legend.position = c(0.3, 0.95),
        legend.direction = "horizontal",
        text = element_text(size = 20))

# ggsave("../../figures/plots/exp2_individual_densities.pdf",
#        width = 8,
#        height = 6)

### 3D scatter plot of participant clusters

plot_ly(x = df.cluster_whs$whether,
        y = df.cluster_whs$how,
        z = df.cluster_whs$sufficient,
        type = "scatter3d",
        mode = "markers",
        color = as.factor(df.cluster_whs$cluster),
        colors = c("#e41a1c", "#377eb8")) %>% 
  layout(showlegend = FALSE,
         title = "Participant clusters",
         scene = list(
           xaxis = list(title = "whether"),
           yaxis = list(title = "how"),
           zaxis = list(title = "sufficient")))

5.3.5 Clusters of participants

set.seed(1)

df.plot = df.exp2.causal.long %>% 
  left_join(df.cluster_whs %>% 
              select(participant, cluster) %>% 
              mutate(participant = as.numeric(participant),
                     cluster = as.factor(cluster)) %>% 
              group_by(cluster) %>% 
              mutate(label = factor(cluster,
                                    levels = 1:2,
                                    labels = str_c("n = ", group_size(.)))) %>% 
              ungroup(),
            by = "participant")

ggplot(data = df.plot,
       mapping = aes(x = ball,
                     y = rating,
                     group = label,
                     fill = label)) + 
  geom_point(shape = 21,
             alpha = 0.2,
             position = position_jitterdodge(jitter.width = 0.1,
                                             jitter.height = 0,
                                             dodge.width = 0.75)) + 
  stat_summary(fun.data = "mean_cl_boot",
               geom = "linerange", 
               mapping = aes(color = label),
               size = 1,
               position = position_dodge(width = 0.75)) + 
  stat_summary(fun = "mean",
               geom = "point", 
               size = 4,
               shape = 21,
               position = position_dodge(width = 0.75)) + 
  facet_wrap(facets = vars(clip), ncol = 8) + 
  labs(y = "causal responsibility rating",
       fill = "cluster",
       color = "cluster") + 
  scale_fill_brewer(palette = "Set1") + 
  scale_color_brewer(palette = "Set1") + 
  theme(text = element_text(size = 24),
        legend.position = "bottom")

# ggsave("../../figures/plots/exp2_clusters_points.pdf",
#        width = 12,
#        height = 8)

5.3.6 Ternary plots for individual regression results

library("ggtern")

df.plot = df.exp2.causal.individual_fit %>% 
  select(participant, fit_whs) %>% 
  mutate(estimates = map(fit_whs, ~ fixef(.) %>% 
                           as_tibble(rownames = "term") %>% 
                           clean_names())) %>%
  select(participant, estimates) %>% 
  unnest(estimates) %>% 
  filter(!str_detect(term, "Intercept")) %>% 
  select(participant, term, estimate) %>% 
  pivot_wider(names_from = term,
              values_from = estimate) %>% 
  # check this
  mutate(across(.cols = c(whether, how, sufficient),
                .fns = ~ . / (how + whether + sufficient),
                .names = "{.col}_norm")) %>% 
  mutate(color = 0,
         color = ifelse(how_norm == max(how_norm), 1, color),
         color = ifelse(whether_norm == max(whether_norm), 2, color),
         color = ifelse(sufficient_norm == max(sufficient_norm), 3, color),
         color = factor(color))

ggplot(data = df.plot,
       mapping = aes(x = how,
                     y = sufficient,
                     z = whether,
                     color = color)) + 
  geom_point(alpha = 0.7,
             size = 2,
             show.legend = F) + 
  scale_color_manual(values = c("black", "red", "green", "blue")) + 
  coord_tern() +
  theme_showarrows() + 
  theme(text = element_text(size = 20),
        tern.axis.title.T = element_blank(),
        tern.axis.title.L = element_blank(),
        tern.axis.title.R = element_blank())

# ggsave(str_c("../../figures/plots/exp2_individual_regression_ternary_plot_scaled.pdf"),
#        width = 5,
#        height = 5)

6 Paper figures

6.1 Figure 11

plot.list = list(func_exp1_causal_plot(condition.name = "counterfactual_first",
                                       model.name = "counterfactual_first"),
                 func_exp1_causal_plot(condition.name = "causal_first",
                                       model.name = "causal_first"))

wrap_plots(plot.list,
           ncol = 2) +
  plot_annotation(tag_levels = c("A")) &
  theme(plot.tag.position = c(0, 0.99),
        plot.tag = element_text(size = 30,
                                face = "bold"))

# ggsave("../../figures/plots/figure11.pdf",
#        width = 16,
#        height = 6)

6.2 Figure 15

list(func_scatterplot(model = "w"),
     func_scatterplot(model = "wh"),
     func_scatterplot(model = "whs"),
     func_scatterplot(model = "heuristic")) %>%
  wrap_plots(ncol = 2) + 
  plot_annotation(tag_levels = c("A")) &
  theme(plot.tag.position = c(0, 1),
        plot.tag = element_text(size = 30,
                                face = "bold"),
        plot.margin = margin(t = 0.5, r = 0.5, b = 0, l = 0.5, unit = "cm"))

# ggsave("../../figures/plots/figure15.pdf",
#        width = 10,
#        height = 9)

7 Session info

sessionInfo()

R version 4.0.3 (2020-10-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur 10.16

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] forcats_0.5.1     stringr_1.4.0     dplyr_1.0.4       purrr_0.3.4      
 [5] readr_1.4.0       tidyr_1.1.2       tibble_3.0.6      tidyverse_1.3.0  
 [9] ggrepel_0.9.1     patchwork_1.1.1   clValid_0.6-9     cluster_2.1.0    
[13] egg_0.4.5         gridExtra_2.3     plotly_4.9.3      png_0.1-7        
[17] tidybayes_2.3.1   Hmisc_4.4-2       ggplot2_3.3.3     Formula_1.2-4    
[21] survival_3.2-7    lattice_0.20-41   janitor_2.1.0     jsonlite_1.7.2   
[25] xtable_1.8-4      corrr_0.4.3       brms_2.14.4       Rcpp_1.0.6       
[29] broom.mixed_0.2.6 lme4_1.1-26       Matrix_1.3-2      DT_0.17          
[33] kableExtra_1.3.1  knitr_1.31       

loaded via a namespace (and not attached):
  [1] utf8_1.1.4           tidyselect_1.1.0     htmlwidgets_1.5.3   
  [4] munsell_0.5.0        codetools_0.2-18     statmod_1.4.35      
  [7] miniUI_0.1.1.1       withr_2.4.1          Brobdingnag_1.2-6   
 [10] colorspace_2.0-0     highr_0.8            rstudioapi_0.13     
 [13] stats4_4.0.3         bayesplot_1.8.0      labeling_0.4.2      
 [16] emmeans_1.5.3        rstan_2.21.1         farver_2.1.0        
 [19] bridgesampling_1.0-0 coda_0.19-4          vctrs_0.3.6         
 [22] generics_0.1.0       TH.data_1.0-10       afex_0.28-1         
 [25] xfun_0.21            R6_2.5.0             markdown_1.1        
 [28] gamm4_0.2-6          projpred_2.0.2       assertthat_0.2.1    
 [31] promises_1.1.1       scales_1.1.1         multcomp_1.4-15     
 [34] nnet_7.3-15          gtable_0.3.0         processx_3.4.5      
 [37] sandwich_3.0-0       rlang_0.4.10         splines_4.0.3       
 [40] TMB_1.7.18           lazyeval_0.2.2       broom_0.7.3         
 [43] checkmate_2.0.0      inline_0.3.17        yaml_2.2.1          
 [46] reshape2_1.4.4       abind_1.4-5          modelr_0.1.8        
 [49] threejs_0.3.3        crosstalk_1.1.1      backports_1.2.1     
 [52] httpuv_1.5.5         rsconnect_0.8.16     tools_4.0.3         
 [55] bookdown_0.21        ellipsis_0.3.1       RColorBrewer_1.1-2  
 [58] ggridges_0.5.3       plyr_1.8.6           base64enc_0.1-3     
 [61] ps_1.6.0             prettyunits_1.1.1    rpart_4.1-15        
 [64] zoo_1.8-8            haven_2.3.1          fs_1.5.0            
 [67] magrittr_2.0.1       data.table_1.13.6    lmerTest_3.1-3      
 [70] openxlsx_4.2.3       ggdist_2.4.0         colourpicker_1.1.0  
 [73] reprex_1.0.0         mvtnorm_1.1-1        matrixStats_0.57.0  
 [76] hms_1.0.0            shinyjs_2.0.0        mime_0.10           
 [79] evaluate_0.14        arrayhelpers_1.1-0   shinystan_2.5.0     
 [82] rio_0.5.16           jpeg_0.1-8.1         readxl_1.3.1        
 [85] rstantools_2.1.1     compiler_4.0.3       V8_3.4.0            
 [88] crayon_1.4.1         minqa_1.2.4          StanHeaders_2.21.0-7
 [91] htmltools_0.5.1.1    mgcv_1.8-33          later_1.1.0.1       
 [94] RcppParallel_5.0.2   lubridate_1.7.9.2    DBI_1.1.1           
 [97] dbplyr_2.0.0         MASS_7.3-53          boot_1.3-26         
[100] car_3.0-10           cli_2.3.0            parallel_4.0.3      
[103] igraph_1.2.6         pkgconfig_2.0.3      numDeriv_2016.8-1.1 
[106] foreign_0.8-81       xml2_1.3.2           svUnit_1.0.3        
[109] dygraphs_1.1.1.6     webshot_0.5.2        estimability_1.3    
[112] rvest_0.3.6          snakecase_0.11.0     distributional_0.2.1
[115] callr_3.5.1          digest_0.6.27        rmarkdown_2.6       
[118] cellranger_1.1.0     htmlTable_2.1.0      curl_4.3            
[121] shiny_1.6.0          gtools_3.8.2         nloptr_1.2.2.2      
[124] lifecycle_1.0.0      nlme_3.1-151         carData_3.0-4       
[127] viridisLite_0.3.0    fansi_0.4.2          pillar_1.4.7        
[130] loo_2.4.1.9000       fastmap_1.1.0        httr_1.4.2          
[133] pkgbuild_1.2.0       glue_1.4.2           xts_0.12.1          
[136] zip_2.1.1            shinythemes_1.2.0    class_7.3-18        
[139] stringi_1.5.3        latticeExtra_0.6-29

clip	ball	outcome_both	outcome_a	outcome_b	outcome_none	difference	whether	how	sufficient	robust	w	wh	whs	heuristic	rating
1	A	0	0	0	0	100	40	100	23	36	48	58	55	57	42
1	B	0	0	0	0	100	15	100	16	9	37	50	46	54	37
2	A	0	0	0	0	57	12	0	0	10	35	14	14	25	21
2	B	0	0	0	0	18	0	0	0	0	30	11	11	24	19
3	A	1	0	0	0	100	88	100	12	76	69	72	66	72	76
3	B	1	0	0	0	100	89	100	11	75	69	73	66	72	75
4	A	1	0	0	0	100	78	100	4	78	64	69	60	58	63
4	B	1	0	0	0	100	95	100	15	57	72	74	69	54	78
5	A	0	0	1	0	100	90	100	0	47	69	73	62	47	71
5	B	0	0	1	0	100	0	100	0	0	30	46	36	68	22
6	A	0	0	1	0	100	59	100	16	35	56	64	59	53	73
6	B	0	0	1	0	100	18	100	6	14	38	51	44	53	22
7	A	1	0	1	0	100	34	100	0	25	45	56	46	70	59
7	B	1	0	1	0	100	100	100	67	60	74	76	87	64	79
8	A	1	0	1	0	0	0	0	0	0	30	11	11	25	7
8	B	1	0	1	0	100	100	100	100	100	74	76	97	84	92
9	A	0	1	0	0	0	0	0	0	0	30	11	11	14	8
9	B	0	1	0	0	100	100	100	0	100	74	76	65	78	90
10	A	0	1	0	0	77	18	0	0	22	38	16	16	15	23
10	B	0	1	0	0	98	79	0	0	63	65	35	34	21	55
11	A	1	1	0	0	100	70	100	77	68	61	67	81	71	93
11	B	1	1	0	0	0	0	0	0	0	30	11	11	16	4
12	A	1	1	0	0	100	82	100	74	83	66	70	84	57	77
12	B	1	1	0	0	100	0	100	12	24	30	46	40	53	37
13	A	0	1	1	0	67	34	0	0	35	45	21	21	14	8
13	B	0	1	1	0	70	35	0	0	35	46	21	21	21	64
14	A	0	1	1	0	97	91	0	0	59	70	38	37	21	22
14	B	0	1	1	0	91	77	0	0	51	64	34	33	20	18
15	A	1	1	1	0	100	1	100	99	10	31	46	68	71	76
15	B	1	1	1	0	100	1	100	100	10	31	46	68	71	76
16	A	1	1	1	0	100	23	100	100	35	40	53	75	80	92
16	B	1	1	1	0	0	0	0	0	0	30	11	11	14	4
17	A	0	0	0	1	100	19	100	37	18	39	51	54	66	69
17	B	0	0	0	1	100	0	100	36	17	30	46	48	65	46
18	A	0	0	0	1	100	11	100	40	17	35	49	52	55	63
18	B	0	0	0	1	100	7	100	37	9	33	48	50	56	66
19	A	1	0	0	1	100	74	100	7	65	63	68	60	55	53
19	B	1	0	0	1	100	72	100	7	65	61	67	59	55	49
20	A	1	0	0	1	100	92	100	8	72	70	73	66	57	41
20	B	1	0	0	1	100	88	100	4	53	68	72	63	56	71
21	A	0	0	1	1	100	47	100	40	45	51	60	63	58	80
21	B	0	0	1	1	100	9	100	21	10	34	48	46	59	18
22	A	0	0	1	1	100	100	100	89	83	74	76	94	47	60
22	B	0	0	1	1	100	8	100	0	15	34	48	39	53	42
23	A	1	0	1	1	5	0	0	0	0	31	11	11	15	3
23	B	1	0	1	1	91	79	0	0	72	65	35	34	22	39
24	A	1	0	1	1	100	66	100	4	63	59	66	57	57	44
24	B	1	0	1	1	100	94	100	22	79	71	74	71	54	73
25	A	0	1	0	1	100	25	100	21	26	41	53	50	69	43
25	B	0	1	0	1	100	74	100	54	65	62	68	75	56	73
26	A	0	1	0	1	100	6	100	3	9	33	47	39	60	39
26	B	0	1	0	1	100	87	100	35	54	68	72	73	46	69
27	A	1	1	0	1	100	97	100	52	97	73	75	81	67	80
27	B	1	1	0	1	0	0	0	0	0	30	11	11	17	6
28	A	1	1	0	1	100	90	100	22	80	69	73	69	79	89
28	B	1	1	0	1	0	0	0	0	0	30	11	11	12	5
29	A	0	1	1	1	100	58	100	24	44	56	63	61	66	47
29	B	0	1	1	1	100	63	100	24	38	58	65	62	54	67
30	A	0	1	1	1	100	57	100	29	49	55	63	62	62	58
30	B	0	1	1	1	100	46	100	24	39	51	60	58	63	56
31	A	1	1	1	1	100	2	100	4	3	31	46	38	63	44
31	B	1	1	1	1	100	4	100	4	4	32	47	39	53	46
32	A	1	1	1	1	0	0	0	0	0	30	11	11	16	5
32	B	1	1	1	1	100	75	100	66	73	63	68	79	65	71

clip	ball	outcome_both	outcome_a	outcome_b	outcome_none	difference	whether	how	sufficient	robust	w	wh	whs	heuristic	rating
1	A	0	0	0	0	100	40	100	23	36	48	58	55	57	42
1	B	0	0	0	0	100	15	100	16	9	37	50	46	54	37
2	A	0	0	0	0	57	12	0	0	10	35	14	14	25	21
2	B	0	0	0	0	18	0	0	0	0	30	11	11	24	19
3	A	1	0	0	0	100	88	100	12	76	69	72	66	72	76
3	B	1	0	0	0	100	89	100	11	75	69	73	66	72	75
4	A	1	0	0	0	100	78	100	4	78	64	69	60	58	63
4	B	1	0	0	0	100	95	100	15	57	72	74	69	54	78
5	A	0	0	1	0	100	90	100	0	47	69	73	62	47	71
5	B	0	0	1	0	100	0	100	0	0	30	46	36	68	22
6	A	0	0	1	0	100	59	100	16	35	56	64	59	53	73
6	B	0	0	1	0	100	18	100	6	14	38	51	44	53	22
7	A	1	0	1	0	100	34	100	0	25	45	56	46	70	59
7	B	1	0	1	0	100	100	100	67	60	74	76	87	64	79
8	A	1	0	1	0	0	0	0	0	0	30	11	11	25	7
8	B	1	0	1	0	100	100	100	100	100	74	76	97	84	92
9	A	0	1	0	0	0	0	0	0	0	30	11	11	14	8
9	B	0	1	0	0	100	100	100	0	100	74	76	65	78	90
10	A	0	1	0	0	77	18	0	0	22	38	16	16	15	23
10	B	0	1	0	0	98	79	0	0	63	65	35	34	21	55
11	A	1	1	0	0	100	70	100	77	68	61	67	81	71	93
11	B	1	1	0	0	0	0	0	0	0	30	11	11	16	4
12	A	1	1	0	0	100	82	100	74	83	66	70	84	57	77
12	B	1	1	0	0	100	0	100	12	24	30	46	40	53	37
13	A	0	1	1	0	67	34	0	0	35	45	21	21	14	8
13	B	0	1	1	0	70	35	0	0	35	46	21	21	21	64
14	A	0	1	1	0	97	91	0	0	59	70	38	37	21	22
14	B	0	1	1	0	91	77	0	0	51	64	34	33	20	18
15	A	1	1	1	0	100	1	100	99	10	31	46	68	71	76
15	B	1	1	1	0	100	1	100	100	10	31	46	68	71	76
16	A	1	1	1	0	100	23	100	100	35	40	53	75	80	92
16	B	1	1	1	0	0	0	0	0	0	30	11	11	14	4
17	A	0	0	0	1	100	19	100	37	18	39	51	54	66	69
17	B	0	0	0	1	100	0	100	36	17	30	46	48	65	46
18	A	0	0	0	1	100	11	100	40	17	35	49	52	55	63
18	B	0	0	0	1	100	7	100	37	9	33	48	50	56	66
19	A	1	0	0	1	100	74	100	7	65	63	68	60	55	53
19	B	1	0	0	1	100	72	100	7	65	61	67	59	55	49
20	A	1	0	0	1	100	92	100	8	72	70	73	66	57	41
20	B	1	0	0	1	100	88	100	4	53	68	72	63	56	71
21	A	0	0	1	1	100	47	100	40	45	51	60	63	58	80
21	B	0	0	1	1	100	9	100	21	10	34	48	46	59	18
22	A	0	0	1	1	100	100	100	89	83	74	76	94	47	60
22	B	0	0	1	1	100	8	100	0	15	34	48	39	53	42
23	A	1	0	1	1	5	0	0	0	0	31	11	11	15	3
23	B	1	0	1	1	91	79	0	0	72	65	35	34	22	39
24	A	1	0	1	1	100	66	100	4	63	59	66	57	57	44
24	B	1	0	1	1	100	94	100	22	79	71	74	71	54	73
25	A	0	1	0	1	100	25	100	21	26	41	53	50	69	43
25	B	0	1	0	1	100	74	100	54	65	62	68	75	56	73
26	A	0	1	0	1	100	6	100	3	9	33	47	39	60	39
26	B	0	1	0	1	100	87	100	35	54	68	72	73	46	69
27	A	1	1	0	1	100	97	100	52	97	73	75	81	67	80
27	B	1	1	0	1	0	0	0	0	0	30	11	11	17	6
28	A	1	1	0	1	100	90	100	22	80	69	73	69	79	89
28	B	1	1	0	1	0	0	0	0	0	30	11	11	12	5
29	A	0	1	1	1	100	58	100	24	44	56	63	61	66	47
29	B	0	1	1	1	100	63	100	24	38	58	65	62	54	67
30	A	0	1	1	1	100	57	100	29	49	55	63	62	62	58
30	B	0	1	1	1	100	46	100	24	39	51	60	58	63	56
31	A	1	1	1	1	100	2	100	4	3	31	46	38	63	44
31	B	1	1	1	1	100	4	100	4	4	32	47	39	53	46
32	A	1	1	1	1	0	0	0	0	0	30	11	11	16	5
32	B	1	1	1	1	100	75	100	66	73	63	68	79	65	71

A Counterfactual Simulation Model of Causal Judgments for Physical Events

Tobias Gerstenberg

March 28, 2021

1 Setup

1.1 Load packages

1.2 Helper functions

2 Preliminary study

2.1 Clips

2.2 Read in data

2.2.1 Demographics

2.3 Plots

2.3.1 Scatterplot

3 Experiment 1: One candidate cause

3.1 Clips

3.2 Read in data

3.2.1 Demographics

3.2.2 Participants’ feedback

3.3 Read in model predictions

3.3.1 Counterfactual condition

3.3.2 Causal condition

3.4 Counterfactual condition

3.4.1 Plots

3.4.2 Stats

3.4.2.1 Check for potential order effects

3.4.2.2 Model evaluation

3.5 Causal condition

3.5.1 Plots

3.5.2 Stats

3.5.2.1 Check for potential order effects

3.5.2.2 Model evaluation

4 Experiment 2: Two candidate causes

4.1 Clips

4.2 Read in data

4.2.1 Demographics

4.2.2 Participants’ feedback

4.3 Read in model predictions

4.3.1 Counterfactual condition

4.3.2 Causal condition

4.3.3 Heuristic model

4.4 Counterfactual condition

4.4.1 Stats

4.4.1.1 Model evaluation

4.4.2 Plots

4.4.2.1 Bar graph

4.5 Causal condition

4.5.1 Stats

4.5.1.1 Bayesian mixed effects model

4.5.1.2 Model comparison

4.5.1.3 Heuristic model

4.5.1.4 Predictions for mean causal judgments

4.5.1.5 Model evaluation

4.5.1.6 Individual participant regressions

4.5.1.6.1 Load individual participant regressions

4.5.1.6.2 Model performance on individual participant level

4.5.1.6.3 Clusters of individual participants’ responses

4.5.1.6.3.1 Check that clustering is stable

4.5.2 Plots

4.5.2.1 Bar graph

4.5.2.2 Selection

4.5.2.3 Scatter plot

4.5.2.4 Individual participant variance for a selection of clips (clustered)

4.5.3 Tables

4.5.3.1 Relationship between predictors

4.5.3.2 Population level predictors in the mixed effects models

4.5.3.3 Individual participants regression results

4.5.3.4 CSMwhs predictions for selection of cases

4.5.3.5 Heuristic model

5 Appendix

5.1 Preliminary study

5.1.1 Read in model predictions

5.1.1.1 Counterfactual condition

5.1.1.2 Causal condition

5.1.2 Counterfactual condition

5.1.2.1 Plots

5.1.2.2 Stats

5.1.3 Causal condition

5.1.3.1 Plots

5.1.3.2 Stats

5.2 Experiment 1

5.2.1 Frequentist tests for potential order effects

clip	ball	outcome_both	outcome_a	outcome_b	outcome_none	difference	whether	how	sufficient	robust	w	wh	whs	heuristic	rating
1	A	0	0	0	0	100	40	100	23	36	48	58	55	57	42
1	B	0	0	0	0	100	15	100	16	9	37	50	46	54	37
2	A	0	0	0	0	57	12	0	0	10	35	14	14	25	21
2	B	0	0	0	0	18	0	0	0	0	30	11	11	24	19
3	A	1	0	0	0	100	88	100	12	76	69	72	66	72	76
3	B	1	0	0	0	100	89	100	11	75	69	73	66	72	75
4	A	1	0	0	0	100	78	100	4	78	64	69	60	58	63
4	B	1	0	0	0	100	95	100	15	57	72	74	69	54	78
5	A	0	0	1	0	100	90	100	0	47	69	73	62	47	71
5	B	0	0	1	0	100	0	100	0	0	30	46	36	68	22
6	A	0	0	1	0	100	59	100	16	35	56	64	59	53	73
6	B	0	0	1	0	100	18	100	6	14	38	51	44	53	22
7	A	1	0	1	0	100	34	100	0	25	45	56	46	70	59
7	B	1	0	1	0	100	100	100	67	60	74	76	87	64	79
8	A	1	0	1	0	0	0	0	0	0	30	11	11	25	7
8	B	1	0	1	0	100	100	100	100	100	74	76	97	84	92
9	A	0	1	0	0	0	0	0	0	0	30	11	11	14	8
9	B	0	1	0	0	100	100	100	0	100	74	76	65	78	90
10	A	0	1	0	0	77	18	0	0	22	38	16	16	15	23
10	B	0	1	0	0	98	79	0	0	63	65	35	34	21	55
11	A	1	1	0	0	100	70	100	77	68	61	67	81	71	93
11	B	1	1	0	0	0	0	0	0	0	30	11	11	16	4
12	A	1	1	0	0	100	82	100	74	83	66	70	84	57	77
12	B	1	1	0	0	100	0	100	12	24	30	46	40	53	37
13	A	0	1	1	0	67	34	0	0	35	45	21	21	14	8
13	B	0	1	1	0	70	35	0	0	35	46	21	21	21	64
14	A	0	1	1	0	97	91	0	0	59	70	38	37	21	22
14	B	0	1	1	0	91	77	0	0	51	64	34	33	20	18
15	A	1	1	1	0	100	1	100	99	10	31	46	68	71	76
15	B	1	1	1	0	100	1	100	100	10	31	46	68	71	76
16	A	1	1	1	0	100	23	100	100	35	40	53	75	80	92
16	B	1	1	1	0	0	0	0	0	0	30	11	11	14	4
17	A	0	0	0	1	100	19	100	37	18	39	51	54	66	69
17	B	0	0	0	1	100	0	100	36	17	30	46	48	65	46
18	A	0	0	0	1	100	11	100	40	17	35	49	52	55	63
18	B	0	0	0	1	100	7	100	37	9	33	48	50	56	66
19	A	1	0	0	1	100	74	100	7	65	63	68	60	55	53
19	B	1	0	0	1	100	72	100	7	65	61	67	59	55	49
20	A	1	0	0	1	100	92	100	8	72	70	73	66	57	41
20	B	1	0	0	1	100	88	100	4	53	68	72	63	56	71
21	A	0	0	1	1	100	47	100	40	45	51	60	63	58	80
21	B	0	0	1	1	100	9	100	21	10	34	48	46	59	18
22	A	0	0	1	1	100	100	100	89	83	74	76	94	47	60
22	B	0	0	1	1	100	8	100	0	15	34	48	39	53	42
23	A	1	0	1	1	5	0	0	0	0	31	11	11	15	3
23	B	1	0	1	1	91	79	0	0	72	65	35	34	22	39
24	A	1	0	1	1	100	66	100	4	63	59	66	57	57	44
24	B	1	0	1	1	100	94	100	22	79	71	74	71	54	73
25	A	0	1	0	1	100	25	100	21	26	41	53	50	69	43
25	B	0	1	0	1	100	74	100	54	65	62	68	75	56	73
26	A	0	1	0	1	100	6	100	3	9	33	47	39	60	39
26	B	0	1	0	1	100	87	100	35	54	68	72	73	46	69
27	A	1	1	0	1	100	97	100	52	97	73	75	81	67	80
27	B	1	1	0	1	0	0	0	0	0	30	11	11	17	6
28	A	1	1	0	1	100	90	100	22	80	69	73	69	79	89
28	B	1	1	0	1	0	0	0	0	0	30	11	11	12	5
29	A	0	1	1	1	100	58	100	24	44	56	63	61	66	47
29	B	0	1	1	1	100	63	100	24	38	58	65	62	54	67
30	A	0	1	1	1	100	57	100	29	49	55	63	62	62	58
30	B	0	1	1	1	100	46	100	24	39	51	60	58	63	56
31	A	1	1	1	1	100	2	100	4	3	31	46	38	63	44
31	B	1	1	1	1	100	4	100	4	4	32	47	39	53	46
32	A	1	1	1	1	0	0	0	0	0	30	11	11	16	5
32	B	1	1	1	1	100	75	100	66	73	63	68	79	65	71