= read_csv(paste0(trial_info_dir, 'experiment.csv'),
trial_info show_col_types = F) %>%
mutate(bar_color = ifelse(cf_success_rate >= 50, 1, 0),
pred_cf = ifelse(outcome == 'won', 100 - cf_success_rate, cf_success_rate),
pred_hyp = ifelse(outcome == 'won', 100 - hyp_success_rate, hyp_success_rate),
diff_made = c(F, F, T, T, T, T,
F, T, T, F, T, F,
T, T, F, F, F, F),trial_label = c(1, NA, 2, NA, 3, NA,
NA, 4, NA, 5, NA, 6,
7, NA, 8, NA, 9, NA))
= read_csv(paste0(data_dir, 'experiment1_cf/trials.csv'),
data_cf_raw show_col_types = F) %>%
rename(id = workerid)
= read_csv(paste0(data_dir, 'experiment1_cf/participants.csv'),
data_cf_participants_raw show_col_types = F) %>%
rename(id = workerid)
= data_cf_raw %>%
data_cf drop_na(trial) %>%
left_join(trial_info %>%
select(trial, outcome),
by = c('trial')) %>%
mutate(cf_recode = ifelse(outcome == 'won', 100 - cf, cf),
id = factor(id))
= data_cf %>%
data_cf_means group_by(trial) %>%
summarise(cf_mean = mean(cf),
cf_low = smean.cl.boot(cf)[2],
cf_high = smean.cl.boot(cf)[3],
cf_recode_mean = mean(cf_recode),
cf_recode_low = smean.cl.boot(cf_recode)[2],
cf_recode_high = smean.cl.boot(cf_recode)[3])
= data_cf_participants_raw %>%
data_cf_participants mutate(id = factor(id))
Number of participants:
length(unique(data_cf$id))
## [1] 50
Participants were asked: “What factors influenced how you decided to respond? Do you have any questions or comments regarding the experiment?”
%>%
data_cf_participants select(feedback) %>%
datatable()
= read_csv(paste0(data_dir, 'experiment1_hyp/trials.csv'),
data_hyp_raw show_col_types = F) %>%
rename(id = workerid)
= read_csv(paste0(data_dir, 'experiment1_hyp/participants.csv'),
data_hyp_participants_raw show_col_types = F) %>%
rename(id = workerid)
= data_hyp_raw %>%
data_hyp drop_na(trial) %>%
left_join(trial_info %>%
select(trial, outcome),
by = c('trial')) %>%
mutate(hyp_recode = ifelse(outcome == 'won', 100 - hyp, hyp),
id = factor(id))
= data_hyp %>%
data_hyp_means group_by(trial) %>%
summarise(hyp_mean = mean(hyp),
hyp_low = smean.cl.boot(hyp)[2],
hyp_high = smean.cl.boot(hyp)[3],
hyp_recode_mean = mean(hyp_recode),
hyp_recode_low = smean.cl.boot(hyp_recode)[2],
hyp_recode_high = smean.cl.boot(hyp_recode)[3])
= data_hyp_participants_raw %>%
data_hyp_participants mutate(id = factor(id))
Number of participants:
length(unique(data_hyp$id))
## [1] 50
Participants were asked: “What factors influenced how you decided to respond? Do you have any questions or comments regarding the experiment?”
%>%
data_hyp_participants select(feedback) %>%
datatable()
= read_csv(paste0(data_dir, 'experiment2/trials.csv'),
data_raw show_col_types = F) %>%
rename(id = workerid)
= read_csv(paste0(data_dir, 'experiment2/participants.csv'),
data_participants_raw show_col_types = F) %>%
rename(id = workerid)
= data_raw %>%
data drop_na(trial) %>%
left_join(trial_info, by = c('trial')) %>%
mutate(id = factor(id))
= data_participants_raw %>%
data_participants mutate(id = factor(id))
Number of participants:
length(unique(data$id))
## [1] 50
Participants were asked: “What factors influenced how you decided to respond? Do you have any questions or comments regarding the experiment?”
%>%
data_participants select(feedback) %>%
datatable()
= function(data) {
print_demographics # gender
%>%
data group_by(gender) %>%
summarise(n = n()) %>%
print()
# age
print('age:')
mean(data$age, na.rm = T) %>% print()
sd(data$age, na.rm = T) %>% print()
# race
%>%
data group_by(race) %>%
summarise(n = n()) %>%
print()
# ethnicity
%>%
data group_by(ethnicity) %>%
summarise(n = n()) %>%
print()
# time taken
print('time taken (min):')
print(mean(data$time, na.rm = T)/60/1000)
print(sd(data$time, na.rm = T)/60/1000)
}
print_demographics(data_cf_participants %>%
rbind(data_hyp_participants))
## # A tibble: 4 × 2
## gender n
## <chr> <int>
## 1 Female 63
## 2 Male 33
## 3 Non-binary 3
## 4 Trans Man 1
## [1] "age:"
## [1] 32.77551
## [1] 13.27623
## # A tibble: 8 × 2
## race n
## <chr> <int>
## 1 American Indian/Alaska Native 3
## 2 Asian 10
## 3 black and white 1
## 4 Black/African American 5
## 5 Latino/Hispanic 1
## 6 Multiracial 6
## 7 native 1
## 8 White 73
## # A tibble: 3 × 2
## ethnicity n
## <chr> <int>
## 1 Hispanic 6
## 2 Non-Hispanic 88
## 3 <NA> 6
## [1] "time taken (min):"
## [1] 9.871441
## [1] 7.198263
print_demographics(data_participants)
## # A tibble: 4 × 2
## gender n
## <chr> <int>
## 1 Female 24
## 2 Male 24
## 3 Non-binary 1
## 4 Transgender man 1
## [1] "age:"
## [1] 40.08
## [1] 15.02901
## # A tibble: 5 × 2
## race n
## <chr> <int>
## 1 Asian 3
## 2 Black/African American 6
## 3 Middle Eastern 1
## 4 Multiracial 2
## 5 White 38
## # A tibble: 2 × 2
## ethnicity n
## <chr> <int>
## 1 Hispanic 2
## 2 Non-Hispanic 48
## [1] "time taken (min):"
## [1] 10.86952
## [1] 4.984792
= read_csv(sprintf('%s/features.csv', trial_info_dir),
features show_col_types = F) %>%
left_join(trial_info %>%
select(trial, outcome),
by = 'trial') %>%
mutate(doors_final = factor(doors_final,
levels = c('open_open', 'open_closed',
'closed_open', 'closed_closed',
'none_none')))
= data %>%
data_modeling select(id, trial, cause) %>%
left_join(data_cf_means %>%
select(trial, cf_recode_mean),
by = 'trial') %>%
left_join(data_hyp_means %>%
select(trial, hyp_recode_mean),
by = 'trial') %>%
left_join(features,
by = 'trial')
= lmer(
fit.heuristic formula = cause ~ 1 + outcome*doors_final + (1 | id),
data = data_modeling
)
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
summary(fit.heuristic)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cause ~ 1 + outcome * doors_final + (1 | id)
## Data: data_modeling
##
## REML criterion at convergence: 8348.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.98560 -0.63528 0.06144 0.61406 2.92631
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 155.6 12.48
## Residual 718.9 26.81
## Number of obs: 882, groups: id, 50
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 83.646 2.613 143.159 32.017 < 2e-16
## outcomewon -32.353 2.575 826.345 -12.562 < 2e-16
## doors_finalopen_closed -56.668 3.313 826.508 -17.103 < 2e-16
## doors_finalclosed_open 4.540 3.337 826.808 1.360 0.174
## doors_finalclosed_closed -68.115 4.325 826.586 -15.749 < 2e-16
## doors_finalnone_none -23.633 4.160 826.136 -5.681 1.85e-08
## outcomewon:doors_finalopen_closed 90.865 4.331 826.291 20.980 < 2e-16
##
## (Intercept) ***
## outcomewon ***
## doors_finalopen_closed ***
## doors_finalclosed_open
## doors_finalclosed_closed ***
## doors_finalnone_none ***
## outcomewon:doors_finalopen_closed ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) otcmwn drs_fnlp_ drs_fnlclsd_p drs_fnlclsd_c drs_fnln_
## outcomewon -0.551
## drs_fnlpn_c -0.429 0.435
## drs_fnlclsd_p -0.426 0.432 0.336
## drs_fnlclsd_c -0.328 0.333 0.259 0.257
## drs_fnlnn_n 0.000 -0.273 0.000 0.000 0.000
## otcmwn:dr__ 0.328 -0.595 -0.765 -0.257 -0.198 0.162
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 3 columns / coefficients
Fit matrix is rank-deficient because not all factor levels have data for both outcomes (e.g. there is no trial that was a loss and had no doors). So we fit win and loss trials separately, using the same formula.
= lmer(
fit.heuristic_win formula = cause ~ 1 + doors_final + (1 | id),
data = data_modeling %>%
filter(outcome == 'won')
)
summary(fit.heuristic_win)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cause ~ 1 + doors_final + (1 | id)
## Data: data_modeling %>% filter(outcome == "won")
##
## REML criterion at convergence: 4305.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.53645 -0.67408 0.05998 0.68515 2.24554
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 190.7 13.81
## Residual 873.1 29.55
## Number of obs: 444, groups: id, 50
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 51.409 2.715 79.125 18.94 < 2e-16 ***
## doors_finalopen_closed 34.085 3.076 392.902 11.08 < 2e-16 ***
## doors_finalnone_none -23.749 4.585 392.411 -5.18 3.55e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) drs_fnlp_
## drs_fnlpn_c -0.426
## drs_fnlnn_n -0.286 0.252
= lmer(
fit.heuristic_loss formula = cause ~ 1 + doors_final + (1 | id),
data = data_modeling %>%
filter(outcome == 'lost')
)
summary(fit.heuristic_loss)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: cause ~ 1 + doors_final + (1 | id)
## Data: data_modeling %>% filter(outcome == "lost")
##
## REML criterion at convergence: 4026.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7577 -0.6817 0.0798 0.5997 3.1435
##
## Random effects:
## Groups Name Variance Std.Dev.
## id (Intercept) 173.5 13.17
## Residual 514.2 22.68
## Number of obs: 438, groups: id, 50
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 83.483 2.476 82.024 33.718 <2e-16 ***
## doors_finalopen_closed -56.500 2.804 384.304 -20.152 <2e-16 ***
## doors_finalclosed_open 4.618 2.825 384.684 1.635 0.103
## doors_finalclosed_closed -68.288 3.660 384.403 -18.658 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) drs_fnlp_ drs_fnlclsd_p
## drs_fnlpn_c -0.383
## drs_fnlclsd_p -0.381 0.336
## drs_fnlclsd_c -0.292 0.258 0.256
= data_cf_means %>%
data_cf_pred rename(emp_mean = cf_mean,
emp_low = cf_low,
emp_high = cf_high) %>%
left_join(trial_info %>%
select(trial,
pred = cf_success_rate,
pred_recode = pred_cf,
diff_made,
trial_label),by = 'trial')
= data_hyp_means %>%
data_hyp_pred rename(emp_mean = hyp_mean,
emp_low = hyp_low,
emp_high = hyp_high) %>%
left_join(trial_info %>%
select(trial,
pred = hyp_success_rate,
pred_recode = pred_hyp,
diff_made,
trial_label),by = 'trial')
= c(0, 100)
limits = c("FALSE" = "red3", "TRUE" = "chartreuse3")
color_scale
= function(data, name) {
plot_simulation = cor(data$emp_mean, data$pred) %>%
r round(2)
= rmse(data$emp_mean, data$pred)
rmse
= ggplot(data = data,
g aes(x = pred,
y = emp_mean)) +
geom_abline(intercept = 0, slope = 1,
linetype = 2, size = 0.2) +
# error bars
geom_linerange(size = 0.5,
mapping = aes(ymin = emp_low,
ymax = emp_high),
color = 'gray') +
# means
geom_point(mapping = aes(fill = 'darkgray'),
shape = 21, size = 3, stroke = 0.2) +
# trial labels
geom_text_repel(data = data %>%
filter(!is.na(trial_label)),
mapping = aes(label = trial_label),
size = 4,
seed = 1,
max.overlaps = 18) +
theme(legend.position = "none",
text = element_text(size = 12),
axis.text = element_text(size = 12)) +
annotate('text',
label = sprintf('RMSE = %.2f\nr = %s', rmse, r),
hjust = 0, # left align
x = limits[1], y = limits[2]) +
scale_x_continuous(name = paste(name, 'simulation'),
limits = limits) +
scale_y_continuous(name = paste(name, 'judgment'),
limits = limits,
expand = expansion(0.1)) +
scale_color_manual(values = color_scale,
guide = 'none') +
scale_fill_manual(values = color_scale,
guide = 'none')
return(g)
}
= plot_simulation(data_cf_pred, 'counterfactual')
p_cf = plot_simulation(data_hyp_pred, 'hypothetical')
p_hyp
+ p_cf + plot_annotation(tag_levels = 'A') p_hyp
#ggsave(paste0(figures_dir, 'experiment1.pdf'), width = 6, height = 3)
= data %>%
data_pred group_by(trial) %>%
summarise(cause_mean = mean(cause),
cause_low = smean.cl.boot(cause)[2],
cause_high = smean.cl.boot(cause)[3]) %>%
ungroup() %>%
# counterfactual judgments
left_join(data_cf_means %>%
select(trial,
pred_cf_mean = cf_recode_mean,
pred_cf_low = cf_recode_low,
pred_cf_high = cf_recode_high),
by = 'trial') %>%
# hypothetical judgments
left_join(data_hyp_means %>%
select(trial,
pred_hyp_mean = hyp_recode_mean,
pred_hyp_low = hyp_recode_low,
pred_hyp_high = hyp_recode_high),
by = 'trial') %>%
# heuristic model
left_join(features %>%
select(trial, outcome),
by = 'trial') %>%
left_join(data_modeling %>%
filter(outcome == 'won') %>%
mutate(x = predict(object = fit.heuristic_win,
newdata = data_modeling %>%
filter(outcome == 'won'),
re.form = NA)) %>%
rbind(data_modeling %>%
filter(outcome == 'lost') %>%
mutate(x = predict(object = fit.heuristic_loss,
newdata = data_modeling %>%
filter(outcome == 'lost'),
re.form = NA))) %>%
group_by(trial) %>%
summarise(pred_heuristic_mean = mean(x)),
by = 'trial') %>%
mutate(# since the plot function will draw error bars
pred_heuristic_low = pred_heuristic_mean,
pred_heuristic_high = pred_heuristic_mean) %>%
merge(trial_info %>%
select(trial, diff_made, trial_label),
by = c('trial'))
= c(0, 100)
limits = c("FALSE" = "red3", "TRUE" = "chartreuse3")
color_scale
= function(model_name) {
plot_cause_vs = paste('pred', model_name, 'mean', sep = '_')
col_mean = paste('pred', model_name, 'low', sep = '_')
col_low = paste('pred', model_name, 'high', sep = '_')
col_high
= cor(data_pred$cause_mean, data_pred[[col_mean]]) %>%
r round(2)
= rmse(data_pred$cause_mean, data_pred[[col_mean]])
rmse
= ggplot(data = data_pred,
g aes(x = get(col_mean),
y = cause_mean)) +
geom_abline(intercept = 0, slope = 1,
linetype = 2, size = 0.2) +
# error bars
geom_linerange(size = 0.5,
mapping = aes(ymin = cause_low,
ymax = cause_high),
color = 'gray') +
geom_errorbarh(mapping = aes(xmin = get(col_low),
xmax = get(col_high)),
color = 'lightgray',
height = 0) +
# means
geom_point(mapping = aes(fill = diff_made),
shape = 21, size = 3, stroke = 0.2) +
# trial labels
geom_text_repel(data = data_pred %>%
filter(!is.na(trial_label)),
mapping = aes(label = trial_label,
color = diff_made),
size = 4,
seed = 1,
max.overlaps = 18) +
theme(legend.position = "none",
text = element_text(size = 12),
axis.text = element_text(size = 12)) +
annotate('text',
label = sprintf('RMSE = %.2f\nr = %s', rmse, r),
hjust = 0,
x = limits[1], y = limits[2] - 3) +
scale_x_continuous(name = case_when(
== 'cf' ~ 'counterfactual simulation model',
model_name == 'hyp' ~ 'hypothetical simulation model',
model_name == 'heuristic' ~ 'heuristic model'
model_name
),limits = limits) +
scale_y_continuous(name = 'causal judgment',
limits = limits) +
scale_color_manual(values = color_scale,
guide = 'none') +
scale_fill_manual(values = color_scale,
guide = 'none')
return(g)
}
= plot_cause_vs('cf')
p_cause_vs_cf = plot_cause_vs('hyp')
p_cause_vs_hyp = plot_cause_vs('heuristic')
p_cause_vs_heuristic
+ p_cause_vs_cf + p_cause_vs_heuristic +
p_cause_vs_hyp plot_annotation(tag_levels = 'A')
#ggsave(paste0(figures_dir, 'experiment2.pdf'), width = 10, height = 3.5)
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.8 purrr_0.3.4
## [5] readr_2.1.2 tidyr_1.2.0 tibble_3.1.6 tidyverse_1.3.1
## [9] patchwork_1.1.1 ggrepel_0.9.1 Hmisc_4.6-0 ggplot2_3.3.5
## [13] Formula_1.2-4 survival_3.2-13 lattice_0.20-45 Metrics_0.1.4
## [17] DT_0.22 lmerTest_3.1-3 lme4_1.1-29 Matrix_1.4-0
## [21] ggstatsplot_0.9.1 knitr_1.38
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-155 fs_1.5.2 bit64_4.0.5
## [4] lubridate_1.8.0 httr_1.4.2 insight_0.17.0
## [7] RColorBrewer_1.1-3 numDeriv_2016.8-1.1 tools_4.1.3
## [10] backports_1.4.1 bslib_0.3.1 utf8_1.2.2
## [13] R6_2.5.1 rpart_4.1.16 statsExpressions_1.3.1
## [16] DBI_1.1.2 colorspace_2.0-3 nnet_7.3-17
## [19] withr_2.5.0 tidyselect_1.1.2 gridExtra_2.3
## [22] bit_4.0.4 compiler_4.1.3 rvest_1.0.2
## [25] performance_0.9.0 cli_3.2.0 htmlTable_2.4.0
## [28] xml2_1.3.3 labeling_0.4.2 bookdown_0.26
## [31] bayestestR_0.11.5 sass_0.4.1 scales_1.2.0
## [34] checkmate_2.0.0 mvtnorm_1.1-3 mc2d_0.1-21
## [37] digest_0.6.29 foreign_0.8-82 minqa_1.2.4
## [40] rmarkdown_2.13 base64enc_0.1-3 WRS2_1.1-3
## [43] jpeg_0.1-9 pkgconfig_2.0.3 htmltools_0.5.2
## [46] highr_0.9 dbplyr_2.1.1 fastmap_1.1.0
## [49] readxl_1.4.0 htmlwidgets_1.5.4 rlang_1.0.2
## [52] rstudioapi_0.13 farver_2.1.0 jquerylib_0.1.4
## [55] generics_0.1.2 jsonlite_1.8.0 crosstalk_1.2.0
## [58] vroom_1.5.7 magrittr_2.0.3 parameters_0.17.0
## [61] Rcpp_1.0.8.3 munsell_0.5.0 fansi_1.0.3
## [64] lifecycle_1.0.1 stringi_1.7.6 yaml_2.3.5
## [67] MASS_7.3-55 plyr_1.8.7 grid_4.1.3
## [70] paletteer_1.4.0 parallel_4.1.3 crayon_1.5.1
## [73] haven_2.4.3 splines_4.1.3 hms_1.1.1
## [76] zeallot_0.1.0 pillar_1.7.0 boot_1.3-28
## [79] reprex_2.0.1 glue_1.6.2 evaluate_0.15
## [82] latticeExtra_0.6-29 modelr_0.1.8 data.table_1.14.2
## [85] tzdb_0.3.0 png_0.1-7 vctrs_0.4.0
## [88] nloptr_2.0.0 cellranger_1.1.0 gtable_0.3.0
## [91] rematch2_2.1.2 reshape_0.8.8 assertthat_0.2.1
## [94] datawizard_0.4.0 xfun_0.30 correlation_0.8.0
## [97] broom_0.8.0 cluster_2.1.2 ellipsis_0.3.2