library("Hmisc")
library("gtools")
library("magick")
library("tidyjson")
library("png")
library("RSQLite")
library("patchwork")
library("corrr")
library("ggrepel")
library("ggpubr")
library("lsr")
library("knitr")
library("grid")
library("xtable")
library("rjson")
library("transport")
library("pdftools")
library("tidyverse")
theme_set(theme_classic() +
theme(text = element_text(size = 20,
family = "Helvetica")))
# theme_set(theme_classic())
$set(comment = "",
opts_chunkresults = "hold",
fig.show = "hold")
# suppress grouping warning
options(dplyr.summarise.inform = F)
# L2
= function(x, y, w = NULL) {
l2 if (is.null(w)) {
return(mean((x - y)^2))
else {
} return(weighted.mean((x-y)^2, w))
}
}
# root mean squared error
= function(x, y) {
rmse return(sqrt(mean(l2(x, y))))
}
# natural log clipped between 0.01 and 0.99
= function(x) {
logclip return(log(pmax(pmin(0.99, x), 0.01)))
}
# get the clipped nll, with optional weights
= function(p, x, w = NULL) {
nll if (is.null(w)) {
return(-mean(p * logclip(x) + (1 - p) * logclip(1 - x)))
else {
} return(-weighted.mean(p * logclip(x) + (1 - p) * logclip(1 - x), w))
}
}
# go from logit to probability, used for fitting logistic models
= function(logit) {
logit2prob = exp(logit)
odds = odds / (1 + odds)
prob return(prob)
}
load("data/exp1_participants.RData")
load("data/exp1_selection.RData")
load("data/exp1_prediction.RData")
load("data/exp1_responsibility.RData")
load("data/exp1_info.RData")
load("cache/exp1_features_all.RData")
load("cache/exp1_model.RData")
load("data/exp2_participants.RData")
load("data/exp2_selection.RData")
load("data/exp2_prediction.RData")
load("data/exp2_responsibility.RData")
load("data/exp2_info.RData")
load("cache/exp2_features_all.RData")
load("cache/exp2_model.RData")
load("data/exp3_participants.RData")
load("data/exp3_prediction.RData")
load("data/exp3_responsibility.RData")
load("data/exp3_info.RData")
load("cache/exp3_features_all.RData")
load("cache/exp3_model.RData")
set.seed(1)
%>%
df.exp1.participants mutate(age = as.numeric(age)) %>%
summarize(age.mean = mean(age),
age.sd = sd(age),
nfemale = sum(sex == "female"),
nmale = sum(sex == "male"))
# experiment_3 is selection
# experiment_2, condition 0 is prediction
# experiment_2, condition 1 is responsibility
%>%
df.exp1.participants mutate(across(.cols = c(age, time),
.fns = ~ as.numeric(.))) %>%
group_by(experiment, condition) %>%
summarize(time.mean = mean(time),
time.sd = sd(time),
exclude = sum(exclude),
n = n(),
nfemale = sum(sex == "female"),
nmale = sum(sex == "male")) %>%
mutate(across(.cols = contains("time"),
.fns = ~ round(., 2)))
%>%
df.exp2.participants mutate(age = as.numeric(age)) %>%
summarize(age.mean = mean(age),
age.sd = sd(age),
nfemale = sum(sex == "female"),
nmale = sum(sex == "male"))
# experiment_5 is selection
# experiment_4, condition 0 is prediction
# experiment_4, condition 1 is responsibility
%>%
df.exp2.participants mutate(across(.cols = c(age, time),
.fns = ~ as.numeric(.))) %>%
group_by(experiment, condition) %>%
summarize(time.mean = mean(time),
time.sd = sd(time),
exclude = sum(exclude),
n = n(),
nfemale = sum(sex == "female"),
nmale = sum(sex == "male")) %>%
mutate(across(.cols = contains("time"),
.fns = ~ round(., 2)))
%>%
df.exp3.participants mutate(age = as.numeric(age)) %>%
summarize(age.mean = mean(age),
age.sd = sd(age),
nfemale = sum(gender == "female"),
nmale = sum(gender == "male"))
# experiment_7 is responsibility
# experiment_8 is prediction
# calculating total time spent in conditions
%>%
df.exp3.prediction summarize(mean(rt), sd(rt)) / 60 / 1000 * 42
%>%
df.exp3.responsibility summarize(mean(rt), sd(rt)) / 60 / 1000 * 42
= df.exp1.info %>%
df.exp1.info filter(trial != 5, index != 0)
= df.exp1.selection %>%
df.exp1.selection filter(trial != 5)
= df.exp1.model %>%
df.exp1.model filter(trial != 5, index != 0)
= df.exp1.features.all %>%
df.exp1.features.all filter(trial != 5)
= df.exp2.info %>%
df.exp2.info filter(trial != 43, index != 0)
= df.exp2.selection %>%
df.exp2.selection filter(trial != 43)
= df.exp2.model %>%
df.exp2.model filter(trial != 43, index != 0)
= df.exp2.features.all %>%
df.exp2.features.all filter(trial != 43)
= df.exp3.info %>%
df.exp3.info filter(trial != 43)
= df.exp3.features.all %>%
df.exp3.features.all filter(trial != 43)
# creates long df with a row for each brick seen in each trial by each participant
= df.exp1.info %>%
df.exp1.selection.long select(trial, index) %>%
crossing(participant = unique(df.exp1.selection$participant)) %>%
left_join(df.exp1.selection %>%
unnest(response) %>%
select(participant, trial, index = response) %>%
mutate(response = 1),
by = c("participant", "trial", "index")) %>%
replace_na(list(response = 0)) %>%
arrange(participant, trial, index) %>%
select(participant, trial, index, response)
# averages out participant selection responses on the individual brick level
= df.exp1.selection.long %>%
df.exp1.selection.brick group_by(trial, index) %>%
summarize(across(response, mean), .groups = "drop")
# prediction/responsibility conditions
= df.exp1.info %>%
df.exp1.trials group_by(trial) %>%
summarize(n_bricks = n()) %>%
left_join(df.exp1.responsibility %>%
group_by(trial) %>%
summarize(r_mean = mean(response),
r_low = smean.cl.boot(response)[2],
r_high = smean.cl.boot(response)[3]),
by = "trial") %>%
left_join(df.exp1.prediction %>%
group_by(trial) %>%
summarize(p_mean = mean(response),
p_low = smean.cl.boot(response)[2],
p_high = smean.cl.boot(response)[3]),
by = "trial")
= df.exp2.info %>%
df.exp2.selection.long select(trial, index) %>%
crossing(participant = unique(df.exp2.selection$participant)) %>%
left_join(df.exp2.selection %>%
unnest(response) %>%
select(participant, trial, index = response) %>%
mutate(response = 1),
by = c("participant", "trial", "index")) %>%
replace_na(list(response = 0)) %>%
arrange(participant, trial, index) %>%
select(participant, trial, index, response)
= df.exp2.selection.long %>%
df.exp2.selection.brick group_by(trial, index) %>%
summarize(across(response, mean), .groups = "drop")
= df.exp2.info %>%
df.exp2.trials group_by(trial) %>%
summarize(n_bricks = n()) %>%
left_join(df.exp2.responsibility %>%
group_by(trial) %>%
summarize(r_mean = mean(response),
r_low = smean.cl.boot(response)[2],
r_high = smean.cl.boot(response)[3]),
by = "trial") %>%
left_join(df.exp2.prediction %>%
group_by(trial) %>%
summarize(p_mean = mean(response),
p_low = smean.cl.boot(response)[2],
p_high = smean.cl.boot(response)[3]),
by = "trial")
= df.exp3.info %>%
df.exp3.trials select(trial, index, color, pfall, qfall, fall) %>%
na.omit() %>%
nest_by(trial, pfall, qfall) %>%
mutate(A = data %>% filter(color == "A") %>% pull(index),
B = data %>% filter(color == "B") %>% pull(index),
fall = data %>% filter(color == "B") %>% pull(fall)) %>%
select(-data) %>%
left_join(df.exp3.responsibility %>%
group_by(trial) %>%
summarize(r_mean = mean(response),
r_low = smean.cl.boot(response)[2],
r_high = smean.cl.boot(response)[3]),
by = "trial") %>%
left_join(df.exp3.prediction %>%
select(trial, response) %>%
group_by(trial) %>%
summarize(p_mean = mean(response),
p_low = smean.cl.boot(response)[2],
p_high = smean.cl.boot(response)[3]),
by = "trial") %>%
rename(a = A, b = B, probability = pfall, class = qfall) %>%
ungroup() %>%
filter(trial != 43)
= df.exp1.model %>%
df.exp1.master mutate(response = df.exp1.selection.brick$response) %>%
select(trial, index, response, everything())
= df.exp1.master %>%
df.exp1.bestmodels summarize(across(contains("_"), ~nll(response, .) )) %>%
pivot_longer(contains("_"), names_to = "modelname") %>%
arrange(value)
= df.exp2.model %>%
df.exp2.master mutate(response = df.exp2.selection.brick$response) %>%
select(trial, index, response, everything())
= df.exp2.master %>%
df.exp2.bestmodels summarize(across(contains("_"), ~nll(response, .) )) %>%
pivot_longer(contains("_"), names_to = "modelname") %>%
arrange(value)
= df.exp3.model %>%
df.exp3.master left_join(df.exp3.trials %>%
select(trial, r_mean, r_low, r_high, p_mean, p_low, p_high),
by = "trial") %>%
mutate(across(c(r_mean, p_mean, r_low, r_high, p_low, p_high), ~ . / 100)) %>%
select(trial, index, contains("p_"), contains("r_"), contains("_"))
= df.exp3.master %>%
df.exp3.bestmodels summarize(across(contains("_"), ~l2(r_mean, .) )) %>%
select(-contains('r_'), -contains('p_')) %>%
pivot_longer(contains("_"), names_to = "modelname") %>%
arrange(value)
# combine master dfs
= bind_rows(
df.exp123.master list(df.exp1.master,
df.exp2.master,%>%
df.exp3.master mutate(response = r_mean) %>%
select(-contains("r_"), -contains("p_"))
.id = "exp") %>%
), mutate(exp = as.numeric(exp)) %>%
group_by(exp) %>%
mutate(w = 1/n()) %>%
ungroup() %>%
select(exp, trial, index, response, w, everything())
= df.exp123.master %>%
df.exp123.bestmodels summarize(across(contains("_"), ~nll(response, .) )) %>%
pivot_longer(contains("_"), names_to = "modelname") %>%
arrange(value)
= df.exp123.master %>%
tmp1 filter(exp == 1) %>%
summarize(across(contains("_"), ~nll(response, .) )) %>%
pivot_longer(contains("_"), names_to = "modelname") %>%
arrange(value) %>%
filter(row_number() == 1) %>%
pull(modelname)
= df.exp123.master %>%
tmp2 filter(exp == 2) %>%
summarize(across(contains("_"), ~nll(response, .) )) %>%
pivot_longer(contains("_"), names_to = "modelname") %>%
arrange(value) %>%
filter(row_number() == 1) %>%
pull(modelname)
= df.exp123.master %>%
tmp12 filter(exp == 1 | exp == 2) %>%
summarize(across(contains("_"), ~nll(response, .) )) %>%
pivot_longer(contains("_"), names_to = "modelname") %>%
arrange(value) %>%
filter(row_number() == 1) %>%
pull(modelname)
= df.exp123.master %>%
tmp123 summarize(across(contains("_"), ~nll(response, .) )) %>%
pivot_longer(contains("_"), names_to = "modelname") %>%
arrange(value) %>%
filter(row_number() == 1) %>%
pull(modelname)
= c(tmp1, tmp2, tmp12, tmp123)
models
# correlations for all bricks, regardless of experiment
= df.exp123.master %>%
df.tmp select(response, all_of(models)) %>%
mutate(hsm1 = !!sym(models[[1]]),
hsm2 = !!sym(models[[2]]),
hsm12 = !!sym(models[[3]]),
hsm123 = !!sym(models[[4]])) %>%
select(-all_of(models)) %>%
::correlate(quiet=T) %>%
corrrselect(response) %>%
filter(row_number() != 1)
= df.exp123.master %>%
df.r_model select(exp, response, all_of(models)) %>%
mutate(hsm1 = !!sym(models[[1]]),
hsm2 = !!sym(models[[2]]),
hsm12 = !!sym(models[[3]]),
hsm123 = !!sym(models[[4]])) %>%
select(-all_of(models)) %>%
nest_by(exp) %>%
summarize(corrr::correlate(data, quiet=T)) %>%
filter(row_number() == 1) %>%
pivot_longer(contains("hsm"), names_to = "params", values_to = "vals") %>%
pivot_wider(names_from = exp, values_from = vals) %>%
mutate(fit = c("hsm:1", "hsm:2", "hsm:1,2", "hsm:1,2,3")) %>%
select(fit, exp1 = "1", exp2 = "2", exp3 = "3", params, -term, -response) %>%
mutate("all" = df.tmp$response) %>%
mutate(params = models) %>%
select(fit, exp1, exp2, exp3, all, params)
= tmp123 fit_bestmodel
= bind_rows(
df.exp123.features list(df.exp1.features.all,
df.exp2.features.all,
df.exp3.features.all.id = "exp") %>%
), mutate(across(-c(exp, trial, id), ~as.vector(scale(.)))) %>%
select(-s.avg_x, -b.x, -o.dx, -o.dy, -o.x) %>%
mutate(exp = as.numeric(exp)) %>%
rename(index = id)
= bind_rows(
df.exp123.selection list(df.exp1.selection.brick,
df.exp2.selection.brick,%>%
df.exp3.trials select(trial, index = b, response = p_mean) %>%
mutate(response = response / 100)
.id = "exp") %>%
), mutate(exp = as.numeric(exp))
= bind_rows(
df.exp12.trials list(df.exp1.trials,
df.exp2.trials.id = "exp") %>%
), mutate(exp = as.numeric(exp))
= bind_rows(
df.exp123.trials list(df.exp1.trials,
df.exp2.trials,%>%
df.exp3.trials select(trial, r_mean, r_low, r_high, p_mean, p_low, p_high) %>%
mutate(n_bricks = 1) %>%
mutate(across(contains("p_"), ~./100))
.id = "exp") %>%
), mutate(exp = as.numeric(exp))
= corrr::correlate(df.exp123.features %>% select(exp, trial, index, s.avg_y, s.avg_edge_dist, s.avg_angle, everything()) %>% select(-exp, -trial, -index))
df.tmp.corr # xtable(df.tmp.corr)
df.tmp.corr
s.max_y
should be removed= df.exp123.features %>%
df.exp123.features select(-s.max_y)
= df.exp123.features %>%
df.tmp left_join(df.exp123.trials %>%
select(exp, trial, response = r_mean), by = c("exp", "trial"))
= colnames(df.exp123.features)
feats = feats[! feats %in% c("exp", "trial", "index")]
feats
# relevant features for exps 1 and 2: don't include white block features
= list(feats[lapply(feats, (function(x) startsWith(x, "s"))) == T],
l.regress12 lapply(feats, (function(x) startsWith(x, "b"))) == T]
feats[
)= append(l.regress12, list(unlist(l.regress12)))
l.regress12
# relevant features for exp 3: include white block features
= list(feats[lapply(feats, (function(x) startsWith(x, "s"))) == T],
l.regress3 lapply(feats, (function(x) startsWith(x, "b"))) == T],
feats[lapply(feats, (function(x) startsWith(x, "o"))) == T]
feats[
)= append(l.regress3, list(unlist(l.regress3)))
l.regress3
# individual feature correlations per experiment
%>%
df.tmp group_by(exp) %>%
summarize(across(feats, ~cor(., response)))
# individual feature correlations across all experiments
%>%
df.tmp summarize(across(feats, ~cor(., response)))
# feature group regressions
= c("scene", "black", "all")
names12 = c("scene", "black", "other", "all")
names3 = c()
corrs1 = c()
corrs2 = c()
corrs3 = c()
corrs123 for (cols in l.regress12) {
= df.tmp %>%
df.tmp2 filter(exp == 1) %>%
select(all_of(cols), response) %>%
mutate(reg = lm(as.formula(paste("response ~ ",
paste(cols, collapse="+"))))$fitted.values)
= c(corrs1, cor(df.tmp2$reg, df.tmp2$response))
corrs1 = df.tmp %>%
df.tmp2 filter(exp == 2) %>%
select(all_of(cols), response) %>%
mutate(reg = lm(as.formula(paste("response ~ ",
paste(cols, collapse="+"))))$fitted.values)
= c(corrs2, cor(df.tmp2$reg, df.tmp2$response))
corrs2 = df.tmp %>%
df.tmp2 select(all_of(cols), response) %>%
mutate(reg = lm(as.formula(paste("response ~ ",
paste(cols, collapse="+"))))$fitted.values)
= c(corrs123, cor(df.tmp2$reg, df.tmp2$response))
corrs123
}for (cols in l.regress3) {
= df.tmp %>%
df.tmp2 filter(exp == 3) %>%
select(all_of(cols), response) %>%
mutate(reg = lm(as.formula(paste("response ~ ",
paste(cols, collapse="+"))))$fitted.values)
= c(corrs3, cor(df.tmp2$reg, df.tmp2$response))
corrs3
}= data.frame(names12, corrs1, corrs2, corrs123)
df.exp12.rcorrs = data.frame(names3, corrs3) df.exp3.rcorrs
= df.exp123.features %>%
df.tmp left_join(df.exp123.selection %>%
select(exp, trial, index, response), by = c("exp", "trial", "index"))
= colnames(df.exp123.features)
feats = feats[! feats %in% c("exp", "trial", "index")]
feats
# include all feature groups in regression
= list(feats[lapply(feats, (function(x) startsWith(x, "s"))) == T],
l.regress lapply(feats, (function(x) startsWith(x, "b"))) == T],
feats[lapply(feats, (function(x) startsWith(x, "o"))) == T]
feats[
)= append(l.regress, list(unlist(l.regress)))
l.regress
# individual feature correlations per experiment
%>%
df.tmp group_by(exp) %>%
summarize(across(all_of(feats), ~cor(., response)))
# individual feature correlations across all experiments
%>%
df.tmp summarize(across(feats, ~cor(., response)))
= c("scene", "black", "other", "all")
names = c()
corrs1 = c()
corrs2 = c()
corrs3 = c()
corrs123 for (cols in l.regress) {
= df.tmp %>%
df.tmp2 filter(exp == 1) %>%
select(all_of(cols), response) %>%
mutate(reg = lm(as.formula(paste("response ~ ",
paste(cols, collapse="+"))))$fitted.values)
= c(corrs1, cor(df.tmp2$reg, df.tmp2$response))
corrs1 = df.tmp %>%
df.tmp2 filter(exp == 2) %>%
select(all_of(cols), response) %>%
mutate(reg = lm(as.formula(paste("response ~ ",
paste(cols, collapse="+"))))$fitted.values)
= c(corrs2, cor(df.tmp2$reg, df.tmp2$response))
corrs2 = df.tmp %>%
df.tmp2 select(all_of(cols), response) %>%
mutate(reg = lm(as.formula(paste("response ~ ",
paste(cols, collapse="+"))))$fitted.values)
= c(corrs123, cor(df.tmp2$reg, df.tmp2$response))
corrs123 = df.tmp %>%
df.tmp2 filter(exp == 3) %>%
select(all_of(cols), response) %>%
mutate(reg = lm(as.formula(paste("response ~ ",
paste(cols, collapse="+"))))$fitted.values)
= c(corrs3, cor(df.tmp2$reg, df.tmp2$response))
corrs3
}= data.frame(names, corrs1, corrs2, corrs3, corrs123) df.exp123.correlations
tmpX
are models fit to experiments X# uncomment a couple lines if we want weighted regression
= df.exp123.selection %>%
df.tmp.features left_join(df.exp123.features, by = c("exp", "trial", "index"))
= df.tmp.features %>%
tmp1 filter(exp == 1) %>%
glm(response ~ . - exp - trial - index, data = ., family = "binomial")
= df.tmp.features %>%
tmp2 filter(exp == 2) %>%
glm(response ~ . - exp - trial - index, data = ., family = "binomial")
= df.tmp.features %>%
tmp12 filter(exp == 1 | exp == 2) %>%
glm(response ~ . - exp - trial - index, data = ., family = "binomial")
= df.tmp.features %>%
tmp123 glm(response ~ . - exp - trial - index, data = ., family = "binomial")
= df.tmp.features %>%
tmp123sq glm(response ~ (. - exp - trial - index)^2, data = ., family = "binomial")
= tmp123
regression_best
= df.tmp.features %>%
df.exp123.regression mutate(fit1 = logit2prob(predict(tmp1, df.tmp.features))) %>%
mutate(fit2 = logit2prob(predict(tmp2, df.tmp.features))) %>%
mutate(fit12 = logit2prob(predict(tmp12, df.tmp.features))) %>%
mutate(fit123 = logit2prob(predict(tmp123, df.tmp.features))) %>%
mutate(fit123sq = logit2prob(predict(tmp123sq, df.tmp.features)))
# correlations for all bricks, regardless of experiment
= df.exp123.regression %>%
df.tmp select(response, fit1, fit2, fit12, fit123, fit123sq) %>%
::correlate(quiet=T) %>%
corrrselect(response) %>%
filter(row_number() != 1)
# experiment-specific correlations
= df.exp123.regression %>%
df.r_features select(exp, response, fit1, fit2, fit12, fit123, fit123sq) %>%
nest_by(exp) %>%
summarize(corrr::correlate(data, quiet=T)) %>%
filter(row_number() == 1) %>%
pivot_longer(contains("fit"), names_to = "params", values_to = "vals") %>%
pivot_wider(names_from = exp, values_from = vals) %>%
mutate("fit" = c("feat:1", "feat:2", "feat:1,2", "feat:1,2,3", "feat:1,2,3,sq")) %>%
select(fit, exp1 = "1", exp2 = "2", exp3 = "3", -term, -response) %>%
mutate("all" = df.tmp$response)
bind_rows(df.r_model, df.r_features)
# A tibble: 9 × 6
fit exp1 exp2 exp3 all params
<chr> <dbl> <dbl> <dbl> <dbl> <chr>
1 hsm:1 0.863 0.766 0.781 0.805 hsm_6,5,6
2 hsm:2 0.872 0.844 0.795 0.856 hsm_2,5,4
3 hsm:1,2 0.872 0.844 0.795 0.856 hsm_2,5,4
4 hsm:1,2,3 0.872 0.844 0.795 0.856 hsm_2,5,4
5 feat:1 0.784 0.589 0.767 0.674 <NA>
6 feat:2 0.569 0.727 0.395 0.646 <NA>
7 feat:1,2 0.753 0.701 0.631 0.729 <NA>
8 feat:1,2,3 0.757 0.699 0.652 0.730 <NA>
9 feat:1,2,3,sq 0.833 0.864 0.837 0.853 <NA>
= colnames(df.exp1.master %>%
l.full_sci select(contains("hsm")) %>%
rename_with(.fn = ~ str_remove(., "hsm_")))
=
l.perceptual_intervention_dynamics unlist(lapply(
l.full_sci[
l.full_sci,function(v) str_detect(v, "^[:digit:]+,[:digit:]+,[:digit:]+$")))]
=
l.perceptual_intervention unlist(lapply(
l.full_sci[
l.full_sci,function(v) str_detect(v, "^[:digit:]+,[:digit:]+,0$")))]
=
l.perceptual_dynamics unlist(lapply(
l.full_sci[
l.full_sci,function(v) str_detect(v, "^[:digit:]+,0,[:digit:]+$")))]
=
l.intervention_dynamics unlist(lapply(
l.full_sci[
l.full_sci,function(v) str_detect(v, "^0,[:digit:]+,[:digit:]+$")))]
=
l.perceptual unlist(lapply(l.full_sci, function(v) str_detect(v, "^[:digit:]+,0,0$")))]
l.full_sci[=
l.intervention unlist(lapply(l.full_sci, function(v) str_detect(v, "^0,[:digit:]+,0$")))]
l.full_sci[=
l.dynamics unlist(lapply(l.full_sci, function(v) str_detect(v, "^0,0,[:digit:]+$")))]
l.full_sci[
= list(
l.models "pid" = l.perceptual_intervention_dynamics,
"pi" = l.perceptual_intervention,
"pd" = l.perceptual_dynamics,
"id" = l.intervention_dynamics,
"p" = l.perceptual,
"i" = l.intervention,
"d" = l.dynamics
)
# choose whether to do weighted xval or not
= F
weighted
= df.exp123.master %>%
df.summary.model group_by(exp) %>%
mutate(weight = ifelse(weighted, 1 / n(), 1)) %>%
ungroup() %>%
select(exp, trial, index, weight, everything())
= df.summary.model %>%
df.summary.model_losses mutate(across(contains(","), ~(response - .)^2)) %>%
set_names(~ str_replace_all(., "hsm_", ""))
= df.exp123.features %>%
df.summary.features mutate(response = df.exp123.selection$response) %>%
group_by(exp) %>%
mutate(weight = ifelse(weighted, 1 / n(), 1)) %>%
ungroup() %>%
select(exp, trial, index, weight, response, everything()) %>%
mutate(across(-c(exp, trial, index, response, weight), scale))
= nrow(df.summary.model) n_total
load("cache/xval_losses.RData")
=
df.cross_val setNames(data.frame(matrix(ncol = 5, nrow = 0), stringsAsFactors = FALSE),
c("model", "A_losses_mean", "B_losses_mean", "A_losses_sd", "B_losses_sd"))
=
df.losses data.frame("model" = character(),
"A_losses" = double(),
"B_losses" = double(),
"A_params" = character(),
"cors" = double(),
stringsAsFactors = F)
= 200
n = F
verbose
for (i in 1:n) {
if (verbose) print(paste0("Cross val #", i))
# don't have to worry about specialized exp1/exp2 selection
= sample(1:n_total)
order = order[1:ceiling(n_total / 2)]
ixs_A = order[ceiling(n_total / 2):n_total]
ixs_B
= df.summary.model_losses %>% slice(ixs_A)
df.arrangement_A = df.summary.model_losses %>% slice(ixs_B)
df.arrangement_B
= setNames(data.frame(matrix(ncol = 5, nrow = 0)),
df.losses_n c("model", "A_losses", "B_losses", "A_params", "cors"))
# doing cross validation on noise models
for (j in names(l.models)) {
# choosing the subset for this particular model
= df.arrangement_A %>%
df.rearrangement select(weight, l.models[[j]])
# find the best performing model in A
= df.rearrangement %>%
A_best summarize(across(.cols = contains(","),
.fns = ~ weighted.mean(., weight))) %>%
pivot_longer(everything(),
names_to = "parameters",
values_to = "loss") %>%
arrange(loss) %>%
filter(row_number() == 1)
= A_best["parameters"][[1]]
A_best_param = A_best["loss"][[1]]
A_best_loss
# calculate the loss of that model in B
= df.arrangement_B %>%
B_loss select(weight, param = any_of(A_best_param)) %>%
summarize(loss = weighted.mean(param, weight)) %>%
pull(loss)
= df.summary.model %>%
r_xval slice(ixs_B) %>%
summarize(cor = cor(response, !!as.symbol(paste0("hsm_", A_best_param)))) %>%
pull(cor)
nrow(df.losses_n)+1,] = list(j, A_best_loss, B_loss, A_best_param, r_xval)
df.losses_n[if (verbose) print(paste0(A_best_param, " | ", A_best_loss, " | ", B_loss))
}
# cross validation on features
= df.summary.features %>% slice(ixs_A)
df.arrangement_A = df.summary.features %>% slice(ixs_B)
df.arrangement_B
# get the parameters of the features regression to A
= df.arrangement_A %>%
A_fit glm(response ~ . - exp - trial - index - weight, data = .,
weights = weight,
family = "binomial")
= df.arrangement_A %>%
A_loss mutate(regression = A_fit$fitted.values) %>%
summarize(loss = l2(response, regression, w = weight)) %>%
pull(loss)
# calculate the loss and corr on B based on the regression to A
= df.arrangement_B %>%
df.B_info mutate(regression = logit2prob(predict(A_fit, df.arrangement_B))) %>%
summarize(loss = l2(response, regression, w = weight), cor = cor(response, regression))
= df.B_info %>%
B_loss pull(loss)
= df.B_info %>%
r_xval pull(cor)
# record the regressions for the features model
if (i == 1) {
= setNames(
df.xval_f_regressions data.frame(matrix(ncol = length(A_fit$coefficients) + 2, nrow = 0)),
c(names(A_fit$coefficients), "A_loss", "B_loss"))
}= c(A_fit$coefficients, "A_loss" = A_loss, "B_loss" = B_loss)
df.xval_f_regressions[i, ]
if (verbose) print(paste0("f", " | ", A_loss, " | ", B_loss))
nrow(df.losses_n) + 1, ] = list("f", A_loss, B_loss, NA, r_xval)
df.losses_n[
# cross validation on features^2
= df.arrangement_A %>%
A_fit glm(response ~ (. - exp - trial - index - weight)^2, data = .,
weights = weight,
family = "binomial")
= df.arrangement_A %>%
A_loss mutate(regression = A_fit$fitted.values) %>%
summarize(loss = l2(response, regression, w = weight)) %>%
pull(loss)
# calculate the loss and corr on B based on the regression to A
= df.arrangement_B %>%
df.B_info mutate(regression = logit2prob(predict(A_fit, df.arrangement_B))) %>%
summarize(loss = l2(response, regression, w = weight), cor = cor(response, regression))
= df.B_info %>%
B_loss pull(loss)
= df.B_info %>%
r_xval pull(cor)
if (verbose) print(paste0("fsq", " | ", A_loss, " | ", B_loss))
nrow(df.losses_n) + 1, ] = list("fsq", A_loss, B_loss, NA, r_xval)
df.losses_n[
= df.losses %>% bind_rows(df.losses_n)
df.losses
}
= df.losses %>%
df.losses mutate(diffs = B_losses - A_losses)
# save(df.losses, file = "cache/xval_losses.RData")
# df.losses = df.losses %>%
# group_by(x = ceiling(row_number() / 9)) %>%
# filter(!any(is.nan(B_losses))) %>%
# ungroup()
#
# n_notnan = nrow(df.losses) / 9
= df.losses %>%
df.losses filter(model != "fsq")
= df.losses %>%
df.xval_params group_by(model, A_params) %>%
summarize(n = n(),
p5 = quantile(cors, .05),
p50 = mean(cors),
p95 = quantile(cors, .95)) %>%
arrange(model, -n)
= df.xval_params %>%
xval_bestmodel filter(model == "pid") %>%
filter(row_number() == 1) %>%
pull(A_params) %>%
paste0("hsm_", .)
# mean, var of xval losses
= df.losses %>%
df.xval_rmses group_by(model) %>%
summarize(across(c("A_losses", "B_losses", "diffs"),
c("mean" = mean,
"sd" = sd,
p5 = ~quantile(., .05, na.rm=T),
p50 = ~mean(., na.rm=T),
p95 = ~quantile(., .95, na.rm=T)))) %>%
arrange(B_losses_mean) %>%
mutate(across(contains('_'), ~.*100)) %>%
select(model, contains("B_losses_p"), everything())
# dif between xval losses and full HSM losses
= df.losses %>%
df.xval_rmse_difs select(model, B_losses) %>%
group_by(x = ceiling(row_number() / 8)) %>%
pivot_wider(names_from = "model", values_from = "B_losses") %>%
rename(xval_num = x) %>%
ungroup() %>%
mutate(across(c(everything(), -xval_num), ~ . - pid)) %>%
summarize(across(c(everything(), -xval_num),
c(p5 = ~quantile(., .05), p50 = ~mean(.), p95 = ~quantile(., .95)))) %>%
mutate(across(everything(), ~.*100))
# dif between xval losses and full HSM losses
= df.losses %>%
df.xval_corr_difsselect(model, cors) %>%
group_by(x = ceiling(row_number() / 8)) %>%
pivot_wider(names_from = "model", values_from = "cors") %>%
rename(xval_num = x) %>%
ungroup() %>%
mutate(across(c(everything(), -xval_num), ~ pid - .)) %>%
summarize(across(c(everything(), -xval_num),
c(p5 = ~quantile(., .05), p50 = ~mean(.), p95 = ~quantile(., .95))))
## xval correlations
= df.xval_params %>%
df.xval_corrs group_by(model) %>%
filter(row_number() == 1) %>%
ungroup()
ggplot(data = df.losses %>% filter(!is.nan(B_losses)),
mapping = aes(
x = model %>% reorder(B_losses),
y = B_losses
+
)) geom_point(alpha = 0.08,
color = "coral",
position = position_jitter(width = 0.02, height = 0),
size = 2) +
stat_summary(fun = ~ mean(.),
fun.min = ~ mean(.) - sd(.),
fun.max = ~ mean(.) + sd(.),
# fun.args = list(conf.int=.99),
geom = "pointrange",
shape = 21,
size = 1,
fill = "white",
color = "red") +
labs(x = "model", y = "cross validation loss")
# isolate hsm and features predictions on exp/trial basis
= df.exp123.master %>%
df.tmp select(exp, trial, hsm = any_of(fit_bestmodel)) %>%
mutate(features = df.exp123.regression$fit123) %>%
group_by(exp, trial) %>%
mutate(n_bricks = 1) %>%
summarize(across(c(hsm, features, n_bricks), sum), .groups = "drop") %>%
filter(exp < 3)
# do regressions
= df.exp123.trials %>%
exp1.pr_regression filter(exp == 1) %>%
mutate(p_mean = p_mean / n_bricks) %>%
lm(r_mean ~ p_mean, data = .)
= df.exp123.trials %>%
exp2.pr_regression filter(exp == 2) %>%
mutate(p_mean = p_mean / n_bricks) %>%
lm(r_mean ~ p_mean, data = .)
# predict regressions
= df.tmp %>%
df.exp1.pr_regression filter(exp == 1) %>%
mutate(r_hsm =
predict(exp1.pr_regression,
%>%
df.tmp filter(exp == 1) %>%
mutate(p_mean = hsm / n_bricks))) %>%
mutate(r_features =
predict(exp1.pr_regression,
%>%
df.tmp filter(exp == 1) %>%
mutate(p_mean = features / n_bricks))) %>%
mutate(r_prediction_low =
predict(exp1.pr_regression,
%>%
df.exp123.trials filter(exp == 1) %>%
mutate(p_mean = p_low / n_bricks))) %>%
mutate(r_prediction_high =
predict(exp1.pr_regression,
%>%
df.exp123.trials filter(exp == 1) %>%
mutate(p_mean = p_high / n_bricks))) %>%
mutate(r_prediction_mean = exp1.pr_regression$fitted.values)
= df.tmp %>%
df.exp2.pr_regression filter(exp == 2) %>%
mutate(r_hsm =
predict(exp2.pr_regression,
%>%
df.tmp filter(exp == 2) %>%
mutate(p_mean = hsm / n_bricks))) %>%
mutate(r_features =
predict(exp2.pr_regression,
%>%
df.tmp filter(exp == 2) %>%
mutate(p_mean = features / n_bricks))) %>%
mutate(r_prediction_low =
predict(exp2.pr_regression,
%>%
df.exp123.trials filter(exp == 2) %>%
mutate(p_mean = p_low / n_bricks))) %>%
mutate(r_prediction_high =
predict(exp2.pr_regression,
%>%
df.exp123.trials filter(exp == 2) %>%
mutate(p_mean = p_high / n_bricks))) %>%
mutate(r_prediction_mean = exp2.pr_regression$fitted.values)
# combine regressions
= bind_rows(
df.exp12.pr_regression
df.exp1.pr_regression,
df.exp2.pr_regression,.id = "exp") %>%
mutate(exp = as.numeric(exp)) %>%
left_join(df.exp12.trials %>%
select(exp, trial, r_mean, r_low, r_high, p_mean, p_low, p_high),
by = c("trial", "exp"))
%>%
df.exp123.master select(exp, trial, response, fall, hsm = any_of(fit_bestmodel)) %>%
mutate(acc = ifelse(fall == 0, 1 - hsm, hsm)) %>%
group_by(exp) %>%
summarize(mean(acc))
%>%
df.exp123.master select(exp, trial, response, fall, hsm = any_of(fit_bestmodel)) %>%
mutate(acc = ifelse(fall == 0, 1 - hsm, hsm)) %>%
group_by(fall, exp) %>%
summarize(mean(acc))
%>%
df.exp123.regression select(exp, trial, response, features = fit123) %>%
mutate(fall = df.exp123.master$fall) %>%
mutate(acc = ifelse(fall == 0, 1 - features, features)) %>%
group_by(exp) %>%
summarize(mean(acc))
%>%
df.exp123.regression select(exp, trial, response, features = fit123) %>%
mutate(fall = df.exp123.master$fall) %>%
mutate(acc = ifelse(fall == 0, 1 - features, features)) %>%
group_by(fall, exp) %>%
summarize(mean(acc))
# exp 1
= df.exp1.selection.brick %>%
df.tmp left_join(df.exp1.info %>%
select(trial, index, fall),
by = c("trial", "index")) %>%
mutate(diff = abs(response - fall)) %>%
group_by(trial, fall) %>%
summarize(nred = n(),
acc = 1 - mean(diff),
pred_selection = mean(response),
pred_truth = mean(fall)) %>%
ungroup() %>%
left_join(df.exp1.trials %>%
select(trial, pred_prediction = p_mean, pred_responsibility = r_mean),
by = "trial")
weighted.mean(df.tmp$acc, df.tmp$nred) # participant accuracy vs ground truth
weighted.mean(df.tmp[df.tmp$fall == 1,]$acc, df.tmp[df.tmp$fall == 1,]$nred) # accuracy for blocks that fall
weighted.mean(df.tmp[df.tmp$fall == 0,]$acc, df.tmp[df.tmp$fall == 0,]$nred) # accuracy for blocks that don't
cor(df.tmp %>% select(pred_selection, pred_prediction, pred_truth, pred_responsibility))
# accuracy of selection vs prediction conditions
%>%
df.tmp filter(fall == 1) %>%
group_by(trial) %>%
summarize(nbricks = sum(nred),
pred_selection = sum(nred * pred_selection),
pred_prediction = mean(pred_prediction),
pred_truth = nred) %>%
summarize(sel_err = sum(l2(pred_selection, pred_truth)),
pred_err = sum(l2(pred_prediction, pred_truth)))
# exp 2
= df.exp2.selection.brick %>%
df.tmp left_join(df.exp2.info %>%
select(trial, index, fall),
by = c("trial", "index")) %>%
mutate(diff = abs(response - fall)) %>%
group_by(trial, fall) %>%
summarize(nred = n(),
acc = 1 - mean(diff),
pred_selection = mean(response),
pred_truth = mean(fall)) %>%
ungroup() %>%
left_join(df.exp1.trials %>%
select(trial, pred_prediction = p_mean, pred_responsibility = r_mean),
by = "trial")
weighted.mean(df.tmp$acc, df.tmp$nred) # participant accuracy vs ground truth
weighted.mean(df.tmp[df.tmp$fall == 1,]$acc, df.tmp[df.tmp$fall == 1,]$nred)
weighted.mean(df.tmp[df.tmp$fall == 0,]$acc, df.tmp[df.tmp$fall == 0,]$nred)
cor(df.tmp %>% select(pred_selection, pred_prediction, pred_truth, pred_responsibility))
# exp 3
%>%
df.exp3.trials mutate(fall = as.numeric(fall) * 100) %>%
summarize(100 - mean(abs(p_mean - fall)))
= strsplit(fit_bestmodel, '_')[[1]][[2]]
fit_bestparams
= read_json(paste0("simulations/exp1/perceptual,impulse-aligned,dynamics_",fit_bestparams,".json")) %>%
df.tmp %>%
as.tbl_json gather_object("trial") %>%
gather_array("simulation") %>%
gather_array("number") %>%
append_values_number("index") %>%
mutate(trial = str_replace_all(trial, "trial_", ""),
trial = as.numeric(trial)) %>%
select(-document.id) %>%
group_by(trial, simulation) %>%
count(name="prediction") %>%
group_by(trial, prediction) %>%
count(name = "frequency")
= df.tmp %>%
df.tmp2 group_by(trial) %>%
summarize(frequency = 200 - sum(frequency)) %>%
mutate(prediction = 0) %>%
select(trial, prediction, frequency) %>%
bind_rows(df.tmp, .) %>%
arrange(trial, prediction) %>%
ungroup()
= df.tmp2 %>%
df.tmp.sim_vars group_by(trial) %>%
summarize(sim = wtd.var(prediction, frequency))
= df.exp1.prediction %>%
df.tmp.human_vars group_by(trial) %>%
summarize(human = var(response))
= df.tmp.sim_vars %>%
df.tmp.vars1 left_join(df.tmp.human_vars, by="trial")
= df.tmp.vars1 %>%
var_regression lm(human ~ sim, data = .)
# summary(var_regression)
= read_json(paste0("simulations/exp2/perceptual,impulse-aligned,dynamics_",fit_bestparams,".json")) %>%
df.tmp %>%
as.tbl_json gather_object("trial") %>%
gather_array("simulation") %>%
gather_array("number") %>%
append_values_number("index") %>%
mutate(trial = str_replace_all(trial, "trial_", ""),
trial = as.numeric(trial)) %>%
select(-document.id) %>%
group_by(trial, simulation) %>%
count(name="prediction") %>%
group_by(trial, prediction) %>%
count(name = "frequency")
= tibble(trial = 1:42, prediction = 0, frequency = 0)
df.tmp.template = df.tmp %>%
df.tmp2 full_join(df.tmp.template, by = c("trial", "prediction", "frequency")) %>%
group_by(trial) %>%
summarize(frequency = 200 - sum(frequency)) %>%
mutate(prediction = 0) %>%
select(trial, prediction, frequency) %>%
bind_rows(df.tmp, .) %>%
arrange(trial, prediction) %>%
ungroup()
= df.tmp2 %>%
df.tmp.sim_vars group_by(trial) %>%
summarize(sim = wtd.var(prediction, frequency))
= df.exp2.prediction %>%
df.tmp.human_vars group_by(trial) %>%
summarize(human = var(response))
= df.tmp.sim_vars %>%
df.tmp.vars2 left_join(df.tmp.human_vars, by="trial")
= df.tmp.vars2 %>%
var_regression lm(human ~ sim, data = .)
# summary(var_regression)
= df.tmp.vars1 %>%
df.tmp.vars12 mutate(exp = 1) %>%
rbind(df.tmp.vars2 %>% mutate(exp = 2))
= df.tmp.vars12 %>%
var_regression lm(human ~ sim, data = .)
# summary(var_regression)
= ggplot(df.tmp.vars1, aes(sim, human)) +
plot.exp1.vars geom_point(size = 3, color='black', alpha=.8) +
geom_abline(aes(intercept = 0, slope = 1, color='#00A9FF'), linetype='dashed', size=1) +
geom_abline(aes(intercept = 0, slope = .5, color='#CD9600'), linetype='dashed', size=1) +
geom_abline(aes(intercept = 0, slope = .25, color='#F8766D'), linetype='dashed', size=1) +
geom_smooth(method='lm', color='black', alpha=0, fullrange=T, size=1.5) +
scale_x_continuous(limits = c(0, 10), expand=c(.05,0)) +
scale_y_continuous(limits = c(0, 20), expand=c(.05,0)) +
annotate(geom = "text", x=0,y=Inf,
label = paste0("k = 0.70"),
hjust = 0, vjust = 1, size = 8) +
annotate(geom = "text", x=0,y=Inf,
label = "v = 1.47",
hjust = 0, vjust = 2.5, size = 8) +
labs(x = "variances (CSM)",
y = "variances (human)",
title = "Experiment 1",
tag = "a)") +
theme(legend.position = c(.9, .2),
legend.title = element_blank(),
legend.background = element_rect(colour = "black"),
plot.title = element_text(face="bold")) +
scale_color_identity(labels=c("k=1","k=2","k=4"), guide="legend")
= ggplot(df.tmp.vars2, aes(sim, human)) +
plot.exp2.vars geom_point(size = 3, color='black', alpha=.8) +
geom_abline(intercept = 0, slope = 1, color='#00A9FF', linetype='dashed', size=1) +
geom_abline(intercept = 0, slope = .5, color='#CD9600', linetype='dashed', size=1) +
geom_abline(intercept = 0, slope = .25, color='#F8766D', linetype='dashed', size=1) +
geom_smooth(method='lm', color='black', alpha=0, fullrange=T, size=1.5) +
scale_x_continuous(limits = c(0, 10), expand=c(.05,0)) +
scale_y_continuous(limits = c(0, 20), expand=c(.05,0)) +
annotate(geom = "text", x=0,y=Inf,
label = paste0("k = 1.39"),
hjust = 0, vjust = 1, size = 8) +
annotate(geom = "text", x=0,y=Inf,
label = "v = 2.90",
hjust = 0, vjust = 2.5, size = 8) +
labs(x = "variances (CSM)",
y = "variances (human)",
title = "Experiment 2",
tag = "b)") +
theme(plot.title = element_text(face="bold"))
= ggplot(df.tmp.vars12, aes(sim, human)) +
plot.exp12.vars geom_point(size = 3, color='black', alpha=.8) +
geom_abline(intercept = 0, slope = 1, color='#00A9FF', linetype='dashed', size=1) +
geom_abline(intercept = 0, slope = .5, color='#CD9600', linetype='dashed', size=1) +
geom_abline(intercept = 0, slope = .25, color='#F8766D', linetype='dashed', size=1) +
geom_smooth(method='lm', color='black', alpha=0, fullrange=T, size=1.5) +
scale_x_continuous(limits = c(0, 10), expand=c(.05,0), breaks=seq(0,10,2)) +
scale_y_continuous(limits = c(0, 20), expand=c(.05,0)) +
annotate(geom = "text", x=0,y=Inf,
label = paste0("k = 1.00"),
hjust = 0, vjust = 1, size = 8) +
annotate(geom = "text", x=0,y=Inf,
label = "v = 2.27",
hjust = 0, vjust = 2.5, size = 8) +
labs(x = "variances (CSM)",
y = "variances (human)",
title = "Experiments 1 & 2") +
theme(plot.title = element_text(face="bold"))
| plot.exp2.vars plot.exp1.vars
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
# ggsave("../../figures/plots/variances.pdf", width = 12, height = 5)
= df.exp1.pr_regression %>%
df.plot select(trial,
p_hsm = hsm, r_hsm = r_hsm,
p_features = features, r_features = r_features,
r_human = r_prediction_mean) %>%
left_join(df.exp1.trials %>%
select(trial, p_human = p_mean,
p_human_high = p_high,
p_human_low = p_low), by = "trial") %>%
left_join(df.exp1.info %>%
group_by(trial) %>%
summarize(p_truth = sum(fall)),
by = "trial") %>%
mutate(error_p_hsm = p_hsm - p_truth,
error_p_feat = p_features - p_truth,
error_p_human = p_human - p_truth)
= ggplot(df.plot, aes(x = error_p_hsm, y = error_p_human)) +
plot.exp1.errors_hsm geom_abline(intercept = 0, slope = 1, linetype = 2) +
geom_hline(yintercept = 0, linetype=3, alpha=.6) +
geom_vline(xintercept = 0, linetype=3, alpha=.6) +
geom_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$error_p_hsm, df.plot$error_p_human) %>% round(2)),
x = -3, y = Inf, hjust = 0, vjust = 1.5, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$error_p_hsm, df.plot$error_p_human) %>% round(2)),
x = -3, y = Inf, hjust = 0, vjust = 3, size = 8) +
labs(title = "Experiment 1", x = "prediction error (CSM)", y = "prediction error (human)") +
scale_x_continuous(breaks = seq(-3, 6, 1), labels = seq(-3, 6, 1), limits = c(-3, 7)) +
scale_y_continuous(breaks = seq(-3, 6, 1), labels = seq(-3, 6, 1), limits = c(-3, 7)) +
theme(text = element_text(size = 20),
panel.grid = element_blank(),
plot.title = element_text(face = "bold"),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)))
= ggplot(df.plot, aes(x = error_p_feat, y = error_p_human)) +
plot.exp1.errors_feat geom_abline(intercept = 0, slope = 1, linetype = 2) +
geom_hline(yintercept = 0, linetype=3, alpha=.6) +
geom_vline(xintercept = 0, linetype=3, alpha=.6) +
geom_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$error_p_feat, df.plot$error_p_human) %>% round(2)),
x = -3, y = Inf, hjust = 0, vjust = 1.5, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$error_p_feat, df.plot$error_p_human) %>% round(2)),
x = -3, y = Inf, hjust = 0, vjust = 3, size = 8) +
labs(x = "prediction error (features)", y = "prediction error (human)") +
scale_x_continuous(breaks = seq(-3, 6, 1), labels = seq(-3, 6, 1), limits = c(-3, 7)) +
scale_y_continuous(breaks = seq(-3, 6, 1), labels = seq(-3, 6, 1), limits = c(-3, 7)) +
theme(text = element_text(size = 20),
panel.grid = element_blank(),
plot.title = element_text(face = "bold"),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)))
= df.plot df.plot1
= df.exp2.pr_regression %>%
df.plot select(trial,
p_hsm = hsm, r_hsm = r_hsm,
p_features = features, r_features = r_features,
r_human = r_prediction_mean) %>%
left_join(df.exp2.trials %>%
select(trial, p_human = p_mean,
p_human_high = p_high,
p_human_low = p_low), by = "trial") %>%
left_join(df.exp2.info %>%
group_by(trial) %>%
summarize(p_truth = sum(fall)),
by = "trial") %>%
mutate(error_p_hsm = p_hsm - p_truth,
error_p_feat = p_features - p_truth,
error_p_human = p_human - p_truth)
= ggplot(df.plot, aes(x = error_p_feat, y = error_p_human)) +
plot.exp2.errors_feat geom_abline(intercept = 0, slope = 1, linetype = 2) +
geom_hline(yintercept = 0, linetype=3, alpha=.6) +
geom_vline(xintercept = 0, linetype=3, alpha=.6) +
geom_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$error_p_feat, df.plot$error_p_human) %>% round(2)),
x = -3, y = Inf, hjust = 0, vjust = 1.5, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$error_p_feat, df.plot$error_p_human) %>% round(2)),
x = -3, y = Inf, hjust = 0, vjust = 3, size = 8) +
labs(x = "prediction error (features)", y = element_blank()) +
scale_x_continuous(breaks = seq(-3, 6, 1), labels = seq(-3, 6, 1), limits = c(-3, 7)) +
scale_y_continuous(breaks = seq(-3, 6, 1), labels = seq(-3, 6, 1), limits = c(-3, 7)) +
theme(text = element_text(size = 20),
panel.grid = element_blank(),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)))
= ggplot(df.plot, aes(x = error_p_hsm, y = error_p_human)) +
plot.exp2.errors_hsm geom_abline(intercept = 0, slope = 1, linetype = 2) +
geom_hline(yintercept = 0, linetype=3, alpha=.6) +
geom_vline(xintercept = 0, linetype=3, alpha=.6) +
geom_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$error_p_hsm, df.plot$error_p_human) %>% round(2)),
x = -3, y = Inf, hjust = 0, vjust = 1.5, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$error_p_hsm, df.plot$error_p_human) %>% round(2)),
x = -3, y = Inf, hjust = 0, vjust = 3, size = 8) +
labs(title = "Experiment 2", x = "prediction error (CSM)", y = element_blank()) +
scale_x_continuous(breaks = seq(-3, 6, 1), labels = seq(-3, 6, 1), limits = c(-3, 7)) +
scale_y_continuous(breaks = seq(-3, 6, 1), labels = seq(-3, 6, 1), limits = c(-3, 7)) +
theme(text = element_text(size = 20),
panel.grid = element_blank(),
plot.title = element_text(face = "bold"),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)))
= df.plot df.plot2
+ plot.exp2.errors_hsm) /
(plot.exp1.errors_hsm + plot.exp2.errors_feat) (plot.exp1.errors_feat
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 1 rows containing non-finite values (stat_smooth).
Warning: Removed 1 rows containing missing values (geom_point).
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 1 rows containing non-finite values (stat_smooth).
Removed 1 rows containing missing values (geom_point).
# ggsave("../../figures/plots/error_corrs.pdf", width = 10, height = 10)
= df.exp123.selection %>%
df.tmp.feat_hsm left_join(df.exp123.features, by = c("exp", "trial", "index")) %>%
left_join(df.exp123.master %>% select(exp, trial, index, hsm = any_of(fit_bestmodel)),
by = c("exp", "trial", "index"))
= df.tmp.feat_hsm %>%
tmp1 filter(exp == 1) %>%
glm(response ~ . - exp - trial - index, data = ., family = "binomial")
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
= df.tmp.feat_hsm %>%
tmp2 filter(exp == 2) %>%
glm(response ~ . - exp - trial - index, data = ., family = "binomial")
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
= df.tmp.feat_hsm %>%
tmp12 filter(exp == 1 | exp == 2) %>%
glm(response ~ . - exp - trial - index, data = ., family = "binomial")
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
= df.tmp.feat_hsm %>%
tmp123 glm(response ~ . - exp - trial - index, data = ., family = "binomial")
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
= tmp123
regression_all_best
= df.tmp.feat_hsm %>%
df.exp123.regression_all mutate(fit1 = logit2prob(predict(tmp1, df.tmp.feat_hsm))) %>%
mutate(fit2 = logit2prob(predict(tmp2, df.tmp.feat_hsm))) %>%
mutate(fit12 = logit2prob(predict(tmp12, df.tmp.feat_hsm))) %>%
mutate(fit123 = logit2prob(predict(tmp123, df.tmp.feat_hsm)))
# correlations for all bricks, regardless of experiment
= df.exp123.regression_all %>%
df.tmp select(response, fit1, fit2, fit12, fit123) %>%
::correlate(quiet=T) %>%
corrrselect(response) %>%
filter(row_number() != 1)
# experiment-specific correlations
= df.exp123.regression_all %>%
df.r_features_all select(exp, response, fit1, fit2, fit12, fit123) %>%
nest_by(exp) %>%
summarize(corrr::correlate(data, quiet=T)) %>%
filter(row_number() == 1) %>%
pivot_longer(contains("fit"), names_to = "params", values_to = "vals") %>%
pivot_wider(names_from = exp, values_from = vals) %>%
mutate("fit" = c("f-hsm:1", "f-hsm:2", "f-hsm:1,2", "f-hsm:1,2,3")) %>%
select(fit, exp1 = "1", exp2 = "2", exp3 = "3", -term, -response) %>%
mutate("all" = df.tmp$response)
bind_rows(df.r_model, df.r_features, df.r_features_all)
# summary(regression_all_best)
# A tibble: 13 × 6
fit exp1 exp2 exp3 all params
<chr> <dbl> <dbl> <dbl> <dbl> <chr>
1 hsm:1 0.863 0.766 0.781 0.805 hsm_6,5,6
2 hsm:2 0.872 0.844 0.795 0.856 hsm_2,5,4
3 hsm:1,2 0.872 0.844 0.795 0.856 hsm_2,5,4
4 hsm:1,2,3 0.872 0.844 0.795 0.856 hsm_2,5,4
5 feat:1 0.784 0.589 0.767 0.674 <NA>
6 feat:2 0.569 0.727 0.395 0.646 <NA>
7 feat:1,2 0.753 0.701 0.631 0.729 <NA>
8 feat:1,2,3 0.757 0.699 0.652 0.730 <NA>
9 feat:1,2,3,sq 0.833 0.864 0.837 0.853 <NA>
10 f-hsm:1 0.882 0.815 0.835 0.843 <NA>
11 f-hsm:2 0.828 0.865 0.740 0.839 <NA>
12 f-hsm:1,2 0.869 0.860 0.782 0.864 <NA>
13 f-hsm:1,2,3 0.871 0.859 0.797 0.864 <NA>
= df.exp123.bestmodels %>%
df.tmp separate(modelname, c("hsm", "params"), sep="_") %>%
separate(params, c("perceptual", "intervention", "dynamics")) %>%
select(-hsm) %>%
mutate(across(c("perceptual", "intervention", "dynamics"), as.numeric))
= df.tmp %>%
df.tile_pi group_by(perceptual, intervention) %>%
summarize(loss = mean(value))
= ggplot(data = df.tile_pi,
plot.tile_pi mapping = aes(x = perceptual, y = intervention)) +
geom_tile(aes(fill = loss),
color = "black") +
scale_fill_gradient(low = "white", high = "black", limits=c(.4,.8), breaks=c(.40,.80), labels=c("0.40", "0.80")) +
coord_cartesian(expand = F) +
scale_x_continuous(breaks = seq(0, 10, 1)) +
scale_y_continuous(breaks = seq(0, 10, 1)) +
labs(x = "perceptual noise",
y = "intervention noise")
= df.tmp %>%
df.tile_pd group_by(perceptual, dynamics) %>%
summarize(loss = mean(value))
= ggplot(data = df.tile_pd,
plot.tile_pd mapping = aes(x = perceptual, y = dynamics)) +
geom_tile(aes(fill = loss),
color = "black") +
scale_fill_gradient(low = "white", high = "black", limits=c(.429,.51), breaks=c(.43,.51)) +
coord_cartesian(expand = F) +
scale_x_continuous(breaks = seq(0, 10, 1)) +
scale_y_continuous(breaks = seq(0, 10, 1)) +
labs(x = "perceptual noise",
y = "dynamics noise")
= df.tmp %>%
df.tile_id group_by(intervention, dynamics) %>%
summarize(loss = mean(value))
= ggplot(data = df.tile_id,
plot.tile_id mapping = aes(x = intervention, y = dynamics)) +
geom_tile(aes(fill = loss),
color = "black") +
scale_fill_gradient(low = "white", high = "black", limits=c(.419,.54), breaks=c(.42,.54)) +
coord_cartesian(expand = F) +
scale_x_continuous(breaks = seq(0, 10, 1)) +
scale_y_continuous(breaks = seq(0, 10, 1)) +
labs(x = "intervention noise",
y = "dynamics noise")
| plot.tile_pd | plot.tile_id +
plot.tile_pi plot_annotation(tag_levels = 'a', tag_suffix = ')') &
theme(plot.tag = element_text(face='bold'))
# ggsave("../../figures/plots/sensitivity_analysis.pdf", width = 14, height = 3.5)
# CSM
= nrow(df.exp123.master)
n = 1
k = (df.exp123.bestmodels %>% filter(modelname == fit_bestmodel) %>% pull(value)) * n
sum_nll 2 * k + 2 * sum_nll # AIC
log(n) * k + 2 * sum_nll # BIC
# features model
= df.exp123.regression %>%
sum_nll select(response, fit123) %>%
summarize(val = nll(response, fit123)) %>%
pull(val) * n
= 13
k 2 * k + 2 * sum_nll
log(n) * k + 2 * sum_nll
= df.exp123.features %>%
df.tmp.features select(exp, trial, contains("s."), contains("b.")) %>%
group_by(trial, exp) %>%
filter(row_number() == 1) %>%
ungroup() %>%
left_join(df.exp123.trials %>%
select(exp,trial,r_mean, r_low, r_high),
by = c("exp", "trial")) %>%
filter(exp != 3)
= df.tmp.features %>%
tmp12 lm(r_mean ~ . - exp - trial - r_low - r_high, data = .)
= df.tmp.features %>%
df.exp12.regression_resp mutate(fit12 = predict(tmp12, df.tmp.features))
cor(df.exp12.regression_resp$fit12, df.exp12.regression_resp$r_mean)
# cor(df.exp12.pr_regression$r_hsm, df.exp12.pr_regression$r_mean)
rmse(df.exp12.regression_resp$fit12, df.exp12.regression_resp$r_mean)
# rmse(df.exp12.pr_regression$r_hsm, df.exp12.pr_regression$r_mean)
# hsm
= df.exp123.master %>%
df.tmp.hsm filter(exp != 3) %>%
group_by(exp, trial) %>%
summarize(across(c(response, contains("hsm")), mean), .groups="drop") %>%
rename_with(.fn = ~ str_remove(., "hsm_")) %>%
left_join(df.exp12.trials %>% select(exp, trial, r_mean, r_low, r_high)) %>%
select(exp, trial, contains("r_"), everything(), -response)
= df.tmp.hsm %>%
sel_resp_fit select(exp, trial, contains("r_"), selection_hsm = any_of("2,5,5")) %>%
lm(r_mean ~ selection_hsm, data = .)
= df.tmp.hsm %>%
df.tmp select(exp, trial, contains("r_"), selection_hsm = any_of("2,5,5")) %>%
mutate(hsm_mean = predict(sel_resp_fit, .))
cor(df.tmp$r_mean, df.tmp$hsm_mean)
rmse(df.tmp$hsm_mean, df.tmp$r_mean)
= ggplot(df.tmp, aes(hsm_mean, r_mean)) +
plot.sel_resp_hsm geom_abline(intercept = 0, slope = 1, linetype = 2) +
geom_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = r_low, ymax = r_high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.tmp$hsm_mean, df.tmp$r_mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 1.5, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.tmp$hsm_mean, df.tmp$r_mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 3, size = 8) +
annotate(geom = "text",
label = "", x = 0, y = 0, hjust = 0) +
labs(title = "",
x = "CSM",
y = "responsibility condition") +
scale_x_continuous(breaks = seq(0, 100, 25), labels = seq(0, 100, 25), limits = c(0, 100)) +
scale_y_continuous(breaks = seq(0, 100, 25), labels = seq(0, 100, 25), limits = c(0, 100)) +
theme(text = element_text(size = 20),
plot.title = element_text(face = "bold"),
panel.grid = element_blank())
= ggplot(df.exp12.regression_resp, aes(fit12, r_mean)) +
plot.sel_resp_feat geom_abline(intercept = 0, slope = 1, linetype = 2) +
geom_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = r_low, ymax = r_high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.exp12.regression_resp$fit12, df.exp12.regression_resp$r_mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 1.5, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.exp12.regression_resp$fit12, df.exp12.regression_resp$r_mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 3, size = 8) +
annotate(geom = "text",
label = "", x = 0, y = 0, hjust = 0) +
labs(title = "",
x = "features model",
y = "responsibility condition") +
scale_x_continuous(breaks = seq(0, 100, 25), labels = seq(0, 100, 25), limits = c(0, 100)) +
scale_y_continuous(breaks = seq(0, 100, 25), labels = seq(0, 100, 25), limits = c(0, 100)) +
theme(text = element_text(size = 20),
plot.title = element_text(face = "bold"),
panel.grid = element_blank())
+ plot.sel_resp_feat +
plot.sel_resp_hsm plot_annotation(tag_levels = "a", tag_suffix = ")") &
theme(plot.tag = element_text(face = 'bold'))
# ggsave("../../figures/plots/selection_responsibility.pdf",width = 10,height = 5)
load("cache/xval_losses_resp.RData")
=
df.cross_val_resp setNames(data.frame(matrix(ncol = 5, nrow = 0), stringsAsFactors = FALSE),
c("model", "A_losses_mean", "B_losses_mean", "A_losses_sd", "B_losses_sd"))
=
df.losses_resp data.frame("model" = character(),
"A_params" = character(),
"A_fit" = list(),
"A_losses" = double(),
"B_losses" = double(),
"cors" = double(),
stringsAsFactors = F)
= colnames(df.tmp.hsm %>%
l.full_sci select(-exp, -trial, -contains("r_")))
= 200
n = nrow(df.tmp.hsm)
n_total
for (i in 1:n) {
# don't have to worry about specialized exp1/exp2 selection
= sample(1:n_total)
order = order[1:ceiling(n_total / 2)]
ixs_A = order[ceiling(n_total / 2):n_total]
ixs_B
= df.tmp.hsm %>% slice(ixs_A)
df.arrangement_A = df.tmp.hsm %>% slice(ixs_B)
df.arrangement_B
= setNames(data.frame(matrix(ncol = 6, nrow = 0)),
df.losses_n c("model", "A_params", "A_fit", "A_losses", "B_losses", "cors"))
# find the best performing model in A
= df.arrangement_A %>%
A_best select(-exp, -trial, -r_low, -r_high) %>%
pivot_longer(contains(","), names_to = "model", values_to = "selection") %>%
nest(data = -model) %>%
mutate(fit = map(data, ~lm(r_mean ~ selection, data = .x))) %>%
mutate(pred = map2(fit, data, ~mutate(.y, pred = predict(object = .x, newdata = .y)))) %>%
select(-data) %>%
unnest(pred) %>%
group_by(model, fit) %>%
summarize(loss = rmse(r_mean, pred), .groups="drop") %>%
arrange(loss) %>%
filter(row_number() == 1)
= A_best["fit"][[1]]
A_best_fit = A_best["model"][[1]]
A_best_param = A_best["loss"][[1]]
A_best_loss
# calculate the loss of that model in B
= df.arrangement_B %>%
B_best select(r_mean, selection = any_of(A_best_param)) %>%
mutate(pred = predict(A_best_fit[[1]], .))
= rmse(B_best$pred, B_best$r_mean)
B_loss = cor(B_best$pred, B_best$r_mean)
r_xval nrow(df.losses_n)+1,] = list("hsm", A_best_param, list(A_best_fit[[1]]$coefficients), A_best_loss, B_loss, r_xval)
df.losses_n[
# cross validation on features
= df.tmp.features %>% slice(ixs_A)
df.arrangement_A = df.tmp.features %>% slice(ixs_B)
df.arrangement_B
# get the parameters of the features regression to A
= df.arrangement_A %>%
A_fit lm(r_mean ~ . - exp - trial - r_low - r_high, data = .)
= df.arrangement_A %>%
A_loss mutate(regression = A_fit$fitted.values) %>%
summarize(loss = rmse(r_mean, regression)) %>%
pull(loss)
# calculate the loss and corr on B based on the regression to A
= df.arrangement_B %>%
df.B_info mutate(regression = predict(A_fit, .)) %>%
summarize(loss = rmse(r_mean, regression), cor = cor(r_mean, regression))
= df.B_info %>%
B_loss pull(loss)
= df.B_info %>%
r_xval pull(cor)
nrow(df.losses_n) + 1, ] = list("f", NA, NA, A_loss, B_loss, r_xval)
df.losses_n[= df.losses_resp %>% bind_rows(df.losses_n)
df.losses_resp
}
= df.losses_resp %>%
df.losses_resp mutate(diffs = B_losses - A_losses)
# save(df.losses_resp, file = "cache/xval_losses_resp.RData")
= df.losses_resp %>%
df.xval_params group_by(model, A_params) %>%
summarize(n = n(),
p5 = quantile(cors, .05),
p50 = mean(cors),
p95 = quantile(cors, .95)) %>%
arrange(model, -n)
= df.xval_params %>%
xval_bestmodel filter(model == "hsm") %>%
filter(row_number() == 1) %>%
pull(A_params) %>%
paste0("hsm_", .)
# mean, var of xval losses
= df.losses_resp %>%
df.xval_rmses group_by(model) %>%
summarize(across(c("A_losses", "B_losses", "diffs"),
c(p5 = ~quantile(., .05),
p50 = ~mean(.),
p95 = ~quantile(., .95))))
# dif between xval losses and full HSM losses
= df.losses_resp %>%
df.xval_rmse_difs select(model, B_losses) %>%
group_by(x = ceiling(row_number() / 2)) %>%
pivot_wider(names_from = "model", values_from = "B_losses") %>%
rename(xval_num = x) %>%
ungroup() %>%
mutate(across(c(everything(), -xval_num), ~ . - hsm)) %>%
summarize(across(c(everything(), -xval_num),
c(p5 = ~quantile(., .05), p50 = ~mean(.), p95 = ~quantile(., .95))))
# dif between xval losses and full HSM losses
= df.losses_resp %>%
df.xval_corr_difsselect(model, cors) %>%
group_by(x = ceiling(row_number() / 2)) %>%
pivot_wider(names_from = "model", values_from = "cors") %>%
rename(xval_num = x) %>%
ungroup() %>%
mutate(across(c(everything(), -xval_num), ~ hsm - .)) %>%
summarize(across(c(everything(), -xval_num),
c(p5 = ~quantile(., .05), p50 = ~mean(.), p95 = ~quantile(., .95))))
## xval correlations
= df.xval_params %>%
df.xval_corrs group_by(model) %>%
filter(row_number() == 1) %>%
ungroup()
ggplot(data = df.losses_resp %>% filter(!is.nan(B_losses)),
mapping = aes(
x = model %>% reorder(B_losses),
y = B_losses
+
)) geom_point(alpha = 0.08,
color = "coral",
position = position_jitter(width = 0.02, height = 0),
size = 2) +
stat_summary(fun = ~ mean(.),
fun.min = ~ mean(.) - sd(.),
fun.max = ~ mean(.) + sd(.),
# fun.args = list(conf.int=.99),
geom = "pointrange",
shape = 21,
size = 1,
fill = "white",
color = "red") +
labs(x = "model", y = "cross validation loss")
ggplot(data = df.losses_resp %>% filter(!is.nan(cors)),
mapping = aes(
x = model %>% reorder(cors),
y = cors
+
)) geom_point(alpha = 0.08,
color = "coral",
position = position_jitter(width = 0.02, height = 0),
size = 2) +
stat_summary(fun = ~ mean(.),
fun.min = ~ mean(.) - sd(.),
fun.max = ~ mean(.) + sd(.),
geom = "pointrange",
shape = 21,
size = 1,
fill = "white",
color = "red") +
labs(x = "model", y = "correlation coefficient")
= df.exp123.features %>%
df.tmp.features filter(exp == 3) %>%
select(-o.black_pile, -exp) %>%
left_join(df.exp3.trials %>%
select(trial,r_mean, r_low, r_high),
by = "trial")
= df.tmp.features %>%
tmp3 lm(r_mean ~ . - trial - r_low - r_high, data = .)
= df.tmp.features %>%
df.exp3.regression_resp mutate(fit3 = predict(tmp3, df.tmp.features))
cor(df.exp3.regression_resp$fit3, df.exp3.regression_resp$r_mean)
rmse(df.exp3.regression_resp$fit3, df.exp3.regression_resp$r_mean)
# hsm
= df.exp123.master %>%
df.tmp.hsm filter(exp == 3) %>%
select(-exp, -w) %>%
rename_with(.fn = ~ str_remove(., "hsm_")) %>%
left_join(df.exp3.trials %>% select(trial, r_mean, r_low, r_high)) %>%
select(trial, contains("r_"), everything(), -response, -fall, -index)
= df.tmp.hsm %>%
df.tmp.hsm_losses mutate(across(contains(","), ~(.*100 - r_mean)^2))
= df.tmp.hsm %>%
df.tmp select(trial, contains("r_"), hsm = any_of("2,5,4")) %>%
mutate(hsm = hsm * 100)
cor(df.tmp$r_mean, df.tmp$hsm)
rmse(df.tmp$hsm, df.tmp$r_mean)
load("cache/xval_losses_resp3.RData")
=
df.cross_val_resp setNames(data.frame(matrix(ncol = 5, nrow = 0), stringsAsFactors = FALSE),
c("model", "A_losses_mean", "B_losses_mean", "A_losses_sd", "B_losses_sd"))
=
df.losses_resp data.frame("model" = character(),
"A_params" = character(),
"A_losses" = double(),
"B_losses" = double(),
"cors" = double(),
stringsAsFactors = F)
= colnames(df.tmp.hsm %>%
l.full_sci select(-trial, -contains("r_")))
= 200
n = nrow(df.tmp.hsm_losses)
n_total
for (i in 1:n) {
# don't have to worry about specialized exp1/exp2 selection
= sample(1:n_total)
order = order[1:ceiling(n_total / 2)]
ixs_A = order[ceiling(n_total / 2):n_total]
ixs_B
= df.tmp.hsm_losses %>% slice(ixs_A)
df.arrangement_A = df.tmp.hsm_losses %>% slice(ixs_B)
df.arrangement_B
= setNames(data.frame(matrix(ncol = 5, nrow = 0)),
df.losses_n c("model", "A_params", "A_losses", "B_losses", "cors"))
# find the best performing model in A
= df.arrangement_A %>%
A_best summarize(across(.cols = contains(","),
.fns = ~ sqrt(mean(.)))) %>%
pivot_longer(everything(),
names_to = "parameters",
values_to = "loss") %>%
arrange(loss) %>%
filter(row_number() == 1)
= A_best["parameters"][[1]]
A_best_param = A_best["loss"][[1]]
A_best_loss
# calculate the loss of that model in B
= df.arrangement_B %>%
B_best select(r_mean, pred = any_of(A_best_param))
= sqrt(mean(B_best$pred))
B_loss = df.tmp.hsm %>%
r_xval slice(ixs_B) %>%
summarize(cor = cor(r_mean, !!as.symbol(A_best_param))) %>%
pull(cor)
nrow(df.losses_n)+1,] = list("hsm", A_best_param, A_best_loss, B_loss, r_xval)
df.losses_n[
# cross validation on features
= df.tmp.features %>% slice(ixs_A)
df.arrangement_A = df.tmp.features %>% slice(ixs_B)
df.arrangement_B
# get the parameters of the features regression to A
= df.arrangement_A %>%
A_fit lm(r_mean ~ . - trial - index - r_low - r_high, data = .)
= df.arrangement_A %>%
A_loss mutate(regression = A_fit$fitted.values) %>%
summarize(loss = rmse(r_mean, regression)) %>%
pull(loss)
# calculate the loss and corr on B based on the regression to A
= df.arrangement_B %>%
df.B_info mutate(regression = predict(A_fit, .)) %>%
summarize(loss = rmse(r_mean, regression), cor = cor(r_mean, regression))
= df.B_info %>%
B_loss pull(loss)
= df.B_info %>%
r_xval pull(cor)
nrow(df.losses_n) + 1, ] = list("f", NA, A_loss, B_loss, r_xval)
df.losses_n[= df.losses_resp %>% bind_rows(df.losses_n)
df.losses_resp
}
= df.losses_resp %>%
df.losses_resp mutate(diffs = B_losses - A_losses)
# save(df.losses_resp, file = "cache/xval_losses_resp3.RData")
= df.losses_resp %>%
df.xval_params group_by(model, A_params) %>%
summarize(n = n(),
p5 = quantile(cors, .05),
p50 = mean(cors),
p95 = quantile(cors, .95)) %>%
arrange(model, -n)
= df.xval_params %>%
xval_bestmodel filter(model == "hsm") %>%
filter(row_number() == 1) %>%
pull(A_params) %>%
paste0("hsm_", .)
# mean, var of xval losses
= df.losses_resp %>%
df.xval_rmses group_by(model) %>%
summarize(across(c("A_losses", "B_losses", "diffs"),
c(p5 = ~quantile(., .05),
p50 = ~mean(.),
p95 = ~quantile(., .95)))) %>%
arrange(B_losses_p50)
# dif between xval losses and full HSM losses
= df.losses_resp %>%
df.xval_rmse_difs select(model, B_losses) %>%
group_by(x = ceiling(row_number() / 2)) %>%
pivot_wider(names_from = "model", values_from = "B_losses") %>%
rename(xval_num = x) %>%
ungroup() %>%
mutate(across(c(everything(), -xval_num), ~ . - hsm)) %>%
summarize(across(c(everything(), -xval_num),
c(p5 = ~quantile(., .05), p50 = ~mean(.), p95 = ~quantile(., .95))))
# dif between xval losses and full HSM losses
= df.losses_resp %>%
df.xval_corr_difsselect(model, cors) %>%
group_by(x = ceiling(row_number() / 2)) %>%
pivot_wider(names_from = "model", values_from = "cors") %>%
rename(xval_num = x) %>%
ungroup() %>%
mutate(across(c(everything(), -xval_num), ~ hsm - .)) %>%
summarize(across(c(everything(), -xval_num),
c(p5 = ~quantile(., .05), p50 = ~mean(.), p95 = ~quantile(., .95))))
## xval correlations
= df.xval_params %>%
df.xval_corrs group_by(model) %>%
filter(row_number() == 1) %>%
ungroup()
ggplot(data = df.losses_resp %>% filter(!is.nan(B_losses)),
mapping = aes(
x = model %>% reorder(B_losses),
y = B_losses
+
)) geom_point(alpha = 0.08,
color = "coral",
position = position_jitter(width = 0.02, height = 0),
size = 2) +
stat_summary(fun = ~ mean(.),
fun.min = ~ mean(.) - sd(.),
fun.max = ~ mean(.) + sd(.),
# fun.args = list(conf.int=.99),
geom = "pointrange",
shape = 21,
size = 1,
fill = "white",
color = "red") +
labs(x = "model", y = "cross validation loss")
ggplot(data = df.losses_resp %>% filter(!is.nan(cors)),
mapping = aes(
x = model %>% reorder(cors),
y = cors
+
)) geom_point(alpha = 0.08,
color = "coral",
position = position_jitter(width = 0.02, height = 0),
size = 2) +
stat_summary(fun = ~ mean(.),
fun.min = ~ mean(.) - sd(.),
fun.max = ~ mean(.) + sd(.),
geom = "pointrange",
shape = 21,
size = 1,
fill = "white",
color = "red") +
labs(x = "model", y = "correlation coefficient")
# Read in brick information
= "../../data/exp1/"
json.location = "../../figures/stimuli/exp1/"
stimuli.location
= list.files(json.location) %>% mixedsort
filenames = list.files(stimuli.location) %>% mixedsort
imagenames
for (i in 1:length(imagenames)) {
= rjson::fromJSON(file = paste0(json.location, filenames[i])) %>%
df.plot unlist() %>%
matrix(ncol = 4, byrow = T) %>%
as.data.frame(stringsAsFactors = F) %>%
setNames(c("index", "x", "y", "angle")) %>%
mutate(index = str_replace(index, "brick_", "")) %>%
mutate(across(.cols = everything(), ~ as.numeric(.))) %>%
mutate(across(c(x, y), ~ . * 100)) %>%
mutate(y = 600 - y) %>%
filter(x != -500) %>%
mutate(e.x_1 = x - 40,
e.y_1 = y + 20,
e.x_2 = x + 40,
e.y_2 = y + 20,
e.x_3 = x + 40,
e.y_3 = y - 20,
e.x_4 = x - 40,
e.y_4 = y - 20) %>%
left_join(df.exp1.info %>% filter(trial == i) %>% select(index, fall)) %>%
wideToLong(within = 'edge') %>%
mutate(e.xt = x - e.x,
e.yt = y - e.y,
angle = -angle,
e.xr = e.xt * cos(angle) - e.yt * sin(angle),
e.yr = e.yt * cos(angle) + e.xt * sin(angle),
e.xrm = e.xr + x,
e.yrm = e.yr + y
%>%
) arrange(index, edge) %>%
filter(index != 0)
= df.plot %>%
df.plot arrange(fall)
= png::readPNG(paste0(stimuli.location, imagenames[i]))
image = ggplot(df.plot) +
p annotation_custom(rasterGrob(image,
width = unit(1, "npc"),
height = unit(1, "npc")),
xmin = 0, xmax = 800, ymin = 0, ymax = 600) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 800)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 600)) +
scale_color_manual(values = c("black", "red")) +
theme_void() +
theme(axis.ticks.length = unit(0.001, "mm"),
legend.position = 'none')
for (j in unique(df.plot$index)) {
= df.plot %>% filter(index == j) %>% filter(row_number() == 1) %>% pull(fall)
fall if (i != 5 && fall == 1) {
= p +
p geom_polygon(data = df.plot %>% filter(index == j),
aes(x = e.xrm, y = e.yrm, color = "red"),
alpha = 0, linetype = 1, size = 1.5)
else {
} = p +
p geom_polygon(data = df.plot %>% filter(index == j),
aes(x = e.xrm, y = e.yrm, color = "black"),
alpha = 0, linetype = 1, size = 1.5)
}
}
print(p)
= paste0("../../figures/plots/exp1/stimuli_truth/", i, ".png")
img.name ggsave(img.name, width = 8, height = 6, dpi = 100)
= image_read(img.name)
tmp = image_crop(tmp, "400x550+200+50")
tmp image_write(tmp, path = img.name, format = "png")
}
= c(1, 8, 14, 19, 36, 41, 5)
ids
for (i in 1:length(ids)) {
= ids[i]
id = image_read(paste0("../../figures/plots/exp1/stimuli_truth/", id, ".png"))
img = ggplot() + background_image(img) + theme_void() + coord_fixed(ratio = 4/3) +
img theme(panel.border = element_rect(colour = "black", fill=NA, size=2),
plot.margin = unit(c(0,0,0,3), "pt"),
plot.tag.position = c(0.11, 0.92))
if (i == 1) {
= img
row else {
} = row | img
row
}
}
+
row plot_annotation(tag_levels = 'a', tag_suffix = ")")
# ggsave("../../figures/plots/exp1/exp1_stimuli.pdf", width = 9.5,height = 2)
# brick size: 80 x 40
# Read in brick information
= "../../data/exp1/"
json.location = "../../figures/stimuli/exp1/"
stimuli.location
= list.files(json.location) %>% mixedsort
filenames = list.files(stimuli.location) %>% mixedsort
imagenames
# use the appropriate left-join line
= "hsm"
modelname # modelname = "feat"
= "people"
modelname = df.exp1.selection.brick %>%
df.exp1.percentages # left_join(df.exp1.model %>% select(trial, index, model = one_of(fit_bestmodel))) %>%
# mutate(model = df.exp123.regression %>% filter(exp == 1) %>% pull(fit123)) %>%
rename(model = response) %>%
mutate(model = model * 100)
= df.exp1.trials %>%
df.tmp select(trial, responsibility = r_mean, prediction = p_mean)
for (i in 1:length(imagenames)) {
# for (i in 1:5) {
if (i == 5) next
= rjson::fromJSON(file = paste0(json.location, filenames[i])) %>%
df.plot unlist() %>%
matrix(ncol = 4, byrow = T) %>%
as.data.frame(stringsAsFactors = F) %>%
setNames(c("index", "x", "y", "angle")) %>%
mutate(index = str_replace(index, "brick_", "")) %>%
mutate(across(.cols = everything(),
.fns = ~ as.numeric(.))) %>%
mutate(across(.cols = c(x, y),
.fns = ~ . * 100)) %>%
mutate(y = 600 - y) %>%
filter(x != -500) %>%
mutate(e.x_1 = x - 40,
e.y_1 = y + 20,
e.x_2 = x + 40,
e.y_2 = y + 20,
e.x_3 = x + 40,
e.y_3 = y - 20,
e.x_4 = x - 40,
e.y_4 = y - 20) %>%
left_join(df.exp1.percentages %>% filter(trial == i) %>% select(index, model)) %>%
left_join(df.exp1.info %>% filter(trial == i) %>% select(index, fall)) %>%
mutate(selection = round(model, 0)) %>%
select(-model) %>%
wideToLong(within = 'edge') %>%
mutate(e.xt = x - e.x,
e.yt = y - e.y,
angle = -angle,
e.xr = e.xt * cos(angle) - e.yt * sin(angle),
e.yr = e.yt * cos(angle) + e.xt * sin(angle),
e.xrm = e.xr + x,
e.yrm = e.yr + y
%>%
) arrange(index, edge) %>%
filter(index != 0)
= df.plot %>%
df.text select(index, x, y, selection) %>%
distinct()
= png::readPNG(paste0(stimuli.location, imagenames[i]))
image = ggplot(df.plot) +
p annotation_custom(rasterGrob(image,
width = unit(1, "npc"),
height = unit(1, "npc")),
xmin = 0, xmax = 800, ymin = 0, ymax = 600) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 800)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 600)) +
# scale_fill_gradient(low = 'white', high = 'red', limits = c(0, 100)) +
scale_alpha_continuous(range = c(min(df.plot$selection/100),
max(df.plot$selection/100))) +
scale_color_manual(values = c("black", "red")) +
theme_void() +
theme(axis.ticks.length = unit(0.001, "mm"),
legend.position = 'none')
if (modelname == "people") {
= p +
p geom_text(data = df.tmp %>%
filter(trial == i) %>%
mutate(pred_text = paste0("prediction: ",
as.character(round(prediction, 1)),
" blocks")),
mapping = aes(x = 220,
y = 520,
hjust = 0,
label = pred_text),
size = 8) +
geom_text(data = df.tmp %>%
filter(trial == i) %>%
mutate(resp_text = paste0("responsibility: ",
as.character(round(responsibility, 0)))),
mapping = aes(x = 220,
y = 480,
hjust = 0,
label = resp_text),
size = 8)
}
# make sure red-bordered blocks are drawn after black-bordered blocks
= df.plot %>%
df.plot arrange(fall)
for (j in unique(df.plot$index)) {
= p +
p geom_polygon(data = df.plot %>% filter(index == j),
aes(x = e.xrm, y = e.yrm, alpha=selection/100, color = as.factor(fall)),
fill = "white",
linetype = 1, size = 1.5) +
geom_text(data = df.text %>%
filter(index == j), aes(x = x, y = y, label = selection), size = 7)
}print(p)
= paste0("../../figures/plots/exp1/stimuli_percentages/",i, "_",modelname,".png")
img.name ggsave(img.name, width = 8, height = 6, dpi = 100)
= image_read(img.name)
tmp = image_crop(tmp, "400x550+200+50")
tmp image_write(tmp, path = img.name, format = "png")
}
= c(18, 24, 21, 25, 27)
ids = c("a)", "b)", "c)", "d)", "e)")
labels
= ggplot() + theme_void() + ylab("CSM") +
label_hsm theme(axis.title.y.left = element_text(face = "bold", size = 18, angle = 90))
= ggplot() + theme_void() + ylab("features model") +
label_feat theme(axis.title.y.left = element_text(face = "bold", size = 18, angle = 90))
= ggplot() + theme_void() + ylab("people") +
label_ppl theme(axis.title.y.left = element_text(face = "bold", size = 18, angle = 90))
= (label_hsm / label_feat / label_ppl)
combo_img
for (i in 1:length(ids)) {
= ids[[i]]
id = image_read(paste0('../../figures/plots/exp1/stimuli_percentages/',
img_hsm '_hsm.png'))
id,= image_read(paste0('../../figures/plots/exp1/stimuli_percentages/',
img_feat '_feat.png'))
id,= image_read(paste0('../../figures/plots/exp1/stimuli_percentages/',
img_ppl '_people.png'))
id,= ggplot() + background_image(img_hsm) + theme_void() +
img_hsm coord_fixed(ratio = 4/3) + ggtitle(labels[[i]]) +
theme(plot.title = element_text(face = "bold", size = 20, vjust = -5, hjust = 0.06),
axis.title.y.left = element_text(face = "bold", size = 10, angle = 90))
= ggplot() + background_image(img_feat) + coord_fixed(ratio = 4/3) +
img_feat theme_void() + theme(plot.margin = unit(c(10,10,10,0), "pt"))
= ggplot() + background_image(img_ppl) + coord_fixed(ratio = 4/3) +
img_ppl theme_void()
= (img_hsm / img_feat / img_ppl)
img
= combo_img | img
combo_img
}
+ plot_layout(widths = c(0.05, 1, 1, 1, 1, 1))
combo_img
# ggsave("../../figures/plots/exp1/exp1_samples.pdf", width = 10,height = 8)
# SELECTION
= df.exp1.master %>%
df.plot select(trial, index, response, hsm = any_of(fit_bestmodel)) %>%
left_join(df.exp123.regression %>%
filter(exp == 1) %>%
select(trial, index, features = fit123),
by = c("trial", "index"))
# hsm model
= ggplot(data = df.plot, mapping = aes(x = hsm, y = response)) +
plot.hsm_selection geom_abline(intercept = 0, slope = 1, linetype='dashed') +
geom_smooth(method='lm', color='lightskyblue', fill='lightblue1') +
geom_point(size=2.5, alpha = 0.4) +
annotate(geom = "text", x=0,y=Inf,
label = paste0("r = ", cor(df.plot$response,df.plot$hsm) %>% round(2)),
hjust = 0, vjust = 1, size = 8)+
annotate(geom = "text", x=0,y=Inf,
label = paste0("RMSE = ", rmse(df.plot$response,df.plot$hsm) %>% round(2)),
hjust = 0, vjust = 2.5, size = 8) +
scale_x_continuous(breaks = seq(0, 1, .25),
labels = str_c(seq(0, 100, 25), "%"), limits = c(0, 1.05)) +
scale_y_continuous(breaks = seq(0, 1, .25),
labels = str_c(seq(0, 100, 25), "%"), limits = c(0, 1.2)) +
labs(x = "CSM",
y = "selection probability",
tag = "a)") +
theme(axis.title.x = element_text(margin = margin(t = 10)),
plot.tag = element_text(face = "bold"))
# features model
= ggplot(data = df.plot,
plot.features_selection mapping = aes(x = features, y = response)) +
geom_smooth(method='lm', color='lightskyblue', fill="lightblue1") +
geom_point(size=2.5, alpha = 0.5) +
geom_abline(intercept = 0, slope = 1, linetype='dashed') +
annotate(geom = "text", x=0,y=Inf,
label = paste0("r = ", cor(df.plot$response,df.plot$features) %>% round(2)),
hjust = 0, vjust = 1, size = 8)+
annotate(geom = "text", x=0,y=Inf,
label = paste0("RMSE = ", rmse(df.plot$response,df.plot$features) %>% round(2)),
hjust = 0, vjust =2.5, size = 8) +
scale_x_continuous(breaks = seq(0, 1, .25),
labels = str_c(seq(0, 100, 25), "%"), limits = c(0, 1.05)) +
scale_y_continuous(breaks = seq(0, 1, .25),
labels = str_c(seq(0, 100, 25), "%"), limits = c(0, 1.2)) +
labs(x = "features model",
y = "selection probability",
tag = "d)") +
theme(axis.title.x = element_text(margin = margin(t = 10)),
plot.tag = element_text(face = "bold"))
# PREDICTION
= df.exp12.pr_regression %>%
df.plot filter(exp == 1) %>%
select(trial, hsm, features, mean = p_mean, low = p_low, high = p_high)
= c(18, 24, 21, 25, 27)
ids = df.plot %>%
df.tmp filter(trial %in% ids) %>%
arrange(ids) %>%
mutate(lets = letters[1:5]) %>%
mutate(hsm_ys = c(1,1.5,2,-1.5,-1.5),
hsm_xs = c(-1.5,-.5,1.2,-.2,.5),
feat_ys = c(1,.2,-1.5,-1.5,1.2),
feat_xs = c(-1.5,-.8,.5,-.2,.2))
# hsm model
= ggplot(df.plot, aes(x = hsm, y = mean)) +
plot.hsm_prediction geom_abline(intercept = 0, slope = 1, linetype = 2) +
stat_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = low, ymax = high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
geom_point(data=df.tmp, aes(hsm, mean), color="red", size=3) +
geom_text_repel(data = df.tmp, aes(label = lets), color="red", size=6,
nudge_x = df.tmp$hsm_xs, nudge_y = df.tmp$hsm_ys) +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$hsm, df.plot$mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 1, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$hsm, df.plot$mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 2.5, size = 8) +
labs(x = "CSM", y = "prediction judgment", tag = "b)") +
scale_x_continuous(breaks = seq(0, 8, 2), labels = seq(0, 8, 2), limits = c(0, 8)) +
scale_y_continuous(breaks = seq(0, 8, 2), labels = seq(0, 8, 2), limits = c(0, 9)) +
theme(panel.grid = element_blank(), plot.tag = element_text(face = "bold"))
# features model
= ggplot(df.plot, aes(x = features, y = mean)) +
plot.features_prediction geom_abline(intercept = 0, slope = 1, linetype = 2) +
stat_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = low, ymax = high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
geom_point(data=df.tmp, aes(features, mean), color="red", size=3) +
geom_text_repel(data = df.tmp, aes(label = lets), color="red", size=6,
nudge_x = df.tmp$feat_xs, nudge_y = df.tmp$feat_ys) +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$features, df.plot$mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 1.5, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$features, df.plot$mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 3, size = 8) +
labs(x = "features model", y = "prediction judgment", tag = "e)") +
scale_x_continuous(breaks = seq(0, 8, 2), labels = seq(0, 8, 2), limits = c(0, 8)) +
scale_y_continuous(breaks = seq(0, 8, 2), labels = seq(0, 8, 2), limits = c(0, 9)) +
theme(panel.grid = element_blank(), plot.tag = element_text(face = "bold"))
# RESPONSIBILITY
= df.exp12.pr_regression %>%
df.plot filter(exp == 1) %>%
select(hsm = r_hsm, features = r_features, mean = r_mean, low = r_low, high = r_high)
# hsm model
= ggplot(df.plot, aes(x = hsm, y = mean)) +
plot.hsm_responsibility geom_abline(intercept = 0, slope = 1, linetype='dashed') +
stat_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = low, ymax = high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$hsm, df.plot$mean) %>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 1, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$hsm, df.plot$mean) %>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 2.5, size = 8) +
labs(x = "CSM", y = "responsibility judgment", tag = "c)") +
scale_x_continuous(breaks = seq(0, 100, 25), limits = c(0, 105)) +
scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0, 120)) +
theme(panel.grid = element_blank(), plot.tag = element_text(face = "bold"))
# features model
= ggplot(df.plot, aes(x = features, y = mean)) +
plot.features_responsibility geom_abline(intercept = 0, slope = 1, linetype='dashed') +
stat_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = low, ymax = high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$features, df.plot$mean) %>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 1, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$features, df.plot$mean) %>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 2.5, size = 8) +
labs(x = "features model", y = "responsibility judgment", tag = "f)") +
scale_x_continuous(breaks = seq(0, 100, 25), limits = c(0, 105)) +
scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0, 120)) +
theme(panel.grid = element_blank(), plot.tag = element_text(face = "bold"))
# FIGURE
<- ggplot() + annotate(geom = 'text', x=-5, y=0, label="Selection",
col1 size=9, fontface = "bold" ) + theme_void()
<- ggplot() + annotate(geom = 'text', x=0, y=0, label="Prediction",
col2 size=9, fontface = "bold") + theme_void()
<- ggplot() + annotate(geom = 'text', x=0, y=0, label="Responsibility",
col3 size=9, fontface = "bold") + theme_void()
/ plot.hsm_selection / plot.features_selection) +
(col1 plot_layout(heights = c(0.15, 1, 1)) |
/ plot.hsm_prediction / plot.features_prediction) +
(col2 plot_layout(heights = c(0.15, 1, 1)) |
/ plot.hsm_responsibility / plot.features_responsibility) +
(col3 plot_layout(heights = c(0.15, 1, 1))
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
Warning: Removed 1 rows containing missing values (geom_smooth).
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
# ggsave("../../figures/plots/exp1/exp1_plots.pdf", width = 15, height = 10)
# selection vs prediction
= df.exp1.selection.long %>%
df.plot group_by(participant, trial) %>%
summarize(s_pred = sum(response), .groups = "drop") %>%
group_by(trial) %>%
summarize(s_mean = mean(s_pred),
s_low = smean.cl.boot(s_pred)[2],
s_high = smean.cl.boot(s_pred)[3]) %>%
left_join(df.exp1.trials %>%
select(trial, p_mean, p_low, p_high, r_mean, r_low, r_high, n_bricks),
by = "trial") %>%
mutate(sp_mean = s_mean/n_bricks, sp_low = s_low/n_bricks, sp_high = s_high/n_bricks,
pp_mean = p_mean/n_bricks, pp_low = p_low/n_bricks, pp_high = p_high/n_bricks)
= ggplot(df.plot, aes(x = sp_mean, y = pp_mean)) +
plot.selection_prediction geom_abline(intercept = 0, slope = 1, linetype = 2) +
geom_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbarh(aes(xmin = sp_low, xmax = sp_high), height = 0, color='grey70') +
geom_errorbar(aes(ymin = pp_low, ymax = pp_high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$sp_mean, df.plot$pp_mean) %>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 1.5, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$sp_mean, df.plot$pp_mean) %>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 3, size = 8) +
# annotate(geom = "text", label = "", x = 0, y = 0, hjust = 0) +
labs(x = "selection proportion", y = "prediction proportion") +
# coord_cartesian(ylim = c(0, NA), xlim = c(0, NA)) +
scale_x_continuous(breaks = seq(0, 1, .2), labels = seq(0, 1, .2), limits = c(0, 1)) +
scale_y_continuous(breaks = seq(0, 1, .2), labels = seq(0, 1, .2), limits = c(0, 1)) +
theme(text = element_text(size = 20),
panel.grid = element_blank(),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)))
= df.exp12.pr_regression %>%
df.plot2 filter(exp == 1) %>%
mutate(across(contains("r_prediction"), ~./100))
# selection vs responsibility
= ggplot(df.plot2,
plot.prediction_responsibility aes(x = r_prediction_mean, y = r_mean)) +
geom_abline(intercept = 0, slope = 100, linetype = 2) +
geom_smooth(method = "lm", color = "lightskyblue", fill="lightblue1") +
geom_errorbarh(aes(xmin = r_prediction_low, xmax = r_prediction_high),
height = 0, color="grey70") +
geom_errorbar(aes(ymin = r_low, ymax = r_high), width = 0, color="grey70") +
geom_point(size = 2, color="black") +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot2$r_mean, df.plot2$r_prediction_mean)
%>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 1.5, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot2$r_mean, df.plot2$r_prediction_mean*100)
%>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 3, size = 8) +
labs(x = "scaled prediction proportion", y = "responsibility judgment") +
scale_x_continuous(breaks = seq(0, 1, .2),
labels = seq(0, 1, .2), limits = c(0, 1)) +
scale_y_continuous(breaks = seq(0, 100, 20),
labels = seq(0, 100, 20), limits = c(0, 100)) +
# coord_cartesian(xlim = c(0, 0.65), ylim = c(0, 0.9)) +
theme(text = element_text(size = 20),
panel.grid = element_blank(),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)))
+ plot.prediction_responsibility +
plot.selection_prediction plot_annotation(tag_levels = "a", tag_suffix = ")") &
theme(plot.tag = element_text(face = 'bold'))
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
# ggsave("../../figures/plots/exp1/exp1_selection_responsibility.pdf",width = 12,height = 6)
# Read in brick information
= "../../data/exp2/"
json.location = "../../figures/stimuli/exp2/"
stimuli.location
= list.files(json.location) %>% mixedsort()
filenames = list.files(stimuli.location) %>% mixedsort
imagenames
for (i in 1:length(imagenames)) {
= rjson::fromJSON(file = paste0(json.location, filenames[i])) %>%
df.plot unlist() %>%
matrix(ncol = 4, byrow = T) %>%
as.data.frame(stringsAsFactors = F) %>%
setNames(c("index", "x", "y", "angle")) %>%
mutate(index = str_replace(index, "brick_", "")) %>%
mutate(across(.cols = everything(),
.fns = ~ as.numeric(.))) %>%
mutate(across(.cols = c(x, y),
.fns = ~ . * 100)) %>%
mutate(y = 600 - y) %>%
filter(x != -500) %>%
mutate(e.x_1 = x - 40,
e.y_1 = y + 20,
e.x_2 = x + 40,
e.y_2 = y + 20,
e.x_3 = x + 40,
e.y_3 = y - 20,
e.x_4 = x - 40,
e.y_4 = y - 20) %>%
left_join(df.exp2.info %>% filter(trial == i) %>% select(index, fall)) %>%
wideToLong(within = 'edge') %>%
mutate(e.xt = x - e.x,
e.yt = y - e.y,
angle = -angle,
e.xr = e.xt * cos(angle) - e.yt * sin(angle),
e.yr = e.yt * cos(angle) + e.xt * sin(angle),
e.xrm = e.xr + x,
e.yrm = e.yr + y
%>%
) arrange(index, edge) %>%
filter(index != 0)
= png::readPNG(paste0(stimuli.location, imagenames[i]))
image = ggplot(df.plot) +
p annotation_custom(rasterGrob(image,
width = unit(1, "npc"),
height = unit(1, "npc")),
xmin = 0, xmax = 800, ymin = 0, ymax = 600) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 800)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 600)) +
scale_color_manual(values = c("black", "red")) +
theme_void() +
theme(axis.ticks.length = unit(0.001, "mm"),
legend.position = 'none')
= df.plot %>%
df.plot arrange(fall)
for (j in unique(df.plot$index)) {
= df.plot %>% filter(index == j) %>% filter(row_number() == 1) %>% pull(fall)
fall if (fall == 1) {
= p +
p geom_polygon(data = df.plot %>% filter(index == j),
aes(x = e.xrm, y = e.yrm, color = "red"),
alpha = 0, linetype = 1, size = 1.5)
else if (fall == 0) {
} = p +
p geom_polygon(data = df.plot %>% filter(index == j),
aes(x = e.xrm, y = e.yrm, color = "black"),
alpha = 0, linetype = 1, size = 1.5)
}
}
print(p)
= paste0("../../figures/plots/exp2/stimuli_truth/", i, ".png")
img.name ggsave(img.name, width = 8, height = 6, dpi = 100)
= image_read(img.name)
tmp = image_crop(tmp, "400x550+200+50")
tmp image_write(tmp, path = img.name, format = "png")
}
= list(c(10,12,14), c(16,19,20), c(36,42,38), c(24,25,27))
ids = c("I", "II", "III", "IV")
labels = c("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l")
tags
for (i in 1:length(ids)) {
= ggplot() + theme_void() + ylab(paste0("Tower ", labels[i])) +
tower theme(axis.title.y.left = element_text(face = "bold", size = 18, angle = 90),
plot.margin = unit(c(0,0,0,10), "pt"))
for (j in 1:length(ids[[i]])) {
= ids[[i]][[j]]
id = image_read(paste0("../../figures/plots/exp2/stimuli_truth/", id, ".png"))
img = ggplot() + background_image(img) + theme_void() + coord_fixed(ratio = 4/3) +
img
labs(title = paste0(tags[[(i-1)*3+j]], ")")) +
theme(panel.border = element_rect(colour = "black", fill=NA, size=2),
plot.margin = unit(c(0,0,0,3), "pt"),
plot.title = element_text(size = 15, vjust = -4.7, hjust = 0.11),
plot.title.position = "panel")
= tower | img
tower
}
if (i %% 2 == 1) {
= tower + plot_layout(widths = c(0.05, 1, 1, 1))
row else {
} = tower + plot_layout(widths = c(0.05, 1, 1, 1))
tower = row - tower
row if (i == 2) {
= row
rows else if (i == 4) {
} = rows / row
rows
}
}
}
rows
# ggsave("../../figures/plots/exp2/exp2_stimuli.pdf", width = 9.5,height = 4)
# Read in brick information
= "../../data/exp2/"
json.location = "../../figures/stimuli/exp2/"
stimuli.location
= list.files(json.location) %>% mixedsort
filenames = list.files(stimuli.location) %>% mixedsort
imagenames
# use the appropriate left-join line
= "hsm"
modelname # modelname = "feat"
= "people"
modelname = df.exp2.selection.brick %>%
df.exp2.percentages # left_join(df.exp2.model %>% select(trial, index, model = one_of(fit_bestmodel))) %>%
# mutate(model = df.exp123.regression %>% filter(exp == 2) %>% pull(fit123)) %>%
rename(model = response) %>%
mutate(model = model * 100)
= df.exp2.trials %>%
df.tmp select(trial, responsibility = r_mean, prediction = p_mean)
for (i in 1:length(imagenames)) {
= rjson::fromJSON(file = paste0(json.location, filenames[i])) %>%
df.plot unlist() %>%
matrix(ncol = 4, byrow = T) %>%
as.data.frame(stringsAsFactors = F) %>%
setNames(c("index", "x", "y", "angle")) %>%
mutate(index = str_replace(index, "brick_", "")) %>%
mutate(across(.cols = everything(),
.fns = ~ as.numeric(.))) %>%
mutate(across(.cols = c(x, y),
.fns = ~ . * 100)) %>%
mutate(y = 600 - y) %>%
filter(x != -500) %>%
mutate(e.x_1 = x - 40,
e.y_1 = y + 20,
e.x_2 = x + 40,
e.y_2 = y + 20,
e.x_3 = x + 40,
e.y_3 = y - 20,
e.x_4 = x - 40,
e.y_4 = y - 20) %>%
left_join(df.exp2.percentages %>% filter(trial == i) %>% select(index, model)) %>%
left_join(df.exp2.info %>% filter(trial == i) %>% select(index, fall)) %>%
mutate(selection = round(model, 0)) %>%
select(-model) %>%
wideToLong(within = 'edge') %>%
mutate(e.xt = x - e.x,
e.yt = y - e.y,
angle = -angle,
e.xr = e.xt * cos(angle) - e.yt * sin(angle),
e.yr = e.yt * cos(angle) + e.xt * sin(angle),
e.xrm = e.xr + x,
e.yrm = e.yr + y
%>%
) arrange(index, edge) %>%
filter(index != 0)
= df.plot %>%
df.text select(index, x, y, selection) %>%
distinct()
= png::readPNG(paste0(stimuli.location, imagenames[i]))
image = ggplot(df.plot) +
p annotation_custom(rasterGrob(image,
width = unit(1, "npc"),
height = unit(1, "npc")),
xmin = 0, xmax = 800, ymin = 0, ymax = 600) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 800)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 600)) +
# scale_fill_gradient(low = 'white', high = 'red', limits = c(0, 100)) +
scale_alpha_continuous(range = c(min(df.plot$selection/100),
max(df.plot$selection/100))) +
scale_color_manual(values = c("black", "red")) +
theme_void() +
theme(axis.ticks.length = unit(0.001, "mm"),
legend.position = 'none')
# add prediction and responsibility text to image
if (modelname == "people") {
= p +
p geom_text(data = df.tmp %>%
filter(trial == i) %>%
mutate(pred_text = paste0("prediction: ",
as.character(round(prediction, 1)),
" blocks")),
mapping = aes(x = 220,
y = 520,
hjust = 0,
label = pred_text),
size = 8) +
geom_text(data = df.tmp %>%
filter(trial == i) %>%
mutate(resp_text = paste0("responsibility: ",
as.character(round(responsibility, 0)))),
mapping = aes(x = 220,
y = 480,
hjust = 0,
label = resp_text),
size = 8)
}
= df.plot %>%
df.plot arrange(fall)
for (j in unique(df.plot$index)) {
= p +
p geom_polygon(data = df.plot %>% filter(index == j),
aes(x = e.xrm, y = e.yrm, alpha = selection/100, color=as.factor(fall)),
fill = "white",
linetype = 1, size = 1.5) +
geom_text(data = df.text %>% filter(index == j),
aes(x = x, y = y, label = selection), size = 7)
}print(p)
= paste0("../../figures/plots/exp2/stimuli_percentages/",i,"_",modelname,".png")
img.name ggsave(img.name, width = 8, height = 6, dpi = 100)
= image_read(img.name)
tmp = image_crop(tmp, "400x550+200+50")
tmp image_write(tmp, path = img.name, format = "png")
}
= c(2, 6, 24, 20, 13)
ids = c(38, 42, 14, 20, 3)
ids = c("a)", "b)", "c)", "d)", "e)")
labels
= ggplot() + theme_void() + ylab("CSM") +
label_hsm theme(axis.title.y.left = element_text(face = "bold", size = 18, angle = 90))
= ggplot() + theme_void() + ylab("features model") +
label_feat theme(axis.title.y.left = element_text(face = "bold", size = 18, angle = 90))
= ggplot() + theme_void() + ylab("people") +
label_ppl theme(axis.title.y.left = element_text(face = "bold", size = 18, angle = 90))
= (label_hsm / label_feat / label_ppl)
combo_img
for (i in 1:length(ids)) {
= ids[[i]]
id = image_read(paste0('../../figures/plots/exp2/stimuli_percentages/',
img_hsm '_hsm.png'))
id,= image_read(paste0('../../figures/plots/exp2/stimuli_percentages/',
img_feat '_feat.png'))
id,= image_read(paste0('../../figures/plots/exp2/stimuli_percentages/',
img_ppl '_people.png'))
id,= ggplot() + background_image(img_hsm) + theme_void() +
img_hsm coord_fixed(ratio=4/3) + ggtitle(labels[[i]]) +
theme(plot.title = element_text(face = "bold", size = 20, vjust = -5, hjust = 0.06),
axis.title.y.left = element_text(face = "bold", size = 10, angle = 90))
= ggplot() + background_image(img_feat) + theme_void() +
img_feat coord_fixed(ratio=4/3) + theme(plot.margin = unit(c(10,10,10,0), "pt"))
= ggplot() + background_image(img_ppl) + theme_void() + coord_fixed(ratio=4/3)
img_ppl
= (img_hsm / img_feat / img_ppl)
img = combo_img | img
combo_img
}
+ plot_layout(widths = c(0.05, 1, 1, 1, 1, 1))
combo_img
# ggsave("../../figures/plots/exp2/exp2_samples.pdf",width = 10,height = 8)
# SELECTION
= df.exp2.master %>%
df.plot select(trial, index, response, hsm = any_of(fit_bestmodel)) %>%
left_join(df.exp123.regression %>%
filter(exp == 2) %>%
select(trial, index, features = fit123),
by = c("trial", "index"))
# hsm model
= ggplot(data = df.plot, mapping = aes(x = hsm, y = response)) +
plot.hsm_selection geom_abline(intercept = 0, slope = 1, linetype='dashed') +
geom_smooth(method='lm', color='lightskyblue', fill='lightblue1') +
geom_point(size=2.5, alpha = 0.4) +
annotate(geom = "text", x=0,y=Inf,
label = paste0("r = ", cor(df.plot$response,df.plot$hsm) %>% round(2)),
hjust = 0, vjust = 1, size = 8)+
annotate(geom = "text", x=0,y=Inf,
label = paste0("RMSE = ", rmse(df.plot$response,df.plot$hsm) %>% round(2)),
hjust = 0, vjust =2.5, size = 8) +
scale_x_continuous(breaks = seq(0, 1, .25),
labels = str_c(seq(0, 100, 25), "%"), limits = c(0, 1.05)) +
scale_y_continuous(breaks = seq(0, 1, .25),
labels = str_c(seq(0, 100, 25), "%"), limits = c(0, 1.2)) +
labs(x = "CSM", y = "selection probability", tag = "a)") +
theme(axis.title.x = element_text(margin = margin(t = 10)),
plot.tag = element_text(face = "bold"))
# features model
= ggplot(data = df.plot,
plot.features_selection mapping = aes(x = features, y = response)) +
geom_smooth(method='lm', color='lightskyblue', fill="lightblue1") +
geom_point(size=2.5, alpha = 0.5) +
geom_abline(intercept = 0, slope = 1, linetype='dashed') +
annotate(geom = "text", x=0,y=Inf,
label = paste0("r = ", cor(df.plot$response,df.plot$features) %>% round(2)),
hjust = 0, vjust = 1, size = 8)+
annotate(geom = "text", x=0,y=Inf,
label = paste0("RMSE = ", rmse(df.plot$response,df.plot$features) %>% round(2)),
hjust = 0, vjust =2.5, size = 8) +
scale_x_continuous(breaks = seq(0, 1, .25),
labels = str_c(seq(0, 100, 25), "%"), limits = c(0, 1.05)) +
scale_y_continuous(breaks = seq(0, 1, .25),
labels = str_c(seq(0, 100, 25), "%"), limits = c(0, 1.2)) +
labs(x = "features model", y = "selection probability", tag = "d)") +
theme(axis.title.x = element_text(margin = margin(t = 10)),
plot.tag = element_text(face = "bold"))
# PREDICTION
= df.exp12.pr_regression %>%
df.plot filter(exp == 2) %>%
select(trial, hsm, features, mean = p_mean, low = p_low, high = p_high)
= c(2, 6, 24, 20, 13)
ids = df.plot %>%
df.tmp filter(trial %in% ids) %>%
arrange(ids) %>%
mutate(lets = letters[1:5]) %>%
mutate(hsm_ys = c(0,-1,1,1.5,-1.5),
hsm_xs = c(1,1,-.4,-.2,.5),
feat_ys = c(.3,1.5,-1.5,1,1.2),
feat_xs = c(.5,-.8,1.5,-.2,-1))
# hsm model
= ggplot(df.plot, aes(x = hsm, y = mean)) +
plot.hsm_prediction geom_abline(intercept = 0, slope = 1, linetype = 2) +
stat_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = low, ymax = high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
geom_point(data=df.tmp, aes(hsm, mean), color="red", size=3) +
geom_text_repel(data = df.tmp, aes(label = lets), color="red", size=6,
nudge_x = df.tmp$hsm_xs, nudge_y = df.tmp$hsm_ys) +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$hsm, df.plot$mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 1, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$hsm, df.plot$mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 2.5, size = 8) +
labs(x = "CSM", y = "prediction judgment", tag = "b)") +
scale_x_continuous(breaks = seq(0, 8, 2), limits = c(0, 8)) +
scale_y_continuous(breaks = seq(0, 8, 2), limits = c(0, 9)) +
theme(text = element_text(size = 20), panel.grid = element_blank(),
plot.tag = element_text(face = "bold"))
# features model
= ggplot(df.plot, aes(x = features, y = mean)) +
plot.features_prediction geom_abline(intercept = 0, slope = 1, linetype = 2) +
stat_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = low, ymax = high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
geom_point(data=df.tmp, aes(features, mean), color="red", size=3) +
geom_text_repel(data = df.tmp, aes(label = lets), color="red", size=6,
nudge_x = df.tmp$feat_xs, nudge_y = df.tmp$feat_ys) +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$features, df.plot$mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 1, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$features, df.plot$mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 2.5, size = 8) +
labs(x = "features model", y = "prediction judgment", tag = "e)") +
scale_x_continuous(breaks = seq(0, 8, 2), limits = c(0, 8)) +
scale_y_continuous(breaks = seq(0, 8, 2), limits = c(0, 9)) +
theme(text = element_text(size = 20), panel.grid = element_blank(),
plot.tag = element_text(face = "bold"))
# RESPONSIBILITY
= df.exp12.pr_regression %>%
df.plot filter(exp == 2) %>%
select(hsm = r_hsm, features = r_features, mean = r_mean, low = r_low, high = r_high)
# hsm model
= ggplot(df.plot, aes(x = hsm, y = mean)) +
plot.hsm_responsibility geom_abline(intercept = 0, slope = 1, linetype='dashed') +
stat_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = low, ymax = high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$hsm, df.plot$mean) %>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 1, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$hsm, df.plot$mean) %>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 2.5, size = 8) +
labs(x = "CSM", y = "responsibility judgment", tag = "c)") +
scale_x_continuous(breaks = seq(0, 100, 25), limits = c(0, 105)) +
scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0, 120)) +
theme(panel.grid = element_blank(), plot.tag = element_text(face = "bold"))
annotate(geom = "text", label = "responsibility", x = 0, y = 0)
# features model
= ggplot(df.plot, aes(x = features, y = mean)) +
plot.features_responsibility geom_abline(intercept = 0, slope = 1, linetype='dashed') +
stat_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = low, ymax = high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$features, df.plot$mean) %>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 1, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$features, df.plot$mean) %>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 2.5, size = 8) +
labs(x = "features model", y = "responsibility judgment", tag = "f)") +
scale_x_continuous(breaks = seq(0, 100, 25), limits = c(0, 105)) +
scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0, 120)) +
theme(panel.grid = element_blank(), plot.tag = element_text(face = "bold"))
# FIGURE
<- ggplot() +
col1 annotate(geom = 'text', x=-5, y=0, label="Selection", size=9, fontface = "bold" ) +
theme_void()
<- ggplot() +
col2 annotate(geom = 'text', x=0, y=0, label="Prediction", size=9, fontface = "bold") +
theme_void()
<- ggplot() +
col3 annotate(geom = 'text', x=0, y=0, label="Responsibility", size=9, fontface = "bold") +
theme_void()
/ plot.hsm_selection / plot.features_selection) +
(col1 plot_layout(heights = c(0.15, 1, 1)) |
/ plot.hsm_prediction / plot.features_prediction) +
(col2 plot_layout(heights = c(0.15, 1, 1)) |
/ plot.hsm_responsibility / plot.features_responsibility) +
(col3 plot_layout(heights = c(0.15, 1, 1))
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
# plot.hsm_prediction + theme_classic()
# ggsave("../../figures/plots/exp2/exp2_plots.pdf", width = 15, height = 10)
mapping: x = ~x, y = ~y
geom_text: na.rm = FALSE
stat_identity: na.rm = FALSE
position_identity
= df.exp2.selection.long %>%
df.plot group_by(participant, trial) %>%
summarize(s_pred = sum(response), .groups = "drop") %>%
group_by(trial) %>%
summarize(s_mean = mean(s_pred),
s_low = smean.cl.boot(s_pred)[2],
s_high = smean.cl.boot(s_pred)[3]) %>%
left_join(df.exp2.trials %>%
select(trial, p_mean, p_low, p_high, r_mean, r_low, r_high, n_bricks),
by = "trial") %>%
mutate(sp_mean = s_mean/n_bricks, sp_low = s_low/n_bricks, sp_high = s_high/n_bricks,
pp_mean = p_mean/n_bricks, pp_low = p_low/n_bricks, pp_high = p_high/n_bricks)
= ggplot(df.plot, aes(x = sp_mean, y = pp_mean)) +
plot.selection_prediction geom_abline(intercept = 0, slope = 1, linetype = 2) +
geom_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbarh(aes(xmin = sp_low, xmax = sp_high), height = 0, color='grey70') +
geom_errorbar(aes(ymin = pp_low, ymax = pp_high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$sp_mean, df.plot$pp_mean) %>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 1.5, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$sp_mean, df.plot$pp_mean) %>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 3, size = 8) +
labs(x = "selection proportion", y = "prediction proportion") +
scale_x_continuous(breaks = seq(0, 1, .2), labels = seq(0, 1, .2), limits = c(0, 1)) +
scale_y_continuous(breaks = seq(0, 1, .2), labels = seq(0, 1, .2), limits = c(0, 1)) +
theme(text = element_text(size = 20),
panel.grid = element_blank(),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)))
= df.exp12.pr_regression %>%
df.plot2 filter(exp == 2) %>%
mutate(across(contains("r_prediction"), ~./100))
# selection vs responsibility
= ggplot(df.plot2,
plot.prediction_responsibility aes(x = r_prediction_mean, y = r_mean)) +
geom_abline(intercept = 0, slope = 100, linetype = 2) +
geom_smooth(method = "lm", color = "lightskyblue", fill="lightblue1") +
geom_errorbarh(aes(xmin = r_prediction_low, xmax = r_prediction_high),
height = 0, color="grey70") +
geom_errorbar(aes(ymin = r_low, ymax = r_high), width = 0, color="grey70") +
geom_point(size = 2, color="black") +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot2$r_mean, df.plot2$r_prediction_mean)
%>% round(2)),
x = 0, y = Inf, hjust = 0, vjust = 1.5, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ",
rmse(df.plot2$r_mean, df.plot2$r_prediction_mean*100) %>%
round(2)),
x = 0, y = Inf, hjust = 0, vjust = 3, size = 8) +
labs(x = "scaled prediction proportion", y = "responsibility judgment") +
scale_x_continuous(breaks = seq(0, 1, .2),
labels = seq(0, 1, .2), limits = c(0, 1)) +
scale_y_continuous(breaks = seq(0, 100, 20),
labels = seq(0, 100, 20), limits = c(0, 100)) +
theme(text = element_text(size = 20),
panel.grid = element_blank(),
axis.title.x = element_text(margin = margin(t = 10)),
axis.title.y = element_text(margin = margin(r = 10)))
+ plot.prediction_responsibility +
plot.selection_prediction plot_annotation(tag_levels = "a", tag_suffix = ")") &
theme(plot.tag = element_text(face = 'bold'))
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
# ggsave("../../figures/plots/exp2/exp2_selection_responsibility.pdf",width = 12,height = 6)
# brick size: 80 x 40
# Read in brick information
= "../../data/exp3/"
json.location = "../../figures/stimuli/exp3/"
stimuli.location
= list.files(json.location) %>% mixedsort
filenames = list.files(stimuli.location) %>% mixedsort
imagenames
= df.exp3.trials %>%
df.exp3.percentages select(trial, a, b, responsibility = r_mean, prediction = p_mean) %>%
left_join(df.exp3.master %>% select(trial, model = any_of(fit_bestmodel))) %>%
left_join(df.exp123.regression %>%
filter(exp == 3) %>%
select(trial, features = fit123)) %>%
mutate(model = model * 100, features = features * 100)
for (i in 1:length(imagenames)) {
# for (i in 1:1) {
= rjson::fromJSON(file = paste0(json.location, filenames[i])) %>%
tmp unlist() %>%
matrix(ncol = 4, byrow = T) %>%
as.data.frame(stringsAsFactors = F) %>%
setNames(c("index", "x", "y", "angle")) %>%
mutate(index = str_replace(index, "brick_", "")) %>%
mutate(across(.cols = everything(),
.fns = ~ as.numeric(.))) %>%
mutate(across(c(x, y), ~ . * 100)) %>%
mutate(y = 600 - y) %>%
filter(x != -500) %>%
mutate(e.x_1 = x - 40,
e.y_1 = y + 20,
e.x_2 = x + 40,
e.y_2 = y + 20,
e.x_3 = x + 40,
e.y_3 = y - 20,
e.x_4 = x - 40,
e.y_4 = y - 20) %>%
mutate(prediction = df.exp3.percentages$prediction[[i]],
a = df.exp3.percentages$a[[i]],
b = df.exp3.percentages$b[[i]]) %>%
inner_join(df.exp3.info %>% filter(trial == i) %>% select(index, fall)) %>%
mutate(selection = ifelse(index == b, round(prediction, 0), NA)) %>%
select(-prediction) %>%
wideToLong(within = 'edge') %>%
mutate(e.xt = x - e.x,
e.yt = y - e.y,
angle = -angle,
e.xr = e.xt * cos(angle) - e.yt * sin(angle),
e.yr = e.yt * cos(angle) + e.xt * sin(angle),
e.xrm = e.xr + x,
e.yrm = e.yr + y
%>%
) arrange(index, edge)
= png::readPNG(paste0(stimuli.location, imagenames[i]))
image = ggplot(tmp) +
p annotation_custom(rasterGrob(image,
width = unit(1, "npc"),
height = unit(1, "npc")),
xmin = 0, xmax = 800, ymin = 0, ymax = 600) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 800)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 600)) +
scale_fill_gradient(low = 'black', high = 'red', limits = c(0, 100)) +
scale_color_manual(values = c("black", "red")) +
theme_void() +
theme(axis.ticks.length = unit(0.001, "mm"),
legend.position = 'none')
# text on top
= p +
p geom_text(data = df.exp3.percentages %>%
filter(trial == i) %>%
mutate(model_text = paste0("CSM: ", as.character(round(model)), "%")),
mapping = aes(x = 580,
y = 560,
hjust = 1,
label = model_text),
size = 7) +
geom_text(data = df.exp3.percentages %>%
filter(trial == i) %>%
mutate(feat_text = paste0("features model: ",
as.character(round(features)), "%")),
mapping = aes(x = 580,
y = 530,
hjust = 1,
label = feat_text),
size = 7) +
geom_text(data = df.exp3.percentages %>%
filter(trial == i) %>%
mutate(pred_text = paste0("responsibility: ",
as.character(round(responsibility)))),
mapping = aes(x = 580,
y = 500,
hjust = 1,
label = pred_text),
size = 7)
# text with participant prediction judgments
= tmp %>%
df.text select(index, x, y, selection) %>%
group_by(index) %>%
arrange(-selection) %>%
filter(row_number() == 1) %>%
distinct()
# p = p +
# geom_text(data = df.text %>% filter(!is.na(selection)),
# aes(x = x, y = y, label = selection), color="black", size = 7)
# draw block borders and fill
= tmp %>%
a filter(row_number() == 1) %>%
pull(a)
= tmp %>%
b filter(row_number() == 1) %>%
pull(b)
= tmp %>%
tmp arrange(fall)
for (j in unique(tmp$index)) {
if (j == a) next
if (j == b) {
= p + geom_polygon(data = tmp %>% filter(index == j),
p aes(x = e.xrm, y = e.yrm, alpha=.5),
fill = "white")
}= p + geom_polygon(data = tmp %>% filter(index == j),
p aes(x = e.xrm, y = e.yrm, color = as.factor(fall)),
alpha = 0.1, linetype = 1, size = 1.5)
}
= p +
p geom_text(data = df.text %>% filter(!is.na(selection)),
aes(x = x, y = y, label = selection), color="black", size = 7)
= p +
p theme(plot.margin = margin(-5, -145, 0, -145, "pt"))
= paste0("../../figures/plots/exp3/stimuli_percentages/", i, ".pdf")
img.name ggsave(img.name, width = 4, height = 6, dpi = 300)
}
= c(7, 35, 11, 4, 37, 8, 21, 31)
ids = c("a)", "b)", "c)", "d)", "e)", "f)", "g)", "h)")
labels
for (i in 1:length(ids)) {
= ids[[i]]
id = image_read_pdf(paste0('../../figures/plots/exp3/stimuli_percentages/',id,'.pdf'))
img = ggplot() + background_image(img) + theme_void() +
img coord_fixed(ratio=59/40) + ggtitle(labels[[i]]) +
theme(plot.title = element_text(face = "bold", size = 20, vjust = -5, hjust = 0.06),
plot.margin = unit(c(0, 20, 0, 0), "pt"))
if (i == 1) {
= img
combo_img else {
} = combo_img + img
combo_img
}
}
+ plot_layout(ncol = 4, nrow = 2)
combo_img
ggsave("../../figures/plots/exp3/exp3_samples.pdf",width = 12,height = 8)
= c(7, 35, 11, 4, 37, 8, 21, 31)
ids
= df.exp3.master %>%
df.plot select(trial, index, r_mean, r_low, r_high, p_mean, p_low, p_high,
hsm = any_of(fit_bestmodel)) %>%
left_join(df.exp123.regression %>%
filter(exp == 3) %>%
select(trial, index, features = fit123),
by = c("trial", "index")) %>%
mutate(across(c(features, hsm, contains("r_"), contains("p_")), ~.*100))
= df.plot %>%
df.tmp filter(trial %in% ids) %>%
arrange(ids) %>%
mutate(lets = letters[1:8])
# PREDICTION
# hsm model
= ggplot(df.plot, aes(x = hsm, y = p_mean)) +
plot.hsm_prediction geom_abline(intercept = 0, slope = 1, linetype = 2) +
stat_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = p_low, ymax = p_high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
geom_point(data=df.tmp, aes(hsm, p_mean), color="red", size=3) +
geom_text_repel(data = df.tmp, aes(label = lets), color="red", size=5) +
# geom_text_repel(aes(label = trial)) +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$hsm, df.plot$p_mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 1, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$hsm, df.plot$p_mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 2.5, size = 8) +
labs(x = "CSM", y = "prediction judgment (%)", tag = "a)") +
scale_x_continuous(breaks = seq(0, 100, 20), limits = c(0, 100)) +
scale_y_continuous(breaks = seq(0, 100, 20), limits = c(0, 120)) +
theme(panel.grid = element_blank(), plot.tag = element_text(face = "bold"))
# features model
= ggplot(df.plot, aes(x = features, y = p_mean)) +
plot.features_prediction geom_abline(intercept = 0, slope = 1, linetype = 2) +
stat_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = p_low, ymax = p_high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
geom_point(data=df.tmp, aes(features, p_mean), color="red", size=3) +
geom_text_repel(data = df.tmp, aes(label = lets), color="red", size=5) +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$features, df.plot$p_mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 1.5, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$features, df.plot$p_mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 3, size = 8) +
labs(x = "features model", y = "prediction judgment (%)", tag = "c)") +
scale_x_continuous(breaks = seq(0, 100, 20), limits = c(0, 100)) +
scale_y_continuous(breaks = seq(0, 100, 20), limits = c(0, 120)) +
theme(panel.grid = element_blank(), plot.tag = element_text(face = "bold"))
# RESPONSIBILITY
# hsm model
= ggplot(df.plot, aes(x = hsm, y = r_mean)) +
plot.hsm_responsibility geom_abline(intercept = 0, slope = 1, linetype = 2) +
stat_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = r_low, ymax = r_high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$hsm, df.plot$r_mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 1, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$hsm, df.plot$r_mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 2.5, size = 8) +
labs(x = "CSM", y = "responsibility judgment", tag = "b)") +
scale_x_continuous(breaks = seq(0, 100, 20), limits = c(0, 100)) +
scale_y_continuous(breaks = seq(0, 100, 20), limits = c(0, 110)) +
theme(panel.grid = element_blank(), plot.tag = element_text(face = "bold"))
# features model
= ggplot(df.plot, aes(x = features, y = r_mean)) +
plot.features_responsibility geom_abline(intercept = 0, slope = 1, linetype = 2) +
stat_smooth(method = "lm", color = "lightskyblue", fill='lightblue1') +
geom_errorbar(aes(ymin = r_low, ymax = r_high), width = 0, color='grey70') +
geom_point(size = 2, color='black') +
annotate(geom = "text",
label = str_c("r = ", cor(df.plot$features, df.plot$r_mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 1.5, size = 8) +
annotate(geom = "text",
label = str_c("RMSE = ", rmse(df.plot$features, df.plot$r_mean) %>% round(2)),
x = .2, y = Inf, hjust = 0, vjust = 3, size = 8) +
labs(x = "features model", y = "responsibility judgment", tag = "d)") +
scale_x_continuous(breaks = seq(0, 100, 20), limits = c(0, 100)) +
scale_y_continuous(breaks = seq(0, 100, 20), limits = c(0, 110)) +
theme(panel.grid = element_blank(), plot.tag = element_text(face = "bold"))
<- ggplot() +
col1 annotate(geom = 'text', x=-5, y=0, label="Prediction", size=9, fontface = "bold" ) +
theme_void()
<- ggplot() +
col2 annotate(geom = 'text', x=0, y=0, label="Responsibility", size=9, fontface = "bold") +
theme_void()
/ plot.hsm_prediction / plot.features_prediction) +
(col1 plot_layout(heights = c(0.15, 1, 1)) |
/ plot.hsm_responsibility / plot.features_responsibility) +
(col2 plot_layout(heights = c(0.15, 1, 1))
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
`geom_smooth()` using formula 'y ~ x'
# ggsave("../../figures/plots/exp3/exp3_plots.pdf", width = 10, height = 10)