library("xtable") # for saving tables
library("emmeans") # for comparing models
library("knitr") # for knitting
library("RSQLite") # for reading in participants.db file
library("tidyjson") # for reading in json data
library("brms") # for Bayesian data analysis
library("tidyverse") # for everything else
# filter out incompletes and debugs
df.exp1.data = df.exp1.data %>%
filter(status %in% 3:5,
codeversion == "experiment1")
# demographic information
df.exp1.demographics = df.exp1.data$datastring %>%
enter_object("questiondata") %>%
gather_object("index") %>%
append_values_string("value") %>%
as_tibble() %>%
rename(participant = document.id) %>%
pivot_wider(names_from = index,
values_from = value) %>%
mutate(time = difftime(df.exp1.data$endhit,
df.exp1.data$beginhit,
units = "mins"))
# judgments
df.exp1.long = df.exp1.data$datastring %>%
as.tbl_json() %>%
enter_object("data") %>%
gather_array("order") %>%
enter_object("trialdata") %>%
gather_object("index") %>%
append_values_string("value") %>%
as_tibble() %>%
rename(participant = document.id) %>%
pivot_wider(names_from = index,
values_from = value) %>%
na.omit()
# remove participants who didn't pass attention check items
# (who did not select either biological or purpose for attention check items
# "have bones" and "have DNA")
df.exp1.long = df.exp1.long %>%
group_by(participant) %>%
filter(!any(item == "have DNA" & response =="social" |
item == "have DNA" & response == "behavioral" |
item == "have bones" & response == "social" |
item == "have bones" & response == "behavioral")) %>%
ungroup() %>%
filter(!(item %in% c("have bones", "have DNA")))
#> [1] 1 2 3 4 5 6 8 9 10 11 12 13 14 15 16 18 19 20 21 22 23 25 26 27 28
#> [26] 29 30 32 33 34 35 36 37 38 39 41 42 43 44 45 46 47 48 49 50 51 52 54 55 56
df.exp1.demographics = df.exp1.demographics %>%
# remove participants from demographics who failed the comprehension check
filter(!(participant %in% c("7", "17", "24", "31", "40", "53"))) %>%
select(participant, age : ethnicity) %>%
mutate(gender = str_to_lower(gender),
gender = str_replace(gender, "^m$", "male"),
race = str_to_lower(race))
df.exp1.demographics %>%
summarise(age_mean = mean(as.numeric(age), na.rm = TRUE))
#> # A tibble: 1 × 1
#> age_mean
#> <dbl>
#> 1 36.4
#> # A tibble: 1 × 1
#> age_sd
#> <dbl>
#> 1 9.88
#> # A tibble: 3 × 2
#> # Groups: gender [3]
#> gender n
#> <chr> <int>
#> 1 female 15
#> 2 male 33
#> 3 <NA> 2
#> # A tibble: 9 × 2
#> # Groups: race [9]
#> race n
#> <chr> <int>
#> 1 "" 1
#> 2 "american" 1
#> 3 "asian" 1
#> 4 "asian indian" 1
#> 5 "black" 2
#> 6 "caucasian" 4
#> 7 "latino" 1
#> 8 "white" 37
#> 9 <NA> 2
#> # A tibble: 3 × 2
#> # Groups: ethnicity [3]
#> ethnicity n
#> <chr> <int>
#> 1 hispanic 5
#> 2 non-hispanic 43
#> 3 <NA> 2
# create dataframe for plotting and remove pollinate flowers since it was used as an example in exp1
df.exp1.top_items = df.exp1.long %>%
filter(!item == "pollinate flowers") %>%
count(item, response) %>%
arrange(response, desc(n)) %>%
group_by(response) %>%
slice_head(n = 4) %>%
mutate(rank = 1:n()) %>%
ungroup()
df.plot = df.exp1.long %>%
count(item, response) %>%
left_join(df.exp1.top_items %>%
mutate(type = response) %>%
select(-c(n, response)),
by = c("item")) %>%
na.omit() %>%
mutate(type = factor(type,
levels = c("behavior", "biology", "purpose", "social")))
#give short labels to properties for plotting
df.plot = df.plot %>%
mutate(item = str_replace_all(item, "are warm blooded", "blood"),
item = str_replace_all(item, "have hair", "hair"),
item = str_replace_all(item, "have pointy ears", "ears"),
item = str_replace_all(item, "have long legs", "legs"),
item = str_replace_all(item, "enable decomposition", "decompose"),
item = str_replace_all(item, "purify water", "purify"),
item = str_replace_all(item, "make honey", "honey"),
item = str_replace_all(item, "aerate soil", "aerate"),
item = str_replace_all(item, "share food with group members", "share"),
item = str_replace_all(item, "cooperate with group members", "cooperate"),
item = str_replace_all(item, "follow the dominant group member", "follow"),
item = str_replace_all(item, "pair bond", "bond"))
ggplot(data = df.plot,
mapping = aes(x = reorder(item, rank),
y = n,
fill = response)) +
geom_col(width = 0.75,
color = "black",
position = position_fill(reverse = TRUE)) +
scale_y_continuous(labels = c("0%", "25%", "50%", "75%", "100%")) +
coord_flip(expand = F) +
facet_wrap(~type,
ncol = 2,
scales = "free") +
labs(title = element_blank(),
y = element_blank()) +
scale_fill_brewer(palette = "Set1") +
theme(plot.title = element_text(hjust = 0.5,
size = 16,
face = "bold"),
text = element_text(size = 16),
strip.text = element_text(size = 16,
margin = margin(b = 0.3,
t = 0.1,
unit = "cm")),
strip.background = element_blank(),
axis.title.y = element_blank(),
legend.position = "bottom",
legend.title = element_blank(),
plot.margin = margin(t = 0.5,
b = 0.2,
l = 0.2,
r = 1,
unit = "cm"))
ggsave(filename = "../../figures/experiment1/exp1_top_stacked.pdf",
height = 4,
width = 8)
# create dataframe to get proportion of properties that were categorized as expected
df.plot = df.exp1.long %>%
mutate(expected = case_when(item %in% c("jump", "swim", "chew","run", "swallow", "fly", "smell", "salivate", "urinate", "digest slowly") ~ "behavior",
item %in% c("are warm blooded", "have hair", "have long legs", "have pointy ears", "have sharp teeth", "have a tail", "have small nostrils", "have spots", "have claws", "have large eyes") ~ "biology",
item %in% c("enable decomposition", "pollinate flowers", "purify water", "make honey", "aerate soil", "enable nitrogen fixation", "recycle nutrients in soil", "catch and kill insects", "produce oxygen", "eat animal carcasses") ~ "purpose",
item %in% c("share food with group members", "cooperate with group members", "follow the dominant group member", "pair bond", "dance before mating", "are nomadic", "sing", "mark territory", "store resources", "build shelter") ~ "social")) %>%
group_by(expected, response, item) %>%
count(.drop = F) %>%
mutate(proportion = n/50)
ggplot(data = df.plot) +
geom_tile(aes(x = expected,
y = response,
fill = proportion,
group = proportion),
color = "black") +
scale_fill_gradient(low = "white", high = "black", limits = c(0, 1), breaks = seq(0, 1, 0.25)) +
theme(legend.title = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
text = element_text(size = 30))
ggsave(filename = "../../figures/experiment1/exp1_heat_map.pdf",
height = 5,
width = 8)
#> Rows: 200 Columns: 27
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (20): proliferate.condition, expCondition, item_0, item_0_categorization...
#> dbl (6): workerid, comprehension_check, response_behavioral, response_biolo...
#> lgl (1): error
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# catgeorization judgment dataframe
df.exp2.long = df.exp2 %>%
rename(condition = expCondition) %>%
mutate(participant = rep(1:100, each = 2)) %>%
select(participant, comprehension_check, condition, response_behavioral: response_social) %>%
pivot_longer(cols = response_behavioral:response_social,
names_to = "property",
values_to = "response") %>%
mutate(property = str_remove_all(property, "response_"),
property = str_replace_all(property, "behavioral", "behavior"),
property = str_replace_all(property, "biological", "biology"),
response = recode(response, `0` = 1, `1` = 2, `2` = 3, `3` = 4, `4` = 5, `5` = 6, `6` = 7),
condition = recode(condition, "non_generic" = "specific"),
study = rep("transformation"))
# read in demographic data
df.exp2.demographics = read_csv("../../data/experiment2/experiment2_demographics.csv")
#> Rows: 100 Columns: 7
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (4): proliferate.condition, ethnicity, gender, race
#> dbl (2): workerid, age
#> lgl (1): error
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> # A tibble: 1 × 1
#> age_mean
#> <dbl>
#> 1 34.1
#> # A tibble: 1 × 1
#> age_sd
#> <dbl>
#> 1 11.8
#> # A tibble: 3 × 2
#> # Groups: gender [3]
#> gender n
#> <chr> <int>
#> 1 Female 48
#> 2 Male 48
#> 3 Non-binary 4
#> # A tibble: 4 × 2
#> # Groups: race [4]
#> race n
#> <chr> <int>
#> 1 Asian 14
#> 2 Black/African American 6
#> 3 other_race 4
#> 4 White 76
#> # A tibble: 3 × 2
#> # Groups: ethnicity [3]
#> ethnicity n
#> <chr> <int>
#> 1 Hispanic 14
#> 2 Non-Hispanic 85
#> 3 <NA> 1
ggplot(data = df.exp2.long,
mapping = aes(x = property,
y = response,
group = condition,
color = property,
fill = property,
shape = condition)) +
geom_point(position = position_jitterdodge(dodge.width = 0.5,
jitter.width = 0.1,
jitter.height = 0.0),
alpha = 0.05) +
theme(legend.position = "none") +
stat_summary(fun.data = "mean_cl_boot",
position = position_dodge(width = 0.5),
color = "black",
size = .5) +
scale_fill_brewer(palette = "Set1") +
scale_color_brewer(palette = "Set1") +
scale_y_continuous(breaks = 1:7) +
scale_shape_manual(values = c(21, 23)) +
theme(plot.title = element_text(size=16, hjust = .5),
legend.title = element_blank(),
legend.position = "bottom",
axis.title.x = element_blank()) +
guides(fill = "none",
color = "none",
shape = guide_legend(override.aes = list(fill = "gray50")))
ggsave(width = 5, height = 3, "../../figures/experiment2/exp2_property_by_condition_means.pdf")
# create dataframe to get proportion of properties that were categorized as expected
df.plot = df.exp2 %>%
mutate(participant = rep(1:100, each = 2)) %>%
rename(condition = expCondition) %>%
relocate(participant) %>%
select(-c("item_0", "item_1", "error")) %>%
pivot_longer(starts_with("item"),
names_to = "tmp",
values_to = "value") %>%
separate(col = tmp,
into = c("placeholder_a", "thing", "rank", "placeholder_b"),
sep = "_") %>%
group_by(rank) %>%
mutate(row = row_number()) %>%
pivot_wider(names_from = rank,
values_from = value) %>%
select(-c("workerid", "proliferate.condition", "placeholder_a", "placeholder_b", "row")) %>%
mutate(thing = recode(thing, "0" = "first", "1" = "second")) %>%
rename(item = property,
response = categorization) %>%
mutate(item = str_replace_all(item, c("pair bonds" = "pair bond", "follows the dominant group member" = "follow the dominant group member", "cooperates with group members" = "cooperate with group members", "shares food with group members" = "share food with group members", "aerates soil" = "aerate soil", "makes honey" = "make honey", "purifies water" = "purify water", "enables decomposition" = "enable decomposition", "has pointy ears" = "have pointy ears", "has long legs" = "have long legs", "has hair" = "have hair", "is warm blooded" = "are warm blooded", "runs" = "run", "chews" = "chew", "swims" = "swim", "jumps" = "jump")),
expected = case_when(item %in% c("jump", "swim", "chew","run", "swallow", "fly", "smell", "salivate", "urinate", "digest slowly") ~ "behavior",
item %in% c("are warm blooded", "have hair", "have long legs", "have pointy ears", "have sharp teeth", "have a tail", "have small nostrils", "have spots", "have claws", "have large eyes") ~ "biology",
item %in% c("enable decomposition", "pollinate flowers", "purify water", "make honey", "aerate soil", "enable nitrogen fixation", "recycle nutrients in soil", "catch and kill insects", "produce oxygen", "eat animal carcasses") ~ "purpose",
item %in% c("share food with group members", "cooperate with group members", "follow the dominant group member", "pair bond", "dance before mating", "are nomadic", "sing", "mark territory", "store resources", "build shelter") ~ "social")) %>%
group_by(expected, response, item) %>%
count(.drop = F) %>%
mutate(proportion = n/100) %>%
mutate(response = str_replace_all(response, "behavioral", "behavior"),
response = str_replace_all(response, "biological", "biology")) %>%
ungroup() %>%
add_row(expected = "social", response = "purpose", item = "just for plotting", n = 0 , proportion = 0.00)
ggplot(data = df.plot) +
geom_tile(aes(x = expected,
y = response,
fill = proportion,
group = proportion),
color = "black") +
scale_fill_gradient(low = "white", high = "black", limits = c(0, 1), breaks = seq(0, 1, 0.25)) +
theme(legend.title = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
text = element_text(size = 30))
ggsave(filename = "../../figures/experiment2/exp2_heat_map.pdf",
height = 5,
width = 8)
# property categorization and categorization judgment dataframe
df.exp2.property_categorization = df.exp2 %>%
mutate(participant = rep(1:100, each = 2)) %>%
rename(condition = expCondition) %>%
relocate(participant) %>%
select(-c("item_0", "item_1", "error")) %>%
pivot_longer(starts_with("item"),
names_to = "tmp",
values_to = "value") %>%
separate(col = tmp,
into = c("placeholder_a", "thing", "rank", "placeholder_b"),
sep = "_") %>%
group_by(rank) %>%
mutate(row = row_number()) %>%
pivot_wider(names_from = rank,
values_from = value) %>%
mutate(thing = recode(thing, "0" = "first", "1" = "second"),
pair = rep(c("first", "second"), each = 8, times = 100)) %>%
select(-c("workerid", "proliferate.condition", "placeholder_a", "placeholder_b", "row")) %>%
pivot_longer(cols = response_behavioral:response_social,
names_to = "property_changed",
values_to = "response") %>%
mutate(property_changed = str_remove_all(property_changed, "response_"),
response = recode(response, `0` = 1, `1` = 2, `2` = 3, `3` = 4, `4` = 5, `5` = 6, `6` = 7),
condition = recode(condition, "non_generic" = "specific")) %>%
rename(property_selected = property) %>%
mutate(property = recode(thing, "first" = "original", "second" = "new")) %>%
select(-thing) %>%
relocate(response, .after = last_col()) %>%
mutate(property_selected = str_replace_all(property_selected, c("pair bonds" = "pair bond", "follows the dominant group member" = "follow the dominant group member", "cooperates with group members" = "cooperate with group members", "shares food with group members" = "share food with group members", "aerates soil" = "aerate soil", "makes honey" = "make honey", "purifies water" = "purify water", "enables decomposition" = "enable decomposition", "has pointy ears" = "have pointy ears", "has long legs" = "have long legs", "has hair" = "have hair", "is warm blooded" = "are warm blooded", "runs" = "run", "chews" = "chew", "swims" = "swim", "jumps" = "jump")))
df.exp2.bio = df.exp2.property_categorization %>%
filter(property_changed == "biological" & categorization == "biological")
df.exp2.behavior = df.exp2.property_categorization %>%
filter(property_changed == "behavioral" & categorization == "behavioral")
df.exp2.social = df.exp2.property_categorization %>%
filter(property_changed == "social" & categorization == "social")
df.exp2.purpose = df.exp2.property_categorization %>%
filter(property_changed == "purpose" & categorization == "purpose")
df.exp2.property_categorization = df.exp2.behavior %>%
bind_rows(df.exp2.bio,
df.exp2.purpose,
df.exp2.social)
df.plot = df.exp2.property_categorization %>%
filter(categorization == "behavioral" & property_selected %in% c("jump", "chew", "swim", "run") | categorization == "biological" & property_selected %in% c("have hair", "have long legs", "are warm blooded", "have pointy ears") | categorization == "purpose" & property_selected %in% c("purify water", "aerate soil", "enable decomposition", "make honey") | categorization == "social" & property_selected %in% c("pair bond", "share food with group members", "follow the dominant group member", "cooperate with group members"))
#give short labels to properties for plotting
df.plot = df.plot %>%
mutate(property_selected = str_replace_all(property_selected, "are warm blooded", "blood"),
property_selected = str_replace_all(property_selected, "have hair", "hair"),
property_selected = str_replace_all(property_selected, "have pointy ears", "ears"),
property_selected = str_replace_all(property_selected, "have long legs", "legs"),
property_selected = str_replace_all(property_selected, "enable decomposition", "decompose"),
property_selected = str_replace_all(property_selected, "purify water", "purify"),
property_selected = str_replace_all(property_selected, "make honey", "honey"),
property_selected = str_replace_all(property_selected, "aerate soil", "aerate"),
property_selected = str_replace_all(property_selected, "share food with group members", "share"),
property_selected = str_replace_all(property_selected, "cooperate with group members", "cooperate"),
property_selected = str_replace_all(property_selected, "follow the dominant group member", "follow"),
property_selected = str_replace_all(property_selected, "pair bond", "bond"))
# rename property types
df.plot = df.plot %>%
mutate(property_changed = str_replace_all(property_changed, "behavioral", "behavior"),
property_changed = str_replace_all(property_changed, "biological", "biology"))
ggplot(data = df.plot,
mapping = aes(x = property_selected,
y = response,
group = property_changed,
color = property_changed,
fill = property_changed)) +
geom_point(position = position_jitterdodge(dodge.width = 0.5,
jitter.width = 0.1,
jitter.height = 0.3),
alpha = 0.1) +
stat_summary(fun.data = "mean_cl_boot",
position = position_dodge(width = 0.5),
shape = 21,
color = "black",
size = .9) +
scale_fill_brewer(palette = "Set1") +
scale_color_brewer(palette = "Set1") +
scale_y_continuous(breaks = 1:7) +
theme(plot.title = element_text(hjust = 0.5,
size = 18,
face = "bold"),
text = element_text(size = 18),
strip.text = element_text(size = 18,
margin = margin(b = 0.3,
t = 0.1,
unit = "cm")),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
strip.background = element_blank(),
legend.position = "none",
legend.title = element_blank(),
plot.margin = margin(t = 0.5,
b = 0.2,
l = 0.2,
r = 1,
unit = "cm")) +
coord_flip() +
facet_wrap(~property_changed,
scales = "free")
ggsave(width = 10, height = 6, "../../figures/experiment2/exp2_expected_property_selections_for_property_type_ratings.pdf")
## Stats
fit.brm1 = brm(formula = response ~ 1 + property*condition + (1 | participant),
data = df.exp2.long,
seed = 1,
file = "cache/brm1")
fit.brm1
#> Family: gaussian
#> Links: mu = identity; sigma = identity
#> Formula: response ~ 1 + property * condition + (1 | participant)
#> Data: df.exp2 (Number of observations: 796)
#> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
#> total post-warmup draws = 4000
#>
#> Group-Level Effects:
#> ~participant (Number of levels: 100)
#> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
#> sd(Intercept) 0.85 0.08 0.71 1.01 1.00 1670 2978
#>
#> Population-Level Effects:
#> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
#> Intercept 2.99 0.09 2.81 3.17 1.00 1671
#> property1 -0.20 0.08 -0.34 -0.05 1.00 6998
#> property2 0.02 0.07 -0.13 0.17 1.00 6524
#> property3 0.54 0.07 0.40 0.68 1.00 6303
#> condition1 0.06 0.10 -0.14 0.25 1.00 1808
#> property1:condition1 0.12 0.08 -0.02 0.27 1.00 8216
#> property2:condition1 0.05 0.08 -0.11 0.19 1.00 7384
#> property3:condition1 -0.07 0.08 -0.22 0.08 1.00 7842
#> Tail_ESS
#> Intercept 2569
#> property1 3084
#> property2 3056
#> property3 3132
#> condition1 2511
#> property1:condition1 3212
#> property2:condition1 2774
#> property3:condition1 2597
#>
#> Family Specific Parameters:
#> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
#> sigma 1.21 0.03 1.15 1.28 1.00 6417 3211
#>
#> Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
#> and Tail_ESS are effective sample size measures, and Rhat is the potential
#> scale reduction factor on split chains (at convergence, Rhat = 1).
#> $emmeans
#> condition = generic:
#> property emmean lower.HPD upper.HPD
#> behavior 2.97 2.61 3.29
#> biology 3.11 2.78 3.47
#> purpose 3.51 3.19 3.87
#> social 2.59 2.25 2.93
#>
#> condition = specific:
#> property emmean lower.HPD upper.HPD
#> behavior 2.61 2.26 2.92
#> biology 2.91 2.59 3.25
#> purpose 3.54 3.21 3.88
#> social 2.67 2.33 2.99
#>
#> Point estimate displayed: median
#> HPD interval probability: 0.95
#>
#> $contrasts
#> condition = generic:
#> contrast estimate lower.HPD upper.HPD
#> behavior - biology -0.1440 -0.4956 0.1993
#> behavior - purpose -0.5491 -0.9012 -0.2205
#> behavior - social 0.3811 0.0395 0.7263
#> biology - purpose -0.4026 -0.7392 -0.0710
#> biology - social 0.5299 0.2034 0.8859
#> purpose - social 0.9298 0.5751 1.2491
#>
#> condition = specific:
#> contrast estimate lower.HPD upper.HPD
#> behavior - biology -0.2946 -0.6111 0.0586
#> behavior - purpose -0.9313 -1.2594 -0.5799
#> behavior - social -0.0617 -0.3959 0.2799
#> biology - purpose -0.6380 -0.9889 -0.3236
#> biology - social 0.2373 -0.1115 0.5541
#> purpose - social 0.8712 0.5406 1.2094
#>
#> Point estimate displayed: median
#> HPD interval probability: 0.95
#> Rows: 80 Columns: 6
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (5): property, category, question, answer, model
#> dbl (1): probability
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Rows: 80 Columns: 6
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (5): model, property, category, question, answer
#> dbl (1): probability
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> Rows: 80 Columns: 6
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (5): model, property, category, question, answer
#> dbl (1): probability
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df.llm = df.bert_base %>%
bind_rows(df.bert_large,
df.roberta_large)
df.llm = df.llm %>%
mutate(category = str_replace_all(category, "biological", "biology"),
model = str_replace_all(model, "bert_base", "BERT-base"),
model = str_replace_all(model, "bert_large", "BERT-large"),
model = str_replace_all(model, "roberta_large", "RoBERTa-large"))
ggplot(data = df.llm,
mapping = aes(x = category,
y = probability,
group = category,
color = category,
fill = category)) +
geom_point(position = position_jitterdodge(dodge.width = 0.5,
jitter.width = 0.1,
jitter.height = 0.0),
alpha = 0.2) +
stat_summary(fun.data = "mean_cl_boot",
position = position_dodge(width = 0.5),
shape = 21,
color = "black",
size = .5) +
scale_fill_brewer(palette = "Set1") +
scale_color_brewer(palette = "Set1") +
# scale_y_continuous(breaks = 1:7) +
theme(plot.title = element_text(size=16, hjust = .5),
axis.text.x = element_blank(),
legend.title = element_blank(),
legend.position="bottom",
axis.title.x = element_blank(),
text = element_text(size = 18)) +
facet_wrap(~ model)
ggsave(width = 8, height = 3, "../../figures/experiment2/bert_overall_property_means.pdf")
#> Rows: 200 Columns: 27
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (20): proliferate.condition, expCondition, item_0, item_0_categorization...
#> dbl (6): workerid, comprehension_check, response_behavioral, response_biolo...
#> lgl (1): error
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# catgeorization judgment dataframe
df.exp3 = df.exp3 %>%
rename(condition = expCondition) %>%
mutate(participant = rep(1:100, each = 2)) %>%
select(participant, comprehension_check, condition, response_behavioral: response_social) %>%
pivot_longer(cols = response_behavioral:response_social,
names_to = "property",
values_to = "response") %>%
mutate(property = str_remove_all(property, "response_"),
property = str_replace_all(property, "behavioral", "behavior"),
property = str_replace_all(property, "biological", "biology"),
response = recode(response, `0` = 1, `1` = 2, `2` = 3, `3` = 4, `4` = 5, `5` = 6, `6` = 7),
condition = recode(condition, "non_generic" = "specific"),
study = rep("induction"))
# read in demographic data
df.exp3.demographics = read_csv("../../data/experiment3/experiment3_demographics.csv")
#> Rows: 100 Columns: 7
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (4): proliferate.condition, ethnicity, gender, race
#> dbl (2): workerid, age
#> lgl (1): error
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#> # A tibble: 1 × 1
#> age_mean
#> <dbl>
#> 1 34.4
#> # A tibble: 1 × 1
#> age_sd
#> <dbl>
#> 1 14.0
#> # A tibble: 5 × 2
#> # Groups: gender [5]
#> gender n
#> <chr> <int>
#> 1 Female 57
#> 2 Male 38
#> 3 Non-binary 3
#> 4 other_gender 1
#> 5 <NA> 1
#> # A tibble: 5 × 2
#> # Groups: race [5]
#> race n
#> <chr> <int>
#> 1 Asian 20
#> 2 Black/African American 13
#> 3 other_race 2
#> 4 White 62
#> 5 <NA> 3
#> # A tibble: 3 × 2
#> # Groups: ethnicity [3]
#> ethnicity n
#> <chr> <int>
#> 1 Hispanic 10
#> 2 Non-Hispanic 87
#> 3 <NA> 3
ggplot(data = df.exp3,
mapping = aes(x = property,
y = response,
group = condition,
color = property,
fill = property,
shape = condition)) +
geom_point(position = position_jitterdodge(dodge.width = 0.5,
jitter.width = 0.1,
jitter.height = 0.0),
alpha = 0.05) +
theme(legend.position = "none") +
stat_summary(fun.data = "mean_cl_boot",
position = position_dodge(width = 0.5),
color = "black",
size = .5) +
scale_fill_brewer(palette = "Set1") +
scale_color_brewer(palette = "Set1") +
scale_y_continuous(breaks = 1:7) +
scale_shape_manual(values = c(21, 23)) +
theme(plot.title = element_text(size=16, hjust = .5),
legend.title = element_blank(),
legend.position = "bottom",
axis.title.x = element_blank()) +
guides(fill = "none",
color = "none",
shape = guide_legend(override.aes = list(fill = "gray50")))
ggsave(width = 5, height = 3, "../../figures/experiment3/exp3_property_by_condition_means.pdf")
fit.brm2 = brm(formula = response ~ 1 + property*condition + (1 | participant),
data = df.exp3,
seed = 1,
file = "cache/brm2")
fit.brm2
#> Family: gaussian
#> Links: mu = identity; sigma = identity
#> Formula: response ~ 1 + property * condition + (1 | participant)
#> Data: df.exp3 (Number of observations: 798)
#> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
#> total post-warmup draws = 4000
#>
#> Group-Level Effects:
#> ~participant (Number of levels: 100)
#> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
#> sd(Intercept) 0.89 0.08 0.75 1.04 1.00 1656 2865
#>
#> Population-Level Effects:
#> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
#> Intercept 3.11 0.10 2.92 3.30 1.00 1167
#> property1 -0.39 0.07 -0.53 -0.26 1.00 6374
#> property2 0.19 0.07 0.06 0.34 1.00 6769
#> property3 0.68 0.07 0.54 0.82 1.00 6569
#> condition1 -0.09 0.10 -0.29 0.10 1.00 1146
#> property1:condition1 0.01 0.07 -0.12 0.16 1.00 8268
#> property2:condition1 0.01 0.07 -0.13 0.15 1.00 7574
#> property3:condition1 -0.07 0.07 -0.21 0.07 1.00 6779
#> Tail_ESS
#> Intercept 1708
#> property1 2643
#> property2 2839
#> property3 3473
#> condition1 2004
#> property1:condition1 3221
#> property2:condition1 3232
#> property3:condition1 3006
#>
#> Family Specific Parameters:
#> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
#> sigma 1.15 0.03 1.09 1.21 1.00 6713 3610
#>
#> Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
#> and Tail_ESS are effective sample size measures, and Rhat is the potential
#> scale reduction factor on split chains (at convergence, Rhat = 1).
#> $emmeans
#> condition = generic:
#> property emmean lower.HPD upper.HPD
#> behavior 2.64 2.32 2.98
#> biology 3.22 2.87 3.53
#> purpose 3.63 3.31 3.98
#> social 2.58 2.27 2.93
#>
#> condition = specific:
#> property emmean lower.HPD upper.HPD
#> behavior 2.79 2.47 3.13
#> biology 3.39 3.06 3.73
#> purpose 3.95 3.60 4.27
#> social 2.67 2.35 3.01
#>
#> Point estimate displayed: median
#> HPD interval probability: 0.95
#>
#> $contrasts
#> condition = generic:
#> contrast estimate lower.HPD upper.HPD
#> behavior - biology -0.5805 -0.891 -0.2563
#> behavior - purpose -0.9902 -1.297 -0.6524
#> behavior - social 0.0572 -0.263 0.3601
#> biology - purpose -0.4103 -0.723 -0.0673
#> biology - social 0.6405 0.318 0.9512
#> purpose - social 1.0463 0.751 1.3717
#>
#> condition = specific:
#> contrast estimate lower.HPD upper.HPD
#> behavior - biology -0.5943 -0.926 -0.2684
#> behavior - purpose -1.1564 -1.489 -0.8627
#> behavior - social 0.1275 -0.189 0.4507
#> biology - purpose -0.5606 -0.877 -0.2572
#> biology - social 0.7221 0.408 1.0423
#> purpose - social 1.2828 0.957 1.5883
#>
#> Point estimate displayed: median
#> HPD interval probability: 0.95
#> R version 4.2.2 (2022-10-31)
#> Platform: aarch64-apple-darwin20 (64-bit)
#> Running under: macOS Ventura 13.2
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] forcats_0.5.2 stringr_1.5.0 dplyr_1.0.10 purrr_0.3.5
#> [5] readr_2.1.3 tidyr_1.2.1 tibble_3.1.8 ggplot2_3.4.0
#> [9] tidyverse_1.3.2 brms_2.18.0 Rcpp_1.0.9 tidyjson_0.3.1
#> [13] RSQLite_2.2.19 knitr_1.41 emmeans_1.8.3 xtable_1.8-4
#>
#> loaded via a namespace (and not attached):
#> [1] readxl_1.4.1 backports_1.4.1 Hmisc_4.7-2
#> [4] systemfonts_1.0.4 plyr_1.8.8 igraph_1.3.5
#> [7] splines_4.2.2 crosstalk_1.2.0 rstantools_2.2.0
#> [10] inline_0.3.19 digest_0.6.30 htmltools_0.5.4
#> [13] fansi_1.0.3 magrittr_2.0.3 checkmate_2.1.0
#> [16] memoise_2.0.1 cluster_2.1.4 googlesheets4_1.0.1
#> [19] tzdb_0.3.0 modelr_0.1.10 RcppParallel_5.1.5
#> [22] matrixStats_0.63.0 vroom_1.6.0 xts_0.12.2
#> [25] timechange_0.1.1 prettyunits_1.1.1 jpeg_0.1-10
#> [28] colorspace_2.0-3 blob_1.2.3 rvest_1.0.3
#> [31] textshaping_0.3.6 haven_2.5.1 xfun_0.35
#> [34] callr_3.7.3 crayon_1.5.2 jsonlite_1.8.4
#> [37] survival_3.4-0 zoo_1.8-11 glue_1.6.2
#> [40] gtable_0.3.1 gargle_1.2.1 distributional_0.3.1
#> [43] pkgbuild_1.4.0 rstan_2.21.7 abind_1.4-5
#> [46] scales_1.2.1 mvtnorm_1.1-3 DBI_1.1.3
#> [49] miniUI_0.1.1.1 htmlTable_2.4.1 foreign_0.8-83
#> [52] bit_4.0.5 Formula_1.2-4 stats4_4.2.2
#> [55] StanHeaders_2.21.0-7 DT_0.26 htmlwidgets_1.5.4
#> [58] httr_1.4.4 threejs_0.3.3 RColorBrewer_1.1-3
#> [61] posterior_1.3.1 ellipsis_0.3.2 pkgconfig_2.0.3
#> [64] loo_2.5.1 farver_2.1.1 nnet_7.3-18
#> [67] deldir_1.0-6 sass_0.4.4 dbplyr_2.2.1
#> [70] utf8_1.2.2 tidyselect_1.2.0 labeling_0.4.2
#> [73] rlang_1.0.6 reshape2_1.4.4 later_1.3.0
#> [76] munsell_0.5.0 cellranger_1.1.0 tools_4.2.2
#> [79] cachem_1.0.6 cli_3.4.1 generics_0.1.3
#> [82] broom_1.0.1 evaluate_0.18 fastmap_1.1.0
#> [85] yaml_2.3.6 ragg_1.2.5 processx_3.8.0
#> [88] bit64_4.0.5 fs_1.5.2 nlme_3.1-160
#> [91] mime_0.12 xml2_1.3.3 compiler_4.2.2
#> [94] bayesplot_1.10.0 shinythemes_1.2.0 rstudioapi_0.14
#> [97] png_0.1-8 reprex_2.0.2 bslib_0.4.1
#> [100] stringi_1.7.8 highr_0.9 ps_1.7.2
#> [103] Brobdingnag_1.2-9 lattice_0.20-45 Matrix_1.5-1
#> [106] markdown_1.4 shinyjs_2.1.0 tensorA_0.36.2
#> [109] vctrs_0.5.1 pillar_1.8.1 lifecycle_1.0.3
#> [112] jquerylib_0.1.4 bridgesampling_1.1-2 estimability_1.4.1
#> [115] data.table_1.14.6 httpuv_1.6.6 latticeExtra_0.6-30
#> [118] R6_2.5.1 bookdown_0.30 promises_1.2.0.1
#> [121] gridExtra_2.3 codetools_0.2-18 colourpicker_1.2.0
#> [124] gtools_3.9.4 assertthat_0.2.1 withr_2.5.0
#> [127] shinystan_2.6.0 parallel_4.2.2 hms_1.1.2
#> [130] rpart_4.1.19 grid_4.2.2 coda_0.19-4
#> [133] rmarkdown_2.18 googledrive_2.0.0 shiny_1.7.3
#> [136] lubridate_1.9.0 base64enc_0.1-3 dygraphs_1.1.1.6
#> [139] interp_1.1-3