1 Libraries

library("lme4")        # for linear mixed effects models
library("rsample")     # for bootstrapping
library("xtable")      # for latex tables
library("kableExtra")  # for rmarkdown
library("knitr")       # for rmarkdown 
library("car")         # for hypothesis test
library("Metrics")     # for rmse
library("scales")      # for percentage plots
library("broom.mixed") # for model summaries
library("grateful")    # for package citations 
library("ggeffects")   # for marginal predictions
library("scales")      # for percentage scales
library("Hmisc")       # for bootstrapped means 
library("ggtext")      # for colored text in ggplot
library("tidyverse")   # for everything else

2 Helper functions

# set classic theme 
theme_set(theme_classic() + 
            theme(text = element_text(size = 16)))

# function for printing out html or latex tables 
print_table = function(data, format = "html", digits = 2){
  if(format == "html"){
    data %>% 
      kable(digits = digits) %>% 
      kable_styling()
  }else if(format == "latex"){
    data %>% 
      xtable(digits = digits,
             caption = "Caption",
             label = "tab:table") %>%
      print(include.rownames = F,
            booktabs = T,
            sanitize.colnames.function = identity,
            caption.placement = "top")
  }
}
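
A quick usage sketch (with a built-in dataset, not part of the analyses):

# render any data frame as an html (default) or latex table
head(mtcars) %>% 
  print_table(digits = 1)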

# suppress summarise() grouping messages 
options(dplyr.summarise.inform = F)

# remove the default "##" output prefix; show figures at the end of code chunks
opts_chunk$set(comment = "",
               fig.show = "hold")

# regression function: logistic mixed-effects model (lme4::glmer)
fun.regression = function(formula, data){
  results = glmer(formula = formula,
                  family = binomial,
                  data = data) 
  print(results)
  return(results)
}

# results table 
fun.table = function(results, type = "exploratory"){
  table = results %>% 
    tidy(conf.int = T) %>% 
    filter(effect == "fixed") %>% 
    select(-group)
  
  if (type == "exploratory"){
    table = table %>% 
      select(-c(p.value))
  }
  table %>% 
    print_table()
}

# colors 
l.color = list(agreement = "#89fa50",
               disagreement = "#ff968c",
               ambiguous = "#d38950",
               unambiguous = "#96d5d6")

3 EXPERIMENT 1

3.1 DATA

3.1.1 Read in data

# age recodes: one participant was recorded as 12 but was actually 11 (they 
# turned 12 the next day), so recoded to 11.99; another participant's birth 
# year implied an age of 8.69 although they reported being 9, so recoded to 
# 9.69 given that the reported age is likely more reliable

df.exp1 = read_csv("../data/data1_infer.csv") %>% 
  rename(trial_order = trial_order_dada) %>%
  mutate(age_continuous = ifelse(age_continuous == 12, 11.99, 
                          ifelse(age_continuous == 8.69, 9.69,
                                 age_continuous)))

3.2 STATS

3.2.1 Counterbalancing

  • Check whether the counterbalanced factors (story order, trial order, and valence) moderate the effect of trial type.

3.2.1.1 Story order

results = fun.regression(
  formula = "ambiguous_yes ~ 1 + condition_disagree * story_order_wagon + (1 | participant)",
  data = df.exp1)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree * story_order_wagon +  
    (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 243.5716  260.2593 -116.7858  233.5716       203 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.424   
Number of obs: 208, groups:  participant, 52
Fixed Effects:
                         (Intercept)                    condition_disagree  
                             -1.8473                                1.8559  
                   story_order_wagon  condition_disagree:story_order_wagon  
                              0.5618                               -0.2295  
fun.table(results)
effect  term                                   estimate  std.error  statistic  conf.low  conf.high
fixed   (Intercept)                               -1.85       0.40      -4.61     -2.63      -1.06
fixed   condition_disagree                         1.86       0.42       4.46      1.04       2.67
fixed   story_order_wagon                          0.56       0.36       1.56     -0.15       1.27
fixed   condition_disagree:story_order_wagon      -0.23       0.38      -0.61     -0.97       0.51

3.2.1.2 Trial order

results = fun.regression(
  formula = "ambiguous_yes ~ 1 + condition_disagree * trial_order + (1 | participant)",
  data = df.exp1)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: 
ambiguous_yes ~ 1 + condition_disagree * trial_order + (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 241.2821  257.9698 -115.6410  231.2821       203 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.477   
Number of obs: 208, groups:  participant, 52
Fixed Effects:
                   (Intercept)              condition_disagree  
                      -1.84349                         1.83408  
                   trial_order  condition_disagree:trial_order  
                      -0.02668                        -0.64759  
fun.table(results)
effect  term                            estimate  std.error  statistic  conf.low  conf.high
fixed   (Intercept)                        -1.84       0.40      -4.57     -2.63      -1.05
fixed   condition_disagree                  1.83       0.42       4.34      1.01       2.66
fixed   trial_order                        -0.03       0.35      -0.08     -0.72       0.67
fixed   condition_disagree:trial_order     -0.65       0.39      -1.67     -1.41       0.11

3.2.1.3 Valence

results = fun.regression(
  formula = "ambiguous_yes ~ 1 + condition_disagree * valence_neg + (1 | participant)",
  data = df.exp1)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: 
ambiguous_yes ~ 1 + condition_disagree * valence_neg + (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 244.9991  261.6868 -117.4996  234.9991       203 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.481   
Number of obs: 208, groups:  participant, 52
Fixed Effects:
                   (Intercept)              condition_disagree  
                     -1.844699                        1.867938  
                   valence_neg  condition_disagree:valence_neg  
                     -0.004625                        0.350825  
fun.table(results)
effect  term                            estimate  std.error  statistic  conf.low  conf.high
fixed   (Intercept)                        -1.84       0.40      -4.57     -2.64      -1.05
fixed   condition_disagree                  1.87       0.42       4.44      1.04       2.69
fixed   valence_neg                         0.00       0.36      -0.01     -0.70       0.69
fixed   condition_disagree:valence_neg      0.35       0.38       0.92     -0.40       1.10

3.2.2 Confirmatory analysis

3.2.2.1 Trial type effect

Participants should choose the ambiguous statement more often in disagreement than in agreement trials.

results = fun.regression(
  formula = "ambiguous_yes ~ 1 + condition_disagree + (1 | participant)",
  data = df.exp1)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 242.4062  252.4188 -118.2031  236.4062       205 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.455   
Number of obs: 208, groups:  participant, 52
Fixed Effects:
       (Intercept)  condition_disagree  
            -1.828               1.828  
fun.table(results, type = "confirmatory")
effect  term                estimate  std.error  statistic  p.value  conf.low  conf.high
fixed   (Intercept)            -1.83       0.40      -4.62        0     -2.60      -1.05
fixed   condition_disagree      1.83       0.41       4.47        0      1.03       2.63

3.2.2.2 Inferences above chance

Participants should choose the unambiguous statement in agreement trials above chance (chance = 33%, i.e., log odds = -0.69).
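
On the logit scale, a 1-in-3 chance corresponds to log odds of log((1/3)/(2/3)) ≈ -0.69, the value tested against below. A quick check in base R:

qlogis(1/3)    # -0.6931472, the log odds of a 1-in-3 chance
plogis(-0.69)  # ~0.334, back on the probability scale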

results = fun.regression(
  formula = "unambiguous_yes ~ 1 + condition_disagree + (1 | participant)",
  data = df.exp1)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: unambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 242.1031  252.1157 -118.0515  236.1031       205 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.475   
Number of obs: 208, groups:  participant, 52
Fixed Effects:
       (Intercept)  condition_disagree  
             1.841              -1.893  
fun.table(results, type = "confirmatory")
effect  term                estimate  std.error  statistic  p.value  conf.low  conf.high
fixed   (Intercept)             1.84       0.40       4.61        0      1.06       2.62
fixed   condition_disagree     -1.89       0.41      -4.57        0     -2.71      -1.08
linearHypothesis(results, "(Intercept) = -.69")
Linear hypothesis test

Hypothesis:
(Intercept) = - 0.69

Model 1: restricted model
Model 2: unambiguous_yes ~ 1 + condition_disagree + (1 | participant)

  Df  Chisq Pr(>Chisq)    
1                         
2  1 40.144  2.359e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.2.2.3 Ambiguous choice

Participants should choose the ambiguous statement in disagreement trials above chance (chance = 33%, i.e., log odds = -0.69; see the check above).

results = fun.regression(
  formula = "ambiguous_yes ~ 1 + condition_agree + (1 | participant)",
  data = df.exp1)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_agree + (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 242.4062  252.4188 -118.2031  236.4062       205 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.455   
Number of obs: 208, groups:  participant, 52
Fixed Effects:
    (Intercept)  condition_agree  
     -0.0003634       -1.8277981  
fun.table(results, type = "confirmatory")
effect  term             estimate  std.error  statistic  p.value  conf.low  conf.high
fixed   (Intercept)          0.00       0.31       0.00        1     -0.61       0.61
fixed   condition_agree     -1.83       0.41      -4.47        0     -2.63      -1.03
linearHypothesis(results, "(Intercept) = -.69")
Linear hypothesis test

Hypothesis:
(Intercept) = - 0.69

Model 1: restricted model
Model 2: ambiguous_yes ~ 1 + condition_agree + (1 | participant)

  Df  Chisq Pr(>Chisq)  
1                       
2  1 4.8736    0.02727 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.2.3 Exploratory analysis

3.2.3.1 Trial type by age interaction

results = fun.regression(
  formula = "ambiguous_yes ~ 1 + condition_disagree * age_continuous + (1 | participant)",
  data = df.exp1)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree * age_continuous + (1 |  
    participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 237.4681  254.1558 -113.7340  227.4681       203 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.546   
Number of obs: 208, groups:  participant, 52
Fixed Effects:
                      (Intercept)                 condition_disagree  
                           2.5127                            -6.0120  
                   age_continuous  condition_disagree:age_continuous  
                          -0.4749                             0.8474  
fun.table(results)
effect  term                               estimate  std.error  statistic  conf.low  conf.high
fixed   (Intercept)                            2.51       2.55       0.98     -2.49       7.52
fixed   condition_disagree                    -6.01       2.74      -2.20    -11.38      -0.65
fixed   age_continuous                        -0.47       0.28      -1.71     -1.02       0.07
fixed   condition_disagree:age_continuous      0.85       0.30       2.83      0.26       1.43

3.2.3.2 Moderation by age

# from 7 to 11 years 
for(i in 7:11){
  cat(str_c("Age = ", i, "\n\n"))
  results = fun.regression(
    formula = "ambiguous_yes ~ 1 + condition_disagree + (1 | participant)",
    data = df.exp1 %>% 
      filter(age_group == i))
}
Age = 7

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
 50.2215  55.2881 -22.1107  44.2215       37 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 2.049   
Number of obs: 40, groups:  participant, 10
Fixed Effects:
       (Intercept)  condition_disagree  
            -1.879               1.489  
Age = 8

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
 61.2404  66.8540 -27.6202  55.2404       45 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.994   
Number of obs: 48, groups:  participant, 12
Fixed Effects:
       (Intercept)  condition_disagree  
           -1.1496              0.5988  
Age = 9

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
 54.6152  59.6818 -24.3076  48.6152       37 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0.8046  
Number of obs: 40, groups:  participant, 10
Fixed Effects:
       (Intercept)  condition_disagree  
           -1.2513              0.7889  
Age = 10

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
 43.7876  48.8542 -18.8938  37.7876       37 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.833   
Number of obs: 40, groups:  participant, 10
Fixed Effects:
       (Intercept)  condition_disagree  
            -3.272               3.225  
Age = 11

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
 40.0251  45.0917 -17.0125  34.0251       37 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.543   
Number of obs: 40, groups:  participant, 10
Fixed Effects:
       (Intercept)  condition_disagree  
            -2.956               4.529  

3.3 PLOTS

3.3.1 Inference

set.seed(1)

df.plot.individual = df.exp1 %>% 
    mutate(condition_disagree = as.character(condition_disagree)) %>% 
    group_by(participant, age_continuous, condition_disagree) %>% 
    summarize(pct_amb = sum(ambiguous_yes)/n())

df.age.means = df.plot.individual %>%
  distinct(participant, age_continuous) %>%
  mutate(age_group = floor(age_continuous)) %>%
  group_by(age_group) %>%
  summarize(age_mean = mean(age_continuous),
            n = str_c("n = ", n())) %>%
  ungroup()

df.plot.means = df.exp1 %>% 
  mutate(condition_disagree = as.character(condition_disagree)) %>% 
  group_by(participant, age_group, condition_disagree) %>% 
  summarize(pct_amb = sum(ambiguous_yes)/n()) %>% 
  group_by(age_group, condition_disagree) %>% 
  reframe(response = smean.cl.boot(pct_amb),
          name = c("mean", "low", "high")) %>% 
  left_join(df.age.means,
            by = "age_group") %>% 
  pivot_wider(names_from = name,
              values_from = response) %>% 
  mutate(age_mean = ifelse(condition_disagree == 0, age_mean - 0.05, age_mean + 0.05))
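
In the pipeline above, Hmisc::smean.cl.boot() returns a named vector (Mean, Lower, Upper) holding the sample mean and its bootstrapped 95% confidence limits, which reframe() then spreads over the three rows labeled mean, low, and high. A minimal sketch with made-up values:

# bootstrapped mean and 95% CI for a toy vector (limits vary with the RNG state)
smean.cl.boot(c(0.25, 0.5, 0.5, 0.75, 1))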

df.plot.text = df.plot.means %>% 
  distinct(age_group, n)

ggplot() + 
  geom_hline(yintercept = 1/3,
             linetype = 2,
             alpha = 0.1) + 
  geom_point(data = df.plot.individual,
             mapping = aes(x = age_continuous,
                           y = pct_amb,
                           color = condition_disagree),
             alpha = 0.5,
             show.legend = T,
             shape = 16,
             size = 1.5) +
  geom_linerange(data = df.plot.means,
                 mapping = aes(x = age_mean,
                               y = mean,
                               ymin = low,
                               ymax = high),
                 color = "gray40") + 
  geom_point(data = df.plot.means,
             mapping = aes(x = age_mean,
                           y = mean,
                           fill = condition_disagree),
             shape = 21,
             size = 3,
             show.legend = T) +
  geom_text(data = df.plot.text,
            mapping = aes(x = age_group + 0.5,
                          y = 1.05,
                          label = n),
            hjust = 0.5) + 
  scale_y_continuous(labels = percent) +
  labs(x = "Age (in years)",
       y = "% Infer Ambiguous Utterance", 
       title = "Experiment 1: Inference") + 
  coord_cartesian(xlim = c(7, 12),
                  ylim = c(0, 1),
                  clip = "off") + 
  scale_color_manual(name = "Trial Type",
                     labels = c("Agreement", "Disagreement"),
                     values = c(l.color$agreement, l.color$disagreement),
                     guide = guide_legend(reverse = T)) +
  scale_fill_manual(name = "Trial Type",
                    labels = c("Agreement", "Disagreement"),
                    values = c(l.color$agreement, l.color$disagreement),
                    guide = guide_legend(reverse = T)) +
  theme(plot.title = element_text(hjust = 0.5,
                                  vjust = 2,
                                  size = 18,
                                  face = "bold"),
        axis.title.y = element_markdown(color = l.color$ambiguous),
        legend.position = "right")

ggsave(filename = "../figures/plots/exp1_inference.pdf",
       width = 8,
       height = 4)

4 EXPERIMENT 2

4.1 DATA

4.1.1 Read in data

df.exp2.predict = read_csv("../data/data2_predict.csv")
df.exp2.infer = read_csv("../data/data2_infer.csv") %>% 
  drop_na()

4.2 STATS

4.2.1 Counterbalancing

4.2.1.1 Prediction condition

4.2.1.1.1 Story order
results = fun.regression(
  formula = "dis_yes ~ 1 + condition_amb * story_order_wagon + (1 | participant)",
  data = df.exp2.predict)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dis_yes ~ 1 + condition_amb * story_order_wagon + (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 533.8373  554.1795 -261.9187  523.8373       427 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0.5109  
Number of obs: 432, groups:  participant, 54
Fixed Effects:
                    (Intercept)                    condition_amb  
                        -1.2683                           1.5824  
              story_order_wagon  condition_amb:story_order_wagon  
                        -0.1105                           0.2620  
fun.table(results)
effect  term                             estimate  std.error  statistic  conf.low  conf.high
fixed   (Intercept)                         -1.27       0.19      -6.85     -1.63      -0.91
fixed   condition_amb                        1.58       0.23       7.00      1.14       2.03
fixed   story_order_wagon                   -0.11       0.18      -0.62     -0.46       0.24
fixed   condition_amb:story_order_wagon      0.26       0.22       1.20     -0.17       0.69
4.2.1.1.2 Trial order
results = fun.regression(
  formula = "dis_yes ~ 1 + condition_amb*trial_order_auau + (1 | participant)",
  data = df.exp2.predict)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dis_yes ~ 1 + condition_amb * trial_order_auau + (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 533.6323  553.9745 -261.8162  523.6323       427 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0.473   
Number of obs: 432, groups:  participant, 54
Fixed Effects:
                   (Intercept)                   condition_amb  
                      -1.25973                         1.58341  
              trial_order_auau  condition_amb:trial_order_auau  
                       0.15624                         0.01714  
fun.table(results)  
effect  term                            estimate  std.error  statistic  conf.low  conf.high
fixed   (Intercept)                        -1.26       0.18      -6.90     -1.62      -0.90
fixed   condition_amb                       1.58       0.23       7.03      1.14       2.02
fixed   trial_order_auau                    0.16       0.18       0.88     -0.19       0.50
fixed   condition_amb:trial_order_auau      0.02       0.22       0.08     -0.41       0.44
4.2.1.1.3 Valence
results = fun.regression(
  formula = "dis_yes ~ 1 + condition_amb * valence_neg + (1 | participant)",
  data = df.exp2.predict)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dis_yes ~ 1 + condition_amb * valence_neg + (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 531.5686  551.9108 -260.7843  521.5686       427 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0.4787  
Number of obs: 432, groups:  participant, 54
Fixed Effects:
              (Intercept)              condition_amb  
                 -1.26341                    1.59408  
              valence_neg  condition_amb:valence_neg  
                 -0.05144                    0.34376  
fun.table(results)  
effect  term                       estimate  std.error  statistic  conf.low  conf.high
fixed   (Intercept)                   -1.26       0.18      -6.92     -1.62      -0.91
fixed   condition_amb                  1.59       0.23       7.08      1.15       2.04
fixed   valence_neg                   -0.05       0.18      -0.29     -0.40       0.30
fixed   condition_amb:valence_neg      0.34       0.22       1.57     -0.08       0.77

4.2.1.2 Inference condition

4.2.1.2.1 Story order
results = fun.regression(
  formula = "ambiguous_yes ~ 1 + condition_disagree * story_order_wagon + (1 | participant)",
  data = df.exp2.infer)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree * story_order_wagon +  
    (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 398.6856  419.1984 -194.3428  388.6856       442 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.009   
Number of obs: 447, groups:  participant, 56
Fixed Effects:
                         (Intercept)                    condition_disagree  
                           -2.687817                              3.783142  
                   story_order_wagon  condition_disagree:story_order_wagon  
                            0.005322                             -0.262981  
fun.table(results)  
effect  term                                   estimate  std.error  statistic  conf.low  conf.high
fixed   (Intercept)                               -2.69       0.31      -8.70     -3.29      -2.08
fixed   condition_disagree                         3.78       0.35      10.69      3.09       4.48
fixed   story_order_wagon                          0.01       0.28       0.02     -0.55       0.56
fixed   condition_disagree:story_order_wagon      -0.26       0.31      -0.86     -0.86       0.33
4.2.1.2.2 Trial order
results = fun.regression(
  formula = "ambiguous_yes ~ 1 + condition_disagree * trial_order_dada + (1 | participant)",
  data = df.exp2.infer)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree * trial_order_dada + (1 |  
    participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 400.1794  420.6922 -195.0897  390.1794       442 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.032   
Number of obs: 447, groups:  participant, 56
Fixed Effects:
                        (Intercept)                   condition_disagree  
                           -2.70545                              3.78219  
                   trial_order_dada  condition_disagree:trial_order_dada  
                           -0.06539                              0.08335  
fun.table(results)  
effect  term                                 estimate  std.error  statistic  conf.low  conf.high
fixed   (Intercept)                             -2.71       0.31      -8.67     -3.32      -2.09
fixed   condition_disagree                       3.78       0.36      10.62      3.08       4.48
fixed   trial_order_dada                        -0.07       0.29      -0.23     -0.63       0.49
fixed   condition_disagree:trial_order_dada      0.08       0.31       0.27     -0.52       0.68
4.2.1.2.3 Valence
results = fun.regression(
  formula = "ambiguous_yes ~ 1 + condition_disagree * valence_neg + (1 | participant)",
  data = df.exp2.infer)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: 
ambiguous_yes ~ 1 + condition_disagree * valence_neg + (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 396.7389  417.2517 -193.3695  386.7389       442 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0.9986  
Number of obs: 447, groups:  participant, 56
Fixed Effects:
                   (Intercept)              condition_disagree  
                       -2.6816                          3.7756  
                   valence_neg  condition_disagree:valence_neg  
                       -0.0941                         -0.3050  
fun.table(results)  
effect  term                            estimate  std.error  statistic  conf.low  conf.high
fixed   (Intercept)                        -2.68       0.31      -8.73     -3.28      -2.08
fixed   condition_disagree                  3.78       0.35      10.65      3.08       4.47
fixed   valence_neg                        -0.09       0.28      -0.33     -0.65       0.46
fixed   condition_disagree:valence_neg     -0.31       0.31      -0.99     -0.91       0.30

4.2.2 Confirmatory analyses

4.2.2.1 Trial type effect

4.2.2.1.1 Prediction condition

Participants should predict disagreement more often in ambiguous than in unambiguous trials.

results = fun.regression(
  formula = "dis_yes ~ 1 + condition_amb + (1 | participant)",
  data = df.exp2.predict)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dis_yes ~ 1 + condition_amb + (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 531.3732  543.5785 -262.6866  525.3732       429 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0.5009  
Number of obs: 432, groups:  participant, 54
Fixed Effects:
  (Intercept)  condition_amb  
       -1.267          1.584  
prop.table(table(df.exp2.predict$condition_amb, df.exp2.predict$dis_yes),
           margin = 1)
   
            0         1
  0 0.7685185 0.2314815
  1 0.4259259 0.5740741
fun.table(results, type = "confirmatory") 
effect  term           estimate  std.error  statistic  p.value  conf.low  conf.high
fixed   (Intercept)       -1.27       0.18      -6.88        0     -1.63      -0.91
fixed   condition_amb      1.58       0.23       7.03        0      1.14       2.02
4.2.2.1.2 Inference condition

Participants should choose the ambiguous statement more often in disagreement than in agreement trials.

results = fun.regression(
  formula = "ambiguous_yes ~ 1 + condition_disagree + (1 | participant)",
  data = df.exp2.infer)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 396.2555  408.5632 -195.1278  390.2555       444 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.028   
Number of obs: 447, groups:  participant, 56
Fixed Effects:
       (Intercept)  condition_disagree  
            -2.697               3.771  
prop.table(table(df.exp2.infer$condition_disagree, df.exp2.infer$ambiguous_yes),
           margin = 1)
   
             0          1
  0 0.91071429 0.08928571
  1 0.29147982 0.70852018
fun.table(results, type = "confirmatory") 
effect  term                estimate  std.error  statistic  p.value  conf.low  conf.high
fixed   (Intercept)            -2.70       0.31      -8.72        0     -3.30      -2.09
fixed   condition_disagree      3.77       0.35      10.69        0      3.08       4.46

4.2.3 Exploratory analysis

4.2.3.1 Trial type by age interaction

4.2.3.1.1 Prediction
results = fun.regression(
  formula = "dis_yes ~ 1 + condition_amb * age_continuous + (1 | participant)",
  data = df.exp2.predict)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dis_yes ~ 1 + condition_amb * age_continuous + (1 | participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 533.1134  553.4555 -261.5567  523.1134       427 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0.4888  
Number of obs: 432, groups:  participant, 54
Fixed Effects:
                 (Intercept)                 condition_amb  
                      0.3225                       -0.3813  
              age_continuous  condition_amb:age_continuous  
                     -0.1702                        0.2100  
fun.table(results) 
effect  term                          estimate  std.error  statistic  conf.low  conf.high
fixed   (Intercept)                       0.32       1.16       0.28     -1.95       2.60
fixed   condition_amb                    -0.38       1.43      -0.27     -3.18       2.41
fixed   age_continuous                   -0.17       0.12      -1.37     -0.41       0.07
fixed   condition_amb:age_continuous      0.21       0.15       1.39     -0.09       0.51
4.2.3.1.2 Inference
results = fun.regression(
  formula = "ambiguous_yes ~ 1 + condition_disagree * age_continuous + (1 | participant)",
  data = df.exp2.infer)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree * age_continuous + (1 |  
    participant)
   Data: data
      AIC       BIC    logLik  deviance  df.resid 
 370.9069  391.4197 -180.4534  360.9069       442 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.188   
Number of obs: 447, groups:  participant, 56
Fixed Effects:
                      (Intercept)                 condition_disagree  
                           4.0689                            -7.5859  
                   age_continuous  condition_disagree:age_continuous  
                          -0.7699                             1.2725  
fun.table(results) 
effect  term                               estimate  std.error  statistic  conf.low  conf.high
fixed   (Intercept)                            4.07       2.20       1.85     -0.24       8.38
fixed   condition_disagree                    -7.59       2.30      -3.30    -12.09      -3.08
fixed   age_continuous                        -0.77       0.25      -3.05     -1.26      -0.27
fixed   condition_disagree:age_continuous      1.27       0.27       4.70      0.74       1.80

4.2.3.2 Moderation by age

4.2.3.2.1 Prediction condition
# from 7 to 11 years 
for(i in 7:11){
  cat(str_c("Age = ", i, "\n\n"))
  fun.regression(
    formula = "dis_yes ~ 1 + condition_amb + (1 | participant)",
    data = df.exp2.predict %>% 
      filter(age_group == i))
}
Age = 7

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dis_yes ~ 1 + condition_amb + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
124.0071 131.7002 -59.0036 118.0071       93 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.161   
Number of obs: 96, groups:  participant, 12
Fixed Effects:
  (Intercept)  condition_amb  
      -0.9926         1.2093  
Age = 8
boundary (singular) fit: see help('isSingular')
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dis_yes ~ 1 + condition_amb + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
 93.6577 100.8037 -43.8288  87.6577       77 
Random effects:
 Groups      Name        Std.Dev. 
 participant (Intercept) 3.525e-08
Number of obs: 80, groups:  participant, 10
Fixed Effects:
  (Intercept)  condition_amb  
       -1.735          2.140  
optimizer (Nelder_Mead) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings 
Age = 9

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dis_yes ~ 1 + condition_amb + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
130.8159 138.5089 -62.4079 124.8159       93 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0.1557  
Number of obs: 96, groups:  participant, 12
Fixed Effects:
  (Intercept)  condition_amb  
       -0.793          1.132  
Age = 10

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dis_yes ~ 1 + condition_amb + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
 94.7171 101.8631 -44.3585  88.7171       77 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0.3675  
Number of obs: 80, groups:  participant, 10
Fixed Effects:
  (Intercept)  condition_amb  
       -1.782          1.989  
Age = 11
boundary (singular) fit: see help('isSingular')
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: dis_yes ~ 1 + condition_amb + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
 99.8731 107.0192 -46.9366  93.8731       77 
Random effects:
 Groups      Name        Std.Dev. 
 participant (Intercept) 1.804e-07
Number of obs: 80, groups:  participant, 10
Fixed Effects:
  (Intercept)  condition_amb  
       -1.386          1.792  
optimizer (Nelder_Mead) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings 
4.2.3.2.2 Inference condition
# from 7 to 11 years 
for(i in 7:11){
  cat(str_c("Age = ", i, "\n\n"))
  fun.regression(
    formula = "ambiguous_yes ~ 1 + condition_disagree + (1 | participant)",
    data = df.exp2.infer %>% 
      filter(age_group == i))
}
Age = 7

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
120.7673 128.4603 -57.3836 114.7673       93 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0.8776  
Number of obs: 96, groups:  participant, 12
Fixed Effects:
       (Intercept)  condition_disagree  
            -1.410               1.197  
Age = 8

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
  88.856   96.549  -41.428   82.856       93 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 1.184   
Number of obs: 96, groups:  participant, 12
Fixed Effects:
       (Intercept)  condition_disagree  
            -2.911               3.917  
Age = 9

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
 52.2821  59.9752 -23.1411  46.2821       93 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 3.168   
Number of obs: 96, groups:  participant, 12
Fixed Effects:
       (Intercept)  condition_disagree  
            -5.704               8.442  
Age = 10
boundary (singular) fit: see help('isSingular')
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
 43.0340  50.1423 -18.5170  37.0340       76 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0       
Number of obs: 79, groups:  participant, 10
Fixed Effects:
       (Intercept)  condition_disagree  
            -2.944               5.429  
optimizer (Nelder_Mead) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings 
Age = 11

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
 65.8653  73.0114 -29.9327  59.8653       77 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0.843   
Number of obs: 80, groups:  participant, 10
Fixed Effects:
       (Intercept)  condition_disagree  
            -3.241               4.510  
4.2.3.2.3 Inference condition: First story only

Examine story 1 (trials 1 and 2) and story 4 (trials 7 and 8) among 7-year-olds.

# story 1, 7-year-olds
df.exp2.infer.7.1 = df.exp2.infer %>%
  filter(age_group == 7,
         trial %in% c("trial 1", "trial 2"))

results = fun.regression(
  formula = "ambiguous_yes ~ 1 + condition_disagree + (1 | participant)",
  data = df.exp2.infer.7.1)
boundary (singular) fit: see help('isSingular')
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
 34.7724  38.3065 -14.3862  28.7724       21 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0       
Number of obs: 24, groups:  participant, 12
Fixed Effects:
       (Intercept)  condition_disagree  
           -0.6931             -0.4055  
optimizer (Nelder_Mead) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings 
prop.table(table(df.exp2.infer.7.1$condition_disagree, df.exp2.infer.7.1$ambiguous_yes),
           margin = 1)
   
            0         1
  0 0.6666667 0.3333333
  1 0.7500000 0.2500000
fun.table(results, type = "confirmatory")
effect  term                estimate  std.error  statistic  p.value  conf.low  conf.high
fixed   (Intercept)            -0.69       0.61      -1.13     0.26     -1.89       0.51
fixed   condition_disagree     -0.41       0.91      -0.45     0.65     -2.18       1.37
# story 4, 7-year-olds
df.exp2.infer.7.4 = df.exp2.infer %>%
  filter(age_group == 7,
         trial %in% c("trial 7", "trial 8"))

results = fun.regression(
  formula = "ambiguous_yes ~ 1 + condition_disagree + (1 | participant)",
  data = df.exp2.infer.7.4)
boundary (singular) fit: see help('isSingular')
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: ambiguous_yes ~ 1 + condition_disagree + (1 | participant)
   Data: data
     AIC      BIC   logLik deviance df.resid 
 35.7967  39.3308 -14.8983  29.7967       21 
Random effects:
 Groups      Name        Std.Dev.
 participant (Intercept) 0       
Number of obs: 24, groups:  participant, 12
Fixed Effects:
       (Intercept)  condition_disagree  
            -1.099               1.435  
optimizer (Nelder_Mead) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings 
prop.table(table(df.exp2.infer.7.4$condition_disagree, df.exp2.infer.7.4$ambiguous_yes),
           margin = 1)
   
            0         1
  0 0.7500000 0.2500000
  1 0.4166667 0.5833333
fun.table(results, type = "confirmatory")
effect  term                estimate  std.error  statistic  p.value  conf.low  conf.high
fixed   (Intercept)            -1.10       0.67      -1.65     0.10     -2.41       0.21
fixed   condition_disagree      1.44       0.89       1.62     0.11     -0.30       3.17

4.2.4 Bayesian model

4.2.4.1 Prediction data

df.exp2.predict.prob = df.exp2.predict %>% 
  count(age_group, condition_amb_c, dis_yes) %>% 
  group_by(age_group, condition_amb_c) %>% 
  mutate(probability = n/sum(n)) %>% 
  ungroup() %>% 
  mutate(utterance = str_remove_all(condition_amb_c, " Trials"),
         utterance = factor(utterance,
                            levels = c("Unambiguous", "Ambiguous")),
         agreement = factor(dis_yes,
                            levels = c(0, 1),
                            labels = c("agree", "disagree"))) %>% 
  select(-c(condition_amb_c, dis_yes, n)) %>% 
  relocate(probability, .after = last_col()) %>%
  arrange(age_group, utterance, agreement)

4.2.4.2 Without softmax

utterance_prior = c(0.5, 0.5)

df.inference = df.exp2.predict.prob %>% 
    group_by(agreement, age_group) %>% 
    mutate(prior = utterance_prior) %>% 
    mutate(posterior = probability * prior / 
               sum(probability * prior)) %>% 
    ungroup()
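
For intuition: plugging the overall prediction-condition proportions from section 4.2.2.1.1 (P(disagree | unambiguous) ≈ .23, P(disagree | ambiguous) ≈ .57) into Bayes' rule with the uniform utterance prior gives the posterior over utterances after observing a disagreement; the pipeline above does this computation separately for each age group:

# illustrative check using the aggregate proportions reported above
likelihood = c(unambiguous = 0.23, ambiguous = 0.57)  # P(disagree | utterance)
prior = c(0.5, 0.5)                                   # uniform utterance prior
likelihood * prior / sum(likelihood * prior)          # posterior: ~0.29, ~0.71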

df.model.posterior = df.inference %>% 
    rename(condition = agreement) %>% 
    mutate(condition = factor(condition,
                              levels = c("agree", "disagree"),
                              labels = c("Agreement Trials", "Disagreement Trials"))) %>% 
    filter(utterance == "Ambiguous")

4.2.4.3 One temperature parameter

age = 7:11

softmax = function(vec, temp = 3) {
    out = exp(vec*temp) / sum(exp(vec*temp))
    return(out)
}
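
The temperature parameter controls how deterministically the posterior is mapped onto a choice: temp = 0 yields indifference between the two options, and larger values push choices toward the option with the higher posterior. Illustrative values:

softmax(c(0.3, 0.7), temp = 0)  # 0.50 0.50 (random choice)
softmax(c(0.3, 0.7), temp = 3)  # ~0.23 0.77 (closer to maximizing)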

df.data = df.exp2.infer %>% 
    count(age_group, condition_disagree_c, ambiguous_yes) %>% 
    group_by(age_group, condition_disagree_c) %>% 
    reframe(p = n/sum(n)) %>% 
    # keep every second row, i.e. the proportion of ambiguous_yes == 1
    filter(row_number() %% 2 == 0) %>% 
    rename(agreement = condition_disagree_c) %>% 
    mutate(agreement = ifelse(agreement == "Agreement Trials", "agree", "disagree"))

fit_softmax = function(beta){
    df.prediction = df.inference %>% 
        filter(age_group %in% age) %>%
        select(age_group, utterance, agreement, posterior) %>% 
        pivot_wider(names_from = utterance,
                    values_from = posterior) %>% 
        rowwise() %>% 
        mutate(Unambiguous_soft = softmax(c(Unambiguous, Ambiguous),
                                          temp = beta)[1],
               Ambiguous_soft = softmax(c(Unambiguous, Ambiguous),
                                        temp = beta)[2]) %>% 
        select(age_group, agreement, prediction = Ambiguous_soft)
    
    # compute loss as squared error
    loss = df.data %>% 
        filter(age_group %in% age) %>% 
        left_join(df.prediction) %>% 
        mutate(loss = (p-prediction)^2) %>% 
        pull(loss) %>% 
        sum()
    
    return(loss)
}

# find best fitting softmax parameter
fit = optim(par = 0, 
            fn = fit_softmax)
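
With a single free parameter, optim()'s default Nelder-Mead method is documented as unreliable in one dimension (hence the warning it emits); an equivalent call using the documented one-dimensional method, with bounds chosen generously here as an assumption, would be:

# alternative one-dimensional fit (the bounds are an assumption)
fit = optim(par = 0,
            fn = fit_softmax,
            method = "Brent",
            lower = -10,
            upper = 10)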

# use the best-fitting parameter
beta = fit$par

# model with softmax 
df.model.softmax = df.inference %>% 
    select(age_group, utterance, agreement, posterior) %>% 
    pivot_wider(names_from = utterance,
                values_from = posterior) %>% 
    rowwise() %>% 
    mutate(Unambiguous_soft = softmax(c(Unambiguous, Ambiguous),
                                      temp = beta)[1],
           Ambiguous_soft = softmax(c(Unambiguous, Ambiguous),
                                    temp = beta)[2]) %>% 
    select(age_group, condition = agreement, posterior = Ambiguous_soft) %>% 
    mutate(condition = factor(condition,
                              levels = c("agree", "disagree"),
                              labels = c("Agreement Trials", "Disagreement Trials")))

4.2.4.4 Linear increase of temperature parameter

  • Fit a linear model of the softmax temperature as a function of age.

fit_softmax_age = function(par){
  df.prediction = df.inference %>% 
    select(age_group, utterance, agreement, posterior) %>% 
    mutate(beta = par[1] + par[2] * age_group) %>%
    pivot_wider(names_from = utterance,
                values_from = posterior) %>% 
    rowwise() %>% 
    mutate(Unambiguous_soft = softmax(c(Unambiguous, Ambiguous),
                                      temp = beta)[1],
           Ambiguous_soft = softmax(c(Unambiguous, Ambiguous),
                                    temp = beta)[2]) %>% 
    select(age_group, agreement, prediction = Ambiguous_soft)
  
  # compute loss as squared error
  loss = df.data %>% 
    filter(age_group %in% age) %>% 
    left_join(df.prediction,
              by = c("age_group", "agreement")) %>% 
    mutate(loss = (p-prediction)^2) %>% 
    pull(loss) %>% 
    sum()
  
  return(loss)
}

# find best fitting softmax parameter
fit = optim(par = c(0, 0), 
            fn = fit_softmax_age)

df.model.softmax.linear = df.inference %>% 
    select(age_group, utterance, agreement, posterior) %>% 
    pivot_wider(names_from = utterance,
                values_from = posterior) %>% 
    mutate(beta = fit$par[1] + fit$par[2] * age_group) %>%
    rowwise() %>% 
    mutate(Unambiguous_soft = softmax(c(Unambiguous, Ambiguous),
                                      temp = beta)[1],
           Ambiguous_soft = softmax(c(Unambiguous, Ambiguous),
                                    temp = beta)[2]) %>% 
    select(age_group, condition = agreement, posterior = Ambiguous_soft) %>% 
    mutate(condition = factor(condition,
                              levels = c("agree", "disagree"),
                              labels = c("Agreement Trials", "Disagreement Trials")))

4.2.4.5 Model comparison

df.model.posterior %>% 
    mutate(name = "posterior") %>% 
    select(-c(utterance, probability, prior)) %>% 
    bind_rows(df.model.softmax %>% 
                  mutate(name = "softmax")) %>% 
    bind_rows(df.model.softmax.linear %>% 
                  mutate(name = "softmax increase")) %>% 
    pivot_wider(names_from = name,
                values_from = posterior) %>% 
    left_join(df.data %>% 
                  mutate(condition = factor(agreement,
                                            levels = c("agree", "disagree"),
                                            labels = c("Agreement Trials",
                                                       "Disagreement Trials"))) %>% 
                  select(-agreement),
              by = c("age_group", "condition")) %>% 
    summarize(
        r_posterior = cor(p, posterior),
        r_softmax = cor(p, softmax),
        r_softmaxincrease = cor(p, `softmax increase`),
        rmse_posterior = rmse(p, posterior),
        rmse_softmax = rmse(p, softmax),
        rmse_softmaxincrease = rmse(p, `softmax increase`)) %>% 
    pivot_longer(cols = everything(),
                 names_to = c("index", "name"),
                 names_sep = "_") %>% 
    pivot_wider(names_from = index,
                values_from = value) %>% 
    print_table()
name                 r  rmse
posterior         0.96  0.21
softmax           0.96  0.17
softmaxincrease   0.98  0.15

4.3 PLOTS

4.3.1 Prediction

set.seed(1)

df.plot.individual = df.exp2.predict %>% 
    mutate(condition_amb = as.character(condition_amb)) %>% 
    group_by(participant, age_continuous, condition_amb) %>% 
    summarize(pct_dis = sum(dis_yes)/n()) 

df.age.means = df.plot.individual %>%
  distinct(participant, age_continuous) %>%
  mutate(age_group = floor(age_continuous)) %>%
  group_by(age_group) %>%
  summarize(age_mean = mean(age_continuous),
            n = str_c("n = ", n())) %>%
  ungroup()

df.plot.means = df.exp2.predict %>% 
  mutate(condition_amb = as.character(condition_amb)) %>% 
    group_by(participant, age_group, condition_amb) %>% 
    summarize(pct_dis = sum(dis_yes)/n()) %>% 
  group_by(age_group, condition_amb) %>% 
  reframe(response = smean.cl.boot(pct_dis),
          name = c("mean", "low", "high")) %>% 
  left_join(df.age.means,
            by = "age_group") %>% 
  pivot_wider(names_from = name,
              values_from = response) %>% 
  mutate(age_mean = ifelse(condition_amb == 0, age_mean - 0.05, age_mean + 0.05))

df.plot.text = df.plot.means %>% 
  distinct(age_group, n)


ggplot() + 
  geom_hline(yintercept = 0.5,
             linetype = 2,
             alpha = 0.1) + 
  geom_point(data = df.plot.individual,
             mapping = aes(x = age_continuous,
                           y = pct_dis,
                           color = condition_amb),
             alpha = 0.5,
             show.legend = T,
             shape = 16,
             size = 1.5) +
  geom_linerange(data = df.plot.means,
                 mapping = aes(x = age_mean,
                               y = mean,
                               ymin = low,
                               ymax = high),
                 color = "gray40") + 
  geom_point(data = df.plot.means,
             mapping = aes(x = age_mean,
                           y = mean,
                           fill = condition_amb),
             shape = 21,
             size = 3,
             show.legend = T) +
  geom_text(data = df.plot.text,
            mapping = aes(x = age_group + 0.5,
                          y = 1.05,
                          label = n),
            hjust = 0.5) + 
  scale_y_continuous(labels = percent) +
  labs(x = "Age (in years)",
       y = "% Predict Disagreement", 
       title = "Experiment 2: Prediction") + 
  coord_cartesian(xlim = c(7, 12),
                  ylim = c(0, 1),
                  clip = "off") + 
  scale_color_manual(name = "Trial Type",
                     labels = c("Unambiguous", "Ambiguous"),
                     values = c(l.color$unambiguous, l.color$ambiguous),
                     guide = guide_legend(reverse = T)) +
  scale_fill_manual(name = "Trial Type",
                    labels = c("Unambiguous", "Ambiguous"),
                    values = c(l.color$unambiguous, l.color$ambiguous),
                    guide = guide_legend(reverse = T)) +
  theme(plot.title = element_text(hjust = 0.5,
                                  vjust = 2,
                                  size = 18,
                                  face = "bold"),
        axis.title.y = element_markdown(color = l.color$disagreement),
        legend.position = "right")

ggsave(filename = "../figures/plots/exp2_prediction.pdf",
       width = 8,
       height = 4)

4.3.2 Inference

set.seed(1)

df.plot.individual = df.exp2.infer %>% 
    mutate(condition_disagree = as.character(condition_disagree)) %>% 
    group_by(participant, age_continuous, condition_disagree) %>% 
    summarize(pct_amb = sum(ambiguous_yes)/n())

df.age.means = df.plot.individual %>%
  distinct(participant, age_continuous) %>%
  mutate(age_group = floor(age_continuous)) %>%
  group_by(age_group) %>%
  summarize(age_mean = mean(age_continuous),
            n = str_c("n = ", n())) %>%
  ungroup()

df.plot.means = df.exp2.infer %>% 
  mutate(condition_disagree = as.character(condition_disagree)) %>% 
  group_by(participant, age_group, condition_disagree) %>% 
  summarize(pct_amb = sum(ambiguous_yes)/n()) %>% 
  group_by(age_group, condition_disagree) %>% 
  reframe(response = smean.cl.boot(pct_amb),
          name = c("mean", "low", "high")) %>% 
  left_join(df.age.means,
            by = "age_group") %>% 
  pivot_wider(names_from = name,
              values_from = response) %>% 
  mutate(age_mean = ifelse(condition_disagree == 0, age_mean - 0.05, age_mean + 0.05))

df.plot.text = df.plot.means %>% 
  distinct(age_group, n)

df.model = df.model.posterior %>% 
    mutate(name = "posterior") %>% 
    select(-c(utterance, probability, prior)) %>% 
    bind_rows(df.model.softmax %>% 
                  mutate(name = "softmax")) %>% 
    bind_rows(df.model.softmax.linear %>% 
                  mutate(name = "softmax increase")) %>% 
  mutate(condition_disagree = factor(condition,
                                     levels = c("Agreement Trials", 
                                                "Disagreement Trials"),
                                     labels = c(0,
                                                1))) %>% 
  left_join(df.age.means %>% 
              select(-n),
            by = "age_group") %>% 
  mutate(age_mean = ifelse(condition_disagree == 0,
                           age_mean - 0.05,
                           age_mean + 0.05))

ggplot() + 
  geom_hline(yintercept = 0.5,
             linetype = 2,
             alpha = 0.1) + 
  geom_point(data = df.plot.individual,
             mapping = aes(x = age_continuous,
                           y = pct_amb,
                           color = condition_disagree),
             alpha = 0.5,
             show.legend = T,
             shape = 16,
             size = 1.5) +
  geom_linerange(data = df.plot.means,
                 mapping = aes(x = age_mean,
                               y = mean,
                               ymin = low,
                               ymax = high),
                 color = "gray40",
                 show.legend = F) + 
  geom_point(data = df.plot.means,
             mapping = aes(x = age_mean,
                           y = mean,
                           fill = condition_disagree),
             shape = 21,
             size = 3,
             show.legend = F) +
  geom_point(data = df.model,
             mapping = aes(x = age_mean,
                           y = posterior,
                           shape = name,
                           fill = condition_disagree),
             size = 1.5,
             alpha = 0.5,
             show.legend = T) +
    geom_text(data = df.plot.text,
            mapping = aes(x = age_group + 0.5,
                          y = 1.05,
                          label = n),
            hjust = 0.5) + 
  scale_y_continuous(labels = percent) +
  labs(x = "Age (in years)",
       y = "% Infer Ambiguous Utterance", 
       title = "Experiment 2: Inference") + 
  coord_cartesian(xlim = c(7, 12),
                  ylim = c(0, 1),
                  clip = "off") + 
  scale_color_manual(name = "Trial Type",
                     labels = c("Agreement", "Disagreement"),
                     values = c(l.color$agreement, l.color$disagreement)) +
  scale_fill_manual(name = "Trial Type",
                    labels = c("Agreement", "Disagreement"),
                    values = c(l.color$agreement, l.color$disagreement)) +
  scale_shape_manual(name = "Model",
                    labels = c("posterior", "softmax", "softmax increase"),
                    values = c(21, 22, 23)) +
  theme(plot.title = element_text(hjust = 0.5,
                                  vjust = 2,
                                  size = 18,
                                  face = "bold"),
        axis.title.y = element_markdown(color = l.color$ambiguous),
        legend.position = "right") +
  guides(fill = guide_legend(override.aes = list(shape = 21,
                                                 size = 3,
                                                 alpha = 1),
                             reverse = T,
                             order = 1),
         shape = guide_legend(override.aes = list(fill = "white",
                                                  alpha = 1)),
         color = "none")

ggsave(filename = "../figures/plots/exp2_inference.pdf",
       width = 8,
       height = 4)

5 Session info

cite_packages(output = "paragraph",
              cite.tidyverse = TRUE,
              out.dir = ".")

We used R version 4.3.2 (R Core Team 2023) and the following R packages: bookdown v. 0.37 (Xie 2016, 2023a), broom.mixed v. 0.2.9.4 (Bolker and Robinson 2022), car v. 3.1.2 (Fox and Weisberg 2019), ggeffects v. 1.3.4 (Lüdecke 2018), ggtext v. 0.1.2 (Wilke and Wiernik 2022), Hmisc v. 5.1.1 (Harrell Jr 2023), kableExtra v. 1.3.4 (Zhu 2021), knitr v. 1.45 (Xie 2014, 2015, 2023b), lme4 v. 1.1.35.1 (Bates et al. 2015), Metrics v. 0.1.4 (Hamner and Frasco 2018), rmarkdown v. 2.25 (Xie, Allaire, and Grolemund 2018; Xie, Dervieux, and Riederer 2020; Allaire et al. 2023), rsample v. 1.2.0 (Frick et al. 2023), scales v. 1.3.0 (Wickham, Pedersen, and Seidel 2023), tidyverse v. 2.0.0 (Wickham et al. 2019), xtable v. 1.8.4 (Dahl et al. 2019).

sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Sonoma 14.4.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/Los_Angeles
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] lubridate_1.9.3     forcats_1.0.0       stringr_1.5.1      
 [4] dplyr_1.1.4         purrr_1.0.2         readr_2.1.4        
 [7] tidyr_1.3.0         tibble_3.2.1        ggplot2_3.4.4      
[10] tidyverse_2.0.0     ggtext_0.1.2        Hmisc_5.1-1        
[13] ggeffects_1.3.4     grateful_0.2.4      broom.mixed_0.2.9.4
[16] scales_1.3.0        Metrics_0.1.4       car_3.1-2          
[19] carData_3.0-5       knitr_1.45          kableExtra_1.3.4   
[22] xtable_1.8-4        rsample_1.2.0       lme4_1.1-35.1      
[25] Matrix_1.6-4       

loaded via a namespace (and not attached):
 [1] gridExtra_2.3      rlang_1.1.3        magrittr_2.0.3     furrr_0.3.1       
 [5] compiler_4.3.2     systemfonts_1.0.5  vctrs_0.6.5        rvest_1.0.3       
 [9] pkgconfig_2.0.3    crayon_1.5.2       fastmap_1.1.1      backports_1.4.1   
[13] labeling_0.4.3     utf8_1.2.4         rmarkdown_2.25     markdown_1.12     
[17] tzdb_0.4.0         nloptr_2.0.3       ragg_1.2.7         bit_4.0.5         
[21] xfun_0.41          cachem_1.0.8       jsonlite_1.8.8     highr_0.10        
[25] broom_1.0.5        parallel_4.3.2     cluster_2.1.6      R6_2.5.1          
[29] bslib_0.6.1        stringi_1.8.3      parallelly_1.37.0  boot_1.3-28.1     
[33] rpart_4.1.23       jquerylib_0.1.4    Rcpp_1.0.12        bookdown_0.37     
[37] base64enc_0.1-3    splines_4.3.2      nnet_7.3-19        timechange_0.2.0  
[41] tidyselect_1.2.0   rstudioapi_0.15.0  abind_1.4-5        yaml_2.3.8        
[45] codetools_0.2-19   listenv_0.9.1      lattice_0.22-5     withr_3.0.0       
[49] evaluate_0.23      foreign_0.8-86     future_1.33.1      xml2_1.3.6        
[53] pillar_1.9.0       renv_1.0.3         checkmate_2.3.1    generics_0.1.3    
[57] vroom_1.6.5        hms_1.1.3          commonmark_1.9.0   munsell_0.5.0     
[61] minqa_1.2.6        globals_0.16.2     glue_1.7.0         tools_4.3.2       
[65] data.table_1.14.10 webshot_0.5.5      grid_4.3.2         colorspace_2.1-0  
[69] nlme_3.1-164       htmlTable_2.4.2    Formula_1.2-5      cli_3.6.2         
[73] textshaping_0.3.7  fansi_1.0.6        viridisLite_0.4.2  svglite_2.1.3     
[77] gtable_0.3.4       sass_0.4.8         digest_0.6.34      htmlwidgets_1.6.4 
[81] farver_2.1.1       htmltools_0.5.7    lifecycle_1.0.4    httr_1.4.7        
[85] gridtext_0.1.5     bit64_4.0.5        MASS_7.3-60       
Allaire, JJ, Yihui Xie, Christophe Dervieux, Jonathan McPherson, Javier Luraschi, Kevin Ushey, Aron Atkins, et al. 2023. rmarkdown: Dynamic Documents for R. https://github.com/rstudio/rmarkdown.
Bates, Douglas, Martin Mächler, Ben Bolker, and Steve Walker. 2015. “Fitting Linear Mixed-Effects Models Using lme4.” Journal of Statistical Software 67 (1): 1–48. https://doi.org/10.18637/jss.v067.i01.
Bolker, Ben, and David Robinson. 2022. broom.mixed: Tidying Methods for Mixed Models. https://CRAN.R-project.org/package=broom.mixed.
Dahl, David B., David Scott, Charles Roosen, Arni Magnusson, and Jonathan Swinton. 2019. xtable: Export Tables to LaTeX or HTML. https://CRAN.R-project.org/package=xtable.
Fox, John, and Sanford Weisberg. 2019. An R Companion to Applied Regression. Third. Thousand Oaks CA: Sage. https://socialsciences.mcmaster.ca/jfox/Books/Companion/.
Frick, Hannah, Fanny Chow, Max Kuhn, Michael Mahoney, Julia Silge, and Hadley Wickham. 2023. rsample: General Resampling Infrastructure. https://CRAN.R-project.org/package=rsample.
Hamner, Ben, and Michael Frasco. 2018. Metrics: Evaluation Metrics for Machine Learning. https://CRAN.R-project.org/package=Metrics.
Harrell Jr, Frank E. 2023. Hmisc: Harrell Miscellaneous. https://CRAN.R-project.org/package=Hmisc.
Lüdecke, Daniel. 2018. “ggeffects: Tidy Data Frames of Marginal Effects from Regression Models.” Journal of Open Source Software 3 (26): 772. https://doi.org/10.21105/joss.00772.
R Core Team. 2023. R: A Language and Environment for Statistical Computing. Vienna, Austria: R Foundation for Statistical Computing. https://www.R-project.org/.
Wickham, Hadley, Mara Averick, Jennifer Bryan, Winston Chang, Lucy D’Agostino McGowan, Romain François, Garrett Grolemund, et al. 2019. “Welcome to the tidyverse.” Journal of Open Source Software 4 (43): 1686. https://doi.org/10.21105/joss.01686.
Wickham, Hadley, Thomas Lin Pedersen, and Dana Seidel. 2023. scales: Scale Functions for Visualization. https://CRAN.R-project.org/package=scales.
Wilke, Claus O., and Brenton M. Wiernik. 2022. ggtext: Improved Text Rendering Support for ggplot2. https://CRAN.R-project.org/package=ggtext.
Xie, Yihui. 2014. “knitr: A Comprehensive Tool for Reproducible Research in R.” In Implementing Reproducible Computational Research, edited by Victoria Stodden, Friedrich Leisch, and Roger D. Peng. Chapman and Hall/CRC.
———. 2015. Dynamic Documents with R and Knitr. 2nd ed. Boca Raton, Florida: Chapman and Hall/CRC. https://yihui.org/knitr/.
———. 2016. bookdown: Authoring Books and Technical Documents with R Markdown. Boca Raton, Florida: Chapman and Hall/CRC. https://bookdown.org/yihui/bookdown.
———. 2023a. bookdown: Authoring Books and Technical Documents with R Markdown. https://github.com/rstudio/bookdown.
———. 2023b. knitr: A General-Purpose Package for Dynamic Report Generation in R. https://yihui.org/knitr/.
Xie, Yihui, J. J. Allaire, and Garrett Grolemund. 2018. R Markdown: The Definitive Guide. Boca Raton, Florida: Chapman and Hall/CRC. https://bookdown.org/yihui/rmarkdown.
Xie, Yihui, Christophe Dervieux, and Emily Riederer. 2020. R Markdown Cookbook. Boca Raton, Florida: Chapman and Hall/CRC. https://bookdown.org/yihui/rmarkdown-cookbook.
Zhu, Hao. 2021. kableExtra: Construct Complex Table with kable and Pipe Syntax. https://CRAN.R-project.org/package=kableExtra.