# packages used below (all listed in the sessionInfo at the end of this document)
library(tidyverse)
library(janitor)

# read in and combine the aggregate data from the six experiments
# (hard/soft/fixed intervention, each with a positive and a negative outcome condition)
df.data = bind_rows(
  read_csv(file = "../../../data/aggregate/hardint_pos.csv"),
  read_csv(file = "../../../data/aggregate/hardint_neg.csv"),
  read_csv(file = "../../../data/aggregate/softint_pos.csv"),
  read_csv(file = "../../../data/aggregate/softint_neg.csv"),
  read_csv(file = "../../../data/aggregate/fixedint_pos.csv"),
  read_csv(file = "../../../data/aggregate/fixedint_neg.csv")) %>%
  clean_names() %>%
  mutate(causal_structure = str_to_lower(causal_structure),
         # drop the "int" suffix so that experiment is coded as hard/soft/fixed
         experiment = str_remove(experiment, "int")) %>%
  # drop the "_percentage" suffix from the response columns
  rename_with(.fn = ~ str_remove_all(., "_percentage")) %>%
  pivot_longer(cols = -c(causal_structure, outcome, experiment),
               values_to = "probability") %>%
  # split the former column names into the choice (abnormal/nopreference/normal)
  # and the response type (intervention/explanation)
  separate(col = name,
           into = c("choice", "type")) %>%
  mutate(across(.cols = -probability,
                .fns = ~ as.factor(.)),
         choice = factor(choice, levels = c("abnormal", "nopreference", "normal"))) %>%
  # convert percentages to probabilities
  mutate(probability = probability / 100)

# mean intervention choices, with experiment (hard/soft/fixed) renamed to intervention
df.intervention = df.data %>%
  filter(type == "intervention") %>%
  rename(intervention = experiment)

# mean explanation choices
df.explanation = df.data %>%
  filter(type == "explanation")
colnames(df.data)
[1] "causal_structure" "outcome" "experiment" "choice"
[5] "type" "probability"
# probability of success given the firing probabilities of the abnormal and the normal
# cause: in the conjunctive structure both causes need to fire, in the disjunctive
# structure at least one does; in the negative outcome condition, success means that
# the effect does not occur
fun.success = function(p_abnormal, p_normal, causal_structure, outcome){
  if (causal_structure == "conjunctive"){
    p = p_abnormal * p_normal
  } else{
    p = 1 - (1 - p_abnormal) * (1 - p_normal)
  }
  if (outcome == "negative"){
    p = 1 - p
  }
  return(p)
}
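
# illustrative check of fun.success (not part of the original analysis): with the
# probabilities used below (p_abnormal = 0.2, p_normal = 0.8), a positive outcome
# occurs with probability 0.2 * 0.8 = 0.16 in the conjunctive structure and
# 1 - 0.8 * 0.2 = 0.84 in the disjunctive structure
fun.success(0.2, 0.8, "conjunctive", "positive")  # 0.16
fun.success(0.2, 0.8, "disjunctive", "positive")  # 0.84
fun.success(0.2, 0.8, "conjunctive", "negative")  # 1 - 0.16 = 0.84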
# model predictions for each combination of causal structure and outcome condition
causal_structure = c("conjunctive", "disjunctive")
outcome = c("positive", "negative")

df.model = expand_grid(causal_structure, outcome) %>%
  mutate(p_abnormal = 0.2, # probability that the abnormal cause fires
         p_normal = 0.8,   # probability that the normal cause fires
         # hard intervention: the targeted cause is made to fire (positive condition)
         # or prevented from firing (negative condition)
         int_hard_abnormal = ifelse(outcome == "positive", 1, 0),
         int_hard_normal = ifelse(outcome == "positive", 1, 0),
         # soft intervention: the targeted cause's firing probability shifts by 0.2
         int_soft_abnormal = ifelse(outcome == "positive",
                                    p_abnormal + 0.2,
                                    p_abnormal - 0.2),
         int_soft_normal = ifelse(outcome == "positive",
                                  p_normal + 0.2,
                                  p_normal - 0.2),
         # fixed intervention: the targeted cause's firing probability is set to 0.9 or 0.1
         int_fixed_abnormal = ifelse(outcome == "positive",
                                     0.9,
                                     0.1),
         int_fixed_normal = ifelse(outcome == "positive",
                                   0.9,
                                   0.1),
         # probability of success without an intervention, and under each intervention
         # on either the abnormal or the normal cause
         p_success = pmap_dbl(.l = list(p_abnormal, p_normal, causal_structure, outcome),
                              .f = fun.success),
         p_success_int_hard_abnormal = pmap_dbl(.l = list(int_hard_abnormal, p_normal, causal_structure, outcome),
                                                .f = fun.success),
         p_success_int_hard_normal = pmap_dbl(.l = list(p_abnormal, int_hard_normal, causal_structure, outcome),
                                              .f = fun.success),
         p_success_int_soft_abnormal = pmap_dbl(.l = list(int_soft_abnormal, p_normal, causal_structure, outcome),
                                                .f = fun.success),
         p_success_int_soft_normal = pmap_dbl(.l = list(p_abnormal, int_soft_normal, causal_structure, outcome),
                                              .f = fun.success),
         p_success_int_fixed_abnormal = pmap_dbl(.l = list(int_fixed_abnormal, p_normal, causal_structure, outcome),
                                                 .f = fun.success),
         p_success_int_fixed_normal = pmap_dbl(.l = list(p_abnormal, int_fixed_normal, causal_structure, outcome),
                                               .f = fun.success))
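
# illustrative check of the model grid (not part of the original analysis): in the
# conjunctive structure with a positive outcome, p_success = 0.2 * 0.8 = 0.16; a hard
# intervention on the abnormal cause raises it to 1 * 0.8 = 0.8, whereas a hard
# intervention on the normal cause only raises it to 0.2 * 1 = 0.2
df.model %>%
  filter(causal_structure == "conjunctive", outcome == "positive") %>%
  select(p_success, p_success_int_hard_abnormal, p_success_int_hard_normal)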
# expected probability of success for each intervention and choice
# (abnormal, normal, or no preference between the two targets)
df.choice = df.model %>%
select(causal_structure, outcome,
contains("p_success_int")) %>%
pivot_longer(-c(causal_structure, outcome)) %>%
mutate(name = str_remove(name, "p_success_int_")) %>%
separate(name, into = c("intervention", "target")) %>%
pivot_wider(names_from = target,
values_from = value) %>%
arrange(intervention, causal_structure) %>%
relocate(intervention) %>%
  mutate(nopreference = 0.5 * abnormal + 0.5 * normal) %>% # choosing either target at random
pivot_longer(c(abnormal, normal, nopreference),
names_to = "choice") %>%
mutate(choice = factor(choice, levels = c("abnormal", "nopreference", "normal")),
across(.cols = c(intervention, causal_structure, outcome),
.fns = ~ as.factor(.)))
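
# fun.softmax is used below but defined elsewhere in the source code. A minimal sketch
# consistent with how it is called here (a vector of values within each choice set and a
# `temp` parameter) is given as a fallback; the exact parameterization in the original
# code may differ (e.g. temp as an inverse temperature vs. a divisor), so this sketch
# will not necessarily reproduce the fitted values printed further down.
if (!exists("fun.softmax")) {
  fun.softmax = function(value, temp){
    # assumption: temp acts as an inverse temperature
    exp(temp * value) / sum(exp(temp * value))
  }
}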
# sum of squared errors between the softmax choice model and the mean intervention choices;
# the `par` value supplied to optim() below is passed on to `temperature`
# (the two data frame arguments are supplied by name)
fun.fit_temperature = function(df_data, df_prediction, temperature){
  df_prediction %>%
    group_by(intervention, causal_structure, outcome) %>%
    mutate(prediction = fun.softmax(value, temp = temperature)) %>%
    ungroup() %>%
    left_join(df_data,
              by = c("intervention", "causal_structure", "outcome", "choice")) %>%
    summarize(loss = sum((prediction - probability) ^ 2)) %>%
    pull(loss)
}
# fit the softmax temperature to the intervention data
fit.temperature = optim(par = 10,
fn = fun.fit_temperature,
method = "L-BFGS-B",
lower = 0,
upper = 100,
df_data = df.intervention,
df_prediction = df.choice)
print(fit.temperature$par)
[1] 18.97389
# model predictions for the intervention judgments, using the fitted temperature
df.prediction_intervention = df.choice %>%
  group_by(intervention, causal_structure, outcome) %>%
  mutate(prediction = fun.softmax(value, temp = fit.temperature$par)) %>%
  ungroup() %>%
  left_join(df.data %>%
              filter(type == "intervention") %>%
              select(-type) %>%
              rename(intervention = experiment),
            by = c("causal_structure", "outcome", "intervention", "choice"))
# explanation data joined with the model values; `truth` is an indicator for the
# "no preference" option (1 for nopreference, 0 otherwise)
df.prediction_explanation = df.choice %>%
  group_by(intervention, causal_structure, outcome) %>%
  mutate(truth = ifelse(choice == "nopreference", 1, 0)) %>%
  left_join(df.data %>%
              filter(type == "explanation") %>%
              select(-type) %>%
              rename(intervention = experiment),
            by = c("causal_structure", "outcome", "intervention", "choice")) %>%
  ungroup()
# full model: softmax over a weighted mixture of the intervention value and the
# truth indicator; params = c(weight, temperature)
fun.fit_params = function(params, df_prediction){
  weight <- params[1]
  temperature <- params[2]
  df_prediction %>%
    group_by(intervention, causal_structure, outcome) %>%
    mutate(prediction = fun.softmax(weight * value + (1 - weight) * truth, temp = temperature)) %>%
    ungroup() %>%
    summarize(loss = sum((prediction - probability) ^ 2)) %>%
    pull(loss)
}

# intervention-only model: softmax over the intervention value alone
fun.fit_params_intervention_only = function(params, df_prediction){
  temperature <- params[1]
  df_prediction %>%
    group_by(intervention, causal_structure, outcome) %>%
    mutate(prediction = fun.softmax(value, temp = temperature)) %>%
    ungroup() %>%
    summarize(loss = sum((prediction - probability) ^ 2)) %>%
    pull(loss)
}

# truth-only model: softmax over the truth indicator alone
fun.fit_params_truth_only = function(params, df_prediction){
  temperature <- params[1]
  df_prediction %>%
    group_by(intervention, causal_structure, outcome) %>%
    mutate(prediction = fun.softmax(truth, temp = temperature)) %>%
    ungroup() %>%
    summarize(loss = sum((prediction - probability) ^ 2)) %>%
    pull(loss)
}
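
# illustrative check (not part of the original analysis): with an arbitrary temperature,
# the full model reduces to the intervention-only model at weight = 1 and to the
# truth-only model at weight = 0
fun.fit_params(c(1, 5), df.prediction_explanation) ==
  fun.fit_params_intervention_only(5, df.prediction_explanation)  # TRUE
fun.fit_params(c(0, 5), df.prediction_explanation) ==
  fun.fit_params_truth_only(5, df.prediction_explanation)  # TRUE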
# fit weight and temperature of the full model to the explanation data
initial_params <- c(weight = 0.5, temperature = 10)
lower_bounds <- c(weight = 0, temperature = 0)
upper_bounds <- c(weight = 1, temperature = 100)
fit.params <- optim(par = initial_params,
fn = fun.fit_params,
method = "L-BFGS-B",
lower = lower_bounds,
upper = upper_bounds,
df_prediction = df.prediction_explanation)
print(fit.params$par)
weight temperature
0.8420403 3.5050364
# fit the intervention-only model (temperature only) to the explanation data
initial_params <- c(temperature = 10)
lower_bounds <- c(temperature = 0)
upper_bounds <- c(temperature = 100)
fit.params_intervention_only <- optim(par = initial_params,
fn = fun.fit_params_intervention_only,
method = "L-BFGS-B",
lower = lower_bounds,
upper = upper_bounds,
df_prediction = df.prediction_explanation)
print(fit.params_intervention_only$par)
temperature
2.097383
# fit the truth-only model (temperature only) to the explanation data
initial_params <- c(temperature = 10)
lower_bounds <- c(temperature = 0)
upper_bounds <- c(temperature = 100)
fit.params_truth_only <- optim(par = initial_params,
fn = fun.fit_params_truth_only,
method = "L-BFGS-B",
lower = lower_bounds,
upper = upper_bounds,
df_prediction = df.prediction_explanation)
print(fit.params_truth_only$par)
temperature
0.4546058
# predictions of the full model for the explanation data, using the fitted parameters
df.prediction_explanation = df.prediction_explanation %>%
  group_by(intervention, causal_structure, outcome) %>%
  mutate(prediction = fun.softmax(fit.params$par[1] * value + (1 - fit.params$par[1]) * truth,
                                  temp = fit.params$par[2])) %>%
  ungroup()
write.csv(df.prediction_explanation, file = "../../../data/model/explanation_predictions.csv", row.names = FALSE)
write.csv(df.prediction_intervention, file = "../../../data/model/intervention_predictions.csv", row.names = FALSE)
# predictions of the intervention-only model for the explanation data
df.prediction_explanation_intervention_only = df.prediction_explanation %>%
group_by(intervention, causal_structure, outcome) %>%
mutate(prediction = fun.softmax(value, temp = fit.params_intervention_only$par[1])) %>%
ungroup()
write.csv(df.prediction_explanation_intervention_only, file = "../../../data/model/explanation_predictions_intervention_only.csv", row.names = FALSE)
# predictions of the truth-only model for the explanation data
df.prediction_explanation_truth_only = df.prediction_explanation %>%
group_by(intervention, causal_structure, outcome) %>%
mutate(prediction = fun.softmax(truth, temp = fit.params_truth_only$par[1])) %>%
ungroup()
write.csv(df.prediction_explanation_truth_only, file = "../../../data/model/explanation_predictions_truth_only.csv", row.names = FALSE)
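
# illustrative check (not part of the original analysis): assuming fun.softmax returns a
# normalized distribution, the predictions should sum to 1 within each
# intervention x causal_structure x outcome cell
df.prediction_explanation %>%
  group_by(intervention, causal_structure, outcome) %>%
  summarize(prediction_total = sum(prediction), .groups = "drop")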
R version 4.3.2 (2023-10-31)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Sonoma 14.5
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/Los_Angeles
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
[5] purrr_1.0.2 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
[9] ggplot2_3.4.4 tidyverse_2.0.0 Metrics_0.1.4 patchwork_1.2.0
[13] janitor_2.2.0 knitr_1.45
loaded via a namespace (and not attached):
[1] sass_0.4.8 utf8_1.2.4 generics_0.1.3 stringi_1.8.3
[5] hms_1.1.3 digest_0.6.34 magrittr_2.0.3 evaluate_0.23
[9] grid_4.3.2 timechange_0.2.0 bookdown_0.37 fastmap_1.1.1
[13] jsonlite_1.8.8 fansi_1.0.6 scales_1.3.0 jquerylib_0.1.4
[17] cli_3.6.2 crayon_1.5.2 rlang_1.1.3 bit64_4.0.5
[21] munsell_0.5.0 withr_3.0.0 cachem_1.0.8 yaml_2.3.8
[25] parallel_4.3.2 tools_4.3.2 tzdb_0.4.0 colorspace_2.1-0
[29] vctrs_0.6.5 R6_2.5.1 lifecycle_1.0.4 snakecase_0.11.1
[33] bit_4.0.5 vroom_1.6.5 pkgconfig_2.0.3 pillar_1.9.0
[37] bslib_0.6.1 gtable_0.3.4 glue_1.7.0 xfun_0.41
[41] tidyselect_1.2.0 rstudioapi_0.15.0 htmltools_0.5.7 rmarkdown_2.25
[45] compiler_4.3.2