Prelims and libraries.
rm(list = ls())
#Load libraries
library(reshape2)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.3
library(bootstrap)
library(lme4)
## Loading required package: Matrix
library(knitr)
Functions and add some style elements for ggplot2
## number of unique subs
n.unique <- function (x) {
length(unique(x))
}
## for bootstrapping 95% confidence intervals
theta <- function(x,xdata) {mean(xdata[x])}
ci.low <- function(x) {
quantile(bootstrap(1:length(x), 1000, theta, x)$thetastar, .025)}
ci.high <- function(x) {
quantile(bootstrap(1:length(x), 1000, theta, x)$thetastar, .975)}
plot.style <- theme_bw() +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
legend.position = "right",
axis.line = element_line(colour = "black",size = .5),
axis.ticks = element_line(size = .5),
axis.title.x = element_text(vjust = -.5),
axis.title.y = element_text(angle = 90,vjust = 0.25))
Load in data
d.exp1 <- read.csv("data/exp1.csv")
d.exp1$condition <- factor(d.exp1$condition,
levels = c("noContext", "context"),
labels = c("None", "Target"))
d.exp1$sentence.type <- factor(d.exp1$sentence.type,
levels = c("positive", "negative"),
labels = c("Positive" , "Negative"))
d.exp1$neg.concept <- factor(d.exp1$neg.concept,
levels = c("something", "nothing"),
labels = c("Alternative", "Nonexistence"))
d.exp1$neg.syntax <- factor(d.exp1$neg.syntax,
levels = c("", "has no", "doesn't have"),
labels = c("", "has no X", "doesn't have X"))
Histogram of rating scale:
qplot(d.exp1$rating, geom = "histogram") + plot.style
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Plot mean ratings for every trial type (Table 4 in paper):
ms <- d.exp1 %>%
group_by(subid, condition, sentence.type, truth.value, neg.concept, neg.syntax) %>%
summarise(subm = mean(rating)) %>%
group_by(condition, sentence.type, truth.value, neg.concept, neg.syntax) %>%
summarise(rating = mean(subm),
high = ci.high(subm),
low = ci.low(subm))
ms$truth.value <- factor(ms$truth.value, levels=c(TRUE, FALSE), labels=c("True","False"))
kable(ms, digits = 2)
condition | sentence.type | truth.value | neg.concept | neg.syntax | rating | high | low |
---|---|---|---|---|---|---|---|
None | Positive | False | Alternative | 2.04 | 2.60 | 1.57 | |
None | Positive | False | Nonexistence | 2.51 | 3.06 | 1.96 | |
None | Positive | True | NA | 6.51 | 6.73 | 6.22 | |
None | Negative | False | NA | has no X | 1.40 | 1.66 | 1.19 |
None | Negative | False | NA | doesn’t have X | 2.00 | 2.51 | 1.57 |
None | Negative | True | Alternative | has no X | 4.15 | 4.68 | 3.65 |
None | Negative | True | Alternative | doesn’t have X | 4.83 | 5.33 | 4.29 |
None | Negative | True | Nonexistence | has no X | 4.78 | 5.20 | 4.32 |
None | Negative | True | Nonexistence | doesn’t have X | 5.29 | 5.69 | 4.85 |
Target | Positive | False | Alternative | 1.83 | 2.38 | 1.36 | |
Target | Positive | False | Nonexistence | 1.88 | 2.31 | 1.49 | |
Target | Positive | True | NA | 6.42 | 6.63 | 6.20 | |
Target | Negative | False | NA | has no X | 1.58 | 1.87 | 1.31 |
Target | Negative | False | NA | doesn’t have X | 1.89 | 2.38 | 1.44 |
Target | Negative | True | Alternative | has no X | 4.96 | 5.34 | 4.54 |
Target | Negative | True | Alternative | doesn’t have X | 5.74 | 6.10 | 5.32 |
Target | Negative | True | Nonexistence | has no X | 5.61 | 6.02 | 5.19 |
Target | Negative | True | Nonexistence | doesn’t have X | 6.34 | 6.63 | 5.99 |
ggplot(data = ms, aes(x = sentence.type, y = rating,
color = neg.concept, shape = neg.syntax)) +
geom_point(stat = "identity", position = position_dodge(width = .5)) +
facet_grid(truth.value ~ condition) +
geom_errorbar(aes(ymin = low, ymax = high), width = 0,
position = position_dodge(width = .5)) +
plot.style
Just look at negative sentences:
d.exp1.neg <- filter(d.exp1, sentence.type == "Negative")
ms <- d.exp1.neg %>%
group_by(subid, condition, truth.value, neg.concept, neg.syntax) %>%
summarise(subm = mean(rating)) %>%
group_by(condition, truth.value, neg.concept, neg.syntax) %>%
summarise(rating = mean(subm),
high = ci.high(subm),
low = ci.low(subm))
ms$truth.value <- factor(ms$truth.value, levels=c(TRUE, FALSE), labels=c("True","False"))
ggplot(data = ms, aes(x = condition, y = rating, fill = neg.concept)) +
geom_bar(stat = "identity", position = "dodge") +
facet_grid(truth.value ~ neg.syntax) +
geom_errorbar(aes(ymin = low, ymax = high), width = 0,
position = position_dodge(width = .9)) +
scale_fill_grey("Negation Type") +
xlab("Context") + ylab("Rating") +
plot.style
Just look at true negatives (Figure 2 in paper)
d.exp1.trueneg <- filter(d.exp1, sentence.type == "Negative" & truth.value == TRUE)
ms <- d.exp1.trueneg %>%
group_by(subid, condition, neg.concept, neg.syntax) %>%
summarise(subm = mean(rating)) %>%
group_by(condition, neg.concept, neg.syntax) %>%
summarise(rating = mean(subm),
high = ci.high(subm),
low = ci.low(subm))
ggplot(data = ms, aes(x = neg.concept, y = rating, fill = condition)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ neg.syntax) +
geom_errorbar(aes(ymin = low, ymax = high), width = 0,
position = position_dodge(width = .9)) +
scale_fill_grey("Negation Type") +
xlab("Context") + ylab("Rating") +
plot.style
All data:
#deviation coding
contrasts(d.exp1$sentence.type) <- rbind(.5, -.5)
contrasts(d.exp1$sentence.type)
## [,1]
## Positive 0.5
## Negative -0.5
contrasts(d.exp1$condition) <- rbind(.5, -.5)
contrasts(d.exp1$condition)
## [,1]
## None 0.5
## Target -0.5
contrasts(d.exp1$truth.value) <- rbind(.5, -.5)
contrasts(d.exp1$truth.value)
## [,1]
## FALSE 0.5
## TRUE -0.5
model.exp1.all <- lmer(rating ~ sentence.type * condition * truth.value +
(sentence.type * truth.value | subid) +
(sentence.type * truth.value | item),
data = d.exp1)
kable(summary(model.exp1.all)$coefficients, digits = 2)
Estimate | Std. Error | t value | |
---|---|---|---|
(Intercept) | 3.87 | 0.07 | 51.78 |
sentence.type1 | 0.80 | 0.08 | 9.60 |
condition1 | -0.11 | 0.14 | -0.79 |
truth.value1 | -3.94 | 0.18 | -21.96 |
sentence.type1:condition1 | 0.71 | 0.17 | 4.23 |
sentence.type1:truth.value1 | -0.89 | 0.21 | -4.24 |
condition1:truth.value1 | 0.59 | 0.35 | 1.68 |
sentence.type1:condition1:truth.value1 | -0.57 | 0.40 | -1.43 |
Just negative sentences:
contrasts(d.exp1.neg$condition) <- rbind(.5, -.5)
contrasts(d.exp1.neg$condition)
## [,1]
## None 0.5
## Target -0.5
contrasts(d.exp1.neg$truth.value) <- rbind(.5, -.5)
contrasts(d.exp1.neg$truth.value)
## [,1]
## FALSE 0.5
## TRUE -0.5
model.exp1.neg <- lmer(rating ~ condition * truth.value +
(truth.value | subid) +
(condition * truth.value | item),
data = d.exp1.neg)
kable(summary(model.exp1.neg)$coefficients, digits = 2)
Estimate | Std. Error | t value | |
---|---|---|---|
(Intercept) | 3.46 | 0.08 | 44.45 |
condition1 | -0.46 | 0.16 | -2.89 |
truth.value1 | -3.49 | 0.21 | -16.70 |
condition1:truth.value1 | 0.86 | 0.41 | 2.09 |
Just true negative sentences (Table 1 in paper):
contrasts(d.exp1.trueneg$condition) <- rbind(.5, -.5)
contrasts(d.exp1.trueneg$condition)
## [,1]
## None 0.5
## Target -0.5
contrasts(d.exp1.trueneg$neg.concept) <- rbind(.5, -.5)
contrasts(d.exp1.trueneg$neg.concept)
## [,1]
## Alternative 0.5
## Nonexistence -0.5
d.exp1.trueneg$neg.syntax <- factor(d.exp1.trueneg$neg.syntax)
contrasts(d.exp1.trueneg$neg.syntax) <- rbind(.5, -.5)
contrasts(d.exp1.trueneg$neg.syntax)
## [,1]
## has no X 0.5
## doesn't have X -0.5
model.exp1.trueneg <- lmer(rating ~ condition * neg.concept * neg.syntax +
(neg.concept * neg.syntax | subid) +
(neg.concept * neg.syntax | item),
data = d.exp1.trueneg)
kable(summary(model.exp1.trueneg)$coefficients, digits = 2)
Estimate | Std. Error | t value | |
---|---|---|---|
(Intercept) | 5.21 | 0.13 | 38.78 |
condition1 | -0.90 | 0.26 | -3.45 |
neg.concept1 | -0.60 | 0.12 | -5.05 |
neg.syntax1 | -0.68 | 0.17 | -3.99 |
condition1:neg.concept1 | 0.11 | 0.19 | 0.55 |
condition1:neg.syntax1 | 0.15 | 0.29 | 0.52 |
neg.concept1:neg.syntax1 | -0.11 | 0.18 | -0.64 |
condition1:neg.concept1:neg.syntax1 | -0.22 | 0.32 | -0.67 |
Load in data.
d.exp2 <- read.csv("data/exp2.csv")
d.exp2$agegroup <- factor(d.exp2$agegroup)
d.exp2$sent.type <- factor(d.exp2$sent.type,
levels = c("positive", "negative"),
labels = c("Positive", "Negative"))
d.exp2$condition <- factor(d.exp2$condition,
labels = c("None", "Target"))
Condense scale from 5-point to 3-point
d.exp2$resp2 <- 2
d.exp2[d.exp2$resp > 3,]$resp2 <- 3
d.exp2[d.exp2$resp < 3,]$resp2 <- 1
d.exp2$resp2 <- factor(d.exp2$resp2)
Exclude subjects who didn’t complete at least half of the trials
#Reject children who didn't complete at least 8 trials
ntrials <- d.exp2 %>%
group_by(subid) %>%
summarize(ntrials = n()) %>%
filter(ntrials < 8)
for (i in ntrials$subid) {
d.exp2 <- filter(d.exp2, subid != i)
}
Reject subjects who don’t understand scale (based on positive sentences):
reject <- d.exp2 %>%
filter(sent.type == "Positive") %>% #Only look at positive sentences
group_by(subid) %>%
mutate(total = n()) %>% #get total # of positive sentences child saw
group_by(subid, condition, truth, total, resp2) %>%
filter((truth == "True" & resp2 == 3) | (truth == "False" & resp2 == 1)) %>% #Get # "good" for true pos and "bad" for false pos
summarize(counts = n()) %>%
group_by(subid, condition, total) %>%
summarize(counts = sum(counts)) %>% #total # "correct" responses
mutate(prop = counts/total) %>% #proportion correct
filter(prop < .6) #reject kids who got < .6 "correct" (this allows for 2/6 "mistakes")
for (i in reject$subid) {
d.exp2 <- filter(d.exp2, subid != i)
}
Make sure there aren’t any kids who just used one side of scale. Reject kids who only chose a single data point
scaleUse <- aggregate(resp2 ~ subid, d.exp2, n.unique)
table(scaleUse$resp2) #Are any resp2 = 1
##
## 2 3
## 48 21
Categorize kids based on response type
tn_responses <- d.exp2 %>%
filter(sent.type == "Negative" & truth == "True") %>%
group_by(subid) %>%
mutate(total = n()) %>%
group_by(subid, condition, total, resp2) %>%
summarize(counts = n()) %>%
mutate(prop = counts/total)
category <- dcast(tn_responses, subid + condition ~ resp2)
## Using prop as value column: use value.var to override.
names(category) <- c("subid","condition","bad","neutral","good")
category[is.na(category)] <- 0
category$type <- "other"
#category[category$neutral > .6,]$type <- "tn_neutral"
category[category$bad > .6,]$type <- "tn_bad"
category[category$good > .6,]$type <- "tn_good"
cat_counts <- category %>%
group_by(condition, type) %>%
summarise(counts = n())
cat_counts$type <- factor(cat_counts$type, levels = c("tn_bad","tn_good","other"), labels = c("True Negatives = Bad", "True Negatives = Good", "Inconsistent/Other"))
ggplot(data = cat_counts, aes(x = condition, y = counts, fill = type)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_hue("Response Type") +
ylab("Count") + xlab("Context Condition") +
plot.style
Mean responses to all sentences (Table 5 in paper):
ms <- d.exp2 %>%
group_by(subid, condition, sent.type, truth) %>%
summarise(subm = mean(resp)) %>%
group_by(condition, sent.type, truth) %>%
summarise(m = mean(subm),
cih = ci.high(subm),
cil = ci.low(subm))
ms$truth <- factor(ms$truth, levels=c("True","False"))
kable(ms, digits = 2)
condition | sent.type | truth | m | cih | cil |
---|---|---|---|---|---|
None | Positive | False | 1.77 | 2.13 | 1.46 |
None | Positive | True | 4.59 | 4.79 | 4.38 |
None | Negative | False | 1.96 | 2.42 | 1.60 |
None | Negative | True | 2.72 | 3.20 | 2.28 |
Target | Positive | False | 1.48 | 1.78 | 1.23 |
Target | Positive | True | 4.61 | 4.80 | 4.40 |
Target | Negative | False | 2.18 | 2.67 | 1.73 |
Target | Negative | True | 3.58 | 4.01 | 3.14 |
ggplot(data = subset(ms),
aes(x = truth, y = m, fill = condition)) +
facet_grid(. ~ sent.type) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = cil, ymax = cih),
position = position_dodge(.9), width = 0) +
scale_fill_grey("") +
xlab("Context") + ylab("Response") +
scale_y_continuous(limits = c(0, 5), breaks = seq(1,5,1)) +
plot.style
Just negative trials (Figure 3 in paper):
ggplot(data = subset(ms, sent.type == "Negative"),
aes(x = truth, y = m, fill = condition)) +
#facet_grid(. ~ truth) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = cil, ymax = cih),
position = position_dodge(.9), width = 0) +
scale_fill_grey("") +
xlab("Context") + ylab("Response") +
scale_y_continuous(limits = c(0, 5), breaks = seq(1,5,1)) +
plot.style
Histogram of trial ratings (Figure 4 in paper):
truenegs <- filter(ms, truth == "True" & sent.type == "Negative")
#make df for histogram (for formatting reasons)
hist_data <- d.exp2 %>%
filter(truth == "True" & sent.type == "Negative") %>%
group_by(condition, resp) %>%
summarise(count = n())
ggplot(data = hist_data, aes(y = count, x = resp,
fill = condition)) +
geom_bar(width = .5, position = position_dodge(.6), stat = "identity") +
geom_point(data = truenegs, aes(x = m, y = c(44, 46), color = condition)) +
geom_segment(data = truenegs, aes(x = cil, xend = cih, y = c(44, 46), yend = c(44, 46), color = condition)) +
scale_fill_grey("Condition") + scale_color_grey("Condition") +
xlab("Response") + ylab("Count") +
plot.style
Basic continuous models:
Table 2 in paper:
contrasts(d.exp2$condition) <- rbind(.5, -.5)
contrasts(d.exp2$condition)
## [,1]
## None 0.5
## Target -0.5
contrasts(d.exp2$truth) <- rbind(.5, -.5)
contrasts(d.exp2$truth)
## [,1]
## False 0.5
## True -0.5
#maximal model (truth * condition | item) does not converge
#This is the maximal model that converges
model.noage <- lmer(resp ~ condition * truth
+ (truth|subid)
+ (truth|item),
data = filter(d.exp2, sent.type == "Negative"))
kable(summary(model.noage)$coefficients, digits = 2)
Estimate | Std. Error | t value | |
---|---|---|---|
(Intercept) | 2.61 | 0.11 | 23.46 |
condition1 | -0.55 | 0.22 | -2.46 |
truth1 | -1.07 | 0.24 | -4.48 |
condition1:truth1 | 0.61 | 0.48 | 1.28 |
Including age does not improve model fit:
model.age <- lmer(resp ~ condition * agegroup * truth
+ (truth|subid)
+ (truth|item),
data = filter(d.exp2, sent.type == "Negative"))
kable(summary(model.age)$coefficients, digits = 2)
Estimate | Std. Error | t value | |
---|---|---|---|
(Intercept) | 2.77 | 0.16 | 17.60 |
condition1 | -0.46 | 0.31 | -1.46 |
agegroup4 | -0.33 | 0.22 | -1.48 |
truth1 | -1.01 | 0.34 | -2.95 |
condition1:agegroup4 | -0.24 | 0.45 | -0.54 |
condition1:truth1 | 0.42 | 0.69 | 0.62 |
agegroup4:truth1 | -0.10 | 0.49 | -0.20 |
condition1:agegroup4:truth1 | 0.36 | 0.98 | 0.37 |
anova(model.age, model.noage)
## refitting model(s) with ML (instead of REML)
## Data: filter(d.exp2, sent.type == "Negative")
## Models:
## model.noage: resp ~ condition * truth + (truth | subid) + (truth | item)
## model.age: resp ~ condition * agegroup * truth + (truth | subid) + (truth |
## model.age: item)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## model.noage 11 2154.4 2204.0 -1066.2 2132.4
## model.age 15 2159.6 2227.1 -1064.8 2129.6 2.8619 4 0.5812
What about just true negs?
model.kids.trueneg <- lmer(resp ~ condition
+ (1 |subid)
+ (condition |item),
data = filter(d.exp2, sent.type == "Negative" & truth == "True"))
kable(summary(model.kids.trueneg)$coefficients, digits = 2)
Estimate | Std. Error | t value | |
---|---|---|---|
(Intercept) | 3.15 | 0.17 | 18.52 |
condition1 | -0.85 | 0.34 | -2.46 |
T-tests:
#All kids
subs <- aggregate(resp ~ subid + condition, d.exp2, mean)
t.test(resp ~ condition, subs)
##
## Welch Two Sample t-test
##
## data: resp by condition
## t = -2.2036, df = 66.996, p-value = 0.03099
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.78674329 -0.03888866
## sample estimates:
## mean in group None mean in group Target
## 2.998341 3.411157
#Three-year-olds
threes <- filter(d.exp2, agegroup == "3")
threes_subs <- aggregate(resp ~ subid + condition, threes, mean)
t.test(resp ~ condition, threes_subs)
##
## Welch Two Sample t-test
##
## data: resp by condition
## t = -1.1626, df = 30.945, p-value = 0.2539
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.8332835 0.2282204
## sample estimates:
## mean in group None mean in group Target
## 3.152014 3.454545
#Four-year-olds
fours <- filter(d.exp2, agegroup == "4")
fours_subs <- aggregate(resp ~ subid + condition, fours, mean)
t.test(resp ~ condition, fours_subs)
##
## Welch Two Sample t-test
##
## data: resp by condition
## t = -2.0693, df = 30.749, p-value = 0.04701
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.129489914 -0.008010086
## sample estimates:
## mean in group None mean in group Target
## 2.80625 3.37500