Prelims and libraries.
rm(list = ls())
#Load libraries
library(reshape2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.3
library(bootstrap)
library(lme4)
## Loading required package: Matrix
library(knitr)
Functions and add some style elements for ggplot2
## number of unique subs
n.unique <- function (x) {
  length(unique(x))
}
## for bootstrapping 95% confidence intervals
theta <- function(x,xdata) {mean(xdata[x])}
ci.low <- function(x) {
  quantile(bootstrap(1:length(x), 1000, theta, x)$thetastar, .025)}
ci.high <- function(x) {
  quantile(bootstrap(1:length(x), 1000, theta, x)$thetastar, .975)}
plot.style <- theme_bw() + 
  theme(panel.grid.minor = element_blank(), 
        panel.grid.major = element_blank(), 
        legend.position = "right", 
        axis.line = element_line(colour = "black",size = .5), 
        axis.ticks = element_line(size = .5), 
        axis.title.x = element_text(vjust = -.5), 
        axis.title.y = element_text(angle = 90,vjust = 0.25))
Load in data
d.exp1 <- read.csv("data/exp1.csv")
d.exp1$condition <- factor(d.exp1$condition, 
                           levels = c("noContext", "context"), 
                           labels = c("None", "Target"))
d.exp1$sentence.type <- factor(d.exp1$sentence.type, 
                               levels = c("positive", "negative"), 
                               labels = c("Positive" , "Negative"))
d.exp1$neg.concept <- factor(d.exp1$neg.concept, 
                             levels = c("something", "nothing"),
                             labels = c("Alternative",  "Nonexistence"))
d.exp1$neg.syntax <- factor(d.exp1$neg.syntax, 
                            levels = c("", "has no", "doesn't have"), 
                            labels = c("", "has no X", "doesn't have X"))
Histogram of rating scale:
qplot(d.exp1$rating, geom = "histogram") + plot.style
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Plot mean ratings for every trial type (Table 4 in paper):
ms <- d.exp1 %>%
  group_by(subid, condition, sentence.type, truth.value, neg.concept, neg.syntax) %>%
  summarise(subm = mean(rating)) %>%
  group_by(condition, sentence.type, truth.value, neg.concept, neg.syntax) %>%
  summarise(rating = mean(subm),
            high = ci.high(subm),
            low = ci.low(subm))
ms$truth.value <- factor(ms$truth.value, levels=c(TRUE, FALSE), labels=c("True","False"))
kable(ms, digits = 2)
| condition | sentence.type | truth.value | neg.concept | neg.syntax | rating | high | low | 
|---|---|---|---|---|---|---|---|
| None | Positive | False | Alternative | 2.04 | 2.60 | 1.57 | |
| None | Positive | False | Nonexistence | 2.51 | 3.06 | 1.96 | |
| None | Positive | True | NA | 6.51 | 6.73 | 6.22 | |
| None | Negative | False | NA | has no X | 1.40 | 1.66 | 1.19 | 
| None | Negative | False | NA | doesn’t have X | 2.00 | 2.51 | 1.57 | 
| None | Negative | True | Alternative | has no X | 4.15 | 4.68 | 3.65 | 
| None | Negative | True | Alternative | doesn’t have X | 4.83 | 5.33 | 4.29 | 
| None | Negative | True | Nonexistence | has no X | 4.78 | 5.20 | 4.32 | 
| None | Negative | True | Nonexistence | doesn’t have X | 5.29 | 5.69 | 4.85 | 
| Target | Positive | False | Alternative | 1.83 | 2.38 | 1.36 | |
| Target | Positive | False | Nonexistence | 1.88 | 2.31 | 1.49 | |
| Target | Positive | True | NA | 6.42 | 6.63 | 6.20 | |
| Target | Negative | False | NA | has no X | 1.58 | 1.87 | 1.31 | 
| Target | Negative | False | NA | doesn’t have X | 1.89 | 2.38 | 1.44 | 
| Target | Negative | True | Alternative | has no X | 4.96 | 5.34 | 4.54 | 
| Target | Negative | True | Alternative | doesn’t have X | 5.74 | 6.10 | 5.32 | 
| Target | Negative | True | Nonexistence | has no X | 5.61 | 6.02 | 5.19 | 
| Target | Negative | True | Nonexistence | doesn’t have X | 6.34 | 6.63 | 5.99 | 
ggplot(data = ms, aes(x = sentence.type, y = rating, 
      color = neg.concept, shape = neg.syntax)) +
  geom_point(stat = "identity", position = position_dodge(width = .5)) + 
  facet_grid(truth.value ~ condition) +
  geom_errorbar(aes(ymin = low, ymax = high), width = 0,
                position = position_dodge(width = .5)) + 
  plot.style
Just look at negative sentences:
d.exp1.neg <- filter(d.exp1, sentence.type == "Negative")
ms <- d.exp1.neg %>%
  group_by(subid, condition, truth.value, neg.concept, neg.syntax) %>%
  summarise(subm = mean(rating)) %>%
  group_by(condition, truth.value, neg.concept, neg.syntax) %>%
  summarise(rating = mean(subm),
            high = ci.high(subm),
            low = ci.low(subm))
ms$truth.value <- factor(ms$truth.value, levels=c(TRUE, FALSE), labels=c("True","False"))
ggplot(data = ms, aes(x = condition, y = rating, fill = neg.concept)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(truth.value ~ neg.syntax) +
  geom_errorbar(aes(ymin = low, ymax = high), width = 0,
                position = position_dodge(width = .9)) + 
  scale_fill_grey("Negation Type") + 
  xlab("Context") + ylab("Rating") + 
  plot.style
Just look at true negatives (Figure 2 in paper)
d.exp1.trueneg <- filter(d.exp1, sentence.type ==  "Negative" & truth.value == TRUE)
ms <- d.exp1.trueneg %>%
  group_by(subid, condition, neg.concept, neg.syntax) %>%
  summarise(subm = mean(rating)) %>%
  group_by(condition, neg.concept, neg.syntax) %>%
  summarise(rating = mean(subm),
            high = ci.high(subm),
            low = ci.low(subm))
ggplot(data = ms, aes(x = neg.concept, y = rating, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +  
  facet_wrap(~ neg.syntax) + 
  geom_errorbar(aes(ymin = low, ymax = high), width = 0,
                position = position_dodge(width = .9)) + 
  scale_fill_grey("Negation Type") + 
  xlab("Context") + ylab("Rating") + 
  plot.style
All data:
#deviation coding
contrasts(d.exp1$sentence.type) <- rbind(.5, -.5)
contrasts(d.exp1$sentence.type)
##          [,1]
## Positive  0.5
## Negative -0.5
contrasts(d.exp1$condition) <- rbind(.5, -.5)
contrasts(d.exp1$condition)
##        [,1]
## None    0.5
## Target -0.5
contrasts(d.exp1$truth.value) <- rbind(.5, -.5)
contrasts(d.exp1$truth.value)
##       [,1]
## FALSE  0.5
## TRUE  -0.5
model.exp1.all <- lmer(rating ~ sentence.type * condition * truth.value +
                         (sentence.type * truth.value | subid) +
                         (sentence.type * truth.value | item), 
                       data = d.exp1)
kable(summary(model.exp1.all)$coefficients, digits = 2)
| Estimate | Std. Error | t value | |
|---|---|---|---|
| (Intercept) | 3.87 | 0.07 | 51.78 | 
| sentence.type1 | 0.80 | 0.08 | 9.60 | 
| condition1 | -0.11 | 0.14 | -0.79 | 
| truth.value1 | -3.94 | 0.18 | -21.96 | 
| sentence.type1:condition1 | 0.71 | 0.17 | 4.23 | 
| sentence.type1:truth.value1 | -0.89 | 0.21 | -4.24 | 
| condition1:truth.value1 | 0.59 | 0.35 | 1.68 | 
| sentence.type1:condition1:truth.value1 | -0.57 | 0.40 | -1.43 | 
Just negative sentences:
contrasts(d.exp1.neg$condition) <- rbind(.5, -.5)
contrasts(d.exp1.neg$condition)
##        [,1]
## None    0.5
## Target -0.5
contrasts(d.exp1.neg$truth.value) <- rbind(.5, -.5)
contrasts(d.exp1.neg$truth.value)
##       [,1]
## FALSE  0.5
## TRUE  -0.5
model.exp1.neg <- lmer(rating ~ condition * truth.value +
                         (truth.value | subid) + 
                         (condition * truth.value | item), 
                       data = d.exp1.neg)
kable(summary(model.exp1.neg)$coefficients, digits = 2)
| Estimate | Std. Error | t value | |
|---|---|---|---|
| (Intercept) | 3.46 | 0.08 | 44.45 | 
| condition1 | -0.46 | 0.16 | -2.89 | 
| truth.value1 | -3.49 | 0.21 | -16.70 | 
| condition1:truth.value1 | 0.86 | 0.41 | 2.09 | 
Just true negative sentences (Table 1 in paper):
contrasts(d.exp1.trueneg$condition) <- rbind(.5, -.5)
contrasts(d.exp1.trueneg$condition)
##        [,1]
## None    0.5
## Target -0.5
contrasts(d.exp1.trueneg$neg.concept) <- rbind(.5, -.5)
contrasts(d.exp1.trueneg$neg.concept)
##              [,1]
## Alternative   0.5
## Nonexistence -0.5
d.exp1.trueneg$neg.syntax <- factor(d.exp1.trueneg$neg.syntax)
contrasts(d.exp1.trueneg$neg.syntax) <- rbind(.5, -.5)
contrasts(d.exp1.trueneg$neg.syntax)
##                [,1]
## has no X        0.5
## doesn't have X -0.5
model.exp1.trueneg <- lmer(rating ~ condition * neg.concept * neg.syntax +
                             (neg.concept * neg.syntax | subid) + 
                             (neg.concept * neg.syntax | item), 
                           data = d.exp1.trueneg)
kable(summary(model.exp1.trueneg)$coefficients, digits = 2)
| Estimate | Std. Error | t value | |
|---|---|---|---|
| (Intercept) | 5.21 | 0.13 | 38.78 | 
| condition1 | -0.90 | 0.26 | -3.45 | 
| neg.concept1 | -0.60 | 0.12 | -5.05 | 
| neg.syntax1 | -0.68 | 0.17 | -3.99 | 
| condition1:neg.concept1 | 0.11 | 0.19 | 0.55 | 
| condition1:neg.syntax1 | 0.15 | 0.29 | 0.52 | 
| neg.concept1:neg.syntax1 | -0.11 | 0.18 | -0.64 | 
| condition1:neg.concept1:neg.syntax1 | -0.22 | 0.32 | -0.67 | 
Load in data.
d.exp2 <- read.csv("data/exp2.csv")
d.exp2$agegroup <- factor(d.exp2$agegroup)
d.exp2$sent.type <- factor(d.exp2$sent.type, 
                           levels = c("positive", "negative"), 
                           labels = c("Positive", "Negative"))
d.exp2$condition <- factor(d.exp2$condition, 
                           labels = c("None", "Target"))
Condense scale from 5-point to 3-point
d.exp2$resp2 <- 2
d.exp2[d.exp2$resp > 3,]$resp2 <- 3
d.exp2[d.exp2$resp < 3,]$resp2 <- 1
d.exp2$resp2 <- factor(d.exp2$resp2)
Exclude subjects who didn’t complete at least half of the trials
#Reject children who didn't complete at least 8 trials
ntrials <- d.exp2 %>%
  group_by(subid) %>%
  summarize(ntrials = n()) %>%
  filter(ntrials < 8)
for (i in ntrials$subid) {
  d.exp2 <- filter(d.exp2, subid != i)
}
Reject subjects who don’t understand scale (based on positive sentences):
reject <- d.exp2 %>%
  filter(sent.type == "Positive") %>% #Only look at positive sentences
  group_by(subid) %>%
  mutate(total = n()) %>% #get total # of positive sentences child saw
  group_by(subid, condition, truth, total, resp2) %>%
  filter((truth == "True" & resp2 == 3) | (truth == "False" & resp2 == 1)) %>% #Get # "good" for true pos and "bad" for false pos
  summarize(counts = n()) %>% 
  group_by(subid, condition, total) %>%
  summarize(counts = sum(counts)) %>% #total # "correct" responses
  mutate(prop = counts/total) %>% #proportion correct
  filter(prop < .6) #reject kids who got < .6 "correct" (this allows for 2/6 "mistakes")
for (i in reject$subid) {
  d.exp2 <- filter(d.exp2, subid != i)
}
Make sure there aren’t any kids who just used one side of scale. Reject kids who only chose a single data point
scaleUse <- aggregate(resp2 ~ subid, d.exp2, n.unique)
table(scaleUse$resp2) #Are any resp2 = 1
## 
##  2  3 
## 48 21
Categorize kids based on response type
tn_responses <- d.exp2 %>%
  filter(sent.type == "Negative" & truth == "True") %>%
  group_by(subid) %>%
  mutate(total = n()) %>%
  group_by(subid, condition, total, resp2) %>%
  summarize(counts = n()) %>%
  mutate(prop = counts/total)
category <- dcast(tn_responses, subid + condition ~ resp2)
## Using prop as value column: use value.var to override.
names(category) <- c("subid","condition","bad","neutral","good")
category[is.na(category)] <- 0
category$type <- "other"
#category[category$neutral > .6,]$type <- "tn_neutral"
category[category$bad > .6,]$type <- "tn_bad"
category[category$good > .6,]$type <- "tn_good"
cat_counts <- category %>%
  group_by(condition, type) %>%
  summarise(counts = n())
cat_counts$type <- factor(cat_counts$type, levels = c("tn_bad","tn_good","other"), labels = c("True Negatives = Bad", "True Negatives = Good", "Inconsistent/Other"))
ggplot(data = cat_counts, aes(x = condition, y = counts, fill = type)) +
  geom_bar(stat = "identity", position = "dodge") + 
  scale_fill_hue("Response Type") +
  ylab("Count") + xlab("Context Condition") +
  plot.style
Mean responses to all sentences (Table 5 in paper):
ms <- d.exp2 %>%
  group_by(subid, condition, sent.type, truth) %>%
  summarise(subm = mean(resp)) %>%
  group_by(condition, sent.type, truth) %>%
  summarise(m = mean(subm),
            cih = ci.high(subm),
            cil = ci.low(subm))
ms$truth <- factor(ms$truth, levels=c("True","False"))
kable(ms, digits = 2)
| condition | sent.type | truth | m | cih | cil | 
|---|---|---|---|---|---|
| None | Positive | False | 1.77 | 2.13 | 1.46 | 
| None | Positive | True | 4.59 | 4.79 | 4.38 | 
| None | Negative | False | 1.96 | 2.42 | 1.60 | 
| None | Negative | True | 2.72 | 3.20 | 2.28 | 
| Target | Positive | False | 1.48 | 1.78 | 1.23 | 
| Target | Positive | True | 4.61 | 4.80 | 4.40 | 
| Target | Negative | False | 2.18 | 2.67 | 1.73 | 
| Target | Negative | True | 3.58 | 4.01 | 3.14 | 
ggplot(data = subset(ms), 
      aes(x = truth, y = m, fill =  condition)) + 
  facet_grid(. ~ sent.type) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = cil, ymax = cih), 
                position = position_dodge(.9), width = 0) + 
  scale_fill_grey("") +
  xlab("Context") + ylab("Response") +
  scale_y_continuous(limits = c(0, 5), breaks = seq(1,5,1)) +
  plot.style
Just negative trials (Figure 3 in paper):
ggplot(data = subset(ms, sent.type == "Negative"), 
      aes(x = truth, y = m, fill = condition)) + 
  #facet_grid(. ~ truth) + 
  geom_bar(stat = "identity", position = "dodge") + 
  geom_errorbar(aes(ymin = cil, ymax = cih), 
                position = position_dodge(.9), width = 0) + 
  scale_fill_grey("") +
  xlab("Context") + ylab("Response") +
  scale_y_continuous(limits = c(0, 5), breaks = seq(1,5,1)) +
  plot.style
Histogram of trial ratings (Figure 4 in paper):
truenegs <- filter(ms, truth == "True" & sent.type ==  "Negative")
#make df for histogram (for formatting reasons)
hist_data <- d.exp2 %>%
  filter(truth == "True" & sent.type == "Negative") %>%
  group_by(condition, resp) %>%
  summarise(count = n())
ggplot(data = hist_data, aes(y = count, x = resp, 
      fill = condition)) +
  geom_bar(width = .5, position = position_dodge(.6), stat = "identity") +
  geom_point(data = truenegs, aes(x = m, y = c(44, 46), color = condition)) + 
  geom_segment(data = truenegs, aes(x = cil, xend = cih, y = c(44, 46), yend = c(44, 46), color = condition)) + 
  scale_fill_grey("Condition") + scale_color_grey("Condition") +
  xlab("Response") + ylab("Count") +
  plot.style
Basic continuous models:
Table 2 in paper:
contrasts(d.exp2$condition) <- rbind(.5, -.5)
contrasts(d.exp2$condition)
##        [,1]
## None    0.5
## Target -0.5
contrasts(d.exp2$truth) <- rbind(.5, -.5)
contrasts(d.exp2$truth)
##       [,1]
## False  0.5
## True  -0.5
#maximal model (truth * condition | item) does not converge
#This is the maximal model that converges
model.noage <- lmer(resp ~ condition * truth  
                    + (truth|subid) 
                    + (truth|item), 
                    data = filter(d.exp2, sent.type ==  "Negative"))
kable(summary(model.noage)$coefficients, digits = 2)
| Estimate | Std. Error | t value | |
|---|---|---|---|
| (Intercept) | 2.61 | 0.11 | 23.46 | 
| condition1 | -0.55 | 0.22 | -2.46 | 
| truth1 | -1.07 | 0.24 | -4.48 | 
| condition1:truth1 | 0.61 | 0.48 | 1.28 | 
Including age does not improve model fit:
model.age <- lmer(resp ~ condition * agegroup * truth  
                  + (truth|subid) 
                  + (truth|item), 
                  data = filter(d.exp2, sent.type ==  "Negative"))
kable(summary(model.age)$coefficients, digits = 2)
| Estimate | Std. Error | t value | |
|---|---|---|---|
| (Intercept) | 2.77 | 0.16 | 17.60 | 
| condition1 | -0.46 | 0.31 | -1.46 | 
| agegroup4 | -0.33 | 0.22 | -1.48 | 
| truth1 | -1.01 | 0.34 | -2.95 | 
| condition1:agegroup4 | -0.24 | 0.45 | -0.54 | 
| condition1:truth1 | 0.42 | 0.69 | 0.62 | 
| agegroup4:truth1 | -0.10 | 0.49 | -0.20 | 
| condition1:agegroup4:truth1 | 0.36 | 0.98 | 0.37 | 
anova(model.age, model.noage)
## refitting model(s) with ML (instead of REML)
## Data: filter(d.exp2, sent.type == "Negative")
## Models:
## model.noage: resp ~ condition * truth + (truth | subid) + (truth | item)
## model.age: resp ~ condition * agegroup * truth + (truth | subid) + (truth | 
## model.age:     item)
##             Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## model.noage 11 2154.4 2204.0 -1066.2   2132.4                         
## model.age   15 2159.6 2227.1 -1064.8   2129.6 2.8619      4     0.5812
What about just true negs?
model.kids.trueneg <- lmer(resp ~ condition
                    + (1 |subid) 
                    + (condition |item), 
                    data = filter(d.exp2, sent.type ==  "Negative" & truth == "True"))
kable(summary(model.kids.trueneg)$coefficients, digits = 2)
| Estimate | Std. Error | t value | |
|---|---|---|---|
| (Intercept) | 3.15 | 0.17 | 18.52 | 
| condition1 | -0.85 | 0.34 | -2.46 | 
T-tests:
#All kids
subs <- aggregate(resp ~ subid + condition, d.exp2, mean)
t.test(resp ~ condition, subs)
## 
##  Welch Two Sample t-test
## 
## data:  resp by condition
## t = -2.2036, df = 66.996, p-value = 0.03099
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.78674329 -0.03888866
## sample estimates:
##   mean in group None mean in group Target 
##             2.998341             3.411157
#Three-year-olds
threes <- filter(d.exp2, agegroup ==  "3")
threes_subs <- aggregate(resp ~ subid + condition, threes, mean)
t.test(resp ~ condition, threes_subs)
## 
##  Welch Two Sample t-test
## 
## data:  resp by condition
## t = -1.1626, df = 30.945, p-value = 0.2539
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.8332835  0.2282204
## sample estimates:
##   mean in group None mean in group Target 
##             3.152014             3.454545
#Four-year-olds
fours <- filter(d.exp2, agegroup ==  "4")
fours_subs <- aggregate(resp ~ subid + condition, fours, mean)
t.test(resp ~ condition, fours_subs)
## 
##  Welch Two Sample t-test
## 
## data:  resp by condition
## t = -2.0693, df = 30.749, p-value = 0.04701
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.129489914 -0.008010086
## sample estimates:
##   mean in group None mean in group Target 
##              2.80625              3.37500