Neg-tablet Analyses

Prelims and libraries.

rm(list = ls())
#Load libraries
library(reshape2)
library(dplyr)

## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.2.3

library(bootstrap)
library(lme4)

## Loading required package: Matrix

library(knitr)

Functions and add some style elements for ggplot2

## number of unique subs
n.unique <- function (x) {
  length(unique(x))
}

## for bootstrapping 95% confidence intervals
theta <- function(x,xdata) {mean(xdata[x])}
ci.low <- function(x) {
  quantile(bootstrap(1:length(x), 1000, theta, x)$thetastar, .025)}
ci.high <- function(x) {
  quantile(bootstrap(1:length(x), 1000, theta, x)$thetastar, .975)}

plot.style <- theme_bw() + 
  theme(panel.grid.minor = element_blank(), 
        panel.grid.major = element_blank(), 
        legend.position = "right", 
        axis.line = element_line(colour = "black",size = .5), 
        axis.ticks = element_line(size = .5), 
        axis.title.x = element_text(vjust = -.5), 
        axis.title.y = element_text(angle = 90,vjust = 0.25))

Experiment 1: Adults, between-subjects, referent type & syntactic frame

Load in data

d.exp1 <- read.csv("data/exp1.csv")
d.exp1$condition <- factor(d.exp1$condition, 
                           levels = c("noContext", "context"), 
                           labels = c("None", "Target"))
d.exp1$sentence.type <- factor(d.exp1$sentence.type, 
                               levels = c("positive", "negative"), 
                               labels = c("Positive" , "Negative"))

d.exp1$neg.concept <- factor(d.exp1$neg.concept, 
                             levels = c("something", "nothing"),
                             labels = c("Alternative",  "Nonexistence"))

d.exp1$neg.syntax <- factor(d.exp1$neg.syntax, 
                            levels = c("", "has no", "doesn't have"), 
                            labels = c("", "has no X", "doesn't have X"))

Plot data

Histogram of rating scale:

qplot(d.exp1$rating, geom = "histogram") + plot.style

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Plot mean ratings for every trial type (Table 4 in paper):

ms <- d.exp1 %>%
  group_by(subid, condition, sentence.type, truth.value, neg.concept, neg.syntax) %>%
  summarise(subm = mean(rating)) %>%
  group_by(condition, sentence.type, truth.value, neg.concept, neg.syntax) %>%
  summarise(rating = mean(subm),
            high = ci.high(subm),
            low = ci.low(subm))
ms$truth.value <- factor(ms$truth.value, levels=c(TRUE, FALSE), labels=c("True","False"))

kable(ms, digits = 2)

condition	sentence.type	truth.value	neg.concept	neg.syntax	rating	high	low
None	Positive	False	Alternative		2.04	2.60	1.57
None	Positive	False	Nonexistence		2.51	3.06	1.96
None	Positive	True	NA		6.51	6.73	6.22
None	Negative	False	NA	has no X	1.40	1.66	1.19
None	Negative	False	NA	doesn’t have X	2.00	2.51	1.57
None	Negative	True	Alternative	has no X	4.15	4.68	3.65
None	Negative	True	Alternative	doesn’t have X	4.83	5.33	4.29
None	Negative	True	Nonexistence	has no X	4.78	5.20	4.32
None	Negative	True	Nonexistence	doesn’t have X	5.29	5.69	4.85
Target	Positive	False	Alternative		1.83	2.38	1.36
Target	Positive	False	Nonexistence		1.88	2.31	1.49
Target	Positive	True	NA		6.42	6.63	6.20
Target	Negative	False	NA	has no X	1.58	1.87	1.31
Target	Negative	False	NA	doesn’t have X	1.89	2.38	1.44
Target	Negative	True	Alternative	has no X	4.96	5.34	4.54
Target	Negative	True	Alternative	doesn’t have X	5.74	6.10	5.32
Target	Negative	True	Nonexistence	has no X	5.61	6.02	5.19
Target	Negative	True	Nonexistence	doesn’t have X	6.34	6.63	5.99

ggplot(data = ms, aes(x = sentence.type, y = rating, 
      color = neg.concept, shape = neg.syntax)) +
  geom_point(stat = "identity", position = position_dodge(width = .5)) + 
  facet_grid(truth.value ~ condition) +
  geom_errorbar(aes(ymin = low, ymax = high), width = 0,
                position = position_dodge(width = .5)) + 
  plot.style

Just look at negative sentences:

d.exp1.neg <- filter(d.exp1, sentence.type == "Negative")

ms <- d.exp1.neg %>%
  group_by(subid, condition, truth.value, neg.concept, neg.syntax) %>%
  summarise(subm = mean(rating)) %>%
  group_by(condition, truth.value, neg.concept, neg.syntax) %>%
  summarise(rating = mean(subm),
            high = ci.high(subm),
            low = ci.low(subm))
ms$truth.value <- factor(ms$truth.value, levels=c(TRUE, FALSE), labels=c("True","False"))

ggplot(data = ms, aes(x = condition, y = rating, fill = neg.concept)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(truth.value ~ neg.syntax) +
  geom_errorbar(aes(ymin = low, ymax = high), width = 0,
                position = position_dodge(width = .9)) + 
  scale_fill_grey("Negation Type") + 
  xlab("Context") + ylab("Rating") + 
  plot.style

Just look at true negatives (Figure 2 in paper)

d.exp1.trueneg <- filter(d.exp1, sentence.type ==  "Negative" & truth.value == TRUE)

ms <- d.exp1.trueneg %>%
  group_by(subid, condition, neg.concept, neg.syntax) %>%
  summarise(subm = mean(rating)) %>%
  group_by(condition, neg.concept, neg.syntax) %>%
  summarise(rating = mean(subm),
            high = ci.high(subm),
            low = ci.low(subm))

ggplot(data = ms, aes(x = neg.concept, y = rating, fill = condition)) +
  geom_bar(stat = "identity", position = "dodge") +  
  facet_wrap(~ neg.syntax) + 
  geom_errorbar(aes(ymin = low, ymax = high), width = 0,
                position = position_dodge(width = .9)) + 
  scale_fill_grey("Negation Type") + 
  xlab("Context") + ylab("Rating") + 
  plot.style

Statistical models:

All data:

#deviation coding
contrasts(d.exp1$sentence.type) <- rbind(.5, -.5)
contrasts(d.exp1$sentence.type)

##          [,1]
## Positive  0.5
## Negative -0.5

contrasts(d.exp1$condition) <- rbind(.5, -.5)
contrasts(d.exp1$condition)

##        [,1]
## None    0.5
## Target -0.5

contrasts(d.exp1$truth.value) <- rbind(.5, -.5)
contrasts(d.exp1$truth.value)

##       [,1]
## FALSE  0.5
## TRUE  -0.5

model.exp1.all <- lmer(rating ~ sentence.type * condition * truth.value +
                         (sentence.type * truth.value | subid) +
                         (sentence.type * truth.value | item), 
                       data = d.exp1)

kable(summary(model.exp1.all)$coefficients, digits = 2)

	Estimate	Std. Error	t value
(Intercept)	3.87	0.07	51.78
sentence.type1	0.80	0.08	9.60
condition1	-0.11	0.14	-0.79
truth.value1	-3.94	0.18	-21.96
sentence.type1:condition1	0.71	0.17	4.23
sentence.type1:truth.value1	-0.89	0.21	-4.24
condition1:truth.value1	0.59	0.35	1.68
sentence.type1:condition1:truth.value1	-0.57	0.40	-1.43

Just negative sentences:

contrasts(d.exp1.neg$condition) <- rbind(.5, -.5)
contrasts(d.exp1.neg$condition)

##        [,1]
## None    0.5
## Target -0.5

contrasts(d.exp1.neg$truth.value) <- rbind(.5, -.5)
contrasts(d.exp1.neg$truth.value)

##       [,1]
## FALSE  0.5
## TRUE  -0.5

model.exp1.neg <- lmer(rating ~ condition * truth.value +
                         (truth.value | subid) + 
                         (condition * truth.value | item), 
                       data = d.exp1.neg)

kable(summary(model.exp1.neg)$coefficients, digits = 2)

	Estimate	Std. Error	t value
(Intercept)	3.46	0.08	44.45
condition1	-0.46	0.16	-2.89
truth.value1	-3.49	0.21	-16.70
condition1:truth.value1	0.86	0.41	2.09

Just true negative sentences (Table 1 in paper):

contrasts(d.exp1.trueneg$condition) <- rbind(.5, -.5)
contrasts(d.exp1.trueneg$condition)

##        [,1]
## None    0.5
## Target -0.5

contrasts(d.exp1.trueneg$neg.concept) <- rbind(.5, -.5)
contrasts(d.exp1.trueneg$neg.concept)

##              [,1]
## Alternative   0.5
## Nonexistence -0.5

d.exp1.trueneg$neg.syntax <- factor(d.exp1.trueneg$neg.syntax)
contrasts(d.exp1.trueneg$neg.syntax) <- rbind(.5, -.5)
contrasts(d.exp1.trueneg$neg.syntax)

##                [,1]
## has no X        0.5
## doesn't have X -0.5

model.exp1.trueneg <- lmer(rating ~ condition * neg.concept * neg.syntax +
                             (neg.concept * neg.syntax | subid) + 
                             (neg.concept * neg.syntax | item), 
                           data = d.exp1.trueneg)

kable(summary(model.exp1.trueneg)$coefficients, digits = 2)

	Estimate	Std. Error	t value
(Intercept)	5.21	0.13	38.78
condition1	-0.90	0.26	-3.45
neg.concept1	-0.60	0.12	-5.05
neg.syntax1	-0.68	0.17	-3.99
condition1:neg.concept1	0.11	0.19	0.55
condition1:neg.syntax1	0.15	0.29	0.52
neg.concept1:neg.syntax1	-0.11	0.18	-0.64
condition1:neg.concept1:neg.syntax1	-0.22	0.32	-0.67

Experiment 2: Children, between subjects, none vs. target context

Load in data.

d.exp2 <- read.csv("data/exp2.csv")

d.exp2$agegroup <- factor(d.exp2$agegroup)

d.exp2$sent.type <- factor(d.exp2$sent.type, 
                           levels = c("positive", "negative"), 
                           labels = c("Positive", "Negative"))

d.exp2$condition <- factor(d.exp2$condition, 
                           labels = c("None", "Target"))

Condense scale from 5-point to 3-point

d.exp2$resp2 <- 2
d.exp2[d.exp2$resp > 3,]$resp2 <- 3
d.exp2[d.exp2$resp < 3,]$resp2 <- 1
d.exp2$resp2 <- factor(d.exp2$resp2)

Participant exclusions

Exclude subjects who didn’t complete at least half of the trials

#Reject children who didn't complete at least 8 trials
ntrials <- d.exp2 %>%
  group_by(subid) %>%
  summarize(ntrials = n()) %>%
  filter(ntrials < 8)

for (i in ntrials$subid) {
  d.exp2 <- filter(d.exp2, subid != i)
}

Reject subjects who don’t understand scale (based on positive sentences):

reject <- d.exp2 %>%
  filter(sent.type == "Positive") %>% #Only look at positive sentences
  group_by(subid) %>%
  mutate(total = n()) %>% #get total # of positive sentences child saw
  group_by(subid, condition, truth, total, resp2) %>%
  filter((truth == "True" & resp2 == 3) | (truth == "False" & resp2 == 1)) %>% #Get # "good" for true pos and "bad" for false pos
  summarize(counts = n()) %>% 
  group_by(subid, condition, total) %>%
  summarize(counts = sum(counts)) %>% #total # "correct" responses
  mutate(prop = counts/total) %>% #proportion correct
  filter(prop < .6) #reject kids who got < .6 "correct" (this allows for 2/6 "mistakes")

for (i in reject$subid) {
  d.exp2 <- filter(d.exp2, subid != i)
}

Make sure there aren’t any kids who just used one side of scale. Reject kids who only chose a single data point

scaleUse <- aggregate(resp2 ~ subid, d.exp2, n.unique)
table(scaleUse$resp2) #Are any resp2 = 1

## 
##  2  3 
## 48 21

Plot data, all ages

Categorize kids based on response type

tn_responses <- d.exp2 %>%
  filter(sent.type == "Negative" & truth == "True") %>%
  group_by(subid) %>%
  mutate(total = n()) %>%
  group_by(subid, condition, total, resp2) %>%
  summarize(counts = n()) %>%
  mutate(prop = counts/total)

category <- dcast(tn_responses, subid + condition ~ resp2)

## Using prop as value column: use value.var to override.

names(category) <- c("subid","condition","bad","neutral","good")
category[is.na(category)] <- 0

category$type <- "other"
#category[category$neutral > .6,]$type <- "tn_neutral"
category[category$bad > .6,]$type <- "tn_bad"
category[category$good > .6,]$type <- "tn_good"

cat_counts <- category %>%
  group_by(condition, type) %>%
  summarise(counts = n())

cat_counts$type <- factor(cat_counts$type, levels = c("tn_bad","tn_good","other"), labels = c("True Negatives = Bad", "True Negatives = Good", "Inconsistent/Other"))

ggplot(data = cat_counts, aes(x = condition, y = counts, fill = type)) +
  geom_bar(stat = "identity", position = "dodge") + 
  scale_fill_hue("Response Type") +
  ylab("Count") + xlab("Context Condition") +
  plot.style

Mean responses to all sentences (Table 5 in paper):

ms <- d.exp2 %>%
  group_by(subid, condition, sent.type, truth) %>%
  summarise(subm = mean(resp)) %>%
  group_by(condition, sent.type, truth) %>%
  summarise(m = mean(subm),
            cih = ci.high(subm),
            cil = ci.low(subm))
ms$truth <- factor(ms$truth, levels=c("True","False"))

kable(ms, digits = 2)

condition	sent.type	truth	m	cih	cil
None	Positive	False	1.77	2.13	1.46
None	Positive	True	4.59	4.79	4.38
None	Negative	False	1.96	2.42	1.60
None	Negative	True	2.72	3.20	2.28
Target	Positive	False	1.48	1.78	1.23
Target	Positive	True	4.61	4.80	4.40
Target	Negative	False	2.18	2.67	1.73
Target	Negative	True	3.58	4.01	3.14

ggplot(data = subset(ms), 
      aes(x = truth, y = m, fill =  condition)) + 
  facet_grid(. ~ sent.type) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = cil, ymax = cih), 
                position = position_dodge(.9), width = 0) + 
  scale_fill_grey("") +
  xlab("Context") + ylab("Response") +
  scale_y_continuous(limits = c(0, 5), breaks = seq(1,5,1)) +
  plot.style

Just negative trials (Figure 3 in paper):

ggplot(data = subset(ms, sent.type == "Negative"), 
      aes(x = truth, y = m, fill = condition)) + 
  #facet_grid(. ~ truth) + 
  geom_bar(stat = "identity", position = "dodge") + 
  geom_errorbar(aes(ymin = cil, ymax = cih), 
                position = position_dodge(.9), width = 0) + 
  scale_fill_grey("") +
  xlab("Context") + ylab("Response") +
  scale_y_continuous(limits = c(0, 5), breaks = seq(1,5,1)) +
  plot.style

Histogram of trial ratings (Figure 4 in paper):

truenegs <- filter(ms, truth == "True" & sent.type ==  "Negative")

#make df for histogram (for formatting reasons)
hist_data <- d.exp2 %>%
  filter(truth == "True" & sent.type == "Negative") %>%
  group_by(condition, resp) %>%
  summarise(count = n())

ggplot(data = hist_data, aes(y = count, x = resp, 
      fill = condition)) +
  geom_bar(width = .5, position = position_dodge(.6), stat = "identity") +
  geom_point(data = truenegs, aes(x = m, y = c(44, 46), color = condition)) + 
  geom_segment(data = truenegs, aes(x = cil, xend = cih, y = c(44, 46), yend = c(44, 46), color = condition)) + 
  scale_fill_grey("Condition") + scale_color_grey("Condition") +
  xlab("Response") + ylab("Count") +
  plot.style

Statistical models

Basic continuous models:

Table 2 in paper:

contrasts(d.exp2$condition) <- rbind(.5, -.5)
contrasts(d.exp2$condition)

##        [,1]
## None    0.5
## Target -0.5

contrasts(d.exp2$truth) <- rbind(.5, -.5)
contrasts(d.exp2$truth)

##       [,1]
## False  0.5
## True  -0.5

#maximal model (truth * condition | item) does not converge
#This is the maximal model that converges
model.noage <- lmer(resp ~ condition * truth  
                    + (truth|subid) 
                    + (truth|item), 
                    data = filter(d.exp2, sent.type ==  "Negative"))
kable(summary(model.noage)$coefficients, digits = 2)

	Estimate	Std. Error	t value
(Intercept)	2.61	0.11	23.46
condition1	-0.55	0.22	-2.46
truth1	-1.07	0.24	-4.48
condition1:truth1	0.61	0.48	1.28

Including age does not improve model fit:

model.age <- lmer(resp ~ condition * agegroup * truth  
                  + (truth|subid) 
                  + (truth|item), 
                  data = filter(d.exp2, sent.type ==  "Negative"))
kable(summary(model.age)$coefficients, digits = 2)

	Estimate	Std. Error	t value
(Intercept)	2.77	0.16	17.60
condition1	-0.46	0.31	-1.46
agegroup4	-0.33	0.22	-1.48
truth1	-1.01	0.34	-2.95
condition1:agegroup4	-0.24	0.45	-0.54
condition1:truth1	0.42	0.69	0.62
agegroup4:truth1	-0.10	0.49	-0.20
condition1:agegroup4:truth1	0.36	0.98	0.37

anova(model.age, model.noage)

## refitting model(s) with ML (instead of REML)

## Data: filter(d.exp2, sent.type == "Negative")
## Models:
## model.noage: resp ~ condition * truth + (truth | subid) + (truth | item)
## model.age: resp ~ condition * agegroup * truth + (truth | subid) + (truth | 
## model.age:     item)
##             Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## model.noage 11 2154.4 2204.0 -1066.2   2132.4                         
## model.age   15 2159.6 2227.1 -1064.8   2129.6 2.8619      4     0.5812

What about just true negs?

model.kids.trueneg <- lmer(resp ~ condition
                    + (1 |subid) 
                    + (condition |item), 
                    data = filter(d.exp2, sent.type ==  "Negative" & truth == "True"))
kable(summary(model.kids.trueneg)$coefficients, digits = 2)

	Estimate	Std. Error	t value
(Intercept)	3.15	0.17	18.52
condition1	-0.85	0.34	-2.46

T-tests:

#All kids
subs <- aggregate(resp ~ subid + condition, d.exp2, mean)
t.test(resp ~ condition, subs)

## 
##  Welch Two Sample t-test
## 
## data:  resp by condition
## t = -2.2036, df = 66.996, p-value = 0.03099
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.78674329 -0.03888866
## sample estimates:
##   mean in group None mean in group Target 
##             2.998341             3.411157

#Three-year-olds
threes <- filter(d.exp2, agegroup ==  "3")
threes_subs <- aggregate(resp ~ subid + condition, threes, mean)
t.test(resp ~ condition, threes_subs)

## 
##  Welch Two Sample t-test
## 
## data:  resp by condition
## t = -1.1626, df = 30.945, p-value = 0.2539
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.8332835  0.2282204
## sample estimates:
##   mean in group None mean in group Target 
##             3.152014             3.454545

#Four-year-olds
fours <- filter(d.exp2, agegroup ==  "4")
fours_subs <- aggregate(resp ~ subid + condition, fours, mean)
t.test(resp ~ condition, fours_subs)

## 
##  Welch Two Sample t-test
## 
## data:  resp by condition
## t = -2.0693, df = 30.749, p-value = 0.04701
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.129489914 -0.008010086
## sample estimates:
##   mean in group None mean in group Target 
##              2.80625              3.37500