################
#linear mixed effects models
################
#read in the data
d=read.csv("results.no.outliers.csv")
#load some packages
library(lmerTest)
library(dplyr)
#filter() the data so that we only have experimental items (no fillers)
wh = filter(d, island == "wh")
################
#set the contrast coding -- these are the numbers (1, 0, -1) that are used to represent the levels of each categorical factor in your model
################
#R's default is treatment coding. Treatment coding specifies one level of each factor as a baseline, and then calculates differences from that level
#Treatment coding makes it really easy to see the size of the DD (the size of the interaction) -- it will be the coefficient of the interaction term in the model
#Treatment coding makes the most sense if you can define one condition as a baseline, and want to compare other conditions to that baseline. We can typically do that in a 2x2.
#Another option that might be good is effects coding. Effects coding takes the grand mean of all conditions as a baseline, and makes each level of a factor either 1 or -1 (times a coefficient).
#Effects coding makes it really easy to see the size of the main effects (2x each of the factor coefficients). It is harder to see the size of the interaction, because it is conflated in the main effects.
#Effects coding makes the most sense if you can't define any one condition as a baseline, and instead want to use the grand mean as a baseline.
#The choice of coding will not affect the statistics (F, p-values). It only affects the coefficients that are estimated, and how they interact with the categorical factors.
#R defines treatment coding with the command contr.treatment(). It is the default setting for R.
#R defines effects coding with the command contr.sum().
#There are other options, but these are the two that you are most likely to use.
#The first thing you should do is look at the current coding of your contrasts
contrasts(wh$dependencyLength)
contrasts(wh$embeddedStructure)
#What we see is that R is using the default treatment coding, and that the order of the treatments is not right.
#This is because the order of the levels in the factors is alphabetical, and R assings 0 to the first level, and 1 to the second level.
#We could fix this in one two ways. We could go back and re-order the levels, and then re-set the contrast coding; or we could set the contrast coding directly
#Option1: re-order the levels, then reset the contrast coding. This is probably the best, because it will also help with plotting.
#specify the order of the levels of the factors we will be working with
wh$embeddedStructure=factor(wh$embeddedStructure, levels=c("non","isl"))
wh$dependencyLength=factor(wh$dependencyLength, levels=c("sh","lg"))
contrasts(wh$dependencyLength)<-contr.treatment
contrasts(wh$embeddedStructure)<-contr.treatment
#double check that it worked
contrasts(wh$dependencyLength)
contrasts(wh$embeddedStructure)
#Option2: Set the order of the contrasts by hand. This command will reverse the coding. (Don't do it after running option 1 above, or it will reverse them back to the incorrect order.
contrasts(wh$dependencyLength)<-contr.treatment(2)[2:1]
contrasts(wh$embeddedStructure)<-contr.treatment(2)[2:1]
#double check that it worked
contrasts(wh$dependencyLength)
contrasts(wh$embeddedStructure)
#If you want to change all unorderd factors in R to effects coding, you can change the global options, like this:
options(contrasts=c('contr.sum','contr.poly')) #The second opiton is for ordered factors, which we will leave as the defaul "poly" option
#If you want to change individual factors, you can use the contrasts() function
#then we set them to be what we want
contrasts(wh$dependencyLength)<-contr.sum
contrasts(wh$embeddedStructure)<-contr.sum
############
#Building the linear mixed effects models
############
#Linear mixed effects model with random intercepts for subject and item
#This model works with both types of coding (effects and treatment)
wh.lmer = lmer(zscores~embeddedStructure*dependencyLength + (1|subject) + (1|item), data=wh)
#see the coefficients of the model, and their t statistics and p-values
summary(wh.lmer)
#calculate F's and p-values using the lmerTest version of anova()
anova(wh.lmer)
#There are some people who believe you should create the maximal random effects structure that is licensed by your experimental design
#So, we could introduce random intercepts for subject and item, and random slopes for subjects
#This model converges with effects coding, but not with treatment coding. This is a common pattern for random slopes, see Barr et al. 2013.
#There are other reasons that a random slope model may not converge, such as too few data points,
#If your model won't converge, you need to make a change that lets it converge. Either change the effects coding, or use a model with intercepts only.
#If all you need out of the model are statistics and p-values, then simply switch your coding to the one that works (effects coding)
#If it is important to you to interpret the coefficients as treatments, then you will have to use the intercept only model (so you can keep treatment coding)
wh.slope.lmer = lmer(zscores~embeddedStructure*dependencyLength + (1+embeddedStructure*dependencyLength|subject) + (1|item), data=wh)
#see the coefficients of the model, and their t statistics and p-values
summary(wh.slope.lmer)
#Get the F statistics and p-values using the lmerTest version of anova()
anova(wh.slope.lmer)