############################
#This script takes the output of "add.items.conditions.factors.r" and calculates the z-score transformation for each participant
#The z-score transformation corrects for some forms of scale bias.
#This is the long (step-by-step) way of doing this. It mirrors what you would do if you were doing this by hand in Excel.
############################
#NOTE: This script assumes your dataset is called dataset.factors, which is the output of the add.items.conditions.factors.r script.
# Remove practice items.
# By definition, we don't want these contributing to the scale bias removal
# process. We used these items in the experiment to help participants work out
# their scale.
practice.conditions <- c("7P", "6P", "5P", "4P", "3P", "2P", "1P", "7p", "1p")
# Rows with a missing condition are dropped too, exactly as the chain of
# `!=` comparisons inside subset() would drop them (NA counts as FALSE).
dataset.working <- subset(
  dataset.factors,
  !is.na(condition) & !(condition %in% practice.conditions)
)
# Discard factor levels that no longer occur after the practice rows are gone.
dataset.working <- droplevels(dataset.working)

# Count the distinct subjects that are actually present. Counting factor
# levels would give 0 whenever `subject` is a character or numeric column
# (e.g. data read with stringsAsFactors = FALSE), which turns Nitems into Inf.
subjects.present <- unique(dataset.working$subject)
Nsubjects <- sum(!is.na(subjects.present))
# Number of experimental items each subject saw (rows per subject).
Nitems <- nrow(dataset.working) / Nsubjects

# Nitems is only meaningful when every subject contributes the same number of
# rows; flag unbalanced data instead of letting it pass silently.
items.per.subject <- as.vector(table(dataset.working$subject))
if (length(unique(items.per.subject)) > 1) {
  warning(
    "Subjects do not all have the same number of experimental items; ",
    "Nitems is only an average.",
    call. = FALSE
  )
}

# Sort by subject in ascending order (descending would be -subject).
# The dataset should already be sorted this way, but sorting again is harmless.
dataset.working <- dataset.working[order(dataset.working$subject), ]
# Calculate the mean judgment of each subject, ignoring missing judgments.
# tapply() returns one mean per subject, named by subject.
subject.mean.by.subject <- with(
  dataset.working,
  tapply(judgment, subject, mean, na.rm = TRUE)
)
# Create a vector with the subject mean repeated for each record.
# Each row looks up the mean of its own subject by name, so the result lines
# up with the rows of dataset.working even if subjects have different numbers
# of judgments or the rows are not sorted by subject.
subject.mean <- as.numeric(
  subject.mean.by.subject[as.character(dataset.working$subject)]
)
# Calculate the standard deviation of each subject's judgments, ignoring
# missing judgments. tapply() returns one sd per subject, named by subject.
subject.sd.by.subject <- with(
  dataset.working,
  tapply(judgment, subject, sd, na.rm = TRUE)
)
# Create a vector with the subject sd repeated for each record.
# Each row looks up the sd of its own subject by name, so the result lines up
# with the rows of dataset.working even if subjects have different numbers of
# judgments or the rows are not sorted by subject.
subject.sd <- as.numeric(
  subject.sd.by.subject[as.character(dataset.working$subject)]
)
# Get a plain numeric vector of all of the judgments.
# Going through as.character() converts factor labels (not factor codes) to
# numbers. as.numeric() is used rather than as.integer() so that fractional
# judgments are not truncated before the mean is subtracted.
judgment <- as.numeric(as.character(dataset.working$judgment))

# The per-record mean and sd vectors must line up one-to-one with the
# judgments; otherwise R would silently recycle the shorter vector.
stopifnot(
  length(subject.mean) == length(judgment),
  length(subject.sd) == length(judgment)
)

# Calculate the z-score for each record and create a vector for it.
# This is just the formula for z-scores. A subject whose sd is 0 (identical
# judgments throughout) yields NaN here because 0 / 0 is NaN in R.
zscores <- (judgment - subject.mean) / subject.sd

# Add zscores to the dataframe as a new column.
dataset.z <- data.frame(dataset.working, zscores)

# Save a copy of the new dataset for your records.
write.csv(dataset.z, file = "results.long.format.csv", row.names = FALSE)