#######################
##Multiple Comparisons and the Bonferroni correction##
#######################
################################################
#Demonstrating that the alpha level is the maximum per-comparison Type I Error Rate
###############################################
#take 2 samples of 50 from a normal distribution, 10,000 times (2 x 50 = 100 values per experiment)
#this represents running 10,000 experiments
#notice that each experiment really should be a NULL result because they came from the same population
#each of the 10,000 columns holds 100 draws from the same standard normal (two null samples of 50)
boot.replicates <- replicate(n = 10000, expr = rnorm(100))
#calculate an anova for each replicate
#first we define a function to calculate the anova
#Run a two-group ANOVA on one simulated experiment and return its p-value.
#dataset: a vector of 100 scores; the first 50 are group 0, the last 50 are group 1.
#NOTE: the original put the data in x and the group code in y, so the model regressed the
#group on the data (aov(y~x)). The p-value is identical either way in simple regression,
#but the roles are swapped here so the model reads outcome ~ group, consistent with the
#later sections of this script.
anova.function <- function(dataset){
  d <- data.frame(y = dataset, x = rep(0:1, each = 50))
  d.aov <- aov(y ~ x, data = d)
  #extract just the p-value from the summary table
  return(summary(d.aov)[[1]][5][[1]][1])
}
#Next we apply that function to each column of boot.replicates, because each column holds the two samples of one experiment
#Run the ANOVA on every column of boot.replicates; each column is one simulated experiment
experiments <- apply(X = boot.replicates, MARGIN = 2, FUN = anova.function)
#Choose an alpha level
alpha <- .05
#Proportion of the 10,000 null experiments declared significant; this should land very close
#to alpha (not exactly, because we only drew a finite number of random samples)
sum(experiments <= alpha) / 10000
################################################
#Demonstrating that experimentwise/familywise error is larger than the alpha level when there are multiple comparisons in an experiment (or family)
###############################################
#take 3 samples from a normal distribution 10,000 times; each sample is 50 (x 3 = 150)
#this represents running 10,000 experiments, and doing 3 contrasts for each experiment
#notice that each experiment really should be a NULL result because they came from the same population
#each of the 10,000 columns holds 150 draws from the same standard normal (three null samples of 50)
boot.replicates <- replicate(n = 10000, expr = rnorm(150))
#first we define a function to calculate the anova
#run all three pairwise one-way ANOVAs for one experiment and return their three p-values
anova.function <- function(dataset){
  #outcome y plus a three-level group code (-1, 0, 1), 50 observations per group
  d <- data.frame(y = dataset, x = rep(-1:1, each = 50))
  #helper that digs the p-value out of an aov summary table
  get.p <- function(fit) summary(fit)[[1]][5][[1]][1]
  #each contrast drops one group and compares the remaining two
  p.0.vs.1 <- get.p(aov(y ~ x, data = subset(d, x != -1)))
  p.m1.vs.1 <- get.p(aov(y ~ x, data = subset(d, x != 0)))
  p.m1.vs.0 <- get.p(aov(y ~ x, data = subset(d, x != 1)))
  return(c(p.0.vs.1, p.m1.vs.1, p.m1.vs.0))
}
#calculate t-test for all three contrasts for every experiment
#first we define a function to calculate the three t-tests
#compute the pooled-variance t-test p-value for each of the three pairwise contrasts
t.function <- function(dataset){
  g1 <- dataset[1:50]
  g2 <- dataset[51:100]
  g3 <- dataset[101:150]
  pair.p <- function(a, b) t.test(a, b, var.equal = TRUE)$p.value
  return(c(pair.p(g1, g2), pair.p(g1, g3), pair.p(g2, g3)))
}
#Next we apply that function to each column of boot.replicates, because each column holds the three samples of one experiment
#Apply the ANOVA function to each column of boot.replicates; each column holds the three
#samples of one experiment, so each column yields three p-values (one per contrast)
experiments <- apply(X = boot.replicates, MARGIN = 2, FUN = anova.function)
#Calculate the per comparison error rate
alpha <- .05
#3 p-values per experiment x 10,000 experiments = 30,000 comparisons; the per-comparison
#rate stays near alpha even though the experimentwise rate (computed below) is inflated
sum(experiments <= alpha) / 30000
#Define a function that reports whether one experiment (a vector of three p-values)
#contains at least one Type I error
#(fix: the original source was garbled here -- the comparison inside sum() and the
#error.function header were missing, leaving a stray closing brace)
error.function <- function(dataset){
  errors <- sum(dataset <= alpha)
  return(errors > 0)
}
#Check every one of the 10,000 experiments for at least one Type I error
experimentwise.errors <- apply(X = experiments, MARGIN = 2, FUN = error.function)
#The proportion of experiments containing at least one error is the experimentwise
#(familywise) error rate, and it is much larger than .05
sum(experimentwise.errors) / 10000
#The analytic equation for familywise error: the probability of at least one Type I error
#across Ncontrasts independent comparisons, each run at level alpha
alpha <- .05
Ncontrasts <- 3
fwer <- 1 - (1 - alpha)^Ncontrasts
##################
#Demonstrating the Bonferroni correction
##################
#First we define a function to look for at least one error in the three contrasts using the bonferroni level of .05/3
#Flag an experiment as containing a Type I error if any of its three p-values falls at or
#below the Bonferroni-adjusted threshold of .05/3
b.function <- function(dataset){
  n.sig <- sum(dataset <= (.05/3))
  return(n.sig > 0)
}
#Check every one of the 10,000 experiments for at least one Bonferroni-corrected error
b.errors <- apply(X = experiments, MARGIN = 2, FUN = b.function)
#The proportion of experiments with at least one error; with the Bonferroni correction the
#experimentwise error rate is now held at (approximately) .05 rather than inflated
sum(b.errors) / 10000
##################
#Demonstrating that the Bonferroni correction does not work for post-hoc comparisons
##################
#take 3 samples from a normal distribution 10,000 times; each sample is 50 (x 3 = 150)
#this represents running 10,000 experiments, and doing 3 contrasts for each experiment
#notice that each experiment really should be a NULL result because they came from the same population
#each of the 10,000 columns holds 150 draws from the same standard normal (three null samples of 50)
boot.replicates <- replicate(n = 10000, expr = rnorm(150))
#first we define a function to calculate the anova
#run all three pairwise one-way ANOVAs for one experiment and return their three p-values
anova.function <- function(dataset){
  #outcome y plus a three-level group code (-1, 0, 1), 50 observations per group
  d <- data.frame(y = dataset, x = rep(-1:1, each = 50))
  #helper that digs the p-value out of an aov summary table
  get.p <- function(fit) summary(fit)[[1]][5][[1]][1]
  #each contrast drops one group and compares the remaining two
  p.0.vs.1 <- get.p(aov(y ~ x, data = subset(d, x != -1)))
  p.m1.vs.1 <- get.p(aov(y ~ x, data = subset(d, x != 0)))
  p.m1.vs.0 <- get.p(aov(y ~ x, data = subset(d, x != 1)))
  return(c(p.0.vs.1, p.m1.vs.1, p.m1.vs.0))
}
#Apply the ANOVA function to every column of boot.replicates; each column holds the three
#samples of one experiment, so each column yields three p-values
experiments <- apply(X = boot.replicates, MARGIN = 2, FUN = anova.function)
###Do a post-hoc selection of 2/3 comparisons, and apply a Bonferroni of 2###
#To demonstrate this, we want to find the TWO LARGEST experimental differences, which means the TWO SMALLEST p-values.
#To make this easier, I am going to write a function that sorts each column (each column is an experiment now) from smallest to largest, that way we can keep only two rows (two comparisons)
#first, define a function that sorts by p-value:
#first, define a function that sorts a vector of p-values from smallest to largest
#(fix: the original's last expression was an assignment, so the function returned its
#value only invisibly; make the return explicit)
sort.function <- function(dataset){
  return(dataset[order(dataset)])
}
#next, apply the sorting function to all 10,000 experimental results
sorted.experiments <- apply(X = experiments, MARGIN = 2, FUN = sort.function)
#keep only the first 2 rows: the two smallest p-values (biggest differences) per experiment
results <- sorted.experiments[1:2, ]
#finally, we apply the bonferroni correction of .05/2, in the same way we applied it before: we define a function, then apply the function to all of the experiments (all of the columns), and then count the number of p-values below .05:
#a function to perform the bonferroni correction
#Flag an experiment as containing a Type I error if either of its two retained p-values
#falls at or below the (post-hoc, and therefore too lenient) Bonferroni threshold of .05/2
b.2.function <- function(dataset){
  n.sig <- sum(dataset <= (.05/2))
  return(n.sig > 0)
}
#apply the Bonferroni-of-2 check to the post-hoc selected results
b.2.errors <- apply(X = results, MARGIN = 2, FUN = b.2.function)
#count the Type I errors under the post-hoc correction of 2; the rate exceeds .05, so this
#correction no longer controls the familywise error
sum(b.2.errors) / 10000
#it is even more extreme if we keep only the single largest result in each experiment and
#apply no correction at all (as if only one comparison had ever been run)
#keep just the first row: the single smallest p-value (biggest difference) per experiment
results <- sorted.experiments[1, ]
#count the Type I errors with no correction; the rate is even larger than before, showing
#that the problem gets worse the more comparisons you discard post hoc
sum(results < .05) / 10000
###############
#Demonstrating that a full Bonferroni correction on the post-hoc selected results gives the same result as running it on the full set of comparisons
#This is a necessary truth -- we didn't eliminate any significant results in our post-hoc analysis, so the number of experiments with at least one significant result will be the same as if we were looking at the full set of results without removing any!
###############
#a full Bonferroni correction divides alpha by all 3 comparisons that were actually run: .05/3
sum(results < .05/3) / 10000
################################################################################
################################################################################
################################################################################
################################################################################
################################################################################