## HEADER ##### ## What: Simple anova in R ## Using Ch 14 in Yarrr A Pirates Guide to R ## Sections 14.1 - 14.3 ## https://bookdown.org/ndphillips/YaRrr/anova.html ## When: HARUG! 2019.12.11 ## Who: Last edited by Ed #### ## CONTENTS #### ## 00 Setup ## 01 Explore your data ## 02 Graph your data (matching experimental design) ## 03 Test assumptions ## 04 Evaluate hypotheses (primary, .secondary) ## 00 Setup #### # install.packages("yarrr", dep=T) library(yarrr) # Ed HA PC setwd("D:/Dropbox/WORK/__Harper Adams stuff/_HARUG HA R Users Group/2019.12.11 HARUG R motivator") ## 01 Explore your data #### # data: "Getting poop off of poopdeck" data(poopdeck) help(poopdeck) head(poopdeck) # look at POSSIBLE questions table slide ## 02 Graph your data (matching experimental design) #### # We will look at cleaning *time* as a function of *cleaner* used # our formula: time ~ cleaner # The traditional way to graph 1-way ANOVA is the boxplot boxplot(formula = time ~ cleaner, data = poopdeck) ## 03 Test assumptions #### # basic assumptions of ANOVA # 03.a Gaussian (a/k/a "normal") distribution of residuals # 03.b Homoscedacity (==variance amongst groups) # 03.a Gaussian (a/k/a "normal") distribution of residuals #### # look at help() for function help(aov) # go ahead and calculate ANOVA in order to test residual distribution myAnova <- aov(formula = time ~ cleaner, data = poopdeck) # get residuals from ANOVA model object "myAnova" ?residuals (myResid <- residuals(myAnova)) # Always graph it! hist(myResid) # looks pretty Gaussian, good sample size, not bad qqnorm(myResid) qqline(myResid) # Test it formally help(shapiro.test) shapiro.test(myResid) # if p < 0.05 IT IS different to Gaussian. Is it? # 03.b Homoscedasticity (== homogeneous variance amongst groups) #### ?bartlett.test bartlett.test(formula = sqrt(time) ~ cleaner, data = poopdeck) # is different # Since the variances are not equal, I would have a closer look # make the boxplot par(mar=c(5.1, 5.1, 3, 4)) # ?par (made right margin a bit bigger) boxplot(formula = time ~ cleaner, data = poopdeck, cex = 0, xlab = "Cleaner used", ylab = "Cleaning time (min)", cex.lab = 1.5) # select the actual data a <- which(poopdeck$cleaner == "a") b <- which(poopdeck$cleaner == "b") c <- which(poopdeck$cleaner == "c") # draw on the actual data points(x = jitter(rep(1, length(a)), 5), y = poopdeck$tim[a], pch = 16, cex = .5, col = "blue") points(x = jitter(rep(2, length(a)), 2.8), y = poopdeck$tim[b], pch = 16, cex = .5, col = "blue") points(x = jitter(rep(3, length(a)), 1.8), y = poopdeck$tim[c], pch = 16, cex = .5, col = "blue") # (the most obvious) options here are to decide # the variances are "equal enough" # or else perform a non-parametric test ?kruskal.test ## 04 Evaluate hypotheses (primary, .secondary) #### # 04.a The means are not equal overall # 04.b The specific mean levels are different (post-hoc test) # 04.a The means are not equal overall #### # this is the fundamental ANOVA result ?anova # make an ANOVA table anova(myAnova) summary(myAnova) # alternative myAnova # different info names(myAnova) # lots in the object class(myAnova) # 04.b The specific mean levels are different (post-hoc test) #### ?TukeyHSD TukeyHSD(myAnova) # interpret