library(lsr)
library(ggplot2)
library(qqplotr)
## 
## Attaching package: 'qqplotr'
## The following objects are masked from 'package:ggplot2':
## 
##     stat_qq_line, StatQqLine
library(pastecs)
library(car)
## Loading required package: carData
# Load the course-evaluation data used by every step of the analysis.
ce <- read.csv(file = "course_evals.csv")

# Interaction plot of cell means: gender along the x-axis, one trace per
# ethnicity level, mean evaluation score on the y-axis.
interaction.plot(
  x.factor = ce$gender,
  trace.factor = ce$ethnicity,
  response = ce$score,
  fun = mean
)

# Two-way factorial ANOVA: score modelled by gender, ethnicity, and their
# interaction; anova() prints the ANOVA table for the fit.
anovamodel <- aov(formula = score ~ gender * ethnicity, data = ce)
anova(anovamodel)
## Analysis of Variance Table
## 
## Response: score
##                   Df  Sum Sq Mean Sq F value   Pr(>F)   
## gender             1   2.260 2.26021  7.7849 0.005488 **
## ethnicity          1   0.517 0.51662  1.7794 0.182884   
## gender:ethnicity   1   0.614 0.61409  2.1151 0.146536   
## Residuals        459 133.263 0.29033                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Call the lm summary method directly so the aov fit is reported as a
# regression (coefficients, R-squared) instead of as an ANOVA table.
summary.lm(object = anovamodel)
## 
## Call:
## aov(formula = score ~ gender * ethnicity, data = ce)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8325 -0.3325  0.0675  0.3711  0.9667 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.93333    0.08980  43.799   <2e-16 ***
## gendermale                        0.31667    0.13577   2.332   0.0201 *  
## ethnicitynot minority             0.19560    0.09945   1.967   0.0498 *  
## gendermale:ethnicitynot minority -0.21310    0.14653  -1.454   0.1465    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5388 on 459 degrees of freedom
## Multiple R-squared:  0.02481,    Adjusted R-squared:  0.01844 
## F-statistic: 3.893 on 3 and 459 DF,  p-value: 0.009113
# Effect sizes (eta-squared and partial eta-squared) for each model term.
etaSquared(x = anovamodel)
##                       eta.sq eta.sq.part
## gender           0.014570835 0.014721629
## ethnicity        0.003780505 0.003861731
## gender:ethnicity 0.004493726 0.004586933
# Keep the model's residuals and fitted values on the data frame so the
# diagnostic plots and tests that follow can use them.
ce$resid <- residuals(anovamodel)
ce$pred <- fitted(anovamodel)

# Normal Q-Q plot of the residuals with a reference line and band
# (layers are added in the same order as before: points, line, band).
ggplot(ce, aes(sample = resid)) +
  stat_qq_point() +
  stat_qq_line() +
  stat_qq_band()

# Descriptive statistics for the residuals; norm = TRUE adds the skewness,
# kurtosis, and normality-test entries to the output.
stat.desc(x = ce$resid, norm = TRUE)
##       nbr.val      nbr.null        nbr.na           min           max 
##  4.630000e+02  0.000000e+00  0.000000e+00 -1.832500e+00  9.666667e-01 
##         range           sum        median          mean       SE.mean 
##  2.799167e+00 -2.706169e-15  6.750000e-02 -5.800862e-18  2.495997e-02 
##  CI.mean.0.95           var       std.dev      coef.var      skewness 
##  4.904913e-02  2.884490e-01  5.370744e-01 -9.258528e+16 -6.899701e-01 
##      skew.2SE      kurtosis      kurt.2SE    normtest.W    normtest.p 
## -3.040294e+00  7.167785e-02  1.582567e-01  9.561648e-01  1.688229e-10
# Levene's test for equal error variance across the gender-by-ethnicity cells.
leveneTest(score ~ gender * ethnicity, data = ce)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  1.1364 0.3339
##       459
# Residuals against fitted values, used to look for patterns in the errors.
plot(x = ce$pred, y = ce$resid)

I ran a two-way ANOVA to test whether a faculty member’s gender and ethnicity predict their average score on course evaluations. The main effect of gender was statistically significant (F(1, 459)=7.78, p=0.005, partial \(\eta^2\)=0.015), but neither the main effect of ethnicity (F(1, 459)=1.78, p=0.18, partial \(\eta^2\)=0.004) nor the gender-by-ethnicity interaction (F(1, 459)=2.12, p=0.15, partial \(\eta^2\)=0.005) was significant. The \(R^2\) value was 0.02481, which shows that 2.48% of the variability in the average score on course evaluations can be explained by a faculty member’s gender and ethnicity.

A Shapiro-Wilk (S-W) test and a normal probability plot were used to assess the normality of the residuals. Based on the S-W test (W=0.956, p<0.001), the normality assumption appears to be violated. Based on Levene’s test (F=1.136, p=0.334), the homogeneity-of-variance assumption does not appear to be violated. However, the predicted vs. residual plot appears to show a pattern, which led me to try a transformation of the y-variable.

Square root Transformation

# Square-root transformation of the response.
ceSq <- sqrt(ce$score)

# Fit the two-way model to the TRANSFORMED response. The earlier call used
# the raw `score` column, so it silently refit the untransformed model and
# ceSq was never used.
anovamodelSq <- aov(ceSq ~ gender * ethnicity, data = ce)
anova(anovamodelSq)
## NOTE: the table below was printed by the earlier, untransformed fit
## (note "Response: score"); re-run this chunk to regenerate it.
## Analysis of Variance Table
## 
## Response: score
##                   Df  Sum Sq Mean Sq F value   Pr(>F)   
## gender             1   2.260 2.26021  7.7849 0.005488 **
## ethnicity          1   0.517 0.51662  1.7794 0.182884   
## gender:ethnicity   1   0.614 0.61409  2.1151 0.146536   
## Residuals        459 133.263 0.29033                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Back-transform the fitted values (now on the square-root scale) to the
# original score scale before comparing them with the observed scores.
ce$predSq <- (fitted.values(anovamodelSq))^2
plot(ce$score, ce$predSq)

Log Transformation

# Natural-log transformation of the response.
ceLog <- log(ce$score)

# Fit the same two-way design to the log-transformed response. The earlier
# call was `ceLog ~ score`, which regressed log(score) on score itself and
# left gender and ethnicity out of the model entirely.
anovamodelLog <- aov(ceLog ~ gender * ethnicity, data = ce)
anova(anovamodelLog)
## NOTE: the table below was printed by the earlier `ceLog ~ score` fit;
## re-run this chunk to regenerate it for the gender * ethnicity model.
## Analysis of Variance Table
## 
## Response: ceLog
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## score       1 8.9329  8.9329   50314 < 2.2e-16 ***
## Residuals 461 0.0818  0.0002                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Back-transform the fitted values from the log scale to the score scale.
ce$predLog <- exp(fitted.values(anovamodelLog))
plot(ce$score, ce$predLog)

Because the predicted vs. residual plot for the model testing whether a faculty member’s gender and ethnicity predict their average score on course evaluations showed a pattern, I attempted a square root and a log transformation. The square root transformation resulted in a constant error variance. To double-check, I attempted a log transformation, which did not improve on my previous model. Based on these models, I chose the square root model because it showed a constant error variance.

Question 2

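Post-hoc tests are not needed for these data because neither the interaction between a faculty member’s gender and ethnicity (p=0.15) nor the main effect of ethnicity (p=0.18) was statistically significant in the two-way ANOVA. Although the main effect of gender was significant, gender has only two levels here (df=1), so no post-hoc comparison is needed to locate the difference.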