# Statistical analysis in R ----
# Data Science Lab, University of Copenhagen
# August 2025


# Step 1: Load packages ----

#install.packages("emmeans")
#install.packages("multcomp")
#install.packages("multcompView")
#install.packages("GGally")
library(emmeans)
library(multcomp)
library(readxl)
library(tidyverse)
library(GGally)
library(multcomp)
library(multcompView)

# Example A: Analysis of variance ----


# Step 2: Data ----

# Read the workbook "psoriasis.xlsx" (path is relative to the working
# directory) and show the raw table before any recoding.
psorData <- read_excel(path = "psoriasis.xlsx")
psorData

# Recode the grouping column `type` as a factor, then tabulate the number of
# observations in each of its levels.
psorData <- psorData %>% mutate(type = factor(type))
psorData %>% count(type)


# Step 3: Descriptive plots and statistics ----

# Individual observations of intensity, one column of points per skin type.
psorData %>%
  ggplot(aes(x = type, y = intensity)) +
  geom_point() +
  labs(x = "Skin type", y = "Intensity")

# Same data summarised as one boxplot per skin type.
psorData %>%
  ggplot(aes(x = type, y = intensity)) +
  geom_boxplot() +
  labs(x = "Skin type", y = "Intensity")

# Mean and standard deviation of intensity within each skin type.
summarise(
  group_by(psorData, type),
  avg = mean(intensity),
  sd = sd(intensity)
)


# Step 4: Fit of oneway ANOVA, model validation ----

# One-way ANOVA fitted as a linear model: log-transformed intensity explained
# by the factor `type`.
oneway <- lm(log(intensity) ~ type, data = psorData)
oneway

# Model validation: draw the lm diagnostic plots in a 2x2 grid.
# `par()` returns the settings it replaces; they are restored afterwards so
# the 2x2 layout does not leak into later base-graphics plots.
old_par <- par(mfrow = c(2, 2))
plot(oneway)
par(old_par)


# Step 5: Hypothesis test + Post hoc tests ----

# F-test for dropping `type` from the model (single-term deletion).
drop1(oneway, test = "F")

# Estimated marginal means for each level of `type` (on the scale of the
# model, i.e. log(intensity)), followed by all pairwise comparisons.
oneway %>% emmeans(specs = ~type)
oneway %>% emmeans(specs = ~type) %>% pairs()

# Compact letter display of the pairwise comparisons.
oneway %>% emmeans(specs = ~type) %>% cld()


# Step 6: Report of model parameters ----

# Estimated marginal means per skin type, reported with type = "response",
# i.e. back-transformed from the log scale used when fitting `oneway`.
emmeans(oneway, specs = ~type, type = "response")

# Confidence intervals for the pairwise comparisons, also requested on the
# response scale.
confint(
  pairs(
    emmeans(oneway, specs = ~type, type = "response")
  )
)


# Example B: Simple and multiple linear regression ----


# Step 2: Data ----

# Load the `trees` data set (columns used below: Girth, Height, Volume).
data("trees")


# Step 3: Visualization of raw data ----

# Pairwise plot matrix of all variables in `trees`.
trees %>% ggpairs()


# Step 4: Fitting and validating a simple linear regression model ----

# Simple linear regression of Volume on Girth, both on their original scale.
linreg1 <- lm(Volume ~ Girth, data = trees)
linreg1

# Model validation: draw the lm diagnostic plots in a 2x2 grid.
# `par()` returns the settings it replaces; they are restored afterwards so
# the 2x2 layout does not leak into later base-graphics plots.
old_par <- par(mfrow = c(2, 2))
plot(linreg1)
par(old_par)


# Step 4 iterated: Transformation ----

# Refit the regression with both response and predictor log-transformed.
linreg2 <- lm(log(Volume) ~ log(Girth), data = trees)

# Diagnostic plots for the transformed model in a 2x2 grid; the previous
# graphics settings returned by `par()` are restored afterwards so the layout
# does not leak into later base-graphics plots.
old_par <- par(mfrow = c(2, 2))
plot(linreg2)
par(old_par)


# Step 6: Report of the model ----

# Coefficient table and overall fit statistics for the log-log model.
summary(linreg2)

# Confidence intervals for the intercept and slope (default level).
confint(linreg2)


# Step 6: Visualization of the model ----

# Take the fitted line straight from the model instead of typing in rounded
# estimates, so the plot stays correct if the data or the model changes.
linreg2_coef <- coef(linreg2)

# Data and fitted line on the log-log scale used by `linreg2`.
ggplot(trees, aes(x = log(Girth), y = log(Volume))) +
  geom_point() +
  geom_abline(
    intercept = linreg2_coef[["(Intercept)"]],
    slope = linreg2_coef[["log(Girth)"]],
    colour = "red"
  )

# Same relationship shown with the original units on log10 axes.
# NOTE(review): ggplot2 applies scale transformations before computing stats,
# so this smoother is a straight-line fit on the log10-transformed values.
# The formula is spelled out to avoid the "using formula" message.
ggplot(trees, aes(x = Girth, y = Volume)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE) +
  scale_x_log10() +
  scale_y_log10()


# Step 4 iterated: Multiple linear regression ----

# Multiple linear regression: log(Volume) explained by log(Girth) and
# log(Height). Only the coefficient table of the summary is printed.
linreg3 <- lm(log(Volume) ~ log(Girth) + log(Height), data = trees)
summary(linreg3)$coefficients

# Diagnostic plots in a 2x2 grid; the previous graphics settings returned by
# `par()` are restored afterwards so the layout does not leak into later
# base-graphics plots.
old_par <- par(mfrow = c(2, 2))
plot(linreg3)
par(old_par)


# Step 5: Hypothesis test ----

# F-tests for dropping each term from the model, one at a time.
drop1(linreg3, test = "F")


# Use of pipe operator ----

# Nested form: evaluated from the innermost call outwards
# (emmeans, then pairs, then confint).
confint(
  pairs(
    emmeans(oneway, ~type, type = "response")
  )
)

# Piped form of the same computation: `x %>% f(y)` is `f(x, y)`, so the steps
# read top to bottom in the order they are applied.
oneway %>%
  emmeans(~type, type = "response") %>%
  pairs() %>%
  confint()

