# Capítulo 8. GLM con distribuciones de errores binomiales

#  8.1. GLM de respuesta binaria
#  8.1.1. Presentación del caso de estudio: estrategia reproduc-
#           tora de una especie de alga invasora

library(ADER)
data(algas)
str(algas)

#  8.1.2. Análisis exploratorio y ajuste del modelo

long.means <- with(algas, aggregate(Long, list(Estado, Sp), mean))
long.sd <- with(algas, aggregate(Long, list(Estado, Sp),sd))
long.n <- with(algas, aggregate(Long, list(Estado, Sp),length))
long.means$sel <- long.means$x+(long.sd$x/sqrt(long.n$x))
long.means$ser <- long.means$x-(long.sd$x/sqrt(long.n$x))

library(Hmisc)
long.means$Group.1 <- factor(long.means$Group.1)
levels(long.means$Group.1) <- c("No reproductivo", "Reproductivo")
levels(long.means$Group.2) <- c("C. fragile", "C. tomentosum",
  "C. vermilara")

dotchart3(cbind(long.means$sel, long.means$x, long.means$ser),
  labels=long.means$Group.1, groups=long.means$Group.2,
  pch=c(93,19,91), xlab="Longitud (cm)", groupfont=4)

glm.algas <- glm(Estado~Long*Sp, algas, family=binomial)


#  8.1.3. Resolución de hipótesis

anova(glm.algas, test="Chi")
glm.algas_f <- glm(Estado~Long+Sp, algas, family=binomial)
anova(glm.algas_f, test="Chi")

#  8.1.4. Interpretación del modelo
#  A) La función summary()

summary(glm.algas_f)
invlogit <- function(eta) exp(eta)/(1+exp(eta))
invlogit(-2.80703+0.26505*(10+1))- invlogit(-2.80703+0.26505*(10))
invlogit(-2.80703+0.26505*(20+1))- invlogit(-2.80703+0.26505*(20))

#  B) Devianza explicada por el modelo y capacidad predictiva

library(modEvA)
Dsquared(glm.algas_f)

table(algas$Estado, fitted(glm.algas_f)>0.5)

library(pROC)
rocplot <- roc(Estado ~ fitted(glm.algas_f), data=algas)
plot(rocplot, xlab="Especificidad", ylab="Sensibilidad")
auc(rocplot)

#  C) Gráficos de predicciones

xv <- seq(0, 30, 0.1)
yv.Fra <- predict(glm.algas_f, list(Long=xv, Sp=rep("Fra",
  length(xv))), type="response")
yv.Tom <- predict(glm.algas_f, list(Long=xv, Sp=rep("Tom",
  length(xv))), type="response")
yv.Ver <- predict(glm.algas_f, list(Long=xv, Sp=rep("Ver",
  length(xv))), type="response")


plot(Estado~Long, algas, pch=16, xlab="Longitud (cm)",
  ylab="Probabilidad reproducción")
lines(xv, yv.Fra, lwd=2)
lines(xv, yv.Tom, lwd=2, lty=3)
lines(xv, yv.Ver, lwd=2, lty=2)
legend(x=2, y=0.9, lty=c(1,3,2), lwd=rep(2,3),
legend=c("C. fragile", "C. tomentosum", "C. vermilara"))

library(logihist)
library(gridExtra)
Fra <- subset(algas, Sp=="Fra")
Tom <- subset(algas, Sp=="Tom")
Ver <- subset(algas, Sp=="Ver")

p1 <- logibox(Fra$Long, Fra$Estado, fillb=c("orange","blue"),
  sizeb=0.5) + ylab("P(reproductivo)") + xlab("") +
  stat_smooth(method = "glm", method.args =
  list(family = "binomial"), se=TRUE, size=1, colour="black") +
  theme_light() + ggtitle("C. fragile") +
  theme(plot.title = element_text(face="italic", hjust=0.5)) +
  geom_jitter(height=0.02, size=0.5, colour="green", alpha=0.5)

p2 <- logibox(Tom$Long, Tom$Estado, fillb=c("orange","blue"),
  sizeb=0.5) + ylab("") + xlab("Longitud (cm)") +
  stat_smooth(method = "glm", method.args =
  list(family = "binomial"), se=TRUE, size=1, colour="black") +
  theme_light() + ggtitle("C. tomentosum") +
  theme(plot.title = element_text(face="italic", hjust=0.5)) +
  geom_jitter(height=0.02, size=0.5, colour="green", alpha=0.5)

p3 <- logibox(Ver$Long, Ver$Estado, fillb=c("orange","blue"),
  sizeb=0.5) + ylab("") + xlab("") +
  stat_smooth(method = "glm", method.args =
  list(family = "binomial"), se=TRUE, size=1, colour="black") +
  theme_light() + ggtitle("C. vermilara") +
  theme(plot.title = element_text(face="italic", hjust=0.5)) +
  geom_jitter(height=0.02, size=0.5, colour=c("green"), alpha=0.5)

grid.arrange(p1, p2, p3, ncol=3, nrow=1)

par(mfcol=c(1,3))
for(i in 1:3) {
   algas.i <- subset(algas, Sp == levels(algas$Sp)[i])
   cutr <- cut(algas.i$Long, 5)
   probs <- as.vector(tapply(algas.i$Estado, cutr, sum)/table(cutr))
   resmeans <- tapply(algas.i$Long, cutr, mean)
   se <- sqrt(probs*(1-probs)/table(cutr))
   up <- probs + as.vector(se)
   down <- probs - as.vector(se)
   xv <- seq(0,30,0.1)
   yv <- predict(glm.algas_f, list(Long=xv,
     Sp=rep(levels(algas.i$Sp)[i], length(xv))), type="response")
   plot(Estado ~ Long, algas.i, type="n",
     ylab="P(reproductivo)", xlab="Longitud (cm)")
  rug(jitter(algas.i$Long[algas.i$Estado==0]))
  rug(jitter(algas.i$Long[algas.i$Estado==1]), side=3)
  lines(xv, yv, lwd=2)
  points(resmeans, probs, pch=16, cex=2)
  for (j in 1:5) {
     lines(c(resmeans[j], resmeans[j]), c(up[j], down[j]))
  }
  mtext(paste(letters[i], ")", sep=""), cex = 1.3, adj = 0,
  line = 1, at=-4.5)
}


#  D) Test post hoc

library(multcomp)
summary(glht(glm.algas_f, linfct=mcp(Sp="Tukey")))
library(emmeans)
lst <- lstrends(glm.algas, ~ Sp, var = "Long")
pairs(lst)


#  8.1.5. Revisión de los supuestos del modelo

par(mfrow=c(2,2))
plot(glm.algas_f)

res <- residuals(glm.algas_f)
fit <- predict(glm.algas_f)
plot(res~fit, pch=16, xlab="Valores esperados",
  ylab="Residuos de devianza")
abline(h=0, lty=3, col="gray", lwd=2)

library(splines)
rl <- lm(res~bs(fit, 8))
y <- predict(rl, se=TRUE)
i <- order(fit)
lines(y$fit[i]~fit[i], lwd=2)
segments(x0=fit[i], y0=y$fit[i]+2*y$se.fit[i],
y1=y$fit[i]-2*y$se.fit[i], col="gray")

residualplot <- function(model, nbins){
   fi <-fitted(model)
   re <- residuals(model, type="response")
   ord <- rank(fi)
   groups <- factor(cut(ord, nbins, labels=F))
   nj <-table(groups)
   fiavg<- tapply(fi, list(groups), mean)
   reavg <- tapply(re, list(groups), mean)
   twoSE <- 2*sqrt(fiavg*(1-fiavg)/nj)
   ylim <- range(c(twoSE, -1*twoSE))
   plot(fiavg, reavg, pch=19, ylim=ylim, xlab="Valores esperados",
     ylab="Residuos de devianza")
   abline(h=0, lty=3, col="gray", lwd=2)
   lines(fiavg, twoSE, lty=2, col=2)
   lines(fiavg, -1*twoSE, lty=2, col=2)
}

residualplot(glm.algas_f, nbins=30)

#  Cuadro 8.1. El paquete DHARMa

library(DHARMa)
simulationOutput <- simulateResiduals(glm.algas_f, n=1000)
plot(simulationOutput)
testUniformity(simulationOutput)

#  8.2. GLM de respuesta de tipo proporción

fracasos <- observaciones_totales - éxitos
y <- cbind(éxitos, fracasos)

#  8.2.1. Presentación del caso de estudio: ¿cuáles son las mejores
#           condiciones de germinación para el ailanto?

library(ADER)
data(ailanto)
str(ailanto)
ailanto$Luz <- factor(ailanto$Luz)
ailanto$Agua <- factor(ailanto$Agua)
levels(ailanto$Luz) <- c("10%", "40%", "70%", "100%")
levels(ailanto$Agua) <- c("50%", "70%", "90%")

#  8.2.2. Análisis exploratorio y ajuste del modelo

yprop <- ailanto$Germinacion/ailanto$Semillas

library(lattice)
bwplot(yprop~Luz|Agua, ailanto, xlab="Niveles de luz",
  ylab="Prop. germinación", main="Niveles de agua")
y <- cbind(ailanto$Germinacion, ailanto$Semillas-ailanto$Germinacion)
glm.seed <- glm(y~Luz*Agua, ailanto,family=binomial)
glm.seed <- glm(yprop~Luz*Agua, weights=Semillas, ailanto,
  family=binomial)

#  8.2.3. Resolución de hipótesis

library(car)
Anova(glm.seed, type="III")

#  8.2.4. Interpretación del modelo
#  A) La función summary()

summary(glm.seed)

library(emmeans)
emmip(glm.seed, Agua~ Luz, type="response", xlab="Niveles de luz",
  ylab="Valor esperado", CIs=TRUE)

#  B) Devianza explicada por el modelo

library(modEvA)
Dsquared(glm.seed)

#  C) Gráficos de predicciones

library(plotrix)
yprop.means <- with(ailanto, tapply(yprop, list(Luz, Agua), mean))
yprop.se <- with(ailanto, aggregate(yprop, list(Luz, Agua),
  function(x) std.error(x)*1.96))
bp <- barplot(yprop.means, beside=T,
  legend=rownames(yprop.means),
  args.legend=list(x="top", ncol=4, title="Niveles de luz"),
  ylim=c(0,1.2), xlab="Niveles de agua",
  ylab="Proporción de germinación", yaxt="n")

dispersion(bp, yprop.means, yprop.se$x, display.na=F, arrow.gap=0)

#  D) Test post hoc

library(emmeans)
seed.emm <- emmeans(glm.seed, ~ Luz*Agua)
pairs(seed.emm)
contrast(seed.emm, method="pairwise", simple="each",
  combine=FALSE, adjust="tukey")

library(plotrix)
library(multcomp)
yprop.means <- with(ailanto, tapply(yprop, list(Luz, Agua), mean))
yprop.se <- with(ailanto, aggregate(yprop, list(Luz, Agua),
  function(x) std.error(x)*1.96))
bp <- barplot(yprop.means, beside=T, legend=rownames(yprop.means),
  args.legend=list(x="top", ncol=4, title="Niveles de luz"),
  ylim=c(0,1.2), xlab="Niveles de agua",
  ylab="Proporción de germinación")
  
dispersion(bp, yprop.means, yprop.se$x, display.na=F, arrow.gap=0)
text(x=bp, y=yprop.means+yprop.se$x+0.1, font=2,
  labels=cld(seed.emm, Letters=letters, sort=FALSE)$.group)

#  8.2.5. Revisión de los supuestos del modelo

par(mfrow=c(2,2))
plot(glm.seed)

library(aods3)
gof(glm.seed)

glm.seed2 <- glm(y~Luz*Agua, ailanto, family=quasibinomial)
Anova(glm.seed2, type="III")


#  Cuadro 8.2. El 50% no siempre significa lo mismo

N.groups <- 3
N.sample <- 10
N <- N.groups*N.sample
x <- rep(1:N.groups, rep(N.sample, N.groups))
pop <- factor(x, labels=c("Jura", "Selva Negra", "Alpes"))
wetness.Jura <- sort(runif(N.sample, 0.1))
wetness.SNegra <- sort(runif(N.sample, 0.1))
wetness.Alpes <- sort(runif(N.sample, 0.1))
wetness <- c(wetness.Jura, wetness.SNegra, wetness.Alpes)
Xmat <- model.matrix(~pop*wetness)
beta.vec <- c(-4, 1, 2, 6, 3, -4)
lin.pred <- Xmat[,] %*% beta.vec
exp.p <- exp(lin.pred)/(1 + exp(lin.pred))
n1 <- rep(4, N)
C1 <- rbinom(n=N, size=n1, prob=exp.p)
n2 <- rep(10, N)
C2 <- rbinom(n=N, size=n2, prob=exp.p)
n3 <- rep(40, N)
C3 <- rbinom(n=N, size=n3, prob=exp.p)
glm1 <- glm(cbind(C1, n1-C1)~pop*wetness, family=binomial)
glm2 <- glm(cbind(C2, n2-C2)~pop*wetness, family=binomial)
glm3 <- glm(cbind(C3, n3-C3)~pop*wetness, family=binomial)
Anova(glm1, type="III")
Anova(glm2, type="III")
Anova(glm3, type="III")
summary(glm1)
summary(glm2)
summary(glm3)

library(visreg)
par(mfcol=c(3,1))
visreg(glm1, "wetness", by="pop", overlay=T, xlab="",
  ylab="Prop. melanismo", scale="response")
mtext("a)", cex = 1.3, adj = 0, line = 2, at=-0.01)

visreg(glm2, "wetness", by="pop", overlay=T, xlab="",
  ylab="Prop. melanismo", scale="response", legend=F)
mtext("b)", cex = 1.3, adj = 0, line = 2, at=-0.01)

visreg(glm3, "wetness", by="pop", overlay=T,
  ylab="Prop. melanismo", scale="response",
  xlab="Índice de humedad", legend=F)
mtext("c)", cex = 1.3, adj = 0, line = 2, at=-0.01)
