# R Script: Parametric Regression

# Fit parametric regression models to observations

# Plot resulting curve fits with binned data

# Create storage list

modlist.glm <- as.list(rep(NA, times = length(taxa.names)))

for (i in 1:length(taxa.names)) {

# Create a logical vector that is true if taxon is

# present and false if taxon is absent.

resp <- dfmerge[,taxa.names[i]]>0

# Fit the regression model and store the results in a list.

# Here, poly(temp,2) specifies that the

# model is fitting using a second order polynomial of the

# explanatory variable. glm calls the function that fits

# Generalized Linear Models. We specify in this case that

# the response variable is distributed binomially.

modlist.glm <- glm(resp ~ poly(temp,2), data = dfmerge,

family = "binomial")

print(summary(modlist.glm))

}

par(mfrow = c(1,3), pty = "s")

# Specify 3 plots per page

for (i in 1:length(taxa.names)) {

# Compute mean predicted probability of occurrence

# and standard errors about this predicted probability.

predres <- predict(modlist.glm, type= "link", se.fit = T)

# Compute upper and lower 90% confidence limits

up.bound.link <- predres$fit + 1.65*predres$se.fit

low.bound.link <- predres$fit - 1.65*predres$se.fit

mean.resp.link <- predres$fit

# Convert from logit transformed values to probability.

up.bound <- exp(up.bound.link)/(1+exp(up.bound.link))

low.bound <- exp(low.bound.link)/(1+exp(low.bound.link))

mean.resp <- exp(mean.resp.link)/(1+exp(mean.resp.link))

iord <- order(dfmerge$temp)

# Sort the environmental variable.

# Define bins to summarize observational data as

# probabilities of occurrence

# Define the number of bins

nbin <- 20

# Define bin boundaries so each bin has approximately the same

# number of observations.

cutp <- quantile(dfmerge$temp,probs = seq(from = 0, to = 1, length = 20))

# Compute the midpoint of each bin

cutm <- 0.5*(cutp[-1] + cutp[-nbin])

# Assign a factor to each bin

cutf <- cut(dfmerge$temp, cutp, include.lowest = T)

# Compute the mean of the presence/absence data within each

# bin.

vals <- tapply(dfmerge[, taxa.names[i]] > 0, cutf, mean)

# Now generate the plot, # Plot binned observational data as symbols.

plot(cutm, vals, xlab = "Temperature",

ylab = "Probability of occurrence", ylim = c(0,1),

main = taxa.names[i])

# Plot mean fit as a solid line.

lines(dfmerge$temp[iord], mean.resp[iord])

# Plot confidence limits as dotted lines.

lines(dfmerge$temp[iord], up.bound[iord], lty = 2)

lines(dfmerge$temp[iord], low.bound[iord], lty = 2)

}