An official website of the United States government.

This is not the current EPA website. To navigate to the current EPA website, please go to This website is historical material reflecting the EPA website as it existed on January 19, 2021. This website is no longer updated and links to external websites and some internal pages may not work. More information »

CADDIS Volume 4

R Script: Parametric Regression

# Fit parametric regression models to observations
# Plot resulting curve fits with binned data

# Create storage list
modlist.glm <- as.list(rep(NA, times = length(taxa.names)))

for (i in 1:length(taxa.names)) {

    # Create a logical vector that is true if taxon is
    # present and false if taxon is absent.

    resp <- dfmerge[,taxa.names[i]]>0

    # Fit the regression model and store the results in a list.
    # Here, poly(temp,2) specifies that the
    # model is fitting using a second order polynomial of the
    # explanatory variable. glm calls the function that fits
    # Generalized Linear Models. We specify in this case that
    # the response variable is distributed binomially.
    modlist.glm <- glm(resp ~ poly(temp,2), data = dfmerge,
    family = "binomial")


par(mfrow = c(1,3), pty = "s")
    # Specify 3 plots per page
for (i in 1:length(taxa.names)) {
    # Compute mean predicted probability of occurrence
    # and standard errors about this predicted probability.
    predres <- predict(modlist.glm, type= "link", = T)

    # Compute upper and lower 90% confidence limits <- predres$fit + 1.65*predres$ <- predres$fit - 1.65*predres$ <- predres$fit

    # Convert from logit transformed values to probability.
    up.bound <- exp(
    low.bound <- exp(
    mean.resp <- exp(

    iord <- order(dfmerge$temp)
    # Sort the environmental variable.

    # Define bins to summarize observational data as
    # probabilities of occurrence

    # Define the number of bins
    nbin <- 20

    # Define bin boundaries so each bin has approximately the same
    # number of observations.
    cutp <- quantile(dfmerge$temp,probs = seq(from = 0, to = 1, length = 20))

    # Compute the midpoint of each bin
    cutm <- 0.5*(cutp[-1] + cutp[-nbin])

    # Assign a factor to each bin
    cutf <- cut(dfmerge$temp, cutp, include.lowest = T)

    # Compute the mean of the presence/absence data within each
    # bin.
    vals <- tapply(dfmerge[, taxa.names[i]] > 0, cutf, mean)

    # Now generate the plot, # Plot binned observational data as symbols.
    plot(cutm, vals, xlab = "Temperature",
    ylab = "Probability of occurrence", ylim = c(0,1),
    main = taxa.names[i])

    # Plot mean fit as a solid line.
    lines(dfmerge$temp[iord], mean.resp[iord])

    # Plot confidence limits as dotted lines.
    lines(dfmerge$temp[iord], up.bound[iord], lty = 2)
    lines(dfmerge$temp[iord], low.bound[iord], lty = 2)

Top of Page