author     Ken Kellner <ken@kenkellner.com>    2022-11-10 12:45:23 -0500
committer  Ken Kellner <ken@kenkellner.com>    2022-11-10 12:45:23 -0500
commit     33ea444a2d8aec16e823cdfd77522bbc6daa2219 (patch)
tree       07439a5ed6fa89893d3c50f6e39e588727b8801c
parent     b5850e8969ef788359683b60f4b8dede264225de (diff)
parent     1cc3e845c8d610512da7402edceb8d103154216b (diff)
Merge remote-tracking branch 'upstream/master' into shiny-power
-rw-r--r--  .Rbuildignore                          1
-rw-r--r--  DESCRIPTION                           11
-rw-r--r--  NAMESPACE                              1
-rw-r--r--  NEWS.md                                6
-rw-r--r--  R/boot.R                             147
-rw-r--r--  R/gdistremoval.R                      26
-rw-r--r--  R/power.R                             35
-rw-r--r--  R/predict.R                           21
-rw-r--r--  R/ranef.R                              2
-rw-r--r--  R/simulate.R                         109
-rw-r--r--  R/unmarkedFit.R                       40
-rw-r--r--  R/unmarkedFrame.R                     46
-rw-r--r--  data/MesoCarnivores.rda              bin 0 -> 28948 bytes
-rw-r--r--  man/MesoCarnivores.Rd                 41
-rw-r--r--  man/cruz.Rd                            2
-rw-r--r--  man/formatDistData.Rd                  2
-rw-r--r--  tests/testthat/test_distsamp.R         7
-rw-r--r--  tests/testthat/test_gdistremoval.R     5
-rw-r--r--  tests/testthat/test_multinomPois.R     6
-rw-r--r--  tests/testthat/test_occu.R             3
-rw-r--r--  tests/testthat/test_occuMS.R          20
-rw-r--r--  tests/testthat/test_parboot.R         44
-rw-r--r--  tests/testthat/test_powerAnalysis.R   11
-rw-r--r--  tests/testthat/test_predict.R         42
-rw-r--r--  tests/testthat/test_simulate.R        11
-rw-r--r--  vignettes/cap-recap.Rmd (renamed from vignettes/cap-recap.Rnw)    565
-rw-r--r--  vignettes/colext-cov.pdf             bin 6443 -> 0 bytes
-rw-r--r--  vignettes/colext-data-1.png          bin 0 -> 15926 bytes
-rw-r--r--  vignettes/colext-est-1.png           bin 0 -> 20660 bytes
-rw-r--r--  vignettes/colext-gof-1.png           bin 0 -> 3713 bytes
-rw-r--r--  vignettes/colext-gof.pdf             bin 4378 -> 0 bytes
-rw-r--r--  vignettes/colext-pred-1.png          bin 0 -> 6248 bytes
-rw-r--r--  vignettes/colext-sim.pdf             bin 5084 -> 0 bytes
-rw-r--r--  vignettes/colext-yearlysim.pdf       bin 6829 -> 0 bytes
-rw-r--r--  vignettes/colext.Rmd (renamed from vignettes/colext.Rnw)         1220
-rw-r--r--  vignettes/colext.Rmd.orig            873
-rw-r--r--  vignettes/distsamp.Rmd (renamed from vignettes/distsamp.Rnw)      317
-rw-r--r--  vignettes/ecology.bst               1460
-rw-r--r--  vignettes/ecology.csl                188
-rw-r--r--  vignettes/occuMulti.Rmd (renamed from vignettes/occuMulti.Rnw)    425
-rw-r--r--  vignettes/powerAnalysis.Rmd          321
-rw-r--r--  vignettes/powerAnalysis.Rnw          365
-rw-r--r--  vignettes/random-effects.Rnw         233
-rw-r--r--  vignettes/simulate.Rmd               274
-rw-r--r--  vignettes/simulate.Rnw               318
-rw-r--r--  vignettes/spp-dist-psi2.pdf          bin 82492 -> 0 bytes
-rw-r--r--  vignettes/spp-dist.Rmd (renamed from vignettes/spp-dist.Rnw)      335
-rw-r--r--  vignettes/unmarked.Rmd (renamed from vignettes/unmarked.Rnw)      245
-rw-r--r--  vignettes/unmarked.bib               106
49 files changed, 3473 insertions, 4411 deletions
diff --git a/.Rbuildignore b/.Rbuildignore
index b1ec806..70e8958 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -11,3 +11,4 @@ README.Rmd
^tests/testthat/_snaps$
^\.github$
^_pkgdown\.yml$
+^vignettes/colext.Rmd.orig
diff --git a/DESCRIPTION b/DESCRIPTION
index c817c51..bccde5e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: unmarked
-Version: 1.2.3.9001
-Date: 2022-05-04
+Version: 1.2.5.9007
+Date: 2022-10-12
Type: Package
Title: Models for Data from Unmarked Animals
Authors@R: c(
@@ -18,22 +18,20 @@ Authors@R: c(
person("Ariel", "Muldoon", role="ctb"),
person("Chris", "Baker", role="ctb")
)
-Depends: R (>= 2.12.0)
+Depends: R (>= 2.12.0), methods
Imports:
graphics,
lattice,
lme4,
MASS,
Matrix,
- methods,
parallel,
pbapply,
- plyr,
Rcpp (>= 0.8.0),
stats,
TMB (>= 1.7.18),
utils
-Suggests: AHMbook, pkgdown, raster, shiny, testthat
+Suggests: knitr, rmarkdown, pkgdown, raster, shiny, testthat
Description: Fits hierarchical models of animal abundance and occurrence to data collected using survey methods such as point counts, site occupancy sampling, distance sampling, removal sampling, and double observer sampling. Parameters governing the state and observation processes can be modeled as functions of covariates. Reference: Fiske and Chandler (2011) <doi:10.18637/jss.v043.i10>.
License: GPL (>=3)
LazyLoad: yes
@@ -64,3 +62,4 @@ URL: https://groups.google.com/d/forum/unmarked,
https://github.com/ianfiske/unmarked,
https://github.com/rbchan/unmarked
BugReports: https://github.com/rbchan/unmarked/issues
+VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
index e355ecc..448be4f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,7 +9,6 @@ importFrom(stats, confint, fitted, coef, vcov, predict, update, profile,
update.formula, sigma)
importFrom(graphics, plot, hist, abline)
importFrom(utils, head, read.csv)
-importFrom(plyr, ldply, alply, ddply)
importFrom(grDevices, devAskNewPage, dev.interactive, palette.colors)
importFrom(MASS, mvrnorm)
importFrom(parallel, detectCores, makeCluster, stopCluster, clusterExport,
diff --git a/NEWS.md b/NEWS.md
index 6f393d2..314f645 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,9 @@
+# unmarked 1.2.4
+
+* Convert vignettes to use rmarkdown
+* Handle suggested packages in vignettes
+* Remove occuMulti vignette due to AHMbook being temporarily off CRAN
+
# unmarked 1.2.3
* Add gdistremoval function to fit distance/removal models, see Amundson et al. 2014
diff --git a/R/boot.R b/R/boot.R
index 2783625..2f8cffc 100644
--- a/R/boot.R
+++ b/R/boot.R
@@ -30,81 +30,66 @@ setMethod("replaceY", "unmarkedFrameOccuMulti",
object
})
-setMethod("parboot", "unmarkedFit",
- function(object, statistic=SSE, nsim=10, report, seed = NULL, parallel = TRUE, ncores, ...)
-{
- dots <- list(...)
- statistic <- match.fun(statistic)
- call <- match.call(call = sys.call(-1))
- formula <- object@formula
- umf <- getData(object)
- y <- getY(object)
- ests <- as.numeric(coef(object))
- starts <- ests
- if(methods::.hasSlot(object, "TMB") && !is.null(object@TMB)) starts <- NULL
- t0 <- statistic(object, ...)
- lt0 <- length(t0)
- t.star <- matrix(NA, nsim, lt0)
- if(!missing(report))
- cat("t0 =", t0, "\n")
- simdata <- umf
- if (!is.null(seed)) set.seed(seed)
- simList <- simulate(object, nsim = nsim, na.rm = FALSE)
- availcores <- detectCores()
- if(missing(ncores)) ncores <- availcores - 1
- if(ncores > availcores) ncores <- availcores
-
- no_par <- ncores < 2 || nsim < 100 || !parallel
-
- if (no_par) {
- if (!missing(report)) {
- for(i in 1:nsim) {
- simdata <- replaceY(simdata, simList[[i]])
- fit <- update(object, data=simdata, starts=starts, se=FALSE)
- t.star[i,] <- statistic(fit, ...)
- if(!missing(report)) {
- if (nsim > report && i %in% seq(report, nsim, by=report))
- cat("iter", i, ": ", t.star[i, ], "\n")
- }
- }
- } else {
- t.star <- pbsapply(1:nsim, function(i) {
- simdata <- replaceY(simdata, simList[[i]])
- fit <- update(object, data=simdata, starts=starts, se=FALSE)
- t.star.tmp <- statistic(fit, ...)
- })
- if (lt0 > 1)
- t.star <- t(t.star)
- else
- t.star <- matrix(t.star, ncol = lt0)
- }
- } else {
- message("Running parametric bootstrap in parallel on ", ncores, " cores.")
- if (!missing(report)) message("Bootstrapped statistics not reported during parallel processing.")
- cl <- makeCluster(ncores)
- if (!is.null(seed)) parallel::clusterSetRNGStream(cl, iseed = seed)
- on.exit(stopCluster(cl))
- varList <- c("simList", "y", "object", "simdata", "starts", "statistic", "dots")
- # If call formula is an object, include it too
- fm.nms <- all.names(object@call)
- if (!any(grepl("~", fm.nms))) varList <- c(varList, fm.nms[2])
- ## Hack to get piFun for unmarkedFitGMM and unmarkedFitMPois
- if(.hasSlot(umf, "piFun")) varList <- c(varList, umf@piFun)
- clusterExport(cl, varList, envir = environment())
- clusterEvalQ(cl, library(unmarked))
- clusterEvalQ(cl, list2env(dots))
- t.star.parallel <- pblapply(1:nsim, function(i) {
- simdata <- replaceY(simdata, simList[[i]])
- fit <- update(object, data = simdata, starts = starts, se = FALSE)
- t.star <- statistic(fit, ...)
- }, cl = cl)
- t.star <- matrix(unlist(t.star.parallel), nrow = length(t.star.parallel), byrow = TRUE)
- }
- if (!is.null(names(t0)))
- colnames(t.star) <- names(t0)
- else colnames(t.star) <- paste("t*", 1:lt0, sep="")
- out <- new("parboot", call = call, t0 = t0, t.star = t.star)
- return(out)
+
+setMethod("parboot", "unmarkedFit", function(object, statistic=SSE, nsim=10,
+ report, seed = NULL, parallel = FALSE, ncores, ...){
+
+ if(!missing(report)){
+ warning("report argument is non-functional and will be deprecated in the next version", call.=FALSE)
+ }
+
+ dots <- list(...)
+ call <- match.call(call = sys.call(-1))
+ stopifnot(is.function(statistic))
+ starts <- as.numeric(coef(object))
+ # Get rid of starting values if model was fit with TMB
+ if(methods::.hasSlot(object, "TMB") && !is.null(object@TMB)) starts <- NULL
+
+ t0 <- statistic(object, ...)
+
+ simList <- simulate(object, nsim = nsim, na.rm = FALSE)
+
+ availcores <- parallel::detectCores() - 1
+ if(missing(ncores) || ncores > availcores) ncores <- availcores
+
+ cl <- NULL
+ if(parallel){
+ cl <- parallel::makeCluster(ncores)
+ on.exit(parallel::stopCluster(cl))
+ parallel::clusterEvalQ(cl, library(unmarked))
+ env_vars <- c("dots", "replaceY")
+ fm.nms <- all.names(object@call)
+ if (!any(grepl("~", fm.nms))) env_vars <- c(env_vars, fm.nms[2])
+ if(.hasSlot(object@data, "piFun")) env_vars <- c(env_vars, object@data@piFun)
+ parallel::clusterExport(cl, env_vars, envir = environment())
+ parallel::clusterEvalQ(cl, list2env(dots))
+ }
+
+ run_sim <- function(x, object, statistic, starts, t0, ...){
+ simdata <- replaceY(object@data, x)
+ tryCatch({
+ #if(runif(1,0,1) < 0.5) stop("fail") # for testing error trapping
+ fit <- update(object, data=simdata, starts=starts, se=FALSE)
+ statistic(fit, ...)
+ }, error=function(e){
+ t0[] <- NA
+ t0
+ })
+ }
+
+ t.star <- t(pbapply::pbsapply(simList, run_sim, object=object,
+ statistic=statistic, starts=starts, t0=t0,
+ cl=cl, ...))
+ if(length(t0) == 1) t.star <- matrix(t.star, ncol=1)
+
+ failed <- apply(t.star, 1, function(x) any(is.na(x)))
+ if(sum(failed) > 0){
+ warning(paste0("Model fitting failed in ",sum(failed), " sims."), call.=FALSE)
+ t.star <- t.star[!failed,,drop=FALSE]
+ }
+
+ new("parboot", call = call, t0 = t0, t.star = t.star)
+
})
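
The rewritten `parboot` above forwards `...` to the statistic function and drops simulations whose refit fails (with a warning), rather than aborting. A minimal sketch of the new interface, assuming `fm` is an already-fitted `unmarkedFit` object; the statistic name `sse_scaled` is illustrative, not part of the package:

```r
# Sketch only: `fm` is assumed to be an existing fitted unmarkedFit object
library(unmarked)

# Custom statistic; the extra argument `scale` is forwarded by parboot via `...`
sse_scaled <- function(fit, scale = 1){
  c(SSE = scale * sum(residuals(fit)^2, na.rm = TRUE))
}

# Sequential run (parallel now defaults to FALSE); failed refits are dropped
pb <- parboot(fm, statistic = sse_scaled, nsim = 50, scale = 1)

# Parallel execution must be requested explicitly
pb_par <- parboot(fm, statistic = sse_scaled, nsim = 50,
                  parallel = TRUE, ncores = 2)
```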
@@ -194,9 +179,7 @@ setMethod("nonparboot", "unmarkedFit",
data.b <- data[sites,]
y <- getY(data.b)
if (bsType == "both") {
- obs.per.site <- alply(y, 1, function(row) {
- which(!is.na(row))
- })
+ obs.per.site <- lapply(1:nrow(y), function(i) which(!is.na(y[i,])))
obs <- lapply(obs.per.site,
function(obs) sample(obs, replace = TRUE))
data.b <- data.b[obs]
@@ -382,9 +365,7 @@ setMethod("nonparboot", "unmarkedFitOccuPEN",
data.b <- data[sites,]
y <- getY(data.b)
if (bsType == "both") {
- obs.per.site <- alply(y, 1, function(row) {
- which(!is.na(row))
- })
+ obs.per.site <- lapply(1:nrow(y), function(i) which(!is.na(y[i,])))
obs <- lapply(obs.per.site,
function(obs) sample(obs, replace = TRUE))
data.b <- data.b[obs]
@@ -440,9 +421,7 @@ setMethod("nonparboot", "unmarkedFitOccuPEN_CV",
data.b <- data[sites,]
y <- getY(data.b)
if (bsType == "both") {
- obs.per.site <- alply(y, 1, function(row) {
- which(!is.na(row))
- })
+ obs.per.site <- lapply(1:nrow(y), function(i) which(!is.na(y[i,])))
obs <- lapply(obs.per.site,
function(obs) sample(obs, replace = TRUE))
data.b <- data.b[obs]
diff --git a/R/gdistremoval.R b/R/gdistremoval.R
index bddac3e..dc91934 100644
--- a/R/gdistremoval.R
+++ b/R/gdistremoval.R
@@ -517,9 +517,10 @@ setMethod("fitted", "unmarkedFitGDR", function(object){
T <- object@data@numPrimary
# Adjust log lambda when there is a random intercept
- loglam <- log(predict(object, "lambda", level=NULL)$Predicted)
- loglam <- E_loglam(loglam, object, "lambda")
- lam <- exp(loglam)
+ #loglam <- log(predict(object, "lambda", level=NULL)$Predicted)
+ #loglam <- E_loglam(loglam, object, "lambda")
+ #lam <- exp(loglam)
+ lam <- predict(object, "lambda", level=NULL)$Predicted
if(object@output == "density"){
ua <- getUA(object@data)
A <- rowSums(ua$a)
@@ -587,9 +588,10 @@ setMethod("ranef", "unmarkedFitGDR", function(object){
Kmin = apply(ysum, 1, max, na.rm=T)
- loglam <- log(predict(object, "lambda", level=NULL)$Predicted)
- loglam <- E_loglam(loglam, object, "lambda")
- lam <- exp(loglam)
+ #loglam <- log(predict(object, "lambda", level=NULL)$Predicted)
+ #loglam <- E_loglam(loglam, object, "lambda")
+ #lam <- exp(loglam)
+ lam <- predict(object, "lambda", level=NULL)$Predicted
if(object@output == "density"){
ua <- getUA(object@data)
A <- rowSums(ua$a)
@@ -644,9 +646,10 @@ setMethod("ranef", "unmarkedFitGDR", function(object){
setMethod("simulate", "unmarkedFitGDR", function(object, nsim, seed=NULL, na.rm=FALSE){
# Adjust log lambda when there is a random intercept
- loglam <- log(predict(object, "lambda", level=NULL)$Predicted)
- loglam <- E_loglam(loglam, object, "lambda")
- lam <- exp(loglam)
+ #loglam <- log(predict(object, "lambda", level=NULL)$Predicted)
+ #loglam <- E_loglam(loglam, object, "lambda")
+ #lam <- exp(loglam)
+ lam <- predict(object, "lambda", level=NULL)$Predicted
if(object@output == "density"){
ua <- getUA(object@data)
A <- rowSums(ua$a)
@@ -812,3 +815,8 @@ setMethod("plot", c(x = "unmarkedFitGDR", y = "missing"), function(x, y, ...)
main="Removal")
abline(h = 0, lty = 3, col = "gray")
})
+
+# Used with fitList
+setMethod("fl_getY", "unmarkedFitGDR", function(fit, ...){
+ getDesign(getData(fit), fit@formlist)$yDist
+})
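
The new `fl_getY` method is what allows `gdistremoval` fits to be collected into a `fitList`. A small sketch, assuming `fit1` and `fit2` are existing `unmarkedFitGDR` objects:

```r
# Sketch: model selection across gdistremoval fits (fit1/fit2 assumed to exist)
fl <- fitList(fits = list(null = fit1, lam_cov = fit2))
modSel(fl)   # AIC table, as for other unmarked fit types
```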
diff --git a/R/power.R b/R/power.R
index e56f8a0..b998a1f 100644
--- a/R/power.R
+++ b/R/power.R
@@ -13,6 +13,7 @@ powerAnalysis <- function(object, coefs=NULL, design=NULL, alpha=0.05, nulls=lis
submodels <- names(object@estimates@estimates)
coefs <- check_coefs(coefs, object)
+ coefs <- generate_random_effects(coefs, object)
fit_temp <- replace_estimates(object, coefs)
T <- 1
@@ -154,12 +155,36 @@ check_coefs <- function(coefs, fit, template=FALSE){
required_subs <- names(fit@estimates@estimates)
required_coefs <- lapply(fit@estimates@estimates, function(x) names(x@estimates))
required_lens <- lapply(required_coefs, length)
+
+ formulas <- sapply(names(fit), function(x) get_formula(fit, x))
+
+ # If there are random effects, adjust the expected coefficient names
+ # to remove the b vector and add the grouping covariate name
+ rand <- lapply(formulas, lme4::findbars)
+ if(!all(sapply(rand, is.null))){
+ stopifnot(all(required_subs %in% names(formulas)))
+ rvar <- lapply(rand, function(x) unlist(lapply(x, all.vars)))
+ if(!all(sapply(rvar, length)<2)){
+ stop("Only 1 random effect per parameter is supported", call.=FALSE)
+ }
+ for (i in required_subs){
+ if(!is.null(rand[[i]][[1]])){
+ signame <- rvar[[i]]
+ old_coefs <- required_coefs[[i]]
+ new_coefs <- old_coefs[!grepl("b_", old_coefs, fixed=TRUE)]
+ new_coefs <- c(new_coefs, signame)
+ required_coefs[[i]] <- new_coefs
+ }
+ }
+ }
+
dummy_coefs <- lapply(required_coefs, function(x){
out <- rep(0, length(x))
x <- gsub("(Intercept)", "intercept", x, fixed=TRUE)
names(out) <- x
out
})
+
if(template) return(dummy_coefs)
if(is.null(coefs)){
@@ -225,6 +250,7 @@ check_coefs <- function(coefs, fit, template=FALSE){
}
coefs[required_subs]
}
+
wald <- function(est, se, null_hyp=NULL){
if(is.null(null_hyp) || is.na(null_hyp)) null_hyp <- 0
Z <- (est-null_hyp)/se
@@ -250,13 +276,15 @@ setMethod("summary", "unmarkedPower", function(object, ...){
x
})
+ coefs_no_rand <- unlist(object@coefs)[!grepl("b_", names(unlist(object@coefs)))]
+
pow <- sapply(1:npar, function(ind){
submod <- sum_dfs[[1]]$submodel[ind]
param <- sum_dfs[[1]]$param[ind]
ni <- nulls[[submod]][param]
pcrit <- sapply(sum_dfs, function(x) wald(x$Estimate[ind], x$SE[ind], ni)) < object@alpha
- direct <- sapply(sum_dfs, function(x) diff_dir(x$Estimate[ind], unlist(object@coefs)[ind], ni))
+ direct <- sapply(sum_dfs, function(x) diff_dir(x$Estimate[ind], coefs_no_rand[ind], ni))
mean(pcrit & direct, na.rm=T)
})
@@ -268,11 +296,14 @@ setMethod("summary", "unmarkedPower", function(object, ...){
ni
})
- out <- cbind(sum_dfs[[1]][,1:2], effect=unlist(object@coefs), null=all_nulls, power=pow)
+ effect_no_random <- unlist(object@coefs)[!grepl("b_",names(unlist(object@coefs)))]
+
+ out <- cbind(sum_dfs[[1]][,1:2], effect=effect_no_random, null=all_nulls, power=pow)
rownames(out) <- NULL
names(out) <- c("Submodel", "Parameter", "Effect", "Null", "Power")
out
})
+
setMethod("show", "unmarkedPower", function(object){
cat("\nModel:\n")
print(object@call)
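
With `generate_random_effects` called inside `powerAnalysis` (see the power.R changes above), the `coefs` list can now include a random-effect standard deviation, supplied under the name of the grouping covariate; the simulated `b_` terms are generated internally and excluded from the power summary. A sketch, assuming `fm` is an `occu` fit with formula `~1 ~ x + (1|group)`:

```r
# Sketch only: `fm` is assumed to be an occu() fit of ~1 ~ x + (1|group)
coefs <- list(state = c(intercept = 0, x = 0.5, group = 0.7),  # 0.7 = SD of random intercept
              det   = c(intercept = 0))
pa <- powerAnalysis(fm, coefs = coefs, nsim = 50)
summary(pa)   # columns: Submodel, Parameter, Effect, Null, Power (b terms omitted)
```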
diff --git a/R/predict.R b/R/predict.R
index cd14637..68efbb7 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -132,9 +132,12 @@ setGeneric("get_formula", function(object, type, ...){
})
setMethod("get_formula", "unmarkedFit", function(object, type, ...){
- if(type == "state") return(as.formula(paste("~", object@formula[3], sep="")))
- if(type == "det") return(as.formula(object@formula[[2]]))
- stop("Invalid type")
+ if(type == "state"){
+ return(as.formula(paste("~", object@formula[3], sep="")))
+ } else if(type == "det"){
+ return(as.formula(object@formula[[2]]))
+ }
+ NULL
})
# When newdata is data.frame/raster, get original dataset
@@ -574,6 +577,12 @@ setMethod("get_orig_data", "unmarkedFitGDR", function(object, type, ...){
# bespoke predict method since it has numerous unusual options
# and requires bootstrapping
+# This method is used by simulate but not by predict
+setMethod("get_formula", "unmarkedFitOccuMulti", function(object, type, ...){
+ switch(type, state=object@stateformulas,
+ det=object@detformulas)
+})
+
setMethod("predict", "unmarkedFitOccuMulti",
function(object, type, newdata,
#backTransform = TRUE, na.rm = TRUE,
@@ -748,6 +757,12 @@ setMethod("predict", "unmarkedFitOccuMulti",
# bespoke predict method since it requires bootstrapping
+# This method is used by simulate but not by predict
+setMethod("get_formula", "unmarkedFitOccuMS", function(object, type, ...){
+ switch(type, psi=object@psiformulas, phi=object@phiformulas,
+ det=object@detformulas)
+})
+
setMethod("predict", "unmarkedFitOccuMS",
function(object, type, newdata,
#backTransform = TRUE, na.rm = TRUE,
diff --git a/R/ranef.R b/R/ranef.R
index 5547214..9dca184 100644
--- a/R/ranef.R
+++ b/R/ranef.R
@@ -121,6 +121,7 @@ setMethod("ranef", "unmarkedFitOccuMS", function(object, ...)
g <- rep(1, S)
p_raw <- sapply(p_all, function(x) x[i,])
for (j in 1:nrow(p_raw)){
+ if(any(is.na(p_raw[j,])) | is.na(y[i,j])) next
sdp <- matrix(0, nrow=S, ncol=S)
sdp[guide] <- p_raw[j,]
sdp[,1] <- 1 - rowSums(sdp)
@@ -142,6 +143,7 @@ setMethod("ranef", "unmarkedFitOccuMS", function(object, ...)
p_raw <- sapply(p_all, function(x) x[i,])
for (j in 1:nrow(p_raw)){
probs <- p_raw[j,]
+ if(any(is.na(probs)) | is.na(y[i,j])) next
sdp <- matrix(0, nrow=S, ncol=S)
sdp[1,1] <- 1
sdp[2,1:2] <- c(1-probs[1], probs[1])
diff --git a/R/simulate.R b/R/simulate.R
index 3503ba5..a8887cb 100644
--- a/R/simulate.R
+++ b/R/simulate.R
@@ -59,6 +59,89 @@ blank_umFit <- function(fit_function){
}
+setMethod("simulate", "character",
+ function(object, nsim=1, seed=NULL, formulas, coefs=NULL, design, guide=NULL, ...){
+ model <- blank_umFit(object)
+ fit <- suppressWarnings(simulate_fit(model, formulas, guide, design, ...))
+ coefs <- check_coefs(coefs, fit)
+ #fit <- replace_sigma(coefs, fit)
+ coefs <- generate_random_effects(coefs, fit)
+ fit <- replace_estimates(fit, coefs)
+ ysims <- suppressWarnings(simulate(fit, nsim))
+ umf <- fit@data
+ # fix this
+ umfs <- lapply(ysims, function(x){
+ if(object=="occuMulti"){
+ umf@ylist <- x
+ } else if(object=="gdistremoval"){
+ umf@yDistance=x$yDistance
+ umf@yRemoval=x$yRemoval
+ } else {
+ umf@y <- x
+ }
+ umf
+ })
+ if(length(umfs)==1) umfs <- umfs[[1]]
+ umfs
+})
+
+# Insert specified random effects SD into proper S4 slot in model object
+# This is mostly needed by GDR which uses the SD to calculate
+# N with E_loglam (this is currently disabled so the function is not needed)
+#replace_sigma <- function(coefs, fit){
+# required_subs <- names(fit@estimates@estimates)
+# formulas <- sapply(names(fit), function(x) get_formula(fit, x))
+# rand <- lapply(formulas, lme4::findbars)
+# if(!all(sapply(rand, is.null))){
+# rvar <- lapply(rand, function(x) unlist(lapply(x, all.vars)))
+# for (i in required_subs){
+# if(!is.null(rand[[i]][[1]])){
+# signame <- rvar[[i]]
+# old_coefs <- coefs[[i]]
+# fit@estimates@estimates[[i]]@randomVarInfo$estimates <- coefs[[i]][[signame]]
+# }
+# }
+# }
+# fit
+#}
+
+generate_random_effects <- function(coefs, fit){
+ required_subs <- names(fit@estimates@estimates)
+ formulas <- sapply(names(fit), function(x) get_formula(fit, x))
+ rand <- lapply(formulas, lme4::findbars)
+ if(!all(sapply(rand, is.null))){
+ rvar <- lapply(rand, function(x) unlist(lapply(x, all.vars)))
+ for (i in required_subs){
+ if(!is.null(rand[[i]][[1]])){
+ signame <- rvar[[i]]
+ old_coefs <- coefs[[i]]
+ new_coefs <- old_coefs[names(old_coefs)!=signame]
+
+ # Find levels of factor variable
+ if(signame %in% names(siteCovs(fit@data))){
+ lvldata <- siteCovs(fit@data)[[signame]]
+ } else if(signame %in% names(obsCovs(fit@data))){
+ lvldata <- obsCovs(fit@data)[[signame]]
+ } else if(methods::.hasSlot(fit@data, "yearlySiteCovs") && signame %in% names(yearlySiteCovs(fit@data))){
+ lvldata <- yearlySiteCovs(fit@data)[[signame]]
+ } else {
+ stop("Random effect covariate missing from data", call.=FALSE)
+ }
+
+ if(!is.factor(lvldata)){
+ stop("Random effect covariates must be specified as factors with guide argument", call.=FALSE)
+ }
+ b <- stats::rnorm(length(levels(lvldata)), 0, old_coefs[signame])
+ names(b) <- rep(paste0("b_",i), length(b))
+ new_coefs <- c(new_coefs, b)
+ coefs[[i]] <- new_coefs
+ }
+ }
+ }
+ coefs
+}
+
+
setGeneric("get_umf_components", function(object, ...) standardGeneric("get_umf_components"))
setMethod("get_umf_components", "unmarkedFit",
@@ -103,30 +186,6 @@ setMethod("simulate_fit", "unmarkedFitOccuRN",
})
-setMethod("simulate", "character",
- function(object, nsim=1, seed=NULL, formulas, coefs=NULL, design, guide=NULL, ...){
- model <- blank_umFit(object)
- fit <- suppressWarnings(simulate_fit(model, formulas, guide, design, ...))
- coefs <- check_coefs(coefs, fit)
- fit <- replace_estimates(fit, coefs)
- ysims <- simulate(fit, nsim)
- umf <- fit@data
- # fix this
- umfs <- lapply(ysims, function(x){
- if(object=="occuMulti"){
- umf@ylist <- x
- } else if(object=="gdistremoval"){
- umf@yDistance=x$yDistance
- umf@yRemoval=x$yRemoval
- } else {
- umf@y <- x
- }
- umf
- })
- if(length(umfs)==1) umfs <- umfs[[1]]
- umfs
-})
-
setMethod("get_umf_components", "unmarkedFitMPois",
function(object, formulas, guide, design, ...){
args <- list(...)
@@ -495,5 +554,5 @@ setMethod("simulate_fit", "unmarkedFitGDR",
gdistremoval(lambdaformula=formulas$lambda, phiformula=formulas$phi,
removalformula=formulas$rem, distanceformula=formulas$dist,
data=umf, keyfun=keyfun, output=output, unitsOut=unitsOut,
- mixture=mixture, K=K, se=FALSE, control=list(maxit=1))
+ mixture=mixture, K=K, se=FALSE, control=list(maxit=1), method='L-BFGS-B')
})
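
Together with the relocated `simulate` method for character input, `generate_random_effects` lets datasets be simulated from scratch with random intercepts; the grouping covariate must be declared as a factor through `guide`. A sketch mirroring the new test in `test_simulate.R` (the `design` values here are illustrative):

```r
# Sketch: simulate an occupancy dataset with a random site-group intercept
set.seed(123)
guide <- list(group = factor(levels = letters[1:20]))   # levels of the grouping factor
forms <- list(state = ~ (1|group), det = ~1)
coefs <- list(state = c(intercept = 0, group = 0.7),    # 0.7 = random-intercept SD
              det   = c(intercept = 0))
umf <- simulate("occu", formulas = forms, coefs = coefs,
                design = list(M = 300, J = 5), guide = guide)
fm <- occu(~1 ~ (1|group), umf)
sigma(fm)   # estimated SD should be close to 0.7
```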
diff --git a/R/unmarkedFit.R b/R/unmarkedFit.R
index 0a8d107..a672c6f 100644
--- a/R/unmarkedFit.R
+++ b/R/unmarkedFit.R
@@ -1631,13 +1631,13 @@ setMethod("getP", "unmarkedFitDS",
umf <- object@data
designMats <- getDesign(umf, formula, na.rm = na.rm)
y <- designMats$y
- V <- designMats$V
+ V <- cbind(designMats$V, designMats$Z_det)
V.offset <- designMats$V.offset
if (is.null(V.offset))
V.offset <- rep(0, nrow(V))
M <- nrow(y)
J <- ncol(y)
- ppars <- coef(object, type = "det")
+ ppars <- coef(object, type = "det", fixedOnly=FALSE)
db <- umf@dist.breaks
w <- diff(db)
survey <- umf@survey
@@ -1902,13 +1902,13 @@ setMethod("getP", "unmarkedFitMPois", function(object, na.rm = TRUE)
umf <- object@data
designMats <- getDesign(umf, formula, na.rm = na.rm)
y <- designMats$y
- V <- designMats$V
+ V <- as.matrix(cbind(designMats$V, designMats$Z_det))
V.offset <- designMats$V.offset
if (is.null(V.offset))
V.offset <- rep(0, nrow(V))
M <- nrow(y)
J <- obsNum(umf) #ncol(y)
- ppars <- coef(object, type = "det")
+ ppars <- coef(object, type = "det", fixedOnly=FALSE)
p <- plogis(V %*% ppars + V.offset)
p <- matrix(p, M, J, byrow = TRUE)
pi <- do.call(piFun, list(p = p))
@@ -2063,13 +2063,13 @@ setMethod("simulate", "unmarkedFitDS",
w <- diff(db)
designMats <- getDesign(umf, formula, na.rm = na.rm)
y <- designMats$y
- X <- designMats$X
+ X <- as.matrix(cbind(designMats$X, designMats$Z_state))
X.offset <- designMats$X.offset
if (is.null(X.offset))
X.offset <- rep(0, nrow(X))
M <- nrow(y)
J <- ncol(y)
- lamParms <- coef(object, type = "state")
+ lamParms <- coef(object, type = "state", fixedOnly=FALSE)
lambda <- drop(exp(X %*% lamParms + X.offset))
if(identical(object@output, "density")) {
switch(umf@survey,
@@ -2342,14 +2342,14 @@ setMethod("simulate", "unmarkedFitMPois",
umf <- object@data
designMats <- getDesign(umf, formula, na.rm = na.rm)
y <- designMats$y
- X <- designMats$X
+ X <- as.matrix(cbind(designMats$X, designMats$Z_state))
X.offset <- designMats$X.offset
if (is.null(X.offset)) {
X.offset <- rep(0, nrow(X))
}
M <- nrow(y)
J <- ncol(y)
- lamParms <- coef(object, type = "state")
+ lamParms <- coef(object, type = "state", fixedOnly=FALSE)
lam <- as.numeric(exp(X %*% lamParms + X.offset))
lamvec <- rep(lam, each = J)
pivec <- as.vector(t(getP(object, na.rm = na.rm)))
@@ -2552,14 +2552,15 @@ setMethod("simulate", "unmarkedFitOccuMS",
for (n in 1:N){
yindex <- 1
for (t in 1:T){
- if (z[n,t] == 0) {
- yindex <- yindex + J
- next
- }
for (j in 1:J){
-
if(prm == "multinomial"){
probs_raw <- sapply(p, function(x) x[n,yindex])
+ # Make sure output is NA if probs have NA
+ if(any(is.na(probs_raw))){
+ y[n,yindex] <- NA
+ yindex <- yindex + 1
+ next
+ }
sdp <- matrix(0, nrow=S, ncol=S)
sdp[guide] <- probs_raw
@@ -2571,13 +2572,22 @@ setMethod("simulate", "unmarkedFitOccuMS",
p11 <- p[[1]][n,yindex]
p12 <- p[[2]][n,yindex]
p22 <- p[[3]][n,yindex]
+ # Trap NAs in probability of detection
+ if(any(is.na(c(p11, p12, p22)))){
+ y[n,yindex] <- NA
+ next
+ }
probs <- switch(z[n,t]+1,
c(1,0,0),
c(1-p11,p11,0),
c(1-p12,p12*(1-p22),p12*p22))
}
-
- y[n,yindex] <- sample(0:(S-1), 1, prob=probs)
+ # this NA trap probably isn't necessary but leaving it in just in case
+ if(all(!is.na(probs))){
+ y[n,yindex] <- sample(0:(S-1), 1, prob=probs)
+ } else {
+ y[n,yindex] <- NA
+ }
yindex <- yindex + 1
}
}
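
The `getP`/`simulate` changes above append the random-effect design columns (`Z_state`, `Z_det`) and use `fixedOnly=FALSE` coefficients, so simulated datasets now reflect fitted random effects for `distsamp` and `multinomPois` models. A quick check in the spirit of the new tests, assuming `hn` is an existing `distsamp` fit with a random intercept in the state model:

```r
# Sketch: simulated counts should track site-level predictions when random
# effects are included (`hn` assumed to be an existing distsamp fit)
s   <- simulate(hn, nsim = 30)
avg <- rowMeans(sapply(s, rowSums))   # mean simulated total count per site
pr  <- predict(hn, "state")
cor(avg, pr$Predicted)                # expected to be strongly positive
```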
diff --git a/R/unmarkedFrame.R b/R/unmarkedFrame.R
index 6d4c7fb..b6a9273 100644
--- a/R/unmarkedFrame.R
+++ b/R/unmarkedFrame.R
@@ -1133,9 +1133,10 @@ setMethod("[", c("unmarkedFrame", "numeric", "missing", "missing"),
if (!is.null(obsCovs)) {
R <- obsNum(x)
.site <- rep(1:M, each = R)
- obsCovs <- ldply(i, function(site) {
- subset(obsCovs, .site == site)
- })
+ oc <- lapply(i, function(ind){
+ obsCovs[.site==ind,,drop=FALSE]
+ })
+ obsCovs <- do.call(rbind, oc)
}
umf <- x
umf@y <- y
@@ -1191,17 +1192,20 @@ setMethod("[", c("unmarkedFrame","list", "missing", "missing"),
if (m != length(i)) stop("list length must be same as number of sites.")
siteCovs <- siteCovs(x)
y <- cbind(.site=1:m, getY(x))
- obsCovs <- as.data.frame(cbind(.site=rep(1:m, each=R), obsCovs(x)))
-
- obsCovs <- ddply(obsCovs, ~.site, function(df) {
- site <- df$.site[1]
- obs <- i[[site]]
- if (length(obs) > R)
- stop("All elements of list must be less than or equal to R.")
- obs <- c(obs, rep(NA, R-length(obs)))
- df[obs,]
- })
- obsCovs$.site <- NULL
+ obsCovs <- obsCovs(x)
+ site_idx <- rep(1:m, each=R)
+ stopifnot(length(site_idx) == nrow(obsCovs))
+
+ oc <- lapply(1:m, function(ind){
+ df <- obsCovs[site_idx==ind,,drop=FALSE]
+ obs <- i[[ind]]
+ if (length(obs) > R)
+ stop("All elements of list must be less than or equal to R.")
+ obs <- c(obs, rep(NA, R-length(obs)))
+ df[obs,,drop=FALSE]
+ })
+ obsCovs <- do.call(rbind, oc)
+ rownames(obsCovs) <- NULL
y <- apply(y, 1, function(row) {
site <- row[1]
@@ -1235,9 +1239,10 @@ setMethod("[", c("unmarkedFrameOccuMulti", "numeric", "missing", "missing"),
if (!is.null(obsCovs)) {
R <- obsNum(x)
.site <- rep(1:M, each = R)
- obsCovs <- ldply(i, function(site) {
- subset(obsCovs, .site == site)
- })
+ oc <- lapply(i, function(ind){
+ obsCovs[.site==ind,,drop=FALSE]
+ })
+ obsCovs <- do.call(rbind, oc)
}
umf <- x
umf@y <- ylist[[1]]
@@ -1310,9 +1315,10 @@ setMethod("[", c("unmarkedMultFrame", "numeric", "missing", "missing"),
if (!is.null(obsCovs)) {
R <- obsNum(x)
.site <- rep(1:M, each = obsNum(x)) #NULL ## testing
- obsCovs <- ldply(i, function(site) {
- subset(obsCovs, .site == site)
- })
+ oc <- lapply(i, function(ind){
+ obsCovs[.site==ind,,drop=FALSE]
+ })
+ obsCovs <- do.call(rbind, oc)
}
u <- unmarkedMultFrame(y=matrix(y, ncol=ncol(oldy)),
siteCovs=siteCovs,
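
The bracket methods above now assemble the `obsCovs` subset with base R (`lapply`/`rbind`) instead of `plyr::ldply`/`ddply`; behavior is intended to be unchanged. A small usage sketch, assuming `umf` is an existing `unmarkedFrameOccu` with at least 10 sites:

```r
# Sketch: site subsetting of an unmarkedFrame (umf assumed to exist)
umf10 <- umf[1:10, ]     # numeric index keeps sites 1-10
head(obsCovs(umf10))     # obsCovs rows follow the retained sites
```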
diff --git a/data/MesoCarnivores.rda b/data/MesoCarnivores.rda
new file mode 100644
index 0000000..ba91386
--- /dev/null
+++ b/data/MesoCarnivores.rda
Binary files differ
diff --git a/man/MesoCarnivores.Rd b/man/MesoCarnivores.Rd
new file mode 100644
index 0000000..c51e06f
--- /dev/null
+++ b/man/MesoCarnivores.Rd
@@ -0,0 +1,41 @@
+\name{MesoCarnivores}
+\alias{MesoCarnivores}
+\docType{data}
+\title{
+ Occupancy data for coyote, red fox, and bobcat
+}
+\description{
+  Occupancy data and site covariates for coyote, red fox, and bobcat from 1437 camera trap sites sampled 3 times. Each sampling period represents one week. These data are a simplified form of the dataset used by Rota et al. (2016).
+}
+
+\usage{data(MesoCarnivores)}
+
+\format{
+ A list with four elements:
+ \describe{
+ \item{\code{bobcat}}{A 1437x3 occupancy matrix for bobcat}
+ \item{\code{coyote}}{A 1437x3 occupancy matrix for coyote}
+ \item{\code{redfox}}{A 1437x3 occupancy matrix for red fox}
+ \item{\code{sitecovs}}{A data frame containing covariates for the 1437 sites, with the following columns:
+ \itemize{
+ \item{\code{Dist_5km}{Proportion of disturbed land in 5 km radius}}
+ \item{\code{HDens_5km}{Housing density in 5 km radius}}
+ \item{\code{Latitude}{Latitude / 100}}
+ \item{\code{Longitude}{Longitude / 100}}
+ \item{\code{People_site}{Number of photos of people at site / 1000}}
+ \item{\code{Trail}{1 if camera was on trail, 0 if not}}
+ }
+ }
+ }
+}
+
+\source{
+ Used with permission of Roland Kays and Arielle Parsons at North Carolina State University and the North Carolina Museum of Natural Sciences.
+}
+
+\references{
+Rota, C.T., et al. 2016. A multi-species occupancy model for two or more
+ interacting species. Methods in Ecology and Evolution 7: 1164-1173.
+}
+
+\keyword{datasets}
diff --git a/man/cruz.Rd b/man/cruz.Rd
index c749a97..e2a73a7 100644
--- a/man/cruz.Rd
+++ b/man/cruz.Rd
@@ -46,6 +46,7 @@ Sillett, S. and Chandler, R.B. and Royle, J.A. and Kery, M. and
endemic. \emph{Ecological Applications}
}
\examples{
+\dontrun{
library(lattice)
data(cruz)
str(cruz)
@@ -59,6 +60,7 @@ elev <- rasterFromXYZ(cruz[,1:3],
elev
plot(elev)
}
+}
diff --git a/man/formatDistData.Rd b/man/formatDistData.Rd
index e4b1b66..f5aaa83 100644
--- a/man/formatDistData.Rd
+++ b/man/formatDistData.Rd
@@ -19,7 +19,7 @@ transect names.}
than once, this can be used to format data for \code{gdistsamp}. It is
the name of the column in distData that contains the occasion
numbers. The occasion column should be a factor.}
-\item{effortMatrix}{optional matrix of 1 and 0s that is M * J in size and will allow for the insertion of NAs where the matrix = 0, indicating that a survey was not completed. When not supplied a matrix of all 1s is created since it is assumed all surveys were completed.}
+\item{effortMatrix}{optional matrix of 1s and 0s that is M * T in size and will allow for the insertion of NAs where the matrix = 0, indicating that a survey was not completed. When not supplied, a matrix of all 1s is created since it is assumed all surveys were completed.}
}
\details{This function creates a site (M) by distance interval (J) response
matrix from a data.frame containing the detection distances for each
diff --git a/tests/testthat/test_distsamp.R b/tests/testthat/test_distsamp.R
index 9c42119..9d6fe04 100644
--- a/tests/testthat/test_distsamp.R
+++ b/tests/testthat/test_distsamp.R
@@ -265,6 +265,7 @@ test_that("getP works with distsamp",{
test_that("distsamp works with random effects",{
+ set.seed(123)
data(linetran)
umf <- unmarkedFrameDS(y=as.matrix(linetran[,1:4]), siteCovs=linetran[,6:7],
survey="line", tlength=linetran$Length, unitsIn='m',
@@ -301,4 +302,10 @@ test_that("distsamp works with random effects",{
pr <- lapply(mods, function(x) predict(x, "state"))
expect_true(all(sapply(pr, inherits, "data.frame")))
+ # Make sure simulate accounts for random effects
+ s <- simulate(hn, nsim=30)
+ avg <- apply(sapply(s, function(x) apply(x,1,sum)),1, mean)
+ # average first count and predicted abundance should be highly correlated
+ pr <- predict(hn, 'state')
+ expect_true(cor(avg, pr$Predicted) > 0.7)
})
diff --git a/tests/testthat/test_gdistremoval.R b/tests/testthat/test_gdistremoval.R
index c780643..8f453a5 100644
--- a/tests/testthat/test_gdistremoval.R
+++ b/tests/testthat/test_gdistremoval.R
@@ -338,6 +338,11 @@ test_that("gdistremoval can fit models",{
pb <- parboot(fit, nsim=2)
expect_is(pb, "parboot")
+ # Fit list construction
+ fl <- fitList(fits=list(fit1=fit, fit2=fit))
+ expect_is(fl, "unmarkedFitList")
+ ms <- modSel(fl)
+ expect_is(ms, "unmarkedModSel")
})
test_that("gdistremoval predict method works",{
diff --git a/tests/testthat/test_multinomPois.R b/tests/testthat/test_multinomPois.R
index ee51335..6b4b693 100644
--- a/tests/testthat/test_multinomPois.R
+++ b/tests/testthat/test_multinomPois.R
@@ -237,6 +237,12 @@ test_that("multinomPois can fit models with random effects",{
expect_equivalent(dim(pr), c(100, 4))
expect_equivalent(dim(pr2), c(5,4))
+ # Make sure simulate accounts for random effects
+ s <- simulate(fm, nsim=30)
+ avg <- apply(sapply(s, function(x) x[,1]),1, mean)
+ # average first count and predicted abundance should be highly correlated
+ expect_true(cor(avg, pr$Predicted) > 0.7)
+
umf2@y[1,1] <- NA
umf2@y[2,] <- NA
umf2@siteCovs$x1[3] <- NA
diff --git a/tests/testthat/test_occu.R b/tests/testthat/test_occu.R
index 060c3f2..7ea3afe 100644
--- a/tests/testthat/test_occu.R
+++ b/tests/testthat/test_occu.R
@@ -246,8 +246,7 @@ test_that("occu cloglog link function works",{
test_that("occu predict works",{
skip_on_cran()
- if(!require(raster))
- stop("raster package required")
+ skip_if(!require(raster), "raster package unavailable")
set.seed(55)
R <- 20
J <- 4
diff --git a/tests/testthat/test_occuMS.R b/tests/testthat/test_occuMS.R
index 17ca610..aa0d024 100644
--- a/tests/testthat/test_occuMS.R
+++ b/tests/testthat/test_occuMS.R
@@ -213,7 +213,7 @@ test_that("occuMS can fit the multinomial model",{
expect_equivalent(length(sim),3)
expect_true(all(unlist(sim)%in%c(0:2)))
expect_equivalent(mean(fit_C@data@y),0.268)
- expect_equivalent(sapply(sim,mean),c(0.244,0.280,0.288))
+ expect_equivalent(sapply(sim,mean),c(0.232,0.252,0.276))
#check fitted
set.seed(123)
@@ -326,7 +326,7 @@ test_that("occuMS can fit the conditional binomial model",{
expect_equivalent(length(sim),3)
expect_true(all(unlist(sim)%in%c(0:2)))
expect_equivalent(mean(fit_C@data@y),0.2)
- expect_equivalent(sapply(sim,mean),c(0.200,0.156,0.128))
+ expect_equivalent(sapply(sim,mean),c(0.172,0.196,0.184))
})
test_that("occuMS handles NAs properly",{
@@ -361,10 +361,22 @@ test_that("occuMS handles NAs properly",{
yna <- y
yna[1,1] <- NA
+ obs_covs[1,1] <- NA
umf <- unmarkedFrameOccuMS(y=yna,siteCovs=site_covs,obsCovs=obs_covs)
fit <- occuMS(rep('~1',3),rep('~1',2),data=umf,se=F)
expect_equivalent(fit@AIC,53.06711,tol=1e-4)
+ # Check simulate and ranef methods
+ fit <- occuMS(rep('~V1',3),rep('~1',2),data=umf,se=F)
+ s <- simulate(fit, nsim=3)
+ expect_equal(sum(is.na(unlist(s))), 3)
+ r <- ranef(fit)
+ expect_true(!any(is.na(r@post)))
+
+ fit_cb <- occuMS(rep('~V1',3),rep('~1',2),data=umf,se=F, parameterization='condbinom')
+ s <- simulate(fit_cb, nsim=3)
+ expect_equal(sum(is.na(unlist(s))), 3)
+
yna <- y
yna[1,] <- NA
sc_na <- site_covs
@@ -514,7 +526,7 @@ expect_equivalent(length(coef(fit_new)),14)
set.seed(123)
fit_sim <- simulate(fitC,nsim=2)
-expect_equivalent(fit_sim[[1]][2,],c(0,2,1,0,0,2))
+expect_equivalent(fit_sim[[1]][2,],c(0,0,0,0,0,0))
nul <- capture.output(pr_phi <- predict(fitC,'phi'))
pr_phi <- sapply(pr_phi, function(x) x$Predicted[1])
@@ -642,7 +654,7 @@ expect_equivalent(fit_cbC@AIC,820.0645,tol=1e-4)
set.seed(123)
fit_sim <- simulate(fit_cbC,nsim=1)
-expect_equivalent(fit_sim[[1]][1,],c(0,0,0,2,1,0))
+expect_equivalent(fit_sim[[1]][1,],c(0,0,0,0,2,1))
nul <- capture.output(pr_phi <- predict(fit_cbC,'phi'))
pr_phi <- sapply(pr_phi, function(x) x$Predicted[1])
diff --git a/tests/testthat/test_parboot.R b/tests/testthat/test_parboot.R
index c3ece25..af62658 100644
--- a/tests/testthat/test_parboot.R
+++ b/tests/testthat/test_parboot.R
@@ -32,15 +32,49 @@ test_that("parboot works", {
dev.off()
expect_equal(pl, NULL)
- # check that report works
- rep_output <- capture.output(parboot(fm, fitstats, nsim=3, report=TRUE))
- expect_equal(substr(rep_output[1], 1,2), "t0")
+ # check that report argument gives warning
+ expect_warning(parboot(fm, fitstats, nsim=3, report=TRUE))
})
test_that("parboot works in parallel",{
skip_on_cran()
skip_on_ci()
# check parallel
- pb <- parboot(fm, nsim=101, parallel=TRUE, ncores=2)
- expect_equal(length(pb@t.star), 101)
+ pb <- parboot(fm, nsim=10, parallel=TRUE, ncores=2)
+ expect_equal(length(pb@t.star), 10)
+})
+
+test_that("parboot handles failing model fits", {
+
+ fail_func <- function(x){
+ rand <- rnorm(1)
+ if(rand > 0.5){
+ stop("fail")
+ }
+ return(rand)
+ }
+
+ set.seed(123)
+ expect_warning(pb <- parboot(fm, nsim=20, statistic=fail_func))
+ expect_equal(nrow(pb@t.star), 13)
+
+ expect_warning(pb <- parboot(fm, nsim=20, statistic=fail_func, parallel=TRUE))
+ expect_true(nrow(pb@t.star) < 20)
+
+})
+
+test_that("parboot handles statistic functions with additional arguments", {
+
+ opt_func <- function(x, y){
+ res <- mean(residuals(x), na.rm=TRUE)
+ c(res=res, y=y)
+ }
+
+ pb <- parboot(fm, nsim=10, statistic=opt_func, y=0.1)
+ expect_equal(colnames(pb@t.star), c("res", "y"))
+ expect_true(all(pb@t.star[,"y"]==0.1))
+
+ pb <- parboot(fm, nsim=10, statistic=opt_func, y=0.1, parallel=TRUE)
+ expect_equal(colnames(pb@t.star), c("res", "y"))
+ expect_true(all(pb@t.star[,"y"]==0.1))
})
diff --git a/tests/testthat/test_powerAnalysis.R b/tests/testthat/test_powerAnalysis.R
index fbf75a4..d316a64 100644
--- a/tests/testthat/test_powerAnalysis.R
+++ b/tests/testthat/test_powerAnalysis.R
@@ -61,6 +61,17 @@ test_that("powerAnalysis method works",{
pl <- unmarkedPowerList(template_model, effect_sizes, design=scenarios, nsim=10)
expect_is(pl, "unmarkedPowerList")
+ # With random effect
+ set.seed(123)
+ rguide <- list(group=factor(levels=letters[1:20]))
+ rform <- list(state=~x+(1|group), det=~1)
+ rcf <- list(state=c(intercept=0, x=0.5, group=0.7), det=c(intercept=0))
+ umfr <- simulate("occu", formulas=rform, design=design, coefs=rcf, guide=rguide)
+ fm <- occu(~1~x+(1|group), umfr)
+ pa5 <- powerAnalysis(fm, rcf, nsim=10)
+ s <- summary(pa5)
+ expect_equal(nrow(s), 3)
+ expect_equal(s$Power[2], 1)
})
})
diff --git a/tests/testthat/test_predict.R b/tests/testthat/test_predict.R
index 346d56f..e92add1 100644
--- a/tests/testthat/test_predict.R
+++ b/tests/testthat/test_predict.R
@@ -12,25 +12,6 @@ forms <- list(state=~elev+group, det=~1)
umf <- simulate("occu", design=des, formulas=forms, coefs=cf, guide=guide)
mod <- occu(~1~elev+group, umf)
-# Create rasters
-# Elevation
-r_elev <- data.frame(x=rep(1:10, 10), y=rep(1:10, each=10), z=rnorm(100))
-r_elev <- raster::rasterFromXYZ(r_elev)
-
-#Group
-r_group <- data.frame(x=rep(1:10, 10), y=rep(1:10, each=10),
- z=sample(1:length(levels(umf@siteCovs$group)), 100, replace=T))
-# Convert to 'factor' raster
-r_group <- raster::as.factor(raster::rasterFromXYZ(r_group))
-r_group@data@attributes <- data.frame(ID=raster::levels(r_group)[[1]], group=levels(umf@siteCovs$group))
-
-# Stack
-nd_raster <- raster::stack(r_elev, r_group)
-names(nd_raster) <- c("elev", "group")
-raster::crs(nd_raster) <- 32616
-
-
-
test_that("clean_up_covs works with dynamic model data",{
# Dynamic data
@@ -121,10 +102,31 @@ test_that("clean_up_covs works with models where length(y) != length(p)",{
})
test_that("predicting from raster works",{
+
+ skip_if(!require(raster), "raster package unavailable")
+
+ set.seed(123)
+ # Create rasters
+ # Elevation
+ r_elev <- data.frame(x=rep(1:10, 10), y=rep(1:10, each=10), z=rnorm(100))
+ r_elev <- raster::rasterFromXYZ(r_elev)
+
+ #Group
+ r_group <- data.frame(x=rep(1:10, 10), y=rep(1:10, each=10),
+ z=sample(1:length(levels(umf@siteCovs$group)), 100, replace=T))
+ # Convert to 'factor' raster
+ r_group <- raster::as.factor(raster::rasterFromXYZ(r_group))
+ r_group@data@attributes <- data.frame(ID=raster::levels(r_group)[[1]], group=levels(umf@siteCovs$group))
+
+ # Stack
+ nd_raster <- raster::stack(r_elev, r_group)
+ names(nd_raster) <- c("elev", "group")
+ raster::crs(nd_raster) <- 32616
+
pr <- predict(mod, 'state', newdata=nd_raster)
expect_is(pr, 'RasterStack')
expect_equal(names(pr), c("Predicted","SE","lower","upper"))
- expect_equal(pr[1,1][1], 0.695741, tol=1e-5)
+ expect_equal(pr[1,1][1], 0.3675313, tol=1e-5)
expect_equal(crs(pr), crs(nd_raster))
#append data
diff --git a/tests/testthat/test_simulate.R b/tests/testthat/test_simulate.R
index c1cda3e..be8a356 100644
--- a/tests/testthat/test_simulate.R
+++ b/tests/testthat/test_simulate.R
@@ -34,6 +34,15 @@ test_that("simulate can generate new datasets from scratch",{
expect_true(is.factor(umf2@siteCovs$landcover))
expect_equivalent(mean(umf2@siteCovs$elev), 2.01722, tol=1e-5)
+ # With random effect
+ set.seed(123)
+ rguide <- list(group=factor(levels=letters[1:20]))
+ rform <- list(state=~(1|group), det=~1)
+ rcf <- list(state=c(intercept=0, group=0.7), det=c(intercept=0))
+ umfr <- simulate("occu", formulas=rform, design=design, coefs=rcf, guide=rguide)
+ fm <- occu(~1~(1|group), umfr)
+ expect_equal(sigma(fm)$sigma, 0.6903913, tol=1e-5)
+
# pcount
set.seed(123)
cf$alpha <- c(alpha=0.5)
@@ -173,7 +182,7 @@ test_that("simulate can generate new datasets from scratch",{
cf <- list(state=bstate, det=bdet)
expect_warning(umf15 <- simulate("occuMS", formulas=forms, coefs=cf, design=list(M=500, J=5, T=1)))
fm <- occuMS(forms$det, forms$state, data=umf15, parameterization="multinomial")
- expect_equivalent(coef(fm, 'state'), c(-0.437,0.767,-0.671,-0.595), tol=1e-3)
+ expect_equivalent(coef(fm, 'state'), c(-0.657,1.033,-0.633,-0.582), tol=1e-3)
# gdistremoval
set.seed(123)
diff --git a/vignettes/cap-recap.Rnw b/vignettes/cap-recap.Rmd
index 0e44dc3..0cc3149 100644
--- a/vignettes/cap-recap.Rnw
+++ b/vignettes/cap-recap.Rmd
@@ -1,59 +1,36 @@
-<<echo=false>>=
-options(width=70)
-options(continue=" ")
-library(tools)
-@
-
-\documentclass[a4paper]{article}
-\usepackage[OT1]{fontenc}
-\usepackage{Sweave}
-\usepackage[authoryear,round]{natbib}
-%\usepackage{fullpage}
-\usepackage[vmargin=1in,hmargin=1in]{geometry}
-\usepackage{amsmath}
-\bibliographystyle{ecology}
-
-\DefineVerbatimEnvironment{Sinput}{Verbatim} {xleftmargin=2em}
-\DefineVerbatimEnvironment{Soutput}{Verbatim}{xleftmargin=2em}
-\DefineVerbatimEnvironment{Scode}{Verbatim}{xleftmargin=2em}
-\fvset{listparameters={\setlength{\topsep}{0pt}}}
-\renewenvironment{Schunk}{\vspace{\topsep}}{\vspace{\topsep}}
-
-%%\VignetteIndexEntry{Capture-recapture}
-
-\title{Modeling variation in abundance using capture-recapture data}
-\author{Richard Chandler}
-\date{Feb 24, 2019}
-
-\begin{document}
-
-\maketitle
-
-\abstract{The ``{\tt un}'' in {\tt unmarked} is somewhat misleading
- because the package can be used to analyze data from marked
- animals. The three
- most common sampling methods that produce suitable data are removal
- sampling, double observer sampling, and capture-recapture
- methods. This document focuses on the analysis of capture-recapture
- data using a class of models known as multinomial $N$-mixture
- models \citep{royle_generalized_2004, dorazio_etal:2005}, which
- assume that capture-recapture data have been collected at a
- collection of sample locations (``sites''). Capture-recapture models
- can be fitted with
- constant parameters ($M_0$), time-specific parameters ($M_t$),
- and behavioral responses ($M_b$). In addition, spatial
- variation in abundance and capture probability can also be
- modeled using covariates. \texttt{unmarked} has two
- functions for fitting
- capture-recapture models: \texttt{multinomPois} and
- \texttt{gmultmix}. Both allow for user-defined functions to describe
- the capture process, and the latter allows for modeling of temporary
- emigration when data have been collected using the so-called robust
- design \citep{kendall_etal:1997,chandlerEA_2011}.
-}
-
-
-\section{Introduction}
+---
+title: Modeling variation in abundance using capture-recapture data
+author: Richard Chandler
+date: Feb 24, 2019
+bibliography: unmarked.bib
+csl: ecology.csl
+output:
+ rmarkdown::html_vignette:
+ fig_width: 5
+ fig_height: 3.5
+ number_sections: true
+ toc: true
+vignette: >
+ %\VignetteIndexEntry{Capture-recapture}
+ %\VignetteEngine{knitr::rmarkdown}
+ \usepackage[utf8]{inputenc}
+---
+
+```{r,echo=FALSE}
+options(rmarkdown.html_vignette.check_title = FALSE)
+```
+
+# Abstract
+
+The "un" in `unmarked` is somewhat misleading because the package can be used to analyze data from marked animals.
+The three most common sampling methods that produce suitable data are removal sampling, double observer sampling, and capture-recapture methods.
+This document focuses on the analysis of capture-recapture data using a class of models known as multinomial $N$-mixture models [@royle_generalized_2004; @dorazio_etal:2005], which assume that capture-recapture data have been collected at a collection of sample locations ("sites").
+Capture-recapture models can be fitted with constant parameters ($M_0$), time-specific parameters ($M_t$), and behavioral responses ($M_b$).
+In addition, spatial variation in abundance and capture probability can also be modeled using covariates.
+`unmarked` has two functions for fitting capture-recapture models: `multinomPois` and `gmultmix`.
+Both allow for user-defined functions to describe the capture process, and the latter allows for modeling of temporary emigration when data have been collected using the so-called robust design [@kendall_etal:1997; @chandlerEA_2011].
+
+# Introduction
In traditional capture-recapture models, $n$ individuals are captured
at a site during the course of $J$ sampling occasions. The encounter
@@ -66,44 +43,48 @@ Although traditional capture-recapture models are useful
for estimating population size when $p<1$, they do not allow one to model
variation in abundance, which is a central focus of much ecological
research.
-\citet{royle_generalized_2004} and \citet{dorazio_etal:2005} developed a framework for
+@royle_generalized_2004 and @dorazio_etal:2005 developed a framework for
modeling variation in both abundance and capture
probability when capture-recapture data are collected at a set of
$R$ sites. Site-specific abundance ($N_i; i=1,2,...,R$) is regarded
as latent variable following a discrete distribution such as the
Poisson or negative binomial. The encounter histories are then
tabulated at each site so that they can be regarded as an outcome of a
-multinomial distribution with cell probabilities {$\bf \pi$}
+multinomial distribution with cell probabilities $\bf \pi$
determined by a protocol-specific function of capture
probability. Assuming a Poisson
distribution, the model can be written as
+$$
\begin{gather}
N_i \sim \mbox{Poisson}(\lambda) \nonumber \\
{\bf y_i}|N_i \sim \mbox{Multinomial}(N_i, \pi(p))
\label{mod}
\end{gather}
+$$
In the above, $\lambda$ is the expected number of individuals at each
site. ${\bf y_i}$ is a vector containing the number of
individuals with encounter history $k; k=1,2,...K$ at site $i$. The
number of observable encounter histories $K$ depends on the sampling
protocol. For a capture-recapture study with 2 occasions, there are
-3 possible encounter histories $H \in (11, 10, 01)$. In Equation~\ref{mod},
+3 possible encounter histories $H \in (11, 10, 01)$. In Equation 1,
$\pi(p)$ is a function that converts capture probability $p$ to
multinomial cell probabilities, \emph{i.e.}, the proportion
of individuals expected to have capture history $k$. For example, the
cell probabilities corresponding to the capture histories listed above
are
-\[
+
+$$
{\bf \pi}(p) = \{ p^2, p(1-p), (1-p)p \}.
-\]
+$$
+
The probability of not capturing an individual in this case ($H=00$)
is $(1-p)^2$.
Spatial variation in abundance can be modeled using covariates
with a log-link function
-\[
+$$
\log(\lambda_i) = \beta_0 + \beta_1 x_i
-\]
+$$
where $x_i$ is some site-specific covariate such as habitat type or
elevation. Multiple covariates can be considered and a more general
form of the above can be written as $\log(\lambda_i) =
@@ -111,95 +92,65 @@ form of the above can be written as $\log(\lambda_i) =
${\bf \beta}$ is a vector
of coefficients, possibly including an intercept.
Capture probability can be modeled using the logit-link in much the same way
-\[
+$$
\text{logit}(p_{ij}) = \alpha_0 + \alpha_1 v_{ij}
-\]
+$$
where $v_{ij}$ is some covariate specific to the site and
capture occasion. When $p$ is assumed to be constant, the model is
often referred to as model $M_0$. Alternatively, $p$ may be
occasion-specific
(model $M_t$) or may be influenced by animal behavior (model
-$M_b$). \citet{otis_etal:1978} and \citet{williams_etal:2002} are
+$M_b$). @otis_etal:1978 and @williams_etal:2002 are
comprehensive references.
-\section{Data}
-As previously mentioned, the data required by \texttt{unmarked} are an $R
-\times K$
-matrix in which each row is the vector of tabulated encounter
+# Data
+
+As previously mentioned, the data required by `unmarked` are an $R
+\times K$ matrix in which each row is the vector of tabulated encounter
histories for animals captured at some site. Capture-recapture data,
-however, is typically recorded in the format shown in
-Table~\ref{tab:raw}.
-
-\begin{table}[h]
- \footnotesize
- \begin{center}
- \caption{Capture-recapture data for 6 individuals sampled on 3
- occasions}
- \vspace{0.3cm}
- \begin{tabular}{lcc}
- \hline
- Animal ID & Site & Capture history \\
- \hline
- GB & A & 101 \\
- YR & A & 101 \\
- RO & A & 111 \\
- PP & A & 100 \\
- GY & B & 100 \\
- PR & B & 010 \\
- \hline
- \label{tab:raw}
- \end{tabular}
- \end{center}
-\end{table}
+however, is typically recorded in the format shown in Table 1.
+
+```{r, echo=FALSE}
+tab <- data.frame(
+ id = c("GB","YR","RO","PP","GY","PR"),
+ site = c(rep("A",4), "B", "B"),
+ cap = c("101","101","111","100","100","010")
+)
+names(tab) <- c("Animal ID", "Site", "Capture history")
+
+knitr::kable(tab, align="lcc",
+ caption="Table 1. Capture-recapture data for 6 individuals sampled on 3 occasions"
+)
+```
In the absence of individual covariates, the data in
-Table~\ref{tab:raw} can be converted to the requisite format as shown
-in Table~\ref{tab:format}. Notice that no captures were made in sites
+Table 1 can be converted to the requisite format as shown
+in Table 2. Notice that no captures were made in sites
C and D. It is important that such sites are retained in the analysis
in order to make inference about spatial variation in abundance.
-\begin{table}[h]
- \footnotesize
- \begin{center}
- \caption{Capture-recapture data from Table~\ref{tab:raw} in the
- format required by \texttt{unmarked}}
- \vspace{0.3cm}
- \begin{tabular}{lccccccc}
- \hline
- Site & \multicolumn{7}{c}{Encounter history} \\
- \cline{2-8}
- & 100 & 010 & 001 & 110 & 011 & 101 & 111 \\
- \hline
- A & 1 & 0 & 0 & 0 & 0 & 2 & 1 \\
- B & 1 & 1 & 0 & 0 & 0 & 0 & 0 \\
- C & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
- D & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
- \hline
- \label{tab:format}
- \end{tabular}
- \end{center}
-\end{table}
-
-
-
+```{r, echo=FALSE}
+tab2 <- data.frame(
+ Site=c("A","B","C","D"),
+ eh100=c(1,1,0,0), eh010=c(0,1,0,0), eh001=c(0,0,0,0),
+ eh110=c(0,0,0,0), eh011=c(0,0,0,0), eh101=c(2,0,0,0),
+ eh111=c(1,0,0,0))
+names(tab2) <- c("Site", "100","010","001","110","011","101","111")
+knitr::kable(tab2, align="lccccccc",
+ caption="Table 2. Capture-recapture data from Table 1 in the format required by unmarked")
+```
+# Closed Population Models
-
-\section{Closed Population Models}
-
-
-
-
-\subsection{Models $M_0$, $M_t$, and models with covariates of $p$.}
-
+## Models $M_0$, $M_t$, and models with covariates of $p$.
In this example we will analyze point count data collected on alder
-flycatchers (\emph{Empidonax alnorum}) by
-\citet{chandler_etal:2009}. Point count data such as these are
+flycatchers (*Empidonax alnorum*) by
+@chandler_etal:2009. Point count data such as these are
collected on unmarked animals, but one can apply
capture-recapture models because it is possible to keep track of
-individual birds during a short period of time \citep{alldredge_etal:2007}. That is, we can
+individual birds during a short period of time [@alldredge_etal:2007]. That is, we can
pretend like birds are marked by noting which time intervals they are
detected in during a short survey. The alder flycatcher data were
collected using fixed-area 15-minute point counts, which were divided
@@ -209,12 +160,11 @@ The following command imports the
capture histories for 50 individuals detected in 2005 at 49 point
count locations.
-\newpage
-
-<<>>=
+```{r}
alfl <- read.csv(system.file("csv", "alfl.csv", package="unmarked"))
head(alfl, 5)
-@
+```
+
We see 5 rows of data representing the encounter histories for 5 birds
detected at 2 points during 3 survey occasions. From these 5 birds, it appears as though
detection probability is high since each bird was detected during at
@@ -224,79 +174,82 @@ Associated with the bird data are site- and visit-specific covariates
for each of the 49 sites. We can import these data using the following
command:
-<<>>=
+```{r}
+
alfl.covs <- read.csv(system.file("csv", "alflCovs.csv",
package="unmarked"), row.names=1)
head(alfl.covs)
-@
-Each row of this \texttt{data.frame} corresponds to a point count
-location. The variable \texttt{struct} is a measure of vegetation
-structure, and \texttt{woody} is the percent cover of woody vegetation
+```
+
+Each row of this `data.frame` corresponds to a point count
+location. The variable `struct` is a measure of vegetation
+structure, and `woody` is the percent cover of woody vegetation
at each of the 50-m radius plots. Time of day and date were measured
for each of the three visits.
-To format the data for \texttt{unmarked}, we need to tabulate the
+To format the data for `unmarked`, we need to tabulate the
encounter histories for each site. Before doing so, let's first put
our capture histories in a single column. Let's also be explicit about
-the levels of our factors for both the newly created captureHistory
+the levels of our factors for both the newly created `captureHistory`
column and the point id column.
-<<>>=
+```{r}
alfl$captureHistory <- paste(alfl$interval1, alfl$interval2, alfl$interval3, sep="")
alfl$captureHistory <- factor(alfl$captureHistory,
levels=c("001", "010", "011", "100", "101", "110", "111"))
## Don't do this:
#levels(alfl$id) <- rownames(alfl.covs)
alfl$id <- factor(alfl$id, levels=rownames(alfl.covs))
-@
-Specifying the levels of \texttt{captureHistory} ensures that when we
+```
+
+Specifying the levels of `captureHistory` ensures that when we
tabulate the encounter histories, we will include zeros for histories
-that were not observed. Similarly, setting the levels of
-\texttt{alfl\$id} tells \textbf{R} that there
+that were not observed. Similarly, setting the levels of `alfl$id` tells `R` that there
were some sites where no ALFL were detected. This way, when we
tabulate the data, we get a frequency for each site, not just the ones
-with $>1$ detection. Here are the commands to extract data from the
+with >1 detection. Here are the commands to extract data from the
first primary period and to tabulate the encounter
histories.
-<<>>=
+```{r}
alfl.v1 <- alfl[alfl$survey==1,]
alfl.H1 <- table(alfl.v1$id, alfl.v1$captureHistory)
head(alfl.H1, 5)
-@
-The object \texttt{alfl.H1} contains the tabulated capture histories for
-each site. This is the format required by {\tt unmarked}. The data from
+```
+
+The object `alfl.H1` contains the tabulated capture histories for
+each site. This is the format required by `unmarked`. The data from
the first 5 sites
suggest that detection probability was high since the most
-common encounter history was $111$.
+common encounter history was `111`.
-Now we are almost ready to create our \texttt{unmarkedFrame} and begin
+Now we are almost ready to create our `unmarkedFrame` and begin
fitting models. We will fit our first series of models using the
-\texttt{multinomPois} function, which requires data formated using the
-\texttt{unmarkedFrameMPois} function. This constructor function
+`multinomPois` function, which requires data formatted using the
+`unmarkedFrameMPois` function. This constructor function
has an argument `type`, which currently can be set to
-\texttt{"removal"} or \texttt{"double"}, corresponding to removal sampling
+`"removal"` or `"double"`, corresponding to removal sampling
data and double observer sampling respectively. In doing so, the
function automatically creates the function to convert $p$ to
-${\bf \pi}$. If \texttt{type} is missing, however, the user needs to
+${\bf \pi}$. If `type` is missing, however, the user needs to
specify
a function to convert detection probability to multinomial cell
probabilities. In the future, we may add a
`type` option to automatically handle standard
capture-recapture data too,
-but here we show how to supply it using a user-defined \texttt{piFun},
+but here we show how to supply it using a user-defined `piFun`,
which allows flexibility in converting detection probability
-to multinomial cell probabilities $\bf \pi$. The \texttt{piFun} must
+to multinomial cell probabilities $\bf \pi$. The `piFun` must
take a matrix of detection probabilities with $J$ columns
(3 in this case), and convert them to a
matrix of multinomial
cell probabilities with $K$ columns. Each column corresponds to the
probability of observing the encounter history $k$. Here is a
-\texttt{piFun} to compute the multinomial cell probabilities when there
+`piFun` to compute the multinomial cell probabilities when there
were 3 sampling occasions. This function allows us to fit models
$M_0$, $M_t$, or models with covariates of $p$.
-<<>>=
+```{r}
crPiFun <- function(p) {
p1 <- p[,1]
p2 <- p[,2]
@@ -309,47 +262,46 @@ crPiFun <- function(p) {
"110" = p1 * p2 * (1-p3),
"111" = p1 * p2 * p3)
}
-@
+```
To demonstrate how this works, imagine that we surveyed 2 sites and
detection probability was constant ($p=0.4$) among sites and survey
occasions. The function converts these capture probabilities to
multinomial cell probabilities. Note that these cell probabilities will
-sum to $< 1$ if capture probability is less than 1 over the 3 occasions.
+sum to <1 whenever capture probability is less than 1, because the all-zero (never detected) history is not among the tabulated outcomes.
-<<>>=
+```{r}
p <- matrix(0.4, 2, 3)
crPiFun(p)
rowSums(crPiFun(p))
-@
+```
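+
+The shortfall from 1 is exactly the probability of the all-zero ("000")
+history, which can be checked directly:
+
+```{r}
+1 - rowSums(crPiFun(p))   # equals (1 - 0.4)^3 = 0.216 for each site
+```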
When providing a user-defined `piFun`, we also need to provide
information about how to handle missing values. That is, if we have a
-missing value in a covariate, we need to know which values of {\bf y}
-are affected. In \texttt{unmarked}, this can be done by supplying a
-mapping-matrix to the \texttt{obsToY} argument in the
-\texttt{unmarkedFrameMPois} function. \texttt{obsToY} needs to be a matrix
+missing value in a covariate, we need to know which values of ${\bf y}$
+are affected. In `unmarked`, this can be done by supplying a
+mapping-matrix to the `obsToY` argument in the
+`unmarkedFrameMPois` function. `obsToY` needs to be a matrix
of zeros and ones with
the number of rows equal to the number of columns of an `obsCov`, and
-the number columns equal to the number of columns in {\bf y}.
-If \texttt{obsToY[j,k]}=1, then a missing value in {\tt obsCov[i,j]}
+the number of columns equal to the number of columns in ${\bf y}$.
+If `obsToY[j,k]`=1, then a missing value in `obsCov[i,j]`
translates to
-a missing value in {\tt y[i,k]}. For the capture-recapture data
+a missing value in `y[i,k]`. For the capture-recapture data
considered here, we can set all
-elements of
-\texttt{obsToY} to 1.
+elements of `obsToY` to 1.
-<<>>=
+```{r}
o2y <- matrix(1, 3, 7)
-@
+```
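+
+As a quick illustration (not part of the original workflow), an all-ones
+`obsToY` means that a missing covariate value on any single occasion flags
+every column of the encounter-history matrix for that site:
+
+```{r}
+obs_na <- c(FALSE, TRUE, FALSE)   # e.g., the occasion-2 covariate is NA at a site
+as.logical(obs_na %*% o2y)        # TRUE = the corresponding y column becomes NA
+```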
-We are now ready to create the \texttt{unmarkedFrame}. In order to fit
+We are now ready to create the `unmarkedFrame`. In order to fit
model $M_t$, we need a covariate that references the time interval
-, which we call \texttt{intervalMat} below. We also provide a
+(which we call `intervalMat` below). We also provide a
couple of site-specific covariates: the percent cover of woody
vegetation and vegetation structure.
-<<>>=
+```{r}
library(unmarked)
intervalMat <- matrix(c('1','2','3'), 50, 3, byrow=TRUE)
class(alfl.H1) <- "matrix"
@@ -357,12 +309,11 @@ umf.cr1 <- unmarkedFrameMPois(y=alfl.H1,
siteCovs=alfl.covs[,c("woody", "struct", "time.1", "date.1")],
obsCovs=list(interval=intervalMat),
obsToY=o2y, piFun="crPiFun")
-@
-
+```
-Writing a \texttt{piFun} and creating the \texttt{obsToY} object are
+Writing a `piFun` and creating the `obsToY` object are
the hardest parts of a capture-recapture analysis in
-\texttt{unmarked}. Again, this is done automatically for removal models
+`unmarked`. Again, this is done automatically for removal models
and double observer models, and we may add an option to do this
automatically for capture-recapture data too, but hopefully
the flexibility allowed by specifying user-defined
@@ -372,87 +323,71 @@ Now that we have our data formatted we can fit some models. The
following correspond to model $M_0$, model $M_t$, and a model with a
continuous covariate effect on $p$.
-
-<<>>=
+```{r}
M0 <- multinomPois(~1 ~1, umf.cr1, engine="R")
Mt <- multinomPois(~interval-1 ~1, umf.cr1, engine="R")
Mx <- multinomPois(~time.1 ~1, umf.cr1, engine="R")
-@
+```
The first two models can be fit in other software programs. What is
-unique about \texttt{unmarked} is that we can also model variation in
+unique about `unmarked` is that we can also model variation in
abundance and detection probability among sites. The following model
treats abundance as
a function of the percent cover of woody vegetation.
-<<>>=
+```{r}
(M0.woody <- multinomPois(~1 ~woody, umf.cr1, engine="R"))
-@
+```
-
-This final model has a much lower AIC score than the other models, and
-it indicates
-that alder flycatcher abundance increases with the percent cover
-of woody vegetation. We can plot this relationship by predicting
-abundance at a sequence of woody vegetation values.
-<<woody,fig=TRUE,include=FALSE,width=5,height=5>>=
+```{r, fig.width=5, fig.height=5}
nd <- data.frame(woody=seq(0, 0.8, length=50))
E.abundance <- predict(M0.woody, type="state", newdata=nd, appendData=TRUE)
plot(Predicted ~ woody, E.abundance, type="l", ylim=c(0, 6),
ylab="Alder flycatchers / plot", xlab="Woody vegetation cover")
lines(lower ~ woody, E.abundance, col=gray(0.7))
lines(upper ~ woody, E.abundance, col=gray(0.7))
-@
-\begin{figure}[h!]
- \begin{center}
- \includegraphics[width=4in,height=4in]{cap-recap-woody}
- \end{center}
-\end{figure}
-
-\newpage
+```
What about detection probability? Since there was no evidence of
variation in $p$, we can simply back-transform the logit-scale estimate
to obtain $\hat{p}$.
-<<>>=
+```{r}
backTransform(M0.woody, type="det")
-@
+```
As suggested by the raw data, detection probability was very high. The
corresponding multinomial cell probabilities can be computed by
plugging this estimate of detection probability into our
-\texttt{piFun}. This \texttt{getP} function makes this easy.
+`piFun`. The `getP` function makes this easy.
-<<>>=
+```{r}
round(getP(M0.woody), 2)[1,]
-@
+```
Note that the encounter history most likely to be observed was
`111`. In fact $p$ was so high that the probability of not detecting an
alder flycatcher was essentially zero, $(1-0.81)^3 = 0.007$.
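+
+A quick check of that arithmetic:
+
+```{r}
+(1 - 0.81)^3
+```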
-
-
-\subsection{Modeling behavioral responses, Model $M_b$}
+## Modeling behavioral responses, Model $M_b$
An animal's behavior might change after being captured. Both
trap avoidance and trap attraction are frequently
observed in a variety of taxa. A simple model of these two behaviors
-is known as model $M_b$ \citep{otis_etal:1978}. The model assumes that
+is known as model $M_b$ [@otis_etal:1978]. The model assumes that
newly-captured individuals are captured with probability $p_{naive}$
and then are subsequently recaptured with probability $p_{wise}$. If
$p_{wise} < p_{naive}$, then animals exhibit trap avoidance. In some
cases, such as when traps are baited, we might observe $p_{wise} >
-p_{naive}$ in which case the animals are said to be ``trap-happy''.
+p_{naive}$, in which case the animals are said to be "trap-happy".
-To fit model $M_b$ in \texttt{unmarked}, we need to create a new
-\texttt{piFun} and we need to provide an occasion-specific covariate
-(\texttt{obsCov}) that
+To fit model $M_b$ in `unmarked`, we need to create a new
+`piFun` and we need to provide an occasion-specific covariate
+(`obsCov`) that
distinguishes the two capture probabilities, $p_{naive}$ and
$p_{wise}$. The simplest possible approach is the following
-<<>>=
+```{r}
crPiFun.Mb <- function(p) { # p should have 3 columns
pNaive <- p[,1]
pWise <- p[,3]
@@ -464,64 +399,66 @@ crPiFun.Mb <- function(p) { # p should have 3 columns
"110" = pNaive * pWise * (1-pWise),
"111" = pNaive * pWise * pWise)
}
-@
-This function \texttt{crPiFun.Mb} allows capture probability to be
+```
+
+This function `crPiFun.Mb` allows capture probability to be
modeled as
-\[
+$$
\text{logit}(p_{ij}) = \alpha_{naive} + \alpha_{wise} behavior_j + \alpha_1 x_i
-\]
+$$
where $behavior_j$ is simply a dummy variable. Thus, when no
site-specific covariates ($x_i$) are included, $p_{ij}$ is either $p_{naive}$
or $p_{wise}$. The following code constructs a new
-\texttt{unmarkedFrame} and fits model $M_b$ to the alder
+`unmarkedFrame` and fits model $M_b$ to the alder
flycatcher data.
-<<>>=
+```{r}
behavior <- matrix(c('Naive','Naive','Wise'), 50, 3, byrow=TRUE)
umf.cr1Mb <- unmarkedFrameMPois(y=alfl.H1,
siteCovs=alfl.covs[,c("woody", "struct", "time.1")],
obsCovs=list(behavior=behavior),
obsToY=o2y, piFun="crPiFun.Mb")
M0 <- multinomPois(~1 ~1, umf.cr1Mb, engine="R")
-@
-
-\newpage
+```
-<<>>=
+```{r}
(Mb <- multinomPois(~behavior-1 ~1, umf.cr1Mb, engine="R"))
-@
+```
+
AIC gives us no reason to favor model $M_b$ over model $M_0$. This is
perhaps not too surprising given that the alder
flycatchers were not actually captured. Here is a command to compute
-95\% confidence intervals for the two detection probabilities.
-<<>>=
+95% confidence intervals for the two detection probabilities.
+
+```{r}
plogis(confint(Mb, type="det", method="profile"))
-@
+```
-\subsection{Caution, Warning, Danger}
-The function \texttt{crPiFun.Mb} is not generic and could easily be
+## Caution, Warning, Danger
+
+The function `crPiFun.Mb` is not generic and could easily be
abused. For example, you would get bogus results if you tried to use
this function to fit model $M_{bt}$, or if you incorrectly formatted
-the \texttt{behavior} covariate. Thus, extreme caution is advised when
-writing user-defined \texttt{piFun}s.
+the `behavior` covariate. Thus, extreme caution is advised when
+writing user-defined `piFun`s.
There are also a few limitations regarding user-defined
-\texttt{piFun}s. First, they can only take a single argument \verb+p+,
+`piFun`s. First, they can only take a single argument `p`,
which must be the $R \times J$ matrix of detection probabilities. This
makes it cumbersome to fit models such as model $M_h$ as described
below. It also makes it impossible to fit models such as model
$M_{bt}$. It
-would be better if \texttt{piFun}s could accept multiple
+would be better if `piFun`s could accept multiple
arguments, but this would require some modifications to
-\texttt{multinomPois} and \texttt{gmultmix}, which we may do in the
+`multinomPois` and `gmultmix`, which we may do in the
future.
-\subsection{Individual Heterogeneity in Capture Probability, Model $M_h$}
+## Individual Heterogeneity in Capture Probability, Model $M_h$
The capture-recapture models covered thus far assume
that variation in capture probability can be
explained by site-specific covariates, time, or behavior. Currently,
-\texttt{unmarked} can not fit so-called individual covariate models,
+`unmarked` can not fit so-called individual covariate models,
in which heterogeneity in $p$ is attributable to animal-specific
covariates. However, one could
partition the data into strata and analyze the strata separately. For
@@ -532,17 +469,15 @@ into 2 subsets.
Although individual covariate models cannot be considered, it is possible
to fit model $M_h$, which assumes
random variation in capture probability among individuals.
-Here is a \texttt{piFun}, based on code by Andy Royle. It assumes
+Here is a `piFun`, based on code by Andy Royle. It assumes
a logit-normal distribution for the random effects
-\[
+$$
\mbox{logit}(p_i) \sim Normal(\mu, \sigma^2).
-\]
+$$
These random effects are integrated out of the likelihood to obtain the
marginal probability of capture.
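+
+For intuition, the marginal (average) capture probability under this model is
+obtained by averaging `plogis(x)` over the random-effect distribution; a small
+numerical sketch (the values of `mu` and `sig` below are illustrative only):
+
+```{r}
+mu  <- 1.6   # illustrative mean of logit(p)
+sig <- 2.3   # illustrative SD of logit(p)
+integrate(function(x) plogis(x) * dnorm(x, mu, sig), -Inf, Inf)$value
+```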
-\newpage
-
-<<>>=
+```{r}
MhPiFun <- function(p) {
mu <- qlogis(p[,1]) # logit(p)
sig <- exp(qlogis(p[1,2]))
@@ -575,65 +510,55 @@ for(i in 1:M) {
}
return(il)
}
-@
+```
This function does not allow for temporal variation in capture
-probability because we are using the second column of \verb+p+ as
+probability because we are using the second column of `p` as
$\sigma$, the parameter governing the variance of the random
effects. Once again, this is somewhat clumsy and it would be better to
-allow \texttt{piFun} to accept additional arguments, which could be
-controlled from \texttt{multinomPois} using an additional
-\texttt{formula}. Such features may be added evenually.
-
-Having defined our new \texttt{piFun}, we can fit the model as follows
-%<<>>=
-%library(unmarked)
-%parID <- matrix(c('p','sig','sig'), 50, 3, byrow=TRUE)
-%umf.cr2 <- unmarkedFrameMPois(y=alfl.H1,
-% siteCovs=alfl.covs[,c("woody", "struct", "time.1")],
-% obsCovs=list(parID=parID),
-% obsToY=o2y, piFun="MhPiFun")
-%multinomPois(~parID-1 ~woody, umf.cr2)
-%@
-\begin{Schunk}
-\begin{Sinput}
-> library(unmarked)
-> parID <- matrix(c('p','sig','sig'), 50, 3, byrow=TRUE)
-> umf.cr2 <- unmarkedFrameMPois(y=alfl.H1,
- siteCovs=alfl.covs[,c("woody", "struct", "time.1")],
- obsCovs=list(parID=parID),
- obsToY=o2y, piFun="MhPiFun")
-> multinomPois(~parID-1 ~woody, umf.cr2)
-\end{Sinput}
-\begin{Soutput}
-Call:
-multinomPois(formula = ~parID - 1 ~ woody, data = umf.cr2)
-
-Abundance:
- Estimate SE z P(>|z|)
-(Intercept) -0.84 0.363 -2.31 0.02078
-woody 2.59 0.680 3.81 0.00014
-
-Detection:
- Estimate SE z P(>|z|)
-parIDp 1.637 0.645 2.54 0.0112
-parIDsig 0.841 0.622 1.35 0.1762
-
-AIC: 242.3731
-\end{Soutput}
-\end{Schunk}
+allow `piFun` to accept additional arguments, which could be
+controlled from `multinomPois` using an additional
+`formula`. Such features may be added eventually.
+
+Having defined our new `piFun`, we can fit the model as follows:
+
+```{r, eval=FALSE}
+parID <- matrix(c('p','sig','sig'), 50, 3, byrow=TRUE)
+umf.cr2 <- unmarkedFrameMPois(y=alfl.H1,
+ siteCovs=alfl.covs[,c("woody", "struct", "time.1")],
+ obsCovs=list(parID=parID),
+ obsToY=o2y, piFun="MhPiFun")
+multinomPois(~parID-1 ~woody, umf.cr2)
+```
+
+```
+## Call:
+## multinomPois(formula = ~parID - 1 ~ woody, data = umf.cr2)
+##
+## Abundance:
+## Estimate SE z P(>|z|)
+## (Intercept) -0.84 0.363 -2.31 0.02078
+## woody 2.59 0.680 3.81 0.00014
+##
+## Detection:
+## Estimate SE z P(>|z|)
+## parIDp 1.637 0.645 2.54 0.0112
+## parIDsig 0.841 0.622 1.35 0.1762
+##
+## AIC: 242.3731
+```
+
The estimate of $\sigma$ is high, indicating the existence of substantial
heterogeneity in detection probability. However, one should be aware of the
-concerns about $M_h$ raised by \citet{link:2003} who demonstrated that
+concerns about $M_h$ raised by @link:2003, who demonstrated that
population size $N$ is not an identifiable parameter among various
classes of models assumed for the random effects. For example, we
might use a beta distribution rather than a logit-normal distribution,
-and obtain very different estimates of abundance. \citet{link:2003}
+and obtain very different estimates of abundance. @link:2003
demonstrated that conventional methods such as AIC cannot be used to
discriminate among these models.
-
-\subsection{Distance-related heterogeneity}
+## Distance-related heterogeneity
Another source of individual heterogeneity in capture probability
arises from the distance between animal activity centers and
@@ -641,25 +566,24 @@ sample locations. Traditional capture-recapture models ignore this
important source of variation in capture probability, but
recently developed spatial capture-recapture (SCR) models overcome this
limitation
-\citep{efford:2004,royle_young:2008,royle_dorazio:2008}. Distance-related
+[@efford:2004; @royle_young:2008; @royle_dorazio:2008]. Distance-related
heterogeneity in detection
probability was probably not an important concern in the alder
flycatcher dataset because the plots were very small (0.785 ha) and
only singing birds were included in the analysis. If it were a
concern, we could of course collect distance data and use the
-\verb+gdistsamp+ function to fit a distance sampling model.
+`gdistsamp` function to fit a distance sampling model.
In other contexts, such as when using arrays of live traps,
distance sampling is not an option and
SCR models offer numerous advantages over traditional
capture-recapture models.
-
-\section{Modeling Temporary Emigration}
+# Modeling Temporary Emigration
In the previous analysis we used data from the first visit only.
-\citet{chandlerEA_2011} proposed a model that allows us to
+@chandlerEA_2011 proposed a model that allows us to
make use of the entire alder flycatcher dataset. The model is similar
-to the temporary emigration model of \citet{kendall_etal:1997} except
+to the temporary emigration model of @kendall_etal:1997 except
that we are
interested in modeling variation in abundance among sites.
@@ -674,25 +598,27 @@ to sampling during primary period $t$. We now collect
capture-recapture data
at each site during each primary period, and obtain the data $\bf
y_{it}$. The model can be written as
+$$
\begin{gather}
M_i \sim \mbox{Poisson}(\lambda) \nonumber \\
N_{it}|M_i \sim \mbox{Binomial}(M_i, \phi) \nonumber \\
{\bf y_{it}}|N_{it} \sim \mbox{Multinomial}(N_{it}, \pi(p))
\label{mod:te}
\end{gather}
+$$
where $\phi$ is the probability of being available for capture. This
can be modeled as a function of covariates using the logit-link.
The data structure for the robust design is more complex than before,
-but it is easy to create in \textbf{R}. We can once again use the
-\texttt{table} function---but this time, we create a three-dimensional table
+but it is easy to create in `R`. We can once again use the
+`table` function, but this time we create a three-dimensional table
rather than a two-dimensional one. We also need to expand the
-\texttt{obsToY} mapping matrix so that it has a block diagonal
+`obsToY` mapping matrix so that it has a block diagonal
structure. This isn't so intuitive, but the
commands below are generic and can be applied to other
capture-recapture designs.
-<<>>=
+```{r}
alfl.H <- table(alfl$id, alfl$captureHistory, alfl$survey)
alfl.Hmat <- cbind(alfl.H[,,1], alfl.H[,,2], alfl.H[,,3])
nVisits <- 3
@@ -702,41 +628,34 @@ umf.cr <- unmarkedFrameGMM(y=alfl.Hmat,
yearlySiteCovs=list(date=alfl.covs[,3:5], time=alfl.covs[,6:8]),
obsCovs=list(interval=cbind(intervalMat,intervalMat,intervalMat)),
obsToY=o2yGMM, piFun="crPiFun", numPrimary=nVisits)
-@
-
+```
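+
+To picture the block-diagonal structure, note that the mapping consists of one
+3 x 7 block of ones per primary period; a Kronecker product gives a matrix of
+the right shape (shown only as an illustration, not necessarily how `o2yGMM`
+is constructed above):
+
+```{r}
+dim(kronecker(diag(nVisits), o2y))   # 3*nVisits rows, 7*nVisits columns
+```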
Notice that we have 3 types of covariates now. The site-specific
covariates are the same as before. Now, however, the observation
-covariates must match the dimensions of the {\bf y} matrix. We can
+covariates must match the dimensions of the ${\bf y}$ matrix. We can
also have a class of covariates that vary among primary periods but
-not within primary periods. These are called yearlySiteCovs, which is
-a misleading name. It is a carry-over from other ``open population"
-models in \texttt{unmarked}, but it should be remembered that these
+not within primary periods. These are called `yearlySiteCovs`, which is
+a misleading name. It is a carry-over from other "open population"
+models in `unmarked`, but it should be remembered that these
models are most suitable for data from a single year, since we assume
no births or mortalities.
-We can fit the model using the \texttt{gmultmix} function, which has a
+We can fit the model using the `gmultmix` function, which has a
slightly different set of arguments. Rather than a single formula, the
function takes 3 formulas for abundance covariates, availability
covariates, and detection covariates in that order.
-<<>>=
+```{r}
(fm1 <- gmultmix(~woody, ~1, ~time+date, umf.cr, engine="R"))
-@
+```
Results from this model are similar to those obtained using the subset
of data, but the standard error for the woody estimate has
-decreased. If we back-transform the estimate of $\phi$, we see that
+decreased. If we back-transform the estimate of $\phi$, we see that
the probability of being available for detection is 0.31.
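+
+For example, the availability estimate can be back-transformed directly (a
+sketch, assuming the availability parameter is accessed with `type="phi"` as
+in other `unmarked` fits):
+
+```{r, eval=FALSE}
+backTransform(fm1, type="phi")
+```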
-Another feature of \texttt{gmultmix} is that $N$ can be modeled using
+Another feature of `gmultmix` is that $N$ can be modeled using
either the Poisson or negative binomial distribution. We might
eventually add other options such as the zero-inflated Poisson.
-
-
-\bibliography{unmarked}
-
-
-
-\end{document}
+# References
diff --git a/vignettes/colext-cov.pdf b/vignettes/colext-cov.pdf
deleted file mode 100644
index d831e7c..0000000
--- a/vignettes/colext-cov.pdf
+++ /dev/null
Binary files differ
diff --git a/vignettes/colext-data-1.png b/vignettes/colext-data-1.png
new file mode 100644
index 0000000..6d8aa71
--- /dev/null
+++ b/vignettes/colext-data-1.png
Binary files differ
diff --git a/vignettes/colext-est-1.png b/vignettes/colext-est-1.png
new file mode 100644
index 0000000..7df8149
--- /dev/null
+++ b/vignettes/colext-est-1.png
Binary files differ
diff --git a/vignettes/colext-gof-1.png b/vignettes/colext-gof-1.png
new file mode 100644
index 0000000..dfe673a
--- /dev/null
+++ b/vignettes/colext-gof-1.png
Binary files differ
diff --git a/vignettes/colext-gof.pdf b/vignettes/colext-gof.pdf
deleted file mode 100644
index da28c00..0000000
--- a/vignettes/colext-gof.pdf
+++ /dev/null
Binary files differ
diff --git a/vignettes/colext-pred-1.png b/vignettes/colext-pred-1.png
new file mode 100644
index 0000000..c248a7a
--- /dev/null
+++ b/vignettes/colext-pred-1.png
Binary files differ
diff --git a/vignettes/colext-sim.pdf b/vignettes/colext-sim.pdf
deleted file mode 100644
index 04e0c5d..0000000
--- a/vignettes/colext-sim.pdf
+++ /dev/null
Binary files differ
diff --git a/vignettes/colext-yearlysim.pdf b/vignettes/colext-yearlysim.pdf
deleted file mode 100644
index 02d67e0..0000000
--- a/vignettes/colext-yearlysim.pdf
+++ /dev/null
Binary files differ
diff --git a/vignettes/colext.Rnw b/vignettes/colext.Rmd
index dbb5a9b..0bfc1a1 100644
--- a/vignettes/colext.Rnw
+++ b/vignettes/colext.Rmd
@@ -1,103 +1,38 @@
-<<echo=false>>=
-options(width=70)
-options(continue=" ")
-@
-
-
-\documentclass[12pt]{article}
-
-
-\usepackage[OT1]{fontenc}
-\usepackage{Sweave}
-%\usepackage{natbib}
-\usepackage{fullpage}
-\usepackage[vmargin=1in,hmargin=1in]{geometry}
-%\bibliographystyle{plain}
-
-\SweaveOpts{keep.source=TRUE}
-
-\DefineVerbatimEnvironment{Sinput}{Verbatim} {xleftmargin=2em}
-\DefineVerbatimEnvironment{Soutput}{Verbatim}{xleftmargin=2em}
-\DefineVerbatimEnvironment{Scode}{Verbatim}{xleftmargin=2em}
-\fvset{listparameters={\setlength{\topsep}{0pt}}}
-\renewenvironment{Schunk}{\vspace{\topsep}}{\vspace{\topsep}}
-
-%%\VignetteIndexEntry{Dynamic occupancy models}
-
-
-\usepackage{amsmath}
-\usepackage{amssymb} % used for symbols in figure legends
-\usepackage{url}
-\usepackage{framed}
-\usepackage{float}
-
-\usepackage{lineno}
-\floatstyle{plain}
-\floatname{panel}{Panel}
-\newfloat{panel}{h}{txt}
-
-\renewcommand{\baselinestretch}{1}
-\setlength{\textwidth}{6.5in}
-%\setlength{\evensidemargin}{0.1875in}
-%\setlength{\oddsidemargin}{0.1875in}
-\setlength{\evensidemargin}{0in}
-\setlength{\oddsidemargin}{0in}
-
-\setlength{\textheight}{8.425in}
-%\setlength{\headheight}{.5in}
-%\setlength{\headsep}{.5in}
-%\setlength{\parindent}{.25in}
-\setlength{\topmargin}{0.025in}
-
-
-%paragraph formatting
-\usepackage{indentfirst} % indent first line of paragraph in new sections
-\usepackage{setspace} % for double or single space
-%\singlespacing
-%
-\usepackage{graphicx}
-
-\begin{document}
-
-% \textbf{Running title: Dynamic occupancy modeling in unmarked}
-
-\vspace{1 cm}
-
-\begin{center}
- \Large \textbf{Dynamic occupancy models in unmarked}
-\end{center}
-
-\vspace{1 cm}
-
-\noindent Marc K\'{e}ry and Richard Chandler\\
-
-\noindent Swiss Ornithological Institute and University of Georgia \\
-
-\vspace{1 cm}
-\begin{center}
- \textbf{16 August 2016}
-\end{center}
-
-% \today
-
-% Key words:
-
-\vspace{1 cm}
-
-\begin{center}
- \textbf{Abstract}
-\end{center}
-Dynamic occupancy models (MacKenzie et al. 2003) allow inference about
-the occurrence of ``things'' at collections of ``sites''
+---
+title: Dynamic occupancy models in unmarked
+author:
+- name: Marc Kéry, Swiss Ornithological Institute
+- name: Richard Chandler, University of Georgia
+date: August 16, 2016
+bibliography: unmarked.bib
+csl: ecology.csl
+output:
+ rmarkdown::html_vignette:
+ fig_width: 5
+ fig_height: 3.5
+ number_sections: true
+ toc: true
+vignette: >
+ %\VignetteIndexEntry{Dynamic occupancy models}
+ %\VignetteEngine{knitr::rmarkdown}
+ \usepackage[utf8]{inputenc}
+---
+
+
+
+# Abstract
+
+Dynamic occupancy models [@mackenzie_estimating_2003] allow inference about
+the occurrence of "things" at collections of "sites"
and about how changes in occurrence are driven by colonization and
local extinction. These models also account for imperfect detection
-probability. Depending on how ``thing'' and ``site'' are defined,
+probability. Depending on how "thing" and "site" are defined,
occupancy may have vastly different biological meanings,
including the presence of a disease in an individual (disease
incidence), of a species at a site (occurrence, distribution), or of an
individual in a territory.
-Dynamic occupancy models in \textbf{unmarked} are fit using the
-function \emph{colext}.
+Dynamic occupancy models in `unmarked` are fit using the
+function `colext`.
All parameters can be modeled as functions of covariates, i.e.,
first-year occupancy with covariates varying by site
(site-covariates),
@@ -109,22 +44,8 @@ bird survey MHB.
We also give examples to show how predictions, along with standard
errors and confidence intervals, can be obtained.
+# Introduction
-\newpage
-
-\singlespacing % single
-% \doublespacing
-
-
-\newpage
-\raggedright
-% \indentfirst
-\setlength{\parindent}{.25in}
-% \linenumbers % Switch on/off line numbers
-
-
-
-\section{Introduction}
Occurrence is a quantity of central importance in many branches of
ecology and related sciences.
The presence of a disease in an individual or of a species
@@ -135,14 +56,14 @@ Thus, depending on how we define the thing we are looking for and the
sample unit, very different biological quantities can be analyzed
using statistical models for occupancy.
-If we denote presence of the ``thing'' as $y=1$ and its absence as
+If we denote presence of the "thing" as $y=1$ and its absence as
$y=0$, then it is natural to characterize all these metrics by the
-probability that a randomly chosen sample unit (``site'') is occupied,
-i.e., has a ``thing'' present: $Pr(y=1) = \psi$.
+probability that a randomly chosen sample unit ("site") is occupied,
+i.e., has a "thing" present: $Pr(y=1) = \psi$.
We call this the occupancy probability, or occupancy for short, and
from now on will call the sample unit,
-where the presence or absence of a ``thing'' is assessed, generically
-a ``site''.
+where the presence or absence of a "thing" is assessed, generically
+a "site".
Naturally, we would like to explore factors that affect the likelihood
that a site is occupied.
@@ -151,12 +72,12 @@ customary statistical model for occurrence.
In this model, we treat occurrence $y$ as a binomial random variable
with trial size 1 and success probability $p$, or, equivalently, a
Bernoulli trial with $p$.
-``Success'' means occurrence, so $p$ is the occurrence probability.
+"Success" means occurrence, so $p$ is the occurrence probability.
It can be modeled as a linear or other function of covariates via a
suitable link function, e.g., the logit link.
-This simple model is described in many places, including McCullagh and
-Nelder (1989), Royle and Dorazio (2008, chapter 3), K\'{e}ry (2010,
-chapter 17) and K\'{e}ry and Schaub (2011, chapter 3).
+This simple model is described in many places, including @McCullagh_1989,
+Royle and Dorazio [-@royle_dorazio:2008, chapter 3], Kéry [-@Kery_2010,
+chapter 17] and Kéry and Schaub [-@Kery_2011, chapter 3].
A generalization of this model accounts for changes in the occupancy
state of sites by introducing parameters for survival
@@ -170,58 +91,54 @@ a site occupied at $t$ is again occupied at $t+1$ as $Pr(y_{i,t+1} = 1
This represents the survival probability of a site that is occupied.
Of course, we could also choose to express this component of occupancy
dynamics by the converse, extinction probability $\epsilon$ ---
-the parameterization used in \textbf{unmarked}.
+the parameterization used in `unmarked`.
To model the fate of an unoccupied site, we denote as $Pr(y_{i,t+1} =
1 | y_{i,t} = 0 ) = \gamma$ the probability that an unoccupied site at
$t$ becomes occupied at $t+1$.
This is the colonization probability of an empty site.
-Such a dynamic model of occurrence has become famous in the ecological literature under the name ``metapopulation model'' (Hanski 1998).
+Such a dynamic model of occurrence has become famous in the ecological literature under the name "metapopulation model" [@Hanski_1998].
However, when using ecological data collected in the field to fit such
models of occurrence, we face the usual challenge of imperfect
-detection (e.g. K\'{e}ry and Schmidt 2008).
+detection [e.g. @Kery_2008].
For instance, a species can go unobserved at a surveyed site or an
occupied territory can appear unoccupied during a particular survey,
perhaps because both birds are away hunting.
Not accounting for detection error may seriously bias all parameter
-estimators of a metapopulation model (Moilanen 2002; Royle and Dorazio
-2008).
+estimators of a metapopulation model [@Moilanen_2002; @royle_dorazio:2008].
To account for this additional stochastic component in the generation
of most ecological field data, the classical metapopulation model may
be generalized to include a submodel for the observation process,
which allows an occupied site to be recorded as unoccupied.
-This model has been developed by MacKenzie et al. (2003). It is
-described as a hierarchical model by Royle and K\'{e}ry (2007), Royle
-and Dorazio (2008, chapter 9) and K\'{e}ry and Schaub (2011, chapter
-13). The model is usually called a multi-season, multi-year or a
+This model has been developed by @mackenzie_estimating_2003. It is
+described as a hierarchical model by @Royle_2007, Royle
+and Dorazio [-@royle_dorazio:2008, chapter 9] and Kéry and Schaub [-@Kery_2011, chapter 13].
+The model is usually called a multi-season, multi-year or a
dynamic site-occupancy model.
The former terms denote the fact that it is applied to multiple
-``seasons'' or years and the latter emphasizes that the model allows
+"seasons" or years and the latter emphasizes that the model allows
for between-season occurrence dynamics.
-This vignette describes the use of the \textbf{unmarked} function
-\emph{colext} to fit dynamic occupancy models. Note that we will use
+This vignette describes the use of the `unmarked` function
+`colext` to fit dynamic occupancy models. Note that we use `code` font rather than
italics for the names of functions.
Static occupancy models, i.e., for a single season without changes in
-the occupancy state (MacKenzie et al. 2002), can be fit with \emph{occu},
-for the model described by MacKenzie et al. (2002) and Tyre et
-al. (2003), and with \emph{occuRN}, for the heterogeneity occupancy model
-described by Royle and Nichols (2003).
+the occupancy state [@mackenzie_estimating_2002], can be fit with `occu`,
+for the model described by @mackenzie_estimating_2002 and @Tyre_2002, and with `occuRN`, for the heterogeneity occupancy model
+described by @royle_estimating_2003.
In the next section (section 2), we give a more technical description
of the dynamic occupancy model.
In section 3, we provide R code for generating data under a basic
-dynamic occupancy model and illustrate use of \emph{colext} for fitting the
+dynamic occupancy model and illustrate use of `colext` for fitting the
model.
In section 4, we use real data from the Swiss breeding bird survey MHB
-(Schmid et al. 2004) to fit a few more elaborate models with
+[@schmid_etal:2004] to fit a few more elaborate models with
covariates for all parameters.
We also give examples illustrating how to compute predictions, with
-standard errors and 95\% confidence intervals, for the parameters.
-
+standard errors and 95% confidence intervals, for the parameters.
+# Dynamic occupancy models
-
-\section{Dynamic occupancy models}
To be able to estimate the parameters of the dynamic occupancy model
(probabilities of occurrence, survival and colonization) separately
from the parameters for the observation process (detection
@@ -236,15 +153,14 @@ That is, $y_{ijt}=1$ if at least one individual is detected and
$y_{ijt}=0$ if none is detected.
The model makes the following assumptions:
-\begin{itemize}
-\item replicate surveys at a site during a single season are
+* replicate surveys at a site during a single season are
independent (or else dependency must be modeled)
-\item occurrence state $z_{it}$ (see below) does not change over
+* occurrence state $z_{it}$ (see below) does not change over
replicate surveys at site $i$ during season $t$
-\item there are no false-positive errors, i.e., a species can only be
+* there are no false-positive errors, i.e., a species can only be
overlooked where it occurs, but it cannot be detected where it does
not in fact occur
-\end{itemize}
+
The complete model consists of one submodel to describe the ecological
process, or state, and another submodel for the observation process,
which is dependent on the result of the ecological process.
@@ -255,26 +171,24 @@ The observation process describes the probability of detecting a
presence (i.e., $y = 1$) at a site that is occupied and takes account
of false-negative observation errors.
+## Ecological or state process
-
-
-\subsection{Ecological or state process}
The initial state is denoted $z_{i1}$ and represents occurrence at
site $i$ during season 1.
For this, the model assumes a Bernoulli trial governed by the
occupancy probability in the first season $\psi_{i1}$:
-\[
- z_{i1} = Bernoulli(\psi_{i1})
-\]
+$$
+z_{i1} \sim Bernoulli(\psi_{i1})
+$$
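+
+In code, the first-year occurrence states are a single Bernoulli draw per site
+(a sketch with illustrative values):
+
+```r
+M    <- 250     # illustrative number of sites
+psi1 <- 0.4     # illustrative first-year occupancy probability
+z1   <- rbinom(M, size = 1, prob = psi1)
+mean(z1)        # realized proportion of occupied sites
+```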
-We must distinguish the sample quantity ``occurrence'' at a site, $z$,
-from the population quantity ``occupancy probability'', $\psi$.
+We must distinguish the sample quantity "occurrence" at a site, $z$,
+from the population quantity "occupancy probability", $\psi$.
The former is the realization of a Bernoulli random variable with
parameter $\psi$.
This distinction becomes important when we want to compute the number
of occupied sites among the sample of surveyed sites;
-see Royle and K\'{e}ry (2007) and Weir et al. (2009) for this
+see @Royle_2007 and @Weir_2009 for this
distinction.
For all later seasons ($t = 2, 3, \ldots T$), occurrence is a function
@@ -285,9 +199,9 @@ $\phi_{it}$, also called probability of persistence (= 1 minus the
probability of local extinction),
and the probability of colonization $\gamma_{it}$.
-\[
- z_{it} \sim Bernoulli(z_{i,t-1} \phi_{it} + (1-z_{i,t-1}) \gamma_{it})
-\]
+$$
+z_{it} \sim Bernoulli(z_{i,t-1} \phi_{it} + (1-z_{i,t-1}) \gamma_{it})
+$$
Hence, if site $i$ is unoccupied at $t-1$, $z_{i,t-1}=0$, and the
success probability of the Bernoulli is
@@ -304,23 +218,23 @@ $z_{i1}$ , $\phi_{it}$ and $\gamma_{it}$.
Variances of these derived estimates can be obtained via the delta
method or the bootstrap.
+## Observation process
-\subsection{Observation process}
To account for the observation error (specifically, false-negative
observations), the conventional Bernoulli detection process is
assumed, such that
-\[
- y_{ijt} \sim Bernoulli(z_{it} p_{ijt})
-\]
+$$
+y_{ijt} \sim Bernoulli(z_{it} p_{ijt})
+$$
Here, $p_{ijt}$ is the detection probability at site $i$ during
survey $j$ and season $t$. Detection is conditional on occurrence, and
multiplying $p_{ijt}$ with $z_{it}$ ensures that occurrence can only
be detected where in fact a species occurs, i.e. where $z_{it}=1$.
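+
+A one-line sketch of this detection process (values are illustrative only):
+
+```r
+z_it <- 1; p_ijt <- 0.5               # occupied site, detection probability 0.5
+y_ijt <- rbinom(1, 1, z_it * p_ijt)   # if z_it were 0, detection would be impossible
+```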
+## Modeling of parameters
-\subsection{Modeling of parameters}
The preceding, fully general model description allows for site-($i$)
dependence of all parameters. In addition to that, survival and
colonization probabilities may be season-($t$) dependent and detection
@@ -344,18 +258,16 @@ form
Julian date of the survey $j$ at site $i$ in season $t$.
We note that for first-year occupancy, only covariates that vary among
-sites (``site covariates'') can be fitted, while for survival and
-colonization, covariates that vary by site and by season (``yearly
-site covariates'') may be fitted as well.
+sites ("site covariates") can be fitted, while for survival and
+colonization, covariates that vary by site and by season ("yearly
+site covariates") may be fitted as well.
For detection, covariates of three formats may be fitted:
-``site-covariates'', ``yearly-site-covariates'' and
-``observation-covariates'', as
-they are called in \textbf{unmarked}.
+"site-covariates", "yearly-site-covariates" and
+"observation-covariates", as
+they are called in `unmarked`.
+# Dynamic occupancy models for simulated data
-
-
-\section{Dynamic occupancy models for simulated data}
We first generate a simple, simulated data set
with specified, year-specific values for
the parameters as well as design specifications, i.e., number of
@@ -364,18 +276,15 @@ Then, we show how to fit a dynamic occupancy model with
year-dependence in the parameters for colonization, extinction and
detection probability.
-\subsection{Simulating, formatting, and summarizing data}
+## Simulating, formatting, and summarizing data
+
To simulate the data, we execute the following R code.
The actual values for these parameters for each year are drawn
randomly from a uniform distribution with
the specified bounds.
-%%<<eval=true,echo=false>>=
-%%load(system.file("ws", "dynocc.RData", package="unmarked"))
-%%@
-\begin{small}
-<<>>=
+```r
M <- 250 # Number of sites
J <- 3 # num secondary sample periods
T <- 10 # num primary sample periods
@@ -415,12 +324,10 @@ for(i in 1:M){
for (k in 2:T){
psi[k] <- psi[k-1]*phi[k-1] + (1-psi[k-1])*gamma[k-1]
}
-@
-\end{small}
-
+```
We have now generated a single realization from the stochastic system
-thus defined. Figure~\ref{fig:sim}
+thus defined. Figure 1
illustrates the fundamental issue
of imperfect detection --- the actual proportion of sites occupied
differs greatly from the observed proportion of sites occupied, and
@@ -428,8 +335,9 @@ because $p$ varies among years, the observed data cannot be used as a
valid index of the parameter of interest $\psi_i$.
-\begin{small}
-<<sim,fig=true,include=false,width=6,height=6>>=
+
+
+```r
plot(1:T, colMeans(z), type = "b", xlab = "Year",
ylab = "Proportion of sites occupied",
col = "black", xlim=c(0.5, 10.5), xaxp=c(1,10,9),
@@ -440,192 +348,173 @@ psi.app <- colMeans(apply(y, c(1,3), max))
lines(1:T, psi.app, type = "b", col = "blue", lty=3, lwd = 2)
legend(1, 0.6, c("truth", "observed"),
col=c("black", "blue"), lty=c(1,3), pch=c(16,1))
-@
-\end{small}
-
-
-\begin{figure}[!h]
-\centering
-\includegraphics[width=5in,height=5in]{colext-sim.pdf}
-\caption{Summary of the multi-year occupancy data set generated.}
-\label{fig:sim}
-\end{figure}
+```
+![Figure 1. Summary of the multi-year occupancy data set generated.](colext-data-1.png)
To analyze this data set with a dynamic occupancy model in
-\textbf{unmarked}, we first load the package.
+`unmarked`, we first load the package.
+
-<<>>=
+```r
library(unmarked)
-@
+```
Next, we reformat the detection/non-detection data from a 3-dimensional
array (as generated) into a 2-dimensional matrix with M rows.
That is, we put the annual tables of data (the slices of the former
-3-D array) sideways to produce a ``wide'' layout of the data.
+3-D array) sideways to produce a "wide" layout of the data.
-\begin{small}
-<<>>=
-yy <- matrix(y, M, J*T)
-@
-\end{small}
+```r
+yy <- matrix(y, M, J*T)
+```
Next, we create a matrix indicating the year each site was surveyed.
-\begin{small}
-<<>>=
+
+```r
year <- matrix(c('01','02','03','04','05','06','07','08','09','10'),
nrow(yy), T, byrow=TRUE)
-@
-\end{small}
+```
-To organize the data in the format required by \emph{colext}, we make
-use of the function \emph{unmarkedMultFrame}. The only required
-arguments are \emph{y}, the detection/non-detection data, and
-\emph{numPrimary}, the number of seasons. The three types of
+To organize the data in the format required by `colext`, we make
+use of the function `unmarkedMultFrame`. The only required
+arguments are `y`, the detection/non-detection data, and
+`numPrimary`, the number of seasons. The three types of
covariates described earlier can also be supplied using the arguments
-\emph{siteCovs}, \emph{yearlySiteCovs}, and \emph{obsCovs}. In this case,
+`siteCovs`, `yearlySiteCovs`, and `obsCovs`. In this case,
we only make use of the second type, which must have M rows and T
columns.
-\begin{small}
-<<>>=
+```r
simUMF <- unmarkedMultFrame(
y = yy,
yearlySiteCovs = list(year = year),
numPrimary=T)
summary(simUMF)
-@
-\end{small}
-
-
+```
+
+```
+## unmarkedFrame Object
+##
+## 250 sites
+## Maximum number of observations per site: 30
+## Mean number of observations per site: 30
+## Number of primary survey periods: 10
+## Number of secondary survey periods: 3
+## Sites with at least one detection: 195
+##
+## Tabulation of y observations:
+## 0 1
+## 6430 1070
+##
+## Yearly-site-level covariates:
+## year
+## 01 : 250
+## 02 : 250
+## 03 : 250
+## 04 : 250
+## 05 : 250
+## 06 : 250
+## (Other):1000
+```
+
+## Model fitting
-\subsection{Model fitting}
We are ready to fit a few dynamic occupancy models.
We will fit a model with constant values for all parameters and
another with full time-dependence for colonization, extinction and
detection probability. We also time the calculations.
-\begin{small}
-
-%<<eval=true>>=
-%# Model with all constant parameters
-%m0 <- colext(psiformula= ~1, gammaformula = ~ 1, epsilonformula = ~ 1,
-% pformula = ~ 1, data = simUMF, method="BFGS")
-%@
-
-%<<>>=
-%summary(m0)
-%@
-
-
-
-\begin{Schunk}
-\begin{Sinput}
-> # Model with all constant parameters
-> m0 <- colext(psiformula= ~1, gammaformula = ~ 1, epsilonformula = ~ 1,
- pformula = ~ 1, data = simUMF, method="BFGS")
-\end{Sinput}
-\end{Schunk}
-
-\begin{Schunk}
-\begin{Sinput}
-> summary(m0)
-\end{Sinput}
-\begin{Soutput}
-Call:
-colext(psiformula = ~1, gammaformula = ~1, epsilonformula = ~1,
- pformula = ~1, data = simUMF, method = "BFGS")
-
-Initial (logit-scale):
- Estimate SE z P(>|z|)
- -0.813 0.158 -5.16 2.46e-07
-
-Colonization (logit-scale):
- Estimate SE z P(>|z|)
- -1.77 0.0807 -22 2.75e-107
-
-Extinction (logit-scale):
- Estimate SE z P(>|z|)
- -0.59 0.102 -5.79 7.04e-09
-
-Detection (logit-scale):
- Estimate SE z P(>|z|)
- -0.0837 0.0562 -1.49 0.137
-
-AIC: 4972.597
-Number of sites: 250
-optim convergence code: 0
-optim iterations: 27
-Bootstrap iterations: 0
-\end{Soutput}
-\end{Schunk}
-
-
-
-\end{small}
+```r
+# Model with all constant parameters
+m0 <- colext(psiformula= ~1, gammaformula = ~ 1, epsilonformula = ~ 1,
+ pformula = ~ 1, data = simUMF, method="BFGS")
+summary(m0)
+```
+
+```
+##
+## Call:
+## colext(psiformula = ~1, gammaformula = ~1, epsilonformula = ~1,
+## pformula = ~1, data = simUMF, method = "BFGS")
+##
+## Initial (logit-scale):
+## Estimate SE z P(>|z|)
+## -0.813 0.158 -5.16 2.46e-07
+##
+## Colonization (logit-scale):
+## Estimate SE z P(>|z|)
+## -1.77 0.0807 -22 2.75e-107
+##
+## Extinction (logit-scale):
+## Estimate SE z P(>|z|)
+## -0.59 0.102 -5.79 7.04e-09
+##
+## Detection (logit-scale):
+## Estimate SE z P(>|z|)
+## -0.0837 0.0562 -1.49 0.137
+##
+## AIC: 4972.597
+## Number of sites: 250
+## optim convergence code: 0
+## optim iterations: 27
+## Bootstrap iterations: 0
+```
The computation time was only a few seconds.
Note that all parameters were estimated on the logit scale. To
back-transform to the original scale, we can simply use the
-inverse-logit function, named \emph{plogis} in R.
+inverse-logit function, named `plogis` in R.
-\begin{small}
-<<>>=
+```r
plogis(-0.813)
-@
-\end{small}
+```
+```
+## [1] 0.3072516
+```
-Alternatively, we can use \emph{backTransform}, which
+Alternatively, we can use `backTransform`, which
computes standard errors using the delta method. Confidence intervals
-are also easily obtained using the function \emph{confint}.
+are also easily obtained using the function `confint`.
We first remind ourselves of the names of parameters, which can all be
used as arguments for these functions.
-\begin{small}
-
-%<<>>=
-%names(m0)
-%backTransform(m0, type="psi")
-%confint(backTransform(m0, type="psi"))
-%@
-
+```r
+names(m0)
+```
-\begin{Schunk}
-\begin{Sinput}
-> names(m0)
-\end{Sinput}
-\begin{Soutput}
-[1] "psi" "col" "ext" "det"
-\end{Soutput}
-\begin{Sinput}
-> backTransform(m0, type="psi")
-\end{Sinput}
-\begin{Soutput}
-Backtransformed linear combination(s) of Initial estimate(s)
+```
+## [1] "psi" "col" "ext" "det"
+```
- Estimate SE LinComb (Intercept)
- 0.307 0.0335 -0.813 1
+```r
+backTransform(m0, type="psi")
+```
-Transformation: logistic
-\end{Soutput}
-\begin{Sinput}
-> confint(backTransform(m0, type="psi"))
-\end{Sinput}
-\begin{Soutput}
- 0.025 0.975
- 0.2457313 0.3765804
-\end{Soutput}
-\end{Schunk}
+```
+## Backtransformed linear combination(s) of Initial estimate(s)
+##
+## Estimate SE LinComb (Intercept)
+## 0.307 0.0335 -0.813 1
+##
+## Transformation: logistic
+```
-\end{small}
+```r
+confint(backTransform(m0, type="psi"))
+```
+```
+## 0.025 0.975
+## 0.2457313 0.3765804
+```
Next, we fit the dynamic occupancy model with full year-dependence in
the parameters describing occupancy dynamics and also in detection.
@@ -643,116 +532,101 @@ differences. For simple presentation, a means parameterization is more
practical. It can be specified by adding a -1 to the formula for the
time-dependent parameters.
-\begin{small}
-<<eval=false>>=
+```r
m1 <- colext(psiformula = ~1, # First-year occupancy
gammaformula = ~ year-1, # Colonization
epsilonformula = ~ year-1, # Extinction
pformula = ~ year-1, # Detection
data = simUMF)
-@
-<<eval=false,echo=false>>=
m1
-@
-\begin{Schunk}
-\begin{Sinput}
-> m1
-\end{Sinput}
-\begin{Soutput}
-Call:
-colext(psiformula = ~1, gammaformula = ~year - 1, epsilonformula = ~year -
- 1, pformula = ~year - 1, data = simUMF)
-
-Initial:
- Estimate SE z P(>|z|)
- -0.273 0.302 -0.906 0.365
-
-Colonization:
- Estimate SE z P(>|z|)
-year01 -2.08 0.951 -2.19 2.86e-02
-year02 -2.18 0.365 -5.96 2.52e-09
-year03 -1.98 0.274 -7.23 4.88e-13
-year04 -2.32 0.678 -3.42 6.37e-04
-year05 -1.89 0.478 -3.95 7.78e-05
-year06 -1.76 0.294 -5.97 2.44e-09
-year07 -1.55 0.230 -6.73 1.75e-11
-year08 -1.43 0.228 -6.29 3.19e-10
-year09 -2.35 0.470 -5.00 5.64e-07
-
-Extinction:
- Estimate SE z P(>|z|)
-year01 -1.4209 0.418 -3.401 6.72e-04
-year02 -0.4808 0.239 -2.009 4.45e-02
-year03 -1.2606 0.366 -3.440 5.83e-04
-year04 -0.0907 0.650 -0.139 8.89e-01
-year05 -0.6456 0.599 -1.078 2.81e-01
-year06 -0.9586 0.378 -2.539 1.11e-02
-year07 -1.2279 0.365 -3.362 7.74e-04
-year08 -1.1894 0.292 -4.076 4.58e-05
-year09 -0.6292 0.635 -0.991 3.22e-01
-
-Detection:
- Estimate SE z P(>|z|)
-year01 -1.0824 0.244 -4.434 9.26e-06
-year02 -0.2232 0.148 -1.508 1.32e-01
-year03 0.2951 0.154 1.918 5.52e-02
-year04 0.0662 0.161 0.412 6.81e-01
-year05 -2.0396 0.433 -4.706 2.52e-06
-year06 -0.6982 0.232 -3.005 2.66e-03
-year07 0.2413 0.165 1.466 1.43e-01
-year08 0.0847 0.155 0.548 5.84e-01
-year09 0.6052 0.140 4.338 1.44e-05
-year10 -1.1699 0.306 -3.828 1.29e-04
-
-AIC: 4779.172
-\end{Soutput}
-\end{Schunk}
-\end{small}
-
-
-
-
-\subsection{Manipulating results: prediction and plotting}
+```
+
+```
+##
+## Call:
+## colext(psiformula = ~1, gammaformula = ~year - 1, epsilonformula = ~year -
+## 1, pformula = ~year - 1, data = simUMF)
+##
+## Initial:
+## Estimate SE z P(>|z|)
+## -0.273 0.302 -0.906 0.365
+##
+## Colonization:
+## Estimate SE z P(>|z|)
+## year01 -2.08 0.951 -2.19 2.86e-02
+## year02 -2.18 0.365 -5.96 2.52e-09
+## year03 -1.98 0.274 -7.23 4.88e-13
+## year04 -2.32 0.678 -3.42 6.37e-04
+## year05 -1.89 0.478 -3.95 7.78e-05
+## year06 -1.76 0.294 -5.97 2.44e-09
+## year07 -1.55 0.230 -6.73 1.75e-11
+## year08 -1.43 0.228 -6.29 3.19e-10
+## year09 -2.35 0.470 -5.00 5.64e-07
+##
+## Extinction:
+## Estimate SE z P(>|z|)
+## year01 -1.4209 0.418 -3.401 6.72e-04
+## year02 -0.4808 0.239 -2.009 4.45e-02
+## year03 -1.2606 0.366 -3.440 5.83e-04
+## year04 -0.0907 0.650 -0.139 8.89e-01
+## year05 -0.6456 0.599 -1.078 2.81e-01
+## year06 -0.9586 0.378 -2.539 1.11e-02
+## year07 -1.2279 0.365 -3.362 7.74e-04
+## year08 -1.1894 0.292 -4.076 4.58e-05
+## year09 -0.6292 0.635 -0.991 3.22e-01
+##
+## Detection:
+## Estimate SE z P(>|z|)
+## year01 -1.0824 0.244 -4.434 9.26e-06
+## year02 -0.2232 0.148 -1.508 1.32e-01
+## year03 0.2951 0.154 1.918 5.52e-02
+## year04 0.0662 0.161 0.412 6.81e-01
+## year05 -2.0396 0.433 -4.706 2.52e-06
+## year06 -0.6982 0.232 -3.005 2.66e-03
+## year07 0.2413 0.165 1.466 1.43e-01
+## year08 0.0847 0.155 0.548 5.84e-01
+## year09 0.6052 0.140 4.338 1.44e-05
+## year10 -1.1699 0.306 -3.828 1.29e-04
+##
+## AIC: 4779.172
+```
+
+## Manipulating results: prediction and plotting
Again, all estimates are shown on the logit-scale. Back-transforming
estimates when covariates, such as year, are present involves an
-extra step. Specifically, we need to tell \textbf{unmarked} the values
+extra step. Specifically, we need to tell `unmarked` the values
of our covariate
at which we want an estimate. This can be done using
-\emph{backTransform} in combination with \emph{linearComb}, although
-it can be easier to use \emph{predict}. \emph{predict} allows the user
+`backTransform` in combination with `linearComb`, although
+it can be easier to use `predict`. `predict` allows the user
to supply a data.frame in which each row represents a combination of
covariate values of interest. Below, we create data.frames called
-\emph{nd} with each row representing a year.
+`nd` with each row representing a year.
Then we request yearly estimates of the probability of extinction,
colonization and detection,
-and compare them to ``truth'', i.e., the values with which we
+and compare them to "truth", i.e., the values with which we
simulated the data set. Note that there are T-1 extinction and
colonization parameters in this case, so we do not need to include
-year `10' in \emph{nd}.
+year 10 in `nd`.
-\begin{small}
-<<eval=false>>=
+```r
nd <- data.frame(year=c('01','02','03','04','05','06','07','08','09'))
E.ext <- predict(m1, type='ext', newdata=nd)
E.col <- predict(m1, type='col', newdata=nd)
nd <- data.frame(year=c('01','02','03','04','05','06','07','08','09','10'))
E.det <- predict(m1, type='det', newdata=nd)
-@
-\end{small}
+```
-
-Predict returns the predictions along with standard errors and
+`predict` returns the predictions along with standard errors and
confidence intervals. These can be used to create plots. The
-\emph{with} function is used to simplify the process of requesting the
-columns of data.frame returned by \emph{predict}.
-
+`with` function is used to simplify the process of requesting the
+columns of the `data.frame` returned by `predict`.
-\begin{small}
-<<yearlysim,eval=false,fig=true,include=false,width=3,height=7>>=
+```r
op <- par(mfrow=c(3,1), mai=c(0.6, 0.6, 0.1, 0.1))
with(E.ext, { # Plot for extinction probability
@@ -787,99 +661,75 @@ with(E.det, { # Plot for detection probability: note 10 years
legend(7.5, 1, c('Parameter','Estimate'), col=c(1,4), pch=c(16, 1),
cex=0.8)
})
+```
-par(op)
-@
-
-\end{small}
-
-
+![Figure 2. Yearly estimates of parameters](colext-est-1.png)
+```r
+par(op)
+```
-\begin{figure}
- \centering
- \includegraphics[width=3in,height=7in]{colext-yearlysim.pdf}
- \caption{Yearly estimates of $\epsilon$, $\gamma$ and $p$.}
- \label{fig:yearlysim}
-\end{figure}
-
-Figure~\ref{fig:yearlysim} shows that the 95\% confidence intervals
+Figure 2 shows that the 95% confidence intervals
include the true parameter values, and the point estimates are not too
far off.
-
-\subsection{Derived parameters}
+## Derived parameters
Estimates of occupancy probability in years $t>1$ must be derived from the
estimates of first-year occupancy and the two parameters governing the
dynamics, extinction/survival and colonization.
-\textbf{unmarked} does this automatically in two ways. First, the
+`unmarked` does this automatically in two ways. First, the
population-level estimates of occupancy probability
$\psi_t = \psi_{t-1}\phi_{t-1} + (1-\psi_{t-1})\gamma_{t-1}$ are calculated
and stored in the slot named `projected`. Slots can be accessed
-using the @ operator, e.g. fm@projected.
+using the `@` operator, e.g. `fm@projected`.
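+
+For intuition, this recursion can be reproduced by hand; a sketch using the
+constant-parameter estimates from `m0` above, back-transformed with `plogis`:
+
+```r
+psi1.hat  <- plogis(-0.813)   # first-year occupancy
+gamma.hat <- plogis(-1.77)    # colonization
+eps.hat   <- plogis(-0.59)    # extinction
+proj <- numeric(10)
+proj[1] <- psi1.hat
+for (k in 2:10) proj[k] <- proj[k-1] * (1 - eps.hat) + (1 - proj[k-1]) * gamma.hat
+round(proj, 3)
+```
+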
In some cases, interest may lie in making
inference about the proportion of the sampled sites that are occupied,
rather than the entire population of sites. These estimates are
-contained in the \emph{smoothed} slot of the fitted model. Thus, the
-\emph{projected} values are estimates of population parameters, and
-the \emph{smoothed} estimates are of the finite-sample
-quantities. Discussions of the differences can be found in Weir et
-al. (2009).
+contained in the `smoothed` slot of the fitted model. Thus, the
+`projected` values are estimates of population parameters, and
+the `smoothed` estimates are of the finite-sample
+quantities. Discussions of the differences can be found in @Weir_2009.
Bootstrap methods can be used to compute standard errors of derived
parameter estimates. Here we employ a non-parametric bootstrap to obtain
standard errors of the smoothed estimates of occupancy probability
during each year.
-\begin{small}
-%<<eval=true>>=
-%m1 <- nonparboot(m1, B = 10)
-%@
-%<<eval=true>>=
-%cbind(psi=psi, smoothed=smoothed(m1)[2,], SE=m1@smoothed.mean.bsse[2,])
-%@
-
-
-\begin{Schunk}
-\begin{Sinput}
-> m1 <- nonparboot(m1, B = 10)
-> cbind(psi=psi, smoothed=smoothed(m1)[2,], SE=m1@smoothed.mean.bsse[2,])
-\end{Sinput}
-\begin{Soutput}
- psi smoothed SE
-1 0.4000000 0.4320671 0.05781066
-2 0.3493746 0.4110124 0.03605041
-3 0.2977125 0.3139967 0.02811819
-4 0.3148447 0.3278179 0.02861192
-5 0.3192990 0.2316695 0.06840470
-6 0.2915934 0.2528485 0.03243467
-7 0.3114415 0.2928429 0.02950853
-8 0.3636580 0.3504885 0.03023857
-9 0.3654064 0.3936991 0.02617258
-10 0.3460641 0.3095786 0.07354332
-\end{Soutput}
-\end{Schunk}
-
-
-\end{small}
-
-
-In practice, B should be much higher, possibly $>$ 1000 for complex
+
+```r
+m1 <- nonparboot(m1, B = 10)
+cbind(psi=psi, smoothed=smoothed(m1)[2,], SE=m1@smoothed.mean.bsse[2,])
+```
+
+```
+## psi smoothed SE
+## 1 0.4000000 0.4320671 0.06783911
+## 2 0.3493746 0.4110124 0.03786402
+## 3 0.2977125 0.3139967 0.02780818
+## 4 0.3148447 0.3278179 0.04303542
+## 5 0.3192990 0.2316695 0.10858419
+## 6 0.2915934 0.2528485 0.04179036
+## 7 0.3114415 0.2928429 0.03113920
+## 8 0.3636580 0.3504885 0.04224678
+## 9 0.3654064 0.3936991 0.02103870
+## 10 0.3460641 0.3095786 0.06830698
+```
+
+In practice, `B` should be much higher, possibly >1000 for complex
models.
Another derived parameter of interest is the turnover probability
-\[
+$$
\tau_t = \frac{\gamma_{t-1}(1-\psi_{t-1})}{\gamma_{t-1}(1-\psi_{t-1})
+ \phi_{t-1}\psi_{t-1}}
-\]
+$$
The following function returns these estimates.
-\begin{small}
-<<>>=
+```r
turnover <- function(fm) {
psi.hat <- plogis(coef(fm, type="psi"))
if(length(psi.hat) > 1)
@@ -897,53 +747,38 @@ turnover <- function(fm) {
}
return(tau.hat)
}
-@
-\end{small}
-
-
+```
The bootstrap again offers a means of estimating variance. Here we
show how to generate 95% confidence intervals for the turnover
estimates using the parametric bootstrap.
-\begin{small}
-
-
-\begin{Schunk}
-\begin{Sinput}
-> pb <- parboot(m1, statistic=turnover, nsim=2)
-> turnCI <- cbind(pb@t0,
- t(apply(pb@t.star, 2, quantile, probs=c(0.025, 0.975))))
-> colnames(turnCI) <- c("tau", "lower", "upper")
-\end{Sinput}
-\end{Schunk}
-\begin{Schunk}
-\begin{Sinput}
-> turnCI
-\end{Sinput}
-\begin{Soutput}
- tau lower upper
-t*1 0.1532645 0.09613841 0.1060208
-t*2 0.1911530 0.14770601 0.1694802
-t*3 0.2537292 0.18514773 0.2469985
-t*4 0.2604356 0.18947604 0.2029920
-t*5 0.3989303 0.49754613 0.5671067
-t*6 0.3758690 0.28901007 0.3022026
-t*7 0.3537473 0.39050385 0.4278534
-t*8 0.3174983 0.25300357 0.3222170
-t*9 0.1704449 0.14896392 0.1815896
-\end{Soutput}
-\end{Schunk}
-
-
-\end{small}
-
+```r
+pb <- parboot(m1, statistic=turnover, nsim=2)
+turnCI <- cbind(pb@t0,
+ t(apply(pb@t.star, 2, quantile, probs=c(0.025, 0.975))))
+colnames(turnCI) <- c("tau", "lower", "upper")
+turnCI
+```
+
+```
+## tau lower upper
+## t*1 0.1532645 0.00536045 0.1974714
+## t*2 0.1911530 0.07881180 0.2119585
+## t*3 0.2537292 0.19777204 0.2785973
+## t*4 0.2604356 0.04063769 0.4197328
+## t*5 0.3989303 0.34078483 0.4720357
+## t*6 0.3758690 0.32703698 0.5370796
+## t*7 0.3537473 0.32696166 0.3564059
+## t*8 0.3174983 0.32925238 0.4139696
+## t*9 0.1704449 0.18946470 0.3186236
+```
Which bootstrap method is most appropriate for variance estimation?
For detailed distinctions between the
-non-parametric and the parametric bootstrap, see Davison and Hinkley
-(1997). We note simply that the parametric bootstrap resamples from
+non-parametric and the parametric bootstrap, see @Davison_1997.
+We note simply that the parametric bootstrap resamples from
the fitted model, and thus the
measures of uncertainty are purely
functions of the distributions assumed by the model. Non-parametric
@@ -951,10 +786,7 @@ bootstrap samples, in contrast, are obtained by resampling the
data, not the model, and thus are not necessarily affected by the
variance formulas of the model's distributions.
-
-
-\subsection{Goodness-of-fit}
-
+## Goodness-of-fit
In addition to estimating the variance of an estimate, the parametric
bootstrap can be used to assess goodness-of-fit. For this purpose, a
@@ -966,21 +798,17 @@ the distribution of the fit-statistic, and a \emph{P}-value
can be computed as the proportion of simulated values greater than the
observed value.
-Hosmer et al. (1997) found that a $\chi^2$ statistic performed
+@Hosmer_1997 found that a $\chi^2$ statistic performed
reasonably well in assessing lack of fit for logistic regression
models. We know of no studies formally
evaluating the performance of various fit-statistics for dynamic
occupancy models, so this approach should be
considered experimental. Fit-statistics applied to aggregated
-encounter histories offer an alternative approach (MacKenzie and
-Bailey 2004), but are difficult to implement when J*T is high and
+encounter histories offer an alternative approach [@MacKenzie_2004], but are difficult to implement when $J \times T$ is large and
missing values or continuous covariates are present.
-\begin{small}
-
-<<eval=false,echo=true>>=
-
+```r
chisq <- function(fm) {
umf <- getData(fm)
y <- getY(umf)
@@ -994,81 +822,79 @@ chisq <- function(fm) {
set.seed(344)
pb.gof <- parboot(m0, statistic=chisq, nsim=100)
-@
-<<gof,fig=true,include=false,width=5,height=5,echo=false,eval=false>>=
plot(pb.gof, xlab=expression(chi^2), main="", col=gray(0.95),
xlim=c(7300, 7700))
-@
-\end{small}
-
+```
-\begin{figure}[!h]
-\centering
-\includegraphics[width=5in,height=5in]{colext-gof.pdf}
-\caption{Goodness-of-fit}
-\label{fig:gof}
-\end{figure}
+![Figure 3. Goodness-of-fit](colext-gof-1.png)
-Figure~\ref{fig:gof} indicates that, as expected, the constant
+Figure 3 indicates that, as expected, the constant
parameter model does not fit the data well.
+# Dynamic occupancy models for crossbill data from the Swiss MHB
+## The crossbill data set
-\section{Dynamic occupancy models for crossbill data from the Swiss MHB}
-
-\subsection{The crossbill data set}
-The crossbill data are included with the \texttt{unmarked} package.
+The crossbill data are included with the `unmarked` package.
The dataset contains the results of nine years of surveys (1999--2007)
-for the European crossbill (\emph{Loxia curvirostra}),
+for the European crossbill (*Loxia curvirostra*),
a pine-seed eating finch, in 267 1-km$^2$ sample quadrats in Switzerland.
Quadrats are surveyed annually as part of the Swiss breeding bird
-survey MHB (Schmid et al. 2004).
+survey MHB [@schmid_etal:2004].
They are laid out as a grid over Switzerland and surveyed 2 or 3 times
every breeding season (mid-April to late June)
by experienced field ornithologists along a haphazard survey route of
-length 1--9 km (average 5 km).
+length 1-9 km (average 5 km).
High-elevation sites are only surveyed twice per breeding season.
+## Importing, formatting, and summarizing data
+The data can be loaded into an open R workspace using the `data` command.
-\subsection{Importing, formatting, and summarizing data}
-The data can be loaded into an open R workspace using the \verb+data+
-command.
-\begin{small}
-<<>>=
+```r
data(crossbill)
colnames(crossbill)
-@
-\end{small}
-
+```
+
+```
+## [1] "id" "ele" "forest" "surveys" "det991" "det992" "det993"
+## [8] "det001" "det002" "det003" "det011" "det012" "det013" "det021"
+## [15] "det022" "det023" "det031" "det032" "det033" "det041" "det042"
+## [22] "det043" "det051" "det052" "det053" "det061" "det062" "det063"
+## [29] "det071" "det072" "det073" "date991" "date992" "date993" "date001"
+## [36] "date002" "date003" "date011" "date012" "date013" "date021" "date022"
+## [43] "date023" "date031" "date032" "date033" "date041" "date042" "date043"
+## [50] "date051" "date052" "date053" "date061" "date062" "date063" "date071"
+## [57] "date072" "date073"
+```
We have three covariates that vary by site: median elevation of the
-quadrat (ele, in metres), forest cover of the quadrat (forest, in
+quadrat (`ele`, in metres), forest cover of the quadrat (`forest`, in
percent) and the number of surveys per season (i.e., 2 or 3
surveys).
These are called site covariates, because they vary by sites only.
-The 27 columns entitled ``det991''--``det073'' contain the crossbill
+The 27 columns entitled `det991` - `det073` contain the crossbill
detection/nondetection data during all surveys over the 9 years.
They contain a 1 when at least one crossbill was recorded during a
survey and a 0 otherwise.
-NAs indicate surveys that did not take place, either because a site is
+`NA`s indicate surveys that did not take place, either because a site is
high-elevation and has no third survey or because it failed to be
surveyed altogether in a year.
-The final 27 columns entitled ``date991'' -- ``date073'' give the Julian
+The final 27 columns entitled `date991` - `date073` give the Julian
date of each survey.
-They represent a `survey-covariate' or `observation covariate'.
-We note that the paper by Royle and K\'{e}ry (2007) used a subset of this
+They represent a "survey-covariate" or "observation covariate".
+We note that the paper by @Royle_2007 used a subset of this
data set.
-AIC-based model selection (see section 4.4.) requires
+AIC-based model selection (see section 5.4) requires
that all models are fit to the same data.
-\textbf{unmarked} removes missing data in a context specific way. For
-missing siteCovs, the entire row of data must be removed. However, for
-missing \emph{yearlySiteCovs} or \emph{obsCovs}, only the
+`unmarked` removes missing data in a context-specific way. For
+missing `siteCovs`, the entire row of data must be removed. However, for
+missing `yearlySiteCovs` or `obsCovs`, only the
corresponding observations
-are removed. Thus, if \textbf{unmarked} removes different observations
+are removed. Thus, if `unmarked` removes different observations
from different models, the models cannot be compared using AIC. A way
around this is to remove the detection data corresponding to
missing covariates before fitting the models.
@@ -1076,47 +902,42 @@ The crossbill data have missing dates and so we remove the associated
detection/non-detection data.
-<<>>=
+
+```r
DATE <- as.matrix(crossbill[,32:58])
y.cross <- as.matrix(crossbill[,5:31])
y.cross[is.na(DATE) != is.na(y.cross)] <- NA
-@
+```
In addition, continuous covariates should be transformed in a way
that brings their values close to zero in order to improve
or even enable numerical convergence of the maximum-likelihood routine.
-We do this ``by hand'' and note that we could also have used the R
-function \emph{scale}. We subtract the mean and divide by the standard
+We do this "by hand" and note that we could also have used the R
+function `scale`. We subtract the mean and divide by the standard
deviation.
-\begin{small}
-<<eval=true>>=
+```r
sd.DATE <- sd(c(DATE), na.rm=TRUE)
mean.DATE <- mean(DATE, na.rm=TRUE)
DATE <- (DATE - mean.DATE) / sd.DATE
-@
-\end{small}
+```
Before we can fit occupancy models, we need to format this data set
appropriately.
-\begin{small}
-
-<<>>=
+```r
years <- as.character(1999:2007)
years <- matrix(years, nrow(crossbill), 9, byrow=TRUE)
umf <- unmarkedMultFrame(y=y.cross,
siteCovs=crossbill[,2:3], yearlySiteCovs=list(year=years),
obsCovs=list(date=DATE),
numPrimary=9)
-@
-\end{small}
-
+```
+## Model fitting
-\subsection{Model fitting}
We fit a series of models that represent different hypotheses about
the colonization-extinction dynamics of Swiss crossbills
at a spatial scale of 1 km$^2$.
@@ -1125,136 +946,94 @@ parameterization,
but for detection probability, we choose an effects parameterization.
The latter is more useful for getting predictions in the presence of
other explanatory variables for that parameter.
-For model fm5 with more complex covariate relationships, we use as
+For model `fm5` with more complex covariate relationships, we use as
starting values for the optimization routine
-the solution from a ``neighboring'' model with slightly less
-complexity, model fm4.
+the solution from a "neighboring" model with slightly less
+complexity, model `fm4`.
Wise choice of starting values can be decisive for success or failure
of maximum likelihood estimation.
-\begin{small}
-
-<<eval=false>>=
+```r
# A model with constant parameters
fm0 <- colext(~1, ~1, ~1, ~1, umf)
-@
-<<eval=false>>=
+
# Like fm0, but with year-dependent detection
fm1 <- colext(~1, ~1, ~1, ~year, umf)
-@
-<<eval=false>>=
+
# Like fm0, but with year-dependent colonization and extinction
fm2 <- colext(~1, ~year-1, ~year-1, ~1, umf)
-@
-<<eval=false>>=
+
# A fully time-dependent model
fm3 <- colext(~1, ~year-1, ~year-1, ~year, umf)
-@
-<<eval=false>>=
+
# Like fm3 with forest-dependence of 1st-year occupancy
fm4 <- colext(~forest, ~year-1, ~year-1, ~year, umf)
-@
-<<eval=false>>=
+
# Like fm4 with date- and year-dependence of detection
fm5 <- colext(~forest, ~year-1, ~year-1, ~year + date + I(date^2),
umf, starts=c(coef(fm4), 0, 0))
-@
-<<eval=false>>=
+
# Same as fm5, but with detection in addition depending on forest cover
fm6 <- colext(~forest, ~year-1, ~year-1, ~year + date + I(date^2) +
forest, umf)
-@
-\end{small}
-
-
+```
+## Model selection
-\subsection{Model selection}
We can compare models using the Akaike information criterion
($AIC$).
-Note that \textbf{unmarked} yields $AIC$, not $AIC_c$
+Note that `unmarked` yields $AIC$, not $AIC_c$
because the latter would require the sample size,
which is not really known for
hierarchical models such as the dynamic occupancy model.
-Model selection and model-averaged prediction in \textbf{unmarked}
-require that we create a list of models using \emph{fitList}.
+Model selection and model-averaged prediction in `unmarked`
+require that we create a list of models using `fitList`.
This function organizes models and conducts a series of tests to
ensure that the models were fit to the same data.
-\begin{small}
-
-%<<eval=true>>=
-%models <- fitList('psi(.)gam(.)eps(.)p(.)' = fm0,
-% 'psi(.)gam(.)eps(.)p(Y)' = fm1,
-% 'psi(.)gam(Y)eps(Y)p(.)' = fm2,
-% 'psi(.)gam(Y)eps(Y)p(Y)' = fm3,
-% 'psi(F)gam(Y)eps(Y)p(Y)' = fm4,
-% 'psi(F)gam(Y)eps(Y)p(YD2)' = fm5,
-% 'psi(F)gam(Y)eps(Y)p(YD2F)' = fm6)
-%ms <- modSel(models)
-%ms
-%@
-
-\begin{Schunk}
-\begin{Sinput}
-> models <- fitList('psi(.)gam(.)eps(.)p(.)' = fm0,
- 'psi(.)gam(.)eps(.)p(Y)' = fm1,
- 'psi(.)gam(Y)eps(Y)p(.)' = fm2,
- 'psi(.)gam(Y)eps(Y)p(Y)' = fm3,
- 'psi(F)gam(Y)eps(Y)p(Y)' = fm4,
- 'psi(F)gam(Y)eps(Y)p(YD2)' = fm5,
- 'psi(F)gam(Y)eps(Y)p(YD2F)' = fm6)
-> ms <- modSel(models)
-> ms
-\end{Sinput}
-\begin{Soutput}
- nPars AIC delta AICwt cumltvWt
-psi(F)gam(Y)eps(Y)p(YD2F) 30 4986.39 0.00 1.0e+00 1.00
-psi(F)gam(Y)eps(Y)p(YD2) 29 5059.30 72.91 1.5e-16 1.00
-psi(F)gam(Y)eps(Y)p(Y) 27 5095.38 108.99 2.2e-24 1.00
-psi(.)gam(.)eps(.)p(Y) 12 5111.32 124.93 7.5e-28 1.00
-psi(.)gam(Y)eps(Y)p(Y) 26 5127.63 141.24 2.1e-31 1.00
-psi(.)gam(Y)eps(Y)p(.) 18 5170.54 184.15 1.0e-40 1.00
-psi(.)gam(.)eps(.)p(.) 4 5193.50 207.11 1.1e-45 1.00
-\end{Soutput}
-\end{Schunk}
-
-\end{small}
-
+```r
+models <- fitList('psi(.)gam(.)eps(.)p(.)' = fm0,
+ 'psi(.)gam(.)eps(.)p(Y)' = fm1,
+ 'psi(.)gam(Y)eps(Y)p(.)' = fm2,
+ 'psi(.)gam(Y)eps(Y)p(Y)' = fm3,
+ 'psi(F)gam(Y)eps(Y)p(Y)' = fm4,
+ 'psi(F)gam(Y)eps(Y)p(YD2)' = fm5,
+ 'psi(F)gam(Y)eps(Y)p(YD2F)' = fm6)
+ms <- modSel(models)
+ms
+```
+
+```
+## nPars AIC delta AICwt cumltvWt
+## psi(F)gam(Y)eps(Y)p(YD2F) 30 4986.39 0.00 1.0e+00 1.00
+## psi(F)gam(Y)eps(Y)p(YD2) 29 5059.30 72.91 1.5e-16 1.00
+## psi(F)gam(Y)eps(Y)p(Y) 27 5095.38 108.99 2.2e-24 1.00
+## psi(.)gam(.)eps(.)p(Y) 12 5111.32 124.93 7.5e-28 1.00
+## psi(.)gam(Y)eps(Y)p(Y) 26 5127.63 141.24 2.1e-31 1.00
+## psi(.)gam(Y)eps(Y)p(.) 18 5170.54 184.15 1.0e-40 1.00
+## psi(.)gam(.)eps(.)p(.) 4 5193.50 207.11 1.1e-45 1.00
+```
One model has overwhelming support, so we can base inference on that
one alone. Before doing so, we point out how to extract coefficients
-from a \emph{fitList} object, and convert the results to a
-\emph{data.frame}, which could be exported from R.
-
-\begin{small}
+from a `fitList` object, and convert the results to a
+`data.frame`, which could be exported from R.
-%<<eval=false>>=
-%coef(ms) # Estimates only
-%SE(ms) # Standard errors only
-%toExport <- as(ms, "data.frame") # Everything
-%@
-\begin{Schunk}
-\begin{Sinput}
-> coef(ms) # Estimates only
-> SE(ms) # Standard errors only
-> toExport <- as(ms, "data.frame") # Everything
-\end{Sinput}
-\end{Schunk}
+```r
+coef(ms) # Estimates only
+SE(ms) # Standard errors only
+toExport <- as(ms, "data.frame") # Everything
+```
+## Manipulating results: Prediction and plotting
-\end{small}
-
-
-
-\subsection{Manipulating results: Prediction and plotting}
Fitted models can be used to predict expected outcomes when given new
-data. For example, one could ask ``how many crossbills would you
-expect to find in a quadrat with 50\% forest cover?'' Prediction also
+data. For example, one could ask "how many crossbills would you
+expect to find in a quadrat with 50% forest cover?" Prediction also
offers a way of
presenting the results of an analysis. We illustrate by plotting the
predictions of $\psi$ and $p$ over the range of covariate values studied.
@@ -1263,9 +1042,7 @@ to its original scale after obtaining predictions on the
standardized scale.
-\begin{small}
-
-<<cov,eval=false,fig=true,include=false,width=6,height=3>>=
+```r
op <- par(mfrow=c(1,2), mai=c(0.8,0.8,0.1,0.1))
nd <- data.frame(forest=seq(0, 100, length=50))
@@ -1292,138 +1069,23 @@ with(E.p, {
lines(dateOrig, Predicted+1.96*SE, col=gray(0.7))
lines(dateOrig, Predicted-1.96*SE, col=gray(0.7))
})
-par(op)
-@
-
-\end{small}
-
-
-\begin{figure}[!h]
-\centering
-\includegraphics[width=6in,height=3in]{colext-cov.pdf}
-\caption{Covariates}
-\label{fig:cov}
-\end{figure}
-
-
+```
+![Figure 4. Covariates](colext-pred-1.png)
+```r
+par(op)
+```
+**Acknowledgments**
-\section*{Acknowledgments}
-Special thanks goes to Ian Fiske, the author of \emph{colext} and the
-original developer of \textbf{unmarked}. Andy Royle provided the
+Special thanks goes to Ian Fiske, the author of `colext` and the
+original developer of `unmarked`. Andy Royle provided the
initial funding and support for the package. The questions of many
people on the users' list motivated the writing of this document.
+# References
-
-\newpage
-
-\section*{References}
-\newcommand{\rf}{\vskip .1in\par\sloppy\hangindent=1pc\hangafter=1
- \noindent}
-
-\rf Davison, A.C and D.V. Hinkley. 1997. \emph{Bootstrap Methods and Their
-Application}, first ed. Cambridge University Press.
-
-\rf Dorazio, R.M., and Royle, J.A. 2005. Estimating size and
-composition of biological communities by modeling the occurrence of
-species. Journal of the American Statistical Association 100:
-389--398.
-
-\rf Dorazio, R.M., K\'{e}ry, M., Royle, J.A., and Plattner,
-M. 2010. Models for inference in dynamic metacommunity
-systems. Ecology 91: 2466--2475.
-
-\rf Hanski, I. 1998. Metapopulation dynamics. Nature 396: 41--49.
-
-\rf Hosmer, D.W., T. Hosmer, S. le Cressie, and S. Lemeshow. 1997. A
-comparision of goodness-of-fit tests for the logistic
-regression model. Statistics in Medicine 16:965--980.
-
-\rf K\'{e}ry, M. 2010. \emph{Introduction to WinBUGS for
- Ecologists. A Bayesian approach to regression, ANOVA, mixed
- models and related analyses}. Academic Press, Burlington, MA.
-
-\rf K\'{e}ry, M., Royle, J.A., Plattner, M, and Dorazio,
-R.M. 2009. Species richness and occupancy estimation in communities
-subject to temporary emigration. Ecology 90: 1279--1290.
-
-\rf K\'{e}ry, M., and Schaub, M. 2011. \emph{Bayesian population
- analysis using WinBUGS}. Academic Press, Burlington. (due December
-2011)
-
-\rf K\'{e}ry, M., and Schmidt, B.R. 2008. Imperfect detection and its
-consequences for monitoring for conservation. Community Ecology 9:
-207--216.
-
-\rf MacKenzie, D.I and L. Bailey. 2004. Assessing the fit of
-site-occupancy models. Journal of Agricultural, Biological, and
-Environmental Statistics 9:300--318.
-
-\rf MacKenzie, D.I., Nichols, J.D., Hines, J.E., Knutson, M.G., and
-Franklin, A.B. 2003. Estimating site occupancy, colonization, and
-local extinction when a species is detected imperfectly. Ecology 84:
-2200--2207.
-
-\rf MacKenzie, D.I., Nichols, J.D., Lachman, G.B., Droege, S., Royle,
-J.A., and Langtimm, C.A. 2002. Estimating site occupancy rates when
-detection probability rates are less than one. Ecology 83:
-2248--2255.
-
-\rf MacKenzie, D.I., Nichols, J.D., Seamans, M.E., and Gutierrez,
-R.J. 2009. Modeling species occurrence dynamics with multiple states
-and imperfect detection. Ecology 90: 823--835.
-
-\rf McCullagh, P., and Nelder, J.A. 1989. \emph{Generalized linear
- models}. Chapman and Hall.
-
-\rf Miller, D.A., Nichols, J.D., McClintock, B.T., Grant, E.H.C.,
-Bailey, L.L., and Weir, L. 2011. Improving occupancy estimation when
-two types of observational errors occur: non-detection and species
-misidentification. Ecology, in press.
-
-\rf Moilanen, A. 2002. Implications of empirical data quality to
-metapopulation model parameter estimation and application. Oikos 96:
-516--530.
-
-\rf Nichols, J.D., Hines, J.E., MacKenzie, D.I., Seamans, M.E., and
-Gutierrez, R.J. 2007. Occupancy estimation and modeling with multiple
-states and state uncertainty. Ecology 88: 1395--1400.
-
-\rf Royle, J.A., Dorazio, R.M. 2008. \emph{Hierarchical modeling and
- inference in ecology: The analysis of data from populations,
- metapopulations, and communities}. Academic Press, San Diego.
-
-\rf Royle, J.A., and K\'{e}ry, M. 2007. A Bayesian state-space
-formulation of dynamic occupancy models. Ecology 88: 1813--1823.
-
-\rf Royle, J.A., and Link, W.A., 2005. A general class of multinomial
-mixture models for anuran calling survey data. Ecology 86:
-2505--2512.
-
-\rf Royle, J.A., and Link, W.A., 2006. Generalized site occupancy
-models allowing for false positive and false negative errors. Ecology
-87: 835--841.
-
-\rf Royle, J.A., and Nichols, J.D. 2003. Estimating abundance from
-repeated presence-absence data or point counts. Ecology 84, 777--790.
-
-\rf Schmid, H., Zbinden, N., and Keller,
-V. 2004. \emph{\"{U}berwachung der Bestandsentwicklung h\"{a}ufiger
- Brutv\"{o}gel in der Schweiz}. Swiss Ornithological Institute,
-Sempach, Switzerland.
-
-\rf Tyre, A.J., Tenhumberg, B., Field, S.A., Niejalke, D., Parris, K.,
-and Possingham, H.P. 2003. Improving precision and reducing bias in
-biological surveys: estimating false-negative error rates. Ecological
-Applications 13, 1790--1801.
-
-\rf Weir, L., I.J. Fiske, and J.A. Royle. 2009. Trends in anuran
-occupancy from northeastern states of the North American Amphibian
-Monitoring Program. Herpetological Conservation and Biology
-4:389--402.
-
-
-\end{document}
+```{r, echo=FALSE}
+options(rmarkdown.html_vignette.check_title = FALSE)
+```
diff --git a/vignettes/colext.Rmd.orig b/vignettes/colext.Rmd.orig
new file mode 100644
index 0000000..9e5a2cb
--- /dev/null
+++ b/vignettes/colext.Rmd.orig
@@ -0,0 +1,873 @@
+---
+title: Dynamic occupancy models in unmarked
+author:
+- name: Marc Kéry, Swiss Ornithological Institute
+- name: Richard Chandler, University of Georgia
+date: August 16, 2016
+bibliography: unmarked.bib
+csl: ecology.csl
+output:
+ rmarkdown::html_vignette:
+ fig_width: 5
+ fig_height: 3.5
+ number_sections: true
+ toc: true
+vignette: >
+ %\VignetteIndexEntry{Dynamic occupancy models}
+ %\VignetteEngine{knitr::rmarkdown}
+ \usepackage[utf8]{inputenc}
+---
+
+```{r,echo=FALSE}
+options(rmarkdown.html_vignette.check_title = FALSE)
+knitr::opts_chunk$set(message=FALSE, warning=FALSE)
+knitr::opts_chunk$set(fig.path="")
+set.seed(456)
+```
+
+# Abstract
+
+Dynamic occupancy models [@mackenzie_estimating_2003] allow inference about
+the occurrence of "things" at collections of "sites"
+and about how changes in occurrence are driven by colonization and
+local extinction. These models also account for imperfect detection
+probability. Depending on how "thing" and "site" are defined,
+occupancy may have vastly different biological meanings,
+including the presence of a disease in an individual (disease
+incidence), of a species at a site (occurrence, distribution), or of an
+individual in a territory.
+Dynamic occupancy models in `unmarked` are fit using the
+function `colext`.
+All parameters can be modeled as functions of covariates, i.e.,
+first-year occupancy with covariates varying by site
+(site-covariates),
+colonization and survival with site- and yearly-site-covariates and
+detection with site-, yearly-site- and sample-occasion-covariates.
+We give two commented example analyses: one for a simulated data set
+and another for a real data set on crossbills in the Swiss breeding
+bird survey MHB.
+We also give examples to show how predictions, along with standard
+errors and confidence intervals, can be obtained.
+
+# Introduction
+
+Occurrence is a quantity of central importance in many branches of
+ecology and related sciences.
+The presence of a disease in an individual or of a species
+at a site are two common types of occurrence studies.
+The associated biological metrics are the incidence of the disease and
+species occurrence or species distribution.
+Thus, depending on how we define the thing we are looking for and the
+sample unit, very different biological quantities can be analyzed
+using statistical models for occupancy.
+
+If we denote presence of the "thing" as $y=1$ and its absence as
+$y=0$, then it is natural to characterize all these metrics by the
+probability that a randomly chosen sample unit ("site") is occupied,
+i.e., has a "thing" present: $Pr(y=1) = \psi$.
+We call this the occupancy probability, or occupancy for short, and
+from now on will call the sample unit,
+where the presence or absence of a "thing" is assessed, generically
+a "site".
+
+Naturally, we would like to explore factors that affect the likelihood
+that a site is occupied.
+A binomial generalized linear model, or logistic regression, is the
+customary statistical model for occurrence.
+In this model, we treat occurrence $y$ as a binomial random variable
+with trial size 1 and success probability $p$, or, equivalently, a
+Bernoulli trial with $p$.
+"Success" means occurrence, so $p$ is the occurrence probability.
+It can be modeled as a linear or other function of covariates via a
+suitable link function, e.g., the logit link.
+This simple model is described in many places, including @McCullagh_1989,
+Royle and Dorazio [-@royle_dorazio:2008, chapter 3], Kéry [-@Kery_2010,
+chapter 17] and Kéry and Schaub [-@Kery_2011, chapter 3].
+
+A generalization of this model accounts for changes in the occupancy
+state of sites by introducing parameters for survival
+(or alternatively, extinction) and colonization probability.
+Thus, when we have observations of occurrence for more than a single
+point in time, we can model the transition of the occupancy
+state at site $i$ between successive times as another Bernoulli trial.
+To model the fate of an occupied site, we denote the probability that
+a site occupied at $t$ is again occupied at $t+1$ as $Pr(y_{i,t+1} = 1
+| y_{i,t} = 1 ) = \phi$.
+This represents the survival probability of a site that is occupied.
+Of course, we could also choose to express this component of occupancy
+dynamics by the converse, extinction probability $\epsilon$ ---
+the parameterization used in `unmarked`.
+To model the fate of an unoccupied site, we denote as $Pr(y_{i,t+1} =
+1 | y_{i,t} = 0 ) = \gamma$ the probability that an unoccupied site at
+$t$ becomes occupied at $t+1$.
+This is the colonization probability of an empty site.
+Such a dynamic model of occurrence has become famous in the ecological literature under the name "metapopulation model" [@Hanski_1998].
+
+However, when using ecological data collected in the field to fit such
+models of occurrence, we face the usual challenge of imperfect
+detection [e.g. @Kery_2008].
+For instance, a species can go unobserved at a surveyed site or an
+occupied territory can appear unoccupied during a particular survey,
+perhaps because both birds are away hunting.
+Not accounting for detection error may seriously bias all parameter
+estimators of a metapopulation model [@Moilanen_2002; @royle_dorazio:2008].
+To account for this additional stochastic component in the generation
+of most ecological field data, the classical metapopulation model may
+be generalized to include a submodel for the observation process,
+which allows an occupied site to be recorded as unoccupied.
+This model has been developed by @mackenzie_estimating_2003. It is
+described as a hierarchical model by @Royle_2007, Royle
+and Dorazio [-@royle_dorazio:2008, chapter 9] and Kéry and Schaub [-@Kery_2011, chapter 13].
+The model is usually called a multi-season, multi-year or a
+dynamic site-occupancy model.
+The former terms denote the fact that it is applied to multiple
+"seasons" or years and the latter emphasizes that the model allows
+for between-season occurrence dynamics.
+
+This vignette describes the use of the `unmarked` function
+`colext` to fit dynamic occupancy models. Note that names of
+functions and other R objects are shown in code font.
+Static occupancy models, i.e., for a single season without changes in
+the occupancy state [@mackenzie_estimating_2002], can be fit with `occu`,
+for the model described by @mackenzie_estimating_2002 and @Tyre_2002, and with `occuRN`, for the heterogeneity occupancy model
+described by @royle_estimating_2003.
+In the next section (section 2), we give a more technical description
+of the dynamic occupancy model.
+In section 3, we provide R code for generating data under a basic
+dynamic occupancy model and illustrate use of `colext` for fitting the
+model.
+In section 4, we use real data from the Swiss breeding bird survey MHB
+[@schmid_etal:2004] to fit a few more elaborate models with
+covariates for all parameters.
+We also give examples illustrating how to compute predictions, with
+standard errors and 95% confidence intervals, for the parameters.
+
+# Dynamic occupancy models
+
+To be able to estimate the parameters of the dynamic occupancy model
+(probabilities of occurrence, survival and colonization) separately
+from the parameters for the observation process (detection
+probability), replicate observations are required from a period of
+closure,
+during which the occupancy state of a site must remain constant, i.e.,
+it is either occupied or unoccupied.
+The modeled data $y_{ijt}$ are indicators for whether a species is
+detected at site $i$ ($i = 1, 2, \ldots M$), during replicate survey
+$j$ ($j = 1, 2, \ldots J$) in season $t$ ($t = 1, 2, \ldots T$).
+That is, $y_{ijt}=1$ if at least one individual is detected and
+$y_{ijt}=0$ if none is detected.
+
+The model makes the following assumptions:
+
+* replicate surveys at a site during a single season are
+  independent (or else dependency must be modeled)
+* occurrence state $z_{it}$ (see below) does not change over
+  replicate surveys at site $i$ during season $t$
+* there are no false-positive errors, i.e., a species can only be
+  overlooked where it occurs, but it cannot be detected where it does
+  not in fact occur
+
+The complete model consists of one submodel to describe the ecological
+process, or state, and another submodel for the observation process,
+which is dependent on the result of the ecological process.
+The ecological process describes the latent occurrence dynamics for
+all sites in terms of parameters for the probability of initial
+occurrence and site survival and colonization.
+The observation process describes the probability of detecting a
+presence (i.e., $y = 1$) at a site that is occupied and takes account
+of false-negative observation errors.
+
+## Ecological or state process
+
+The initial state is denoted $z_{i1}$ and represents occurrence at
+site $i$ during season 1.
+For this, the model assumes a Bernoulli trial governed by the
+occupancy probability in the first season $\psi_{i1}$:
+
+$$
+z_{i1} \sim Bernoulli(\psi_{i1})
+$$
+
+We must distinguish the sample quantity "occurrence" at a site, $z$,
+from the population quantity "occupancy probability", $\psi$.
+The former is the realization of a Bernoulli random variable with
+parameter $\psi$.
+This distinction becomes important when we want to compute the number
+of occupied sites among the sample of surveyed sites;
+see @Royle_2007 and @Weir_2009 for this
+distinction.
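+
+As a one-line illustration of this distinction (hypothetical numbers:
+$\psi = 0.4$ and 100 surveyed sites), the realized proportion of
+occupied sites in a finite sample varies around the parameter value:
+
+```{r, eval=FALSE}
+# Realized finite-sample proportion of occupied sites vs. psi = 0.4
+mean(rbinom(100, 1, 0.4))
+```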
+
+For all later seasons ($t = 2, 3, \ldots T$), occurrence is a function
+of occurrence at site $i$ at time $t-1$ and one of two parameters that
+describe the colonization-extinction dynamics of the system.
+These dynamic parameters are the probability of local survival
+$\phi_{it}$, also called probability of persistence (= 1 minus the
+probability of local extinction),
+and the probability of colonization $\gamma_{it}$.
+
+$$
+z_{it} \sim Bernoulli(z_{i,t-1} \phi_{it} + (1-z_{i,t-1}) \gamma_{it})
+$$
+
+Hence, if site $i$ is unoccupied at $t-1$, $z_{i,t-1}=0$, and the
+success probability of the Bernoulli is
+$0 \cdot \phi_{it} + (1-0) \cdot \gamma_{it} = \gamma_{it}$, so the site is
+occupied (i.e., colonized) in season $t$ with probability $\gamma_{it}$.
+Conversely, if site $i$ is occupied at $t-1$, $z_{i,t-1}=1$, and the
+success probability of the Bernoulli is $1 \cdot \phi_{it} +
+(1-1) \cdot \gamma_{it} = \phi_{it}$, so the site remains occupied in
+(i.e., survives to) season $t$ with probability $\phi_{it}$.
+
+Occupancy probability ($\psi_{it}$) and occurrence ($z_{it}$) at all
+later times $t$ can be computed recursively from $\psi_{i1}$,
+$z_{i1}$ , $\phi_{it}$ and $\gamma_{it}$.
+Variances of these derived estimates can be obtained via the delta
+method or the bootstrap.
+
+## Observation process
+
+To account for the observation error (specifically, false-negative
+observations), the conventional Bernoulli detection process is
+assumed, such that
+
+$$
+y_{ijt} \sim Bernoulli(z_{it} p_{ijt})
+$$
+
+Here, $p_{ijt}$ is the detection probability at site $i$ during
+survey $j$ and season $t$. Detection is conditional on occurrence, and
+multiplying $p_{ijt}$ with $z_{it}$ ensures that occurrence can only
+be detected where in fact a species occurs, i.e. where $z_{it}=1$.
+
+## Modeling of parameters
+
+The preceding, fully general model description allows for site-($i$)
+dependence of all parameters. In addition to that, survival and
+colonization probabilities may be season-($t$) dependent and detection
+probability season-($t$) and survey-($j$) dependent.
+All this complexity may be dropped, especially the dependence on
+sites. On the other hand, all parameters that are indexed in some way
+can be modeled, e.g., as functions of covariates that vary along the
+dimension denoted by an index. We will fit linear functions (on the
+logit link scale) of covariates into first-year occupancy, survival
+and colonization and into detection probability.
+That is, for probabilities of first-year occupancy, survival,
+colonization and detection, respectively, we will fit models of the
+form:
+
+* $logit(\psi_{i1}) = \alpha + \beta x_i$, where $x_i$ may be forest
+  cover or elevation of site $i$,
+* $logit(\phi_{it}) = \alpha + \beta x_{it}$, where $x_{it}$ may be
+  tree mast at site $i$ during season $t$,
+* $logit(\gamma_{it}) = \alpha + \beta x_{it}$, for a similarly
+  defined covariate $x_{it}$, and
+* $logit(p_{ijt}) = \alpha + \beta x_{ijt}$, where $x_{ijt}$ is the
+  Julian date of survey $j$ at site $i$ in season $t$.
+
+We note that for first-year occupancy, only covariates that vary among
+sites ("site covariates") can be fitted, while for survival and
+colonization, covariates that vary by site and by season ("yearly
+site covariates") may be fitted as well.
+For detection, covariates of three formats may be fitted:
+"site-covariates", "yearly-site-covariates" and
+"observation-covariates", as
+they are called in `unmarked`.
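+
+As a sketch of how these four linear predictors map onto the arguments
+of `colext` (the covariate names `elev`, `mast`, and `date` here are
+hypothetical, and `umf` stands for an already-constructed
+`unmarkedMultFrame`; not run):
+
+```{r, eval=FALSE}
+fm <- colext(psiformula     = ~ elev,  # first-year occupancy (site covariate)
+             gammaformula   = ~ mast,  # colonization (yearly-site covariate)
+             epsilonformula = ~ mast,  # extinction (yearly-site covariate)
+             pformula       = ~ date,  # detection (observation covariate)
+             data = umf)
+```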
+
+# Dynamic occupancy models for simulated data
+
+We first generate a simple, simulated data set
+with specified, year-specific values for
+the parameters as well as design specifications, i.e., number of
+sites, years and surveys per year.
+Then, we show how to fit a dynamic occupancy model with
+year-dependence in the parameters for colonization, extinction and
+detection probability.
+
+## Simulating, formatting, and summarizing data
+
+To simulate the data, we execute the following R code.
+The actual values for these parameters for each year are drawn
+randomly from a uniform distribution with
+the specified bounds.
+
+```{r}
+M <- 250 # Number of sites
+J <- 3 # num secondary sample periods
+T <- 10 # num primary sample periods
+
+psi <- rep(NA, T) # Occupancy probability
+muZ <- z <- array(dim = c(M, T)) # Expected and realized occurrence
+y <- array(NA, dim = c(M, J, T)) # Detection histories
+
+set.seed(13973)
+psi[1] <- 0.4 # Initial occupancy probability
+p <- c(0.3,0.4,0.5,0.5,0.1,0.3,0.5,0.5,0.6,0.2)
+phi <- runif(n=T-1, min=0.6, max=0.8) # Survival probability (1-epsilon)
+gamma <- runif(n=T-1, min=0.1, max=0.2) # Colonization probability
+
+# Generate latent states of occurrence
+# First year
+z[,1] <- rbinom(M, 1, psi[1]) # Initial occupancy state
+# Later years
+for(i in 1:M){ # Loop over sites
+ for(k in 2:T){ # Loop over years
+    muZ[i,k] <- z[i, k-1]*phi[k-1] + (1-z[i, k-1])*gamma[k-1]
+    z[i,k] <- rbinom(1, 1, muZ[i,k])
+ }
+}
+
+# Generate detection/non-detection data
+for(i in 1:M){
+ for(k in 1:T){
+ prob <- z[i,k] * p[k]
+ for(j in 1:J){
+ y[i,j,k] <- rbinom(1, 1, prob)
+ }
+ }
+}
+
+# Compute annual population occupancy
+for (k in 2:T){
+ psi[k] <- psi[k-1]*phi[k-1] + (1-psi[k-1])*gamma[k-1]
+ }
+```
+
+We have now generated a single realization from the stochastic system
+thus defined. Figure 1
+illustrates the fundamental issue
+of imperfect detection --- the actual proportion of sites occupied
+differs greatly from the observed proportion of sites occupied, and
+because $p$ varies among years, the observed data cannot be used as a
+valid index of the parameter of interest $\psi_t$.
+
+
+
+```{r colext-data, fig.width=5, fig.height=5, fig.cap="Figure 1. Summary of the multi-year occupancy data set generated."}
+plot(1:T, colMeans(z), type = "b", xlab = "Year",
+ ylab = "Proportion of sites occupied",
+ col = "black", xlim=c(0.5, 10.5), xaxp=c(1,10,9),
+ ylim = c(0,0.6), lwd = 2, lty = 1,
+ frame.plot = FALSE, las = 1, pch=16)
+
+psi.app <- colMeans(apply(y, c(1,3), max))
+lines(1:T, psi.app, type = "b", col = "blue", lty=3, lwd = 2)
+legend(1, 0.6, c("truth", "observed"),
+ col=c("black", "blue"), lty=c(1,3), pch=c(16,1))
+```
+
+To analyze this data set with a dynamic occupancy model in
+`unmarked`, we first load the package.
+
+```{r}
+library(unmarked)
+```
+
+Next, we reformat the detection/non-detection data from a 3-dimensional
+array (as generated) into a 2-dimensional matrix with M rows.
+That is, we put the annual tables of data (the slices of the former
+3-D array) sideways to produce a "wide" layout of the data.
+
+```{r}
+yy <- matrix(y, M, J*T)
+```
+
+Next, we create a matrix indicating the year each site was surveyed.
+
+```{r}
+year <- matrix(c('01','02','03','04','05','06','07','08','09','10'),
+ nrow(yy), T, byrow=TRUE)
+```
+
+To organize the data in the format required by `colext`, we make
+use of the function `unmarkedMultFrame`. The only required
+arguments are `y`, the detection/non-detection data, and
+`numPrimary`, the number of seasons. The three types of
+covariates described earlier can also be supplied using the arguments
+`siteCovs`, `yearlySiteCovs`, and `obsCovs`. In this case,
+we only make use of the second type, which must have M rows and T
+columns.
+
+```{r}
+simUMF <- unmarkedMultFrame(
+ y = yy,
+ yearlySiteCovs = list(year = year),
+ numPrimary=T)
+summary(simUMF)
+```
+
+## Model fitting
+
+We are ready to fit a few dynamic occupancy models.
+We will fit a model with constant values for all parameters and
+another with full time-dependence for colonization, extinction and
+detection probability. We also time the calculations.
+
+```{r}
+# Model with all constant parameters
+m0 <- colext(psiformula= ~1, gammaformula = ~ 1, epsilonformula = ~ 1,
+ pformula = ~ 1, data = simUMF, method="BFGS")
+summary(m0)
+```
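+
+The fitting call can be wrapped in base R's `system.time` to measure
+the computation time referred to below; a sketch (not run):
+
+```{r, eval=FALSE}
+system.time(
+  colext(psiformula= ~1, gammaformula = ~ 1, epsilonformula = ~ 1,
+         pformula = ~ 1, data = simUMF, method="BFGS")
+)
+```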
+
+The computation time was only a few seconds.
+Note that all parameters were estimated on the logit scale. To
+back-transform to the original scale, we can simply use the
+inverse-logit function, named `plogis` in R.
+
+```{r}
+plogis(-0.813)
+```
+
+Alternatively, we can use `backTransform`, which
+computes standard errors using the delta method. Confidence intervals
+are also easily obtained using the function `confint`.
+We first remind ourselves of the names of parameters, which can all be
+used as arguments for these functions.
+
+```{r}
+names(m0)
+backTransform(m0, type="psi")
+confint(backTransform(m0, type="psi"))
+```
+
+Next, we fit the dynamic occupancy model with full year-dependence in
+the parameters describing occupancy dynamics and also in detection.
+This is the same model under which we generated the data set, so we
+would expect accurate estimates.
+
+By default in R, a factor such as year in this analysis is
+parameterized in terms of an intercept and effects representing
+differences. This would mean that the parameter for the first year is
+the intercept and the effects would denote the differences between
+the parameter values in all other years, relative to the parameter
+value in the first year, which serves as a reference level.
+This treatment or effects parameterization is useful for testing for
+differences. For simple presentation, a means parameterization is more
+practical. It can be specified by adding a -1 to the formula for the
+time-dependent parameters.
+
+```{r}
+m1 <- colext(psiformula = ~1, # First-year occupancy
+ gammaformula = ~ year-1, # Colonization
+ epsilonformula = ~ year-1, # Extinction
+ pformula = ~ year-1, # Detection
+ data = simUMF)
+m1
+```
+
+## Manipulating results: prediction and plotting
+
+Again, all estimates are shown on the logit-scale. Back-transforming
+estimates when covariates, such as year, are present involves an
+extra step. Specifically, we need to tell `unmarked` the values
+of our covariate
+at which we want an estimate. This can be done using
+`backTransform` in combination with `linearComb`, although
+it can be easier to use `predict`. `predict` allows the user
+to supply a data.frame in which each row represents a combination of
+covariate values of interest. Below, we create data.frames called
+`nd` with each row representing a year.
+Then we request yearly estimates of the probability of extinction,
+colonization and detection,
+and compare them to "truth", i.e., the values with which we
+simulated the data set. Note that there are T-1 extinction and
+colonization parameters in this case, so we do not need to include
+year 10 in `nd`.
+
+```{r}
+nd <- data.frame(year=c('01','02','03','04','05','06','07','08','09'))
+E.ext <- predict(m1, type='ext', newdata=nd)
+E.col <- predict(m1, type='col', newdata=nd)
+nd <- data.frame(year=c('01','02','03','04','05','06','07','08','09','10'))
+E.det <- predict(m1, type='det', newdata=nd)
+```
+
+`predict` returns the predictions along with standard errors and
+confidence intervals. These can be used to create plots. The
+`with` function is used to simplify the process of requesting the
+columns of `data.frame` returned by `predict`.
+
+```{r colext-est, fig.height=7, fig.width=3, fig.cap="Figure 2. Yearly estimates of parameters"}
+op <- par(mfrow=c(3,1), mai=c(0.6, 0.6, 0.1, 0.1))
+
+with(E.ext, { # Plot for extinction probability
+ plot(1:9, Predicted, pch=1, xaxt='n', xlab='Year',
+ ylab=expression(paste('Extinction probability ( ', epsilon, ' )')),
+ ylim=c(0,1), col=4)
+ axis(1, at=1:9, labels=nd$year[1:9])
+ arrows(1:9, lower, 1:9, upper, code=3, angle=90, length=0.03, col=4)
+ points((1:9)-0.1, 1-phi, col=1, lwd = 1, pch=16)
+ legend(7, 1, c('Parameter', 'Estimate'), col=c(1,4), pch=c(16, 1),
+ cex=0.8)
+ })
+
+with(E.col, { # Plot for colonization probability
+ plot(1:9, Predicted, pch=1, xaxt='n', xlab='Year',
+ ylab=expression(paste('Colonization probability ( ', gamma, ' )')),
+ ylim=c(0,1), col=4)
+ axis(1, at=1:9, labels=nd$year[1:9])
+ arrows(1:9, lower, 1:9, upper, code=3, angle=90, length=0.03, col=4)
+ points((1:9)-0.1, gamma, col=1, lwd = 1, pch=16)
+ legend(7, 1, c('Parameter', 'Estimate'), col=c(1,4), pch=c(16, 1),
+ cex=0.8)
+ })
+
+with(E.det, { # Plot for detection probability: note 10 years
+ plot(1:10, Predicted, pch=1, xaxt='n', xlab='Year',
+ ylab=expression(paste('Detection probability ( ', p, ' )')),
+ ylim=c(0,1), col=4)
+ axis(1, at=1:10, labels=nd$year)
+ arrows(1:10, lower, 1:10, upper, code=3, angle=90, length=0.03, col=4)
+ points((1:10)-0.1, p, col=1, lwd = 1, pch=16)
+ legend(7.5, 1, c('Parameter','Estimate'), col=c(1,4), pch=c(16, 1),
+ cex=0.8)
+ })
+
+par(op)
+```
+
+Figure 2 shows that the 95% confidence intervals
+include the true parameter values, and the point estimates are not too
+far off.
+
+## Derived parameters
+
+Estimates of occupancy probability in years $t>1$ must be derived from the
+estimates of first-year occupancy and the two parameters governing the
+dynamics, extinction/survival and colonization.
+`unmarked` does this automatically in two ways. First, the
+population-level estimates of occupancy probability
+$\psi_t = \psi_{t-1}\phi_{t-1} + (1-\psi_{t-1})\gamma_{t-1}$ are calculated
+and stored in the slot named `projected`. Slots can be accessed
+using the `@` operator, e.g. `fm@projected`.
+In some cases, interest may lie in making
+inference about the proportion of the sampled sites that are occupied,
+rather than the entire population of sites. These estimates are
+contained in the `smoothed` slot of the fitted model. Thus, the
+`projected` values are estimates of population parameters, and
+the `smoothed` estimates are of the finite-sample
+quantities. Discussions of the differences can be found in @Weir_2009.
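+
+For example, the population-level estimates can be pulled from the
+`projected` slot and compared with the values used to simulate the
+data (a quick sketch, not run; as with `smoothed` below, the second
+row of the slot corresponds to the occupied state):
+
+```{r, eval=FALSE}
+cbind(truth = psi, projected = m1@projected[2,])
+```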
+
+Bootstrap methods can be used to compute standard errors of derived
+parameter estimates. Here we employ a non-parametric bootstrap to obtain
+standard errors of the smoothed estimates of occupancy probability
+during each year.
+
+```{r}
+m1 <- nonparboot(m1, B = 10)
+cbind(psi=psi, smoothed=smoothed(m1)[2,], SE=m1@smoothed.mean.bsse[2,])
+```
+
+In practice, `B` should be much higher, possibly >1000 for complex
+models.
+
+Another derived parameter of interest is the turnover probability
+
+$$
+\tau_t = \frac{\gamma_{t-1}(1-\psi_{t-1})}{\gamma_{t-1}(1-\psi_{t-1})
+ + \phi_{t-1}\psi_{t-1}}
+$$
+
+The following function returns these estimates.
+
+```{r}
+turnover <- function(fm) {
+ psi.hat <- plogis(coef(fm, type="psi"))
+ if(length(psi.hat) > 1)
+ stop("this function only works if psi is scalar")
+ T <- getData(fm)@numPrimary
+ tau.hat <- numeric(T-1)
+ gamma.hat <- plogis(coef(fm, type="col"))
+ phi.hat <- 1 - plogis(coef(fm, type="ext"))
+ if(length(gamma.hat) != T-1 | length(phi.hat) != T-1)
+ stop("this function only works if gamma and phi T-1 vectors")
+ for(t in 2:T) {
+ psi.hat[t] <- psi.hat[t-1]*phi.hat[t-1] +
+ (1-psi.hat[t-1])*gamma.hat[t-1]
+ tau.hat[t-1] <- gamma.hat[t-1]*(1-psi.hat[t-1]) / psi.hat[t]
+ }
+ return(tau.hat)
+ }
+```
+
+The bootstrap again offers a means of estimating variance. Here we
+show how to generate 95% confidence intervals for the turnover
+estimates using the parametric bootstrap.
+
+```{r}
+pb <- parboot(m1, statistic=turnover, nsim=2)
+turnCI <- cbind(pb@t0,
+ t(apply(pb@t.star, 2, quantile, probs=c(0.025, 0.975))))
+colnames(turnCI) <- c("tau", "lower", "upper")
+turnCI
+```
+
+Which bootstrap method is most appropriate for variance estimation?
+For detailed distinctions between the
+non-parametric and the parametric bootstrap, see @Davison_1997.
+We note simply that the parametric bootstrap resamples from
+the fitted model, and thus the
+measures of uncertainty are purely
+functions of the distributions assumed by the model. Non-parametric
+bootstrap samples, in contrast, are obtained by resampling the
+data, not the model, and thus are not necessarily affected by the
+variance formulas of the model's distributions.
+
+## Goodness-of-fit
+
+In addition to estimating the variance of an estimate, the parametric
+bootstrap can be used to assess goodness-of-fit. For this purpose, a
+fit-statistic, i.e. one that compares
+observed and expected values, is evaluated using the original fitted
+model, and numerous other models fitted to simulated datasets. The
+simulation yields an approximation of
+the distribution of the fit-statistic, and a *P*-value
+can be computed as the proportion of simulated values greater than the
+observed value.
+
+@Hosmer_1997 found that a $\chi^2$ statistic performed
+reasonably well in assessing lack of fit for logistic regression
+models. We know of no studies formally
+evaluating the performance of various fit-statistics for dynamic
+occupancy models, so this approach should be
+considered experimental. Fit-statistics applied to aggregated
+encounter histories offer an alternative approach [@MacKenzie_2004], but are difficult to implement when $J \times T$ is large and
+missing values or continuous covariates are present.
+
+```{r colext-gof, fig.height=5, fig.width=5, fig.cap="Figure 3. Goodness-of-fit"}
+chisq <- function(fm) {
+ umf <- getData(fm)
+ y <- getY(umf)
+ sr <- fm@sitesRemoved
+ if(length(sr)>0)
+ y <- y[-sr,,drop=FALSE]
+ fv <- fitted(fm, na.rm=TRUE)
+ y[is.na(fv)] <- NA
+ sum((y-fv)^2/(fv*(1-fv)))
+ }
+
+set.seed(344)
+pb.gof <- parboot(m0, statistic=chisq, nsim=100)
+
+plot(pb.gof, xlab=expression(chi^2), main="", col=gray(0.95),
+ xlim=c(7300, 7700))
+```
+
+Figure 3 indicates that, as expected, the constant
+parameter model does not fit the data well.
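+
+The bootstrap *P*-value described above can be computed directly from
+the slots of the `parboot` object (a sketch, not run):
+
+```{r, eval=FALSE}
+# Proportion of simulated chi-square values exceeding the observed value
+mean(pb.gof@t.star[,1] > pb.gof@t0[1])
+```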
+
+# Dynamic occupancy models for crossbill data from the Swiss MHB
+
+## The crossbill data set
+
+The crossbill data are included with the `unmarked` package.
+The dataset contains the results of nine years of surveys (1999--2007)
+for the European crossbill (*Loxia curvirostra*),
+a pine-seed eating finch, in 267 1-km$^2$ sample quadrats in Switzerland.
+Quadrats are surveyed annually as part of the Swiss breeding bird
+survey MHB [@schmid_etal:2004].
+They are laid out as a grid over Switzerland and surveyed 2 or 3 times
+every breeding season (mid-April to late June)
+by experienced field ornithologists along a haphazard survey route of
+length 1-9 km (average 5 km).
+High-elevation sites are only surveyed twice per breeding season.
+
+## Importing, formatting, and summarizing data
+
+The data can be loaded into an open R workspace using the `data` command.
+
+```{r}
+data(crossbill)
+colnames(crossbill)
+```
+
+We have three covariates that vary by site: median elevation of the
+quadrat (`ele`, in metres), forest cover of the quadrat (`forest`, in
+percent) and the number of surveys per season (i.e., 2 or 3
+surveys).
+These are called site covariates, because they vary by sites only.
+The 27 columns entitled `det991` - `det073` contain the crossbill
+detection/nondetection data during all surveys over the 9 years.
+They contain a 1 when at least one crossbill was recorded during a
+survey and a 0 otherwise.
+`NA`s indicate surveys that did not take place, either because a site is
+high-elevation and has no third survey or because it failed to be
+surveyed altogether in a year.
+The final 27 columns entitled `date991` - `date073` give the Julian
+date of each survey.
+They represent a "survey-covariate" or "observation covariate".
+We note that the paper by @Royle_2007 used a subset of this
+data set.
+
+AIC-based model selection (see section 5.4) requires
+that all models are fit to the same data.
+`unmarked` removes missing data in a context-specific way. For
+missing `siteCovs`, the entire row of data must be removed. However, for
+missing `yearlySiteCovs` or `obsCovs`, only the
+corresponding observations
+are removed. Thus, if `unmarked` removes different observations
+from different models, the models cannot be compared using AIC. A way
+around this is to remove the detection data corresponding to
+missing covariates before fitting the models.
+The crossbill data have missing dates and so we remove the associated
+detection/non-detection data.
+
+
+```{r}
+DATE <- as.matrix(crossbill[,32:58])
+y.cross <- as.matrix(crossbill[,5:31])
+y.cross[is.na(DATE) != is.na(y.cross)] <- NA
+```
+
+In addition, continuous covariates should be transformed in a way
+that brings their values close to zero in order to improve
+or even enable numerical convergence of the maximum-likelihood routine.
+We do this "by hand" and note that we could also have used the R
+function `scale`. We subtract the mean and divide by the standard
+deviation.
+
+```{r}
+sd.DATE <- sd(c(DATE), na.rm=TRUE)
+mean.DATE <- mean(DATE, na.rm=TRUE)
+DATE <- (DATE - mean.DATE) / sd.DATE
+```
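+
+For reference, the same global standardization could have been done
+with `scale` applied to the vectorized matrix (a sketch, not run; note
+that `scale(DATE)` on the matrix directly would standardize each
+column separately, which is not what we want here):
+
+```{r, eval=FALSE}
+DATE2 <- matrix(scale(as.vector(DATE)), nrow = nrow(DATE))
+```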
+
+Before we can fit occupancy models, we need to format this data set
+appropriately.
+
+```{r}
+years <- as.character(1999:2007)
+years <- matrix(years, nrow(crossbill), 9, byrow=TRUE)
+umf <- unmarkedMultFrame(y=y.cross,
+ siteCovs=crossbill[,2:3], yearlySiteCovs=list(year=years),
+ obsCovs=list(date=DATE),
+ numPrimary=9)
+```
+
+## Model fitting
+
+We fit a series of models that represent different hypotheses about
+the colonization-extinction dynamics of Swiss crossbills
+at a spatial scale of 1 km$^2$.
+We fit year effects on colonization and extinction in the means
+parameterization,
+but for detection probability, we choose an effects parameterization.
+The latter is more useful for getting predictions in the presence of
+other explanatory variables for that parameter.
+For model `fm5` with more complex covariate relationships, we use as
+starting values for the optimization routine
+the solution from a "neighboring" model with slightly less
+complexity, model `fm4`.
+Wise choice of starting values can be decisive for success or failure
+of maximum likelihood estimation.
+
+```{r}
+# A model with constant parameters
+fm0 <- colext(~1, ~1, ~1, ~1, umf)
+
+# Like fm0, but with year-dependent detection
+fm1 <- colext(~1, ~1, ~1, ~year, umf)
+
+# Like fm0, but with year-dependent colonization and extinction
+fm2 <- colext(~1, ~year-1, ~year-1, ~1, umf)
+
+# A fully time-dependent model
+fm3 <- colext(~1, ~year-1, ~year-1, ~year, umf)
+
+# Like fm3 with forest-dependence of 1st-year occupancy
+fm4 <- colext(~forest, ~year-1, ~year-1, ~year, umf)
+
+# Like fm4 with date- and year-dependence of detection
+fm5 <- colext(~forest, ~year-1, ~year-1, ~year + date + I(date^2),
+ umf, starts=c(coef(fm4), 0, 0))
+
+# Same as fm5, but with detection in addition depending on forest cover
+fm6 <- colext(~forest, ~year-1, ~year-1, ~year + date + I(date^2) +
+ forest, umf)
+```
+
+## Model selection
+
+We can compare models using the Akaike information criterion
+($AIC$).
+Note that `unmarked` yields $AIC$, not $AIC_c$
+because the latter would require the sample size,
+which is not really known for
+hierarchical models such as the dynamic occupancy model.
+
+Model selection and model-averaged prediction in `unmarked`
+require that we create a list of models using `fitList`.
+This function organizes models and conducts a series of tests to
+ensure that the models were fit to the same data.
+
+```{r}
+models <- fitList('psi(.)gam(.)eps(.)p(.)' = fm0,
+ 'psi(.)gam(.)eps(.)p(Y)' = fm1,
+ 'psi(.)gam(Y)eps(Y)p(.)' = fm2,
+ 'psi(.)gam(Y)eps(Y)p(Y)' = fm3,
+ 'psi(F)gam(Y)eps(Y)p(Y)' = fm4,
+ 'psi(F)gam(Y)eps(Y)p(YD2)' = fm5,
+ 'psi(F)gam(Y)eps(Y)p(YD2F)' = fm6)
+ms <- modSel(models)
+ms
+```
+
+One model has overwhelming support, so we can base inference on that
+one alone. Before doing so, we point out how to extract coefficients
+from a `fitList` object, and convert the results to a
+`data.frame`, which could be exported from R.
+
+```{r, eval=FALSE}
+coef(ms) # Estimates only
+SE(ms) # Standard errors only
+toExport <- as(ms, "data.frame") # Everything
+```
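+
+The `fitList` is also what enables the model-averaged prediction
+mentioned above: calling `predict` on the list averages predictions
+across models using their AIC weights. A minimal sketch (not run),
+predicting first-year occupancy for a hypothetical quadrat with 50%
+forest cover:
+
+```{r, eval=FALSE}
+predict(models, type="psi", newdata=data.frame(forest=50))
+```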
+
+## Manipulating results: Prediction and plotting
+
+Fitted models can be used to predict expected outcomes when given new
+data. For example, one could ask "how many crossbills would you
+expect to find in a quadrat with 50% forest cover?" Prediction also
+offers a way of
+presenting the results of an analysis. We illustrate by plotting the
+predictions of $\psi$ and $p$ over the range of covariate values studied.
+Note that because we standardized date, we need to transform it back
+to its original scale after obtaining predictions on the
+standardized scale.
+
+```{r colext-pred, fig.height=3, fig.width=6, fig.cap="Figure 4. Covariates"}
+op <- par(mfrow=c(1,2), mai=c(0.8,0.8,0.1,0.1))
+
+nd <- data.frame(forest=seq(0, 100, length=50))
+E.psi <- predict(fm6, type="psi", newdata=nd, appendData=TRUE)
+
+with(E.psi, {
+ plot(forest, Predicted, ylim=c(0,1), type="l",
+ xlab="Percent cover of forest",
+ ylab=expression(hat(psi)), cex.lab=0.8, cex.axis=0.8)
+ lines(forest, Predicted+1.96*SE, col=gray(0.7))
+ lines(forest, Predicted-1.96*SE, col=gray(0.7))
+ })
+
+nd <- data.frame(date=seq(-2, 2, length=50),
+ year=factor("2005", levels=c(unique(years))),
+ forest=50)
+E.p <- predict(fm6, type="det", newdata=nd, appendData=TRUE)
+E.p$dateOrig <- E.p$date*sd.DATE + mean.DATE
+
+with(E.p, {
+ plot(dateOrig, Predicted, ylim=c(0,1), type="l",
+ xlab="Julian date", ylab=expression( italic(p) ),
+ cex.lab=0.8, cex.axis=0.8)
+ lines(dateOrig, Predicted+1.96*SE, col=gray(0.7))
+ lines(dateOrig, Predicted-1.96*SE, col=gray(0.7))
+ })
+par(op)
+```
+
+**Acknowledgments**
+
+Special thanks goes to Ian Fiske, the author of `colext` and the
+original developer of `unmarked`. Andy Royle provided the
+initial funding and support for the package. The questions of many
+people on the users' list motivated the writing of this document.
+
+# References
diff --git a/vignettes/distsamp.Rnw b/vignettes/distsamp.Rmd
index 1e969c8..8fa04ec 100644
--- a/vignettes/distsamp.Rnw
+++ b/vignettes/distsamp.Rmd
@@ -1,58 +1,49 @@
-<<echo=false>>=
-options(width=70)
-options(continue=" ")
-@
-
-\documentclass[a4paper]{article}
-\usepackage[OT1]{fontenc}
-\usepackage{Sweave}
-\usepackage{natbib}
-%\usepackage{fullpage}
-\usepackage[vmargin=1in,hmargin=1in]{geometry}
-\bibliographystyle{ecology}
-
-\DefineVerbatimEnvironment{Sinput}{Verbatim} {xleftmargin=2em}
-\DefineVerbatimEnvironment{Soutput}{Verbatim}{xleftmargin=2em}
-\DefineVerbatimEnvironment{Scode}{Verbatim}{xleftmargin=2em}
-\fvset{listparameters={\setlength{\topsep}{0pt}}}
-\renewenvironment{Schunk}{\vspace{\topsep}}{\vspace{\topsep}}
-
-%%\VignetteIndexEntry{Distance sampling analysis}
-
-\title{Distance sampling analysis in unmarked}
-\author{Richard Chandler\\USGS Patuxent Wildlife Research Center}
-\date{March 4, 2020}
-
-
-\begin{document}
-
-\newcommand{\code}[1]{\texttt{\small{#1}}}
-\newcommand{\package}[1]{\textsf{\small{#1}}}
-
-\maketitle
-
-\abstract{Distance sampling is a wildlife sampling technique used to
- estimate population size or density. Describing how density varies
- spatially is often of equal interest; however, conventional methods
- of analysis do not allow for explicit modeling of both density and
- detection probability. The function \code{distsamp} implements the
- multinomial-Poisson mixture model of %Royle et. al
- \citet{royle_modeling_2004}, which was developed to overcome this
- limitation. This model requires that line- or point-transects are
- spatially replicated and that distance data are recorded in discrete
- intervals. The function \code{gdistsamp} extends this basic model,
- by introducing the parameter $\phi$, the probability of
- being available for detection \citep{chandlerEA_2011}. Furthermore,
- this function allows
- abundance to be modeled using the negative binomial distribution,
- which may be useful for dealing with over-dispersion. This document
- describes how to format data, fit models,
- and manipulate results in package \package{unmarked}. It does not
- cover the statistical theory and assumptions underlying distance
- sampling \citep{buckland_distsamp_2001}, which the user is expected
- to be familiar with. }
-
-\section{Introduction}
+---
+title: Distance sampling analysis in unmarked
+author: Richard Chandler, USGS Patuxent Wildlife Research Center
+date: March 4, 2020
+bibliography: unmarked.bib
+csl: ecology.csl
+output:
+ rmarkdown::html_vignette:
+ fig_width: 5
+ fig_height: 3.5
+ number_sections: true
+ toc: true
+vignette: >
+ %\VignetteIndexEntry{Distance sampling analysis}
+ %\VignetteEngine{knitr::rmarkdown}
+ \usepackage[utf8]{inputenc}
+
+---
+
+```{r,echo=FALSE}
+options(rmarkdown.html_vignette.check_title = FALSE)
+```
+
+# Abstract
+
+Distance sampling is a wildlife sampling technique used to
+estimate population size or density. Describing how density varies
+spatially is often of equal interest; however, conventional methods
+of analysis do not allow for explicit modeling of both density and
+detection probability. The function `distsamp` implements the
+multinomial-Poisson mixture model of @royle_modeling_2004, which was developed to overcome this
+limitation. This model requires that line- or point-transects are
+spatially replicated and that distance data are recorded in discrete
+intervals. The function `gdistsamp` extends this basic model
+by introducing the parameter $\phi$, the probability of
+being available for detection [@chandlerEA_2011]. Furthermore,
+this function allows
+abundance to be modeled using the negative binomial distribution,
+which may be useful for dealing with over-dispersion. This document
+describes how to format data, fit models,
+and manipulate results in package `unmarked`. It does not
+cover the statistical theory and assumptions underlying distance
+sampling [@buckland_distsamp_2001], which the user is expected
+to be familiar with.
+
+# Introduction
Spatial variation in density is common to virtually all wildlife
populations, and describing this variation is a central objective of
@@ -61,28 +52,23 @@ necessary to account for individuals present but not
detected. Distance from observer is a ubiquitous source of variation
in detection probability, and thus distance sampling has become a
commonly used survey methodology. Numerous options exist for analyzing
-distance sampling data, but here the focus is on the model of Royle et
-al. \citep{royle_modeling_2004}, which assumes that multiple transects
+distance sampling data, but here the focus is on the model of @royle_modeling_2004, which assumes that multiple transects
have been surveyed and distance data are recorded in discrete
intervals. The details of the model formulation are as follows:
The latent transect-level abundance distribution is currently assumed
to be
-
-\begin{equation}
+$$
\label{eq:1}
N_{i} \sim \mathrm{Poisson}(\lambda)
\quad i=1,\dots,M\
-\end{equation}
-
+$$
The detection process is modeled as
-
-\begin{equation}
+$$
\label{eq:2}
y_{ij} \sim \mathrm{Multinomial}(N_{i}, \pi_{ij})
\quad i=1,\dots,M\;j=1,\dots,J
-\end{equation}
-
+$$
where $\pi_{ij}$ is the multinomial cell probability for transect $i$
in distance class $j$. These are computed by integrating a detection
function such as the half-normal (with scale parameter $\sigma$) over
@@ -91,20 +77,20 @@ each distance interval.
Parameters $\lambda$ and $\sigma$ can be vectors affected by
transect-specific covariates using the log link.
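+
+To make the integration step above concrete, here is an informal sketch (not
+evaluated, with arbitrary values) of how the cell probabilities for a single line
+transect with half-width 20 m could be obtained by numerically integrating the
+half-normal function `gxhn` (described later in this document) over each
+distance interval:
+
+```{r, eval=FALSE}
+# Illustrative only: half-normal cell probabilities for one transect
+sigma <- 10                      # arbitrary half-normal scale parameter
+db <- c(0, 5, 10, 15, 20)        # distance break points (m)
+pi_j <- sapply(1:4, function(j)
+  integrate(unmarked::gxhn, db[j], db[j + 1], sigma = sigma)$value / max(db))
+round(pi_j, 3)                   # cell probabilities; 1 - sum(pi_j) is Pr(not detected)
+```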
-\section{Importing, formatting, and summarizing data}
+# Importing, formatting, and summarizing data
The first step is to import the data into R. The simplest option is to
-use the \code{read.csv} function to import a .csv file that has been
+use the `read.csv` function to import a .csv file that has been
formatted so that each row represents a transect, and columns describe
either the number of individuals detected in each distance interval or
transect-specific covariates. Alternatively, if data were not recorded
in discrete distance intervals, a .csv file could be imported that
contains a row for each individual detected and columns for the
distances and transect names. This could then be converted to
-transect-level data using the function \code{formatDistData}. For
+transect-level data using the function `formatDistData`. For
example,
-<<>>=
+```{r}
library(unmarked)
dists <- read.csv(system.file("csv", "distdata.csv", package="unmarked"),
stringsAsFactors=TRUE)
@@ -114,86 +100,79 @@ levels(dists$transect)
yDat <- formatDistData(dists, distCol="distance",
transectNameCol="transect", dist.breaks=c(0, 5, 10, 15, 20))
yDat
-@
+```
-Here we have created an object called yDat that contains counts for
+Here we have created an object called `yDat` that contains counts for
each transect (row) in each distance interval (columns). Note the
-method used to include transect "g", which was surveyd but where no
-individuals were detected. It is important that all survyed transects
+method used to include transect `"g"`, which was surveyed but where no
+individuals were detected. It is important that all surveyed transects
are included in the analysis.
Suppose there also exists transect-specific covariate data.
-<<>>=
+```{r}
(covs <- data.frame(canopyHt = c(5, 8, 3, 2, 4, 7, 5),
habitat = c('A','A','A','A','B','B','B'), row.names=letters[1:7]))
-@
+```
-The function \code{unmarkedFrameDS} can now be used to organize these
+The function `unmarkedFrameDS` can now be used to organize these
data along with their metadata (study design (line- or
point-transect), distance class break points, transect lengths, and
-units of measurement) into an object to be used as the \code{data}
-argument in \code{distsamp}. By organizing the data this way, the user
+units of measurement) into an object to be used as the `data`
+argument in `distsamp`. By organizing the data this way, the user
does not need to repetitively specify these arguments during each call
-to \code{distsamp}, thereby reducing the potential for errors and
+to `distsamp`, thereby reducing the potential for errors and
facilitating data summary and manipulation.
-<<>>=
+```{r}
umf <- unmarkedFrameDS(y=as.matrix(yDat), siteCovs=covs, survey="line",
dist.breaks=c(0, 5, 10, 15, 20), tlength=rep(100, 7),
unitsIn="m")
-@
+```
-Note that there is no \code{obsCovs} argument, meaning that
+Note that there is no `obsCovs` argument, meaning that
distance-interval-level covariates cannot be included in the
-analysis. The call to \code{unmarkedFrameDS} indicates that the data
+analysis. The call to `unmarkedFrameDS` indicates that the data
were collected on seven line transects, each 100 meters long, and
detections were tabulated into distance intervals defined by the
-\code{dist.breaks} cutpoints. It is important that both transect
+`dist.breaks` cutpoints. It is important that both transect
lengths and distance break points are provided in the same units
-specified by \code{unitsIn}.
+specified by `unitsIn`.
We can look at these data using a variety of methods.
-<<umfhist,fig=TRUE,include=FALSE,width=4,height=4>>=
+```{r, fig.height=4, fig.width=4, fig.cap="Figure 1. Histogram of detection distances"}
summary(umf)
hist(umf, xlab="distance (m)", main="", cex.lab=0.8, cex.axis=0.8)
-@
-\begin{figure}[!ht]
- \centering
- \includegraphics[width=4in,height=4in]{distsamp-umfhist}
- \caption{Histogram of detection distances.}
- \label{fig:umfhist}
-\end{figure}
+```
-\newpage
-\section{Model fitting}
+# Model fitting
Now that we have put our data into an object of class
-\code{unmarkedFrameDS}, we are ready to fit some models with
-\code{distsamp}. The first argument is a \code{formula} which
+`unmarkedFrameDS`, we are ready to fit some models with
+`distsamp`. The first argument is a `formula` which
specifies the detection covariates followed by the density (or
abundance) covariates. The only other required argument is the
-\code{data}, but several other optional arguments exist. By default,
+`data`, but several other optional arguments exist. By default,
the half-normal detection function is used to model density in animals
-/ ha. The detection function can be selected using the \code{keyfun}
-argument. The response can be changed from ``density", to ``abund"
-with the \code{output} argument. When modeling density, the output
-units can be changed from ``ha" to ``kmsq" using the \code{unitsOut}
-argument. \code{distsamp} also includes the arguments \code{starts},
-\code{method}, and \code{control}, which are common to all unmarked
+/ ha. The detection function can be selected using the `keyfun`
+argument. The response can be changed from `"density"` to `"abund"`
+with the `output` argument. When modeling density, the output
+units can be changed from `"ha"` to `"kmsq"` using the `unitsOut`
+argument. `distsamp` also includes the arguments `starts`,
+`method`, and `control`, which are common to all unmarked
fitting functions.
-Below is a series of models that demonstrates \code{distsamp}'s
+Below is a series of models that demonstrates `distsamp`'s
arguments and defaults.
-<<>>=
+```{r}
hn_Null <- distsamp(~1~1, umf)
hn_Null <- distsamp(~1~1, umf, keyfun="halfnorm", output="density",
unitsOut="ha")
haz_Null <- distsamp(~1~1, umf, keyfun="hazard")
hn_Hab.Ht <- distsamp(~canopyHt ~habitat, umf)
-@
+```
The first two models are the same, a null half-normal detection
function with density returned in animals / ha (on the log-scale). The
@@ -201,58 +180,57 @@ third model uses the hazard-rate detection function, and the fourth
model includes covariates affecting the Poisson mean ($\lambda$) and
the half-normal scale parameter ($\sigma$).
-
Once a model has been fit, typing its name will display parameter
estimate information and AIC. A summary method shows extra details
including the scale on which parameters were estimated and convergence
results.
-<<>>=
+```{r}
haz_Null
-@
+```
-\section{Manipulating results}
+# Manipulating results
Back-transforming estimates to the original scale and obtaining
standard errors via the delta method is easily accomplished:
-<<>>=
+```{r}
names(haz_Null)
backTransform(haz_Null, type="state")
backTransform(haz_Null, type="det")
backTransform(haz_Null, type="scale")
backTransform(linearComb(hn_Hab.Ht['det'], c(1, 5)))
-@
+```
The first back-transformation returns population density, since this
-is the default state parameter modeled when \code{distsamp}'s
-\code{output} argument is set to ``density". The second
+is the default state parameter modeled when `distsamp`'s
+`output` argument is set to `"density"`. The second
back-transformation returns the hazard-rate shape parameter, and third
is the hazard-rate scale parameter. When covariates are present,
-\code{backTransform} in conjunction with \code{linearComb} should be
+`backTransform` in conjunction with `linearComb` should be
used. Here, we requested the value of sigma when canopy height was 5
meters tall. Note that the intercept was included in the calculation
by setting the first value in the linear equation to 1.
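+
+As an unevaluated cross-check (it assumes only that, as noted above, $\sigma$ is
+modeled on the log scale), the same value can be computed directly from the
+detection coefficients:
+
+```{r, eval=FALSE}
+# Illustrative only: sigma at a canopy height of 5 m, computed by hand
+b <- coef(hn_Hab.Ht, type="det")  # intercept and canopyHt slope on the log scale
+exp(b[1] + b[2]*5)                # should match the backTransform/linearComb result
+```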
Parameters that do not occur in the likelihood may also be of
-interest. For example, the number of individuals occuring in the
+interest. For example, the number of individuals occurring in the
sampled plots (local population size) is a fundamental parameter in
monitoring and conservation efforts. The following commands can be
used to derive this parameter from our model of density:
-<<>>=
+```{r}
site.level.density <- predict(hn_Hab.Ht, type="state")$Predicted
plotArea.inHectares <- 100 * 40 / 10000
site.level.abundance <- site.level.density * plotArea.inHectares
(N.hat <- sum(site.level.abundance))
-@
+```
-To describe the uncertainty of N.hat, or any other derived parameter,
+To describe the uncertainty of `N.hat`, or any other derived parameter,
we can use a parametric bootstrap approach. First we define a function
-to estimate N.hat, and then we apply this function to numerous models
+to estimate `N.hat`, and then we apply this function to numerous models
fit to data simulated from our original model.
-<<>>=
+```{r}
getN.hat <- function(fit) {
d <- predict(fit, type="state")$Predicted
a <- d * (100 * 40 / 10000)
@@ -261,51 +239,51 @@ getN.hat <- function(fit) {
}
pb <- parboot(hn_Hab.Ht, statistic=getN.hat, nsim=25)
pb
-@
+```
-Here, \code{t\_B} is an approximation of the sampling distribution for
-\code{N.hat}, conditioned on our fitted model. Confidence intervals
-can be calculated from the quantiles of \code{t\_B}. Note that in
-practice nsim should be set to a much larger value and a
+Here, `t_B` is an approximation of the sampling distribution for
+`N.hat`, conditioned on our fitted model. Confidence intervals
+can be calculated from the quantiles of `t_B`. Note that in
+practice `nsim` should be set to a much larger value and a
goodness-of-fit test should be performed before making inference from
-a fitted model. Parameteric bootstrapping can be used for the latter
-by supplying a fit statistic such as \code{SSE} instead of
-\code{getN.hat}. See ?parboot and vignette('unmarked') for examples.
+a fitted model. Parametric bootstrapping can be used for the latter
+by supplying a fit statistic such as `SSE` instead of
+`getN.hat`. See `?parboot` and `vignette('unmarked')` for examples.
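+
+As an unevaluated sketch of both operations (in practice, use a much larger
+`nsim`): the bootstrap samples are stored in the `t.star` slot of the `parboot`
+object, and `SSE` is a fit statistic supplied with `unmarked`.
+
+```{r, eval=FALSE}
+quantile(pb@t.star, probs=c(0.025, 0.975))            # approximate 95% CI for N.hat
+pb_gof <- parboot(hn_Hab.Ht, statistic=SSE, nsim=25)  # parametric bootstrap GOF test
+pb_gof
+```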
-\section{Prediction and plotting}
+# Prediction and plotting
-A \code{predict} method exits for all \code{unmarkedFit} objects,
+A `predict` method exists for all `unmarkedFit` objects,
which is useful when multiple covariate combinations exist. This
method also facilitates plotting. Suppose we wanted model predictions
from the covariate model along the range of covariate values
-studied. First we need to make new \code{data.frame}s holding the
+studied. First we need to make new `data.frame`s holding the
desired covariate combinations. Note that column names must match
those of the original data, and factor variables must contain the same
levels.
-<<>>=
+```{r}
head(habConstant <- data.frame(canopyHt = seq(2, 8, length=20),
habitat=factor("A", levels=c("A", "B"))))
(htConstant <- data.frame(canopyHt = 5,
habitat=factor(c("A", "B"))))
-@
+```
-Now \code{predict} can be used to estimate density and $\sigma$ for
-each row of our new \code{data.frame}s.
+Now `predict` can be used to estimate density and $\sigma$ for
+each row of our new `data.frame`s.
-<<>>=
+```{r}
(Elambda <- predict(hn_Hab.Ht, type="state", newdata=htConstant,
appendData=TRUE))
head(Esigma <- predict(hn_Hab.Ht, type="det", newdata=habConstant,
appendData=TRUE))
-@
+```
Once predictions have been made, plotting is
-straight-forward. Figure~\ref{fig:predplot}a, shows density as a
-function of habitat type, and Figure~\ref{fig:predplot}b shows that
+straightforward. Figure 2a shows density as a
+function of habitat type, and Figure 2b shows that
$\sigma$ is not related to canopy height.
-<<predplot,fig=TRUE,include=FALSE,width=6,height=3>>=
+```{r, fig.height=3, fig.width=6, fig.cap="Figure 2. Predicted covariate relationships"}
par(mfrow=c(1, 2))
with(Elambda, {
x <- barplot(Predicted, names=habitat, xlab="Habitat",
@@ -322,32 +300,24 @@ with(Esigma, {
lines(canopyHt, Predicted+SE, lty=2)
lines(canopyHt, Predicted-SE, lty=2)
})
-@
-\begin{figure}[!ht]
- \centering
- \includegraphics{distsamp-predplot}
- \caption{Predicted covariate relatonships.}
- \label{fig:predplot}
-\end{figure}
-
+```
Plots of the detection function parameters can be less informative
than plots of the detection functions themselves. To do the latter, we
can plug predicted values of $\sigma$ at given covariate values into
-the \code{gxhn} function. For instance, Figure~\ref{fig:detplot}a
+the `gxhn` function. For instance, Figure 3a
shows the half-normal function at a canopy height of 2m. This was
plotted by setting $\sigma$ to 10.8, the predicted value shown
above. The available detection functions are described on the
-\code{detFuns} help page. Probability density functions such as
-\code{dxhn} can be plotted with the distance histogram using the
-\code{hist} method for \code{unmarkedFitDS} objects
-(Figure\ref{fig:detplot}b). This only works for models without
+`detFuns` help page. Probability density functions such as
+`dxhn` can be plotted with the distance histogram using the
+`hist` method for `unmarkedFitDS` objects
+(Figure 3b). This only works for models without
detection covariates; however, probability density functions at
specific covariate values can be added in a fashion similar to that
-above (Figure\ref{fig:detplot}b).
-
+above (Figure 3b).
-<<detplot,fig=TRUE,include=FALSE,width=6,height=3>>=
+```{r, fig.width=6, fig.height=3, fig.cap="Figure 3. Detection and probability density functions"}
par(mfrow=c(1, 2))
plot(function(x) gxhn(x, sigma=10.8), 0, 20, xlab="Distance (m)",
ylab="Detection prob. at 2m canopy ht.", cex.lab=0.7,
@@ -358,17 +328,9 @@ plot(function(x) dxhn(x, sigma=10.8), 0, 20, add=TRUE, col="blue")
plot(function(x) dxhn(x, sigma=9.9), 0, 20, add=TRUE, col="green")
legend('topright', c("Canopy ht. = 2m", "Null", "Canopy ht. = 8m"),
col=c("blue", "black", "green"), lty=1, cex=0.4)
-@
-\begin{figure}[!ht]
- \centering
- \includegraphics{distsamp-detplot}
- \caption{Detection and probability density functions.}
- \label{fig:detplot}
-\end{figure}
-
-\newpage
+```
-\section{Model extensions}
+# Model extensions
A common criticism of distance sampling is that all individuals must
be available for detection. Similarly, the probability of detecting an
@@ -376,29 +338,26 @@ individual a distance of 0 must be 1. These assumptions often cannot
be met. For instance, when counting cues such as bird songs or whale
blows, the probability that an individual will produce a cue (and thus
be available for detection) is rarely 1 during the sampling
-interval. Recently developed methods \citep{chandlerEA_2011}
+interval. Recently developed methods [@chandlerEA_2011]
allow for the estimation of the
probability of being available for detection $\phi$. To do so,
replicate distance sampling observations must be collected at each
transect. These replicates could be collected using repeated visits or
multiple observers working independently. Implementation of this model
-in \package{unmarked} is accomplished using the \code{gdistsamp}
+in `unmarked` is accomplished using the `gdistsamp`
function. The function also provides the option to model abundance
using the negative binomial distribution. Formatting data and
specifying models is similar to methods described above and is more
fully outlined in the help pages.
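+
+As a rough, unevaluated sketch of what such an analysis might look like (`yRep` is a
+hypothetical matrix of distance-class counts from 3 replicate visits to the 7
+transects used above, and `covs` is the covariate data frame created earlier):
+
+```{r, eval=FALSE}
+umfG <- unmarkedFrameGDS(y=yRep, siteCovs=covs, survey="line",
+                         dist.breaks=c(0, 5, 10, 15, 20),
+                         tlength=rep(100, 7), unitsIn="m",
+                         numPrimary=3)                   # 3 replicate visits per transect
+fmG <- gdistsamp(lambdaformula=~habitat, phiformula=~1, pformula=~1,
+                 data=umfG, mixture="NB")                # negative binomial abundance
+```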
-\section{Conclusion}
+# Conclusion
This document has emphasized methods tailored to distance sampling
analysis; however, the more general methods available in package
-\package{unmarked} can also be applied to models fitted using
-\code{distsamp} and \code{gdistsamp}.
+`unmarked` can also be applied to models fitted using
+`distsamp` and `gdistsamp`.
For example, model-selection and model-averaging can be
-accomplished using the \code{fitList} function and the \code{modSel}
-and \code{predict} methods.
-
-
-\bibliography{unmarked}
+accomplished using the `fitList` function and the `modSel`
+and `predict` methods.
-\end{document}
+# References
diff --git a/vignettes/ecology.bst b/vignettes/ecology.bst
deleted file mode 100644
index 1896827..0000000
--- a/vignettes/ecology.bst
+++ /dev/null
@@ -1,1460 +0,0 @@
-%%
-%% This is file `ecology.bst',
-%% generated with the docstrip utility.
-%%
-%% The original source files were:
-%%
-%% merlin.mbs (with options: `head,exlang,ay,nat,nm-rev1,dt-beg,note-yr,vol-bf,vnum-x,jnm-x,btit-rm,english,nfss,{}')
-%% english.mbs (with options: `exlang,ay,nat,nm-rev1,dt-beg,note-yr,vol-bf,vnum-x,jnm-x,btit-rm,english,nfss,{}')
-%% merlin.mbs (with options: `tail,exlang,ay,nat,nm-rev1,dt-beg,note-yr,vol-bf,vnum-x,jnm-x,btit-rm,english,nfss,{}')
-%% ----------------------------------------
-%% *** Bibliographystyle for Ecology ***
-%%
- %-------------------------------------------------------------------
- % The original source file contains the following version information:
- % \ProvidesFile{merlin.mbs}[1996/02/05 3.7 (PWD)]
- %
- % NOTICE:
- % This file may be used for non-profit purposes.
- % It may not be distributed in exchange for money,
- % other than distribution costs.
- %
- % The author provides it `as is' and does not guarantee it in any way.
- %
- % Copyright (C) 1994, 1995, 1996 Patrick W. Daly
- %-------------------------------------------------------------------
- % For use with BibTeX version 0.99a or later
- %-------------------------------------------------------------------
- % This bibliography style file is intended for texts in
- % ENGLISH
- % This is an author-year citation style bibliography. As such, it is
- % non-standard LaTeX, and requires a special package file to function properly.
- % Such a package is natbib.sty by Patrick W. Daly
- % The form of the \bibitem entries is
- % \bibitem[Jones et al.(1990)]{key}...
- % \bibitem[Jones et al.(1990)Jones, Baker, and Smith]{key}...
- % The essential feature is that the label (the part in brackets) consists
- % of the author names, as they should appear in the citation, with the year
- % in parentheses following. There must be no space before the opening
- % parenthesis!
- % With natbib v5.3, a full list of authors may also follow the year.
- % In natbib.sty, it is possible to define the type of enclosures that is
- % really wanted (brackets or parentheses), but in either case, there must
- % be parentheses in the label.
- % The \cite command functions as follows:
- % \cite{key} ==>> Jones et al. (1990)
- % \cite[]{key} ==>> (Jones et al., 1990)
- % \cite[chap. 2]{key} ==>> (Jones et al., 1990, chap. 2)
- % \cite[e.g.][]{key} ==>> (e.g. Jones et al., 1990)
- % \cite[e.g.][p. 32]{key} ==>> (e.g. Jones et al., p. 32)
- % \citeauthor{key} Jones et al.
- % \citefullauthor{key} Jones, Baker, and Smith
- % \citeyear{key} 1990
- %---------------------------------------------------------------------
-
-ENTRY
- { address
- author
- booktitle
- chapter
- edition
- editor
- howpublished
- institution
- journal
- key
- month
- note
- number
- organization
- pages
- publisher
- school
- series
- title
- type
- volume
- year
- }
- {}
- { label extra.label sort.label short.list }
-
-INTEGERS { output.state before.all mid.sentence after.sentence after.block }
-
-FUNCTION {init.state.consts}
-{ #0 'before.all :=
- #1 'mid.sentence :=
- #2 'after.sentence :=
- #3 'after.block :=
-}
-
-STRINGS { s t }
-
-FUNCTION {output.nonnull}
-{ 's :=
- output.state mid.sentence =
- { ", " * write$ }
- { output.state after.block =
- { add.period$ write$
- newline$
- "\newblock " write$
- }
- { output.state before.all =
- 'write$
- { add.period$ " " * write$ }
- if$
- }
- if$
- mid.sentence 'output.state :=
- }
- if$
- s
-}
-
-FUNCTION {output}
-{ duplicate$ empty$
- 'pop$
- 'output.nonnull
- if$
-}
-
-FUNCTION {output.check}
-{ 't :=
- duplicate$ empty$
- { pop$ "empty " t * " in " * cite$ * warning$ }
- 'output.nonnull
- if$
-}
-
-FUNCTION {fin.entry}
-{ add.period$
- write$
- newline$
-}
-
-FUNCTION {new.block}
-{ output.state before.all =
- 'skip$
- { after.block 'output.state := }
- if$
-}
-
-FUNCTION {new.sentence}
-{ output.state after.block =
- 'skip$
- { output.state before.all =
- 'skip$
- { after.sentence 'output.state := }
- if$
- }
- if$
-}
-
-FUNCTION {add.blank}
-{ " " * before.all 'output.state :=
-}
-
-FUNCTION {date.block}
-{
- new.block
-}
-
-FUNCTION {not}
-{ { #0 }
- { #1 }
- if$
-}
-
-FUNCTION {and}
-{ 'skip$
- { pop$ #0 }
- if$
-}
-
-FUNCTION {or}
-{ { pop$ #1 }
- 'skip$
- if$
-}
-
-FUNCTION {non.stop}
-{ duplicate$
- "}" * add.period$
- #-1 #1 substring$ "." =
-}
-
-FUNCTION {new.block.checkb}
-{ empty$
- swap$ empty$
- and
- 'skip$
- 'new.block
- if$
-}
-
-FUNCTION {field.or.null}
-{ duplicate$ empty$
- { pop$ "" }
- 'skip$
- if$
-}
-
-FUNCTION {emphasize}
-{ duplicate$ empty$
- { pop$ "" }
- { "\emph{" swap$ * "}" * }
- if$
-}
-
-FUNCTION {bolden}
-{ duplicate$ empty$
- { pop$ "" }
- { "\textbf{" swap$ * "}" * }
- if$
-}
-
-FUNCTION {capitalize}
-{ "u" change.case$ "t" change.case$ }
-
-FUNCTION {space.word}
-{ " " swap$ * " " * }
-
- % Here are the language-specific definitions for explicit words.
- % Each function has a name bbl.xxx where xxx is the English word.
- %-------------------------------------------------------------------
- % The original source file contains the following version information:
- % \ProvidesFile{english.mbs}[1995/05/04 1.1 (PWD)]
- % Copyright (C) 1994, 1995 Patrick W. Daly
- %-------------------------------------------------------------------
-
- % The language selected here is ENGLISH
-FUNCTION {bbl.and}
-{ "and"}
-
-FUNCTION {bbl.editors}
-{ "editors" }
-
-FUNCTION {bbl.editor}
-{ "editor" }
-
-FUNCTION {bbl.edby}
-{ "edited by" }
-
-FUNCTION {bbl.edition}
-{ "edition" }
-
-FUNCTION {bbl.volume}
-{ "volume" }
-
-FUNCTION {bbl.of}
-{ "of" }
-
-FUNCTION {bbl.number}
-{ "number" }
-
-FUNCTION {bbl.nr}
-{ "no." }
-
-FUNCTION {bbl.in}
-{ "in" }
-
-FUNCTION {bbl.pages}
-{ "pages" }
-
-FUNCTION {bbl.page}
-{ "page" }
-
-FUNCTION {bbl.chapter}
-{ "chapter" }
-
-FUNCTION {bbl.techrep}
-{ "Technical Report" }
-
-FUNCTION {bbl.mthesis}
-{ "Master's thesis" }
-
-FUNCTION {bbl.phdthesis}
-{ "Ph.D. thesis" }
-
-FUNCTION {bbl.first}
-{ "First" }
-
-FUNCTION {bbl.second}
-{ "Second" }
-
-FUNCTION {bbl.third}
-{ "Third" }
-
-FUNCTION {bbl.fourth}
-{ "Fourth" }
-
-FUNCTION {bbl.fifth}
-{ "Fifth" }
-
-FUNCTION {bbl.st}
-{ "st" }
-
-FUNCTION {bbl.nd}
-{ "nd" }
-
-FUNCTION {bbl.rd}
-{ "rd" }
-
-FUNCTION {bbl.th}
-{ "th" }
-
-FUNCTION {eng.ord}
-{ duplicate$ "1" swap$ *
- #-2 #1 substring$ "1" =
- { bbl.th * }
- { duplicate$ #-1 #1 substring$
- duplicate$ "1" =
- { pop$ bbl.st * }
- { duplicate$ "2" =
- { pop$ bbl.nd * }
- { "3" =
- { bbl.rd * }
- { bbl.th * }
- if$
- }
- if$
- }
- if$
- }
- if$
-}
-
-MACRO {jan} {"January"}
-
-MACRO {feb} {"February"}
-
-MACRO {mar} {"March"}
-
-MACRO {apr} {"April"}
-
-MACRO {may} {"May"}
-
-MACRO {jun} {"June"}
-
-MACRO {jul} {"July"}
-
-MACRO {aug} {"August"}
-
-MACRO {sep} {"September"}
-
-MACRO {oct} {"October"}
-
-MACRO {nov} {"November"}
-
-MACRO {dec} {"December"}
-
- % End of language definition file
-
-MACRO {acmcs} {"ACM Computing Surveys"}
-
-MACRO {acta} {"Acta Informatica"}
-
-MACRO {cacm} {"Communications of the ACM"}
-
-MACRO {ibmjrd} {"IBM Journal of Research and Development"}
-
-MACRO {ibmsj} {"IBM Systems Journal"}
-
-MACRO {ieeese} {"IEEE Transactions on Software Engineering"}
-
-MACRO {ieeetc} {"IEEE Transactions on Computers"}
-
-MACRO {ieeetcad}
- {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"}
-
-MACRO {ipl} {"Information Processing Letters"}
-
-MACRO {jacm} {"Journal of the ACM"}
-
-MACRO {jcss} {"Journal of Computer and System Sciences"}
-
-MACRO {scp} {"Science of Computer Programming"}
-
-MACRO {sicomp} {"SIAM Journal on Computing"}
-
-MACRO {tocs} {"ACM Transactions on Computer Systems"}
-
-MACRO {tods} {"ACM Transactions on Database Systems"}
-
-MACRO {tog} {"ACM Transactions on Graphics"}
-
-MACRO {toms} {"ACM Transactions on Mathematical Software"}
-
-MACRO {toois} {"ACM Transactions on Office Information Systems"}
-
-MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"}
-
-MACRO {tcs} {"Theoretical Computer Science"}
-
-INTEGERS { nameptr namesleft numnames }
-
-FUNCTION {format.names}
-{ 's :=
- #1 'nameptr :=
- s num.names$ 'numnames :=
- numnames 'namesleft :=
- { namesleft #0 > }
- { nameptr #1 >
- { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't := }
- { s nameptr "{vv~}{ll}{, f.}{, jj}" format.name$ 't := }
- if$
- nameptr #1 >
- {
- namesleft #1 >
- { ", " * t * }
- {
- numnames #2 >
- { "," * }
- 'skip$
- if$
- t "others" =
- { " et~al." * }
- { bbl.and space.word * t * }
- if$
- }
- if$
- }
- 't
- if$
- nameptr #1 + 'nameptr :=
- namesleft #1 - 'namesleft :=
- }
- while$
-}
-
-FUNCTION {format.names.ed}
-{ 's :=
- #1 'nameptr :=
- s num.names$ 'numnames :=
- numnames 'namesleft :=
- { namesleft #0 > }
- { s nameptr
- "{f.~}{vv~}{ll}{, jj}"
- format.name$ 't :=
- nameptr #1 >
- {
- namesleft #1 >
- { ", " * t * }
- {
- numnames #2 >
- { "," * }
- 'skip$
- if$
- t "others" =
- { " et~al." * }
- { bbl.and space.word * t * }
- if$
- }
- if$
- }
- 't
- if$
- nameptr #1 + 'nameptr :=
- namesleft #1 - 'namesleft :=
- }
- while$
-}
-
-FUNCTION {format.key}
-{ empty$
- { key field.or.null }
- { "" }
- if$
-}
-
-FUNCTION {format.authors}
-{ author empty$
- { "" }
- {
- author format.names
- }
- if$
-}
-
-FUNCTION {format.editors}
-{ editor empty$
- { "" }
- {
- editor format.names
- editor num.names$ #1 >
- { ", " * bbl.editors * }
- { ", " * bbl.editor * }
- if$
- }
- if$
-}
-
-FUNCTION {format.in.editors}
-{ editor empty$
- { "" }
- { editor format.names.ed
- editor num.names$ #1 >
- { ", " * bbl.editors * }
- { ", " * bbl.editor * }
- if$
- }
- if$
-}
-
-FUNCTION {format.title}
-{ title empty$
- { "" }
- { title "t" change.case$
- }
- if$
-}
-
-FUNCTION {format.full.names}
-{'s :=
- #1 'nameptr :=
- s num.names$ 'numnames :=
- numnames 'namesleft :=
- { namesleft #0 > }
- { s nameptr
- "{vv~}{ll}" format.name$ 't :=
- nameptr #1 >
- {
- namesleft #1 >
- { ", " * t * }
- {
- numnames #2 >
- { "," * }
- 'skip$
- if$
- t "others" =
- { " et~al." * }
- { bbl.and space.word * t * }
- if$
- }
- if$
- }
- 't
- if$
- nameptr #1 + 'nameptr :=
- namesleft #1 - 'namesleft :=
- }
- while$
-}
-
-FUNCTION {author.editor.key.full}
-{ author empty$
- { editor empty$
- { key empty$
- { cite$ #1 #3 substring$ }
- 'key
- if$
- }
- { editor format.full.names }
- if$
- }
- { author format.full.names }
- if$
-}
-
-FUNCTION {author.key.full}
-{ author empty$
- { key empty$
- { cite$ #1 #3 substring$ }
- 'key
- if$
- }
- { author format.full.names }
- if$
-}
-
-FUNCTION {editor.key.full}
-{ editor empty$
- { key empty$
- { cite$ #1 #3 substring$ }
- 'key
- if$
- }
- { editor format.full.names }
- if$
-}
-
-FUNCTION {make.full.names}
-{ type$ "book" =
- type$ "inbook" =
- or
- 'author.editor.key.full
- { type$ "proceedings" =
- 'editor.key.full
- 'author.key.full
- if$
- }
- if$
-}
-
-FUNCTION {output.bibitem}
-{ newline$
- "\bibitem[" write$
- label write$
- ")" make.full.names duplicate$ short.list =
- { pop$ }
- { * }
- if$
- "]{" * write$
- cite$ write$
- "}" write$
- newline$
- ""
- before.all 'output.state :=
-}
-
-FUNCTION {n.dashify}
-{ 't :=
- ""
- { t empty$ not }
- { t #1 #1 substring$ "-" =
- { t #1 #2 substring$ "--" = not
- { "--" *
- t #2 global.max$ substring$ 't :=
- }
- { { t #1 #1 substring$ "-" = }
- { "-" *
- t #2 global.max$ substring$ 't :=
- }
- while$
- }
- if$
- }
- { t #1 #1 substring$ *
- t #2 global.max$ substring$ 't :=
- }
- if$
- }
- while$
-}
-
-FUNCTION {word.in}
-{ bbl.in capitalize
- " " * }
-
-FUNCTION {format.date}
-{ year duplicate$ empty$
- { "empty year in " cite$ * "; set to ????" * warning$
- pop$ "????" }
- 'skip$
- if$
- extra.label *
-}
-
-FUNCTION {format.btitle}
-{ title
-}
-
-FUNCTION {tie.or.space.connect}
-{ duplicate$ text.length$ #3 <
- { "~" }
- { " " }
- if$
- swap$ * *
-}
-
-FUNCTION {either.or.check}
-{ empty$
- 'pop$
- { "can't use both " swap$ * " fields in " * cite$ * warning$ }
- if$
-}
-
-FUNCTION {format.bvolume}
-{ volume empty$
- { "" }
- { bbl.volume volume tie.or.space.connect
- series empty$
- 'skip$
- { bbl.of space.word * series emphasize * }
- if$
- "volume and number" number either.or.check
- }
- if$
-}
-
-FUNCTION {format.number.series}
-{ volume empty$
- { number empty$
- { series field.or.null }
- { output.state mid.sentence =
- { bbl.number }
- { bbl.number capitalize }
- if$
- number tie.or.space.connect
- series empty$
- { "there's a number but no series in " cite$ * warning$ }
- { bbl.in space.word * series * }
- if$
- }
- if$
- }
- { "" }
- if$
-}
-
-FUNCTION {is.num}
-{ chr.to.int$
- duplicate$ "0" chr.to.int$ < not
- swap$ "9" chr.to.int$ > not and
-}
-
-FUNCTION {extract.num}
-{ duplicate$ 't :=
- "" 's :=
- { t empty$ not }
- { t #1 #1 substring$
- t #2 global.max$ substring$ 't :=
- duplicate$ is.num
- { s swap$ * 's := }
- { pop$ "" 't := }
- if$
- }
- while$
- s empty$
- 'skip$
- { pop$ s }
- if$
-}
-
-FUNCTION {convert.edition}
-{ edition extract.num "l" change.case$ 's :=
- s "first" = s "1" = or
- { bbl.first 't := }
- { s "second" = s "2" = or
- { bbl.second 't := }
- { s "third" = s "3" = or
- { bbl.third 't := }
- { s "fourth" = s "4" = or
- { bbl.fourth 't := }
- { s "fifth" = s "5" = or
- { bbl.fifth 't := }
- { s #1 #1 substring$ is.num
- { s eng.ord 't := }
- { edition 't := }
- if$
- }
- if$
- }
- if$
- }
- if$
- }
- if$
- }
- if$
- t
-}
-
-FUNCTION {format.edition}
-{ edition empty$
- { "" }
- { output.state mid.sentence =
- { convert.edition "l" change.case$ " " * bbl.edition * }
- { convert.edition "t" change.case$ " " * bbl.edition * }
- if$
- }
- if$
-}
-
-INTEGERS { multiresult }
-
-FUNCTION {multi.page.check}
-{ 't :=
- #0 'multiresult :=
- { multiresult not
- t empty$ not
- and
- }
- { t #1 #1 substring$
- duplicate$ "-" =
- swap$ duplicate$ "," =
- swap$ "+" =
- or or
- { #1 'multiresult := }
- { t #2 global.max$ substring$ 't := }
- if$
- }
- while$
- multiresult
-}
-
-FUNCTION {format.pages}
-{ pages empty$
- { "" }
- { pages multi.page.check
- { bbl.pages pages n.dashify tie.or.space.connect }
- { bbl.page pages tie.or.space.connect }
- if$
- }
- if$
-}
-
-FUNCTION {format.vol.num.pages}
-{ volume field.or.null
- bolden
- pages empty$
- 'skip$
- { duplicate$ empty$
- { pop$ format.pages }
- { ":" * pages n.dashify * }
- if$
- }
- if$
-}
-
-FUNCTION {format.chapter.pages}
-{ chapter empty$
- 'format.pages
- { type empty$
- { bbl.chapter }
- { type "l" change.case$ }
- if$
- chapter tie.or.space.connect
- pages empty$
- 'skip$
- { ", " * format.pages * }
- if$
- }
- if$
-}
-
-FUNCTION {format.in.ed.booktitle}
-{ booktitle empty$
- { "" }
- { editor empty$
- { word.in booktitle emphasize * }
- { word.in format.in.editors * ", " *
- booktitle emphasize * }
- if$
- }
- if$
-}
-
-FUNCTION {format.thesis.type}
-{ type empty$
- 'skip$
- { pop$
- type "t" change.case$
- }
- if$
-}
-
-FUNCTION {format.tr.number}
-{ type empty$
- { bbl.techrep }
- 'type
- if$
- number empty$
- { "t" change.case$ }
- { number tie.or.space.connect }
- if$
-}
-
-FUNCTION {format.article.crossref}
-{
- word.in
- " \cite{" * crossref * "}" *
-}
-
-FUNCTION {format.book.crossref}
-{ volume empty$
- { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
- word.in
- }
- { bbl.volume capitalize
- volume tie.or.space.connect
- bbl.of space.word *
- }
- if$
- " \cite{" * crossref * "}" *
-}
-
-FUNCTION {format.incoll.inproc.crossref}
-{
- word.in
- " \cite{" * crossref * "}" *
-}
-
-FUNCTION {article}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.title "title" output.check
- new.block
- crossref missing$
- { journal
- emphasize
- "journal" output.check
- add.blank
- format.vol.num.pages output
- }
- { format.article.crossref output.nonnull
- format.pages output
- }
- if$
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {book}
-{ output.bibitem
- author empty$
- { format.editors "author and editor" output.check
- editor format.key output
- }
- { format.authors output.nonnull
- crossref missing$
- { "author and editor" editor either.or.check }
- 'skip$
- if$
- }
- if$
- format.date "year" output.check
- date.block
- format.btitle "title" output.check
- crossref missing$
- { format.bvolume output
- new.block
- format.number.series output
- new.sentence
- publisher "publisher" output.check
- address output
- }
- {
- new.block
- format.book.crossref output.nonnull
- }
- if$
- format.edition output
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {booklet}
-{ output.bibitem
- format.authors output
- author format.key output
- format.date "year" output.check
- date.block
- format.title "title" output.check
- new.block
- howpublished output
- address output
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {inbook}
-{ output.bibitem
- author empty$
- { format.editors "author and editor" output.check
- editor format.key output
- }
- { format.authors output.nonnull
- crossref missing$
- { "author and editor" editor either.or.check }
- 'skip$
- if$
- }
- if$
- format.date "year" output.check
- date.block
- format.btitle "title" output.check
- crossref missing$
- { format.bvolume output
- format.chapter.pages "chapter and pages" output.check
- new.block
- format.number.series output
- new.sentence
- publisher "publisher" output.check
- address output
- }
- {
- format.chapter.pages "chapter and pages" output.check
- new.block
- format.book.crossref output.nonnull
- }
- if$
- format.edition output
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {incollection}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.title "title" output.check
- new.block
- crossref missing$
- { format.in.ed.booktitle "booktitle" output.check
- format.bvolume output
- format.number.series output
- format.chapter.pages output
- new.sentence
- publisher "publisher" output.check
- address output
- format.edition output
- }
- { format.incoll.inproc.crossref output.nonnull
- format.chapter.pages output
- }
- if$
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {inproceedings}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.title "title" output.check
- new.block
- crossref missing$
- { format.in.ed.booktitle "booktitle" output.check
- format.bvolume output
- format.number.series output
- format.pages output
- address output
- new.sentence
- organization output
- publisher output
- }
- { format.incoll.inproc.crossref output.nonnull
- format.pages output
- }
- if$
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {conference} { inproceedings }
-
-FUNCTION {manual}
-{ output.bibitem
- format.authors output
- author format.key output
- format.date "year" output.check
- date.block
- format.btitle "title" output.check
- organization address new.block.checkb
- organization output
- address output
- format.edition output
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {mastersthesis}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.btitle "title" output.check
- new.block
- bbl.mthesis format.thesis.type output.nonnull
- school "school" output.check
- address output
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {misc}
-{ output.bibitem
- format.authors output
- author format.key output
- format.date "year" output.check
- date.block
- format.title output
- new.block
- howpublished output
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {phdthesis}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.btitle "title" output.check
- new.block
- bbl.phdthesis format.thesis.type output.nonnull
- school "school" output.check
- address output
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {proceedings}
-{ output.bibitem
- format.editors output
- editor format.key output
- format.date "year" output.check
- date.block
- format.btitle "title" output.check
- format.bvolume output
- format.number.series output
- address output
- new.sentence
- organization output
- publisher output
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {techreport}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.title "title" output.check
- new.block
- format.tr.number output.nonnull
- institution "institution" output.check
- address output
- new.block
- note output
- fin.entry
-}
-
-FUNCTION {unpublished}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.title "title" output.check
- new.block
- note "note" output.check
- fin.entry
-}
-
-FUNCTION {default.type} { misc }
-
-READ
-
-FUNCTION {sortify}
-{ purify$
- "l" change.case$
-}
-
-INTEGERS { len }
-
-FUNCTION {chop.word}
-{ 's :=
- 'len :=
- s #1 len substring$ =
- { s len #1 + global.max$ substring$ }
- 's
- if$
-}
-
-FUNCTION {format.lab.names}
-{ 's :=
- s #1 "{vv~}{ll}" format.name$
- s num.names$ duplicate$
- #2 >
- { pop$ " et~al." * }
- { #2 <
- 'skip$
- { s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
- { " et~al." * }
- { bbl.and
- space.word * s #2 "{vv~}{ll}" format.name$ * }
- if$
- }
- if$
- }
- if$
-}
-
-FUNCTION {author.key.label}
-{ author empty$
- { key empty$
- { cite$ #1 #3 substring$ }
- 'key
- if$
- }
- { author format.lab.names }
- if$
-}
-
-FUNCTION {author.editor.key.label}
-{ author empty$
- { editor empty$
- { key empty$
- { cite$ #1 #3 substring$ }
- 'key
- if$
- }
- { editor format.lab.names }
- if$
- }
- { author format.lab.names }
- if$
-}
-
-FUNCTION {editor.key.label}
-{ editor empty$
- { key empty$
- { cite$ #1 #3 substring$ }
- 'key
- if$
- }
- { editor format.lab.names }
- if$
-}
-
-FUNCTION {calc.short.authors}
-{ type$ "book" =
- type$ "inbook" =
- or
- 'author.editor.key.label
- { type$ "proceedings" =
- 'editor.key.label
- 'author.key.label
- if$
- }
- if$
- 'short.list :=
-}
-
-FUNCTION {calc.label}
-{ calc.short.authors
- short.list
- "("
- *
- year duplicate$ empty$
- { pop$ "????" }
- 'skip$
- if$
- *
- 'label :=
-}
-
-FUNCTION {sort.format.names}
-{ 's :=
- #1 'nameptr :=
- ""
- s num.names$ 'numnames :=
- numnames 'namesleft :=
- { namesleft #0 > }
- { s nameptr
- "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}"
- format.name$ 't :=
- nameptr #1 >
- {
- " " *
- namesleft #1 = t "others" = and
- { "aaaaa" * }
- { t sortify * }
- if$
- }
- { t sortify * }
- if$
- nameptr #1 + 'nameptr :=
- namesleft #1 - 'namesleft :=
- }
- while$
-}
-
-FUNCTION {sort.format.title}
-{ 't :=
- "A " #2
- "An " #3
- "The " #4 t chop.word
- chop.word
- chop.word
- sortify
- #1 global.max$ substring$
-}
-
-FUNCTION {author.sort}
-{ author empty$
- { key empty$
- { "to sort, need author or key in " cite$ * warning$
- ""
- }
- { key sortify }
- if$
- }
- { author sort.format.names }
- if$
-}
-
-FUNCTION {author.editor.sort}
-{ author empty$
- { editor empty$
- { key empty$
- { "to sort, need author, editor, or key in " cite$ * warning$
- ""
- }
- { key sortify }
- if$
- }
- { editor sort.format.names }
- if$
- }
- { author sort.format.names }
- if$
-}
-
-FUNCTION {editor.sort}
-{ editor empty$
- { key empty$
- { "to sort, need editor or key in " cite$ * warning$
- ""
- }
- { key sortify }
- if$
- }
- { editor sort.format.names }
- if$
-}
-
-FUNCTION {presort}
-{ calc.label
- label sortify
- " "
- *
- type$ "book" =
- type$ "inbook" =
- or
- 'author.editor.sort
- { type$ "proceedings" =
- 'editor.sort
- 'author.sort
- if$
- }
- if$
- #1 entry.max$ substring$
- 'sort.label :=
- sort.label
- *
- " "
- *
- title field.or.null
- sort.format.title
- *
- #1 entry.max$ substring$
- 'sort.key$ :=
-}
-
-ITERATE {presort}
-
-SORT
-
-STRINGS { last.label next.extra }
-
-INTEGERS { last.extra.num number.label }
-
-FUNCTION {initialize.extra.label.stuff}
-{ #0 int.to.chr$ 'last.label :=
- "" 'next.extra :=
- #0 'last.extra.num :=
- #0 'number.label :=
-}
-
-FUNCTION {forward.pass}
-{ last.label label =
- { last.extra.num #1 + 'last.extra.num :=
- last.extra.num int.to.chr$ 'extra.label :=
- }
- { "a" chr.to.int$ 'last.extra.num :=
- "" 'extra.label :=
- label 'last.label :=
- }
- if$
- number.label #1 + 'number.label :=
-}
-
-FUNCTION {reverse.pass}
-{ next.extra "b" =
- { "a" 'extra.label := }
- 'skip$
- if$
- extra.label 'next.extra :=
- extra.label
- duplicate$ empty$
- 'skip$
- { "{" swap$ * "}" * }
- if$
- 'extra.label :=
- label extra.label * 'label :=
-}
-
-EXECUTE {initialize.extra.label.stuff}
-
-ITERATE {forward.pass}
-
-REVERSE {reverse.pass}
-
-FUNCTION {bib.sort.order}
-{ sort.label
- " "
- *
- year field.or.null sortify
- *
- " "
- *
- title field.or.null
- sort.format.title
- *
- #1 entry.max$ substring$
- 'sort.key$ :=
-}
-
-ITERATE {bib.sort.order}
-
-SORT
-
-FUNCTION {begin.bib}
-{ preamble$ empty$
- 'skip$
- { preamble$ write$ newline$ }
- if$
- "\begin{thebibliography}{" number.label int.to.str$ * "}" *
- write$ newline$
-}
-
-EXECUTE {begin.bib}
-
-EXECUTE {init.state.consts}
-
-ITERATE {call.type$}
-
-FUNCTION {end.bib}
-{ newline$
- "\end{thebibliography}" write$ newline$
-}
-
-EXECUTE {end.bib}
-%% End of customized bst file
-%%
-%% End of file `ecology.bst'. \ No newline at end of file
diff --git a/vignettes/ecology.csl b/vignettes/ecology.csl
new file mode 100644
index 0000000..1d3c0c7
--- /dev/null
+++ b/vignettes/ecology.csl
@@ -0,0 +1,188 @@
+<?xml version="1.0" encoding="utf-8"?>
+<style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" default-locale="en-US" version="1.0" demote-non-dropping-particle="sort-only">
+ <info>
+ <title>Ecology</title>
+ <id>http://www.zotero.org/styles/ecology</id>
+ <link href="http://www.zotero.org/styles/ecology" rel="self"/>
+ <link href="http://esapubs.org/esapubs/AuthorInstructions.htm" rel="documentation"/>
+ <author>
+ <name>Rintze Zelle</name>
+ <uri>http://twitter.com/rintzezelle</uri>
+ </author>
+ <category citation-format="author-date"/>
+ <category field="biology"/>
+ <issn>0012-9658</issn>
+ <updated>2012-09-27T22:06:38+00:00</updated>
+ <rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License</rights>
+ </info>
+ <macro name="container-contributors">
+ <choose>
+ <if type="chapter paper-conference" match="any">
+ <text term="in" suffix=" " font-style="italic"/>
+ <names variable="editor translator" delimiter=", ">
+ <name and="text" initialize-with=". " delimiter=", "/>
+ <label form="long" prefix=", " suffix="."/>
+ </names>
+ </if>
+ </choose>
+ </macro>
+ <macro name="secondary-contributors">
+ <choose>
+ <if type="chapter paper-conference" match="none">
+ <names variable="editor translator" delimiter=", " prefix=" (" suffix=")">
+ <name and="text" initialize-with=". " delimiter=", "/>
+ <label form="short" prefix=", " text-case="capitalize-first"/>
+ </names>
+ </if>
+ </choose>
+ </macro>
+ <macro name="author">
+ <names variable="author">
+ <name name-as-sort-order="first" and="text" sort-separator=", " initialize-with=". " delimiter=", " delimiter-precedes-last="always"/>
+ <label prefix=", "/>
+ <substitute>
+ <names variable="editor"/>
+ <names variable="translator"/>
+ <text variable="title"/>
+ </substitute>
+ </names>
+ </macro>
+ <macro name="author-short">
+ <names variable="author">
+ <name form="short" and="text" delimiter=", " initialize-with=". "/>
+ <substitute>
+ <names variable="editor"/>
+ <names variable="translator"/>
+ <choose>
+ <if type="bill book graphic legal_case legislation motion_picture report song" match="any">
+ <text variable="title" form="short" font-style="italic"/>
+ </if>
+ <else>
+ <text variable="title" form="short" quotes="true"/>
+ </else>
+ </choose>
+ </substitute>
+ </names>
+ </macro>
+ <macro name="access">
+ <choose>
+ <if type="webpage">
+ <text variable="URL"/>
+ </if>
+ </choose>
+ </macro>
+ <macro name="publisher">
+ <group delimiter=", ">
+ <choose>
+ <if type="article-journal article-magazine" match="none">
+ <text variable="genre"/>
+ <text variable="publisher"/>
+ <text variable="publisher-place"/>
+ </if>
+ </choose>
+ </group>
+ </macro>
+ <macro name="issued">
+ <choose>
+ <if variable="issued">
+ <group prefix=" " suffix=".">
+ <date variable="issued">
+ <date-part name="year"/>
+ </date>
+ <choose>
+ <if type="article-journal bill book chapter graphic legal_case legislation motion_picture paper-conference report song" match="none">
+ <date variable="issued">
+ <date-part prefix=", " name="month"/>
+ <date-part prefix=" " name="day"/>
+ </date>
+ </if>
+ </choose>
+ </group>
+ </if>
+ <else>
+ <text prefix=" (" term="no date" suffix=")." form="short"/>
+ </else>
+ </choose>
+ </macro>
+ <macro name="issued-year">
+ <choose>
+ <if variable="issued">
+ <date variable="issued">
+ <date-part name="year"/>
+ </date>
+ </if>
+ <else>
+ <text term="no date" form="short"/>
+ </else>
+ </choose>
+ </macro>
+ <macro name="edition">
+ <choose>
+ <if type="bill book chapter graphic legal_case legislation motion_picture paper-conference report song" match="any">
+ <choose>
+ <if is-numeric="edition">
+ <number variable="edition" form="long-ordinal" text-case="capitalize-first"/>
+ <text term="edition" form="long" prefix=" " suffix="."/>
+ </if>
+ <else>
+ <text variable="edition" suffix="."/>
+ </else>
+ </choose>
+ </if>
+ </choose>
+ </macro>
+ <macro name="locators">
+ <choose>
+ <if type="article-journal article-magazine article-newspaper" match="any">
+ <text variable="container-title" prefix=". "/>
+ <text variable="volume" prefix=" "/>
+ <text variable="page" prefix=":"/>
+ </if>
+ <else-if type="bill book chapter graphic legal_case legislation motion_picture paper-conference report song" match="any">
+ <group prefix=". " delimiter=" ">
+ <label variable="page" form="long" text-case="capitalize-first"/>
+ <text variable="page"/>
+ <text macro="container-contributors"/>
+ <text macro="secondary-contributors"/>
+ <text variable="container-title"/>
+ </group>
+ </else-if>
+ </choose>
+ </macro>
+ <macro name="citation-locator">
+ <group>
+ <label variable="locator" form="short"/>
+ <text variable="locator" prefix=" "/>
+ </group>
+ </macro>
+ <citation et-al-min="3" et-al-use-first="1" disambiguate-add-year-suffix="true" collapse="year">
+ <sort>
+ <key variable="issued"/>
+ <key macro="author"/>
+ </sort>
+ <layout prefix="(" suffix=")" delimiter=", ">
+ <group delimiter=" ">
+ <text macro="author-short"/>
+ <text macro="issued-year"/>
+ <text macro="citation-locator"/>
+ </group>
+ </layout>
+ </citation>
+ <bibliography hanging-indent="true" entry-spacing="0" line-spacing="2">
+ <sort>
+ <key macro="author"/>
+ <key variable="issued"/>
+ </sort>
+ <layout suffix=".">
+ <text macro="author" suffix="."/>
+ <text macro="issued" suffix=" "/>
+ <text variable="title"/>
+ <text macro="locators"/>
+ <group delimiter=". " prefix=". ">
+ <text macro="edition"/>
+ <text macro="publisher"/>
+ <text macro="access"/>
+ </group>
+ </layout>
+ </bibliography>
+</style>
diff --git a/vignettes/occuMulti.Rnw b/vignettes/occuMulti.Rmd
index 21afa3b..25eae85 100644
--- a/vignettes/occuMulti.Rnw
+++ b/vignettes/occuMulti.Rmd
@@ -1,243 +1,209 @@
-<<echo=false>>=
-options(width=70)
-options(continue=" ")
-@
-
-\documentclass[a4paper]{article}
-\usepackage[OT1]{fontenc}
-\usepackage{Sweave}
-\usepackage{natbib}
-\usepackage{amsmath}
-%\usepackage{fullpage}
-\usepackage[vmargin=1in,hmargin=1in]{geometry}
-\bibliographystyle{ecology}
-
-\usepackage{hyperref}
-\hypersetup{
- colorlinks=true,
- linkcolor=blue,
- urlcolor=cyan,
- citecolor=black
-}
-
-\DefineVerbatimEnvironment{Sinput}{Verbatim} {xleftmargin=2em}
-\DefineVerbatimEnvironment{Soutput}{Verbatim}{xleftmargin=2em}
-\DefineVerbatimEnvironment{Scode}{Verbatim}{xleftmargin=2em}
-\fvset{listparameters={\setlength{\topsep}{0pt}}}
-\renewenvironment{Schunk}{\vspace{\topsep}}{\vspace{\topsep}}
-
-%%\VignetteIndexEntry{Multispecies occupancy models with occuMulti}
-
-\title{Multispecies occupancy models with occuMulti}
-\author{Ken Kellner}
-\date{November 15, 2021}
-
-
-\begin{document}
-
-\newcommand{\code}[1]{\texttt{\small{#1}}}
-\newcommand{\package}[1]{\textsf{\small{#1}}}
-
-\maketitle
-
-\section*{Outline}
-
-\begin{enumerate}
- \item Introduction
- \item Simple multispecies analysis
- \item Analysis with covariates
- \item Model selection
- \item Model fitting challenges
- \item Penalized likelihood
-\end{enumerate}
-
-
-\section{Introduction}
-
-The Rota et al. (\citeyear{Rota2016}) occupancy model is designed for presence/absence datasets with two or more (potentially) interacting species.
+---
+title: Multispecies occupancy models with occuMulti
+author: Ken Kellner
+date: November 15, 2021
+bibliography: unmarked.bib
+csl: ecology.csl
+output:
+ rmarkdown::html_vignette:
+ fig_width: 5
+ fig_height: 3.5
+ number_sections: true
+ toc: true
+vignette: >
+ %\VignetteIndexEntry{Multispecies occupancy models with occuMulti}
+ %\VignetteEngine{knitr::rmarkdown}
+ \usepackage[utf8]{inputenc}
+
+---
+
+```{r,echo=FALSE}
+options(rmarkdown.html_vignette.check_title = FALSE)
+```
+
+# Introduction
+
+The @Rota2016 occupancy model is designed for presence/absence datasets with two or more (potentially) interacting species.
The model allows for estimation of occupancy probabilities and the strength of interactions between species, as well as covariate effects on these parameters.
-The model generalizes the standard single-species occupancy model from MacKenzie et al. (\citeyear{mackenzie_estimating_2002}).
+The model generalizes the standard single-species occupancy model from @mackenzie_estimating_2002.
The model assumes the latent occupancy state at site $i$ for a set of $s$ potentially interacting species is a vector $Z_i$ of length $s$ containing a sequence of the values 0 or 1.
For example, when $s = 2$, the possible states are [11], [10], [01], or [00], corresponding to both species present, only species 1 or species 2 present, or both species absent, respectively.
The latent state is modeled as a multivariate Bernoulli random variable:
-\begin{equation}
+$$
Z_i \sim \mathrm{MVB}(\psi_i)
-\end{equation}
+$$
where $\psi_i$ is a vector of length $2^s$ containing the probability of each possible combination of 0s and 1s, such that $\sum \psi_i = 1$.
For $s = 2$, the corresponding natural parameters, $f$, are
-\begin{equation}
+$$
\begin{split}
f_1 &= \mathrm{log}(\psi_{10}/\psi_{00}) \\
f_2 &= \mathrm{log}(\psi_{01}/\psi_{00}) \\
f_{12} &= \mathrm{log}((\psi_{11} * \psi_{00})/(\psi_{10} * \psi_{01}))
\end{split}
-\end{equation}
+$$
The natural parameters can then be modeled as linear functions of covariates.
The observation process is similar to the standard single-species occupancy model, except that the observations $y_{ij}$ at site $i$ on occasion $j$ are vectors of length $s$ and there are independent values of detection probability $p$ for each species $s$:
-\begin{equation}
+$$
y_{ij} | Z_i \sim \mathrm{Bernoulli}(Z_i * p_{sij})
-\end{equation}
-See \citet{Rota2016} for more details on model structure.
-In \code{unmarked}, the model can be fit with the \code{occuMulti} function.
+$$
+See @Rota2016 for more details on model structure.
+In `unmarked`, the model can be fit with the `occuMulti` function.
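+
+As a purely numerical illustration (not evaluated, with arbitrary values), the natural parameters map onto the state probabilities as follows:
+
+```{r, eval=FALSE}
+f1 <- 0.5; f2 <- -0.3; f12 <- 1.0       # arbitrary natural parameter values
+unnorm <- c("11" = exp(f1 + f2 + f12),  # both species present
+            "10" = exp(f1),             # only species 1 present
+            "01" = exp(f2),             # only species 2 present
+            "00" = exp(0))              # both species absent (reference state)
+round(unnorm / sum(unnorm), 3)          # psi for each state; sums to 1
+```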
-\section{Simple multispecies analysis}
+# Simple multispecies analysis
We will start with a simple analysis using presence/absence data from camera traps for three species: coyote, red fox, and bobcat.
-The data are a simplified version of the data used in \citet{Rota2016}, with the data collapsed three into three-week long time periods.
+The data are a simplified version of the data used in @Rota2016, collapsed into three-week-long time periods.
-\subsection{Formatting the data}
+## Formatting the data
-The dataset is called \code{MesoCarnivores} and is provided by the \code{AHMbook} package.
+The dataset is included with `unmarked` and is called `MesoCarnivores`.
First, we need to load in the dataset, which is a list with several components.
-<<>>=
+```{r}
library(unmarked)
-library(AHMbook)
data(MesoCarnivores)
names(MesoCarnivores)
-@
+```
-Presence/absence matrices for the three species are in list elements \code{bobcat}, \code{coyote}, and \code{redfox}, and \code{sitecovs} contains the site-level covariate data.
+Presence/absence matrices for the three species are in list elements `bobcat`, `coyote`, and `redfox`, and `sitecovs` contains the site-level covariate data.
-Using this information, we will create an \code{unmarkedFrameOccuMulti} object.
-You can get more information by looking at the help file for \code{unmarkedFrameOccuMulti}:
+Using this information, we will create an `unmarkedFrameOccuMulti` object.
+You can get more information by looking at the help file for `unmarkedFrameOccuMulti`:
-<<eval=FALSE>>=
+```{r, eval=FALSE}
?unmarkedFrameOccuMulti
-@
+```
First we combine the detection data for the 3 species into one named list.
These names will be used throughout the multispecies analysis, so pick useful ones.
-<<>>=
+```{r}
ylist <- list(bobcat=MesoCarnivores$bobcat, coyote=MesoCarnivores$coyote,
redfox=MesoCarnivores$redfox)
lapply(ylist, head)
-@
+```
The site covariates are contained in a data frame:
-<<>>=
+```{r}
head(MesoCarnivores$sitecovs)
-@
+```
-The site covariates include a metric of disturbance in a 5 km radius (\code{Dist\_5km}), housing density in a 5 km radius (\code{HDens\_5km}), latitude, longitude, a metric of how many people use the site (\code{People\_site}) and whether the camera site is on (or off) a trail.
-Using our \code{ylist}, site covariates, and observation covariates, we can construct an \code{unmarkedFrameOccuMulti} object.
+The site covariates include a metric of disturbance in a 5 km radius (`Dist_5km`), housing density in a 5 km radius (`HDens_5km`), latitude, longitude, a metric of how many people use the site (`People_site`) and whether the camera site is on (or off) a trail.
+Using our `ylist`, site covariates, and observation covariates, we can construct an `unmarkedFrameOccuMulti` object.
-<<>>=
+```{r}
umf <- unmarkedFrameOccuMulti(y=ylist, siteCovs=MesoCarnivores$sitecovs)
-@
+```
-\subsection{Occupancy formulas}
+## Occupancy formulas
-While most \code{unmarked} models have only one or two formulas (and a single formula for occupancy or abundance), \code{occuMulti} requires one formula per natural parameter $f$.
+While most `unmarked` models have only one or two formulas (and a single formula for occupancy or abundance), `occuMulti` requires one formula per natural parameter $f$.
Thus, there will be multiple formulas associated with occupancy.
These formulas are organized into an ordered character vector.
It can be hard to keep track of how many natural parameters there are and what each one represents.
-It can be helpful to look at the $f$-design matrix, which is generated by \code{unmarkedFrameOccuMulti}.
+It can be helpful to look at the $f$-design matrix, which is generated by `unmarkedFrameOccuMulti`.
-<<>>=
+```{r}
umf@fDesign
-@
+```
The number and order of the formulas in the vector should match the column names of this matrix.
There are 7 columns in the matrix: thus, we'll need 7 formulas total, and they should be provided in the following order:
-<<>>=
+```{r}
colnames(umf@fDesign)
-@
+```
For this model we'll set the 1st and 2nd-order $f$ parameters to be intercept-only, and fix the 3rd order parameter at 0.
-We will combine our formulas into a vector called \code{stateformulas}, like this:
+We will combine our formulas into a vector called `stateformulas`, like this:
-<<>>=
+```{r}
stateformulas <- c("~1","~1","~1","~1","~1","~1","0")
-@
+```
-Notice that the formulas are character strings (each wrapped in \code{""}). This is required.
+Notice that the formulas are character strings (each wrapped in `""`). This is required.
-\subsection{Detection formulas}
+## Detection formulas
Each species has its own detection probability formula.
-Thus, there should be 3 total formulas combined in a \code{detformulas} vector.
-The order of the formulas should match the order of species in \code{ylist}.
+Thus, there should be 3 total formulas combined in a `detformulas` vector.
+The order of the formulas should match the order of species in `ylist`.
For this model, all three species will have intercept-only detection formulas.
-<<>>=
+```{r}
detformulas <- c("~1","~1","~1")
-@
+```
-\subsection{Fit the model}
+## Fit the model
-First, look at the help file for \code{occuMulti} to check what the required arguments are:
+First, look at the help file for `occuMulti` to check what the required arguments are:
-<<eval=FALSE>>=
+```{r, eval=FALSE}
?occuMulti
-@
+```
-We now have all the pieces we need (\code{unmarkedFrameOccuMulti}, \code{stateformulas}, \code{detformulas}) needed to run a basic model which we will call \code{mod\_null}.
+We now have all the pieces (`unmarkedFrameOccuMulti`, `stateformulas`, `detformulas`) needed to run a basic model, which we will call `mod_null`.
-<<>>=
+```{r}
mod_null <- occuMulti(detformulas=detformulas, stateformulas=stateformulas, data=umf)
summary(mod_null)
-@
+```
The regression parameters associated with each $f$ are identified by the species name (or combination of species names) in brackets.
A few things to notice:
-\begin{itemize}
- \item Coyote occupancy is the highest of the three species.
- \item Negative relationship between bobcat and red fox
- \item Positive relationships between coyote and the other two species
- \item There is no three-species interaction term in the summary, because we fixed it at 0.
-\end{itemize}
+* Coyote occupancy is the highest of the three species.
+* There is a negative relationship between bobcat and red fox.
+* There are positive relationships between coyote and each of the other two species.
+* There is no three-species interaction term in the summary, because we fixed it at 0.
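+One quick way to check these interpretations (an added sketch, not part of the original analysis) is to look at 95% confidence intervals for the occupancy parameters and see which interaction terms exclude zero:
+```{r, eval=FALSE}
+# 95% CIs for the occupancy (state) parameters of the null model
+confint(mod_null, type="state")
+```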
-\subsection{Occupancy probabilities}
+## Occupancy probabilities
-To get the expected probability for each occupancy state ([11], [10] and so on) at each site, use the \code{predict} function.
+To get the expected probability for each occupancy state ([11], [10] and so on) at each site, use the `predict` function.
This gives you the probabilities along with standard errors and a 95% CI.
-<<>>=
+```{r}
occ_prob <- predict(mod_null, type="state")
head(occ_prob$Predicted)
-@
+```
The rows of this matrix should sum to 1.
All rows are the same because estimated occupancies at all sites are the same - we didn't include any covariates.
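+We can verify this with a quick check:
+```{r, eval=FALSE}
+# each row of state probabilities should sum to 1
+head(rowSums(occ_prob$Predicted))
+```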
-\subsection{Marginal occupancy}
+## Marginal occupancy
It's often more interesting to look at occupancy for species individually.
For example, you might want to know the marginal occupancy of one species at each site (across all possible occupancy states).
-You can do this by specifying the \code{species} argument in \code{predict}, which will automatically sum up the appropriate occupancy states.
+You can do this by specifying the `species` argument in `predict`, which will automatically sum up the appropriate occupancy states.
-<<>>=
+```{r}
redfox_marginal <- predict(mod_null, type="state", species="redfox")
head(redfox_marginal)
-@
+```
-\subsection{Plotting marginal occupancy}
+## Plotting marginal occupancy
-Outputs from \code{predict} can be used to compare marginal occupancy across species with a plot.
+Outputs from `predict` can be used to compare marginal occupancy across species with a plot.
First, we'll need to get marginal occupancy for the other two species, and combine the three estimates into a single data frame.
-<<>>=
+```{r}
coy_marginal <- predict(mod_null, type="state", species="coyote")
bob_marginal <- predict(mod_null, type="state", species="bobcat")
all_marginal <- rbind(redfox_marginal[1,], coy_marginal[1,], bob_marginal[1,])
all_marginal$Species <- c("Red fox", "Coyote", "Bobcat")
-@
+```
Now we can plot the estimated marginal occupancy for each species, along with 95% CIs.
-<<fig=TRUE>>=
+```{r, fig.height=5}
plot(1:3, all_marginal$Predicted, ylim=c(0.1,0.4),
xlim=c(0.5,3.5), pch=19, cex=1.5, xaxt='n',
xlab="", ylab="Marginal occupancy and 95% CI")
@@ -250,31 +216,31 @@ for (i in 1:3){
segments(i-top, all_marginal$lower[i], i+top)
segments(i-top, all_marginal$upper[i], i+top)
}
-@
+```
-\subsection{Conditional occupancy}
+## Conditional occupancy
-Alternatively, you might want to know the probability of occupancy of one species, conditional on the presence of another. Use the \code{species} and \code{cond} arguments together for this.
+Alternatively, you might want to know the probability of occupancy of one species, conditional on the presence of another. Use the `species` and `cond` arguments together for this.
For example, the probability of red fox occupancy, conditional on coyote presence:
-<<>>=
+```{r}
redfox_coy <- predict(mod_null, type="state", species="redfox", cond="coyote")
head(redfox_coy)
-@
+```
What about conditional on coyote *absence*?
-Simply add a \code{"-"} before the species name.
+Simply add a `"-"` before the species name.
-<<>>=
+```{r}
redfox_nocoy <- predict(mod_null, type="state", species="redfox", cond="-coyote")
head(redfox_nocoy)
-@
+```
-\subsection{Plotting conditional occupancy}
+## Plotting conditional occupancy
-You can use this output from \code{predict} to generate comparison plots.
+You can use this output from `predict` to generate comparison plots.
-<<fig=TRUE>>=
+```{r, fig.height=5}
cond_data <- rbind(redfox_coy[1,], redfox_nocoy[1,])
cond_data$Coyote_status <- c("Present","Absent")
@@ -290,179 +256,217 @@ for (i in 1:2){
segments(i-top, cond_data$lower[i], i+top)
segments(i-top, cond_data$upper[i], i+top)
}
-@
+```
Note that red fox occupancy is higher at sites where coyotes were present, which corresponds with the positive interaction term between the two species we saw in the model output summary.
-\section{Multispecies model with covariates}
+# Multispecies model with covariates
Now we'll fit a model with covariates on some natural parameters.
It might be helpful to look at the order for our $f$ parameters again:
-<<>>=
+```{r}
colnames(umf@fDesign)
-@
+```
And our available site covariates:
-<<>>=
+```{r}
head(siteCovs(umf))
-@
+```
-\subsection{Add housing density as a covariate}
+## Add housing density as a covariate
-We'll fit a model with an effect of housing density (\code{HDens\_5km}) on the first-order parameters for all three species.
+We'll fit a model with an effect of housing density (`HDens_5km`) on the first-order parameters for all three species.
The two-way interactions will remain intercept-only, and the three-way interaction will remain fixed at 0.
Here's the vector of $f$ formulas:
-<<>>=
+```{r}
sf <- c("~HDens_5km","~HDens_5km","~HDens_5km","~1","~1","~1","0")
-@
+```
Inside your formula, you can wrap the variable name in `scale()` to standardize it, which we should do, because the housing density variable is not scaled.
-<<>>=
+```{r}
sf <- c("~scale(HDens_5km)","~scale(HDens_5km)","~scale(HDens_5km)","~1","~1","~1","0")
-@
+```
-Detection formulas will remain the same, so we're now ready to fit a new model, \code{mod\_hdens}.
+Detection formulas will remain the same, so we're now ready to fit a new model, `mod_hdens`.
-<<>>=
+```{r}
mod_hdens <- occuMulti(stateformulas=sf, detformulas=detformulas, umf)
summary(mod_hdens)
-@
+```
A few things to note from the results:
-\begin{itemize}
- \item Housing density has a significant negative effect on occupancy of bobcat
- \item Housing density has a significant positive effect on red fox
- \item No effect of housing density on coyote.
-\end{itemize}
+* Housing density has a significant negative effect on bobcat occupancy.
+* Housing density has a significant positive effect on red fox occupancy.
+* Housing density has no significant effect on coyote occupancy.
-\subsection{Plotting covariate effects}
+## Plotting covariate effects
-To plot the effect of housing density on marginal occupancy, we again use \code{predict}.
-First, we need to generate sequence of possible \code{Hdens\_5km} values for the X-axis of our plot.
+To plot the effect of housing density on marginal occupancy, we again use `predict`.
+First, we need to generate a sequence of possible `HDens_5km` values for the x-axis of our plot.
We'll generate a sequence of 100 values, starting at the minimum observed housing density and ending at the maximum observed value.
-<<>>=
+```{r}
hdens_range <- range(siteCovs(umf)$HDens_5km)
hdens_seq <- seq(hdens_range[1], hdens_range[2], length.out=100)
-@
+```
-Next, we'll \code{predict} marginal coyote occupancy at each value of \code{Hdens\_5km} along our sequence.
-Our custom housing density values should be in a data frame and provided to the \code{newdata} argument.
-Because we used \code{scale()} in our formulas above, there is no need to manually scale these new housing density values - \code{unmarked} will do it for us.
+Next, we'll `predict` marginal coyote occupancy at each value of `HDens_5km` along our sequence.
+Our custom housing density values should be in a data frame and provided to the `newdata` argument.
+Because we used `scale()` in our formulas above, there is no need to manually scale these new housing density values - `unmarked` will do it for us.
-<<>>=
+```{r}
nd <- data.frame(HDens_5km = hdens_seq)
occ_hdens_coy <- predict(mod_hdens, type="state", species="coyote", newdata=nd)
occ_hdens_coy$Species <- "Coyote"
occ_hdens_coy$Hdens <- hdens_seq
head(occ_hdens_coy)
-@
+```
We'll do the same thing for the other two species.
-<<>>=
+```{r}
occ_hdens_bob <- predict(mod_hdens, type="state", species="bobcat", newdata=nd)
occ_hdens_fox <- predict(mod_hdens, type="state", species="redfox", newdata=nd)
occ_hdens_bob$Species <- "Bobcat"
occ_hdens_fox$Species <- "Red fox"
occ_hdens_bob$Hdens <- hdens_seq
occ_hdens_fox$Hdens <- hdens_seq
-@
+```
Finally, we'll build our plot.
Housing density will be on the x-axis, marginal occupancy on the y-axis, and species will be identified by colors.
-<<fig=TRUE>>=
+```{r, fig.height=5}
plot(occ_hdens_coy$Hdens, occ_hdens_coy$Predicted, type='l', ylim=c(0,0.6),
col='red', lwd=2, xlab="Housing density", ylab="Marginal occupancy")
lines(occ_hdens_bob$Hdens, occ_hdens_bob$Predicted, col='blue', lwd=2)
lines(occ_hdens_fox$Hdens, occ_hdens_fox$Predicted, col='green', lwd=2)
legend('topleft', col=c('red', 'blue', 'green'), lty=1,
legend=c("Coyote", "Bobcat", "Red fox"))
-@
+```
-\section{Model selection}
+# Model selection
-\code{unmarked} can calculate AIC, $\Delta$AIC, and weights for a list of models automatically.
-Start by creating a \code{fitList} object containing our two models:
+`unmarked` can calculate AIC, $\Delta$AIC, and weights for a list of models automatically.
+Start by creating a `fitList` object containing our two models:
-<<>>=
+```{r}
mods <- fitList(mod_null, mod_hdens)
-@
+```
-Then call the function \code{modSel} on our list of models to generate a model selection table:
+Then call the function `modSel` on our list of models to generate a model selection table:
-<<>>=
+```{r}
modSel(mods)
-@
+```
Based on AIC, the model with housing density is better supported.
-\section{Model fitting challenges}
+# Model fitting challenges
Multispecies occupancy models often have many parameters, and can be difficult to fit in some situations.
You might get poor estimates (i.e., very large absolute values and/or large SEs) under certain conditions:
-\begin{itemize}
- \item Sparse data (many 0s)
- \item Boundary estimates (occupancy close to 0 or 1)
- \item Few observations where multiple species are detected
- \item Separation (perfect correlation with covariate)
-\end{itemize}
+* Sparse data (many 0s)
+* Boundary estimates (occupancy close to 0 or 1)
+* Few observations where multiple species are detected
+* Separation (perfect correlation with covariate)
Here's an example of a complex model with many covariates that results in poor estimates.
-<<>>=
+```{r, eval=FALSE}
state_complex <- c(rep("~scale(Dist_5km)+scale(HDens_5km)", 6), 0)
det_complex <- rep("~Trail",3)
mod_complex <- occuMulti(stateformulas=state_complex, detformulas=det_complex, umf)
summary(mod_complex)
-@
+```
+
+```
+##
+## Call:
+## occuMulti(detformulas = det_complex, stateformulas = state_complex,
+## data = umf, maxOrder = 3L)
+##
+## Occupancy (logit-scale):
+## Estimate SE z P(>|z|)
+## [bobcat] (Intercept) -23.0171 5.784 -3.980 6.90e-05
+## [bobcat] scale(Dist_5km) -2.4249 0.689 -3.519 4.34e-04
+## [bobcat] scale(HDens_5km) -82.3836 19.788 -4.163 3.14e-05
+## [coyote] (Intercept) -0.6789 0.225 -3.017 2.55e-03
+## [coyote] scale(Dist_5km) -0.0176 0.139 -0.127 8.99e-01
+## [coyote] scale(HDens_5km) -0.5534 0.748 -0.740 4.59e-01
+## [redfox] (Intercept) -1.3946 0.257 -5.425 5.78e-08
+## [redfox] scale(Dist_5km) -0.5293 0.250 -2.115 3.45e-02
+## [redfox] scale(HDens_5km) 0.2108 0.261 0.808 4.19e-01
+## [bobcat:coyote] (Intercept) 6.7598 6.384 1.059 2.90e-01
+## [bobcat:coyote] scale(Dist_5km) 1.6979 0.695 2.445 1.45e-02
+## [bobcat:coyote] scale(HDens_5km) 17.9202 21.442 0.836 4.03e-01
+## [bobcat:redfox] (Intercept) 15.3983 3.462 4.448 8.67e-06
+## [bobcat:redfox] scale(Dist_5km) 0.8836 0.439 2.014 4.40e-02
+## [bobcat:redfox] scale(HDens_5km) 64.1330 12.377 5.182 2.20e-07
+## [coyote:redfox] (Intercept) 1.1084 0.363 3.050 2.29e-03
+## [coyote:redfox] scale(Dist_5km) 0.1149 0.340 0.338 7.35e-01
+## [coyote:redfox] scale(HDens_5km) 0.9046 0.781 1.159 2.47e-01
+##
+## Detection (logit-scale):
+## Estimate SE z P(>|z|)
+## [bobcat] (Intercept) -2.83 0.1419 -19.91 3.10e-88
+## [bobcat] Trail 1.74 0.1542 11.26 2.10e-29
+## [coyote] (Intercept) -1.96 0.0984 -19.88 5.89e-88
+## [coyote] Trail 2.17 0.1220 17.75 1.63e-70
+## [redfox] (Intercept) -1.59 0.1601 -9.93 2.97e-23
+## [redfox] Trail 1.78 0.1997 8.93 4.12e-19
+##
+## AIC: 5958.196
+## Number of sites: 1437
+## optim convergence code: 0
+## optim iterations: 196
+## Bootstrap iterations: 0
+```
+
Note that several estimates are very large (>10) and also have large SEs.
You should be very skeptical about using a model with poor estimates, like this one, for inference.
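+One informal way to screen for this problem (the threshold below is arbitrary) is to look at the standard errors directly:
+```{r, eval=FALSE}
+# pull out all standard errors and flag the very large ones
+big_se <- SE(mod_complex)
+big_se[big_se > 5]
+```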
Potential solutions when you get poor estimates include:
-\begin{itemize}
- \item Fit simpler models with fewer covariates
- \item If possible, fit the model with fewer species
- \item Adjust observation period length if possible (e.g. collapse from one-week periods to three-week periods)
- \item Use penalized likelihood to fit the model
-\end{itemize}
+* Fit simpler models with fewer covariates
+* If possible, fit the model with fewer species
+* Adjust observation period length if possible (e.g. collapse from one-week periods to three-week periods)
+* Use penalized likelihood to fit the model
-\section{Penalized likelihood}
+# Penalized likelihood
-\code{occuMulti} uses maximum likelihood to estimate parameters.
+`occuMulti` uses maximum likelihood to estimate parameters.
We can add a "penalty" to the calculated likelihood to keep parameter estimates from getting stuck at huge values.
-Use of penalized likelihiood has been shown to help with separation/boundary issues, eliminate unreasonably large estimates, and reduce error,
+Use of penalized likelihood has been shown to help with separation/boundary issues, eliminate unreasonably large estimates, and reduce error.
However, note that the penalty term introduces a small amount of bias in the parameter estimates; thus, we are making a tradeoff between bias and variance.
Given the huge SEs in the previous model, this may be a good tradeoff to make.
One type of penalty is the "Bayes" penalty:
-\begin{equation}
+$$
-\lambda\frac{1}{2}\sum_i{}\theta_i^2
-\end{equation}
+$$
In the formula above, $\lambda$ is the penalty value, and $\theta$ is the vector of estimated parameters.
As the parameter values get bigger, the total penalty increases.
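+For instance, the size of the penalty term for a given $\lambda$ and coefficient vector can be computed directly (arbitrary values shown):
+```{r, eval=FALSE}
+lambda <- 1
+theta <- c(-0.7, 2.4, 15.4)  # hypothetical coefficients; larger values give a larger penalty
+lambda * 0.5 * sum(theta^2)
+```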
-\subsection{Penalized likelihood with occuMulti}
+## Penalized likelihood with occuMulti
-\code{occuMulti} can use penalized likelihood to fit models.
-You can provide a value to the \code{penalty} argument directly, or use the \code{optimizePenalty} function on a fitted model to choose the best value of $\lambda$ using K-fold cross-validation, and re-fit the model using the optimal penalty term value.
+`occuMulti` can use penalized likelihood to fit models.
+You can provide a value to the `penalty` argument directly, or use the `optimizePenalty` function on a fitted model to choose the best value of $\lambda$ using K-fold cross-validation, and re-fit the model using the optimal penalty term value.
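+For example, to refit the complex model from above with a fixed penalty of 1 (a sketch, not run here):
+```{r, eval=FALSE}
+mod_pen1 <- occuMulti(stateformulas=state_complex, detformulas=det_complex,
+                      data=umf, penalty=1)
+```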
-<<eval=FALSE>>=
+```{r,eval=FALSE}
set.seed(123)
mod_penalty <- optimizePenalty(mod_complex, penalties=c(0.5,1))
summary(mod_penalty)
-@
+```
-<<eval=FALSE>>=
+```
## Optimal penalty is 1
## Bootstraping covariance matrix
@@ -508,11 +512,10 @@ summary(mod_penalty)
## optim convergence code: 0
## optim iterations: 100
## Bootstrap iterations: 30
-@
+```
Notice that parameter estimates and SEs for the model using penalized likelihood are no longer gigantic.
-For more info on the use of penalized likelihood with multispecies occupancy models, see \citet{Clipp_2021}.
+For more info on the use of penalized likelihood with multispecies occupancy models, see @Clipp_2021.
-\bibliography{unmarked}
-\end{document}
+# References
diff --git a/vignettes/powerAnalysis.Rmd b/vignettes/powerAnalysis.Rmd
new file mode 100644
index 0000000..a5788b6
--- /dev/null
+++ b/vignettes/powerAnalysis.Rmd
@@ -0,0 +1,321 @@
+---
+title: Power Analysis in unmarked
+author: Ken Kellner
+date: September 3, 2021
+bibliography: unmarked.bib
+csl: ecology.csl
+output:
+ rmarkdown::html_vignette:
+ fig_width: 5
+ fig_height: 3.5
+ number_sections: true
+ toc: true
+vignette: >
+ %\VignetteIndexEntry{Power Analysis in unmarked}
+ %\VignetteEngine{knitr::rmarkdown}
+ \usepackage[utf8]{inputenc}
+---
+
+# Hypothesis Testing
+
+For many analyses in `unmarked`, a primary goal is to determine if a certain covariate affects the state or detection process.
+For example, we may want to determine if elevation has an effect on probability of site occupancy, or if wind speed has an effect on detection.
+We can formulate this idea as a set of statistical hypotheses: the null hypothesis ($H_0$) and the alternative hypothesis ($H_a$):
+
+* $H_0$: There is no effect of elevation on occupancy
+* $H_a$: Elevation has an effect on occupancy
+
+In order to test these hypotheses, we must collect appropriate data, perhaps by sampling a series of sites at varying elevation for the presence of the species.
+We can then fit a model in `unmarked`, specifying in the formula that we are interested in estimating the effect of elevation on occupancy.
+For example, here is a simple model fit to the `crossbill` presence-absence dataset included with `unmarked`:
+
+```{r, warning=FALSE}
+set.seed(123)
+library(unmarked)
+data(crossbill)
+
+umf <- unmarkedFrameOccu(y=crossbill[,11:13],
+ siteCovs=data.frame(elev=scale(crossbill$ele)))
+(mod <- occu(~1~elev, umf))
+```
+
+## Wald tests
+
+In the `unmarked` output, we obtain an estimate ($\hat{\theta}$) of the regression coefficient associated with elevation (`elev`) along with its standard error.
+Our null hypothesis is that elevation has no effect on occupancy, i.e. $\theta_0 = 0$.
+With this information, we can conduct a statistical hypothesis test called a Wald test:
+$$
+\sqrt{W} = \frac{(\hat{\theta} -\theta_0)}{se(\hat{\theta})}
+$$
+
+Or simplified:
+$$
+\sqrt{W} = \frac{(0.5939 - 0)}{0.1656} = 3.59
+$$
+
+It turns out that the square root of the Wald statistic, $\sqrt{W}$, follows a standard normal distribution.
+Thus, we can calculate the probability that our observed statistic, $\sqrt{W} = 3.59$, occurred by chance assuming that the null hypothesis $\theta = 0$ is true.
+In R, for a two-tailed test, this can be calculated as:
+
+```{r}
+z <- sqrt_w <- coef(mod)[2] / SE(mod)[2]
+2*pnorm(abs(z), lower.tail=FALSE)
+```
+
+This is the p-value. The values we calculated manually match the results that `unmarked` gave us in the summary output.
+
+## Making a conclusion
+
+Before conducting our study, we should have defined a threshold p-value (the significance level or $\alpha$) below which we reject the null hypothesis.
+Traditionally, $\alpha = 0.05$.
+Our calculated p-value is less than $\alpha$, so we reject the null hypothesis that elevation has no effect on occupancy.
+
+## Types of error
+
+There are two types of errors that we could be making at this point:
+
+1. Type I error: We reject the null hypothesis when in fact it is true. Type I error is conceptually the same as $\alpha$. If we set $\alpha$ larger, we have a greater chance of Type I error.
+2. Type II error: We fail to reject the null hypothesis when in fact it is false. This can occur, for example, if we did not have enough data to detect an effect.
+
+In this vignette, we are most concerned with Type II error.
+How do we know we have enough data to detect if a covariate has a certain effect?
+To answer this question we can use power analysis.
+
+# Power Analysis
+
+## Overview of power analysis
+
+Statistical power is defined as 1 - Type II error.
+So more power means less chance of false negatives, i.e., less chance of failing to reject the null hypothesis when it is false.
+Statistical power depends on three other pieces of information:
+
+1. The effect size: the magnitude of the effect of the covariate. The larger the effect, the more power we have to detect it.
+2. The sample size: how many sites or surveys we've done. The more samples, the more power we have.
+3. The significance level, $\alpha$. The smaller we make $\alpha$, the less power we have: thus there is a tradeoff between Type I and Type II error.
+
+Of the three factors, (2) is the one that makes the most sense for researchers to manipulate in order to increase power.
+However, increasing the sample size requires additional effort and money - so how large does it need to be?
+
+For many statistical models, mathematical formulas have been developed so that power can be calculated for any combination of values for factors 1-3 above.
+This is not true for most occupancy and abundance models available in `unmarked` (but see @Guillera_2012 for one example with occupancy models).
+Thus, `unmarked` uses a simulation-based approach for estimating power under various combinations of values for effect size, sample size, and significance level.
+
+## Power analysis inputs in unmarked
+
+When conducting power analysis, `unmarked` needs three pieces of information corresponding to 1-3 above.
+Of these, (1) the effect size and (3) the significance level are easy to set depending on our hypotheses and desired Type I error.
+The sample size (2) is trickier: it isn't enough to just provide the number of sites, since datasets in `unmarked` also require a variety of other information such as number of surveys per site, number of distance bins, or number of primary periods.
+Thus, power analysis in `unmarked` requires a complete dataset in the form of an appropriate `unmarkedFrame`.
+
+In some cases, we may want to calculate power using an already collected dataset.
+Importantly, this step must be done *before* running our final analysis.
+If power analysis is done after the final model is fit, and the effect sizes are defined based on what was observed in that fitted model, we have done what is called a *post-hoc* power analysis, which is a bad idea (see [this post](https://statmodeling.stat.columbia.edu/2018/09/24/dont-calculate-post-hoc-power-using-observed-estimate-effect-size/) for an example of why this is so bad).
+In most cases, the real value of power analysis comes before we actually go collect any data, because it helps us decide how much data to collect.
+But how to get an `unmarkedFrame` of data before we've done our study?
+Once again the solution is simulation: `unmarked` provides a set of tools for simulating datasets for any of its supported model types.
+
+## Simulating datasets
+
+To simulate a dataset for a given `unmarked` model, we need at a minimum four pieces of information:
+
+1. The type of model (the name of the corresponding fitting function)
+2. The covariates affecting each submodel, such as occupancy or detection (supplied as formulas)
+3. The effect size for each intercept and covariate
+4. Study design parameters such as number of sites and number of surveys
+
+For example, suppose we want to simulate an occupancy dataset (`"occu"`) in which site occupancy is affected by elevation.
+The first step is to organize the model structure as a list of formulas, one per submodel.
+This list must be named in a specific way depending on the model type.
+To get the required names for a given model, fit an example of that model (the documentation should have one) and call `names(model)`.
+A single-season occupancy model requires a list with two named components: `state` and `det`.
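+
+For example, calling `names()` on the `mod` object we fit earlier shows the components we need to supply:
+
+```{r, eval=FALSE}
+names(mod)
+# expect "state" and "det" for a single-season occupancy model
+```
+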
+We supply a formula for each including an effect of elevation on occupancy (note we could name this whatever we want, here we call it `elev`).
+
+```{r}
+forms <- list(state=~elev, det=~1)
+```
+
+Next we must tell `unmarked` what the values for the intercept and regression coefficients in each submodel should be.
+Once again, this is a named list, one element for each submodel.
+Within each element we need a named vector with names that match the covariates in our list of formulas above.
+Note also that each must include a value for the intercept term (this can be named `intercept` or `Intercept`).
+If we are not sure exactly how to structure this list, just skip it for now: `unmarked` can generate a template for us to fill in later.
+
+```{r}
+coefs <- list(state=c(intercept=0, elev=-0.4), det=c(intercept=0))
+```
+
+Finally, we need to give `unmarked` information about the study design.
+This is pretty simple: we just need a list containing values for `M`, the number of sites, and `J` the number of surveys per site.
+For models with multiple primary periods, we'd also need a value of `T`, the number of primary periods.
+
+```{r}
+design <- list(M=300, J=8) # 300 sites, 8 occasions per site
+```
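+
+For illustration only, a design list for a hypothetical multi-season study might look like this:
+
+```{r, eval=FALSE}
+design_multi <- list(M=300, J=8, T=4) # hypothetical: 4 primary periods
+```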
+
+We're now ready to simulate a dataset.
+To do this we use the `simulate` function, providing as arguments the name of the model `"occu"` and the three lists we constructed above.
+First, though, let's leave out the `coefs` list to show how `unmarked` generates a template for us to fill in:
+
+```{r, eval=FALSE}
+simulate("occu", formulas=forms, design=design)
+```
+
+```{r, echo=FALSE}
+try(simulate("occu", formulas=forms, design=design))
+```
+
+Once we have our covariates set up properly, add them to the function call:
+
+```{r}
+occu_umf <- simulate("occu", formulas=forms, coefs=coefs, design=design)
+head(occu_umf)
+```
+
+`unmarked` has generated a presence-absence dataset as well as values for covariate `elev`.
+
+### Customizing the covariates
+
+By default, a covariate will be continuous and come from a standard normal distribution.
+However, we can control this using the `guide` argument.
+For example, suppose we want elevation to have a mean of 2 and a standard deviation of 0.5, and we also want a categorical covariate called `landcover`.
+The corresponding formulas and list to supply to `guide` would look like this:
+
+```{r}
+forms2 <- list(state=~elev+landcover, det=~1)
+guide <- list(landcover=factor(levels=c("forest","grass")), # landcover is factor
+ elev=list(dist=rnorm, mean=2, sd=0.5)) # custom distribution
+```
+
+We'd also need an updated `coefs`:
+
+```{r}
+coefs2 <- list(state=c(intercept=0, elev=-0.4, landcovergrass=0.2), det=c(intercept=0))
+```
+
+```{r}
+head(simulate("occu", formulas=forms2, coefs=coefs2, design=design, guide=guide))
+```
+
+Our output dataset now includes a new categorical covariate, and the elevation values are adjusted.
+
+### Models that require more information
+
+More complex models might require more information for simulation, such as the distribution to use for abundance with `pcount`.
+This information is simply added as additional arguments to `simulate`.
+For example, we can simulate a `pcount` dataset using the negative binomial (`"NB"`) distribution.
+The negative binomial has an additional parameter to estimate (`alpha`) so we must also add an element to `coefs`.
+
+```{r}
+coefs$alpha <- c(alpha=0.5)
+head(simulate("pcount", formulas=forms, coefs=coefs, design=design, mixture="NB"))
+```
+
+## Conducting a power analysis
+
+Power analyses are conducted with the `powerAnalysis` function.
+A power analysis with `powerAnalysis` depends on the input dataset, as well as the covariates of interest and other settings depending on the model (e.g. the distribution used in an N-mixture model or the detection key function in a distance sampling analysis).
+The easiest way to combine all this information and send it to `powerAnalysis` is to fit a model with all the correct settings to our simulated dataset and send *that* to `powerAnalysis`.
+This has the added benefit that it checks to make sure we have all the required information for a valid model.
+Note that the actual parameter estimates from this model template don't matter - they aren't used in the power analysis.
+Thus, there are two required arguments to `powerAnalysis`: a fitted model template, and a list of effect sizes.
+
+The first step is to fit a model:
+
+```{r}
+template_model <- occu(~1~elev, occu_umf)
+```
+
+If we run `powerAnalysis` on `template_model` with no other arguments, `unmarked` will again give us a template for the list of effect sizes, which looks exactly like the one for simulation above.
+
+```{r, eval=FALSE}
+powerAnalysis(template_model)
+```
+
+```{r, echo=FALSE}
+try(powerAnalysis(template_model))
+```
+
+We will set our desired effect sizes to match what we used for simulation:
+
+```{r}
+effect_sizes <- list(state=c(intercept=0, elev=-0.4), det=c(intercept=0))
+```
+
+It is also possible to set the significance level `alpha`; the default is 0.05.
+We now have all the required information to conduct the power analysis.
+Remember, `unmarked` does this by simulation, so you will see a progress bar as `unmarked` conducts simulations.
+You can control how many with the `nsim` argument; we'll set `nsim=20` just to speed things up, but normally you should use more.
+
+```{r}
+(pa <- powerAnalysis(template_model, coefs=effect_sizes, alpha=0.05, nsim=20))
+```
+
+The result is an object `pa` of class `unmarkedPower`.
+If you look at `pa` in the console you will get a summary of power for each parameter in the model.
+The summary includes the submodel, parameter name, supplied effect size, null hypothesis, and the calculated power based on simulation.
+By default the null for each parameter is 0; you can change this by supplying a list to the `nulls` argument with the same structure as `coefs`.
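+
+For example, a `nulls` list for this model would have the same structure as `coefs`; here it just restates the defaults of 0:
+
+```{r, eval=FALSE}
+nulls <- list(state=c(intercept=0, elev=0), det=c(intercept=0))
+powerAnalysis(template_model, coefs=effect_sizes, nulls=nulls, nsim=20)
+```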
+
+We have power = 0.95 for the effect of elevation on occupancy probability.
+This power is calculated by simulating a bunch of datasets based on the template model and supplied effect sizes, fitting a model to each simulated dataset, and then calculating the proportion of these models for which an effect of the covariate would have been detected at the given value of `alpha`.
+You can see the raw results from each simulated model with:
+
+```{r, eval=FALSE}
+pa@estimates
+```
+
+### Varying the sample size
+
+One approach to determining how sample size affects power for our model is to simulate a range of `unmarkedFrames` with varying numbers of sites, observations, etc., and do a power analysis for each.
+However, `powerAnalysis` also has a `design` argument which can do this automatically.
+
+The `design` argument will subsample within the original data to generate datasets which are smaller or larger than the original, and conduct power analyses for each scenario.
+For example, to test power for a dataset with only 50 sites and 3 sample occasions at each:
+
+```{r}
+# 50 sites and 3 obs per site
+(pa2 <- powerAnalysis(template_model, effect_sizes, design=list(M=50, J=3), nsim=20))
+```
+
+With fewer sites and sampling occasions, our power to detect the elevation effect is reduced.
+
+You can also get a larger number of sites by sampling the original sites with replacement:
+
+```{r}
+(pa3 <- powerAnalysis(template_model, effect_sizes, design=list(M=400, J=4), nsim=20))
+```
+
+### Combining unmarkedPower objects
+
+The `unmarkedPowerList` function creates an `unmarkedPowerList` object for holding multiple `unmarkedPower` objects so they can be easily compared.
+The summary of an `unmarkedPowerList` is a `data.frame` with all the outputs shown together, including relevant sample sizes.
+
+```{r}
+unmarkedPowerList(list(pa, pa2, pa3))
+```
+
+We can also create an `unmarkedPowerList` by providing a template model and a range of design scenarios in the `design` argument.
+A power analysis will be run for each scenario (sampling the original dataset as shown above) and the results combined.
+
+```{r}
+scenarios <- expand.grid(M=c(50,200,400),
+ J=c(3,5,8))
+pl <- unmarkedPowerList(template_model, effect_sizes, design=scenarios, nsim=20)
+head(summary(pl))
+tail(summary(pl))
+```
+
+There is a built-in `plot` method for `unmarkedPowerList`.
+You can mark a target power on the plot with the `power` argument.
+You also need to specify the parameter of interest (`"elev"`).
+
+```{r, fig.height=5}
+plot(pl, power=0.8, param="elev")
+```
+
+# Conclusion
+
+Power analysis is an important step in the research process that is often overlooked in studies of animal abundance and occurrence.
+Getting an estimate of the sample size required to detect a particular effect can help with efficient data collection and set expectations for what covariate relationships might be possible to detect.
+The power analysis tools in `unmarked` should help make this part of the research process quick and easy for researchers as they begin to develop study designs.
+
+# References
diff --git a/vignettes/powerAnalysis.Rnw b/vignettes/powerAnalysis.Rnw
deleted file mode 100644
index 222ea36..0000000
--- a/vignettes/powerAnalysis.Rnw
+++ /dev/null
@@ -1,365 +0,0 @@
-<<echo=false>>=
-options(width=70)
-options(continue=" ")
-@
-
-\documentclass[a4paper]{article}
-\usepackage[OT1]{fontenc}
-\usepackage{Sweave}
-\usepackage{natbib}
-%\usepackage{fullpage}
-\usepackage[vmargin=1in,hmargin=1in]{geometry}
-\bibliographystyle{ecology}
-
-\usepackage{hyperref}
-\hypersetup{
- colorlinks=true,
- linkcolor=blue,
- urlcolor=cyan,
- citecolor=black
-}
-
-\DefineVerbatimEnvironment{Sinput}{Verbatim} {xleftmargin=2em}
-\DefineVerbatimEnvironment{Soutput}{Verbatim}{xleftmargin=2em}
-\DefineVerbatimEnvironment{Scode}{Verbatim}{xleftmargin=2em}
-\fvset{listparameters={\setlength{\topsep}{0pt}}}
-\renewenvironment{Schunk}{\vspace{\topsep}}{\vspace{\topsep}}
-
-%%\VignetteIndexEntry{Power Analysis in unmarked}
-
-\title{Power Analysis in unmarked}
-\author{Ken Kellner}
-\date{September 3, 2021}
-
-
-\begin{document}
-
-\newcommand{\code}[1]{\texttt{\small{#1}}}
-\newcommand{\package}[1]{\textsf{\small{#1}}}
-
-\maketitle
-
-\section{Outline}
-
-\begin{enumerate}
- \item Review of hypothesis testing
- \item Introduction to power analysis
- \item Conducting power analysis in unmarked
- \item Conclusion
-\end{enumerate}
-
-\section{Hypothesis Testing}
-
-For many analyses in \code{unmarked}, a primary goal is to determine if a certain covariate affects the state or detection process.
-For example, we may want to determine if elevation has an effect on probability of site occupancy, or if wind speed has an effect on detection.
-We can formulate this idea as set of statistical hypotheses: the null hypothesis ($H_0$) and the alternative hypothesis ($H_a$):
-
-\begin{description}
- \item $H_0$: There is no effect of elevation on occupancy
- \item $H_a$: Elevation has an effect on occupancy
-\end{description}
-
-In order to test these hypotheses, we must collected appropriate data, perhaps by sampling a series of sites at varying elevation for the presence of the species.
-We can then fit a model in \code{unmarked}, specifying in the formula that we are interested in estimating the effect of elevation on occupancy.
-For example, here is a simple model fit to the \code{crossbill} presence-absence dataset included with \code{unmarked}:
-
-<<echo=FALSE>>=
-set.seed(123)
-@
-
-<<>>=
-library(unmarked)
-data(crossbill)
-
-umf <- unmarkedFrameOccu(y=crossbill[,11:13],
- siteCovs=data.frame(elev=scale(crossbill$ele)))
-(mod <- occu(~1~elev, umf))
-@
-
-\subsection{Wald tests}
-
-In the code{unmarked} output, we obtain an estimate ($\hat{\theta}$) of the regression coefficient associated with elevation (\code{elev}) along with its standard error.
-Our null hypothesis is that elevation has no effect on occupancy, i.e. $\theta_0 = 0$.
-With this information, we can conduct a statistical hypothesis test called a Wald test:
-
-\begin{equation}
-\sqrt{W} = \frac{(\hat{\theta} -\theta_0)}{se(\hat{\theta})}
-\end{equation}
-
-Or simplified:
-
-\begin{equation}
-\sqrt{W} = \frac{(0.5939 - 0)}{0.1656} = 3.59
-\end{equation}
-
-It turns out that the square root of the Wald statistic, $\sqrt{W}$, follows a standard normal distribution.
-Thus, we can calculate the probability that our observed statistic, $\sqrt{W} = 3.59$, occurred by chance assuming that the null hypothesis $\theta = 0$ is true.
-In R, for a two-tailed test, this can be calculated as:
-
-<<>>=
-z = sqrt_w = coef(mod)[2] / SE(mod)[2]
-2*pnorm(abs(z), lower.tail=FALSE)
-@
-
-This is the p-value. These values we calculated manually match the results that \code{unmarked} gave us in the summary output.
-
-\subsection{Making a conclusion}
-
-Before conducting our study, we should have defined a threshold p-value (the significance level or $\alpha$) below which we reject the null hypothesis.
-Traditionally, $\alpha = 0.05$.
-Our calculated p-value is less than $\alpha$, so we reject the null hypothesis that elevation has no effect on occupancy.
-
-\subsection{Types of error}
-
-There are two types of errors that we could be making at this point:
-
-1. Type I error: We reject the null hypothesis when in fact it is true. Type I error is conceptually the same as $\alpha$. If we set $\alpha$ larger, we have a greater chance of Type I error.
-2. Type II error: We fail to reject the null hypothesis when in fact it is false. This can occur, for example, if we did not have enough data to detect an effect.
-
-In this vignette, we are most concerned with Type II error.
-How do we know we have enough data to detect if a covariate has a certain effect?
-To answer this question we can use power analysis.
-
-\section{Power Analysis}
-
-\subsection{Overview of power analysis}
-
-Statistical power is defined as 1 - Type II error.
-So more power means less chance of false negatives, i.e., less chance of failing to reject the null hypothesis when it is false.
-Statistical power depends on three other pieces of information:
-
-\begin{enumerate}
- \item The effect size: the magnitude of the effect of the covariate. The larger the effect, the more power we have to detect it.
- \item The sample size: how many sites or surveys we've done. The more samples, the more power we have.
- \item The significance level, $\alpha$. The smaller we make $\alpha$, the less power we have: thus there is a tradeoff between Type I and Type II error.
-\end{enumerate}
-
-Of the three factors (2) is the one that makes the most sense for researchers to manipulate in order to increase power.
-However, increasing the sample size requires additional effort and money - so how large does it need to be?
-
-For many statistical models, mathematical formulas have been developed so that power can be calculated for any combination of values for factors 1-3 above.
-This is not true for most occupancy and abundance models available in \code{unmarked} (but see \cite{Guillera_2012} for one example with occupancy models).
-Thus, \code{unmarked} uses a simulation-based approach for estimating power under various combinations of values for effect size, sample size, and significance level.
-
-\subsection{Power analysis inputs in unmarked}
-
-When conducting power analysis, \code{unmarked} needs three pieces of information corresponding to 1-3 above.
-Of these, (1) the effect size and (3) the significance level are easy to set depending on our hypotheses and desired Type I error.
-The sample size (2) is trickier: it isn't enough to just provide the number of sites, since datasets in \code{unmarked} also require a variety of other information such as number of surveys per site, number of distance bins, or number of primary periods.
-Thus, power analysis in \code{unmarked} requires a complete dataset in the form of an appropriate \code{unmarkedFrame}.
-
-In some cases, we may want to calculate power using an already collected dataset.
-Importantly, this step must be done \textit{before} running our final analysis.
-If power analysis is done after the final model is fit, and the effect sizes are defined based on what was observed in that fitted model, we have done what is called a \textit{post-hoc} power analysis, which is a bad idea (see \href{https://statmodeling.stat.columbia.edu/2018/09/24/dont-calculate-post-hoc-power-using-observed-estimate-effect-size/}{this link} for an example of why this is so bad).
-In most cases, the real value of power analysis comes before we actually go collect any data, because it helps us decide how much data to collect.
-But how to get an \code{unmarkedFrame} of data before we've done our study?
-Once again the solution is simulation: \code{unmarked} provides a set of tools for simulating datasets for any of its supported model types.
-
-\subsection{Simulating datasets}
-
-To simulate a dataset for a given \code{unmarked} model, we need at a minimum four pieces of information:
-
-1. The type of model (the name of the corresponding fitting function)
-2. The covariates affecting each submodel, such as occupancy or detection (supplied as formulas)
-3. The effect size for each intercept and covariate
-4. Study design parameters such as number of sites and number of surveys
-
-For example, suppose we want to simulate an occupancy dataset (\code{"occu"}) in which site occupancy is affected by elevation.
-The first step is to organize the model structure as a list of formulas, one per submodel.
-This list must be named in a specific way depending on the model type.
-To get the required names for a given model, fit an example of that model (the documentation should have one) and call \code{names(model)}.
-A single-season occupancy model requires a list with two named components: \code{state} and \code{det}.
-We supply a formula for each including an effect of elevation on occupancy (note we could name this whatever we want, here we call it \code{elev}).
-
-<<>>=
-forms <- list(state=~elev, det=~1)
-@
-
-Next we must tell \code{unmarked} what the values for the intercept and regression coefficients in each submodel should be.
-Once again, this is a named list, one element for each submodel.
-Within each element we need a named vector with names that match the covariates in our list of formulas above.
-Note also that each must include a value for the intercept term (this can be named \code{intercept} or code{Intercept}).
-If we are not sure exactly how to structure this list, just skip it for now: \code{unmarked} can generate a template for us to fill in later.
-
-<<>>=
-coefs <- list(state=c(intercept=0, elev=-0.4), det=c(intercept=0))
-@
-
-Finally, we need to give \code{unmarked} information about the study design.
-This is pretty simple: we just need a list containing values for \code{M}, the number of sites, and \code{J} the number of surveys per site.
-For models with multiple primary periods, we'd also need a value of \code{T}, the number of primary periods.
-
-<<>>=
-design <- list(M=300, J=8) # 300 sites, 8 occasions per site
-@
-
-We're now ready to simulate a dataset.
-To do this we use the \code{simulate} function, providing as arguments the name of the model \code{"occu"} and the three lists we constructed above.
-Actually, first, let's not supply the \code{coefs} list, to show how \code{unmarked} will generate a template for us to use:
-
-<<eval=FALSE>>=
-simulate("occu", formulas=forms, design=design)
-@
-
-<<echo=FALSE>>=
-try(simulate("occu", formulas=forms, design=design))
-@
-
-
-Once we have our covariates set up properly, add them to the function call:
-
-<<>>=
-occu_umf <- simulate("occu", formulas=forms, coefs=coefs, design=design)
-head(occu_umf)
-@
-
-\code{unmarked} has generated a presence-absence dataset as well as values for covariate \code{elev}.
-
-\subsubsection{Customizing the covariates}
-
-By default, a covariate will be continuous and come from a standard normal distribution.
-However, we can control this using the \code{guide} argument.
-For example, suppose we want elevation to have a mean of 2 and a standard deviation of 0.5, and we also want a categorical covariate called \code{landcover}.
-The corresponding formulas and list to supply to \code{guide} would look like this:
-
-<<>>=
-forms2 <- list(state=~elev+landcover, det=~1)
-guide <- list(landcover=factor(levels=c("forest","grass")), # landcover is factor
- elev=list(dist=rnorm, mean=2, sd=0.5)) # custom distribution
-@
-
-We'd also need an updated \code{coefs}:
-
-<<>>=
-coefs2 <- list(state=c(intercept=0, elev=-0.4, landcovergrass=0.2), det=c(intercept=0))
-@
-
-<<>>=
-head(simulate("occu", formulas=forms2, coefs=coefs2, design=design, guide=guide))
-@
-
-Our output dataset now includes a new categorical covariate, and the elevation values are adjusted.
-
-\subsubsection{Models that require more information}
-
-More complex models might require more information for simulation, such as the distribution to use for abundance with \code{pcount}.
-This information is simply added as additional arguments to \code{simulate}.
-For example, we can simulate a \code{pcount} dataset using the negative binomial (\code{"NB"}) distribution.
-The negative binomial has an additional parameter to estimate (\code{alpha}) so we must also add an element to \code{coefs}.
-
-<<>>=
-coefs$alpha <- c(alpha=0.5)
-head(simulate("pcount", formulas=forms, coefs=coefs, design=design, mixture="NB"))
-@
-
-\subsection{Conducting a power analysis}
-
-Power analyses are conducted with the \code{powerAnalysis} function.
-An \code{powerAnalysis} power analysis depends on the input dataset, as well as the covariates of interest and other settings depending on the model (e.g. the distribution used in an N-mixture model or the detection key function in a distance sampling analysis).
-The easiest way combine all this information and send it to \code{powerAnalysis} is to actually fit a model with all the correct settings and our simulated dataset and send \textit{that} to \code{powerAnalysis}.
-This has the added benefit that it checks to make sure we have all the required information for a valid model.
-Note that the actual parameter estimates from this model template don't matter - they aren't used in the power analysis.
-Thus, there are two required arguments to \code{powerAnalysis}: a fitted model template, and a list of effect sizes.
-
-The first step is to fit a model:
-
-<<>>=
-template_model <- occu(~1~elev, occu_umf)
-@
-
-If we run \code{powerAnalysis} on \code{template\_model} with no other arguments, \code{unmarked} will again give us a template for the list of effect sizes, which looks exactly like the one for simulation above.
-
-<<eval=FALSE>>=
-powerAnalysis(template_model)
-@
-
-<<echo=FALSE>>=
-try(powerAnalysis(template_model))
-@
-
-We will set our desired effect sizes to match what we used for simulation:
-
-<<>>=
-effect_sizes <- list(state=c(intercept=0, elev=-0.4), det=c(intercept=0))
-@
-
-It is also possible to set the significance level \code{alpha}; the default is 0.05.
-We now have all the required information to conduct the power analysis.
-Remember, \code{unmarked} does this by simulation, so you will see a progress bar as \code{unmarked} conducts simulations.
-You can control how many with the \code{nsim} argument; we'll set \code{nsim=20} just to speed things up, but normally you should use more.
-
-<<>>=
-(pa <- powerAnalysis(template_model, coefs=effect_sizes, alpha=0.05, nsim=20))
-@
-
-The result is an object \code{pa} of class \code{unmarkedPower}.
-If you look at \code{pa} in the console you will get a summary of power for each parameter in the model.
-The summary includes the submodel, parameter name, supplied effect size, null hypothesis, and the calculated power based on simulation.
-By default the null for each parameter is 0, you can change this by supplying a list to the \code{nulls} argument with the same structure as \code{coefs}.
-
-We have power = 0.95 for the effect of elevation on occupancy probability.
-This power is calculated by simulating a bunch of datasets based on the template model and supplied effect sizes, fitting a model to each simulated dataset, and then calculating the proportion of these models for which an effect of the covariate would have been detected at the given value of \code{alpha}.
-You can see the raw results from each simulated model with
-
-<<eval=FALSE>>=
-pa@estimates
-@
-
-\subsubsection{Varying the sample size}
-
-One approach to determining how sample size affects power for our model is to simulate a range of \code{unmarkedFrames} with varying number of sites, observations, etc. and do a power analysis for each.
-However \code{powerAnalysis} also has a argument \code{design} which can help do this automatically.
-
-The \code{design} argument will subsample within the original data to generate datasets which are smaller or larger than the original, and conduct power analyses for each scenario.
-For example, to test power for a dataset with only 50 sites and 3 sample occasions at each:
-
-<<>>=
-# 50 sites and 3 obs per site
-(pa2 <- powerAnalysis(template_model, effect_sizes, design=list(M=50, J=3), nsim=20))
-@
-
-With fewer sites and sampling occasions, our power to detect the elevation effect is reduced.
-
-You can also get a larger number of sites via sampling the original sites with replacement:
-
-<<>>=
-(pa3 <- powerAnalysis(template_model, effect_sizes, design=list(M=400, J=4), nsim=20))
-@
-
-\subsubsection{Combining unmarkedPower objects}
-
-The \code{unmarkedPowerList} function creates a \code{unmarkedPowerList} object for holding multiple \code{unmarkedPower} objects so they can be easily compared.
-The summary of an \code{unmarkedPowerList} is a \code{data.frame} with all the outputs shown together, including relevant sample sizes.
-
-<<>>=
-unmarkedPowerList(list(pa, pa2, pa3))
-@
-
-We can also create an \code{unmarkedPowerList} by providing a template model and a range of design scenarios in the \code{design} argument.
-A power analysis will be run for each scenario (sampling the original dataset as shown above) and the results combined.
-
-<<>>=
-scenarios <- expand.grid(M=c(50,200,400),
- J=c(3,5,8))
-pl <- unmarkedPowerList(template_model, effect_sizes, design=scenarios, nsim=20)
-head(summary(pl))
-tail(summary(pl))
-@
-
-There is a built-in \code{plot} method for \code{unmarkedPowerList}.
-You can specify a target power on the plot to the \code{power} argument.
-You also need to specify the parameter of interest (\code{"elev"}).
-
-<<fig=TRUE>>=
-plot(pl, power=0.8, param="elev")
-@
-
-\section{Conclusion}
-
-Power analysis is an important step in the research process that is often overlooked in studies of animal abundance and occurrence.
-Getting an estimate of the sample size required to detect a particular effect can help with efficient data collection and set expectations for what covariate relationships might be possible to detect.
-The power analysis tools in \code{unmarked} should help make this part of the research process quick and easy for researchers as the begin to develop study designs.
-
-\bibliography{unmarked}
-
-\end{document}
diff --git a/vignettes/random-effects.Rnw b/vignettes/random-effects.Rnw
deleted file mode 100644
index ec75a3a..0000000
--- a/vignettes/random-effects.Rnw
+++ /dev/null
@@ -1,233 +0,0 @@
-<<echo=false>>=
-options(width=70)
-options(continue=" ")
-@
-
-\documentclass[a4paper]{article}
-\usepackage[OT1]{fontenc}
-\usepackage{Sweave}
-\usepackage{natbib}
-%\usepackage{fullpage}
-\usepackage[vmargin=1in,hmargin=1in]{geometry}
-\bibliographystyle{ecology}
-
-\usepackage{hyperref}
-\hypersetup{
- colorlinks=true,
- linkcolor=blue,
- urlcolor=cyan,
- citecolor=black
-}
-
-\DefineVerbatimEnvironment{Sinput}{Verbatim} {xleftmargin=2em}
-\DefineVerbatimEnvironment{Soutput}{Verbatim}{xleftmargin=2em}
-\DefineVerbatimEnvironment{Scode}{Verbatim}{xleftmargin=2em}
-\fvset{listparameters={\setlength{\topsep}{0pt}}}
-\renewenvironment{Schunk}{\vspace{\topsep}}{\vspace{\topsep}}
-
-%%\VignetteIndexEntry{Random effects in unmarked}
-
-\title{Experimental Support for Random Effects in unmarked}
-\author{Ken Kellner}
-\date{December 1, 2021}
-
-
-\begin{document}
-
-\newcommand{\code}[1]{\texttt{\small{#1}}}
-\newcommand{\package}[1]{\textsf{\small{#1}}}
-
-\maketitle
-
-\section{Introduction}
-
-Random effects are often useful when fitting hierarchical ecological models.
-For example, if we have many different observers collecting count or presence data, it may be appropriate to include observer as a random effect on the detection process.
-If we have multiple years of data at our sites, and are fitting a so-called "stacked" model where site-years are the response data instead of sites, it may be appropriate to include site as a random effect on the state process.
-Until recently, including random effects as part of the linear predictor for the state or detection parameter in a model was possible only by fitting the model in a Bayesian framework with e.g. WinBUGS or JAGS.
-
-The \code{unmarked} package now includes experimental support for fitting models with random effects, via the use of \href{https://kaskr.github.io/adcomp/Introduction.html}{Template Model Builder} (TMB).
-TMB uses Laplace approximation to estimate the random effects.
-Currently, only a few model types including single-season occupancy (\code{occu}) and $N$-mixture models (\code{pcount}) have random effects support, but more models may be supported in the future.
-
-\subsection{Caveats}
-
-The first caveat is that \code{unmarked} can only fit normally-distributed random effects with mean 0 on the link scale, and nested and correlated random effects are not supported.
-Second, we have found that estimation of random effects in \code{unmarked} via TMB works well in many cases, but for datasets with small sample sizes or sparse (many 0) observations, estimates are often poor and can result in misleading inference.
-We urge \code{unmarked} users to incorporate random effects with caution, and compare results to similar models without random effects.
-Some datasets may simply not be appropriate for models with random effects in \code{unmarked}.
-In these cases Bayesian methods may be more appropriate.
-
-\section{Example model with random effects}
-
-Below, we demonstrate fitting an $N$-mixture model via \code{pcount} to a dataset, including different combinations of random effects.
-
-\subsection{Simulating a dataset}
-
-We begin by simulating a count dataset, in which 100 sites have been visited 3 times per year in each of 3 years.
-Abundance will be a function of a single fixed-effect covariate, and we will consider site a random effect.
-We will simulate detection with a random observer effect.
-
-First, define the simulation parameters, covariate data, and the random effect values:
-
-<<>>=
-set.seed(35)
-nsites <- 100
-nyears <- 3
-nvisits <- 3
-
-# Abundance parameters
-beta0 <- 0 # Intercept
-beta1 <- 1 # fixed covariate slope
-sd_site <- 0.5 # SD of site-level random effect
-re_site <- rnorm(nsites, 0, sd_site) # simulate random effect
-
-# Detection parameters
-alpha0 <- 0 # Intercept
-sd_obs <- 0.3 # SD of observer-level random effect (20 unique observers)
-re_obs <- rnorm(20, 0, sd_obs) # simulate random effect
-
-# Covariates
-x <- rnorm(nsites*nyears) # a covariate on abundance
-site_id <- rep(1:100, each=3)
-obs_id <- sample(1:20, nsites*nyears*nvisits, replace=TRUE)
-@
-
-Next, calculate $\lambda$ for each site and simulate the actual abundance \code{N}:
-
-<<fig=TRUE>>=
-lambda <- exp(beta0 + beta1*x + # fixed part of linear predictor
- re_site[site_id]) # site random effect
-
-# Generate latent abundance N
-N <- rpois(nsites*nyears, lambda)
-hist(N)
-@
-
-Simulate \code{p} by observer, and using \code{N}, simulate the observed counts \code{y}:
-
-<<>>=
-p <- plogis(alpha0 + re_obs[obs_id])
-p <- matrix(p, nrow=nsites*nyears, ncol=nvisits, byrow=TRUE)
-
-y <- matrix(NA, nsites*nyears, nvisits)
-
-for (i in 1:(nsites*nyears)){
- for (j in 1:nvisits){
- y[i,j] <- rbinom(1, N[i], p[i,j])
- }
-}
-@
-
-Finally, organize the data into an \code{unmarkedFrame}.
-Note that we are specifying our random effect parameters as R factors.
-
-<<>>=
-library(unmarked)
-site_covs <- data.frame(x=x,
- site_id=factor(as.character(site_id)))
-obs_covs <- data.frame(obs_id=factor(as.character(obs_id)))
-umf <- unmarkedFramePCount(y=y, siteCovs=site_covs, obsCovs=obs_covs)
-@
-
-\subsection{Fitting models}
-
-First we will fit a model without random effects, including only the fixed effect covariate \code{x}.
-
-<<>>=
-mod_x <- pcount(~1~x, umf, K=40)
-summary(mod_x)
-@
-
-This model overestimates the abundance intercept (truth = 0) and underestimates the effect of \code{x} (truth = 1).
-Next, we will fit a model with random intercepts by site.
-This is specified in the abundance formula, using syntax similar to that found in package \code{lme4}.
-To specify random intercepts based on our site covariate \code{site\_id}, add \code{(1|site\_id)} to the abundance formula (the parentheses are required).
-You can read this as "the intercepts should be random based on \code{site\_id}".
-Note that the fixed effect \code{x} is outside the parentheses in the abundance formula.
-
-<<>>=
-mod_site <- pcount(~1~x+(1|site_id), umf, K=40)
-mod_site
-@
-
-In the summary output \code{unmarked} provides an estimate of the random effect SD, which is similar to the true value (0.5).
-The other abundance parameters are also now closer to their true values.
-Now we'll fit the model with random detection intercepts by observer, in addition to the site random effect.
-As before, we add \code{(1|obs\_id)} to the detection formula.
-
-<<>>=
-mod_obs <- pcount(~1 + (1|obs_id) ~ x + (1|site_id), umf, K=40)
-mod_obs
-@
-
-Both estimated random effect SDs are similar to the "true" values.
-
-\subsection{Model inference}
-
-To get more details, including 95\% CIs, on the random effects SDs, use the \code{sigma} function:
-
-<<>>=
-sigma(mod_obs)
-@
-
-We can also extract the actual random effect values using the \code{randomTerms} function.
-We'll extract the values for the abundance model:
-
-<<>>=
-head(randomTerms(mod_obs, "state"))
-@
-
-Note that they are sorted incorrectly because \code{site\_id} in this example, while numeric, is a factor, so R sorts it like a character.
-Also note that these values are just the random part of the intercept - to get the complete intercept for each grouping level, we must add the mean intercept value (in this case 0.186).
-We can compare these estimates to the true values of the random intercepts.
-
-<<fig=TRUE>>=
-ran <- randomTerms(mod_obs, "state")
-ran <- ran[order(as.numeric(ran$Level)),] # sort them correctly
-ints <- coef(mod_obs)[1] + ran$Estimate # Calculate the total intercept for each level
-
-plot(re_site, ints, xlab="Truth", ylab="Estimate", pch=19)
-abline(a=0, b=1)
-@
-
-We can also use \code{predict} on models with random effects.
-A new argument is available, \code{re.form}, which specifies if the random effect(s) should be included when calculating the predicted estimate.
-By default, \code{re.form=NULL} meaning they are included; to exclude them set \code{re.form=NA}.
-These values are a bit confusing but they match the way it is done in \code{lme4}.
-
-<<>>=
-# with random effect
-head(predict(mod_obs, "det"))
-
-# without random effect
-head(predict(mod_obs, "det", re.form=NA))
-@
-
-In the latter case, all the estimates of $p$ are identical because there are no fixed covariates on $p$ in this model.
-
-\subsection{More complicated random effects structures}
-
-It is possible to include multiple random effects on a single parameter; for example to include both observer and site as random effects on $p$, the formula for $p$ would look like this:
-
-<<eval=FALSE>>=
-~1 + (1|obs_id) + (1|site_id)
-@
-
-Additionally it is possible to have random slopes as well as intercepts.
-For example, to also have random slopes for the covariate \code{x} by site, the formula for abundance would be
-
-<<eval=FALSE>>=
-~x + (1 + x || site_id)
-@
-
-The \code{||} (indicating no correlation estimated between the two random effects) is necessary instead of \code{|}, as \code{unmarked} does not support correlated random effects.
-
-\section{A note on model selection}
-
-As you can see above, \code{unmarked} returns an AIC value for models with random effects.
-This AIC value is calculated in the normal manner, with the number of parameters equal to the number of fixed parameters plus the number of random effects standard deviations.
-There isn't a consensus on how to calculate AIC for models with fixed and random effects.
-Thus, it is probably not a good idea to use AIC to compare between models with and without random effects, even though \code{unmarked} will allow you to do so with \code{fitList} and \code{modSel}.
-
-\end{document}
diff --git a/vignettes/simulate.Rmd b/vignettes/simulate.Rmd
new file mode 100644
index 0000000..5b7decf
--- /dev/null
+++ b/vignettes/simulate.Rmd
@@ -0,0 +1,274 @@
+---
+title: Simulating datasets
+author: Ken Kellner
+date: September 10, 2021
+bibliography: unmarked.bib
+csl: ecology.csl
+output:
+ rmarkdown::html_vignette:
+ fig_width: 5
+ fig_height: 3.5
+ number_sections: true
+ toc: true
+vignette: >
+ %\VignetteIndexEntry{Simulating datasets}
+ %\VignetteEngine{knitr::rmarkdown}
+ \usepackage[utf8]{inputenc}
+---
+
+# Introduction
+
+Simulating datasets is a powerful and versatile tool when conducting `unmarked` analyses.
+Writing our own code to simulate a dataset based on a given model is an excellent learning tool, and can help us test if a given model is generating the expected results.
+If we simulate a series of datasets based on a fitted model, and calculate a statistic from each of those fits, we can generate a distribution of the statistic - this is what the `parboot` function does.
+This can be helpful, for example, when testing goodness of fit.
+Finally, simulation can be a useful component of power analysis when a closed-form equation for power is not available.
+
+`unmarked` provides two different ways of generating simulated datasets, depending on the stage we are at in the modeling process.
+
+1. Generating simulated datasets from a fitted model we already have
+2. Generating simulated datasets from scratch
+
+For (1), we simply call the `simulate` method on our fitted model object and new dataset(s) are generated.
+This is the approach used by `parboot`.
+In this vignette we will focus on (2), a more flexible approach to simulation, also using the `simulate` method, that allows us to generate a dataset corresponding to any `unmarked` model from scratch.
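+
+As a quick illustration of approach (1), a minimal sketch (assuming `fm` is an existing fitted `unmarked` model object, e.g. from `occu` or `pcount`) might look like:
+
+```{r, eval=FALSE}
+# Approach (1): simulate new observation matrices from an already-fitted model.
+# `fm` is assumed to be a fitted model; nsim controls how many datasets are
+# generated, and each element of the result is one simulated y matrix.
+sim_y <- simulate(fm, nsim=2)
+str(sim_y)
+```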
+
+# Components of a call to simulate
+
+We will need to provide, at a minimum, four pieces of information to `simulate` in order to simulate a dataset from scratch in `unmarked`.
+
+1. The name of the fitting function for the model we want to simulate from, as a character string
+2. A list of formulas, one per submodel, containing the names of the covariates we want to include in each
+3. A list of vectors of regression coefficients (intercepts and slopes), one per submodel, matching the formulas
+4. A list of design components; for example, the number of sites and number of observations per site
+
+A number of other arguments are available, e.g. to customize how the covariates are randomly generated or to specify which distributions to use when simulating abundances.
+We'll show those later.
+The easiest way to demonstrate how to use `simulate` is to look at an example: we'll start with a simple one for occupancy.
+
+# Simulating an occupancy dataset
+
+Suppose we want to simulate an occupancy dataset in which site occupancy is affected by elevation.
+The first piece of information needed is the name of the model to use: the fitting function for occupancy is `occu`, so the first argument to `simulate` (the name of the model) will be `"occu"`.
+
+## Formulas
+
+Second we must define the desired model structure as a list of formulas, one per submodel.
+"Submodels" here are the hierarchical components of the model; for example, an occupancy model has a state (occupancy) submodel and an observation (detection) submodel.
+These submodels are identified by short names: `state` and `det`.
+We will use these short names repeatedly.
+In order to identify which submodels are needed and what their short names are, we can simply fit any model of that type (e.g. by running the example in the fitting function's help file) and call `names(model)`.
+
+```{r}
+set.seed(123)
+library(unmarked)
+umf <- unmarkedFrameOccu(y=matrix(c(0,1,0,1,1,0,0,0,1), nrow=3))
+mod <- occu(~1~1, umf)
+names(mod)
+```
+
+Formulas are supplied as a named list.
+The list has one element per submodel, and the names of the elements are the short names defined above.
+Each list element is a formula, containing the desired number of covariates to use, and the names of these covariates.
+Below we define our list of formulas, including an effect of elevation on occupancy (note that we could name this covariate anything we want; here we call it `elev`).
+We don't want any covariates on detection probability, so the formula defines the model as intercept only: `~1`.
+
+```{r}
+forms <- list(state=~elev, det=~1)
+```
+
+## Regression coefficients
+
+Next we must tell `unmarked` what the values for the intercept and regression coefficients in each submodel should be.
+Once again, this is a named list, one element for each submodel.
+Each list element is a numeric vector.
+The components of each numeric vector must also be named, matching the covariate names in our list of formulas.
+Don't forget that we must also specify a value for the intercept in each submodel (it can be named `Intercept` or `intercept`).
+If we are not sure exactly how to structure this list, just skip it for now: `unmarked` can generate a template for us to fill in later.
+
+```{r}
+coefs <- list(state=c(intercept=0, elev=-0.4), det=c(intercept=0))
+```
+
+We have a list with two elements, each a numeric vector.
+Both contain intercept values, and the `state` vector also contains a value corresponding to the desired effect of our covariate `elev`.
+
+## Study design information
+
+Finally, we need to give `unmarked` information about the study design.
+This is pretty simple: we just need a list containing values for `M`, the number of sites, and `J`, the number of surveys per site.
+For models with multiple primary periods, we'd also need a value of `T`, the number of primary periods.
+
+```{r}
+design <- list(M=300, J=8) # 300 sites, 8 occasions per site
+```
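+
+For example, a hypothetical design list for a model with multiple primary periods (such as `colext`) might look like the following; the values are arbitrary and shown only to illustrate the structure:
+
+```{r, eval=FALSE}
+# Hypothetical design with primary periods:
+# 300 sites, 5 primary periods (T), 4 secondary occasions (J) per period
+design_dynamic <- list(M=300, T=5, J=4)
+```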
+
+## Put it all together
+
+We're now ready to simulate a dataset.
+To do this we use the `simulate` function, providing as arguments the name of the model `"occu"` and the three lists we constructed above.
+First, however, let's call `simulate` without the `coefs` list to show how `unmarked` will generate a template for us to fill in:
+
+```{r, eval=FALSE}
+simulate("occu", formulas=forms, design=design)
+```
+
+```{r, echo=FALSE}
+try(simulate("occu", formulas=forms, design=design))
+```
+
+We can copy this template structure and fill in our own numeric values.
+Once the coefficients are set up properly, we add them to the function call:
+
+```{r}
+occu_umf <- simulate("occu", formulas=forms, coefs=coefs, design=design)
+head(occu_umf)
+```
+
+`unmarked` has generated a presence-absence dataset as well as values for covariate `elev`.
+We can check that it worked as expected by fitting the corresponding model to the dataset, and making sure the estimated values are similar:
+
+```{r}
+(occu(~1 ~elev, occu_umf))
+```
+
+## Customizing the covariates
+
+By default, a covariate will be continuous and come from a standard normal distribution (mean 0, SD 1).
+However, we can control this using the `guide` argument.
+For example, suppose we want elevation to still come from a normal distribution, but with a mean of 2 and a standard deviation of 0.5.
+We can provide a named list to the `guide` argument as follows:
+
+```{r}
+guide <- list(elev=list(dist=rnorm, mean=2, sd=0.5))
+```
+
+`guide` contains one element, called `elev`, which is also a list and contains three components:
+
+1. The random distribution function to use, `rnorm`
+2. The mean of the distribution
+3. The SD of the distribution
+
+```{r}
+occu_umf <- simulate("occu", formulas=forms, coefs=coefs, design=design, guide=guide)
+head(occu_umf)
+```
+
+You can see the `elev` covariate now has values corresponding to the desired distribution.
+Note that the elements of the list will depend on the arguments required by the random distribution function.
+For example, to use a uniform distribution instead:
+
+```{r}
+guide <- list(elev=list(dist=runif, min=0, max=1))
+occu_umf <- simulate("occu", formulas=forms, coefs=coefs, design=design, guide=guide)
+head(occu_umf)
+```
+
+It is also possible to define a categorical (factor) covariate.
+We specify an entry in the `guide` list as before, but instead of a list of distribution arguments, we supply a call to `factor` that defines the desired factor levels.
+For example, suppose we want to add a new `landcover` covariate to our simulated model.
+First, define the new formulas:
+
+```{r}
+forms2 <- list(state=~elev+landcover, det=~1)
+```
+
+And then the new guide, including the information about factor levels:
+
+```{r}
+guide <- list(landcover=factor(levels=c("forest","grass","urban")))
+```
+
+We'd also need an updated `coefs` since we have a new covariate.
+Defining the `coefs` when you have factors in your model is a little trickier, since R names the effects as a combination of the factor name and the level name.
+There is no coefficient for the reference level (`"forest"` in our example), but we need to provide coefficients for both `"grass"` and `"urban"`.
+When combined with the factor name the complete coefficient names for these two will be `landcovergrass` and `landcoverurban`.
+The easiest way to make sure we get these names right is to let `unmarked` generate a template `coefs` for us as shown above, and then fill it in.
+
+```{r}
+# forest is the reference level for landcover since it was listed first
+coefs2 <- list(state=c(intercept=0, elev=-0.4, landcovergrass=0.2,
+ landcoverurban=-0.7), det=c(intercept=0))
+```
+
+```{r}
+head(simulate("occu", formulas=forms2, coefs=coefs2, design=design, guide=guide))
+```
+
+Our output dataset now includes a new categorical covariate.
+
+## Models that require more information
+
+More complex models might require more information for simulation.
+Nearly any argument accepted by either the fitting function for the model, or by the corresponding `unmarkedFrame` constructor, can be passed as an optional argument to `simulate` to customize the simulation.
+For example, we may want to specify that abundance should be simulated from a negative binomial distribution, instead of a Poisson, for `pcount`.
+This information is simply added as additional arguments to `simulate`.
+Below, we simulate a `pcount` dataset using the negative binomial (`"NB"`) distribution.
+The negative binomial has an additional parameter to estimate (`alpha`) so we must also add an element to `coefs`.
+
+```{r}
+coefs$alpha <- c(alpha=0.5)
+head(simulate("pcount", formulas=forms, coefs=coefs, design=design, mixture="NB"))
+```
+
+In the next section we will show a more detailed example involving these additional arguments.
+
+# Simulating a more complex dataset: gdistremoval
+
+The `gdistremoval` function fits the model of @Amundson_2014, which estimates abundance using a combination of distance sampling and removal sampling data.
+When simulating a dataset based on this model, we have to provide several additional pieces of information related to the structure of the distance and removal sampling analyses.
+
+To begin, we will define the list of formulas.
+A `gdistremoval` model, when there is only one primary period, has three submodels: abundance (`"lambda"`), distance sampling (`"dist"`), and removal sampling (`"rem"`).
+We will fit a model with an effect of elevation `elev` on abundance and an effect of wind `wind` on removal probability.
+
+```{r}
+forms <- list(lambda=~elev, dist=~1, rem=~wind)
+```
+
+Next we will define the corresponding coefficients.
+We will set mean abundance at 5.
+The intercept is on the log scale, thus the intercept for `lambda` will be `log(5)`.
+The scale parameter for the detection function will be 50; it is also on the log scale, so the intercept for `dist` will be `log(50)`.
+The intercept for the removal probability is on the logit scale, so we will set the intercept at -1 (equivalent to a mean removal probability of about 0.27).
+Don't forget the covariate effects on `lambda` and removal.
+
+```{r}
+coefs <- list(lambda=c(intercept=log(5), elev=0.7),
+ dist=c(intercept=log(50)), rem=c(intercept=-1, wind=-0.3))
+```
+
+Our study will have 300 sites.
+This model is unique in that we have to specify the numbers of two different types of observations: (1) the number of distance sampling bins (`Jdist`), and (2) the number of removal intervals (`Jrem`).
+
+```{r}
+design <- list(M = 300, Jdist=4, Jrem=5)
+```
+
+Finally we are ready to simulate the dataset.
+In addition to the name of the model, `forms`, `coefs` and `design`, we also need to provide some additional information.
+We need to define the distance breaks for the distance sampling part of the model (there should be `Jdist+1` of these), the key function to use when simulating the detection process, and the units of measurement for the distance breaks (`unitsIn`).
+
+```{r}
+umf <- simulate("gdistremoval", formulas=forms, coefs=coefs, design=design,
+ dist.breaks=c(0,25,50,75,100), keyfun="halfnorm", unitsIn="m")
+head(umf)
+```
+
+The result is a dataset containing a combination of distance, removal, and covariate data.
+We can check to see if fitting a model to this dataset recovers our specified coefficient values:
+
+```{r}
+(fit <- gdistremoval(lambdaformula=~elev, removalformula=~wind,
+ distanceformula=~1, data=umf))
+```
+
+Looks good.
+
+# Conclusion
+
+The `simulate` function provides a flexible tool for simulating data from any model in `unmarked`.
+These datasets can be used for a variety of purposes, such as for teaching examples, testing models, or developing new tools that work with `unmarked`.
+Additionally, simulating datasets is a key component of the power analysis workflow in `unmarked` - see the power analysis vignette for more examples.
+
+# References
diff --git a/vignettes/simulate.Rnw b/vignettes/simulate.Rnw
deleted file mode 100644
index c2f5f3c..0000000
--- a/vignettes/simulate.Rnw
+++ /dev/null
@@ -1,318 +0,0 @@
-<<echo=false>>=
-options(width=70)
-options(continue=" ")
-@
-
-\documentclass[a4paper]{article}
-\usepackage[OT1]{fontenc}
-\usepackage{Sweave}
-\usepackage{natbib}
-%\usepackage{fullpage}
-\usepackage[vmargin=1in,hmargin=1in]{geometry}
-\bibliographystyle{ecology}
-
-\usepackage{hyperref}
-\hypersetup{
- colorlinks=true,
- linkcolor=blue,
- urlcolor=cyan,
- citecolor=black
-}
-
-\DefineVerbatimEnvironment{Sinput}{Verbatim} {xleftmargin=2em}
-\DefineVerbatimEnvironment{Soutput}{Verbatim}{xleftmargin=2em}
-\DefineVerbatimEnvironment{Scode}{Verbatim}{xleftmargin=2em}
-\fvset{listparameters={\setlength{\topsep}{0pt}}}
-\renewenvironment{Schunk}{\vspace{\topsep}}{\vspace{\topsep}}
-
-%%\VignetteIndexEntry{Simulating datasets}
-
-\title{Simulating datasets}
-\author{Ken Kellner}
-\date{September 10, 2021}
-
-
-\begin{document}
-
-\newcommand{\code}[1]{\texttt{\small{#1}}}
-\newcommand{\package}[1]{\textsf{\small{#1}}}
-
-\maketitle
-
-\section{Outline}
-
-\begin{enumerate}
- \item Introduction
- \item Components of a call to \code{simulate}
- \item Simulating an occupancy dataset
- \item Simulating a more complex dataset: \code{gdistremoval}
- \item Conclusion
-\end{enumerate}
-
-\section{Introduction}
-
-Simulating datasets is a powerful and varied tool when conducting \code{unmarked} analyses.
-Writing our own code to simulate a dataset based on a given model is an excellent learning tool, and can help us test if a given model is generating the expected results.
-If we simulate a series of datasets based on a fitted model, and calculate a statistic from each of those fits, we can generate a distribution of the statistic - this is what the \code{parboot} function does.
-This can be helpful, for example, when testing goodness of fit.
-Finally, simulation can be a useful component of power analysis when a closed-form equation for power is not available.
-
-\code{unmarked} provides two different ways of generating simulated datasets, depending on the stage we are at in the modeling process.
-
-\begin{enumerate}
- \item Generating simulated datasets from a fitted model we already have
- \item Generating simulated datasets from scratch
-\end{enumerate}
-
-For (1), we simply call the \code{simulate} method on our fitted model object and new dataset(s) are generated.
-This is the approach used by \code{parboot}.
-In this vignette we will focus on (2), a more flexible approach to simulation, also using the \code{simulate} method, that allows us to generate a dataset corresponding to any \code{unmarked} model from scratch.
-
-\section{Components of a call to simulate}
-
-We will need to provide, at a minimum, four pieces of information to \code{simulate} in order to simulate a dataset from scratch in \code{unmarked}.
-
-\begin{enumerate}
- \item The name of the fitting function for the model we want to simulate from, as a character string
- \item A list of formulas, one per submodel, containing the names of the covariates we want to include in each
- \item A list of vectors of regression coefficients (intercepts and slopes), one per submodel, matching the formulas
- \item A list of design components; for example, the number of sites and number of observations per site
-\end{enumerate}
-
-A number of other arguments are available, e.g. for how to customize how the covariates are randomly generated or for distributions to use when simulating abundances.
-We'll show those later.
-The easiest way to demonstrate how to use \code{simulate} is to look at an example: we'll start with a simple one for occupancy.
-
-\section{Simulating an occupancy dataset}
-
-Suppose we want to simulate an occupancy dataset in which site occupancy is affected by elevation.
-The first piece of information needed is the name of model to use: the fitting function for occupancy is \code{occu}, so the first argument to \code{simulate} and the name of the model will be \code{"occu"}.
-
-\subsection{Formulas}
-
-Second we must define the desired model structure as a list of formulas, one per submodel.
-"Submodels" here are the hierarchical components of the model; for example, an occupancy model has a state (occupancy) submodel and an observation (detection) submodel.
-These submodels are identified by short names: \code{state} and \code{det}.
-We will use these short names repeatedly.
-In order to identify which submodels are needed and what their short names are, we can simply fit any model of that type (e.g. from the example) and call \code{names(model)}.
-
-<<echo=FALSE>>=
-set.seed(123)
-@
-
-<<>>=
-library(unmarked)
-umf <- unmarkedFrameOccu(y=matrix(c(0,1,0,1,1,0,0,0,1), nrow=3))
-mod <- occu(~1~1, umf)
-names(mod)
-@
-
-Formulas are supplied as a named list.
-The list has one element per submodel, and the names of the elements are the short names defined above.
-Each list element is a formula, containing the desired number of covariates to use, and the names of these covariates.
-Below we define our list of formulas, including an effect of elevation on occupancy (note we could name this whatever we want, here we call it \code{elev}).
-We don't want any covariates on detection probability, so the formula defines the model as intercept only: \code{~1}.
-
-<<>>=
-forms <- list(state=~elev, det=~1)
-@
-
-\subsection{Regression coefficients}
-
-Next we must tell \code{unmarked} what the values for the intercept and regression coefficients in each submodel should be.
-Once again, this is a named list, one element for each submodel.
-Each list element is a numeric vector.
-The components of each numeric vector must also be named, matching the covariate names in our list of formulas.
-Don't forget we also must specify a value for the intercept in each submodel (can be named \code{Intercept} or \code{intercept}).
-If we are not sure exactly how to structure this list, just skip it for now: \code{unmarked} can generate a template for us to fill in later.
-
-<<>>=
-coefs <- list(state=c(intercept=0, elev=-0.4), det=c(intercept=0))
-@
-
-We have a list with two elements, each a numeric vector.
-Both contain intercept values, and the \code{state} vector also contains a value corresponding to the desired effect of our covariate \code{elev}.
-
-\subsection{Study design information}
-
-Finally, we need to give \code{unmarked} information about the study design.
-This is pretty simple: we just need a list containing values for \code{M}, the number of sites, and \code{J} the number of surveys per site.
-For models with multiple primary periods, we'd also need a value of \code{T}, the number of primary periods.
-
-<<>>=
-design <- list(M=300, J=8) # 300 sites, 8 occasions per site
-@
-
-\subsection{Put it all together}
-
-We're now ready to simulate a dataset.
-To do this we use the \code{simulate} function, providing as arguments the name of the model \code{"occu"} and the three lists we constructed above.
-Actually, first, let's not supply the \code{coefs} list, to show how \code{unmarked} will generate a template for us to use:
-
-<<eval=FALSE>>=
-simulate("occu", formulas=forms, design=design)
-@
-
-<<echo=FALSE>>=
-try(simulate("occu", formulas=forms, design=design))
-@
-
-We can replicate this provided list structure and fill in our own numeric values.
-Once we have our coefficients set up properly, add them to the function call:
-
-<<>>=
-occu_umf <- simulate("occu", formulas=forms, coefs=coefs, design=design)
-head(occu_umf)
-@
-
-\code{unmarked} has generated a presence-absence dataset as well as values for covariate \code{elev}.
-We can check that it worked as expected by fitting the corresponding model to the dataset, and making sure the estimated values are similar:
-
-<<>>=
-(occu(~1 ~elev, occu_umf))
-@
-
-\subsection{Customizing the covariates}
-
-By default, a covariate will be continuous and come from a standard normal distribution (mean 0, SD 1).
-However, we can control this using the \code{guide} argument.
-For example, suppose we want elevation to come from a random normal, but with a mean of 2 and a standard deviation of 0.5.
-We can provide a named list to the \code{guide} argument as follows:
-
-<<>>=
-guide <- list(elev=list(dist=rnorm, mean=2, sd=0.5))
-@
-
-\code{guide} contains one element, called \code{elev}, which is also a list and contains three components:
-
-\begin{enumerate}
- \item{The random distribution function to use, \code{rnorm}}
- \item{The mean of the distribution}
- \item{The SD of the distribution}
-\end{enumerate}
-
-<<>>=
-occu_umf <- simulate("occu", formulas=forms, coefs=coefs, design=design, guide=guide)
-head(occu_umf)
-@
-
-You can see the \code{elev} covariate now has values corresponding to the desired distribution.
-Note that the elements of the list will depend on the arguments required by the random distribution function.
-For example, to use a uniform distribution instead:
-
-<<>>=
-guide <- list(elev=list(dist=runif, min=0, max=1))
-occu_umf <- simulate("occu", formulas=forms, coefs=coefs, design=design, guide=guide)
-head(occu_umf)
-@
-
-It is also possible to define a categorical (factor) covariate.
-We specify an entry in the \code{guide} list, but instead of a list, we supply a call to \code{factor} which defines the desired factor levels.
-For example, suppose we want to add a new \code{landcover} covariate to our simulated model.
-First, define the new formulas:
-
-<<>>=
-forms2 <- list(state=~elev+landcover, det=~1)
-@
-
-And then the new guide, including the information about factor levels:
-
-<<>>=
-guide <- list(landcover=factor(levels=c("forest","grass","urban")))
-@
-
-We'd also need an updated \code{coefs} since we have a new covariate.
-Defining the \code{coefs} when you have factors in your model is a little trickier, since R names the effects as a combination of the factor name and the level name.
-There is no coefficient for the reference level (\code{"forest"} in our example), but we need to provide coefficients for both \code{"grass"} and \code{"urban"}.
-When combined with the factor name the complete coefficient names for these two will be \code{landcovergrass} and \code{landcoverurban}.
-The easiest way to make sure we get these names right is to let \code{unmarked} generate a template \code{coefs} for you as shown above, and then fill it in.
-
-<<>>=
-# forest is the reference level for landcover since it was listed first
-coefs2 <- list(state=c(intercept=0, elev=-0.4, landcovergrass=0.2,
- landcoverurban=-0.7), det=c(intercept=0))
-@
-
-<<>>=
-head(simulate("occu", formulas=forms2, coefs=coefs2, design=design, guide=guide))
-@
-
-Our output dataset now includes a new categorical covariate.
-
-\subsection{Models that require more information}
-
-More complex models might require more information for simulation.
-Nearly any argument provided to either the fitting function for the model, or the corresponding \code{unmarkedFrame} constructor, can be provided as an optional argument to \code{simulate} to customize the simulation.
-For example, we may want to specify that abundance should be simulated as a negative binomial, instead of a Poisson, for \code{pcount}.
-This information is simply added as additional arguments to \code{simulate}.
-For example, we can simulate a \code{pcount} dataset using the negative binomial (\code{"NB"}) distribution.
-The negative binomial has an additional parameter to estimate (\code{alpha}) so we must also add an element to \code{coefs}.
-
-<<>>=
-coefs$alpha <- c(alpha=0.5)
-head(simulate("pcount", formulas=forms, coefs=coefs, design=design, mixture="NB"))
-@
-
-In the next section we will show a more detailed example involving these additional arguments.
-
-\section{Simulating a more complex dataset: gdistremoval}
-
-The \code{gdistremoval} function fits the model of \cite{Amundson_2014}, which estimates abundance using a combination of distance sampling and removal sampling data.
-When simulating a dataset based on this model, we have to provide several additional pieces of information related to the structure of the distance and removal sampling analyses.
-
-To begin, we will define the list of formulas.
-A \code{gdistremoval} model, when there is only one primary period, has three submodels: abundance (\code{"lambda"}), distance sampling (\code{"dist"}), and removal sampling (\code{"rem"}).
-We will fit a model with an effect of elevation \code{elev} on abundance and an effect of wind \code{wind} on removal probability.
-
-<<>>=
-forms <- list(lambda=~elev, dist=~1, rem=~wind)
-@
-
-Next we will define the corresponding coefficients.
-We will set mean abundance at 5.
-The intercept is on the log scale, thus the intercept for \code{lambda} will be \code{log(5)}.
-The scale parameter for the detection function will be 50, and again it is on the log scale.
-The intercept for the removal probability is on the logit scale, so we will set the intercept at -1 (equivalent to a mean removal probability of about 0.27).
-Don't forget the covariate effects on \code{lambda} and removal.
-
-<<>>=
-coefs <- list(lambda=c(intercept=log(5), elev=0.7),
- dist=c(intercept=log(50)), rem=c(intercept=-1, wind=-0.3))
-@
-
-Our study will have 300 sites.
-This model is unique in that we have to specify the number of two different types of observations: (1) the number of distance sampling bins (\code{Jdist}), and the number of removal intervals (\code{Jrem}).
-
-<<>>=
-design <- list(M = 300, Jdist=4, Jrem=5)
-@
-
-Finally we are ready to simulate the dataset.
-In addition to the name of the model, \code{forms}, \code{coefs} and \code{design}, we also need to provide some additional information.
-We need to define the distance breaks for the distance sampling part of the model (there should be \code{Jdist+1} of these), and also the key function to use when simulating the detection process.
-
-<<>>=
-umf <- simulate("gdistremoval", formulas=forms, coefs=coefs, design=design,
- dist.breaks=c(0,25,50,75,100), keyfun="halfnorm", unitsIn="m")
-head(umf)
-@
-
-The result is a dataset containing a combination of distance, removal, and covariate data.
-We can check to see if fitting a model to this dataset recovers our specified coefficient values:
-
-<<>>=
-(fit <- gdistremoval(lambdaformula=~elev, removalformula=~wind,
- distanceformula=~1, data=umf))
-@
-
-Looks good.
-
-\section{Conclusion}
-
-The \code{simulate} function provides a flexible tool for simulating data from any model in \code{unmarked}.
-These datasets can be used for a variety of purposes, such as for teaching examples, testing models, or developing new tools that work with \code{unmarked}.
-Additionally, simulating datasets is a key component of the power analysis workflow in \code{unmarked} - see the power analysis vignette for more examples.
-
-\bibliography{unmarked}
-
-\end{document}
diff --git a/vignettes/spp-dist-psi2.pdf b/vignettes/spp-dist-psi2.pdf
deleted file mode 100644
index 745774b..0000000
--- a/vignettes/spp-dist-psi2.pdf
+++ /dev/null
Binary files differ
diff --git a/vignettes/spp-dist.Rnw b/vignettes/spp-dist.Rmd
index b533891..8157e75 100644
--- a/vignettes/spp-dist.Rnw
+++ b/vignettes/spp-dist.Rmd
@@ -1,91 +1,67 @@
-<<echo=false>>=
-options(width=70)
-options(continue=" ")
-@
+---
+title: Modeling and mapping species distributions
+author: Richard Chandler
+date: Feb 5, 2019
+bibliography: unmarked.bib
+csl: ecology.csl
+output:
+ rmarkdown::html_vignette:
+ fig_width: 5
+ fig_height: 3.5
+ number_sections: true
+ toc: true
+vignette: >
+ %\VignetteIndexEntry{Species distributions}
+ %\VignetteEngine{knitr::rmarkdown}
+ \usepackage[utf8]{inputenc}
+---
+
+```{r,echo=FALSE}
+options(rmarkdown.html_vignette.check_title = FALSE)
+```
+
+# Abstract
-\documentclass[a4paper]{article}
-\usepackage[OT1]{fontenc}
-\usepackage{Sweave}
-\usepackage[authoryear,round]{natbib}
-%\usepackage{fullpage}
-\usepackage[vmargin=1in,hmargin=1in]{geometry}
-\usepackage{verbatim}
-\usepackage{color}
-
-%\usepackage[a4paper, hmargin={2cm,2cm}, vmargin={2cm,2cm}]{geometry}
-
-
-\bibliographystyle{ecology}
-
-\DefineVerbatimEnvironment{Sinput}{Verbatim} {xleftmargin=2em}
-\DefineVerbatimEnvironment{Soutput}{Verbatim}{xleftmargin=2em}
-\DefineVerbatimEnvironment{Scode}{Verbatim}{xleftmargin=2em}
-\fvset{listparameters={\setlength{\topsep}{0pt}}}
-\renewenvironment{Schunk}{\vspace{\topsep}}{\vspace{\topsep}}
-
-%%\VignetteIndexEntry{Species distributions}
-
-\title{Modeling and mapping species distributions}
-\author{Richard Chandler}
-\date{Feb 5, 2019}
-
-
-\begin{document}
-
-\maketitle
-
-\abstract{
A species' distribution can be characterized by either
occurrence probability or population density, defined for all
locations in some spatial extent. Defining distribution in terms of
these two parameters
avoids the ambiguity surrounding the indices of occurrence
or abundance produced by many presence-only algorithms. The
-\texttt{unmarked} package contains methods of fitting
+`unmarked` package contains methods of fitting
occurrence and abundance models, and can be used to
-produce distribution maps with the help of \textbf{R}'s GIS
+produce distribution maps with the help of `R`'s GIS
capabilities,
-%, such as the \texttt{raster} package
-%\citep{hijmans_vanEtten:2012}
as is demonstrated in this vignette.
Unlike many other tools for modeling
-species distributions, the models in \texttt{unmarked} account for
+species distributions, the models in `unmarked` account for
bias due to spatial and temporal heterogeneity in detection
-probability. Furthermore, \texttt{unmarked} includes models
+probability. Furthermore, `unmarked` includes models
of population dynamics, allowing one to map quantities
such as local colonization or extinction probability.
-}
+# Mapping Occurrence Probability
-<<echo=false>>=
-library(unmarked)
-library(raster)
-@
-
-
-\section*{Mapping Occurrence Probability}
-
-
-
-In this example, we use the \verb+occu+ function to fit the
+In this example, we use the `occu` function to fit the
single-season occupancy model of
-\citep{mackenzie_estimating_2002} to data on the European crossbill
-(\emph{Loxia curvirostra}) collected in 267 1-km$^2$ sample
-quadrats in Switzerland, 1999 \citep{schmid_etal:2004}.
+@mackenzie_estimating_2002 to data on the European crossbill
+(*Loxia curvirostra*) collected in 267 1-km$^2$ sample
+quadrats in Switzerland, 1999 [@schmid_etal:2004].
We then use the model to compute the expected probability of
occurrence at each pixel in a raster defining the Swiss
landscape.
-First we load the \verb+crossbill+ data, which contains the
+First we load the `crossbill` data, which contains the
detection/non-detection data and covariates. The dataset actually
contains data from multiple years, but we are only going to analyze
data from the first year, 1999. A multi-year analysis of occupancy
-dynamics could be accomplished using the \verb+colext+ function, and
+dynamics could be accomplished using the `colext` function, and
in addition to mapping occurrence probability, it would be possible to
also map colonization and extinction probabilities. The following
commands format the data.
-<<>>=
+```{r}
+library(unmarked)
data(crossbill)
umf <- unmarkedFrameOccu(
y=as.matrix(crossbill[,c("det991", "det992", "det993")]),
@@ -94,68 +70,85 @@ umf <- unmarkedFrameOccu(
sc <- scale(siteCovs(umf))
siteCovs(umf) <- sc
head(umf)
-@
+```
Notice that the site covariates, elevation and forest, were
-standardized using the \verb+scale+ function. Standardization isn't
+standardized using the `scale` function. Standardization isn't
always necessary, but it can make it easier to find the maximum
likelihood estimates. When standardizing covariates and then making
predictions, it is important to retain the original sample mean and
standard deviation. The reason for this is explained below.
-\newpage
-
Fitting a model is now straight-forward. In many cases, we would fit
several models corresponding to competing hypotheses, but for
simplicity, we stick with this single model.
-<<>>=
+```{r}
(fm.occu <- occu(~date ~ele + I(ele^2) + forest, umf))
-@
+```
Now that we have our fitted model, we can use it to predict occurrence
-probability at each pixel in the Swiss landscape. The \verb+Switzerland+
+probability at each pixel in the Swiss landscape. The `Switzerland`
dataset contains country-wide data. There are many ways to display
it---here is an example of mapping elevation using the
-\verb+levelplot+ function in the \texttt{lattice} package \citep{sarkar:2008}.
+`levelplot` function in the `lattice` package [@sarkar:2008].
-<<swiss,fig=TRUE,include=FALSE,width=5,height=4>>=
+```{r, eval=FALSE}
library(lattice)
data(Switzerland)
print(levelplot(elevation ~ x + y, Switzerland, aspect="iso",
xlab="Easting (m)", ylab="Northing (m)",
col.regions=terrain.colors(100)))
-@
-\begin{figure}
- \centering
- \includegraphics[width=5in,height=4in]{spp-dist-swiss}
- \caption{Elevation in Switzerland}
- \label{fig:swiss}
-\end{figure}
-
-The \texttt{raster} package \citep{hijmans_vanEtten:2012}
+```
+
+```{r, echo=FALSE, fig.height=4, fig.width=5, fig.cap="Figure 1. Elevation in Switzerland"}
+if(requireNamespace("lattice", quietly = TRUE)){
+ library(lattice)
+ data(Switzerland)
+ print(levelplot(elevation ~ x + y, Switzerland, aspect="iso",
+ xlab="Easting (m)", ylab="Northing (m)",
+ col.regions=terrain.colors(100)))
+} else {
+ message("lattice package is required for this vignette but is not available\n")
+ knitr::knit_exit()
+}
+```
+
+The `raster` package [@hijmans_vanEtten:2012]
provides another alternative. Here we create two raster objects and
specify the coordinate system.
-<<>>=
+```{r,eval=FALSE}
library(raster)
+```
+
+```{r,echo=FALSE}
+if(requireNamespace("raster", quietly = TRUE)){
+ suppressMessages(library(raster))
+} else {
+ message("raster package is required for this vignette but is not available\n")
+ knitr::knit_exit()
+}
+```
+
+```{r}
elevation <- rasterFromXYZ(Switzerland[,c("x","y","elevation")],
crs="+proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=600000 +y_0=200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs")
forest <- rasterFromXYZ(Switzerland[,c("x","y","forest")],
crs="+proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=600000 +y_0=200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs")
-@
+```
Since we standardized the covariates during the model fitting process,
we need to transform the country-wide data using the same
values. Note, we don't want to use the mean and SD of the rasters
themselves, we want to use the mean and SD of the original covariates
used to fit the models, which are stored as attributes of the
-\verb+sc+ object. The following commands display the original means
+`sc` object. The following commands display the original means
and SDs and then transform the rasters and join them in a raster
-``stack.''
+"stack".
-<<ef,fig=TRUE,include=FALSE,height=3,width=6>>=
+```{r, fig.height=3, fig.width=6, fig.cap="Figure 2. Elevation and forest cover, standardized"}
attr(sc, "scaled:center")
attr(sc, "scaled:scale")
ele.s <- (elevation-1189)/640
@@ -163,114 +156,85 @@ forest.s <- (forest-34.7)/27.7
ef <- stack(ele.s, forest.s)
names(ef) <- c("ele", "forest")
plot(ef, col=terrain.colors(100))
-@
-\begin{figure}
- \centering
- \includegraphics[width=6in,height=3in]{spp-dist-ef}
- \caption{Elevation and forest cover, standardized.}
-\label{fig:ef}
-\end{figure}
-
-It is important to assign \verb+names+
+```
+
+It is important to assign `names`
that exactly match the covariate names used to fit the model. This
-is required by the \verb+predict+ function as demonstrated later.
-The \verb+predict+ function is useful for computing
+is required by the `predict` function as demonstrated later.
+The `predict` function is useful for computing
spatially-referenced model predictions, standard errors, and
confidence intervals, but it is computationally demanding when
there are many pixels in the raster. Thus, if measures of uncertainty
are not required, the following code can be used to quickly produce
-the species distribution map shown in Fig.\ref{fig:psi1}.
+the species distribution map shown in Fig. 3.
-<<psi,fig=TRUE,include=FALSE>>=
+```{r, fig.height=4, fig.width=4, fig.cap="Figure 3. A species distribution map for the European crossbill in Switzerland. The colors represent occurrence probability."}
(beta <- coef(fm.occu, type="state"))
logit.psi <- beta[1] + beta[2]*ele.s + beta[3]*ele.s^2 + beta[4]*forest.s
psi <- exp(logit.psi) / (1 + exp(logit.psi))
-#plot(psi, col=terrain.colors(100))
print(spplot(psi, col.regions=terrain.colors(100)))
-@
-\begin{figure}
- \includegraphics[width=4in,height=4in]{spp-dist-psi}
- \centering
- \caption{A species distribution map for the European crossbill in
- Switzerland. The colors represent occurrence probability.}
-\label{fig:psi1}
-\end{figure}
-
-As of version 0.9-6, the \verb+predict+ method in \texttt{unmarked}
-can make predictions using an object of class \verb+RasterStack+ from the
+```
+
+As of version 0.9-6, the `predict` method in `unmarked`
+can make predictions using an object of class `RasterStack` from the
`raster` package. As mentioned previously, the rasters must be
-named, perhaps by using the \verb+names(someraster) <- somename+
+named, perhaps by using the `names(someraster) <- somename`
method. The object
-returned by \verb+predict+ is another raster stack with rasters for
+returned by `predict` is another raster stack with rasters for
the expected values of the parameter of interest, the standard errors,
and the upper and lower confidence intervals. The following example
is very slow because there are many pixels in the raster. The
-resulting map is shown in Fig.~\ref{fig:predict}.
+resulting map is shown in Fig. 4.
-<<psi2,eval=false,echo=false,fig=TRUE,include=FALSE>>=
+```{r, fig.height=5, fig.width=5, fig.cap="Figure 4. Expected occurrence probability along with standard errors and the limits of the asymptotic 95% confidence interval."}
E.psi <- predict(fm.occu, type="state", newdata=ef)
plot(E.psi, axes=FALSE, col=terrain.colors(100))
-@
-\begin{Schunk}
-\begin{Sinput}
-> E.psi <- predict(fm.occu, type="state", newdata=ef)
-\end{Sinput}
-\begin{Sinput}
-> plot(E.psi, axes=FALSE, col=terrain.colors(100))
-\end{Sinput}
-\end{Schunk}
-\begin{figure}%[b!]
- \centering
-\includegraphics[width=5in,height=5in]{spp-dist-psi2}
-\caption{Expected occurrence probability along with standard errors
- and the limits of the asymptotic 95\% confidence interval.}
-\label{fig:predict}
-\end{figure}
+```
Users should be cautious when predicting from models that have
-categorical predictor variables, \emph{i.e.} \verb+factor+s. The
+categorical predictor variables, i.e. `factor`s. The
`raster` package does not have advanced methods for handling
factors, and thus it is not easy to automatically create dummy
variables from them as can typically be done using
-\verb+model.matrix+. The safest option is to create the dummy
+`model.matrix`. The safest option is to create the dummy
variables manually before fitting the models, and to use the same
variables as rasters for prediction.
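+
+A minimal sketch of this approach is shown below; the factor covariate `habitat`, its levels, and the raster `habitat_raster` are hypothetical and serve only to illustrate hand-coding the dummy variable before fitting:
+
+```{r, eval=FALSE}
+# Hypothetical example: hand-code a 0/1 dummy variable for a two-level factor
+# covariate `habitat` (levels "open" and "forest") before fitting the model.
+siteCovs(umf)$habitatforest <- as.numeric(siteCovs(umf)$habitat == "forest")
+fm.dummy <- occu(~date ~habitatforest, umf)
+# Build a matching 0/1 raster for prediction; `habitat_raster` is an assumed
+# raster in which forest is coded as 2.
+habitatforest <- habitat_raster == 2
+names(habitatforest) <- "habitatforest"
+```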
A more important consideration when creating species distribution maps
based upon occurrence probability is that of spatial scale. Occurrence
-probability will typically depend upon the area of the ``site'' in
+probability will typically depend upon the area of the "site" in
question. Thus, in our crossbill example, it would not be appropriate
-to use our model to predict occcurrence probability for 10-km$^2$
+to use our model to predict occurrence probability for 10-km$^2$
pixels since the surveys were done in 1-km$^2$ quadrats. In some
cases it might be possible to directly model the effect of site area
on occurrence probability, in which case the effect could be accounted
for in the predictions.
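+
+For instance, if the sample quadrats had varied in size, one hypothetical way to account for this would be to include (log) plot area as an occupancy covariate and supply the corresponding pixel area when predicting (a sketch only; `area` is not part of the crossbill data):
+
+```{r, eval=FALSE}
+# Hypothetical sketch: site area included as a covariate on occupancy
+fm.area <- occu(~date ~ele + I(ele^2) + forest + log(area), umf)
+```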
-\section*{Mapping Population Density}
+# Mapping Population Density
Although distribution is typically described in terms of
-ocurrence probability, which is always better than an index of
+occurrence probability, which is always better than an index of
occurrence probability, the best parameter for modeling species
distribution is population density because density allows for
-inference about popualation size in any region of
+inference about population size in any region of
the species' range. Furthermore, occurrence probability is simply the
-probablity that abundance is greater than 0, so with density/abundance
-estimates, it is always possible to compute occurrence probablity as a
+probability that abundance is greater than 0, so with density/abundance
+estimates, it is always possible to compute occurrence probability as a
derived parameter.
In this example, we create a distribution map for the Island Scrub-Jay
-(\textit{Aphelocoma insularis}), which is restricted to Santa Cruz
+(*Aphelocoma insularis*), which is restricted to Santa Cruz
Island, California. To do so, we fit the hierarchical distance
-sampling model of \citet{royle_modeling_2004}, which allows for the
+sampling model of @royle_modeling_2004, which allows for the
estimation of abundance in each of the $300 \times 300$m pixels
representing the island. The data were collected at 307, 300-m radius
-point count (or ``point transect'') surveyed during the Fall of 2008.
+point count (or "point transect") surveyed during the Fall of 2008.
-{\color{red} Important} This analysis is for demonstration
+IMPORTANT: This analysis is for demonstration
purposes only, and the estimates of population size should not be used
for conservation or management purposes. Indeed, the Poisson
assumption used here was found to be inadequate by
-\citet{sillett_etal:2012} who conducted a rigorous analysis and
+@sillett_etal:2012 who conducted a rigorous analysis and
reported reliable estimates of population size.
Although we are fitting a model of population density, the steps of
@@ -282,7 +246,7 @@ chaparral cover. We also include the area of the survey
plots in the analysis so that we can make predictions for regions of
any area. Here is the code to format the data and fit the model.
-<<>>=
+```{r}
data(issj)
covs <- scale(issj[,c("elevation", "forest", "chaparral")])
area <- pi*300^2 / 10000
@@ -295,46 +259,20 @@ fm1 <- distsamp(~chaparral ~chaparral + elevation + offset(log(area)),
jayumf, keyfun="halfnorm", output="abund",
starts=c(-2.8,1,0,4.5,0))
fm1
-@
+```
Remarks. 1) The distance data were binned into 3 distance classes. 2)
-We used \verb+output="abund"+ even though, by specifying the offset,
+We used `output="abund"` even though, by specifying the offset,
we effectively modeled population density. As stated previously, this
allows us to make predictions of abundance for regions of arbitrary size.
-
-
-\begin{comment}
-
-<<fig=TRUE,width=6,height=4>>=
-data(issj)
-data(cruz)
-elev <- rasterFromXYZ(cruz[,c("x","y","elevation")],
- crs="+proj=utm +zone=11 +ellps=GRS80 +datum=NAD83 +units=m +no_defs")
-#plot(elev, col=terrain.colors(100))
-#points(issj[,c("x","y")], cex=0.5)
-@
-print(
-wireframe(elevation ~ x + y, cruz, drape=TRUE,
- screen=list(z=10, x=-10),
- aspect=0.5, xlab="", ylab="", zlab="",
-# xlim=c(229900,267000), ylim=c(3762000,3770000),
- par.settings = list(axis.line = list(col = "transparent")),
- par.box = c(col = "transparent"),
- col.regions=terrain.colors(100),
- colorkey=FALSE)
-)
-
-\end{comment}
-
-
The next thing to do is to format the raster data. For details, see
-the previous section---the process is the same, except that we need a
-raster for ``area'', the size of each pixel in the raster data. This
+the previous section; the process is the same, except that we need a
+raster for `"area"`, the size of each pixel in the raster data. This
is necessary because the survey plots were larger than the pixels for
which we want predictions of abundance.
-<<>>=
+```{r}
data(cruz)
elev <- rasterFromXYZ(cruz[,c("x","y","elevation")],
crs="+proj=utm +zone=11 +ellps=GRS80 +datum=NAD83 +units=m +no_defs")
@@ -351,53 +289,18 @@ forest.s <- (forest-0.0673)/0.137
chap.s <- (chap-0.270)/0.234
habitat <- stack(elev.s, forest.s, chap.s, area.raster)
names(habitat) <- c("elevation", "forest", "chaparral", "area")
-@
-
+```
-Now, when we use \verb+predict+, it will return the expected number of
+Now, when we use `predict`, it will return the expected number of
jays in each pixel along with the standard errors and the 95\%
confidence intervals. We could sum these up to obtain an estimate of
-total population size. \citet{sillett_etal:2012} did this and used the
-parametric boostrap to estimate the variance of total population
+total population size. @sillett_etal:2012 did this and used the
+parametric bootstrap to estimate the variance of total population
size.
-<<issj,fig=TRUE,include=FALSE,width=6,height=5>>=
+```{r, fig.height=5, fig.width=6, fig.cap="Figure 5. Expected Island Scrub-Jay abundance, SEs, and 95% CIs."}
E <- predict(fm1, type="state", newdata=habitat)
plot(E, axes=FALSE, col=terrain.colors(100))
-@
-\begin{figure}
- \centering
-\includegraphics[width=6in,height=5in]{spp-dist-issj}
-\caption{Expected Island Scrub-Jay abundance, SEs, and 95\% CIs.}
-\label{fig:issj}
-\end{figure}
-
-
-\begin{comment}
-<<>>=
-cruz2 <- data.frame(cruz[,1:2],
- chaparral=(cruz$chaparral-0.270)/0.234,
- elevation=(cruz$elevation-202)/125)
-cruz2$E.N <- exp(-2.827 + 0.957*cruz2$chaparral + -0.244*cruz2$elevation)
-wireframe(E.N ~ x + y, cruz2,
- shade=TRUE, #shade.colors.palette=terrain.colors(100),
-# drape=TRUE,
- aspect=0.5, colorkey=FALSE,
- screen=list(z=10, x=-10))
-
-@
-\end{comment}
-
-
-
-<<echo=FALSE>>=
-detach(package:raster)
-@
-
-
-
-\newpage
-
-\bibliography{unmarked}
+```
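+
+As a rough sketch of the total population size calculation mentioned above (ignoring uncertainty; the layer name `Predicted` is assumed to match the output of `predict`, and `cellStats` is from the `raster` package):
+
+```{r, eval=FALSE}
+# Sum expected abundance over all pixels to approximate total population size.
+# A parametric bootstrap (see parboot) would be needed for a variance estimate.
+cellStats(E[["Predicted"]], stat="sum")
+```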
-\end{document}
+# References
diff --git a/vignettes/unmarked.Rnw b/vignettes/unmarked.Rmd
index a0d90dc..8c02c5a 100644
--- a/vignettes/unmarked.Rnw
+++ b/vignettes/unmarked.Rmd
@@ -1,44 +1,37 @@
-<<echo=false>>=
-options(width=70)
-options(continue=" ")
-@
-
-\documentclass[a4paper]{article}
-\usepackage[OT1]{fontenc}
-\usepackage{Sweave}
-\usepackage{natbib}
-%\usepackage{fullpage}
-\usepackage[vmargin=1in,hmargin=1in]{geometry}
-\bibliographystyle{plain}
-
-\DefineVerbatimEnvironment{Sinput}{Verbatim} {xleftmargin=2em}
-\DefineVerbatimEnvironment{Soutput}{Verbatim}{xleftmargin=2em}
-\DefineVerbatimEnvironment{Scode}{Verbatim}{xleftmargin=2em}
-\fvset{listparameters={\setlength{\topsep}{0pt}}}
-\renewenvironment{Schunk}{\vspace{\topsep}}{\vspace{\topsep}}
-
-%%\VignetteIndexEntry{Overview of unmarked}
-
-\title{Overview of Unmarked:\\
-An R Package for the Analysis of Data from Unmarked Animals}
-\author{Ian Fiske and Richard Chandler}
-\date{Feb 5, 2019}
-
-
-\begin{document}
-
-\maketitle
-
-\abstract{Unmarked aims to be a complete environment for the
- statistical analysis of data from surveys of unmarked
- animals. Currently, the focus is on hierarchical models that
- separately model a latent state (or states) and an observation
- process. This vignette provides a brief overview of the package ---
- for a more thorough treatment see \citep{fiskeChandler_2011}
-}
-
-
-\section{Overview of unmarked}
+---
+title: "Overview of unmarked: an R Package for the Analysis of Data from Unmarked Animals"
+author:
+- name: Ian Fiske
+- name: Richard Chandler
+date: February 5, 2019
+bibliography: unmarked.bib
+csl: ecology.csl
+output:
+ rmarkdown::html_vignette:
+ fig_width: 5
+ fig_height: 3.5
+ number_sections: true
+ toc: true
+vignette: >
+ %\VignetteIndexEntry{Overview of unmarked}
+ %\VignetteEngine{knitr::rmarkdown}
+ \usepackage[utf8]{inputenc}
+---
+
+```{r,echo=FALSE}
+options(rmarkdown.html_vignette.check_title = FALSE)
+```
+
+# Abstract
+
+`unmarked` aims to be a complete environment for the
+statistical analysis of data from surveys of unmarked
+animals. Currently, the focus is on hierarchical models that
+separately model a latent state (or states) and an observation
+process. This vignette provides a brief overview of the package -
+for a more thorough treatment see @fiskeChandler_2011.
+
+# Overview of unmarked
Unmarked provides methods to estimate site occupancy, abundance, and
density of animals (or possibly other organisms/objects) that cannot be
@@ -54,43 +47,42 @@ way that allows for easy data manipulation, summarization, and model
specification. Table 1 lists the currently implemented models and
their associated fitting functions and data classes.
-\begin{table}[!h] %%\footnotesize
-\centering
-\begin{tabular}{cccc}
-\hline
-Model & Fitting Function & Data & Citation \\ \hline
-Occupancy & occu & unmarkedFrameOccu & \citep{mackenzie_estimating_2002} \\
-Royle-Nichols & occuRN & unmarkedFrameOccu & \citep{royle_estimating_2003} \\
-Point Count & pcount & unmarkedFramePCount & \citep{royle_n-mixture_2004} \\
-Distance-sampling & distsamp & unmarkedFrameDS & \citep{royle_modeling_2004} \\
-Generalized distance-sampling & gdistsamp & unmarkedFrameGDS & \citep{chandlerEA_2011} \\
-Arbitrary multinomial-Poisson & multinomPois & unmarkedFrameMPois & \citep{royle_generalized_2004} \\
-Colonization-extinction & colext & unmarkedMultFrame & \citep{mackenzie_estimating_2003} \\
-Generalized multinomial-mixture & gmultmix & unmarkedFrameGMM & \citep{royle_generalized_2004} \\
-\hline
-\end{tabular}
-\caption{Models handled by unmarked.}
-\label{tab:models}
-\end{table}
+```{r, echo=FALSE}
+tab1 <- data.frame(
+ Model=c("Occupancy", "Royle-Nichols", "Point Count", "Distance-sampling",
+ "Generalized distance-sampling", "Arbitrary multinomial-Poisson",
+ "Colonization-extinction", "Generalized multinomial-mixture"),
+ `Fitting Function`=c("occu","occuRN","pcount","distsamp","gdistsamp",
+ "multinomPois","colext","gmultmix"),
+ Data=c("unmarkedFrameOccu","unmarkedFrameOccu","unmarkedFramePCount",
+ "unmarkedFrameDS","unmarkedFrameGDS","unmarkedFrameMPois",
+ "unmarkedMultFrame","unmarkedFrameGMM"),
+ Citation=c("@mackenzie_estimating_2002","@royle_estimating_2003",
+ "@royle_n-mixture_2004","@royle_modeling_2004",
+ "@chandlerEA_2011","@royle_generalized_2004",
+ "@mackenzie_estimating_2003","@royle_generalized_2004"),
+ check.names=FALSE)
+
+knitr::kable(tab1, format='markdown', align="lccc",
+ caption="Table 1. Models handled by unmarked.")
+```
Each data class can be created with a call to the constructor function
of the same name as described in the examples below.
-%%\newpage
-
-\section{Typical unmarked session}
+# Typical unmarked session
The first step is to import the data into R, which we do below using
-the \textbf{read.csv} function. Next, the data need to be formatted for
+the `read.csv` function. Next, the data need to be formatted for
use with a specific model fitting function. This can be accomplished
-with a call to the appropriate type of \textbf{unmarkedFrame}. For
+with a call to the appropriate type of `unmarkedFrame`. For
example, to prepare the data for a single-season site-occupancy
-analysis, the function \textbf{unmarkedFrameOccu} is used.
+analysis, the function `unmarkedFrameOccu` is used.
-\subsection{Importing and formatting data}
+## Importing and formatting data
-<<>>=
+```{r}
library(unmarked)
wt <- read.csv(system.file("csv","widewt.csv", package="unmarked"))
y <- wt[,2:4]
@@ -99,63 +91,63 @@ obsCovs <- list(date=wt[,c("date.1", "date.2", "date.3")],
ivel=wt[,c("ivel.1", "ivel.2", "ivel.3")])
wt <- unmarkedFrameOccu(y = y, siteCovs = siteCovs, obsCovs = obsCovs)
summary(wt)
-@
+```
-Alternatively, the convenience function \textbf{csvToUMF} can be used
+Alternatively, the convenience function `csvToUMF` can be used
-<<>>=
+```{r}
wt <- csvToUMF(system.file("csv","widewt.csv", package="unmarked"),
long = FALSE, type = "unmarkedFrameOccu")
-@
+```
If not all sites have the same numbers of observations, then manual
-importation of data in long format can be tricky. \textbf{csvToUMF}
+importation of data in long format can be tricky. `csvToUMF`
seamlessly handles this situation.
-<<>>=
+```{r}
pcru <- csvToUMF(system.file("csv","frog2001pcru.csv", package="unmarked"),
long = TRUE, type = "unmarkedFrameOccu")
-@
+```
To help stabilize the numerical optimization algorithm, we recommend
standardizing the covariates.
-<<>>=
+```{r}
obsCovs(pcru) <- scale(obsCovs(pcru))
-@
-
+```
-\subsection{Fitting models}
+## Fitting models
Occupancy models can then be fit with the `occu` function:
-<<>>=
+```{r}
fm1 <- occu(~1 ~1, pcru)
fm2 <- occu(~ MinAfterSunset + Temperature ~ 1, pcru)
fm2
-@
+```
Here, we have specified that the detection process is modeled with the
-MinAfterSunset and Temperature covariates. No covariates are
-specified for occupancy here. See ?occu for more details.
+`MinAfterSunset` and `Temperature` covariates. No covariates are
+specified for occupancy here. See `?occu` for more details.
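+For reference, site-level covariates go in the second (occupancy) part
+of the double formula. A hypothetical sketch is shown below (not run;
+`habitat` is an assumed covariate name, not a column present in `pcru`):
+
+```{r, eval=FALSE}
+## Hypothetical: detection modeled with observation covariates, occupancy
+## modeled with an assumed site covariate called `habitat`.
+fm3 <- occu(~ MinAfterSunset + Temperature ~ habitat, pcru)
+```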
-\subsection{Back-transforming parameter estimates}
+## Back-transforming parameter estimates
-Unmarked fitting functions return unmarkedFit objects which can be
+`unmarked` fitting functions return `unmarkedFit` objects which can be
queried to investigate the model fit. Variables can be
-back-transformed to the unconstrained scale using backTransform.
+back-transformed to the original scale using `backTransform`.
Standard errors are computed using the delta method.
-<<>>=
+```{r}
backTransform(fm2, 'state')
-@
+```
+
The expected probability that a site was
occupied is 0.823. This estimate applies to the hypothetical
population of all possible sites, not the sites found in our sample.
For a good discussion of population-level vs finite-sample inference,
-see Royle and Dorazio \citep{royle_dorazio:2008} page 117. Note also that finite-sample
-quantities can be computed in \texttt{unmarked} using empirical Bayes
+see @royle_dorazio:2008 page 117. Note also that finite-sample
+quantities can be computed in `unmarked` using empirical Bayes
methods as demonstrated at the end of this document.
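+As a quick check, the same value can be recovered by hand: the occupancy
+intercept is estimated on the logit scale, so applying the inverse-logit
+to the raw coefficient reproduces the back-transformed estimate.
+
+```{r}
+plogis(coef(fm2, type="state"))
+```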
Back-transforming the estimate of $\psi$ was easy because there were
@@ -166,50 +158,53 @@ estimate of $p$. Here, we request
the probability of detection given a site is occupied and all
covariates are set to 0.
-<<>>=
+```{r}
backTransform(linearComb(fm2, coefficients = c(1,0,0), type = 'det'))
-@
+```
Thus, we can say that the expected probability of detection is 0.552
when time of day and temperature are fixed at their mean values. A
-predict method also exists, which can be used to obtain estimates of
+`predict` method also exists, which can be used to obtain estimates of
parameters at specific covariate values.
-<<>>=
+```{r}
newData <- data.frame(MinAfterSunset = 0, Temperature = -2:2)
round(predict(fm2, type = 'det', newdata = newData, appendData=TRUE), 2)
-@
+```
-
-Confidence intervals are requested with confint, using either the
+Confidence intervals are requested with `confint`, using either the
asymptotic normal approximation or profiling.
-
-<<>>=
+```{r, eval=FALSE}
confint(fm2, type='det')
confint(fm2, type='det', method = "profile")
-@
+```
+```{r, echo=FALSE}
+confint(fm2, type='det')
+nul <- capture.output(ci <- confint(fm2, type='det', method = "profile"))
+ci
+```
-\subsection{Model selection and model fit}
+## Model selection and model fit
Model selection and multi-model inference can be implemented after
-organizing models using the fitList function.
+organizing models using the `fitList` function.
-<<>>=
+```{r}
fms <- fitList('psi(.)p(.)' = fm1, 'psi(.)p(Time+Temp)' = fm2)
modSel(fms)
predict(fms, type='det', newdata = newData)
-@
+```
The parametric bootstrap can be used to check the adequacy of model fit.
Here we use a $\chi^2$ statistic appropriate for binary data.
-<<>>=
+```{r, warning=FALSE}
chisq <- function(fm) {
- umf <- getData(fm)
- y <- getY(umf)
+ umf <- fm@data
+ y <- umf@y
y[y>1] <- 1
sr <- fm@sitesRemoved
if(length(sr)>0)
@@ -220,44 +215,40 @@ chisq <- function(fm) {
}
(pb <- parboot(fm2, statistic=chisq, nsim=100, parallel=FALSE))
-@
+```
We fail to reject the null hypothesis, and conclude that the model fit
is adequate.
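+The `parboot` object can also be inspected graphically: its `plot`
+method displays the distribution of the simulated fit statistic with the
+observed value marked, giving a quick visual version of the same check.
+
+```{r}
+plot(pb)
+```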
-\subsection{Derived parameters and empirical Bayes methods}
+## Derived parameters and empirical Bayes methods
-The \texttt{parboot} function can be also be used to compute confidence
+The `parboot` function can also be used to compute confidence
intervals for estimates of derived parameters, such as the proportion
-of sites occupied $\mbox{PAO} = \sum_i z_i$ where $z_i$ is the true
+of the $N$ sites that are occupied, $\mbox{PAO} = \frac{\sum_i z_i}{N}$, where $z_i$ is the true
occurrence state at site $i$, which is unknown at sites where no individuals
-were detected. The ``colext'' vignette shows examples of using
-\texttt{parboot} to obtain confidence intervals for such derived
+were detected. The `colext` vignette shows examples of using
+`parboot` to obtain confidence intervals for such derived
quantities. An alternative way of achieving this goal is to use empirical Bayes
-methods, which were introduced in \texttt{unmarked} version 0.9-5. These methods estimate
+methods, which were introduced in `unmarked` version 0.9-5. These methods estimate
the posterior distribution of the latent variable given the data and
the estimates of the fixed effects (the MLEs). The mean or the mode of
the estimated posterior distribution is referred to as the empirical
-best unbiased predictor (EBUP), which in \texttt{unmarked} can be
-obtained by applying the \texttt{bup} function to the estimates of the
-posterior distributions returned by the \texttt{ranef} function. The
-following code returns the estimate of PAO and a 90\% confidence
-interval.
+best unbiased predictor (EBUP), which in `unmarked` can be
+obtained by applying the `bup` function to the estimates of the
+posterior distributions returned by the `ranef` function. The
+following code returns an estimate of PAO using EBUP.
-<<>>=
+```{r}
re <- ranef(fm2)
EBUP <- bup(re, stat="mode")
-CI <- confint(re, level=0.9)
-rbind(PAO = c(Estimate = sum(EBUP), colSums(CI)) / 130)
-@
+sum(EBUP) / numSites(pcru)
+```
+
Note that this is similar to, but slightly lower than, the
population-level estimate of $\psi$ obtained above.
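+A confidence interval for PAO can also be computed from the same
+posterior distributions by applying `confint` to the `ranef` object and
+summing the site-level bounds; for example, a 90% interval:
+
+```{r}
+CI <- confint(re, level=0.9)
+rbind(PAO = c(Estimate = sum(EBUP), colSums(CI)) / numSites(pcru))
+```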
-A plot method also exists for objects returned by \texttt{ranef}, but
+A plot method also exists for objects returned by `ranef`, but
distributions of binary variables are not so pretty. Try it out on a
fitted abundance model instead.
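+As a minimal sketch of what that might look like (not run; the object
+names `mallard.y`, `mallard.site`, and `mallard.obs` are assumed to
+match the `mallard` point-count data shipped with the package):
+
+```{r, eval=FALSE}
+## Fit a simple N-mixture model to the mallard point-count data and plot
+## the site-level posterior distributions of abundance.
+data(mallard)
+mallardUMF <- unmarkedFramePCount(mallard.y, siteCovs = mallard.site,
+                                  obsCovs = mallard.obs)
+fm.mallard <- pcount(~ 1 ~ 1, mallardUMF, K = 50)
+plot(ranef(fm.mallard))
+```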
-
-\bibliography{unmarked}
-
-\end{document}
+# References
diff --git a/vignettes/unmarked.bib b/vignettes/unmarked.bib
index b32d258..c6091c6 100644
--- a/vignettes/unmarked.bib
+++ b/vignettes/unmarked.bib
@@ -68,7 +68,7 @@ year = {2004}
@Article{fiskeChandler_2011,
author = {Ian Fiske and Richard Chandler},
- title = {\textbf{unmarked}: An \textbf{R} Package for Fitting Hierarchical Models of Wildlife Occurrence and Abundance},
+ title = {\textbf{unmarked}: An {\textbf{R}} Package for Fitting Hierarchical Models of Wildlife Occurrence and Abundance},
journal = {Journal of Statistical Software},
year = {2011},
volume = {43},
@@ -142,6 +142,15 @@ year = {2004}
pages = {2248--2255}
}
+@article{Tyre_2002,
+  author = {Tyre, A.J. and Tenhumberg, B. and Field, S.A. and Niejalke, D. and Parris, K. and Possingham, H.P.},
+ year = {2003},
+ title = {Improving precision and reducing bias in biological surveys: estimating false-negative error rates},
+ journal = {Ecological Applications},
+ volume = {13},
+ pages = {1790-1801}
+}
@@ -305,7 +314,9 @@ year = {2008}
title={Hierarchical distance sampling models to estimate population size and habitat-specific abundance of an island endemic},
author={Sillett, S. and Chandler, R.B. and Royle, J.A. and K{\'e}ry, M. and Morrison, S.A.},
journal={Ecological Applications},
- year={{In press}},
+ volume={22},
+ pages={1997-2006},
+ year={2012},
publisher={Eco Soc America}
}
@@ -366,3 +377,94 @@ year = {2012}
pages = {e03520},
addendum = {\href{https://doi.org/10.1002/ecy.3520}{[view]}}
}
+
+@book{Kery_2010,
+ author = {K\'{e}ry, Marc},
+ title = {{Introduction to WinBUGS for Ecologists}},
+ year = {2010},
+ publisher={Academic Press}
+}
+
+@article{Hanski_1998,
+  author = {Hanski, I.},
+ title = {Metapopulation dynamics},
+ year = {1998},
+ journal = {Nature},
+ volume = {396},
+ pages = {41-49}
+}
+
+@book{Kery_2011,
+ author = {K\'{e}ry, Marc and Schaub, Michael},
+ title = {{Bayesian Population Analysis using WinBUGS: A Hierarchical Perspective}},
+ year = {2011},
+ publisher = {Academic Press}
+}
+
+@book{McCullagh_1989,
+  author = {McCullagh, P. and Nelder, J.A.},
+ year = {1989},
+ title = {Generalized linear models},
+ publisher = {Chapman and Hall}
+}
+
+@article{Kery_2008,
+  author = {K\'{e}ry, M. and Schmidt, B.R.},
+ year = {2008},
+ title = {Imperfect detection and its consequences for monitoring for conservation},
+ journal = {Community Ecology},
+ volume = {9},
+  pages = {207-216}
+}
+
+@article{Royle_2007,
+  author = {Royle, J.A. and K\'{e}ry, M.},
+ year = {2007},
+ title = {A {B}ayesian state-space formulation of dynamic occupancy models},
+ journal = {Ecology},
+ volume = {88},
+ pages = {1813-1823}
+}
+
+@article{Moilanen_2002,
+ author = {Moilanen, A.},
+ year = {2002},
+ title = {Implications of empirical data quality to metapopulation model parameter estimation and application},
+ journal = {Oikos},
+ volume = {96},
+ pages = {516-530}
+}
+
+@article{Weir_2009,
+  author = {Weir, L. and Fiske, I.J. and Royle, J.A.},
+ year = {2009},
+ title = {Trends in anuran occupancy from northeastern states of the {N}orth {A}merican {A}mphibian {M}onitoring {P}rogram},
+ journal = {Herpetological Conservation and Biology},
+ volume = {4},
+ pages = {389-402}
+}
+
+@book{Davison_1997,
+  author = {Davison, A.C. and Hinkley, D.V.},
+ year = {1997},
+ title = {Bootstrap Methods and Their Application},
+ publisher = {Cambridge University Press}
+}
+
+@article{Hosmer_1997,
+  author = {Hosmer, D.W. and Hosmer, T. and le Cessie, S. and Lemeshow, S.},
+ year = {1997},
+  title = {A comparison of goodness-of-fit tests for the logistic regression model},
+ journal = {Statistics in Medicine},
+ volume = {16},
+ pages = {965-980}
+}
+
+@article{MacKenzie_2004,
+  author = {MacKenzie, D.I. and Bailey, L.L.},
+ year = {2004},
+ title = {Assessing the fit of site-occupancy models},
+ journal = {Journal of Agricultural, Biological, and Environmental Statistics},
+ volume = {9},
+ pages = {300-318}
+}