aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKen Kellner <kenkellner@users.noreply.github.com>2023-08-11 16:21:52 -0400
committerGitHub <noreply@github.com>2023-08-11 16:21:52 -0400
commitac40eede793b0997209117080912a866a2ff18ae (patch)
treed80c94901adad9211f8cffb782d0e91e9860299b
parent516906afa4166978133c61541fa32f80ebda4416 (diff)
parent47dc21f52187912ec7cba52e351b1af256899f44 (diff)
Merge pull request #261 from rbchan/goccu
Add multistate occupancy model (goccu)
-rw-r--r--DESCRIPTION5
-rw-r--r--NAMESPACE4
-rw-r--r--R/goccu.R334
-rw-r--r--man/fitted-methods.Rd1
-rw-r--r--man/getP-methods.Rd1
-rw-r--r--man/goccu.Rd105
-rw-r--r--man/ranef-methods.Rd1
-rw-r--r--man/simulate-methods.Rd1
-rw-r--r--man/unmarkedFit-class.Rd1
-rw-r--r--man/unmarkedMultFrame.Rd1
-rw-r--r--src/TMB/tmb_goccu.hpp130
-rw-r--r--src/TMB/unmarked_TMBExports.cpp3
-rw-r--r--tests/testthat/test_goccu.R150
13 files changed, 733 insertions, 4 deletions
diff --git a/DESCRIPTION b/DESCRIPTION
index b658bba..ef05301 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: unmarked
-Version: 1.3.2.9001
-Date: 2023-07-27
+Version: 1.3.2.9002
+Date: 2023-08-11
Type: Package
Title: Models for Data from Unmarked Animals
Authors@R: c(
@@ -51,6 +51,7 @@ Collate: 'classes.R' 'unmarkedEstimate.R' 'mapInfo.R' 'unmarkedFrame.R'
'power.R'
'simulate.R'
'predict.R'
+ 'goccu.R'
'RcppExports.R'
'zzz.R'
LinkingTo:
diff --git a/NAMESPACE b/NAMESPACE
index 7273ce1..3234918 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -24,7 +24,7 @@ importFrom(pbapply, pbsapply, pblapply)
export(occu, occuFP, occuRN, pcount, pcountOpen, multinomPois, distsamp,
colext, gmultmix, gdistsamp, gpcount, occuPEN, occuPEN_CV, occuMulti,
occuMS, computeMPLElambda, pcount.spHDS, occuTTD, distsampOpen,
- multmixOpen, nmixTTD, gdistremoval)
+ multmixOpen, nmixTTD, gdistremoval, goccu)
export(removalPiFun, doublePiFun)
export(makeRemPiFun, makeCrPiFun, makeCrPiFunMb, makeCrPiFunMh)
@@ -61,7 +61,7 @@ export(unmarkedEstimate, fitList, mapInfo, unmarkedFrame,
unmarkedFrameDS, unmarkedMultFrame, unmarkedFrameGMM,
unmarkedFramePCO, unmarkedFrameGDS, unmarkedFrameGPC, unmarkedFrameOccuMulti,
unmarkedFrameOccuMS, unmarkedFrameOccuTTD, unmarkedFrameDSO,
- unmarkedFrameMMO, unmarkedFrameGDR)
+ unmarkedFrameMMO, unmarkedFrameGDR, unmarkedFrameGOccu)
# Formatting
export(csvToUMF, formatLong, formatWide, formatMult, formatDistData)
diff --git a/R/goccu.R b/R/goccu.R
new file mode 100644
index 0000000..28c97bb
--- /dev/null
+++ b/R/goccu.R
@@ -0,0 +1,334 @@
+setClass("unmarkedFitGOccu",
+ representation(
+ formlist = "list"),
+ contains = "unmarkedFit")
+
+setClass("unmarkedFrameGOccu", contains = "unmarkedFrameG3")
+
+setMethod("getDesign", "unmarkedFrameGOccu",
+ function(umf, formula, na.rm=TRUE){
+ out <- methods::callNextMethod(umf, formula=formula, na.rm=na.rm)
+ names(out)[2] <- "Xpsi"
+ names(out)[5] <- "Xpsi.offset"
+ out
+})
+
+unmarkedFrameGOccu <- function(y, siteCovs=NULL, obsCovs=NULL, numPrimary,
+ yearlySiteCovs=NULL) {
+ y[y > 1] <- 1
+ if(numPrimary < 2) stop("numPrimary < 2, use occu instead")
+ umf <- unmarkedFrameGPC(y, siteCovs=siteCovs, obsCovs=obsCovs,
+ numPrimary=numPrimary, yearlySiteCovs=NULL)
+ class(umf) <- "unmarkedFrameGOccu"
+ umf
+}
+
+goccu <- function(psiformula, phiformula, pformula, data,
+ linkPsi = c("logit", "cloglog"), starts, method = "BFGS",
+ se = TRUE, ...){
+
+ linkPsi <- match.arg(linkPsi, c("logit","cloglog"))
+ psiLinkFunc <- ifelse(linkPsi=="cloglog", cloglog, plogis)
+ psiInvLink <- ifelse(linkPsi=="cloglog", "cloglog", "logistic")
+ psiLinkGrad <- ifelse(linkPsi=="cloglog", "cloglog.grad", "logistic.grad")
+
+ # Pass phiformula as gamma/eps formula so it will be applied to
+ # yearlySiteCovs in getDesign
+ formlist <- list(psiformula=psiformula, phi=phiformula,
+ pformula=pformula)
+
+ formula <- as.formula(paste(unlist(formlist), collapse=" "))
+
+ data@y[data@y > 1] <- 1
+
+ class(data) <- "unmarkedFrameGOccu"
+
+ # handle offsets
+
+ gd <- getDesign(data, formula = formula)
+
+ y <- gd$y
+ Xpsi <- gd$Xpsi
+ Xphi <- gd$Xphi
+ Xp <- gd$Xdet
+
+ M <- nrow(y)
+ T <- data@numPrimary
+ J <- ncol(y) / T
+
+ # Identify entirely missing primary periods at each site
+ y_array <- array(t(y), c(J, T, M))
+ missing_session <- t(apply(y_array, c(2,3),
+ function(x) as.numeric(all(is.na(x)))))
+
+ # Create possible states in each T
+ alpha_potential <- as.matrix(expand.grid(rep(list(c(0, 1)), T)))
+ n_possible <- nrow(alpha_potential)
+
+ # Known present at each site
+ known_present <- rep(0, M)
+ # Known available at each site and T
+ known_available <- matrix(0, nrow=M, ncol=T)
+
+ for (i in 1:M){
+ for (t in 1:T){
+ for (j in 1:J){
+ if(is.na(y_array[j,t,i])) next
+ if(y_array[j, t, i] == 1){
+ known_present[i] <- 1
+ known_available[i,t] <- 1
+ }
+ }
+ }
+ }
+
+ # Bundle data for TMB
+ dataList <- list(y=y, T=T, link=ifelse(linkPsi=='cloglog', 1, 0),
+ Xpsi=Xpsi, Xphi=Xphi, Xp=Xp,
+ n_possible=n_possible,
+ alpha_potential=alpha_potential,
+ known_present=known_present, known_available=known_available,
+ missing_session=missing_session)
+
+ # Provide dimensions and starting values for parameters
+ # This part should change to be more like occu() if we add random effects
+ psi_ind <- 1:ncol(Xpsi)
+ phi_ind <- 1:ncol(Xphi) + max(psi_ind)
+ p_ind <- 1:ncol(Xp) + max(phi_ind)
+ nP <- max(p_ind)
+ params <- list(beta_psi = rep(0, length(psi_ind)),
+ beta_phi = rep(0, length(phi_ind)),
+ beta_p = rep(0, length(p_ind)))
+
+ # Create TMB object
+ tmb_mod <- TMB::MakeADFun(data = c(model = "tmb_goccu", dataList),
+ parameters = params,
+ DLL = "unmarked_TMBExports", silent = TRUE)
+
+ # Optimize TMB object, print and save results
+ if(missing(starts) || is.null(starts)) starts <- tmb_mod$par
+ opt <- optim(starts, fn = tmb_mod$fn, gr = tmb_mod$gr, method = method,
+ hessian = se, ...)
+
+ covMat <- invertHessian(opt, nP, se)
+ ests <- opt$par
+ names(ests) <- c(colnames(Xpsi), colnames(Xphi), colnames(Xp))
+ fmAIC <- 2 * opt$value + 2 * nP
+
+
+ psi_est <- unmarkedEstimate(name = "Occupancy", short.name = "psi",
+ estimates = ests[psi_ind],
+ covMat = covMat[psi_ind, psi_ind, drop=FALSE],
+ fixed = 1:ncol(Xpsi),
+ invlink = psiInvLink,
+ invlinkGrad = psiLinkGrad,
+ randomVarInfo=list()
+ )
+
+ phi_est <- unmarkedEstimate(name = "Availability", short.name = "phi",
+ estimates = ests[phi_ind],
+ covMat = covMat[phi_ind, phi_ind, drop=FALSE],
+ fixed = 1:ncol(Xphi),
+ invlink = "logistic",
+ invlinkGrad = "logistic.grad",
+ randomVarInfo=list()
+ )
+
+ p_est <- unmarkedEstimate(name = "Detection", short.name = "p",
+ estimates = ests[p_ind],
+ covMat = covMat[p_ind, p_ind, drop=FALSE],
+ fixed = 1:ncol(Xp),
+ invlink = "logistic",
+ invlinkGrad = "logistic.grad",
+ randomVarInfo=list()
+ )
+
+ estimate_list <- unmarkedEstimateList(list(psi=psi_est, phi=phi_est,
+ det=p_est))
+
+ # Create unmarkedFit object--------------------------------------------------
+ umfit <- new("unmarkedFitGOccu", fitType = "goccu", call = match.call(),
+ formula = formula, formlist=formlist, data = data,
+ sitesRemoved = gd$removed.sites,
+ estimates = estimate_list, AIC = fmAIC, opt = opt,
+ negLogLike = opt$value,
+ nllFun = tmb_mod$fn, TMB=tmb_mod)
+
+ return(umfit)
+
+}
+
+# Methods
+
+setMethod("predict_inputs_from_umf", "unmarkedFitGOccu",
+ function(object, type, newdata, na.rm, re.form=NA){
+ designMats <- getDesign(newdata, object@formula, na.rm=na.rm)
+ X_idx <- switch(type, psi="Xpsi", phi="Xphi", det="Xdet")
+ off_idx <- paste0(X_idx, ".offset")
+ list(X=designMats[[X_idx]], offset=NULL)
+})
+
+setMethod("get_formula", "unmarkedFitGOccu", function(object, type, ...){
+ fl <- object@formlist
+ switch(type, psi=fl$psiformula, phi=fl$phiformula, det=fl$pformula)
+})
+
+setMethod("get_orig_data", "unmarkedFitGOccu", function(object, type, ...){
+ clean_covs <- clean_up_covs(object@data, drop_final=FALSE)
+ datatype <- switch(type, psi='site_covs', phi='yearly_site_covs',
+ det='obs_covs')
+ clean_covs[[datatype]]
+})
+
+setMethod("getP", "unmarkedFitGOccu",
+ function(object, na.rm=FALSE){
+ gd <- getDesign(object@data, object@formula, na.rm=na.rm)
+ p <- drop(plogis(gd$Xdet %*% coef(object, "det")))
+ M <- numSites(object@data)
+ p <- matrix(p, nrow=M, ncol=obsNum(object@data),
+ byrow=TRUE)
+ p
+})
+
+setMethod("fitted", "unmarkedFitGOccu",
+ function(object, na.rm= FALSE){
+
+ M <- numSites(object@data)
+ JT <- obsNum(object@data)
+ gd <- getDesign(object@data, object@formula, na.rm=na.rm)
+
+ psi <- drop(plogis(gd$Xpsi %*% coef(object, "psi")))
+ psi <- matrix(psi, nrow=M, ncol=JT)
+
+ phi <- drop(plogis(gd$Xphi %*% coef(object, "phi")))
+ phi <- rep(phi, each = JT / object@data@numPrimary)
+ phi <- matrix(phi, nrow=M, ncol=JT, byrow=TRUE)
+
+ p <- getP(object)
+
+ psi * phi * p
+})
+
+
+# based on ranef for GPC
+setMethod("ranef", "unmarkedFitGOccu", function(object, ...){
+
+ M <- numSites(object@data)
+ JT <- obsNum(object@data)
+ T <- object@data@numPrimary
+ J <- JT / T
+
+ gd <- getDesign(object@data, object@formula, na.rm=FALSE)
+ y_array <- array(t(gd$y), c(J, T, M))
+
+ psi <- drop(plogis(gd$Xpsi %*% coef(object, "psi")))
+ phi <- drop(plogis(gd$Xphi %*% coef(object, "phi")))
+ phi <- matrix(phi, nrow=M, ncol=T, byrow=TRUE)
+ p <- getP(object)
+ p_array <- array(t(p), c(J, T, M))
+
+ Z <- ZZ <- 0:1
+ post <- array(0, c(M, 2, 1))
+ colnames(post) <- Z
+
+ for(i in 1:M) {
+ f <- dbinom(Z, 1, psi[i])
+
+ ghi <- rep(0, 2)
+
+ for(t in 1:T) {
+ gh <- matrix(-Inf, 2, 2)
+ for(z in Z) {
+ if(z < max(y_array[,,i], na.rm=TRUE)){
+ gh[,z+1] <- -Inf
+ next
+ }
+ if(is.na(phi[i,t])) {
+ g <- rep(0, 2)
+ g[ZZ>z] <- -Inf
+ } else{
+ g <- dbinom(ZZ, z, phi[i,t], log=TRUE)
+ }
+ h <- rep(0, 2)
+ for(j in 1:J) {
+ if(is.na(y_array[j,t,i]) | is.na(p_array[j,t,i])) next
+ h <- h + dbinom(y_array[j,t,i], ZZ, p_array[j,t,i], log=TRUE)
+ }
+ gh[,z+1] <- g + h
+ }
+ ghi <- ghi + log(colSums(exp(gh)))
+ }
+ fgh <- exp(f + ghi)
+ prM <- fgh/sum(fgh)
+ post[i,,1] <- prM
+ }
+
+ new("unmarkedRanef", post=post)
+})
+
+
+setMethod("simulate", "unmarkedFitGOccu",
+ function(object, nsim = 1, seed = NULL, na.rm = FALSE){
+
+ gd <- getDesign(object@data, object@formula, na.rm=FALSE)
+ M <- nrow(gd$y)
+ T <- object@data@numPrimary
+ JT <- ncol(gd$y)
+ J <- JT / T
+ y_array <- array(t(gd$y), c(J, T, M))
+
+ psi <- drop(plogis(gd$Xpsi %*% coef(object, "psi")))
+ phi <- drop(plogis(gd$Xphi %*% coef(object, "phi")))
+ phi <- matrix(phi, nrow=M, ncol=T, byrow=TRUE)
+ p <- getP(object)
+
+ sim_list <- list()
+
+ for (i in 1:nsim){
+ z <- suppressWarnings(rbinom(M, 1, psi))
+ z <- matrix(z, nrow=M, ncol=T)
+
+ zz <- suppressWarnings(rbinom(M*T, 1, phi*z))
+ zz <- matrix(zz, M, T)
+
+ colrep <- rep(1:T, each=J)
+ zz <- zz[,colrep]
+
+ y <- suppressWarnings(rbinom(M*T*J, 1, zz*p))
+ y <- matrix(y, M, JT)
+ if(na.rm) y[which(is.na(gd$y))] <- NA
+ sim_list[[i]] <- y
+ }
+
+ return(sim_list)
+})
+
+
+setMethod("update", "unmarkedFitGOccu",
+ function(object, psiformula, phiformula, pformula, ...,
+ evaluate = TRUE)
+{
+ call <- object@call
+ if (is.null(call))
+ stop("need an object with call slot")
+ formlist <- object@formlist
+ if (!missing(psiformula))
+ call$psiformula <- update.formula(formlist$psiformula, psiformula)
+ if (!missing(phiformula))
+ call$phiformula <- update.formula(formlist$phiformula, phiformula)
+ if (!missing(pformula))
+ call$pformula <- update.formula(formlist$pformula, pformula)
+ extras <- match.call(call=sys.call(-1),
+ expand.dots = FALSE)$...
+ if(length(extras) > 0) {
+ existing <- !is.na(match(names(extras), names(call)))
+ for (a in names(extras)[existing]) call[[a]] <- extras[[a]]
+ if (any(!existing)) {
+ call <- c(as.list(call), extras[!existing])
+ call <- as.call(call)
+ }
+ }
+ if (evaluate)
+ eval(call, parent.frame(2))
+ else call
+})
diff --git a/man/fitted-methods.Rd b/man/fitted-methods.Rd
index a06b930..8868565 100644
--- a/man/fitted-methods.Rd
+++ b/man/fitted-methods.Rd
@@ -15,6 +15,7 @@
\alias{fitted,unmarkedFitGMM-method}
\alias{fitted,unmarkedFitGDR-method}
\alias{fitted,unmarkedFitDailMadsen-method}
+\alias{fitted,unmarkedFitGOccu-method}
\title{Methods for Function fitted in Package `unmarked'}
\description{Extracted fitted values from a fitted model.
}
diff --git a/man/getP-methods.Rd b/man/getP-methods.Rd
index f44ec68..c554ebe 100644
--- a/man/getP-methods.Rd
+++ b/man/getP-methods.Rd
@@ -17,6 +17,7 @@
\alias{getP,unmarkedFitDSO-method}
\alias{getP,unmarkedFitMMO-method}
\alias{getP,unmarkedFitGDR-method}
+\alias{getP,unmarkedFitGOccu-method}
\title{Methods for Function getP in Package `unmarked'}
\description{
Methods for function \code{getP} in Package `unmarked'. These methods
diff --git a/man/goccu.Rd b/man/goccu.Rd
new file mode 100644
index 0000000..ef9f08c
--- /dev/null
+++ b/man/goccu.Rd
@@ -0,0 +1,105 @@
+\name{goccu}
+\alias{goccu}
+\title{
+Fit multi-scale occupancy models
+}
+\description{
+Fit multi-scale occupancy models as described in Nichols et al. (2008) to
+repeated presence-absence data collected using the robust design. This model
+allows for inference about occupancy, availability, and detection probability.
+}
+\usage{
+goccu(psiformula, phiformula, pformula, data, linkPsi = c("logit", "cloglog"),
+ starts, method = "BFGS", se = TRUE, ...)
+}
+\arguments{
+ \item{psiformula}{
+ Right-hand sided formula describing occupancy covariates
+}
+ \item{phiformula}{
+ Right-hand sided formula describing availability covariates
+}
+ \item{pformula}{
+ Right-hand sided formula for detection probability covariates
+}
+ \item{data}{
+ An object of class unmarkedFrameGOccu or unmarkedMultFrame
+}
+\item{linkPsi}{Link function for the occupancy model. Options are
+ \code{"logit"} for the standard occupancy model or \code{"cloglog"}
+ for the complimentary log-log link, which relates occupancy
+ to site-level abundance.
+}
+\item{starts}{
+ Starting values
+}
+ \item{method}{
+ Optimization method used by \code{\link{optim}}
+}
+ \item{se}{
+ Logical. Should standard errors be calculated?
+}
+\item{\dots}{
+ Additional arguments to \code{\link{optim}}, such as lower and upper
+ bounds
+}
+}
+\details{
+ Primary periods could represent spatial or temporal sampling replicates.
+ For example, you could have several spatial sub-units within each site, where each
+ sub-unit was then sampled repeatedly. This is a frequent design for eDNA studies.
+ Or, you could have multiple primary periods of sampling at each site
+ (conducted at different times within a season), each of which contains
+ several secondary sampling periods. In both cases the robust design structure
+ can be used to estimate an availability probability in addition to
+ detection probability. See Kery and Royle (2015) 10.10 for more details.
+}
+\value{
+ An object of class unmarkedFitGOccu
+}
+\references{
+ Kery, M., & Royle, J. A. (2015). Applied hierarchical modeling in ecology:
+ Volume 1: Prelude and static models. Elsevier Science.
+
+ Nichols, J. D., Bailey, L. L., O'Connell Jr, A. F., Talancy, N. W.,
+ Campbell Grant, E. H., Gilbert, A. T., Annand E. M., Husband, T. P., & Hines, J. E.
+ (2008). Multi-scale occupancy estimation and modelling using multiple detection methods.
+ Journal of Applied Ecology, 45(5), 1321-1329.
+}
+\author{
+ Ken Kellner \email{contact@kenkellner.com}
+}
+
+\seealso{
+\code{\link{occu}}, \code{\link{colext}},
+ \code{\link{unmarkedMultFrame}}, \code{\link{unmarkedFrameGOccu}}
+}
+
+\examples{
+
+set.seed(123)
+M <- 100
+T <- 5
+J <- 4
+
+psi <- 0.5
+phi <- 0.3
+p <- 0.4
+
+z <- rbinom(M, 1, psi)
+zmat <- matrix(z, nrow=M, ncol=T)
+
+zz <- rbinom(M*T, 1, zmat*phi)
+zz <- matrix(zz, nrow=M, ncol=T)
+
+zzmat <- zz[,rep(1:T, each=J)]
+y <- rbinom(M*T*J, 1, zzmat*p)
+y <- matrix(y, M, J*T)
+umf <- unmarkedMultFrame(y=y, numPrimary=T)
+
+\dontrun{
+ mod <- goccu(psiformula = ~1, phiformula = ~1, pformula = ~1, umf)
+ plogis(coef(mod))
+}
+
+}
diff --git a/man/ranef-methods.Rd b/man/ranef-methods.Rd
index abf0c68..c38e82e 100644
--- a/man/ranef-methods.Rd
+++ b/man/ranef-methods.Rd
@@ -20,6 +20,7 @@
\alias{ranef,unmarkedFitNmixTTD-method}
\alias{ranef,unmarkedFitGDR-method}
\alias{ranef,unmarkedFitDailMadsen-method}
+\alias{ranef,unmarkedFitGOccu-method}
\title{ Methods for Function \code{ranef} in Package \pkg{unmarked} }
\description{
Estimate posterior distributions of the random variables (latent
diff --git a/man/simulate-methods.Rd b/man/simulate-methods.Rd
index 67d942d..fc8a008 100644
--- a/man/simulate-methods.Rd
+++ b/man/simulate-methods.Rd
@@ -18,6 +18,7 @@
\alias{simulate,unmarkedFitGPC-method}
\alias{simulate,unmarkedFitGDR-method}
\alias{simulate,unmarkedFitDailMadsen-method}
+\alias{simulate,unmarkedFitGOccu-method}
\alias{simulate,character-method}
\title{Methods for Function simulate in Package `unmarked'}
\description{
diff --git a/man/unmarkedFit-class.Rd b/man/unmarkedFit-class.Rd
index 91e1723..4d7aa8f 100644
--- a/man/unmarkedFit-class.Rd
+++ b/man/unmarkedFit-class.Rd
@@ -35,6 +35,7 @@
\alias{update,unmarkedFitNmixTTD-method}
\alias{update,unmarkedFitGDR-method}
\alias{update,unmarkedFitDailMadsen-method}
+\alias{update,unmarkedFitGOccu-method}
\alias{sampleSize}
\alias{sampleSize,unmarkedFit-method}
\alias{unmarkedFitOccu-class}
diff --git a/man/unmarkedMultFrame.Rd b/man/unmarkedMultFrame.Rd
index 622b3ff..383b905 100644
--- a/man/unmarkedMultFrame.Rd
+++ b/man/unmarkedMultFrame.Rd
@@ -8,6 +8,7 @@
\alias{unmarkedFrameGMM}
\alias{unmarkedFrameGDS}
\alias{unmarkedFrameGPC}
+\alias{unmarkedFrameGOccu}
\title{Create an unmarkedMultFrame, unmarkedFrameGMM, unmarkedFrameGDS,
or unmarkedFrameGPC object}
diff --git a/src/TMB/tmb_goccu.hpp b/src/TMB/tmb_goccu.hpp
new file mode 100644
index 0000000..7788ad6
--- /dev/null
+++ b/src/TMB/tmb_goccu.hpp
@@ -0,0 +1,130 @@
+#undef TMB_OBJECTIVE_PTR
+#define TMB_OBJECTIVE_PTR obj
+
+// Adapted from Stan code by Maxwell B. Joseph,
+// https://discourse.mc-stan.org/t/divergent-transition-every-iteration-multi-scale-occupancy-model/13739/5
+
+// name of function below **MUST** match filename
+template<class Type>
+Type tmb_goccu(objective_function<Type>* obj) {
+ //Describe input data
+ DATA_MATRIX(y); //observations
+ DATA_INTEGER(T);
+
+ DATA_INTEGER(link);
+
+ DATA_MATRIX(Xpsi);
+ DATA_MATRIX(Xphi);
+ DATA_MATRIX(Xp);
+
+ DATA_INTEGER(n_possible);
+ DATA_MATRIX(alpha_potential);
+ DATA_VECTOR(known_present);
+ DATA_MATRIX(known_available);
+ DATA_MATRIX(missing_session);
+
+ PARAMETER_VECTOR(beta_psi);
+ PARAMETER_VECTOR(beta_phi);
+ PARAMETER_VECTOR(beta_p);
+
+ vector<Type> psi = Xpsi * beta_psi;
+ vector<Type> phi = Xphi * beta_phi;
+ vector<Type> p = Xp * beta_p;
+
+ if(link == 1){
+ //psi = cloglog(psi);
+ } else {
+ psi = invlogit(psi);
+ }
+
+ phi = invlogit(phi);
+ p = invlogit(p);
+
+ Type loglik = 0.0;
+
+ int M = y.rows();
+ int J = y.cols() / T;
+
+ Type obs_lp;
+ Type poss_lp;
+ Type exp_poss_lp;
+
+ int ystart;
+ vector<Type> ysite;
+ vector<Type> psite;
+ vector<Type> ysub;
+ vector<Type> psub;
+
+ int tstart;
+ int pstart;
+
+ for (int i=0; i<M; i++){
+
+ tstart = i*T;
+ pstart = i*(T*J);
+
+ ysite = y.row(i);
+ psite = p.segment(pstart, T*J);
+
+ if(known_present(i) == 1){
+ for (int t=0; t<T; t++){
+
+ if(missing_session(i, t) == 1) continue;
+
+ ystart = t*J;
+
+ psub = psite.segment(ystart, J);
+ ysub = ysite.segment(ystart, J);
+
+ obs_lp = log(phi(tstart+t));
+ for (int j=0; j<J; j++){
+ if(R_IsNA(asDouble(ysub(j)))) continue;
+ obs_lp += dbinom(ysub(j), Type(1), psub(j), true);
+ }
+ if(known_available(i, t) == 1){
+ loglik += obs_lp;
+ } else {
+ loglik += log(exp(obs_lp) + exp(log(1-phi(tstart+t))));
+ }
+ }
+ loglik += log(psi(i));
+ } else {
+
+ exp_poss_lp = 0.0;
+
+ for (int k=0; k<n_possible; k++){
+ poss_lp = log(psi(i));
+
+ for (int t=0; t<T; t++){
+
+ if(missing_session(i, t) == 1) continue;
+
+ ystart = t*J;
+
+ psub = psite.segment(ystart, J);
+ ysub = ysite.segment(ystart, J);
+
+ if(alpha_potential(k, t) == 0){
+ poss_lp += log(1 - phi(tstart+t));
+ } else {
+ poss_lp += log(phi(tstart+t));
+ for (int j=0; j<J; j++){
+ if(R_IsNA(asDouble(ysub(j)))) continue;
+ poss_lp += dbinom(ysub(j), Type(1), psub(j), true);
+ }
+ }
+ }
+ exp_poss_lp += exp(poss_lp);
+ }
+ exp_poss_lp += exp(log(1-psi(i)));
+
+ loglik += log(exp_poss_lp);
+ }
+ }
+
+ return -loglik;
+
+}
+
+#undef TMB_OBJECTIVE_PTR
+#define TMB_OBJECTIVE_PTR this
diff --git a/src/TMB/unmarked_TMBExports.cpp b/src/TMB/unmarked_TMBExports.cpp
index 5c1bfea..6aa57de 100644
--- a/src/TMB/unmarked_TMBExports.cpp
+++ b/src/TMB/unmarked_TMBExports.cpp
@@ -9,6 +9,7 @@
#include "tmb_multinomPois.hpp"
#include "tmb_distsamp.hpp"
#include "tmb_gdistremoval.hpp"
+#include "tmb_goccu.hpp"
template<class Type>
Type objective_function<Type>::operator() () {
@@ -23,6 +24,8 @@ Type objective_function<Type>::operator() () {
return tmb_distsamp(this);
} else if(model == "tmb_gdistremoval"){
return tmb_gdistremoval(this);
+ } else if(model == "tmb_goccu"){
+ return tmb_goccu(this);
} else {
error("Unknown model.");
}
diff --git a/tests/testthat/test_goccu.R b/tests/testthat/test_goccu.R
new file mode 100644
index 0000000..4572515
--- /dev/null
+++ b/tests/testthat/test_goccu.R
@@ -0,0 +1,150 @@
+context("goccu fitting function")
+skip_on_cran()
+
+set.seed(123)
+M <- 100
+T <- 5
+J <- 4
+
+psi <- 0.5
+phi <- 0.3
+p <- 0.4
+
+z <- rbinom(M, 1, psi)
+zmat <- matrix(z, nrow=M, ncol=T)
+
+zz <- rbinom(M*T, 1, zmat*phi)
+zz <- matrix(zz, nrow=M, ncol=T)
+
+zzmat <- zz[,rep(1:T, each=J)]
+y <- rbinom(M*T*J, 1, zzmat*p)
+y <- matrix(y, M, J*T)
+umf <- unmarkedMultFrame(y=y, numPrimary=T)
+
+unmarkedFrameGOccu <- function(y, siteCovs=NULL, obsCovs=NULL, numPrimary,
+ yearlySiteCovs=NULL) {
+ y[y > 1] <- 1
+ if(numPrimary < 2) stop("numPrimary < 2, use occu instead")
+ umf <- unmarkedFrameGPC(y, siteCovs=siteCovs, obsCovs=obsCovs,
+ numPrimary=numPrimary, yearlySiteCovs=NULL)
+ class(umf) <- "unmarkedFrameGOccu"
+ umf
+}
+
+test_that("unmarkedFrameGOccu can be constructed", {
+ set.seed(123)
+ sc <- data.frame(x=rnorm(M))
+ ysc <- matrix(rnorm(M*T), M, T)
+ oc <- matrix(rnorm(M*T*J), M, T*J)
+
+ umf2 <- unmarkedFrameGOccu(y, siteCovs=sc, obsCovs=list(x2=oc),
+ yearlySiteCovs=list(x3=ysc), numPrimary=T)
+ expect_is(umf2, "unmarkedFrameGOccu")
+})
+
+test_that("goccu can fit models", {
+
+ # Without covariates
+ mod <- goccu(~1, ~1, ~1, umf)
+ expect_equivalent(coef(mod), c(0.16129, -0.97041, -0.61784), tol=1e-5)
+
+ # With covariates
+ set.seed(123)
+ sc <- data.frame(x=rnorm(M))
+ ysc <- matrix(rnorm(M*T), M, T)
+ oc <- matrix(rnorm(M*T*J), M, T*J)
+
+ umf2 <- unmarkedMultFrame(y=y, siteCovs=sc, yearlySiteCovs=list(x2=ysc),
+ obsCovs=list(x3=oc), numPrimary=T)
+
+ mod2 <- goccu(~x, ~x2, ~x3, umf2)
+ expect_equivalent(coef(mod2), c(0.18895, -0.23629,-0.97246,-0.094335,-0.61808,
+ -0.0040056), tol=1e-5)
+
+ # predict
+ pr <- predict(mod2, 'psi')
+ expect_equal(dim(pr), c(M, 4))
+ expect_equal(pr$Predicted[1], 0.5796617, tol=1e-5)
+
+ # phi should not drop last level
+ pr2 <- predict(mod2, 'phi')
+ expect_equal(dim(pr2), c(M*T, 4))
+
+ nd <- data.frame(x=1)
+ pr3 <- predict(mod2, 'psi', newdata=nd)
+ expect_true(nrow(pr3) == 1)
+ expect_equal(pr3$Predicted[1], 0.488168, tol=1e-5)
+
+ # Other methods
+ ft <- fitted(mod2)
+ expect_equal(dim(ft), dim(umf2@y))
+ expect_true(all(ft >=0 & ft <= 1))
+
+ res <- residuals(mod2)
+ expect_equal(dim(res), dim(umf2@y))
+
+ gp <- getP(mod2)
+ expect_equal(dim(gp), dim(umf2@y))
+ expect_equal(gp[1,1], 0.349239, tol=1e-5)
+
+ set.seed(123)
+ s <- simulate(mod2, nsim=2)
+ expect_equal(length(s), 2)
+ expect_equal(dim(s[[1]]), dim(mod2@data@y))
+ simumf <- umf2
+ simumf@y <- s[[1]]
+ simmod <- update(mod2, data=simumf)
+ expect_equivalent(coef(simmod),
+ c(0.174991, -0.27161, -1.32766, 0.054459,-0.41610,-0.073922), tol=1e-5)
+
+ r <- ranef(mod2)
+ expect_equal(dim(r@post), c(M, 2, 1))
+ expect_equal(sum(bup(r)), 53.13565, tol=1e-4)
+
+ pb <- parboot(mod2, nsim=2)
+ expect_is(pb, "parboot")
+
+ npb <- nonparboot(mod2, B=2, bsType='site')
+
+
+})
+
+test_that("goccu handles missing values", {
+
+ set.seed(123)
+ y2 <- y
+ y2[1,1] <- NA
+ y2[2,1:J] <- NA
+
+ sc <- data.frame(x=rnorm(M))
+ sc$x[3] <- NA
+ ysc <- matrix(rnorm(M*T), M, T)
+ ysc[4,1] <- NA
+ oc <- matrix(rnorm(M*T*J), M, T*J)
+ oc[5,1] <- NA
+ oc[6,1:J] <- NA
+
+ umf_na <- unmarkedMultFrame(y=y2, siteCovs=sc, yearlySiteCovs=list(x2=ysc),
+ obsCovs=list(x3=oc), numPrimary=T)
+
+ mod_na <- expect_warning(goccu(~x, ~x2, ~x3, umf_na))
+
+ pr <- expect_warning(predict(mod_na, 'psi'))
+ expect_equal(nrow(pr), M-1)
+
+ # Need to re-write these to use the design matrix instead of predict
+ gp <- getP(mod_na)
+ expect_equal(dim(gp), c(100, 20))
+ expect_true(is.na(gp[5,1]))
+ expect_true(all(is.na(gp[6, 1:4])))
+ s <- simulate(mod_na)
+ expect_equal(dim(s[[1]]), dim(mod_na@data@y))
+ ft <- fitted(mod_na)
+ expect_equal(dim(ft), dim(mod_na@data@y))
+ r <- ranef(mod_na)
+ expect_equal(dim(r@post), c(100, 2, 1))
+ expect_true(is.na(bup(r)[3]))
+
+ pb <- expect_warning(parboot(mod_na, nsim=2))
+ expect_is(pb, "parboot")
+})