diff options
author | Ken Kellner <kenkellner@users.noreply.github.com> | 2023-08-11 16:21:52 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-11 16:21:52 -0400 |
commit | ac40eede793b0997209117080912a866a2ff18ae (patch) | |
tree | d80c94901adad9211f8cffb782d0e91e9860299b | |
parent | 516906afa4166978133c61541fa32f80ebda4416 (diff) | |
parent | 47dc21f52187912ec7cba52e351b1af256899f44 (diff) |
Merge pull request #261 from rbchan/goccu
Add multistate occupancy model (goccu)
-rw-r--r-- | DESCRIPTION | 5 | ||||
-rw-r--r-- | NAMESPACE | 4 | ||||
-rw-r--r-- | R/goccu.R | 334 | ||||
-rw-r--r-- | man/fitted-methods.Rd | 1 | ||||
-rw-r--r-- | man/getP-methods.Rd | 1 | ||||
-rw-r--r-- | man/goccu.Rd | 105 | ||||
-rw-r--r-- | man/ranef-methods.Rd | 1 | ||||
-rw-r--r-- | man/simulate-methods.Rd | 1 | ||||
-rw-r--r-- | man/unmarkedFit-class.Rd | 1 | ||||
-rw-r--r-- | man/unmarkedMultFrame.Rd | 1 | ||||
-rw-r--r-- | src/TMB/tmb_goccu.hpp | 130 | ||||
-rw-r--r-- | src/TMB/unmarked_TMBExports.cpp | 3 | ||||
-rw-r--r-- | tests/testthat/test_goccu.R | 150 |
13 files changed, 733 insertions, 4 deletions
diff --git a/DESCRIPTION b/DESCRIPTION index b658bba..ef05301 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: unmarked -Version: 1.3.2.9001 -Date: 2023-07-27 +Version: 1.3.2.9002 +Date: 2023-08-11 Type: Package Title: Models for Data from Unmarked Animals Authors@R: c( @@ -51,6 +51,7 @@ Collate: 'classes.R' 'unmarkedEstimate.R' 'mapInfo.R' 'unmarkedFrame.R' 'power.R' 'simulate.R' 'predict.R' + 'goccu.R' 'RcppExports.R' 'zzz.R' LinkingTo: @@ -24,7 +24,7 @@ importFrom(pbapply, pbsapply, pblapply) export(occu, occuFP, occuRN, pcount, pcountOpen, multinomPois, distsamp, colext, gmultmix, gdistsamp, gpcount, occuPEN, occuPEN_CV, occuMulti, occuMS, computeMPLElambda, pcount.spHDS, occuTTD, distsampOpen, - multmixOpen, nmixTTD, gdistremoval) + multmixOpen, nmixTTD, gdistremoval, goccu) export(removalPiFun, doublePiFun) export(makeRemPiFun, makeCrPiFun, makeCrPiFunMb, makeCrPiFunMh) @@ -61,7 +61,7 @@ export(unmarkedEstimate, fitList, mapInfo, unmarkedFrame, unmarkedFrameDS, unmarkedMultFrame, unmarkedFrameGMM, unmarkedFramePCO, unmarkedFrameGDS, unmarkedFrameGPC, unmarkedFrameOccuMulti, unmarkedFrameOccuMS, unmarkedFrameOccuTTD, unmarkedFrameDSO, - unmarkedFrameMMO, unmarkedFrameGDR) + unmarkedFrameMMO, unmarkedFrameGDR, unmarkedFrameGOccu) # Formatting export(csvToUMF, formatLong, formatWide, formatMult, formatDistData) diff --git a/R/goccu.R b/R/goccu.R new file mode 100644 index 0000000..28c97bb --- /dev/null +++ b/R/goccu.R @@ -0,0 +1,334 @@ +setClass("unmarkedFitGOccu", + representation( + formlist = "list"), + contains = "unmarkedFit") + +setClass("unmarkedFrameGOccu", contains = "unmarkedFrameG3") + +setMethod("getDesign", "unmarkedFrameGOccu", + function(umf, formula, na.rm=TRUE){ + out <- methods::callNextMethod(umf, formula=formula, na.rm=na.rm) + names(out)[2] <- "Xpsi" + names(out)[5] <- "Xpsi.offset" + out +}) + +unmarkedFrameGOccu <- function(y, siteCovs=NULL, obsCovs=NULL, numPrimary, + yearlySiteCovs=NULL) { + y[y > 1] <- 1 + if(numPrimary < 2) stop("numPrimary < 2, use occu instead") + umf <- unmarkedFrameGPC(y, siteCovs=siteCovs, obsCovs=obsCovs, + numPrimary=numPrimary, yearlySiteCovs=NULL) + class(umf) <- "unmarkedFrameGOccu" + umf +} + +goccu <- function(psiformula, phiformula, pformula, data, + linkPsi = c("logit", "cloglog"), starts, method = "BFGS", + se = TRUE, ...){ + + linkPsi <- match.arg(linkPsi, c("logit","cloglog")) + psiLinkFunc <- ifelse(linkPsi=="cloglog", cloglog, plogis) + psiInvLink <- ifelse(linkPsi=="cloglog", "cloglog", "logistic") + psiLinkGrad <- ifelse(linkPsi=="cloglog", "cloglog.grad", "logistic.grad") + + # Pass phiformula as gamma/eps formula so it will be applied to + # yearlySiteCovs in getDesign + formlist <- list(psiformula=psiformula, phi=phiformula, + pformula=pformula) + + formula <- as.formula(paste(unlist(formlist), collapse=" ")) + + data@y[data@y > 1] <- 1 + + class(data) <- "unmarkedFrameGOccu" + + # handle offsets + + gd <- getDesign(data, formula = formula) + + y <- gd$y + Xpsi <- gd$Xpsi + Xphi <- gd$Xphi + Xp <- gd$Xdet + + M <- nrow(y) + T <- data@numPrimary + J <- ncol(y) / T + + # Identify entirely missing primary periods at each site + y_array <- array(t(y), c(J, T, M)) + missing_session <- t(apply(y_array, c(2,3), + function(x) as.numeric(all(is.na(x))))) + + # Create possible states in each T + alpha_potential <- as.matrix(expand.grid(rep(list(c(0, 1)), T))) + n_possible <- nrow(alpha_potential) + + # Known present at each site + known_present <- rep(0, M) + # Known available at each site and T + known_available <- matrix(0, nrow=M, ncol=T) + + for (i in 1:M){ + for (t in 1:T){ + for (j in 1:J){ + if(is.na(y_array[j,t,i])) next + if(y_array[j, t, i] == 1){ + known_present[i] <- 1 + known_available[i,t] <- 1 + } + } + } + } + + # Bundle data for TMB + dataList <- list(y=y, T=T, link=ifelse(linkPsi=='cloglog', 1, 0), + Xpsi=Xpsi, Xphi=Xphi, Xp=Xp, + n_possible=n_possible, + alpha_potential=alpha_potential, + known_present=known_present, known_available=known_available, + missing_session=missing_session) + + # Provide dimensions and starting values for parameters + # This part should change to be more like occu() if we add random effects + psi_ind <- 1:ncol(Xpsi) + phi_ind <- 1:ncol(Xphi) + max(psi_ind) + p_ind <- 1:ncol(Xp) + max(phi_ind) + nP <- max(p_ind) + params <- list(beta_psi = rep(0, length(psi_ind)), + beta_phi = rep(0, length(phi_ind)), + beta_p = rep(0, length(p_ind))) + + # Create TMB object + tmb_mod <- TMB::MakeADFun(data = c(model = "tmb_goccu", dataList), + parameters = params, + DLL = "unmarked_TMBExports", silent = TRUE) + + # Optimize TMB object, print and save results + if(missing(starts) || is.null(starts)) starts <- tmb_mod$par + opt <- optim(starts, fn = tmb_mod$fn, gr = tmb_mod$gr, method = method, + hessian = se, ...) + + covMat <- invertHessian(opt, nP, se) + ests <- opt$par + names(ests) <- c(colnames(Xpsi), colnames(Xphi), colnames(Xp)) + fmAIC <- 2 * opt$value + 2 * nP + + + psi_est <- unmarkedEstimate(name = "Occupancy", short.name = "psi", + estimates = ests[psi_ind], + covMat = covMat[psi_ind, psi_ind, drop=FALSE], + fixed = 1:ncol(Xpsi), + invlink = psiInvLink, + invlinkGrad = psiLinkGrad, + randomVarInfo=list() + ) + + phi_est <- unmarkedEstimate(name = "Availability", short.name = "phi", + estimates = ests[phi_ind], + covMat = covMat[phi_ind, phi_ind, drop=FALSE], + fixed = 1:ncol(Xphi), + invlink = "logistic", + invlinkGrad = "logistic.grad", + randomVarInfo=list() + ) + + p_est <- unmarkedEstimate(name = "Detection", short.name = "p", + estimates = ests[p_ind], + covMat = covMat[p_ind, p_ind, drop=FALSE], + fixed = 1:ncol(Xp), + invlink = "logistic", + invlinkGrad = "logistic.grad", + randomVarInfo=list() + ) + + estimate_list <- unmarkedEstimateList(list(psi=psi_est, phi=phi_est, + det=p_est)) + + # Create unmarkedFit object-------------------------------------------------- + umfit <- new("unmarkedFitGOccu", fitType = "goccu", call = match.call(), + formula = formula, formlist=formlist, data = data, + sitesRemoved = gd$removed.sites, + estimates = estimate_list, AIC = fmAIC, opt = opt, + negLogLike = opt$value, + nllFun = tmb_mod$fn, TMB=tmb_mod) + + return(umfit) + +} + +# Methods + +setMethod("predict_inputs_from_umf", "unmarkedFitGOccu", + function(object, type, newdata, na.rm, re.form=NA){ + designMats <- getDesign(newdata, object@formula, na.rm=na.rm) + X_idx <- switch(type, psi="Xpsi", phi="Xphi", det="Xdet") + off_idx <- paste0(X_idx, ".offset") + list(X=designMats[[X_idx]], offset=NULL) +}) + +setMethod("get_formula", "unmarkedFitGOccu", function(object, type, ...){ + fl <- object@formlist + switch(type, psi=fl$psiformula, phi=fl$phiformula, det=fl$pformula) +}) + +setMethod("get_orig_data", "unmarkedFitGOccu", function(object, type, ...){ + clean_covs <- clean_up_covs(object@data, drop_final=FALSE) + datatype <- switch(type, psi='site_covs', phi='yearly_site_covs', + det='obs_covs') + clean_covs[[datatype]] +}) + +setMethod("getP", "unmarkedFitGOccu", + function(object, na.rm=FALSE){ + gd <- getDesign(object@data, object@formula, na.rm=na.rm) + p <- drop(plogis(gd$Xdet %*% coef(object, "det"))) + M <- numSites(object@data) + p <- matrix(p, nrow=M, ncol=obsNum(object@data), + byrow=TRUE) + p +}) + +setMethod("fitted", "unmarkedFitGOccu", + function(object, na.rm= FALSE){ + + M <- numSites(object@data) + JT <- obsNum(object@data) + gd <- getDesign(object@data, object@formula, na.rm=na.rm) + + psi <- drop(plogis(gd$Xpsi %*% coef(object, "psi"))) + psi <- matrix(psi, nrow=M, ncol=JT) + + phi <- drop(plogis(gd$Xphi %*% coef(object, "phi"))) + phi <- rep(phi, each = JT / object@data@numPrimary) + phi <- matrix(phi, nrow=M, ncol=JT, byrow=TRUE) + + p <- getP(object) + + psi * phi * p +}) + + +# based on ranef for GPC +setMethod("ranef", "unmarkedFitGOccu", function(object, ...){ + + M <- numSites(object@data) + JT <- obsNum(object@data) + T <- object@data@numPrimary + J <- JT / T + + gd <- getDesign(object@data, object@formula, na.rm=FALSE) + y_array <- array(t(gd$y), c(J, T, M)) + + psi <- drop(plogis(gd$Xpsi %*% coef(object, "psi"))) + phi <- drop(plogis(gd$Xphi %*% coef(object, "phi"))) + phi <- matrix(phi, nrow=M, ncol=T, byrow=TRUE) + p <- getP(object) + p_array <- array(t(p), c(J, T, M)) + + Z <- ZZ <- 0:1 + post <- array(0, c(M, 2, 1)) + colnames(post) <- Z + + for(i in 1:M) { + f <- dbinom(Z, 1, psi[i]) + + ghi <- rep(0, 2) + + for(t in 1:T) { + gh <- matrix(-Inf, 2, 2) + for(z in Z) { + if(z < max(y_array[,,i], na.rm=TRUE)){ + gh[,z+1] <- -Inf + next + } + if(is.na(phi[i,t])) { + g <- rep(0, 2) + g[ZZ>z] <- -Inf + } else{ + g <- dbinom(ZZ, z, phi[i,t], log=TRUE) + } + h <- rep(0, 2) + for(j in 1:J) { + if(is.na(y_array[j,t,i]) | is.na(p_array[j,t,i])) next + h <- h + dbinom(y_array[j,t,i], ZZ, p_array[j,t,i], log=TRUE) + } + gh[,z+1] <- g + h + } + ghi <- ghi + log(colSums(exp(gh))) + } + fgh <- exp(f + ghi) + prM <- fgh/sum(fgh) + post[i,,1] <- prM + } + + new("unmarkedRanef", post=post) +}) + + +setMethod("simulate", "unmarkedFitGOccu", + function(object, nsim = 1, seed = NULL, na.rm = FALSE){ + + gd <- getDesign(object@data, object@formula, na.rm=FALSE) + M <- nrow(gd$y) + T <- object@data@numPrimary + JT <- ncol(gd$y) + J <- JT / T + y_array <- array(t(gd$y), c(J, T, M)) + + psi <- drop(plogis(gd$Xpsi %*% coef(object, "psi"))) + phi <- drop(plogis(gd$Xphi %*% coef(object, "phi"))) + phi <- matrix(phi, nrow=M, ncol=T, byrow=TRUE) + p <- getP(object) + + sim_list <- list() + + for (i in 1:nsim){ + z <- suppressWarnings(rbinom(M, 1, psi)) + z <- matrix(z, nrow=M, ncol=T) + + zz <- suppressWarnings(rbinom(M*T, 1, phi*z)) + zz <- matrix(zz, M, T) + + colrep <- rep(1:T, each=J) + zz <- zz[,colrep] + + y <- suppressWarnings(rbinom(M*T*J, 1, zz*p)) + y <- matrix(y, M, JT) + if(na.rm) y[which(is.na(gd$y))] <- NA + sim_list[[i]] <- y + } + + return(sim_list) +}) + + +setMethod("update", "unmarkedFitGOccu", + function(object, psiformula, phiformula, pformula, ..., + evaluate = TRUE) +{ + call <- object@call + if (is.null(call)) + stop("need an object with call slot") + formlist <- object@formlist + if (!missing(psiformula)) + call$psiformula <- update.formula(formlist$psiformula, psiformula) + if (!missing(phiformula)) + call$phiformula <- update.formula(formlist$phiformula, phiformula) + if (!missing(pformula)) + call$pformula <- update.formula(formlist$pformula, pformula) + extras <- match.call(call=sys.call(-1), + expand.dots = FALSE)$... + if(length(extras) > 0) { + existing <- !is.na(match(names(extras), names(call))) + for (a in names(extras)[existing]) call[[a]] <- extras[[a]] + if (any(!existing)) { + call <- c(as.list(call), extras[!existing]) + call <- as.call(call) + } + } + if (evaluate) + eval(call, parent.frame(2)) + else call +}) diff --git a/man/fitted-methods.Rd b/man/fitted-methods.Rd index a06b930..8868565 100644 --- a/man/fitted-methods.Rd +++ b/man/fitted-methods.Rd @@ -15,6 +15,7 @@ \alias{fitted,unmarkedFitGMM-method} \alias{fitted,unmarkedFitGDR-method} \alias{fitted,unmarkedFitDailMadsen-method} +\alias{fitted,unmarkedFitGOccu-method} \title{Methods for Function fitted in Package `unmarked'} \description{Extracted fitted values from a fitted model. } diff --git a/man/getP-methods.Rd b/man/getP-methods.Rd index f44ec68..c554ebe 100644 --- a/man/getP-methods.Rd +++ b/man/getP-methods.Rd @@ -17,6 +17,7 @@ \alias{getP,unmarkedFitDSO-method} \alias{getP,unmarkedFitMMO-method} \alias{getP,unmarkedFitGDR-method} +\alias{getP,unmarkedFitGOccu-method} \title{Methods for Function getP in Package `unmarked'} \description{ Methods for function \code{getP} in Package `unmarked'. These methods diff --git a/man/goccu.Rd b/man/goccu.Rd new file mode 100644 index 0000000..ef9f08c --- /dev/null +++ b/man/goccu.Rd @@ -0,0 +1,105 @@ +\name{goccu} +\alias{goccu} +\title{ +Fit multi-scale occupancy models +} +\description{ +Fit multi-scale occupancy models as described in Nichols et al. (2008) to +repeated presence-absence data collected using the robust design. This model +allows for inference about occupancy, availability, and detection probability. +} +\usage{ +goccu(psiformula, phiformula, pformula, data, linkPsi = c("logit", "cloglog"), + starts, method = "BFGS", se = TRUE, ...) +} +\arguments{ + \item{psiformula}{ + Right-hand sided formula describing occupancy covariates +} + \item{phiformula}{ + Right-hand sided formula describing availability covariates +} + \item{pformula}{ + Right-hand sided formula for detection probability covariates +} + \item{data}{ + An object of class unmarkedFrameGOccu or unmarkedMultFrame +} +\item{linkPsi}{Link function for the occupancy model. Options are + \code{"logit"} for the standard occupancy model or \code{"cloglog"} + for the complimentary log-log link, which relates occupancy + to site-level abundance. +} +\item{starts}{ + Starting values +} + \item{method}{ + Optimization method used by \code{\link{optim}} +} + \item{se}{ + Logical. Should standard errors be calculated? +} +\item{\dots}{ + Additional arguments to \code{\link{optim}}, such as lower and upper + bounds +} +} +\details{ + Primary periods could represent spatial or temporal sampling replicates. + For example, you could have several spatial sub-units within each site, where each + sub-unit was then sampled repeatedly. This is a frequent design for eDNA studies. + Or, you could have multiple primary periods of sampling at each site + (conducted at different times within a season), each of which contains + several secondary sampling periods. In both cases the robust design structure + can be used to estimate an availability probability in addition to + detection probability. See Kery and Royle (2015) 10.10 for more details. +} +\value{ + An object of class unmarkedFitGOccu +} +\references{ + Kery, M., & Royle, J. A. (2015). Applied hierarchical modeling in ecology: + Volume 1: Prelude and static models. Elsevier Science. + + Nichols, J. D., Bailey, L. L., O'Connell Jr, A. F., Talancy, N. W., + Campbell Grant, E. H., Gilbert, A. T., Annand E. M., Husband, T. P., & Hines, J. E. + (2008). Multi-scale occupancy estimation and modelling using multiple detection methods. + Journal of Applied Ecology, 45(5), 1321-1329. +} +\author{ + Ken Kellner \email{contact@kenkellner.com} +} + +\seealso{ +\code{\link{occu}}, \code{\link{colext}}, + \code{\link{unmarkedMultFrame}}, \code{\link{unmarkedFrameGOccu}} +} + +\examples{ + +set.seed(123) +M <- 100 +T <- 5 +J <- 4 + +psi <- 0.5 +phi <- 0.3 +p <- 0.4 + +z <- rbinom(M, 1, psi) +zmat <- matrix(z, nrow=M, ncol=T) + +zz <- rbinom(M*T, 1, zmat*phi) +zz <- matrix(zz, nrow=M, ncol=T) + +zzmat <- zz[,rep(1:T, each=J)] +y <- rbinom(M*T*J, 1, zzmat*p) +y <- matrix(y, M, J*T) +umf <- unmarkedMultFrame(y=y, numPrimary=T) + +\dontrun{ + mod <- goccu(psiformula = ~1, phiformula = ~1, pformula = ~1, umf) + plogis(coef(mod)) +} + +} diff --git a/man/ranef-methods.Rd b/man/ranef-methods.Rd index abf0c68..c38e82e 100644 --- a/man/ranef-methods.Rd +++ b/man/ranef-methods.Rd @@ -20,6 +20,7 @@ \alias{ranef,unmarkedFitNmixTTD-method} \alias{ranef,unmarkedFitGDR-method} \alias{ranef,unmarkedFitDailMadsen-method} +\alias{ranef,unmarkedFitGOccu-method} \title{ Methods for Function \code{ranef} in Package \pkg{unmarked} } \description{ Estimate posterior distributions of the random variables (latent diff --git a/man/simulate-methods.Rd b/man/simulate-methods.Rd index 67d942d..fc8a008 100644 --- a/man/simulate-methods.Rd +++ b/man/simulate-methods.Rd @@ -18,6 +18,7 @@ \alias{simulate,unmarkedFitGPC-method} \alias{simulate,unmarkedFitGDR-method} \alias{simulate,unmarkedFitDailMadsen-method} +\alias{simulate,unmarkedFitGOccu-method} \alias{simulate,character-method} \title{Methods for Function simulate in Package `unmarked'} \description{ diff --git a/man/unmarkedFit-class.Rd b/man/unmarkedFit-class.Rd index 91e1723..4d7aa8f 100644 --- a/man/unmarkedFit-class.Rd +++ b/man/unmarkedFit-class.Rd @@ -35,6 +35,7 @@ \alias{update,unmarkedFitNmixTTD-method} \alias{update,unmarkedFitGDR-method} \alias{update,unmarkedFitDailMadsen-method} +\alias{update,unmarkedFitGOccu-method} \alias{sampleSize} \alias{sampleSize,unmarkedFit-method} \alias{unmarkedFitOccu-class} diff --git a/man/unmarkedMultFrame.Rd b/man/unmarkedMultFrame.Rd index 622b3ff..383b905 100644 --- a/man/unmarkedMultFrame.Rd +++ b/man/unmarkedMultFrame.Rd @@ -8,6 +8,7 @@ \alias{unmarkedFrameGMM} \alias{unmarkedFrameGDS} \alias{unmarkedFrameGPC} +\alias{unmarkedFrameGOccu} \title{Create an unmarkedMultFrame, unmarkedFrameGMM, unmarkedFrameGDS, or unmarkedFrameGPC object} diff --git a/src/TMB/tmb_goccu.hpp b/src/TMB/tmb_goccu.hpp new file mode 100644 index 0000000..7788ad6 --- /dev/null +++ b/src/TMB/tmb_goccu.hpp @@ -0,0 +1,130 @@ +#undef TMB_OBJECTIVE_PTR +#define TMB_OBJECTIVE_PTR obj + +// Adapted from Stan code by Maxwell B. Joseph, +// https://discourse.mc-stan.org/t/divergent-transition-every-iteration-multi-scale-occupancy-model/13739/5 + +// name of function below **MUST** match filename +template<class Type> +Type tmb_goccu(objective_function<Type>* obj) { + //Describe input data + DATA_MATRIX(y); //observations + DATA_INTEGER(T); + + DATA_INTEGER(link); + + DATA_MATRIX(Xpsi); + DATA_MATRIX(Xphi); + DATA_MATRIX(Xp); + + DATA_INTEGER(n_possible); + DATA_MATRIX(alpha_potential); + DATA_VECTOR(known_present); + DATA_MATRIX(known_available); + DATA_MATRIX(missing_session); + + PARAMETER_VECTOR(beta_psi); + PARAMETER_VECTOR(beta_phi); + PARAMETER_VECTOR(beta_p); + + vector<Type> psi = Xpsi * beta_psi; + vector<Type> phi = Xphi * beta_phi; + vector<Type> p = Xp * beta_p; + + if(link == 1){ + //psi = cloglog(psi); + } else { + psi = invlogit(psi); + } + + phi = invlogit(phi); + p = invlogit(p); + + Type loglik = 0.0; + + int M = y.rows(); + int J = y.cols() / T; + + Type obs_lp; + Type poss_lp; + Type exp_poss_lp; + + int ystart; + vector<Type> ysite; + vector<Type> psite; + vector<Type> ysub; + vector<Type> psub; + + int tstart; + int pstart; + + for (int i=0; i<M; i++){ + + tstart = i*T; + pstart = i*(T*J); + + ysite = y.row(i); + psite = p.segment(pstart, T*J); + + if(known_present(i) == 1){ + for (int t=0; t<T; t++){ + + if(missing_session(i, t) == 1) continue; + + ystart = t*J; + + psub = psite.segment(ystart, J); + ysub = ysite.segment(ystart, J); + + obs_lp = log(phi(tstart+t)); + for (int j=0; j<J; j++){ + if(R_IsNA(asDouble(ysub(j)))) continue; + obs_lp += dbinom(ysub(j), Type(1), psub(j), true); + } + if(known_available(i, t) == 1){ + loglik += obs_lp; + } else { + loglik += log(exp(obs_lp) + exp(log(1-phi(tstart+t)))); + } + } + loglik += log(psi(i)); + } else { + + exp_poss_lp = 0.0; + + for (int k=0; k<n_possible; k++){ + poss_lp = log(psi(i)); + + for (int t=0; t<T; t++){ + + if(missing_session(i, t) == 1) continue; + + ystart = t*J; + + psub = psite.segment(ystart, J); + ysub = ysite.segment(ystart, J); + + if(alpha_potential(k, t) == 0){ + poss_lp += log(1 - phi(tstart+t)); + } else { + poss_lp += log(phi(tstart+t)); + for (int j=0; j<J; j++){ + if(R_IsNA(asDouble(ysub(j)))) continue; + poss_lp += dbinom(ysub(j), Type(1), psub(j), true); + } + } + } + exp_poss_lp += exp(poss_lp); + } + exp_poss_lp += exp(log(1-psi(i))); + + loglik += log(exp_poss_lp); + } + } + + return -loglik; + +} + +#undef TMB_OBJECTIVE_PTR +#define TMB_OBJECTIVE_PTR this diff --git a/src/TMB/unmarked_TMBExports.cpp b/src/TMB/unmarked_TMBExports.cpp index 5c1bfea..6aa57de 100644 --- a/src/TMB/unmarked_TMBExports.cpp +++ b/src/TMB/unmarked_TMBExports.cpp @@ -9,6 +9,7 @@ #include "tmb_multinomPois.hpp" #include "tmb_distsamp.hpp" #include "tmb_gdistremoval.hpp" +#include "tmb_goccu.hpp" template<class Type> Type objective_function<Type>::operator() () { @@ -23,6 +24,8 @@ Type objective_function<Type>::operator() () { return tmb_distsamp(this); } else if(model == "tmb_gdistremoval"){ return tmb_gdistremoval(this); + } else if(model == "tmb_goccu"){ + return tmb_goccu(this); } else { error("Unknown model."); } diff --git a/tests/testthat/test_goccu.R b/tests/testthat/test_goccu.R new file mode 100644 index 0000000..4572515 --- /dev/null +++ b/tests/testthat/test_goccu.R @@ -0,0 +1,150 @@ +context("goccu fitting function") +skip_on_cran() + +set.seed(123) +M <- 100 +T <- 5 +J <- 4 + +psi <- 0.5 +phi <- 0.3 +p <- 0.4 + +z <- rbinom(M, 1, psi) +zmat <- matrix(z, nrow=M, ncol=T) + +zz <- rbinom(M*T, 1, zmat*phi) +zz <- matrix(zz, nrow=M, ncol=T) + +zzmat <- zz[,rep(1:T, each=J)] +y <- rbinom(M*T*J, 1, zzmat*p) +y <- matrix(y, M, J*T) +umf <- unmarkedMultFrame(y=y, numPrimary=T) + +unmarkedFrameGOccu <- function(y, siteCovs=NULL, obsCovs=NULL, numPrimary, + yearlySiteCovs=NULL) { + y[y > 1] <- 1 + if(numPrimary < 2) stop("numPrimary < 2, use occu instead") + umf <- unmarkedFrameGPC(y, siteCovs=siteCovs, obsCovs=obsCovs, + numPrimary=numPrimary, yearlySiteCovs=NULL) + class(umf) <- "unmarkedFrameGOccu" + umf +} + +test_that("unmarkedFrameGOccu can be constructed", { + set.seed(123) + sc <- data.frame(x=rnorm(M)) + ysc <- matrix(rnorm(M*T), M, T) + oc <- matrix(rnorm(M*T*J), M, T*J) + + umf2 <- unmarkedFrameGOccu(y, siteCovs=sc, obsCovs=list(x2=oc), + yearlySiteCovs=list(x3=ysc), numPrimary=T) + expect_is(umf2, "unmarkedFrameGOccu") +}) + +test_that("goccu can fit models", { + + # Without covariates + mod <- goccu(~1, ~1, ~1, umf) + expect_equivalent(coef(mod), c(0.16129, -0.97041, -0.61784), tol=1e-5) + + # With covariates + set.seed(123) + sc <- data.frame(x=rnorm(M)) + ysc <- matrix(rnorm(M*T), M, T) + oc <- matrix(rnorm(M*T*J), M, T*J) + + umf2 <- unmarkedMultFrame(y=y, siteCovs=sc, yearlySiteCovs=list(x2=ysc), + obsCovs=list(x3=oc), numPrimary=T) + + mod2 <- goccu(~x, ~x2, ~x3, umf2) + expect_equivalent(coef(mod2), c(0.18895, -0.23629,-0.97246,-0.094335,-0.61808, + -0.0040056), tol=1e-5) + + # predict + pr <- predict(mod2, 'psi') + expect_equal(dim(pr), c(M, 4)) + expect_equal(pr$Predicted[1], 0.5796617, tol=1e-5) + + # phi should not drop last level + pr2 <- predict(mod2, 'phi') + expect_equal(dim(pr2), c(M*T, 4)) + + nd <- data.frame(x=1) + pr3 <- predict(mod2, 'psi', newdata=nd) + expect_true(nrow(pr3) == 1) + expect_equal(pr3$Predicted[1], 0.488168, tol=1e-5) + + # Other methods + ft <- fitted(mod2) + expect_equal(dim(ft), dim(umf2@y)) + expect_true(all(ft >=0 & ft <= 1)) + + res <- residuals(mod2) + expect_equal(dim(res), dim(umf2@y)) + + gp <- getP(mod2) + expect_equal(dim(gp), dim(umf2@y)) + expect_equal(gp[1,1], 0.349239, tol=1e-5) + + set.seed(123) + s <- simulate(mod2, nsim=2) + expect_equal(length(s), 2) + expect_equal(dim(s[[1]]), dim(mod2@data@y)) + simumf <- umf2 + simumf@y <- s[[1]] + simmod <- update(mod2, data=simumf) + expect_equivalent(coef(simmod), + c(0.174991, -0.27161, -1.32766, 0.054459,-0.41610,-0.073922), tol=1e-5) + + r <- ranef(mod2) + expect_equal(dim(r@post), c(M, 2, 1)) + expect_equal(sum(bup(r)), 53.13565, tol=1e-4) + + pb <- parboot(mod2, nsim=2) + expect_is(pb, "parboot") + + npb <- nonparboot(mod2, B=2, bsType='site') + + +}) + +test_that("goccu handles missing values", { + + set.seed(123) + y2 <- y + y2[1,1] <- NA + y2[2,1:J] <- NA + + sc <- data.frame(x=rnorm(M)) + sc$x[3] <- NA + ysc <- matrix(rnorm(M*T), M, T) + ysc[4,1] <- NA + oc <- matrix(rnorm(M*T*J), M, T*J) + oc[5,1] <- NA + oc[6,1:J] <- NA + + umf_na <- unmarkedMultFrame(y=y2, siteCovs=sc, yearlySiteCovs=list(x2=ysc), + obsCovs=list(x3=oc), numPrimary=T) + + mod_na <- expect_warning(goccu(~x, ~x2, ~x3, umf_na)) + + pr <- expect_warning(predict(mod_na, 'psi')) + expect_equal(nrow(pr), M-1) + + # Need to re-write these to use the design matrix instead of predict + gp <- getP(mod_na) + expect_equal(dim(gp), c(100, 20)) + expect_true(is.na(gp[5,1])) + expect_true(all(is.na(gp[6, 1:4]))) + s <- simulate(mod_na) + expect_equal(dim(s[[1]]), dim(mod_na@data@y)) + ft <- fitted(mod_na) + expect_equal(dim(ft), dim(mod_na@data@y)) + r <- ranef(mod_na) + expect_equal(dim(r@post), c(100, 2, 1)) + expect_true(is.na(bup(r)[3])) + + pb <- expect_warning(parboot(mod_na, nsim=2)) + expect_is(pb, "parboot") +}) |