Add ZIP support to gpcountgdistsamp_ZIP

author: Ken Kellner <ken@kenkellner.com> 2023-07-27 14:27:02 -0400
committer: Ken Kellner <ken@kenkellner.com> 2023-07-27 14:47:19 -0400
commit: ae793d82bf05b88826bdad4ccb1f685fc7c68def (patch)
tree: 2aa38ffb6f9e78cdd4d0164f19589bd9336c3983
parent: 3c1b8513915579b8420b4f17490d8343f2fbbccc (diff)
6 files changed, 83 insertions, 19 deletions
diff --git a/DESCRIPTION b/DESCRIPTION
index ba0d28e..b658bba 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: unmarked
-Version: 1.3.2
-Date: 2023-07-06
+Version: 1.3.2.9001
+Date: 2023-07-27
 Type: Package
 Title: Models for Data from Unmarked Animals
 Authors@R: c(
diff --git a/R/gpcount.R b/R/gpcount.R
index 2b7afd6..af4dade 100644
--- a/R/gpcount.R
+++ b/R/gpcount.R
@@ -1,15 +1,13 @@
 
 # data will need to be an unmarkedMultFrame
 gpcount <- function(lambdaformula, phiformula, pformula, data,
-    mixture=c('P', 'NB'), K, starts, method = "BFGS", se = TRUE,
+    mixture=c('P', 'NB', 'ZIP'), K, starts, method = "BFGS", se = TRUE,
     engine=c('C', 'R'), threads=1, ...)
 {
 if(!is(data, "unmarkedFrameGPC"))
     stop("Data is not of class unmarkedFrameGPC.")
 mixture <- match.arg(mixture)
 engine <- match.arg(engine)
-if(identical(mixture, "ZIP") & identical(engine, "R"))
-    stop("ZIP mixture not available when 'engine=R'")
 
 formlist <- list(lambdaformula = lambdaformula, phiformula = phiformula,
     pformula = pformula)
@@ -49,7 +47,7 @@ nLP <- ncol(Xlam)
 nPP <- ncol(Xphi)
 phiPars <- colnames(Xphi)
 nDP <- ncol(Xdet)
-nP <- nLP + nPP + nDP + (mixture=='NB')
+nP <- nLP + nPP + nDP + (mixture%in%c('NB','ZIP'))
 if(!missing(starts) && length(starts) != nP)
     stop("There should be", nP, "starting values, not", length(starts))
 
@@ -66,8 +64,9 @@ nll <- function(pars) {
     for(i in 1:I) {
         f <- switch(mixture,
             P = dpois(M, lam[i], log=TRUE),
-            NB = dnbinom(M, mu=lam[i], size=exp(pars[nP]), log=TRUE))
-#            ZIP = dzip())
+            NB = dnbinom(M, mu=lam[i], size=exp(pars[nP]), log=TRUE),
+            ZIP = log(dzip(M, lambda=lam[i], psi=plogis(pars[nP])))
+        )
         ghi <- rep(0, lM)
         for(t in 1:T) {
             gh <- matrix(-Inf, lM, lM)
@@ -118,8 +117,7 @@ covMat <- invertHessian(fm, nP, se)
 ests <- fm$par
 fmAIC <- 2 * fm$value + 2 * nP
 
-if(identical(mixture,"NB")) nbParm <- "alpha"
-	else nbParm <- character(0)
+nbParm <- switch(mixture, P={character(0)}, NB={"alpha"}, ZIP={"psi"})
 
 names(ests) <- c(lamPars, phiPars, detPars, nbParm)
 
@@ -150,6 +148,13 @@ if(identical(mixture,"NB"))
         covMat = as.matrix(covMat[nP, nP]), invlink = "exp",
         invlinkGrad = "exp")
 
+if(identical(mixture,"ZIP")) {
+    estimateList@estimates$psi <- unmarkedEstimate(name="Zero-inflation",
+        short.name = "psi", estimates = ests[nP],
+        covMat=as.matrix(covMat[nP, nP]), invlink = "logistic",
+        invlinkGrad = "logistic.grad")
+}
+
 umfit <- new("unmarkedFitGPC", fitType = "gpcount",
     call = match.call(), formula = form, formlist = formlist,
     data = data, estimates = estimateList, sitesRemoved = D$removed.sites,
diff --git a/R/ranef.R b/R/ranef.R
index 5f0928a..f99efaf 100644
--- a/R/ranef.R
+++ b/R/ranef.R
@@ -531,11 +531,15 @@ setMethod("ranef", "unmarkedFitGPC",
     mix <- object@mixture
     if(identical(mix, "NB"))
         alpha <- exp(coef(object, type="alpha"))
+    if(identical(mix, "ZIP"))
+        psi <- plogis(coef(object, type="psi"))
+
     for(i in 1:R) {
         switch(mix,
                P  = f <- dpois(M, lambda[i]),
-               # FIXME: Add ZIP
-               NB = f <- dnbinom(M, mu=lambda[i], size=alpha))
+               NB = f <- dnbinom(M, mu=lambda[i], size=alpha),
+               ZIP = f <- dzip(M, lambda=lambda[i], psi=psi)
+        )
         ghi <- rep(0, lM)
         for(t in 1:T) {
             gh <- matrix(-Inf, lM, lM)
diff --git a/R/unmarkedFit.R b/R/unmarkedFit.R
index 161616e..95c832c 100644
--- a/R/unmarkedFit.R
+++ b/R/unmarkedFit.R
@@ -2945,10 +2945,15 @@ setMethod("simulate", "unmarkedFitGPC",
     simList <- list()
     for(s in 1:nsim) {
         switch(mixture,
-               P = M <- rpois(n=R, lambda=lam),
+            P = M <- rpois(n=R, lambda=lam),
                #               FIXME: Add ZIP
-               NB = M <- rnbinom(n=R, mu=lam,
-               size=exp(coef(object, type="alpha"))))
+            NB = M <- rnbinom(n=R, mu=lam,
+               size=exp(coef(object, type="alpha"))),   
+            ZIP = {
+              psi <- plogis(coef(object['psi']))
+              M <- rzip(R, lambda=lam, psi=psi)
+            }
+        )
 
         N <- rbinom(R*T, size=M, prob=phi.mat)
         N <- matrix(N, nrow=R, ncol=T, byrow=FALSE)
diff --git a/man/gpcount.Rd b/man/gpcount.Rd
index f847baa..b7e3b1a 100644
--- a/man/gpcount.Rd
+++ b/man/gpcount.Rd
@@ -10,7 +10,7 @@ population size, availability, and detection probability.
 }
 \usage{
 gpcount(lambdaformula, phiformula, pformula, data,
-mixture = c("P", "NB"), K, starts, method = "BFGS", se = TRUE,
+mixture = c("P", "NB", "ZIP"), K, starts, method = "BFGS", se = TRUE,
 engine = c("C", "R"), threads=1, ...)
 }
 \arguments{
@@ -27,7 +27,8 @@ engine = c("C", "R"), threads=1, ...)
     An object of class unmarkedFrameGPC
 }
   \item{mixture}{
-    Either "P" or "NB" for Poisson and negative binomial distributions
+    Either "P", "NB", or "ZIP" for Poisson, negative binomial, or 
+    zero-inflated Poisson distributions
 }
   \item{K}{
     The maximum possible value of M, the super-population size.
@@ -58,11 +59,13 @@ engine = c("C", "R"), threads=1, ...)
 \details{
   The latent transect-level super-population abundance distribution
   \eqn{f(M | \mathbf{\theta})}{f(M | theta)} can be set as either a
-  Poisson or a negative binomial random variable, depending on the
+  Poisson, negative binomial, or zero-inflated Poisson random variable, depending on the
   setting of the \code{mixture} argument. The expected value of
   \eqn{M_i} is \eqn{\lambda_i}{lambda_i}.  If \eqn{M_i \sim NB}{M_i ~ NB},
   then an additional parameter, \eqn{\alpha}{alpha}, describes
-  dispersion (lower \eqn{\alpha}{alpha} implies higher variance).
+  dispersion (lower \eqn{\alpha}{alpha} implies higher variance). If 
+  \eqn{M_i \sim ZIP}{M_i ~ ZIP}, then an additional zero-inflation parameter
+  \eqn{\psi}{psi} is estimated.
 
   The number of individuals available for detection at time j
   is a modeled as binomial:
diff --git a/tests/testthat/test_gpcount.R b/tests/testthat/test_gpcount.R
index 022c2ce..7f23a76 100644
--- a/tests/testthat/test_gpcount.R
+++ b/tests/testthat/test_gpcount.R
@@ -117,3 +117,50 @@ test_that("gpcount R and C++ engines give same results",{
   fmR <- gpcount(~x, ~yr, ~o1, data = umf, K=23, engine="R", control=list(maxit=1))
   expect_equal(coef(fm), coef(fmR))
 })
+
+test_that("gpcount ZIP mixture works", {
+  
+  set.seed(123)
+  M <- 100
+  J <- 5
+  T <- 3
+  lam <- 3
+  psi <- 0.3
+  p <- 0.5
+  phi <- 0.7
+
+  y <- array(NA, c(M, J, T))
+
+  N <- unmarked:::rzip(M, lambda=lam, psi=psi)
+
+  for (i in 1:M){
+    for (t in 1:T){
+      n <- rbinom(1, N[i], phi)
+      for (j in 1:J){
+         y[i,j,t] <- rbinom(1, n, p)
+      }
+    }
+  }
+
+  ywide <- cbind(y[,,1], y[,,2], y[,,3])
+  umf <- unmarkedFrameGPC(y=ywide, numPrimary=T)
+  
+  # check R and C engines match
+  fitC <- gpcount(~1, ~1, ~1, umf, mixture="ZIP", K=10, engine="C",
+                se=FALSE, control=list(maxit=1))
+  fitR <- gpcount(~1, ~1, ~1, umf, mixture="ZIP", K=10, engine="R",
+                se=FALSE, control=list(maxit=1))
+  expect_equal(coef(fitC), coef(fitR))
+  
+  # Properly fit model
+  fit <- gpcount(~1, ~1, ~1, umf, mixture="ZIP", K=10)
+  expect_equivalent(coef(fit), c(1.02437, 0.85104, -0.019588, -1.16139), tol=1e-4)
+
+  # Check methods
+  ft <- fitted(fit)
+  r <- ranef(fit)
+  b <- bup(r)
+  #plot(N, b)
+  #abline(a=0, b=1)
+  s <- simulate(fit)
+})
author	Ken Kellner <ken@kenkellner.com>	2023-07-27 14:27:02 -0400
committer	Ken Kellner <ken@kenkellner.com>	2023-07-27 14:47:19 -0400
commit	ae793d82bf05b88826bdad4ccb1f685fc7c68def (patch)
tree	2aa38ffb6f9e78cdd4d0164f19589bd9336c3983
parent	3c1b8513915579b8420b4f17490d8343f2fbbccc (diff)