Add ZIP support to gmultmix

author: Ken Kellner <ken@kenkellner.com> 2023-07-27 13:51:26 -0400
committer: Ken Kellner <ken@kenkellner.com> 2023-07-27 14:02:15 -0400
commit: 3c1b8513915579b8420b4f17490d8343f2fbbccc (patch)
tree: 3b391be8260a45b18f1c487c02ba420f9386c7d6
parent: a0bb14f8b1b3d4fbc59df6a02dca7553a2f5b319 (diff)
4 files changed, 75 insertions, 23 deletions
diff --git a/R/gmultmix.R b/R/gmultmix.R
index 6dabe9b..a6da9d5 100644
--- a/R/gmultmix.R
+++ b/R/gmultmix.R
@@ -1,7 +1,7 @@
 
 # data will need to be an unmarkedMultFrame
 gmultmix <- function(lambdaformula, phiformula, pformula, data,
-    mixture=c('P', 'NB'), K, starts, method = "BFGS", se = TRUE,
+    mixture=c("P", "NB", "ZIP"), K, starts, method = "BFGS", se = TRUE,
     engine=c("C","R"), threads=1, ...)
 {
 if(!is(data, "unmarkedFrameGMM"))
@@ -59,7 +59,7 @@ if(T==1) {
     phiPars <- colnames(Xphi)
     }
 nDP <- ncol(Xdet)
-nP <- nLP + nPP + nDP + (mixture=='NB')
+nP <- nLP + nPP + nDP + (mixture%in%c('NB','ZIP'))
 if(!missing(starts) && length(starts) != nP)
     stop(paste("The number of starting values should be", nP))
 
@@ -102,9 +102,10 @@ nll_R <- function(pars) {
     cp[,,R+1] <- 1 - apply(cp[,,1:R,drop=FALSE], 1:2, sum, na.rm=TRUE)
 
     switch(mixture,
-        P = f <- sapply(k, function(x) dpois(x, lambda)),
-        NB = f <- sapply(k, function(x) dnbinom(x, mu=lambda,
-            size=exp(pars[nP]))))
+      P = f <- sapply(k, function(x) dpois(x, lambda)),
+      NB = f <- sapply(k, function(x) dnbinom(x, mu=lambda, size=exp(pars[nP]))),
+      ZIP = f <- sapply(k, function(x) dzip(rep(x, length(lambda)), lambda=lambda, psi=plogis(pars[nP])))
+    )
     g <- matrix(as.numeric(NA), M, lk)
     for(i in 1:M) {
         A <- matrix(0, lk, T)
@@ -134,8 +135,8 @@ if(engine=="R"){
   kmytC <- kmyt
   kmytC[which(is.na(kmyt))] <- 0
 
-  mixture_code <- switch(mixture, P={1}, NB={2})
-  n_param <- c(nLP, nPP, nDP, mixture=="NB")
+  mixture_code <- switch(mixture, P={1}, NB={2}, ZIP={3})
+  n_param <- c(nLP, nPP, nDP, mixture%in%c("NB","ZIP"))
   Kmin <- apply(yt, 1, max, na.rm=TRUE)
 
   nll <- function(params) {
@@ -157,8 +158,7 @@ covMat <- invertHessian(fm, nP, se)
 ests <- fm$par
 fmAIC <- 2 * fm$value + 2 * nP
 
-if(identical(mixture,"NB")) nbParm <- "alpha"
-	else nbParm <- character(0)
+nbParm <- switch(mixture, P={character(0)}, NB={"alpha"}, ZIP={"psi"})
 
 names(ests) <- c(lamPars, phiPars, detPars, nbParm)
 
@@ -192,6 +192,13 @@ if(identical(mixture,"NB"))
         covMat = as.matrix(covMat[nP, nP]), invlink = "exp",
         invlinkGrad = "exp")
 
+if(identical(mixture,"ZIP")) {
+    estimateList@estimates$psi <- unmarkedEstimate(name="Zero-inflation",
+        short.name = "psi", estimates = ests[nP],
+        covMat=as.matrix(covMat[nP, nP]), invlink = "logistic",
+        invlinkGrad = "logistic.grad")
+}
+
 umfit <- new("unmarkedFitGMM", fitType = "gmn",
     call = match.call(), formula = form, formlist = formlist,
     data = data, estimates = estimateList, sitesRemoved = D$removed.sites,
diff --git a/R/unmarkedFit.R b/R/unmarkedFit.R
index a856288..161616e 100644
--- a/R/unmarkedFit.R
+++ b/R/unmarkedFit.R
@@ -2874,7 +2874,12 @@ setMethod("simulate", "unmarkedFitGMM",
         switch(mixture,
             P = M <- rpois(n=n, lambda=lam),
             NB = M <- rnbinom(n=n, mu=lam,
-                size=exp(coef(object, type="alpha"))))
+                size=exp(coef(object, type="alpha"))),
+            ZIP = {
+              psi <- plogis(coef(object['psi']))
+              M <- rzip(n, lambda=lam, psi=psi)
+            }
+        )
 
         N <- rbinom(n*T, size=M, prob=phi.mat)
         # bug fix 3/16/2010
diff --git a/man/gmultmix.Rd b/man/gmultmix.Rd
index b54701c..40f6007 100644
--- a/man/gmultmix.Rd
+++ b/man/gmultmix.Rd
@@ -9,7 +9,7 @@ The three model parameters are abundance, availability, and detection
 probability.
 }
 \usage{
-gmultmix(lambdaformula, phiformula, pformula, data, mixture = c("P", "NB"), K, 
+gmultmix(lambdaformula, phiformula, pformula, data, mixture = c("P", "NB", "ZIP"), K, 
          starts, method = "BFGS", se = TRUE, engine=c("C","R"), threads=1, ...)
 }
 \arguments{
@@ -18,8 +18,8 @@ gmultmix(lambdaformula, phiformula, pformula, data, mixture = c("P", "NB"), K,
     \item{phiformula}{RHS formula describing availability covariates}
     \item{pformula}{RHS formula describing detection covariates}
     \item{data}{An object of class unmarkedFrameGMM}
-    \item{mixture}{Either "P" or "NB" for Poisson and Negative Binomial mixing
-        distributions.}
+    \item{mixture}{Either "P", "NB", or "ZIP" for the Poisson, negative binomial, or
+      zero-inflated Poisson models of abundance}
     \item{K}{The upper bound of integration}
     \item{starts}{Starting values}
     \item{method}{Optimization method used by \code{\link{optim}}}
@@ -35,16 +35,16 @@ gmultmix(lambdaformula, phiformula, pformula, data, mixture = c("P", "NB"), K,
     }
 \details{
 The latent transect-level super-population abundance distribution
-\eqn{f(M | \mathbf{\theta})}{f(M | theta)} can be set as either a
-Poisson or
-a negative binomial random variable, depending on the setting of the
-\code{mixture} argument. \code{mixture = "P"} or \code{mixture = "NB"}
-select
-the Poisson or negative binomial distribution respectively.  The mean of
-\eqn{M_i} is \eqn{\lambda_i}{lambda_i}.  If \eqn{M_i \sim NB}{M_i ~ NB},
-then an
-additional parameter, \eqn{\alpha}{alpha}, describes dispersion (lower
-\eqn{\alpha}{alpha} implies higher variance).
+\eqn{f(M | \mathbf{\theta})}{f(M | theta)} can be set as a
+Poisson, negative binomial, or zero-inflated Poisson random variable, 
+depending on the setting of the \code{mixture} argument. 
+\code{mixture = "P"}, \code{mixture = "NB"}, and \code{mixture = "ZIP"}
+select the Poisson, negative binomial, and zero-inflated Poisson distributions 
+respectively.  The mean of \eqn{M_i} is \eqn{\lambda_i}{lambda_i}.  
+If \eqn{M_i \sim NB}{M_i ~ NB}, then an additional parameter, \eqn{\alpha}{alpha}, 
+describes dispersion (lower \eqn{\alpha}{alpha} implies higher variance). If 
+\eqn{M_i \sim ZIP}{M_i ~ ZIP}, then an additional zero-inflation parameter
+\eqn{\psi}{psi} is estimated.
 
 The number of individuals available for detection at time j
 is a modeled as binomial:
diff --git a/tests/testthat/test_gmultmix.R b/tests/testthat/test_gmultmix.R
index 8389b66..5caafa6 100644
--- a/tests/testthat/test_gmultmix.R
+++ b/tests/testthat/test_gmultmix.R
@@ -287,3 +287,43 @@ test_that("getP works when there is only 1 site", {
   expect_equal(dim(gp), c(1,4))
 
 })
+
+test_that("gmultmix ZIP model works",{
+  # Simulate independent double observer data
+  nSites <- 50
+  lambda <- 10
+  psi <- 0.3
+  p1 <- 0.5
+  p2 <- 0.3
+  cp <- c(p1*(1-p2), p2*(1-p1), p1*p2)
+  set.seed(9023)
+  N <- unmarked:::rzip(nSites, lambda, psi)
+  y <- matrix(NA, nSites, 3)
+  for(i in 1:nSites) {
+    y[i,] <- rmultinom(1, N[i], c(cp, 1-sum(cp)))[1:3]
+  }
+
+  # Make unmarkedFrame
+  observer <- matrix(c('A','B'), nSites, 2, byrow=TRUE)
+  expect_warning(umf <- unmarkedFrameGMM(y=y, obsCovs=list(observer=observer),
+         type="double",numPrimary=1))
+
+  # Compare R and C engines
+  fmR <- gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="R", se=FALSE,
+                  control=list(maxit=1))
+  fmC <- gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="C", se=FALSE,
+                  control=list(maxit=1))
+  expect_equal(coef(fmR), coef(fmC))
+
+  #  Fit model full
+  fm <- gmultmix(~1,~1,~observer, umf, mixture="ZIP")
+  expect_equivalent(coef(fm), c(2.2289,0.1858,-0.9285,-0.9501), tol=1e-4)
+
+  # Check methods
+  ft <- fitted(fm)
+  r <- ranef(fm)
+  b <- bup(r)
+  #plot(N, b)
+  s <- simulate(fm)
+
+})
author	Ken Kellner <ken@kenkellner.com>	2023-07-27 13:51:26 -0400
committer	Ken Kellner <ken@kenkellner.com>	2023-07-27 14:02:15 -0400
commit	3c1b8513915579b8420b4f17490d8343f2fbbccc (patch)
tree	3b391be8260a45b18f1c487c02ba420f9386c7d6
parent	a0bb14f8b1b3d4fbc59df6a02dca7553a2f5b319 (diff)