From ae793d82bf05b88826bdad4ccb1f685fc7c68def Mon Sep 17 00:00:00 2001 From: Ken Kellner Date: Thu, 27 Jul 2023 14:27:02 -0400 Subject: Add ZIP support to gpcount --- DESCRIPTION | 4 ++-- R/gpcount.R | 21 +++++++++++-------- R/ranef.R | 8 ++++++-- R/unmarkedFit.R | 11 +++++++--- man/gpcount.Rd | 11 ++++++---- tests/testthat/test_gpcount.R | 47 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 83 insertions(+), 19 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ba0d28e..b658bba 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: unmarked -Version: 1.3.2 -Date: 2023-07-06 +Version: 1.3.2.9001 +Date: 2023-07-27 Type: Package Title: Models for Data from Unmarked Animals Authors@R: c( diff --git a/R/gpcount.R b/R/gpcount.R index 2b7afd6..af4dade 100644 --- a/R/gpcount.R +++ b/R/gpcount.R @@ -1,15 +1,13 @@ # data will need to be an unmarkedMultFrame gpcount <- function(lambdaformula, phiformula, pformula, data, - mixture=c('P', 'NB'), K, starts, method = "BFGS", se = TRUE, + mixture=c('P', 'NB', 'ZIP'), K, starts, method = "BFGS", se = TRUE, engine=c('C', 'R'), threads=1, ...) { if(!is(data, "unmarkedFrameGPC")) stop("Data is not of class unmarkedFrameGPC.") mixture <- match.arg(mixture) engine <- match.arg(engine) -if(identical(mixture, "ZIP") & identical(engine, "R")) - stop("ZIP mixture not available when 'engine=R'") formlist <- list(lambdaformula = lambdaformula, phiformula = phiformula, pformula = pformula) @@ -49,7 +47,7 @@ nLP <- ncol(Xlam) nPP <- ncol(Xphi) phiPars <- colnames(Xphi) nDP <- ncol(Xdet) -nP <- nLP + nPP + nDP + (mixture=='NB') +nP <- nLP + nPP + nDP + (mixture%in%c('NB','ZIP')) if(!missing(starts) && length(starts) != nP) stop("There should be", nP, "starting values, not", length(starts)) @@ -66,8 +64,9 @@ nll <- function(pars) { for(i in 1:I) { f <- switch(mixture, P = dpois(M, lam[i], log=TRUE), - NB = dnbinom(M, mu=lam[i], size=exp(pars[nP]), log=TRUE)) -# ZIP = dzip()) + NB = dnbinom(M, mu=lam[i], size=exp(pars[nP]), log=TRUE), + ZIP = log(dzip(M, lambda=lam[i], psi=plogis(pars[nP]))) + ) ghi <- rep(0, lM) for(t in 1:T) { gh <- matrix(-Inf, lM, lM) @@ -118,8 +117,7 @@ covMat <- invertHessian(fm, nP, se) ests <- fm$par fmAIC <- 2 * fm$value + 2 * nP -if(identical(mixture,"NB")) nbParm <- "alpha" - else nbParm <- character(0) +nbParm <- switch(mixture, P={character(0)}, NB={"alpha"}, ZIP={"psi"}) names(ests) <- c(lamPars, phiPars, detPars, nbParm) @@ -150,6 +148,13 @@ if(identical(mixture,"NB")) covMat = as.matrix(covMat[nP, nP]), invlink = "exp", invlinkGrad = "exp") +if(identical(mixture,"ZIP")) { + estimateList@estimates$psi <- unmarkedEstimate(name="Zero-inflation", + short.name = "psi", estimates = ests[nP], + covMat=as.matrix(covMat[nP, nP]), invlink = "logistic", + invlinkGrad = "logistic.grad") +} + umfit <- new("unmarkedFitGPC", fitType = "gpcount", call = match.call(), formula = form, formlist = formlist, data = data, estimates = estimateList, sitesRemoved = D$removed.sites, diff --git a/R/ranef.R b/R/ranef.R index 5f0928a..f99efaf 100644 --- a/R/ranef.R +++ b/R/ranef.R @@ -531,11 +531,15 @@ setMethod("ranef", "unmarkedFitGPC", mix <- object@mixture if(identical(mix, "NB")) alpha <- exp(coef(object, type="alpha")) + if(identical(mix, "ZIP")) + psi <- plogis(coef(object, type="psi")) + for(i in 1:R) { switch(mix, P = f <- dpois(M, lambda[i]), - # FIXME: Add ZIP - NB = f <- dnbinom(M, mu=lambda[i], size=alpha)) + NB = f <- dnbinom(M, mu=lambda[i], size=alpha), + ZIP = f <- dzip(M, lambda=lambda[i], psi=psi) + ) ghi <- rep(0, lM) for(t in 1:T) { gh <- matrix(-Inf, lM, lM) diff --git a/R/unmarkedFit.R b/R/unmarkedFit.R index 161616e..95c832c 100644 --- a/R/unmarkedFit.R +++ b/R/unmarkedFit.R @@ -2945,10 +2945,15 @@ setMethod("simulate", "unmarkedFitGPC", simList <- list() for(s in 1:nsim) { switch(mixture, - P = M <- rpois(n=R, lambda=lam), + P = M <- rpois(n=R, lambda=lam), # FIXME: Add ZIP - NB = M <- rnbinom(n=R, mu=lam, - size=exp(coef(object, type="alpha")))) + NB = M <- rnbinom(n=R, mu=lam, + size=exp(coef(object, type="alpha"))), + ZIP = { + psi <- plogis(coef(object['psi'])) + M <- rzip(R, lambda=lam, psi=psi) + } + ) N <- rbinom(R*T, size=M, prob=phi.mat) N <- matrix(N, nrow=R, ncol=T, byrow=FALSE) diff --git a/man/gpcount.Rd b/man/gpcount.Rd index f847baa..b7e3b1a 100644 --- a/man/gpcount.Rd +++ b/man/gpcount.Rd @@ -10,7 +10,7 @@ population size, availability, and detection probability. } \usage{ gpcount(lambdaformula, phiformula, pformula, data, -mixture = c("P", "NB"), K, starts, method = "BFGS", se = TRUE, +mixture = c("P", "NB", "ZIP"), K, starts, method = "BFGS", se = TRUE, engine = c("C", "R"), threads=1, ...) } \arguments{ @@ -27,7 +27,8 @@ engine = c("C", "R"), threads=1, ...) An object of class unmarkedFrameGPC } \item{mixture}{ - Either "P" or "NB" for Poisson and negative binomial distributions + Either "P", "NB", or "ZIP" for Poisson, negative binomial, or + zero-inflated Poisson distributions } \item{K}{ The maximum possible value of M, the super-population size. @@ -58,11 +59,13 @@ engine = c("C", "R"), threads=1, ...) \details{ The latent transect-level super-population abundance distribution \eqn{f(M | \mathbf{\theta})}{f(M | theta)} can be set as either a - Poisson or a negative binomial random variable, depending on the + Poisson, negative binomial, or zero-inflated Poisson random variable, depending on the setting of the \code{mixture} argument. The expected value of \eqn{M_i} is \eqn{\lambda_i}{lambda_i}. If \eqn{M_i \sim NB}{M_i ~ NB}, then an additional parameter, \eqn{\alpha}{alpha}, describes - dispersion (lower \eqn{\alpha}{alpha} implies higher variance). + dispersion (lower \eqn{\alpha}{alpha} implies higher variance). If + \eqn{M_i \sim ZIP}{M_i ~ ZIP}, then an additional zero-inflation parameter + \eqn{\psi}{psi} is estimated. The number of individuals available for detection at time j is a modeled as binomial: diff --git a/tests/testthat/test_gpcount.R b/tests/testthat/test_gpcount.R index 022c2ce..7f23a76 100644 --- a/tests/testthat/test_gpcount.R +++ b/tests/testthat/test_gpcount.R @@ -117,3 +117,50 @@ test_that("gpcount R and C++ engines give same results",{ fmR <- gpcount(~x, ~yr, ~o1, data = umf, K=23, engine="R", control=list(maxit=1)) expect_equal(coef(fm), coef(fmR)) }) + +test_that("gpcount ZIP mixture works", { + + set.seed(123) + M <- 100 + J <- 5 + T <- 3 + lam <- 3 + psi <- 0.3 + p <- 0.5 + phi <- 0.7 + + y <- array(NA, c(M, J, T)) + + N <- unmarked:::rzip(M, lambda=lam, psi=psi) + + for (i in 1:M){ + for (t in 1:T){ + n <- rbinom(1, N[i], phi) + for (j in 1:J){ + y[i,j,t] <- rbinom(1, n, p) + } + } + } + + ywide <- cbind(y[,,1], y[,,2], y[,,3]) + umf <- unmarkedFrameGPC(y=ywide, numPrimary=T) + + # check R and C engines match + fitC <- gpcount(~1, ~1, ~1, umf, mixture="ZIP", K=10, engine="C", + se=FALSE, control=list(maxit=1)) + fitR <- gpcount(~1, ~1, ~1, umf, mixture="ZIP", K=10, engine="R", + se=FALSE, control=list(maxit=1)) + expect_equal(coef(fitC), coef(fitR)) + + # Properly fit model + fit <- gpcount(~1, ~1, ~1, umf, mixture="ZIP", K=10) + expect_equivalent(coef(fit), c(1.02437, 0.85104, -0.019588, -1.16139), tol=1e-4) + + # Check methods + ft <- fitted(fit) + r <- ranef(fit) + b <- bup(r) + #plot(N, b) + #abline(a=0, b=1) + s <- simulate(fit) +}) -- cgit v1.2.3