aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKen Kellner <ken@kenkellner.com>2024-03-01 12:22:38 -0500
committerKen Kellner <ken@kenkellner.com>2024-03-01 12:22:38 -0500
commitb66d1e79576787f157f555fe74e380a3f30a3c1f (patch)
tree23e56068b415d392b0d3ceb0d977071dc6066d57
parent85603a57b6d5b3721fd8e5fc4dff50f3e666cc70 (diff)
Better automatic value of K in multinomial models. Fixes #275
-rw-r--r--DESCRIPTION4
-rw-r--r--R/distsampOpen.R18
-rw-r--r--R/gmultmix.R2
-rw-r--r--R/multmixOpen.R7
-rw-r--r--R/utils.R30
-rw-r--r--tests/testthat/test_distsampOpen.R14
-rw-r--r--tests/testthat/test_gdistremoval.R15
-rw-r--r--tests/testthat/test_gdistsamp.R10
-rw-r--r--tests/testthat/test_gmultmix.R29
-rw-r--r--tests/testthat/test_multmixOpen.R9
-rw-r--r--tests/testthat/test_simulate.R2
11 files changed, 104 insertions, 36 deletions
diff --git a/DESCRIPTION b/DESCRIPTION
index c7e5799..33c3563 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: unmarked
-Version: 1.4.1.9001
-Date: 2024-01-23
+Version: 1.4.1.9002
+Date: 2024-03-01
Type: Package
Title: Models for Data from Unmarked Animals
Authors@R: c(
diff --git a/R/distsampOpen.R b/R/distsampOpen.R
index 92588a4..c062a0f 100644
--- a/R/distsampOpen.R
+++ b/R/distsampOpen.R
@@ -85,23 +85,7 @@ distsampOpen <- function(lambdaformula, gammaformula, omegaformula, pformula,
last <- apply(!ytna, 1, function(x) max(which(x)))
first1 <- which(first==1)[1]
- #K stuff
- if(missing(K)) {
- K <- max(y, na.rm=T) + 20
- warning("K was not specified and was set to ", K, ".")
- }
-
- J <- ncol(data@y) / data@numPrimary
- inds <- split(1:ncol(data@y), ceiling(1:ncol(data@y)/J))
- Tobs <- sapply(1:length(inds), function(i){
- rowSums(data@y[,inds[[i]], drop=FALSE], na.rm=TRUE)
- })
- Kmin <- max(Tobs, na.rm=TRUE)
-
- if(K < Kmin){
- stop("Specified K is too small, must be larger than the max total count in a primary period",
- call.=FALSE)
- }
+ K <- check_K_multinomial(K, K_adjust = 20, D$y, T) # in utils.R
k <- 0:K
lk <- length(k)
#Some k-related indices to avoid repeated calculations in likelihood
diff --git a/R/gmultmix.R b/R/gmultmix.R
index a6da9d5..3742609 100644
--- a/R/gmultmix.R
+++ b/R/gmultmix.R
@@ -29,7 +29,7 @@ if(is.null(Xlam.offset)) Xlam.offset <- rep(0, nrow(Xlam))
if(is.null(Xphi.offset)) Xphi.offset <- rep(0, nrow(Xphi))
if(is.null(Xdet.offset)) Xdet.offset <- rep(0, nrow(Xdet))
-if(missing(K) || is.null(K)) K <- max(y, na.rm=TRUE) + 100
+K <- check_K_multinomial(K, K_adjust = 100, y, data@numPrimary)
k <- 0:K
lk <- length(k)
M <- nrow(y)
diff --git a/R/multmixOpen.R b/R/multmixOpen.R
index 3d8eede..56bec1a 100644
--- a/R/multmixOpen.R
+++ b/R/multmixOpen.R
@@ -64,12 +64,7 @@ multmixOpen <- function(lambdaformula, gammaformula, omegaformula, pformula,
if(is.null(Xiota.offset)) Xiota.offset <- rep(0, M*(T-1))
#K stuff
- if(missing(K)) {
- K <- max(y, na.rm=T) + 20
- warning("K was not specified and was set to ", K, ".")
- }
- if(K <= max(y, na.rm = TRUE))
- stop("specified K is too small. Try a value larger than any observation")
+ K <- check_K_multinomial(K, K_adjust = 20, D$y, T)
k <- 0:K
lk <- length(k)
#Some k-related indices to avoid repeated calculations in likelihood
diff --git a/R/utils.R b/R/utils.R
index 8b977eb..f5d8bfd 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -927,3 +927,33 @@ lapply2 <- function(X, FUN, ..., cl = NULL){
}
lapply(X=X, FUN=FUN, ...)
}
+
+# Determine automatic K or check provided K for multinomial-type models
+# (gdistsamp, gmultmix, distsampOpen, multmixOpen, gdistremoval)
+check_K_multinomial <- function(K, K_adjust = 0, y, T = 1){
+
+ safe_sum <- function(x){
+ if(all(is.na(x))) return(NA) else return(sum(x, na.rm=TRUE))
+ }
+
+ if(T == 1){
+ yt <- apply(y, 1, safe_sum)
+ } else {
+ M <- nrow(y)
+ J <- ncol(y) / T
+ ya <- array(y, c(M, J, T))
+ ya <- aperm(ya, c(1,3,2))
+ yt <- apply(ya, 1:2, safe_sum)
+ }
+ Kmin <- max(yt, na.rm = TRUE)
+ if(missing(K)){
+ Kout <- Kmin + K_adjust
+ warning("K was not specified and was set to ", Kout, ".", call.=FALSE)
+ } else {
+ if(K <= Kmin){
+ stop("specified K is too small. Try a value larger than the max count at any site", call.=FALSE)
+ }
+ Kout <- K
+ }
+ Kout
+}
diff --git a/tests/testthat/test_distsampOpen.R b/tests/testthat/test_distsampOpen.R
index 2797b80..fe915f4 100644
--- a/tests/testthat/test_distsampOpen.R
+++ b/tests/testthat/test_distsampOpen.R
@@ -132,6 +132,20 @@ test_that("dso halfnorm key function works",{
# Check K that is too small
expect_error(distsampOpen(~1, ~1, ~1, ~x1, data = umf, K=5,keyfun="halfnorm"))
+ # Check auto-K
+ fm <- expect_warning(distsampOpen(~1, ~1, ~1, ~x1, data = umf, keyfun="halfnorm"))
+ expect_false(fm@K == max(umf@y)+20) # the wrong way
+
+ # Check that automatic K value is correct
+ ya <- array(umf@y, c(50, 4, 7))
+ ya <- aperm(ya, c(1,3,2))
+ yt <- apply(ya, 1:2, function(x) {
+ if(all(is.na(x)))
+ return(NA)
+ else return(sum(x, na.rm=TRUE))
+ })
+ expect_equal(max(yt)+20, fm@K) # the correct way
+
fm <- distsampOpen(~1, ~1, ~1, ~x1, data = umf, K=10,keyfun="halfnorm")
expect_equivalent(coef(fm), c(1.4185,1.0471,-0.8275,3.1969,-0.0790),
diff --git a/tests/testthat/test_gdistremoval.R b/tests/testthat/test_gdistremoval.R
index fdc2ca1..600d84b 100644
--- a/tests/testthat/test_gdistremoval.R
+++ b/tests/testthat/test_gdistremoval.R
@@ -282,6 +282,10 @@ test_that("gdistremoval can fit models",{
expect_is(fit, "unmarkedFitGDR")
expect_equivalent(coef(fit), c(1.4571,0.3374,4.0404,-1.65389,0.16789), tol=1e-3)
+ # Check automatic setting of K
+ yt <- apply(umf@yDistance, 1, sum)
+ expect_equal(max(yt)+40, fit@K)
+
# With unequal period lengths
umf2 <- unmarkedFrameGDR(dat$y, dat$yRem, siteCovs=sc, obsCovs=oc,
dist.breaks=c(0,25,50,75,100), unitsIn='m',
@@ -445,6 +449,17 @@ test_that("multi-period data works with gdistremoval",{
c(2.1013,-0.1142,-1.3187,-0.1483,3.3981,-0.5142,0.233678),
tol=1e-3)
+ # Check that automatic K value is correct
+ ya <- array(umf@y, c(30, 4, 5))
+ ya <- aperm(ya, c(1,3,2))
+ yt <- apply(ya, 1:2, function(x) {
+ if(all(is.na(x)))
+ return(NA)
+ else return(sum(x, na.rm=TRUE))
+ })
+ expect_false(max(umf@y)+40 == fit@K) # the incorrect way
+ expect_equal(max(yt)+40, fit@K) # the correct way
+
# Predict
pr <- predict(fit, 'phi')
expect_equal(dim(pr), c(30*5,4))
diff --git a/tests/testthat/test_gdistsamp.R b/tests/testthat/test_gdistsamp.R
index 96636d0..1b232b2 100644
--- a/tests/testthat/test_gdistsamp.R
+++ b/tests/testthat/test_gdistsamp.R
@@ -96,6 +96,16 @@ test_that("gdistsamp with halfnorm keyfunction works",{
control=list(maxit=1))
expect_equal(coef(fm_R), coef(fm_C))
+ # Check that automatic K value is correct
+ ya <- array(umf@y, c(R, J, T))
+ ya <- aperm(ya, c(1,3,2))
+ yt <- apply(ya, 1:2, function(x) {
+ if(all(is.na(x)))
+ return(NA)
+ else return(sum(x, na.rm=TRUE))
+ })
+ expect_equal(max(yt)+100, fm_C@K)
+
#When output = density
#fm_R <- gdistsamp(~1, ~1, ~1, umf, output="density", se=FALSE, engine="R")
fm_C <- gdistsamp(~1, ~1, ~1, umf, output="density", se=FALSE, engine="C")
diff --git a/tests/testthat/test_gmultmix.R b/tests/testthat/test_gmultmix.R
index 5caafa6..f8e73b6 100644
--- a/tests/testthat/test_gmultmix.R
+++ b/tests/testthat/test_gmultmix.R
@@ -76,8 +76,19 @@ test_that("gmultmix removal model works",{
umf <- unmarkedFrameGMM(y = y, siteCovs = siteCovs, obsCovs = obsCovs,
yearlySiteCovs = yrSiteCovs, type="removal", numPrimary=2)
#fm_R <- gmultmix(~x, ~yr, ~o1 + o2, data = umf, K=23, engine="R")
- expect_warning(fm_C <- gmultmix(~x, ~yr, ~o1 + o2, data = umf, K=23, engine="C"))
-
+ expect_warning(fm_C <- gmultmix(~x, ~yr, ~o1 + o2, data = umf, engine="C"))
+
+ # Check K calculation
+ expect_false(fm_C@K == max(y) + 100) # wrong way to do it
+ # Correct way
+ ya <- array(y, c(5, 2, 2))
+ yt <- apply(ya, c(1,3), sum, na.rm=TRUE)
+ expect_true(fm_C@K == max(yt) + 100)
+ # Throw error when K is too low
+ expect_error(expect_warning(gmultmix(~x, ~yr, ~o1+o2, data=umf, K = 5)))
+
+ # Fit with the old K to keep tests correct
+ expect_warning(fm_C <- gmultmix(~x, ~yr, ~o1 + o2, data = umf, K = 23, engine="C"))
expect_equal(fm_C@sitesRemoved, 3)
coef_truth <- c(2.50638554, 0.06226627, 0.21787839, 6.46029769, -1.51885928,
-0.03409375, 0.43424295)
@@ -164,7 +175,7 @@ test_that("gmultmix double model works",{
umf2 <- umf[1:5,]
expect_equal(numSites(umf2), 5)
- fm <- gmultmix(~1,~1,~observer, umf)
+ fm <- expect_warning(gmultmix(~1,~1,~observer, umf))
expect_equivalent(coef(fm), c(2.2586,0.17385,-0.7425), tol=1e-4)
gp <- getP(fm)
@@ -195,7 +206,7 @@ test_that("gmultmix dependent double model works",{
expect_warning(umf <- unmarkedFrameGMM(y=y[,1:2], obsCovs=list(observer=observer),
type="depDouble",numPrimary=1))
- fm <- gmultmix(~1,~1,~observer, umf)
+ fm <- expect_warning(gmultmix(~1,~1,~observer, umf))
expect_equivalent(coef(fm), c(1.7762,0.2493,0.2008), tol=1e-4)
gp <- getP(fm)
@@ -309,14 +320,14 @@ test_that("gmultmix ZIP model works",{
type="double",numPrimary=1))
# Compare R and C engines
- fmR <- gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="R", se=FALSE,
- control=list(maxit=1))
- fmC <- gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="C", se=FALSE,
- control=list(maxit=1))
+ fmR <- expect_warning(gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="R", se=FALSE,
+ control=list(maxit=1)))
+ fmC <- expect_warning(gmultmix(~1, ~1, ~observer, umf, mixture="ZIP", engine="C", se=FALSE,
+ control=list(maxit=1)))
expect_equal(coef(fmR), coef(fmC))
# Fit model full
- fm <- gmultmix(~1,~1,~observer, umf, mixture="ZIP")
+ fm <- expect_warning(gmultmix(~1,~1,~observer, umf, mixture="ZIP"))
expect_equivalent(coef(fm), c(2.2289,0.1858,-0.9285,-0.9501), tol=1e-4)
# Check methods
diff --git a/tests/testthat/test_multmixOpen.R b/tests/testthat/test_multmixOpen.R
index c66d3f9..b0f162a 100644
--- a/tests/testthat/test_multmixOpen.R
+++ b/tests/testthat/test_multmixOpen.R
@@ -64,6 +64,15 @@ test_that("multmixOpen can fit removal models",{
expect_equivalent(coef(fit), c(1.38860,0.043406,-0.68448,
1.40703,0.03174,-0.00235), tol=1e-5)
+ # Check auto K setting
+ fit <- expect_warning(multmixOpen(~x1, ~1, ~1, ~x1, data=umf))
+ expect_false(fit@K == max(umf@y) + 20) # Wrong way
+
+ ya <- array(umf@y, c(100, 3, 5))
+ yt <- apply(ya, c(1,3), sum, na.rm=TRUE)
+
+ expect_true(fit@K == max(yt) + 20) # correct way
+
#Check predict
pr <- predict(fit, type='lambda')
expect_equivalent(as.numeric(pr[1,]),
diff --git a/tests/testthat/test_simulate.R b/tests/testthat/test_simulate.R
index be8a356..32885f1 100644
--- a/tests/testthat/test_simulate.R
+++ b/tests/testthat/test_simulate.R
@@ -111,7 +111,7 @@ test_that("simulate can generate new datasets from scratch",{
forms_gmm <- list(lambda=~elev, phi=~1, det=~1)
umf9 <- simulate("gmultmix", formulas=forms_gmm, design=design_colext, coefs=cf_gmm,
type='removal')
- fm <- gmultmix(~elev,~1,~1, umf9)
+ fm <- expect_warning(gmultmix(~elev,~1,~1, umf9))
expect_equivalent(coef(fm), c(1.9529,0.5321,0.0529,-0.0373), tol=1e-4)
#gpcount