diff options
author | Ken Kellner <ken@kenkellner.com> | 2023-08-04 14:58:23 -0400 |
---|---|---|
committer | Ken Kellner <ken@kenkellner.com> | 2023-08-04 14:58:23 -0400 |
commit | 74e73f176d019b9ad64fa7199f00b04785ae7668 (patch) | |
tree | 37aaa0391bb0f34b098a789ede92bc9021c18c15 | |
parent | de5c226f82f04d6df0c085eac436ff45ed468a4c (diff) |
Fix missing value handling and add tests
-rw-r--r-- | NAMESPACE | 2 | ||||
-rw-r--r-- | R/goccu.R | 56 | ||||
-rw-r--r-- | tests/testthat/test_goccu.R | 150 |
3 files changed, 186 insertions, 22 deletions
@@ -61,7 +61,7 @@ export(unmarkedEstimate, fitList, mapInfo, unmarkedFrame, unmarkedFrameDS, unmarkedMultFrame, unmarkedFrameGMM, unmarkedFramePCO, unmarkedFrameGDS, unmarkedFrameGPC, unmarkedFrameOccuMulti, unmarkedFrameOccuMS, unmarkedFrameOccuTTD, unmarkedFrameDSO, - unmarkedFrameMMO, unmarkedFrameGDR) + unmarkedFrameMMO, unmarkedFrameGDR, unmarkedFrameGOccu) # Formatting export(csvToUMF, formatLong, formatWide, formatMult, formatDistData) @@ -13,6 +13,16 @@ setMethod("getDesign", "unmarkedFrameGOccu", out }) +unmarkedFrameGOccu <- function(y, siteCovs=NULL, obsCovs=NULL, numPrimary, + yearlySiteCovs=NULL) { + y[y > 1] <- 1 + if(numPrimary < 2) stop("numPrimary < 2, use occu instead") + umf <- unmarkedFrameGPC(y, siteCovs=siteCovs, obsCovs=obsCovs, + numPrimary=numPrimary, yearlySiteCovs=NULL) + class(umf) <- "unmarkedFrameGOccu" + umf +} + goccu <- function(psiformula, phiformula, pformula, data, linkPsi = c("logit", "cloglog"), starts, method = "BFGS", se = TRUE, ...){ @@ -63,6 +73,7 @@ goccu <- function(psiformula, phiformula, pformula, data, for (i in 1:M){ for (t in 1:T){ for (j in 1:J){ + if(is.na(y_array[j,t,i])) next if(y_array[j, t, i] == 1){ known_present[i] <- 1 known_available[i,t] <- 1 @@ -95,7 +106,7 @@ goccu <- function(psiformula, phiformula, pformula, data, DLL = "unmarked_TMBExports", silent = TRUE) # Optimize TMB object, print and save results - if(missing(starts)) starts <- tmb_mod$par + if(missing(starts) || is.null(starts)) starts <- tmb_mod$par opt <- optim(starts, fn = tmb_mod$fn, gr = tmb_mod$gr, method = method, hessian = se, ...) @@ -170,9 +181,11 @@ setMethod("get_orig_data", "unmarkedFitGOccu", function(object, type, ...){ }) setMethod("getP", "unmarkedFitGOccu", - function(object){ - p <- predict(object, "det", level=NULL)$Predicted - p <- matrix(p, nrow=numSites(object@data), ncol=obsNum(object@data), + function(object, na.rm=FALSE){ + gd <- getDesign(object@data, object@formula, na.rm=na.rm) + p <- drop(plogis(gd$Xdet %*% coef(object, "det"))) + M <- numSites(object@data) + p <- matrix(p, nrow=M, ncol=obsNum(object@data), byrow=TRUE) p }) @@ -181,12 +194,13 @@ setMethod("fitted", "unmarkedFitGOccu", function(object, na.rm= FALSE){ M <- numSites(object@data) - JT <- obsNum(object@data) + JT <- obsNum(object@data) + gd <- getDesign(object@data, object@formula, na.rm=na.rm) - psi <- predict(object, "psi", level=NULL)$Predicted + psi <- drop(plogis(gd$Xpsi %*% coef(object, "psi"))) psi <- matrix(psi, nrow=M, ncol=JT) - phi <- predict(object, "phi", level=NULL)$Predicted + phi <- drop(plogis(gd$Xphi %*% coef(object, "phi"))) phi <- rep(phi, each = JT / object@data@numPrimary) phi <- matrix(phi, nrow=M, ncol=JT, byrow=TRUE) @@ -195,6 +209,7 @@ setMethod("fitted", "unmarkedFitGOccu", psi * phi * p }) + # based on ranef for GPC setMethod("ranef", "unmarkedFitGOccu", function(object, ...){ @@ -203,11 +218,11 @@ setMethod("ranef", "unmarkedFitGOccu", function(object, ...){ T <- object@data@numPrimary J <- JT / T - gd <- getDesign(object@data, object@formula) + gd <- getDesign(object@data, object@formula, na.rm=FALSE) y_array <- array(t(gd$y), c(J, T, M)) - psi <- predict(object, "psi", level=NULL)$Predicted - phi <- predict(object, "phi", level=NULL)$Predicted + psi <- drop(plogis(gd$Xpsi %*% coef(object, "psi"))) + phi <- drop(plogis(gd$Xphi %*% coef(object, "phi"))) phi <- matrix(phi, nrow=M, ncol=T, byrow=TRUE) p <- getP(object) p_array <- array(t(p), c(J, T, M)) @@ -253,36 +268,35 @@ setMethod("ranef", "unmarkedFitGOccu", function(object, ...){ setMethod("simulate", "unmarkedFitGOccu", - function(object, nsim = 1, seed = NULL, na.rm = TRUE){ + function(object, nsim = 1, seed = NULL, na.rm = FALSE){ - M <- numSites(object@data) - JT <- obsNum(object@data) + gd <- getDesign(object@data, object@formula, na.rm=FALSE) + M <- nrow(gd$y) T <- object@data@numPrimary + JT <- ncol(gd$y) J <- JT / T - - gd <- unmarked:::getDesign(object@data, object@formula) y_array <- array(t(gd$y), c(J, T, M)) - psi <- predict(object, "psi", level=NULL)$Predicted - phi <- predict(object, "phi", level=NULL)$Predicted + psi <- drop(plogis(gd$Xpsi %*% coef(object, "psi"))) + phi <- drop(plogis(gd$Xphi %*% coef(object, "phi"))) phi <- matrix(phi, nrow=M, ncol=T, byrow=TRUE) p <- getP(object) sim_list <- list() for (i in 1:nsim){ - z <- rbinom(M, 1, psi) + z <- suppressWarnings(rbinom(M, 1, psi)) z <- matrix(z, nrow=M, ncol=T) - zz <- rbinom(M*T, 1, phi*z) + zz <- suppressWarnings(rbinom(M*T, 1, phi*z)) zz <- matrix(zz, M, T) colrep <- rep(1:T, each=J) zz <- zz[,colrep] - y <- rbinom(M*T*J, 1, zz*p) + y <- suppressWarnings(rbinom(M*T*J, 1, zz*p)) y <- matrix(y, M, JT) - if(na.rm) y[which(is.na(object@data@y))] <- NA + if(na.rm) y[which(is.na(gd$y))] <- NA sim_list[[i]] <- y } diff --git a/tests/testthat/test_goccu.R b/tests/testthat/test_goccu.R new file mode 100644 index 0000000..4572515 --- /dev/null +++ b/tests/testthat/test_goccu.R @@ -0,0 +1,150 @@ +context("goccu fitting function") +skip_on_cran() + +set.seed(123) +M <- 100 +T <- 5 +J <- 4 + +psi <- 0.5 +phi <- 0.3 +p <- 0.4 + +z <- rbinom(M, 1, psi) +zmat <- matrix(z, nrow=M, ncol=T) + +zz <- rbinom(M*T, 1, zmat*phi) +zz <- matrix(zz, nrow=M, ncol=T) + +zzmat <- zz[,rep(1:T, each=J)] +y <- rbinom(M*T*J, 1, zzmat*p) +y <- matrix(y, M, J*T) +umf <- unmarkedMultFrame(y=y, numPrimary=T) + +unmarkedFrameGOccu <- function(y, siteCovs=NULL, obsCovs=NULL, numPrimary, + yearlySiteCovs=NULL) { + y[y > 1] <- 1 + if(numPrimary < 2) stop("numPrimary < 2, use occu instead") + umf <- unmarkedFrameGPC(y, siteCovs=siteCovs, obsCovs=obsCovs, + numPrimary=numPrimary, yearlySiteCovs=NULL) + class(umf) <- "unmarkedFrameGOccu" + umf +} + +test_that("unmarkedFrameGOccu can be constructed", { + set.seed(123) + sc <- data.frame(x=rnorm(M)) + ysc <- matrix(rnorm(M*T), M, T) + oc <- matrix(rnorm(M*T*J), M, T*J) + + umf2 <- unmarkedFrameGOccu(y, siteCovs=sc, obsCovs=list(x2=oc), + yearlySiteCovs=list(x3=ysc), numPrimary=T) + expect_is(umf2, "unmarkedFrameGOccu") +}) + +test_that("goccu can fit models", { + + # Without covariates + mod <- goccu(~1, ~1, ~1, umf) + expect_equivalent(coef(mod), c(0.16129, -0.97041, -0.61784), tol=1e-5) + + # With covariates + set.seed(123) + sc <- data.frame(x=rnorm(M)) + ysc <- matrix(rnorm(M*T), M, T) + oc <- matrix(rnorm(M*T*J), M, T*J) + + umf2 <- unmarkedMultFrame(y=y, siteCovs=sc, yearlySiteCovs=list(x2=ysc), + obsCovs=list(x3=oc), numPrimary=T) + + mod2 <- goccu(~x, ~x2, ~x3, umf2) + expect_equivalent(coef(mod2), c(0.18895, -0.23629,-0.97246,-0.094335,-0.61808, + -0.0040056), tol=1e-5) + + # predict + pr <- predict(mod2, 'psi') + expect_equal(dim(pr), c(M, 4)) + expect_equal(pr$Predicted[1], 0.5796617, tol=1e-5) + + # phi should not drop last level + pr2 <- predict(mod2, 'phi') + expect_equal(dim(pr2), c(M*T, 4)) + + nd <- data.frame(x=1) + pr3 <- predict(mod2, 'psi', newdata=nd) + expect_true(nrow(pr3) == 1) + expect_equal(pr3$Predicted[1], 0.488168, tol=1e-5) + + # Other methods + ft <- fitted(mod2) + expect_equal(dim(ft), dim(umf2@y)) + expect_true(all(ft >=0 & ft <= 1)) + + res <- residuals(mod2) + expect_equal(dim(res), dim(umf2@y)) + + gp <- getP(mod2) + expect_equal(dim(gp), dim(umf2@y)) + expect_equal(gp[1,1], 0.349239, tol=1e-5) + + set.seed(123) + s <- simulate(mod2, nsim=2) + expect_equal(length(s), 2) + expect_equal(dim(s[[1]]), dim(mod2@data@y)) + simumf <- umf2 + simumf@y <- s[[1]] + simmod <- update(mod2, data=simumf) + expect_equivalent(coef(simmod), + c(0.174991, -0.27161, -1.32766, 0.054459,-0.41610,-0.073922), tol=1e-5) + + r <- ranef(mod2) + expect_equal(dim(r@post), c(M, 2, 1)) + expect_equal(sum(bup(r)), 53.13565, tol=1e-4) + + pb <- parboot(mod2, nsim=2) + expect_is(pb, "parboot") + + npb <- nonparboot(mod2, B=2, bsType='site') + + +}) + +test_that("goccu handles missing values", { + + set.seed(123) + y2 <- y + y2[1,1] <- NA + y2[2,1:J] <- NA + + sc <- data.frame(x=rnorm(M)) + sc$x[3] <- NA + ysc <- matrix(rnorm(M*T), M, T) + ysc[4,1] <- NA + oc <- matrix(rnorm(M*T*J), M, T*J) + oc[5,1] <- NA + oc[6,1:J] <- NA + + umf_na <- unmarkedMultFrame(y=y2, siteCovs=sc, yearlySiteCovs=list(x2=ysc), + obsCovs=list(x3=oc), numPrimary=T) + + mod_na <- expect_warning(goccu(~x, ~x2, ~x3, umf_na)) + + pr <- expect_warning(predict(mod_na, 'psi')) + expect_equal(nrow(pr), M-1) + + # Need to re-write these to use the design matrix instead of predict + gp <- getP(mod_na) + expect_equal(dim(gp), c(100, 20)) + expect_true(is.na(gp[5,1])) + expect_true(all(is.na(gp[6, 1:4]))) + s <- simulate(mod_na) + expect_equal(dim(s[[1]]), dim(mod_na@data@y)) + ft <- fitted(mod_na) + expect_equal(dim(ft), dim(mod_na@data@y)) + r <- ranef(mod_na) + expect_equal(dim(r@post), c(100, 2, 1)) + expect_true(is.na(bup(r)[3])) + + pb <- expect_warning(parboot(mod_na, nsim=2)) + expect_is(pb, "parboot") +}) |