diff options
author | Ken Kellner <ken@kenkellner.com> | 2023-04-27 11:20:12 -0400 |
---|---|---|
committer | Ken Kellner <ken@kenkellner.com> | 2023-04-27 11:20:12 -0400 |
commit | c82e63947d7df7dfc896066e51dbf63bda3babf4 (patch) | |
tree | a2e6386ed34f441bb10cab6ec6288b258ee63445 | |
parent | d5caacc1c20d8dbc253dfa8d31c26af3119fa1bf (diff) |
Better handling of missing values in gdistremoval
-rw-r--r-- | DESCRIPTION | 4 | ||||
-rw-r--r-- | R/gdistremoval.R | 84 | ||||
-rw-r--r-- | tests/testthat/test_gdistremoval.R | 46 |
3 files changed, 107 insertions, 27 deletions
diff --git a/DESCRIPTION b/DESCRIPTION index de0ed2f..e51f0d7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: unmarked -Version: 1.2.5.9013 -Date: 2023-04-05 +Version: 1.2.5.9014 +Date: 2023-04-27 Type: Package Title: Models for Data from Unmarked Animals Authors@R: c( diff --git a/R/gdistremoval.R b/R/gdistremoval.R index dc91934..b253625 100644 --- a/R/gdistremoval.R +++ b/R/gdistremoval.R @@ -219,9 +219,33 @@ setMethod("getDesign", "unmarkedFrameGDR", Zphi <- get_Z(formula$phiformula, ysc) Zdist <- get_Z(formula$distanceformula, ysc) Zrem <- get_Z(formula$removalformula, oc) + + # Check if there are missing yearlySiteCovs + ydist_mat <- apply(matrix(yDist, nrow=M*T, byrow=TRUE), 1, function(x) any(is.na(x))) + yrem_mat <- apply(matrix(yRem, nrow=M*T, byrow=TRUE), 1, function(x) any(is.na(x))) + ok_missing_phi_covs <- ydist_mat | yrem_mat + missing_phi_covs <- apply(Xphi, 1, function(x) any(is.na(x))) + if(!all(which(missing_phi_covs) %in% which(ok_missing_phi_covs))){ + stop("Missing yearlySiteCovs values for some observations that are not missing", call.=FALSE) + } + + # Check if there are missing dist covs + missing_dist_covs <- apply(Xdist, 1, function(x) any(is.na(x))) + ok_missing_dist_covs <- ydist_mat + if(!all(which(missing_dist_covs) %in% which(ok_missing_dist_covs))){ + stop("Missing yearlySiteCovs values for some distance observations that are not missing", call.=FALSE) + } - if(any(is.na(Xlam)) | any(is.na(Xphi)) | any(is.na(Xdist)) | any(is.na(Xrem))){ - stop("gdistremoval does not currently handle missing values in covariates", call.=FALSE) + # Check if there are missing rem covs + missing_obs_covs <- apply(Xrem, 1, function(x) any(is.na(x))) + missing_obs_covs <- apply(matrix(missing_obs_covs, nrow=M*T, byrow=TRUE), 1, function(x) any(x)) + ok_missing_obs_covs <- yrem_mat + if(!all(which(missing_obs_covs) %in% which(ok_missing_obs_covs))){ + stop("Missing obsCovs values for some removal observations that are not missing", call.=FALSE) + } + + if(any(is.na(Xlam))){ + stop("gdistremoval does not currently handle missing values in siteCovs", call.=FALSE) } list(yDist=yDist, yRem=yRem, Xlam=Xlam, Xphi=Xphi, Xdist=Xdist, Xrem=Xrem, @@ -541,14 +565,14 @@ setMethod("fitted", "unmarkedFitGDR", function(object){ dist[,,t] <- dist[,,t] * p_rem[,rep(t,ncol(dist[,,t]))] if(T > 1){ rem[,,t] <- rem[,,t] * phi[,rep(t, ncol(rem[,,t]))] - det[,,t] <- dist[,,t] * phi[,rep(t, ncol(dist[,,t]))] + dist[,,t] <- dist[,,t] * phi[,rep(t, ncol(dist[,,t]))] } } if(T > 1){ - rem_final <- rem[,,t] - dist_final <- det[,,t] - for (t in 1:T){ + rem_final <- rem[,,1] + dist_final <- dist[,,1] + for (t in 2:T){ rem_final <- cbind(rem_final, rem[,,t]) dist_final <- cbind(dist_final, dist[,,t]) } @@ -615,7 +639,7 @@ setMethod("ranef", "unmarkedFitGDR", function(object){ pr <- apply(cp, c(1,3), sum) prRem <- apply(pif, c(1,3), sum) - post <- array(0, c(M, K+1, T)) + post <- array(0, c(M, K+1, 1)) colnames(post) <- 0:K for (i in 1:M){ if(mixture=="P"){ @@ -625,19 +649,26 @@ setMethod("ranef", "unmarkedFitGDR", function(object){ } else if(mixture=="ZIP"){ f <- dzip(0:K, lam[i], alpha) } - g <- rep(1, K+1) - for (t in 1:T){ - if(has_na[i,t]){ - g <- rep(NA,K+1) - next - } - for (k in 1:(K+1)){ - g[k] <- g[k] * dbinom(ysum[i,t], k-1, prob=pr[i,t]*prRem[i,t]*phi[i,t], - log=FALSE) + + # All sampling periods at site i have at least one missing value + if(all(has_na[i,])){ + g <- rep(NA,K+1) + next + } else { + # At least one sampling period wasn't missing + g <- rep(1, K+1) + for (t in 1:T){ + if(has_na[i,t]){ + next + } + for (k in 1:(K+1)){ + g[k] <- g[k] * dbinom(ysum[i,t], k-1, prob=pr[i,t]*prRem[i,t]*phi[i,t], + log=FALSE) + } } } fg <- f*g - post[i,,t] <- fg/sum(fg) + post[i,,1] <- fg/sum(fg) } new("unmarkedRanef", post=post) @@ -702,17 +733,22 @@ setMethod("simulate", "unmarkedFitGDR", function(object, nsim, seed=NULL, na.rm= yrem <- matrix(NA, M, T*Jrem) for (m in 1:M){ - ysum <- rbinom(T, N[m], p_total[m,]) + ysum <- suppressWarnings(rbinom(T, N[m], p_total[m,])) ydist_m <- yrem_m <- c() for (t in 1:T){ - rem_class <- sample(1:Jrem, ysum[t], replace=TRUE, prob=rem_scaled[m,,t]) - rem_class <- factor(rem_class, levels=1:Jrem) - yrem_m <- c(yrem_m, as.numeric(table(rem_class))) - dist_class <- sample(1:Jdist, ysum[t], replace=TRUE, prob=dist_scaled[m,,t]) - dist_class <- factor(dist_class, levels=1:Jdist) - ydist_m <- c(ydist_m, as.numeric(table(dist_class))) + if(is.na(ysum[t])){ + yrem_m <- c(yrem_m, rep(NA, Jrem)) + ydist_m <- c(ydist_m, rep(NA, Jdist)) + } else { + rem_class <- sample(1:Jrem, ysum[t], replace=TRUE, prob=rem_scaled[m,,t]) + rem_class <- factor(rem_class, levels=1:Jrem) + yrem_m <- c(yrem_m, as.numeric(table(rem_class))) + dist_class <- sample(1:Jdist, ysum[t], replace=TRUE, prob=dist_scaled[m,,t]) + dist_class <- factor(dist_class, levels=1:Jdist) + ydist_m <- c(ydist_m, as.numeric(table(dist_class))) + } } stopifnot(length(ydist_m)==ncol(ydist)) stopifnot(length(yrem_m)==ncol(yrem)) diff --git a/tests/testthat/test_gdistremoval.R b/tests/testthat/test_gdistremoval.R index 3e205fb..fdc2ca1 100644 --- a/tests/testthat/test_gdistremoval.R +++ b/tests/testthat/test_gdistremoval.R @@ -411,9 +411,21 @@ test_that("gdistremoval handles NAs",{ fit <- gdistremoval(~1,removalformula=~1,distanceformula=~1, data=umf) expect_equivalent(coef(fit), c(2.0675,3.908,-2.1433), tol=1e-3) + # Can't have missing site covs umf2 <- umf umf2@siteCovs$sc1[1] <- NA expect_error(gdistremoval(~sc1,removalformula=~1,distanceformula=~1, data=umf2)) + + # This errors because missing obs cov does not match missing removal data + umf2 <- umf + umf2@obsCovs$oc1[1] <- NA + expect_error(gdistremoval(~1,removalformula=~oc1,distanceformula=~1, data=umf2)) + + # This does not error because missing obs cov matches missing removal data + umf2 <- umf + umf2@obsCovs$oc1[6] <- NA + fit <- gdistremoval(~1,removalformula=~oc1,distanceformula=~1, data=umf2) + expect_true(is.na(predict(fit, 'rem')$Predicted[6])) }) test_that("multi-period data works with gdistremoval",{ @@ -444,8 +456,40 @@ test_that("multi-period data works with gdistremoval",{ # ranef r <- ranef(fit) - expect_equal(dim(bup(r)), c(30,5)) + expect_equal(dim(r@post), c(30, 44, 1)) + expect_equal(length(bup(r)), 30) + # fitted + ft <- fitted(fit) + expect_equal(dim(ft$dist), dim(fit@data@yDistance)) + + # Entire missing secondary period + umf2 <- umf + # remove 2nd period at first site + umf2@yDistance[1,5:8] <- NA + umf2@yRemoval[1, 6:10] <- NA + umf2@obsCovs$oc1[6:10] <- NA + umf2@yearlySiteCovs$ysc1[2] <- NA + + + fit2 <- gdistremoval(~sc1,phiformula=~ysc1, removalformula=~oc1,distanceformula=~1, data=umf2) + + gp <- getP(fit2) + expect_true(is.na(gp$phi[1,2])) + + pr <- predict(fit2, 'rem', level=NULL) + expect_true(all(is.na(pr$Predicted[6:10]))) + + r2 <- ranef(fit2) + expect_true(!any(is.na(r2@post))) + + s <- simulate(fit2) + expect_true(all(is.na(s[[1]]$yDistance[1,5:8]))) + + res <- residuals(fit2) + expect_true(all(is.na(res$rem[1,6:10]))) + pb <- parboot(fit2, nsim=2) + expect_is(pb, "parboot") }) test_that("gdistremoval works with random effects",{ |