diff options
author | Ken Kellner <ken@kenkellner.com> | 2024-01-18 12:58:20 -0500 |
---|---|---|
committer | Ken Kellner <ken@kenkellner.com> | 2024-01-18 12:58:46 -0500 |
commit | d5ed5f6732db6bf65868da9f481c6a1aaa8d195b (patch) | |
tree | b0391ec1ac1eb1d1f6aaffb31451acec8aafc709 | |
parent | 303c2913f10ac691ec6782d797aeb9b8d05b325b (diff) |
Some bugfixes and add tests
-rw-r--r-- | DESCRIPTION | 4 | ||||
-rw-r--r-- | R/IDS.R | 31 | ||||
-rw-r--r-- | R/simulate.R | 6 | ||||
-rw-r--r-- | man/IDS.Rd | 22 | ||||
-rw-r--r-- | tests/testthat/test_IDS.R | 170 |
5 files changed, 210 insertions, 23 deletions
diff --git a/DESCRIPTION b/DESCRIPTION index 17cc15d..9db759d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: unmarked -Version: 1.4.1 -Date: 2024-01-08 +Version: 1.4.1.9001 +Date: 2024-01-18 Type: Package Title: Models for Data from Unmarked Animals Authors@R: c( @@ -108,6 +108,9 @@ IDS <- function(lambdaformula = ~1, # Need to add offset support here eventually gd_hds <- getDesign(dataDS, form_hds) + if(any(is.na(gd_hds$y))){ + stop("Missing values in only some distance bins is not supported", call.=FALSE) + } ds_hds <- get_ds_info(dataDS@dist.breaks) Xavail_ds <- matrix(0,0,0) if(has_avail) Xavail_ds <- getDesign(dataDS, form_avail)$X @@ -523,7 +526,7 @@ setMethod("simulate", "unmarkedFitIDS", temp <- lapply(dets, function(x){ if(! x %in% names(object)) return(NULL) conv <- IDS_convert_class(object, type=x) - sims <- simulate(conv, nsim=nsim, seed=seed, na.rm=na.rm) + sims <- simulate(conv, nsim=nsim, na.rm=na.rm) # availability process if("phi" %in% names(object)){ sims <- lapply(sims, function(z){ @@ -569,32 +572,32 @@ setMethod("update", "unmarkedFitIDS", if(!missing(detformulaPC)){ call[["detformulaPC"]] <- detformulaPC - } else { + } else if(!is.null(object@dataPC) & !is.null(call$detformulaPC)){ call[["detformulaPC"]] <- split_formula(object@formlist$pc)[[1]] } if(!missing(detformulaOC)){ call[["detformulaOC"]] <- detformulaOC - } else { + } else if(!is.null(object@dataOC) & !is.null(call$detformulaOC)){ call[["detformulaOC"]] <- split_formula(object@formlist$oc)[[1]] } if(!missing(dataDS)){ - call[["dataDS"]] <- dataDS + call$dataDS <- dataDS } else { - call[["dataDS"]] <- object@data + call$dataDS <- object@data } if(!missing(dataPC)){ - call[["dataPC"]] <- dataPC + call$dataPC <- dataPC } else { - call[["dataPC"]] <- object@dataPC + call$dataPC <- object@dataPC } - + if(!missing(dataOC)){ - call[["dataOC"]] <- dataOC + call$dataOC <- dataOC } else { - call[["dataOC"]] <- object@dataOC + call$dataOC <- object@dataOC } extras <- match.call(call=sys.call(-1), @@ -636,11 +639,15 @@ setMethod("parboot", "unmarkedFitIDS", dataOC <- object@dataOC has_oc <- "oc" %in% names(object) - t.star <- pbapply::pbsapply(1:nsim, function(i){ + t.star <- lapply(1:nsim, function(i){ dataDS@y <- simList[[i]]$ds if(has_pc) dataPC@y <- simList[[i]]$pc if(has_oc) dataOC@y <- simList[[i]]$oc - fit <- update(object, dataDS=dataDS, dataPC=dataPC, dataOC=dataOC, starts=starts) + fit <- update(object, dataDS=dataDS, dataPC=dataPC, dataOC=dataOC, + durationDS = object@surveyDurations$ds, + durationPC = object@surveyDurations$pc, + durationOC = object@surveyDurations$oc, + starts=starts) statistic(fit) }) if(lt0 > 1){ diff --git a/R/simulate.R b/R/simulate.R index 846d90c..3868a72 100644 --- a/R/simulate.R +++ b/R/simulate.R @@ -606,7 +606,11 @@ setMethod("get_umf_components", "unmarkedFitIDS", # Presence/absence dataset sc_oc <- yblank_oc <- NULL if(!is.null(design$Moc) && design$Moc > 0){ - if(is.null(formulas$oc)) form_oc <- formulas$ds + if(is.null(formulas$oc)){ + form_oc <- formulas$ds + } else { + form_oc <- formulas$oc + } sc_oc_lam <- generate_data(formulas$lam, guide, design$Moc) sc_oc_det <- generate_data(form_oc, guide, design$Moc) sc_oc <- list(sc_oc_lam, sc_oc_det) @@ -69,9 +69,17 @@ for the point count data, and \code{unmarkedFrameOccu} for OC/DND data. See the help files for these objects for guidance on how to organize the data. } +\note{ + Simulations indicated estimates of availability were very unreliable when + including detection/non-detection data, so the function will not allow you + to use DND data and estimate availability at the same time. + In general estimation of availability can be difficult; use simulations + to see how well it works for your specific situation. +} + \references{ Kery M, Royle JA, Hallman T, Robinson WD, Strebel N, Kellner KF. 2022. - Integrated distance sampling models for simple point counts. + Integrated distance sampling models for simple point counts. Ecology. } \author{Ken Kellner \email{contact@kenkellner.com}} \seealso{\code{\link{distsamp}}} @@ -86,7 +94,7 @@ See the help files for these objects for guidance on how to organize the data. formulas <- list(lam=~elev, ds=~1, phi=~1) # Sample sizes -design <- list(Mds=2912, J=6, Mpc=506, Moc=554) +design <- list(Mds=2912, J=6, Mpc=506) # Model parameters coefs <- list(lam = c(intercept=3, elev=-0.5), @@ -94,8 +102,7 @@ coefs <- list(lam = c(intercept=3, elev=-0.5), phi = c(intercept=-1.3)) # Survey durations -durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30), - oc=runif(design$Moc, 3, 30)) +durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30)) set.seed(456) sim_umf <- simulate("IDS", # name of model we are simulating for @@ -117,10 +124,9 @@ lapply(sim_umf, head) # Fit a model (mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, - dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, availformula = ~1, durationDS=durs$ds, durationPC=durs$pc, - durationOC=durs$oc, - maxDistPC=0.5, maxDistOC=0.5, + maxDistPC=0.5, unitsOut="kmsq")) # Compare with known parameter values @@ -128,7 +134,7 @@ lapply(sim_umf, head) # It is hard to estimate in most cases cbind(truth=unlist(coefs), est=coef(mod_sim)) -# Predict density at each site +# Predict density at each distance sampling site head(predict(mod_sim, 'lam')) } diff --git a/tests/testthat/test_IDS.R b/tests/testthat/test_IDS.R new file mode 100644 index 0000000..8d28322 --- /dev/null +++ b/tests/testthat/test_IDS.R @@ -0,0 +1,170 @@ +context("IDS fitting function") +skip_on_cran() + +test_that("IDS can fit models with covariates", { + set.seed(123) + formulas <- list(lam=~elev, ds=~1, phi=~1) + # Based on values from real data + design <- list(Mds=1000, J=6, Mpc=300) + # Based on values from real data + coefs <- list(lam = c(intercept=3, elev=-0.5), + ds = c(intercept=-2.5), + phi = c(intercept=-1.3)) + # Survey durations, loosely based on real data + durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30)) + + sim_umf <- simulate("IDS", # name of model we are simulating for + nsim=1, # number of replicates + formulas=formulas, + coefs=coefs, + design=design, + # arguments used by unmarkedFrameDS + dist.breaks = seq(0, 0.30, length.out=7), + unitsIn="km", + # arguments used by IDS + # could also have e.g. keyfun here + durationDS=durs$ds, durationPC=durs$pc, durationOC=durs$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + set.seed(123) + (mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, + availformula = ~1, durationDS=durs$ds, durationPC=durs$pc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq")) + + expect_equivalent(coef(mod_sim), c(3.0271179,-0.4858101,-2.5050244,-1.3729505), tol=1e-5) + + # Predict + pr <- predict(mod_sim, type = 'lam') + expect_equal(nrow(pr), 1000) # predicts only for the distance sampled sites + pr <- predict(mod_sim, type = 'lam', newdata=sim_umf$pc) + + pr <- predict(mod_sim, type = 'ds') + expect_equal(nrow(pr), 1000) + expect_equal(pr$Predicted[1], exp(-2.5), tol=0.05) + pr <- predict(mod_sim, type = 'pc') + expect_equal(nrow(pr), 300) + expect_equal(pr$Predicted[1], exp(-2.5), tol=0.05) + pr <- predict(mod_sim, type = 'phi') + expect_equal(pr$ds$Predicted[1], exp(-1.37), tol=0.05) + + # residuals + r <- residuals(mod_sim) + expect_equal(lapply(r, dim), list(ds=c(1000,6), pc = c(300,1))) + + # parboot + pb <- parboot(mod_sim, nsim=2) + + pdf(NULL) + plot(mod_sim) + hist(mod_sim) + dev.off() +}) + +test_that("IDS can fit models with occupancy data", { + + set.seed(123) + formulas <- list(lam=~elev, ds=~1, oc=~1) + # Based on values from real data + design <- list(Mds=100, J=6, Mpc=100, Moc=100) + # Based on values from real data + coefs <- list(lam = c(intercept=3, elev=-0.5), + ds = c(intercept=-2.5), + oc = c(intercept = -2)) + + sim_umf <- simulate("IDS", # name of model we are simulating for + nsim=1, # number of replicates + formulas=formulas, + coefs=coefs, + design=design, + # arguments used by unmarkedFrameDS + dist.breaks = seq(0, 0.30, length.out=7), + unitsIn="km", + # arguments used by IDS + # could also have e.g. keyfun here + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + + mod_oc <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + + expect_equivalent(coef(mod_oc), c(3.0557091, -0.4200719, -2.5384331, -2.0610341), + tol=1e-5) + + pr <- predict(mod_oc, type='oc') + expect_equal(pr$Predicted[1], 0.1273222, tol=1e-5) + + res <- residuals(mod_oc) + expect_equal(lapply(res, dim), list(ds=c(100,6), pc = c(100,1), oc=c(100,1))) + + pb <- parboot(mod_oc, nsim=1) + + # Don't estimate availability if OC data + expect_error( + mod_oc <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + availformula = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc, + maxDistPC=0.5, maxDistOC=0.5, + durationDS=durs$ds, durationPC=durs$pc, durationOC=durs$pc, + unitsOut="kmsq") + ) + +}) + +test_that("IDS handles missing values", { + + set.seed(123) + design <- list(Mds=100, J=6, Mpc=100, Moc=100) + formulas <- list(lam=~elev, ds=~1, phi=~1) + # Based on values from real data + coefs <- list(lam = c(intercept=3, elev=-0.5), + ds = c(intercept=-2.5), + phi = c(intercept=-1.3)) + # Survey durations, loosely based on real data + durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30)) + + sim_umf <- simulate("IDS", # name of model we are simulating for + nsim=1, # number of replicates + formulas=formulas, + coefs=coefs, + design=design, + # arguments used by unmarkedFrameDS + dist.breaks = seq(0, 0.30, length.out=7), + unitsIn="km", + # arguments used by IDS + # could also have e.g. keyfun here + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + + sim_umf$pc@y[1,1] <- NA + sim_umf$pc@y[2,] <- NA + + sim_umf$oc@y[1,1] <- NA + sim_umf$oc@y[2,] <- NA + + expect_warning( + mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + ) + + expect_equivalent(coef(mod_sim), c(2.9354934,-0.4759405,-2.5314594,-2.3259133), + tol=1e-5) + + + sim_umf$ds@y[1,1] <- NA + sim_umf$ds@y[2,] <- NA + + expect_error( + expect_warning( + mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + )) + +}) |