Some bugfixes and add tests

author: Ken Kellner <ken@kenkellner.com> 2024-01-18 12:58:20 -0500
committer: Ken Kellner <ken@kenkellner.com> 2024-01-18 12:58:46 -0500
commit: d5ed5f6732db6bf65868da9f481c6a1aaa8d195b (patch)
tree: b0391ec1ac1eb1d1f6aaffb31451acec8aafc709
parent: 303c2913f10ac691ec6782d797aeb9b8d05b325b (diff)
5 files changed, 210 insertions, 23 deletions
diff --git a/DESCRIPTION b/DESCRIPTION
index 17cc15d..9db759d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: unmarked
-Version: 1.4.1
-Date: 2024-01-08
+Version: 1.4.1.9001
+Date: 2024-01-18
 Type: Package
 Title: Models for Data from Unmarked Animals
 Authors@R: c(
diff --git a/R/IDS.R b/R/IDS.R
index 8bb5bbc..2517b70 100644
--- a/R/IDS.R
+++ b/R/IDS.R
@@ -108,6 +108,9 @@ IDS <- function(lambdaformula = ~1,
 
   # Need to add offset support here eventually
   gd_hds <- getDesign(dataDS, form_hds)
+  if(any(is.na(gd_hds$y))){
+    stop("Missing values in only some distance bins is not supported", call.=FALSE)
+  } 
   ds_hds <- get_ds_info(dataDS@dist.breaks)
   Xavail_ds <- matrix(0,0,0)
   if(has_avail) Xavail_ds <- getDesign(dataDS, form_avail)$X
@@ -523,7 +526,7 @@ setMethod("simulate", "unmarkedFitIDS",
   temp <- lapply(dets, function(x){
     if(! x %in% names(object)) return(NULL)
     conv <- IDS_convert_class(object, type=x)
-    sims <- simulate(conv, nsim=nsim, seed=seed, na.rm=na.rm)
+    sims <- simulate(conv, nsim=nsim, na.rm=na.rm)
     # availability process
     if("phi" %in% names(object)){
       sims <- lapply(sims, function(z){
@@ -569,32 +572,32 @@ setMethod("update", "unmarkedFitIDS",
 
     if(!missing(detformulaPC)){
       call[["detformulaPC"]] <- detformulaPC
-    } else {
+    } else if(!is.null(object@dataPC) & !is.null(call$detformulaPC)){
       call[["detformulaPC"]] <- split_formula(object@formlist$pc)[[1]]
     }
 
     if(!missing(detformulaOC)){
       call[["detformulaOC"]] <- detformulaOC
-    } else {
+    } else if(!is.null(object@dataOC) & !is.null(call$detformulaOC)){
       call[["detformulaOC"]] <- split_formula(object@formlist$oc)[[1]]
     }
 
     if(!missing(dataDS)){
-      call[["dataDS"]] <- dataDS
+      call$dataDS <- dataDS
     } else {
-      call[["dataDS"]] <- object@data
+      call$dataDS <- object@data
     }
 
     if(!missing(dataPC)){
-      call[["dataPC"]] <- dataPC
+      call$dataPC <- dataPC
     } else {
-      call[["dataPC"]] <- object@dataPC
+      call$dataPC <- object@dataPC
     }
-
+    
     if(!missing(dataOC)){
-      call[["dataOC"]] <- dataOC
+      call$dataOC <- dataOC
     } else {
-      call[["dataOC"]] <- object@dataOC
+      call$dataOC <- object@dataOC
     }
 
     extras <- match.call(call=sys.call(-1),
@@ -636,11 +639,15 @@ setMethod("parboot", "unmarkedFitIDS",
     dataOC <- object@dataOC
     has_oc <- "oc" %in% names(object)
 
-    t.star <- pbapply::pbsapply(1:nsim, function(i){
+    t.star <- lapply(1:nsim, function(i){
       dataDS@y <- simList[[i]]$ds
       if(has_pc) dataPC@y <- simList[[i]]$pc
       if(has_oc) dataOC@y <- simList[[i]]$oc
-      fit <- update(object, dataDS=dataDS, dataPC=dataPC, dataOC=dataOC, starts=starts)
+      fit <- update(object, dataDS=dataDS, dataPC=dataPC, dataOC=dataOC, 
+                    durationDS = object@surveyDurations$ds,
+                    durationPC = object@surveyDurations$pc,
+                    durationOC = object@surveyDurations$oc,
+                    starts=starts)
       statistic(fit)
     })
     if(lt0 > 1){
diff --git a/R/simulate.R b/R/simulate.R
index 846d90c..3868a72 100644
--- a/R/simulate.R
+++ b/R/simulate.R
@@ -606,7 +606,11 @@ setMethod("get_umf_components", "unmarkedFitIDS",
   # Presence/absence dataset
   sc_oc <- yblank_oc <- NULL
   if(!is.null(design$Moc) && design$Moc > 0){
-    if(is.null(formulas$oc)) form_oc <- formulas$ds
+    if(is.null(formulas$oc)){
+      form_oc <- formulas$ds
+    } else {
+      form_oc <- formulas$oc
+    }
     sc_oc_lam <- generate_data(formulas$lam, guide, design$Moc)
     sc_oc_det <- generate_data(form_oc, guide, design$Moc)
     sc_oc <- list(sc_oc_lam, sc_oc_det)
diff --git a/man/IDS.Rd b/man/IDS.Rd
index b6504f0..bef5b56 100644
--- a/man/IDS.Rd
+++ b/man/IDS.Rd
@@ -69,9 +69,17 @@ for the point count data, and \code{unmarkedFrameOccu} for OC/DND data.
 See the help files for these objects for guidance on how to organize the data.
 }
 
+\note{
+  Simulations indicated estimates of availability were very unreliable when
+  including detection/non-detection data, so the function will not allow you
+  to use DND data and estimate availability at the same time. 
+  In general estimation of availability can be difficult; use simulations
+  to see how well it works for your specific situation.
+}
+
 \references{
   Kery M, Royle JA, Hallman T, Robinson WD, Strebel N, Kellner KF. 2022. 
-  Integrated distance sampling models for simple point counts. 
+  Integrated distance sampling models for simple point counts. Ecology. 
 }
 \author{Ken Kellner \email{contact@kenkellner.com}}
 \seealso{\code{\link{distsamp}}}
@@ -86,7 +94,7 @@ See the help files for these objects for guidance on how to organize the data.
 formulas <- list(lam=~elev, ds=~1, phi=~1)
 
 # Sample sizes
-design <- list(Mds=2912, J=6, Mpc=506, Moc=554)
+design <- list(Mds=2912, J=6, Mpc=506)
 
 # Model parameters
 coefs <- list(lam = c(intercept=3, elev=-0.5),
@@ -94,8 +102,7 @@ coefs <- list(lam = c(intercept=3, elev=-0.5),
               phi = c(intercept=-1.3))
 
 # Survey durations
-durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30), 
-             oc=runif(design$Moc, 3, 30))
+durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30))
 
 set.seed(456)
 sim_umf <- simulate("IDS", # name of model we are simulating for
@@ -117,10 +124,9 @@ lapply(sim_umf, head)
 
 # Fit a model
 (mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1,
-                dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc,
+                dataDS=sim_umf$ds, dataPC=sim_umf$pc,
                 availformula = ~1, durationDS=durs$ds, durationPC=durs$pc,
-                durationOC=durs$oc,
-                maxDistPC=0.5, maxDistOC=0.5,
+                maxDistPC=0.5,
                 unitsOut="kmsq"))
 
 # Compare with known parameter values
@@ -128,7 +134,7 @@ lapply(sim_umf, head)
 # It is hard to estimate in most cases
 cbind(truth=unlist(coefs), est=coef(mod_sim))
 
-# Predict density at each site
+# Predict density at each distance sampling site
 head(predict(mod_sim, 'lam'))
 
 }
diff --git a/tests/testthat/test_IDS.R b/tests/testthat/test_IDS.R
new file mode 100644
index 0000000..8d28322
--- /dev/null
+++ b/tests/testthat/test_IDS.R
@@ -0,0 +1,170 @@
+context("IDS fitting function")
+skip_on_cran()
+
+test_that("IDS can fit models with covariates", {
+  set.seed(123)
+  formulas <- list(lam=~elev, ds=~1, phi=~1)
+  # Based on values from real data
+  design <- list(Mds=1000, J=6, Mpc=300)
+  # Based on values from real data
+  coefs <- list(lam = c(intercept=3, elev=-0.5),
+              ds = c(intercept=-2.5),
+              phi = c(intercept=-1.3))
+  # Survey durations, loosely based on real data
+  durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30))
+
+  sim_umf <- simulate("IDS", # name of model we are simulating for
+                    nsim=1, # number of replicates
+                    formulas=formulas, 
+                    coefs=coefs,
+                    design=design,
+                    # arguments used by unmarkedFrameDS
+                    dist.breaks = seq(0, 0.30, length.out=7),
+                    unitsIn="km", 
+                    # arguments used by IDS
+                    # could also have e.g. keyfun here
+                    durationDS=durs$ds, durationPC=durs$pc, durationOC=durs$oc,
+                    maxDistPC=0.5, maxDistOC=0.5,
+                    unitsOut="kmsq") 
+  set.seed(123)
+  (mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1,
+                dataDS=sim_umf$ds, dataPC=sim_umf$pc,
+                availformula = ~1, durationDS=durs$ds, durationPC=durs$pc,
+                maxDistPC=0.5, maxDistOC=0.5,
+                unitsOut="kmsq"))
+
+  expect_equivalent(coef(mod_sim), c(3.0271179,-0.4858101,-2.5050244,-1.3729505), tol=1e-5)
+
+  # Predict
+  pr <- predict(mod_sim, type = 'lam')
+  expect_equal(nrow(pr), 1000) # predicts only for the distance sampled sites
+  pr <- predict(mod_sim, type = 'lam', newdata=sim_umf$pc)
+
+  pr <- predict(mod_sim, type = 'ds')
+  expect_equal(nrow(pr), 1000)
+  expect_equal(pr$Predicted[1], exp(-2.5), tol=0.05)
+  pr <- predict(mod_sim, type = 'pc')
+  expect_equal(nrow(pr), 300)
+  expect_equal(pr$Predicted[1], exp(-2.5), tol=0.05)
+  pr <- predict(mod_sim, type = 'phi')
+  expect_equal(pr$ds$Predicted[1], exp(-1.37), tol=0.05)
+  
+  # residuals
+  r <- residuals(mod_sim)
+  expect_equal(lapply(r, dim), list(ds=c(1000,6), pc = c(300,1)))
+
+  # parboot
+  pb <- parboot(mod_sim, nsim=2)
+  
+  pdf(NULL)
+  plot(mod_sim)
+  hist(mod_sim)
+  dev.off()
+})
+
+test_that("IDS can fit models with occupancy data", {
+
+  set.seed(123)
+  formulas <- list(lam=~elev, ds=~1, oc=~1)
+  # Based on values from real data
+  design <- list(Mds=100, J=6, Mpc=100, Moc=100)
+  # Based on values from real data
+  coefs <- list(lam = c(intercept=3, elev=-0.5),
+              ds = c(intercept=-2.5),
+              oc = c(intercept = -2))
+
+  sim_umf <- simulate("IDS", # name of model we are simulating for
+                    nsim=1, # number of replicates
+                    formulas=formulas, 
+                    coefs=coefs,
+                    design=design,
+                    # arguments used by unmarkedFrameDS
+                    dist.breaks = seq(0, 0.30, length.out=7),
+                    unitsIn="km", 
+                    # arguments used by IDS
+                    # could also have e.g. keyfun here
+                    maxDistPC=0.5, maxDistOC=0.5,
+                    unitsOut="kmsq")
+
+  mod_oc <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1,
+                dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc,
+                maxDistPC=0.5, maxDistOC=0.5,
+                unitsOut="kmsq")
+
+  expect_equivalent(coef(mod_oc), c(3.0557091, -0.4200719, -2.5384331, -2.0610341),
+                    tol=1e-5)
+  
+  pr <- predict(mod_oc, type='oc')
+  expect_equal(pr$Predicted[1], 0.1273222, tol=1e-5)
+  
+  res <- residuals(mod_oc)
+  expect_equal(lapply(res, dim), list(ds=c(100,6), pc = c(100,1), oc=c(100,1)))
+
+  pb <- parboot(mod_oc, nsim=1)
+
+  # Don't estimate availability if OC data
+  expect_error(
+  mod_oc <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1,
+                availformula = ~1,
+                dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc,
+                maxDistPC=0.5, maxDistOC=0.5,
+                durationDS=durs$ds, durationPC=durs$pc, durationOC=durs$pc,
+                unitsOut="kmsq")
+  )
+
+})
+
+test_that("IDS handles missing values", {
+
+  set.seed(123)
+  design <- list(Mds=100, J=6, Mpc=100, Moc=100)
+  formulas <- list(lam=~elev, ds=~1, phi=~1)
+  # Based on values from real data
+  coefs <- list(lam = c(intercept=3, elev=-0.5),
+              ds = c(intercept=-2.5),
+              phi = c(intercept=-1.3))
+  # Survey durations, loosely based on real data
+  durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30))
+
+  sim_umf <- simulate("IDS", # name of model we are simulating for
+                    nsim=1, # number of replicates
+                    formulas=formulas, 
+                    coefs=coefs,
+                    design=design,
+                    # arguments used by unmarkedFrameDS
+                    dist.breaks = seq(0, 0.30, length.out=7),
+                    unitsIn="km", 
+                    # arguments used by IDS
+                    # could also have e.g. keyfun here
+                    maxDistPC=0.5, maxDistOC=0.5,
+                    unitsOut="kmsq")
+
+  sim_umf$pc@y[1,1] <- NA
+  sim_umf$pc@y[2,] <- NA
+
+  sim_umf$oc@y[1,1] <- NA
+  sim_umf$oc@y[2,] <- NA
+
+  expect_warning(
+  mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1,
+                dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc,
+                maxDistPC=0.5, maxDistOC=0.5,
+                unitsOut="kmsq")
+  )
+
+  expect_equivalent(coef(mod_sim), c(2.9354934,-0.4759405,-2.5314594,-2.3259133),
+                    tol=1e-5)
+
+  
+  sim_umf$ds@y[1,1] <- NA
+  sim_umf$ds@y[2,] <- NA
+
+  expect_error(
+  expect_warning(
+  mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1,
+                dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc,
+                maxDistPC=0.5, maxDistOC=0.5,
+                unitsOut="kmsq")
+  ))
+
+})
author	Ken Kellner <ken@kenkellner.com>	2024-01-18 12:58:20 -0500
committer	Ken Kellner <ken@kenkellner.com>	2024-01-18 12:58:46 -0500
commit	d5ed5f6732db6bf65868da9f481c6a1aaa8d195b (patch)
tree	b0391ec1ac1eb1d1f6aaffb31451acec8aafc709
parent	303c2913f10ac691ec6782d797aeb9b8d05b325b (diff)