Merge pull request #238 from kenkellner/simulate_random_effects

Allow simulating datasets with random effects for model types that support it
author: Ken Kellner <kenkellner@users.noreply.github.com> 2022-07-22 19:15:03 -0400
committer: GitHub <noreply@github.com> 2022-07-22 19:15:03 -0400
commit: 60ad46e43217d92cf0736976f871d2bda7be51d4 (patch)
tree: f858b2da36824d2209f4c9303882f9d7379e737c
parent: 0266f34e3a02003bed5f0f14ff91cb3e39f2fd59 (diff)
parent: 288458c5e7e3fb5775c21d22ac8e4a1c63cbc206 (diff)
9 files changed, 158 insertions, 40 deletions
diff --git a/DESCRIPTION b/DESCRIPTION
index fe34bfc..972f834 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: unmarked
-Version: 1.2.5.9002
-Date: 2022-06-17
+Version: 1.2.5.9003
+Date: 2022-07-22
 Type: Package
 Title: Models for Data from Unmarked Animals
 Authors@R: c(
diff --git a/R/power.R b/R/power.R
index 80af473..2dc5675 100644
--- a/R/power.R
+++ b/R/power.R
@@ -13,6 +13,7 @@ powerAnalysis <- function(object, coefs=NULL, design=NULL, alpha=0.05, nulls=lis
 
   submodels <- names(object@estimates@estimates)
   coefs <- check_coefs(coefs, object)
+  coefs <- generate_random_effects(coefs, object)
   fit_temp <- replace_estimates(object, coefs)
 
   T <- 1
@@ -155,12 +156,36 @@ check_coefs <- function(coefs, fit, template=FALSE){
   required_subs <- names(fit@estimates@estimates)
   required_coefs <- lapply(fit@estimates@estimates, function(x) names(x@estimates))
   required_lens <- lapply(required_coefs, length)
+
+  formulas <- sapply(names(fit), function(x) get_formula(fit, x))
+
+  # If there are random effects, adjust the expected coefficient names
+  # to remove the b vector and add the grouping covariate name
+  rand <- lapply(formulas, lme4::findbars)
+  if(!all(sapply(rand, is.null))){
+    stopifnot(all(required_subs %in% names(formulas)))
+    rvar <- lapply(rand, function(x) unlist(lapply(x, all.vars)))
+    if(!all(sapply(rvar, length)<2)){
+      stop("Only 1 random effect per parameter is supported", call.=FALSE)
+    }
+    for (i in required_subs){
+      if(!is.null(rand[[i]][[1]])){
+        signame <- rvar[[i]]
+        old_coefs <- required_coefs[[i]]
+        new_coefs <- old_coefs[!grepl("b_", old_coefs, fixed=TRUE)]
+        new_coefs <- c(new_coefs, signame)
+        required_coefs[[i]] <- new_coefs
+      }
+    }
+  }
+
   dummy_coefs <- lapply(required_coefs, function(x){
                     out <- rep(0, length(x))
                     x <- gsub("(Intercept)", "intercept", x, fixed=TRUE)
                     names(out) <- x
                     out
                   })
+
   if(template) return(dummy_coefs)
 
   if(is.null(coefs)){
@@ -226,6 +251,7 @@ check_coefs <- function(coefs, fit, template=FALSE){
   }
   coefs[required_subs]
 }
+
 wald <- function(est, se, null_hyp=NULL){
   if(is.null(null_hyp) || is.na(null_hyp)) null_hyp <- 0
   Z <- (est-null_hyp)/se
@@ -251,13 +277,15 @@ setMethod("summary", "unmarkedPower", function(object, ...){
     x
   })
 
+  coefs_no_rand <- unlist(object@coefs)[!grepl("b_", names(unlist(object@coefs)))]
+
   pow <- sapply(1:npar, function(ind){
     submod <- sum_dfs[[1]]$submodel[ind]
     param <- sum_dfs[[1]]$param[ind]
     ni <- nulls[[submod]][param]
 
     pcrit <- sapply(sum_dfs, function(x) wald(x$Estimate[ind], x$SE[ind], ni)) < object@alpha
-    direct <- sapply(sum_dfs, function(x) diff_dir(x$Estimate[ind], unlist(object@coefs)[ind], ni))
+    direct <- sapply(sum_dfs, function(x) diff_dir(x$Estimate[ind], coefs_no_rand[ind], ni))
     mean(pcrit & direct, na.rm=T)
   })
 
@@ -269,11 +297,14 @@ setMethod("summary", "unmarkedPower", function(object, ...){
     ni
   })
 
-  out <- cbind(sum_dfs[[1]][,1:2], effect=unlist(object@coefs), null=all_nulls,  power=pow)
+  effect_no_random <- unlist(object@coefs)[!grepl("b_",names(unlist(object@coefs)))]
+
+  out <- cbind(sum_dfs[[1]][,1:2], effect=effect_no_random, null=all_nulls,  power=pow)
   rownames(out) <- NULL
   names(out) <- c("Submodel", "Parameter", "Effect", "Null", "Power")
   out
 })
+
 setMethod("show", "unmarkedPower", function(object){
   cat("\nModel:\n")
   print(object@call)
diff --git a/R/predict.R b/R/predict.R
index cd14637..68efbb7 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -132,9 +132,12 @@ setGeneric("get_formula", function(object, type, ...){
 })
 
 setMethod("get_formula", "unmarkedFit", function(object, type, ...){
-  if(type == "state") return(as.formula(paste("~", object@formula[3], sep="")))
-  if(type == "det") return(as.formula(object@formula[[2]]))
-  stop("Invalid type")
+  if(type == "state"){
+    return(as.formula(paste("~", object@formula[3], sep="")))
+  } else if(type == "det"){
+    return(as.formula(object@formula[[2]]))
+  }
+  NULL
 })
 
 # When newdata is data.frame/raster, get original dataset
@@ -574,6 +577,12 @@ setMethod("get_orig_data", "unmarkedFitGDR", function(object, type, ...){
 # bespoke predict method since it has numerious unusual options
 # and requires bootstrapping
 
+# This method is used by simulate but not by predict
+setMethod("get_formula", "unmarkedFitOccuMulti", function(object, type, ...){
+  switch(type, state=object@stateformulas,
+               det=object@detformulas)
+})
+
 setMethod("predict", "unmarkedFitOccuMulti",
      function(object, type, newdata,
               #backTransform = TRUE, na.rm = TRUE,
@@ -748,6 +757,12 @@ setMethod("predict", "unmarkedFitOccuMulti",
 
 # bespoke predict method since it requires bootstrapping
 
+# This method is used by simulate by not by predict
+setMethod("get_formula", "unmarkedFitOccuMS", function(object, type, ...){
+  switch(type, psi=object@psiformulas, phi=object@phiformulas,
+               det=object@detformulas)
+})
+
 setMethod("predict", "unmarkedFitOccuMS",
      function(object, type, newdata,
               #backTransform = TRUE, na.rm = TRUE,
diff --git a/R/simulate.R b/R/simulate.R
index 3503ba5..5df7247 100644
--- a/R/simulate.R
+++ b/R/simulate.R
@@ -59,6 +59,69 @@ blank_umFit <- function(fit_function){
 }
 
 
+setMethod("simulate", "character",
+  function(object, nsim=1, seed=NULL, formulas, coefs=NULL, design, guide=NULL, ...){
+  model <- blank_umFit(object)
+  fit <- suppressWarnings(simulate_fit(model, formulas, guide, design, ...))
+  coefs <- check_coefs(coefs, fit)
+  coefs <- generate_random_effects(coefs, fit)
+  fit <- replace_estimates(fit, coefs)
+  ysims <- suppressWarnings(simulate(fit, nsim))
+  umf <- fit@data
+  # fix this
+  umfs <- lapply(ysims, function(x){
+    if(object=="occuMulti"){
+      umf@ylist <- x
+    } else if(object=="gdistremoval"){
+      umf@yDistance=x$yDistance
+      umf@yRemoval=x$yRemoval
+    } else {
+      umf@y <- x
+    }
+    umf
+  })
+  if(length(umfs)==1) umfs <- umfs[[1]]
+  umfs
+})
+
+
+generate_random_effects <- function(coefs, fit){
+  required_subs <- names(fit@estimates@estimates)
+  formulas <- sapply(names(fit), function(x) get_formula(fit, x))
+  rand <- lapply(formulas, lme4::findbars)
+  if(!all(sapply(rand, is.null))){
+    rvar <- lapply(rand, function(x) unlist(lapply(x, all.vars)))
+    for (i in required_subs){
+      if(!is.null(rand[[i]][[1]])){
+        signame <- rvar[[i]]
+        old_coefs <- coefs[[i]]
+        new_coefs <- old_coefs[names(old_coefs)!=signame]
+
+        # Find levels of factor variable
+        if(signame %in% names(siteCovs(fit@data))){
+          lvldata <- siteCovs(fit@data)[[signame]]
+        } else if(signame %in% names(obsCovs(fit@data))){
+          lvldata <- obsCovs(fit@data)[[signame]]
+        } else if(methods::.hasSlot(fit@data, "yearlySiteCovs") && signame %in% names(yearlySiteCovs(fit@data))){
+          lvldata <- yearlySiteCovs(fit@data)[[signame]]
+        } else {
+          stop("Random effect covariate missing from data", call.=FALSE)
+        }
+
+        if(!is.factor(lvldata)){
+          stop("Random effect covariates must be specified as factors with guide argument", call.=FALSE)
+        }
+        b <- stats::rnorm(length(levels(lvldata)), 0, old_coefs[signame])
+        names(b) <- rep(paste0("b_",i), length(b))
+        new_coefs <- c(new_coefs, b)
+        coefs[[i]] <- new_coefs
+      }
+    }
+  }
+  coefs
+}
+
+
 setGeneric("get_umf_components", function(object, ...) standardGeneric("get_umf_components"))
 
 setMethod("get_umf_components", "unmarkedFit",
@@ -103,30 +166,6 @@ setMethod("simulate_fit", "unmarkedFitOccuRN",
 })
 
 
-setMethod("simulate", "character",
-  function(object, nsim=1, seed=NULL, formulas, coefs=NULL, design, guide=NULL, ...){
-  model <- blank_umFit(object)
-  fit <- suppressWarnings(simulate_fit(model, formulas, guide, design, ...))
-  coefs <- check_coefs(coefs, fit)
-  fit <- replace_estimates(fit, coefs)
-  ysims <- simulate(fit, nsim)
-  umf <- fit@data
-  # fix this
-  umfs <- lapply(ysims, function(x){
-    if(object=="occuMulti"){
-      umf@ylist <- x
-    } else if(object=="gdistremoval"){
-      umf@yDistance=x$yDistance
-      umf@yRemoval=x$yRemoval
-    } else {
-      umf@y <- x
-    }
-    umf
-  })
-  if(length(umfs)==1) umfs <- umfs[[1]]
-  umfs
-})
-
 setMethod("get_umf_components", "unmarkedFitMPois",
           function(object, formulas, guide, design, ...){
   args <- list(...)
@@ -495,5 +534,5 @@ setMethod("simulate_fit", "unmarkedFitGDR",
   gdistremoval(lambdaformula=formulas$lambda, phiformula=formulas$phi,
                removalformula=formulas$rem, distanceformula=formulas$dist,
                data=umf, keyfun=keyfun, output=output, unitsOut=unitsOut,
-               mixture=mixture, K=K, se=FALSE, control=list(maxit=1))
+               mixture=mixture, K=K, se=FALSE, control=list(maxit=1), method='L-BFGS-B')
 })
diff --git a/R/unmarkedFit.R b/R/unmarkedFit.R
index 0a8d107..6557b9d 100644
--- a/R/unmarkedFit.R
+++ b/R/unmarkedFit.R
@@ -1631,13 +1631,13 @@ setMethod("getP", "unmarkedFitDS",
     umf <- object@data
     designMats <- getDesign(umf, formula, na.rm = na.rm)
     y <- designMats$y
-    V <- designMats$V
+    V <- cbind(designMats$V, designMats$Z_det)
     V.offset <- designMats$V.offset
     if (is.null(V.offset))
         V.offset <- rep(0, nrow(V))
     M <- nrow(y)
     J <- ncol(y)
-    ppars <- coef(object, type = "det")
+    ppars <- coef(object, type = "det", fixedOnly=FALSE)
     db <- umf@dist.breaks
     w <- diff(db)
     survey <- umf@survey
@@ -1902,13 +1902,13 @@ setMethod("getP", "unmarkedFitMPois", function(object, na.rm = TRUE)
     umf <- object@data
     designMats <- getDesign(umf, formula, na.rm = na.rm)
     y <- designMats$y
-    V <- designMats$V
+    V <- as.matrix(cbind(designMats$V, designMats$Z_det))
     V.offset <- designMats$V.offset
     if (is.null(V.offset))
         V.offset <- rep(0, nrow(V))
     M <- nrow(y)
     J <- obsNum(umf) #ncol(y)
-    ppars <- coef(object, type = "det")
+    ppars <- coef(object, type = "det", fixedOnly=FALSE)
     p <- plogis(V %*% ppars + V.offset)
     p <- matrix(p, M, J, byrow = TRUE)
     pi <- do.call(piFun, list(p = p))
@@ -2063,13 +2063,13 @@ setMethod("simulate", "unmarkedFitDS",
     w <- diff(db)
     designMats <- getDesign(umf, formula, na.rm = na.rm)
     y <- designMats$y
-    X <- designMats$X
+    X <- as.matrix(cbind(designMats$X, designMats$Z_state))
     X.offset <- designMats$X.offset
     if (is.null(X.offset))
         X.offset <- rep(0, nrow(X))
     M <- nrow(y)
     J <- ncol(y)
-    lamParms <- coef(object, type = "state")
+    lamParms <- coef(object, type = "state", fixedOnly=FALSE)
     lambda <- drop(exp(X %*% lamParms + X.offset))
     if(identical(object@output, "density")) {
         switch(umf@survey,
@@ -2342,14 +2342,14 @@ setMethod("simulate", "unmarkedFitMPois",
     umf <- object@data
     designMats <- getDesign(umf, formula, na.rm = na.rm)
     y <- designMats$y
-    X <- designMats$X
+    X <- as.matrix(cbind(designMats$X, designMats$Z_state))
     X.offset <- designMats$X.offset
     if (is.null(X.offset)) {
         X.offset <- rep(0, nrow(X))
         }
     M <- nrow(y)
     J <- ncol(y)
-    lamParms <- coef(object, type = "state")
+    lamParms <- coef(object, type = "state", fixedOnly=FALSE)
     lam <- as.numeric(exp(X %*% lamParms + X.offset))
     lamvec <- rep(lam, each = J)
     pivec <- as.vector(t(getP(object, na.rm = na.rm)))
diff --git a/tests/testthat/test_distsamp.R b/tests/testthat/test_distsamp.R
index 9c42119..9d6fe04 100644
--- a/tests/testthat/test_distsamp.R
+++ b/tests/testthat/test_distsamp.R
@@ -265,6 +265,7 @@ test_that("getP works with distsamp",{
 
 test_that("distsamp works with random effects",{
 
+  set.seed(123)
   data(linetran)
   umf <- unmarkedFrameDS(y=as.matrix(linetran[,1:4]), siteCovs=linetran[,6:7],
                          survey="line", tlength=linetran$Length, unitsIn='m',
@@ -301,4 +302,10 @@ test_that("distsamp works with random effects",{
   pr <- lapply(mods,  function(x) predict(x, "state"))
   expect_true(all(sapply(pr, inherits, "data.frame")))
 
+  # Make sure simulate accounts for random effects
+  s <- simulate(hn, nsim=30)
+  avg <- apply(sapply(s, function(x) apply(x,1,sum)),1, mean)
+  # average first count and predicted abundance should be highly correlated
+  pr <- predict(hn, 'state')
+  expect_true(cor(avg, pr$Predicted) > 0.7)
 })
diff --git a/tests/testthat/test_multinomPois.R b/tests/testthat/test_multinomPois.R
index ee51335..6b4b693 100644
--- a/tests/testthat/test_multinomPois.R
+++ b/tests/testthat/test_multinomPois.R
@@ -237,6 +237,12 @@ test_that("multinomPois can fit models with random effects",{
   expect_equivalent(dim(pr), c(100, 4))
   expect_equivalent(dim(pr2), c(5,4))
 
+  # Make sure simulate accounts for random effects
+  s <- simulate(fm, nsim=30)
+  avg <- apply(sapply(s, function(x) x[,1]),1, mean)
+  # average first count and predicted abundance should be highly correlated
+  expect_true(cor(avg, pr$Predicted) > 0.7)
+
   umf2@y[1,1] <- NA
   umf2@y[2,] <- NA
   umf2@siteCovs$x1[3] <- NA
diff --git a/tests/testthat/test_powerAnalysis.R b/tests/testthat/test_powerAnalysis.R
index fbf75a4..d316a64 100644
--- a/tests/testthat/test_powerAnalysis.R
+++ b/tests/testthat/test_powerAnalysis.R
@@ -61,6 +61,17 @@ test_that("powerAnalysis method works",{
   pl <- unmarkedPowerList(template_model, effect_sizes, design=scenarios, nsim=10)
   expect_is(pl, "unmarkedPowerList")
 
+  # With random effect
+  set.seed(123)
+  rguide <- list(group=factor(levels=letters[1:20]))
+  rform <- list(state=~x+(1|group), det=~1)
+  rcf <- list(state=c(intercept=0, x=0.5, group=0.7), det=c(intercept=0))
+  umfr <- simulate("occu", formulas=rform, design=design, coefs=rcf, guide=rguide)
+  fm <- occu(~1~x+(1|group), umfr)
+  pa5 <- powerAnalysis(fm, rcf, nsim=10)
+  s <- summary(pa5)
+  expect_equal(nrow(s), 3)
+  expect_equal(s$Power[2], 1)
   })
 })
 
diff --git a/tests/testthat/test_simulate.R b/tests/testthat/test_simulate.R
index c1cda3e..04f1686 100644
--- a/tests/testthat/test_simulate.R
+++ b/tests/testthat/test_simulate.R
@@ -34,6 +34,15 @@ test_that("simulate can generate new datasets from scratch",{
   expect_true(is.factor(umf2@siteCovs$landcover))
   expect_equivalent(mean(umf2@siteCovs$elev), 2.01722, tol=1e-5)
 
+  # With random effect
+  set.seed(123)
+  rguide <- list(group=factor(levels=letters[1:20]))
+  rform <- list(state=~(1|group), det=~1)
+  rcf <- list(state=c(intercept=0, group=0.7), det=c(intercept=0))
+  umfr <- simulate("occu", formulas=rform, design=design, coefs=rcf, guide=rguide)
+  fm <- occu(~1~(1|group), umfr)
+  expect_equal(sigma(fm)$sigma, 0.6903913, tol=1e-5)
+
   # pcount
   set.seed(123)
   cf$alpha <- c(alpha=0.5)
author	Ken Kellner <kenkellner@users.noreply.github.com>	2022-07-22 19:15:03 -0400
committer	GitHub <noreply@github.com>	2022-07-22 19:15:03 -0400
commit	60ad46e43217d92cf0736976f871d2bda7be51d4 (patch)
tree	f858b2da36824d2209f4c9303882f9d7379e737c
parent	0266f34e3a02003bed5f0f14ff91cb3e39f2fd59 (diff)
parent	288458c5e7e3fb5775c21d22ac8e4a1c63cbc206 (diff)