diff options
author | Ken Kellner <kenkellner@users.noreply.github.com> | 2022-07-22 19:15:03 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-22 19:15:03 -0400 |
commit | 60ad46e43217d92cf0736976f871d2bda7be51d4 (patch) | |
tree | f858b2da36824d2209f4c9303882f9d7379e737c | |
parent | 0266f34e3a02003bed5f0f14ff91cb3e39f2fd59 (diff) | |
parent | 288458c5e7e3fb5775c21d22ac8e4a1c63cbc206 (diff) |
Merge pull request #238 from kenkellner/simulate_random_effects
Allow simulating datasets with random effects for model types that support it
-rw-r--r-- | DESCRIPTION | 4 | ||||
-rw-r--r-- | R/power.R | 35 | ||||
-rw-r--r-- | R/predict.R | 21 | ||||
-rw-r--r-- | R/simulate.R | 89 | ||||
-rw-r--r-- | R/unmarkedFit.R | 16 | ||||
-rw-r--r-- | tests/testthat/test_distsamp.R | 7 | ||||
-rw-r--r-- | tests/testthat/test_multinomPois.R | 6 | ||||
-rw-r--r-- | tests/testthat/test_powerAnalysis.R | 11 | ||||
-rw-r--r-- | tests/testthat/test_simulate.R | 9 |
9 files changed, 158 insertions, 40 deletions
diff --git a/DESCRIPTION b/DESCRIPTION index fe34bfc..972f834 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: unmarked -Version: 1.2.5.9002 -Date: 2022-06-17 +Version: 1.2.5.9003 +Date: 2022-07-22 Type: Package Title: Models for Data from Unmarked Animals Authors@R: c( @@ -13,6 +13,7 @@ powerAnalysis <- function(object, coefs=NULL, design=NULL, alpha=0.05, nulls=lis submodels <- names(object@estimates@estimates) coefs <- check_coefs(coefs, object) + coefs <- generate_random_effects(coefs, object) fit_temp <- replace_estimates(object, coefs) T <- 1 @@ -155,12 +156,36 @@ check_coefs <- function(coefs, fit, template=FALSE){ required_subs <- names(fit@estimates@estimates) required_coefs <- lapply(fit@estimates@estimates, function(x) names(x@estimates)) required_lens <- lapply(required_coefs, length) + + formulas <- sapply(names(fit), function(x) get_formula(fit, x)) + + # If there are random effects, adjust the expected coefficient names + # to remove the b vector and add the grouping covariate name + rand <- lapply(formulas, lme4::findbars) + if(!all(sapply(rand, is.null))){ + stopifnot(all(required_subs %in% names(formulas))) + rvar <- lapply(rand, function(x) unlist(lapply(x, all.vars))) + if(!all(sapply(rvar, length)<2)){ + stop("Only 1 random effect per parameter is supported", call.=FALSE) + } + for (i in required_subs){ + if(!is.null(rand[[i]][[1]])){ + signame <- rvar[[i]] + old_coefs <- required_coefs[[i]] + new_coefs <- old_coefs[!grepl("b_", old_coefs, fixed=TRUE)] + new_coefs <- c(new_coefs, signame) + required_coefs[[i]] <- new_coefs + } + } + } + dummy_coefs <- lapply(required_coefs, function(x){ out <- rep(0, length(x)) x <- gsub("(Intercept)", "intercept", x, fixed=TRUE) names(out) <- x out }) + if(template) return(dummy_coefs) if(is.null(coefs)){ @@ -226,6 +251,7 @@ check_coefs <- function(coefs, fit, template=FALSE){ } coefs[required_subs] } + wald <- function(est, se, null_hyp=NULL){ if(is.null(null_hyp) || is.na(null_hyp)) null_hyp <- 0 Z <- (est-null_hyp)/se @@ -251,13 +277,15 @@ setMethod("summary", "unmarkedPower", function(object, ...){ x }) + coefs_no_rand <- unlist(object@coefs)[!grepl("b_", names(unlist(object@coefs)))] + pow <- sapply(1:npar, function(ind){ submod <- sum_dfs[[1]]$submodel[ind] param <- sum_dfs[[1]]$param[ind] ni <- nulls[[submod]][param] pcrit <- sapply(sum_dfs, function(x) wald(x$Estimate[ind], x$SE[ind], ni)) < object@alpha - direct <- sapply(sum_dfs, function(x) diff_dir(x$Estimate[ind], unlist(object@coefs)[ind], ni)) + direct <- sapply(sum_dfs, function(x) diff_dir(x$Estimate[ind], coefs_no_rand[ind], ni)) mean(pcrit & direct, na.rm=T) }) @@ -269,11 +297,14 @@ setMethod("summary", "unmarkedPower", function(object, ...){ ni }) - out <- cbind(sum_dfs[[1]][,1:2], effect=unlist(object@coefs), null=all_nulls, power=pow) + effect_no_random <- unlist(object@coefs)[!grepl("b_",names(unlist(object@coefs)))] + + out <- cbind(sum_dfs[[1]][,1:2], effect=effect_no_random, null=all_nulls, power=pow) rownames(out) <- NULL names(out) <- c("Submodel", "Parameter", "Effect", "Null", "Power") out }) + setMethod("show", "unmarkedPower", function(object){ cat("\nModel:\n") print(object@call) diff --git a/R/predict.R b/R/predict.R index cd14637..68efbb7 100644 --- a/R/predict.R +++ b/R/predict.R @@ -132,9 +132,12 @@ setGeneric("get_formula", function(object, type, ...){ }) setMethod("get_formula", "unmarkedFit", function(object, type, ...){ - if(type == "state") return(as.formula(paste("~", object@formula[3], sep=""))) - if(type == "det") return(as.formula(object@formula[[2]])) - stop("Invalid type") + if(type == "state"){ + return(as.formula(paste("~", object@formula[3], sep=""))) + } else if(type == "det"){ + return(as.formula(object@formula[[2]])) + } + NULL }) # When newdata is data.frame/raster, get original dataset @@ -574,6 +577,12 @@ setMethod("get_orig_data", "unmarkedFitGDR", function(object, type, ...){ # bespoke predict method since it has numerious unusual options # and requires bootstrapping +# This method is used by simulate but not by predict +setMethod("get_formula", "unmarkedFitOccuMulti", function(object, type, ...){ + switch(type, state=object@stateformulas, + det=object@detformulas) +}) + setMethod("predict", "unmarkedFitOccuMulti", function(object, type, newdata, #backTransform = TRUE, na.rm = TRUE, @@ -748,6 +757,12 @@ setMethod("predict", "unmarkedFitOccuMulti", # bespoke predict method since it requires bootstrapping +# This method is used by simulate by not by predict +setMethod("get_formula", "unmarkedFitOccuMS", function(object, type, ...){ + switch(type, psi=object@psiformulas, phi=object@phiformulas, + det=object@detformulas) +}) + setMethod("predict", "unmarkedFitOccuMS", function(object, type, newdata, #backTransform = TRUE, na.rm = TRUE, diff --git a/R/simulate.R b/R/simulate.R index 3503ba5..5df7247 100644 --- a/R/simulate.R +++ b/R/simulate.R @@ -59,6 +59,69 @@ blank_umFit <- function(fit_function){ } +setMethod("simulate", "character", + function(object, nsim=1, seed=NULL, formulas, coefs=NULL, design, guide=NULL, ...){ + model <- blank_umFit(object) + fit <- suppressWarnings(simulate_fit(model, formulas, guide, design, ...)) + coefs <- check_coefs(coefs, fit) + coefs <- generate_random_effects(coefs, fit) + fit <- replace_estimates(fit, coefs) + ysims <- suppressWarnings(simulate(fit, nsim)) + umf <- fit@data + # fix this + umfs <- lapply(ysims, function(x){ + if(object=="occuMulti"){ + umf@ylist <- x + } else if(object=="gdistremoval"){ + umf@yDistance=x$yDistance + umf@yRemoval=x$yRemoval + } else { + umf@y <- x + } + umf + }) + if(length(umfs)==1) umfs <- umfs[[1]] + umfs +}) + + +generate_random_effects <- function(coefs, fit){ + required_subs <- names(fit@estimates@estimates) + formulas <- sapply(names(fit), function(x) get_formula(fit, x)) + rand <- lapply(formulas, lme4::findbars) + if(!all(sapply(rand, is.null))){ + rvar <- lapply(rand, function(x) unlist(lapply(x, all.vars))) + for (i in required_subs){ + if(!is.null(rand[[i]][[1]])){ + signame <- rvar[[i]] + old_coefs <- coefs[[i]] + new_coefs <- old_coefs[names(old_coefs)!=signame] + + # Find levels of factor variable + if(signame %in% names(siteCovs(fit@data))){ + lvldata <- siteCovs(fit@data)[[signame]] + } else if(signame %in% names(obsCovs(fit@data))){ + lvldata <- obsCovs(fit@data)[[signame]] + } else if(methods::.hasSlot(fit@data, "yearlySiteCovs") && signame %in% names(yearlySiteCovs(fit@data))){ + lvldata <- yearlySiteCovs(fit@data)[[signame]] + } else { + stop("Random effect covariate missing from data", call.=FALSE) + } + + if(!is.factor(lvldata)){ + stop("Random effect covariates must be specified as factors with guide argument", call.=FALSE) + } + b <- stats::rnorm(length(levels(lvldata)), 0, old_coefs[signame]) + names(b) <- rep(paste0("b_",i), length(b)) + new_coefs <- c(new_coefs, b) + coefs[[i]] <- new_coefs + } + } + } + coefs +} + + setGeneric("get_umf_components", function(object, ...) standardGeneric("get_umf_components")) setMethod("get_umf_components", "unmarkedFit", @@ -103,30 +166,6 @@ setMethod("simulate_fit", "unmarkedFitOccuRN", }) -setMethod("simulate", "character", - function(object, nsim=1, seed=NULL, formulas, coefs=NULL, design, guide=NULL, ...){ - model <- blank_umFit(object) - fit <- suppressWarnings(simulate_fit(model, formulas, guide, design, ...)) - coefs <- check_coefs(coefs, fit) - fit <- replace_estimates(fit, coefs) - ysims <- simulate(fit, nsim) - umf <- fit@data - # fix this - umfs <- lapply(ysims, function(x){ - if(object=="occuMulti"){ - umf@ylist <- x - } else if(object=="gdistremoval"){ - umf@yDistance=x$yDistance - umf@yRemoval=x$yRemoval - } else { - umf@y <- x - } - umf - }) - if(length(umfs)==1) umfs <- umfs[[1]] - umfs -}) - setMethod("get_umf_components", "unmarkedFitMPois", function(object, formulas, guide, design, ...){ args <- list(...) @@ -495,5 +534,5 @@ setMethod("simulate_fit", "unmarkedFitGDR", gdistremoval(lambdaformula=formulas$lambda, phiformula=formulas$phi, removalformula=formulas$rem, distanceformula=formulas$dist, data=umf, keyfun=keyfun, output=output, unitsOut=unitsOut, - mixture=mixture, K=K, se=FALSE, control=list(maxit=1)) + mixture=mixture, K=K, se=FALSE, control=list(maxit=1), method='L-BFGS-B') }) diff --git a/R/unmarkedFit.R b/R/unmarkedFit.R index 0a8d107..6557b9d 100644 --- a/R/unmarkedFit.R +++ b/R/unmarkedFit.R @@ -1631,13 +1631,13 @@ setMethod("getP", "unmarkedFitDS", umf <- object@data designMats <- getDesign(umf, formula, na.rm = na.rm) y <- designMats$y - V <- designMats$V + V <- cbind(designMats$V, designMats$Z_det) V.offset <- designMats$V.offset if (is.null(V.offset)) V.offset <- rep(0, nrow(V)) M <- nrow(y) J <- ncol(y) - ppars <- coef(object, type = "det") + ppars <- coef(object, type = "det", fixedOnly=FALSE) db <- umf@dist.breaks w <- diff(db) survey <- umf@survey @@ -1902,13 +1902,13 @@ setMethod("getP", "unmarkedFitMPois", function(object, na.rm = TRUE) umf <- object@data designMats <- getDesign(umf, formula, na.rm = na.rm) y <- designMats$y - V <- designMats$V + V <- as.matrix(cbind(designMats$V, designMats$Z_det)) V.offset <- designMats$V.offset if (is.null(V.offset)) V.offset <- rep(0, nrow(V)) M <- nrow(y) J <- obsNum(umf) #ncol(y) - ppars <- coef(object, type = "det") + ppars <- coef(object, type = "det", fixedOnly=FALSE) p <- plogis(V %*% ppars + V.offset) p <- matrix(p, M, J, byrow = TRUE) pi <- do.call(piFun, list(p = p)) @@ -2063,13 +2063,13 @@ setMethod("simulate", "unmarkedFitDS", w <- diff(db) designMats <- getDesign(umf, formula, na.rm = na.rm) y <- designMats$y - X <- designMats$X + X <- as.matrix(cbind(designMats$X, designMats$Z_state)) X.offset <- designMats$X.offset if (is.null(X.offset)) X.offset <- rep(0, nrow(X)) M <- nrow(y) J <- ncol(y) - lamParms <- coef(object, type = "state") + lamParms <- coef(object, type = "state", fixedOnly=FALSE) lambda <- drop(exp(X %*% lamParms + X.offset)) if(identical(object@output, "density")) { switch(umf@survey, @@ -2342,14 +2342,14 @@ setMethod("simulate", "unmarkedFitMPois", umf <- object@data designMats <- getDesign(umf, formula, na.rm = na.rm) y <- designMats$y - X <- designMats$X + X <- as.matrix(cbind(designMats$X, designMats$Z_state)) X.offset <- designMats$X.offset if (is.null(X.offset)) { X.offset <- rep(0, nrow(X)) } M <- nrow(y) J <- ncol(y) - lamParms <- coef(object, type = "state") + lamParms <- coef(object, type = "state", fixedOnly=FALSE) lam <- as.numeric(exp(X %*% lamParms + X.offset)) lamvec <- rep(lam, each = J) pivec <- as.vector(t(getP(object, na.rm = na.rm))) diff --git a/tests/testthat/test_distsamp.R b/tests/testthat/test_distsamp.R index 9c42119..9d6fe04 100644 --- a/tests/testthat/test_distsamp.R +++ b/tests/testthat/test_distsamp.R @@ -265,6 +265,7 @@ test_that("getP works with distsamp",{ test_that("distsamp works with random effects",{ + set.seed(123) data(linetran) umf <- unmarkedFrameDS(y=as.matrix(linetran[,1:4]), siteCovs=linetran[,6:7], survey="line", tlength=linetran$Length, unitsIn='m', @@ -301,4 +302,10 @@ test_that("distsamp works with random effects",{ pr <- lapply(mods, function(x) predict(x, "state")) expect_true(all(sapply(pr, inherits, "data.frame"))) + # Make sure simulate accounts for random effects + s <- simulate(hn, nsim=30) + avg <- apply(sapply(s, function(x) apply(x,1,sum)),1, mean) + # average first count and predicted abundance should be highly correlated + pr <- predict(hn, 'state') + expect_true(cor(avg, pr$Predicted) > 0.7) }) diff --git a/tests/testthat/test_multinomPois.R b/tests/testthat/test_multinomPois.R index ee51335..6b4b693 100644 --- a/tests/testthat/test_multinomPois.R +++ b/tests/testthat/test_multinomPois.R @@ -237,6 +237,12 @@ test_that("multinomPois can fit models with random effects",{ expect_equivalent(dim(pr), c(100, 4)) expect_equivalent(dim(pr2), c(5,4)) + # Make sure simulate accounts for random effects + s <- simulate(fm, nsim=30) + avg <- apply(sapply(s, function(x) x[,1]),1, mean) + # average first count and predicted abundance should be highly correlated + expect_true(cor(avg, pr$Predicted) > 0.7) + umf2@y[1,1] <- NA umf2@y[2,] <- NA umf2@siteCovs$x1[3] <- NA diff --git a/tests/testthat/test_powerAnalysis.R b/tests/testthat/test_powerAnalysis.R index fbf75a4..d316a64 100644 --- a/tests/testthat/test_powerAnalysis.R +++ b/tests/testthat/test_powerAnalysis.R @@ -61,6 +61,17 @@ test_that("powerAnalysis method works",{ pl <- unmarkedPowerList(template_model, effect_sizes, design=scenarios, nsim=10) expect_is(pl, "unmarkedPowerList") + # With random effect + set.seed(123) + rguide <- list(group=factor(levels=letters[1:20])) + rform <- list(state=~x+(1|group), det=~1) + rcf <- list(state=c(intercept=0, x=0.5, group=0.7), det=c(intercept=0)) + umfr <- simulate("occu", formulas=rform, design=design, coefs=rcf, guide=rguide) + fm <- occu(~1~x+(1|group), umfr) + pa5 <- powerAnalysis(fm, rcf, nsim=10) + s <- summary(pa5) + expect_equal(nrow(s), 3) + expect_equal(s$Power[2], 1) }) }) diff --git a/tests/testthat/test_simulate.R b/tests/testthat/test_simulate.R index c1cda3e..04f1686 100644 --- a/tests/testthat/test_simulate.R +++ b/tests/testthat/test_simulate.R @@ -34,6 +34,15 @@ test_that("simulate can generate new datasets from scratch",{ expect_true(is.factor(umf2@siteCovs$landcover)) expect_equivalent(mean(umf2@siteCovs$elev), 2.01722, tol=1e-5) + # With random effect + set.seed(123) + rguide <- list(group=factor(levels=letters[1:20])) + rform <- list(state=~(1|group), det=~1) + rcf <- list(state=c(intercept=0, group=0.7), det=c(intercept=0)) + umfr <- simulate("occu", formulas=rform, design=design, coefs=rcf, guide=rguide) + fm <- occu(~1~(1|group), umfr) + expect_equal(sigma(fm)$sigma, 0.6903913, tol=1e-5) + # pcount set.seed(123) cf$alpha <- c(alpha=0.5) |