diff options
-rw-r--r-- | DESCRIPTION | 1 | ||||
-rw-r--r-- | R/gdistremoval.R | 8 | ||||
-rw-r--r-- | R/getDesign.R | 4 | ||||
-rw-r--r-- | R/mixedModelTools.R | 248 | ||||
-rw-r--r-- | R/power.R | 2 | ||||
-rw-r--r-- | R/predict.R | 4 | ||||
-rw-r--r-- | R/simulate.R | 4 | ||||
-rw-r--r-- | tests/testthat/lme4_output.Rdata | bin | 0 -> 2701 bytes | |||
-rw-r--r-- | tests/testthat/test_mixed_models.R | 83 | ||||
-rw-r--r-- | tests/testthat/test_pcount.R | 14 |
10 files changed, 298 insertions, 70 deletions
diff --git a/DESCRIPTION b/DESCRIPTION index c7e5799..d42b5b9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,7 +23,6 @@ Depends: R (>= 4.0) Imports: graphics, lattice, - lme4, MASS, Matrix, methods, diff --git a/R/gdistremoval.R b/R/gdistremoval.R index b253625..db5439b 100644 --- a/R/gdistremoval.R +++ b/R/gdistremoval.R @@ -199,19 +199,19 @@ setMethod("getDesign", "unmarkedFrameGDR", if(return.frames) return(list(sc=sc, ysc=ysc, oc=oc)) - lam_fixed <- lme4::nobars(formula$lambdaformula) + lam_fixed <- remove_bars(formula$lambdaformula) Xlam <- model.matrix(lam_fixed, model.frame(lam_fixed, sc, na.action=NULL)) - phi_fixed <- lme4::nobars(formula$phiformula) + phi_fixed <- remove_bars(formula$phiformula) Xphi <- model.matrix(phi_fixed, model.frame(phi_fixed, ysc, na.action=NULL)) - dist_fixed <- lme4::nobars(formula$distanceformula) + dist_fixed <- remove_bars(formula$distanceformula) Xdist <- model.matrix(dist_fixed, model.frame(dist_fixed, ysc, na.action=NULL)) - rem_fixed <- lme4::nobars(formula$removalformula) + rem_fixed <- remove_bars(formula$removalformula) Xrem <- model.matrix(rem_fixed, model.frame(rem_fixed, oc, na.action=NULL)) diff --git a/R/getDesign.R b/R/getDesign.R index 8597b2d..7505bf4 100644 --- a/R/getDesign.R +++ b/R/getDesign.R @@ -10,8 +10,8 @@ setGeneric("handleNA", function(umf, ...) standardGeneric("handleNA")) setMethod("getDesign", "unmarkedFrame", function(umf, formula, na.rm=TRUE) { - detformula <- lme4::nobars(as.formula(formula[[2]])) - stateformula <- lme4::nobars(as.formula(paste("~", formula[3], sep=""))) + detformula <- remove_bars(as.formula(formula[[2]])) + stateformula <- remove_bars(as.formula(paste("~", formula[3], sep=""))) detVars <- all.vars(detformula) M <- numSites(umf) diff --git a/R/mixedModelTools.R b/R/mixedModelTools.R index 8c6a96d..985d06e 100644 --- a/R/mixedModelTools.R +++ b/R/mixedModelTools.R @@ -1,50 +1,219 @@ -get_xlev <- function(data, model_frame){ - fac_col <- data[, sapply(data, is.factor), drop=FALSE] - xlevs <- lapply(fac_col, levels) - xlevs[names(xlevs) %in% names(model_frame)] -} - +# Generate required random effects info---------------------------------------- +# Sort-of drop-in replacement for lme4::mkReTrms get_reTrms <- function(formula, data, newdata=NULL){ - fb <- lme4::findbars(formula) - mf <- model.frame(lme4::subbars(formula), data, na.action=stats::na.pass) - if(is.null(newdata)) return(lme4::mkReTrms(fb, mf)) - new_mf <- model.frame(stats::terms(mf), newdata, na.action=stats::na.pass, - xlev=get_xlev(data, mf)) - lme4::mkReTrms(fb, new_mf, drop.unused.levels=FALSE) + if(!has_random(formula)){ + stop("No random effect terms in formula", call.=FALSE) + } + cnms <- get_cnms(formula) + # TODO: check these are factors + flist <- lapply(unique(names(cnms)), function(x){ + out <- data[[x]] + if(!is.factor(out)) out <- factor(out) + out + }) + names(flist) <- unique(names(cnms)) + list(Z = get_Z(formula, data), cnms = cnms, flist=flist) } +#Create Z random effects matrix from a formula, possibly using newdata--------- get_Z <- function(formula, data, newdata=NULL){ - if(is.null(lme4::findbars(formula))){ + # If no random effects in formula, return an empty Matrix + if(!has_random(formula)){ if(is.null(newdata)){ return(Matrix::Matrix(matrix(0, nrow=nrow(data), ncol=0),sparse=TRUE)) } else{ return(Matrix::Matrix(matrix(0, nrow=nrow(newdata), ncol=0),sparse=TRUE)) } } - check_formula(formula, data) - Zt <- get_reTrms(formula, data, newdata)$Zt - Z <- t(as.matrix(Zt)) + + # Get new formula + bars <- find_bars(formula) + check_duplicate_terms(bars) + + # Get partial Z for each bar expression + Z_parts <- lapply(bars, get_partial_Z, data=data, newdata=newdata) + + # Create model frame + #mf <- model.frame(new_form, data, na.action=stats::na.pass) + #if(!is.null(newdata)){ + # mf <- model.frame(stats::terms(mf), newdata, na.action=stats::na.pass, + # xlev=get_xlev(data, mf)) + #} + + # Create sparse Z matrix + #Z <- model.matrix(new_form, mf, contrasts.arg=cont) + Z <- do.call(cbind, Z_parts) + colnames(Z) <- Z_colnames(formula, data) Matrix::Matrix(Z, sparse=TRUE) } -get_group_vars <- function(formula){ - rand <- lme4::findbars(formula) - ifelse(is.null(rand), 0, length(rand)) +# Determine if formula has random effects specified---------------------------- +has_random <- function(form){ + length(find_bars(form)) > 0 } -get_nrandom <- function(formula, data){ - rand <- lme4::findbars(formula) - if(length(rand)==0) return(as.array(0)) +# Find 'bar' random effect parts of formula (i.e., the (x|y) structures)------- +# Operates recursively +find_bars <- function(form){ + if(is.null(form)) return(NULL) + if(is.name(form)) return(NULL) + if(form[[1]] == as.name("(")) return(form) + if(is.call(form)){ + out <- return(unlist(lapply(form, find_bars))) + } + NULL +} - out <- sapply(rand, function(x){ - col_nm <- as.character(x[[3]]) - length(unique(data[[col_nm]])) +# Convert bar components into new formula-------------------------------------- +# E.g. (1|g) becomes ~g - 1 +bar_to_formula <- function(bar){ + bar_terms <- bar_to_terms(bar) + as.formula(str2lang(paste("~", paste(bar_terms, collapse = " + "), "- 1"))) +} + +# Translate bar components into standard formula terms------------------------- +bar_to_terms <- function(bar){ + info <- get_bar_info(bar) + new_terms <- sapply(info$LHS, function(x){ + if(x == "1") return(info$RHS) + paste0(x, ":", info$RHS) + }) + paste(new_terms, collapse=" + ") +} + +# Organize information in a bar expression into a list------------------------- +get_bar_info <- function(bar){ + out <- list( + operator = deparse(bar[[2]][[1]]), + LHS = terms_in_bar(bar, RHS=FALSE), + RHS = terms_in_bar(bar, RHS=TRUE) + ) + check_bar_info(out) +} + +terms_in_bar <- function(bars, RHS=FALSE){ + bars_sub <- bars[[2]][[2]] + if(RHS) bars_sub <- bars[[2]][[3]] + form <- formula(substitute(~X, list(X=bars_sub))) + trms <- attr(stats::terms(form), "term.labels") + int <- attr(stats::terms(form), "intercept") + if(int == 1 & !RHS) trms <- c("1", trms) + trms +} + +# Check bar info to make sure the expression are allowed----------------------- +# For example unmarked doesn't support correlated random effects with | +check_bar_info <- function(info){ + if(info$operator == "|" & length(info$LHS) > 1){ + stop("Correlated random effects not supported, use || instead of |", call.=FALSE) + } + if(any(grepl(":", info$RHS)) | any(grepl("/", info$RHS))){ + stop("Nested random effects notation (: or /) not supported", call.=FALSE) + } + stopifnot(length(info$RHS) == 1) + info +} + +# Check terms to make sure there aren't any duplicates------------------------- +# E.g. as a result of a formula like ~ (1|g) + (x||) where the intercept +# is also implied in the second bar expression +check_duplicate_terms <- function(bars){ + bar_terms <- lapply(bars, bar_to_terms) + all_terms <- lapply(bar_terms, function(x){ + unlist(strsplit(x, " + ", fixed=TRUE)) + }) + all_terms <- unlist(all_terms) + dups <- duplicated(all_terms) + if(any(dups)){ + stop("Formula implies duplicate terms: ", paste0(all_terms[dups], collapse=", "), + call.=FALSE) + } + invisible() +} + + +# Get partial Z for a given bar expression------------------------------------- +get_partial_Z <- function(bar, data, newdata){ + info <- get_bar_info(bar) + formula <- bar_to_formula(bar) + mf <- model.frame(formula, data, na.action=stats::na.pass) + if(!is.null(newdata)){ + mf <- model.frame(stats::terms(mf), newdata, na.action=stats::na.pass, + xlev=get_xlev(data, mf)) + } + has_fac <- any(sapply(info$LHS[info$LHS != "1"], function(x) is.factor(data[[x]]))) + if(has_fac){ + stop("unmarked does not support random slopes for R factors.\n", + "Try converting the variable to a series of indicator variables instead.", call.=FALSE) + } + out <- model.matrix(formula, mf) +} + +# Get levels of factor--------------------------------------------------------- +# For use in specifying model frame +get_xlev <- function(data, model_frame){ + fac_col <- data[, sapply(data, is.factor), drop=FALSE] + xlevs <- lapply(fac_col, levels) + xlevs[names(xlevs) %in% names(model_frame)] +} + +# Generate colnames for Z to match what lme4::mkReTrms does-------------------- +Z_colnames <- function(formula, data){ + bars <- find_bars(formula) + info <- lapply(bars, get_bar_info) + groups <- sapply(info, function(x) x$RHS) + nreps <- sapply(info, function(x) length(x$LHS)) + lvls <- lapply(1:length(groups), function(i){ + group_dat <- data[[groups[i]]] + if(!is.factor(group_dat)) group_dat <- as.factor(group_dat) + rep(levels(group_dat), nreps[i]) + }) + unlist(lvls) +} + +# Get 'cnms' - random effect names - as with lme4::mkReTrms-------------------- +get_cnms <- function(formula){ + bars <- find_bars(formula) + info <- lapply(bars, get_bar_info) + cnms <- lapply(info, function(x){ + out <- lapply(x$LHS, function(y) ifelse(y == "1", "(Intercept)", y)) + names(out) <- rep(x$RHS, length(out)) + out + }) + do.call(c, cnms) +} + +# Get number of random effect SDs/variances------------------------------------ +get_nrandom <- function(formula, data){ + if(!has_random(formula)) return(as.array(0)) + cnms <- get_cnms(formula) + out <- sapply(names(cnms), function(x){ + length(unique(data[[x]])) }) as.array(out) } -has_random <- function(formula){ - length(lme4::findbars(formula)) > 0 +# Get number of grouping variables--------------------------------------------- +get_group_vars <- function(formula){ + if(!has_random(formula)) return(0) + cnms <- get_cnms(formula) + length(cnms) +} + +# Check if function has no support for random effects-------------------------- +check_no_support <- function(formula_list){ + has_bars <- any(sapply(formula_list, has_random)) + if(has_bars){ + stop("This function does not support random effects", call.=FALSE) + } +} + +# Remove all bar components from a formula------------------------------------- +remove_bars <- function(formula){ + s1 <- gsub("\\([^)]+\\|[^)]+\\) ?\\+?", "", deparse1(formula)) + s2 <- gsub(" \\+ {0,}$", "", s1) + if(s2 == "~") return(~1) + as.formula(str2lang(s2)) } sigma_names <- function(formula, data){ @@ -56,22 +225,6 @@ sigma_names <- function(formula, data){ nms } -check_formula <- function(formula, data){ - rand <- lme4::findbars(formula) - if(is.null(rand)) return(invisible()) - - char <- paste(formula, collapse=" ") - if(grepl(":|/", char)){ - stop("Nested random effects (using / and :) are not supported", - call.=FALSE) - } - theta <- get_reTrms(formula, data)$theta - if(0 %in% theta){ - stop("Correlated slopes and intercepts are not supported. Use || instead of |.", - call.=FALSE) - } -} - split_formula <- function(formula){ if(length(formula) != 3) stop("Double right-hand side formula required") char <- lapply(formula, function(x){ @@ -135,7 +288,7 @@ get_randvar_info <- function(tmb_report, type, formula, data){ list(names=sigma_names(formula, data), estimates=sigma_est, covMat=sigma_cov, invlink="exp", invlinkGrad="exp", n_obs=nrow(data), n_levels=lapply(re$flist, function(x) length(levels(x))), cnms=re$cnms, - levels=rownames(re$Zt)) + levels=colnames(re$Z)) } get_fixed_names <- function(tmb_report){ @@ -158,13 +311,6 @@ print_randvar_info <- function(object){ #cat(paste0("Number of obs: ",object$n_obs,", groups: ",group_info,"\n")) } -check_no_support <- function(formula_list){ - has_bars <- any(sapply(formula_list, function(x) !is.null(lme4::findbars(x)))) - if(has_bars){ - stop("This function does not support random effects", call.=FALSE) - } -} - fit_TMB <- function(model, data, params, random, starts, method, ...){ @@ -163,7 +163,7 @@ check_coefs <- function(coefs, fit, template=FALSE){ # If there are random effects, adjust the expected coefficient names # to remove the b vector and add the grouping covariate name - rand <- lapply(formulas, lme4::findbars) + rand <- lapply(formulas, find_bars) if(!all(sapply(rand, is.null))){ stopifnot(all(required_subs %in% names(formulas))) rvar <- lapply(rand, function(x) unlist(lapply(x, all.vars))) diff --git a/R/predict.R b/R/predict.R index 4f3b5fb..10bf4fc 100644 --- a/R/predict.R +++ b/R/predict.R @@ -55,7 +55,7 @@ setMethod("predict", "unmarkedFit", # This function makes sure factor levels in newdata match, and that # any functions in the formula are handled properly (e.g. scale) make_mod_matrix <- function(formula, data, newdata, re.form=NULL){ - form_nobars <- lme4::nobars(formula) + form_nobars <- remove_bars(formula) mf <- model.frame(form_nobars, data, na.action=stats::na.pass) X.terms <- stats::terms(mf) fac_cols <- data[, sapply(data, is.factor), drop=FALSE] @@ -65,7 +65,7 @@ make_mod_matrix <- function(formula, data, newdata, re.form=NULL){ #X <- model.matrix(X.terms, newdata, xlev=xlevs) X <- model.matrix(form_nobars, nmf) offset <- model.offset(nmf) - if(is.null(re.form) & !is.null(lme4::findbars(formula))){ + if(is.null(re.form) & has_random(formula)){ Z <- get_Z(formula, data, newdata) X <- cbind(X, Z) } diff --git a/R/simulate.R b/R/simulate.R index 3868a72..b73f3fd 100644 --- a/R/simulate.R +++ b/R/simulate.R @@ -104,7 +104,7 @@ setMethod("simulate", "character", #replace_sigma <- function(coefs, fit){ # required_subs <- names(fit@estimates@estimates) # formulas <- sapply(names(fit), function(x) get_formula(fit, x)) -# rand <- lapply(formulas, lme4::findbars) +# rand <- lapply(formulas, find_bars) # if(!all(sapply(rand, is.null))){ # rvar <- lapply(rand, function(x) unlist(lapply(x, all.vars))) # for (i in required_subs){ @@ -121,7 +121,7 @@ setMethod("simulate", "character", generate_random_effects <- function(coefs, fit){ required_subs <- names(fit@estimates@estimates) formulas <- sapply(names(fit), function(x) get_formula(fit, x)) - rand <- lapply(formulas, lme4::findbars) + rand <- lapply(formulas, find_bars) if(!all(sapply(rand, is.null))){ rvar <- lapply(rand, function(x) unlist(lapply(x, all.vars))) for (i in required_subs){ diff --git a/tests/testthat/lme4_output.Rdata b/tests/testthat/lme4_output.Rdata Binary files differnew file mode 100644 index 0000000..a211494 --- /dev/null +++ b/tests/testthat/lme4_output.Rdata diff --git a/tests/testthat/test_mixed_models.R b/tests/testthat/test_mixed_models.R new file mode 100644 index 0000000..4cf3b88 --- /dev/null +++ b/tests/testthat/test_mixed_models.R @@ -0,0 +1,83 @@ +context("mixed model tools") + +test_that("get_reTrms matches lme4::mkReTrms", { + + #skip_if(!requireNamespace("lme4", quietly=TRUE), + # "lme4 package unavailable") + set.seed(123) + dat <- data.frame(x = rnorm(20), y = rnorm(20), z = factor(sample(letters[1:3], 20, replace=T)), + group = factor(sample(letters[4:6], 20, replace=T)), + id = factor(sample(letters[7:9], 20, replace=T))) + + load('lme4_output.Rdata') + form1 <- ~x + (1|group) + #l1 <- lme4::mkReTrms(lme4::findbars(form1), dat) + r1 <- get_reTrms(form1, dat) + expect_identical(r1$Z, Matrix::t(l1$Zt)) + expect_identical(r1$cnms, l1$cnms) + attributes(l1$flist)$assign <- NULL + expect_identical(r1$flist, l1$flist) + + form2 <- ~x + (x||group) + #l2 <- lme4::mkReTrms(lme4::findbars(form2), dat) + r2 <- get_reTrms(form2, dat) + expect_identical(r2$Z, Matrix::t(l2$Zt)) + expect_identical(r2$cnms, l2$cnms) + attributes(l2$flist)$assign <- NULL + expect_identical(r2$flist, l2$flist) + + form3 <- ~x + (x||group) + (1|id) + #l3 <- lme4::mkReTrms(lme4::findbars(form3), dat) + r3 <- get_reTrms(form3, dat) + expect_identical(r3$Z, Matrix::t(l3$Zt)) + expect_identical(r3$cnms, l3$cnms) + attributes(l3$flist)$assign <- NULL + expect_identical(r3$flist, l3$flist) + + form4 <- ~x + (x||group) + (y||id) + #l4 <- lme4::mkReTrms(lme4::findbars(form4), dat) + r4 <- get_reTrms(form4, dat) + expect_identical(r4$Z, Matrix::t(l4$Zt)) + expect_identical(r4$cnms, l4$cnms) + attributes(l4$flist)$assign <- NULL + expect_identical(r4$flist, l4$flist) + + form5 <- ~x + (x*y||group) + (y||id) + #l5 <- lme4::mkReTrms(lme4::findbars(form5), dat) + r5 <- get_reTrms(form5, dat) + expect_identical(r5$Z, Matrix::t(l5$Zt)) + expect_identical(r5$cnms, l5$cnms) + attributes(l5$flist)$assign <- NULL + expect_identical(r5$flist, l5$flist) + + form6 <- ~(1|group) + #l6 <- lme4::mkReTrms(lme4::findbars(form6), dat) + r6 <- get_reTrms(form6, dat) + expect_identical(r6$Z, Matrix::t(l6$Zt)) + expect_identical(r6$cnms, l6$cnms) + attributes(l6$flist)$assign <- NULL + expect_identical(r6$flist, l6$flist) + + form7 <- ~(1|group) + x + #l7 <- lme4::mkReTrms(lme4::findbars(form7), dat) + r7 <- get_reTrms(form7, dat) + expect_identical(r7$Z, Matrix::t(l7$Zt)) + expect_identical(r7$cnms, l7$cnms) + attributes(l7$flist)$assign <- NULL + expect_identical(r7$flist, l7$flist) + + #save(l1,l2,l3,l4,l5,l6,l7, file='lme4_output.Rdata') +}) + +test_that("get_reTrms errors correctly", { + form1 <- ~x + (x+z||group) + (y||id) + expect_error(get_reTrms(form1, dat)) + + form1 <- ~x + (x|group:id) + expect_error(get_reTrms(form1, dat)) + + form1 <- ~x + (x|group/id) + expect_error(get_reTrms(form1, dat)) + + expect_identical(find_bars(NULL), NULL) +}) diff --git a/tests/testthat/test_pcount.R b/tests/testthat/test_pcount.R index 6db9966..35c9069 100644 --- a/tests/testthat/test_pcount.R +++ b/tests/testthat/test_pcount.R @@ -93,7 +93,7 @@ test_that("pcount predict works",{ test_that("pcount can fit models with random effects",{ -set.seed(35) +set.seed(12) nSites <- 300 nVisits <- 3 x <- rnorm(nSites) # a covariate @@ -101,7 +101,7 @@ beta0 <- 0 beta1 <- 0.4 ran <- rnorm(100, 0, 1) -group <- factor(as.character(rep(1:100, each=3))) +group <- factor(as.character(rep(1:100, each=3)), levels=1:100) ran_ind <- as.numeric(group) lambda <- exp(beta0 + beta1*x + @@ -124,19 +124,19 @@ expect_is(fm, "unmarkedFitPCount") fmr <- pcount(~visit~x+(1|group), umf, K=25) -expect_equivalent(coef(fmr), c(0.05397,0.3197,-0.8760,1.3668,2.078), - tol=1e-3) +expect_equivalent(coef(fmr), c(-0.13648,0.3355,-0.9202,1.4680,2.47077), + tol=1e-4) expect_true(inherits(sigma(fmr), 'data.frame')) -expect_equal(sigma(fmr)$sigma, 1.05945, tol=1e-5) +expect_equal(sigma(fmr)$sigma, 1.038204, tol=1e-5) pr <- predict(fmr, "state") expect_equivalent(as.numeric(pr[1,]), - c(1.037050,0.58179,0.3453,3.1140), tol=1e-3) + c(0.31838, 0.20989, 0.087458,1.15905), tol=1e-3) pr2 <- predict(fmr, "state", re.form=NA) expect_equivalent(as.numeric(pr2[1,]), - c(1.48366,0.2011,1.1374,1.93255), tol=1e-3) + c(0.53086,0.090768,0.37970,0.74220), tol=1e-3) pr3 <- predict(fmr, "det") expect_true(inherits(pr3, "data.frame")) |