diff options
author | Ken Kellner <ken@kenkellner.com> | 2023-12-10 14:56:09 -0500 |
---|---|---|
committer | Ken Kellner <ken@kenkellner.com> | 2023-12-10 14:56:09 -0500 |
commit | 684496cec413c7566ea695ef3f1304983c52238c (patch) | |
tree | 305a13e6b451c84c3a29108cdd960a28a787f03f | |
parent | b4234c9934a114f8ae86a423b363e595081ae868 (diff) |
Initial working lme4 replacement
-rw-r--r-- | DESCRIPTION | 1 | ||||
-rw-r--r-- | R/getDesign.R | 4 | ||||
-rw-r--r-- | R/mixedModelTools.R | 241 | ||||
-rw-r--r-- | R/predict.R | 4 | ||||
-rw-r--r-- | tests/testthat/test_pcount.R | 14 |
5 files changed, 201 insertions, 63 deletions
diff --git a/DESCRIPTION b/DESCRIPTION index d59b985..ecfedd1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,6 @@ Depends: R (>= 4.0) Imports: graphics, lattice, - lme4, MASS, Matrix, methods, diff --git a/R/getDesign.R b/R/getDesign.R index 8597b2d..7505bf4 100644 --- a/R/getDesign.R +++ b/R/getDesign.R @@ -10,8 +10,8 @@ setGeneric("handleNA", function(umf, ...) standardGeneric("handleNA")) setMethod("getDesign", "unmarkedFrame", function(umf, formula, na.rm=TRUE) { - detformula <- lme4::nobars(as.formula(formula[[2]])) - stateformula <- lme4::nobars(as.formula(paste("~", formula[3], sep=""))) + detformula <- remove_bars(as.formula(formula[[2]])) + stateformula <- remove_bars(as.formula(paste("~", formula[3], sep=""))) detVars <- all.vars(detformula) M <- numSites(umf) diff --git a/R/mixedModelTools.R b/R/mixedModelTools.R index 8c6a96d..4581f1e 100644 --- a/R/mixedModelTools.R +++ b/R/mixedModelTools.R @@ -1,50 +1,212 @@ -get_xlev <- function(data, model_frame){ - fac_col <- data[, sapply(data, is.factor), drop=FALSE] - xlevs <- lapply(fac_col, levels) - xlevs[names(xlevs) %in% names(model_frame)] -} - +# Generate required random effects info---------------------------------------- +# Sort-of drop-in replacement for lme4::mkReTrms get_reTrms <- function(formula, data, newdata=NULL){ - fb <- lme4::findbars(formula) - mf <- model.frame(lme4::subbars(formula), data, na.action=stats::na.pass) - if(is.null(newdata)) return(lme4::mkReTrms(fb, mf)) - new_mf <- model.frame(stats::terms(mf), newdata, na.action=stats::na.pass, - xlev=get_xlev(data, mf)) - lme4::mkReTrms(fb, new_mf, drop.unused.levels=FALSE) + if(!has_random(formula)){ + stop("No random effect terms in formula", call.=FALSE) + } + cnms <- get_cnms(formula) + # TODO: check these are factors + flist <- lapply(unique(names(cnms)), function(x){ + out <- data[[x]] + if(!is.factor(out)) out <- factor(out) + out + }) + names(flist) <- unique(names(cnms)) + list(Z = get_Z(formula, data), cnms = cnms, flist=flist) } +#Create Z random effects matrix from a formula, possibly using newdata--------- get_Z <- function(formula, data, newdata=NULL){ - if(is.null(lme4::findbars(formula))){ + # If no random effects in formula, return an empty Matrix + if(!has_random(formula)){ if(is.null(newdata)){ return(Matrix::Matrix(matrix(0, nrow=nrow(data), ncol=0),sparse=TRUE)) } else{ return(Matrix::Matrix(matrix(0, nrow=nrow(newdata), ncol=0),sparse=TRUE)) } } - check_formula(formula, data) - Zt <- get_reTrms(formula, data, newdata)$Zt - Z <- t(as.matrix(Zt)) + + # Get new formula + bars <- find_bars(formula) + new_form <- bars_to_formula(bars) + + # Get partial Z for each bar expression + form_list <- lapply(1:length(bars), function(x) bars_to_formula(bars[x])) + Z_parts <- lapply(form_list, get_partial_Z, data=data, newdata=newdata) + + # Create model frame + #mf <- model.frame(new_form, data, na.action=stats::na.pass) + #if(!is.null(newdata)){ + # mf <- model.frame(stats::terms(mf), newdata, na.action=stats::na.pass, + # xlev=get_xlev(data, mf)) + #} + + # Create sparse Z matrix + #Z <- model.matrix(new_form, mf) + Z <- do.call(cbind, Z_parts) + colnames(Z) <- Z_colnames(formula, data) Matrix::Matrix(Z, sparse=TRUE) } -get_group_vars <- function(formula){ - rand <- lme4::findbars(formula) - ifelse(is.null(rand), 0, length(rand)) +# Determine if formula has random effects specified---------------------------- +has_random <- function(form){ + length(find_bars(form)) > 0 } -get_nrandom <- function(formula, data){ - rand <- lme4::findbars(formula) - if(length(rand)==0) return(as.array(0)) +# Find 'bar' random effect parts of formula (i.e., the (x|y) structures)------- +# Operates recursively +find_bars <- function(form){ + out <- NULL + if(is.name(form)) return(NULL) + if(form[[1]] == as.name("(")) return(form) + if(is.call(form)){ + out <- lapply(form, find_bars) + } + unlist(out) +} + +# Convert bar components into new formula-------------------------------------- +# E.g. ~x + (1|g) becomes ~g - 1 +bars_to_formula <- function(bars){ + bar_terms <- lapply(bars, bars_to_terms) + check_duplicate_terms(bar_terms) + as.formula(str2lang(paste("~", paste(bar_terms, collapse = " + "), "- 1"))) +} + +# Translate bar components into standard formula terms------------------------- +bars_to_terms <- function(bars){ + info <- get_bar_info(bars) + new_terms <- sapply(info$LHS, function(x){ + if(x == "1") return(info$RHS) + paste0(x, ":", info$RHS) + }) + paste(new_terms, collapse=" + ") +} + +# Organize information in a bar expression into a list------------------------- +get_bar_info <- function(bar){ + out <- list( + operator = deparse(bar[[2]][[1]]), + LHS = terms_in_bar(bar, RHS=FALSE), + RHS = terms_in_bar(bar, RHS=TRUE) + ) + check_bar_info(out) +} + +terms_in_bar <- function(bars, RHS=FALSE){ + bars_sub <- bars[[2]][[2]] + if(RHS) bars_sub <- bars[[2]][[3]] + form <- formula(substitute(~X, list(X=bars_sub))) + trms <- attr(terms(form), "term.labels") + int <- attr(terms(form), "intercept") + if(int == 1 & !RHS) trms <- c("1", trms) + trms +} - out <- sapply(rand, function(x){ - col_nm <- as.character(x[[3]]) - length(unique(data[[col_nm]])) +# Check bar info to make sure the expression are allowed----------------------- +# For example unmarked doesn't support correlated random effects with | +check_bar_info <- function(info){ + if(info$operator == "|" & length(info$LHS) > 1){ + stop("Correlated random effects not supported, use || instead of |", call.=FALSE) + } + if(any(grepl(":", info$RHS)) | any(grepl("/", info$RHS))){ + stop("Nested random effects notation (: or /) not supported", call.=FALSE) + } + stopifnot(length(info$RHS) == 1) + info +} + +# Check terms to make sure there aren't any duplicates------------------------- +# E.g. as a result of a formula like ~ (1|g) + (x||) where the intercept +# is also implied in the second bar expression +check_duplicate_terms <- function(bar_terms){ + all_terms <- lapply(bar_terms, function(x){ + unlist(strsplit(x, " + ", fixed=TRUE)) + }) + all_terms <- unlist(all_terms) + dups <- duplicated(all_terms) + if(any(dups)){ + stop("Formula implies duplicate terms: ", paste0(all_terms[dups], collapse=", "), + call.=FALSE) + } + invisible() +} + +# Get partial Z for a given bar expression------------------------------------- +get_partial_Z <- function(formula, data, newdata){ + mf <- model.frame(formula, data, na.action=stats::na.pass) + if(!is.null(newdata)){ + mf <- model.frame(stats::terms(mf), newdata, na.action=stats::na.pass, + xlev=get_xlev(data, mf)) + } + model.matrix(formula, mf) +} + +# Get levels of factor--------------------------------------------------------- +# For use in specifying model frame +get_xlev <- function(data, model_frame){ + fac_col <- data[, sapply(data, is.factor), drop=FALSE] + xlevs <- lapply(fac_col, levels) + xlevs[names(xlevs) %in% names(model_frame)] +} + +# Generate colnames for Z to match what lme4::mkReTrms does-------------------- +Z_colnames <- function(formula, data){ + bars <- find_bars(formula) + info <- lapply(bars, get_bar_info) + groups <- sapply(info, function(x) x$RHS) + nreps <- sapply(info, function(x) length(x$LHS)) + lvls <- lapply(1:length(groups), function(i){ + group_dat <- data[[groups[i]]] + if(!is.factor(group_dat)) group_dat <- as.factor(group_dat) + rep(levels(group_dat), nreps[i]) + }) + unlist(lvls) +} + +# Get 'cnms' - random effect names - as with lme4::mkReTrms-------------------- +get_cnms <- function(formula){ + bars <- find_bars(formula) + info <- lapply(bars, get_bar_info) + cnms <- lapply(info, function(x){ + out <- lapply(x$LHS, function(y) ifelse(y == "1", "(Intercept)", y)) + names(out) <- rep(x$RHS, length(out)) + out + }) + do.call(c, cnms) +} + +# Get number of random effect SDs/variances------------------------------------ +get_nrandom <- function(formula, data){ + if(!has_random(formula)) return(as.array(0)) + cnms <- get_cnms(formula) + out <- sapply(names(cnms), function(x){ + length(unique(data[[x]])) }) as.array(out) } -has_random <- function(formula){ - length(lme4::findbars(formula)) > 0 +# Get number of grouping variables--------------------------------------------- +get_group_vars <- function(formula){ + if(!has_random(formula)) return(0) + cnms <- get_cnms(formula) + length(cnms) +} + +# Check if function has no support for random effects-------------------------- +check_no_support <- function(formula_list){ + has_bars <- any(sapply(formula_list, has_random)) + if(has_bars){ + stop("This function does not support random effects", call.=FALSE) + } +} + +# Remove all bar components from a formula------------------------------------- +remove_bars <- function(formula){ + s1 <- gsub("\\([^)]+\\|[^)]+\\) ?\\+?", "", deparse1(formula)) + s2 <- gsub(" \\+ {0,}$", "", s1) + if(s2 == "~") return(~1) + as.formula(str2lang(s2)) } sigma_names <- function(formula, data){ @@ -56,22 +218,6 @@ sigma_names <- function(formula, data){ nms } -check_formula <- function(formula, data){ - rand <- lme4::findbars(formula) - if(is.null(rand)) return(invisible()) - - char <- paste(formula, collapse=" ") - if(grepl(":|/", char)){ - stop("Nested random effects (using / and :) are not supported", - call.=FALSE) - } - theta <- get_reTrms(formula, data)$theta - if(0 %in% theta){ - stop("Correlated slopes and intercepts are not supported. Use || instead of |.", - call.=FALSE) - } -} - split_formula <- function(formula){ if(length(formula) != 3) stop("Double right-hand side formula required") char <- lapply(formula, function(x){ @@ -135,7 +281,7 @@ get_randvar_info <- function(tmb_report, type, formula, data){ list(names=sigma_names(formula, data), estimates=sigma_est, covMat=sigma_cov, invlink="exp", invlinkGrad="exp", n_obs=nrow(data), n_levels=lapply(re$flist, function(x) length(levels(x))), cnms=re$cnms, - levels=rownames(re$Zt)) + levels=colnames(re$Z)) } get_fixed_names <- function(tmb_report){ @@ -158,13 +304,6 @@ print_randvar_info <- function(object){ #cat(paste0("Number of obs: ",object$n_obs,", groups: ",group_info,"\n")) } -check_no_support <- function(formula_list){ - has_bars <- any(sapply(formula_list, function(x) !is.null(lme4::findbars(x)))) - if(has_bars){ - stop("This function does not support random effects", call.=FALSE) - } -} - fit_TMB <- function(model, data, params, random, starts, method, ...){ diff --git a/R/predict.R b/R/predict.R index 4f3b5fb..10bf4fc 100644 --- a/R/predict.R +++ b/R/predict.R @@ -55,7 +55,7 @@ setMethod("predict", "unmarkedFit", # This function makes sure factor levels in newdata match, and that # any functions in the formula are handled properly (e.g. scale) make_mod_matrix <- function(formula, data, newdata, re.form=NULL){ - form_nobars <- lme4::nobars(formula) + form_nobars <- remove_bars(formula) mf <- model.frame(form_nobars, data, na.action=stats::na.pass) X.terms <- stats::terms(mf) fac_cols <- data[, sapply(data, is.factor), drop=FALSE] @@ -65,7 +65,7 @@ make_mod_matrix <- function(formula, data, newdata, re.form=NULL){ #X <- model.matrix(X.terms, newdata, xlev=xlevs) X <- model.matrix(form_nobars, nmf) offset <- model.offset(nmf) - if(is.null(re.form) & !is.null(lme4::findbars(formula))){ + if(is.null(re.form) & has_random(formula)){ Z <- get_Z(formula, data, newdata) X <- cbind(X, Z) } diff --git a/tests/testthat/test_pcount.R b/tests/testthat/test_pcount.R index 6db9966..35c9069 100644 --- a/tests/testthat/test_pcount.R +++ b/tests/testthat/test_pcount.R @@ -93,7 +93,7 @@ test_that("pcount predict works",{ test_that("pcount can fit models with random effects",{ -set.seed(35) +set.seed(12) nSites <- 300 nVisits <- 3 x <- rnorm(nSites) # a covariate @@ -101,7 +101,7 @@ beta0 <- 0 beta1 <- 0.4 ran <- rnorm(100, 0, 1) -group <- factor(as.character(rep(1:100, each=3))) +group <- factor(as.character(rep(1:100, each=3)), levels=1:100) ran_ind <- as.numeric(group) lambda <- exp(beta0 + beta1*x + @@ -124,19 +124,19 @@ expect_is(fm, "unmarkedFitPCount") fmr <- pcount(~visit~x+(1|group), umf, K=25) -expect_equivalent(coef(fmr), c(0.05397,0.3197,-0.8760,1.3668,2.078), - tol=1e-3) +expect_equivalent(coef(fmr), c(-0.13648,0.3355,-0.9202,1.4680,2.47077), + tol=1e-4) expect_true(inherits(sigma(fmr), 'data.frame')) -expect_equal(sigma(fmr)$sigma, 1.05945, tol=1e-5) +expect_equal(sigma(fmr)$sigma, 1.038204, tol=1e-5) pr <- predict(fmr, "state") expect_equivalent(as.numeric(pr[1,]), - c(1.037050,0.58179,0.3453,3.1140), tol=1e-3) + c(0.31838, 0.20989, 0.087458,1.15905), tol=1e-3) pr2 <- predict(fmr, "state", re.form=NA) expect_equivalent(as.numeric(pr2[1,]), - c(1.48366,0.2011,1.1374,1.93255), tol=1e-3) + c(0.53086,0.090768,0.37970,0.74220), tol=1e-3) pr3 <- predict(fmr, "det") expect_true(inherits(pr3, "data.frame")) |