aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKen Kellner <ken@kenkellner.com>2023-12-10 14:56:09 -0500
committerKen Kellner <ken@kenkellner.com>2023-12-10 14:56:09 -0500
commit684496cec413c7566ea695ef3f1304983c52238c (patch)
tree305a13e6b451c84c3a29108cdd960a28a787f03f
parentb4234c9934a114f8ae86a423b363e595081ae868 (diff)
Initial working lme4 replacement
-rw-r--r--DESCRIPTION1
-rw-r--r--R/getDesign.R4
-rw-r--r--R/mixedModelTools.R241
-rw-r--r--R/predict.R4
-rw-r--r--tests/testthat/test_pcount.R14
5 files changed, 201 insertions, 63 deletions
diff --git a/DESCRIPTION b/DESCRIPTION
index d59b985..ecfedd1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -22,7 +22,6 @@ Depends: R (>= 4.0)
Imports:
graphics,
lattice,
- lme4,
MASS,
Matrix,
methods,
diff --git a/R/getDesign.R b/R/getDesign.R
index 8597b2d..7505bf4 100644
--- a/R/getDesign.R
+++ b/R/getDesign.R
@@ -10,8 +10,8 @@ setGeneric("handleNA", function(umf, ...) standardGeneric("handleNA"))
setMethod("getDesign", "unmarkedFrame",
function(umf, formula, na.rm=TRUE)
{
- detformula <- lme4::nobars(as.formula(formula[[2]]))
- stateformula <- lme4::nobars(as.formula(paste("~", formula[3], sep="")))
+ detformula <- remove_bars(as.formula(formula[[2]]))
+ stateformula <- remove_bars(as.formula(paste("~", formula[3], sep="")))
detVars <- all.vars(detformula)
M <- numSites(umf)
diff --git a/R/mixedModelTools.R b/R/mixedModelTools.R
index 8c6a96d..4581f1e 100644
--- a/R/mixedModelTools.R
+++ b/R/mixedModelTools.R
@@ -1,50 +1,212 @@
-get_xlev <- function(data, model_frame){
- fac_col <- data[, sapply(data, is.factor), drop=FALSE]
- xlevs <- lapply(fac_col, levels)
- xlevs[names(xlevs) %in% names(model_frame)]
-}
-
+# Generate required random effects info----------------------------------------
+# Sort-of drop-in replacement for lme4::mkReTrms
get_reTrms <- function(formula, data, newdata=NULL){
- fb <- lme4::findbars(formula)
- mf <- model.frame(lme4::subbars(formula), data, na.action=stats::na.pass)
- if(is.null(newdata)) return(lme4::mkReTrms(fb, mf))
- new_mf <- model.frame(stats::terms(mf), newdata, na.action=stats::na.pass,
- xlev=get_xlev(data, mf))
- lme4::mkReTrms(fb, new_mf, drop.unused.levels=FALSE)
+ if(!has_random(formula)){
+ stop("No random effect terms in formula", call.=FALSE)
+ }
+ cnms <- get_cnms(formula)
+ # TODO: check these are factors
+ flist <- lapply(unique(names(cnms)), function(x){
+ out <- data[[x]]
+ if(!is.factor(out)) out <- factor(out)
+ out
+ })
+ names(flist) <- unique(names(cnms))
+ list(Z = get_Z(formula, data), cnms = cnms, flist=flist)
}
+#Create Z random effects matrix from a formula, possibly using newdata---------
get_Z <- function(formula, data, newdata=NULL){
- if(is.null(lme4::findbars(formula))){
+ # If no random effects in formula, return an empty Matrix
+ if(!has_random(formula)){
if(is.null(newdata)){
return(Matrix::Matrix(matrix(0, nrow=nrow(data), ncol=0),sparse=TRUE))
} else{
return(Matrix::Matrix(matrix(0, nrow=nrow(newdata), ncol=0),sparse=TRUE))
}
}
- check_formula(formula, data)
- Zt <- get_reTrms(formula, data, newdata)$Zt
- Z <- t(as.matrix(Zt))
+
+ # Get new formula
+ bars <- find_bars(formula)
+ new_form <- bars_to_formula(bars)
+
+ # Get partial Z for each bar expression
+ form_list <- lapply(1:length(bars), function(x) bars_to_formula(bars[x]))
+ Z_parts <- lapply(form_list, get_partial_Z, data=data, newdata=newdata)
+
+ # Create model frame
+ #mf <- model.frame(new_form, data, na.action=stats::na.pass)
+ #if(!is.null(newdata)){
+ # mf <- model.frame(stats::terms(mf), newdata, na.action=stats::na.pass,
+ # xlev=get_xlev(data, mf))
+ #}
+
+ # Create sparse Z matrix
+ #Z <- model.matrix(new_form, mf)
+ Z <- do.call(cbind, Z_parts)
+ colnames(Z) <- Z_colnames(formula, data)
Matrix::Matrix(Z, sparse=TRUE)
}
-get_group_vars <- function(formula){
- rand <- lme4::findbars(formula)
- ifelse(is.null(rand), 0, length(rand))
+# Determine if formula has random effects specified----------------------------
+has_random <- function(form){
+ length(find_bars(form)) > 0
}
-get_nrandom <- function(formula, data){
- rand <- lme4::findbars(formula)
- if(length(rand)==0) return(as.array(0))
+# Find 'bar' random effect parts of formula (i.e., the (x|y) structures)-------
+# Operates recursively
+find_bars <- function(form){
+ out <- NULL
+ if(is.name(form)) return(NULL)
+ if(form[[1]] == as.name("(")) return(form)
+ if(is.call(form)){
+ out <- lapply(form, find_bars)
+ }
+ unlist(out)
+}
+
+# Convert bar components into new formula--------------------------------------
+# E.g. ~x + (1|g) becomes ~g - 1
+bars_to_formula <- function(bars){
+ bar_terms <- lapply(bars, bars_to_terms)
+ check_duplicate_terms(bar_terms)
+ as.formula(str2lang(paste("~", paste(bar_terms, collapse = " + "), "- 1")))
+}
+
+# Translate bar components into standard formula terms-------------------------
+bars_to_terms <- function(bars){
+ info <- get_bar_info(bars)
+ new_terms <- sapply(info$LHS, function(x){
+ if(x == "1") return(info$RHS)
+ paste0(x, ":", info$RHS)
+ })
+ paste(new_terms, collapse=" + ")
+}
+
+# Organize information in a bar expression into a list-------------------------
+get_bar_info <- function(bar){
+ out <- list(
+ operator = deparse(bar[[2]][[1]]),
+ LHS = terms_in_bar(bar, RHS=FALSE),
+ RHS = terms_in_bar(bar, RHS=TRUE)
+ )
+ check_bar_info(out)
+}
+
+terms_in_bar <- function(bars, RHS=FALSE){
+ bars_sub <- bars[[2]][[2]]
+ if(RHS) bars_sub <- bars[[2]][[3]]
+ form <- formula(substitute(~X, list(X=bars_sub)))
+ trms <- attr(terms(form), "term.labels")
+ int <- attr(terms(form), "intercept")
+ if(int == 1 & !RHS) trms <- c("1", trms)
+ trms
+}
- out <- sapply(rand, function(x){
- col_nm <- as.character(x[[3]])
- length(unique(data[[col_nm]]))
+# Check bar info to make sure the expression are allowed-----------------------
+# For example unmarked doesn't support correlated random effects with |
+check_bar_info <- function(info){
+ if(info$operator == "|" & length(info$LHS) > 1){
+ stop("Correlated random effects not supported, use || instead of |", call.=FALSE)
+ }
+ if(any(grepl(":", info$RHS)) | any(grepl("/", info$RHS))){
+ stop("Nested random effects notation (: or /) not supported", call.=FALSE)
+ }
+ stopifnot(length(info$RHS) == 1)
+ info
+}
+
+# Check terms to make sure there aren't any duplicates-------------------------
+# E.g. as a result of a formula like ~ (1|g) + (x||) where the intercept
+# is also implied in the second bar expression
+check_duplicate_terms <- function(bar_terms){
+ all_terms <- lapply(bar_terms, function(x){
+ unlist(strsplit(x, " + ", fixed=TRUE))
+ })
+ all_terms <- unlist(all_terms)
+ dups <- duplicated(all_terms)
+ if(any(dups)){
+ stop("Formula implies duplicate terms: ", paste0(all_terms[dups], collapse=", "),
+ call.=FALSE)
+ }
+ invisible()
+}
+
+# Get partial Z for a given bar expression-------------------------------------
+get_partial_Z <- function(formula, data, newdata){
+ mf <- model.frame(formula, data, na.action=stats::na.pass)
+ if(!is.null(newdata)){
+ mf <- model.frame(stats::terms(mf), newdata, na.action=stats::na.pass,
+ xlev=get_xlev(data, mf))
+ }
+ model.matrix(formula, mf)
+}
+
+# Get levels of factor---------------------------------------------------------
+# For use in specifying model frame
+get_xlev <- function(data, model_frame){
+ fac_col <- data[, sapply(data, is.factor), drop=FALSE]
+ xlevs <- lapply(fac_col, levels)
+ xlevs[names(xlevs) %in% names(model_frame)]
+}
+
+# Generate colnames for Z to match what lme4::mkReTrms does--------------------
+Z_colnames <- function(formula, data){
+ bars <- find_bars(formula)
+ info <- lapply(bars, get_bar_info)
+ groups <- sapply(info, function(x) x$RHS)
+ nreps <- sapply(info, function(x) length(x$LHS))
+ lvls <- lapply(1:length(groups), function(i){
+ group_dat <- data[[groups[i]]]
+ if(!is.factor(group_dat)) group_dat <- as.factor(group_dat)
+ rep(levels(group_dat), nreps[i])
+ })
+ unlist(lvls)
+}
+
+# Get 'cnms' - random effect names - as with lme4::mkReTrms--------------------
+get_cnms <- function(formula){
+ bars <- find_bars(formula)
+ info <- lapply(bars, get_bar_info)
+ cnms <- lapply(info, function(x){
+ out <- lapply(x$LHS, function(y) ifelse(y == "1", "(Intercept)", y))
+ names(out) <- rep(x$RHS, length(out))
+ out
+ })
+ do.call(c, cnms)
+}
+
+# Get number of random effect SDs/variances------------------------------------
+get_nrandom <- function(formula, data){
+ if(!has_random(formula)) return(as.array(0))
+ cnms <- get_cnms(formula)
+ out <- sapply(names(cnms), function(x){
+ length(unique(data[[x]]))
})
as.array(out)
}
-has_random <- function(formula){
- length(lme4::findbars(formula)) > 0
+# Get number of grouping variables---------------------------------------------
+get_group_vars <- function(formula){
+ if(!has_random(formula)) return(0)
+ cnms <- get_cnms(formula)
+ length(cnms)
+}
+
+# Check if function has no support for random effects--------------------------
+check_no_support <- function(formula_list){
+ has_bars <- any(sapply(formula_list, has_random))
+ if(has_bars){
+ stop("This function does not support random effects", call.=FALSE)
+ }
+}
+
+# Remove all bar components from a formula-------------------------------------
+remove_bars <- function(formula){
+ s1 <- gsub("\\([^)]+\\|[^)]+\\) ?\\+?", "", deparse1(formula))
+ s2 <- gsub(" \\+ {0,}$", "", s1)
+ if(s2 == "~") return(~1)
+ as.formula(str2lang(s2))
}
sigma_names <- function(formula, data){
@@ -56,22 +218,6 @@ sigma_names <- function(formula, data){
nms
}
-check_formula <- function(formula, data){
- rand <- lme4::findbars(formula)
- if(is.null(rand)) return(invisible())
-
- char <- paste(formula, collapse=" ")
- if(grepl(":|/", char)){
- stop("Nested random effects (using / and :) are not supported",
- call.=FALSE)
- }
- theta <- get_reTrms(formula, data)$theta
- if(0 %in% theta){
- stop("Correlated slopes and intercepts are not supported. Use || instead of |.",
- call.=FALSE)
- }
-}
-
split_formula <- function(formula){
if(length(formula) != 3) stop("Double right-hand side formula required")
char <- lapply(formula, function(x){
@@ -135,7 +281,7 @@ get_randvar_info <- function(tmb_report, type, formula, data){
list(names=sigma_names(formula, data), estimates=sigma_est, covMat=sigma_cov,
invlink="exp", invlinkGrad="exp", n_obs=nrow(data),
n_levels=lapply(re$flist, function(x) length(levels(x))), cnms=re$cnms,
- levels=rownames(re$Zt))
+ levels=colnames(re$Z))
}
get_fixed_names <- function(tmb_report){
@@ -158,13 +304,6 @@ print_randvar_info <- function(object){
#cat(paste0("Number of obs: ",object$n_obs,", groups: ",group_info,"\n"))
}
-check_no_support <- function(formula_list){
- has_bars <- any(sapply(formula_list, function(x) !is.null(lme4::findbars(x))))
- if(has_bars){
- stop("This function does not support random effects", call.=FALSE)
- }
-}
-
fit_TMB <- function(model, data, params, random,
starts, method, ...){
diff --git a/R/predict.R b/R/predict.R
index 4f3b5fb..10bf4fc 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -55,7 +55,7 @@ setMethod("predict", "unmarkedFit",
# This function makes sure factor levels in newdata match, and that
# any functions in the formula are handled properly (e.g. scale)
make_mod_matrix <- function(formula, data, newdata, re.form=NULL){
- form_nobars <- lme4::nobars(formula)
+ form_nobars <- remove_bars(formula)
mf <- model.frame(form_nobars, data, na.action=stats::na.pass)
X.terms <- stats::terms(mf)
fac_cols <- data[, sapply(data, is.factor), drop=FALSE]
@@ -65,7 +65,7 @@ make_mod_matrix <- function(formula, data, newdata, re.form=NULL){
#X <- model.matrix(X.terms, newdata, xlev=xlevs)
X <- model.matrix(form_nobars, nmf)
offset <- model.offset(nmf)
- if(is.null(re.form) & !is.null(lme4::findbars(formula))){
+ if(is.null(re.form) & has_random(formula)){
Z <- get_Z(formula, data, newdata)
X <- cbind(X, Z)
}
diff --git a/tests/testthat/test_pcount.R b/tests/testthat/test_pcount.R
index 6db9966..35c9069 100644
--- a/tests/testthat/test_pcount.R
+++ b/tests/testthat/test_pcount.R
@@ -93,7 +93,7 @@ test_that("pcount predict works",{
test_that("pcount can fit models with random effects",{
-set.seed(35)
+set.seed(12)
nSites <- 300
nVisits <- 3
x <- rnorm(nSites) # a covariate
@@ -101,7 +101,7 @@ beta0 <- 0
beta1 <- 0.4
ran <- rnorm(100, 0, 1)
-group <- factor(as.character(rep(1:100, each=3)))
+group <- factor(as.character(rep(1:100, each=3)), levels=1:100)
ran_ind <- as.numeric(group)
lambda <- exp(beta0 + beta1*x +
@@ -124,19 +124,19 @@ expect_is(fm, "unmarkedFitPCount")
fmr <- pcount(~visit~x+(1|group), umf, K=25)
-expect_equivalent(coef(fmr), c(0.05397,0.3197,-0.8760,1.3668,2.078),
- tol=1e-3)
+expect_equivalent(coef(fmr), c(-0.13648,0.3355,-0.9202,1.4680,2.47077),
+ tol=1e-4)
expect_true(inherits(sigma(fmr), 'data.frame'))
-expect_equal(sigma(fmr)$sigma, 1.05945, tol=1e-5)
+expect_equal(sigma(fmr)$sigma, 1.038204, tol=1e-5)
pr <- predict(fmr, "state")
expect_equivalent(as.numeric(pr[1,]),
- c(1.037050,0.58179,0.3453,3.1140), tol=1e-3)
+ c(0.31838, 0.20989, 0.087458,1.15905), tol=1e-3)
pr2 <- predict(fmr, "state", re.form=NA)
expect_equivalent(as.numeric(pr2[1,]),
- c(1.48366,0.2011,1.1374,1.93255), tol=1e-3)
+ c(0.53086,0.090768,0.37970,0.74220), tol=1e-3)
pr3 <- predict(fmr, "det")
expect_true(inherits(pr3, "data.frame"))