Initial working lme4 replacement

author: Ken Kellner <ken@kenkellner.com> 2023-12-10 14:56:09 -0500
committer: Ken Kellner <ken@kenkellner.com> 2023-12-10 14:56:09 -0500
commit: 684496cec413c7566ea695ef3f1304983c52238c (patch)
tree: 305a13e6b451c84c3a29108cdd960a28a787f03f
parent: b4234c9934a114f8ae86a423b363e595081ae868 (diff)
5 files changed, 201 insertions, 63 deletions
diff --git a/DESCRIPTION b/DESCRIPTION
index d59b985..ecfedd1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -22,7 +22,6 @@ Depends: R (>= 4.0)
 Imports: 
     graphics,
     lattice,
-    lme4,
     MASS,
     Matrix,
     methods,
diff --git a/R/getDesign.R b/R/getDesign.R
index 8597b2d..7505bf4 100644
--- a/R/getDesign.R
+++ b/R/getDesign.R
@@ -10,8 +10,8 @@ setGeneric("handleNA", function(umf, ...) standardGeneric("handleNA"))
 setMethod("getDesign", "unmarkedFrame",
     function(umf, formula, na.rm=TRUE)
 {
-    detformula <- lme4::nobars(as.formula(formula[[2]]))
-    stateformula <- lme4::nobars(as.formula(paste("~", formula[3], sep="")))
+    detformula <- remove_bars(as.formula(formula[[2]]))
+    stateformula <- remove_bars(as.formula(paste("~", formula[3], sep="")))
     detVars <- all.vars(detformula)
 
     M <- numSites(umf)
diff --git a/R/mixedModelTools.R b/R/mixedModelTools.R
index 8c6a96d..4581f1e 100644
--- a/R/mixedModelTools.R
+++ b/R/mixedModelTools.R
@@ -1,50 +1,212 @@
-get_xlev <- function(data, model_frame){
-  fac_col <- data[, sapply(data, is.factor), drop=FALSE]
-  xlevs <- lapply(fac_col, levels)
-  xlevs[names(xlevs) %in% names(model_frame)]
-}
-
+# Generate required random effects info----------------------------------------
+# Sort-of drop-in replacement for lme4::mkReTrms
 get_reTrms <- function(formula, data, newdata=NULL){
-  fb <- lme4::findbars(formula)
-  mf <- model.frame(lme4::subbars(formula), data, na.action=stats::na.pass)
-  if(is.null(newdata)) return(lme4::mkReTrms(fb, mf))
-  new_mf <- model.frame(stats::terms(mf), newdata, na.action=stats::na.pass,
-                        xlev=get_xlev(data, mf))
-  lme4::mkReTrms(fb, new_mf, drop.unused.levels=FALSE)
+  if(!has_random(formula)){
+    stop("No random effect terms in formula", call.=FALSE)
+  }
+  cnms <- get_cnms(formula)
+  # TODO: check these are factors
+  flist <- lapply(unique(names(cnms)), function(x){
+             out <- data[[x]]
+             if(!is.factor(out)) out <- factor(out)
+             out
+           })
+  names(flist) <- unique(names(cnms))
+  list(Z = get_Z(formula, data), cnms = cnms, flist=flist)
 }
 
+#Create Z random effects matrix from a formula, possibly using newdata---------
 get_Z <- function(formula, data, newdata=NULL){
-  if(is.null(lme4::findbars(formula))){
+  # If no random effects in formula, return an empty Matrix
+  if(!has_random(formula)){
     if(is.null(newdata)){
       return(Matrix::Matrix(matrix(0, nrow=nrow(data), ncol=0),sparse=TRUE))
     } else{
       return(Matrix::Matrix(matrix(0, nrow=nrow(newdata), ncol=0),sparse=TRUE))
     }
   }
-  check_formula(formula, data)
-  Zt <- get_reTrms(formula, data, newdata)$Zt
-  Z <- t(as.matrix(Zt))
+  
+  # Get new formula
+  bars <- find_bars(formula)
+  new_form <- bars_to_formula(bars)
+
+  # Get partial Z for each bar expression
+  form_list <- lapply(1:length(bars), function(x) bars_to_formula(bars[x]))
+  Z_parts <- lapply(form_list, get_partial_Z, data=data, newdata=newdata)
+
+  # Create model frame
+  #mf <- model.frame(new_form, data, na.action=stats::na.pass)
+  #if(!is.null(newdata)){
+  #  mf <- model.frame(stats::terms(mf), newdata, na.action=stats::na.pass,
+  #                    xlev=get_xlev(data, mf))
+  #}
+
+  # Create sparse Z matrix
+  #Z <- model.matrix(new_form, mf)
+  Z <- do.call(cbind, Z_parts)
+  colnames(Z) <- Z_colnames(formula, data)
   Matrix::Matrix(Z, sparse=TRUE)
 }
 
-get_group_vars <- function(formula){
-  rand <- lme4::findbars(formula)
-  ifelse(is.null(rand), 0, length(rand))
+# Determine if formula has random effects specified----------------------------
+has_random <- function(form){
+  length(find_bars(form)) > 0
 }
 
-get_nrandom <- function(formula, data){
-  rand <- lme4::findbars(formula)
-  if(length(rand)==0) return(as.array(0))
+# Find 'bar' random effect parts of formula (i.e., the (x|y) structures)-------
+# Operates recursively
+find_bars <- function(form){
+  out <- NULL
+  if(is.name(form)) return(NULL)
+  if(form[[1]] == as.name("(")) return(form)
+  if(is.call(form)){
+    out <- lapply(form, find_bars)
+  }
+  unlist(out)
+}
+
+# Convert bar components into new formula--------------------------------------
+# E.g. ~x + (1|g) becomes ~g - 1
+bars_to_formula <- function(bars){
+  bar_terms <- lapply(bars, bars_to_terms)
+  check_duplicate_terms(bar_terms)
+  as.formula(str2lang(paste("~", paste(bar_terms, collapse = " + "), "- 1")))
+}
+
+# Translate bar components into standard formula terms-------------------------
+bars_to_terms <- function(bars){
+  info <- get_bar_info(bars)
+  new_terms <- sapply(info$LHS, function(x){
+    if(x == "1") return(info$RHS)
+    paste0(x, ":", info$RHS)
+  })
+  paste(new_terms, collapse=" + ")
+}
+
+# Organize information in a bar expression into a list-------------------------
+get_bar_info <- function(bar){
+  out <- list(
+    operator = deparse(bar[[2]][[1]]),
+    LHS = terms_in_bar(bar, RHS=FALSE),
+    RHS = terms_in_bar(bar, RHS=TRUE)
+  )
+  check_bar_info(out)
+}
+
+terms_in_bar <- function(bars, RHS=FALSE){
+  bars_sub <- bars[[2]][[2]]
+  if(RHS) bars_sub <- bars[[2]][[3]]
+  form <- formula(substitute(~X, list(X=bars_sub)))
+  trms <- attr(terms(form), "term.labels")
+  int <- attr(terms(form), "intercept")
+  if(int == 1 & !RHS) trms <- c("1", trms)
+  trms
+}
 
-  out <- sapply(rand, function(x){
-    col_nm <- as.character(x[[3]])
-    length(unique(data[[col_nm]]))
+# Check bar info to make sure the expression are allowed-----------------------
+# For example unmarked doesn't support correlated random effects with |
+check_bar_info <- function(info){
+  if(info$operator == "|" & length(info$LHS) > 1){
+    stop("Correlated random effects not supported, use || instead of |", call.=FALSE)
+  }
+  if(any(grepl(":", info$RHS)) | any(grepl("/", info$RHS))){
+    stop("Nested random effects notation (: or /) not supported", call.=FALSE)
+  }
+  stopifnot(length(info$RHS) == 1)
+  info
+}
+
+# Check terms to make sure there aren't any duplicates-------------------------
+# E.g. as a result of a formula like ~ (1|g) + (x||) where the intercept
+# is also implied in the second bar expression
+check_duplicate_terms <- function(bar_terms){
+  all_terms <- lapply(bar_terms, function(x){
+    unlist(strsplit(x, " + ", fixed=TRUE))
+  })
+  all_terms <- unlist(all_terms)
+  dups <- duplicated(all_terms)
+  if(any(dups)){
+    stop("Formula implies duplicate terms: ", paste0(all_terms[dups], collapse=", "),
+         call.=FALSE)
+  }
+  invisible()
+}
+
+# Get partial Z for a given bar expression-------------------------------------
+get_partial_Z <- function(formula, data, newdata){
+  mf <- model.frame(formula, data, na.action=stats::na.pass)
+  if(!is.null(newdata)){
+    mf <- model.frame(stats::terms(mf), newdata, na.action=stats::na.pass,
+                      xlev=get_xlev(data, mf))
+  }
+  model.matrix(formula, mf)
+}
+
+# Get levels of factor---------------------------------------------------------
+# For use in specifying model frame
+get_xlev <- function(data, model_frame){
+  fac_col <- data[, sapply(data, is.factor), drop=FALSE]
+  xlevs <- lapply(fac_col, levels)
+  xlevs[names(xlevs) %in% names(model_frame)]
+}
+
+# Generate colnames for Z to match what lme4::mkReTrms does--------------------
+Z_colnames <- function(formula, data){
+  bars <- find_bars(formula)
+  info <- lapply(bars, get_bar_info)
+  groups <- sapply(info, function(x) x$RHS)
+  nreps <- sapply(info, function(x) length(x$LHS))
+  lvls <- lapply(1:length(groups), function(i){
+    group_dat <- data[[groups[i]]]
+    if(!is.factor(group_dat)) group_dat <- as.factor(group_dat)
+    rep(levels(group_dat), nreps[i])
+  })
+  unlist(lvls)
+}
+
+# Get 'cnms' - random effect names - as with lme4::mkReTrms--------------------
+get_cnms <- function(formula){
+  bars <- find_bars(formula)
+  info <- lapply(bars, get_bar_info)
+  cnms <- lapply(info, function(x){
+    out <- lapply(x$LHS, function(y) ifelse(y == "1", "(Intercept)", y))
+    names(out) <- rep(x$RHS, length(out))
+    out
+  })
+  do.call(c, cnms)
+}
+
+# Get number of random effect SDs/variances------------------------------------
+get_nrandom <- function(formula, data){
+  if(!has_random(formula)) return(as.array(0))
+  cnms <- get_cnms(formula)
+  out <- sapply(names(cnms), function(x){
+          length(unique(data[[x]]))
   })
   as.array(out)
 }
 
-has_random <- function(formula){
-  length(lme4::findbars(formula)) > 0
+# Get number of grouping variables---------------------------------------------
+get_group_vars <- function(formula){
+  if(!has_random(formula)) return(0)
+  cnms <- get_cnms(formula)
+  length(cnms)
+}
+
+# Check if function has no support for random effects--------------------------
+check_no_support <- function(formula_list){
+  has_bars <- any(sapply(formula_list, has_random))
+  if(has_bars){
+    stop("This function does not support random effects", call.=FALSE)
+  }
+}
+
+# Remove all bar components from a formula-------------------------------------
+remove_bars <- function(formula){
+  s1 <- gsub("\\([^)]+\\|[^)]+\\) ?\\+?", "", deparse1(formula))
+  s2 <- gsub(" \\+ {0,}$", "", s1)
+  if(s2 == "~") return(~1)
+  as.formula(str2lang(s2))
 }
 
 sigma_names <- function(formula, data){
@@ -56,22 +218,6 @@ sigma_names <- function(formula, data){
   nms
 }
 
-check_formula <- function(formula, data){
-  rand <- lme4::findbars(formula)
-  if(is.null(rand)) return(invisible())
-
-  char <- paste(formula, collapse=" ")
-  if(grepl(":|/", char)){
-    stop("Nested random effects (using / and :) are not supported",
-         call.=FALSE)
-  }
-  theta <- get_reTrms(formula, data)$theta
-  if(0 %in% theta){
-    stop("Correlated slopes and intercepts are not supported. Use || instead of |.",
-         call.=FALSE)
-  }
-}
-
 split_formula <- function(formula){
   if(length(formula) != 3) stop("Double right-hand side formula required")
   char <- lapply(formula, function(x){
@@ -135,7 +281,7 @@ get_randvar_info <- function(tmb_report, type, formula, data){
   list(names=sigma_names(formula, data), estimates=sigma_est, covMat=sigma_cov,
        invlink="exp", invlinkGrad="exp", n_obs=nrow(data),
        n_levels=lapply(re$flist, function(x) length(levels(x))), cnms=re$cnms,
-       levels=rownames(re$Zt))
+       levels=colnames(re$Z))
 }
 
 get_fixed_names <- function(tmb_report){
@@ -158,13 +304,6 @@ print_randvar_info <- function(object){
   #cat(paste0("Number of obs: ",object$n_obs,", groups: ",group_info,"\n"))
 }
 
-check_no_support <- function(formula_list){
-  has_bars <- any(sapply(formula_list, function(x) !is.null(lme4::findbars(x))))
-  if(has_bars){
-    stop("This function does not support random effects", call.=FALSE)
-  }
-}
-
 fit_TMB <- function(model, data, params, random,
                     starts, method, ...){
 
diff --git a/R/predict.R b/R/predict.R
index 4f3b5fb..10bf4fc 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -55,7 +55,7 @@ setMethod("predict", "unmarkedFit",
 # This function makes sure factor levels in newdata match, and that
 # any functions in the formula are handled properly (e.g. scale)
 make_mod_matrix <- function(formula, data, newdata, re.form=NULL){
-  form_nobars <- lme4::nobars(formula)
+  form_nobars <- remove_bars(formula)
   mf <- model.frame(form_nobars, data, na.action=stats::na.pass)
   X.terms <- stats::terms(mf)
   fac_cols <- data[, sapply(data, is.factor), drop=FALSE]
@@ -65,7 +65,7 @@ make_mod_matrix <- function(formula, data, newdata, re.form=NULL){
   #X <- model.matrix(X.terms, newdata, xlev=xlevs)
   X <- model.matrix(form_nobars, nmf)
   offset <- model.offset(nmf)
-  if(is.null(re.form) & !is.null(lme4::findbars(formula))){
+  if(is.null(re.form) & has_random(formula)){
     Z <- get_Z(formula, data, newdata)
     X <- cbind(X, Z)
   }
diff --git a/tests/testthat/test_pcount.R b/tests/testthat/test_pcount.R
index 6db9966..35c9069 100644
--- a/tests/testthat/test_pcount.R
+++ b/tests/testthat/test_pcount.R
@@ -93,7 +93,7 @@ test_that("pcount predict works",{
 
 test_that("pcount can fit models with random effects",{
 
-set.seed(35)
+set.seed(12)
 nSites <- 300
 nVisits <- 3
 x <- rnorm(nSites)               # a covariate
@@ -101,7 +101,7 @@ beta0 <- 0
 beta1 <- 0.4
 
 ran <- rnorm(100, 0, 1)
-group <- factor(as.character(rep(1:100, each=3)))
+group <- factor(as.character(rep(1:100, each=3)), levels=1:100)
 ran_ind <- as.numeric(group)
 
 lambda <- exp(beta0 + beta1*x +
@@ -124,19 +124,19 @@ expect_is(fm, "unmarkedFitPCount")
 
 fmr <- pcount(~visit~x+(1|group), umf, K=25)
 
-expect_equivalent(coef(fmr), c(0.05397,0.3197,-0.8760,1.3668,2.078),
-                   tol=1e-3)
+expect_equivalent(coef(fmr), c(-0.13648,0.3355,-0.9202,1.4680,2.47077),
+                   tol=1e-4)
 
 expect_true(inherits(sigma(fmr), 'data.frame'))
-expect_equal(sigma(fmr)$sigma, 1.05945, tol=1e-5)
+expect_equal(sigma(fmr)$sigma, 1.038204, tol=1e-5)
 
 pr <- predict(fmr, "state")
 expect_equivalent(as.numeric(pr[1,]),
-                   c(1.037050,0.58179,0.3453,3.1140), tol=1e-3)
+                   c(0.31838, 0.20989, 0.087458,1.15905), tol=1e-3)
 
 pr2 <- predict(fmr, "state", re.form=NA)
 expect_equivalent(as.numeric(pr2[1,]),
-                   c(1.48366,0.2011,1.1374,1.93255), tol=1e-3)
+                   c(0.53086,0.090768,0.37970,0.74220), tol=1e-3)
 
 pr3 <- predict(fmr, "det")
 expect_true(inherits(pr3, "data.frame"))
author	Ken Kellner <ken@kenkellner.com>	2023-12-10 14:56:09 -0500
committer	Ken Kellner <ken@kenkellner.com>	2023-12-10 14:56:09 -0500
commit	684496cec413c7566ea695ef3f1304983c52238c (patch)
tree	305a13e6b451c84c3a29108cdd960a28a787f03f
parent	b4234c9934a114f8ae86a423b363e595081ae868 (diff)