aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlpautrel <lea.pautrel@terroiko.fr>2023-11-28 12:33:03 +0100
committerlpautrel <lea.pautrel@terroiko.fr>2023-11-28 12:33:03 +0100
commit92d8b8fa68eb7c1ed7c1f92e139cd401c8e4671b (patch)
tree5456815ef1ee92c94f1576d41d36897ad7fd1e57
parent436977a45966b76d1290ef627e71797485803697 (diff)
Init occuCOP
-rw-r--r--R/occuCOP.R747
-rw-r--r--test_occuCOP.R369
-rw-r--r--tmp_use_COP.Rmd584
3 files changed, 1700 insertions, 0 deletions
diff --git a/R/occuCOP.R b/R/occuCOP.R
new file mode 100644
index 0000000..9a8444a
--- /dev/null
+++ b/R/occuCOP.R
@@ -0,0 +1,747 @@
+
+# Prerequisites (while its not integrated within the package)
+library(unmarked)
+source("R/unmarkedFrame.R")
+source("R/utils.R")
+source("R/unmarkedEstimate.R")
+source("R/unmarkedFitList.R")
+source("R/predict.R")
+source("R/getDesign.R")
+source("R/mixedModelTools.R")
+# sapply(paste0("./R/", list.files("./R/")), source)
+
+library(lattice)
+library(testthat)
+
+# Fit the occupancy model COP
+# (Counting Occurrences Process)
+
+# Occupancy
+# z_i ~ Bernoulli(psi)
+#
+# with:
+# z_i = Occupancy state of site i
+# = 1 if the site i is occupied
+# = 0 else
+
+# Detection
+# N_ijs | z_i = 1 ~ Poisson(lambda*T_s)
+# N_ijs | z_i = 0 ~ 0
+#
+# with:
+# N_is = Number of detections of site i during session s
+# z_i = Occupancy state of site i
+# lambda = Detection rate
+# T_s = Duration of the session s
+
+# CLASSES ----------------------------------------------------------------------
+
+## unmarkedFrameCOP class ----
+setClass(
+ "unmarkedFrameCOP",
+ representation(obsLength = "matrix"),
+ contains = "unmarkedFrame",
+ validity = function(object) {
+ errors <- character(0)
+ M <- nrow(object@y)
+ J <- ncol(object@y)
+ y_integers = sapply(object@y, check.integer, na.ignore = T)
+ if (!all(y_integers)) {
+ errors <- c(errors,
+ paste(
+ "Count detection should be integers. Non-integer values:",
+ paste(object@y[which(!y_integers)], collapse = ', ')
+ ))
+ }
+ if (!all(all(dim(object@obsLength) == dim(object@y)))){
+ errors <- c( errors, paste(
+ "obsLength should be a matrix of the same dimension as y, with M =", M,
+ "rows (sites) and J =", J, "columns (sampling occasions)."
+ ))}
+ if (length(errors) == 0) TRUE
+ else errors
+ }
+)
+
+## unmarkedFitCOP class ----
+setClass("unmarkedFitCOP",
+ representation(removed_obs = "matrix",
+ formlist = "list",
+ nll = "optionalNumeric",
+ convergence="optionalNumeric"),
+ contains = "unmarkedFit")
+
+
+# METHODS ----------------------------------------------------------------------
+
+## getDesign method ----
+# Example for occu: https://github.com/rbchan/unmarked/blob/536e32ad7b2526f8fac1b315ed2e99accac0d50f/R/getDesign.R#L10-L97
+# Example for GDR: https://github.com/rbchan/unmarked/blob/c82e63947d7df7dfc896066e51dbf63bda3babf4/R/gdistremoval.R#L177-L253
+setMethod(
+ "getDesign", "unmarkedFrameCOP",
+ function(umf, formlist, na.rm = TRUE) {
+
+ # Retrieve useful informations from umf
+ M <- numSites(umf)
+ J <- obsNum(umf)
+ y <- getY(umf)
+ obsLength <- umf@obsLength
+
+ # Occupancy submodel -------------------------------------------------------
+
+ # Retrieve the fixed-effects part of the formula
+ psiformula <- lme4::nobars(as.formula(formlist$psiformula))
+ psiVars <- all.vars(psiformula)
+
+ # Retrieve the site covariates
+ sc <- siteCovs(umf)
+ if(is.null(sc)) {
+ sc <- data.frame(.dummy = rep(0, M))
+ }
+
+ # Check for missing variables
+ psiMissingVars <- psiVars[!(psiVars %in% names(sc))]
+ if (length(psiMissingVars) > 0) {
+ stop(paste(
+ "Variable(s)",
+ paste(psiMissingVars, collapse = ", "),
+ "not found in siteCovs"
+ ), call. = FALSE)
+ }
+
+ # State model matrix
+ Xpsi <- model.matrix(
+ psiformula,
+ model.frame(psiformula, sc, na.action = NULL)
+ )
+ Zpsi <- get_Z(formlist$psiformula, sc)
+
+ # Detection submodel -------------------------------------------------------
+
+ # Retrieve the fixed-effects part of the formula
+ lambdaformula <- lme4::nobars(as.formula(formlist$lambdaformula))
+ lambdaVars <- all.vars(lambdaformula)
+
+ # Retrieve the observation covariates
+ oc <- obsCovs(umf)
+ if(is.null(oc)) {
+ oc <- data.frame(.dummy = rep(0, M*J))
+ }
+
+ # Check for missing variables
+ lambdaMissingVars <- lambdaVars[!(lambdaVars %in% names(oc))]
+ if (length(lambdaMissingVars) > 0) {
+ stop(paste(
+ "Variable(s)",
+ paste(lambdaMissingVars, collapse = ", "),
+ "not found in obsCovs"
+ ), call. = FALSE)
+ }
+
+ # Detection model matrix
+ Xlambda <- model.matrix(
+ lambdaformula,
+ model.frame(lambdaformula, oc, na.action = NULL)
+ )
+ Zlambda <- get_Z(formlist$lambdaformula, oc)
+
+ # Missing data -------------------------------------------------------------
+
+ # Missing count data
+ missing_y <- is.na(y)
+
+ # Missing site covariates
+ # (TRUE if at least one site covariate is missing in a site)
+ missing_sc <- apply(Xpsi, 1, function(x) any(is.na(x)))
+
+ # Missing observation covariates
+ # (TRUE if at least one observation covariate is missing in a sampling occasion in a site)
+ missing_oc <- apply(Xlambda, 1, function(x) any(is.na(x)))
+
+ # Matrix MxJ of values to not use because there is some data missing
+ removed_obs <-
+ # If there is count data missing in site i and obs j
+ missing_y |
+ # If there is any site covariate missing in site i
+ replicate(n = J, missing_sc) |
+ # If there is any observation covariate missing in site i and obs j
+ matrix(missing_oc, M, J, byrow = T)
+
+ if (any(removed_obs)) {
+ if (na.rm) {
+ nb_missing_sites <- sum(apply(removed_obs, 1, function(x) all(is.na(x))))
+ nb_missing_observations <- sum(is.na(removed_obs))
+ warning("There are missing data (count data, site covariate, observation covariate):\n\t",
+ "Only data from ", M-nb_missing_sites, " sites out of ", M, " are used.\n\t",
+ "Only data from ", (M*J)-nb_missing_sites, " observations out of ", (M*J), " are used.")
+ } else {
+ stop("There is missing data:\n\t",
+ sum(missing_y), " missing count data (y)\n\t",
+ sum(missing_sc), " missing site covariates (siteCovs)\n\t",
+ sum(missing_oc), " missing observation covariates (obsCovs)")
+ }
+ }
+
+ # Output -------------------------------------------------------------------
+ return(list(
+ y = y,
+ Xpsi = Xpsi,
+ Zpsi = Zpsi,
+ Xlambda = Xlambda,
+ Zlambda = Zlambda,
+ removed_obs = removed_obs
+ ))
+ })
+
+
+## show method ----
+setMethod("show", "unmarkedFrameCOP", function(object) {
+ J <- ncol(object@obsLength)
+ df_unmarkedFrame <- as(object, "data.frame")
+ df_obsLength <- data.frame(object@obsLength)
+ colnames(df_obsLength) <- paste0("obsLength.", 1:J)
+ if (ncol(df_unmarkedFrame) > J) {
+ df <-
+ cbind(df_unmarkedFrame[, 1:J], df_obsLength, df_unmarkedFrame[, (J + 1):ncol(df_unmarkedFrame)])
+ } else{
+ df <-
+ cbind(df_unmarkedFrame[, 1:J], df_obsLength)
+ }
+ cat("Data frame representation of unmarkedFrame object.\n")
+ print(df)
+})
+# LP note: as is defined in unmarkedFrame.R part "COERCION"
+
+
+## summary method ----
+setMethod("summary", "unmarkedFrameCOP", function(object,...) {
+ cat("unmarkedFrameCOP Object\n\n")
+
+ cat(nrow(object@y), "sites\n")
+ cat("Maximum number of sampling occasions per site:",obsNum(object),"\n")
+ mean.obs <- mean(rowSums(!is.na(getY(object))))
+ cat("Mean number of sampling occasions per site:",round(mean.obs,2),"\n")
+ cat("Sites with at least one detection:",
+ sum(apply(getY(object), 1, function(x) any(x > 0, na.rm=TRUE))),
+ "\n\n")
+
+ cat("Tabulation of y observations:")
+ print(table(object@y, exclude=NULL))
+
+ if(!is.null(object@obsLength)) {
+ cat("\nTabulation of sampling occasions length:")
+ print(table(object@obsLength))
+ }
+
+ if(!is.null(object@siteCovs)) {
+ cat("\nSite-level covariates:\n")
+ print(summary(object@siteCovs))
+ }
+
+ if(!is.null(object@obsCovs)) {
+ cat("\nObservation-level covariates:\n")
+ print(summary(object@obsCovs))
+ }
+
+})
+
+## umf[i, j] ----
+setMethod("[", c("unmarkedFrameCOP", "numeric", "numeric", "missing"),
+ function(x, i, j) {
+ # Gey dimensions of x
+ M <- numSites(x)
+ J <- obsNum(x)
+
+ if (length(i) == 0 & length(j) == 0) {
+ return(x)
+ }
+
+ # Check i
+ if (any(i < 0) &&
+ any(i > 0)) {
+ stop("i must be all positive or all negative indices.")
+ }
+ if (all(j < 0)) {
+ i <- (1:M)[i]
+ }
+
+ # Check j
+ if (any(j < 0) &&
+ any(j > 0)) {
+ stop("j must be all positive or all negative indices.")
+ }
+ if (all(j < 0)) {
+ j <- (1:J)[j]
+ }
+
+ # y observation count data subset
+ y <- getY(x)[i, j]
+ if (min(length(i), length(j)) == 1) {
+ y <- t(y)
+ }
+
+ # obsLength subset
+ obsLength <- x@obsLength[i, j]
+ if (min(length(i), length(j)) == 1) {
+ obsLength <- t(obsLength)
+ }
+
+ # siteCovs subset
+ siteCovs <- siteCovs(x)
+ if (!is.null(siteCovs)) {
+ siteCovs <- siteCovs(x)[i, , drop = FALSE]
+ }
+
+ # obsCovs subset
+ obsCovs <- obsCovs(x)
+ if (!is.null(obsCovs)) {
+ MJ_site <- rep(1:M, each = J)
+ MJ_obs <- rep(1:J, times = M)
+ obsCovs <- obsCovs[((MJ_obs %in% j) & (MJ_site %in% i)), ]
+ rownames(obsCovs) <- NULL
+ }
+
+ # Recreate umf
+ new(
+ Class = "unmarkedFrameCOP",
+ y = y,
+ obsLength = obsLength,
+ siteCovs = siteCovs,
+ obsCovs = obsCovs,
+ obsToY = diag(length(j)),
+ mapInfo = x@mapInfo
+ )
+ })
+
+
+## umf[i, ] ----
+setMethod("[", c("unmarkedFrameCOP", "numeric", "missing", "missing"),
+ function(x, i) {
+ x[i, 1:obsNum(x)]
+ })
+
+## umf[, j] ----
+setMethod("[", c("unmarkedFrameCOP", "missing", "numeric", "missing"),
+ function(x, j) {
+ x[1:numSites(x), j]
+ })
+
+
+## fl_getY ----
+setMethod("fl_getY", "unmarkedFitCOP", function(fit, ...){
+ getDesign(getData(fit), fit@formlist)$y
+})
+
+# PREDICT METHODS ----
+# Fit type-specific methods to generate different components of prediction
+# 5. predict_by_chunk(): Take inputs and generate predictions
+# Basic methods are shown below; fit type-specific methods in their own sections
+
+## predict_inputs_from_umf ----
+setMethod("predict_inputs_from_umf", "unmarkedFitCOP",
+ function(object, type, newdata, na.rm, re.form = NULL) {
+ designMats = getDesign(umf = newdata,
+ formlist = object@formlist,
+ na.rm = na.rm)
+ if (type == "psi") list_els <- c("Xpsi", "Zpsi")
+ if (type == "lambda") list_els <- c("Xlambda", "Zlambda")
+ X <- designMats[[list_els[1]]]
+ if (is.null(re.form)) X <- cbind(X, designMats[[list_els[2]]])
+ return(list(X = X, offset = NULL))
+ })
+
+## get_formula ----
+setMethod("get_formula", "unmarkedFitCOP", function(object, type, ...) {
+ fl <- object@formlist
+ switch(type, psi = fl$psiformula, lambda = fl$lambdaformula)
+})
+
+## get_orig_data ----
+setMethod("get_orig_data", "unmarkedFitCOP", function(object, type, ...){
+ clean_covs <- clean_up_covs(object@data, drop_final=FALSE)
+ datatype <- switch(type, psi = 'site_covs', lambda = 'obs_covs')
+ clean_covs[[datatype]]
+})
+
+
+
+# Useful functions -------------------------------------------------------------
+
+replaceinf = function(x) {
+ max(.Machine$double.xmin, min(.Machine$double.xmax, x))
+}
+
+check.integer <- function(x, na.ignore = F) {
+ if (is.na(x) & na.ignore) {
+ return(T)
+ }
+ x %% 1 == 0
+}
+
+# unmarkedFrame ----------------------------------------------------------------
+
+unmarkedFrameCOP <- function(y, obsLength, siteCovs = NULL, obsCovs = NULL, mapInfo = NULL) {
+
+ # Verification that they are non-NA data in y
+ if (all(is.na(y))) {
+ stop("y only contains NA. y needs to contain non-NA integers.")
+ }
+
+ # Verification that these are count data (and not detection/non-detection)
+ if (max(y, na.rm = T) == 1) {
+ warning("unmarkedFrameCOP is for count data. ",
+ "The data furnished appear to be detection/non-detection.")
+ }
+
+ # Number of sampling occasions
+ J <- ncol(y)
+
+ # If missing obsLength: replace by matrix of 1
+ # and the lambda will be the detection rate per observation length
+ if (missing(obsLength)) {
+ obsLength <- y * 0 + 1
+ warning("obsLength is missing, replacing it by a matrix of 1.")
+ } else if (is.null(obsLength)) {
+ obsLength <- y * 0 + 1
+ warning("obsLength is missing, replacing it by a matrix of 1.")
+ }
+
+ # Transformation observation covariates
+ obsCovs <- covsToDF(
+ covs = obsCovs,
+ name = "obsCovs",
+ obsNum = J,
+ numSites = nrow(y)
+ )
+
+ # Create S4 object of class unmarkedFrameCOP
+ umf <- new(
+ Class = "unmarkedFrameCOP",
+ y = y,
+ obsLength = obsLength,
+ siteCovs = siteCovs,
+ obsCovs = obsCovs,
+ obsToY = diag(J),
+ mapInfo = mapInfo
+ )
+
+ return(umf)
+}
+
+
+# occuCOP ----------------------------------------------------------------------
+
+occuCOP <- function(data,
+ psiformula = ~1,
+ lambdaformula = ~1,
+ psistarts = rep(0, length(attr(terms(psiformula), "term.labels")) + 1),
+ lambdastarts = rep(0, length(attr(terms(lambdaformula), "term.labels")) + 1),
+ method = "BFGS",
+ se = TRUE,
+ engine = "R",
+ threads = 1L,
+ na.rm = TRUE,
+ get.NLL.params = NULL,
+ ...) {
+ #TODO: engines
+ #TODO: NA
+
+ # Neg loglikelihood COP ------------------------------------------------------
+ nll_COP <- function(params) {
+
+ # Reading and transforming params
+ # Taking into account the covariates
+
+ # psi as a function of covariates
+ # psi in params are transformed with a logit transformation (qlogis)
+ # so they're back-transformed to a proba with inverse logit (plogis)
+ # Xpsi is the matrix with occupancy covariates
+ # params is the vector with all the parameters
+ # psiIdx is the index of Occupancy Parameters in params
+ psi <- plogis(Xpsi %*% params[psiIdx])
+
+ # lambda as a function of covariates
+ # lambda in params are transformed with a log-transformation
+ # so they're back-transformed to a rate with exp here
+ # Xlambda is the matrix with detection covariates
+ # params is the vector with all the parameters
+ # lambdaIdx is the index of Occupancy Parameters in params
+ lambda <- exp(Xlambda %*% params[lambdaIdx])
+
+ # Probability for each site (i)
+ iProb <- rep(NA, M)
+
+ for (i in 1:M) {
+ # Probability for the site i for each sampling occasion (j)
+ ijProb <- rep(NA, J)
+
+ # Total obs length in site i
+ obsLengthSitei <- sum(data@obsLength[i,])
+
+ # iIdx is the index to access the vectorised vectors of all obs in site i
+ iIdx <- ((i - 1) * J + 1):(i * J)
+
+ if (SitesWithDetec[i]) {
+ # If there is at least one detection in site i
+ # factNij = sapply(factorial(yvec[iIdx]),replaceinf)
+ # iProb[i] = psi[i] * sum(lambda[iIdx] * obsLengthvec[iIdx] / factNij * exp(-lambda[iIdx] * obsLengthvec[iIdx]))
+
+
+ iProb[i] = psi[i] * (
+ (sum(lambda[iIdx] * obsLengthvec[iIdx])) ^ sum(yvec[iIdx]) /
+ factorial(sum(yvec[iIdx])) *
+ exp(-sum(lambda[iIdx] * obsLengthvec[iIdx]))
+ )
+
+ } else {
+ # If there is zero detection in site i
+ # iProb[i] = psi[i] * sum(exp(-lambda[iIdx] * obsLengthvec[iIdx])) + (1 - psi[i])
+
+ iProb[i] = psi[i] * exp(-sum(lambda[iIdx] * obsLengthvec[iIdx])) + (1 - psi[i])
+
+ }
+
+ }
+ # Note: Why is there "replaceinf(factorial(data@y[i,]))"
+ # instead of just "factorial(data@y[i,])"?
+ # Because if there are a lot of detections (n_ij >= 171 on my machine),
+ # then factorial(n_ij) = Inf,
+ # then 1/factorial(n_ij) = 0,
+ # then log(0) = -Inf,
+ # then loglikelihood = -Inf
+ # then optim does not work.
+ #
+ # We want to maximise likelihood,
+ # (technically, minimise negative loglikelihood).
+ # We don't do anything else with this function.
+ # So as long as we have a tiny value for likelihood,
+ # (ie a huge value for negative loglikelihood)
+ # it doesn't matter if its an approximation.
+
+ # log-likelihood
+ ll = sum(log(iProb))
+ return(-ll)
+ }
+
+ # Check arguments ------------------------------------------------------------
+ if (!is(data, "unmarkedFrameCOP")) {
+ stop("Data is not an unmarkedFrameCOP object. See ?unmarkedFrameCOP if necessary.")
+ }
+ stopifnot(class(psiformula) == "formula")
+ stopifnot(class(lambdaformula) == "formula")
+ stopifnot(class(psistarts) %in% c("numeric", "double", "integer"))
+ stopifnot(class(lambdastarts) %in% c("numeric", "double", "integer"))
+ stopifnot(class(threads) %in% c("numeric", "double", "integer"))
+ se = as.logical(match.arg(
+ arg = as.character(se),
+ choices = c("TRUE", "FALSE", "0", "1")
+ ))
+ na.rm = as.logical(match.arg(
+ arg = as.character(na.rm),
+ choices = c("TRUE", "FALSE", "0", "1")
+ ))
+ engine <- match.arg(engine, c("R"))
+
+ # Format input data ----------------------------------------------------------
+
+ # Retrieve formlist
+ formlist <- mget(c("psiformula", "lambdaformula"))
+
+ # Get the design matrix (calling the getDesign method for unmarkedFrame)
+ # For more informations, see: getMethod("getDesign", "unmarkedFrameCOP")
+ designMats <- getDesign(umf = data, formlist = formlist, na.rm = na.rm)
+
+ # y is the count detection data (matrix of size M sites x J sessions)
+ y <- getY(data)
+
+ # obsLength is the length of sessions (matrix of size M sites x J sessions)
+ obsLength = data@obsLength
+
+ # Xpsi is the matrix with the occupancy covariates
+ Xpsi <- designMats$Xpsi
+
+ # Xlambda is the matrix with the detection covariates
+ Xlambda <- designMats$Xlambda
+
+ # Zpsi is ???
+ Zpsi <- designMats$Zpsi
+
+ # Zlambda is ???
+ Zlambda <- designMats$Zlambda
+
+ # removed_obs is a M x J matrix of the observations removed from the analysis
+ removed_obs <- designMats$removed_obs
+ sitesRemoved <- unname(which(apply(removed_obs, 1, function(x) all(x))))
+
+ # Number of sites
+ M <- nrow(y)
+
+ # Number of sampling occasions
+ J <- ncol(y)
+
+ # Total number of detection per site
+ NbDetecPerSite = rowSums(y)
+
+ # Sites where there was at least one detection
+ SitesWithDetec = NbDetecPerSite > 0
+
+ # Number of sites where there was at least one detection
+ NbSitesWithDetec = sum(SitesWithDetec)
+
+ # Set up parameter names and indices-----------------------------------------
+
+ # ParamPsi Occupancy parameter names
+ ParamPsi <- colnames(Xpsi)
+
+ # ParamLambda Detection parameter names
+ ParamLambda <- colnames(Xlambda)
+
+ # NbParamPsi Number of occupancy parameters
+ NbParamPsi <- ncol(Xpsi)
+
+ # NbParamLambda Number of detection parameters
+ NbParamLambda <- ncol(Xlambda)
+
+ # nP Total number of parameters
+ nP <- NbParamPsi + NbParamLambda
+
+ # psiIdx Index of the occupancy parameters
+ psiIdx <- 1:NbParamPsi
+
+ # lambdaIdx Index of the detection parameters
+ lambdaIdx <- (NbParamPsi+1):nP
+
+ # Re-format some variables for C and R engines
+ # From Matrix of dim MxJ to vector of length MxJ:
+ # c(ySite1Obs1, ySite1Obs2, ..., ySite1ObsJ, ysite2Obs1, ...)
+ yvec <- as.numeric(t(y))
+ obsLengthvec <- as.numeric(t(obsLength))
+
+ # get.NLL.params -------------------------------------------------------------
+ if (!is.null(get.NLL.params)) {
+ df_NLL = data.frame(t(as.data.frame(get.NLL.params)))
+ rownames(df_NLL) = NULL
+ colnames(df_NLL) = c(paste0("logit(psi).", ParamPsi),
+ paste0("log(lambda).", ParamLambda))
+ df_NLL$nll = NA
+ for (i in 1:nrow(df_NLL)) {
+ df_NLL$nll[i] = nll_COP(params = as.numeric(as.vector(df_NLL[i, -ncol(df_NLL)])))
+ }
+ return(df_NLL)
+ }
+
+ # Optimisation ---------------------------------------------------------------
+
+ ## Checking the starting point for optim
+ if (missing(lambdastarts)) {
+ # If lambda starts argument was not given:
+ # 0 by default
+ # so lambda = exp(0) = 1 by default
+ lambdastarts = rep(0, NbParamLambda)
+ message(
+ "No lambda initial values provided for optim. Using lambdastarts = c(",
+ paste(lambdastarts, collapse = ", "),
+ "), equivalent to detection rate",
+ ifelse(length(lambdastarts) == 1, "", "s"),
+ " of 1."
+ )
+ } else if (length(lambdastarts) != NbParamLambda) {
+ stop("lambdastarts (", paste(lambdastarts, collapse = ", "), ") ",
+ "should be of length ", NbParamLambda, " with lambdaformula ", lambdaformula)
+ }
+
+ if (missing(psistarts)) {
+ # If psi starts argument was not given
+ # 0 by default
+ # so psi = plogis(0) = 0.5 by default
+ psistarts = rep(0, NbParamPsi)
+ message(
+ "No psi initial values provided for optim. Using psistarts = c(",
+ paste(psistarts, collapse = ", "),
+ "), equivalent to occupancy probabilit",
+ ifelse(length(psistarts) == 1, "y", "ies"),
+ " of 0.5."
+ )
+ } else if (length(psistarts) != NbParamPsi) {
+ stop("psistarts (", paste(psistarts, collapse = ", "), ") ",
+ "should be of length ", NbParamPsi, " with psiformula ", psiformula)
+ }
+
+ starts <- c(psistarts, lambdastarts)
+
+ ## Run optim
+ opt <- optim(
+ starts,
+ nll_COP,
+ method = method,
+ hessian = se,
+ ...
+ )
+
+ # Get output -----------------------------------------------------------------
+ covMat <- invertHessian(opt, nP, se)
+ ests <- opt$par
+ tmb_mod <- NULL
+ nll <- opt$value
+ fmAIC <- 2 * opt$value + 2 * nP
+
+ # Organize effect estimates
+ names(ests) <- c(ParamPsi, ParamLambda)
+ psi_coef <- list(ests = ests[psiIdx], cov = as.matrix(covMat[psiIdx, psiIdx]))
+ lambda_coef <- list(ests = ests[lambdaIdx],
+ cov = as.matrix(covMat[lambdaIdx, lambdaIdx]))
+
+ # No random effects
+ psi_rand_info <- lambda_rand_info <- list()
+
+ # Create unmarkedEstimates ---------------------------------------------------
+ psi_uE <- unmarkedEstimate(
+ name = "Occupancy probability",
+ short.name = "psi",
+ estimates = psi_coef$ests,
+ covMat = psi_coef$cov,
+ fixed = 1:NbParamPsi,
+ invlink = "logistic",
+ invlinkGrad = "logistic.grad",
+ randomVarInfo = psi_rand_info
+ )
+
+ lambda_uE <- unmarkedEstimate(
+ name = "Detection rate",
+ short.name = "lambda",
+ estimates = lambda_coef$ests,
+ covMat = lambda_coef$cov,
+ fixed = 1:NbParamLambda,
+ invlink = "exp",
+ invlinkGrad = "exp",
+ randomVarInfo = lambda_rand_info
+ )
+
+ estimateList <- unmarkedEstimateList(list(psi = psi_uE, lambda = lambda_uE))
+
+ # Create unmarkedFit object--------------------------------------------------
+ umfit <- new(
+ "unmarkedFitCOP",
+ fitType = "occuCOP",
+ call = match.call(),
+ # formula = as.formula(paste(formlist, collapse = "")),
+ formula = as.formula(paste(
+ formlist["lambdaformula"], formlist["psiformula"], collapse = ""
+ )),
+ formlist = formlist,
+ data = data,
+ estimates = estimateList,
+ sitesRemoved = sitesRemoved,
+ removed_obs = removed_obs,
+ AIC = fmAIC,
+ opt = opt,
+ negLogLike = opt$value,
+ nllFun = nll_COP,
+ nll = nll,
+ convergence = opt$convergence,
+ TMB = tmb_mod
+ )
+
+ return(umfit)
+}
+
diff --git a/test_occuCOP.R b/test_occuCOP.R
new file mode 100644
index 0000000..45fb0e0
--- /dev/null
+++ b/test_occuCOP.R
@@ -0,0 +1,369 @@
+context("occuCOP fitting function")
+skip_on_cran()
+
+COPsimul <- function(psi = 0.5,
+ lambda = 1,
+ M = 100,
+ J = 5) {
+
+ z_i <- sample(
+ x = c(0, 1),
+ size = M,
+ prob = c(1 - psi, psi),
+ replace = T
+ )
+
+ y = matrix(rpois(n = M * J, lambda = lambda), nrow = M, ncol = J) * z_i
+
+ return(y)
+}
+
+
+# COPsimulfit <- function(psi = 0.5,
+# lambda = 1,
+# M = 100,
+# J = 5,
+# obsLength = NULL,
+# ...) {
+# occuCOP(data = unmarkedFrameCOP(
+# y = simulDataCOP(
+# psi = psi,
+# lambda = lambda,
+# M = M,
+# J = J
+# ),
+# obsLength = obsLength
+# ), ...)
+# }
+
+test_that("unmarkedFrameCOP is constructed correctly", {
+ set.seed(123)
+
+ # Simulate data
+ M = 100
+ J = 5
+ y = COPsimul(psi = .5,
+ lambda = 1,
+ M = M,
+ J = J)
+ obsLength = y * 0 + 1
+
+ psiCovs = data.frame(
+ "psiNum" = rnorm(M),
+ "psiInt" = as.integer(rpois(n = M, lambda = 5)),
+ "psiBool" = sample(c(T, F), size = M, replace = T),
+ "psiChar" = sample(letters[1:5], size = M, replace = T),
+ "psiFactUnord" = factor(sample(
+ letters[5:10], size = M, replace = T
+ )),
+ "psiFactOrd" = sample(factor(c("A", "B", "C"), ordered = T), size =
+ M, replace = T)
+ )
+
+ lambdaCovs = list(
+ "lambdaNum" = matrix(
+ rnorm(M * J),
+ nrow = M, ncol = J
+ ),
+ "lambdaInt" = matrix(
+ as.integer(rpois(n = M * J, lambda = 1e5)),
+ nrow = M, ncol = J
+ ),
+ "lambdaBool" = matrix(
+ sample(c(T, F), size = M * J, replace = T),
+ nrow = M, ncol = J
+ ),
+ "lambdaChar" = matrix(
+ sample(letters[1:5], size = M * J, replace = T),
+ nrow = M, ncol = J
+ ),
+ "lambdaFactUnord" = matrix(
+ factor(sample(letters[5:10], size = M * J, replace = T)),
+ nrow = M, ncol = J
+ ),
+ "lambdaFactOrd" = matrix(
+ sample(factor(c("A", "B", "C"), ordered = T), size = M * J, replace = T),
+ nrow = M, ncol = J
+ )
+ )
+
+
+ # Creating a unmarkedFrameCOP object
+ expect_warning(umf <- unmarkedFrameCOP(y = y))
+ expect_no_error(umf <- unmarkedFrameCOP(y = y, obsLength = obsLength))
+
+
+ # Create subsets
+ expect_no_error(umf_sub_i <- umf[1:3, ])
+ expect_no_error(umf_sub_j <- umf[, 1:2])
+ expect_no_error(umf_sub_ij <- umf[1:3, 1:2])
+
+ # unmarkedFrameCOP organisation ----------------------------------------------
+ expect_true(inherits(umf, "unmarkedFrameCOP"))
+ expect_equivalent(numSites(umf_sub_i), 3)
+ expect_equivalent(obsNum(umf_sub_j), 2)
+ expect_equivalent(numSites(umf_sub_ij), 3)
+ expect_equivalent(obsNum(umf_sub_ij), 2)
+
+ # unmarkedFrameCOP display ---------------------------------------------------
+
+ # print method
+ expect_no_error(print(umf))
+ expect_no_warning(print(umf))
+ expect_no_message(print(umf))
+ expect_no_error(print(umf_sub_i))
+ expect_no_error(print(umf_sub_j))
+ expect_no_error(print(umf_sub_ij))
+ expect_no_error(print(umf[1,]))
+ expect_no_error(print(umf[,1]))
+ expect_no_error(print(umf[1,1]))
+
+ # summary method for unmarkedFrameCOP
+ expect_no_error(summary(umf))
+ expect_no_error(summary(umf_sub_ij))
+
+ # plot method for unmarkedFrameCOP
+ expect_no_error(plot(umf))
+ expect_no_error(plot(umf_sub_ij))
+
+
+ # Input handling: covariates -------------------------------------------------
+ expect_no_error(umfCovs <- unmarkedFrameCOP(
+ y = y,
+ obsLength = obsLength,
+ siteCovs = psiCovs,
+ obsCovs = lambdaCovs
+ ))
+ expect_no_error(print(umfCovs))
+ expect_no_error(summary(umfCovs))
+ expect_no_error(plot(umfCovs))
+
+ # Input handling: NA ---------------------------------------------------------
+
+ # NA should not pose problems when creating the unmarkedFrameCOP object.
+ # The warnings and potential errors will be displayed when fitting the model.
+ # Except when y only contains NA: then there's an error.
+
+ ## NA in y
+ yNA <- y
+ yNA[1:2,] <- NA
+ yNA[3:5, 3:4] <- NA
+ yNA[,3] <- NA
+ expect_no_error(umfNA <- unmarkedFrameCOP(y = yNA, obsLength = obsLength))
+ expect_no_error(print(umfNA))
+ expect_no_error(summary(umfNA))
+ expect_no_error(plot(umfNA))
+
+ ## NA in obsLength
+ obsLengthNA <- obsLength
+ obsLengthNA[1:2,] <- NA
+ obsLengthNA[3:5, 3:4] <- NA
+ obsLengthNA[,5] <- NA
+ expect_no_error(umfNA <- unmarkedFrameCOP(y = y, obsLength = obsLengthNA))
+ expect_no_error(print(umfNA))
+ expect_no_error(summary(umfNA))
+ expect_no_error(plot(umfNA))
+
+ ## NA also in covariates
+ psiCovsNA <- psiCovs
+ psiCovsNA[1:5,] <- NA
+ psiCovsNA[c(8,10,12), 3] <- NA
+ psiCovsNA[,1] <- NA
+ lambdaCovsNA <- lambdaCovs
+ lambdaCovsNA[[1]][1:5,] <- NA
+ lambdaCovsNA[[1]][,3] <- NA
+ lambdaCovsNA[[3]][,5] <- NA
+ expect_no_error(umfCovsNA <- unmarkedFrameCOP(
+ y = yNA,
+ obsLength = obsLengthNA,
+ siteCovs = psiCovsNA,
+ obsCovs = lambdaCovsNA
+ ))
+ expect_no_error(print(umfCovsNA))
+ expect_no_error(summary(umfCovsNA))
+ expect_no_error(plot(umfCovsNA))
+
+ ## all NA in y
+ yallNA <- y
+ yallNA[1:M, 1:J] <- NA
+ expect_error(unmarkedFrameCOP(y = yallNA, obsLength = obsLength))
+
+ # Input handling: error and warnings -----------------------------------------
+ # Error when there are decimals in y
+ y_with_decimals = y
+ y_with_decimals[1, 1] = .5
+ expect_error(unmarkedFrameCOP(y = y_with_decimals, obsLength = obsLength))
+
+ # Warning if y is detection/non-detection instead of count
+ y_detec_nodetec = (y > 0) * 1
+ expect_warning(unmarkedFrameCOP(y = y_detec_nodetec, obsLength = obsLength))
+
+ # Error if the dimension of obsLength is different than that of y
+ expect_error(unmarkedFrameCOP(y = y, obsLength = obsLength[1:2, 1:2]))
+})
+
+
+test_that("occuCOP can fit simple models", {
+ # Simulate data
+ set.seed(123)
+ M = 100
+ J = 5
+ y = COPsimul(psi = .5,
+ lambda = 1,
+ M = M,
+ J = J)
+ obsLength = y * 0 + 1
+
+ # Create umf
+ umf <- unmarkedFrameCOP(y = y, obsLength = obsLength)
+
+ # Fitting options ----
+
+ # With default parameters
+ expect_no_error(fit_default <- occuCOP(data = umf))
+ expect_no_error(occuCOP(data = umf, psiformula = ~ 1, lambdaformula = ~ 1))
+
+ # With chosen starting points
+ expect_no_error(occuCOP(data = umf,
+ psiformula = ~ 1, lambdaformula = ~ 1,
+ psistarts = qlogis(.7),
+ lambdastarts = log(0.1)))
+ expect_error(occuCOP(data = umf,
+ psiformula = ~ 1, lambdaformula = ~ 1,
+ psistarts = qlogis(c(0.7, 0.5))))
+ expect_error(occuCOP(data = umf,
+ psiformula = ~ 1, lambdaformula = ~ 1,
+ lambdastarts = log(c(1, 2))))
+
+ # With different options
+ expect_no_error(occuCOP(data = umf, method = "Nelder-Mead"))
+ expect_error(occuCOP(data = umf, method = "ABC"))
+
+ expect_no_error(occuCOP(data = umf, se = F))
+ expect_error(occuCOP(data = umf, se = "ABC"))
+
+ expect_no_error(occuCOP(data = umf, engine = "R"))
+ expect_error(occuCOP(data = umf, engine = "julia"))
+
+ expect_no_error(occuCOP(data = umf, na.rm = F))
+ expect_error(occuCOP(data = umf, na.rm = "no"))
+
+ # Looking at at COP model outputs ----
+ expect_is(fit_default, "unmarkedFitCOP")
+
+ ## backTransform
+ expect_no_error(backTransform(fit_default, type = "psi"))
+ expect_no_error(backTransform(fit_default, type = "lambda"))
+ expect_error(backTransform(fit_default, type = "state"))
+ expect_error(backTransform(fit_default, type = "det"))
+ expect_is(backTransform(fit_default, type = "psi"), "unmarkedBackTrans")
+
+ ## predict with newdata = fit@data
+ expect_no_error(predict(object = fit_default, type = "psi"))
+ expect_no_error(predict(object = fit_default, type = "lambda"))
+ expect_error(predict(object = fit_default, type = "state"))
+
+ ## predict with newdata = 1
+ expect_no_error(predict(
+ object = fit_default,
+ newdata = data.frame(1),
+ type = "psi"
+ ))
+ expect_no_error(predict(
+ object = fit_default,
+ newdata = data.frame(1),
+ type = "lambda"
+ ))
+ expect_no_error(predict(
+ object = fit_default,
+ newdata = data.frame("a"=1:5,"b"=10:14),
+ type = "psi"
+ ))
+
+ # Fitting accurately ----
+
+ ## psi = 0.50, lambda = 1 ----
+ psi_test = .5
+ lambda_test = 1
+ fit_accur <- occuCOP(data = unmarkedFrameCOP(
+ y = COPsimul(
+ psi = psi_test,
+ lambda = lambda_test,
+ M = 1000,
+ J = 10
+ ),
+ obsLength = matrix(1, nrow = 1000, ncol = 10)
+ ))
+ psi_estimate = backTransform(fit_accur, type = "psi")@estimate
+ lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate
+ expect_equivalent(
+ psi_estimate,
+ psi_test,
+ tol = 0.05
+ )
+ expect_equivalent(
+ lambda_estimate,
+ lambda_test,
+ tol = 0.05
+ )
+
+ ## psi = 0.25, lambda = 5 ----
+ psi_test = 0.25
+ lambda_test = 5
+ fit_accur <- occuCOP(data = unmarkedFrameCOP(
+ y = COPsimul(
+ psi = psi_test,
+ lambda = lambda_test,
+ M = 1000,
+ J = 10
+ ),
+ obsLength = matrix(1, nrow = 1000, ncol = 10)
+ ))
+ psi_estimate = backTransform(fit_accur, type = "psi")@estimate
+ lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate
+ expect_equivalent(
+ psi_estimate,
+ psi_test,
+ tol = 0.05
+ )
+ expect_equivalent(
+ lambda_estimate,
+ lambda_test,
+ tol = 0.05
+ )
+
+ ## psi = 0.75, lambda = 0.5 ----
+ psi_test = 0.75
+ lambda_test = 0.5
+ fit_accur <- occuCOP(data = unmarkedFrameCOP(
+ y = COPsimul(
+ psi = psi_test,
+ lambda = lambda_test,
+ M = 1000,
+ J = 10
+ ),
+ obsLength = matrix(1, nrow = 1000, ncol = 10)
+ ))
+ psi_estimate = backTransform(fit_accur, type = "psi")@estimate
+ lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate
+ expect_equivalent(
+ psi_estimate,
+ psi_test,
+ tol = 0.05
+ )
+ expect_equivalent(
+ lambda_estimate,
+ lambda_test,
+ tol = 0.05
+ )
+
+
+ #TODO
+})
+
+
+test_that("occuCOP can fit models with covariates", {
+
+})
+
+
diff --git a/tmp_use_COP.Rmd b/tmp_use_COP.Rmd
new file mode 100644
index 0000000..57e9e55
--- /dev/null
+++ b/tmp_use_COP.Rmd
@@ -0,0 +1,584 @@
+---
+title: "occuCOP example"
+author: "Léa Pautrel"
+date: "`r format(Sys.time(), '%d %B %Y')`"
+output:
+ html_document:
+ toc: yes
+ toc_depth: 3
+ number_sections: true
+ toc_float:
+ collapsed: true
+ code_folding: show
+ theme: cerulean
+editor_options:
+ chunk_output_type: console
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(
+ echo = TRUE, # show code of the chunk and chunk output
+ error = TRUE,
+ collapse = FALSE,
+ comment = "", # no comment character for the chunk text outputs
+ out.width = "100%" # responsive width for chunk outputs (figures, ...)
+)
+```
+
+```{r library, echo=T, results="hide"}
+source("./R/occuCOP.R")
+library(ggplot2)
+library(ggrepel)
+library(dplyr)
+library(tibble)
+```
+
+
+
+
+
+# The COP model
+
+Hereafter, I use the notations defined in the following table.
+
+| Notation | Parameter |
+| -------------: | :----------------------------------------------------------- |
+| $I$ | Number of sites |
+| $J$ | Number of sampling occasions |
+| $\psi_i$ | Occupancy probability in site $i$ |
+| $Z_i$ | Occupancy state of site $i$ (present = 1, absent = 0) |
+| $\lambda_{ij}$ | Detection rate in site $i$ during sampling occasion $j$ |
+| $T_{ij}$ | Duration or length of sampling occasion $j$ |
+| $N_{i}$ | Number of detections in site $i$ |
+| $N_{ij}$ | Number of detections in site $i$ during sampling occasion $j$ |
+
+
+The model is defined as:
+
+$$
+Z_i \sim \text{Bernoulli}(\psi_{i}) \\
+N_{ij} \sim \text{Poisson}(\lambda_{ij} T_{ij})
+$$
+
+The likelihood of this model is:
+
+\begin{align}
+\begin{split}
+ L(\psi_i, \lambda_{is})
+ &= \prod_{i=1}^{I} \mathbb{P}_{\psi_i, \lambda_{is}}(N_{i} = n_{i}) \\
+ &=
+ \prod_{i, n_i > 0} \left ( \mathbb{P}_{\psi_i, \lambda_{is}}(N_{i} = n_i, n_i > 0) \right )
+ \times
+ \prod_{i, n_i = 0} \left ( \mathbb{P}_{\psi_i, \lambda_{is}}(N_{i} = 0) \right ) \\
+ &=
+ \prod_{i, n_i > 0} \left (
+ \psi_i \frac{ \left( \sum_{s=1}^S(\lambda_{is} T_{is}) \right) ^ {n_i} }{ n_{i}! } e^{-\sum_{s=1}^S(\lambda_{is} T_{is})}
+ \right )
+ \times
+ \prod_{i, n_i = 0} \left (
+ \psi_i e^{-\sum_{s=1}^S(\lambda_{is} T_{is})} + (1-\psi_i)
+ \right )
+\end{split}
+\end{align}
+
+# Simulation
+
+
+```{r init}
+set.seed(0)
+
+M = 100 # Number of sites
+J = 5 # Number of observations (transects, sessions, sampling occasions...)
+```
+
+We first simulate the data set. We'll simulate data in M=`r M` sites during J=`r J` sampling occasions.
+
+We simulate site covariates:
+
+- "elev" will not be used to impact the occupancy probability
+- "habitat" will be used
+
+```{r simul-psi-cov}
+SiteCov <- data.frame(
+ "elev" = pmax(rnorm(n = M, mean = 50, sd = 50), 0),
+ "habitat" = factor(sample(
+ x = c("A", "B", "C"),
+ size = M,
+ replace = T
+ ))
+)
+print(as_tibble(SiteCov))
+```
+
+
+We simulate the occupancy state of all sites depending on the habitat type.
+
+```{r simul-psi-z}
+# Occupancy probability depending on habitat type
+simul_psi_habA = 0.9
+simul_psi_habB = 0.5
+simul_psi_habC = 0.1
+
+# For each site, we simulate occupancy state
+z_i = rep(NA, M)
+for (i in 1:M) {
+ if (SiteCov$habitat[i] == 'A') {
+ z_i[i] <-
+ sample(c(0, 1),
+ size = 1,
+ prob = c(1 - simul_psi_habA, simul_psi_habA))
+ next
+ } else if (SiteCov$habitat[i] == 'B') {
+ z_i[i] <-
+ sample(c(0, 1),
+ size = 1,
+ prob = c(1 - simul_psi_habB, simul_psi_habB))
+ next
+ } else {
+ z_i[i] <-
+ sample(c(0, 1),
+ size = 1,
+ prob = c(1 - simul_psi_habC, simul_psi_habC))
+ }
+}
+
+# We have this data:
+print(as_tibble(data.frame("habitat" = SiteCov$habitat, "z" = z_i)))
+
+# In our data, our occupancy probability per habitat is slightly different
+# from the one we chose to simulate due to randomness:
+print(
+ data.frame("habitat" = SiteCov$habitat, "z" = z_i) %>%
+ group_by(habitat) %>%
+ summarise("NbSites" = n(), "psi" = mean(z))
+)
+```
+
+There are `r sum(z_i)` occupied sites out of `r M` in our simulation.
+
+We simulate temporal covariates:
+
+- "wind" will not impact the detection rate
+- "rain" will impact the detection rate
+
+```{r simul-lambda-cov}
+# Detection rate: 1 detection per day on average,
+simul_lambda = 1
+simul_lambda_rain = .3
+
+# Temporal covariates
+TemporalCov <- list(
+ "rain" = matrix(pmax(rexp(n = M * J, rate = 1 / 10), 0), nrow = M, ncol = J),
+ "wind" = matrix(rnorm(n = M * J, mean = 10), nrow = M, ncol = J)
+)
+print(as_tibble(TemporalCov))
+```
+
+We then simulate the detection rate depending on "rain" as a linear
+
+```{r simul-lambda}
+lambda_from_rain = function(rain) {
+ # pmin(pmax((3 - .2 * rain), 0), 2)
+ 2 / (1 + exp(.2 * (rain - 20)))
+}
+rain_lambda = data.frame("rain" = seq(0, max(TemporalCov$rain), by = c(.1)))
+rain_lambda$lambda = lambda_from_rain(rain_lambda$rain)
+plot(x = rain_lambda$rain,
+ y = rain_lambda$lambda,
+ type = "l")
+
+simul_lambda = lambda_from_rain(TemporalCov$rain)
+```
+
+Finally, for each site, we simulate a number of detections based on the occupancy state $z_i$ of site $i$ and detection rate $lambda_{ij}$ of site $i$ observation $j$.
+
+```{r simul-y}
+y = matrix(
+ rpois(n = M * J, lambda = as.numeric(t(simul_lambda))),
+ nrow = M,
+ ncol = J,
+ byrow = T
+) * z_i
+
+data.frame(
+ "simul_lambda" = as.numeric(t(simul_lambda)),
+ "y" = as.numeric(t(y)),
+ "z" = rep(z_i, each = J)
+) %>%
+ ggplot(aes(x = simul_lambda, y = y)) +
+ geom_point(alpha=.3,shape=16,size=2) +
+ facet_grid(z ~ .,labeller = label_both) +
+ theme_light()
+```
+
+Let's say that each observation lasts one time-unit here, *e.g.* one day per sampling occasion.
+
+```{r simul-obsLength}
+obsLength = y * 0 + 1
+class(obsLength)
+print(as_tibble(obsLength))
+```
+
+
+# unmarkedFrameCOP object
+
+## Creation
+
+We then create our unmarkedFrameCOP object.
+
+```{r umf-creation}
+umf = unmarkedFrameCOP(
+ y = y,
+ obsLength = obsLength,
+ siteCovs = SiteCov,
+ obsCovs = TemporalCov
+)
+```
+
+## Visualisation
+
+```{r umf-visu}
+head(umf)
+summary(umf)
+print(umf[2,4]) # 2nd site, 4th observation
+plot(umf)
+```
+
+## Warning and errors
+
+There is an error if there are decimals in y.
+
+```{r umf-error-decimal}
+y_with_decimals = y
+y_with_decimals[2, 1] = 49.5
+unmarkedFrameCOP(
+ y = y_with_decimals,
+ obsLength = obsLength,
+ siteCovs = SiteCov,
+ obsCovs = TemporalCov
+)
+```
+
+There is a warning if data is detection/non-detection (1/0) instead of count.
+
+```{r umf-warning-01}
+y_detec_nodetec = (y > 0) * 1
+unmarkedFrameCOP(
+ y = y_detec_nodetec,
+ obsLength = obsLength,
+ siteCovs = SiteCov,
+ obsCovs = TemporalCov
+)
+```
+
+There is an error if the dimension of obsLength is different than that of y.
+
+```{r umf-error-obsLength}
+unmarkedFrameCOP(
+ y = y,
+ obsLength = obsLength[1:5, 1:2],
+ siteCovs = SiteCov,
+ obsCovs = TemporalCov
+)
+```
+
+# Model fitting
+
+We fit the model. For more information, see [section How is the model fitted?](#how-is-the-model-fitted)
+
+## Null model
+
+```{r null-fit}
+resCOP_null <- occuCOP(
+ data = umf,
+ psiformula = ~ 1,
+ lambdaformula = ~ 1,
+ method = "Nelder-Mead"
+)
+print(resCOP_null)
+```
+
+We can backtransform the parameters:
+
+```{r null-backtransform}
+# Occcupancy estimate
+plogis(resCOP_null@estimates@estimates$psi@estimates) # plogis(x): "inverse logit"
+backTransform(resCOP_null, type = "psi")
+
+# Detection rate estimate
+exp(resCOP_null@estimates@estimates$lambda@estimates)
+backTransform(resCOP_null, type = "lambda")
+```
+
+## psi ~ elev; lambda ~ rain
+
+```{r cov-fit}
+resCOP_habitat_rain <- occuCOP(
+ data = umf,
+ psiformula = ~ -1 + habitat,
+ lambdaformula = ~ rain,
+ method = "Nelder-Mead"
+)
+print(resCOP_habitat_rain)
+```
+
+A warning tell us that the model did not converge. We can add in parameters for optim in the `occuCOP` function, for example to increase the maximum number of iterations, with `control = list(maxit = 5000)`:
+
+```{r cov-fit-maxit}
+resCOP_habitat_rain <- occuCOP(
+ data = umf,
+ psiformula = ~ -1 + habitat,
+ lambdaformula = ~ rain,
+ method = "Nelder-Mead",
+ control = list(maxit = 5000)
+)
+print(resCOP_habitat_rain)
+```
+
+We can backtransform the parameters. With covariates, we can no longer use the function `backTransform`:
+
+```{r cov-backtransform}
+backTransform(resCOP_habitat_rain, type = "psi")
+```
+
+However, we can use the function `predict`:
+
+```{r cov-predict}
+# Occcupancy estimate
+plogis(resCOP_habitat_rain@estimates@estimates$psi@estimates)
+psipred = predict(object = resCOP_habitat_rain, type = "psi")
+print(as_tibble(cbind(
+ data.frame("habitat" = resCOP_habitat_rain@data@siteCovs$habitat),
+ psipred
+)))
+
+# Detection rate estimate
+exp(resCOP_habitat_rain@estimates@estimates$lambda@estimates)
+lambdapred = predict(object = resCOP_habitat_rain, type = "lambda")
+print(as_tibble(cbind(
+ data.frame("rain" = resCOP_habitat_rain@data@obsCovs$rain),
+ lambdapred
+)))
+```
+
+## Ranking
+
+We can compare several models by their AIC by using `fitList` and `modSel`.
+
+```{r ranking}
+fl <- fitList(fits = list("Null" = resCOP_null,
+ "psi~elev, lambda~rain" = resCOP_habitat_rain))
+modSel(fl)
+```
+
+## How is the model fitted?
+
+The model is fitted by maximum likelihood estimation (MLE).
+
+```{r how-is-the-model-fitted, class.source = 'fold-hide'}
+# M = 100
+# J = 10
+
+cpt = 1
+for (simul_psi in c(.1, .25, .5, .75, .9)) {
+ for (simul_lambda in c(1, 3, 5)) {
+ # simul_z = c(rep(1, simul_psi * 100), rep(0, times=((1 - simul_psi) * 100)))
+ simul_z = sample(
+ x = c(0, 1),
+ size = M,
+ replace = T,
+ prob = c(1 - simul_psi, simul_psi)
+ )
+ simul_y <- matrix(rpois(n = M * J, lambda = simul_lambda),
+ nrow = M,
+ ncol = J) * simul_z
+
+ fit = occuCOP(data = unmarkedFrameCOP(y = simul_y,
+ obsLength = (simul_y * 0 + 1)))
+ estim_psi = backTransform(fit, "psi")
+ estim_lambda = backTransform(fit, "lambda")
+
+ nll_df_plot_i = occuCOP(
+ data = unmarkedFrameCOP(y = simul_y, obsLength = (simul_y * 0 + 1)),
+ get.NLL.params =
+ as.list(as.data.frame(t(
+ expand.grid("psi" = qlogis(seq(0.01, 0.99, by = 0.02)),
+ "lambda" = log(round(
+ simul_lambda * seq(from = 0.25, to = 1.75, by = .25), 2
+ )))
+ )))
+ )
+ nll_df_plot_i$simul_psi = simul_psi
+ nll_df_plot_i$simul_lambda = simul_lambda
+ nll_df_plot_i$estim_psi = estim_psi@estimate
+ nll_df_plot_i$estim_lambda = estim_lambda@estimate
+
+ if (cpt==1) {
+ nll_df_plot = nll_df_plot_i
+ } else{
+ nll_df_plot = rbind(nll_df_plot, nll_df_plot_i)
+ }
+ # cat('\r',cpt,"/ 15")
+ cpt = cpt + 1
+ rm(nll_df_plot_i)
+ }
+}
+
+nll_df_plot$psi = plogis(nll_df_plot[, "logit(psi).(Intercept)"])
+nll_df_plot$lambda = exp(nll_df_plot[, "log(lambda).(Intercept)"])
+nll_df_plot$simulated_lambda = (round(nll_df_plot$lambda, 2) == round(nll_df_plot$simul_lambda, 2))
+nll_df_plot$lambda_txt = format(nll_df_plot$lambda, digits = 2, nsmall = 2)
+nll_df_plot$label = ifelse(nll_df_plot$psi == max(nll_df_plot$psi),
+ paste0("λ=", nll_df_plot$lambda_txt),
+ NA)
+
+df_estimates = nll_df_plot %>%
+ group_by(simul_psi, simul_lambda) %>%
+ summarise(
+ estim_psi = unique(estim_psi),
+ estim_lambda = unique(estim_lambda),
+ maximum_llh = max(-nll),
+ minimum_llh = min(-nll),
+ .groups = "drop"
+ )
+
+ggplot() +
+ geom_line(
+ data = nll_df_plot,
+ aes(
+ x = psi,
+ y = -nll,
+ colour = lambda_txt,
+ linewidth = simulated_lambda,
+ linetype = simulated_lambda
+ )
+ ) +
+ ggh4x::facet_grid2(
+ simul_psi ~ simul_lambda,
+ labeller = label_both,
+ scales = "free_y",
+ independent = "y"
+ ) +
+ labs(
+ title = "Negative log-likelihood of the COP model for different values of ψ and λ",
+ x = "ψ",
+ y = "Log-likelihood",
+ colour = "lambda"
+ ) +
+ scale_linewidth_manual(values = c("TRUE" = 1, "FALSE" = .3)) +
+ scale_linetype_manual(values = c("TRUE" = "solid", "FALSE" = "dashed")) +
+ theme_light()+
+ geom_vline(data = df_estimates,
+ aes(xintercept = estim_psi),
+ linetype = "dotted") +
+ geom_label(data = df_estimates, aes(
+ x = estim_psi,
+ y = minimum_llh + abs(minimum_llh * .1),
+ label = paste0("λ: ", round(estim_lambda, 2))
+ )) +
+ geom_hline(data = df_estimates,
+ aes(yintercept = maximum_llh),
+ linetype = "dotted")+
+ geom_label(data = df_estimates, aes(
+ x = -0.05,
+ y = maximum_llh-25,
+ label = paste0("ψ: ", round(estim_psi, 2))
+ )) +
+ xlim(-.1, 1.1) +
+ theme(legend.position = "none")
+
+```
+
+# Interpret the results
+
+
+
+# Other models examples
+
+## occu
+
+```{r occu}
+resOccuNull = occu(
+ formula = ~ 1 ~ 1,
+ data = unmarkedFrameOccu(
+ y = y,
+ siteCovs = SiteCov,
+ obsCovs = TemporalCov
+ )
+)
+resOccuNull
+backTransform(resOccuNull,"state")
+backTransform(resOccuNull,"det")
+
+resOccu = occu(
+ formula = ~ rain ~ elev,
+ data = unmarkedFrameOccu(
+ y = y,
+ siteCovs = SiteCov,
+ obsCovs = TemporalCov
+ )
+)
+resOccu
+cbind(SiteCov$elev, predict(resOccu, "state"))
+cbind(as.numeric(t(TemporalCov$rain)), predict(resOccu, "det"))
+
+```
+
+## pcount
+
+```{r pcount}
+respcountNull = pcount(
+ formula = ~ 1 ~ 1,
+ data = unmarkedFramePCount(
+ y = y,
+ siteCovs = SiteCov,
+ obsCovs = TemporalCov
+ )
+)
+respcountNull
+backTransform(respcountNull,"state")
+backTransform(respcountNull,"det")
+
+respcount = pcount(
+ formula = ~ rain ~ elev,
+ data = unmarkedFramePCount(
+ y = y,
+ siteCovs = SiteCov,
+ obsCovs = TemporalCov
+ )
+)
+respcount
+cbind(SiteCov$elev, predict(respcount, "state"))
+cbind(as.numeric(t(TemporalCov$rain)), predict(respcount, "det"))
+```
+
+# --
+
+```{r dev}
+# psiformula <- ~ 1
+# lambdaformula <- ~ 1
+# psistarts = rep(0, length(attr(terms(psiformula), "term.labels")) + 1)
+# lambdastarts = rep(0, length(attr(terms(lambdaformula), "term.labels")) + 1)
+
+data <- umf
+psiformula <- ~ habitat
+lambdaformula <- ~ rain
+
+
+method = "BFGS"
+se = TRUE
+engine = "R"
+threads = 1L
+na.rm = TRUE
+
+maxit = 1000
+
+object = occuCOP(
+ data = data,
+ psiformula = psiformula,
+ lambdaformula = lambdaformula
+)
+
+```
+
+ \ No newline at end of file