Init occuCOP

author: lpautrel <lea.pautrel@terroiko.fr> 2023-11-28 12:33:03 +0100
committer: lpautrel <lea.pautrel@terroiko.fr> 2023-11-28 12:33:03 +0100
commit: 92d8b8fa68eb7c1ed7c1f92e139cd401c8e4671b (patch)
tree: 5456815ef1ee92c94f1576d41d36897ad7fd1e57
parent: 436977a45966b76d1290ef627e71797485803697 (diff)
3 files changed, 1700 insertions, 0 deletions
diff --git a/R/occuCOP.R b/R/occuCOP.R
new file mode 100644
index 0000000..9a8444a
--- /dev/null
+++ b/R/occuCOP.R
@@ -0,0 +1,747 @@
+
+# Prerequisites (while its not integrated within the package)
+library(unmarked)
+source("R/unmarkedFrame.R")
+source("R/utils.R")
+source("R/unmarkedEstimate.R")
+source("R/unmarkedFitList.R")
+source("R/predict.R")
+source("R/getDesign.R")
+source("R/mixedModelTools.R")
+# sapply(paste0("./R/", list.files("./R/")), source)
+
+library(lattice)
+library(testthat)
+
+# Fit the occupancy model COP
+# (Counting Occurrences Process)
+
+# Occupancy
+# z_i ~ Bernoulli(psi)
+# 
+# with:
+#   z_i = Occupancy state of site i
+#       = 1 if the site i is occupied
+#       = 0 else
+
+# Detection
+# N_ijs | z_i = 1 ~ Poisson(lambda*T_s)
+# N_ijs | z_i = 0 ~ 0
+# 
+# with:
+#   N_is = Number of detections of site i during session s
+#   z_i = Occupancy state of site i
+#   lambda = Detection rate
+#   T_s = Duration of the session s
+
+# CLASSES ----------------------------------------------------------------------
+
+## unmarkedFrameCOP class ----
+setClass(
+  "unmarkedFrameCOP",
+  representation(obsLength = "matrix"),
+  contains = "unmarkedFrame",
+  validity = function(object) {
+    errors <- character(0)
+    M <- nrow(object@y)
+    J <- ncol(object@y)
+    y_integers = sapply(object@y, check.integer, na.ignore = T)
+    if (!all(y_integers)) {
+      errors <- c(errors,
+                  paste(
+                    "Count detection should be integers. Non-integer values:",
+                    paste(object@y[which(!y_integers)], collapse = ', ')
+                  ))
+    }
+    if (!all(all(dim(object@obsLength) == dim(object@y)))){
+      errors <- c( errors, paste(
+          "obsLength should be a matrix of the same dimension as y, with M =", M,
+          "rows (sites) and J =", J, "columns (sampling occasions)."
+    ))}
+    if (length(errors) == 0) TRUE
+    else errors
+  }
+)
+
+## unmarkedFitCOP class ----
+setClass("unmarkedFitCOP",
+         representation(removed_obs = "matrix",
+                        formlist = "list",
+                        nll = "optionalNumeric",
+                        convergence="optionalNumeric"),
+         contains = "unmarkedFit")
+
+
+# METHODS ----------------------------------------------------------------------
+
+## getDesign method ----
+# Example for occu: https://github.com/rbchan/unmarked/blob/536e32ad7b2526f8fac1b315ed2e99accac0d50f/R/getDesign.R#L10-L97
+# Example for GDR: https://github.com/rbchan/unmarked/blob/c82e63947d7df7dfc896066e51dbf63bda3babf4/R/gdistremoval.R#L177-L253
+setMethod(
+  "getDesign", "unmarkedFrameCOP",
+  function(umf, formlist, na.rm = TRUE) {
+    
+    # Retrieve useful informations from umf
+    M <- numSites(umf)
+    J <- obsNum(umf)
+    y <- getY(umf)
+    obsLength <- umf@obsLength
+
+    # Occupancy submodel -------------------------------------------------------
+
+    # Retrieve the fixed-effects part of the formula
+    psiformula <- lme4::nobars(as.formula(formlist$psiformula))
+    psiVars <- all.vars(psiformula)
+    
+    # Retrieve the site covariates
+    sc <- siteCovs(umf)
+    if(is.null(sc)) {
+      sc <- data.frame(.dummy = rep(0, M))
+    }
+    
+    # Check for missing variables
+    psiMissingVars <- psiVars[!(psiVars %in% names(sc))]
+    if (length(psiMissingVars) > 0) {
+      stop(paste(
+        "Variable(s)",
+        paste(psiMissingVars, collapse = ", "),
+        "not found in siteCovs"
+      ), call. = FALSE)
+    }
+
+    # State model matrix
+    Xpsi <- model.matrix(
+      psiformula,
+      model.frame(psiformula, sc, na.action = NULL)
+    )
+    Zpsi <- get_Z(formlist$psiformula, sc)
+    
+    # Detection submodel -------------------------------------------------------
+    
+    # Retrieve the fixed-effects part of the formula
+    lambdaformula <- lme4::nobars(as.formula(formlist$lambdaformula))
+    lambdaVars <- all.vars(lambdaformula)
+    
+    # Retrieve the observation covariates
+    oc <- obsCovs(umf)
+    if(is.null(oc)) {
+      oc <- data.frame(.dummy = rep(0, M*J))
+    }
+    
+    # Check for missing variables
+    lambdaMissingVars <- lambdaVars[!(lambdaVars %in% names(oc))]
+    if (length(lambdaMissingVars) > 0) {
+      stop(paste(
+        "Variable(s)",
+        paste(lambdaMissingVars, collapse = ", "),
+        "not found in obsCovs"
+      ), call. = FALSE)
+    }
+    
+    # Detection model matrix
+    Xlambda <- model.matrix(
+      lambdaformula,
+      model.frame(lambdaformula, oc, na.action = NULL)
+    )
+    Zlambda <- get_Z(formlist$lambdaformula, oc)
+    
+    # Missing data -------------------------------------------------------------
+    
+    # Missing count data
+    missing_y <- is.na(y)
+    
+    # Missing site covariates
+    # (TRUE if at least one site covariate is missing in a site)
+    missing_sc <- apply(Xpsi, 1, function(x) any(is.na(x)))
+    
+    # Missing observation covariates
+    # (TRUE if at least one observation covariate is missing in a sampling occasion in a site)
+    missing_oc <- apply(Xlambda, 1, function(x) any(is.na(x)))
+    
+    # Matrix MxJ of values to not use because there is some data missing
+    removed_obs <- 
+      # If there is count data missing in site i and obs j
+      missing_y | 
+      # If there is any site covariate missing in site i
+      replicate(n = J, missing_sc) |
+      # If there is any observation covariate missing in site i and obs j
+      matrix(missing_oc, M, J, byrow = T)
+    
+    if (any(removed_obs)) {
+      if (na.rm) {
+        nb_missing_sites <- sum(apply(removed_obs, 1, function(x) all(is.na(x))))
+        nb_missing_observations <- sum(is.na(removed_obs))
+        warning("There are missing data (count data, site covariate, observation covariate):\n\t",
+                "Only data from ", M-nb_missing_sites, " sites out of ", M, " are used.\n\t",
+                "Only data from ", (M*J)-nb_missing_sites, " observations out of ", (M*J), " are used.")
+      } else {
+        stop("There is missing data:\n\t",
+             sum(missing_y), " missing count data (y)\n\t",
+             sum(missing_sc), " missing site covariates (siteCovs)\n\t",
+             sum(missing_oc), " missing observation covariates (obsCovs)")
+      }
+    }
+    
+    # Output -------------------------------------------------------------------
+    return(list(
+        y = y,
+        Xpsi = Xpsi,
+        Zpsi = Zpsi,
+        Xlambda = Xlambda,
+        Zlambda = Zlambda,
+        removed_obs = removed_obs
+    ))
+  })
+
+
+## show method ----
+setMethod("show", "unmarkedFrameCOP", function(object) {
+  J <- ncol(object@obsLength)
+  df_unmarkedFrame <- as(object, "data.frame")
+  df_obsLength <- data.frame(object@obsLength)
+  colnames(df_obsLength) <- paste0("obsLength.", 1:J)
+  if (ncol(df_unmarkedFrame) > J) {
+    df <-
+      cbind(df_unmarkedFrame[, 1:J], df_obsLength, df_unmarkedFrame[, (J + 1):ncol(df_unmarkedFrame)])
+  } else{
+    df <-
+      cbind(df_unmarkedFrame[, 1:J], df_obsLength)
+  }
+  cat("Data frame representation of unmarkedFrame object.\n")
+  print(df)
+})
+# LP note: as is defined in unmarkedFrame.R part "COERCION"
+
+
+## summary method ----
+setMethod("summary", "unmarkedFrameCOP", function(object,...) {
+  cat("unmarkedFrameCOP Object\n\n")
+  
+  cat(nrow(object@y), "sites\n")
+  cat("Maximum number of sampling occasions per site:",obsNum(object),"\n")
+  mean.obs <- mean(rowSums(!is.na(getY(object))))
+  cat("Mean number of sampling occasions per site:",round(mean.obs,2),"\n")
+  cat("Sites with at least one detection:",
+      sum(apply(getY(object), 1, function(x) any(x > 0, na.rm=TRUE))),
+      "\n\n")
+  
+  cat("Tabulation of y observations:")
+  print(table(object@y, exclude=NULL))
+  
+  if(!is.null(object@obsLength)) {
+    cat("\nTabulation of sampling occasions length:")
+    print(table(object@obsLength))
+  }
+
+  if(!is.null(object@siteCovs)) {
+    cat("\nSite-level covariates:\n")
+    print(summary(object@siteCovs))
+  }
+  
+  if(!is.null(object@obsCovs)) {
+    cat("\nObservation-level covariates:\n")
+    print(summary(object@obsCovs))
+  }
+
+})
+
+## umf[i, j] ----
+setMethod("[", c("unmarkedFrameCOP", "numeric", "numeric", "missing"),
+          function(x, i, j) {
+            # Gey dimensions of x
+            M <- numSites(x)
+            J <- obsNum(x)
+            
+            if (length(i) == 0 & length(j) == 0) {
+              return(x)
+            }
+            
+            # Check i
+            if (any(i < 0) &&
+                any(i > 0)) {
+              stop("i must be all positive or all negative indices.")
+            }
+            if (all(j < 0)) {
+              i <- (1:M)[i]
+            }
+            
+            # Check j
+            if (any(j < 0) &&
+                any(j > 0)) {
+              stop("j must be all positive or all negative indices.")
+            }
+            if (all(j < 0)) {
+              j <- (1:J)[j]
+            }
+            
+            # y observation count data subset
+            y <- getY(x)[i, j]
+            if (min(length(i), length(j)) == 1) {
+              y <- t(y)
+            }
+            
+            # obsLength subset
+            obsLength <- x@obsLength[i, j]
+            if (min(length(i), length(j)) == 1) {
+              obsLength <- t(obsLength)
+            }
+            
+            # siteCovs subset
+            siteCovs <- siteCovs(x)
+            if (!is.null(siteCovs)) {
+              siteCovs <- siteCovs(x)[i, , drop = FALSE]
+            }
+            
+            # obsCovs subset
+            obsCovs <- obsCovs(x)
+            if (!is.null(obsCovs)) {
+              MJ_site <- rep(1:M, each = J)
+              MJ_obs <- rep(1:J, times = M)
+              obsCovs <- obsCovs[((MJ_obs %in% j) & (MJ_site %in% i)), ]
+              rownames(obsCovs) <- NULL
+            }
+            
+            # Recreate umf
+            new(
+              Class = "unmarkedFrameCOP",
+              y = y,
+              obsLength = obsLength,
+              siteCovs = siteCovs,
+              obsCovs = obsCovs,
+              obsToY = diag(length(j)),
+              mapInfo = x@mapInfo
+            )
+          })
+
+
+## umf[i, ] ----
+setMethod("[", c("unmarkedFrameCOP", "numeric", "missing", "missing"),
+          function(x, i) {
+            x[i, 1:obsNum(x)]
+          })
+
+## umf[, j] ----
+setMethod("[", c("unmarkedFrameCOP", "missing", "numeric", "missing"),
+          function(x, j) {
+            x[1:numSites(x), j]
+          })
+
+
+## fl_getY ----
+setMethod("fl_getY", "unmarkedFitCOP", function(fit, ...){
+  getDesign(getData(fit), fit@formlist)$y
+})
+
+# PREDICT METHODS ----
+# Fit type-specific methods to generate different components of prediction
+# 5. predict_by_chunk(): Take inputs and generate predictions
+# Basic methods are shown below; fit type-specific methods in their own sections
+
+## predict_inputs_from_umf ----
+setMethod("predict_inputs_from_umf", "unmarkedFitCOP",
+          function(object, type, newdata, na.rm, re.form = NULL) {
+            designMats = getDesign(umf = newdata,
+                                   formlist = object@formlist,
+                                   na.rm = na.rm)
+            if (type == "psi") list_els <- c("Xpsi", "Zpsi")
+            if (type == "lambda") list_els <- c("Xlambda", "Zlambda")
+            X <- designMats[[list_els[1]]]
+            if (is.null(re.form)) X <- cbind(X, designMats[[list_els[2]]])
+            return(list(X = X, offset = NULL))
+          })
+
+## get_formula ----
+setMethod("get_formula", "unmarkedFitCOP", function(object, type, ...) {
+  fl <- object@formlist
+  switch(type, psi = fl$psiformula, lambda = fl$lambdaformula)
+})
+
+## get_orig_data ----
+setMethod("get_orig_data", "unmarkedFitCOP", function(object, type, ...){
+  clean_covs <- clean_up_covs(object@data, drop_final=FALSE)
+  datatype <- switch(type, psi = 'site_covs', lambda = 'obs_covs')
+  clean_covs[[datatype]]
+})
+
+
+
+# Useful functions -------------------------------------------------------------
+
+replaceinf = function(x) {
+  max(.Machine$double.xmin, min(.Machine$double.xmax, x))
+}
+
+check.integer <- function(x, na.ignore = F) {
+  if (is.na(x) & na.ignore) {
+    return(T)
+  }
+  x %% 1 == 0
+}
+
+# unmarkedFrame ----------------------------------------------------------------
+
+unmarkedFrameCOP <- function(y, obsLength, siteCovs = NULL, obsCovs = NULL, mapInfo = NULL) {
+  
+  # Verification that they are non-NA data in y
+  if (all(is.na(y))) {
+    stop("y only contains NA. y needs to contain non-NA integers.")
+  }
+  
+  # Verification that these are count data (and not detection/non-detection)
+  if (max(y, na.rm = T) == 1) {
+    warning("unmarkedFrameCOP is for count data. ",
+            "The data furnished appear to be detection/non-detection.")
+  }
+  
+  # Number of sampling occasions
+  J <- ncol(y)
+  
+  # If missing obsLength: replace by matrix of 1
+  # and the lambda will be the detection rate per observation length
+  if (missing(obsLength)) {
+    obsLength <- y * 0 + 1
+    warning("obsLength is missing, replacing it by a matrix of 1.")
+  } else if (is.null(obsLength)) {
+    obsLength <- y * 0 + 1
+    warning("obsLength is missing, replacing it by a matrix of 1.")
+  }
+  
+  # Transformation observation covariates
+  obsCovs <- covsToDF(
+    covs = obsCovs,
+    name = "obsCovs",
+    obsNum = J,
+    numSites = nrow(y)
+  )
+  
+  # Create S4 object of class unmarkedFrameCOP
+  umf <- new(
+    Class = "unmarkedFrameCOP",
+    y = y,
+    obsLength = obsLength,
+    siteCovs = siteCovs,
+    obsCovs = obsCovs,
+    obsToY = diag(J),
+    mapInfo = mapInfo
+  )
+  
+  return(umf)
+}
+
+
+# occuCOP ----------------------------------------------------------------------
+
+occuCOP <- function(data,
+                    psiformula = ~1,
+                    lambdaformula = ~1,
+                    psistarts = rep(0, length(attr(terms(psiformula), "term.labels")) + 1),
+                    lambdastarts = rep(0, length(attr(terms(lambdaformula), "term.labels")) + 1),
+                    method = "BFGS",
+                    se = TRUE,
+                    engine = "R",
+                    threads = 1L,
+                    na.rm = TRUE,
+                    get.NLL.params = NULL,
+                    ...) {
+  #TODO: engines
+  #TODO: NA
+  
+  # Neg loglikelihood COP ------------------------------------------------------
+  nll_COP <- function(params) {
+    
+    # Reading and transforming params
+    # Taking into account the covariates
+    
+    # psi as a function of covariates
+    #   psi in params are transformed with a logit transformation (qlogis)
+    #       so they're back-transformed to a proba with inverse logit (plogis)
+    #   Xpsi is the matrix with occupancy covariates
+    #   params is the vector with all the parameters
+    #   psiIdx is the index of Occupancy Parameters in params
+    psi <- plogis(Xpsi %*% params[psiIdx])
+    
+    # lambda as a function of covariates
+    #   lambda in params are transformed with a log-transformation
+    #          so they're back-transformed to a rate with exp here
+    #   Xlambda is the matrix with detection covariates
+    #   params is the vector with all the parameters
+    #   lambdaIdx is the index of Occupancy Parameters in params
+    lambda <- exp(Xlambda %*% params[lambdaIdx])
+    
+    # Probability for each site (i)
+    iProb <- rep(NA, M)
+    
+    for (i in 1:M) {
+      # Probability for the site i for each sampling occasion (j)
+      ijProb <- rep(NA, J)
+      
+      # Total obs length in site i
+      obsLengthSitei <- sum(data@obsLength[i,])
+      
+      # iIdx is the index to access the vectorised vectors of all obs in site i
+      iIdx <- ((i - 1) * J + 1):(i * J)
+      
+      if (SitesWithDetec[i]) {
+        # If there is at least one detection in site i
+        # factNij = sapply(factorial(yvec[iIdx]),replaceinf)
+        # iProb[i] = psi[i] * sum(lambda[iIdx] * obsLengthvec[iIdx] / factNij * exp(-lambda[iIdx] * obsLengthvec[iIdx]))
+        
+        
+        iProb[i] = psi[i] * (
+          (sum(lambda[iIdx] * obsLengthvec[iIdx])) ^ sum(yvec[iIdx]) / 
+            factorial(sum(yvec[iIdx])) *
+            exp(-sum(lambda[iIdx] * obsLengthvec[iIdx]))
+        )
+        
+      } else {
+        # If there is zero detection in site i
+        # iProb[i] = psi[i] * sum(exp(-lambda[iIdx] * obsLengthvec[iIdx])) + (1 - psi[i])
+        
+        iProb[i] = psi[i] * exp(-sum(lambda[iIdx] * obsLengthvec[iIdx])) + (1 - psi[i]) 
+        
+      }
+      
+    }
+    # Note: Why is there "replaceinf(factorial(data@y[i,]))" 
+    #       instead of just "factorial(data@y[i,])"?
+    # Because if there are a lot of detections (n_ij >= 171 on my machine),
+    #   then factorial(n_ij) = Inf, 
+    #   then 1/factorial(n_ij) = 0, 
+    #   then log(0) = -Inf,
+    #   then loglikelihood = -Inf
+    #   then optim does not work.
+    # 
+    # We want to maximise likelihood, 
+    # (technically, minimise negative loglikelihood).
+    # We don't do anything else with this function.
+    # So as long as we have a tiny value for likelihood, 
+    # (ie a huge value for negative loglikelihood)
+    # it doesn't matter if its an approximation.
+    
+    # log-likelihood
+    ll = sum(log(iProb))
+    return(-ll)
+  }
+  
+  # Check arguments ------------------------------------------------------------
+  if (!is(data, "unmarkedFrameCOP")) {
+    stop("Data is not an unmarkedFrameCOP object. See ?unmarkedFrameCOP if necessary.")
+  }
+  stopifnot(class(psiformula) == "formula")
+  stopifnot(class(lambdaformula) == "formula")
+  stopifnot(class(psistarts) %in%  c("numeric", "double", "integer"))
+  stopifnot(class(lambdastarts) %in%  c("numeric", "double", "integer"))
+  stopifnot(class(threads) %in%  c("numeric", "double", "integer"))
+  se = as.logical(match.arg(
+    arg = as.character(se),
+    choices = c("TRUE", "FALSE", "0", "1")
+  ))
+  na.rm = as.logical(match.arg(
+    arg = as.character(na.rm),
+    choices = c("TRUE", "FALSE", "0", "1")
+  ))
+  engine <- match.arg(engine, c("R"))
+  
+  # Format input data ----------------------------------------------------------
+  
+  # Retrieve formlist
+  formlist <- mget(c("psiformula", "lambdaformula"))
+  
+  # Get the design matrix (calling the getDesign method for unmarkedFrame)
+  # For more informations, see: getMethod("getDesign", "unmarkedFrameCOP")
+  designMats <- getDesign(umf = data, formlist = formlist, na.rm = na.rm)
+  
+  # y is the count detection data (matrix of size M sites x J sessions)
+  y <- getY(data)
+  
+  # obsLength is the length of sessions (matrix of size M sites x J sessions)
+  obsLength = data@obsLength
+  
+  # Xpsi is the matrix with the occupancy covariates
+  Xpsi <- designMats$Xpsi
+  
+  # Xlambda is the matrix with the detection covariates
+  Xlambda <- designMats$Xlambda
+  
+  # Zpsi is ???
+  Zpsi <- designMats$Zpsi
+  
+  # Zlambda is ???
+  Zlambda <- designMats$Zlambda
+  
+  # removed_obs is a M x J matrix of the observations removed from the analysis
+  removed_obs <- designMats$removed_obs
+  sitesRemoved <- unname(which(apply(removed_obs, 1, function(x) all(x))))
+  
+  # Number of sites
+  M <- nrow(y)
+  
+  # Number of sampling occasions
+  J <- ncol(y)
+  
+  # Total number of detection per site
+  NbDetecPerSite = rowSums(y)
+  
+  # Sites where there was at least one detection
+  SitesWithDetec = NbDetecPerSite > 0
+  
+  # Number of sites where there was at least one detection
+  NbSitesWithDetec = sum(SitesWithDetec)
+  
+  # Set up parameter names and indices-----------------------------------------
+  
+  # ParamPsi Occupancy parameter names
+  ParamPsi <- colnames(Xpsi)
+  
+  # ParamLambda Detection parameter names
+  ParamLambda <- colnames(Xlambda)
+  
+  # NbParamPsi Number of occupancy parameters
+  NbParamPsi <- ncol(Xpsi)
+  
+  # NbParamLambda Number of detection parameters
+  NbParamLambda <- ncol(Xlambda)
+  
+  # nP Total number of parameters
+  nP <- NbParamPsi + NbParamLambda
+  
+  # psiIdx Index of the occupancy parameters
+  psiIdx <- 1:NbParamPsi
+  
+  # lambdaIdx Index of the detection parameters
+  lambdaIdx <- (NbParamPsi+1):nP
+  
+  # Re-format some variables for C and R engines
+  #   From Matrix of dim MxJ to vector of length MxJ:
+  #   c(ySite1Obs1, ySite1Obs2, ..., ySite1ObsJ, ysite2Obs1, ...)
+  yvec <- as.numeric(t(y))
+  obsLengthvec <- as.numeric(t(obsLength))
+  
+  # get.NLL.params -------------------------------------------------------------
+  if (!is.null(get.NLL.params)) {
+    df_NLL = data.frame(t(as.data.frame(get.NLL.params)))
+    rownames(df_NLL) = NULL
+    colnames(df_NLL) = c(paste0("logit(psi).", ParamPsi),
+                         paste0("log(lambda).", ParamLambda))
+    df_NLL$nll = NA
+    for (i in 1:nrow(df_NLL)) {
+      df_NLL$nll[i] = nll_COP(params = as.numeric(as.vector(df_NLL[i, -ncol(df_NLL)])))
+    }
+    return(df_NLL)
+  }
+  
+  # Optimisation ---------------------------------------------------------------
+  
+  ## Checking the starting point for optim
+  if (missing(lambdastarts)) {
+    # If lambda starts argument was not given:
+    # 0 by default 
+    # so lambda = exp(0) = 1 by default
+    lambdastarts = rep(0, NbParamLambda)
+    message(
+      "No lambda initial values provided for optim. Using lambdastarts = c(",
+      paste(lambdastarts, collapse = ", "),
+      "), equivalent to detection rate",
+      ifelse(length(lambdastarts) == 1, "", "s"),
+      " of 1."
+    )
+  } else if (length(lambdastarts) != NbParamLambda) {
+    stop("lambdastarts (", paste(lambdastarts, collapse = ", "), ") ",
+         "should be of length ", NbParamLambda, " with lambdaformula ", lambdaformula)
+  }
+  
+  if (missing(psistarts)) {
+    # If psi starts argument was not given
+    # 0 by default 
+    # so psi = plogis(0) = 0.5 by default
+    psistarts = rep(0, NbParamPsi)
+    message(
+      "No psi initial values provided for optim. Using psistarts = c(",
+      paste(psistarts, collapse = ", "),
+      "), equivalent to occupancy probabilit",
+      ifelse(length(psistarts) == 1, "y", "ies"),
+      " of 0.5."
+    )
+  } else if (length(psistarts) != NbParamPsi) {
+    stop("psistarts (", paste(psistarts, collapse = ", "), ") ",
+         "should be of length ", NbParamPsi, " with psiformula ", psiformula)
+  }
+
+  starts <- c(psistarts, lambdastarts)
+  
+  ## Run optim
+  opt <- optim(
+    starts,
+    nll_COP,
+    method = method,
+    hessian = se,
+    ...
+  )
+  
+  # Get output -----------------------------------------------------------------
+  covMat <- invertHessian(opt, nP, se)
+  ests <- opt$par
+  tmb_mod <- NULL
+  nll <- opt$value
+  fmAIC <- 2 * opt$value + 2 * nP
+  
+  # Organize effect estimates
+  names(ests) <- c(ParamPsi, ParamLambda)
+  psi_coef <- list(ests = ests[psiIdx], cov = as.matrix(covMat[psiIdx, psiIdx]))
+  lambda_coef <- list(ests = ests[lambdaIdx],
+                      cov = as.matrix(covMat[lambdaIdx, lambdaIdx]))
+  
+  # No random effects
+  psi_rand_info <- lambda_rand_info <- list()
+  
+  # Create unmarkedEstimates ---------------------------------------------------
+  psi_uE <- unmarkedEstimate(
+    name = "Occupancy probability",
+    short.name = "psi",
+    estimates = psi_coef$ests,
+    covMat = psi_coef$cov,
+    fixed = 1:NbParamPsi,
+    invlink = "logistic",
+    invlinkGrad = "logistic.grad",
+    randomVarInfo = psi_rand_info
+  )
+  
+  lambda_uE <- unmarkedEstimate(
+    name = "Detection rate",
+    short.name = "lambda",
+    estimates = lambda_coef$ests,
+    covMat = lambda_coef$cov,
+    fixed = 1:NbParamLambda,
+    invlink = "exp",
+    invlinkGrad = "exp",
+    randomVarInfo = lambda_rand_info
+  )
+  
+  estimateList <- unmarkedEstimateList(list(psi = psi_uE, lambda = lambda_uE))
+  
+  # Create unmarkedFit object--------------------------------------------------
+  umfit <- new(
+    "unmarkedFitCOP",
+    fitType = "occuCOP",
+    call = match.call(),
+    # formula = as.formula(paste(formlist, collapse = "")),
+    formula = as.formula(paste(
+      formlist["lambdaformula"], formlist["psiformula"], collapse = ""
+    )),
+    formlist = formlist,
+    data = data,
+    estimates = estimateList,
+    sitesRemoved = sitesRemoved,
+    removed_obs = removed_obs,
+    AIC = fmAIC,
+    opt = opt,
+    negLogLike = opt$value,
+    nllFun = nll_COP,
+    nll = nll,
+    convergence = opt$convergence,
+    TMB = tmb_mod
+  )
+  
+  return(umfit)
+}
+
diff --git a/test_occuCOP.R b/test_occuCOP.R
new file mode 100644
index 0000000..45fb0e0
--- /dev/null
+++ b/test_occuCOP.R
@@ -0,0 +1,369 @@
+context("occuCOP fitting function")
+skip_on_cran()
+
+COPsimul <- function(psi = 0.5,
+                         lambda = 1,
+                         M = 100,
+                         J = 5) {
+  
+  z_i <- sample(
+    x = c(0, 1),
+    size = M,
+    prob = c(1 - psi, psi),
+    replace = T
+  )
+  
+  y = matrix(rpois(n = M * J, lambda = lambda), nrow = M, ncol = J) * z_i
+  
+  return(y)
+}
+
+
+# COPsimulfit <- function(psi = 0.5,
+#                         lambda = 1,
+#                         M = 100,
+#                         J = 5,
+#                         obsLength = NULL,
+#                         ...) {
+#   occuCOP(data = unmarkedFrameCOP(
+#     y = simulDataCOP(
+#       psi = psi,
+#       lambda = lambda,
+#       M = M,
+#       J = J
+#     ),
+#     obsLength = obsLength
+#   ), ...)
+# }
+
+test_that("unmarkedFrameCOP is constructed correctly", {
+  set.seed(123)
+  
+  # Simulate data
+  M = 100
+  J = 5
+  y = COPsimul(psi = .5,
+                   lambda = 1,
+                   M = M,
+                   J = J)
+  obsLength = y * 0 + 1
+  
+  psiCovs = data.frame(
+    "psiNum" = rnorm(M),
+    "psiInt" = as.integer(rpois(n = M, lambda = 5)),
+    "psiBool" = sample(c(T, F), size = M, replace = T),
+    "psiChar" = sample(letters[1:5], size = M, replace = T),
+    "psiFactUnord" = factor(sample(
+      letters[5:10], size = M, replace = T
+    )),
+    "psiFactOrd" = sample(factor(c("A", "B", "C"), ordered = T), size =
+                            M, replace = T)
+  )
+  
+  lambdaCovs = list(
+    "lambdaNum" = matrix(
+      rnorm(M * J), 
+      nrow = M, ncol = J
+    ),
+    "lambdaInt" = matrix(
+      as.integer(rpois(n = M * J, lambda = 1e5)),
+      nrow = M, ncol = J
+    ),
+    "lambdaBool" = matrix(
+      sample(c(T, F), size = M * J, replace = T),
+      nrow = M, ncol = J
+    ),
+    "lambdaChar" = matrix(
+      sample(letters[1:5], size = M * J, replace = T),
+      nrow = M, ncol = J
+    ),
+    "lambdaFactUnord" = matrix(
+      factor(sample(letters[5:10], size = M * J, replace = T)),
+      nrow = M, ncol = J
+    ),
+    "lambdaFactOrd" = matrix(
+      sample(factor(c("A", "B", "C"), ordered = T), size = M * J, replace = T),
+      nrow = M, ncol = J
+    )
+  )
+
+  
+  # Creating a unmarkedFrameCOP object
+  expect_warning(umf <- unmarkedFrameCOP(y = y))
+  expect_no_error(umf <- unmarkedFrameCOP(y = y, obsLength = obsLength))
+  
+  
+  # Create subsets
+  expect_no_error(umf_sub_i <- umf[1:3, ])
+  expect_no_error(umf_sub_j <- umf[, 1:2])
+  expect_no_error(umf_sub_ij <- umf[1:3, 1:2])
+  
+  # unmarkedFrameCOP organisation ----------------------------------------------
+  expect_true(inherits(umf, "unmarkedFrameCOP"))
+  expect_equivalent(numSites(umf_sub_i), 3)
+  expect_equivalent(obsNum(umf_sub_j), 2)
+  expect_equivalent(numSites(umf_sub_ij), 3)
+  expect_equivalent(obsNum(umf_sub_ij), 2)
+  
+  # unmarkedFrameCOP display ---------------------------------------------------
+  
+  # print method
+  expect_no_error(print(umf))
+  expect_no_warning(print(umf))
+  expect_no_message(print(umf))
+  expect_no_error(print(umf_sub_i))
+  expect_no_error(print(umf_sub_j))
+  expect_no_error(print(umf_sub_ij))
+  expect_no_error(print(umf[1,]))
+  expect_no_error(print(umf[,1]))
+  expect_no_error(print(umf[1,1]))
+  
+  # summary method for unmarkedFrameCOP
+  expect_no_error(summary(umf))
+  expect_no_error(summary(umf_sub_ij))
+  
+  # plot method for unmarkedFrameCOP
+  expect_no_error(plot(umf))
+  expect_no_error(plot(umf_sub_ij))
+  
+  
+  # Input handling: covariates -------------------------------------------------
+  expect_no_error(umfCovs <- unmarkedFrameCOP(
+    y = y,
+    obsLength = obsLength,
+    siteCovs = psiCovs,
+    obsCovs = lambdaCovs
+  ))
+  expect_no_error(print(umfCovs))
+  expect_no_error(summary(umfCovs))
+  expect_no_error(plot(umfCovs))
+  
+  # Input handling: NA ---------------------------------------------------------
+  
+  # NA should not pose problems when creating the unmarkedFrameCOP object.
+  # The warnings and potential errors will be displayed when fitting the model.
+  # Except when y only contains NA: then there's an error.
+  
+  ## NA in y
+  yNA <- y
+  yNA[1:2,] <- NA
+  yNA[3:5, 3:4] <- NA
+  yNA[,3] <- NA
+  expect_no_error(umfNA <- unmarkedFrameCOP(y = yNA, obsLength = obsLength))
+  expect_no_error(print(umfNA))
+  expect_no_error(summary(umfNA))
+  expect_no_error(plot(umfNA))
+  
+  ## NA in obsLength
+  obsLengthNA <- obsLength
+  obsLengthNA[1:2,] <- NA
+  obsLengthNA[3:5, 3:4] <- NA
+  obsLengthNA[,5] <- NA
+  expect_no_error(umfNA <- unmarkedFrameCOP(y = y, obsLength = obsLengthNA))
+  expect_no_error(print(umfNA))
+  expect_no_error(summary(umfNA))
+  expect_no_error(plot(umfNA))
+  
+  ## NA also in covariates
+  psiCovsNA <- psiCovs
+  psiCovsNA[1:5,] <- NA
+  psiCovsNA[c(8,10,12), 3] <- NA
+  psiCovsNA[,1] <- NA
+  lambdaCovsNA <- lambdaCovs
+  lambdaCovsNA[[1]][1:5,] <- NA
+  lambdaCovsNA[[1]][,3] <- NA
+  lambdaCovsNA[[3]][,5] <- NA
+  expect_no_error(umfCovsNA <- unmarkedFrameCOP(
+    y = yNA,
+    obsLength = obsLengthNA,
+    siteCovs = psiCovsNA,
+    obsCovs = lambdaCovsNA
+  ))
+  expect_no_error(print(umfCovsNA))
+  expect_no_error(summary(umfCovsNA))
+  expect_no_error(plot(umfCovsNA))
+  
+  ## all NA in y
+  yallNA <- y
+  yallNA[1:M, 1:J] <- NA
+  expect_error(unmarkedFrameCOP(y = yallNA, obsLength = obsLength))
+  
+  # Input handling: error and warnings -----------------------------------------
+  # Error when there are decimals in y
+  y_with_decimals = y
+  y_with_decimals[1, 1] = .5
+  expect_error(unmarkedFrameCOP(y = y_with_decimals, obsLength = obsLength))
+  
+  # Warning if y is detection/non-detection instead of count
+  y_detec_nodetec = (y > 0) * 1
+  expect_warning(unmarkedFrameCOP(y = y_detec_nodetec, obsLength = obsLength))
+  
+  # Error if the dimension of obsLength is different than that of y
+  expect_error(unmarkedFrameCOP(y = y, obsLength = obsLength[1:2, 1:2]))
+})
+
+
+test_that("occuCOP can fit simple models", {
+  # Simulate data
+  set.seed(123)
+  M = 100
+  J = 5
+  y = COPsimul(psi = .5,
+                   lambda = 1,
+                   M = M,
+                   J = J)
+  obsLength = y * 0 + 1
+  
+  # Create umf
+  umf <- unmarkedFrameCOP(y = y, obsLength = obsLength)
+  
+  # Fitting options ----
+  
+  # With default parameters
+  expect_no_error(fit_default <- occuCOP(data = umf))
+  expect_no_error(occuCOP(data = umf, psiformula = ~ 1, lambdaformula = ~ 1))
+  
+  # With chosen starting points
+  expect_no_error(occuCOP(data = umf, 
+                          psiformula = ~ 1, lambdaformula = ~ 1, 
+                          psistarts = qlogis(.7),
+                          lambdastarts = log(0.1)))
+  expect_error(occuCOP(data = umf, 
+                       psiformula = ~ 1, lambdaformula = ~ 1, 
+                       psistarts = qlogis(c(0.7, 0.5))))
+  expect_error(occuCOP(data = umf, 
+                       psiformula = ~ 1, lambdaformula = ~ 1, 
+                       lambdastarts = log(c(1, 2))))
+  
+  # With different options
+  expect_no_error(occuCOP(data = umf, method = "Nelder-Mead"))
+  expect_error(occuCOP(data = umf, method = "ABC"))
+  
+  expect_no_error(occuCOP(data = umf, se = F))
+  expect_error(occuCOP(data = umf, se = "ABC"))
+  
+  expect_no_error(occuCOP(data = umf, engine = "R"))
+  expect_error(occuCOP(data = umf, engine = "julia"))
+  
+  expect_no_error(occuCOP(data = umf, na.rm = F))
+  expect_error(occuCOP(data = umf, na.rm = "no"))
+  
+  # Looking at at COP model outputs ----
+  expect_is(fit_default, "unmarkedFitCOP")
+  
+  ## backTransform
+  expect_no_error(backTransform(fit_default, type = "psi"))
+  expect_no_error(backTransform(fit_default, type = "lambda"))
+  expect_error(backTransform(fit_default, type = "state"))
+  expect_error(backTransform(fit_default, type = "det"))
+  expect_is(backTransform(fit_default, type = "psi"), "unmarkedBackTrans")
+  
+  ## predict with newdata = fit@data
+  expect_no_error(predict(object = fit_default, type = "psi"))
+  expect_no_error(predict(object = fit_default, type = "lambda"))
+  expect_error(predict(object = fit_default, type = "state"))
+  
+  ## predict with newdata = 1
+  expect_no_error(predict(
+    object = fit_default,
+    newdata = data.frame(1),
+    type = "psi"
+  ))
+  expect_no_error(predict(
+    object = fit_default,
+    newdata = data.frame(1),
+    type = "lambda"
+  ))
+  expect_no_error(predict(
+    object = fit_default,
+    newdata = data.frame("a"=1:5,"b"=10:14),
+    type = "psi"
+  ))
+  
+  # Fitting accurately ----
+  
+  ## psi = 0.50, lambda = 1 ----
+  psi_test = .5
+  lambda_test = 1
+  fit_accur <- occuCOP(data = unmarkedFrameCOP(
+    y = COPsimul(
+      psi = psi_test,
+      lambda = lambda_test,
+      M = 1000,
+      J = 10
+    ),
+    obsLength = matrix(1, nrow = 1000, ncol = 10)
+  ))
+  psi_estimate = backTransform(fit_accur, type = "psi")@estimate
+  lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate
+  expect_equivalent(
+    psi_estimate, 
+    psi_test, 
+    tol = 0.05
+  )
+  expect_equivalent(
+    lambda_estimate, 
+    lambda_test, 
+    tol = 0.05
+  )
+  
+  ## psi = 0.25, lambda = 5 ----
+  psi_test = 0.25
+  lambda_test = 5
+  fit_accur <- occuCOP(data = unmarkedFrameCOP(
+    y = COPsimul(
+      psi = psi_test,
+      lambda = lambda_test,
+      M = 1000,
+      J = 10
+    ),
+    obsLength = matrix(1, nrow = 1000, ncol = 10)
+  ))
+  psi_estimate = backTransform(fit_accur, type = "psi")@estimate
+  lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate
+  expect_equivalent(
+    psi_estimate, 
+    psi_test, 
+    tol = 0.05
+  )
+  expect_equivalent(
+    lambda_estimate, 
+    lambda_test, 
+    tol = 0.05
+  )
+  
+  ## psi = 0.75, lambda = 0.5 ----
+  psi_test = 0.75
+  lambda_test = 0.5
+  fit_accur <- occuCOP(data = unmarkedFrameCOP(
+    y = COPsimul(
+      psi = psi_test,
+      lambda = lambda_test,
+      M = 1000,
+      J = 10
+    ),
+    obsLength = matrix(1, nrow = 1000, ncol = 10)
+  ))
+  psi_estimate = backTransform(fit_accur, type = "psi")@estimate
+  lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate
+  expect_equivalent(
+    psi_estimate, 
+    psi_test, 
+    tol = 0.05
+  )
+  expect_equivalent(
+    lambda_estimate, 
+    lambda_test, 
+    tol = 0.05
+  )
+
+  
+  #TODO
+})
+
+
+test_that("occuCOP can fit models with covariates", {
+  
+})
+
+
diff --git a/tmp_use_COP.Rmd b/tmp_use_COP.Rmd
new file mode 100644
index 0000000..57e9e55
--- /dev/null
+++ b/tmp_use_COP.Rmd
@@ -0,0 +1,584 @@
+---
+title: "occuCOP example"
+author: "Léa Pautrel"
+date: "`r format(Sys.time(), '%d %B %Y')`"
+output:
+  html_document:
+    toc: yes
+    toc_depth: 3
+    number_sections: true
+    toc_float: 
+      collapsed: true
+    code_folding: show
+    theme: cerulean
+editor_options: 
+  chunk_output_type: console
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(
+  echo = TRUE, # show code of the chunk and chunk output
+  error = TRUE,
+  collapse = FALSE,
+  comment = "", # no comment character for the chunk text outputs
+  out.width = "100%" # responsive width for chunk outputs (figures, ...)
+)
+```
+
+```{r library, echo=T, results="hide"}
+source("./R/occuCOP.R")
+library(ggplot2)
+library(ggrepel)
+library(dplyr)
+library(tibble)
+```
+
+
+
+
+
+# The COP model
+
+Hereafter, I use the notations defined in the following table.
+
+|       Notation | Parameter                                                    |
+| -------------: | :----------------------------------------------------------- |
+|            $I$ | Number of sites                                              |
+|            $J$ | Number of sampling occasions                                 |
+|       $\psi_i$ | Occupancy probability in site $i$                            |
+|          $Z_i$ | Occupancy state of site $i$ (present = 1, absent = 0)        |
+| $\lambda_{ij}$ | Detection rate in site $i$ during sampling occasion $j$      |
+|       $T_{ij}$ | Duration or length of sampling occasion $j$                  |
+|       $N_{i}$  | Number of detections in site $i$                             |
+|       $N_{ij}$ | Number of detections in site $i$ during sampling occasion $j$ |
+
+
+The model is defined as:
+
+$$
+Z_i \sim \text{Bernoulli}(\psi_{i}) \\
+N_{ij} \sim \text{Poisson}(\lambda_{ij} T_{ij})
+$$
+
+The likelihood of this model is:
+
+\begin{align}
+\begin{split}
+  L(\psi_i, \lambda_{is})
+    &= \prod_{i=1}^{I} \mathbb{P}_{\psi_i, \lambda_{is}}(N_{i} = n_{i}) \\
+    &=  
+        \prod_{i, n_i > 0} \left ( \mathbb{P}_{\psi_i, \lambda_{is}}(N_{i} = n_i, n_i > 0) \right )
+        \times 
+        \prod_{i, n_i = 0} \left ( \mathbb{P}_{\psi_i, \lambda_{is}}(N_{i} = 0) \right ) \\
+    &= 
+        \prod_{i, n_i > 0} \left ( 
+            \psi_i \frac{ \left( \sum_{s=1}^S(\lambda_{is} T_{is}) \right) ^ {n_i} }{ n_{i}! } e^{-\sum_{s=1}^S(\lambda_{is} T_{is})}
+        \right )
+        \times 
+        \prod_{i, n_i = 0} \left (
+            \psi_i e^{-\sum_{s=1}^S(\lambda_{is} T_{is})}  + (1-\psi_i)
+        \right )
+\end{split}
+\end{align}
+
+# Simulation
+
+
+```{r init}
+set.seed(0)
+
+M = 100 # Number of sites
+J = 5 # Number of observations (transects, sessions, sampling occasions...)
+```
+
+We first simulate the data set. We'll simulate data in M=`r M` sites during J=`r J` sampling occasions.
+
+We simulate site covariates:
+
+- "elev" will not be used to impact the occupancy probability
+- "habitat" will be used
+
+```{r simul-psi-cov}
+SiteCov <- data.frame(
+  "elev" = pmax(rnorm(n = M, mean = 50, sd = 50), 0),
+  "habitat" = factor(sample(
+    x = c("A", "B", "C"),
+    size = M,
+    replace = T
+  ))
+)
+print(as_tibble(SiteCov))
+```
+
+
+We simulate the occupancy state of all sites depending on the habitat type.
+
+```{r simul-psi-z}
+# Occupancy probability depending on habitat type
+simul_psi_habA = 0.9
+simul_psi_habB = 0.5
+simul_psi_habC = 0.1
+
+# For each site, we simulate occupancy state
+z_i = rep(NA, M)
+for (i in 1:M) {
+  if (SiteCov$habitat[i] == 'A') {
+    z_i[i] <-
+      sample(c(0, 1),
+             size = 1,
+             prob = c(1 - simul_psi_habA, simul_psi_habA))
+    next
+  } else if (SiteCov$habitat[i] == 'B') {
+    z_i[i] <-
+      sample(c(0, 1),
+             size = 1,
+             prob = c(1 - simul_psi_habB, simul_psi_habB))
+    next
+  } else {
+    z_i[i] <-
+      sample(c(0, 1),
+             size = 1,
+             prob = c(1 - simul_psi_habC, simul_psi_habC))
+  }
+}
+
+# We have this data:
+print(as_tibble(data.frame("habitat" = SiteCov$habitat, "z" = z_i)))
+
+# In our data, our occupancy probability per habitat is slightly different 
+# from the one we chose to simulate due to randomness:
+print(
+  data.frame("habitat" = SiteCov$habitat, "z" = z_i) %>% 
+    group_by(habitat) %>% 
+    summarise("NbSites" = n(), "psi" = mean(z))
+)
+```
+
+There are `r sum(z_i)` occupied sites out of `r M` in our simulation.
+
+We simulate temporal covariates:
+
+- "wind" will not impact the detection rate
+- "rain" will impact the detection rate
+
+```{r simul-lambda-cov}
+# Detection rate: 1 detection per day on average,
+simul_lambda = 1
+simul_lambda_rain = .3
+
+# Temporal covariates
+TemporalCov <- list(
+  "rain" = matrix(pmax(rexp(n = M * J, rate = 1 / 10), 0), nrow = M, ncol = J),
+  "wind" = matrix(rnorm(n = M * J, mean = 10), nrow = M, ncol = J)
+)
+print(as_tibble(TemporalCov))
+```
+
+We then simulate the detection rate depending on "rain" as a linear 
+
+```{r simul-lambda}
+lambda_from_rain = function(rain) {
+  # pmin(pmax((3 - .2 * rain), 0), 2)
+  2 / (1 + exp(.2 * (rain - 20)))
+}
+rain_lambda = data.frame("rain" = seq(0, max(TemporalCov$rain), by = c(.1)))
+rain_lambda$lambda = lambda_from_rain(rain_lambda$rain)
+plot(x = rain_lambda$rain,
+     y = rain_lambda$lambda,
+     type = "l")
+
+simul_lambda = lambda_from_rain(TemporalCov$rain)
+```
+
+Finally, for each site, we simulate a number of detections based on the occupancy state $z_i$ of site $i$ and detection rate $lambda_{ij}$ of site $i$ observation $j$.
+
+```{r simul-y}
+y = matrix(
+  rpois(n = M * J, lambda = as.numeric(t(simul_lambda))),
+  nrow = M,
+  ncol = J,
+  byrow = T
+) * z_i
+
+data.frame(
+  "simul_lambda" = as.numeric(t(simul_lambda)),
+  "y" = as.numeric(t(y)),
+  "z" = rep(z_i, each = J)
+) %>%
+  ggplot(aes(x = simul_lambda, y = y)) +
+  geom_point(alpha=.3,shape=16,size=2) +
+  facet_grid(z ~ .,labeller = label_both) +
+  theme_light()
+```
+
+Let's say that each observation lasts one time-unit here, *e.g.* one day per sampling occasion.
+
+```{r simul-obsLength}
+obsLength = y * 0 + 1
+class(obsLength)
+print(as_tibble(obsLength))
+```
+
+
+# unmarkedFrameCOP object
+
+## Creation
+
+We then create our unmarkedFrameCOP object.
+
+```{r umf-creation}
+umf = unmarkedFrameCOP(
+  y = y,
+  obsLength = obsLength,
+  siteCovs = SiteCov,
+  obsCovs = TemporalCov
+)
+```
+
+## Visualisation
+
+```{r umf-visu}
+head(umf)
+summary(umf)
+print(umf[2,4]) # 2nd site, 4th observation
+plot(umf)
+```
+
+## Warning and errors
+
+There is an error if there are decimals in y.
+
+```{r umf-error-decimal}
+y_with_decimals = y
+y_with_decimals[2, 1] = 49.5
+unmarkedFrameCOP(
+  y = y_with_decimals,
+  obsLength = obsLength,
+  siteCovs = SiteCov,
+  obsCovs = TemporalCov
+)
+```
+
+There is a warning if data is detection/non-detection (1/0) instead of count.
+
+```{r umf-warning-01}
+y_detec_nodetec = (y > 0) * 1
+unmarkedFrameCOP(
+  y = y_detec_nodetec,
+  obsLength = obsLength,
+  siteCovs = SiteCov,
+  obsCovs = TemporalCov
+)
+```
+
+There is an error if the dimension of obsLength is different than that of y.
+
+```{r umf-error-obsLength}
+unmarkedFrameCOP(
+  y = y,
+  obsLength = obsLength[1:5, 1:2],
+  siteCovs = SiteCov,
+  obsCovs = TemporalCov
+)
+```
+
+# Model fitting
+
+We fit the model. For more information, see [section How is the model fitted?](#how-is-the-model-fitted)
+
+## Null model
+
+```{r null-fit}
+resCOP_null <- occuCOP(
+  data = umf,
+  psiformula = ~ 1,
+  lambdaformula = ~ 1,
+  method = "Nelder-Mead"
+)
+print(resCOP_null)
+```
+
+We can backtransform the parameters:
+
+```{r null-backtransform}
+# Occcupancy estimate
+plogis(resCOP_null@estimates@estimates$psi@estimates) # plogis(x): "inverse logit"
+backTransform(resCOP_null, type = "psi")
+
+# Detection rate estimate
+exp(resCOP_null@estimates@estimates$lambda@estimates)
+backTransform(resCOP_null, type = "lambda")
+```
+
+## psi ~ elev; lambda ~ rain
+
+```{r cov-fit}
+resCOP_habitat_rain <- occuCOP(
+  data = umf,
+  psiformula = ~ -1 + habitat,
+  lambdaformula = ~ rain,
+  method = "Nelder-Mead"
+)
+print(resCOP_habitat_rain)
+```
+
+A warning tell us that the model did not converge. We can add in parameters for optim in the `occuCOP` function, for example to increase the maximum number of iterations, with `control = list(maxit = 5000)`:
+
+```{r cov-fit-maxit}
+resCOP_habitat_rain <- occuCOP(
+  data = umf,
+  psiformula = ~ -1 + habitat,
+  lambdaformula = ~ rain,
+  method = "Nelder-Mead",
+  control = list(maxit = 5000)
+)
+print(resCOP_habitat_rain)
+```
+
+We can backtransform the parameters. With covariates, we can no longer use the function `backTransform`:
+
+```{r cov-backtransform}
+backTransform(resCOP_habitat_rain, type = "psi")
+```
+
+However, we can use the function `predict`:
+
+```{r cov-predict}
+# Occcupancy estimate
+plogis(resCOP_habitat_rain@estimates@estimates$psi@estimates)
+psipred = predict(object = resCOP_habitat_rain, type = "psi")
+print(as_tibble(cbind(
+  data.frame("habitat" = resCOP_habitat_rain@data@siteCovs$habitat),
+  psipred
+)))
+
+# Detection rate estimate
+exp(resCOP_habitat_rain@estimates@estimates$lambda@estimates)
+lambdapred = predict(object = resCOP_habitat_rain, type = "lambda")
+print(as_tibble(cbind(
+  data.frame("rain" = resCOP_habitat_rain@data@obsCovs$rain),
+  lambdapred
+)))
+```
+
+## Ranking
+
+We can compare several models by their AIC by using `fitList` and `modSel`.
+
+```{r ranking}
+fl <- fitList(fits = list("Null" = resCOP_null,
+                          "psi~elev, lambda~rain" = resCOP_habitat_rain))
+modSel(fl)
+```
+
+## How is the model fitted?
+
+The model is fitted by maximum likelihood estimation (MLE). 
+
+```{r how-is-the-model-fitted, class.source = 'fold-hide'}
+# M = 100
+# J = 10
+
+cpt = 1
+for (simul_psi in c(.1, .25, .5, .75, .9)) {
+  for (simul_lambda in c(1, 3, 5)) {
+    # simul_z = c(rep(1, simul_psi * 100), rep(0, times=((1 - simul_psi) * 100)))
+    simul_z = sample(
+      x = c(0, 1),
+      size = M,
+      replace = T,
+      prob = c(1 - simul_psi, simul_psi)
+    )
+    simul_y <- matrix(rpois(n = M * J, lambda = simul_lambda),
+                      nrow = M,
+                      ncol = J) * simul_z
+    
+    fit = occuCOP(data = unmarkedFrameCOP(y = simul_y,
+                                          obsLength = (simul_y * 0 + 1)))
+    estim_psi = backTransform(fit, "psi")
+    estim_lambda = backTransform(fit, "lambda")
+    
+    nll_df_plot_i = occuCOP(
+      data = unmarkedFrameCOP(y = simul_y, obsLength = (simul_y * 0 + 1)),
+      get.NLL.params =
+        as.list(as.data.frame(t(
+          expand.grid("psi" = qlogis(seq(0.01, 0.99, by = 0.02)),
+                      "lambda" = log(round(
+                        simul_lambda * seq(from = 0.25, to = 1.75, by = .25), 2
+                      )))
+        )))
+    )
+    nll_df_plot_i$simul_psi = simul_psi
+    nll_df_plot_i$simul_lambda = simul_lambda
+    nll_df_plot_i$estim_psi = estim_psi@estimate
+    nll_df_plot_i$estim_lambda = estim_lambda@estimate
+    
+    if (cpt==1) {
+      nll_df_plot = nll_df_plot_i
+    } else{
+      nll_df_plot = rbind(nll_df_plot, nll_df_plot_i)
+    }
+    # cat('\r',cpt,"/ 15")
+    cpt = cpt + 1
+    rm(nll_df_plot_i)
+  }
+}
+
+nll_df_plot$psi = plogis(nll_df_plot[, "logit(psi).(Intercept)"])
+nll_df_plot$lambda = exp(nll_df_plot[, "log(lambda).(Intercept)"])
+nll_df_plot$simulated_lambda = (round(nll_df_plot$lambda, 2) == round(nll_df_plot$simul_lambda, 2))
+nll_df_plot$lambda_txt = format(nll_df_plot$lambda, digits = 2, nsmall = 2)
+nll_df_plot$label = ifelse(nll_df_plot$psi == max(nll_df_plot$psi),
+                           paste0("λ=", nll_df_plot$lambda_txt),
+                           NA)
+
+df_estimates = nll_df_plot %>%
+  group_by(simul_psi, simul_lambda) %>%
+  summarise(
+    estim_psi = unique(estim_psi),
+    estim_lambda = unique(estim_lambda),
+    maximum_llh = max(-nll),
+    minimum_llh = min(-nll),
+    .groups = "drop"
+  )
+
+ggplot() +
+  geom_line(
+    data = nll_df_plot,
+    aes(
+      x = psi,
+      y = -nll,
+      colour = lambda_txt,
+      linewidth = simulated_lambda,
+      linetype = simulated_lambda
+    )
+  ) +
+  ggh4x::facet_grid2(
+    simul_psi ~ simul_lambda,
+    labeller = label_both,
+    scales = "free_y",
+    independent = "y"
+  ) +
+  labs(
+    title = "Negative log-likelihood of the COP model for different values of ψ and λ",
+    x = "ψ",
+    y = "Log-likelihood",
+    colour = "lambda"
+  ) +
+  scale_linewidth_manual(values = c("TRUE" = 1, "FALSE" = .3)) +
+  scale_linetype_manual(values = c("TRUE" = "solid", "FALSE" = "dashed")) +
+  theme_light()+
+  geom_vline(data = df_estimates,
+             aes(xintercept = estim_psi),
+             linetype = "dotted") +
+  geom_label(data = df_estimates, aes(
+    x = estim_psi,
+    y = minimum_llh + abs(minimum_llh * .1),
+    label = paste0("λ: ", round(estim_lambda, 2))
+  )) +
+  geom_hline(data = df_estimates,
+             aes(yintercept = maximum_llh),
+             linetype = "dotted")+
+    geom_label(data = df_estimates, aes(
+    x = -0.05,
+    y = maximum_llh-25,
+    label = paste0("ψ: ", round(estim_psi, 2))
+  )) +
+  xlim(-.1, 1.1) +
+  theme(legend.position = "none")
+
+```
+
+# Interpret the results
+
+
+
+# Other models examples
+
+## occu
+
+```{r occu}
+resOccuNull = occu(
+  formula = ~ 1 ~ 1,
+  data = unmarkedFrameOccu(
+    y = y,
+    siteCovs = SiteCov,
+    obsCovs = TemporalCov
+  )
+)
+resOccuNull
+backTransform(resOccuNull,"state")
+backTransform(resOccuNull,"det")
+
+resOccu = occu(
+  formula = ~ rain ~ elev,
+  data = unmarkedFrameOccu(
+    y = y,
+    siteCovs = SiteCov,
+    obsCovs = TemporalCov
+  )
+)
+resOccu
+cbind(SiteCov$elev, predict(resOccu, "state"))
+cbind(as.numeric(t(TemporalCov$rain)), predict(resOccu, "det"))
+
+```
+
+## pcount
+
+```{r pcount}
+respcountNull = pcount(
+  formula = ~ 1 ~ 1,
+  data = unmarkedFramePCount(
+    y = y,
+    siteCovs = SiteCov,
+    obsCovs = TemporalCov
+  )
+)
+respcountNull
+backTransform(respcountNull,"state")
+backTransform(respcountNull,"det")
+
+respcount = pcount(
+  formula = ~ rain ~ elev,
+  data = unmarkedFramePCount(
+    y = y,
+    siteCovs = SiteCov,
+    obsCovs = TemporalCov
+  )
+)
+respcount
+cbind(SiteCov$elev, predict(respcount, "state"))
+cbind(as.numeric(t(TemporalCov$rain)), predict(respcount, "det"))
+```
+
+# --
+ 
+```{r dev}
+# psiformula <-  ~ 1
+# lambdaformula <- ~ 1
+# psistarts = rep(0, length(attr(terms(psiformula), "term.labels")) + 1)
+# lambdastarts = rep(0, length(attr(terms(lambdaformula), "term.labels")) + 1)
+
+data <- umf
+psiformula <- ~ habitat
+lambdaformula <- ~ rain
+
+
+method = "BFGS"
+se = TRUE
+engine = "R"
+threads = 1L
+na.rm = TRUE
+
+maxit = 1000
+
+object = occuCOP(
+  data = data,
+  psiformula = psiformula,
+  lambdaformula = lambdaformula
+)
+
+```
+ 
+ 
+\ No newline at end of file
author	lpautrel <lea.pautrel@terroiko.fr>	2023-11-28 12:33:03 +0100
committer	lpautrel <lea.pautrel@terroiko.fr>	2023-11-28 12:33:03 +0100
commit	92d8b8fa68eb7c1ed7c1f92e139cd401c8e4671b (patch)
tree	5456815ef1ee92c94f1576d41d36897ad7fd1e57
parent	436977a45966b76d1290ef627e71797485803697 (diff)