Add unmarkedFitCOP methods and predict methods for COP

author: lpautrel <lea.pautrel@terroiko.fr> 2023-12-01 14:38:20 +0100
committer: lpautrel <lea.pautrel@terroiko.fr> 2023-12-01 14:38:20 +0100
commit: 8913f59a50152651cf3bc5ab063e567cf185525b (patch)
tree: 43241ea5cba097d1dae5899267da42f1447e9fe8
parent: e5564064b715d149c450d0f56128a1197827b835 (diff)
3 files changed, 716 insertions, 267 deletions
diff --git a/R/occuCOP.R b/R/occuCOP.R
index 9a8444a..a16703d 100644
--- a/R/occuCOP.R
+++ b/R/occuCOP.R
@@ -8,6 +8,8 @@ source("R/unmarkedFitList.R")
 source("R/predict.R")
 source("R/getDesign.R")
 source("R/mixedModelTools.R")
+source("R/simulate.R")
+source("R/power.R")
 # sapply(paste0("./R/", list.files("./R/")), source)
 
 library(lattice)
@@ -39,7 +41,7 @@ library(testthat)
 ## unmarkedFrameCOP class ----
 setClass(
   "unmarkedFrameCOP",
-  representation(obsLength = "matrix"),
+  representation(L = "matrix"),
   contains = "unmarkedFrame",
   validity = function(object) {
     errors <- character(0)
@@ -53,9 +55,9 @@ setClass(
                     paste(object@y[which(!y_integers)], collapse = ', ')
                   ))
     }
-    if (!all(all(dim(object@obsLength) == dim(object@y)))){
+    if (!all(all(dim(object@L) == dim(object@y)))){
       errors <- c( errors, paste(
-          "obsLength should be a matrix of the same dimension as y, with M =", M,
+          "L should be a matrix of the same dimension as y, with M =", M,
           "rows (sites) and J =", J, "columns (sampling occasions)."
     ))}
     if (length(errors) == 0) TRUE
@@ -72,7 +74,7 @@ setClass("unmarkedFitCOP",
          contains = "unmarkedFit")
 
 
-# METHODS ----------------------------------------------------------------------
+# UNMARKED FRAME METHODS -------------------------------------------------------
 
 ## getDesign method ----
 # Example for occu: https://github.com/rbchan/unmarked/blob/536e32ad7b2526f8fac1b315ed2e99accac0d50f/R/getDesign.R#L10-L97
@@ -81,12 +83,20 @@ setMethod(
   "getDesign", "unmarkedFrameCOP",
   function(umf, formlist, na.rm = TRUE) {
     
+    "
+    getDesign convert the information in the unmarkedFrame to a format
+    usable by the likelihood function:
+    - It creates model design matrices for fixed effects (X) for each parameter (here lambda, psi) 
+    - It creates model design matrices for random effects (Z) for each parameter (here lambda, psi)
+    - It handles missing data
+    "
+    
     # Retrieve useful informations from umf
     M <- numSites(umf)
     J <- obsNum(umf)
     y <- getY(umf)
-    obsLength <- umf@obsLength
-
+    L <- getL(umf)
+    
     # Occupancy submodel -------------------------------------------------------
 
     # Retrieve the fixed-effects part of the formula
@@ -102,18 +112,19 @@ setMethod(
     # Check for missing variables
     psiMissingVars <- psiVars[!(psiVars %in% names(sc))]
     if (length(psiMissingVars) > 0) {
-      stop(paste(
-        "Variable(s)",
-        paste(psiMissingVars, collapse = ", "),
-        "not found in siteCovs"
+      stop(paste0(
+        "Variable(s) '",
+        paste(psiMissingVars, collapse = "', '"),
+        "' not found in siteCovs"
       ), call. = FALSE)
     }
 
-    # State model matrix
+    # State model matrix for fixed effects
     Xpsi <- model.matrix(
       psiformula,
       model.frame(psiformula, sc, na.action = NULL)
     )
+    # State model matrix for random effects
     Zpsi <- get_Z(formlist$psiformula, sc)
     
     # Detection submodel -------------------------------------------------------
@@ -138,11 +149,12 @@ setMethod(
       ), call. = FALSE)
     }
     
-    # Detection model matrix
+    # Detection model matrix for fixed effects
     Xlambda <- model.matrix(
       lambdaformula,
       model.frame(lambdaformula, oc, na.action = NULL)
     )
+    # Detection model matrix for random effects
     Zlambda <- get_Z(formlist$lambdaformula, oc)
     
     # Missing data -------------------------------------------------------------
@@ -169,13 +181,17 @@ setMethod(
     
     if (any(removed_obs)) {
       if (na.rm) {
-        nb_missing_sites <- sum(apply(removed_obs, 1, function(x) all(is.na(x))))
+        nb_missing_sites <- sum(rowSums(!removed_obs) == 0)
         nb_missing_observations <- sum(is.na(removed_obs))
-        warning("There are missing data (count data, site covariate, observation covariate):\n\t",
-                "Only data from ", M-nb_missing_sites, " sites out of ", M, " are used.\n\t",
-                "Only data from ", (M*J)-nb_missing_sites, " observations out of ", (M*J), " are used.")
+        warning("There is missing data: ",
+                  sum(missing_y), " missing count data, ",
+                  sum(missing_sc), " missing site covariate(s), ",
+                  sum(missing_oc), " missing observation covariate(s). ",
+                "Data from only ", (M*J)-sum(removed_obs), " observations out of ", (M*J), " are used, ",
+                "from ", M-nb_missing_sites, " sites out of ", M, ".\n\t"
+                )
       } else {
-        stop("There is missing data:\n\t",
+        stop("na.rm=FALSE and there is missing data :\n\t",
              sum(missing_y), " missing count data (y)\n\t",
              sum(missing_sc), " missing site covariates (siteCovs)\n\t",
              sum(missing_oc), " missing observation covariates (obsCovs)")
@@ -194,18 +210,26 @@ setMethod(
   })
 
 
+## getL method ----
+setGeneric("getL", function(object) standardGeneric("getL"))
+setMethod("getL", "unmarkedFrameCOP", function(object) {
+  return(object@L)
+})
+
+
 ## show method ----
 setMethod("show", "unmarkedFrameCOP", function(object) {
-  J <- ncol(object@obsLength)
+  J <- ncol(object@L)
   df_unmarkedFrame <- as(object, "data.frame")
-  df_obsLength <- data.frame(object@obsLength)
-  colnames(df_obsLength) <- paste0("obsLength.", 1:J)
+  df_L <- data.frame(object@L)
+  colnames(df_L) <- paste0("L.", 1:J)
   if (ncol(df_unmarkedFrame) > J) {
-    df <-
-      cbind(df_unmarkedFrame[, 1:J], df_obsLength, df_unmarkedFrame[, (J + 1):ncol(df_unmarkedFrame)])
-  } else{
-    df <-
-      cbind(df_unmarkedFrame[, 1:J], df_obsLength)
+    df <- cbind(df_unmarkedFrame[, 1:J], 
+                df_L, 
+                df_unmarkedFrame[, (J + 1):ncol(df_unmarkedFrame), drop = FALSE])
+  } else {
+    df <- cbind(df_unmarkedFrame[, 1:J], 
+                df_L)
   }
   cat("Data frame representation of unmarkedFrame object.\n")
   print(df)
@@ -228,9 +252,9 @@ setMethod("summary", "unmarkedFrameCOP", function(object,...) {
   cat("Tabulation of y observations:")
   print(table(object@y, exclude=NULL))
   
-  if(!is.null(object@obsLength)) {
+  if(!is.null(object@L)) {
     cat("\nTabulation of sampling occasions length:")
-    print(table(object@obsLength))
+    print(table(object@L))
   }
 
   if(!is.null(object@siteCovs)) {
@@ -261,7 +285,7 @@ setMethod("[", c("unmarkedFrameCOP", "numeric", "numeric", "missing"),
                 any(i > 0)) {
               stop("i must be all positive or all negative indices.")
             }
-            if (all(j < 0)) {
+            if (all(i < 0)) {
               i <- (1:M)[i]
             }
             
@@ -280,10 +304,10 @@ setMethod("[", c("unmarkedFrameCOP", "numeric", "numeric", "missing"),
               y <- t(y)
             }
             
-            # obsLength subset
-            obsLength <- x@obsLength[i, j]
+            # L subset
+            L <- x@L[i, j]
             if (min(length(i), length(j)) == 1) {
-              obsLength <- t(obsLength)
+              L <- t(L)
             }
             
             # siteCovs subset
@@ -297,7 +321,7 @@ setMethod("[", c("unmarkedFrameCOP", "numeric", "numeric", "missing"),
             if (!is.null(obsCovs)) {
               MJ_site <- rep(1:M, each = J)
               MJ_obs <- rep(1:J, times = M)
-              obsCovs <- obsCovs[((MJ_obs %in% j) & (MJ_site %in% i)), ]
+              obsCovs <- obsCovs[((MJ_obs %in% j) & (MJ_site %in% i)), , drop = FALSE]
               rownames(obsCovs) <- NULL
             }
             
@@ -305,7 +329,7 @@ setMethod("[", c("unmarkedFrameCOP", "numeric", "numeric", "missing"),
             new(
               Class = "unmarkedFrameCOP",
               y = y,
-              obsLength = obsLength,
+              L = L,
               siteCovs = siteCovs,
               obsCovs = obsCovs,
               obsToY = diag(length(j)),
@@ -363,6 +387,133 @@ setMethod("get_orig_data", "unmarkedFitCOP", function(object, type, ...){
   clean_covs[[datatype]]
 })
 
+# UNMARKED FIT METHOD ----------------------------------------------------------
+# Required methods include: ranef, simulate,
+
+## getP ----
+setMethod("getP", "unmarkedFitCOP", function(object) {
+  data <- object@data
+  M = nrow(getY(data))
+  J = ncol(getY(data))
+  des <- getDesign(data, object@formlist, na.rm = na.rm)
+  matLambda =  do.call(object["lambda"]@invlink, 
+                       list(matrix(
+                         as.numeric(des$Xlambda %*% coef(object, 'lambda')),
+                         nrow = M, ncol = J, byrow = T)))
+  return(matLambda)
+})
+
+## fitted ----
+setMethod("fitted", "unmarkedFitCOP", function(object) {
+  data <- object@data
+  M = nrow(getY(data))
+  J = ncol(getY(data))
+  des <- getDesign(data, object@formlist, na.rm = na.rm)
+  estim_psi = as.numeric(do.call(object["psi"]@invlink,
+                                 list(as.matrix(des$Xpsi %*% coef(object, 'psi')))))
+  estim_lambda = do.call(object["lambda"]@invlink, 
+                         list(matrix(
+                           as.numeric(des$Xlambda %*% coef(object, 'lambda')),
+                           nrow = M, ncol = J, byrow = T)))
+  return(estim_psi * estim_lambda)
+})
+
+## residuals ----
+setMethod("residuals", "unmarkedFitCOP", function(object) {
+  y <- getY(object@data)
+  e <- fitted(object)
+  r <- y - e
+  return(r)
+})
+
+## plot ----
+setMethod("plot", c(x = "unmarkedFitCOP", y = "missing"), function(x, y, ...)
+{
+  y <- getY(x)
+  r <- residuals(x)
+  e <- fitted(x)
+  
+  old_graph <- graphics::par("mfrow", "mar")
+  on.exit(graphics::par(mfrow = old_graph$mfrow, mar = old_graph$mar))
+  
+  {
+    graphics::par(mfrow = c(2, 1))
+    graphics::par(mar = c(0, 5, 2, 2))
+    plot(e, y,
+         ylab = "Observed data",
+         xlab = "Predicted data",
+         xaxt = 'n')
+    abline(a = 0, b = 1, lty = 3, col = "red")
+    title(main = "COP model - detection events count", outer = F)
+    
+    graphics::par(mar = c(4, 5, 0.5, 2))
+    plot(e, r,
+         ylab = "Residuals",
+         xlab = "Predicted data")
+    abline(h = 0, lty = 3, col = "red")
+  }
+  
+})
+
+# SIMULATION METHODS ----
+
+## get_umf_components ----
+setMethod("get_umf_components", "unmarkedFitCOP",
+  function(object, formulas, guide, design, ...){
+    sc <- generate_data(formulas$psi, guide, design$M)
+    oc <- generate_data(formulas$lambda, guide, design$J*design$M)
+    yblank <- matrix(0, design$M, design$J)
+    list(y=yblank, siteCovs=sc, obsCovs=oc)
+})
+
+## simulate_fit ----
+setMethod("simulate_fit", "unmarkedFitCOP",
+  function(object, formulas, guide, design, ...){
+    # Generate covariates and create a y matrix of zeros
+    parts <- get_umf_components(object, formulas, guide, design, ...)
+    umf <- unmarkedFrameCOP(y = parts$y, siteCovs = parts$siteCovs, obsCovs=parts$obsCovs)
+    fit <- suppressMessages(
+      occuCOP(
+        data = umf,
+        psiformula = formula(formulas$psi),
+        lambdaformula = formula(formulas$lambda),
+        se = FALSE,
+        control = list(maxit = 1)
+      )
+    )
+    return(fit)
+})
+
+## simulate ----
+setMethod("simulate", "unmarkedFitCOP",
+  function(object, nsim = 1, seed = NULL, na.rm = TRUE){
+    set.seed(seed)
+    formula <- object@formula
+    umf <- object@data
+    designMats <- getDesign(umf = umf, formlist = object@formlist, na.rm = na.rm)
+    y <- designMats$y
+    M <- nrow(y)
+    J <- ncol(y)
+    
+    # Occupancy probability psi depending on the site covariates
+    psiParms = coef(object, type = "psi", fixedOnly = FALSE)
+    psi <- as.numeric(plogis(as.matrix(designMats$Xpsi %*% psiParms)))
+    
+    # Detection rate lambda depending on the observation covariates
+    lambda = getP(object = object)
+    
+    # Simulations
+    simList <- vector("list", nsim)
+    for(i in 1:nsim) {
+      Z <- rbinom(M, 1, psi)
+      # Z[object@knownOcc] <- 1
+      y = matrix(rpois(n = M * J, lambda = as.numeric(t(lambda))),
+                 nrow = M, ncol = J, byrow = T) * Z
+      simList[[i]] <- y
+    }
+    return(simList)
+})
+
 
 
 # Useful functions -------------------------------------------------------------
@@ -380,7 +531,7 @@ check.integer <- function(x, na.ignore = F) {
 
 # unmarkedFrame ----------------------------------------------------------------
 
-unmarkedFrameCOP <- function(y, obsLength, siteCovs = NULL, obsCovs = NULL, mapInfo = NULL) {
+unmarkedFrameCOP <- function(y, L, siteCovs = NULL, obsCovs = NULL, mapInfo = NULL) {
   
   # Verification that they are non-NA data in y
   if (all(is.na(y))) {
@@ -396,14 +547,14 @@ unmarkedFrameCOP <- function(y, obsLength, siteCovs = NULL, obsCovs = NULL, mapI
   # Number of sampling occasions
   J <- ncol(y)
   
-  # If missing obsLength: replace by matrix of 1
+  # If missing L: replace by matrix of 1
   # and the lambda will be the detection rate per observation length
-  if (missing(obsLength)) {
-    obsLength <- y * 0 + 1
-    warning("obsLength is missing, replacing it by a matrix of 1.")
-  } else if (is.null(obsLength)) {
-    obsLength <- y * 0 + 1
-    warning("obsLength is missing, replacing it by a matrix of 1.")
+  if (missing(L)) {
+    L <- y * 0 + 1
+    warning("L is missing, replacing it by a matrix of 1.")
+  } else if (is.null(L)) {
+    L <- y * 0 + 1
+    warning("L is missing, replacing it by a matrix of 1.")
   }
   
   # Transformation observation covariates
@@ -418,7 +569,7 @@ unmarkedFrameCOP <- function(y, obsLength, siteCovs = NULL, obsCovs = NULL, mapI
   umf <- new(
     Class = "unmarkedFrameCOP",
     y = y,
-    obsLength = obsLength,
+    L = L,
     siteCovs = siteCovs,
     obsCovs = obsCovs,
     obsToY = diag(J),
@@ -434,17 +585,18 @@ unmarkedFrameCOP <- function(y, obsLength, siteCovs = NULL, obsCovs = NULL, mapI
 occuCOP <- function(data,
                     psiformula = ~1,
                     lambdaformula = ~1,
-                    psistarts = rep(0, length(attr(terms(psiformula), "term.labels")) + 1),
-                    lambdastarts = rep(0, length(attr(terms(lambdaformula), "term.labels")) + 1),
+                    psistarts,
+                    lambdastarts,
                     method = "BFGS",
                     se = TRUE,
                     engine = "R",
                     threads = 1L,
                     na.rm = TRUE,
                     get.NLL.params = NULL,
+                    L1 = FALSE,
                     ...) {
   #TODO: engines
-  #TODO: NA
+  #TODO: random effects
   
   # Neg loglikelihood COP ------------------------------------------------------
   nll_COP <- function(params) {
@@ -468,40 +620,38 @@ occuCOP <- function(data,
     #   lambdaIdx is the index of Occupancy Parameters in params
     lambda <- exp(Xlambda %*% params[lambdaIdx])
     
+    # Listing sites not removed (due to NAs)
+    if (length(sitesRemoved) > 0) {
+      siteAnalysed = (1:M)[-sitesRemoved]
+    } else {
+      siteAnalysed = (1:M)
+    }
+    
     # Probability for each site (i)
     iProb <- rep(NA, M)
     
-    for (i in 1:M) {
-      # Probability for the site i for each sampling occasion (j)
-      ijProb <- rep(NA, J)
-      
-      # Total obs length in site i
-      obsLengthSitei <- sum(data@obsLength[i,])
-      
+    for (i in siteAnalysed) {
       # iIdx is the index to access the vectorised vectors of all obs in site i
-      iIdx <- ((i - 1) * J + 1):(i * J)
+      iIdxall <- ((i - 1) * J + 1):(i * J)
+      
+      # Removing NAs
+      iIdx = iIdxall[!removed_obsvec[iIdxall]]
       
       if (SitesWithDetec[i]) {
         # If there is at least one detection in site i
-        # factNij = sapply(factorial(yvec[iIdx]),replaceinf)
-        # iProb[i] = psi[i] * sum(lambda[iIdx] * obsLengthvec[iIdx] / factNij * exp(-lambda[iIdx] * obsLengthvec[iIdx]))
-        
-        
         iProb[i] = psi[i] * (
-          (sum(lambda[iIdx] * obsLengthvec[iIdx])) ^ sum(yvec[iIdx]) / 
+          (sum(lambda[iIdx] * Lvec[iIdx])) ^ sum(yvec[iIdx]) / 
             factorial(sum(yvec[iIdx])) *
-            exp(-sum(lambda[iIdx] * obsLengthvec[iIdx]))
+            exp(-sum(lambda[iIdx] * Lvec[iIdx]))
         )
         
       } else {
         # If there is zero detection in site i
-        # iProb[i] = psi[i] * sum(exp(-lambda[iIdx] * obsLengthvec[iIdx])) + (1 - psi[i])
-        
-        iProb[i] = psi[i] * exp(-sum(lambda[iIdx] * obsLengthvec[iIdx])) + (1 - psi[i]) 
+        iProb[i] = psi[i] * exp(-sum(lambda[iIdx] * Lvec[iIdx])) + (1 - psi[i]) 
         
       }
-      
     }
+    
     # Note: Why is there "replaceinf(factorial(data@y[i,]))" 
     #       instead of just "factorial(data@y[i,])"?
     # Because if there are a lot of detections (n_ij >= 171 on my machine),
@@ -519,7 +669,7 @@ occuCOP <- function(data,
     # it doesn't matter if its an approximation.
     
     # log-likelihood
-    ll = sum(log(iProb))
+    ll = sum(log(iProb[siteAnalysed]))
     return(-ll)
   }
   
@@ -529,8 +679,8 @@ occuCOP <- function(data,
   }
   stopifnot(class(psiformula) == "formula")
   stopifnot(class(lambdaformula) == "formula")
-  stopifnot(class(psistarts) %in%  c("numeric", "double", "integer"))
-  stopifnot(class(lambdastarts) %in%  c("numeric", "double", "integer"))
+  if(!missing(psistarts)){stopifnot(class(psistarts) %in%  c("numeric", "double", "integer"))}
+  if(!missing(lambdastarts)){stopifnot(class(lambdastarts) %in%  c("numeric", "double", "integer"))}
   stopifnot(class(threads) %in%  c("numeric", "double", "integer"))
   se = as.logical(match.arg(
     arg = as.character(se),
@@ -541,6 +691,16 @@ occuCOP <- function(data,
     choices = c("TRUE", "FALSE", "0", "1")
   ))
   engine <- match.arg(engine, c("R"))
+  L1 = as.logical(match.arg(
+    arg = as.character(L1),
+    choices = c("TRUE", "FALSE", "0", "1")
+  ))
+  
+  
+  # Do not yet manage random effects!!!
+  if (!is.null(lme4::findbars(psiformula)) | !is.null(lme4::findbars(lambdaformula))) {
+    stop("occuCOP does not currently handle random effects.")
+  }
   
   # Format input data ----------------------------------------------------------
   
@@ -551,22 +711,35 @@ occuCOP <- function(data,
   # For more informations, see: getMethod("getDesign", "unmarkedFrameCOP")
   designMats <- getDesign(umf = data, formlist = formlist, na.rm = na.rm)
   
-  # y is the count detection data (matrix of size M sites x J sessions)
+  # y is the count detection data (matrix of size M sites x J observations)
   y <- getY(data)
   
-  # obsLength is the length of sessions (matrix of size M sites x J sessions)
-  obsLength = data@obsLength
+  # L is the length of observations (matrix of size M sites x J observations)
+  L = getL(data)
+  if (!L1) {
+    if (!any(is.na(L))) {
+      if (all(L == 1)) {
+        warning(
+          "All observations lengths (L) are set to 1. ",
+          "If they were not user-defined, lambda corresponds to the ",
+          "detection rate multiplied by the observation length, ",
+          "not just the detection rate per time unit.\n",
+          "You can remove this warning by adding 'L1=TRUE' in the function inputs."
+        )
+      }
+    }
+  }
   
-  # Xpsi is the matrix with the occupancy covariates
+  # Xpsi is the fixed effects design matrix for occupancy
   Xpsi <- designMats$Xpsi
   
-  # Xlambda is the matrix with the detection covariates
+  # Xlambda is the fixed effects design matrix for detection rate
   Xlambda <- designMats$Xlambda
   
-  # Zpsi is ???
+  # Zpsi is the random effects design matrix for occupancy
   Zpsi <- designMats$Zpsi
   
-  # Zlambda is ???
+  # Zlambda is the random effects design matrix for detection rate
   Zlambda <- designMats$Zlambda
   
   # removed_obs is a M x J matrix of the observations removed from the analysis
@@ -580,7 +753,7 @@ occuCOP <- function(data,
   J <- ncol(y)
   
   # Total number of detection per site
-  NbDetecPerSite = rowSums(y)
+  NbDetecPerSite = rowSums(y, na.rm=T)
   
   # Sites where there was at least one detection
   SitesWithDetec = NbDetecPerSite > 0
@@ -615,7 +788,8 @@ occuCOP <- function(data,
   #   From Matrix of dim MxJ to vector of length MxJ:
   #   c(ySite1Obs1, ySite1Obs2, ..., ySite1ObsJ, ysite2Obs1, ...)
   yvec <- as.numeric(t(y))
-  obsLengthvec <- as.numeric(t(obsLength))
+  Lvec <- as.numeric(t(L))
+  removed_obsvec <- as.logical(t(removed_obs))
   
   # get.NLL.params -------------------------------------------------------------
   if (!is.null(get.NLL.params)) {
@@ -641,7 +815,7 @@ occuCOP <- function(data,
     message(
       "No lambda initial values provided for optim. Using lambdastarts = c(",
       paste(lambdastarts, collapse = ", "),
-      "), equivalent to detection rate",
+      "), equivalent to a detection rate",
       ifelse(length(lambdastarts) == 1, "", "s"),
       " of 1."
     )
@@ -658,7 +832,7 @@ occuCOP <- function(data,
     message(
       "No psi initial values provided for optim. Using psistarts = c(",
       paste(psistarts, collapse = ", "),
-      "), equivalent to occupancy probabilit",
+      "), equivalent to an occupancy probabilit",
       ifelse(length(psistarts) == 1, "y", "ies"),
       " of 0.5."
     )
@@ -724,7 +898,6 @@ occuCOP <- function(data,
     "unmarkedFitCOP",
     fitType = "occuCOP",
     call = match.call(),
-    # formula = as.formula(paste(formlist, collapse = "")),
     formula = as.formula(paste(
       formlist["lambdaformula"], formlist["psiformula"], collapse = ""
     )),
diff --git a/test_occuCOP.R b/test_occuCOP.R
index 45fb0e0..a4f0adb 100644
--- a/test_occuCOP.R
+++ b/test_occuCOP.R
@@ -220,8 +220,8 @@ test_that("occuCOP can fit simple models", {
   # Fitting options ----
   
   # With default parameters
-  expect_no_error(fit_default <- occuCOP(data = umf))
-  expect_no_error(occuCOP(data = umf, psiformula = ~ 1, lambdaformula = ~ 1))
+  expect_message(fit_default <- occuCOP(data = umf))
+  expect_no_error(occuCOP(data = umf, psiformula = ~ 1, lambdaformula = ~ 1, psistarts = 0, lambdastarts = 0))
   
   # With chosen starting points
   expect_no_error(occuCOP(data = umf, 
@@ -230,23 +230,23 @@ test_that("occuCOP can fit simple models", {
                           lambdastarts = log(0.1)))
   expect_error(occuCOP(data = umf, 
                        psiformula = ~ 1, lambdaformula = ~ 1, 
-                       psistarts = qlogis(c(0.7, 0.5))))
+                       psistarts = qlogis(c(0.7, 0.5), lambdastarts = 0)))
   expect_error(occuCOP(data = umf, 
                        psiformula = ~ 1, lambdaformula = ~ 1, 
-                       lambdastarts = log(c(1, 2))))
+                       lambdastarts = log(c(1, 2)), psistarts = 0))
   
   # With different options
-  expect_no_error(occuCOP(data = umf, method = "Nelder-Mead"))
-  expect_error(occuCOP(data = umf, method = "ABC"))
+  expect_no_error(occuCOP(data = umf, method = "Nelder-Mead", psistarts = 0, lambdastarts = 0))
+  expect_error(occuCOP(data = umf, method = "ABC", psistarts = 0, lambdastarts = 0))
   
-  expect_no_error(occuCOP(data = umf, se = F))
+  expect_no_error(occuCOP(data = umf, se = F, psistarts = 0, lambdastarts = 0))
   expect_error(occuCOP(data = umf, se = "ABC"))
   
-  expect_no_error(occuCOP(data = umf, engine = "R"))
-  expect_error(occuCOP(data = umf, engine = "julia"))
+  expect_no_error(occuCOP(data = umf, engine = "R", psistarts = 0, lambdastarts = 0))
+  expect_error(occuCOP(data = umf, engine = "julia", psistarts = 0, lambdastarts = 0))
   
-  expect_no_error(occuCOP(data = umf, na.rm = F))
-  expect_error(occuCOP(data = umf, na.rm = "no"))
+  expect_no_error(occuCOP(data = umf, na.rm = F, psistarts = 0, lambdastarts = 0))
+  expect_error(occuCOP(data = umf, na.rm = "no", psistarts = 0, lambdastarts = 0))
   
   # Looking at at COP model outputs ----
   expect_is(fit_default, "unmarkedFitCOP")
@@ -293,7 +293,7 @@ test_that("occuCOP can fit simple models", {
       J = 10
     ),
     obsLength = matrix(1, nrow = 1000, ncol = 10)
-  ))
+  ), psistarts = 0, lambdastarts = 0)
   psi_estimate = backTransform(fit_accur, type = "psi")@estimate
   lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate
   expect_equivalent(
@@ -318,7 +318,7 @@ test_that("occuCOP can fit simple models", {
       J = 10
     ),
     obsLength = matrix(1, nrow = 1000, ncol = 10)
-  ))
+  ), psistarts = 0, lambdastarts = 0)
   psi_estimate = backTransform(fit_accur, type = "psi")@estimate
   lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate
   expect_equivalent(
@@ -343,7 +343,7 @@ test_that("occuCOP can fit simple models", {
       J = 10
     ),
     obsLength = matrix(1, nrow = 1000, ncol = 10)
-  ))
+  ), psistarts = 0, lambdastarts = 0)
   psi_estimate = backTransform(fit_accur, type = "psi")@estimate
   lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate
   expect_equivalent(
@@ -357,6 +357,17 @@ test_that("occuCOP can fit simple models", {
     tol = 0.05
   )
 
+  # With NAs ----
+  yNA <- y
+  yNA[1,] <- NA
+  yNA[3, 1] <- NA
+  yNA[4, 3] <- NA
+  yNA[, 5] <- NA
+  expect_no_error(umfNA <- unmarkedFrameCOP(y = yNA, obsLength = obsLength))
+  
+  expect_warning(fit_NA <- occuCOP(data = umfNA, psistarts = 0, lambdastarts = 0))
+  expect_error(occuCOP(data = umfNA, psistarts = 0, lambdastarts = 0, na.rm = F))
+  
   
   #TODO
 })
@@ -367,3 +378,62 @@ test_that("occuCOP can fit models with covariates", {
 })
 
 
+test_that("We can simulate COP data", {
+  
+  # From scratch ----
+  
+  # With no covariates
+  expect_no_error(simulate(
+    "COP",
+    formulas = list(psi =  ~ 1, lambda =  ~ 1),
+    coefs = list(
+      psi = c(intercept = 0),
+      lambda = c(intercept = 0)
+    ),
+    design = list(M = 100, J = 100)
+  ))
+  
+  # With quantitative covariates
+  expect_no_error(simulate(
+    "COP",
+    formulas = list(psi =  ~ elev, lambda =  ~ rain),
+    coefs = list(
+      psi = c(intercept = qlogis(.5), elev = -0.5),
+      lambda = c(intercept = log(3), rain = -1)
+    ),
+    design = list(M = 100, J = 5)
+  ))
+  
+  # With guides
+  expect_no_error(simulate(
+    "COP",
+    formulas = list(psi =  ~ elev, lambda =  ~ rain),
+    coefs = list(
+      psi = c(intercept = qlogis(.5), elev = -0.5),
+      lambda = c(intercept = log(3), rain = -1)
+    ),
+    design = list(M = 100, J = 5),
+    guide = list(elev=list(dist=rnorm, mean=12, sd=0.5))
+  ))
+  
+  # With qualitative covariates
+  expect_no_error(simulate(
+    "COP",
+    formulas = list(psi =  ~ elev + habitat, lambda =  ~ 1),
+    coefs = list(
+      psi = c(
+        intercept = qlogis(.2),
+        elev = -0.5,
+        habitatB = .5,
+        habitatC = .8
+      ),
+      lambda = c(intercept = log(3))
+    ),
+    design = list(M = 100, J = 5),
+    guide = list(habitat = factor(levels = c("A", "B", "C")))
+  ))
+  
+  # From unmarkedFitCOP ----
+  #TODO
+})
+
diff --git a/tmp_use_COP.Rmd b/tmp_use_COP.Rmd
index 57e9e55..65e29ee 100644
--- a/tmp_use_COP.Rmd
+++ b/tmp_use_COP.Rmd
@@ -31,12 +31,10 @@ library(ggplot2)
 library(ggrepel)
 library(dplyr)
 library(tibble)
+library(tidyr)
 ```
 
 
-
-
-
 # The COP model
 
 Hereafter, I use the notations defined in the following table.
@@ -60,7 +58,9 @@ Z_i \sim \text{Bernoulli}(\psi_{i}) \\
 N_{ij} \sim \text{Poisson}(\lambda_{ij} T_{ij})
 $$
 
-The likelihood of this model is:
+## How is the model fitted?
+
+The model is fitted by maximum likelihood estimation (MLE). Likelihood is the probability of observing these data given a set of parameters. The likelihood $L$ of this model is:
 
 \begin{align}
 \begin{split}
@@ -81,6 +81,124 @@ The likelihood of this model is:
 \end{split}
 \end{align}
 
+To maximise the likelihood, we use an optimisation algorithm, and we actually minimise the log-likelihood $log(L)$ for computational reasons. We can visualise  how can the likelihood vary depending on the parameters, to better understand what the optimisation algorithm does. 
+
+With the COP model, here are examples of log-likelihood for varying simulation scenarios (see simul_lambda on top, simul_psi on the right), and for different values of $\psi$ (on the bottom) and $\lambda$ (line colours, the thick lines corresponding to the detection rate $\lambda$ used to simulate data). The black dotted lines and labels correspond to the value of the parameters $\psi$ and $\lambda$ for the maximum likelihood estimated by the optimisation algorithm.
+
+```{r how-is-the-model-fitted, message=FALSE, class.source='fold-hide', fig.height=10, fig.width=12}
+M = 100 # Number of sites
+J = 5 # Number of observations (transects, sessions, sampling occasions...)
+cpt = 1
+for (simul_psi in c(.1, .25, .5, .75, .9)) {
+  for (simul_lambda in c(1, 3, 5)) {
+    simul_z = sample(
+      x = c(0, 1),
+      size = M,
+      replace = T,
+      prob = c(1 - simul_psi, simul_psi)
+    )
+    simul_y <- matrix(rpois(n = M * J, lambda = simul_lambda),
+                      nrow = M,
+                      ncol = J) * simul_z
+    
+    fit = occuCOP(data = unmarkedFrameCOP(y = simul_y,
+                                          L = (simul_y * 0 + 1)),
+                  psistarts=0, lambdastarts=0)
+    estim_psi = backTransform(fit, "psi")
+    estim_lambda = backTransform(fit, "lambda")
+    
+    nll_df_plot_i = occuCOP(
+      data = unmarkedFrameCOP(y = simul_y, L = (simul_y * 0 + 1)),
+      get.NLL.params =
+        as.list(as.data.frame(t(
+          expand.grid("psi" = qlogis(seq(0.01, 0.99, by = 0.02)),
+                      # "lambda" = log(round(
+                      #   simul_lambda * seq(from = 0.25, to = 1.75, by = .25), 2
+                      # ))
+                      lambda=log(c(1,3,5))
+                      )
+        )))
+    )
+    nll_df_plot_i$simul_psi = simul_psi
+    nll_df_plot_i$simul_lambda = simul_lambda
+    nll_df_plot_i$estim_psi = estim_psi@estimate
+    nll_df_plot_i$estim_lambda = estim_lambda@estimate
+    
+    if (cpt == 1) {
+      nll_df_plot = nll_df_plot_i
+    } else{
+      nll_df_plot = rbind(nll_df_plot, nll_df_plot_i)
+    }
+    # cat('\r',cpt,"/ 15")
+    cpt = cpt + 1
+    rm(nll_df_plot_i)
+  }
+}
+
+nll_df_plot$psi = plogis(nll_df_plot[, "logit(psi).(Intercept)"])
+nll_df_plot$lambda = exp(nll_df_plot[, "log(lambda).(Intercept)"])
+nll_df_plot$simulated_lambda = (round(nll_df_plot$lambda, 2) == round(nll_df_plot$simul_lambda, 2))
+nll_df_plot$lambda_txt = format(nll_df_plot$lambda, digits = 2, nsmall = 2)
+nll_df_plot$label = ifelse(nll_df_plot$psi == max(nll_df_plot$psi),
+                           paste0("λ=", nll_df_plot$lambda_txt),
+                           NA)
+
+df_estimates = nll_df_plot %>%
+  group_by(simul_psi, simul_lambda) %>%
+  summarise(
+    estim_psi = unique(estim_psi),
+    estim_lambda = unique(estim_lambda),
+    maximum_llh = max(-nll),
+    minimum_llh = min(-nll),
+    .groups = "drop"
+  )
+
+ggplot() +
+  geom_line(
+    data = nll_df_plot,
+    aes(
+      x = psi,
+      y = -nll,
+      colour = lambda_txt,
+      linewidth = simulated_lambda
+    )
+  ) +
+  ggh4x::facet_grid2(
+    simul_psi ~ simul_lambda,
+    labeller = label_both,
+    scales = "free_y",
+    independent = "y"
+  ) +
+  labs(
+    title = "Negative log-likelihood of the COP model for different values of ψ and λ",
+    x = "ψ",
+    y = "Log-likelihood",
+    colour = "λ"
+  ) +
+  scale_linewidth_manual(values = c("TRUE" = 2, "FALSE" = 1)) +
+  theme_light()+
+  geom_vline(data = df_estimates,
+             aes(xintercept = estim_psi),
+             linetype = "dotted") +
+  geom_label(data = df_estimates, aes(
+    x = estim_psi,
+    y = minimum_llh + abs(minimum_llh * .1),
+    label = paste0("λ: ", round(estim_lambda, 2))
+  )) +
+  geom_hline(data = df_estimates,
+             aes(yintercept = maximum_llh),
+             linetype = "dotted") +
+  geom_label(data = df_estimates, aes(
+    x = 0,
+    y = maximum_llh - 30,
+    label = paste0("ψ: ", round(estim_psi, 2))
+  )) +
+  xlim(-.1, 1) +
+  theme(legend.position = "bottom") +
+  guides(linewidth = "none")
+
+```
+
 # Simulation
 
 
@@ -119,27 +237,16 @@ simul_psi_habA = 0.9
 simul_psi_habB = 0.5
 simul_psi_habC = 0.1
 
+simul_psi = ifelse(
+  SiteCov$habitat == "A",
+  simul_psi_habA,
+  ifelse(SiteCov$habitat == "B", simul_psi_habB, simul_psi_habC)
+)
+
 # For each site, we simulate occupancy state
 z_i = rep(NA, M)
 for (i in 1:M) {
-  if (SiteCov$habitat[i] == 'A') {
-    z_i[i] <-
-      sample(c(0, 1),
-             size = 1,
-             prob = c(1 - simul_psi_habA, simul_psi_habA))
-    next
-  } else if (SiteCov$habitat[i] == 'B') {
-    z_i[i] <-
-      sample(c(0, 1),
-             size = 1,
-             prob = c(1 - simul_psi_habB, simul_psi_habB))
-    next
-  } else {
-    z_i[i] <-
-      sample(c(0, 1),
-             size = 1,
-             prob = c(1 - simul_psi_habC, simul_psi_habC))
-  }
+  z_i[i] <- sample(c(0, 1), size = 1, prob = c(1 - simul_psi[i], simul_psi[i]))
 }
 
 # We have this data:
@@ -162,10 +269,6 @@ We simulate temporal covariates:
 - "rain" will impact the detection rate
 
 ```{r simul-lambda-cov}
-# Detection rate: 1 detection per day on average,
-simul_lambda = 1
-simul_lambda_rain = .3
-
 # Temporal covariates
 TemporalCov <- list(
   "rain" = matrix(pmax(rexp(n = M * J, rate = 1 / 10), 0), nrow = M, ncol = J),
@@ -213,10 +316,10 @@ data.frame(
 
 Let's say that each observation lasts one time-unit here, *e.g.* one day per sampling occasion.
 
-```{r simul-obsLength}
-obsLength = y * 0 + 1
-class(obsLength)
-print(as_tibble(obsLength))
+```{r simul-L}
+L = y * 0 + 1
+class(L)
+print(as_tibble(L))
 ```
 
 
@@ -229,7 +332,7 @@ We then create our unmarkedFrameCOP object.
 ```{r umf-creation}
 umf = unmarkedFrameCOP(
   y = y,
-  obsLength = obsLength,
+  L = L,
   siteCovs = SiteCov,
   obsCovs = TemporalCov
 )
@@ -239,8 +342,12 @@ umf = unmarkedFrameCOP(
 
 ```{r umf-visu}
 head(umf)
+
 summary(umf)
-print(umf[2,4]) # 2nd site, 4th observation
+
+# You can subset an unmarkedFrame: here the 2nd site, 4th observation
+print(umf[2, 4])
+
 plot(umf)
 ```
 
@@ -253,7 +360,7 @@ y_with_decimals = y
 y_with_decimals[2, 1] = 49.5
 unmarkedFrameCOP(
   y = y_with_decimals,
-  obsLength = obsLength,
+  L = L,
   siteCovs = SiteCov,
   obsCovs = TemporalCov
 )
@@ -265,59 +372,90 @@ There is a warning if data is detection/non-detection (1/0) instead of count.
 y_detec_nodetec = (y > 0) * 1
 unmarkedFrameCOP(
   y = y_detec_nodetec,
-  obsLength = obsLength,
+  L = L,
   siteCovs = SiteCov,
   obsCovs = TemporalCov
 )
 ```
 
-There is an error if the dimension of obsLength is different than that of y.
+There is an error if the dimension of L is different than that of y.
 
-```{r umf-error-obsLength}
+```{r umf-error-L}
 unmarkedFrameCOP(
   y = y,
-  obsLength = obsLength[1:5, 1:2],
+  L = L[1:5, 1:2],
   siteCovs = SiteCov,
   obsCovs = TemporalCov
 )
 ```
 
-# Model fitting
-
-We fit the model. For more information, see [section How is the model fitted?](#how-is-the-model-fitted)
+# Fit the model
 
-## Null model
+## Null model fitting
 
 ```{r null-fit}
-resCOP_null <- occuCOP(
+occuCOP(
   data = umf,
   psiformula = ~ 1,
   lambdaformula = ~ 1,
   method = "Nelder-Mead"
 )
-print(resCOP_null)
 ```
 
-We can backtransform the parameters:
+Note that we a warning because observation lengths (L) were all set to 1 in our unmarkedFrame. This was our aim, so we can add `L1 = TRUE` in the function parameters.
 
-```{r null-backtransform}
-# Occcupancy estimate
-plogis(resCOP_null@estimates@estimates$psi@estimates) # plogis(x): "inverse logit"
-backTransform(resCOP_null, type = "psi")
+```{r}
+occuCOP(
+  data = umf,
+  psiformula = ~ 1,
+  lambdaformula = ~ 1,
+  method = "Nelder-Mead",
+  L1 = TRUE
+)
+```
 
-# Detection rate estimate
-exp(resCOP_null@estimates@estimates$lambda@estimates)
-backTransform(resCOP_null, type = "lambda")
+We also have messages to inform us of the default initial parameters for the optimisation algorithm. These messages can be removed by choosing them manually. For example, we could set initial parameters that are close to what we can intuitively expect with our data.
+
+Intuitively, the initial psi value can be the proportion of sites in which we do not have any observations.
+
+```{r}
+psi_init = mean(rowSums(y) == 0)
+print(psi_init)
+```
+
+And the initial lambda value can be the mean count of detection events in sites in which there was at least one observation.
+
+```{r}
+lambda_init = mean(y[rowSums(y) > 0, ])
+print(lambda_init)
 ```
 
-## psi ~ elev; lambda ~ rain
+We have to transform them. See `?occuCOP` for more information about these transformations.
+
+```{r}
+resCOP_null <- occuCOP(
+  data = umf,
+  psiformula = ~ 1,
+  lambdaformula = ~ 1,
+  psistarts = qlogis(psi_init),
+  lambdastarts = log(lambda_init),
+  method = "Nelder-Mead",
+  L1 = TRUE
+)
+print(resCOP_null)
+```
+
+## $\psi$ ~ habitat, $\lambda$ ~ rain
+
+With covariates, we need initial values for the optimisation to be vectors of the adequate length. Here, we will keep the default initial values of 0, which backtransformed, correspond to an occupancy probability of 0.5 and a detection rate of 1.
 
 ```{r cov-fit}
 resCOP_habitat_rain <- occuCOP(
   data = umf,
   psiformula = ~ -1 + habitat,
   lambdaformula = ~ rain,
-  method = "Nelder-Mead"
+  method = "Nelder-Mead",
+  L1 = TRUE
 )
 print(resCOP_habitat_rain)
 ```
@@ -330,38 +468,104 @@ resCOP_habitat_rain <- occuCOP(
   psiformula = ~ -1 + habitat,
   lambdaformula = ~ rain,
   method = "Nelder-Mead",
+  L1 = TRUE,
   control = list(maxit = 5000)
 )
 print(resCOP_habitat_rain)
 ```
 
-We can backtransform the parameters. With covariates, we can no longer use the function `backTransform`:
+## Back-transforming parameter estimates
+
+To check the names of the parameters for the submodels of the COP occupancy model, we can use the function `names`.
+
+```{r param-names}
+names(resCOP_null)
+```
+
+### For a model without covariates
+
+Without covariates, we can use the function `backTransform`. 
+
+```{r null-backtransform}
+# Occcupancy estimate
+backTransform(resCOP_null, type = "psi")
+
+# Detection rate estimate
+backTransform(resCOP_null, type = "lambda")
+```
+
+Alternatively, we can back-transform the parameter manually. Most users should use the `backTransform` function in this situation, but if you need to go in depth, here's how to do it.
+
+```{r null-backtransform-manual}
+# Occcupancy probability psi
+## What is the inverse link, the function to back-transform the estimate?
+resCOP_null@estimates@estimates$psi@invlink
+## Manual back-transformation
+logistic(coef(resCOP_null, "psi"))
+plogis(coef(resCOP_null, "psi")) # plogis(x): "inverse logit", equivalent to logistic(x)
+
+# Detection rate lambda
+## What is the inverse link, the function to back-transform the estimate?
+resCOP_null@estimates@estimates$lambda@invlink
+## Manual back-transformation
+exp(coef(resCOP_null, "lambda"))
+```
+
+
+### For a model with covariates
+
+With covariates, we can no longer use the function `backTransform`:
 
 ```{r cov-backtransform}
 backTransform(resCOP_habitat_rain, type = "psi")
 ```
 
-However, we can use the function `predict`:
+Instead, we can use the function `predict`, which can be used to back-transform the parameters depending on the covariates. `predict` can also be used, as its name leads on, to predict occupancy and detection with new parameters, if the user specify `newdata`.
 
 ```{r cov-predict}
 # Occcupancy estimate
-plogis(resCOP_habitat_rain@estimates@estimates$psi@estimates)
-psipred = predict(object = resCOP_habitat_rain, type = "psi")
-print(as_tibble(cbind(
-  data.frame("habitat" = resCOP_habitat_rain@data@siteCovs$habitat),
-  psipred
-)))
+psipred = predict(object = resCOP_habitat_rain, type = "psi", appendData = TRUE)
+print(as_tibble(psipred[, c("Predicted", "SE", "lower", "upper", "habitat", "elev")]))
 
 # Detection rate estimate
-exp(resCOP_habitat_rain@estimates@estimates$lambda@estimates)
-lambdapred = predict(object = resCOP_habitat_rain, type = "lambda")
+lambdapred = predict(object = resCOP_habitat_rain, type = "lambda", appendData = FALSE)
 print(as_tibble(cbind(
-  data.frame("rain" = resCOP_habitat_rain@data@obsCovs$rain),
+  data.frame(
+    "site" = rep(1:M, each = J),
+    "observation" = rep(1:J, times = M),
+    "rain" = resCOP_habitat_rain@data@obsCovs$rain
+  ),
   lambdapred
 )))
 ```
 
-## Ranking
+We can also backtransform the parameters manually. This works well for qualitative covariates (*e.g.* habitat), but not for quantitative covariates (*e.g.* rain), because linear regression with the covariate is applied to the **transformed parameters**, and should be back-transformed only afterwards.
+
+```{r cov-backtransform-manual}
+# Occupancy estimate
+logistic(coef(resCOP_habitat_rain, "psi"))
+
+# Detection rate estimate
+exp(coef(resCOP_habitat_rain, "lambda"))
+
+# We're supposed to have a predicted lambda = 1.77 for rain = 6.03.
+
+# With the back-transformed parameters, this doesnt work!
+1.9693466 + 0.9821017 * 6.03 
+
+# We have to back-transform the results AFTER we applied the linear regression.
+exp(0.67770181 - 0.01806041 * 6.03) 
+```
+
+## Plot the model fit
+
+```{r}
+plot(resCOP_null)
+plot(resCOP_habitat_rain)
+```
+
+
+## Model selection
 
 We can compare several models by their AIC by using `fitList` and `modSel`.
 
@@ -371,124 +575,121 @@ fl <- fitList(fits = list("Null" = resCOP_null,
 modSel(fl)
 ```
 
-## How is the model fitted?
-
-The model is fitted by maximum likelihood estimation (MLE). 
 
-```{r how-is-the-model-fitted, class.source = 'fold-hide'}
-# M = 100
-# J = 10
+## How well did the model estimated the simulated parameters?
 
-cpt = 1
-for (simul_psi in c(.1, .25, .5, .75, .9)) {
-  for (simul_lambda in c(1, 3, 5)) {
-    # simul_z = c(rep(1, simul_psi * 100), rep(0, times=((1 - simul_psi) * 100)))
-    simul_z = sample(
-      x = c(0, 1),
-      size = M,
-      replace = T,
-      prob = c(1 - simul_psi, simul_psi)
-    )
-    simul_y <- matrix(rpois(n = M * J, lambda = simul_lambda),
-                      nrow = M,
-                      ncol = J) * simul_z
-    
-    fit = occuCOP(data = unmarkedFrameCOP(y = simul_y,
-                                          obsLength = (simul_y * 0 + 1)))
-    estim_psi = backTransform(fit, "psi")
-    estim_lambda = backTransform(fit, "lambda")
-    
-    nll_df_plot_i = occuCOP(
-      data = unmarkedFrameCOP(y = simul_y, obsLength = (simul_y * 0 + 1)),
-      get.NLL.params =
-        as.list(as.data.frame(t(
-          expand.grid("psi" = qlogis(seq(0.01, 0.99, by = 0.02)),
-                      "lambda" = log(round(
-                        simul_lambda * seq(from = 0.25, to = 1.75, by = .25), 2
-                      )))
-        )))
-    )
-    nll_df_plot_i$simul_psi = simul_psi
-    nll_df_plot_i$simul_lambda = simul_lambda
-    nll_df_plot_i$estim_psi = estim_psi@estimate
-    nll_df_plot_i$estim_lambda = estim_lambda@estimate
-    
-    if (cpt==1) {
-      nll_df_plot = nll_df_plot_i
-    } else{
-      nll_df_plot = rbind(nll_df_plot, nll_df_plot_i)
-    }
-    # cat('\r',cpt,"/ 15")
-    cpt = cpt + 1
-    rm(nll_df_plot_i)
-  }
-}
+We know under which conditions we simulated data, because we chose the simulated parameters $\lambda$ and $\psi$. Therefore, we can compare the "true parameters" to the "estimated parameters". Here, we will focus on this one example only.
 
-nll_df_plot$psi = plogis(nll_df_plot[, "logit(psi).(Intercept)"])
-nll_df_plot$lambda = exp(nll_df_plot[, "log(lambda).(Intercept)"])
-nll_df_plot$simulated_lambda = (round(nll_df_plot$lambda, 2) == round(nll_df_plot$simul_lambda, 2))
-nll_df_plot$lambda_txt = format(nll_df_plot$lambda, digits = 2, nsmall = 2)
-nll_df_plot$label = ifelse(nll_df_plot$psi == max(nll_df_plot$psi),
-                           paste0("λ=", nll_df_plot$lambda_txt),
-                           NA)
+### $\psi$ the occupancy probability
 
-df_estimates = nll_df_plot %>%
-  group_by(simul_psi, simul_lambda) %>%
-  summarise(
-    estim_psi = unique(estim_psi),
-    estim_lambda = unique(estim_lambda),
-    maximum_llh = max(-nll),
-    minimum_llh = min(-nll),
-    .groups = "drop"
-  )
+```{r how-well-did-COP-estimated-psi, class.source = 'fold-hide', out.width = "60%"}
+psi_simulVSestim = data.frame(
+  "habitat" = SiteCov$habitat,
+  "simul" = simul_psi,
+  "estim_null" = predict(resCOP_null, "psi", re.form=T)$Predicted,
+  "estim_habitat_rain" = predict(resCOP_habitat_rain, "psi", re.form=T)$Predicted
+)
 
-ggplot() +
-  geom_line(
-    data = nll_df_plot,
-    aes(
-      x = psi,
-      y = -nll,
-      colour = lambda_txt,
-      linewidth = simulated_lambda,
-      linetype = simulated_lambda
-    )
-  ) +
-  ggh4x::facet_grid2(
-    simul_psi ~ simul_lambda,
-    labeller = label_both,
-    scales = "free_y",
-    independent = "y"
+tidyr::pivot_longer(
+  psi_simulVSestim,
+  cols = c("simul","estim_null","estim_habitat_rain"),
+  names_to = "model",
+  values_to = "estim"
+) %>% 
+  mutate(model = factor(
+    model,
+    levels = c("simul", "estim_null", "estim_habitat_rain"),
+    labels = c("Simulated", "Null model", "ψ ~ habitat, λ ~ rain"),
+    ordered = T
+  )) %>% 
+  ggplot(aes(x = estim, y = habitat, colour = model)) +
+  geom_point() +
+  labs(
+    title = "How well did the COP model estimate ψ?",
+    x = "Occupancy probability (ψ)",
+    y = "Habitat",
+    colour = "Model"
   ) +
+  theme_light() +
+  theme(legend.title = element_blank(),
+        legend.position = "bottom")
+```
+
+### $\lambda$ the detection rate
+
+```{r how-well-did-COP-estimated-lambda, class.source = 'fold-hide', out.width = "60%"}
+lambda_simulVSestim = data.frame(
+  "rain" = as.numeric(t(TemporalCov$rain)),
+  "simul" = as.numeric(t(simul_lambda)),
+  "estim_null" = as.numeric(t(predict(resCOP_null, "lambda", re.form=T)$Predicted)),
+  "estim_habitat_rain" = as.numeric(t(predict(resCOP_habitat_rain, "lambda", re.form=T)$Predicted))
+)
+
+tidyr::pivot_longer(
+  lambda_simulVSestim,
+  cols = c("simul","estim_null","estim_habitat_rain"),
+  names_to = "model",
+  values_to = "estim"
+) %>%
+  mutate(model = factor(
+    model,
+    levels = c("simul", "estim_null", "estim_habitat_rain"),
+    labels = c("Simulated", "Null model", "ψ ~ habitat, λ ~ rain"),
+    ordered = T
+  )) %>% 
+ggplot(aes(x = estim, y = rain, colour = model)) +
+  geom_point() +
   labs(
-    title = "Negative log-likelihood of the COP model for different values of ψ and λ",
-    x = "ψ",
-    y = "Log-likelihood",
-    colour = "lambda"
+    title = "How well did the COP model estimate λ?",
+    x = "Detection rate (λ)",
+    y = "Rain",
+    colour = "Model"
   ) +
-  scale_linewidth_manual(values = c("TRUE" = 1, "FALSE" = .3)) +
-  scale_linetype_manual(values = c("TRUE" = "solid", "FALSE" = "dashed")) +
-  theme_light()+
-  geom_vline(data = df_estimates,
-             aes(xintercept = estim_psi),
-             linetype = "dotted") +
-  geom_label(data = df_estimates, aes(
-    x = estim_psi,
-    y = minimum_llh + abs(minimum_llh * .1),
-    label = paste0("λ: ", round(estim_lambda, 2))
-  )) +
-  geom_hline(data = df_estimates,
-             aes(yintercept = maximum_llh),
-             linetype = "dotted")+
-    geom_label(data = df_estimates, aes(
-    x = -0.05,
-    y = maximum_llh-25,
-    label = paste0("ψ: ", round(estim_psi, 2))
-  )) +
-  xlim(-.1, 1.1) +
-  theme(legend.position = "none")
+  theme_light() +
+  theme(legend.title = element_blank(),
+        legend.position = "bottom")
 
 ```
 
+# Simulate data sets
+
+## Based on chosen parameters
+
+See https://rbchan.github.io/unmarked/articles/simulate.html for more informations.
+
+```{r simul-chosen-params}
+simul_umf <- simulate(
+  "COP",
+  formulas = list(psi =  ~ elev + habitat, lambda =  ~ rain),
+  coefs = list(
+    psi = c(
+      intercept = qlogis(.2),
+      elev = -0.5,
+      habitatB = .5,
+      habitatC = .8
+    ),
+    lambda = c(intercept = log(3), rain = -1)
+  ),
+  design = list(M = 100, J = 3),
+  guide = list(
+    elev = list(dist = rnorm, mean = 100, sd = 80),
+    habitat = factor(levels = c("A", "B", "C"))
+  ),
+  seed = 123
+)
+head(simul_umf)
+summary(simul_umf)
+```
+
+## Based on a unmarkedFitCOP object
+
+```{r}
+
+```
+
+
+<!--
+
 # Interpret the results
 
 
@@ -552,6 +753,8 @@ cbind(SiteCov$elev, predict(respcount, "state"))
 cbind(as.numeric(t(TemporalCov$rain)), predict(respcount, "det"))
 ```
 
+-->
+
 # --
  
 ```{r dev}
@@ -560,10 +763,14 @@ cbind(as.numeric(t(TemporalCov$rain)), predict(respcount, "det"))
 # psistarts = rep(0, length(attr(terms(psiformula), "term.labels")) + 1)
 # lambdastarts = rep(0, length(attr(terms(lambdaformula), "term.labels")) + 1)
 
+psiformula <- ~ 1
+lambdaformula <- ~ 1
+formlist <- mget(c("psiformula", "lambdaformula"))
+
 data <- umf
 psiformula <- ~ habitat
 lambdaformula <- ~ rain
-
+formlist <- mget(c("psiformula", "lambdaformula"))
 
 method = "BFGS"
 se = TRUE
@@ -580,5 +787,4 @@ object = occuCOP(
 )
 
 ```
- 
- 
-\ No newline at end of file
+
author	lpautrel <lea.pautrel@terroiko.fr>	2023-12-01 14:38:20 +0100
committer	lpautrel <lea.pautrel@terroiko.fr>	2023-12-01 14:38:20 +0100
commit	8913f59a50152651cf3bc5ab063e567cf185525b (patch)
tree	43241ea5cba097d1dae5899267da42f1447e9fe8
parent	e5564064b715d149c450d0f56128a1197827b835 (diff)