Add occuCOP documentation

author: lpautrel <lea.pautrel@terroiko.fr> 2023-12-07 15:00:28 +0100
committer: lpautrel <lea.pautrel@terroiko.fr> 2023-12-07 15:00:28 +0100
commit: 90eaf7282e3527951b522843d4389c8846a3dcbb (patch)
tree: c81cf18ac4a5fd799264a6f85e5bac8c95d2007c
parent: 0fa0a8986f0683df79035e0bafee54bfc3a532f3 (diff)
5 files changed, 354 insertions, 15 deletions
diff --git a/R/occuCOP.R b/R/occuCOP.R
index 02d351f..e133030 100644
--- a/R/occuCOP.R
+++ b/R/occuCOP.R
@@ -8,11 +8,10 @@
 #   z_i = Occupancy state of site i
 #       = 1 if the site i is occupied
 #       = 0 else
-# with:
 #   psi_i = Occupancy probability of site i
 
 # Detection
-# N_ij | z_i = 1 ~ Poisson(lambda_is*L_is)
+# N_ij | z_i = 1 ~ Poisson(lambda_ij*L_ij)
 # N_ij | z_i = 0 ~ 0
 # 
 # with:
@@ -54,7 +53,6 @@ setClass(
 setClass("unmarkedFitCOP",
          representation(removed_obs = "matrix",
                         formlist = "list",
-                        nll = "optionalNumeric",
                         convergence="optionalNumeric"),
          contains = "unmarkedFit")
 
@@ -206,7 +204,7 @@ setMethod("show", "unmarkedFrameCOP", function(object) {
   df_L <- data.frame(object@L)
   colnames(df_L) <- paste0("L.", 1:J)
   if (ncol(df_unmarkedFrame) > J) {
-    df <- cbind(df_unmarkedFrame[, 1:J], 
+    df <- cbind(df_unmarkedFrame[, 1:J, drop = FALSE], 
                 df_L, 
                 df_unmarkedFrame[, (J + 1):ncol(df_unmarkedFrame), drop = FALSE])
   } else {
@@ -281,13 +279,13 @@ setMethod("[", c("unmarkedFrameCOP", "numeric", "numeric", "missing"),
             }
             
             # y observation count data subset
-            y <- getY(x)[i, j]
+            y <- getY(x)[i, j, drop = FALSE]
             if (min(length(i), length(j)) == 1) {
               y <- t(y)
             }
             
             # L subset
-            L <- x@L[i, j]
+            L <- x@L[i, j, drop = FALSE]
             if (min(length(i), length(j)) == 1) {
               L <- t(L)
             }
@@ -618,12 +616,10 @@ occuCOP <- function(data,
                     method = "BFGS",
                     se = TRUE,
                     engine = c("C", "R"),
-                    threads = 1L,
                     na.rm = TRUE,
                     get.NLL.params = NULL,
                     L1 = FALSE,
                     ...) {
-  #TODO: engines
   #TODO: random effects
   
   # Neg loglikelihood COP ------------------------------------------------------
@@ -693,7 +689,6 @@ occuCOP <- function(data,
   stopifnot(class(lambdaformula) == "formula")
   if(!missing(psistarts)){stopifnot(class(psistarts) %in%  c("numeric", "double", "integer"))}
   if(!missing(lambdastarts)){stopifnot(class(lambdastarts) %in%  c("numeric", "double", "integer"))}
-  stopifnot(class(threads) %in%  c("numeric", "double", "integer"))
   se = as.logical(match.arg(
     arg = as.character(se),
     choices = c("TRUE", "FALSE", "0", "1")
@@ -811,7 +806,7 @@ occuCOP <- function(data,
                          paste0("log(lambda).", ParamLambda))
     df_NLL$nll = NA
     for (i in 1:nrow(df_NLL)) {
-      df_NLL$nll[i] = nll_COP(params = as.numeric(as.vector(df_NLL[i, -ncol(df_NLL)])))
+      df_NLL$nll[i] = R_nll_occuCOP(params = as.numeric(as.vector(df_NLL[i, -ncol(df_NLL)])))
     }
     return(df_NLL)
   }
@@ -886,7 +881,6 @@ occuCOP <- function(data,
   covMat <- invertHessian(opt, nP, se)
   ests <- opt$par
   tmb_mod <- NULL
-  nll_value <- opt$value
   fmAIC <- 2 * opt$value + 2 * nP
   
   # Organize effect estimates
@@ -940,7 +934,6 @@ occuCOP <- function(data,
     opt = opt,
     negLogLike = opt$value,
     nllFun = nll,
-    nll = nll_value,
     convergence = opt$convergence,
     TMB = tmb_mod
   )
diff --git a/man/occuCOP.Rd b/man/occuCOP.Rd
new file mode 100644
index 0000000..ca37e4e
--- /dev/null
+++ b/man/occuCOP.Rd
@@ -0,0 +1,245 @@
+\name{occuCOP}
+
+\alias{occuCOP}
+
+\title{Fit the occupancy model using count dta}
+
+\usage{
+occuCOP(data, 
+        psiformula = ~1, lambdaformula = ~1, 
+        psistarts, lambdastarts,
+        method = "BFGS", se = TRUE, 
+        engine = c("C", "R"), na.rm = TRUE, 
+        get.NLL.params = NULL, L1 = FALSE, ...)}
+
+\arguments{
+
+    \item{data}{An \code{\linkS4class{unmarkedFrameCOP}} object created with the \code{\link{unmarkedFrameCOP}} function.}
+    
+    \item{psiformula}{Formula describing the occupancy covariates.}
+    
+    \item{lambdaformula}{Formula describing the detection covariates.}
+    
+    \item{psistarts}{Vector of starting values for likelihood maximisation with \code{\link{optim}} for occupancy probability \eqn{\psi}{psi}. These values must be logit-transformed (with \code{\link{qlogis}}). See details. By default, optimisation will start at 0, corresponding to an occupancy probability of 0.5 (\code{plogis(0)} is 0.5).}
+    
+    \item{lambdastarts}{Vector of starting values for likelihood maximisation with \code{\link{optim}} for detection rate \eqn{\lambda}{lambda}. These values must be log-transformed (with \code{\link{log}}). See details. By default, optimisation will start at 0, corresponding to detection rate of 1 (\code{exp(0)} is 1).}
+
+    \item{method}{Optimisation method used by \code{\link{optim}}.}
+    
+    \item{se}{Logical specifying whether to compute (\code{se=TRUE}) standard errors or not (\code{se=FALSE}).}
+    
+    \item{engine}{Code to use for optimisation. Either \code{"C"} for fast C++ code, or \code{"R"} for native R code.}
+    
+    \item{na.rm}{Logical specifying whether to fit the model (\code{na.rm=TRUE}) or not (\code{na.rm=FALSE}) if there are NAs in the \code{\link{unmarkedFrameCOP}} object.}
+    
+    \item{get.NLL.params}{A list of vectors of parameters (\code{c(psiparams, lambdaparams)}). If specified, the function will not maximise likelihood but return the likelihood for the those parameters. See an example below.}
+
+    \item{L1}{Logical specifying whether the length of observations (\code{L}) are purposefully set to 1 (\code{L1=TRUE}) or not (\code{L1=FALSE}).}
+
+    \item{\dots}{Additional arguments to pass to \code{\link{optim}}, such as lower and upper bounds or a list of control parameters.}
+  }
+
+\description{This function fits a single season occupancy model using count data.}
+
+\details{
+  
+  See \code{\link{unmarkedFrameCOP}} for a description of how to supply data to the \code{data} argument. See \code{\link{unmarkedFrame}} for a more general documentation of \code{unmarkedFrame} objects for the different models implemented in \pkg{unmarked}.
+  
+  \subsection{The COP occupancy model}{
+    
+    \code{occuCOP} fits a single season occupancy model using count data, as described in Pautrel et al. (2023). 
+    
+    The \strong{occupancy sub-model} is:
+    
+    \deqn{z_i \sim \text{Bernoulli}(\psi_i)}{z_i ~ Bernoulli(psi_i)}
+    
+    \itemize{
+      \item With \eqn{z_i}{z_i} the occupany state of site \eqn{i}{i}. \eqn{z_i=1}{z_i = 1} if site \eqn{i}{i} is occupied by the species, \emph{i.e.} if the species is present in site \eqn{i}{i}. \eqn{z_i=0}{z_i = 0} if site \eqn{i}{i} is not occupied.
+      \item With \eqn{\psi_i}{psi_i} the occupancy probability of site \eqn{i}{i}.
+    }
+    
+    The \strong{observation sub-model} is:
+    
+    \deqn{
+      N_{ij} | z_i = 1 \sim \text{Poisson}(\lambda_{ij} L_{ij}) \\
+      N_{ij} | z_i = 0 \sim 0
+    }{
+      N_ij | z_i = 1 ~ Poisson(lambda_is*L_is)
+      N_ij | z_i = 0 ~ 0
+    }
+    
+    \itemize{
+      \item With \eqn{N_{ij}}{N_ij} the count of detection events in site \eqn{i}{i} during observation \eqn{j}{j}.
+      \item With \eqn{\lambda_{ij}}{lambda_ij} the detection rate in site \eqn{i}{i} during observation \eqn{j}{j} (\emph{for example, 1 detection per day.}).
+      \item With \eqn{L_{ij}}{L_ij} the length of observation \eqn{j}{j} in site \eqn{i}{i} (\emph{for example, 7 days.}).
+    }
+    
+    What we call "observation" (\eqn{j}{j}) here can be a sampling occasion, a transect, a discretised session. Consequently, the unit of \eqn{\lambda_{ij}}{lambda_ij} and \eqn{L_{ij}}{L_ij} can be either a time-unit (day, hour, ...) or a space-unit (kilometer, meter, ...).
+  }
+  
+  \subsection{The transformation of parameters \eqn{\psi} and \eqn{\lambda}}{
+    In order to perform unconstrained optimisation, parameters are transformed.
+    
+    The occupancy probability (\eqn{\psi}) is transformed with the logit function (\code{psi_transformed = qlogis(psi)}). It can be back-transformed with the "inverse logit" function (\code{psi = plogis(psi_transformed)}).
+
+    The detection rate (\eqn{\lambda}) is transformed with the log function (\code{lambda_transformed = log(lambda)}). It can be back-transformed with the exponential function (\code{lambda = exp(lambda_transformed)}).
+  }
+  
+}
+
+\value{\code{unmarkedFitCOP} object describing the model fit. See the \code{\linkS4class{unmarkedFit}} classes.}
+
+\references{
+
+Pautrel, L., Moulherat, S., Gimenez, O. & Etienne, M.-P. Submitted. \emph{Analysing biodiversity observation data collected in continuous time: Should we use discrete or continuous-time occupancy models?} Preprint at \href{https://doi.org/10.1101/2023.11.17.567350}{https://doi.org/10.1101/2023.11.17.567350}.
+
+}
+
+\author{Léa Pautrel}
+
+\seealso{
+  \code{\link{unmarked}}, 
+  \code{\link{unmarkedFrameCOP}},
+  \code{\link{unmarkedFit-class}}
+}
+
+
+\examples{
+set.seed(123)
+options(max.print = 50)
+
+# We simulate data in 100 sites with 3 observations of 7 days per site.
+nSites <- 100
+nObs <- 3
+
+# For an occupancy covariate, we associate each site to a land-use category.
+landuse <- sample(factor(c("Forest", "Grassland", "City"), ordered = T), 
+                  size = nSites, replace = T)
+simul_psi <- ifelse(landuse == "Forest", 0.8, 
+                    ifelse(landuse == "Grassland", 0.4, 0.1))
+z <- rbinom(n = nSites, size = 1, prob = simul_psi)
+
+# For a detection covariate, we create a fake wind variable.
+wind <- matrix(rexp(n = nSites * nObs), nrow = nSites, ncol = nObs)
+simul_lambda <- wind / 5
+L = matrix(7, nrow = nSites, ncol = nObs)
+
+# We now simulate count detection data
+y <- matrix(rpois(n = nSites * nObs, lambda = simul_lambda * L), 
+            nrow = nSites, ncol = nObs) * z
+
+# We create our unmarkedFrameCOP object
+umf <- unmarkedFrameCOP(
+  y = y,
+  L = L,
+  siteCovs = data.frame("landuse" = landuse),
+  obsCovs = list("wind" = wind)
+)
+print(umf)
+
+# We fit our model without covariates
+fitNull <- occuCOP(data = umf)
+print(fitNull)
+
+# We fit our model with covariates
+fitCov <- occuCOP(data = umf, psiformula = ~ landuse, lambdaformula = ~ wind)
+print(fitCov)
+
+# We back-transform the parameter's estimates
+## Back-transformed occupancy probability with no covariates
+backTransform(fitNull, "psi")
+
+## Back-transformed occupancy probability depending on habitat use
+predict(fitCov,
+        "psi",
+        newdata = data.frame("landuse" = c("Forest", "Grassland", "City")),
+        appendData = TRUE)
+
+## Back-transformed detection rate with no covariates
+backTransform(fitNull, "lambda")
+
+## Back-transformed detection rate depending on wind
+predict(fitCov,
+        "lambda",
+        appendData = TRUE)
+
+## This is not easily readable. We can show the results in a clearer way, by:
+##  - adding the site and observation
+##  - printing only the wind covariate used to get the predicted lambda
+cbind(
+  data.frame(
+    "site" = rep(1:nSites, each = nObs),
+    "observation" = rep(1:nObs, times = nSites),
+    "wind" = getData(fitCov)@obsCovs
+  ),
+  predict(fitCov, "lambda", appendData = FALSE)
+)
+
+# We can choose the initial parameters when fitting our model.
+# For psi, intituively, the initial value can be the proportion of sites 
+#          in which we have observations.
+(psi_init <- mean(rowSums(y) > 0))
+
+# For lambda, the initial value can be the mean count of detection events 
+#             in sites in which there was at least one observation.
+(lambda_init <- mean(y[rowSums(y) > 0, ]))
+
+# We have to transform them.
+occuCOP(
+  data = umf,
+  psiformula = ~ 1,
+  lambdaformula = ~ 1,
+  psistarts = qlogis(psi_init),
+  lambdastarts = log(lambda_init)
+)
+
+# If we have covariates, we need to have the right length for the start vectors.
+# psi ~ landuse --> 3 param to estimate: Intercept, landuseForest, landuseGrassland
+# lambda ~ wind --> 2 param to estimate: Intercept, wind
+occuCOP(
+  data = umf,
+  psiformula = ~ landuse,
+  lambdaformula = ~ wind,
+  psistarts = rep(qlogis(psi_init), 3),
+  lambdastarts = rep(log(lambda_init), 2)
+)
+
+# And with covariates, we could have chosen better initial values, such as the
+# proportion of sites in which we have observations per land-use category.
+(psi_init_covs <- c(
+  "City" = mean(rowSums(y[landuse == "City", ]) > 0),
+  "Forest" = mean(rowSums(y[landuse == "Forest", ]) > 0),
+  "Grassland" = mean(rowSums(y[landuse == "Grassland", ]) > 0)
+))
+occuCOP(
+  data = umf,
+  psiformula = ~ landuse,
+  lambdaformula = ~ wind,
+  psistarts = qlogis(psi_init_covs))
+
+# We can fit our model with a different optimisation algorithm.
+occuCOP(data = umf, method = "Nelder-Mead")
+
+# We can run our model with a C++ or with a R likelihood function.
+## They give the same result. 
+occuCOP(data = umf,engine = "C", psistarts = 0, lambdastarts = 0)
+occuCOP(data = umf, engine = "R", psistarts = 0, lambdastarts = 0)
+
+## The C++ (the default) is faster.
+system.time(occuCOP(data = umf,engine = "C", psistarts = 0, lambdastarts = 0))
+system.time(occuCOP(data = umf, engine = "R", psistarts = 0, lambdastarts = 0))
+
+## However, if you want to understand how the likelihood is calculated,
+## you can easily access the R likelihood function.
+print(occuCOP(data = umf, engine = "R", psistarts = 0, lambdastarts = 0)@nllFun)
+
+# Finally, if you do not want to fit your model but only get the likelihood,
+# you can get the negative log-likelihood for a given set of parameters.
+occuCOP(data = umf, get.NLL.params = list(
+  c("psi" = qlogis(0.25), "lambda" = log(2)),
+  c("psi" = qlogis(0.5), "lambda" = log(1)),
+  c("psi" = qlogis(0.75), "lambda" = log(0.5))
+))
+}
+
+\keyword{models}
diff --git a/man/unmarkedFrame-class.Rd b/man/unmarkedFrame-class.Rd
index 9df4157..bf323cf 100644
--- a/man/unmarkedFrame-class.Rd
+++ b/man/unmarkedFrame-class.Rd
@@ -98,6 +98,7 @@ argument of the fitting functions.
     \item{numSites}{\code{signature(object = "unmarkedFrame")}: extract M }
     \item{numY}{\code{signature(object = "unmarkedFrame")}: extract
       ncol(y) }
+    \item{getL}{\code{signature(object = "unmarkedFrame")}: extract L }
     \item{obsCovs}{\code{signature(object = "unmarkedFrame")}: extract
 		observation-level covariates }
     \item{obsCovs<-}{\code{signature(object = "unmarkedFrame")}: add or
diff --git a/man/unmarkedFrameCOP.Rd b/man/unmarkedFrameCOP.Rd
new file mode 100644
index 0000000..937dcbe
--- /dev/null
+++ b/man/unmarkedFrameCOP.Rd
@@ -0,0 +1,100 @@
+\name{unmarkedFrameCOP}
+\alias{unmarkedFrameCOP}
+
+\title{Organize data for the occupancy model using count data fit by \code{occuCOP}}
+
+\usage{unmarkedFrameCOP(y, L, siteCovs = NULL, obsCovs = NULL, mapInfo = NULL)}
+
+\description{Organizes count data along with the covariates. The \linkS4class{unmarkedFrame} S4 class required by the \code{data} argument of \code{\link{occuCOP}}.}
+
+\arguments{
+  
+  \item{y}{An MxJ matrix of the count data, where M is the number of sites, J is the maximum number of observation periods (sampling occasions, transects, discretised sessions...) per site.}
+
+  \item{L}{An MxJ matrix of the length of the observation periods. For example, duration of the sampling occasion in hours, duration of the discretised session in days, or length of the transect in meters.}
+  
+  \item{siteCovs}{A \code{\link{data.frame}} of covariates that vary at the site level. This should have M rows and one column per covariate}
+  
+  \item{obsCovs}{A named list of dataframes of dimension MxJ, with one dataframe per covariate that varies between sites and observation periods}
+  
+  \item{mapInfo}{Currently ignored}
+}
+
+\details{
+    unmarkedFrameCOP is the \linkS4class{unmarkedFrame} S4 class that holds data to be passed to the \code{\link{occuCOP}} model-fitting function.
+}
+
+\value{an object of class \code{unmarkedFrameCOP}}
+
+\seealso{
+  \code{\link{unmarkedFrame-class}}, 
+  \code{\link{unmarkedFrame}}, 
+  \code{\link{occuCOP}}
+}
+
+\examples{
+# Fake data
+M <- 4 # Number of sites
+J <- 3 # Number of observation periods
+
+# Count data
+(y <- matrix(
+  c(1, 3, 0,
+    0, 0, 0,
+    2, 0, 5,
+    1, NA, 0),
+  nrow = M,
+  ncol = J,
+  byrow = T
+))
+
+# Length of observation periods
+(L <- matrix(
+  c(1, 3, NA,
+    2, 2, 2,
+    1, 2, 1,
+    7, 1, 3),
+  nrow = M,
+  ncol = J,
+  byrow = T
+))
+
+# Site covariates
+(site.covs <- data.frame(
+  "elev" = rexp(4),
+  "habitat" = factor(c("forest", "forest", "grassland", "grassland"))
+))
+
+# Observation covariates (as a list)
+(obs.covs.list <- list(
+  "rain" = matrix(rexp(M * J), nrow = M, ncol = J),
+  "wind" = matrix(
+    sample(letters[1:3], replace = T, size = M * J),
+    nrow = M, ncol = J)
+))
+
+# Organise data in a unmarkedFrameCOP object
+umf <- unmarkedFrameCOP(
+  y = y,
+  L = L,
+  siteCovs = site.covs,
+  obsCovs = obs.covs.list
+)
+
+# Look at data
+print(umf) # Print the whole data set
+print(umf[1, 2]) # Print the data of the 1st site, 2nd observation
+summary(umf) # Summarise the data set
+plot(umf) # Plot the count of detection events
+
+
+# L is optional, if absent, it will be replaced by a MxJ matrix of 1
+unmarkedFrameCOP(
+  y = y,
+  siteCovs = site.covs,
+  obsCovs = obs.covs.list
+)
+
+# Covariates are optional
+unmarkedFrameCOP(y = y)
+}
diff --git a/tests/testthat/test_occuCOP.R b/tests/testthat/test_occuCOP.R
index ffecae0..d19f390 100644
--- a/tests/testthat/test_occuCOP.R
+++ b/tests/testthat/test_occuCOP.R
@@ -2,9 +2,9 @@ context("occuCOP fitting function")
 skip_on_cran()
 
 COPsimul <- function(psi = 0.5,
-                         lambda = 1,
-                         M = 100,
-                         J = 5) {
+                     lambda = 1,
+                     M = 100,
+                     J = 5) {
   
   z_i <- sample(
     x = c(0, 1),
author	lpautrel <lea.pautrel@terroiko.fr>	2023-12-07 15:00:28 +0100
committer	lpautrel <lea.pautrel@terroiko.fr>	2023-12-07 15:00:28 +0100
commit	90eaf7282e3527951b522843d4389c8846a3dcbb (patch)
tree	c81cf18ac4a5fd799264a6f85e5bac8c95d2007c
parent	0fa0a8986f0683df79035e0bafee54bfc3a532f3 (diff)