diff options
author | lpautrel <lea.pautrel@terroiko.fr> | 2023-11-28 12:33:03 +0100 |
---|---|---|
committer | lpautrel <lea.pautrel@terroiko.fr> | 2023-11-28 12:33:03 +0100 |
commit | 92d8b8fa68eb7c1ed7c1f92e139cd401c8e4671b (patch) | |
tree | 5456815ef1ee92c94f1576d41d36897ad7fd1e57 | |
parent | 436977a45966b76d1290ef627e71797485803697 (diff) |
Init occuCOP
-rw-r--r-- | R/occuCOP.R | 747 | ||||
-rw-r--r-- | test_occuCOP.R | 369 | ||||
-rw-r--r-- | tmp_use_COP.Rmd | 584 |
3 files changed, 1700 insertions, 0 deletions
diff --git a/R/occuCOP.R b/R/occuCOP.R new file mode 100644 index 0000000..9a8444a --- /dev/null +++ b/R/occuCOP.R @@ -0,0 +1,747 @@ + +# Prerequisites (while its not integrated within the package) +library(unmarked) +source("R/unmarkedFrame.R") +source("R/utils.R") +source("R/unmarkedEstimate.R") +source("R/unmarkedFitList.R") +source("R/predict.R") +source("R/getDesign.R") +source("R/mixedModelTools.R") +# sapply(paste0("./R/", list.files("./R/")), source) + +library(lattice) +library(testthat) + +# Fit the occupancy model COP +# (Counting Occurrences Process) + +# Occupancy +# z_i ~ Bernoulli(psi) +# +# with: +# z_i = Occupancy state of site i +# = 1 if the site i is occupied +# = 0 else + +# Detection +# N_ijs | z_i = 1 ~ Poisson(lambda*T_s) +# N_ijs | z_i = 0 ~ 0 +# +# with: +# N_is = Number of detections of site i during session s +# z_i = Occupancy state of site i +# lambda = Detection rate +# T_s = Duration of the session s + +# CLASSES ---------------------------------------------------------------------- + +## unmarkedFrameCOP class ---- +setClass( + "unmarkedFrameCOP", + representation(obsLength = "matrix"), + contains = "unmarkedFrame", + validity = function(object) { + errors <- character(0) + M <- nrow(object@y) + J <- ncol(object@y) + y_integers = sapply(object@y, check.integer, na.ignore = T) + if (!all(y_integers)) { + errors <- c(errors, + paste( + "Count detection should be integers. Non-integer values:", + paste(object@y[which(!y_integers)], collapse = ', ') + )) + } + if (!all(all(dim(object@obsLength) == dim(object@y)))){ + errors <- c( errors, paste( + "obsLength should be a matrix of the same dimension as y, with M =", M, + "rows (sites) and J =", J, "columns (sampling occasions)." + ))} + if (length(errors) == 0) TRUE + else errors + } +) + +## unmarkedFitCOP class ---- +setClass("unmarkedFitCOP", + representation(removed_obs = "matrix", + formlist = "list", + nll = "optionalNumeric", + convergence="optionalNumeric"), + contains = "unmarkedFit") + + +# METHODS ---------------------------------------------------------------------- + +## getDesign method ---- +# Example for occu: https://github.com/rbchan/unmarked/blob/536e32ad7b2526f8fac1b315ed2e99accac0d50f/R/getDesign.R#L10-L97 +# Example for GDR: https://github.com/rbchan/unmarked/blob/c82e63947d7df7dfc896066e51dbf63bda3babf4/R/gdistremoval.R#L177-L253 +setMethod( + "getDesign", "unmarkedFrameCOP", + function(umf, formlist, na.rm = TRUE) { + + # Retrieve useful informations from umf + M <- numSites(umf) + J <- obsNum(umf) + y <- getY(umf) + obsLength <- umf@obsLength + + # Occupancy submodel ------------------------------------------------------- + + # Retrieve the fixed-effects part of the formula + psiformula <- lme4::nobars(as.formula(formlist$psiformula)) + psiVars <- all.vars(psiformula) + + # Retrieve the site covariates + sc <- siteCovs(umf) + if(is.null(sc)) { + sc <- data.frame(.dummy = rep(0, M)) + } + + # Check for missing variables + psiMissingVars <- psiVars[!(psiVars %in% names(sc))] + if (length(psiMissingVars) > 0) { + stop(paste( + "Variable(s)", + paste(psiMissingVars, collapse = ", "), + "not found in siteCovs" + ), call. = FALSE) + } + + # State model matrix + Xpsi <- model.matrix( + psiformula, + model.frame(psiformula, sc, na.action = NULL) + ) + Zpsi <- get_Z(formlist$psiformula, sc) + + # Detection submodel ------------------------------------------------------- + + # Retrieve the fixed-effects part of the formula + lambdaformula <- lme4::nobars(as.formula(formlist$lambdaformula)) + lambdaVars <- all.vars(lambdaformula) + + # Retrieve the observation covariates + oc <- obsCovs(umf) + if(is.null(oc)) { + oc <- data.frame(.dummy = rep(0, M*J)) + } + + # Check for missing variables + lambdaMissingVars <- lambdaVars[!(lambdaVars %in% names(oc))] + if (length(lambdaMissingVars) > 0) { + stop(paste( + "Variable(s)", + paste(lambdaMissingVars, collapse = ", "), + "not found in obsCovs" + ), call. = FALSE) + } + + # Detection model matrix + Xlambda <- model.matrix( + lambdaformula, + model.frame(lambdaformula, oc, na.action = NULL) + ) + Zlambda <- get_Z(formlist$lambdaformula, oc) + + # Missing data ------------------------------------------------------------- + + # Missing count data + missing_y <- is.na(y) + + # Missing site covariates + # (TRUE if at least one site covariate is missing in a site) + missing_sc <- apply(Xpsi, 1, function(x) any(is.na(x))) + + # Missing observation covariates + # (TRUE if at least one observation covariate is missing in a sampling occasion in a site) + missing_oc <- apply(Xlambda, 1, function(x) any(is.na(x))) + + # Matrix MxJ of values to not use because there is some data missing + removed_obs <- + # If there is count data missing in site i and obs j + missing_y | + # If there is any site covariate missing in site i + replicate(n = J, missing_sc) | + # If there is any observation covariate missing in site i and obs j + matrix(missing_oc, M, J, byrow = T) + + if (any(removed_obs)) { + if (na.rm) { + nb_missing_sites <- sum(apply(removed_obs, 1, function(x) all(is.na(x)))) + nb_missing_observations <- sum(is.na(removed_obs)) + warning("There are missing data (count data, site covariate, observation covariate):\n\t", + "Only data from ", M-nb_missing_sites, " sites out of ", M, " are used.\n\t", + "Only data from ", (M*J)-nb_missing_sites, " observations out of ", (M*J), " are used.") + } else { + stop("There is missing data:\n\t", + sum(missing_y), " missing count data (y)\n\t", + sum(missing_sc), " missing site covariates (siteCovs)\n\t", + sum(missing_oc), " missing observation covariates (obsCovs)") + } + } + + # Output ------------------------------------------------------------------- + return(list( + y = y, + Xpsi = Xpsi, + Zpsi = Zpsi, + Xlambda = Xlambda, + Zlambda = Zlambda, + removed_obs = removed_obs + )) + }) + + +## show method ---- +setMethod("show", "unmarkedFrameCOP", function(object) { + J <- ncol(object@obsLength) + df_unmarkedFrame <- as(object, "data.frame") + df_obsLength <- data.frame(object@obsLength) + colnames(df_obsLength) <- paste0("obsLength.", 1:J) + if (ncol(df_unmarkedFrame) > J) { + df <- + cbind(df_unmarkedFrame[, 1:J], df_obsLength, df_unmarkedFrame[, (J + 1):ncol(df_unmarkedFrame)]) + } else{ + df <- + cbind(df_unmarkedFrame[, 1:J], df_obsLength) + } + cat("Data frame representation of unmarkedFrame object.\n") + print(df) +}) +# LP note: as is defined in unmarkedFrame.R part "COERCION" + + +## summary method ---- +setMethod("summary", "unmarkedFrameCOP", function(object,...) { + cat("unmarkedFrameCOP Object\n\n") + + cat(nrow(object@y), "sites\n") + cat("Maximum number of sampling occasions per site:",obsNum(object),"\n") + mean.obs <- mean(rowSums(!is.na(getY(object)))) + cat("Mean number of sampling occasions per site:",round(mean.obs,2),"\n") + cat("Sites with at least one detection:", + sum(apply(getY(object), 1, function(x) any(x > 0, na.rm=TRUE))), + "\n\n") + + cat("Tabulation of y observations:") + print(table(object@y, exclude=NULL)) + + if(!is.null(object@obsLength)) { + cat("\nTabulation of sampling occasions length:") + print(table(object@obsLength)) + } + + if(!is.null(object@siteCovs)) { + cat("\nSite-level covariates:\n") + print(summary(object@siteCovs)) + } + + if(!is.null(object@obsCovs)) { + cat("\nObservation-level covariates:\n") + print(summary(object@obsCovs)) + } + +}) + +## umf[i, j] ---- +setMethod("[", c("unmarkedFrameCOP", "numeric", "numeric", "missing"), + function(x, i, j) { + # Gey dimensions of x + M <- numSites(x) + J <- obsNum(x) + + if (length(i) == 0 & length(j) == 0) { + return(x) + } + + # Check i + if (any(i < 0) && + any(i > 0)) { + stop("i must be all positive or all negative indices.") + } + if (all(j < 0)) { + i <- (1:M)[i] + } + + # Check j + if (any(j < 0) && + any(j > 0)) { + stop("j must be all positive or all negative indices.") + } + if (all(j < 0)) { + j <- (1:J)[j] + } + + # y observation count data subset + y <- getY(x)[i, j] + if (min(length(i), length(j)) == 1) { + y <- t(y) + } + + # obsLength subset + obsLength <- x@obsLength[i, j] + if (min(length(i), length(j)) == 1) { + obsLength <- t(obsLength) + } + + # siteCovs subset + siteCovs <- siteCovs(x) + if (!is.null(siteCovs)) { + siteCovs <- siteCovs(x)[i, , drop = FALSE] + } + + # obsCovs subset + obsCovs <- obsCovs(x) + if (!is.null(obsCovs)) { + MJ_site <- rep(1:M, each = J) + MJ_obs <- rep(1:J, times = M) + obsCovs <- obsCovs[((MJ_obs %in% j) & (MJ_site %in% i)), ] + rownames(obsCovs) <- NULL + } + + # Recreate umf + new( + Class = "unmarkedFrameCOP", + y = y, + obsLength = obsLength, + siteCovs = siteCovs, + obsCovs = obsCovs, + obsToY = diag(length(j)), + mapInfo = x@mapInfo + ) + }) + + +## umf[i, ] ---- +setMethod("[", c("unmarkedFrameCOP", "numeric", "missing", "missing"), + function(x, i) { + x[i, 1:obsNum(x)] + }) + +## umf[, j] ---- +setMethod("[", c("unmarkedFrameCOP", "missing", "numeric", "missing"), + function(x, j) { + x[1:numSites(x), j] + }) + + +## fl_getY ---- +setMethod("fl_getY", "unmarkedFitCOP", function(fit, ...){ + getDesign(getData(fit), fit@formlist)$y +}) + +# PREDICT METHODS ---- +# Fit type-specific methods to generate different components of prediction +# 5. predict_by_chunk(): Take inputs and generate predictions +# Basic methods are shown below; fit type-specific methods in their own sections + +## predict_inputs_from_umf ---- +setMethod("predict_inputs_from_umf", "unmarkedFitCOP", + function(object, type, newdata, na.rm, re.form = NULL) { + designMats = getDesign(umf = newdata, + formlist = object@formlist, + na.rm = na.rm) + if (type == "psi") list_els <- c("Xpsi", "Zpsi") + if (type == "lambda") list_els <- c("Xlambda", "Zlambda") + X <- designMats[[list_els[1]]] + if (is.null(re.form)) X <- cbind(X, designMats[[list_els[2]]]) + return(list(X = X, offset = NULL)) + }) + +## get_formula ---- +setMethod("get_formula", "unmarkedFitCOP", function(object, type, ...) { + fl <- object@formlist + switch(type, psi = fl$psiformula, lambda = fl$lambdaformula) +}) + +## get_orig_data ---- +setMethod("get_orig_data", "unmarkedFitCOP", function(object, type, ...){ + clean_covs <- clean_up_covs(object@data, drop_final=FALSE) + datatype <- switch(type, psi = 'site_covs', lambda = 'obs_covs') + clean_covs[[datatype]] +}) + + + +# Useful functions ------------------------------------------------------------- + +replaceinf = function(x) { + max(.Machine$double.xmin, min(.Machine$double.xmax, x)) +} + +check.integer <- function(x, na.ignore = F) { + if (is.na(x) & na.ignore) { + return(T) + } + x %% 1 == 0 +} + +# unmarkedFrame ---------------------------------------------------------------- + +unmarkedFrameCOP <- function(y, obsLength, siteCovs = NULL, obsCovs = NULL, mapInfo = NULL) { + + # Verification that they are non-NA data in y + if (all(is.na(y))) { + stop("y only contains NA. y needs to contain non-NA integers.") + } + + # Verification that these are count data (and not detection/non-detection) + if (max(y, na.rm = T) == 1) { + warning("unmarkedFrameCOP is for count data. ", + "The data furnished appear to be detection/non-detection.") + } + + # Number of sampling occasions + J <- ncol(y) + + # If missing obsLength: replace by matrix of 1 + # and the lambda will be the detection rate per observation length + if (missing(obsLength)) { + obsLength <- y * 0 + 1 + warning("obsLength is missing, replacing it by a matrix of 1.") + } else if (is.null(obsLength)) { + obsLength <- y * 0 + 1 + warning("obsLength is missing, replacing it by a matrix of 1.") + } + + # Transformation observation covariates + obsCovs <- covsToDF( + covs = obsCovs, + name = "obsCovs", + obsNum = J, + numSites = nrow(y) + ) + + # Create S4 object of class unmarkedFrameCOP + umf <- new( + Class = "unmarkedFrameCOP", + y = y, + obsLength = obsLength, + siteCovs = siteCovs, + obsCovs = obsCovs, + obsToY = diag(J), + mapInfo = mapInfo + ) + + return(umf) +} + + +# occuCOP ---------------------------------------------------------------------- + +occuCOP <- function(data, + psiformula = ~1, + lambdaformula = ~1, + psistarts = rep(0, length(attr(terms(psiformula), "term.labels")) + 1), + lambdastarts = rep(0, length(attr(terms(lambdaformula), "term.labels")) + 1), + method = "BFGS", + se = TRUE, + engine = "R", + threads = 1L, + na.rm = TRUE, + get.NLL.params = NULL, + ...) { + #TODO: engines + #TODO: NA + + # Neg loglikelihood COP ------------------------------------------------------ + nll_COP <- function(params) { + + # Reading and transforming params + # Taking into account the covariates + + # psi as a function of covariates + # psi in params are transformed with a logit transformation (qlogis) + # so they're back-transformed to a proba with inverse logit (plogis) + # Xpsi is the matrix with occupancy covariates + # params is the vector with all the parameters + # psiIdx is the index of Occupancy Parameters in params + psi <- plogis(Xpsi %*% params[psiIdx]) + + # lambda as a function of covariates + # lambda in params are transformed with a log-transformation + # so they're back-transformed to a rate with exp here + # Xlambda is the matrix with detection covariates + # params is the vector with all the parameters + # lambdaIdx is the index of Occupancy Parameters in params + lambda <- exp(Xlambda %*% params[lambdaIdx]) + + # Probability for each site (i) + iProb <- rep(NA, M) + + for (i in 1:M) { + # Probability for the site i for each sampling occasion (j) + ijProb <- rep(NA, J) + + # Total obs length in site i + obsLengthSitei <- sum(data@obsLength[i,]) + + # iIdx is the index to access the vectorised vectors of all obs in site i + iIdx <- ((i - 1) * J + 1):(i * J) + + if (SitesWithDetec[i]) { + # If there is at least one detection in site i + # factNij = sapply(factorial(yvec[iIdx]),replaceinf) + # iProb[i] = psi[i] * sum(lambda[iIdx] * obsLengthvec[iIdx] / factNij * exp(-lambda[iIdx] * obsLengthvec[iIdx])) + + + iProb[i] = psi[i] * ( + (sum(lambda[iIdx] * obsLengthvec[iIdx])) ^ sum(yvec[iIdx]) / + factorial(sum(yvec[iIdx])) * + exp(-sum(lambda[iIdx] * obsLengthvec[iIdx])) + ) + + } else { + # If there is zero detection in site i + # iProb[i] = psi[i] * sum(exp(-lambda[iIdx] * obsLengthvec[iIdx])) + (1 - psi[i]) + + iProb[i] = psi[i] * exp(-sum(lambda[iIdx] * obsLengthvec[iIdx])) + (1 - psi[i]) + + } + + } + # Note: Why is there "replaceinf(factorial(data@y[i,]))" + # instead of just "factorial(data@y[i,])"? + # Because if there are a lot of detections (n_ij >= 171 on my machine), + # then factorial(n_ij) = Inf, + # then 1/factorial(n_ij) = 0, + # then log(0) = -Inf, + # then loglikelihood = -Inf + # then optim does not work. + # + # We want to maximise likelihood, + # (technically, minimise negative loglikelihood). + # We don't do anything else with this function. + # So as long as we have a tiny value for likelihood, + # (ie a huge value for negative loglikelihood) + # it doesn't matter if its an approximation. + + # log-likelihood + ll = sum(log(iProb)) + return(-ll) + } + + # Check arguments ------------------------------------------------------------ + if (!is(data, "unmarkedFrameCOP")) { + stop("Data is not an unmarkedFrameCOP object. See ?unmarkedFrameCOP if necessary.") + } + stopifnot(class(psiformula) == "formula") + stopifnot(class(lambdaformula) == "formula") + stopifnot(class(psistarts) %in% c("numeric", "double", "integer")) + stopifnot(class(lambdastarts) %in% c("numeric", "double", "integer")) + stopifnot(class(threads) %in% c("numeric", "double", "integer")) + se = as.logical(match.arg( + arg = as.character(se), + choices = c("TRUE", "FALSE", "0", "1") + )) + na.rm = as.logical(match.arg( + arg = as.character(na.rm), + choices = c("TRUE", "FALSE", "0", "1") + )) + engine <- match.arg(engine, c("R")) + + # Format input data ---------------------------------------------------------- + + # Retrieve formlist + formlist <- mget(c("psiformula", "lambdaformula")) + + # Get the design matrix (calling the getDesign method for unmarkedFrame) + # For more informations, see: getMethod("getDesign", "unmarkedFrameCOP") + designMats <- getDesign(umf = data, formlist = formlist, na.rm = na.rm) + + # y is the count detection data (matrix of size M sites x J sessions) + y <- getY(data) + + # obsLength is the length of sessions (matrix of size M sites x J sessions) + obsLength = data@obsLength + + # Xpsi is the matrix with the occupancy covariates + Xpsi <- designMats$Xpsi + + # Xlambda is the matrix with the detection covariates + Xlambda <- designMats$Xlambda + + # Zpsi is ??? + Zpsi <- designMats$Zpsi + + # Zlambda is ??? + Zlambda <- designMats$Zlambda + + # removed_obs is a M x J matrix of the observations removed from the analysis + removed_obs <- designMats$removed_obs + sitesRemoved <- unname(which(apply(removed_obs, 1, function(x) all(x)))) + + # Number of sites + M <- nrow(y) + + # Number of sampling occasions + J <- ncol(y) + + # Total number of detection per site + NbDetecPerSite = rowSums(y) + + # Sites where there was at least one detection + SitesWithDetec = NbDetecPerSite > 0 + + # Number of sites where there was at least one detection + NbSitesWithDetec = sum(SitesWithDetec) + + # Set up parameter names and indices----------------------------------------- + + # ParamPsi Occupancy parameter names + ParamPsi <- colnames(Xpsi) + + # ParamLambda Detection parameter names + ParamLambda <- colnames(Xlambda) + + # NbParamPsi Number of occupancy parameters + NbParamPsi <- ncol(Xpsi) + + # NbParamLambda Number of detection parameters + NbParamLambda <- ncol(Xlambda) + + # nP Total number of parameters + nP <- NbParamPsi + NbParamLambda + + # psiIdx Index of the occupancy parameters + psiIdx <- 1:NbParamPsi + + # lambdaIdx Index of the detection parameters + lambdaIdx <- (NbParamPsi+1):nP + + # Re-format some variables for C and R engines + # From Matrix of dim MxJ to vector of length MxJ: + # c(ySite1Obs1, ySite1Obs2, ..., ySite1ObsJ, ysite2Obs1, ...) + yvec <- as.numeric(t(y)) + obsLengthvec <- as.numeric(t(obsLength)) + + # get.NLL.params ------------------------------------------------------------- + if (!is.null(get.NLL.params)) { + df_NLL = data.frame(t(as.data.frame(get.NLL.params))) + rownames(df_NLL) = NULL + colnames(df_NLL) = c(paste0("logit(psi).", ParamPsi), + paste0("log(lambda).", ParamLambda)) + df_NLL$nll = NA + for (i in 1:nrow(df_NLL)) { + df_NLL$nll[i] = nll_COP(params = as.numeric(as.vector(df_NLL[i, -ncol(df_NLL)]))) + } + return(df_NLL) + } + + # Optimisation --------------------------------------------------------------- + + ## Checking the starting point for optim + if (missing(lambdastarts)) { + # If lambda starts argument was not given: + # 0 by default + # so lambda = exp(0) = 1 by default + lambdastarts = rep(0, NbParamLambda) + message( + "No lambda initial values provided for optim. Using lambdastarts = c(", + paste(lambdastarts, collapse = ", "), + "), equivalent to detection rate", + ifelse(length(lambdastarts) == 1, "", "s"), + " of 1." + ) + } else if (length(lambdastarts) != NbParamLambda) { + stop("lambdastarts (", paste(lambdastarts, collapse = ", "), ") ", + "should be of length ", NbParamLambda, " with lambdaformula ", lambdaformula) + } + + if (missing(psistarts)) { + # If psi starts argument was not given + # 0 by default + # so psi = plogis(0) = 0.5 by default + psistarts = rep(0, NbParamPsi) + message( + "No psi initial values provided for optim. Using psistarts = c(", + paste(psistarts, collapse = ", "), + "), equivalent to occupancy probabilit", + ifelse(length(psistarts) == 1, "y", "ies"), + " of 0.5." + ) + } else if (length(psistarts) != NbParamPsi) { + stop("psistarts (", paste(psistarts, collapse = ", "), ") ", + "should be of length ", NbParamPsi, " with psiformula ", psiformula) + } + + starts <- c(psistarts, lambdastarts) + + ## Run optim + opt <- optim( + starts, + nll_COP, + method = method, + hessian = se, + ... + ) + + # Get output ----------------------------------------------------------------- + covMat <- invertHessian(opt, nP, se) + ests <- opt$par + tmb_mod <- NULL + nll <- opt$value + fmAIC <- 2 * opt$value + 2 * nP + + # Organize effect estimates + names(ests) <- c(ParamPsi, ParamLambda) + psi_coef <- list(ests = ests[psiIdx], cov = as.matrix(covMat[psiIdx, psiIdx])) + lambda_coef <- list(ests = ests[lambdaIdx], + cov = as.matrix(covMat[lambdaIdx, lambdaIdx])) + + # No random effects + psi_rand_info <- lambda_rand_info <- list() + + # Create unmarkedEstimates --------------------------------------------------- + psi_uE <- unmarkedEstimate( + name = "Occupancy probability", + short.name = "psi", + estimates = psi_coef$ests, + covMat = psi_coef$cov, + fixed = 1:NbParamPsi, + invlink = "logistic", + invlinkGrad = "logistic.grad", + randomVarInfo = psi_rand_info + ) + + lambda_uE <- unmarkedEstimate( + name = "Detection rate", + short.name = "lambda", + estimates = lambda_coef$ests, + covMat = lambda_coef$cov, + fixed = 1:NbParamLambda, + invlink = "exp", + invlinkGrad = "exp", + randomVarInfo = lambda_rand_info + ) + + estimateList <- unmarkedEstimateList(list(psi = psi_uE, lambda = lambda_uE)) + + # Create unmarkedFit object-------------------------------------------------- + umfit <- new( + "unmarkedFitCOP", + fitType = "occuCOP", + call = match.call(), + # formula = as.formula(paste(formlist, collapse = "")), + formula = as.formula(paste( + formlist["lambdaformula"], formlist["psiformula"], collapse = "" + )), + formlist = formlist, + data = data, + estimates = estimateList, + sitesRemoved = sitesRemoved, + removed_obs = removed_obs, + AIC = fmAIC, + opt = opt, + negLogLike = opt$value, + nllFun = nll_COP, + nll = nll, + convergence = opt$convergence, + TMB = tmb_mod + ) + + return(umfit) +} + diff --git a/test_occuCOP.R b/test_occuCOP.R new file mode 100644 index 0000000..45fb0e0 --- /dev/null +++ b/test_occuCOP.R @@ -0,0 +1,369 @@ +context("occuCOP fitting function") +skip_on_cran() + +COPsimul <- function(psi = 0.5, + lambda = 1, + M = 100, + J = 5) { + + z_i <- sample( + x = c(0, 1), + size = M, + prob = c(1 - psi, psi), + replace = T + ) + + y = matrix(rpois(n = M * J, lambda = lambda), nrow = M, ncol = J) * z_i + + return(y) +} + + +# COPsimulfit <- function(psi = 0.5, +# lambda = 1, +# M = 100, +# J = 5, +# obsLength = NULL, +# ...) { +# occuCOP(data = unmarkedFrameCOP( +# y = simulDataCOP( +# psi = psi, +# lambda = lambda, +# M = M, +# J = J +# ), +# obsLength = obsLength +# ), ...) +# } + +test_that("unmarkedFrameCOP is constructed correctly", { + set.seed(123) + + # Simulate data + M = 100 + J = 5 + y = COPsimul(psi = .5, + lambda = 1, + M = M, + J = J) + obsLength = y * 0 + 1 + + psiCovs = data.frame( + "psiNum" = rnorm(M), + "psiInt" = as.integer(rpois(n = M, lambda = 5)), + "psiBool" = sample(c(T, F), size = M, replace = T), + "psiChar" = sample(letters[1:5], size = M, replace = T), + "psiFactUnord" = factor(sample( + letters[5:10], size = M, replace = T + )), + "psiFactOrd" = sample(factor(c("A", "B", "C"), ordered = T), size = + M, replace = T) + ) + + lambdaCovs = list( + "lambdaNum" = matrix( + rnorm(M * J), + nrow = M, ncol = J + ), + "lambdaInt" = matrix( + as.integer(rpois(n = M * J, lambda = 1e5)), + nrow = M, ncol = J + ), + "lambdaBool" = matrix( + sample(c(T, F), size = M * J, replace = T), + nrow = M, ncol = J + ), + "lambdaChar" = matrix( + sample(letters[1:5], size = M * J, replace = T), + nrow = M, ncol = J + ), + "lambdaFactUnord" = matrix( + factor(sample(letters[5:10], size = M * J, replace = T)), + nrow = M, ncol = J + ), + "lambdaFactOrd" = matrix( + sample(factor(c("A", "B", "C"), ordered = T), size = M * J, replace = T), + nrow = M, ncol = J + ) + ) + + + # Creating a unmarkedFrameCOP object + expect_warning(umf <- unmarkedFrameCOP(y = y)) + expect_no_error(umf <- unmarkedFrameCOP(y = y, obsLength = obsLength)) + + + # Create subsets + expect_no_error(umf_sub_i <- umf[1:3, ]) + expect_no_error(umf_sub_j <- umf[, 1:2]) + expect_no_error(umf_sub_ij <- umf[1:3, 1:2]) + + # unmarkedFrameCOP organisation ---------------------------------------------- + expect_true(inherits(umf, "unmarkedFrameCOP")) + expect_equivalent(numSites(umf_sub_i), 3) + expect_equivalent(obsNum(umf_sub_j), 2) + expect_equivalent(numSites(umf_sub_ij), 3) + expect_equivalent(obsNum(umf_sub_ij), 2) + + # unmarkedFrameCOP display --------------------------------------------------- + + # print method + expect_no_error(print(umf)) + expect_no_warning(print(umf)) + expect_no_message(print(umf)) + expect_no_error(print(umf_sub_i)) + expect_no_error(print(umf_sub_j)) + expect_no_error(print(umf_sub_ij)) + expect_no_error(print(umf[1,])) + expect_no_error(print(umf[,1])) + expect_no_error(print(umf[1,1])) + + # summary method for unmarkedFrameCOP + expect_no_error(summary(umf)) + expect_no_error(summary(umf_sub_ij)) + + # plot method for unmarkedFrameCOP + expect_no_error(plot(umf)) + expect_no_error(plot(umf_sub_ij)) + + + # Input handling: covariates ------------------------------------------------- + expect_no_error(umfCovs <- unmarkedFrameCOP( + y = y, + obsLength = obsLength, + siteCovs = psiCovs, + obsCovs = lambdaCovs + )) + expect_no_error(print(umfCovs)) + expect_no_error(summary(umfCovs)) + expect_no_error(plot(umfCovs)) + + # Input handling: NA --------------------------------------------------------- + + # NA should not pose problems when creating the unmarkedFrameCOP object. + # The warnings and potential errors will be displayed when fitting the model. + # Except when y only contains NA: then there's an error. + + ## NA in y + yNA <- y + yNA[1:2,] <- NA + yNA[3:5, 3:4] <- NA + yNA[,3] <- NA + expect_no_error(umfNA <- unmarkedFrameCOP(y = yNA, obsLength = obsLength)) + expect_no_error(print(umfNA)) + expect_no_error(summary(umfNA)) + expect_no_error(plot(umfNA)) + + ## NA in obsLength + obsLengthNA <- obsLength + obsLengthNA[1:2,] <- NA + obsLengthNA[3:5, 3:4] <- NA + obsLengthNA[,5] <- NA + expect_no_error(umfNA <- unmarkedFrameCOP(y = y, obsLength = obsLengthNA)) + expect_no_error(print(umfNA)) + expect_no_error(summary(umfNA)) + expect_no_error(plot(umfNA)) + + ## NA also in covariates + psiCovsNA <- psiCovs + psiCovsNA[1:5,] <- NA + psiCovsNA[c(8,10,12), 3] <- NA + psiCovsNA[,1] <- NA + lambdaCovsNA <- lambdaCovs + lambdaCovsNA[[1]][1:5,] <- NA + lambdaCovsNA[[1]][,3] <- NA + lambdaCovsNA[[3]][,5] <- NA + expect_no_error(umfCovsNA <- unmarkedFrameCOP( + y = yNA, + obsLength = obsLengthNA, + siteCovs = psiCovsNA, + obsCovs = lambdaCovsNA + )) + expect_no_error(print(umfCovsNA)) + expect_no_error(summary(umfCovsNA)) + expect_no_error(plot(umfCovsNA)) + + ## all NA in y + yallNA <- y + yallNA[1:M, 1:J] <- NA + expect_error(unmarkedFrameCOP(y = yallNA, obsLength = obsLength)) + + # Input handling: error and warnings ----------------------------------------- + # Error when there are decimals in y + y_with_decimals = y + y_with_decimals[1, 1] = .5 + expect_error(unmarkedFrameCOP(y = y_with_decimals, obsLength = obsLength)) + + # Warning if y is detection/non-detection instead of count + y_detec_nodetec = (y > 0) * 1 + expect_warning(unmarkedFrameCOP(y = y_detec_nodetec, obsLength = obsLength)) + + # Error if the dimension of obsLength is different than that of y + expect_error(unmarkedFrameCOP(y = y, obsLength = obsLength[1:2, 1:2])) +}) + + +test_that("occuCOP can fit simple models", { + # Simulate data + set.seed(123) + M = 100 + J = 5 + y = COPsimul(psi = .5, + lambda = 1, + M = M, + J = J) + obsLength = y * 0 + 1 + + # Create umf + umf <- unmarkedFrameCOP(y = y, obsLength = obsLength) + + # Fitting options ---- + + # With default parameters + expect_no_error(fit_default <- occuCOP(data = umf)) + expect_no_error(occuCOP(data = umf, psiformula = ~ 1, lambdaformula = ~ 1)) + + # With chosen starting points + expect_no_error(occuCOP(data = umf, + psiformula = ~ 1, lambdaformula = ~ 1, + psistarts = qlogis(.7), + lambdastarts = log(0.1))) + expect_error(occuCOP(data = umf, + psiformula = ~ 1, lambdaformula = ~ 1, + psistarts = qlogis(c(0.7, 0.5)))) + expect_error(occuCOP(data = umf, + psiformula = ~ 1, lambdaformula = ~ 1, + lambdastarts = log(c(1, 2)))) + + # With different options + expect_no_error(occuCOP(data = umf, method = "Nelder-Mead")) + expect_error(occuCOP(data = umf, method = "ABC")) + + expect_no_error(occuCOP(data = umf, se = F)) + expect_error(occuCOP(data = umf, se = "ABC")) + + expect_no_error(occuCOP(data = umf, engine = "R")) + expect_error(occuCOP(data = umf, engine = "julia")) + + expect_no_error(occuCOP(data = umf, na.rm = F)) + expect_error(occuCOP(data = umf, na.rm = "no")) + + # Looking at at COP model outputs ---- + expect_is(fit_default, "unmarkedFitCOP") + + ## backTransform + expect_no_error(backTransform(fit_default, type = "psi")) + expect_no_error(backTransform(fit_default, type = "lambda")) + expect_error(backTransform(fit_default, type = "state")) + expect_error(backTransform(fit_default, type = "det")) + expect_is(backTransform(fit_default, type = "psi"), "unmarkedBackTrans") + + ## predict with newdata = fit@data + expect_no_error(predict(object = fit_default, type = "psi")) + expect_no_error(predict(object = fit_default, type = "lambda")) + expect_error(predict(object = fit_default, type = "state")) + + ## predict with newdata = 1 + expect_no_error(predict( + object = fit_default, + newdata = data.frame(1), + type = "psi" + )) + expect_no_error(predict( + object = fit_default, + newdata = data.frame(1), + type = "lambda" + )) + expect_no_error(predict( + object = fit_default, + newdata = data.frame("a"=1:5,"b"=10:14), + type = "psi" + )) + + # Fitting accurately ---- + + ## psi = 0.50, lambda = 1 ---- + psi_test = .5 + lambda_test = 1 + fit_accur <- occuCOP(data = unmarkedFrameCOP( + y = COPsimul( + psi = psi_test, + lambda = lambda_test, + M = 1000, + J = 10 + ), + obsLength = matrix(1, nrow = 1000, ncol = 10) + )) + psi_estimate = backTransform(fit_accur, type = "psi")@estimate + lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate + expect_equivalent( + psi_estimate, + psi_test, + tol = 0.05 + ) + expect_equivalent( + lambda_estimate, + lambda_test, + tol = 0.05 + ) + + ## psi = 0.25, lambda = 5 ---- + psi_test = 0.25 + lambda_test = 5 + fit_accur <- occuCOP(data = unmarkedFrameCOP( + y = COPsimul( + psi = psi_test, + lambda = lambda_test, + M = 1000, + J = 10 + ), + obsLength = matrix(1, nrow = 1000, ncol = 10) + )) + psi_estimate = backTransform(fit_accur, type = "psi")@estimate + lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate + expect_equivalent( + psi_estimate, + psi_test, + tol = 0.05 + ) + expect_equivalent( + lambda_estimate, + lambda_test, + tol = 0.05 + ) + + ## psi = 0.75, lambda = 0.5 ---- + psi_test = 0.75 + lambda_test = 0.5 + fit_accur <- occuCOP(data = unmarkedFrameCOP( + y = COPsimul( + psi = psi_test, + lambda = lambda_test, + M = 1000, + J = 10 + ), + obsLength = matrix(1, nrow = 1000, ncol = 10) + )) + psi_estimate = backTransform(fit_accur, type = "psi")@estimate + lambda_estimate = backTransform(fit_accur, type = "lambda")@estimate + expect_equivalent( + psi_estimate, + psi_test, + tol = 0.05 + ) + expect_equivalent( + lambda_estimate, + lambda_test, + tol = 0.05 + ) + + + #TODO +}) + + +test_that("occuCOP can fit models with covariates", { + +}) + + diff --git a/tmp_use_COP.Rmd b/tmp_use_COP.Rmd new file mode 100644 index 0000000..57e9e55 --- /dev/null +++ b/tmp_use_COP.Rmd @@ -0,0 +1,584 @@ +--- +title: "occuCOP example" +author: "Léa Pautrel" +date: "`r format(Sys.time(), '%d %B %Y')`" +output: + html_document: + toc: yes + toc_depth: 3 + number_sections: true + toc_float: + collapsed: true + code_folding: show + theme: cerulean +editor_options: + chunk_output_type: console +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + echo = TRUE, # show code of the chunk and chunk output + error = TRUE, + collapse = FALSE, + comment = "", # no comment character for the chunk text outputs + out.width = "100%" # responsive width for chunk outputs (figures, ...) +) +``` + +```{r library, echo=T, results="hide"} +source("./R/occuCOP.R") +library(ggplot2) +library(ggrepel) +library(dplyr) +library(tibble) +``` + + + + + +# The COP model + +Hereafter, I use the notations defined in the following table. + +| Notation | Parameter | +| -------------: | :----------------------------------------------------------- | +| $I$ | Number of sites | +| $J$ | Number of sampling occasions | +| $\psi_i$ | Occupancy probability in site $i$ | +| $Z_i$ | Occupancy state of site $i$ (present = 1, absent = 0) | +| $\lambda_{ij}$ | Detection rate in site $i$ during sampling occasion $j$ | +| $T_{ij}$ | Duration or length of sampling occasion $j$ | +| $N_{i}$ | Number of detections in site $i$ | +| $N_{ij}$ | Number of detections in site $i$ during sampling occasion $j$ | + + +The model is defined as: + +$$ +Z_i \sim \text{Bernoulli}(\psi_{i}) \\ +N_{ij} \sim \text{Poisson}(\lambda_{ij} T_{ij}) +$$ + +The likelihood of this model is: + +\begin{align} +\begin{split} + L(\psi_i, \lambda_{is}) + &= \prod_{i=1}^{I} \mathbb{P}_{\psi_i, \lambda_{is}}(N_{i} = n_{i}) \\ + &= + \prod_{i, n_i > 0} \left ( \mathbb{P}_{\psi_i, \lambda_{is}}(N_{i} = n_i, n_i > 0) \right ) + \times + \prod_{i, n_i = 0} \left ( \mathbb{P}_{\psi_i, \lambda_{is}}(N_{i} = 0) \right ) \\ + &= + \prod_{i, n_i > 0} \left ( + \psi_i \frac{ \left( \sum_{s=1}^S(\lambda_{is} T_{is}) \right) ^ {n_i} }{ n_{i}! } e^{-\sum_{s=1}^S(\lambda_{is} T_{is})} + \right ) + \times + \prod_{i, n_i = 0} \left ( + \psi_i e^{-\sum_{s=1}^S(\lambda_{is} T_{is})} + (1-\psi_i) + \right ) +\end{split} +\end{align} + +# Simulation + + +```{r init} +set.seed(0) + +M = 100 # Number of sites +J = 5 # Number of observations (transects, sessions, sampling occasions...) +``` + +We first simulate the data set. We'll simulate data in M=`r M` sites during J=`r J` sampling occasions. + +We simulate site covariates: + +- "elev" will not be used to impact the occupancy probability +- "habitat" will be used + +```{r simul-psi-cov} +SiteCov <- data.frame( + "elev" = pmax(rnorm(n = M, mean = 50, sd = 50), 0), + "habitat" = factor(sample( + x = c("A", "B", "C"), + size = M, + replace = T + )) +) +print(as_tibble(SiteCov)) +``` + + +We simulate the occupancy state of all sites depending on the habitat type. + +```{r simul-psi-z} +# Occupancy probability depending on habitat type +simul_psi_habA = 0.9 +simul_psi_habB = 0.5 +simul_psi_habC = 0.1 + +# For each site, we simulate occupancy state +z_i = rep(NA, M) +for (i in 1:M) { + if (SiteCov$habitat[i] == 'A') { + z_i[i] <- + sample(c(0, 1), + size = 1, + prob = c(1 - simul_psi_habA, simul_psi_habA)) + next + } else if (SiteCov$habitat[i] == 'B') { + z_i[i] <- + sample(c(0, 1), + size = 1, + prob = c(1 - simul_psi_habB, simul_psi_habB)) + next + } else { + z_i[i] <- + sample(c(0, 1), + size = 1, + prob = c(1 - simul_psi_habC, simul_psi_habC)) + } +} + +# We have this data: +print(as_tibble(data.frame("habitat" = SiteCov$habitat, "z" = z_i))) + +# In our data, our occupancy probability per habitat is slightly different +# from the one we chose to simulate due to randomness: +print( + data.frame("habitat" = SiteCov$habitat, "z" = z_i) %>% + group_by(habitat) %>% + summarise("NbSites" = n(), "psi" = mean(z)) +) +``` + +There are `r sum(z_i)` occupied sites out of `r M` in our simulation. + +We simulate temporal covariates: + +- "wind" will not impact the detection rate +- "rain" will impact the detection rate + +```{r simul-lambda-cov} +# Detection rate: 1 detection per day on average, +simul_lambda = 1 +simul_lambda_rain = .3 + +# Temporal covariates +TemporalCov <- list( + "rain" = matrix(pmax(rexp(n = M * J, rate = 1 / 10), 0), nrow = M, ncol = J), + "wind" = matrix(rnorm(n = M * J, mean = 10), nrow = M, ncol = J) +) +print(as_tibble(TemporalCov)) +``` + +We then simulate the detection rate depending on "rain" as a linear + +```{r simul-lambda} +lambda_from_rain = function(rain) { + # pmin(pmax((3 - .2 * rain), 0), 2) + 2 / (1 + exp(.2 * (rain - 20))) +} +rain_lambda = data.frame("rain" = seq(0, max(TemporalCov$rain), by = c(.1))) +rain_lambda$lambda = lambda_from_rain(rain_lambda$rain) +plot(x = rain_lambda$rain, + y = rain_lambda$lambda, + type = "l") + +simul_lambda = lambda_from_rain(TemporalCov$rain) +``` + +Finally, for each site, we simulate a number of detections based on the occupancy state $z_i$ of site $i$ and detection rate $lambda_{ij}$ of site $i$ observation $j$. + +```{r simul-y} +y = matrix( + rpois(n = M * J, lambda = as.numeric(t(simul_lambda))), + nrow = M, + ncol = J, + byrow = T +) * z_i + +data.frame( + "simul_lambda" = as.numeric(t(simul_lambda)), + "y" = as.numeric(t(y)), + "z" = rep(z_i, each = J) +) %>% + ggplot(aes(x = simul_lambda, y = y)) + + geom_point(alpha=.3,shape=16,size=2) + + facet_grid(z ~ .,labeller = label_both) + + theme_light() +``` + +Let's say that each observation lasts one time-unit here, *e.g.* one day per sampling occasion. + +```{r simul-obsLength} +obsLength = y * 0 + 1 +class(obsLength) +print(as_tibble(obsLength)) +``` + + +# unmarkedFrameCOP object + +## Creation + +We then create our unmarkedFrameCOP object. + +```{r umf-creation} +umf = unmarkedFrameCOP( + y = y, + obsLength = obsLength, + siteCovs = SiteCov, + obsCovs = TemporalCov +) +``` + +## Visualisation + +```{r umf-visu} +head(umf) +summary(umf) +print(umf[2,4]) # 2nd site, 4th observation +plot(umf) +``` + +## Warning and errors + +There is an error if there are decimals in y. + +```{r umf-error-decimal} +y_with_decimals = y +y_with_decimals[2, 1] = 49.5 +unmarkedFrameCOP( + y = y_with_decimals, + obsLength = obsLength, + siteCovs = SiteCov, + obsCovs = TemporalCov +) +``` + +There is a warning if data is detection/non-detection (1/0) instead of count. + +```{r umf-warning-01} +y_detec_nodetec = (y > 0) * 1 +unmarkedFrameCOP( + y = y_detec_nodetec, + obsLength = obsLength, + siteCovs = SiteCov, + obsCovs = TemporalCov +) +``` + +There is an error if the dimension of obsLength is different than that of y. + +```{r umf-error-obsLength} +unmarkedFrameCOP( + y = y, + obsLength = obsLength[1:5, 1:2], + siteCovs = SiteCov, + obsCovs = TemporalCov +) +``` + +# Model fitting + +We fit the model. For more information, see [section How is the model fitted?](#how-is-the-model-fitted) + +## Null model + +```{r null-fit} +resCOP_null <- occuCOP( + data = umf, + psiformula = ~ 1, + lambdaformula = ~ 1, + method = "Nelder-Mead" +) +print(resCOP_null) +``` + +We can backtransform the parameters: + +```{r null-backtransform} +# Occcupancy estimate +plogis(resCOP_null@estimates@estimates$psi@estimates) # plogis(x): "inverse logit" +backTransform(resCOP_null, type = "psi") + +# Detection rate estimate +exp(resCOP_null@estimates@estimates$lambda@estimates) +backTransform(resCOP_null, type = "lambda") +``` + +## psi ~ elev; lambda ~ rain + +```{r cov-fit} +resCOP_habitat_rain <- occuCOP( + data = umf, + psiformula = ~ -1 + habitat, + lambdaformula = ~ rain, + method = "Nelder-Mead" +) +print(resCOP_habitat_rain) +``` + +A warning tell us that the model did not converge. We can add in parameters for optim in the `occuCOP` function, for example to increase the maximum number of iterations, with `control = list(maxit = 5000)`: + +```{r cov-fit-maxit} +resCOP_habitat_rain <- occuCOP( + data = umf, + psiformula = ~ -1 + habitat, + lambdaformula = ~ rain, + method = "Nelder-Mead", + control = list(maxit = 5000) +) +print(resCOP_habitat_rain) +``` + +We can backtransform the parameters. With covariates, we can no longer use the function `backTransform`: + +```{r cov-backtransform} +backTransform(resCOP_habitat_rain, type = "psi") +``` + +However, we can use the function `predict`: + +```{r cov-predict} +# Occcupancy estimate +plogis(resCOP_habitat_rain@estimates@estimates$psi@estimates) +psipred = predict(object = resCOP_habitat_rain, type = "psi") +print(as_tibble(cbind( + data.frame("habitat" = resCOP_habitat_rain@data@siteCovs$habitat), + psipred +))) + +# Detection rate estimate +exp(resCOP_habitat_rain@estimates@estimates$lambda@estimates) +lambdapred = predict(object = resCOP_habitat_rain, type = "lambda") +print(as_tibble(cbind( + data.frame("rain" = resCOP_habitat_rain@data@obsCovs$rain), + lambdapred +))) +``` + +## Ranking + +We can compare several models by their AIC by using `fitList` and `modSel`. + +```{r ranking} +fl <- fitList(fits = list("Null" = resCOP_null, + "psi~elev, lambda~rain" = resCOP_habitat_rain)) +modSel(fl) +``` + +## How is the model fitted? + +The model is fitted by maximum likelihood estimation (MLE). + +```{r how-is-the-model-fitted, class.source = 'fold-hide'} +# M = 100 +# J = 10 + +cpt = 1 +for (simul_psi in c(.1, .25, .5, .75, .9)) { + for (simul_lambda in c(1, 3, 5)) { + # simul_z = c(rep(1, simul_psi * 100), rep(0, times=((1 - simul_psi) * 100))) + simul_z = sample( + x = c(0, 1), + size = M, + replace = T, + prob = c(1 - simul_psi, simul_psi) + ) + simul_y <- matrix(rpois(n = M * J, lambda = simul_lambda), + nrow = M, + ncol = J) * simul_z + + fit = occuCOP(data = unmarkedFrameCOP(y = simul_y, + obsLength = (simul_y * 0 + 1))) + estim_psi = backTransform(fit, "psi") + estim_lambda = backTransform(fit, "lambda") + + nll_df_plot_i = occuCOP( + data = unmarkedFrameCOP(y = simul_y, obsLength = (simul_y * 0 + 1)), + get.NLL.params = + as.list(as.data.frame(t( + expand.grid("psi" = qlogis(seq(0.01, 0.99, by = 0.02)), + "lambda" = log(round( + simul_lambda * seq(from = 0.25, to = 1.75, by = .25), 2 + ))) + ))) + ) + nll_df_plot_i$simul_psi = simul_psi + nll_df_plot_i$simul_lambda = simul_lambda + nll_df_plot_i$estim_psi = estim_psi@estimate + nll_df_plot_i$estim_lambda = estim_lambda@estimate + + if (cpt==1) { + nll_df_plot = nll_df_plot_i + } else{ + nll_df_plot = rbind(nll_df_plot, nll_df_plot_i) + } + # cat('\r',cpt,"/ 15") + cpt = cpt + 1 + rm(nll_df_plot_i) + } +} + +nll_df_plot$psi = plogis(nll_df_plot[, "logit(psi).(Intercept)"]) +nll_df_plot$lambda = exp(nll_df_plot[, "log(lambda).(Intercept)"]) +nll_df_plot$simulated_lambda = (round(nll_df_plot$lambda, 2) == round(nll_df_plot$simul_lambda, 2)) +nll_df_plot$lambda_txt = format(nll_df_plot$lambda, digits = 2, nsmall = 2) +nll_df_plot$label = ifelse(nll_df_plot$psi == max(nll_df_plot$psi), + paste0("λ=", nll_df_plot$lambda_txt), + NA) + +df_estimates = nll_df_plot %>% + group_by(simul_psi, simul_lambda) %>% + summarise( + estim_psi = unique(estim_psi), + estim_lambda = unique(estim_lambda), + maximum_llh = max(-nll), + minimum_llh = min(-nll), + .groups = "drop" + ) + +ggplot() + + geom_line( + data = nll_df_plot, + aes( + x = psi, + y = -nll, + colour = lambda_txt, + linewidth = simulated_lambda, + linetype = simulated_lambda + ) + ) + + ggh4x::facet_grid2( + simul_psi ~ simul_lambda, + labeller = label_both, + scales = "free_y", + independent = "y" + ) + + labs( + title = "Negative log-likelihood of the COP model for different values of ψ and λ", + x = "ψ", + y = "Log-likelihood", + colour = "lambda" + ) + + scale_linewidth_manual(values = c("TRUE" = 1, "FALSE" = .3)) + + scale_linetype_manual(values = c("TRUE" = "solid", "FALSE" = "dashed")) + + theme_light()+ + geom_vline(data = df_estimates, + aes(xintercept = estim_psi), + linetype = "dotted") + + geom_label(data = df_estimates, aes( + x = estim_psi, + y = minimum_llh + abs(minimum_llh * .1), + label = paste0("λ: ", round(estim_lambda, 2)) + )) + + geom_hline(data = df_estimates, + aes(yintercept = maximum_llh), + linetype = "dotted")+ + geom_label(data = df_estimates, aes( + x = -0.05, + y = maximum_llh-25, + label = paste0("ψ: ", round(estim_psi, 2)) + )) + + xlim(-.1, 1.1) + + theme(legend.position = "none") + +``` + +# Interpret the results + + + +# Other models examples + +## occu + +```{r occu} +resOccuNull = occu( + formula = ~ 1 ~ 1, + data = unmarkedFrameOccu( + y = y, + siteCovs = SiteCov, + obsCovs = TemporalCov + ) +) +resOccuNull +backTransform(resOccuNull,"state") +backTransform(resOccuNull,"det") + +resOccu = occu( + formula = ~ rain ~ elev, + data = unmarkedFrameOccu( + y = y, + siteCovs = SiteCov, + obsCovs = TemporalCov + ) +) +resOccu +cbind(SiteCov$elev, predict(resOccu, "state")) +cbind(as.numeric(t(TemporalCov$rain)), predict(resOccu, "det")) + +``` + +## pcount + +```{r pcount} +respcountNull = pcount( + formula = ~ 1 ~ 1, + data = unmarkedFramePCount( + y = y, + siteCovs = SiteCov, + obsCovs = TemporalCov + ) +) +respcountNull +backTransform(respcountNull,"state") +backTransform(respcountNull,"det") + +respcount = pcount( + formula = ~ rain ~ elev, + data = unmarkedFramePCount( + y = y, + siteCovs = SiteCov, + obsCovs = TemporalCov + ) +) +respcount +cbind(SiteCov$elev, predict(respcount, "state")) +cbind(as.numeric(t(TemporalCov$rain)), predict(respcount, "det")) +``` + +# -- + +```{r dev} +# psiformula <- ~ 1 +# lambdaformula <- ~ 1 +# psistarts = rep(0, length(attr(terms(psiformula), "term.labels")) + 1) +# lambdastarts = rep(0, length(attr(terms(lambdaformula), "term.labels")) + 1) + +data <- umf +psiformula <- ~ habitat +lambdaformula <- ~ rain + + +method = "BFGS" +se = TRUE +engine = "R" +threads = 1L +na.rm = TRUE + +maxit = 1000 + +object = occuCOP( + data = data, + psiformula = psiformula, + lambdaformula = lambdaformula +) + +``` + +
\ No newline at end of file |