1 files changed, 584 insertions, 0 deletions
diff --git a/tmp_use_COP.Rmd b/tmp_use_COP.Rmd
new file mode 100644
index 0000000..57e9e55
--- /dev/null
+++ b/tmp_use_COP.Rmd
@@ -0,0 +1,584 @@
+---
+title: "occuCOP example"
+author: "Léa Pautrel"
+date: "`r format(Sys.time(), '%d %B %Y')`"
+output:
+  html_document:
+    toc: yes
+    toc_depth: 3
+    number_sections: true
+    toc_float: 
+      collapsed: true
+    code_folding: show
+    theme: cerulean
+editor_options: 
+  chunk_output_type: console
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(
+  echo = TRUE, # show code of the chunk and chunk output
+  error = TRUE,
+  collapse = FALSE,
+  comment = "", # no comment character for the chunk text outputs
+  out.width = "100%" # responsive width for chunk outputs (figures, ...)
+)
+```
+
+```{r library, echo=T, results="hide"}
+source("./R/occuCOP.R")
+library(ggplot2)
+library(ggrepel)
+library(dplyr)
+library(tibble)
+```
+
+
+
+
+
+# The COP model
+
+Hereafter, I use the notations defined in the following table.
+
+|       Notation | Parameter                                                    |
+| -------------: | :----------------------------------------------------------- |
+|            $I$ | Number of sites                                              |
+|            $J$ | Number of sampling occasions                                 |
+|       $\psi_i$ | Occupancy probability in site $i$                            |
+|          $Z_i$ | Occupancy state of site $i$ (present = 1, absent = 0)        |
+| $\lambda_{ij}$ | Detection rate in site $i$ during sampling occasion $j$      |
+|       $T_{ij}$ | Duration or length of sampling occasion $j$                  |
+|       $N_{i}$  | Number of detections in site $i$                             |
+|       $N_{ij}$ | Number of detections in site $i$ during sampling occasion $j$ |
+
+
+The model is defined as:
+
+$$
+Z_i \sim \text{Bernoulli}(\psi_{i}) \\
+N_{ij} \sim \text{Poisson}(\lambda_{ij} T_{ij})
+$$
+
+The likelihood of this model is:
+
+\begin{align}
+\begin{split}
+  L(\psi_i, \lambda_{is})
+    &= \prod_{i=1}^{I} \mathbb{P}_{\psi_i, \lambda_{is}}(N_{i} = n_{i}) \\
+    &=  
+        \prod_{i, n_i > 0} \left ( \mathbb{P}_{\psi_i, \lambda_{is}}(N_{i} = n_i, n_i > 0) \right )
+        \times 
+        \prod_{i, n_i = 0} \left ( \mathbb{P}_{\psi_i, \lambda_{is}}(N_{i} = 0) \right ) \\
+    &= 
+        \prod_{i, n_i > 0} \left ( 
+            \psi_i \frac{ \left( \sum_{s=1}^S(\lambda_{is} T_{is}) \right) ^ {n_i} }{ n_{i}! } e^{-\sum_{s=1}^S(\lambda_{is} T_{is})}
+        \right )
+        \times 
+        \prod_{i, n_i = 0} \left (
+            \psi_i e^{-\sum_{s=1}^S(\lambda_{is} T_{is})}  + (1-\psi_i)
+        \right )
+\end{split}
+\end{align}
+
+# Simulation
+
+
+```{r init}
+set.seed(0)
+
+M = 100 # Number of sites
+J = 5 # Number of observations (transects, sessions, sampling occasions...)
+```
+
+We first simulate the data set. We'll simulate data in M=`r M` sites during J=`r J` sampling occasions.
+
+We simulate site covariates:
+
+- "elev" will not be used to impact the occupancy probability
+- "habitat" will be used
+
+```{r simul-psi-cov}
+SiteCov <- data.frame(
+  "elev" = pmax(rnorm(n = M, mean = 50, sd = 50), 0),
+  "habitat" = factor(sample(
+    x = c("A", "B", "C"),
+    size = M,
+    replace = T
+  ))
+)
+print(as_tibble(SiteCov))
+```
+
+
+We simulate the occupancy state of all sites depending on the habitat type.
+
+```{r simul-psi-z}
+# Occupancy probability depending on habitat type
+simul_psi_habA = 0.9
+simul_psi_habB = 0.5
+simul_psi_habC = 0.1
+
+# For each site, we simulate occupancy state
+z_i = rep(NA, M)
+for (i in 1:M) {
+  if (SiteCov$habitat[i] == 'A') {
+    z_i[i] <-
+      sample(c(0, 1),
+             size = 1,
+             prob = c(1 - simul_psi_habA, simul_psi_habA))
+    next
+  } else if (SiteCov$habitat[i] == 'B') {
+    z_i[i] <-
+      sample(c(0, 1),
+             size = 1,
+             prob = c(1 - simul_psi_habB, simul_psi_habB))
+    next
+  } else {
+    z_i[i] <-
+      sample(c(0, 1),
+             size = 1,
+             prob = c(1 - simul_psi_habC, simul_psi_habC))
+  }
+}
+
+# We have this data:
+print(as_tibble(data.frame("habitat" = SiteCov$habitat, "z" = z_i)))
+
+# In our data, our occupancy probability per habitat is slightly different 
+# from the one we chose to simulate due to randomness:
+print(
+  data.frame("habitat" = SiteCov$habitat, "z" = z_i) %>% 
+    group_by(habitat) %>% 
+    summarise("NbSites" = n(), "psi" = mean(z))
+)
+```
+
+There are `r sum(z_i)` occupied sites out of `r M` in our simulation.
+
+We simulate temporal covariates:
+
+- "wind" will not impact the detection rate
+- "rain" will impact the detection rate
+
+```{r simul-lambda-cov}
+# Detection rate: 1 detection per day on average,
+simul_lambda = 1
+simul_lambda_rain = .3
+
+# Temporal covariates
+TemporalCov <- list(
+  "rain" = matrix(pmax(rexp(n = M * J, rate = 1 / 10), 0), nrow = M, ncol = J),
+  "wind" = matrix(rnorm(n = M * J, mean = 10), nrow = M, ncol = J)
+)
+print(as_tibble(TemporalCov))
+```
+
+We then simulate the detection rate depending on "rain" as a linear 
+
+```{r simul-lambda}
+lambda_from_rain = function(rain) {
+  # pmin(pmax((3 - .2 * rain), 0), 2)
+  2 / (1 + exp(.2 * (rain - 20)))
+}
+rain_lambda = data.frame("rain" = seq(0, max(TemporalCov$rain), by = c(.1)))
+rain_lambda$lambda = lambda_from_rain(rain_lambda$rain)
+plot(x = rain_lambda$rain,
+     y = rain_lambda$lambda,
+     type = "l")
+
+simul_lambda = lambda_from_rain(TemporalCov$rain)
+```
+
+Finally, for each site, we simulate a number of detections based on the occupancy state $z_i$ of site $i$ and detection rate $lambda_{ij}$ of site $i$ observation $j$.
+
+```{r simul-y}
+y = matrix(
+  rpois(n = M * J, lambda = as.numeric(t(simul_lambda))),
+  nrow = M,
+  ncol = J,
+  byrow = T
+) * z_i
+
+data.frame(
+  "simul_lambda" = as.numeric(t(simul_lambda)),
+  "y" = as.numeric(t(y)),
+  "z" = rep(z_i, each = J)
+) %>%
+  ggplot(aes(x = simul_lambda, y = y)) +
+  geom_point(alpha=.3,shape=16,size=2) +
+  facet_grid(z ~ .,labeller = label_both) +
+  theme_light()
+```
+
+Let's say that each observation lasts one time-unit here, *e.g.* one day per sampling occasion.
+
+```{r simul-obsLength}
+obsLength = y * 0 + 1
+class(obsLength)
+print(as_tibble(obsLength))
+```
+
+
+# unmarkedFrameCOP object
+
+## Creation
+
+We then create our unmarkedFrameCOP object.
+
+```{r umf-creation}
+umf = unmarkedFrameCOP(
+  y = y,
+  obsLength = obsLength,
+  siteCovs = SiteCov,
+  obsCovs = TemporalCov
+)
+```
+
+## Visualisation
+
+```{r umf-visu}
+head(umf)
+summary(umf)
+print(umf[2,4]) # 2nd site, 4th observation
+plot(umf)
+```
+
+## Warning and errors
+
+There is an error if there are decimals in y.
+
+```{r umf-error-decimal}
+y_with_decimals = y
+y_with_decimals[2, 1] = 49.5
+unmarkedFrameCOP(
+  y = y_with_decimals,
+  obsLength = obsLength,
+  siteCovs = SiteCov,
+  obsCovs = TemporalCov
+)
+```
+
+There is a warning if data is detection/non-detection (1/0) instead of count.
+
+```{r umf-warning-01}
+y_detec_nodetec = (y > 0) * 1
+unmarkedFrameCOP(
+  y = y_detec_nodetec,
+  obsLength = obsLength,
+  siteCovs = SiteCov,
+  obsCovs = TemporalCov
+)
+```
+
+There is an error if the dimension of obsLength is different than that of y.
+
+```{r umf-error-obsLength}
+unmarkedFrameCOP(
+  y = y,
+  obsLength = obsLength[1:5, 1:2],
+  siteCovs = SiteCov,
+  obsCovs = TemporalCov
+)
+```
+
+# Model fitting
+
+We fit the model. For more information, see [section How is the model fitted?](#how-is-the-model-fitted)
+
+## Null model
+
+```{r null-fit}
+resCOP_null <- occuCOP(
+  data = umf,
+  psiformula = ~ 1,
+  lambdaformula = ~ 1,
+  method = "Nelder-Mead"
+)
+print(resCOP_null)
+```
+
+We can backtransform the parameters:
+
+```{r null-backtransform}
+# Occcupancy estimate
+plogis(resCOP_null@estimates@estimates$psi@estimates) # plogis(x): "inverse logit"
+backTransform(resCOP_null, type = "psi")
+
+# Detection rate estimate
+exp(resCOP_null@estimates@estimates$lambda@estimates)
+backTransform(resCOP_null, type = "lambda")
+```
+
+## psi ~ elev; lambda ~ rain
+
+```{r cov-fit}
+resCOP_habitat_rain <- occuCOP(
+  data = umf,
+  psiformula = ~ -1 + habitat,
+  lambdaformula = ~ rain,
+  method = "Nelder-Mead"
+)
+print(resCOP_habitat_rain)
+```
+
+A warning tell us that the model did not converge. We can add in parameters for optim in the `occuCOP` function, for example to increase the maximum number of iterations, with `control = list(maxit = 5000)`:
+
+```{r cov-fit-maxit}
+resCOP_habitat_rain <- occuCOP(
+  data = umf,
+  psiformula = ~ -1 + habitat,
+  lambdaformula = ~ rain,
+  method = "Nelder-Mead",
+  control = list(maxit = 5000)
+)
+print(resCOP_habitat_rain)
+```
+
+We can backtransform the parameters. With covariates, we can no longer use the function `backTransform`:
+
+```{r cov-backtransform}
+backTransform(resCOP_habitat_rain, type = "psi")
+```
+
+However, we can use the function `predict`:
+
+```{r cov-predict}
+# Occcupancy estimate
+plogis(resCOP_habitat_rain@estimates@estimates$psi@estimates)
+psipred = predict(object = resCOP_habitat_rain, type = "psi")
+print(as_tibble(cbind(
+  data.frame("habitat" = resCOP_habitat_rain@data@siteCovs$habitat),
+  psipred
+)))
+
+# Detection rate estimate
+exp(resCOP_habitat_rain@estimates@estimates$lambda@estimates)
+lambdapred = predict(object = resCOP_habitat_rain, type = "lambda")
+print(as_tibble(cbind(
+  data.frame("rain" = resCOP_habitat_rain@data@obsCovs$rain),
+  lambdapred
+)))
+```
+
+## Ranking
+
+We can compare several models by their AIC by using `fitList` and `modSel`.
+
+```{r ranking}
+fl <- fitList(fits = list("Null" = resCOP_null,
+                          "psi~elev, lambda~rain" = resCOP_habitat_rain))
+modSel(fl)
+```
+
+## How is the model fitted?
+
+The model is fitted by maximum likelihood estimation (MLE). 
+
+```{r how-is-the-model-fitted, class.source = 'fold-hide'}
+# M = 100
+# J = 10
+
+cpt = 1
+for (simul_psi in c(.1, .25, .5, .75, .9)) {
+  for (simul_lambda in c(1, 3, 5)) {
+    # simul_z = c(rep(1, simul_psi * 100), rep(0, times=((1 - simul_psi) * 100)))
+    simul_z = sample(
+      x = c(0, 1),
+      size = M,
+      replace = T,
+      prob = c(1 - simul_psi, simul_psi)
+    )
+    simul_y <- matrix(rpois(n = M * J, lambda = simul_lambda),
+                      nrow = M,
+                      ncol = J) * simul_z
+    
+    fit = occuCOP(data = unmarkedFrameCOP(y = simul_y,
+                                          obsLength = (simul_y * 0 + 1)))
+    estim_psi = backTransform(fit, "psi")
+    estim_lambda = backTransform(fit, "lambda")
+    
+    nll_df_plot_i = occuCOP(
+      data = unmarkedFrameCOP(y = simul_y, obsLength = (simul_y * 0 + 1)),
+      get.NLL.params =
+        as.list(as.data.frame(t(
+          expand.grid("psi" = qlogis(seq(0.01, 0.99, by = 0.02)),
+                      "lambda" = log(round(
+                        simul_lambda * seq(from = 0.25, to = 1.75, by = .25), 2
+                      )))
+        )))
+    )
+    nll_df_plot_i$simul_psi = simul_psi
+    nll_df_plot_i$simul_lambda = simul_lambda
+    nll_df_plot_i$estim_psi = estim_psi@estimate
+    nll_df_plot_i$estim_lambda = estim_lambda@estimate
+    
+    if (cpt==1) {
+      nll_df_plot = nll_df_plot_i
+    } else{
+      nll_df_plot = rbind(nll_df_plot, nll_df_plot_i)
+    }
+    # cat('\r',cpt,"/ 15")
+    cpt = cpt + 1
+    rm(nll_df_plot_i)
+  }
+}
+
+nll_df_plot$psi = plogis(nll_df_plot[, "logit(psi).(Intercept)"])
+nll_df_plot$lambda = exp(nll_df_plot[, "log(lambda).(Intercept)"])
+nll_df_plot$simulated_lambda = (round(nll_df_plot$lambda, 2) == round(nll_df_plot$simul_lambda, 2))
+nll_df_plot$lambda_txt = format(nll_df_plot$lambda, digits = 2, nsmall = 2)
+nll_df_plot$label = ifelse(nll_df_plot$psi == max(nll_df_plot$psi),
+                           paste0("λ=", nll_df_plot$lambda_txt),
+                           NA)
+
+df_estimates = nll_df_plot %>%
+  group_by(simul_psi, simul_lambda) %>%
+  summarise(
+    estim_psi = unique(estim_psi),
+    estim_lambda = unique(estim_lambda),
+    maximum_llh = max(-nll),
+    minimum_llh = min(-nll),
+    .groups = "drop"
+  )
+
+ggplot() +
+  geom_line(
+    data = nll_df_plot,
+    aes(
+      x = psi,
+      y = -nll,
+      colour = lambda_txt,
+      linewidth = simulated_lambda,
+      linetype = simulated_lambda
+    )
+  ) +
+  ggh4x::facet_grid2(
+    simul_psi ~ simul_lambda,
+    labeller = label_both,
+    scales = "free_y",
+    independent = "y"
+  ) +
+  labs(
+    title = "Negative log-likelihood of the COP model for different values of ψ and λ",
+    x = "ψ",
+    y = "Log-likelihood",
+    colour = "lambda"
+  ) +
+  scale_linewidth_manual(values = c("TRUE" = 1, "FALSE" = .3)) +
+  scale_linetype_manual(values = c("TRUE" = "solid", "FALSE" = "dashed")) +
+  theme_light()+
+  geom_vline(data = df_estimates,
+             aes(xintercept = estim_psi),
+             linetype = "dotted") +
+  geom_label(data = df_estimates, aes(
+    x = estim_psi,
+    y = minimum_llh + abs(minimum_llh * .1),
+    label = paste0("λ: ", round(estim_lambda, 2))
+  )) +
+  geom_hline(data = df_estimates,
+             aes(yintercept = maximum_llh),
+             linetype = "dotted")+
+    geom_label(data = df_estimates, aes(
+    x = -0.05,
+    y = maximum_llh-25,
+    label = paste0("ψ: ", round(estim_psi, 2))
+  )) +
+  xlim(-.1, 1.1) +
+  theme(legend.position = "none")
+
+```
+
+# Interpret the results
+
+
+
+# Other models examples
+
+## occu
+
+```{r occu}
+resOccuNull = occu(
+  formula = ~ 1 ~ 1,
+  data = unmarkedFrameOccu(
+    y = y,
+    siteCovs = SiteCov,
+    obsCovs = TemporalCov
+  )
+)
+resOccuNull
+backTransform(resOccuNull,"state")
+backTransform(resOccuNull,"det")
+
+resOccu = occu(
+  formula = ~ rain ~ elev,
+  data = unmarkedFrameOccu(
+    y = y,
+    siteCovs = SiteCov,
+    obsCovs = TemporalCov
+  )
+)
+resOccu
+cbind(SiteCov$elev, predict(resOccu, "state"))
+cbind(as.numeric(t(TemporalCov$rain)), predict(resOccu, "det"))
+
+```
+
+## pcount
+
+```{r pcount}
+respcountNull = pcount(
+  formula = ~ 1 ~ 1,
+  data = unmarkedFramePCount(
+    y = y,
+    siteCovs = SiteCov,
+    obsCovs = TemporalCov
+  )
+)
+respcountNull
+backTransform(respcountNull,"state")
+backTransform(respcountNull,"det")
+
+respcount = pcount(
+  formula = ~ rain ~ elev,
+  data = unmarkedFramePCount(
+    y = y,
+    siteCovs = SiteCov,
+    obsCovs = TemporalCov
+  )
+)
+respcount
+cbind(SiteCov$elev, predict(respcount, "state"))
+cbind(as.numeric(t(TemporalCov$rain)), predict(respcount, "det"))
+```
+
+# --
+ 
+```{r dev}
+# psiformula <-  ~ 1
+# lambdaformula <- ~ 1
+# psistarts = rep(0, length(attr(terms(psiformula), "term.labels")) + 1)
+# lambdastarts = rep(0, length(attr(terms(lambdaformula), "term.labels")) + 1)
+
+data <- umf
+psiformula <- ~ habitat
+lambdaformula <- ~ rain
+
+
+method = "BFGS"
+se = TRUE
+engine = "R"
+threads = 1L
+na.rm = TRUE
+
+maxit = 1000
+
+object = occuCOP(
+  data = data,
+  psiformula = psiformula,
+  lambdaformula = lambdaformula
+)
+
+```
+ 
+ 
+\ No newline at end of file