Fix factor bug and re-generate vignettes

author: Ken Kellner <ken@kenkellner.com> 2022-11-10 10:34:06 -0500
committer: Ken Kellner <ken@kenkellner.com> 2022-11-10 10:34:06 -0500
commit: d9ad3d4f748811b1a2a27aacce89a97bac1dd736 (patch)
tree: 30e81ce41ec34e22218c58a9d74951bff0fd8eee
parent: acb94f6798b5d168d5cf21df9e365fec1d838d0c (diff)
14 files changed, 86 insertions, 68 deletions
diff --git a/DESCRIPTION b/DESCRIPTION
index a27b17e..61f103c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ubms
-Version: 1.2.1
-Date: 2022-11-09
+Version: 1.2.2
+Date: 2022-11-10
 Title: Bayesian Models for Data from Unmarked Animals using 'Stan'
 Authors@R: person("Ken", "Kellner", email="contact@kenkellner.com", 
                   role=c("cre","aut"))
diff --git a/Makefile b/Makefile
index 0354114..552aa53 100644
--- a/Makefile
+++ b/Makefile
@@ -26,7 +26,7 @@ README:
 	sleep 3
 	rm README.html
 
-vignettes: vignettes/random-effects.Rmd vignettes/JAGS-comparison.Rmd vignettes/spatial-models.Rmd
+vignettes: vignettes/random-effects.Rmd vignettes/JAGS-comparison.Rmd vignettes/spatial-models.Rmd vignettes/grouse-example.Rmd
 	touch test.png; mv *.png vignettes; rm vignettes/test.png
 	Rscript -e "devtools::build_vignettes()"
 
@@ -54,3 +54,6 @@ vignettes/JAGS-comparison.Rmd: vignettes/JAGS-comparison.Rmd.orig
 
 vignettes/spatial-models.Rmd: vignettes/spatial-models.Rmd.orig
 	Rscript -e "knitr::knit('vignettes/spatial-models.Rmd.orig', output='vignettes/spatial-models.Rmd')"
+
+vignettes/grouse-example.Rmd: vignettes/grouse-example.Rmd.orig
+	Rscript -e "knitr::knit('vignettes/grouse-example.Rmd.orig', output='vignettes/grouse-example.Rmd')"
diff --git a/NEWS.md b/NEWS.md
index f5d2cef..d3a8461 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,12 +1,10 @@
-# ubms v1.2.1
+# ubms v1.2.2
 
 * Fix broken URLs in vignettes
-
-# ubms v1.2.0
-
 * New kfold function
 * Add ability to not save log likelihoods in posterior
 * Add new Laplace prior (thanks Justin Cally)
+* Better handling of factors with levels that don't appear in data
 * Fix CRAN clang warnings
 * Misc. bugfixes
 
diff --git a/R/inputs.R b/R/inputs.R
index f3c183b..438adc6 100644
--- a/R/inputs.R
+++ b/R/inputs.R
@@ -142,7 +142,7 @@ get_nrandom <- function(formula, data){
 
   out <- sapply(rand, function(x){
     col_nm <- as.character(x[[3]])
-    length(unique(data[[col_nm]]))
+    length(levels(as.factor(data[[col_nm]])))
   })
   as.array(out)
 }
diff --git a/README.Rmd b/README.Rmd
index 2ab0fa5..481796f 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -91,24 +91,18 @@ umf <- unmarkedFrameOccu(y=y, siteCovs=dat_occ, obsCovs=dat_p)
 Fit a model with a random intercept, using 3 parallel cores:
 
 ```{r, eval=FALSE}
-(fm <- stan_occu(~x2 ~x1 + (1|group), umf, chains=3, cores=3))
+(fm <- stan_occu(~x2 ~x1 + (1|group), umf, chains=3, cores=3, seed=123))
 ```
 
 ```{r, echo=FALSE, warning=FALSE}
-set.seed(123)
-(fm <- stan_occu(~x2 ~x1 + (1|group), umf, chains=3, cores=3, refresh=0))
+(fm <- stan_occu(~x2 ~x1 + (1|group), umf, chains=3, cores=3, refresh=0, seed=123))
 ```
 
 
 Examine residuals for occupancy and detection submodels (following [Wright et al. 2019](https://doi.org/10.1002/ecy.2703)).
 Each panel represents a draw from the posterior distribution.
 
-```{r, eval=FALSE}
-plot(fm)
-```
-
-```{r resids, fig.height=7, echo=FALSE}
-set.seed(123)
+```{r resids, fig.height=7}
 plot(fm)
 ```
 
@@ -120,7 +114,6 @@ plot(fm_fit)
 ```
 
 ```{r gof, echo=FALSE}
-set.seed(123)
 fm_fit <- gof(fm, draws=500, quiet=TRUE)
 plot(fm_fit)
 ```
diff --git a/README.md b/README.md
index 38e91b8..5dfa0d9 100644
--- a/README.md
+++ b/README.md
@@ -95,26 +95,27 @@ umf <- unmarkedFrameOccu(y=y, siteCovs=dat_occ, obsCovs=dat_p)
 Fit a model with a random intercept, using 3 parallel cores:
 
 ``` r
-(fm <- stan_occu(~x2 ~x1 + (1|group), umf, chains=3, cores=3))
+(fm <- stan_occu(~x2 ~x1 + (1|group), umf, chains=3, cores=3, seed=123))
 ```
 
     ## 
     ## Call:
     ## stan_occu(formula = ~x2 ~ x1 + (1 | group), data = umf, chains = 3, 
-    ##     cores = 3, refresh = 0)
+    ##     cores = 3, refresh = 0, seed = 123)
     ## 
     ## Occupancy (logit-scale):
-    ##                 Estimate    SD   2.5%  97.5% n_eff Rhat
-    ## (Intercept)        0.317 0.294 -0.259  0.900   668    1
-    ## x1                -0.461 0.119 -0.693 -0.229  4502    1
-    ## sigma [1|group]    1.388 0.277  0.929  2.018  1982    1
+    ##                 Estimate    SD   2.5%  97.5% n_eff  Rhat
+    ## (Intercept)        0.318 0.290 -0.254  0.887   926 1.001
+    ## x1                -0.461 0.114 -0.683 -0.237  4428 0.999
+    ## sigma [1|group]    1.337 0.261  0.911  1.924  2112 1.002
     ## 
     ## Detection (logit-scale):
-    ##             Estimate     SD  2.5% 97.5% n_eff  Rhat
-    ## (Intercept)    0.382 0.0592 0.268 0.496  5380 1.000
-    ## x2             0.586 0.0615 0.469 0.710  4311 0.999
+    ##             Estimate     SD  2.5% 97.5% n_eff Rhat
+    ## (Intercept)    0.383 0.0607 0.265 0.503  4539    1
+    ## x2             0.586 0.0591 0.470 0.704  4963    1
     ## 
-    ## LOOIC: 2267.997
+    ## LOOIC: 2267.291
+    ## Runtime: 27.948 sec
 
 Examine residuals for occupancy and detection submodels (following
 [Wright et al. 2019](https://doi.org/10.1002/ecy.2703)). Each panel
diff --git a/inst/CITATION b/inst/CITATION
index 695f8e2..2da2ed8 100644
--- a/inst/CITATION
+++ b/inst/CITATION
@@ -9,10 +9,9 @@ bibentry(bibtype = "Article",
                    person("Dean E.", "Beyer"),
                    person("Jerrold L.", "Belant")),
   journal      = "Methods in Ecology and Evolution",
-  year         = "2021"#,
-  #volume       = "1",
-  #number       = "10",
-  #pages        = "1--20",
-  #url          = ""
+  year         = "2021",
+  volume       = "13",
+  pages        = "577--584",
+  url          = "https://doi.org/10.1111/2041-210X.13777"
 )
 
diff --git a/tests/testthat/test_inputs.R b/tests/testthat/test_inputs.R
index 9bf5e9e..914a4e9 100644
--- a/tests/testthat/test_inputs.R
+++ b/tests/testthat/test_inputs.R
@@ -130,6 +130,11 @@ test_that("get_nrandom returns number of levels of each grouping variable",{
                "Nested random effects (using / and :) are not supported", fixed=TRUE)
 })
 
+test_that("get_nrandom handles situation where a factor level is missing in data",{
+  dat <- data.frame(x=factor(c("a","b","c"),levels=letters[1:4]))
+  expect_equivalent(get_nrandom(~(1|x), dat), 4)
+})
+
 test_that("split_formula works",{
   inp <- ~1~1
   expect_equal(split_formula(inp), list(det=~1, state=~1))
diff --git a/vignettes/JAGS-comparison.Rmd b/vignettes/JAGS-comparison.Rmd
index 53a1b7f..3a68421 100644
--- a/vignettes/JAGS-comparison.Rmd
+++ b/vignettes/JAGS-comparison.Rmd
@@ -85,7 +85,7 @@ I probably did not use enough iterations but it's fine for this example.
 
 
 ```r
-ubms_fit <- stan_occu(~x2~x1+(1|group), umf, chains=3, iter=300, cores=3)
+ubms_fit <- stan_occu(~x2~x1+(1|group), umf, chains=3, iter=300, cores=3, seed=123)
 ```
 
 
@@ -153,6 +153,7 @@ Finally, I fit the model using the R package [jagsUI](https://cran.r-project.org
 
 
 ```r
+set.seed(123)
 library(jagsUI)
 jags_fit <- jags(jags_data, inits, params, modfile, n.chains=3, n.adapt=100, n.iter=2000,
                  n.burnin=1000, n.thin=2, parallel=TRUE, verbose=FALSE)
diff --git a/vignettes/JAGS-comparison.Rmd.orig b/vignettes/JAGS-comparison.Rmd.orig
index ce469c6..12af120 100644
--- a/vignettes/JAGS-comparison.Rmd.orig
+++ b/vignettes/JAGS-comparison.Rmd.orig
@@ -84,11 +84,11 @@ Using `stan_occu`, I fit a model with a random effect of group on occupancy (`(1
 I probably did not use enough iterations but it's fine for this example.
 
 ```{r, eval=FALSE}
-ubms_fit <- stan_occu(~x2~x1+(1|group), umf, chains=3, iter=300, cores=3)
+ubms_fit <- stan_occu(~x2~x1+(1|group), umf, chains=3, iter=300, cores=3, seed=123)
 ```
 
 ```{r, echo=FALSE}
-ubms_fit <- stan_occu(~x2~x1+(1|group), umf, chains=3, iter=300, cores=3, refresh=0)
+ubms_fit <- stan_occu(~x2~x1+(1|group), umf, chains=3, iter=300, cores=3, refresh=0, seed=123)
 ```
 
 
@@ -150,6 +150,7 @@ inits <- function(){list(z=apply(y, 1, max, na.rm=TRUE))}
 Finally, I fit the model using the R package [jagsUI](https://cran.r-project.org/package=jagsUI).
 
 ```{r}
+set.seed(123)
 library(jagsUI)
 jags_fit <- jags(jags_data, inits, params, modfile, n.chains=3, n.adapt=100, n.iter=2000,
                  n.burnin=1000, n.thin=2, parallel=TRUE, verbose=FALSE)
diff --git a/vignettes/references.bib b/vignettes/references.bib
index faa7bb8..8933fe0 100644
--- a/vignettes/references.bib
+++ b/vignettes/references.bib
@@ -202,3 +202,15 @@ year = {2013}
   year={2014},
   publisher={CRC press}
 }
+
+@article{Kellner_2021,
+ author = {Kellner, Kenneth F. and Fowler, Nicholas L. and Petroelje, Tyler R. and Kautz, Todd M. and Beyer Jr., Dean E. and Belant, Jerrold L.},
+ title = {ubms: An R package for fitting hierarchical occupancy and N-mixture abundance models in a Bayesian framework},
+ journal = {Methods in Ecology and Evolution},
+ year = {2021},
+ volume = {13},
+ pages = {577-584},
+ url = {https://besjournals.onlinelibrary.wiley.com/doi/abs/10.1111/2041-210X.137},
+ doi = {10.1111/2041-210X.13777},
+ keyword = {Bayesian methods, modelling, population ecology, statistics}
+}
diff --git a/vignettes/spatial-models.Rmd b/vignettes/spatial-models.Rmd
index 63cc2f7..e6af0ee 100644
--- a/vignettes/spatial-models.Rmd
+++ b/vignettes/spatial-models.Rmd
@@ -225,7 +225,7 @@ This will take Stan a few minutes with default MCMC settings.
 
 
 ```r
-fit_ubms <- stan_occu(form, umf, chains=3, cores=3)
+fit_ubms <- stan_occu(form, umf, chains=3, cores=3, seed=123)
 ```
 
 
@@ -246,26 +246,26 @@ fit_ubms
 ## 
 ## Call:
 ## stan_occu(formula = form, data = umf, chains = 3, refresh = 0, 
-##     cores = 3)
+##     cores = 3, seed = 123)
 ## 
 ## Occupancy (logit-scale):
-##             Estimate     SD    2.5%  97.5% n_eff Rhat
-## (Intercept)   -1.054 0.2324 -1.5121 -0.611  1177 1.01
-## habCov12      -0.216 0.3201 -0.8831  0.383  2499 1.00
-## habCov13      -0.261 0.3223 -0.8950  0.356  2262 1.00
-## habCov2        1.089 0.1801  0.7416  1.461  1295 1.01
-## RSR [tau]      0.144 0.0579  0.0675  0.283   264 1.02
+##             Estimate    SD    2.5%  97.5% n_eff  Rhat
+## (Intercept)   -1.068 0.231 -1.5520 -0.646   947 1.001
+## habCov12      -0.216 0.320 -0.8350  0.405  2381 1.000
+## habCov13      -0.264 0.321 -0.9002  0.362  2244 0.999
+## habCov2        1.104 0.181  0.7612  1.493  1035 1.000
+## RSR [tau]      0.140 0.057  0.0599  0.274   205 1.015
 ## 
 ## Detection (logit-scale):
 ##             Estimate    SD   2.5% 97.5% n_eff Rhat
-## (Intercept)    1.734 0.371  1.035 2.488  1756    1
-## detCov1        0.108 0.203 -0.291 0.497  4281    1
-## detCov21       1.183 0.556  0.102 2.270  2319    1
-## detCov22       1.379 0.561  0.307 2.533  2471    1
-## detCov23      -0.245 0.486 -1.204 0.704  2218    1
+## (Intercept)    1.712 0.355  1.061 2.413  1783    1
+## detCov1        0.115 0.210 -0.288 0.528  4778    1
+## detCov21       1.188 0.538  0.152 2.285  2360    1
+## detCov22       1.385 0.552  0.347 2.508  2439    1
+## detCov23      -0.219 0.476 -1.156 0.731  2230    1
 ## 
-## LOOIC: 668.475
-## Runtime: 88.788 sec
+## LOOIC: 668.955
+## Runtime: 2.828 min
 ```
 
 The precision term (tau, $\tau$) associated with the spatial random effect is the final row of the occupancy model estimates.
@@ -292,12 +292,12 @@ head(psi)
 
 ```
 ##   Predicted         SD       2.5%     97.5%
-## 1 0.1441804 0.04624230 0.07020541 0.2517342
-## 2 0.2390063 0.08739192 0.10297331 0.4388321
-## 3 0.3640516 0.11781411 0.16512449 0.6239752
-## 4 0.4593940 0.13070577 0.22866630 0.7265760
-## 5 0.4570641 0.12368794 0.23700396 0.7078097
-## 6 0.3865731 0.11876489 0.18351350 0.6434938
+## 1 0.1406819 0.04367637 0.06798461 0.2395582
+## 2 0.2353870 0.08364601 0.10436390 0.4261004
+## 3 0.3613298 0.11519917 0.16951184 0.6225812
+## 4 0.4581433 0.12669375 0.23324167 0.7257790
+## 5 0.4564998 0.12228635 0.22897164 0.7091163
+## 6 0.3849536 0.11732801 0.17920393 0.6357226
 ```
 
 ## Plotting
@@ -329,7 +329,8 @@ It is important to confirm that the spatial random effect is actually improving
 To check this, first fit a similar model without the RSR:
 
 ```r
-fit_nonspatial <- stan_occu(~detCov1 + detCov2 ~habCov1 + habCov2, umf, chains=3)
+fit_nonspatial <- stan_occu(~detCov1 + detCov2 ~habCov1 + habCov2, umf,
+                            chains=3, seed=123)
 ```
 
 
@@ -350,8 +351,8 @@ round(modSel(fl),2)
 
 ```
 ##                   elpd nparam elpd_diff se_diff weight
-## fit_ubms       -334.24  46.25      0.00    0.00      1
-## fit_nonspatial -380.46   8.36    -46.22    8.26      0
+## fit_ubms       -334.48  47.09       0.0    0.00      1
+## fit_nonspatial -380.68   8.57     -46.2    8.32      0
 ```
 
 A measure of predictive accuracy (`elpd`) is shown for each model, and the model with the highest predictive accuracy is listed first.
@@ -369,6 +370,7 @@ The code for fitting the model in `stocc` is taken directly from the package dem
 
 
 ```r
+set.seed(123)
 names <- list(
   visit=list(site="site",obs="obs"),
   site=list(site="site", coords=c("x","y"))
diff --git a/vignettes/spatial-models.Rmd.orig b/vignettes/spatial-models.Rmd.orig
index 1382c7a..8eb245b 100644
--- a/vignettes/spatial-models.Rmd.orig
+++ b/vignettes/spatial-models.Rmd.orig
@@ -189,14 +189,14 @@ We will then fit the model in Stan with `stan_occu()`, using 3 parallel cores.
 This will take Stan a few minutes with default MCMC settings.
 
 ```{r, eval=FALSE}
-fit_ubms <- stan_occu(form, umf, chains=3, cores=3)
+fit_ubms <- stan_occu(form, umf, chains=3, cores=3, seed=123)
 ```
 
 ```{r, message=FALSE, echo=FALSE, warning=FALSE}
 if(file.exists("fit_ubms.Rds")){
   fit_ubms <- readRDS("fit_ubms.Rds")
 } else {
-  fit_ubms <- stan_occu(form, umf, chains=3, refresh=0, cores=3)
+  fit_ubms <- stan_occu(form, umf, chains=3, refresh=0, cores=3, seed=123)
   saveRDS(fit_ubms, "fit_ubms.Rds")
 }
 ```
@@ -252,7 +252,8 @@ plot_spatial(fit_ubms, "eta")
 It is important to confirm that the spatial random effect is actually improving the predictive power of the model; if it isn't, we ought to remove it.
 To check this, first fit a similar model without the RSR:
 ```{r, eval=FALSE}
-fit_nonspatial <- stan_occu(~detCov1 + detCov2 ~habCov1 + habCov2, umf, chains=3)
+fit_nonspatial <- stan_occu(~detCov1 + detCov2 ~habCov1 + habCov2, umf,
+                            chains=3, seed=123)
 ```
 
 ```{r, echo=FALSE, message=FALSE, warning=FALSE}
@@ -261,7 +262,7 @@ if(file.exists("fit_nonspatial.Rds")){
 } else {
   options(mc.cores=3)
   fit_nonspatial <- stan_occu(~detCov1 + detCov2 ~habCov1 + habCov2, umf, 
-                              chains=3, refresh=0)
+                              chains=3, refresh=0, seed=123)
   saveRDS(fit_nonspatial, "fit_nonspatial.Rds")
 }
 ```
@@ -292,6 +293,7 @@ To show this, we will fit the same model in `stocc` and compare the occupancy pr
 The code for fitting the model in `stocc` is taken directly from the package demo.
 
 ```{r, eval=FALSE}
+set.seed(123)
 names <- list(
   visit=list(site="site",obs="obs"),
   site=list(site="site", coords=c("x","y"))
@@ -308,6 +310,7 @@ fit_stocc <- spatial.occupancy(
 ```
 
 ```{r, echo=FALSE,message=FALSE, warning=FALSE}
+set.seed(123)
 names <- list(
   visit=list(site="site",obs="obs"),
   site=list(site="site", coords=c("x","y"))
diff --git a/vignettes/ubms.Rmd b/vignettes/ubms.Rmd
index 317b2e1..ccbdb50 100644
--- a/vignettes/ubms.Rmd
+++ b/vignettes/ubms.Rmd
@@ -160,12 +160,12 @@ Tell Stan how many parallel cores you want to use by assigning a value to the `c
 ```{r, eval=FALSE}
 library(ubms)
 
-(fit_stan <- stan_occu(~1~1, data=umf, chains=3, iter=300, cores=3))
+(fit_stan <- stan_occu(~1~1, data=umf, chains=3, iter=300, cores=3, seed=123))
 ```
 
 ```{r, echo=FALSE}
 library(ubms)
-(fit_stan <- stan_occu(~1~1, data=umf, chains=3, iter=300, refresh=0))
+(fit_stan <- stan_occu(~1~1, data=umf, chains=3, iter=300, refresh=0, seed=123))
 ```
 
 ### Compare results
@@ -236,7 +236,7 @@ This can help improve model convergence and is generally a good idea.
 fit_null <- fit_stan
 
 fit_global <- stan_occu(~scale(date)~scale(forest)+scale(ele), data=umf,
-                        chains=3, iter=300)
+                        chains=3, iter=300, seed=123)
 ```
 
 ```{r, echo=FALSE}
@@ -245,7 +245,7 @@ fit_null <- fit_stan
 
 ```{r, warning=TRUE, echo=FALSE}
 fit_global <- stan_occu(~scale(date)~scale(forest)+scale(ele), data=umf,
-                        chains=3, iter=300, refresh=0)
+                        chains=3, iter=300, refresh=0, seed=123)
 ```
 
 The `fit_global` model gave us some warnings about the effective sample size (`n_eff`) along with a suggested solution.
author	Ken Kellner <ken@kenkellner.com>	2022-11-10 10:34:06 -0500
committer	Ken Kellner <ken@kenkellner.com>	2022-11-10 10:34:06 -0500
commit	d9ad3d4f748811b1a2a27aacce89a97bac1dd736 (patch)
tree	30e81ce41ec34e22218c58a9d74951bff0fd8eee
parent	acb94f6798b5d168d5cf21df9e365fec1d838d0c (diff)