Fixes to Royle-Nichols model

author: Ken Kellner <ken@kenkellner.com> 2022-02-23 18:57:29 -0500
committer: Ken Kellner <ken@kenkellner.com> 2022-02-23 18:57:29 -0500
commit: c18af620ceb9fddd7316fcde1c3a3eef7914a8b1 (patch)
tree: d90dc34135193d0d4bdc4c6baaa92080d5ceafd3
parent: 873ad366d0ee70e0c20dbb66f0eebb66f619ddec (diff)
2 files changed, 18 insertions, 24 deletions
diff --git a/examples/occu_rn.jl b/examples/occu_rn.jl
index dad8414..c42584c 100644
--- a/examples/occu_rn.jl
+++ b/examples/occu_rn.jl
@@ -8,7 +8,7 @@ using Unmarked
 Random.seed!(123);
 
 #Simulate data
-N = 400;
+N = 4000;
 J = 5;
 z = rand(Poisson(5),N);
 r = 0.1;
@@ -25,7 +25,7 @@ end
 um = UmData(y)
 
 
-mod = rn(@formula(λ~1), @formula(p~1), um)
+@time mod = rn(@formula(λ~1), @formula(p~1), um, K=50)
 
 #Display
 truth = [log(5), logit(r)]; 
diff --git a/src/roylenichols.jl b/src/roylenichols.jl
index cd0a097..e0b849e 100644
--- a/src/roylenichols.jl
+++ b/src/roylenichols.jl
@@ -6,29 +6,31 @@ struct RN <: UnmarkedModel
 end
 
 """
-    rn(λ_formula::FormulaTerm, p_formula::FormulaTerm, data::UmData, K::Int)
+    rn(λ_formula::FormulaTerm, p_formula::FormulaTerm, data::UmData; K::Int)
 
 Fit the mixture model of Royle and Nichols (2003), which estimates latent site
 abundance using presence-absence data. Probability a species is detected at a
 site is assumed to be positively related to species abundance at the site.
-Covariates on abundance and detection are specified with `λ_formula` and 
-`p_formula`, respectively. The `UmData` object should contain `site_covs` and 
-`obs_covs` `DataFrames` containing columns with names matching the formulas. 
-The likelihood is marginalized over possible latent abundance values `0:K` for 
-each site. `K` should be set high enough that the likelihood of true abundance 
-`K` for any site is ≈ 0 (defaults to maximum observed count + 20). Larger `K` 
+Covariates on abundance and detection are specified with `λ_formula` and
+`p_formula`, respectively. The `UmData` object should contain `site_covs` and
+`obs_covs` `DataFrames` containing columns with names matching the formulas.
+The likelihood is marginalized over possible latent abundance values `0:K` for
+each site. `K` should be set high enough that the likelihood of true abundance
+`K` for any site is ≈ 0 (defaults to maximum observed count + 20). Larger `K`
 will increase runtime.
 """
 function rn(λ_formula::FormulaTerm, p_formula::FormulaTerm,
-            data::UmData, K::Union{Int,Nothing}=nothing)
+            data::UmData; K::Union{Int,Nothing}=nothing)
 
   abun = UmDesign(:Abundance, λ_formula, LogLink(), data.site_covs)
   det = UmDesign(:Detection, p_formula, LogitLink(), data.obs_covs)
-  np = add_idx!([abun, det])
+  add_idx!([abun, det])
+  np = get_np([abun, det])
 
   y = data.y
   N, J = size(y)
-  K = isnothing(K) ? maximum(skipmissing(y)) + 20 : K
+  K = check_K(K, y)
+  Kmin = get_Kmin(y)
 
   function loglik(β::Array)
 
@@ -37,26 +39,18 @@ function rn(λ_formula::FormulaTerm, p_formula::FormulaTerm,
 
     ll = zeros(eltype(β), N)
 
-    idx = 0
-    for i = 1:N
+    Threads.@threads for i = 1:N
+      rsub = r[(i*J-J+1):(i*J)]
       fg = zeros(eltype(β),K+1)
-      for k = 0:K
-        if maximum(skipmissing(y[i,:])) > k
-          continue
-        end
+      for k = Kmin[i]:K
         fk = pdf(Poisson(λ[i]), k)
         gk = 1.0
         for j = 1:J
-          idx += 1
           if ismissing(y[i,j]) continue end
-          p = 1 - (1 - r[idx])^k
+          p = 1 - (1 - rsub[j])^k
           gk *= p^y[i,j] * (1-p)^(1-y[i,j])
         end
-
         fg[k+1] = fk * gk
-
-        #Return r index to start for site if necessary
-        if k != K idx -= J end
       end
       ll[i] = log(sum(fg))
     end
author	Ken Kellner <ken@kenkellner.com>	2022-02-23 18:57:29 -0500
committer	Ken Kellner <ken@kenkellner.com>	2022-02-23 18:57:29 -0500
commit	c18af620ceb9fddd7316fcde1c3a3eef7914a8b1 (patch)
tree	d90dc34135193d0d4bdc4c6baaa92080d5ceafd3
parent	873ad366d0ee70e0c20dbb66f0eebb66f619ddec (diff)