Add Royle-Nichols model

author: Ken Kellner <ken@kenkellner.com> 2020-02-13 21:05:41 -0500
committer: Ken Kellner <ken@kenkellner.com> 2020-02-13 21:05:41 -0500
commit: cebd06f9bca0ae36ddfceaecd629ccd3a2a4989c (patch)
tree: aae03634ee3394c23d560795311fc06176ce7c4c
parent: 4594e4705e91d8245fc27010c93e21c783920e7a (diff)
4 files changed, 79 insertions, 38 deletions
diff --git a/examples/occu_rn.jl b/examples/occu_rn.jl
index d20ae80..dad8414 100644
--- a/examples/occu_rn.jl
+++ b/examples/occu_rn.jl
@@ -3,6 +3,8 @@
 using Random, Distributions, StatsFuns, Optim, NLSolversBase, ForwardDiff
 using LinearAlgebra
 
+using Unmarked
+
 Random.seed!(123);
 
 #Simulate data
@@ -20,45 +22,13 @@ for i = 1:N
   end
 end
 
-#Population size values to marginalize over
-K = 25
-
-#Estimate
+um = UmData(y)
 
-function loglik(β_raw::Array)
-  
-  λ = exp(β_raw[1])
-  r = logistic(β_raw[2])
-   
-  ll = zeros(eltype(β_raw),N)
-  for i = 1:N
-    fg = zeros(eltype(β_raw),K+1)
-    for k = 0:K
-      fk = pdf(Poisson(λ), k)
-      gk = 1.0
-      for j = 1:J
-        #repeatedly calculating to allow covs later
-        p = 1 - (1 - r)^k 
-        gk *= p^y[i,j] * (1-p)^(1-y[i,j])
-      end
-      fg[k+1] = fk * gk
-    end
-    ll[i] = log(sum(fg))
-  end
-
-  return -sum(ll)
-end
-      
-func = TwiceDifferentiable(vars -> loglik(vars), zeros(2); autodiff=:forward);
 
-opt = optimize(func, zeros(2));
-param = Optim.minimizer(opt);
-hes = NLSolversBase.hessian!(func, param);
-vcov = inv(hes);
-se = sqrt.(diag(vcov));
+mod = rn(@formula(λ~1), @formula(p~1), um)
 
 #Display
 truth = [log(5), logit(r)]; 
 pnames = ["β[1]", "β[2]"];
 cnames = ["param" "truth" "estimate" "se"];
-[cnames; pnames truth param se]
+[cnames; pnames truth coef(mod) stderror(mod)]
diff --git a/src/Unmarked.jl b/src/Unmarked.jl
index 4f192f8..2ef1b8b 100644
--- a/src/Unmarked.jl
+++ b/src/Unmarked.jl
@@ -11,13 +11,13 @@ import Base: show, getindex, length
 import StatsBase: aic, aicc, bic, coef, coefnames, coeftable, deviance, dof,
                   loglikelihood, modelmatrix, predict, nobs, stderror, vcov
 
-export UmData, Occu, Nmix
+export UmData, Occu, Nmix, RN
 export @formula
 export aic, aicc, bic, coef, coefnames, coeftable, deviance, dof, gof, 
        loglikelihood, modelmatrix, nobs, predict, simulate, stderror, vcov
 
 #Fitting functions       
-export occu, nmix
+export occu, nmix, rn
 
 #Submodel extractors
 export detection, occupancy, abundance
@@ -35,6 +35,7 @@ include("gof.jl")
 include("predict.jl")
 include("occupancy.jl")
 include("nmix.jl")
+include("roylenichols.jl")
 include("plots.jl")
 
 end
diff --git a/src/fit.jl b/src/fit.jl
index 96d8f97..c78a53b 100644
--- a/src/fit.jl
+++ b/src/fit.jl
@@ -171,7 +171,7 @@ end
 #Standard error
 function stderror(x::UnmarkedModel)
   v = vcov(x)
-  return sqrt.diag(v)
+  return sqrt.(diag(v))
 end
 
 function stderror(x::UnmarkedSubmodel)
diff --git a/src/roylenichols.jl b/src/roylenichols.jl
new file mode 100644
index 0000000..cd0a097
--- /dev/null
+++ b/src/roylenichols.jl
@@ -0,0 +1,70 @@
+struct RN <: UnmarkedModel
+  data::UmData
+  opt::UnmarkedOpt
+  submodels::NamedTuple
+  K::Int
+end
+
+"""
+    rn(λ_formula::FormulaTerm, p_formula::FormulaTerm, data::UmData, K::Int)
+
+Fit the mixture model of Royle and Nichols (2003), which estimates latent site
+abundance using presence-absence data. Probability a species is detected at a
+site is assumed to be positively related to species abundance at the site.
+Covariates on abundance and detection are specified with `λ_formula` and 
+`p_formula`, respectively. The `UmData` object should contain `site_covs` and 
+`obs_covs` `DataFrames` containing columns with names matching the formulas. 
+The likelihood is marginalized over possible latent abundance values `0:K` for 
+each site. `K` should be set high enough that the likelihood of true abundance 
+`K` for any site is ≈ 0 (defaults to maximum observed count + 20). Larger `K` 
+will increase runtime.
+"""
+function rn(λ_formula::FormulaTerm, p_formula::FormulaTerm,
+            data::UmData, K::Union{Int,Nothing}=nothing)
+
+  abun = UmDesign(:Abundance, λ_formula, LogLink(), data.site_covs)
+  det = UmDesign(:Detection, p_formula, LogitLink(), data.obs_covs)
+  np = add_idx!([abun, det])
+
+  y = data.y
+  N, J = size(y)
+  K = isnothing(K) ? maximum(skipmissing(y)) + 20 : K
+
+  function loglik(β::Array)
+
+    λ = transform(abun, β)
+    r = transform(det, β)
+
+    ll = zeros(eltype(β), N)
+
+    idx = 0
+    for i = 1:N
+      fg = zeros(eltype(β),K+1)
+      for k = 0:K
+        if maximum(skipmissing(y[i,:])) > k
+          continue
+        end
+        fk = pdf(Poisson(λ[i]), k)
+        gk = 1.0
+        for j = 1:J
+          idx += 1
+          if ismissing(y[i,j]) continue end
+          p = 1 - (1 - r[idx])^k
+          gk *= p^y[i,j] * (1-p)^(1-y[i,j])
+        end
+
+        fg[k+1] = fk * gk
+
+        #Return r index to start for site if necessary
+        if k != K idx -= J end
+      end
+      ll[i] = log(sum(fg))
+    end
+
+    return -sum(ll)
+  end
+
+  opt = optimize_loglik(loglik, np)
+  RN(data, opt, (abun=UnmarkedSubmodel(abun, opt),
+                 det=UnmarkedSubmodel(det, opt)), K)
+end
author	Ken Kellner <ken@kenkellner.com>	2020-02-13 21:05:41 -0500
committer	Ken Kellner <ken@kenkellner.com>	2020-02-13 21:05:41 -0500
commit	cebd06f9bca0ae36ddfceaecd629ccd3a2a4989c (patch)
tree	aae03634ee3394c23d560795311fc06176ce7c4c
parent	4594e4705e91d8245fc27010c93e21c783920e7a (diff)