aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKen Kellner <ken@kenkellner.com>2020-02-13 21:05:41 -0500
committerKen Kellner <ken@kenkellner.com>2020-02-13 21:05:41 -0500
commitcebd06f9bca0ae36ddfceaecd629ccd3a2a4989c (patch)
treeaae03634ee3394c23d560795311fc06176ce7c4c
parent4594e4705e91d8245fc27010c93e21c783920e7a (diff)
Add Royle-Nichols model
-rw-r--r--examples/occu_rn.jl40
-rw-r--r--src/Unmarked.jl5
-rw-r--r--src/fit.jl2
-rw-r--r--src/roylenichols.jl70
4 files changed, 79 insertions, 38 deletions
diff --git a/examples/occu_rn.jl b/examples/occu_rn.jl
index d20ae80..dad8414 100644
--- a/examples/occu_rn.jl
+++ b/examples/occu_rn.jl
@@ -3,6 +3,8 @@
using Random, Distributions, StatsFuns, Optim, NLSolversBase, ForwardDiff
using LinearAlgebra
+using Unmarked
+
Random.seed!(123);
#Simulate data
@@ -20,45 +22,13 @@ for i = 1:N
end
end
-#Population size values to marginalize over
-K = 25
-
-#Estimate
+um = UmData(y)
-function loglik(β_raw::Array)
-
- λ = exp(β_raw[1])
- r = logistic(β_raw[2])
-
- ll = zeros(eltype(β_raw),N)
- for i = 1:N
- fg = zeros(eltype(β_raw),K+1)
- for k = 0:K
- fk = pdf(Poisson(λ), k)
- gk = 1.0
- for j = 1:J
- #repeatedly calculating to allow covs later
- p = 1 - (1 - r)^k
- gk *= p^y[i,j] * (1-p)^(1-y[i,j])
- end
- fg[k+1] = fk * gk
- end
- ll[i] = log(sum(fg))
- end
-
- return -sum(ll)
-end
-
-func = TwiceDifferentiable(vars -> loglik(vars), zeros(2); autodiff=:forward);
-opt = optimize(func, zeros(2));
-param = Optim.minimizer(opt);
-hes = NLSolversBase.hessian!(func, param);
-vcov = inv(hes);
-se = sqrt.(diag(vcov));
+mod = rn(@formula(λ~1), @formula(p~1), um)
#Display
truth = [log(5), logit(r)];
pnames = ["β[1]", "β[2]"];
cnames = ["param" "truth" "estimate" "se"];
-[cnames; pnames truth param se]
+[cnames; pnames truth coef(mod) stderror(mod)]
diff --git a/src/Unmarked.jl b/src/Unmarked.jl
index 4f192f8..2ef1b8b 100644
--- a/src/Unmarked.jl
+++ b/src/Unmarked.jl
@@ -11,13 +11,13 @@ import Base: show, getindex, length
import StatsBase: aic, aicc, bic, coef, coefnames, coeftable, deviance, dof,
loglikelihood, modelmatrix, predict, nobs, stderror, vcov
-export UmData, Occu, Nmix
+export UmData, Occu, Nmix, RN
export @formula
export aic, aicc, bic, coef, coefnames, coeftable, deviance, dof, gof,
loglikelihood, modelmatrix, nobs, predict, simulate, stderror, vcov
#Fitting functions
-export occu, nmix
+export occu, nmix, rn
#Submodel extractors
export detection, occupancy, abundance
@@ -35,6 +35,7 @@ include("gof.jl")
include("predict.jl")
include("occupancy.jl")
include("nmix.jl")
+include("roylenichols.jl")
include("plots.jl")
end
diff --git a/src/fit.jl b/src/fit.jl
index 96d8f97..c78a53b 100644
--- a/src/fit.jl
+++ b/src/fit.jl
@@ -171,7 +171,7 @@ end
#Standard error
function stderror(x::UnmarkedModel)
v = vcov(x)
- return sqrt.diag(v)
+ return sqrt.(diag(v))
end
function stderror(x::UnmarkedSubmodel)
diff --git a/src/roylenichols.jl b/src/roylenichols.jl
new file mode 100644
index 0000000..cd0a097
--- /dev/null
+++ b/src/roylenichols.jl
@@ -0,0 +1,70 @@
+struct RN <: UnmarkedModel
+ data::UmData
+ opt::UnmarkedOpt
+ submodels::NamedTuple
+ K::Int
+end
+
+"""
+ rn(λ_formula::FormulaTerm, p_formula::FormulaTerm, data::UmData, K::Int)
+
+Fit the mixture model of Royle and Nichols (2003), which estimates latent site
+abundance using presence-absence data. Probability a species is detected at a
+site is assumed to be positively related to species abundance at the site.
+Covariates on abundance and detection are specified with `λ_formula` and
+`p_formula`, respectively. The `UmData` object should contain `site_covs` and
+`obs_covs` `DataFrames` containing columns with names matching the formulas.
+The likelihood is marginalized over possible latent abundance values `0:K` for
+each site. `K` should be set high enough that the likelihood of true abundance
+`K` for any site is ≈ 0 (defaults to maximum observed count + 20). Larger `K`
+will increase runtime.
+"""
+function rn(λ_formula::FormulaTerm, p_formula::FormulaTerm,
+ data::UmData, K::Union{Int,Nothing}=nothing)
+
+ abun = UmDesign(:Abundance, λ_formula, LogLink(), data.site_covs)
+ det = UmDesign(:Detection, p_formula, LogitLink(), data.obs_covs)
+ np = add_idx!([abun, det])
+
+ y = data.y
+ N, J = size(y)
+ K = isnothing(K) ? maximum(skipmissing(y)) + 20 : K
+
+ function loglik(β::Array)
+
+ λ = transform(abun, β)
+ r = transform(det, β)
+
+ ll = zeros(eltype(β), N)
+
+ idx = 0
+ for i = 1:N
+ fg = zeros(eltype(β),K+1)
+ for k = 0:K
+ if maximum(skipmissing(y[i,:])) > k
+ continue
+ end
+ fk = pdf(Poisson(λ[i]), k)
+ gk = 1.0
+ for j = 1:J
+ idx += 1
+ if ismissing(y[i,j]) continue end
+ p = 1 - (1 - r[idx])^k
+ gk *= p^y[i,j] * (1-p)^(1-y[i,j])
+ end
+
+ fg[k+1] = fk * gk
+
+ #Return r index to start for site if necessary
+ if k != K idx -= J end
+ end
+ ll[i] = log(sum(fg))
+ end
+
+ return -sum(ll)
+ end
+
+ opt = optimize_loglik(loglik, np)
+ RN(data, opt, (abun=UnmarkedSubmodel(abun, opt),
+ det=UnmarkedSubmodel(det, opt)), K)
+end