diff options
author | Ken Kellner <ken@kenkellner.com> | 2020-02-13 21:05:41 -0500 |
---|---|---|
committer | Ken Kellner <ken@kenkellner.com> | 2020-02-13 21:05:41 -0500 |
commit | cebd06f9bca0ae36ddfceaecd629ccd3a2a4989c (patch) | |
tree | aae03634ee3394c23d560795311fc06176ce7c4c | |
parent | 4594e4705e91d8245fc27010c93e21c783920e7a (diff) |
Add Royle-Nichols model
-rw-r--r-- | examples/occu_rn.jl | 40 | ||||
-rw-r--r-- | src/Unmarked.jl | 5 | ||||
-rw-r--r-- | src/fit.jl | 2 | ||||
-rw-r--r-- | src/roylenichols.jl | 70 |
4 files changed, 79 insertions, 38 deletions
diff --git a/examples/occu_rn.jl b/examples/occu_rn.jl index d20ae80..dad8414 100644 --- a/examples/occu_rn.jl +++ b/examples/occu_rn.jl @@ -3,6 +3,8 @@ using Random, Distributions, StatsFuns, Optim, NLSolversBase, ForwardDiff using LinearAlgebra +using Unmarked + Random.seed!(123); #Simulate data @@ -20,45 +22,13 @@ for i = 1:N end end -#Population size values to marginalize over -K = 25 - -#Estimate +um = UmData(y) -function loglik(β_raw::Array) - - λ = exp(β_raw[1]) - r = logistic(β_raw[2]) - - ll = zeros(eltype(β_raw),N) - for i = 1:N - fg = zeros(eltype(β_raw),K+1) - for k = 0:K - fk = pdf(Poisson(λ), k) - gk = 1.0 - for j = 1:J - #repeatedly calculating to allow covs later - p = 1 - (1 - r)^k - gk *= p^y[i,j] * (1-p)^(1-y[i,j]) - end - fg[k+1] = fk * gk - end - ll[i] = log(sum(fg)) - end - - return -sum(ll) -end - -func = TwiceDifferentiable(vars -> loglik(vars), zeros(2); autodiff=:forward); -opt = optimize(func, zeros(2)); -param = Optim.minimizer(opt); -hes = NLSolversBase.hessian!(func, param); -vcov = inv(hes); -se = sqrt.(diag(vcov)); +mod = rn(@formula(λ~1), @formula(p~1), um) #Display truth = [log(5), logit(r)]; pnames = ["β[1]", "β[2]"]; cnames = ["param" "truth" "estimate" "se"]; -[cnames; pnames truth param se] +[cnames; pnames truth coef(mod) stderror(mod)] diff --git a/src/Unmarked.jl b/src/Unmarked.jl index 4f192f8..2ef1b8b 100644 --- a/src/Unmarked.jl +++ b/src/Unmarked.jl @@ -11,13 +11,13 @@ import Base: show, getindex, length import StatsBase: aic, aicc, bic, coef, coefnames, coeftable, deviance, dof, loglikelihood, modelmatrix, predict, nobs, stderror, vcov -export UmData, Occu, Nmix +export UmData, Occu, Nmix, RN export @formula export aic, aicc, bic, coef, coefnames, coeftable, deviance, dof, gof, loglikelihood, modelmatrix, nobs, predict, simulate, stderror, vcov #Fitting functions -export occu, nmix +export occu, nmix, rn #Submodel extractors export detection, occupancy, abundance @@ -35,6 +35,7 @@ include("gof.jl") include("predict.jl") include("occupancy.jl") include("nmix.jl") +include("roylenichols.jl") include("plots.jl") end @@ -171,7 +171,7 @@ end #Standard error function stderror(x::UnmarkedModel) v = vcov(x) - return sqrt.diag(v) + return sqrt.(diag(v)) end function stderror(x::UnmarkedSubmodel) diff --git a/src/roylenichols.jl b/src/roylenichols.jl new file mode 100644 index 0000000..cd0a097 --- /dev/null +++ b/src/roylenichols.jl @@ -0,0 +1,70 @@ +struct RN <: UnmarkedModel + data::UmData + opt::UnmarkedOpt + submodels::NamedTuple + K::Int +end + +""" + rn(λ_formula::FormulaTerm, p_formula::FormulaTerm, data::UmData, K::Int) + +Fit the mixture model of Royle and Nichols (2003), which estimates latent site +abundance using presence-absence data. Probability a species is detected at a +site is assumed to be positively related to species abundance at the site. +Covariates on abundance and detection are specified with `λ_formula` and +`p_formula`, respectively. The `UmData` object should contain `site_covs` and +`obs_covs` `DataFrames` containing columns with names matching the formulas. +The likelihood is marginalized over possible latent abundance values `0:K` for +each site. `K` should be set high enough that the likelihood of true abundance +`K` for any site is ≈ 0 (defaults to maximum observed count + 20). Larger `K` +will increase runtime. +""" +function rn(λ_formula::FormulaTerm, p_formula::FormulaTerm, + data::UmData, K::Union{Int,Nothing}=nothing) + + abun = UmDesign(:Abundance, λ_formula, LogLink(), data.site_covs) + det = UmDesign(:Detection, p_formula, LogitLink(), data.obs_covs) + np = add_idx!([abun, det]) + + y = data.y + N, J = size(y) + K = isnothing(K) ? maximum(skipmissing(y)) + 20 : K + + function loglik(β::Array) + + λ = transform(abun, β) + r = transform(det, β) + + ll = zeros(eltype(β), N) + + idx = 0 + for i = 1:N + fg = zeros(eltype(β),K+1) + for k = 0:K + if maximum(skipmissing(y[i,:])) > k + continue + end + fk = pdf(Poisson(λ[i]), k) + gk = 1.0 + for j = 1:J + idx += 1 + if ismissing(y[i,j]) continue end + p = 1 - (1 - r[idx])^k + gk *= p^y[i,j] * (1-p)^(1-y[i,j]) + end + + fg[k+1] = fk * gk + + #Return r index to start for site if necessary + if k != K idx -= J end + end + ll[i] = log(sum(fg)) + end + + return -sum(ll) + end + + opt = optimize_loglik(loglik, np) + RN(data, opt, (abun=UnmarkedSubmodel(abun, opt), + det=UnmarkedSubmodel(det, opt)), K) +end |