diff options
author | Ken Kellner <ken@kenkellner.com> | 2023-08-04 11:57:46 -0400 |
---|---|---|
committer | Ken Kellner <ken@kenkellner.com> | 2023-08-04 12:00:25 -0400 |
commit | de5c226f82f04d6df0c085eac436ff45ed468a4c (patch) | |
tree | 2bf62852507966cdde9fd1458c1fd6ccf6b8ed18 | |
parent | 516906afa4166978133c61541fa32f80ebda4416 (diff) |
Working goccu function
-rw-r--r-- | DESCRIPTION | 1 | ||||
-rw-r--r-- | NAMESPACE | 2 | ||||
-rw-r--r-- | R/goccu.R | 320 | ||||
-rw-r--r-- | src/TMB/tmb_goccu.hpp | 130 | ||||
-rw-r--r-- | src/TMB/unmarked_TMBExports.cpp | 3 |
5 files changed, 455 insertions, 1 deletions
diff --git a/DESCRIPTION b/DESCRIPTION index b658bba..e102c4e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,6 +51,7 @@ Collate: 'classes.R' 'unmarkedEstimate.R' 'mapInfo.R' 'unmarkedFrame.R' 'power.R' 'simulate.R' 'predict.R' + 'goccu.R' 'RcppExports.R' 'zzz.R' LinkingTo: @@ -24,7 +24,7 @@ importFrom(pbapply, pbsapply, pblapply) export(occu, occuFP, occuRN, pcount, pcountOpen, multinomPois, distsamp, colext, gmultmix, gdistsamp, gpcount, occuPEN, occuPEN_CV, occuMulti, occuMS, computeMPLElambda, pcount.spHDS, occuTTD, distsampOpen, - multmixOpen, nmixTTD, gdistremoval) + multmixOpen, nmixTTD, gdistremoval, goccu) export(removalPiFun, doublePiFun) export(makeRemPiFun, makeCrPiFun, makeCrPiFunMb, makeCrPiFunMh) diff --git a/R/goccu.R b/R/goccu.R new file mode 100644 index 0000000..0d1c0c9 --- /dev/null +++ b/R/goccu.R @@ -0,0 +1,320 @@ +setClass("unmarkedFitGOccu", + representation( + formlist = "list"), + contains = "unmarkedFit") + +setClass("unmarkedFrameGOccu", contains = "unmarkedFrameG3") + +setMethod("getDesign", "unmarkedFrameGOccu", + function(umf, formula, na.rm=TRUE){ + out <- methods::callNextMethod(umf, formula=formula, na.rm=na.rm) + names(out)[2] <- "Xpsi" + names(out)[5] <- "Xpsi.offset" + out +}) + +goccu <- function(psiformula, phiformula, pformula, data, + linkPsi = c("logit", "cloglog"), starts, method = "BFGS", + se = TRUE, ...){ + + linkPsi <- match.arg(linkPsi, c("logit","cloglog")) + psiLinkFunc <- ifelse(linkPsi=="cloglog", cloglog, plogis) + psiInvLink <- ifelse(linkPsi=="cloglog", "cloglog", "logistic") + psiLinkGrad <- ifelse(linkPsi=="cloglog", "cloglog.grad", "logistic.grad") + + # Pass phiformula as gamma/eps formula so it will be applied to + # yearlySiteCovs in getDesign + formlist <- list(psiformula=psiformula, phi=phiformula, + pformula=pformula) + + formula <- as.formula(paste(unlist(formlist), collapse=" ")) + + data@y[data@y > 1] <- 1 + + class(data) <- "unmarkedFrameGOccu" + + # handle offsets + + gd <- getDesign(data, formula = formula) + + y <- gd$y + Xpsi <- gd$Xpsi + Xphi <- gd$Xphi + Xp <- gd$Xdet + + M <- nrow(y) + T <- data@numPrimary + J <- ncol(y) / T + + # Identify entirely missing primary periods at each site + y_array <- array(t(y), c(J, T, M)) + missing_session <- t(apply(y_array, c(2,3), + function(x) as.numeric(all(is.na(x))))) + + # Create possible states in each T + alpha_potential <- as.matrix(expand.grid(rep(list(c(0, 1)), T))) + n_possible <- nrow(alpha_potential) + + # Known present at each site + known_present <- rep(0, M) + # Known available at each site and T + known_available <- matrix(0, nrow=M, ncol=T) + + for (i in 1:M){ + for (t in 1:T){ + for (j in 1:J){ + if(y_array[j, t, i] == 1){ + known_present[i] <- 1 + known_available[i,t] <- 1 + } + } + } + } + + # Bundle data for TMB + dataList <- list(y=y, T=T, link=ifelse(linkPsi=='cloglog', 1, 0), + Xpsi=Xpsi, Xphi=Xphi, Xp=Xp, + n_possible=n_possible, + alpha_potential=alpha_potential, + known_present=known_present, known_available=known_available, + missing_session=missing_session) + + # Provide dimensions and starting values for parameters + # This part should change to be more like occu() if we add random effects + psi_ind <- 1:ncol(Xpsi) + phi_ind <- 1:ncol(Xphi) + max(psi_ind) + p_ind <- 1:ncol(Xp) + max(phi_ind) + nP <- max(p_ind) + params <- list(beta_psi = rep(0, length(psi_ind)), + beta_phi = rep(0, length(phi_ind)), + beta_p = rep(0, length(p_ind))) + + # Create TMB object + tmb_mod <- TMB::MakeADFun(data = c(model = "tmb_goccu", dataList), + parameters = params, + DLL = "unmarked_TMBExports", silent = TRUE) + + # Optimize TMB object, print and save results + if(missing(starts)) starts <- tmb_mod$par + opt <- optim(starts, fn = tmb_mod$fn, gr = tmb_mod$gr, method = method, + hessian = se, ...) + + covMat <- invertHessian(opt, nP, se) + ests <- opt$par + names(ests) <- c(colnames(Xpsi), colnames(Xphi), colnames(Xp)) + fmAIC <- 2 * opt$value + 2 * nP + + + psi_est <- unmarkedEstimate(name = "Occupancy", short.name = "psi", + estimates = ests[psi_ind], + covMat = covMat[psi_ind, psi_ind, drop=FALSE], + fixed = 1:ncol(Xpsi), + invlink = psiInvLink, + invlinkGrad = psiLinkGrad, + randomVarInfo=list() + ) + + phi_est <- unmarkedEstimate(name = "Availability", short.name = "phi", + estimates = ests[phi_ind], + covMat = covMat[phi_ind, phi_ind, drop=FALSE], + fixed = 1:ncol(Xphi), + invlink = "logistic", + invlinkGrad = "logistic.grad", + randomVarInfo=list() + ) + + p_est <- unmarkedEstimate(name = "Detection", short.name = "p", + estimates = ests[p_ind], + covMat = covMat[p_ind, p_ind, drop=FALSE], + fixed = 1:ncol(Xp), + invlink = "logistic", + invlinkGrad = "logistic.grad", + randomVarInfo=list() + ) + + estimate_list <- unmarkedEstimateList(list(psi=psi_est, phi=phi_est, + det=p_est)) + + # Create unmarkedFit object-------------------------------------------------- + umfit <- new("unmarkedFitGOccu", fitType = "goccu", call = match.call(), + formula = formula, formlist=formlist, data = data, + sitesRemoved = gd$removed.sites, + estimates = estimate_list, AIC = fmAIC, opt = opt, + negLogLike = opt$value, + nllFun = tmb_mod$fn, TMB=tmb_mod) + + return(umfit) + +} + +# Methods + +setMethod("predict_inputs_from_umf", "unmarkedFitGOccu", + function(object, type, newdata, na.rm, re.form=NA){ + designMats <- getDesign(newdata, object@formula, na.rm=na.rm) + X_idx <- switch(type, psi="Xpsi", phi="Xphi", det="Xdet") + off_idx <- paste0(X_idx, ".offset") + list(X=designMats[[X_idx]], offset=NULL) +}) + +setMethod("get_formula", "unmarkedFitGOccu", function(object, type, ...){ + fl <- object@formlist + switch(type, psi=fl$psiformula, phi=fl$phiformula, det=fl$pformula) +}) + +setMethod("get_orig_data", "unmarkedFitGOccu", function(object, type, ...){ + clean_covs <- clean_up_covs(object@data, drop_final=FALSE) + datatype <- switch(type, psi='site_covs', phi='yearly_site_covs', + det='obs_covs') + clean_covs[[datatype]] +}) + +setMethod("getP", "unmarkedFitGOccu", + function(object){ + p <- predict(object, "det", level=NULL)$Predicted + p <- matrix(p, nrow=numSites(object@data), ncol=obsNum(object@data), + byrow=TRUE) + p +}) + +setMethod("fitted", "unmarkedFitGOccu", + function(object, na.rm= FALSE){ + + M <- numSites(object@data) + JT <- obsNum(object@data) + + psi <- predict(object, "psi", level=NULL)$Predicted + psi <- matrix(psi, nrow=M, ncol=JT) + + phi <- predict(object, "phi", level=NULL)$Predicted + phi <- rep(phi, each = JT / object@data@numPrimary) + phi <- matrix(phi, nrow=M, ncol=JT, byrow=TRUE) + + p <- getP(object) + + psi * phi * p +}) + +# based on ranef for GPC +setMethod("ranef", "unmarkedFitGOccu", function(object, ...){ + + M <- numSites(object@data) + JT <- obsNum(object@data) + T <- object@data@numPrimary + J <- JT / T + + gd <- getDesign(object@data, object@formula) + y_array <- array(t(gd$y), c(J, T, M)) + + psi <- predict(object, "psi", level=NULL)$Predicted + phi <- predict(object, "phi", level=NULL)$Predicted + phi <- matrix(phi, nrow=M, ncol=T, byrow=TRUE) + p <- getP(object) + p_array <- array(t(p), c(J, T, M)) + + Z <- ZZ <- 0:1 + post <- array(0, c(M, 2, 1)) + colnames(post) <- Z + + for(i in 1:M) { + f <- dbinom(Z, 1, psi[i]) + + ghi <- rep(0, 2) + + for(t in 1:T) { + gh <- matrix(-Inf, 2, 2) + for(z in Z) { + if(z < max(y_array[,,i], na.rm=TRUE)){ + gh[,z+1] <- -Inf + next + } + if(is.na(phi[i,t])) { + g <- rep(0, 2) + g[ZZ>z] <- -Inf + } else{ + g <- dbinom(ZZ, z, phi[i,t], log=TRUE) + } + h <- rep(0, 2) + for(j in 1:J) { + if(is.na(y_array[j,t,i]) | is.na(p_array[j,t,i])) next + h <- h + dbinom(y_array[j,t,i], ZZ, p_array[j,t,i], log=TRUE) + } + gh[,z+1] <- g + h + } + ghi <- ghi + log(colSums(exp(gh))) + } + fgh <- exp(f + ghi) + prM <- fgh/sum(fgh) + post[i,,1] <- prM + } + + new("unmarkedRanef", post=post) +}) + + +setMethod("simulate", "unmarkedFitGOccu", + function(object, nsim = 1, seed = NULL, na.rm = TRUE){ + + M <- numSites(object@data) + JT <- obsNum(object@data) + T <- object@data@numPrimary + J <- JT / T + + gd <- unmarked:::getDesign(object@data, object@formula) + y_array <- array(t(gd$y), c(J, T, M)) + + psi <- predict(object, "psi", level=NULL)$Predicted + phi <- predict(object, "phi", level=NULL)$Predicted + phi <- matrix(phi, nrow=M, ncol=T, byrow=TRUE) + p <- getP(object) + + sim_list <- list() + + for (i in 1:nsim){ + z <- rbinom(M, 1, psi) + z <- matrix(z, nrow=M, ncol=T) + + zz <- rbinom(M*T, 1, phi*z) + zz <- matrix(zz, M, T) + + colrep <- rep(1:T, each=J) + zz <- zz[,colrep] + + y <- rbinom(M*T*J, 1, zz*p) + y <- matrix(y, M, JT) + if(na.rm) y[which(is.na(object@data@y))] <- NA + sim_list[[i]] <- y + } + + return(sim_list) +}) + + +setMethod("update", "unmarkedFitGOccu", + function(object, psiformula, phiformula, pformula, ..., + evaluate = TRUE) +{ + call <- object@call + if (is.null(call)) + stop("need an object with call slot") + formlist <- object@formlist + if (!missing(psiformula)) + call$psiformula <- update.formula(formlist$psiformula, psiformula) + if (!missing(phiformula)) + call$phiformula <- update.formula(formlist$phiformula, phiformula) + if (!missing(pformula)) + call$pformula <- update.formula(formlist$pformula, pformula) + extras <- match.call(call=sys.call(-1), + expand.dots = FALSE)$... + if(length(extras) > 0) { + existing <- !is.na(match(names(extras), names(call))) + for (a in names(extras)[existing]) call[[a]] <- extras[[a]] + if (any(!existing)) { + call <- c(as.list(call), extras[!existing]) + call <- as.call(call) + } + } + if (evaluate) + eval(call, parent.frame(2)) + else call +}) diff --git a/src/TMB/tmb_goccu.hpp b/src/TMB/tmb_goccu.hpp new file mode 100644 index 0000000..7788ad6 --- /dev/null +++ b/src/TMB/tmb_goccu.hpp @@ -0,0 +1,130 @@ +#undef TMB_OBJECTIVE_PTR +#define TMB_OBJECTIVE_PTR obj + +// Adapted from Stan code by Maxwell B. Joseph, +// https://discourse.mc-stan.org/t/divergent-transition-every-iteration-multi-scale-occupancy-model/13739/5 + +// name of function below **MUST** match filename +template<class Type> +Type tmb_goccu(objective_function<Type>* obj) { + //Describe input data + DATA_MATRIX(y); //observations + DATA_INTEGER(T); + + DATA_INTEGER(link); + + DATA_MATRIX(Xpsi); + DATA_MATRIX(Xphi); + DATA_MATRIX(Xp); + + DATA_INTEGER(n_possible); + DATA_MATRIX(alpha_potential); + DATA_VECTOR(known_present); + DATA_MATRIX(known_available); + DATA_MATRIX(missing_session); + + PARAMETER_VECTOR(beta_psi); + PARAMETER_VECTOR(beta_phi); + PARAMETER_VECTOR(beta_p); + + vector<Type> psi = Xpsi * beta_psi; + vector<Type> phi = Xphi * beta_phi; + vector<Type> p = Xp * beta_p; + + if(link == 1){ + //psi = cloglog(psi); + } else { + psi = invlogit(psi); + } + + phi = invlogit(phi); + p = invlogit(p); + + Type loglik = 0.0; + + int M = y.rows(); + int J = y.cols() / T; + + Type obs_lp; + Type poss_lp; + Type exp_poss_lp; + + int ystart; + vector<Type> ysite; + vector<Type> psite; + vector<Type> ysub; + vector<Type> psub; + + int tstart; + int pstart; + + for (int i=0; i<M; i++){ + + tstart = i*T; + pstart = i*(T*J); + + ysite = y.row(i); + psite = p.segment(pstart, T*J); + + if(known_present(i) == 1){ + for (int t=0; t<T; t++){ + + if(missing_session(i, t) == 1) continue; + + ystart = t*J; + + psub = psite.segment(ystart, J); + ysub = ysite.segment(ystart, J); + + obs_lp = log(phi(tstart+t)); + for (int j=0; j<J; j++){ + if(R_IsNA(asDouble(ysub(j)))) continue; + obs_lp += dbinom(ysub(j), Type(1), psub(j), true); + } + if(known_available(i, t) == 1){ + loglik += obs_lp; + } else { + loglik += log(exp(obs_lp) + exp(log(1-phi(tstart+t)))); + } + } + loglik += log(psi(i)); + } else { + + exp_poss_lp = 0.0; + + for (int k=0; k<n_possible; k++){ + poss_lp = log(psi(i)); + + for (int t=0; t<T; t++){ + + if(missing_session(i, t) == 1) continue; + + ystart = t*J; + + psub = psite.segment(ystart, J); + ysub = ysite.segment(ystart, J); + + if(alpha_potential(k, t) == 0){ + poss_lp += log(1 - phi(tstart+t)); + } else { + poss_lp += log(phi(tstart+t)); + for (int j=0; j<J; j++){ + if(R_IsNA(asDouble(ysub(j)))) continue; + poss_lp += dbinom(ysub(j), Type(1), psub(j), true); + } + } + } + exp_poss_lp += exp(poss_lp); + } + exp_poss_lp += exp(log(1-psi(i))); + + loglik += log(exp_poss_lp); + } + } + + return -loglik; + +} + +#undef TMB_OBJECTIVE_PTR +#define TMB_OBJECTIVE_PTR this diff --git a/src/TMB/unmarked_TMBExports.cpp b/src/TMB/unmarked_TMBExports.cpp index 5c1bfea..6aa57de 100644 --- a/src/TMB/unmarked_TMBExports.cpp +++ b/src/TMB/unmarked_TMBExports.cpp @@ -9,6 +9,7 @@ #include "tmb_multinomPois.hpp" #include "tmb_distsamp.hpp" #include "tmb_gdistremoval.hpp" +#include "tmb_goccu.hpp" template<class Type> Type objective_function<Type>::operator() () { @@ -23,6 +24,8 @@ Type objective_function<Type>::operator() () { return tmb_distsamp(this); } else if(model == "tmb_gdistremoval"){ return tmb_gdistremoval(this); + } else if(model == "tmb_goccu"){ + return tmb_goccu(this); } else { error("Unknown model."); } |