From 85603a57b6d5b3721fd8e5fc4dff50f3e666cc70 Mon Sep 17 00:00:00 2001 From: Ken Kellner Date: Thu, 25 Jan 2024 11:16:34 -0500 Subject: Add IDS fitting function (#273) --- DESCRIPTION | 5 +- NAMESPACE | 5 +- R/IDS.R | 684 ++++++++++++++++++++++++++++++++++++++++ R/simulate.R | 118 ++++++- R/unmarkedFit.R | 11 +- R/unmarkedFrame.R | 5 +- man/IDS.Rd | 142 +++++++++ man/SSE.Rd | 1 + man/fitted-methods.Rd | 2 + man/getP-methods.Rd | 2 + man/nonparboot-methods.Rd | 2 + man/parboot.Rd | 1 + man/predict-methods.Rd | 1 + man/ranef-methods.Rd | 1 + man/simulate-methods.Rd | 2 + man/unmarkedFit-class.Rd | 5 + src/TMB/tmb_IDS.hpp | 179 +++++++++++ src/TMB/unmarked_TMBExports.cpp | 3 + tests/testthat/test_IDS.R | 190 +++++++++++ 19 files changed, 1346 insertions(+), 13 deletions(-) create mode 100644 R/IDS.R create mode 100644 man/IDS.Rd create mode 100644 src/TMB/tmb_IDS.hpp create mode 100644 tests/testthat/test_IDS.R diff --git a/DESCRIPTION b/DESCRIPTION index 352015a..c7e5799 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: unmarked -Version: 1.4.1 -Date: 2024-01-08 +Version: 1.4.1.9001 +Date: 2024-01-23 Type: Package Title: Models for Data from Unmarked Animals Authors@R: c( @@ -53,6 +53,7 @@ Collate: 'classes.R' 'unmarkedEstimate.R' 'mapInfo.R' 'unmarkedFrame.R' 'unmarkedCrossVal.R' 'piFun.R' 'vif.R' 'makePiFun.R' 'posteriorSamples.R' 'nmixTTD.R' 'gdistremoval.R' + 'IDS.R' 'plotEffects.R' 'mixedModelTools.R' 'power.R' diff --git a/NAMESPACE b/NAMESPACE index aea62db..c4bbc63 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,7 +23,8 @@ importFrom(Rcpp, evalCpp) export(occu, occuFP, occuRN, pcount, pcountOpen, multinomPois, distsamp, colext, gmultmix, gdistsamp, gpcount, occuPEN, occuPEN_CV, occuMulti, occuMS, computeMPLElambda, pcount.spHDS, occuTTD, distsampOpen, - multmixOpen, nmixTTD, gdistremoval, goccu, occuCOP) + multmixOpen, nmixTTD, gdistremoval, goccu, occuCOP, IDS) + export(removalPiFun, doublePiFun) export(makeRemPiFun, makeCrPiFun, makeCrPiFunMb, makeCrPiFunMh) @@ -32,7 +33,7 @@ exportClasses(unmarkedFit, unmarkedFitOccu, unmarkedFitOccuFP, unmarkedFitDS, unmarkedFitPCount, unmarkedFitMPois, unmarkedFitPCO, unmarkedFrameDSO, unmarkedFitDSO, unmarkedFrameMMO, unmarkedFitMMO, - unmarkedFitOccuMulti, + unmarkedFitOccuMulti, unmarkedFitIDS, unmarkedFrame, unmarkedFrameOccu, unmarkedFramePCount, unmarkedFrameMPois, unmarkedFrameDS, unmarkedMultFrame, unmarkedFrameGMM, unmarkedFrameGDS, unmarkedFramePCO, diff --git a/R/IDS.R b/R/IDS.R new file mode 100644 index 0000000..38338b1 --- /dev/null +++ b/R/IDS.R @@ -0,0 +1,684 @@ +setClassUnion("unmarkedFrameOrNULL", members=c("unmarkedFrame", "NULL")) + +setClass("unmarkedFitIDS", + representation( + formlist = "list", + keyfun = "character", + K = "numeric", + dataPC = "unmarkedFrameOrNULL", + dataOC = "unmarkedFrameOrNULL", + maxDist = "list", + surveyDurations = "list", + unitsOut = "character"), + contains = "unmarkedFit") + +get_ds_info <- function(db){ + J <- length(db) - 1 + a <- u <- rep(NA, J) + a[1] <- pi*db[2]^2 + if(J > 1){ + for (j in 2:J){ + a[j] <- pi*db[j+1]^2 - sum(a[1:(j-1)]) + } + } + + total_area <- sum(a) + u <- a/total_area + w <- diff(db) + list(total_area=total_area, a=a, u=u, w=w) +} + +IDS <- function(lambdaformula = ~1, + detformulaDS = ~1, + detformulaPC = NULL, + detformulaOC = NULL, + dataDS, + dataPC = NULL, + dataOC = NULL, + availformula = NULL, + durationDS = NULL, + durationPC = NULL, + durationOC = NULL, + keyfun = "halfnorm", + maxDistPC, + maxDistOC, + K = 100, + unitsOut = "ha", + starts = NULL, + method = "BFGS", + ... + ){ + + # Process inputs------------------------------------------------------------- + + form_hds <- as.formula(paste(c(as.character(detformulaDS), + as.character(lambdaformula)), collapse="")) + + if(is.null(detformulaPC)){ + form_pc <- as.formula(paste(c(as.character(detformulaDS), + as.character(lambdaformula)), collapse="")) + } else{ + form_pc <- as.formula(paste(c(as.character(detformulaPC), + as.character(lambdaformula)), collapse="")) + } + + if(is.null(detformulaOC)){ + form_oc <- as.formula(paste(c(as.character(detformulaDS), + as.character(lambdaformula)), collapse="")) + } else{ + form_oc <- as.formula(paste(c(as.character(detformulaOC), + as.character(lambdaformula)), collapse="")) + } + + formlist <- list(lam=lambdaformula, ds=form_hds, pc=form_pc, oc=form_oc, + phi=availformula) + + stopifnot(inherits(dataDS, "unmarkedFrameDS")) + stopifnot(inherits(dataPC, c("unmarkedFramePCount", "NULL"))) + stopifnot(inherits(dataOC, c("unmarkedFrameOccu", "NULL"))) + #stopifnot(!is.null(dataPC) | !is.null(dataOC)) + + has_avail <- FALSE + if(!is.null(durationDS) | !is.null(durationPC) | !is.null(durationOC)){ + has_avail <- TRUE + if(is.null(availformula)) availformula <- ~1 + form_avail <- as.formula(paste("~1", paste(as.character(availformula), collapse=""))) + stopifnot(!is.null(durationDS)) + if(!is.null(dataPC)) stopifnot(!is.null(durationPC)) + if(!is.null(dataOC)) stopifnot(!is.null(durationOC)) + } + + if(has_avail & !is.null(dataOC)){ + stop("Availability estimation doesn't work with detection-nondetection data", call.=FALSE) + } + + stopifnot(is.null(durationDS) || (length(durationDS) == numSites(dataDS))) + stopifnot(is.null(durationPC) || (length(durationPC) == numSites(dataPC))) + stopifnot(is.null(durationOC) || (length(durationOC) == numSites(dataOC))) + surveyDurations <- list(ds=durationDS, pc=durationPC, oc=durationOC) + + stopifnot(keyfun %in% c("halfnorm", "exp")) + keyidx <- switch(keyfun, "halfnorm"={1}, "exp"={2}) + + if(missing(maxDistPC)) maxDistPC <- max(dataDS@dist.breaks) + if(missing(maxDistOC)) maxDistOC <- max(dataDS@dist.breaks) + + + # Design matrices------------------------------------------------------------ + + # Need to add offset support here eventually + gd_hds <- getDesign(dataDS, form_hds) + if(any(is.na(gd_hds$y))){ + stop("Missing values in only some distance bins is not supported", call.=FALSE) + } + ds_hds <- get_ds_info(dataDS@dist.breaks) + Xavail_ds <- matrix(0,0,0) + if(has_avail) Xavail_ds <- getDesign(dataDS, form_avail)$X + if(is.null(durationDS)) durationDS <- rep(0,0) + + gd_pc <- list(y=matrix(0,0,0), X=matrix(0,0,0), V=matrix(0,0,0)) + ds_pc <- list(total_area=0, db=c(0,0), a=0, w=0, u=0) + Xavail_pc <- matrix(0,0,0) + if(is.null(durationPC)) durationPC <- rep(0,0) + if(!is.null(dataPC)){ + gd_pc <- getDesign(dataPC, form_pc) + ds_pc <- get_ds_info(c(0, maxDistPC)) + if(has_avail) Xavail_pc <- getDesign(dataPC, form_avail)$X + } + + gd_oc <- list(y=matrix(0,0,0), X=matrix(0,0,0), V=matrix(0,0,0)) + ds_oc <- list(total_area=0, db=c(0,0), a=0, w=0, u=0) + Kmin_oc <- rep(0,0) + Xavail_oc <- matrix(0,0,0) + if(is.null(durationOC)) durationOC <- rep(0,0) + if(!is.null(dataOC)){ + gd_oc <- getDesign(dataOC, form_oc) + ds_oc <- get_ds_info(c(0, maxDistOC)) + Kmin_oc <- apply(gd_oc$y, 1, max, na.rm=T) + if(has_avail) Xavail_oc <- getDesign(dataOC, form_avail)$X + } + + # Density conversion and unequal area correction + lam_adjust <- c(ds_hds$total_area, ds_pc$total_area, ds_oc$total_area) + names(lam_adjust) <- c("hds", "pc", "oc") + + switch(dataDS@unitsIn, + m = lam_adjust <- lam_adjust / 1e6, + km = lam_adjust <- lam_adjust) + + stopifnot(unitsOut %in% c("m","ha","kmsq")) + switch(unitsOut, + m = lam_adjust <- lam_adjust * 1e6, + ha = lam_adjust <- lam_adjust * 100, + kmsq = lam_adjust <- lam_adjust) + + # Parameter stuff------------------------------------------------------------ + # Doesn't support hazard + pind_mat <- matrix(0, nrow=5, ncol=2) + pind_mat[1,] <- c(1, ncol(gd_hds$X)) + pind_mat[2,] <- max(pind_mat) + c(1, ncol(gd_hds$V)) + if(!is.null(detformulaPC) & !is.null(dataPC)){ + pind_mat[3,] <- max(pind_mat) + c(1, ncol(gd_pc$V)) + } + if(!is.null(detformulaOC) & !is.null(dataOC)){ + pind_mat[4,] <- max(pind_mat) + c(1, ncol(gd_oc$V)) + } + if(has_avail){ + pind_mat[5,] <- max(pind_mat) + c(1, ncol(Xavail_ds)) + } + + if(is.null(starts)){ + lam_init <- log(mean(apply(dataDS@y, 1, sum, na.rm=TRUE)) / lam_adjust[1]) + params_tmb <- list(beta_lam = c(lam_init, rep(0, ncol(gd_hds$X)-1)), + beta_hds = c(log(median(dataDS@dist.breaks)),rep(0, ncol(gd_hds$V)-1)), + beta_pc = rep(0,0), + beta_oc = rep(0,0), + beta_avail = rep(0,0)) + + if(!is.null(detformulaPC) & !is.null(dataPC)){ + params_tmb$beta_pc <- c(log(maxDistPC/2), rep(0, ncol(gd_pc$V)-1)) + } + if(!is.null(detformulaOC) & !is.null(dataOC)){ + params_tmb$beta_oc <- c(log(maxDistOC/2), rep(0, ncol(gd_oc$V)-1)) + } + if(has_avail){ + params_tmb$beta_avail <- rep(0, ncol(Xavail_ds)) + } + starts <- unlist(params_tmb) + } else { + if(length(starts) != max(pind_mat)){ + stop("Length of starts should be ", max(pind_mat), call.=FALSE) + } + params_tmb <- list(beta_lam = starts[pind_mat[1,1]:pind_mat[1,2]], + beta_hds = starts[pind_mat[2,1]:pind_mat[2,2]], + beta_pc = rep(0,0), + beta_oc = rep(0,0), + beta_avail = rep(0,0)) + + if(!is.null(detformulaPC) & !is.null(dataPC)){ + params_tmb$beta_pc <- starts[pind_mat[3,1]:pind_mat[3,2]] + } + if(!is.null(detformulaOC) & !is.null(dataOC)){ + params_tmb$beta_oc <- starts[pind_mat[4,1]:pind_mat[4,2]] + } + if(has_avail){ + params_tmb$beta_avail <- starts[pind_mat[5,1]:pind_mat[5,2]] + } + } + + # Construct TMB data list---------------------------------------------------- + tmb_dat <- list( + # Common + pind = pind_mat, lam_adjust = lam_adjust, + + # HDS data + y_hds = gd_hds$y, X_hds = gd_hds$X, V_hds = gd_hds$V, key_hds = keyidx, + db_hds = dataDS@dist.breaks, a_hds = ds_hds$a, w_hds = ds_hds$w, + u_hds = ds_hds$u, + + # PC data + y_pc = gd_pc$y, X_pc = gd_pc$X, V_pc = gd_pc$V, key_pc = keyidx, + db_pc = c(0, maxDistPC), a_pc = ds_pc$a, w_pc = ds_pc$w, u_pc = ds_pc$u, + + # occ data + y_oc = gd_oc$y, X_oc = gd_oc$X, V_oc = gd_oc$V, key_oc = keyidx, + db_oc = c(0, maxDistOC), a_oc = ds_oc$a, w_oc = ds_oc$w, u_oc = ds_oc$u, + K_oc = K, Kmin_oc = Kmin_oc, + + # avail data + durationDS = durationDS, durationPC = durationPC, durationOC = durationOC, + Xa_hds = Xavail_ds, Xa_pc = Xavail_pc, Xa_oc = Xavail_oc + ) + + tmb_obj <- TMB::MakeADFun(data = c(model = "tmb_IDS", tmb_dat), parameters = params_tmb, + DLL = "unmarked_TMBExports", silent=TRUE) + + opt <- optim(unlist(params_tmb), fn=tmb_obj$fn, gr=tmb_obj$gr, method=method, ...) + + fmAIC <- 2 * opt$value + 2 * length(unlist(params_tmb)) + + sdr <- TMB::sdreport(tmb_obj) + + lam_coef <- get_coef_info(sdr, "lam", colnames(gd_hds$X), + pind_mat[1,1]:pind_mat[1,2]) + + lam_est <- unmarkedEstimate(name="Density", short.name="lam", + estimates = lam_coef$ests, covMat = lam_coef$cov, fixed=1:ncol(gd_hds$X), + invlink = "exp", invlinkGrad = "exp") + + dist_coef <- get_coef_info(sdr, "hds", colnames(gd_hds$V), + pind_mat[2,1]:pind_mat[2,2]) + + dist_est <- unmarkedEstimate(name="Distance sampling detection", short.name="ds", + estimates = dist_coef$ests, covMat = dist_coef$cov, fixed=1:ncol(gd_hds$V), + invlink = "exp", invlinkGrad = "exp") + + est_list <- list(lam=lam_est, ds=dist_est) + + if(!is.null(detformulaPC) & !is.null(dataPC)){ + pc_coef <- get_coef_info(sdr, "pc", colnames(gd_pc$V), + pind_mat[3,1]:pind_mat[3,2]) + + pc_est <- unmarkedEstimate(name="Point count detection", short.name="pc", + estimates = pc_coef$ests, covMat = pc_coef$cov, fixed=1:ncol(gd_pc$V), + invlink = "exp", invlinkGrad = "exp") + est_list <- c(est_list, list(pc=pc_est)) + } + + if(!is.null(detformulaOC) & !is.null(dataOC)){ + oc_coef <- get_coef_info(sdr, "oc", colnames(gd_oc$V), + pind_mat[4,1]:pind_mat[4,2]) + oc_est <- unmarkedEstimate(name="Presence/absence detection", short.name="oc", + estimates = oc_coef$ests, covMat = oc_coef$cov, fixed=1:ncol(gd_oc$V), + invlink = "exp", invlinkGrad = "exp") + est_list <- c(est_list, list(oc=oc_est)) + } + + if(has_avail){ + avail_coef <- get_coef_info(sdr, "avail", colnames(Xavail_ds), + pind_mat[5,1]:pind_mat[5,2]) + avail_est <- unmarkedEstimate(name="Availability", short.name="phi", + estimates=avail_coef$ests, covMat=avail_coef$cov, fixed=1:ncol(Xavail_ds), + invlink="exp", invlinkGrad="exp") + est_list <- c(est_list, list(phi=avail_est)) + } + + est_list <- unmarkedEstimateList(est_list) + + new("unmarkedFitIDS", fitType = "IDS", call = match.call(), + opt = opt, formula = lambdaformula, formlist=formlist, + data = dataDS, dataPC=dataPC, dataOC=dataOC, + surveyDurations=surveyDurations, + maxDist = list(pc=maxDistPC, oc=maxDistOC), + keyfun=keyfun, + # this needs to be fixed + sitesRemoved = gd_hds$removed.sites, + unitsOut=unitsOut, + estimates = est_list, AIC = fmAIC, negLogLike = opt$value, + nllFun = tmb_obj$fn, TMB=tmb_obj) + +} + +setMethod("summary", "unmarkedFitIDS", function(object) +{ + cat("\nCall:\n") + print(object@call) + cat("\n") + summaryOut <- summary(object@estimates) + cat("AIC:", object@AIC,"\n") + cat("Number of distance sampling sites:", numSites(object@data)) + if(!is.null(object@dataPC)){ + cat("\nNumber of point count sites:", numSites(object@dataPC)) + } + if(!is.null(object@dataOC)){ + cat("\nNumber of presence/absence sites:", numSites(object@dataOC)) + } + cat("\noptim convergence code:", object@opt$convergence) + cat("\noptim iterations:", object@opt$counts[1], "\n") + if(!identical(object@opt$convergence, 0L)) + warning("Model did not converge. Try providing starting values or increasing maxit control argment.") + cat("Bootstrap iterations:", length(object@bootstrapSamples), "\n\n") + invisible(summaryOut) +}) + +# Need special method since an included dataset may not have a corresponding +# unique submodel in the unmarked estimates +setMethod("names", "unmarkedFitIDS", function(x){ + out <- c("lam","ds") + if(!is.null(x@dataPC)) out <- c(out, "pc") + if(!is.null(x@dataOC)) out <- c(out, "oc") + if("phi" %in% names(x@estimates)) out <- c(out, "phi") + out +}) + +# This function converts IDS objects into simpler distsamp objects +# so we can re-use distsamp methods +# the abundance model (lam) is combined with one of the detection models +# to yield a distsamp model +IDS_convert_class <- function(inp, type, ds_type=NULL){ + stopifnot(type %in% names(inp)) + if(is.null(ds_type)) ds_type <- type + if(type == "lam") type <- "ds" + data <- inp@data + if(ds_type == "pc"){ + tempdat <- inp@dataPC + maxDist <- inp@maxDist$pc + } + if(ds_type == "oc"){ + tempdat <- inp@dataOC + maxDist <- inp@maxDist$oc + } + if(ds_type %in% c("pc", "oc")){ + if(is.null(maxDist)) maxDist <- max(data@dist.breaks) + data <- unmarkedFrameDS(y=tempdat@y, siteCovs=siteCovs(tempdat), + dist.breaks=c(0, maxDist), survey="point", + unitsIn=data@unitsIn) + } + + # Select appropriate model estimates; if sigma was not estimated + # separately for this model, pick the DS model for detection + det_mod <- type + if(! det_mod %in% names(inp@estimates)) det_mod <- "ds" + est <- inp@estimates@estimates[c("lam", det_mod)] + names(est) <- c("state", "det") + + form <- inp@formlist[[type]] + if(type=="phi") form <- as.formula(paste(c(as.character(form), "~1"), collapse="")) + + new("unmarkedFitDS", fitType="IDS", opt=inp@opt, formula=form, + data=data, keyfun=inp@keyfun, unitsOut=inp@unitsOut, + estimates=unmarkedEstimateList(est), + AIC=inp@AIC, output="density", TMB=inp@TMB) +} + +# This predict method uses IDS_convert_class to allow pass-through to +# distsamp predict method +setMethod("predict", "unmarkedFitIDS", function(object, type, newdata, + backTransform=TRUE, appendData=FALSE, level=0.95, ...){ + stopifnot(type %in% names(object)) + + # Special case of phi and no newdata + # We need a separate prediction for each detection dataset + if(type == "phi" & missing(newdata)){ + + dists <- names(object)[names(object) %in% c("ds", "pc", "oc")] + out <- lapply(dists, function(x){ + conv <- IDS_convert_class(object, "phi", ds_type=x) + predict(conv, "det", backTransform=backTransform, appendData=appendData, + level=level, ...) + }) + names(out) <- dists + + } else { # Regular situation + conv <- IDS_convert_class(object, type) + type <- switch(type, lam="state", ds="det", pc="det", oc="det", phi="det") + out <- predict(conv, type, newdata, backTransform=backTransform, appendData=appendData, + level=level, ...) + } + out +}) + +# Get availability probability +setGeneric("getAvail", function(object, ...) standardGeneric("getAvail")) + +# Get availability for each data type and site as a probability +setMethod("getAvail", "unmarkedFitIDS", function(object, ...){ + stopifnot("phi" %in% names(object)) + phi <- predict(object, "phi") + dur <- object@surveyDurations + out <- lapply(names(phi), function(x){ + 1 - exp(-1 * dur[[x]] * phi[[x]]$Predicted) + }) + names(out) <- names(phi) + out +}) + +# Fitted method returns a list of matrices, one per data type +setMethod("fitted", "unmarkedFitIDS", function(object, na.rm=FALSE){ + + dists <- names(object)[names(object) %in% c("ds", "pc")] + + # If there is an availability model, get availability + # Otherwise set it to 1 + avail <- list(ds=1, pc=1, oc=1) + if("phi" %in% names(object)){ + avail <- getAvail(object) + } + + # fitted for distance and N-mix data components + out <- lapply(dists, function(x){ + conv <- IDS_convert_class(object, type=x) + fitted(conv) * avail[[x]] + }) + names(out) <- dists + + # fitted for occupancy data + if("oc" %in% names(object)){ + conv <- IDS_convert_class(object, type="oc") + lam <- predict(conv, 'state')$Predicted + A <- pi*max(conv@data@dist.breaks)^2 + switch(conv@data@unitsIn, + m = A <- A / 1e6, + km = A <- A) + switch(conv@unitsOut, + m = A <- A * 1e6, + ha = A <- A * 100, + kmsq = A <- A) + lam <- lam * A + + p <- getP(conv) * avail$oc + out$oc <- 1 - exp(-lam*p) ## analytical integration. + } + + out +}) + +# getP returns detection probability WITHOUT availability +setMethod("getP", "unmarkedFitIDS", function(object, ...){ + + dets <- names(object)[! names(object) %in% c("lam","phi")] + + out <- lapply(dets, function(x){ + conv <- IDS_convert_class(object, type=x) + getP(conv) + }) + names(out) <- dets + out +}) + + +setMethod("residuals", "unmarkedFitIDS", function(object, ...){ + + dists <- names(object)[names(object) %in% c("ds", "pc")] + + # distance and N-mix data + out <- lapply(dists, function(x){ + conv <- IDS_convert_class(object, type=x) + residuals(conv) + }) + names(out) <- dists + + # occupancy data + if("oc" %in% names(object)){ + y <- object@dataOC@y + ft <- fitted(object)$oc + out$oc <- y - ft + } + + out +}) + +setMethod("hist", "unmarkedFitIDS", function(x, lwd=1, lty=1, ...){ + + conv <- IDS_convert_class(x, type='ds') + hist(conv, lwd=lwd, lty=lty, ...) + +}) + +setMethod("plot", c(x="unmarkedFitIDS", y="missing"), function(x, y, ...){ + + r <- residuals(x) + f <- fitted(x) + nr <- length(r) + long_names <- sapply(x@estimates@estimates, function(x) x@name) + long_names <- long_names[long_names != "Density"] + + old_par <- graphics::par()$mfrow + graphics::par(mfrow=c(nr,1)) + + for (i in 1:nr){ + plot(f[[i]], r[[i]], ylab = "Residuals", xlab = "Predicted values", + main=long_names[i]) + abline(h = 0, lty = 3, col = "gray") + } + graphics::par(mfrow=old_par) + +}) + + +setMethod("simulate", "unmarkedFitIDS", + function(object, nsim = 1, seed = NULL, na.rm = FALSE){ + + dets <- c("ds","pc","oc") + + if("phi" %in% names(object)) avail <- getAvail(object) + + temp <- lapply(dets, function(x){ + if(! x %in% names(object)) return(NULL) + conv <- IDS_convert_class(object, type=x) + sims <- simulate(conv, nsim=nsim, na.rm=na.rm) + # availability process + if("phi" %in% names(object)){ + sims <- lapply(sims, function(z){ + avail_expand <- matrix(rep(avail[[x]], ncol(z)), ncol=ncol(z)) + out <- rbinom(length(z), z, avail_expand) + matrix(out, nrow=nrow(z), ncol=ncol(z)) + }) + } + if(x=="oc"){ + sims <- lapply(sims, function(z){ + z[z>1] <- 1 + z + }) + } + sims + }) + + #"permute" + lapply(1:nsim, function(i){ + sim <- lapply(temp, function(x) x[[i]]) + names(sim) <- c("ds","pc","oc") + sim + }) + +}) + +setMethod("update", "unmarkedFitIDS", + function(object, lambdaformula, detformulaDS, detformulaPC, detformulaOC, + dataDS, dataPC, dataOC, ...){ + call <- object@call + + if(!missing(lambdaformula)){ + call[["lambdaformula"]] <- lambdaformula + } else { + call[["lambdaformula"]] <- object@formlist$lam + } + + if(!missing(detformulaDS)){ + call[["detformulaDS"]] <- detformulaDS + } else { + call[["detformulaDS"]] <- split_formula(object@formlist$ds)[[1]] + } + + if(!missing(detformulaPC)){ + call[["detformulaPC"]] <- detformulaPC + } else if(!is.null(object@dataPC) & !is.null(call$detformulaPC)){ + call[["detformulaPC"]] <- split_formula(object@formlist$pc)[[1]] + } + + if(!missing(detformulaOC)){ + call[["detformulaOC"]] <- detformulaOC + } else if(!is.null(object@dataOC) & !is.null(call$detformulaOC)){ + call[["detformulaOC"]] <- split_formula(object@formlist$oc)[[1]] + } + + if(!missing(dataDS)){ + call$dataDS <- dataDS + } else { + call$dataDS <- object@data + } + + if(!missing(dataPC)){ + call$dataPC <- dataPC + } else { + call$dataPC <- object@dataPC + } + + if(!missing(dataOC)){ + call$dataOC <- dataOC + } else { + call$dataOC <- object@dataOC + } + + extras <- match.call(call=sys.call(-1), + expand.dots = FALSE)$... + if (length(extras) > 0) { + existing <- !is.na(match(names(extras), names(call))) + for (a in names(extras)[existing]) + call[[a]] <- extras[[a]] + if (any(!existing)) { + call <- c(as.list(call), extras[!existing]) + call <- as.call(call) + } + } + + eval(call, parent.frame(2)) + +}) + + +setMethod("parboot", "unmarkedFitIDS", + function(object, statistic=SSE, nsim=10, ...) +{ + dots <- list(...) + statistic <- match.fun(statistic) + call <- match.call(call = sys.call(-1)) + starts <- as.numeric(coef(object)) + + t0 <- statistic(object, ...) + lt0 <- length(t0) + t.star <- matrix(NA, nsim, lt0) + #if(!missing(report)) + # cat("t0 =", t0, "\n") + + simList <- simulate(object, nsim = nsim, na.rm = FALSE) + + dataDS <- object@data + dataPC <- object@dataPC + has_pc <- "pc" %in% names(object) + dataOC <- object@dataOC + has_oc <- "oc" %in% names(object) + + t.star <- lapply(1:nsim, function(i){ + dataDS@y <- simList[[i]]$ds + if(has_pc) dataPC@y <- simList[[i]]$pc + if(has_oc) dataOC@y <- simList[[i]]$oc + fit <- update(object, dataDS=dataDS, dataPC=dataPC, dataOC=dataOC, + durationDS = object@surveyDurations$ds, + durationPC = object@surveyDurations$pc, + durationOC = object@surveyDurations$oc, + starts=starts) + statistic(fit) + }) + if(lt0 > 1){ + t.star <- t(t.star) + } else { + t.star <- matrix(t.star, ncol=lt0) + } + + if (!is.null(names(t0))){ + colnames(t.star) <- names(t0) + } else{ + colnames(t.star) <- paste("t*", 1:lt0, sep="") + } + + out <- new("parboot", call = call, t0 = t0, t.star = t.star) + return(out) +}) + +setMethod("SSE", "unmarkedFitIDS", function(fit, ...){ + r <- unlist(residuals(fit)) + return(c(SSE = sum(r^2, na.rm=T))) +}) + +setMethod("nonparboot", "unmarkedFitIDS", + function(object, B = 0, keepOldSamples = TRUE, ...) +{ + stop("Not currently supported for unmarkedFitIDS", call.=FALSE) +}) + +setMethod("ranef", "unmarkedFitIDS", + function(object, B = 0, keepOldSamples = TRUE, ...) +{ + stop("Not currently supported for unmarkedFitIDS", call.=FALSE) +}) diff --git a/R/simulate.R b/R/simulate.R index a8887cb..3868a72 100644 --- a/R/simulate.R +++ b/R/simulate.R @@ -76,6 +76,19 @@ setMethod("simulate", "character", } else if(object=="gdistremoval"){ umf@yDistance=x$yDistance umf@yRemoval=x$yRemoval + } else if(object == "IDS"){ + out <- list() + out$ds <- fit@data + out$ds@y <- x$ds + if("pc" %in% names(fit)){ + out$pc <- fit@dataPC + out$pc@y <- x$pc + } + if("oc" %in% names(fit)){ + out$oc <- fit@dataOC + out$oc@y <- x$oc + } + umf <- out } else { umf@y <- x } @@ -185,7 +198,6 @@ setMethod("simulate_fit", "unmarkedFitOccuRN", data=umf, se=FALSE, control=list(maxit=1)) }) - setMethod("get_umf_components", "unmarkedFitMPois", function(object, formulas, guide, design, ...){ args <- list(...) @@ -556,3 +568,107 @@ setMethod("simulate_fit", "unmarkedFitGDR", data=umf, keyfun=keyfun, output=output, unitsOut=unitsOut, mixture=mixture, K=K, se=FALSE, control=list(maxit=1), method='L-BFGS-B') }) + +# For simulating entirely new datasets +setMethod("get_umf_components", "unmarkedFitIDS", + function(object, formulas, guide, design, ...){ + + # Distance sampling dataset + sc_ds_lam <- generate_data(formulas$lam, guide, design$Mds) + sc_ds_det <- generate_data(formulas$ds, guide, design$Mds) + dat_ds <- list(sc_ds_lam, sc_ds_det) + if(!is.null(formulas$phi)){ + sc_ds_phi <- generate_data(formulas$phi, guide, design$Mds) + dat_ds <- c(dat_ds, list(sc_ds_phi)) + } + keep <- sapply(dat_ds, function(x) !is.null(x)) + dat_ds <- dat_ds[keep] + sc_ds <- do.call(cbind, dat_ds) + yblank_ds <- matrix(1, design$Mds, design$J) + + # Point count dataset + sc_pc <- yblank_pc <- NULL + if(!is.null(design$Mpc) && design$Mpc > 0){ + if(is.null(formulas$pc)) form_pc <- formulas$ds + sc_pc_lam <- generate_data(formulas$lam, guide, design$Mpc) + sc_pc_det <- generate_data(form_pc, guide, design$Mpc) + sc_pc <- list(sc_pc_lam, sc_pc_det) + if(!is.null(formulas$phi)){ + sc_pc_phi <- generate_data(formulas$phi, guide, design$Mpc) + sc_pc <- c(sc_pc, list(sc_pc_phi)) + } + keep <- sapply(sc_pc, function(x) !is.null(x)) + sc_pc <- sc_pc[keep] + sc_pc <- do.call(cbind, sc_pc) + yblank_pc <- matrix(1, design$Mpc, 1) + } + + # Presence/absence dataset + sc_oc <- yblank_oc <- NULL + if(!is.null(design$Moc) && design$Moc > 0){ + if(is.null(formulas$oc)){ + form_oc <- formulas$ds + } else { + form_oc <- formulas$oc + } + sc_oc_lam <- generate_data(formulas$lam, guide, design$Moc) + sc_oc_det <- generate_data(form_oc, guide, design$Moc) + sc_oc <- list(sc_oc_lam, sc_oc_det) + if(!is.null(formulas$phi)){ + sc_oc_phi <- generate_data(formulas$phi, guide, design$Moc) + sc_oc <- c(sc_oc, list(sc_oc_phi)) + } + keep <- sapply(sc_oc, function(x) !is.null(x)) + sc_oc <- sc_oc[keep] + sc_oc <- do.call(cbind, sc_oc) + yblank_oc <- matrix(1, design$Moc, 1) + } + + mget(c("yblank_ds", "sc_ds", "yblank_pc", "sc_pc", "yblank_oc", "sc_oc")) +}) + + +setMethod("simulate_fit", "unmarkedFitIDS", + function(object, formulas, guide, design, ...){ + parts <- get_umf_components(object, formulas, guide, design, ...) + args <- list(...) + + args$tlength <- 0 + args$survey <- "point" + + # Distance sampling dataset + umf_ds <- unmarkedFrameDS(y=parts$yblank_ds, siteCovs=parts$sc_ds, + tlength=args$tlength, survey=args$survey, + unitsIn=args$unitsIn, + dist.breaks=args$dist.breaks) + # Point count dataset + umf_pc <- NULL + if(!is.null(design$Mpc) && design$Mpc > 0){ + umf_pc <- unmarkedFramePCount(y=parts$yblank_pc, siteCovs=parts$sc_pc) + } + + # Occupancy dataset + umf_oc <- NULL + if(!is.null(design$Moc) && design$Moc > 0){ + umf_oc <- unmarkedFrameOccu(y=parts$yblank_oc, siteCovs=parts$sc_oc) + } + + keyfun <- ifelse(is.null(args$keyfun), "halfnorm", args$keyfun) + unitsOut <- ifelse(is.null(args$unitsOut), "ha", args$unitsOut) + K <- ifelse(is.null(args$K), 300, args$K) + if(is.null(args$maxDistPC)) args$maxDistPC <- max(args$dist.breaks) + if(is.null(args$maxDistOC)) args$maxDistOC <- max(args$dist.breaks) + + IDS(lambdaformula = formulas$lam, + detformulaDS = formulas$ds, + detformulaPC = formulas$pc, detformulaOC = formulas$oc, + dataDS = umf_ds, dataPC = umf_pc, dataOC = umf_oc, + availformula = formulas$phi, + durationDS = args$durationDS, durationPC = args$durationPC, + durationOC = args$durationOC, + maxDistPC = args$maxDistPC, maxDistOC = args$maxDistOC, + keyfun=keyfun, unitsOut=unitsOut, K=K ,control=list(maxit=1)) +}) + + + diff --git a/R/unmarkedFit.R b/R/unmarkedFit.R index 97ccfe4..84740b3 100644 --- a/R/unmarkedFit.R +++ b/R/unmarkedFit.R @@ -453,6 +453,7 @@ setMethod("fitted", "unmarkedFitDS", function(object, na.rm = FALSE) m = A <- A / 1e6, km = A <- A) switch(object@unitsOut, + m = A <- A * 1e6, ha = A <- A * 100, kmsq = A <- A) lambda <- lambda * A @@ -1660,11 +1661,13 @@ setMethod("getP", "unmarkedFitDS", point = { for(i in 1:M) { a[i, 1] <- pi*db[2]^2 - for(j in 2:J) + if(J > 1){ + for(j in 2:J) a[i, j] <- pi*db[j+1]^2 - sum(a[i, 1:(j-1)]) - u[i,] <- a[i,] / sum(a[i,]) } - }) + u[i,] <- a[i,] / sum(a[i,]) + } + }) switch(key, @@ -2089,6 +2092,7 @@ setMethod("simulate", "unmarkedFitDS", m = A <- A / 1e6, km = A <- A) switch(object@unitsOut, + m = A <- A * 1e6, ha = A <- A * 100, kmsq = A <- A) lambda <- lambda * A @@ -2104,7 +2108,6 @@ setMethod("simulate", "unmarkedFitDS", - setMethod("simulate", "unmarkedFitPCount", function(object, nsim = 1, seed = NULL, na.rm = TRUE) { diff --git a/R/unmarkedFrame.R b/R/unmarkedFrame.R index c936bdf..b97f23b 100644 --- a/R/unmarkedFrame.R +++ b/R/unmarkedFrame.R @@ -1133,10 +1133,7 @@ setMethod("[", c("unmarkedFrame", "numeric", "missing", "missing"), if(all(i < 0)) { # if i is negative, then convert to positive i <- (1:M)[i] } - y <- getY(x)[i,] - if (length(i) == 1) { - y <- t(y) - } + y <- getY(x)[i,,drop=FALSE] siteCovs <- siteCovs(x) obsCovs <- obsCovs(x) if (!is.null(siteCovs)) { diff --git a/man/IDS.Rd b/man/IDS.Rd new file mode 100644 index 0000000..5ec3923 --- /dev/null +++ b/man/IDS.Rd @@ -0,0 +1,142 @@ +\name{IDS} +\alias{IDS} +\alias{hist,unmarkedFitIDS-method} +\alias{names,unmarkedFitIDS-method} + +\title{ +Fit the integrated distance sampling model of Kery et al. (2022). +} + +\description{Model abundance using a combination of distance sampling data (DS) + and other similar data types, including simple point counts (PC) and + occupancy/detection-nondetection (OC/DND) data.} + +\usage{ +IDS(lambdaformula = ~1, detformulaDS = ~1, detformulaPC = NULL, detformulaOC = NULL, + dataDS, dataPC = NULL, dataOC = NULL, availformula = NULL, + durationDS = NULL, durationPC = NULL, durationOC = NULL, keyfun = "halfnorm", + maxDistPC, maxDistOC, K = 100, unitsOut = "ha", + starts = NULL, method = "BFGS", ...) +} + +\arguments{ + \item{lambdaformula}{Formula for abundance} + \item{detformulaDS}{Formula for distance-based (DS) detection probability} + \item{detformulaPC}{Formula for point count detection probability. If + \code{NULL}, will share a model with DS detection probability} + \item{detformulaOC}{Formula for occupancy/detection-nondetection detection + probability. If \code{NULL}, will share a model with DS detection probability} + \item{dataDS}{An object of class \code{unmarkedFrameDS}. Required} + \item{dataPC}{An object of class \code{unmarkedFramePCount}. If \code{NULL}, + no PC data will be used in the model} + \item{dataOC}{An object of class \code{unmarkedFrameOccu}. If \code{NULL}, + no OC/DND data will be used in the model} + \item{availformula}{Optional. If specified, formula for availability. Only possible to + use if you have variable detection survey lengths (see below)} + \item{durationDS}{Optional. Vector of survey durations at each distance sampling site} + \item{durationPC}{Optional. Vector of survey durations at each PC site} + \item{durationOC}{Optional. Vector of survey durations at each OC/DND site} + \item{keyfun}{Distance sampling key function; either "halfnorm" or "exp"} + \item{maxDistPC}{Maximum observation distance for PC surveys; defaults to + maximum of distance bins from the distance sampling data} + \item{maxDistOC}{Maximum observation distance for OC/DND surveys; defaults to + maximum of distance bins from the distance sampling data} + \item{K}{Integer, upper bound for integrating out latent abundance. Only used if + you have included OC/DND data} + \item{unitsOut}{Units of density for output. Either "ha" or "kmsq" for + hectares and square kilometers, respectively} + \item{starts}{A numeric vector of starting values for the model parameters} + \item{method}{Optimization method used by \code{\link{optim}}} + \item{\dots}{Additional arguments to optim, such as lower and upper + bounds} +} + +\value{An object of class unmarkedFitIDS} + +\details{ +This function facilitates a combined analysis of distance sampling data (DS) with other similar +data types, including simple point counts (PC) and occupancy/detection-nondetection (OC/DND) data. +The combined approach capitalizes on the strengths and minimizes the weaknesses +of each type. The PC and OC/DND data are viewed as latent distance sampling surveys +with an underlying abundance model shared by all data types. All analyses +must include some distance sampling data, but can include either PC or DND data +or both. If surveys are of variable duration, it is also possible to estimate +availability. + +Input data must be provided as a series of separate \code{unmarkedFrame}s: +\code{unmarkedFrameDS} for the distance sampling data, \code{unmarkedFramePCount} +for the point count data, and \code{unmarkedFrameOccu} for OC/DND data. +See the help files for these objects for guidance on how to organize the data. +} + +\note{ + Simulations indicated estimates of availability were very unreliable when + including detection/non-detection data, so the function will not allow you + to use DND data and estimate availability at the same time. + In general estimation of availability can be difficult; use simulations + to see how well it works for your specific situation. +} + +\references{ + Kery M, Royle JA, Hallman T, Robinson WD, Strebel N, Kellner KF. 2024. + Integrated distance sampling models for simple point counts. Ecology. +} +\author{Ken Kellner \email{contact@kenkellner.com}} +\seealso{\code{\link{distsamp}}} + +\examples{ + +\dontrun{ + +# Simulate data based on a real dataset + +# Formulas for each model +formulas <- list(lam=~elev, ds=~1, phi=~1) + +# Sample sizes +design <- list(Mds=2912, J=6, Mpc=506) + +# Model parameters +coefs <- list(lam = c(intercept=3, elev=-0.5), + ds = c(intercept=-2.5), + phi = c(intercept=-1.3)) + +# Survey durations +durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30)) + +set.seed(456) +sim_umf <- simulate("IDS", # name of model we are simulating for + nsim=1, # number of replicates + formulas=formulas, + coefs=coefs, + design=design, + # arguments used by unmarkedFrameDS + dist.breaks = seq(0, 0.30, length.out=7), + unitsIn="km", + # arguments used by IDS + # could also have e.g. keyfun here + durationDS=durs$ds, durationPC=durs$pc, durationOC=durs$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + +# Look at the results +lapply(sim_umf, head) + +# Fit a model +(mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, + availformula = ~1, durationDS=durs$ds, durationPC=durs$pc, + maxDistPC=0.5, + unitsOut="kmsq")) + +# Compare with known parameter values +# Note: this is an unusually good estimate of availability +# It is hard to estimate in most cases +cbind(truth=unlist(coefs), est=coef(mod_sim)) + +# Predict density at each distance sampling site +head(predict(mod_sim, 'lam')) + +} + +} diff --git a/man/SSE.Rd b/man/SSE.Rd index e5354ee..e3204f2 100644 --- a/man/SSE.Rd +++ b/man/SSE.Rd @@ -4,6 +4,7 @@ \alias{SSE,unmarkedFit-method} \alias{SSE,unmarkedFitOccuMulti-method} \alias{SSE,unmarkedFitGDR-method} +\alias{SSE,unmarkedFitIDS-method} \title{Compute Sum of Squared Residuals for a Model Fit.} \description{ Compute the sum of squared residuals for an unmarked fit object. This diff --git a/man/fitted-methods.Rd b/man/fitted-methods.Rd index 7ef7ede..9ead41c 100644 --- a/man/fitted-methods.Rd +++ b/man/fitted-methods.Rd @@ -14,9 +14,11 @@ \alias{fitted,unmarkedFitDS-method} \alias{fitted,unmarkedFitGMM-method} \alias{fitted,unmarkedFitGDR-method} +\alias{fitted,unmarkedFitIDS-method} \alias{fitted,unmarkedFitDailMadsen-method} \alias{fitted,unmarkedFitGOccu-method} \alias{fitted,unmarkedFitOccuCOP-method} + \title{Methods for Function fitted in Package `unmarked'} \description{Extracted fitted values from a fitted model. } diff --git a/man/getP-methods.Rd b/man/getP-methods.Rd index 7a3b87e..32028ab 100644 --- a/man/getP-methods.Rd +++ b/man/getP-methods.Rd @@ -17,8 +17,10 @@ \alias{getP,unmarkedFitDSO-method} \alias{getP,unmarkedFitMMO-method} \alias{getP,unmarkedFitGDR-method} +\alias{getP,unmarkedFitIDS-method} \alias{getP,unmarkedFitGOccu-method} \alias{getP,unmarkedFitOccuCOP-method} + \title{Methods for Function getP in Package `unmarked'} \description{ Methods for function \code{getP} in Package `unmarked'. These methods return a matrix of the back-transformed detection parameter (\eqn{p} the detection probability or \eqn{\lambda} the detection rate, depending on the model). The matrix is of dimension MxJ, with M the number of sites and J the number of sampling periods; or of dimension MxJT for models with multiple primary periods T. diff --git a/man/nonparboot-methods.Rd b/man/nonparboot-methods.Rd index 40dd72f..53ff723 100644 --- a/man/nonparboot-methods.Rd +++ b/man/nonparboot-methods.Rd @@ -17,9 +17,11 @@ \alias{nonparboot,unmarkedFitOccuMulti-method} \alias{nonparboot,unmarkedFitNmixTTD-method} \alias{nonparboot,unmarkedFitGDR-method} +\alias{nonparboot,unmarkedFitIDS-method} \alias{nonparboot,unmarkedFitDailMadsen-method} \alias{nonparboot,unmarkedFitOccuCOP-method} + \title{ Nonparametric bootstrapping in unmarked } \description{ Call \code{nonparboot} on an unmarkedFit to obtain non-parametric diff --git a/man/parboot.Rd b/man/parboot.Rd index 6b6493f..597eb54 100644 --- a/man/parboot.Rd +++ b/man/parboot.Rd @@ -1,5 +1,6 @@ \name{parboot} \alias{parboot} +\alias{parboot,unmarkedFitIDS-method} \alias{plot,parboot,missing-method} \alias{show,parboot-method} \title{Parametric bootstrap method for fitted models inheriting class.} diff --git a/man/predict-methods.Rd b/man/predict-methods.Rd index 8b1fb38..79fa019 100644 --- a/man/predict-methods.Rd +++ b/man/predict-methods.Rd @@ -16,6 +16,7 @@ \alias{predict,unmarkedFitPCO-method} \alias{predict,unmarkedFitDSO-method} \alias{predict,unmarkedFitGDR-method} +\alias{predict,unmarkedFitIDS-method} \alias{predict,unmarkedFitList-method} \alias{predict,unmarkedRanef-method} \title{ Methods for Function predict in Package `unmarked' } diff --git a/man/ranef-methods.Rd b/man/ranef-methods.Rd index 1eb0704..66452d7 100644 --- a/man/ranef-methods.Rd +++ b/man/ranef-methods.Rd @@ -22,6 +22,7 @@ \alias{ranef,unmarkedFitDailMadsen-method} \alias{ranef,unmarkedFitGOccu-method} \alias{ranef,unmarkedFitOccuCOP-method} +\alias{ranef,unmarkedFitIDS-method} \title{ Methods for Function \code{ranef} in Package \pkg{unmarked} } \description{ Estimate posterior distributions of the random variables (latent diff --git a/man/simulate-methods.Rd b/man/simulate-methods.Rd index 21bd377..5d37dad 100644 --- a/man/simulate-methods.Rd +++ b/man/simulate-methods.Rd @@ -17,10 +17,12 @@ \alias{simulate,unmarkedFitGDS-method} \alias{simulate,unmarkedFitGPC-method} \alias{simulate,unmarkedFitGDR-method} +\alias{simulate,unmarkedFitIDS-method} \alias{simulate,unmarkedFitDailMadsen-method} \alias{simulate,unmarkedFitGOccu-method} \alias{simulate,unmarkedFitOccuCOP-method} \alias{simulate,character-method} + \title{Methods for Function simulate in Package `unmarked'} \description{ Simulate data from a fitted model. diff --git a/man/unmarkedFit-class.Rd b/man/unmarkedFit-class.Rd index d8b1aea..8237311 100644 --- a/man/unmarkedFit-class.Rd +++ b/man/unmarkedFit-class.Rd @@ -18,6 +18,7 @@ \alias{plot,unmarkedFit,missing-method} \alias{plot,unmarkedFitOccuMulti,missing-method} \alias{plot,unmarkedFitGDR,missing-method} +\alias{plot,unmarkedFitIDS,missing-method} \alias{plot,unmarkedFitOccuCOP,missing-method} \alias{profile,unmarkedFit-method} \alias{residuals,unmarkedFit-method} @@ -27,6 +28,7 @@ \alias{residuals,unmarkedFitOccuMulti-method} \alias{residuals,unmarkedFitOccuTTD-method} \alias{residuals,unmarkedFitGDR-method} +\alias{residuals,unmarkedFitIDS-method} \alias{residuals,unmarkedFitOccuCOP-method} \alias{update,unmarkedFit-method} \alias{update,unmarkedFitColExt-method} @@ -36,6 +38,7 @@ \alias{update,unmarkedFitOccuTTD-method} \alias{update,unmarkedFitNmixTTD-method} \alias{update,unmarkedFitGDR-method} +\alias{update,unmarkedFitIDS-method} \alias{update,unmarkedFitDailMadsen-method} \alias{update,unmarkedFitGOccu-method} \alias{sampleSize} @@ -55,10 +58,12 @@ \alias{unmarkedFitNmixTTD-class} \alias{unmarkedFitDSO-class} \alias{unmarkedFitMMO-class} +\alias{unmarkedFitIDS-class} \alias{plot,profile,missing-method} \alias{show,unmarkedFit-method} \alias{summary,unmarkedFit-method} \alias{summary,unmarkedFitDS-method} +\alias{summary,unmarkedFitIDS-method} \alias{smoothed} \alias{smoothed,unmarkedFitColExt-method} \alias{projected} diff --git a/src/TMB/tmb_IDS.hpp b/src/TMB/tmb_IDS.hpp new file mode 100644 index 0000000..cf1c5b4 --- /dev/null +++ b/src/TMB/tmb_IDS.hpp @@ -0,0 +1,179 @@ +#undef TMB_OBJECTIVE_PTR +#define TMB_OBJECTIVE_PTR obj + +// name of function below **MUST** match filename +template +Type tmb_IDS(objective_function* obj) { + DATA_MATRIX(pind); + DATA_VECTOR(lam_adjust); + + DATA_MATRIX(y_hds); + DATA_MATRIX(X_hds); + DATA_MATRIX(V_hds); + DATA_INTEGER(key_hds); + DATA_VECTOR(db_hds); + DATA_VECTOR(a_hds); + DATA_VECTOR(w_hds); + DATA_VECTOR(u_hds); + + DATA_MATRIX(y_pc); + DATA_MATRIX(X_pc); + DATA_MATRIX(V_pc); + DATA_INTEGER(key_pc); + DATA_VECTOR(db_pc); + DATA_VECTOR(a_pc); + DATA_VECTOR(w_pc); + DATA_VECTOR(u_pc); + + DATA_MATRIX(y_oc); + DATA_MATRIX(X_oc); + DATA_MATRIX(V_oc); + DATA_INTEGER(key_oc); + DATA_VECTOR(db_oc); + DATA_VECTOR(a_oc); + DATA_VECTOR(w_oc); + DATA_VECTOR(u_oc); + DATA_INTEGER(K_oc); + DATA_IVECTOR(Kmin_oc); + + DATA_VECTOR(durationDS); + DATA_VECTOR(durationPC); + DATA_VECTOR(durationOC); + DATA_MATRIX(Xa_hds); + DATA_MATRIX(Xa_pc); + DATA_MATRIX(Xa_oc); + + PARAMETER_VECTOR(beta_lam); + PARAMETER_VECTOR(beta_hds); + PARAMETER_VECTOR(beta_pc); + PARAMETER_VECTOR(beta_oc); + PARAMETER_VECTOR(beta_avail); + + int survey = 1; // Only point counts supported + + Type loglik = 0; + + // HDS + int M = y_hds.rows(); + int J = y_hds.cols(); + + vector lam_hds = X_hds * beta_lam; + lam_hds = exp(lam_hds); + lam_hds = lam_hds * lam_adjust(0); + + vector sigma_hds = V_hds * beta_hds; + sigma_hds = exp(sigma_hds); + + vector p_avail(M); + p_avail.setOnes(); + if(beta_avail.size() > 0){ + vector phi = Xa_hds * beta_avail; + phi = exp(phi); + p_avail = 1 - exp(-1 * durationDS.array() * phi.array()); + } + + for (int i=0; i cp = distance_prob(key_hds, sigma_hds(i), Type(0), survey, + db_hds, w_hds, a_hds, u_hds); + + for (int j=0; j 0){ + + vector lam_pc = X_pc * beta_lam; + lam_pc = exp(lam_pc); + lam_pc = lam_pc * lam_adjust(1); + + //vector sigma_pc = V_pc * beta_pc; + vector sigma_pc; + if(beta_pc.size() > 0){ + sigma_pc = V_pc * beta_pc; + } else{ + sigma_pc = V_pc * beta_hds; + } + sigma_pc = exp(sigma_pc); + + vector p_avail_pc(M); + p_avail_pc.setOnes(); + if(beta_avail.size() > 0){ + vector phi = Xa_pc * beta_avail; + phi = exp(phi); + p_avail_pc = 1 - exp(-1 * durationPC.array() * phi.array()); + } + + for (int i=0; i cp = distance_prob(key_pc, sigma_pc(i), Type(0), survey, + db_pc, w_pc, a_pc, u_pc); + loglik -= dpois(y_pc(i,0), lam_pc(i) * cp(0) * p_avail_pc(i), true); + } + + } + + //printf("pc done"); + + // R-N occupancy + M = y_oc.rows(); + + if(M > 0){ + + vector lam_oc = X_oc * beta_lam; + lam_oc = exp(lam_oc); + lam_oc = lam_oc * lam_adjust(2); + + //vector sigma_oc = V_oc * beta_oc; + vector sigma_oc; + if(beta_oc.size() > 0){ + sigma_oc = V_oc * beta_oc; + } else{ + sigma_oc = V_oc * beta_hds; + } + sigma_oc = exp(sigma_oc); + + vector p_avail_oc(M); + p_avail_oc.setOnes(); + if(beta_avail.size() > 0){ + vector phi = Xa_oc * beta_avail; + phi = exp(phi); + p_avail_oc = 1 - exp(-1 * durationOC.array() * phi.array()); + } + + Type f; + Type g; + Type p; + Type site_lp; + + for (int i=0; i q = 1 - distance_prob(key_oc, sigma_oc(i), Type(0), survey, + db_oc, w_oc, a_oc, u_oc) * p_avail_oc(i); + //vector q = 1 - distance_prob(key_oc, sigma_oc(i), Type(0), survey, + // db_oc, w_oc, a_oc, u_oc); + + site_lp = 0.0; + for (int k=Kmin_oc(i); k<(K_oc+1); k++){ + f = dpois(Type(k), lam_oc(i), false); + p = 1 - pow(q(0), Type(k)); + g = dbinom(y_oc(i,0), Type(1), p, false); + site_lp += f * g; + } + loglik -= log(site_lp); + + } + + } + + return loglik; + +} + +#undef TMB_OBJECTIVE_PTR +#define TMB_OBJECTIVE_PTR this diff --git a/src/TMB/unmarked_TMBExports.cpp b/src/TMB/unmarked_TMBExports.cpp index 6aa57de..a8fdc7c 100644 --- a/src/TMB/unmarked_TMBExports.cpp +++ b/src/TMB/unmarked_TMBExports.cpp @@ -9,6 +9,7 @@ #include "tmb_multinomPois.hpp" #include "tmb_distsamp.hpp" #include "tmb_gdistremoval.hpp" +#include "tmb_IDS.hpp" #include "tmb_goccu.hpp" template @@ -24,6 +25,8 @@ Type objective_function::operator() () { return tmb_distsamp(this); } else if(model == "tmb_gdistremoval"){ return tmb_gdistremoval(this); + } else if(model == "tmb_IDS"){ + return tmb_IDS(this); } else if(model == "tmb_goccu"){ return tmb_goccu(this); } else { diff --git a/tests/testthat/test_IDS.R b/tests/testthat/test_IDS.R new file mode 100644 index 0000000..61d348d --- /dev/null +++ b/tests/testthat/test_IDS.R @@ -0,0 +1,190 @@ +context("IDS fitting function") +skip_on_cran() + +test_that("IDS can fit models with covariates", { + set.seed(123) + formulas <- list(lam=~elev, ds=~1, phi=~1) + # Based on values from real data + design <- list(Mds=1000, J=6, Mpc=300) + # Based on values from real data + coefs <- list(lam = c(intercept=3, elev=-0.5), + ds = c(intercept=-2.5), + phi = c(intercept=-1.3)) + # Survey durations, loosely based on real data + durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30)) + + sim_umf <- simulate("IDS", # name of model we are simulating for + nsim=1, # number of replicates + formulas=formulas, + coefs=coefs, + design=design, + # arguments used by unmarkedFrameDS + dist.breaks = seq(0, 0.30, length.out=7), + unitsIn="km", + # arguments used by IDS + # could also have e.g. keyfun here + durationDS=durs$ds, durationPC=durs$pc, durationOC=durs$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + set.seed(123) + mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, + availformula = ~1, durationDS=durs$ds, durationPC=durs$pc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + + expect_equivalent(coef(mod_sim), c(3.0271179,-0.4858101,-2.5050244,-1.3729505), tol=1e-5) + + # Predict + pr <- predict(mod_sim, type = 'lam') + expect_equal(nrow(pr), 1000) # predicts only for the distance sampled sites + pr <- predict(mod_sim, type = 'lam', newdata=sim_umf$pc) + + pr <- predict(mod_sim, type = 'ds') + expect_equal(nrow(pr), 1000) + expect_equal(pr$Predicted[1], exp(-2.5), tol=0.05) + pr <- predict(mod_sim, type = 'pc') + expect_equal(nrow(pr), 300) + expect_equal(pr$Predicted[1], exp(-2.5), tol=0.05) + pr <- predict(mod_sim, type = 'phi') + expect_equal(pr$ds$Predicted[1], exp(-1.37), tol=0.05) + + # residuals + r <- residuals(mod_sim) + expect_equal(lapply(r, dim), list(ds=c(1000,6), pc = c(300,1))) + + # parboot + pb <- parboot(mod_sim, nsim=2) + + pdf(NULL) + plot(mod_sim) + hist(mod_sim) + dev.off() + + expect_error(nonparboot(mod_sim)) + expect_error(ranef(mod_sim)) + + # Separate detection models + mod_sep <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaPC = ~1, + dataDS=sim_umf$ds[1:100,], dataPC=sim_umf$pc[1:100,], + availformula = ~1, durationDS=durs$ds[1:100], durationPC=durs$pc[1:100], + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + expect_equal(length(coef(mod_sim)), 4) + expect_equal(length(coef(mod_sep)), 5) +}) + +test_that("IDS can fit models with occupancy data", { + + set.seed(123) + formulas <- list(lam=~elev, ds=~1, oc=~1) + # Based on values from real data + design <- list(Mds=100, J=6, Mpc=100, Moc=100) + # Based on values from real data + coefs <- list(lam = c(intercept=3, elev=-0.5), + ds = c(intercept=-2.5), + oc = c(intercept = -2)) + + sim_umf <- simulate("IDS", # name of model we are simulating for + nsim=1, # number of replicates + formulas=formulas, + coefs=coefs, + design=design, + # arguments used by unmarkedFrameDS + dist.breaks = seq(0, 0.30, length.out=7), + unitsIn="km", + # arguments used by IDS + # could also have e.g. keyfun here + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + + mod_oc <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + + expect_equivalent(coef(mod_oc), c(3.0557091, -0.4200719, -2.5384331, -2.0610341), + tol=1e-5) + + pr <- predict(mod_oc, type='oc') + expect_equal(pr$Predicted[1], 0.1273222, tol=1e-5) + + res <- residuals(mod_oc) + expect_equal(lapply(res, dim), list(ds=c(100,6), pc = c(100,1), oc=c(100,1))) + + pb <- parboot(mod_oc, nsim=1) + + # Don't estimate availability if OC data + expect_error( + mod_oc <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + availformula = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc, + maxDistPC=0.5, maxDistOC=0.5, + durationDS=durs$ds, durationPC=durs$pc, durationOC=durs$pc, + unitsOut="kmsq") + ) + + # Just occupancy data + mod_oc <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + dataDS=sim_umf$ds, dataOC=sim_umf$oc, + maxDistOC=0.5, + unitsOut="kmsq") + + expect_equal(names(mod_oc), c("lam","ds","oc")) + +}) + +test_that("IDS handles missing values", { + + set.seed(123) + design <- list(Mds=100, J=6, Mpc=100, Moc=100) + formulas <- list(lam=~elev, ds=~1, phi=~1) + # Based on values from real data + coefs <- list(lam = c(intercept=3, elev=-0.5), + ds = c(intercept=-2.5), + phi = c(intercept=-1.3)) + # Survey durations, loosely based on real data + durs <- list(ds = rep(5, design$Mds), pc=runif(design$Mpc, 3, 30)) + + sim_umf <- simulate("IDS", # name of model we are simulating for + nsim=1, # number of replicates + formulas=formulas, + coefs=coefs, + design=design, + # arguments used by unmarkedFrameDS + dist.breaks = seq(0, 0.30, length.out=7), + unitsIn="km", + # arguments used by IDS + # could also have e.g. keyfun here + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + + sim_umf$pc@y[1,1] <- NA + sim_umf$pc@y[2,] <- NA + + sim_umf$oc@y[1,1] <- NA + sim_umf$oc@y[2,] <- NA + + expect_warning( + mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + ) + + expect_equivalent(coef(mod_sim), c(2.9354934,-0.4759405,-2.5314594,-2.3259133), + tol=1e-5) + + + sim_umf$ds@y[1,1] <- NA + sim_umf$ds@y[2,] <- NA + + expect_error( + expect_warning( + mod_sim <- IDS(lambdaformula = ~elev, detformulaDS = ~1, detformulaOC = ~1, + dataDS=sim_umf$ds, dataPC=sim_umf$pc, dataOC=sim_umf$oc, + maxDistPC=0.5, maxDistOC=0.5, + unitsOut="kmsq") + )) + +}) -- cgit v1.2.3