Add documentation for powerAnalysis and simulate

author: Ken Kellner <ken@kenkellner.com> 2021-09-03 16:03:50 -0400
committer: Ken Kellner <ken@kenkellner.com> 2021-09-03 16:03:50 -0400
commit: b513c39d50f9fc3f14215b026d23d299b1051573 (patch)
tree: 7a866749ff27f1b2a243bafd0cd46b905db0dfa4 /man
parent: 8a6e0dfe3d521a0b1ca7db15057c12cc39947dbe (diff)
5 files changed, 340 insertions, 2 deletions
diff --git a/man/powerAnalysis.Rd b/man/powerAnalysis.Rd
new file mode 100644
index 0000000..8db3fcc
--- /dev/null
+++ b/man/powerAnalysis.Rd
@@ -0,0 +1,99 @@
+\name{powerAnalysis}
+\alias{powerAnalysis}
+
+\title{Conduct a power analysis on an unmarked model}
+
+\description{
+This function uses a simulation-based approach to estimate power for parameters 
+in unmarked models. At a minimum, users must provide a fitted \code{unmarked} model object
+(preferably fit with simulated data) which ensures the model has been properly
+specified, a list of effect sizes for each parameter in the model (\code{coefs}), 
+and the desired Type I error (\code{alpha}). It is also possible to get power
+for a range of other sample sizes besides the sample size in the fitted
+model object using the \code{design} argument to subsample within the
+provided dataset. See the \code{unmarkedPower} vignette for more details and
+examples.
+}
+
+\usage{
+  powerAnalysis(object, coefs=NULL, design=NULL, alpha=0.05, nulls=list(),
+                nsim=100, parallel=FALSE)
+}
+
+\arguments{
+  \item{object}{A fitted model inheriting class \code{unmarkedFit}. This
+    could potentially be fit using real data, but ideally you would simulate
+    an appropriate dataset using \code{simulate}}
+  \item{coefs}{A list containing the desired effect sizes for which you want
+    to estimate power. This list must follow a specific format. There is one
+    named entry in the list per submodel (e.g., occupancy, detection). To
+    get the required submodel names call \code{names(object)} on your fitted model.
+    Then, each list entry is a named vector with the names corresponding to the
+    parameter names for that submodel, and the values corresponding to the 
+    desired effect sizes. It may be easier to leave \code{coefs=NULL}, which
+    will generate an error message with a template that you can fill in.
+  }
+  \item{design}{An optional list of design/sample size parameters containing 
+    at a minimum two named elements: \code{M}, the number of sites, and \code{J},
+    the number of observations per site. If this list is provided, \code{unmarked} 
+    will subsample the provided dataset to the specified number of sites and
+    observations, allowing you to test power for different designs. If
+    your model has multiple primary periods you must also include \code{T},
+    the number of periods, in the list.
+  }
+  \item{alpha}{Desired Type I error rate}
+  \item{nulls}{If provided, a list matching the structure of \code{coefs} which
+    defines the null hypothesis value for each parameter. By default the null
+    is 0 for all parameters.
+  }
+  \item{nsim}{Number of simulations to conduct}
+  \item{parallel}{If \code{TRUE}, run simulations in parallel. This may speed up
+    the power analysis in some situations
+  }
+} 
+
+\value{\code{unmarkedPower} object containing the results of the power analysis}
+
+\author{Ken Kellner \email{contact@kenkellner.com}}
+
+\seealso{
+  \code{\link{unmarkedPowerList}}
+}
+
+\examples{
+
+\dontrun{
+
+# Simulate an occupancy dataset
+# Covariates to include in simulation
+forms <- list(state=~elev, det=~1)
+
+# Covariate effects and intercept values
+coefs <- list(state=c(intercept=0, elev=-0.4), det=c(intercept=0))
+
+# Study design
+design <- list(M=300, J=8) # 300 sites, 8 occasions per site
+
+# Simulate an unmarkedFrameOccu
+occu_umf <- simulate("occu", formulas=forms, coefs=coefs, design=design)
+
+# Fit occupancy model to simulated data
+# This will contain all the model structure info powerAnalysis needs
+# The estimates from the model aren't used
+template_model <- occu(~1~elev, occu_umf)
+
+# If we run powerAnalysis without specifying coefs we'll get a template list
+powerAnalysis(template_model)
+
+# Set desired effect sizes to pass to coefs
+effect_sizes <- list(state=c(intercept=0, elev=-0.4), det=c(intercept=0))
+
+# Run power analysis and look at summary
+(pa <- powerAnalysis(template_model, coefs=effect_sizes, alpha=0.05))
+
+# Try a smaller sample size in the study design
+(pa2 <- powerAnalysis(template_model, coefs=effect_sizes, alpha=0.05,
+                      design=list(M=100, J=2)))
+
+}
+}
diff --git a/man/simulate-methods.Rd b/man/simulate-methods.Rd
index 38b4fe6..b0144cb 100644
--- a/man/simulate-methods.Rd
+++ b/man/simulate-methods.Rd
@@ -19,6 +19,7 @@
 \alias{simulate,unmarkedFitDSO-method}
 \alias{simulate,unmarkedFitMMO-method}
 \alias{simulate,unmarkedFitGDR-method}
+\alias{simulate,character-method}
 \title{Methods for Function simulate in Package `unmarked'}
 \description{
 Simulate data from a fitted model.
@@ -30,13 +31,49 @@ Simulate data from a fitted model.
 \S4method{simulate}{unmarkedFitOccu}(object, nsim, seed, na.rm)
 \S4method{simulate}{unmarkedFitOccuRN}(object, nsim, seed, na.rm)
 \S4method{simulate}{unmarkedFitPCount}(object, nsim, seed, na.rm)
+\S4method{simulate}{character}(object, nsim=1, seed=NULL, formulas, coefs=NULL,
+  design, guide=NULL, ...)
 }
+
 \arguments{
 \item{object}{Fitted model of appropriate S4 class}
 \item{nsim}{Number of simulations}
 \item{seed}{Seed for random number generator. Not currently implemented}
 \item{na.rm}{Logical, should missing values be removed?}
+\item{formulas}{
+  A named list of formulas, one per submodel (e.g. a formula for occupancy
+  \code{"state"} and a formula for detection \code{"det"}). To get the correct
+  submodel names for a given model, fit an example for that model, and then
+  call \code{names(fitted_model)}
+}
+\item{coefs}{
+  A named list of vectors of coefficients associated with the regression
+  intercepts and slopes for each submodel. List should be named as with
+  \code{formulas} above. Each element of the list should be a named vector,
+  where the names correspond to the names of the parameters in the model
+  (intercept and covariates). If you are not sure how to structure this list,
+  just run \code{simulate} with \code{coefs=NULL}; this will generate
+  a template list you can copy and fill in.
+}
+\item{design}{
+  A named list of components of the study design. Must include at least \code{M},
+  the number of sites, and \code{J}, the number of observations per site. If you
+  are fitting a model with multiple primary periods you must also provide
+  \code{T}, the number of primary periods.
+}
+\item{guide}{
+  An optional list defining the format (continuous or categorical/factor) and distribution,
+  if continuous, of covariates you want to simulate. By default all covariates
+  are simulated from a standard normal. See the examples below for
+  how to specify entries in the \code{guide} list.
+}
+\item{...}{
+  Additional arguments that are needed to fully specify the simulated dataset
+  for a particular model. For example, \code{mixture} for \code{pcount} models
+  or \code{keyfun} for \code{distsamp} models.
+}
 }
+
 \section{Methods}{
 \describe{
 \item{object = "unmarkedFitColExt"}{A model fit by \code{\link{colext}}}
@@ -45,6 +82,56 @@ Simulate data from a fitted model.
 \item{object = "unmarkedFitOccu"}{A model fit by \code{\link{occu}}}
 \item{object = "unmarkedFitOccuRN"}{A model fit by \code{\link{occuRN}}}
 \item{object = "unmarkedFitPCount"}{A model fit by \code{\link{pcount}}}
+\item{object = "character"}{The name of an \code{unmarked} model type (e.g. \code{"occu"}); returns a simulated \code{unmarkedFrame} of the appropriate type}
 }}
 \keyword{methods}
 
+\examples{
+
+\dontrun{
+
+# Simulation of an occupancy dataset from scratch
+
+# Formulas for each submodel
+# occupancy is a function of elevation, detection is intercept-only
+forms <- list(state=~elev, det=~1)
+
+# Specify list of coefficients - there must be a value for each
+# covariate plus an intercept for each submodel
+coefs <- list(state=c(intercept=0, elev=-0.4), det=c(intercept=0))
+
+# Study design
+design <- list(M=300, J=8) # 300 sites, 8 occasions per site
+
+# If we don't specify coefs, unmarked will generate a template you can copy and use
+simulate("occu", formulas=forms, design=design)
+
+# Generate unmarkedFrameOccu
+occu_umf <- simulate("occu", formulas=forms, coefs=coefs, design=design)
+head(occu_umf) # note one covariate, elev
+
+# What if we wanted to add a categorical/factor covariate or
+# customize the distribution of elev?
+# Use the guide argument
+
+# Updated formulas with new covariate
+forms2 <- list(state=~elev+landcover, det=~1)
+
+# Guide
+guide <- list(landcover=factor(levels=c("forest","grass")), # landcover is factor, you must provide the levels 
+              elev=list(dist=rnorm, mean=2, sd=0.5)) # custom distribution
+
+# Updated coefficients list
+coefs2 <- list(state=c(intercept=0, elev=-0.4, landcovergrass=0.2), det=c(intercept=0))
+
+# Simulate new dataset
+head(simulate("occu", formulas=forms2, coefs=coefs2, design=design, guide=guide))
+# Note new categorical covariate
+
+# For some models you may want to specify other arguments, such as 'mixture'
+# for pcount or 'keyfun' for distsamp
+# See the documentation for the associated fitting function and unmarkedFrame
+# for what arguments are possible to include for a given model
+head(simulate("pcount", formulas=forms, coefs=coefs, design=design, mixture="NB"))
+}
+}
diff --git a/man/unmarkedFrame-class.Rd b/man/unmarkedFrame-class.Rd
index 68d207d..9df4157 100644
--- a/man/unmarkedFrame-class.Rd
+++ b/man/unmarkedFrame-class.Rd
@@ -27,8 +27,6 @@
 \alias{plot,unmarkedFrame,missing-method}
 \alias{plot,unmarkedFrameOccuMulti,missing-method}
 \alias{plot,unmarkedFrameOccuTTD,missing-method}
-\alias{powerAnalysis}
-\alias{powerAnalysis,formula,unmarkedFramePCount,numeric-method}
 \alias{projection,unmarkedFrame-method}
 \alias{projection}
 \alias{siteCovs,unmarkedFrame-method}
diff --git a/man/unmarkedPower-class.Rd b/man/unmarkedPower-class.Rd
new file mode 100644
index 0000000..3df4d77
--- /dev/null
+++ b/man/unmarkedPower-class.Rd
@@ -0,0 +1,63 @@
+\name{unmarkedPower-methods}
+\alias{unmarkedPower-methods}
+\alias{unmarkedPower-class}
+\alias{show,unmarkedPower-method}
+\alias{summary,unmarkedPower-method}
+\alias{update,unmarkedPower-method}
+
+\title{Methods for unmarkedPower objects}
+
+\description{Various functions to summarize and update unmarkedPower objects}
+
+\usage{
+\S4method{show}{unmarkedPower}(object)
+\S4method{summary}{unmarkedPower}(object, ...)
+\S4method{update}{unmarkedPower}(object, ...)
+}
+
+\arguments{
+  \item{object}{An object of class \code{unmarkedPower} created with the 
+    \code{powerAnalysis} function}
+  \item{...}{For \code{update}, arguments to change in the updated power analysis.
+    Not used by \code{summary}}
+}
+
+\value{
+  For \code{show} and \code{summary}, summary output is printed to the console.
+  For \code{update}, a new \code{unmarkedPower} object corresponding to the
+  new arguments provided.
+}
+
+\author{Ken Kellner \email{contact@kenkellner.com}}
+
+\seealso{
+  \code{\link{powerAnalysis}}
+}
+
+\examples{
+
+\dontrun{
+
+# Simulate an occupancy dataset
+forms <- list(state=~elev, det=~1)
+coefs <- list(state=c(intercept=0, elev=-0.4), det=c(intercept=0))
+design <- list(M=300, J=8) # 300 sites, 8 occasions per site
+occu_umf <- simulate("occu", formulas=forms, coefs=coefs, design=design)
+
+# Fit occupancy model to simulated data
+template_model <- occu(~1~elev, occu_umf)
+
+# Set desired effect sizes to pass to coefs
+effect_sizes <- list(state=c(intercept=0, elev=-0.4), det=c(intercept=0))
+
+# Run power analysis
+pa <- powerAnalysis(template_model, coefs=effect_sizes, alpha=0.05)
+
+# Look at summary
+summary(pa)
+
+# Update the analysis with new arguments
+(pa2 <- update(pa, alpha=0.01))
+
+}
+}
diff --git a/man/unmarkedPowerList.Rd b/man/unmarkedPowerList.Rd
new file mode 100644
index 0000000..335d82b
--- /dev/null
+++ b/man/unmarkedPowerList.Rd
@@ -0,0 +1,91 @@
+\name{unmarkedPowerList}
+\alias{unmarkedPowerList}
+\alias{unmarkedPowerList,list-method}
+\alias{unmarkedPowerList,unmarkedFit-method}
+\alias{unmarkedPowerList-class}
+\alias{unmarkedPowerList-methods}
+\alias{show,unmarkedPowerList-method}
+\alias{summary,unmarkedPowerList-method}
+\alias{plot,unmarkedPowerList,ANY-method}
+
+\title{Create or summarize a series of unmarked power analyses}
+
+\description{
+  A list of power analyses created with \code{powerAnalysis} can be combined 
+  using \code{unmarkedPowerList}, allowing comparison e.g. between different
+  study designs/sample sizes. Additionally an \code{unmarkedPowerList} can be
+  created directly from an \code{unmarkedFit} template model by specifying
+  a series of study designs (number of sites, number of observations) 
+  as a \code{data.frame}. A series of methods for \code{unmarkedPowerList}
+  objects are available including a \code{plot} method.
+}
+
+\usage{
+\S4method{unmarkedPowerList}{list}(object, ...)
+\S4method{unmarkedPowerList}{unmarkedFit}(object, coefs, design, alpha=0.05,
+                                          nsim=100, parallel=FALSE, ...)
+\S4method{show}{unmarkedPowerList}(object)
+\S4method{summary}{unmarkedPowerList}(object, ...)
+\S4method{plot}{unmarkedPowerList,ANY}(x, power=NULL, param=NULL, ...)
+}
+
+\arguments{
+  \item{object,x}{A \code{list} of \code{unmarkedPower} objects, a fitted model 
+    inheriting class \code{unmarkedFit}, or an \code{unmarkedPowerList} object,
+    depending on the method
+  }
+  \item{coefs}{A named list of effect sizes, see documentation for
+    \code{powerAnalysis}}
+  \item{design}{A \code{data.frame} with one row per study design to test, and
+    at least 2 named columns: \code{M} for number of sites and \code{J} for
+    number of observations. If you have >1 primary period a \code{T} column
+    must also be provided}
+  \item{alpha}{Type I error rate}
+  \item{nsim}{The number of simulations to run for each scenario/study design}
+  \item{parallel}{If \code{TRUE}, run simulations in parallel}
+  \item{power}{When plotting, the target power. Draws a horizontal line
+    at a given value of power on the plot}
+  \item{param}{When plotting, the model parameter to plot power vs. sample size for.
+    By default this is the first parameter (which is usually an intercept,
+    so not very interesting)}
+  \item{...}{Not used}
+} 
+
+\value{An \code{unmarkedPowerList} object, a summary of the object in the console,
+      or a summary plot, depending on the method}
+
+\author{Ken Kellner \email{contact@kenkellner.com}}
+
+\seealso{
+  \code{\link{powerAnalysis}}
+}
+
+\examples{
+
+\dontrun{
+
+# Simulate an occupancy dataset and build template model
+forms <- list(state=~elev, det=~1)
+coefs <- list(state=c(intercept=0, elev=-0.4), det=c(intercept=0))
+design <- list(M=300, J=8) # 300 sites, 8 occasions per site
+occu_umf <- simulate("occu", formulas=forms, coefs=coefs, design=design)
+template_model <- occu(~1~elev, occu_umf)
+
+# Generate two power analyses
+effect_sizes <- list(state=c(intercept=0, elev=-0.4), det=c(intercept=0))
+pa <- powerAnalysis(template_model, coefs=effect_sizes, alpha=0.05)
+pa2 <- powerAnalysis(template_model, effect_sizes, design=list(M=100,J=2))
+
+# Build unmarkedPowerList and look at summary
+(pl <- unmarkedPowerList(list(pa,pa2)))
+
+# Run a bunch of power analyses for different scenarios all at once
+scenarios <- expand.grid(M=c(50,200,400),
+                         J=c(3,5,8))
+(pl2 <- unmarkedPowerList(template_model, effect_sizes, design=scenarios, nsim=20))
+
+# Look at summary plot for elev effect
+plot(pl2, power=0.8, param='elev')
+
+}
+}
author	Ken Kellner <ken@kenkellner.com>	2021-09-03 16:03:50 -0400
committer	Ken Kellner <ken@kenkellner.com>	2021-09-03 16:03:50 -0400
commit	b513c39d50f9fc3f14215b026d23d299b1051573 (patch)
tree	7a866749ff27f1b2a243bafd0cd46b905db0dfa4 /man
parent	8a6e0dfe3d521a0b1ca7db15057c12cc39947dbe (diff)