aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKen Kellner <ken.kellner@gmail.com>2015-01-05 20:21:08 (GMT)
committerKen Kellner <ken.kellner@gmail.com>2015-01-05 20:21:08 (GMT)
commitb9fa6834e4c508bbe5dd0974037a3a8fb1129eda (patch)
tree8fb1c9e6e7380b18da20c49335c0983199ff6cf4
parent9cc1848f80a7463fa2963598a325aea9fb4c6349 (diff)
Clean up files, add details to README. Prepare for project to be made public.
-rw-r--r--README.md17
-rw-r--r--analysis_id_alltaxa.R (renamed from analysis_id_logistic.R)35
-rw-r--r--analysis_id_correlations.R69
-rw-r--r--analysis_id_septaxa.R (renamed from analysis_id_logistic_septax.R)35
-rw-r--r--data/script_id_database_format.R11
-rw-r--r--figure_id_alltaxa.R43
-rw-r--r--figure_id_logistic.R56
-rw-r--r--models/model_id_alltaxa.R (renamed from models/model_id_logistic.R)46
-rw-r--r--models/model_id_septaxa.R (renamed from models/model_id_logistic_septax.R)0
9 files changed, 114 insertions, 198 deletions
diff --git a/README.md b/README.md
index b7f74c0..c20aeee 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,19 @@
imperfect-detection
===================
-Quantitative literature review of ecological statistical methods accounting for imperfect detection.
+A quantitative literature review of ecological statistical methods accounting for imperfect detection. We read 500+ ecology papers from 40+ years (collected using a stratified sampling method) and assessed which papers accounted for imperfect detection, if necessary. Probability that a paper accounted for imperfect detection was modeled as a function of several covariates including year, journal type, and the taxa studied. We used a hierarchical logistic regression model fit in a Bayesian framework.
+
+The results are published in the following article:
+
+[*Kellner, Kenneth F.; Swihart, Robert K. 2014. Accounting for imperfect detection in ecology: a quantitative review. PLoS ONE 9(10): e111436. doi:10.1371/journal.pone.0111436*](http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0111436)
+
+Metadata
+====================
+
+The *analysis_id_alltaxa.R* file contains the framework of the Bayesian analysis in JAGS for the project. Additional information is provided in the comments of the file. The model used in the BUGS language is contained in the file *models/model_id_alltaxa.R* and the output is saved in *output/output_id_alltaxa.Rda*. Figure 1 in the paper is based on this analysis; the code for the figure is in *figure_id_alltaxa.R*.
+
+The *analysis_id_septaxa.R* file contains the framework of the Bayesian analysis in JAGS when each taxa was analyzed separately. Additional information is provided in the comments of the file. The model used in the BUGS language is contained in the file *models/model_id_septaxa.R* and the output is saved in *output/output_id_septaxa.Rda*.
+
+The *analysis_detection_covs.R* file contains some simple calculations and characterizations of how reported detection probabilities varied in the sampled papers.
+
+The *data* folder contains the raw data files (CSV format) used in the analyses as well as a script used to generate a basic version of the paper database.
diff --git a/analysis_id_logistic.R b/analysis_id_alltaxa.R
index 9222cb9..8f4eda8 100644
--- a/analysis_id_logistic.R
+++ b/analysis_id_alltaxa.R
@@ -1,15 +1,17 @@
-##Work in progress model for imperfect detection
+###################################################
+## Imperfect detection project ##
+## Hierarchical logistic regression analysis ##
+###################################################
#Get dataset
data.raw = read.csv('data/imperfect_detection_master.csv',header=TRUE)
-#Get only important columns and convert some to factors
+#Get only important columns
data = data.raw[,c(2,4,5,7:12,14:20,22,24:25,27:33)]
#Begin data
#Indexes
-njournals = 10
npapers = dim(data)[1]
nobs = rep(1,npapers)
@@ -37,6 +39,7 @@ ri = data$RI
su = data$SU
ra = data$RA
+#Year index for calculating yearly means
yrindex = matrix(data=0,nrow=npapers, ncol=5)
for (i in 1:npapers){
yrindex[i,year[i]] = 1
@@ -45,6 +48,7 @@ for (i in 1:npapers){
#Dependent variable
id = cbind(data$KenID,data$RobID)
+#Index needed to get around missing data
cucount = matrix(NA,nrow=npapers,ncol=2)
index = 1
for (i in 1:dim(id)[1]){
@@ -54,47 +58,44 @@ for (i in 1:dim(id)[1]){
}
}
-#Bundle
-jags.data = list(#'njournals',
- 'journal','jsingletaxa','yrindex'
+#Bundle data
+jags.data = list('journal','jsingletaxa','yrindex'
,'npapers','nobs','year','obs','id'
,'bi','fi','ma','hr','ins','pl'
- ,'cm','land',
- 'ab','oc','ri','su','ra',
- 'cucount'
+ ,'cm','land'
+ ,'ab','oc','ri','su','ra'
+ ,'cucount'
)
#Parameters to save
params = c( 'alpha','p_alpha'
- #'psi_journal_mean','psi_journal_sigma','p_journal_mean','p_journal_sigma'
,'beta_year','beta_bi','beta_fi','beta_ma','beta_hr','beta_ins','beta_pl'
,'beta_ab','beta_oc','beta_ri','beta_su','beta_ra'
, 'beta_singletax'
- # ,'beta_biyr','beta_fiyr','beta_mayr','beta_hryr','beta_insyr','beta_plyr'
,'beta_cm','beta_land'
,'p_beta_obs'
,'p_beta_bi','p_beta_fi','p_beta_ma','p_beta_hr','p_beta_ins','p_beta_pl'
- #,'p_beta_ab','p_beta_oc','p_beta_ri','p_beta_su','p_beta_ra'
,'bi_fi','bi_ma','bi_hr','bi_ins','bi_pl','fi_ma','fi_hr','fi_ins','fi_pl'
,'ma_hr','ma_ins','ma_pl','hr_ins','hr_pl','ins_pl'
,'birdmean','fishmean','mammean','herpmean','insmean','plantmean','pmean'
)
+
+#Model file location
modelFilename = "models/model_id_logistic.R"
#Inits
-#Inits for jags
-
inits <- function(){
zinit = vector(length=npapers)
for (i in 1:npapers){
zinit[i] = max(data$KenID[i],data$RobID[i],na.rm=TRUE)
}
list(alpha = 1, p_alpha=1,
- #psi_journal_sigma = 1, p_journal_sigma = 1,
z = zinit)}
#Analysis
-id.out.new = jags(data=jags.data, inits=inits,parameters.to.save=params, model.file=modelFilename,
+library(jagsUI)
+
+id.out = jags(data=jags.data, inits=inits,parameters.to.save=params, model.file=modelFilename,
n.chains=3, n.iter=2000, n.burnin=1000, n.thin=2)
-save(id.out,file="output_id_logistic.Rdata")
+save(id.out,file="output/output_id_alltaxa.Rda")
diff --git a/analysis_id_correlations.R b/analysis_id_correlations.R
deleted file mode 100644
index 2453970..0000000
--- a/analysis_id_correlations.R
+++ /dev/null
@@ -1,69 +0,0 @@
-#################################################################
-##Script to Measure correlations between variables in ID analysis
-#################################################################
-
-data.raw = read.csv('imperfect_detection_master.csv',header=TRUE)
-
-#Get only important columns and convert some to factors
-data = data.raw[,c(2,4,5,7:12,22,24:25,27:31)]
-
-#Begin data
-
-#Indexes
-njournals = 10
-npapers = dim(data)[1]
-
-nobs = rep(1,npapers)
-for (i in 1:npapers){
- if(data$Rob[i]==1){nobs[i]=2}
-}
-
-journal = data$Jnum
-
-#Covariates
-year = data$YrGrp
-bi = data$BI
-fi = data$FI
-ma = data$MA
-hr = data$HR
-ins = data$IN
-pl = data$PL
-cm = data$CM
-pop = 1-cm
-land = data$LAND + data$REG
-loc = 1-land
-ab = data$AB
-oc = data$OC
-ri = data$RI
-su = data$SU
-ra = data$RA
-
-#Correlation function - inputs must be same length
-phico = function(var1,var2){
- n = length(var1)
- n11 = sum((var1==1)&(var2==1))
- n10 = sum((var1==1)&(var2==0))
- n01 = sum((var1==0)&(var2==1))
- n00 = sum((var1==0)&(var2==0))
-
- na1 = n11+n01
- na0 = n10+n00
- n1a = n11+n10
- n0a = n01+n00
-
- #calculation
- part = .01*n1a*n0a*na0*na1
- denom = sqrt(part*100)
- phi = (n11*n00 - n10*n01)/denom
- return(phi)
-}
-
-phico(fi,bi)
-
-##Chi-square tests
-test = chisq.test()
-
-
-
-
-
diff --git a/analysis_id_logistic_septax.R b/analysis_id_septaxa.R
index 0739eae..b98d272 100644
--- a/analysis_id_logistic_septax.R
+++ b/analysis_id_septaxa.R
@@ -1,11 +1,15 @@
-##Work in progress model for imperfect detection
+###################################################
+## Imperfect detection project ##
+## Hierarchical logistic regression analysis ##
+## Separated by taxonomic group ##
+###################################################
#Get dataset
data.raw = read.csv('imperfect_detection_master.csv',header=TRUE)
#Get only important columns and convert some to factors
data = data.raw[,c(2,4,5,7:12,14:20,22,24:25,27:33)]
-
+#Select a single taxa (plants here)
data = data[data$PL==1,c(1,2,3,17:26)]
#Begin data
@@ -36,39 +40,38 @@ ra = data$RA
id = cbind(data$KenID,data$RobID)
#Bundle
-jags.data = list(#'njournals', 'journal',
- 'npapers','nobs','year','obs','id'
- ,'cm','land',
- 'ab','oc','ri','su','ra'
- ,'jsingletaxa','journal'
+jags.data = list('npapers','nobs','year','obs','id'
+ ,'cm','land'
+ ,'ab','oc','ri','su','ra'
+ ,'jsingletaxa','journal'
)
#Parameters to save
params = c( 'alpha','p_alpha'
- #'psi_journal_mean','psi_journal_sigma','p_journal_mean','p_journal_sigma'
,'beta_ab','beta_oc','beta_ri','beta_su','beta_ra'
, 'beta_year','beta_singletax'
- # ,'beta_biyr','beta_fiyr','beta_mayr','beta_hryr','beta_insyr','beta_plyr'
,'beta_cm','beta_land'
,'p_beta_obs'
- #,'p_beta_ab','p_beta_oc','p_beta_ri','p_beta_su','p_beta_ra'
)
-modelFilename = "model_id_logistic_septax.R"
-#Inits
-#Inits for jags
+#Model file location
+modelFilename = "model_id_septaxa.R"
+#Inits
inits <- function(){
zinit = vector(length=npapers)
for (i in 1:npapers){
zinit[i] = max(data$KenID[i],data$RobID[i],na.rm=TRUE)
}
list(alpha = 1, p_alpha=1,
- #psi_journal_sigma = 1, p_journal_sigma = 1,
z = zinit)}
#Analysis
-id.out = jags(data=jags.data, inits=inits,parameters.to.save=params, model.file=modelFilename,
+library(jagsUI)
+
+id_plants_output = jags(data=jags.data, inits=inits,parameters.to.save=params, model.file=modelFilename,
n.chains=3, n.iter=10000, n.burnin=1000, n.thin=50)
-save(id.out,file="output_id_logistic_PL.Rdata")
+save(id_bird_output,id_fish_output,id_herp_output
+ ,id_insect_output,id_mam_output,id_plants_output
+ ,file="output_id_septaxa.Rdata")
diff --git a/data/script_id_database_format.R b/data/script_id_database_format.R
index 021e66b..8372768 100644
--- a/data/script_id_database_format.R
+++ b/data/script_id_database_format.R
@@ -1,17 +1,16 @@
#Create base database for imperfect detection
#Generates info columns and file links for a spreadsheet
#Based on a folder of literature
-#NOTE. This version does not generate entries for Auk, Ecology, or CJFAS
#Create output file
-output = array(NA,dim=c(550,3))
+output = array(NA,dim=c(750,3))
-#get journal names and remove ones I've done
-journal.names = list.files()[c(3,5:10)]
+#get journal names and remove ones that didn't end up getting used
+journal.names = list.files()[c(1,3:11)]
years = c('1971', '1981', '1991', '2001', '2011')
#Short names
-short.names = c('EcoEnt','AnEco','JBio','JEco','JHerp','JMam','JWD')
+short.names = c('Auk','EcoEnt','Ecology','AnEco','JBio','JEco','JHerp','JMam','JWD','TAFS')
#Iterate through Journals
count = 1
@@ -46,7 +45,7 @@ outputformat = as.data.frame(output)
write.csv(outputformat,file='outputformat.csv')
-#Generate subsample for Rob
+#Generate subsample for Rob to read
robpapers = matrix(0,nrow=724,ncol=2)
robpapers[,1] = c(1:724)
diff --git a/figure_id_alltaxa.R b/figure_id_alltaxa.R
new file mode 100644
index 0000000..4d28aba
--- /dev/null
+++ b/figure_id_alltaxa.R
@@ -0,0 +1,43 @@
+##################################################################
+##Barchart Comparing Probability of incorporating ID by taxon/year
+##################################################################
+
+#Load in analysis output for figure
+load("output/output_id_alltaxa.Rda")
+
+data = array(data=NA,dim=c(1500,5,6))
+data[,,1] = id.out$sims.list$fishmean
+data[,,2] = id.out$sims.list$mammean
+data[,,3] = id.out$sims.list$herpmean
+data[,,4] = id.out$sims.list$birdmean
+data[,,5] = id.out$sims.list$insmean
+data[,,6] = id.out$sims.list$plantmean
+
+barplot.input = t(colMeans(data))
+
+#Load in required package for error bars
+#https://github.com/kenkellner/heeparse
+library(heeparse)
+
+#Output figure as TIFF file
+tiff(filename="KellnerFig1.tiff",width=4.5,height=3.5,units="in",res=400, pointsize=8,
+ compression = "lzw")
+
+#Draw barplot
+barplot(barplot.input, beside=T, ylim=c(0,1), names=c('1971', '1981', '1991','2001','2011'),
+ ylab=italic(p)~"IID", xlab="Year",
+ col=c("red","blue","orange","purple","yellow","green"))
+legend(0.5,1,c("Fish","Mammals","Herps","Birds", "Invertebrates","Plants"),bty='n',
+ fill=c("red","blue","orange","purple","yellow","green"),ncol=2)
+
+#Draw error bars on plot
+offset = c(0,1,2,3,4,5)
+structure = c(1.5,8.5,15.5,22.5,29.5)
+for (i in 1:5){
+ for (j in 1:6){
+ error.bars(data=data[,i,j],structure=structure[i]+offset[j],
+ quantiles=c(0.025,0.975),cex=0,col="black")
+}}
+
+dev.off()
+
diff --git a/figure_id_logistic.R b/figure_id_logistic.R
deleted file mode 100644
index 0af344a..0000000
--- a/figure_id_logistic.R
+++ /dev/null
@@ -1,56 +0,0 @@
-##################################################################
-##Barchart Comparing Probability of incorporating ID by taxon/year
-##################################################################
-
-#Format data for figure
-#id.out is output from JAGS (full model with all taxa)
-bird.o = fit$BUGSoutput$sims.list$birdmean
-fish.o = fit$BUGSoutput$sims.list$fishmean
-mam.o = fit$BUGSoutput$sims.list$mammean
-herp.o = fit$BUGSoutput$sims.list$herpmean
-insect.o = fit$BUGSoutput$sims.list$insmean
-plant.o = fit$BUGSoutput$sims.list$plantmean
-
-data = array(data=NA,dim=c(540,5,6))
-data[,,1] = fish.o
-data[,,2] = mam.o
-data[,,3] = herp.o
-data[,,4] = bird.o
-data[,,5] = insect.o
-data[,,6] = plant.o
-
-d = colMeans(data)
-
-#Make error.bars() function
-error.bars = function(data,structure,col){
- structure = structure
- mean = mean(data)
- interval = quantile(data, c(0.025,0.975),na.rm=TRUE)
- if (interval[1] < 0) {interval[1] = 0}
- segments(x0=structure,y0=interval[1],x1=structure,y1=interval[2], lwd=1,col=col)
- segments(x0=structure-0.3,y0=interval[2],x1=structure+0.3,y1=interval[2],col=col)
- segments(x0=structure-0.3,y0=interval[1],x1=structure+0.3,y1=interval[1],col=col)
-}
-
-#Output figure as TIFF file
-tiff(filename="KellnerFig1.tiff",width=4.5,height=3.5,units="in",res=400, pointsize=8,
- compression = "lzw")
-
-#Draw barplot
-barplot(t(d), beside=T, ylim=c(0,1), names=c('1971', '1981', '1991','2001','2011'),
- ylab=italic(p)~"IID", xlab="Year",
- col=c("red","blue","orange","purple","yellow","green"))
-legend(0.5,1,c("Fish","Mammals","Herps","Birds", "Invertebrates","Plants"),bty='n',
- fill=c("red","blue","orange","purple","yellow","green"),ncol=2)
-
-#Draw error bars on plot
-offset = c(0,1,2,3,4,5)
-structure = c(1.5,8.5,15.5,22.5,29.5)
-for (i in 1:5){
- for (j in 1:6){
- error.bars(data=data[,i,j],structure=structure[i]+offset[j],col="black")
-}}
-
-dev.off()
-
-
diff --git a/models/model_id_logistic.R b/models/model_id_alltaxa.R
index f6a9165..df6a9c9 100644
--- a/models/model_id_logistic.R
+++ b/models/model_id_alltaxa.R
@@ -1,16 +1,9 @@
model {
- ##Priors
- alpha ~ dunif(-5,5)
- #psi_journal_mean ~ dunif(-5,5)
- #psi_journal_sigma ~ dunif(0,7)
- #psi_journal_tau <- pow(psi_journal_sigma,-2)
-
+ #Priors
+ alpha ~ dunif(-5,5)
p_alpha ~ dunif(-5,5)
- #p_journal_mean ~ dunif(-5,5)
- #p_journal_sigma ~ dunif(0,7)
- #p_journal_tau <- pow(p_journal_sigma,-2)
beta_year ~ dnorm(0,1)
beta_bi ~ dnorm(0,1)
@@ -19,12 +12,6 @@ model {
beta_hr ~ dnorm(0,1)
beta_ins ~ dnorm(0,1)
beta_pl ~ dnorm(0,1)
- #beta_biyr ~ dnorm(0,1)
- #beta_fiyr ~ dnorm(0,1)
- #beta_mayr ~ dnorm(0,1)
- #beta_hryr ~ dnorm(0,1)
- #beta_insyr ~ dnorm(0,1)
- #beta_plyr ~ dnorm(0,1)
beta_ab ~ dnorm(0,1)
beta_oc ~ dnorm(0,1)
beta_ri ~ dnorm(0,1)
@@ -32,7 +19,6 @@ model {
beta_ra ~ dnorm(0,1)
beta_cm ~ dnorm(0,1)
beta_land ~ dnorm(0,1)
- #beta_ricm ~ dnorm(0,1)
beta_singletax ~ dnorm(0,1)
p_beta_bi ~ dnorm(0,1)
@@ -41,30 +27,18 @@ model {
p_beta_hr ~ dnorm(0,1)
p_beta_ins ~ dnorm(0,1)
p_beta_pl ~ dnorm(0,1)
- #p_beta_ab ~ dnorm(0,1)
- #p_beta_oc ~ dnorm(0,1)
- #p_beta_ri ~ dnorm(0,1)
- #p_beta_su ~ dnorm(0,1)
- #p_beta_ra ~ dnorm(0,1)
p_beta_obs ~ dnorm(0,1)
#Begin Model Specification
- #Random journal intercepts
- #for (i in 1:njournals) {
- # psi_int_journal[i] ~ dnorm(psi_journal_mean, psi_journal_tau)
- # p_int_journal[i] ~ dnorm(p_journal_mean, p_journal_tau)
- #}
-
#State process
for (i in 1:npapers){
+
#linear predictor
- logit(psi[i]) <- alpha #psi_int_journal[journal[i]]
+ logit(psi[i]) <- alpha
+ beta_year*year[i]
+ beta_bi*bi[i] + beta_fi*fi[i]
+ beta_ma*ma[i] + beta_hr*hr[i] + beta_ins*ins[i] + beta_pl*pl[i]
- #+ beta_biyr*bi[i]*year[i] + beta_fiyr*fi[i]*year[i] + beta_mayr*ma[i]*year[i] + beta_hryr*hr[i]*year[i]
- #+ beta_insyr*ins[i]*year[i] + beta_plyr*pl[i]*year[i]
+ beta_cm*cm[i] + beta_land*land[i]
+ beta_ab*ab[i] + beta_oc*oc[i] + beta_ri*ri[i] + beta_su*su[i]+beta_ra*ra[i]
+ beta_singletax*jsingletaxa[journal[i]]
@@ -74,20 +48,26 @@ model {
#Detection process
for (j in 1:nobs[i]){
- logit(p[i,j]) <- p_alpha #p_int_journal[journal[i]]
+
+ logit(p[i,j]) <- p_alpha
+ p_beta_obs*obs[j]
+ p_beta_bi*bi[i] + p_beta_fi*fi[i] + p_beta_pl*pl[i]
+ p_beta_ma*ma[i] + p_beta_hr*hr[i] + p_beta_ins*ins[i]
- #+ p_beta_ab*ab[i] + p_beta_oc*oc[i] + p_beta_ri*ri[i] + p_beta_su*su[i]+ p_beta_ra*ra[i]
p_eff[i,j] <- z[i]*p[i,j]
+
+ #Detection event
id[i,j] ~ dbern(p_eff[i,j])
+
+ #Calculate overall mean p
pmean_hold[cucount[i,j]] <- p[i,j]
+
}
}
#Derived parameters
pmean <- mean(pmean_hold[])
+ #Differences between taxa
bi_fi <- beta_bi - beta_fi
bi_ma <- beta_bi - beta_ma
bi_hr <- beta_bi - beta_hr
@@ -104,7 +84,7 @@ model {
hr_pl <- beta_hr - beta_pl
ins_pl <- beta_ins - beta_pl
- #For graph
+ #Means by taxa (for figure)
for (i in 1:5){
birdmean[i] <- sum(z[]*bi[]*yrindex[,i])/sum(bi[]*yrindex[,i])
fishmean[i] <- sum(z[]*fi[]*yrindex[,i])/sum(fi[]*yrindex[,i])
diff --git a/models/model_id_logistic_septax.R b/models/model_id_septaxa.R
index 070a12e..070a12e 100644
--- a/models/model_id_logistic_septax.R
+++ b/models/model_id_septaxa.R