From 15741cbb86ddb17a0192e5030d9c5dccc69d217c Mon Sep 17 00:00:00 2001
From: Ken Kellner <ken@kenkellner.com>
Date: Thu, 6 Sep 2018 09:43:57 -0400
Subject: Add part 3 of trees analysis

---
 src/buffalo-trees-part1.Rmd |   2 +
 src/buffalo-trees-part2.Rmd |   4 +-
 src/buffalo-trees-part3.Rmd | 153 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 158 insertions(+), 1 deletion(-)
 create mode 100644 src/buffalo-trees-part3.Rmd

diff --git a/src/buffalo-trees-part1.Rmd b/src/buffalo-trees-part1.Rmd
index cf8699a..3d9a724 100644
--- a/src/buffalo-trees-part1.Rmd
+++ b/src/buffalo-trees-part1.Rmd
@@ -276,3 +276,5 @@ Next I plan to more formally examine variables that affect the spatial distribut
 
 I've identified several papers that looked at how economic and demographic variables impact urban trees (for example, [this one](http://journals.sagepub.com/doi/abs/10.1068/a41236) from Tampa, FL).
 As with my analysis of the Buffalo [recycling data](https://kenkellner.com/blog/buffalo-recycle-part2.html), the American Community Survey (ACS) should be a good source for these variables.
+
+View part 2 of the analysis [here](buffalo-trees-part2.html).
diff --git a/src/buffalo-trees-part2.Rmd b/src/buffalo-trees-part2.Rmd
index 63289b5..49a8ddc 100644
--- a/src/buffalo-trees-part2.Rmd
+++ b/src/buffalo-trees-part2.Rmd
@@ -200,7 +200,7 @@ Is the homogeneous PPP model appropriate for this dataset?
 Visually, it appears that trees are non-randomly distributed.
 
 I explicitly tested my first hypothesis (that the distribution is actually inhomogeneous) using a quadrat test.
-First, I divided area of interest is divided into a grid of 5x5 evenly-sized rectangles (*quadrats*), and the number of trees in each quadrat counted:
+First, I divided area of interest into a grid of 5x5 evenly-sized rectangles (*quadrats*), and counted the number of trees in each quadrat:
 
 ```{r}
 tree_quads = quadratcount(trees_ppp)
@@ -350,3 +350,5 @@ I found evidence to suppport all three of my hypotheses.
 First, the distribution of trees in Buffalo is non-random.
 Second, the density of trees is higher inside city parks than outside.
 Finally, the density of trees increases with mean income: richer neighborhoods have more trees, matching the findings of [at least one other U.S. study](http://journals.sagepub.com/doi/abs/10.1068/a41236).
+
+View part 3 of the analysis [here](buffalo-trees-part3.html).
diff --git a/src/buffalo-trees-part3.Rmd b/src/buffalo-trees-part3.Rmd
new file mode 100644
index 0000000..d97ca5f
--- /dev/null
+++ b/src/buffalo-trees-part3.Rmd
@@ -0,0 +1,153 @@
+---
+title: Distribution of Urban Trees in Buffalo (Part 3)
+date: 2018-09-06
+output:
+  html_document:
+    css: style.css
+    highlight: pygments
+---
+
+```{r,echo=FALSE}
+options(tidyverse.quiet=TRUE)
+suppressMessages(library(ggplot2))
+suppressMessages(library(sf))
+```
+
+In my [previous post on the Buffalo tree inventory](buffalo-trees-part2.html), I found that the density of trees was higher in city parks and increased with the mean income of the surrounding area. 
+
+One of my first steps in that analysis was to remove records of stumps and vacant tree spaces from the dataset, so as to focus on living trees.
+However, doing this brings up an interesting question: does the spatial distribution of stumps and vacant spaces follow the same patterns as with living trees?
+
+## Obtain the Datasets
+
+```{r,echo=F}
+#Cache the dataset if necessary
+if (file.exists('../data/trees_raw.Rdata')){
+  load('../data/trees_raw.Rdata')
+} else {
+  dir.create('../data')
+  trees_raw = read.socrata(csv_url) %>% as_tibble()
+  save(trees_raw,file='../data/trees_raw.Rdata')
+}
+```
+
+Below I provide an abbreviated description of downloading and cleaning the required datasets.
+See [part 2](buffalo-trees-part3.html) for a more detailed description.
+First, obtain the URL of the dataset:
+
+```{r}
+library(RSocrata)
+library(tidyverse)
+buf_datasets = ls.socrata('https://data.buffalony.gov')
+ind = which(buf_datasets$title == 'Tree Inventory')
+csv_url = buf_datasets$distribution[[ind]]$downloadURL[1]
+```
+
+```{r, eval=F}
+trees_raw = read.socrata(csv_url) %>% as_tibble()
+```
+
+Download the tree inventory data, filter out all living trees, and convert it to a simple features object with the correct projection:
+
+```{r}
+library(sf)
+nontrees_proj = trees_raw %>%
+  filter(Common.Name %in% c('STUMP', 'VACANT')) %>%
+  st_as_sf(coords = c("Longitude", "Latitude"),
+          crs = "+proj=longlat +datum=WGS84") %>%
+  st_transform(crs="+proj=utm +zone=17 +datum=WGS84") %>%
+  mutate(X = st_coordinates(.)[,1],
+         Y = st_coordinates(.)[,2]) %>%
+  select(X,Y,geometry)
+```
+
+Convert the simple features object to a `ppp` object for use with library `spatstat`:
+
+```{r,message=F,warning=F}
+library(spatstat)
+nontrees_ppp = ppp(x = nontrees_proj$X, y = nontrees_proj$Y, 
+              range(nontrees_proj$X), range(nontrees_proj$Y))
+plot(nontrees_ppp)
+```
+
+```{r, echo=F}
+get_shapefile <- function(url){
+  dest_dir = tempdir()
+  dest <- tempfile(tmpdir=dest_dir)
+  download.file(url, dest, quiet=T)
+  unzip(dest, exdir = dest_dir)
+  shape_name = grep('.shp',list.files(dest_dir),value=T)
+  setwd(dest_dir)
+  sf::st_read(shape_name,quiet=TRUE) 
+}
+
+sf_to_im = function(sf_object, background=0, field=NULL){
+  r = fasterize::raster(sf_object, res=5)
+  r = fasterize::fasterize(sf_object, r, background=background, field=field)
+  maptools::as.im.RasterLayer(r)
+}
+```
+
+Download the Buffalo city parks shapefile and convert it to the special raster form required by `spatstat` (see the previous post for the `get_shapefile()` and `sf_to_im()` functions):
+
+```{r}
+ind = which(buf_datasets$title == 'Parks')
+shape_url = buf_datasets$distribution[26][[1]]$downloadURL[3]
+park_raster = get_shapefile(shape_url) %>%
+  st_transform(crs="+proj=utm +zone=17 +datum=WGS84") %>%
+  sf_to_im()
+```
+
+Do the same for the income data from ACS:
+
+```{r,warning=F,results='hide',message=F}
+library(tidycensus)
+income_data = get_acs(geography = 'tract', variables = c(income='B07011_001'),
+                   state='NY',county='Erie',geometry='TRUE') %>%
+  st_transform(crs="+proj=utm +zone=17 +datum=WGS84") %>%
+  st_crop(st_bbox(nontrees_proj)) %>%
+  mutate(estimateZ = scale(estimate)) %>%
+  st_cast() 
+income_raster = income_data %>%
+  sf_to_im(.,NA,"estimateZ")
+```
+
+## Conduct the Analysis
+
+Fit the same Poisson point process model as in part 2, with location relative to parks and mean income as covariates:
+
+```{r,message=F,warning=F}
+mod_full = ppm(nontrees_ppp, ~park + income, 
+               covariates=list(park=park_raster, income=income_raster))
+mod_full
+```
+
+As with the living trees dataset, both parks and income have a significant effect on the Poisson point process rate.
+However, in this case, the direction of the effect for both is opposite: stumps and vacant tree spaces are *less* frequent inside parks, and their density declines as mean area income increases!
+
+This can also be visualized graphically:
+
+```{r}
+#Generare predicted values and SEs
+inc_vals = seq(min(income_data$estimate,na.rm=T),max(income_data$estimate,na.rm=T),10)
+sc_vals = (inc_vals - mean(income_data$estimate,na.rm=T))/sd(income_data$estimate,na.rm=T)
+
+X = rbind(cbind(1,1,sc_vals),cbind(1,0,sc_vals))
+loglam = X %*% mod_full$coef
+se = sqrt(diag(X %*% vcov(mod_full) %*% t(X)))
+figdata = data.frame(`Park Status` = rep(c('Inside','Outside'),each=length(inc_vals)),
+                     inc=inc_vals, lam=exp(loglam), upper=exp(loglam+1.96*se),
+                     lower=exp(loglam-1.96*se),check.names=F)
+
+#Plot predicted values and 95% confidence envelopes
+ggplot(data=figdata) +
+  geom_ribbon(aes(x=inc,ymin=lower,ymax=upper,group=`Park Status`), alpha=0.1) +
+  geom_line(aes(x=inc,y=lam, color=`Park Status`)) +
+  xlab('Mean income ($)') + ylab(expression(paste('Predicted rate (',lambda,')',sep='')))
+```
+
+## Conclusion
+
+While living trees are associated with parks and higher-income areas, stumps and vacant tree spaces are more frequent outside parks and in lower-income areas.
+One possible explanation is that in lower-income areas, there is less motivation or fewer resources available to replace trees that die, resulting in an increasing disparity in living trees.
+It would be interesting to run a similar analysis on a historical tree inventory in Buffalo (if it exists), and see if some of these lower-income areas have seen a decline in living tree density over time.
-- 
cgit v1.2.3