Spatial Spillover

Loading libraries and data for this session

rm(list=ls())
setwd("D:/_Lab_Data/R/R_Labs")
library(rgdal)
library (spdep)  

# Load Shapefiles
TWN.TB <- readOGR(dsn = "shapefiles/TB", layer = "Taiwan_TB", encoding="big5")

head(TWN.TB@data)

##   FID_1 COUN_CODE COUNTY   TOWN     FULLNAME Count_   ABOR_P   INCOME
## 0     0   1000101 台北縣 板橋市 台北縣板橋市      1 0.004595 189.1450
## 1     1   1000102 台北縣 三重市 台北縣三重市      1 0.003200 162.9353
## 2     2   1000103 台北縣 中和市 台北縣中和市      1 0.003837 204.5399
## 3     3   1000104 台北縣 永和市 台北縣永和市      1 0.002428 256.6338
## 4     4   1000105 台北縣 新莊市 台北縣新莊市      1 0.009076 184.2582
## 5     5   1000106 台北縣 新店市 台北縣新店市      1 0.009339 255.8700
##   TBINCI3 TBINCI6  TBINCI
## 0  0.0019  0.0019 0.00378
## 1  0.0020  0.0018 0.00378
## 2  0.0020  0.0019 0.00385
## 3  0.0015  0.0015 0.00303
## 4  0.0016  0.0015 0.00318
## 5  0.0020  0.0019 0.00389

lm.palette <- colorRampPalette(c("white","orange", "red"), space = "rgb")
spplot(TWN.TB, zcol="TBINCI", col.regions=lm.palette(20), main="TB Incidence")

1. Constructing the Neighboring Matrix

# Neighbors: Construct neighbors list
TWN_nbq<-poly2nb(TWN.TB) #QUEEN = TRUE
summary(TWN_nbq)

## Neighbour list object:
## Number of regions: 352 
## Number of nonzero links: 1924 
## Percentage nonzero weights: 1.552815 
## Average number of links: 5.465909 
## 3 regions with no links:
## 262 284 289
## Link number distribution:
## 
##  0  1  2  3  4  5  6  7  8  9 10 11 
##  3  1  7 28 46 93 87 55 23  6  2  1 
## 1 least connected region:
## 350 with 1 link
## 1 most connected region:
## 339 with 11 links

# Neighborhood Matrix
TWN_nbq_w.mat <-nb2mat(TWN_nbq, style="W", zero.policy=T) # row-standardized
TWN_nbq_w2.mat <-nb2mat(TWN_nbq, style="B", zero.policy=T) # binary

# Row-standardized weights matrix (list)
TWN_nbq_w<- nb2listw(TWN_nbq, zero.policy=T)
# Binary matrix (list)
TWN_nbq_wb2<-nb2listw(TWN_nbq, style="B", zero.policy=T)

2. Constructing the Spatial-Lag Variable

# TB Lag
TBINCI<-TWN.TB@data$TBINCI
TWN.TB$lagTBINCI <- lag.listw(TWN_nbq_w,TBINCI, zero.policy=T)
spplot(TWN.TB, zcol="lagTBINCI", col.regions=lm.palette(20), main="TB Incidence (Lag)")

3. Moran’s I Test for TB Incidence

# Moran's I
M.TB<-moran.test(TBINCI, listw=TWN_nbq_w, zero.policy=T); M.TB

## 
##  Moran's I test under randomisation
## 
## data:  TBINCI  
## weights: TWN_nbq_w  
## 
## Moran I statistic standard deviate = 13.7215, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.439505084      -0.002873563       0.001039412

M<-M.TB$estimate[1]

# Monte-Carlo simulation of Moran's I
set.seed(123456)
bperm<-moran.mc(TBINCI,listw=TWN_nbq_w,nsim=999, zero.policy=T)
hist(bperm$res, freq=TRUE, breaks=20, xlab="Simulated Moran's I")
abline(v=M, col="red")

# Spatial correlogram
cor.TB<-sp.correlogram(TWN_nbq,TBINCI, order=8, method="I", style="W",zero.policy=T )
print(cor.TB); plot(cor.TB)

## Spatial correlogram for TBINCI 
## method: Moran's I
##            estimate expectation    variance standard deviate
## 1 (349)  0.43950508 -0.00287356  0.00103941          13.7215
## 2 (349)  0.25264153 -0.00287356  0.00050848          11.3313
## 3 (349)  0.11135284 -0.00287356  0.00033556           6.2356
## 4 (349)  0.02069682 -0.00287356  0.00024832           1.4958
## 5 (349) -0.01492643 -0.00287356  0.00019733          -0.8580
## 6 (349) -0.01331651 -0.00287356  0.00016553          -0.8117
## 7 (349) -0.01853400 -0.00287356  0.00014411          -1.3045
## 8 (349) -0.01876019 -0.00287356  0.00013317          -1.3767
##         Pr(I) two sided    
## 1 (349)         < 2e-16 ***
## 2 (349)         < 2e-16 ***
## 3 (349)         4.5e-10 ***
## 4 (349)          0.1347    
## 5 (349)          0.3909    
## 6 (349)          0.4170    
## 7 (349)          0.1920    
## 8 (349)          0.1686    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4. OLS Model

# OLS Regression
TBData<-TWN.TB@data
TBINCI<-TWN.TB@data$TBINCI # TB發生率
ABOR_P<-TWN.TB@data$ABOR_P # 原住民比例
INCOME<-TWN.TB@data$INCOME # 平均家戶收入  

TB.lm<- lm(TBINCI ~ ABOR_P + INCOME, data=TBData); summary(TB.lm)

## 
## Call:
## lm(formula = TBINCI ~ ABOR_P + INCOME, data = TBData)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0065203 -0.0009545 -0.0000505  0.0009511  0.0155064 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.852e-03  3.310e-04  14.657  < 2e-16 ***
## ABOR_P       1.273e-02  7.155e-04  17.797  < 2e-16 ***
## INCOME      -6.510e-06  2.135e-06  -3.049  0.00247 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002424 on 349 degrees of freedom
## Multiple R-squared:  0.5427, Adjusted R-squared:  0.5401 
## F-statistic: 207.1 on 2 and 349 DF,  p-value: < 2.2e-16

5. Global Moran’s I for LM regression residuals

TB.moran0 <-lm.morantest(TB.lm, TWN_nbq_w, zero.policy=T); TB.moran0

## 
##  Global Moran's I for regression residuals
## 
## data:  
## model: lm(formula = TBINCI ~ ABOR_P + INCOME, data = TBData)
## weights: TWN_nbq_w
## 
## Moran I statistic standard deviate = 8.808, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran's I        Expectation           Variance 
##        0.281340487       -0.006537501        0.001068223

6. Lagrange Multiplier Test Statistics for Spatial Autocorrelation

TB.lagrange <- lm.LMtests(TB.lm,TWN_nbq_w,test=c("LMerr","RLMerr","LMlag","RLMlag","SARMA"), zero.policy=T)
TB.lagrange

## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = TBINCI ~ ABOR_P + INCOME, data = TBData)
## weights: TWN_nbq_w
## 
## LMerr = 73.2094, df = 1, p-value < 2.2e-16
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = TBINCI ~ ABOR_P + INCOME, data = TBData)
## weights: TWN_nbq_w
## 
## RLMerr = 29.8628, df = 1, p-value = 4.637e-08
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = TBINCI ~ ABOR_P + INCOME, data = TBData)
## weights: TWN_nbq_w
## 
## LMlag = 43.3837, df = 1, p-value = 4.499e-11
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = TBINCI ~ ABOR_P + INCOME, data = TBData)
## weights: TWN_nbq_w
## 
## RLMlag = 0.0371, df = 1, p-value = 0.8472
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = TBINCI ~ ABOR_P + INCOME, data = TBData)
## weights: TWN_nbq_w
## 
## SARMA = 73.2466, df = 2, p-value < 2.2e-16

7. MLE of the Spatial Lag Model

TB.lag <- lagsarlm(TBINCI ~ ABOR_P + INCOME, data=TBData, TWN_nbq_w, zero.policy=T)
summary(TB.lag)

## 
## Call:
## lagsarlm(formula = TBINCI ~ ABOR_P + INCOME, data = TBData, listw = TWN_nbq_w, 
##     zero.policy = T)
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -8.0582e-03 -7.2283e-04 -6.3813e-06  8.4425e-04  1.4849e-02 
## 
## Type: lag 
## Regions with no neighbours included:
##  262 284 289 
## Coefficients: (numerical Hessian approximate standard errors) 
##                Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)  2.8197e-03  4.3929e-04  6.4188 1.374e-10
## ABOR_P       1.0405e-02  7.5813e-04 13.7247 < 2.2e-16
## INCOME      -3.1862e-06  2.0478e-06 -1.5559    0.1197
## 
## Rho: 0.34805, LR test value: 37.499, p-value: 9.1474e-10
## Approximate (numerical Hessian) standard error: 0.053804
##     z-value: 6.4689, p-value: 9.8691e-11
## Wald statistic: 41.847, p-value: 9.8691e-11
## 
## Log likelihood: 1640.718 for lag model
## ML residual variance (sigma squared): 5.1089e-06, (sigma: 0.0022603)
## Number of observations: 352 
## Number of parameters estimated: 5 
## AIC: -3271.4, (AIC for lm: -3235.9)

TB.lag2 <- lagsarlm(TBINCI ~ ABOR_P, data=TBData, TWN_nbq_w, zero.policy=T)
summary(TB.lag2)

## 
## Call:
## lagsarlm(formula = TBINCI ~ ABOR_P, data = TBData, listw = TWN_nbq_w, 
##     zero.policy = T)
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -8.3573e-03 -7.3359e-04  2.5877e-05  8.7969e-04  1.4728e-02 
## 
## Type: lag 
## Regions with no neighbours included:
##  262 284 289 
## Coefficients: (numerical Hessian approximate standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 0.0022801  0.0002673  8.5299 < 2.2e-16
## ABOR_P      0.0106192  0.0007466 14.2235 < 2.2e-16
## 
## Rho: 0.36831, LR test value: 44.353, p-value: 2.7419e-11
## Approximate (numerical Hessian) standard error: 0.052004
##     z-value: 7.0824, p-value: 1.4166e-12
## Wald statistic: 50.161, p-value: 1.4166e-12
## 
## Log likelihood: 1639.517 for lag model
## ML residual variance (sigma squared): 5.1277e-06, (sigma: 0.0022644)
## Number of observations: 352 
## Number of parameters estimated: 4 
## AIC: -3271, (AIC for lm: -3228.7)

data.frame(coef(TB.lag2))

##             coef.TB.lag2.
## rho           0.368314883
## (Intercept)   0.002280056
## ABOR_P        0.010619232

rho<-coef(TB.lag2)[1];  rho

##       rho 
## 0.3683149

beta<-coef(TB.lag2)[3]; beta

##     ABOR_P 
## 0.01061923

8. Modeling Spatial Equilibrium Effect

# Create vector to store the estimate for each townships
ee.est <- rep(NA,dim(TBData)[1])
# Assign the town ID labels
names(ee.est) <- TBData$COUN_CODE

# Create a null vector to use in loop
svec <- rep(0,dim(TBData)[1])
# Create a N x N identity matrix
eye <- matrix(0,nrow=dim(TBData)[1],ncol=dim(TBData)[1])
diag(eye) <- 1

# Loop over 1:n towns and store effect of change in each town i in ee.est[i]
for(i in 1:length(ee.est)){
  cvec <- svec
  cvec[i] <- 1
  res <- solve(eye - rho * TWN_nbq_w.mat) %*% cvec * beta
  ee.est[i] <- res[i]
}

9. Mapping Spillover Effects

TWN.TB$ee.est <-ee.est
spplot(TWN.TB, zcol="ee.est", col.regions=lm.palette(20), main="TB Spillover Effects")

# example of impact on other townships (observation No.301)
cvec <- rep(0,dim(TBData)[1])
cvec[301] <- 1 # 花蓮縣秀林鄉

# Store estimates for impact of change in one town in rus.est
eye <- matrix(0,nrow=dim(TBData)[1],ncol=dim(TBData)[1])
diag(eye) <- 1
rus.est <- solve(eye - rho * TWN_nbq_w.mat) %*% cvec * beta
# Find ten highest values of rus.est vector
rus.est <- round(rus.est,5)
rus.est <- data.frame(TBData$FULLNAME,rus.est)
rus.est[rev(order(rus.est$rus.est)),][1:10,]

##     TBData.FULLNAME rus.est
## 301    花蓮縣秀林鄉 0.01106
## 294    花蓮縣新城鄉 0.00238
## 291    花蓮縣花蓮市 0.00186
## 295    花蓮縣吉安鄉 0.00171
## 296    花蓮縣壽豐鄉 0.00100
## 41     宜蘭縣南澳鄉 0.00087
## 145    南投縣仁愛鄉 0.00065
## 302    花蓮縣萬榮鄉 0.00060
## 106    台中縣和平鄉 0.00047
## 292    花蓮縣鳳林鎮 0.00016

TWN.TB$rus.est <-rus.est[,2]
spplot(TWN.TB, zcol="rus.est", col.regions=lm.palette(20), main="The Impacts on Other Towns")