Spatial Regression

Loading libraries and data for this session

rm(list=ls())
setwd("D:/_Lab_Data/R/R_Labs")
library(rgdal);library (spdep)
data(columbus)

NY8 <- readOGR("Spatial.R", "NY8_utm18")
TCE <- readOGR("Spatial.R", "TCE")
cities <- readOGR("Spatial.R", "NY8cities")
NY_nb <- read.gal("Spatial.R/NY_nb.gal", region.id=row.names(NY8))

1. Modeling single variable and Checking the regression residuals

col.listw <- nb2listw(col.gal.nb)
columbus.lm0<- lm(CRIME ~ 1, data=columbus)
summary(columbus.lm0)
## 
## Call:
## lm(formula = CRIME ~ 1, data = columbus)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.951 -15.080  -1.128  13.457  33.763 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    35.13       2.39    14.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.73 on 48 degrees of freedom
col.moran0 <-  lm.morantest(columbus.lm0,col.listw); col.moran0
## 
##  Global Moran's I for regression residuals
## 
## data:  
## model: lm(formula = CRIME ~ 1, data = columbus)
## weights: col.listw
## 
## Moran I statistic standard deviate = 5.3818, p-value = 3.687e-08
## alternative hypothesis: greater
## sample estimates:
## Observed Moran's I        Expectation           Variance 
##        0.485770914       -0.020833333        0.008860962
col.e <- resid(columbus.lm0)
col.moran_0 <- moran.test(col.e,col.listw, randomisation=FALSE); col.moran_0
## 
##  Moran's I test under normality
## 
## data:  col.e  
## weights: col.listw  
## 
## Moran I statistic standard deviate = 5.3818, p-value = 3.687e-08
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.485770914      -0.020833333       0.008860962

2. OLS Regression and Checking spatial autocorrealtion of the regression residuals

columbus.lm<- lm(CRIME ~ INC + HOVAL, data=columbus)
summary(columbus.lm)
## 
## Call:
## lm(formula = CRIME ~ INC + HOVAL, data = columbus)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -34.418  -6.388  -1.580   9.052  28.649 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  68.6190     4.7355  14.490  < 2e-16 ***
## INC          -1.5973     0.3341  -4.780 1.83e-05 ***
## HOVAL        -0.2739     0.1032  -2.654   0.0109 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.43 on 46 degrees of freedom
## Multiple R-squared:  0.5524, Adjusted R-squared:  0.5329 
## F-statistic: 28.39 on 2 and 46 DF,  p-value: 9.341e-09
col.listw <- nb2listw(col.gal.nb)
2.1 What NOT to do!
col.e <- resid(columbus.lm)
col.morane <- moran.test(col.e,col.listw, randomisation=FALSE,alternative="two.sided"); col.morane 
## 
##  Moran's I test under normality
## 
## data:  col.e  
## weights: col.listw  
## 
## Moran I statistic standard deviate = 2.4774, p-value = 0.01323
## alternative hypothesis: two.sided
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.212374153      -0.020833333       0.008860962
2.2 Correct Procedure
Global Moran’s I for LM regression residuals
col.moran <- lm.morantest(columbus.lm, col.listw); col.moran
## 
##  Global Moran's I for regression residuals
## 
## data:  
## model: lm(formula = CRIME ~ INC + HOVAL, data = columbus)
## weights: col.listw
## 
## Moran I statistic standard deviate = 2.681, p-value = 0.00367
## alternative hypothesis: greater
## sample estimates:
## Observed Moran's I        Expectation           Variance 
##        0.212374153       -0.033268284        0.008394853
col.moran2 <-  lm.morantest(columbus.lm,col.listw,alternative="two.sided"); col.moran2
## 
##  Global Moran's I for regression residuals
## 
## data:  
## model: lm(formula = CRIME ~ INC + HOVAL, data = columbus)
## weights: col.listw
## 
## Moran I statistic standard deviate = 2.681, p-value = 0.00734
## alternative hypothesis: two.sided
## sample estimates:
## Observed Moran's I        Expectation           Variance 
##        0.212374153       -0.033268284        0.008394853
2.3 Lagrange Multiplier Test Statistics for Spatial Autocorrelation
columbus.lagrange <- lm.LMtests(columbus.lm,col.listw,test=c("LMerr","RLMerr","LMlag","RLMlag","SARMA"))
columbus.lagrange
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = CRIME ~ INC + HOVAL, data = columbus)
## weights: col.listw
## 
## LMerr = 4.6111, df = 1, p-value = 0.03177
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = CRIME ~ INC + HOVAL, data = columbus)
## weights: col.listw
## 
## RLMerr = 0.0335, df = 1, p-value = 0.8547
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = CRIME ~ INC + HOVAL, data = columbus)
## weights: col.listw
## 
## LMlag = 7.8557, df = 1, p-value = 0.005066
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = CRIME ~ INC + HOVAL, data = columbus)
## weights: col.listw
## 
## RLMlag = 3.2781, df = 1, p-value = 0.07021
## 
## 
##  Lagrange multiplier diagnostics for spatial dependence
## 
## data:  
## model: lm(formula = CRIME ~ INC + HOVAL, data = columbus)
## weights: col.listw
## 
## SARMA = 7.8892, df = 2, p-value = 0.01936

3. Spatial Lag Model (SLM)

3.1 Maximum Likelihood Estimation
# Maximum Likelihood Estimation of the Spatial Lag Model
columbus.lag <- lagsarlm(CRIME ~ INC + HOVAL,data=columbus, col.listw); summary(columbus.lag)
## 
## Call:
## lagsarlm(formula = CRIME ~ INC + HOVAL, data = columbus, listw = col.listw)
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -37.4497093  -5.4565567   0.0016387   6.7159553  24.7107978 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 46.851431   7.314754  6.4051 1.503e-10
## INC         -1.073533   0.310872 -3.4533 0.0005538
## HOVAL       -0.269997   0.090128 -2.9957 0.0027381
## 
## Rho: 0.40389, LR test value: 8.4179, p-value: 0.0037154
## Asymptotic standard error: 0.12071
##     z-value: 3.3459, p-value: 0.00082027
## Wald statistic: 11.195, p-value: 0.00082027
## 
## Log likelihood: -183.1683 for lag model
## ML residual variance (sigma squared): 99.164, (sigma: 9.9581)
## Number of observations: 49 
## Number of parameters estimated: 5 
## AIC: 376.34, (AIC for lm: 382.75)
## LM test for residual autocorrelation
## test value: 0.19184, p-value: 0.66139
3.2 What Not To Do: OLS estimation
attach(columbus)
lagCRIME <- lag.listw(col.listw,CRIME)
wrong.lag <- lm(CRIME ~ lagCRIME + INC + HOVAL); summary(wrong.lag)
## 
## Call:
## lm(formula = CRIME ~ lagCRIME + INC + HOVAL)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.393  -6.541  -0.190   6.799  23.485 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 40.07773    9.43653   4.247 0.000107 ***
## lagCRIME     0.52957    0.15612   3.392 0.001454 ** 
## INC         -0.91054    0.36314  -2.507 0.015840 *  
## HOVAL       -0.26877    0.09312  -2.886 0.005970 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.32 on 45 degrees of freedom
## Multiple R-squared:  0.6436, Adjusted R-squared:  0.6198 
## F-statistic: 27.08 on 3 and 45 DF,  p-value: 3.672e-10
detach(columbus)

4. Spatial Error Model (SEM)

# Maximum Likelihood Estimation of the Spatial Erroe Model
columbus.err <- errorsarlm(CRIME ~ INC + HOVAL,data=columbus,col.listw); summary(columbus.err)
## 
## Call:
## errorsarlm(formula = CRIME ~ INC + HOVAL, data = columbus, listw = col.listw)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -34.45950  -6.21730  -0.69775   7.65256  24.23631 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 61.053618   5.314875 11.4873 < 2.2e-16
## INC         -0.995473   0.337025 -2.9537 0.0031398
## HOVAL       -0.307979   0.092584 -3.3265 0.0008794
## 
## Lambda: 0.52089, LR test value: 6.4441, p-value: 0.011132
## Asymptotic standard error: 0.14129
##     z-value: 3.6868, p-value: 0.00022713
## Wald statistic: 13.592, p-value: 0.00022713
## 
## Log likelihood: -184.1552 for error model
## ML residual variance (sigma squared): 99.98, (sigma: 9.999)
## Number of observations: 49 
## Number of parameters estimated: 5 
## AIC: 378.31, (AIC for lm: 382.75)

5. Spatial Durbin Model (SDM)

columbus.durbin <- lagsarlm(CRIME ~ INC+HOVAL,data=columbus, col.listw, type="mixed"); summary(columbus.durbin)
## 
## Call:
## lagsarlm(formula = CRIME ~ INC + HOVAL, data = columbus, listw = col.listw, 
##     type = "mixed")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -37.15904  -6.62594  -0.39823   6.57561  23.62757 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 45.592893  13.128679  3.4728 0.0005151
## INC         -0.939088   0.338229 -2.7765 0.0054950
## HOVAL       -0.299605   0.090843 -3.2980 0.0009736
## lag.INC     -0.618375   0.577052 -1.0716 0.2838954
## lag.HOVAL    0.266615   0.183971  1.4492 0.1472760
## 
## Rho: 0.38251, LR test value: 4.1648, p-value: 0.041272
## Asymptotic standard error: 0.16237
##     z-value: 2.3557, p-value: 0.018488
## Wald statistic: 5.5493, p-value: 0.018488
## 
## Log likelihood: -182.0161 for mixed model
## ML residual variance (sigma squared): 95.051, (sigma: 9.7494)
## Number of observations: 49 
## Number of parameters estimated: 7 
## AIC: 378.03, (AIC for lm: 380.2)
## LM test for residual autocorrelation
## test value: 0.101, p-value: 0.75063

6. Simultaneous Autoregressive Model (SAR)

6.1 OLS regression
nylm <- lm(Z~PEXPOSURE+PCTAGE65P+PCTOWNHOME, data=NY8)
summary(nylm)
## 
## Call:
## lm(formula = Z ~ PEXPOSURE + PCTAGE65P + PCTOWNHOME, data = NY8)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7417 -0.3957 -0.0326  0.3353  4.1398 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.51728    0.15856  -3.262  0.00124 ** 
## PEXPOSURE    0.04884    0.03506   1.393  0.16480    
## PCTAGE65P    3.95089    0.60550   6.525 3.22e-10 ***
## PCTOWNHOME  -0.56004    0.17031  -3.288  0.00114 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6571 on 277 degrees of freedom
## Multiple R-squared:  0.1932, Adjusted R-squared:  0.1844 
## F-statistic:  22.1 on 3 and 277 DF,  p-value: 7.306e-13
NY8$lmresid <- residuals(nylm)
lm.palette <- colorRampPalette(c("white","orange", "red"), space = "rgb")
spplot(NY8, zcol="lmresid", col.regions=lm.palette(20), main="Resid")

6.2a SAR regression: Model summary
NYlistw<-nb2listw(NY_nb, style = "B")
lm.morantest(nylm, NYlistw)
## 
##  Global Moran's I for regression residuals
## 
## data:  
## model: lm(formula = Z ~ PEXPOSURE + PCTAGE65P + PCTOWNHOME, data =
## NY8)
## weights: NYlistw
## 
## Moran I statistic standard deviate = 2.638, p-value = 0.004169
## alternative hypothesis: greater
## sample estimates:
## Observed Moran's I        Expectation           Variance 
##        0.083090278       -0.009891282        0.001242320
# SAR
nysar<-spautolm(Z~PEXPOSURE+PCTAGE65P+PCTOWNHOME, data=NY8, listw=NYlistw)
summary(nysar)
## 
## Call: 
## spautolm(formula = Z ~ PEXPOSURE + PCTAGE65P + PCTOWNHOME, data = NY8, 
##     listw = NYlistw)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.56754 -0.38239 -0.02643  0.33109  4.01219 
## 
## Coefficients: 
##              Estimate Std. Error z value  Pr(>|z|)
## (Intercept) -0.618193   0.176784 -3.4969 0.0004707
## PEXPOSURE    0.071014   0.042051  1.6888 0.0912635
## PCTAGE65P    3.754200   0.624722  6.0094 1.862e-09
## PCTOWNHOME  -0.419890   0.191329 -2.1946 0.0281930
## 
## Lambda: 0.040487 LR test value: 5.2438 p-value: 0.022026 
## Numerical Hessian standard error of lambda: 0.017199 
## 
## Log likelihood: -276.1069 
## ML residual variance (sigma squared): 0.41388, (sigma: 0.64333)
## Number of observations: 281 
## Number of parameters estimated: 6 
## AIC: 564.21
6.2b SAR regression: Mapping results
NY8$sar_trend <- nysar$fit$signal_trend
NY8$sar_stochastic <- nysar$fit$signal_stochastic
lm.palette <- colorRampPalette(c("white","orange", "red"), space = "rgb")
spplot(NY8, zcol="sar_trend", col.regions=lm.palette(20), main="sar_Trend")

spplot(NY8, zcol="sar_stochastic", col.regions=lm.palette(20), main="sar_Stochastic")

6.3 SLM, SEM and SDM
# SLM
NYlistwW <- nb2listw(NY_nb, style = "W")
nylag <- lagsarlm(Z~PEXPOSURE+PCTAGE65P+PCTOWNHOME, data=NY8, listw=NYlistwW); summary(nylag)
## 
## Call:
## lagsarlm(formula = Z ~ PEXPOSURE + PCTAGE65P + PCTOWNHOME, data = NY8, 
##     listw = NYlistwW)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -1.626029 -0.393321 -0.018767  0.326616  4.058315 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.505343   0.155850 -3.2425 0.001185
## PEXPOSURE    0.045543   0.034433  1.3227 0.185943
## PCTAGE65P    3.650055   0.599219  6.0914 1.12e-09
## PCTOWNHOME  -0.411829   0.169095 -2.4355 0.014872
## 
## Rho: 0.22518, LR test value: 7.7503, p-value: 0.0053703
## Asymptotic standard error: 0.079538
##     z-value: 2.8312, p-value: 0.0046378
## Wald statistic: 8.0155, p-value: 0.0046378
## 
## Log likelihood: -274.8536 for lag model
## ML residual variance (sigma squared): 0.40998, (sigma: 0.64029)
## Number of observations: 281 
## Number of parameters estimated: 6 
## AIC: 561.71, (AIC for lm: 567.46)
## LM test for residual autocorrelation
## test value: 0.6627, p-value: 0.41561
# SEM
nyerr <- errorsarlm(Z~PEXPOSURE+PCTAGE65P+PCTOWNHOME, data=NY8, listw=NYlistwW);summary(nyerr)
## 
## Call:errorsarlm(formula = Z ~ PEXPOSURE + PCTAGE65P + PCTOWNHOME, 
##     data = NY8, listw = NYlistwW)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -1.628589 -0.384745 -0.030234  0.324747  4.047906 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##             Estimate Std. Error z value  Pr(>|z|)
## (Intercept) -0.58662    0.17471 -3.3577  0.000786
## PEXPOSURE    0.05933    0.04226  1.4039  0.160335
## PCTAGE65P    3.83746    0.62345  6.1552 7.496e-10
## PCTOWNHOME  -0.44428    0.18897 -2.3510  0.018721
## 
## Lambda: 0.21693, LR test value: 5.4248, p-value: 0.019853
## Asymptotic standard error: 0.085044
##     z-value: 2.5507, p-value: 0.010749
## Wald statistic: 6.5063, p-value: 0.010749
## 
## Log likelihood: -276.0164 for error model
## ML residual variance (sigma squared): 0.41369, (sigma: 0.64319)
## Number of observations: 281 
## Number of parameters estimated: 6 
## AIC: 564.03, (AIC for lm: 567.46)
# SDM
nymix <- lagsarlm(Z~PEXPOSURE+PCTAGE65P+PCTOWNHOME, data=NY8, listw=NYlistwW, type="mixed");summary(nymix)
## 
## Call:
## lagsarlm(formula = Z ~ PEXPOSURE + PCTAGE65P + PCTOWNHOME, data = NY8, 
##     listw = NYlistwW, type = "mixed")
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -1.632286 -0.400142  0.011403  0.325858  4.056743 
## 
## Type: mixed 
## Coefficients: (asymptotic standard errors) 
##                 Estimate Std. Error z value  Pr(>|z|)
## (Intercept)    -0.322597   0.235599 -1.3693   0.17092
## PEXPOSURE       0.090394   0.116765  0.7742   0.43884
## PCTAGE65P       3.613563   0.657768  5.4937 3.937e-08
## PCTOWNHOME     -0.026866   0.252887 -0.1062   0.91539
## lag.PEXPOSURE  -0.051880   0.127429 -0.4071   0.68391
## lag.PCTAGE65P   0.131232   1.208395  0.1086   0.91352
## lag.PCTOWNHOME -0.699499   0.334331 -2.0922   0.03642
## 
## Rho: 0.17578, LR test value: 3.6967, p-value: 0.054521
## Asymptotic standard error: 0.086624
##     z-value: 2.0293, p-value: 0.042431
## Wald statistic: 4.1179, p-value: 0.042431
## 
## Log likelihood: -272.6698 for mixed model
## ML residual variance (sigma squared): 0.40527, (sigma: 0.63661)
## Number of observations: 281 
## Number of parameters estimated: 9 
## AIC: 563.34, (AIC for lm: 565.04)
## LM test for residual autocorrelation
## test value: 1.0337, p-value: 0.30929
6.4 Model Comparisons
anova(nymix, nylag)
##       Model df    AIC  logLik Test L.Ratio p-value
## nymix     1  9 563.34 -272.67    1                
## nylag     2  6 561.71 -274.85    2  4.3678 0.22439
6.5 Model Selection
NYlistwW <- nb2listw(NY_nb, style = "W")
res <- lm.LMtests(nylm, listw=NYlistwW, test="all")
summary(res)
##  Lagrange multiplier diagnostics for spatial dependence
## data:  
## model: lm(formula = Z ~ PEXPOSURE + PCTAGE65P + PCTOWNHOME, data =
## NY8)
## weights: NYlistwW
##  
##        statistic parameter  p.value   
## LMerr     5.1674         1 0.023015 * 
## LMlag     8.5430         1 0.003468 **
## RLMerr    1.6789         1 0.195068   
## RLMlag    5.0546         1 0.024561 * 
## SARMA    10.2220         2 0.006030 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1