Geographically-Weighted Regression (GWR)

Loading libraries and data for this session

rm(list=ls())
setwd("D:/_Lab_Data/R/R_Labs")
library(rgdal)
library (spdep); library (GWmodel) 

# Load Shapefiles
TWN <- readOGR(dsn = "shapefiles/TB", layer = "Taiwan_TB", encoding="big5")

1. Geographically-Weighted (GW) summary Statistics

Mean, Variance and Correlation

GW Summary
GWS<-gwss(TWN, vars = c("ABOR_P", "INCOME"),adaptive = TRUE, bw = 80)
head(data.frame(GWS$SDF))
##    ABOR_P_LM INCOME_LM ABOR_P_LSD INCOME_LSD ABOR_P_LVar INCOME_LVar
## 0 0.02078067  216.0930 0.06653543   88.67297 0.004426964    7862.895
## 1 0.01815741  213.4552 0.05830183   89.26415 0.003399104    7968.088
## 2 0.02241592  212.7975 0.07399609   89.98895 0.005475421    8098.011
## 3 0.02123055  210.9926 0.07100990   90.31073 0.005042406    8156.028
## 4 0.02021850  214.6373 0.06451800   87.24095 0.004162573    7610.983
## 5 0.02875191  207.2829 0.09547418   91.20165 0.009115318    8317.742
##   ABOR_P_LSKe INCOME_LSKe ABOR_P_LCV INCOME_LCV Cov_ABOR_P.INCOME
## 0    7.009662    1.416691   3.201795  0.4103463         -1.470084
## 1    7.885902    1.341086   3.210911  0.4181868         -1.120811
## 2    6.513068    1.362236   3.301051  0.4228854         -1.658100
## 3    6.800056    1.330823   3.344704  0.4280280         -1.494263
## 4    7.349027    1.454217   3.191038  0.4064576         -1.372312
## 5    5.371143    1.355036   3.320621  0.4399865         -2.419674
##   Corr_ABOR_P.INCOME Spearman_rho_ABOR_P.INCOME
## 0         -0.2447488                -0.14132799
## 1         -0.2117194                -0.01301468
## 2         -0.2448378                -0.08993434
## 3         -0.2292124                -0.03089111
## 4         -0.2395906                -0.12819225
## 5         -0.2734496                -0.11394820
GW Summary: Monte Carlo (randomization) tests
GWS.Sim<-montecarlo.gwss(TWN, vars = c("ABOR_P", "INCOME"),adaptive = TRUE, bw = 80)
Mapping GW Mean
lm.palette <- colorRampPalette(c("white","orange", "red"), space = "rgb")
spplot(TWN, zcol="INCOME", col.regions=lm.palette(20), main="Income")

spplot(GWS$SDF, zcol="INCOME_LM", col.regions=lm.palette(20), main="GW Income")

TWN$GW.income.p<-GWS.Sim[,2]
lm.palette <- colorRampPalette(c("red","orange", "white"), space = "rgb")
spplot(TWN, zcol="GW.income.p", col.regions=lm.palette(20), main="p-value for GW Income")

Mapping ABOR_P
spplot(TWN, zcol="ABOR_P", col.regions=lm.palette(20), main="ABOR_P")

spplot(GWS$SDF, zcol="ABOR_P_LM", col.regions=lm.palette(20), main="GW ABOR_P")

TWN$GW.ABOR_P.p<-GWS.Sim[,1]
lm.palette <- colorRampPalette(c("red","orange", "white"), space = "rgb")
spplot(TWN, zcol="GW.ABOR_P.p", col.regions=lm.palette(20), main="p-value for GW ABOR_P")

Spatial Non-stationary

spplot(GWS$SDF, zcol="Spearman_rho_ABOR_P.INCOME", col.regions=lm.palette(20), main="GW Correlation")

TWN$GW.Corr.p<-GWS.Sim[,13]
lm.palette <- colorRampPalette(c("red","orange", "white"), space = "rgb")
spplot(TWN, zcol="GW.Corr.p", col.regions=lm.palette(20), main="p-value for Corr. between GW ABOR_P and GW Income")


2. GWR Model Specification

OLS Model

lm.global <- lm(TBINCI ~ ABOR_P + INCOME, data=TWN)
summary(lm.global)
## 
## Call:
## lm(formula = TBINCI ~ ABOR_P + INCOME, data = TWN)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0065203 -0.0009545 -0.0000505  0.0009511  0.0155064 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.852e-03  3.310e-04  14.657  < 2e-16 ***
## ABOR_P       1.273e-02  7.155e-04  17.797  < 2e-16 ***
## INCOME      -6.510e-06  2.135e-06  -3.049  0.00247 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002424 on 349 degrees of freedom
## Multiple R-squared:  0.5427, Adjusted R-squared:  0.5401 
## F-statistic: 207.1 on 2 and 349 DF,  p-value: < 2.2e-16

GW Model

Define Variables
DeV<-"TBINCI"
InV<-c("ABOR_P","INCOME")
Model Selection
model.sel <- model.selection.gwr(DeV,InV, data = TWN, kernel = "bisquare", adaptive = TRUE, bw = 80)
## Now calbrating the model: 
##  TBINCI~ABOR_P 
## Now calbrating the model: 
##  TBINCI~INCOME 
## Now calbrating the model: 
##  TBINCI~ABOR_P+INCOME
data.frame(model.sel[2])
##   X1        X2        X3           X4
## 1 80 -3454.601 -3433.676 0.0010739117
## 2 80 -3212.526 -3187.384 0.0021158008
## 3 80 -3472.412 -3440.175 0.0009957491
sorted.models <- model.sort.gwr(model.sel, numVars = length(InV), ruler.vector = model.sel[[2]][,2])
model.list <- sorted.models[[1]]
model.view.gwr(DeV, InV, model.list = model.list)

plot(sorted.models[[2]][,2], col = "black", pch = 20, lty = 5, main = "Alternative view of GWR model selection procedure", ylab = "AICc", xlab = "Model number", type = "b")

Optimal Bandwidth
bw.gwr.1 <- bw.gwr(TBINCI ~ ABOR_P + INCOME, data = TWN, approach = "AICc", kernel = "bisquare", adaptive = TRUE)
## Adaptive bandwidth (number of nearest neighbours): 225 AICc value: -3341.394 
## Adaptive bandwidth (number of nearest neighbours): 147 AICc value: -3377.659 
## Adaptive bandwidth (number of nearest neighbours): 98 AICc value: -3426.754 
## Adaptive bandwidth (number of nearest neighbours): 68 AICc value: -3447.516 
## Adaptive bandwidth (number of nearest neighbours): 49 AICc value: -3458.459 
## Adaptive bandwidth (number of nearest neighbours): 38 AICc value: -3458.5 
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: -3459.36 
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: -3450.999 
## Adaptive bandwidth (number of nearest neighbours): 33 AICc value: -3461.35 
## Adaptive bandwidth (number of nearest neighbours): 34 AICc value: -3460.515 
## Adaptive bandwidth (number of nearest neighbours): 31 AICc value: -3460.654 
## Adaptive bandwidth (number of nearest neighbours): 33 AICc value: -3461.35
bw.gwr.1
## [1] 33

3. Fitting a GWR model

Understanding GWR outputs

gwr.result<-gwr.basic(TBINCI ~ ABOR_P + INCOME, data = TWN, kernel = "bisquare", adaptive = TRUE, bw=bw.gwr.1, F123.test = TRUE)
print(gwr.result)
##    ***********************************************************************
##    *                       Package   GWmodel                             *
##    ***********************************************************************
##    Program starts at: 2015-05-31 21:28:46 
##    Call:
##    gwr.basic(formula = TBINCI ~ ABOR_P + INCOME, data = TWN, bw = bw.gwr.1, 
##     kernel = "bisquare", adaptive = TRUE, F123.test = TRUE)
## 
##    Dependent (y) variable:  TBINCI
##    Independent variables:  ABOR_P INCOME
##    Number of data points: 352
##    ***********************************************************************
##    *                    Results of Global Regression                     *
##    ***********************************************************************
## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0065203 -0.0009545 -0.0000505  0.0009511  0.0155064 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.852e-03  3.310e-04  14.657  < 2e-16 ***
## ABOR_P       1.273e-02  7.155e-04  17.797  < 2e-16 ***
## INCOME      -6.510e-06  2.135e-06  -3.049  0.00247 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002424 on 349 degrees of freedom
## Multiple R-squared:  0.5427, Adjusted R-squared:  0.5401 
## F-statistic: 207.1 on 2 and 349 DF,  p-value: < 2.2e-16
## 
##    ***Extra Diagnostic information
##    Residual sum of squares: 0.002049879
##    Sigma(hat): 0.002420082
##    AIC:  -3235.936
##    AICc:  -3235.821
##    ***********************************************************************
##    *          Results of Geographically Weighted Regression              *
##    ***********************************************************************
## 
##    *********************Model calibration information*********************
##    Kernel function: bisquare 
##    Adaptive bandwidth: 33 (number of nearest neighbours)
##    Regression points: the same locations as observations are used.
##    Distance metric: Euclidean distance metric is used.
## 
##    ****************Summary of GWR coefficient estimates:******************
##                    Min.    1st Qu.     Median    3rd Qu.   Max.
##    Intercept -6.011e-03  3.054e-03  4.484e-03  6.413e-03 0.0096
##    ABOR_P    -4.083e-01  6.600e-03  1.859e-02  3.952e-02 0.7842
##    INCOME    -4.681e-05 -1.452e-05 -4.417e-06  1.666e-06 0.0001
##    ************************Diagnostic information*************************
##    Number of data points: 352 
##    Effective number of parameters (2trace(S) - trace(S'S)): 88.80567 
##    Effective degrees of freedom (n-2trace(S) + trace(S'S)): 263.1943 
##    AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): -3461.35 
##    AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): -3565.832 
##    Residual sum of squares: 0.0006769341 
##    R-square value:  0.8489966 
##    Adjusted R-square value:  0.7978515 
##    ******************F test results of GWR calibration********************
##    ---F1 test (Leung et al. 2000)
##     F1 statistic Numerator DF Denominator DF     Pr(>)    
##          0.43789    287.54680            349 4.956e-13 ***
##    ---F2 test (Leung et al. 2000)
##     F2 statistic Numerator DF Denominator DF     Pr(>)    
##           2.7242     115.9181            349 6.537e-13 ***
##    ---F3 test (Leung et al. 2000)
##              F3 statistic Numerator DF Denominator DF     Pr(>)    
##    Intercept      2.63893    157.83800         287.55 4.927e-13 ***
##    ABOR_P         0.79185     18.93917         287.55    0.7162    
##    INCOME         3.44627    100.68325         287.55 < 2.2e-16 ***
##    ---F4 test (GWR book p92)
##     F4 statistic Numerator DF Denominator DF     Pr(>)    
##          0.33023    263.19433            349 < 2.2e-16 ***
## 
##    ---Significance stars
##    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
##    ***********************************************************************
##    Program stops at: 2015-05-31 21:28:55
head(data.frame(gwr.result$SDF),n=30)
##      Intercept      ABOR_P        INCOME       y        yhat      residual
## 0  0.003991128 0.019491250 -3.038096e-06 0.00378 0.003506050  2.739502e-04
## 1  0.004068040 0.014671847 -3.235740e-06 0.00378 0.003587774  1.922264e-04
## 2  0.004276390 0.013286240 -3.763705e-06 0.00385 0.003557541  2.924586e-04
## 3  0.004347377 0.009350246 -3.953374e-06 0.00303 0.003355510 -3.255100e-04
## 4  0.003810501 0.021388268 -2.611173e-06 0.00318 0.003523491 -3.434908e-04
## 5  0.004551476 0.004841073 -4.403812e-06 0.00389 0.003469884  4.201165e-04
## 6  0.003522722 0.026277514 -2.444939e-06 0.00353 0.003814131 -2.841314e-04
## 7  0.003156470 0.038952467 -5.080663e-06 0.00424 0.003110191  1.129809e-03
## 8  0.004247506 0.011297578 -4.326522e-06 0.00435 0.003779087  5.709130e-04
## 9  0.003342306 0.028740159 -1.162428e-06 0.00366 0.003326361  3.336391e-04
## 10 0.004744637 0.029355254 -5.103312e-06 0.00410 0.004066626  3.337410e-05
## 11 0.003134758 0.051962526  2.964967e-06 0.00664 0.004625832  2.014168e-03
## 12 0.003987664 0.020019423 -3.035685e-06 0.00340 0.003812152 -4.121522e-04
## 13 0.003982512 0.014854308 -2.970952e-06 0.00343 0.003653236 -2.232360e-04
## 14 0.003728207 0.017126115 -2.380432e-06 0.00331 0.003618397 -3.083966e-04
## 15 0.003612956 0.021231921 -2.272751e-06 0.00433 0.003626066  7.039343e-04
## 16 0.003097722 0.012592529 -1.490935e-06 0.00277 0.002960560 -1.905603e-04
## 17 0.004543258 0.025871971 -4.740176e-06 0.00444 0.003789843  6.501573e-04
## 18 0.004342271 0.006612645 -3.768988e-06 0.00486 0.003849327  1.010673e-03
## 19 0.004045822 0.006592552 -2.178866e-06 0.00132 0.003793713 -2.473713e-03
## 20 0.003188211 0.054704606 -1.089685e-06 0.00317 0.003540120 -3.701199e-04
## 21 0.003010911 0.074994704 -4.969543e-07 0.00188 0.003146087 -1.266087e-03
## 22 0.003547696 0.015156187 -1.783455e-06 0.00324 0.003478931 -2.389311e-04
## 23 0.003758473 0.049887265 -7.675048e-07 0.00511 0.003803634  1.306366e-03
## 24 0.003181933 0.049111549  2.863381e-06 0.00563 0.003604832  2.025168e-03
## 25 0.002649751 0.043099960  6.931196e-06 0.00283 0.003716368 -8.863684e-04
## 26 0.003255392 0.069176969 -1.043316e-06 0.00338 0.003309346  7.065420e-05
## 27 0.003474323 0.061682492 -1.248843e-06 0.00568 0.004065210  1.614790e-03
## 28 0.004906670 0.010277812 -6.448590e-06 0.00574 0.007822407 -2.082407e-03
## 29 0.004646714 0.014716972 -6.211713e-06 0.00378 0.003814071 -3.407098e-05
##         CV_Score Stud_residual Intercept_SE   ABOR_P_SE    INCOME_SE
## 0   3.143170e-04    0.18368675 0.0012407318 0.038266282 3.849065e-06
## 1   2.354979e-04    0.13288231 0.0012557651 0.040612770 3.850333e-06
## 2   3.315151e-04    0.19646651 0.0012187073 0.039696696 3.779968e-06
## 3  -3.578211e-04   -0.21448384 0.0012231477 0.041499385 3.769621e-06
## 4  -3.812831e-04   -0.22720323 0.0012447224 0.037733656 3.889734e-06
## 5   4.553316e-04    0.27958914 0.0009298892 0.004825645 3.382305e-06
## 6  -3.361093e-04   -0.19618883 0.0012255027 0.030673870 4.065012e-06
## 7   1.332291e-03    0.77383969 0.0016717973 0.028555738 6.756271e-06
## 8   7.223532e-04    0.42992499 0.0012944121 0.002996982 5.359686e-06
## 9   3.711832e-04    0.22884783 0.0011805872 0.052413031 3.848356e-06
## 10  4.995773e-05    0.02720990 0.0009680836 0.046203254 3.109707e-06
## 11  2.480976e-03    1.45839940 0.0008987638 0.041557733 3.913352e-06
## 12 -4.676242e-04   -0.28068573 0.0012492970 0.035530838 3.958339e-06
## 13 -2.633055e-04   -0.15397854 0.0012710708 0.042331877 3.897559e-06
## 14 -3.497423e-04   -0.21034325 0.0012452974 0.038083141 3.893200e-06
## 15  8.013611e-04    0.47766110 0.0012487637 0.037354460 3.918570e-06
## 16 -2.081488e-04   -0.12608830 0.0013829684 0.037122405 4.561646e-06
## 17  7.410246e-04    0.44866363 0.0011295426 0.059170909 3.514991e-06
## 18  1.318023e-03    0.77797488 0.0010188360 0.005370444 3.526890e-06
## 19 -3.100855e-03   -1.79113646 0.0009874231 0.005025203 4.172787e-06
## 20 -4.365766e-04   -0.26533464 0.0013598987 0.063764979 4.231877e-06
## 21 -1.791043e-03   -1.04934510 0.0012769422 0.055024974 4.268380e-06
## 22 -2.766817e-04   -0.16781320 0.0012916770 0.038861245 4.166074e-06
## 23  1.524106e-03    0.89255755 0.0008369275 0.040076686 3.201692e-06
## 24  2.398081e-03    1.42189684 0.0008660478 0.042379573 3.978884e-06
## 25 -1.018636e-03   -0.62240477 0.0009380379 0.044696335 5.027201e-06
## 26  9.097105e-05    0.05218592 0.0011018731 0.047572916 3.743813e-06
## 27  1.948300e-03    1.15542115 0.0009859330 0.043572061 3.415541e-06
## 28 -3.614295e-03   -2.17396355 0.0010331842 0.002640875 4.737420e-06
## 29 -3.903521e-05   -0.02335986 0.0012587888 0.002719266 7.839368e-06
##    Intercept_TV ABOR_P_TV  INCOME_TV
## 0      3.216753 0.5093583 -0.7893077
## 1      3.239491 0.3612619 -0.8403793
## 2      3.508956 0.3346939 -0.9956976
## 3      3.554253 0.2253105 -1.0487457
## 4      3.061326 0.5668220 -0.6712986
## 5      4.894644 1.0031970 -1.3020151
## 6      2.874512 0.8566742 -0.6014593
## 7      1.888070 1.3640855 -0.7519922
## 8      3.281417 3.7696520 -0.8072343
## 9      2.831054 0.5483400 -0.3020584
## 10     4.901061 0.6353504 -1.6410910
## 11     3.487855 1.2503696  0.7576540
## 12     3.191926 0.5634380 -0.7669088
## 13     3.133194 0.3509012 -0.7622597
## 14     2.993829 0.4497033 -0.6114334
## 15     2.893227 0.5683905 -0.5799950
## 16     2.239908 0.3392164 -0.3268415
## 17     4.022210 0.4372414 -1.3485598
## 18     4.261992 1.2313032 -1.0686436
## 19     4.097354 1.3118976 -0.5221609
## 20     2.344447 0.8579099 -0.2574944
## 21     2.357907 1.3629212 -0.1164269
## 22     2.746581 0.3900078 -0.4280900
## 23     4.490799 1.2447952 -0.2397185
## 24     3.674085 1.1588495  0.7196442
## 25     2.824780 0.9642840  1.3787386
## 26     2.954417 1.4541251 -0.2786775
## 27     3.523893 1.4156432 -0.3656354
## 28     4.749076 3.8918208 -1.3612030
## 29     3.691417 5.4121124 -0.7923742
Table<-data.frame(gwr.result$SDF)
hist(Table$INCOME, breaks=20)

Mapping the model parameters

lm.palette <- colorRampPalette(c("blue","orange", "red"), space = "rgb")
spplot(gwr.result$SDF, zcol="INCOME", col.regions=lm.palette(20), main="GWR Results")

# Exporting to Shape files
drv="ESRI Shapefile"
writeOGR(gwr.result$SDF, dsn="shapefiles/GWR", layer="TB_GWR", driver=drv)
## Warning in writeOGR(gwr.result$SDF, dsn = "shapefiles/GWR", layer =
## "TB_GWR", : Field names abbreviated for ESRI Shapefile driver

Mapping p-values from the pseudo t-tests of GWR outputs

Monte Carlo test for significance of GWR parameter variability
gwr.sim.result<-montecarlo.gwr(TBINCI ~ ABOR_P + INCOME, data = TWN, kernel = "bisquare", adaptive = TRUE, bw=bw.gwr.1)
## 
## Tests based on the Monte Carlo significance test
## 
##             p-value
## (Intercept)       0
## ABOR_P            0
## INCOME            0
p-values from the pseudo t-tests of GWR outputs
pvalue<-gwr.t.adjust(gwr.result) 
pvalueTable<-pvalue$SDF@data; names(pvalueTable)
##  [1] "Intercept_t"    "ABOR_P_t"       "INCOME_t"       "Intercept_p"   
##  [5] "ABOR_P_p"       "INCOME_p"       "Intercept_p_by" "ABOR_P_p_by"   
##  [9] "INCOME_p_by"    "Intercept_p_fb" "ABOR_P_p_fb"    "INCOME_p_fb"   
## [13] "Intercept_p_bo" "ABOR_P_p_bo"    "INCOME_p_bo"    "Intercept_p_bh"
## [17] "ABOR_P_p_bh"    "INCOME_p_bh"
Mapping pseudo t-statistic and p-value
# Mapping t-statistic
lm.palette <- colorRampPalette(c("white","orange", "red"), space = "rgb")
spplot(pvalue$SDF, zcol="ABOR_P_t", col.regions=lm.palette(20), main="t-statistic of ABOR_P")

# Mapping p-value
lm.palette <- colorRampPalette(c("red","orange", "white"), space = "rgb")
spplot(pvalue$SDF, zcol="ABOR_P_p", col.regions=lm.palette(20), main="p-value of ABOR_P")

# Mapping p-value (Bonferroni)
lm.palette <- colorRampPalette(c("red","orange", "white"), space = "rgb")
spplot(pvalue$SDF, zcol="ABOR_P_p_bo", col.regions=lm.palette(20), main="p-value (Bonferroni) of ABOR_P")