Subgroups and Community Detection

WEN, Tzai-Hung (NTU Geography)

library(sna)

1. Subgroups: k-core and n-cliques

setwd("D:/R_Labs")
data<- "sample_adjmatrix.csv"
el<-read.table(data, header=T, row.names=1, sep=",")
m=as.matrix(el)
gplot(m, displaylabels=TRUE)

# Subgroups: K-core structure
kc<-kcores(m)
gplot(m,vertex.col=kc)

# Sbbgroups: n-cliques
cc<-clique.census(m,clique.comembership="bysize")
cc$clique.count #Examine clique counts
##   Agg 23732 23778 23824 23871 58009 58098 58256
## 1   0     0     0     0     0     0     0     0
## 2   2     0     1     1     0     0     1     1
## 3   1     0     0     0     1     1     1     0
## 4   1     1     1     0     1     0     1     0
cc$clique.comemb[1,,] #Isolate co-membership is trivial
##       23732 23778 23824 23871 58009 58098 58256
## 23732     0     0     0     0     0     0     0
## 23778     0     0     0     0     0     0     0
## 23824     0     0     0     0     0     0     0
## 23871     0     0     0     0     0     0     0
## 58009     0     0     0     0     0     0     0
## 58098     0     0     0     0     0     0     0
## 58256     0     0     0     0     0     0     0
cc$clique.comemb[2,,] #Co-membership for 2-cliques
##       23732 23778 23824 23871 58009 58098 58256
## 23732     0     0     0     0     0     0     0
## 23778     0     1     1     0     0     0     0
## 23824     0     1     1     0     0     0     0
## 23871     0     0     0     0     0     0     0
## 58009     0     0     0     0     0     0     0
## 58098     0     0     0     0     0     1     1
## 58256     0     0     0     0     0     1     1
cc$clique.comemb[3,,] #Co-membership for 3-cliques
##       23732 23778 23824 23871 58009 58098 58256
## 23732     0     0     0     0     0     0     0
## 23778     0     0     0     0     0     0     0
## 23824     0     0     0     0     0     0     0
## 23871     0     0     0     1     1     1     0
## 58009     0     0     0     1     1     1     0
## 58098     0     0     0     1     1     1     0
## 58256     0     0     0     0     0     0     0
cc$cliques #Enumerate the cliques
## [[1]]
## NULL
## 
## [[2]]
## [[2]][[1]]
## [1] 6 7
## 
## [[2]][[2]]
## [1] 2 3
## 
## 
## [[3]]
## [[3]][[1]]
## [1] 4 5 6
## 
## 
## [[4]]
## [[4]][[1]]
## [1] 1 2 4 6

2. Community Structures: Identifying Communities

library(igraph)
## 
## Attaching package: 'igraph'
## 
## The following objects are masked from 'package:sna':
## 
##     %c%, betweenness, bonpow, closeness, components, degree,
##     dyad.census, evcent, hierarchy, is.connected, neighborhood,
##     triad.census
## 
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## 
## The following object is masked from 'package:base':
## 
##     union
data(studentnets.M182, package = "NetData")
head(m182_full_data_frame, n=20)
##    ego alter friend_tie social_tie task_tie
## 1    1     1          0       0.00     0.00
## 2    1     2          0       0.00     0.00
## 3    1     3          0       0.00     0.00
## 4    1     4          0       0.00     0.00
## 5    1     5          0       1.20     0.30
## 6    1     6          0       0.00     0.00
## 7    1     7          0       0.00     0.00
## 8    1     8          0       0.15     0.00
## 9    1     9          0       2.85     0.30
## 10   1    10          0       6.45     0.30
## 11   1    11          0       0.30     0.00
## 12   1    12          0       1.95     0.15
## 13   1    13          0       0.60     0.00
## 14   1    14          0       0.00     0.00
## 15   1    15          0       5.10     0.15
## 16   1    16          0       1.35     5.10
## 17   2     1          1       0.00     0.00
## 18   2     2          0       0.00     0.00
## 19   2     3          0       0.15     0.00
## 20   2     4          0       0.00     0.00
# igraph object
m182_full_nonzero_edges <- subset(m182_full_data_frame, (friend_tie > 0 | social_tie > 0 | task_tie > 0))
head(m182_full_nonzero_edges)
##    ego alter friend_tie social_tie task_tie
## 5    1     5          0       1.20     0.30
## 8    1     8          0       0.15     0.00
## 9    1     9          0       2.85     0.30
## 10   1    10          0       6.45     0.30
## 11   1    11          0       0.30     0.00
## 12   1    12          0       1.95     0.15
m182_full <- graph.data.frame(m182_full_nonzero_edges) 
summary(m182_full)
## IGRAPH DN-- 16 144 -- 
## + attr: name (v/c), friend_tie (e/n), social_tie (e/n), task_tie
## | (e/n)
plot(m182_full)

# Create sub-graphs based on edge attributes
m182_friend <- delete.edges(m182_full, E(m182_full)[get.edge.attribute(m182_full,name = "friend_tie")==0])
m182_social <- delete.edges(m182_full, E(m182_full)[get.edge.attribute(m182_full,name = "social_tie")==0])
m182_task <- delete.edges(m182_full, E(m182_full)[get.edge.attribute(m182_full,name = "task_tie")==0])

# Look at the plots for each sub-graph
friend_layout <- layout.fruchterman.reingold(m182_friend)  #?layout.fruchterman.reingold()
plot(m182_friend, layout=friend_layout, edge.arrow.size=.5)

social_layout <- layout.fruchterman.reingold(m182_social)
plot(m182_social, layout=social_layout, edge.arrow.size=.5)

task_layout <- layout.fruchterman.reingold(m182_task)
plot(m182_task, layout=task_layout, edge.arrow.size=.5)

# COMMUNITY DETECTION

# set the network to undirected and remove isolated vertices.
#?as.undirected()
m182_friend_und <- as.undirected(m182_friend, mode='collapse')
m182_friend_no_iso <- delete.vertices(m182_friend_und, V(m182_friend_und)[degree(m182_friend_und)==0])
summary(m182_friend_no_iso)
## IGRAPH UN-- 14 42 -- 
## + attr: name (v/c)
# A. COMMUNITY DETECTION: WALKTRAP, Pons and Latapy (2005)
# ?walktrap.community()
friend_comm_wt <- cluster_walktrap(m182_friend_no_iso, steps=200,modularity=TRUE)
friend_comm_wt
## IGRAPH clustering walktrap, groups: 3, mod: 0.099
## + groups:
##   $`1`
##   [1] "2"  "8"  "13"
##   
##   $`2`
##   [1] "1"  "3"  "5"  "9"  "10" "12" "15"
##   
##   $`3`
##   [1] "6"  "7"  "11" "14"
## 
friend_comm_dend <- as.dendrogram(friend_comm_wt, use.modularity=TRUE)
plot(friend_comm_dend)

# B. COMMUNITY DETECTION: EDGE BETWEENNESS METHOD, Newman and Girvan (2004)
vcount(m182_friend_no_iso) # v=14
## [1] 14
ecount(m182_friend_no_iso) # e=42
## [1] 42
friend_comm_eb1 <- cluster_edge_betweenness (m182_friend_no_iso)
friend_comm_eb1
## IGRAPH clustering edge betweenness, groups: 3, mod: 0.28
## + groups:
##   $`1`
##   [1] "1"  "9"  "10" "12" "15"
##   
##   $`2`
##   [1] "2"  "7"  "8"  "13" "14"
##   
##   $`3`
##   [1] "3"  "5"  "6"  "11"
## 
mods <- sapply(0:ecount(m182_friend_no_iso), function(i){
  g2 <- delete.edges(m182_friend_no_iso, friend_comm_eb1$removed.edges[seq(length=i)])
  cl <- clusters(g2)$membership
  # compute modularity on the original graph g 
   modularity(m182_friend_no_iso,cl)
})

# we can now plot all modularities
plot(mods, pch=20)

# Now, color the nodes according to their membership
g2<-delete.edges(m182_friend_no_iso, friend_comm_eb1$removed.edges[seq(length=which.max(mods)-1)])
V(m182_friend_no_iso)$color=clusters(g2)$membership

# choose a layout for the graph
m182_friend_no_iso$layout <- layout.fruchterman.reingold

# plot it
plot(m182_friend_no_iso)