ioftools / networkxMiCe / networkxmaster / networkx / algorithms / approximation / kcomponents.py @ 5cef0f13
History  View  Annotate  Download (13.1 KB)
1 
""" Fast approximation for kcomponent structure


2 
"""

3 
# Copyright (C) 2015 by

4 
# Jordi Torrents <jtorrents@milnou.net>

5 
# All rights reserved.

6 
# BSD license.

7 
import itertools 
8 
from collections import defaultdict 
9 
from collections.abc import Mapping 
10  
11 
import networkx as nx 
12 
from networkx.exception import NetworkXError 
13 
from networkx.utils import not_implemented_for 
14  
15 
from networkx.algorithms.approximation import local_node_connectivity 
16 
from networkx.algorithms.connectivity import \ 
17 
local_node_connectivity as exact_local_node_connectivity 
18  
19  
20 
__author__ = """\n""".join(['Jordi Torrents <jtorrents@milnou.net>']) 
21  
22 
__all__ = ['k_components']

23  
24  
25 
not_implemented_for('directed')

26  
27  
28 
def k_components(G, min_density=0.95): 
29 
r"""Returns the approximate kcomponent structure of a graph G.

30 

31 
A `k`component is a maximal subgraph of a graph G that has, at least,

32 
node connectivity `k`: we need to remove at least `k` nodes to break it

33 
into more components. `k`components have an inherent hierarchical

34 
structure because they are nested in terms of connectivity: a connected

35 
graph can contain several 2components, each of which can contain

36 
one or more 3components, and so forth.

37 

38 
This implementation is based on the fast heuristics to approximate

39 
the `k`component structure of a graph [1]_. Which, in turn, it is based on

40 
a fast approximation algorithm for finding good lower bounds of the number

41 
of node independent paths between two nodes [2]_.

42 

43 
Parameters

44 


45 
G : NetworkX graph

46 
Undirected graph

47 

48 
min_density : Float

49 
Density relaxation threshold. Default value 0.95

50 

51 
Returns

52 


53 
k_components : dict

54 
Dictionary with connectivity level `k` as key and a list of

55 
sets of nodes that form a kcomponent of level `k` as values.

56 

57 

58 
Examples

59 


60 
>>> # Petersen graph has 10 nodes and it is triconnected, thus all

61 
>>> # nodes are in a single component on all three connectivity levels

62 
>>> from networkx.algorithms import approximation as apxa

63 
>>> G = nx.petersen_graph()

64 
>>> k_components = apxa.k_components(G)

65 

66 
Notes

67 


68 
The logic of the approximation algorithm for computing the `k`component

69 
structure [1]_ is based on repeatedly applying simple and fast algorithms

70 
for `k`cores and biconnected components in order to narrow down the

71 
number of pairs of nodes over which we have to compute White and Newman's

72 
approximation algorithm for finding node independent paths [2]_. More

73 
formally, this algorithm is based on Whitney's theorem, which states

74 
an inclusion relation among node connectivity, edge connectivity, and

75 
minimum degree for any graph G. This theorem implies that every

76 
`k`component is nested inside a `k`edgecomponent, which in turn,

77 
is contained in a `k`core. Thus, this algorithm computes node independent

78 
paths among pairs of nodes in each biconnected part of each `k`core,

79 
and repeats this procedure for each `k` from 3 to the maximal core number

80 
of a node in the input graph.

81 

82 
Because, in practice, many nodes of the core of level `k` inside a

83 
bicomponent actually are part of a component of level k, the auxiliary

84 
graph needed for the algorithm is likely to be very dense. Thus, we use

85 
a complement graph data structure (see `AntiGraph`) to save memory.

86 
AntiGraph only stores information of the edges that are *not* present

87 
in the actual auxiliary graph. When applying algorithms to this

88 
complement graph data structure, it behaves as if it were the dense

89 
version.

90 

91 
See also

92 


93 
k_components

94 

95 
References

96 


97 
.. [1] Torrents, J. and F. Ferraro (2015) Structural Cohesion:

98 
Visualization and Heuristics for Fast Computation.

99 
https://arxiv.org/pdf/1503.04476v1

100 

101 
.. [2] White, Douglas R., and Mark Newman (2001) A Fast Algorithm for

102 
NodeIndependent Paths. Santa Fe Institute Working Paper #0107035

103 
http://eclectic.ss.uci.edu/~drwhite/working.pdf

104 

105 
.. [3] Moody, J. and D. White (2003). Social cohesion and embeddedness:

106 
A hierarchical conception of social groups.

107 
American Sociological Review 68(1), 10328.

108 
http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf

109 

110 
"""

111 
# Dictionary with connectivity level (k) as keys and a list of

112 
# sets of nodes that form a kcomponent as values

113 
k_components = defaultdict(list)

114 
# make a few functions local for speed

115 
node_connectivity = local_node_connectivity 
116 
k_core = nx.k_core 
117 
core_number = nx.core_number 
118 
biconnected_components = nx.biconnected_components 
119 
density = nx.density 
120 
combinations = itertools.combinations 
121 
# Exact solution for k = {1,2}

122 
# There is a linear time algorithm for triconnectivity, if we had an

123 
# implementation available we could start from k = 4.

124 
for component in nx.connected_components(G): 
125 
# isolated nodes have connectivity 0

126 
comp = set(component)

127 
if len(comp) > 1: 
128 
k_components[1].append(comp)

129 
for bicomponent in nx.biconnected_components(G): 
130 
# avoid considering dyads as bicomponents

131 
bicomp = set(bicomponent)

132 
if len(bicomp) > 2: 
133 
k_components[2].append(bicomp)

134 
# There is no kcomponent of k > maximum core number

135 
# \kappa(G) <= \lambda(G) <= \delta(G)

136 
g_cnumber = core_number(G) 
137 
max_core = max(g_cnumber.values())

138 
for k in range(3, max_core + 1): 
139 
C = k_core(G, k, core_number=g_cnumber) 
140 
for nodes in biconnected_components(C): 
141 
# Build a subgraph SG induced by the nodes that are part of

142 
# each biconnected component of the kcore subgraph C.

143 
if len(nodes) < k: 
144 
continue

145 
SG = G.subgraph(nodes) 
146 
# Build auxiliary graph

147 
H = _AntiGraph() 
148 
H.add_nodes_from(SG.nodes()) 
149 
for u, v in combinations(SG, 2): 
150 
K = node_connectivity(SG, u, v, cutoff=k) 
151 
if k > K:

152 
H.add_edge(u, v) 
153 
for h_nodes in biconnected_components(H): 
154 
if len(h_nodes) <= k: 
155 
continue

156 
SH = H.subgraph(h_nodes) 
157 
for Gc in _cliques_heuristic(SG, SH, k, min_density): 
158 
for k_nodes in biconnected_components(Gc): 
159 
Gk = nx.k_core(SG.subgraph(k_nodes), k) 
160 
if len(Gk) <= k: 
161 
continue

162 
k_components[k].append(set(Gk))

163 
return k_components

164  
165  
166 
def _cliques_heuristic(G, H, k, min_density): 
167 
h_cnumber = nx.core_number(H) 
168 
for i, c_value in enumerate(sorted(set(h_cnumber.values()), reverse=True)): 
169 
cands = set(n for n, c in h_cnumber.items() if c == c_value) 
170 
# Skip checking for overlap for the highest core value

171 
if i == 0: 
172 
overlap = False

173 
else:

174 
overlap = set.intersection(*[

175 
set(x for x in H[n] if x not in cands) 
176 
for n in cands]) 
177 
if overlap and len(overlap) < k: 
178 
SH = H.subgraph(cands  overlap) 
179 
else:

180 
SH = H.subgraph(cands) 
181 
sh_cnumber = nx.core_number(SH) 
182 
SG = nx.k_core(G.subgraph(SH), k) 
183 
while not (_same(sh_cnumber) and nx.density(SH) >= min_density): 
184 
#!! This subgraph must be writable => .copy()

185 
SH = H.subgraph(SG).copy() 
186 
if len(SH) <= k: 
187 
break

188 
sh_cnumber = nx.core_number(SH) 
189 
sh_deg = dict(SH.degree())

190 
min_deg = min(sh_deg.values())

191 
SH.remove_nodes_from(n for n, d in sh_deg.items() if d == min_deg) 
192 
SG = nx.k_core(G.subgraph(SH), k) 
193 
else:

194 
yield SG

195  
196  
197 
def _same(measure, tol=0): 
198 
vals = set(measure.values())

199 
if (max(vals)  min(vals)) <= tol: 
200 
return True 
201 
return False 
202  
203  
204 
class _AntiGraph(nx.Graph): 
205 
"""

206 
Class for complement graphs.

207 

208 
The main goal is to be able to work with big and dense graphs with

209 
a low memory foodprint.

210 

211 
In this class you add the edges that *do not exist* in the dense graph,

212 
the report methods of the class return the neighbors, the edges and

213 
the degree as if it was the dense graph. Thus it's possible to use

214 
an instance of this class with some of NetworkX functions. In this

215 
case we only use kcore, connected_components, and biconnected_components.

216 
"""

217  
218 
all_edge_dict = {'weight': 1} 
219  
220 
def single_edge_dict(self): 
221 
return self.all_edge_dict 
222 
edge_attr_dict_factory = single_edge_dict 
223  
224 
def __getitem__(self, n): 
225 
"""Returns a dict of neighbors of node n in the dense graph.

226 

227 
Parameters

228 


229 
n : node

230 
A node in the graph.

231 

232 
Returns

233 


234 
adj_dict : dictionary

235 
The adjacency dictionary for nodes connected to n.

236 

237 
"""

238 
all_edge_dict = self.all_edge_dict

239 
return {node: all_edge_dict for node in 
240 
set(self._adj)  set(self._adj[n])  set([n])} 
241  
242 
def neighbors(self, n): 
243 
"""Returns an iterator over all neighbors of node n in the

244 
dense graph.

245 
"""

246 
try:

247 
return iter(set(self._adj)  set(self._adj[n])  set([n])) 
248 
except KeyError: 
249 
raise NetworkXError("The node %s is not in the graph." % (n,)) 
250  
251 
class AntiAtlasView(Mapping): 
252 
"""An adjacency inner dict for AntiGraph"""

253  
254 
def __init__(self, graph, node): 
255 
self._graph = graph

256 
self._atlas = graph._adj[node]

257 
self._node = node

258  
259 
def __len__(self): 
260 
return len(self._graph)  len(self._atlas)  1 
261  
262 
def __iter__(self): 
263 
return (n for n in self._graph if n not in self._atlas and n != self._node) 
264  
265 
def __getitem__(self, nbr): 
266 
nbrs = set(self._graph._adj)  set(self._atlas)  set([self._node]) 
267 
if nbr in nbrs: 
268 
return self._graph.all_edge_dict 
269 
raise KeyError(nbr) 
270  
271 
class AntiAdjacencyView(AntiAtlasView): 
272 
"""An adjacency outer dict for AntiGraph"""

273  
274 
def __init__(self, graph): 
275 
self._graph = graph

276 
self._atlas = graph._adj

277  
278 
def __len__(self): 
279 
return len(self._atlas) 
280  
281 
def __iter__(self): 
282 
return iter(self._graph) 
283  
284 
def __getitem__(self, node): 
285 
if node not in self._graph: 
286 
raise KeyError(node) 
287 
return self._graph.AntiAtlasView(self._graph, node) 
288  
289 
@property

290 
def adj(self): 
291 
return self.AntiAdjacencyView(self) 
292  
293 
def subgraph(self, nodes): 
294 
"""This subgraph method returns a full AntiGraph. Not a View"""

295 
nodes = set(nodes)

296 
G = _AntiGraph() 
297 
G.add_nodes_from(nodes) 
298 
for n in G: 
299 
Gnbrs = G.adjlist_inner_dict_factory() 
300 
G._adj[n] = Gnbrs 
301 
for nbr, d in self._adj[n].items(): 
302 
if nbr in G._adj: 
303 
Gnbrs[nbr] = d 
304 
G._adj[nbr][n] = d 
305 
G.graph = self.graph

306 
return G

307  
308 
class AntiDegreeView(nx.reportviews.DegreeView): 
309 
def __iter__(self): 
310 
all_nodes = set(self._succ) 
311 
for n in self._nodes: 
312 
nbrs = all_nodes  set(self._succ[n])  set([n]) 
313 
yield (n, len(nbrs)) 
314  
315 
def __getitem__(self, n): 
316 
nbrs = set(self._succ)  set(self._succ[n])  set([n]) 
317 
# AntiGraph is a ThinGraph so all edges have weight 1

318 
return len(nbrs) + (n in nbrs) 
319  
320 
@property

321 
def degree(self): 
322 
"""Returns an iterator for (node, degree) and degree for single node.

323 

324 
The node degree is the number of edges adjacent to the node.

325 

326 
Parameters

327 


328 
nbunch : iterable container, optional (default=all nodes)

329 
A container of nodes. The container will be iterated

330 
through once.

331 

332 
weight : string or None, optional (default=None)

333 
The edge attribute that holds the numerical value used

334 
as a weight. If None, then each edge has weight 1.

335 
The degree is the sum of the edge weights adjacent to the node.

336 

337 
Returns

338 


339 
deg:

340 
Degree of the node, if a single node is passed as argument.

341 
nd_iter : an iterator

342 
The iterator returns twotuples of (node, degree).

343 

344 
See Also

345 


346 
degree

347 

348 
Examples

349 


350 
>>> G = nx.path_graph(4)

351 
>>> G.degree(0) # node 0 with degree 1

352 
1

353 
>>> list(G.degree([0,1]))

354 
[(0, 1), (1, 2)]

355 

356 
"""

357 
return self.AntiDegreeView(self) 
358  
359 
def adjacency(self): 
360 
"""Returns an iterator of (node, adjacency set) tuples for all nodes

361 
in the dense graph.

362 

363 
This is the fastest way to look at every edge.

364 
For directed graphs, only outgoing adjacencies are included.

365 

366 
Returns

367 


368 
adj_iter : iterator

369 
An iterator of (node, adjacency set) for all nodes in

370 
the graph.

371 

372 
"""

373 
for n in self._adj: 
374 
yield (n, set(self._adj)  set(self._adj[n])  set([n])) 