1 # topsort - dependency (topological) sorting and cycle finding functions
2 # Copyright (C) 2007 RADLogic
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation;
7 # version 2.1 of the License.
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # Lesser General Public License for more details.
14 # See http://www.fsf.org/licensing/licenses/lgpl.txt for full license text.
15 """Provide toplogical sorting (i.e. dependency sorting) functions.
17 The topsort function is based on code posted on Usenet by Tim Peters.
21 - changed some bits to use current Python idioms
22 (listcomp instead of filter, +=/-=, inherit from Exception)
23 - added a topsort_levels version that ports items in each dependency level
25 - added find_cycles to aid in cycle debugging
27 Run this module directly to run the doctests (unittests).
28 Make sure they all pass before checking in any modifications.
30 Requires Python >= 2.2
31 (For Python 2.2 also requires separate sets.py module)
33 This requires the rad_util.py module.
37 # Provide support for Python 2.2*
38 from __future__ import generators
40 __version__ = '$Revision: 0.9 $'
41 __date__ = '$Date: 2007/03/27 04:15:26 $'
42 __credits__ = '''Tim Peters -- original topsort code
43 Tim Wegener -- doctesting, updating to current idioms, topsort_levels,
47 # Make Python 2.3 sets look like Python 2.4 sets.
51 from sets import Set as set
53 from rad_util import is_rotated
56 class CycleError(Exception):
61 def topsort(pairlist):
62 """Topologically sort a list of (parent, child) pairs.
64 Return a list of the elements in dependency order (parent to child order).
66 >>> print topsort( [(1,2), (3,4), (5,6), (1,3), (1,5), (1,6), (2,5)] )
69 >>> print topsort( [(1,2), (1,3), (2,4), (3,4), (5,6), (4,5)] )
72 >>> print topsort( [(1,2), (2,3), (3,2)] )
73 Traceback (most recent call last):
74 CycleError: ([1], {2: 1, 3: 1}, {2: [3], 3: [2]})
77 num_parents = {} # element -> # of predecessors
78 children = {} # element -> list of successors
79 for parent, child in pairlist:
80 # Make sure every element is a key in num_parents.
81 if not num_parents.has_key( parent ):
82 num_parents[parent] = 0
83 if not num_parents.has_key( child ):
84 num_parents[child] = 0
86 # Since child has a parent, increment child's num_parents count.
87 num_parents[child] += 1
89 # ... and parent gains a child.
90 children.setdefault(parent, []).append(child)
92 # Suck up everything without a parent.
93 answer = [x for x in num_parents.keys() if num_parents[x] == 0]
95 # For everything in answer, knock down the parent count on its children.
96 # Note that answer grows *in* the loop.
98 del num_parents[parent]
99 if children.has_key( parent ):
100 for child in children[parent]:
101 num_parents[child] -= 1
102 if num_parents[child] == 0:
103 answer.append( child )
104 # Following "del" isn't needed; just makes
105 # CycleError details easier to grasp.
109 # Everything in num_parents has at least one child ->
111 raise CycleError(answer, num_parents, children)
114 def topsort_levels(pairlist):
115 """Topologically sort a list of (parent, child) pairs into depth levels.
117 This returns a generator.
118 Turn this into a an iterator using the iter built-in function.
119 (if you iterate over the iterator, each element gets generated when
120 it is asked for, rather than generating the whole list up-front.)
122 Each generated element is a list of items at that dependency level.
124 >>> dependency_pairs = [(1,2), (3,4), (5,6), (1,3), (1,5), (1,6), (2,5)]
125 >>> for level in iter(topsort_levels( dependency_pairs )):
132 >>> dependency_pairs = [(1,2), (1,3), (2,4), (3,4), (5,6), (4,5)]
133 >>> for level in iter(topsort_levels( dependency_pairs )):
141 >>> dependency_pairs = [(1,2), (2,3), (3,4), (4, 3)]
143 ... for level in iter(topsort_levels( dependency_pairs )):
145 ... except CycleError, exc:
146 ... print 'CycleError:', exc
149 CycleError: ({3: 1, 4: 1}, {3: [4], 4: [3]})
152 The cycle error should look like.
153 CycleError: ({3: 1, 4: 1}, {3: [4], 4: [3]})
154 # todo: Make the doctest more robust (i.e. handle arbitrary dict order).
157 num_parents = {} # element -> # of predecessors
158 children = {} # element -> list of successors
159 for parent, child in pairlist:
160 # Make sure every element is a key in num_parents.
161 if not num_parents.has_key( parent ):
162 num_parents[parent] = 0
163 if not num_parents.has_key( child ):
164 num_parents[child] = 0
166 # Since child has a parent, increment child's num_parents count.
167 num_parents[child] += 1
169 # ... and parent gains a child.
170 children.setdefault(parent, []).append(child)
172 return topsort_levels_core(num_parents, children)
174 def topsort_levels_core(num_parents, children):
175 """Topologically sort a bunch of interdependent items based on dependency.
177 This returns a generator.
178 Turn this into a an iterator using the iter built-in function.
179 (if you iterate over the iterator, each element gets generated when
180 it is asked for, rather than generating the whole list up-front.)
182 Each generated element is a list of items at that dependency level.
184 >>> list(topsort_levels_core(
185 ... {1: 0, 2: 1, 3: 1, 4: 1, 5: 2, 6: 2},
186 ... {1: [2, 3, 5, 6], 2: [5], 3: [4], 4: [], 5: [6]}))
187 [[1], [2, 3], [4, 5], [6]]
189 >>> list(topsort_levels_core(
190 ... {1: 0, 2: 2, 3: 1},
191 ... {1: [2], 2: [3], 3: [2]}))
192 Traceback (most recent call last):
193 CycleError: ({2: 1, 3: 1}, {2: [3], 3: [2]})
195 This function has a more complicated interface than topsort_levels,
196 but is useful if the data is easier to generate in this form.
199 num_parents -- key: item, value: number of parents (predecessors)
200 children -- key: item, value: list of children (successors)
204 # Suck up everything without a predecessor.
205 level_parents = [x for x in num_parents.keys() if num_parents[x] == 0]
207 if not level_parents:
210 # Offer the next generated item,
211 # which is a list of the items at this dependency level.
214 # For everything item in this level,
215 # decrement the parent count,
216 # since we have accounted for its parent.
217 for level_parent in level_parents:
219 del num_parents[level_parent]
221 if children.has_key(level_parent):
222 for level_parent_child in children[level_parent]:
223 num_parents[level_parent_child] -= 1
224 del children[level_parent]
227 # Everything in num_parents has at least one child ->
229 raise CycleError(num_parents, children)
231 # This is the end of the generator.
235 def find_cycles(parent_children):
236 """Yield cycles. Each result is a list of items comprising a cycle.
238 Use a 'stack' based approach to find all the cycles.
239 This is a generator, so yields each cycle as it finds it.
241 It is implicit that the last item in each cycle list is a parent of the
242 first item (thereby forming a cycle).
245 parent_children -- parent -> collection of children
248 >>> cycles = list(find_cycles({'A': ['B'], 'B': ['A']}))
251 >>> cycle = cycles[0]
256 Simplest cycle with extra baggage at the start and the end:
257 >>> cycles = list(find_cycles(parent_children={'A': ['B'],
264 >>> cycle = cycles[0]
270 >>> cycles = list(find_cycles(parent_children={'A': ['B'],
271 ... 'B': ['C1', 'C2'],
281 >>> for cycle in cycles:
284 >>> cycle1 = cycles[0]
288 >>> cycle2 = cycles[1]
293 Simple cycle with children not specified for one item:
294 # todo: Should this barf instead?
295 >>> cycles = list(find_cycles(parent_children={'A': ['B'],
300 >>> cycle = cycles[0]
306 >>> cycles = list(find_cycles(parent_children={'A': ['B1', 'B2'],
309 ... 'C': ['A', 'B1']}))
312 >>> sorted_cycles = []
313 >>> for cycle in cycles:
314 ... cycle = list(cycle)
316 ... sorted_cycles.append(cycle)
317 >>> sorted_cycles.sort()
318 >>> for cycle in sorted_cycles:
324 Hairy case (order can matter if something is wrong):
325 (Note order of B and C in the list.)
326 >>> cycles = list(find_cycles(parent_children={
331 ... 'DQ': ['IA', 'TO'],
337 >>> cycle = cycles[0]
340 ['A', 'C', 'DQ', 'IA']
344 visited_nodes = set()
346 for parent in parent_children:
347 if parent in visited_nodes:
348 # This node is part of a path that has already been traversed.
358 children = parent_children[parent]
362 for child in children:
363 # Keeping a set of the path nodes, for O(1) lookups at the
364 # expense of more memory and complexity, actually makes speed
365 # worse. (Due to construction of sets.)
369 cycle = path[path.index(child):]
370 # Check that this is not a dup cycle.
372 for other_cycle in cycles:
373 if is_rotated(other_cycle, cycle):
380 # Push this new path onto the 'stack'.
381 # This is probably the most expensive part of the algorithm
383 paths.append(path + [child])
384 # Mark the node as visited.
385 visited_nodes.add(child)
388 if __name__ == '__main__':
389 # Run the doctest tests.
392 doctest.testmod(sys.modules['__main__'])