bindings/python/topsort.py
author Mathieu Lacage <mathieu.lacage@sophia.inria.fr>
Sat, 04 Jul 2009 08:15:48 +0200
changeset 4654 2eaebe77d66b
permissions -rw-r--r--
Added tag ns-3.5 for changeset c975274c9707
     1 # topsort - dependency (topological) sorting and cycle finding functions
     2 # Copyright (C) 2007 RADLogic
     3 # 
     4 # This library is free software; you can redistribute it and/or
     5 # modify it under the terms of the GNU Lesser General Public
     6 # License as published by the Free Software Foundation; 
     7 # version 2.1 of the License.
     8 # 
     9 # This library is distributed in the hope that it will be useful,
    10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
    11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    12 # Lesser General Public License for more details.
    13 #
    14 # See http://www.fsf.org/licensing/licenses/lgpl.txt for full license text.
    15 """Provide toplogical sorting (i.e. dependency sorting) functions.
    16 
    17 The topsort function is based on code posted on Usenet by Tim Peters.
    18 
    19 Modifications:
    20 - added doctests
    21 - changed some bits to use current Python idioms
    22   (listcomp instead of filter, +=/-=, inherit from Exception)
    23 - added a topsort_levels version that ports items in each dependency level
    24   into a sub-list
    25 - added find_cycles to aid in cycle debugging
    26 
    27 Run this module directly to run the doctests (unittests).
    28 Make sure they all pass before checking in any modifications.
    29 
    30 Requires Python >= 2.2
    31 (For Python 2.2 also requires separate sets.py module)
    32 
    33 This requires the rad_util.py module.
    34 
    35 """
    36 
    37 # Provide support for Python 2.2*
    38 from __future__ import generators
    39 
    40 __version__ = '$Revision: 0.9 $'
    41 __date__ = '$Date: 2007/03/27 04:15:26 $'
    42 __credits__ = '''Tim Peters -- original topsort code
    43 Tim Wegener -- doctesting, updating to current idioms, topsort_levels,
    44                find_cycles
    45 '''
    46 
    47 # Make Python 2.3 sets look like Python 2.4 sets.
    48 try:
    49     set
    50 except NameError:
    51     from sets import Set as set
    52 
    53 from rad_util import is_rotated
    54 
    55 
    56 class CycleError(Exception):
    57     """Cycle Error"""
    58     pass
    59 
    60 
    61 def topsort(pairlist):
    62     """Topologically sort a list of (parent, child) pairs.
    63 
    64     Return a list of the elements in dependency order (parent to child order).
    65 
    66     >>> print topsort( [(1,2), (3,4), (5,6), (1,3), (1,5), (1,6), (2,5)] ) 
    67     [1, 2, 3, 5, 4, 6]
    68 
    69     >>> print topsort( [(1,2), (1,3), (2,4), (3,4), (5,6), (4,5)] )
    70     [1, 2, 3, 4, 5, 6]
    71 
    72     >>> print topsort( [(1,2), (2,3), (3,2)] )
    73     Traceback (most recent call last):
    74     CycleError: ([1], {2: 1, 3: 1}, {2: [3], 3: [2]})
    75     
    76     """
    77     num_parents = {}  # element -> # of predecessors 
    78     children = {}  # element -> list of successors 
    79     for parent, child in pairlist: 
    80         # Make sure every element is a key in num_parents.
    81         if not num_parents.has_key( parent ): 
    82             num_parents[parent] = 0 
    83         if not num_parents.has_key( child ): 
    84             num_parents[child] = 0 
    85 
    86         # Since child has a parent, increment child's num_parents count.
    87         num_parents[child] += 1
    88 
    89         # ... and parent gains a child.
    90         children.setdefault(parent, []).append(child)
    91 
    92     # Suck up everything without a parent.
    93     answer = [x for x in num_parents.keys() if num_parents[x] == 0]
    94 
    95     # For everything in answer, knock down the parent count on its children.
    96     # Note that answer grows *in* the loop.
    97     for parent in answer: 
    98         del num_parents[parent]
    99         if children.has_key( parent ): 
   100             for child in children[parent]: 
   101                 num_parents[child] -= 1
   102                 if num_parents[child] == 0: 
   103                     answer.append( child ) 
   104             # Following "del" isn't needed; just makes 
   105             # CycleError details easier to grasp.
   106             del children[parent]
   107 
   108     if num_parents: 
   109         # Everything in num_parents has at least one child -> 
   110         # there's a cycle.
   111         raise CycleError(answer, num_parents, children)
   112     return answer 
   113 
   114 def topsort_levels(pairlist):
   115     """Topologically sort a list of (parent, child) pairs into depth levels.
   116 
   117     This returns a generator. 
   118     Turn this into a an iterator using the iter built-in function.
   119     (if you iterate over the iterator, each element gets generated when
   120     it is asked for, rather than generating the whole list up-front.)
   121 
   122     Each generated element is a list of items at that dependency level.
   123 
   124     >>> dependency_pairs = [(1,2), (3,4), (5,6), (1,3), (1,5), (1,6), (2,5)]
   125     >>> for level in iter(topsort_levels( dependency_pairs )):
   126     ...    print level
   127     [1]
   128     [2, 3]
   129     [4, 5]
   130     [6]
   131 
   132     >>> dependency_pairs = [(1,2), (1,3), (2,4), (3,4), (5,6), (4,5)]
   133     >>> for level in iter(topsort_levels( dependency_pairs )):
   134     ...    print level
   135     [1]
   136     [2, 3]
   137     [4]
   138     [5]
   139     [6]
   140 
   141     >>> dependency_pairs = [(1,2), (2,3), (3,4), (4, 3)]
   142     >>> try:
   143     ...     for level in iter(topsort_levels( dependency_pairs )):
   144     ...         print level
   145     ... except CycleError, exc:
   146     ...     print 'CycleError:', exc
   147     [1]
   148     [2]
   149     CycleError: ({3: 1, 4: 1}, {3: [4], 4: [3]})
   150 
   151 
   152     The cycle error should look like.
   153     CycleError: ({3: 1, 4: 1}, {3: [4], 4: [3]})
   154     # todo: Make the doctest more robust (i.e. handle arbitrary dict order).
   155 
   156     """
   157     num_parents = {}  # element -> # of predecessors 
   158     children = {}  # element -> list of successors 
   159     for parent, child in pairlist: 
   160         # Make sure every element is a key in num_parents.
   161         if not num_parents.has_key( parent ): 
   162             num_parents[parent] = 0 
   163         if not num_parents.has_key( child ): 
   164             num_parents[child] = 0 
   165 
   166         # Since child has a parent, increment child's num_parents count.
   167         num_parents[child] += 1
   168 
   169         # ... and parent gains a child.
   170         children.setdefault(parent, []).append(child)
   171 
   172     return topsort_levels_core(num_parents, children)
   173 
   174 def topsort_levels_core(num_parents, children):
   175     """Topologically sort a bunch of interdependent items based on dependency.
   176 
   177     This returns a generator.
   178     Turn this into a an iterator using the iter built-in function.
   179     (if you iterate over the iterator, each element gets generated when
   180     it is asked for, rather than generating the whole list up-front.)
   181 
   182     Each generated element is a list of items at that dependency level.
   183 
   184     >>> list(topsort_levels_core(
   185     ...          {1: 0, 2: 1, 3: 1, 4: 1, 5: 2, 6: 2},
   186     ...          {1: [2, 3, 5, 6], 2: [5], 3: [4], 4: [], 5: [6]}))
   187     [[1], [2, 3], [4, 5], [6]]
   188 
   189     >>> list(topsort_levels_core(
   190     ...          {1: 0, 2: 2, 3: 1},
   191     ...          {1: [2], 2: [3], 3: [2]}))
   192     Traceback (most recent call last):
   193     CycleError: ({2: 1, 3: 1}, {2: [3], 3: [2]})
   194 
   195     This function has a more complicated interface than topsort_levels,
   196     but is useful if the data is easier to generate in this form.
   197 
   198     Arguments:
   199     num_parents -- key: item, value: number of parents (predecessors)
   200     children -- key: item, value: list of children (successors)
   201 
   202     """
   203     while 1:
   204         # Suck up everything without a predecessor.
   205         level_parents = [x for x in num_parents.keys() if num_parents[x] == 0]
   206 
   207         if not level_parents:
   208             break
   209 
   210         # Offer the next generated item,
   211         # which is a list of the items at this dependency level.
   212         yield level_parents
   213 
   214         # For everything item in this level,
   215         # decrement the parent count,
   216         # since we have accounted for its parent.
   217         for level_parent in level_parents:
   218 
   219             del num_parents[level_parent]
   220 
   221             if children.has_key(level_parent):
   222                 for level_parent_child in children[level_parent]:
   223                     num_parents[level_parent_child] -= 1
   224                 del children[level_parent]
   225         
   226     if num_parents: 
   227         # Everything in num_parents has at least one child -> 
   228         # there's a cycle.
   229         raise CycleError(num_parents, children)
   230     else:
   231         # This is the end of the generator.
   232         raise StopIteration
   233 
   234 
   235 def find_cycles(parent_children):
   236     """Yield cycles. Each result is a list of items comprising a cycle.
   237 
   238     Use a 'stack' based approach to find all the cycles.
   239     This is a generator, so yields each cycle as it finds it.
   240 
   241     It is implicit that the last item in each cycle list is a parent of the
   242     first item (thereby forming a cycle).
   243 
   244     Arguments:
   245     parent_children -- parent -> collection of children
   246 
   247     Simplest cycle:
   248     >>> cycles = list(find_cycles({'A': ['B'], 'B': ['A']}))
   249     >>> len(cycles)
   250     1
   251     >>> cycle = cycles[0]
   252     >>> cycle.sort()
   253     >>> print cycle
   254     ['A', 'B']
   255 
   256     Simplest cycle with extra baggage at the start and the end:
   257     >>> cycles = list(find_cycles(parent_children={'A': ['B'],
   258     ...                                            'B': ['C'],
   259     ...                                            'C': ['B', 'D'],
   260     ...                                            'D': [],
   261     ...                                            }))
   262     >>> len(cycles)
   263     1
   264     >>> cycle = cycles[0]
   265     >>> cycle.sort()
   266     >>> print cycle
   267     ['B', 'C']
   268 
   269     Double cycle:
   270     >>> cycles = list(find_cycles(parent_children={'A': ['B'],
   271     ...                                            'B': ['C1', 'C2'],
   272     ...                                            'C1': ['D1'],
   273     ...                                            'D1': ['E1'],
   274     ...                                            'E1': ['D1'],
   275     ...                                            'C2': ['D2'],
   276     ...                                            'D2': ['E2'],
   277     ...                                            'E2': ['D2'],
   278     ...                                            }))
   279     >>> len(cycles)
   280     2
   281     >>> for cycle in cycles:
   282     ...     cycle.sort()
   283     >>> cycles.sort()
   284     >>> cycle1 = cycles[0]
   285     >>> cycle1.sort()
   286     >>> print cycle1
   287     ['D1', 'E1']
   288     >>> cycle2 = cycles[1]
   289     >>> cycle2.sort()
   290     >>> print cycle2
   291     ['D2', 'E2']
   292 
   293     Simple cycle with children not specified for one item:
   294     # todo: Should this barf instead?
   295     >>> cycles = list(find_cycles(parent_children={'A': ['B'],
   296     ...                                            'B': ['A'],
   297     ...                                            'C': ['D']}))
   298     >>> len(cycles)
   299     1
   300     >>> cycle = cycles[0]
   301     >>> cycle.sort()
   302     >>> print cycle
   303     ['A', 'B']
   304 
   305     Diamond cycle
   306     >>> cycles = list(find_cycles(parent_children={'A': ['B1', 'B2'],
   307     ...                                            'B1': ['C'],
   308     ...                                            'B2': ['C'],
   309     ...                                            'C': ['A', 'B1']}))
   310     >>> len(cycles)
   311     3
   312     >>> sorted_cycles = []
   313     >>> for cycle in cycles:
   314     ...     cycle = list(cycle)
   315     ...     cycle.sort()
   316     ...     sorted_cycles.append(cycle)
   317     >>> sorted_cycles.sort()
   318     >>> for cycle in sorted_cycles:
   319     ...     print cycle
   320     ['A', 'B1', 'C']
   321     ['A', 'B2', 'C']
   322     ['B1', 'C']
   323 
   324     Hairy case (order can matter if something is wrong):
   325     (Note order of B and C in the list.)
   326     >>> cycles = list(find_cycles(parent_children={
   327     ...                                           'TD': ['DD'],
   328     ...                                           'TC': ['DC'],
   329     ...                                           'DC': ['DQ'],
   330     ...                                           'C': ['DQ'],
   331     ...                                           'DQ': ['IA', 'TO'],
   332     ...                                           'IA': ['A'],
   333     ...                                           'A': ['B', 'C'],
   334     ...                                           }))
   335     >>> len(cycles)
   336     1
   337     >>> cycle = cycles[0]
   338     >>> cycle.sort()
   339     >>> print cycle
   340     ['A', 'C', 'DQ', 'IA']
   341 
   342     """
   343     cycles = []
   344     visited_nodes = set()
   345 
   346     for parent in parent_children:
   347         if parent in visited_nodes:
   348             # This node is part of a path that has already been traversed.
   349             continue
   350 
   351         paths = [[parent]]
   352         while paths:
   353             path = paths.pop()
   354 
   355             parent = path[-1]
   356             
   357             try:
   358                 children = parent_children[parent]
   359             except KeyError:
   360                 continue
   361 
   362             for child in children:
   363                 # Keeping a set of the path nodes, for O(1) lookups at the
   364                 # expense of more memory and complexity, actually makes speed
   365                 # worse. (Due to construction of sets.)
   366                 # This is O(N).
   367                 if child in path:
   368                     # This is a cycle.
   369                     cycle = path[path.index(child):]
   370                     # Check that this is not a dup cycle.
   371                     is_dup = False
   372                     for other_cycle in cycles:
   373                         if is_rotated(other_cycle, cycle):
   374                             is_dup = True
   375                             break
   376                     if not is_dup:
   377                         cycles.append(cycle)
   378                         yield cycle
   379                 else:
   380                     # Push this new path onto the 'stack'.
   381                     # This is probably the most expensive part of the algorithm
   382                     # (a list copy).
   383                     paths.append(path + [child])
   384                     # Mark the node as visited.
   385                     visited_nodes.add(child)
   386 
   387 
   388 if __name__ == '__main__':
   389     # Run the doctest tests.
   390     import sys
   391     import doctest
   392     doctest.testmod(sys.modules['__main__'])