bindings/python/topsort.py
author Mathieu Lacage <mathieu.lacage@sophia.inria.fr>
Sat, 04 Jul 2009 08:15:48 +0200
changeset 4654 2eaebe77d66b
permissions -rw-r--r--
Added tag ns-3.5 for changeset c975274c9707
gjc@3408
     1
# topsort - dependency (topological) sorting and cycle finding functions
gjc@3408
     2
# Copyright (C) 2007 RADLogic
gjc@3408
     3
# 
gjc@3408
     4
# This library is free software; you can redistribute it and/or
gjc@3408
     5
# modify it under the terms of the GNU Lesser General Public
gjc@3408
     6
# License as published by the Free Software Foundation; 
gjc@3408
     7
# version 2.1 of the License.
gjc@3408
     8
# 
gjc@3408
     9
# This library is distributed in the hope that it will be useful,
gjc@3408
    10
# but WITHOUT ANY WARRANTY; without even the implied warranty of
gjc@3408
    11
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
gjc@3408
    12
# Lesser General Public License for more details.
gjc@3408
    13
#
gjc@3408
    14
# See http://www.fsf.org/licensing/licenses/lgpl.txt for full license text.
gjc@3408
    15
"""Provide toplogical sorting (i.e. dependency sorting) functions.
gjc@3408
    16
gjc@3408
    17
The topsort function is based on code posted on Usenet by Tim Peters.
gjc@3408
    18
gjc@3408
    19
Modifications:
gjc@3408
    20
- added doctests
gjc@3408
    21
- changed some bits to use current Python idioms
gjc@3408
    22
  (listcomp instead of filter, +=/-=, inherit from Exception)
gjc@3408
    23
- added a topsort_levels version that ports items in each dependency level
gjc@3408
    24
  into a sub-list
gjc@3408
    25
- added find_cycles to aid in cycle debugging
gjc@3408
    26
gjc@3408
    27
Run this module directly to run the doctests (unittests).
gjc@3408
    28
Make sure they all pass before checking in any modifications.
gjc@3408
    29
gjc@3408
    30
Requires Python >= 2.2
gjc@3408
    31
(For Python 2.2 also requires separate sets.py module)
gjc@3408
    32
gjc@3408
    33
This requires the rad_util.py module.
gjc@3408
    34
gjc@3408
    35
"""
gjc@3408
    36
gjc@3408
    37
# Provide support for Python 2.2*
gjc@3408
    38
from __future__ import generators
gjc@3408
    39
gjc@3408
    40
__version__ = '$Revision: 0.9 $'
gjc@3408
    41
__date__ = '$Date: 2007/03/27 04:15:26 $'
gjc@3408
    42
__credits__ = '''Tim Peters -- original topsort code
gjc@3408
    43
Tim Wegener -- doctesting, updating to current idioms, topsort_levels,
gjc@3408
    44
               find_cycles
gjc@3408
    45
'''
gjc@3408
    46
gjc@3408
    47
# Make Python 2.3 sets look like Python 2.4 sets.
gjc@3408
    48
try:
gjc@3408
    49
    set
gjc@3408
    50
except NameError:
gjc@3408
    51
    from sets import Set as set
gjc@3408
    52
gjc@3408
    53
from rad_util import is_rotated
gjc@3408
    54
gjc@3408
    55
gjc@3408
    56
class CycleError(Exception):
gjc@3408
    57
    """Cycle Error"""
gjc@3408
    58
    pass
gjc@3408
    59
gjc@3408
    60
gjc@3408
    61
def topsort(pairlist):
gjc@3408
    62
    """Topologically sort a list of (parent, child) pairs.
gjc@3408
    63
gjc@3408
    64
    Return a list of the elements in dependency order (parent to child order).
gjc@3408
    65
gjc@3408
    66
    >>> print topsort( [(1,2), (3,4), (5,6), (1,3), (1,5), (1,6), (2,5)] ) 
gjc@3408
    67
    [1, 2, 3, 5, 4, 6]
gjc@3408
    68
gjc@3408
    69
    >>> print topsort( [(1,2), (1,3), (2,4), (3,4), (5,6), (4,5)] )
gjc@3408
    70
    [1, 2, 3, 4, 5, 6]
gjc@3408
    71
gjc@3408
    72
    >>> print topsort( [(1,2), (2,3), (3,2)] )
gjc@3408
    73
    Traceback (most recent call last):
gjc@3408
    74
    CycleError: ([1], {2: 1, 3: 1}, {2: [3], 3: [2]})
gjc@3408
    75
    
gjc@3408
    76
    """
gjc@3408
    77
    num_parents = {}  # element -> # of predecessors 
gjc@3408
    78
    children = {}  # element -> list of successors 
gjc@3408
    79
    for parent, child in pairlist: 
gjc@3408
    80
        # Make sure every element is a key in num_parents.
gjc@3408
    81
        if not num_parents.has_key( parent ): 
gjc@3408
    82
            num_parents[parent] = 0 
gjc@3408
    83
        if not num_parents.has_key( child ): 
gjc@3408
    84
            num_parents[child] = 0 
gjc@3408
    85
gjc@3408
    86
        # Since child has a parent, increment child's num_parents count.
gjc@3408
    87
        num_parents[child] += 1
gjc@3408
    88
gjc@3408
    89
        # ... and parent gains a child.
gjc@3408
    90
        children.setdefault(parent, []).append(child)
gjc@3408
    91
gjc@3408
    92
    # Suck up everything without a parent.
gjc@3408
    93
    answer = [x for x in num_parents.keys() if num_parents[x] == 0]
gjc@3408
    94
gjc@3408
    95
    # For everything in answer, knock down the parent count on its children.
gjc@3408
    96
    # Note that answer grows *in* the loop.
gjc@3408
    97
    for parent in answer: 
gjc@3408
    98
        del num_parents[parent]
gjc@3408
    99
        if children.has_key( parent ): 
gjc@3408
   100
            for child in children[parent]: 
gjc@3408
   101
                num_parents[child] -= 1
gjc@3408
   102
                if num_parents[child] == 0: 
gjc@3408
   103
                    answer.append( child ) 
gjc@3408
   104
            # Following "del" isn't needed; just makes 
gjc@3408
   105
            # CycleError details easier to grasp.
gjc@3408
   106
            del children[parent]
gjc@3408
   107
gjc@3408
   108
    if num_parents: 
gjc@3408
   109
        # Everything in num_parents has at least one child -> 
gjc@3408
   110
        # there's a cycle.
gjc@3408
   111
        raise CycleError(answer, num_parents, children)
gjc@3408
   112
    return answer 
gjc@3408
   113
gjc@3408
   114
def topsort_levels(pairlist):
gjc@3408
   115
    """Topologically sort a list of (parent, child) pairs into depth levels.
gjc@3408
   116
gjc@3408
   117
    This returns a generator. 
gjc@3408
   118
    Turn this into a an iterator using the iter built-in function.
gjc@3408
   119
    (if you iterate over the iterator, each element gets generated when
gjc@3408
   120
    it is asked for, rather than generating the whole list up-front.)
gjc@3408
   121
gjc@3408
   122
    Each generated element is a list of items at that dependency level.
gjc@3408
   123
gjc@3408
   124
    >>> dependency_pairs = [(1,2), (3,4), (5,6), (1,3), (1,5), (1,6), (2,5)]
gjc@3408
   125
    >>> for level in iter(topsort_levels( dependency_pairs )):
gjc@3408
   126
    ...    print level
gjc@3408
   127
    [1]
gjc@3408
   128
    [2, 3]
gjc@3408
   129
    [4, 5]
gjc@3408
   130
    [6]
gjc@3408
   131
gjc@3408
   132
    >>> dependency_pairs = [(1,2), (1,3), (2,4), (3,4), (5,6), (4,5)]
gjc@3408
   133
    >>> for level in iter(topsort_levels( dependency_pairs )):
gjc@3408
   134
    ...    print level
gjc@3408
   135
    [1]
gjc@3408
   136
    [2, 3]
gjc@3408
   137
    [4]
gjc@3408
   138
    [5]
gjc@3408
   139
    [6]
gjc@3408
   140
gjc@3408
   141
    >>> dependency_pairs = [(1,2), (2,3), (3,4), (4, 3)]
gjc@3408
   142
    >>> try:
gjc@3408
   143
    ...     for level in iter(topsort_levels( dependency_pairs )):
gjc@3408
   144
    ...         print level
gjc@3408
   145
    ... except CycleError, exc:
gjc@3408
   146
    ...     print 'CycleError:', exc
gjc@3408
   147
    [1]
gjc@3408
   148
    [2]
gjc@3408
   149
    CycleError: ({3: 1, 4: 1}, {3: [4], 4: [3]})
gjc@3408
   150
gjc@3408
   151
gjc@3408
   152
    The cycle error should look like.
gjc@3408
   153
    CycleError: ({3: 1, 4: 1}, {3: [4], 4: [3]})
gjc@3408
   154
    # todo: Make the doctest more robust (i.e. handle arbitrary dict order).
gjc@3408
   155
gjc@3408
   156
    """
gjc@3408
   157
    num_parents = {}  # element -> # of predecessors 
gjc@3408
   158
    children = {}  # element -> list of successors 
gjc@3408
   159
    for parent, child in pairlist: 
gjc@3408
   160
        # Make sure every element is a key in num_parents.
gjc@3408
   161
        if not num_parents.has_key( parent ): 
gjc@3408
   162
            num_parents[parent] = 0 
gjc@3408
   163
        if not num_parents.has_key( child ): 
gjc@3408
   164
            num_parents[child] = 0 
gjc@3408
   165
gjc@3408
   166
        # Since child has a parent, increment child's num_parents count.
gjc@3408
   167
        num_parents[child] += 1
gjc@3408
   168
gjc@3408
   169
        # ... and parent gains a child.
gjc@3408
   170
        children.setdefault(parent, []).append(child)
gjc@3408
   171
gjc@3408
   172
    return topsort_levels_core(num_parents, children)
gjc@3408
   173
gjc@3408
   174
def topsort_levels_core(num_parents, children):
gjc@3408
   175
    """Topologically sort a bunch of interdependent items based on dependency.
gjc@3408
   176
gjc@3408
   177
    This returns a generator.
gjc@3408
   178
    Turn this into a an iterator using the iter built-in function.
gjc@3408
   179
    (if you iterate over the iterator, each element gets generated when
gjc@3408
   180
    it is asked for, rather than generating the whole list up-front.)
gjc@3408
   181
gjc@3408
   182
    Each generated element is a list of items at that dependency level.
gjc@3408
   183
gjc@3408
   184
    >>> list(topsort_levels_core(
gjc@3408
   185
    ...          {1: 0, 2: 1, 3: 1, 4: 1, 5: 2, 6: 2},
gjc@3408
   186
    ...          {1: [2, 3, 5, 6], 2: [5], 3: [4], 4: [], 5: [6]}))
gjc@3408
   187
    [[1], [2, 3], [4, 5], [6]]
gjc@3408
   188
gjc@3408
   189
    >>> list(topsort_levels_core(
gjc@3408
   190
    ...          {1: 0, 2: 2, 3: 1},
gjc@3408
   191
    ...          {1: [2], 2: [3], 3: [2]}))
gjc@3408
   192
    Traceback (most recent call last):
gjc@3408
   193
    CycleError: ({2: 1, 3: 1}, {2: [3], 3: [2]})
gjc@3408
   194
gjc@3408
   195
    This function has a more complicated interface than topsort_levels,
gjc@3408
   196
    but is useful if the data is easier to generate in this form.
gjc@3408
   197
gjc@3408
   198
    Arguments:
gjc@3408
   199
    num_parents -- key: item, value: number of parents (predecessors)
gjc@3408
   200
    children -- key: item, value: list of children (successors)
gjc@3408
   201
gjc@3408
   202
    """
gjc@3408
   203
    while 1:
gjc@3408
   204
        # Suck up everything without a predecessor.
gjc@3408
   205
        level_parents = [x for x in num_parents.keys() if num_parents[x] == 0]
gjc@3408
   206
gjc@3408
   207
        if not level_parents:
gjc@3408
   208
            break
gjc@3408
   209
gjc@3408
   210
        # Offer the next generated item,
gjc@3408
   211
        # which is a list of the items at this dependency level.
gjc@3408
   212
        yield level_parents
gjc@3408
   213
gjc@3408
   214
        # For everything item in this level,
gjc@3408
   215
        # decrement the parent count,
gjc@3408
   216
        # since we have accounted for its parent.
gjc@3408
   217
        for level_parent in level_parents:
gjc@3408
   218
gjc@3408
   219
            del num_parents[level_parent]
gjc@3408
   220
gjc@3408
   221
            if children.has_key(level_parent):
gjc@3408
   222
                for level_parent_child in children[level_parent]:
gjc@3408
   223
                    num_parents[level_parent_child] -= 1
gjc@3408
   224
                del children[level_parent]
gjc@3408
   225
        
gjc@3408
   226
    if num_parents: 
gjc@3408
   227
        # Everything in num_parents has at least one child -> 
gjc@3408
   228
        # there's a cycle.
gjc@3408
   229
        raise CycleError(num_parents, children)
gjc@3408
   230
    else:
gjc@3408
   231
        # This is the end of the generator.
gjc@3408
   232
        raise StopIteration
gjc@3408
   233
gjc@3408
   234
gjc@3408
   235
def find_cycles(parent_children):
gjc@3408
   236
    """Yield cycles. Each result is a list of items comprising a cycle.
gjc@3408
   237
gjc@3408
   238
    Use a 'stack' based approach to find all the cycles.
gjc@3408
   239
    This is a generator, so yields each cycle as it finds it.
gjc@3408
   240
gjc@3408
   241
    It is implicit that the last item in each cycle list is a parent of the
gjc@3408
   242
    first item (thereby forming a cycle).
gjc@3408
   243
gjc@3408
   244
    Arguments:
gjc@3408
   245
    parent_children -- parent -> collection of children
gjc@3408
   246
gjc@3408
   247
    Simplest cycle:
gjc@3408
   248
    >>> cycles = list(find_cycles({'A': ['B'], 'B': ['A']}))
gjc@3408
   249
    >>> len(cycles)
gjc@3408
   250
    1
gjc@3408
   251
    >>> cycle = cycles[0]
gjc@3408
   252
    >>> cycle.sort()
gjc@3408
   253
    >>> print cycle
gjc@3408
   254
    ['A', 'B']
gjc@3408
   255
gjc@3408
   256
    Simplest cycle with extra baggage at the start and the end:
gjc@3408
   257
    >>> cycles = list(find_cycles(parent_children={'A': ['B'],
gjc@3408
   258
    ...                                            'B': ['C'],
gjc@3408
   259
    ...                                            'C': ['B', 'D'],
gjc@3408
   260
    ...                                            'D': [],
gjc@3408
   261
    ...                                            }))
gjc@3408
   262
    >>> len(cycles)
gjc@3408
   263
    1
gjc@3408
   264
    >>> cycle = cycles[0]
gjc@3408
   265
    >>> cycle.sort()
gjc@3408
   266
    >>> print cycle
gjc@3408
   267
    ['B', 'C']
gjc@3408
   268
gjc@3408
   269
    Double cycle:
gjc@3408
   270
    >>> cycles = list(find_cycles(parent_children={'A': ['B'],
gjc@3408
   271
    ...                                            'B': ['C1', 'C2'],
gjc@3408
   272
    ...                                            'C1': ['D1'],
gjc@3408
   273
    ...                                            'D1': ['E1'],
gjc@3408
   274
    ...                                            'E1': ['D1'],
gjc@3408
   275
    ...                                            'C2': ['D2'],
gjc@3408
   276
    ...                                            'D2': ['E2'],
gjc@3408
   277
    ...                                            'E2': ['D2'],
gjc@3408
   278
    ...                                            }))
gjc@3408
   279
    >>> len(cycles)
gjc@3408
   280
    2
gjc@3408
   281
    >>> for cycle in cycles:
gjc@3408
   282
    ...     cycle.sort()
gjc@3408
   283
    >>> cycles.sort()
gjc@3408
   284
    >>> cycle1 = cycles[0]
gjc@3408
   285
    >>> cycle1.sort()
gjc@3408
   286
    >>> print cycle1
gjc@3408
   287
    ['D1', 'E1']
gjc@3408
   288
    >>> cycle2 = cycles[1]
gjc@3408
   289
    >>> cycle2.sort()
gjc@3408
   290
    >>> print cycle2
gjc@3408
   291
    ['D2', 'E2']
gjc@3408
   292
gjc@3408
   293
    Simple cycle with children not specified for one item:
gjc@3408
   294
    # todo: Should this barf instead?
gjc@3408
   295
    >>> cycles = list(find_cycles(parent_children={'A': ['B'],
gjc@3408
   296
    ...                                            'B': ['A'],
gjc@3408
   297
    ...                                            'C': ['D']}))
gjc@3408
   298
    >>> len(cycles)
gjc@3408
   299
    1
gjc@3408
   300
    >>> cycle = cycles[0]
gjc@3408
   301
    >>> cycle.sort()
gjc@3408
   302
    >>> print cycle
gjc@3408
   303
    ['A', 'B']
gjc@3408
   304
gjc@3408
   305
    Diamond cycle
gjc@3408
   306
    >>> cycles = list(find_cycles(parent_children={'A': ['B1', 'B2'],
gjc@3408
   307
    ...                                            'B1': ['C'],
gjc@3408
   308
    ...                                            'B2': ['C'],
gjc@3408
   309
    ...                                            'C': ['A', 'B1']}))
gjc@3408
   310
    >>> len(cycles)
gjc@3408
   311
    3
gjc@3408
   312
    >>> sorted_cycles = []
gjc@3408
   313
    >>> for cycle in cycles:
gjc@3408
   314
    ...     cycle = list(cycle)
gjc@3408
   315
    ...     cycle.sort()
gjc@3408
   316
    ...     sorted_cycles.append(cycle)
gjc@3408
   317
    >>> sorted_cycles.sort()
gjc@3408
   318
    >>> for cycle in sorted_cycles:
gjc@3408
   319
    ...     print cycle
gjc@3408
   320
    ['A', 'B1', 'C']
gjc@3408
   321
    ['A', 'B2', 'C']
gjc@3408
   322
    ['B1', 'C']
gjc@3408
   323
gjc@3408
   324
    Hairy case (order can matter if something is wrong):
gjc@3408
   325
    (Note order of B and C in the list.)
gjc@3408
   326
    >>> cycles = list(find_cycles(parent_children={
gjc@3408
   327
    ...                                           'TD': ['DD'],
gjc@3408
   328
    ...                                           'TC': ['DC'],
gjc@3408
   329
    ...                                           'DC': ['DQ'],
gjc@3408
   330
    ...                                           'C': ['DQ'],
gjc@3408
   331
    ...                                           'DQ': ['IA', 'TO'],
gjc@3408
   332
    ...                                           'IA': ['A'],
gjc@3408
   333
    ...                                           'A': ['B', 'C'],
gjc@3408
   334
    ...                                           }))
gjc@3408
   335
    >>> len(cycles)
gjc@3408
   336
    1
gjc@3408
   337
    >>> cycle = cycles[0]
gjc@3408
   338
    >>> cycle.sort()
gjc@3408
   339
    >>> print cycle
gjc@3408
   340
    ['A', 'C', 'DQ', 'IA']
gjc@3408
   341
gjc@3408
   342
    """
gjc@3408
   343
    cycles = []
gjc@3408
   344
    visited_nodes = set()
gjc@3408
   345
gjc@3408
   346
    for parent in parent_children:
gjc@3408
   347
        if parent in visited_nodes:
gjc@3408
   348
            # This node is part of a path that has already been traversed.
gjc@3408
   349
            continue
gjc@3408
   350
gjc@3408
   351
        paths = [[parent]]
gjc@3408
   352
        while paths:
gjc@3408
   353
            path = paths.pop()
gjc@3408
   354
gjc@3408
   355
            parent = path[-1]
gjc@3408
   356
            
gjc@3408
   357
            try:
gjc@3408
   358
                children = parent_children[parent]
gjc@3408
   359
            except KeyError:
gjc@3408
   360
                continue
gjc@3408
   361
gjc@3408
   362
            for child in children:
gjc@3408
   363
                # Keeping a set of the path nodes, for O(1) lookups at the
gjc@3408
   364
                # expense of more memory and complexity, actually makes speed
gjc@3408
   365
                # worse. (Due to construction of sets.)
gjc@3408
   366
                # This is O(N).
gjc@3408
   367
                if child in path:
gjc@3408
   368
                    # This is a cycle.
gjc@3408
   369
                    cycle = path[path.index(child):]
gjc@3408
   370
                    # Check that this is not a dup cycle.
gjc@3408
   371
                    is_dup = False
gjc@3408
   372
                    for other_cycle in cycles:
gjc@3408
   373
                        if is_rotated(other_cycle, cycle):
gjc@3408
   374
                            is_dup = True
gjc@3408
   375
                            break
gjc@3408
   376
                    if not is_dup:
gjc@3408
   377
                        cycles.append(cycle)
gjc@3408
   378
                        yield cycle
gjc@3408
   379
                else:
gjc@3408
   380
                    # Push this new path onto the 'stack'.
gjc@3408
   381
                    # This is probably the most expensive part of the algorithm
gjc@3408
   382
                    # (a list copy).
gjc@3408
   383
                    paths.append(path + [child])
gjc@3408
   384
                    # Mark the node as visited.
gjc@3408
   385
                    visited_nodes.add(child)
gjc@3408
   386
gjc@3408
   387
gjc@3408
   388
if __name__ == '__main__':
gjc@3408
   389
    # Run the doctest tests.
gjc@3408
   390
    import sys
gjc@3408
   391
    import doctest
gjc@3408
   392
    doctest.testmod(sys.modules['__main__'])