1 # Copyright (c) 2007 RADLogic
3 # Permission is hereby granted, free of charge, to any person obtaining a copy
4 # of this software and associated documentation files (the "Software"), to deal
5 # in the Software without restriction, including without limitation the rights
6 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 # copies of the Software, and to permit persons to whom the Software is
8 # furnished to do so, subject to the following conditions:
10 # The above copyright notice and this permission notice shall be included in
11 # all copies or substantial portions of the Software.
13 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 """Provide various handy Python functions.
22 Running this script directly will execute the doctests.
25 int2bin(i, n) -- Convert integer to binary string.
26 bin2int(bin_string) -- Convert binary string to integer.
27 reverse(input_string) -- Reverse a string.
28 transpose(matrix) -- Transpose a list of lists.
29 polygon_area(points_list) -- Calculate the area of an arbitrary polygon.
30 timestamp() -- Return string containing current time stamp.
31 pt2str(point) -- Return prettier string version of point tuple.
32 gcf(a, b) -- Return the greatest common factor of two numbers.
33 lcm(a, b) -- Return the least common multiple of two numbers.
34 permutations(input_list) -- Generate all permutations of a list of items.
35 reduce_fraction(fraction) -- Reduce fraction (num, denom) to simplest form.
36 quantile(l, p) -- Return p quantile of list l. E.g. p=0.25 for q1.
37 trim(l) -- Discard values in list more than 1.5*IQR outside IQR.
38 nice_units(value) -- Return value converted to human readable units.
39 uniquify(seq) -- Return sequence with duplicate items in sequence seq removed.
40 reverse_dict(d) -- Return the dictionary with the items as keys and vice-versa.
41 lsb(x, n) -- Return the n least significant bits of x.
42 gray_encode(i) -- Gray encode the given integer.
43 random_vec(bits, max_value=None) -- Return a random binary vector.
44 binary_range(bits) -- Return list of all possible binary numbers width=bits.
45 float_range([start], stop, [step]) -- Return range of floats.
46 find_common_fixes(s1, s2) -- Find common (prefix, suffix) of two strings.
47 is_rotated(seq1, seq2) -- Return true if the list is a rotation of other list.
48 getmodule(obj) -- Return the module that contains the object definition of obj.
49 (use inspect.getmodule instead, though)
50 get_args(argv) -- Store command-line args in a dictionary.
52 This module requires Python >= 2.2
55 __author__ = 'Tim Wegener <twegener@radlogic.com.au>'
56 __date__ = '$Date: 2007/03/27 03:15:06 $'
57 __version__ = '$Revision: 0.45 $'
59 David Chandler, for polygon area algorithm.
60 (http://www.davidchandler.com/AreaOfAGeneralPolygon.pdf)
71 True, False = (1==1, 0==1)
75 """Convert decimal integer i to n-bit binary number (string).
87 Traceback (most recent call last):
88 ValueError: Value too large for given number of bits.
91 hex2bin = {'0': '0000', '1': '0001', '2': '0010', '3': '0011',
92 '4': '0100', '5': '0101', '6': '0110', '7': '0111',
93 '8': '1000', '9': '1001', 'a': '1010', 'b': '1011',
94 'c': '1100', 'd': '1101', 'e': '1110', 'f': '1111'}
95 # Convert to hex then map each hex digit to binary equivalent.
96 result = ''.join([hex2bin[x] for x in hex(i).lower().replace('l','')[2:]])
98 # Shrink result to appropriate length.
99 # Raise an error if the value is changed by the truncation.
100 if '1' in result[:-n]:
101 raise ValueError("Value too large for given number of bits.")
103 # Zero-pad if length longer than mapped result.
104 result = '0'*(n-len(result)) + result
108 def bin2int(bin_string):
109 """Convert binary number string to decimal integer.
111 Note: Python > v2 has int(bin_string, 2)
121 ## bin_list = list(bin_string)
122 ## if len(filter(lambda x: x in ('1','0'), bin_list)) < len(bin_list):
123 ## raise Exception ("bin2int: Error - not a binary number: %s"
125 ## bit_list = map(int, bin_list)
126 ## bit_list.reverse() # Make most significant bit have highest index.
127 ## for bit_place in range(len(bit_list)):
128 ## result = result + ((2**bit_place) * bit_list[bit_place])
130 return int(bin_string, 2)
133 def reverse(input_string):
134 """Reverse a string. Useful for strings of binary numbers.
140 str_list = list(input_string)
142 return ''.join(str_list)
145 def transpose(matrix):
146 """Transpose a list of lists.
148 >>> transpose([['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']])
149 [['a', 'd', 'g'], ['b', 'e', 'h'], ['c', 'f', 'i']]
151 >>> transpose([['a', 'b', 'c'], ['d', 'e', 'f']])
152 [['a', 'd'], ['b', 'e'], ['c', 'f']]
154 >>> transpose([['a', 'b'], ['d', 'e'], ['g', 'h']])
155 [['a', 'd', 'g'], ['b', 'e', 'h']]
158 result = zip(*matrix)
159 # Convert list of tuples to list of lists.
160 # map is faster than a list comprehension since it is being used with
161 # a built-in function as an argument.
162 result = map(list, result)
166 def polygon_area(points_list, precision=100):
167 """Calculate area of an arbitrary polygon using an algorithm from the web.
169 Return the area of the polygon as a positive float.
172 points_list -- list of point tuples [(x0, y0), (x1, y1), (x2, y2), ...]
173 (Unclosed polygons will be closed automatically.
174 precision -- Internal arithmetic precision (integer arithmetic).
176 >>> polygon_area([(0, 0), (0, 1), (1, 1), (1, 2), (2, 2), (2, 0), (0, 0)])
180 Area of a General Polygon by David Chandler
181 http://www.davidchandler.com/AreaOfAGeneralPolygon.pdf
184 # Scale up co-ordinates and convert them to integers.
185 for i in range(len(points_list)):
186 points_list[i] = (int(points_list[i][0] * precision),
187 int(points_list[i][1] * precision))
188 # Close polygon if not closed.
189 if points_list[-1] != points_list[0]:
190 points_list.append(points_list[0])
193 for i in range(len(points_list)-1):
194 (x_i, y_i) = points_list[i]
195 (x_i_plus_1, y_i_plus_1) = points_list[i+1]
196 area = area + (x_i_plus_1 * y_i) - (y_i_plus_1 * x_i)
199 area = float(area)/(precision**2)
204 """Return string containing current time stamp.
206 Note: In Python 2 onwards can use time.asctime() with no arguments.
210 return time.asctime()
214 """Return prettier string version of point tuple.
216 >>> pt2str((1.8, 1.9))
220 return "(%s, %s)" % (str(point[0]), str(point[1]))
223 def gcf(a, b, epsilon=1e-16):
224 """Return the greatest common factor of a and b, using Euclidean algorithm.
228 If both numbers are integers return an integer result,
229 otherwise return a float result.
230 epsilon -- floats less than this magnitude are considered to be zero
247 By (a convenient) definition:
253 remainder = min(a, b)
254 while remainder and abs(remainder) > epsilon:
255 new_remainder = result % remainder
257 remainder = new_remainder
260 def lcm(a, b, precision=None):
261 """Return the least common multiple of a and b, using the gcf function.
264 a, b -- two numbers. If both are integers return an integer result,
265 otherwise a return a float result.
266 precision -- scaling factor if a and/or b are floats.
274 >>> str(lcm(1.5e-8, 2.5e-8, precision=1e9))
277 By (an arbitary) definition:
282 # Note: Dummy precision argument is for backwards compatibility.
283 # Do the division first.
284 # (See http://en.wikipedia.org/wiki/Least_common_multiple )
289 result = a * (b / denom)
293 def permutations(input_list):
294 """Return a list containing all permutations of the input list.
296 Note: This is a recursive function.
298 >>> perms = permutations(['a', 'b', 'c'])
300 >>> for perm in perms:
311 if len(input_list) > 1:
312 # Extract first item in list.
314 # Find all permutations of remainder of list. (Recursive call.)
315 sub_lists = permutations(input_list[1:])
316 # For every permutation of the sub list...
317 for sub_list in sub_lists:
318 # Insert the extracted first item at every position of the list.
319 for i in range(len(input_list)):
320 new_list = sub_list[:]
321 new_list.insert(i, item)
322 out_lists.append(new_list)
324 # Termination condition: only one item in input list.
325 out_lists = [input_list]
329 def reduce_fraction(fraction):
330 """Reduce fraction tuple to simplest form. fraction=(num, denom)
332 >>> reduce_fraction((14, 7))
335 >>> reduce_fraction((-2, 4))
338 >>> reduce_fraction((0, 4))
341 >>> reduce_fraction((4, 0))
345 (numerator, denominator) = fraction
346 common_factor = abs(gcf(numerator, denominator))
347 result = (numerator/common_factor, denominator/common_factor)
352 """Return p quantile of list l. E.g. p=0.25 for q1.
355 http://rweb.stat.umn.edu/R/library/base/html/quantile.html
361 r = 1 + ((n - 1) * p)
365 result = (1-f)*l_sort[i-1] + f*l_sort[i]
372 """Discard values in list more than 1.5*IQR outside IQR.
374 (IQR is inter-quartile-range)
376 This function uses rad_util.quantile
378 1.5*IQR -- mild outlier
379 3*IQR -- extreme outlier
382 http://wind.cc.whecn.edu/~pwildman/statnew/section_7_-_exploratory_data_analysis.htm
387 # Calculate medianscore (based on stats.py lmedianscore by Gary Strangman)
388 if len(l_sort) % 2 == 0:
389 # If even number of scores, average middle 2.
390 index = int(len(l_sort) / 2) # Integer division correct
391 median = float(l_sort[index] + l_sort[index-1]) / 2
393 # int divsion gives mid value when count from 0
394 index = int(len(l_sort) / 2)
395 median = l_sort[index]
397 q1 = quantile(l_sort, 0.25)
398 q3 = quantile(l_sort, 0.75)
400 iqr_extra = iqr * 1.5
401 def in_interval(x, i=iqr_extra, q1=q1, q3=q3):
402 return (x >= q1-i and x <= q3+i)
403 l_trimmed = [x for x in l_sort if in_interval(x)]
407 def nice_units(value, dp=0, sigfigs=None, suffix='', space=' ',
408 use_extra_prefixes=False, use_full_name=False, mode='si'):
409 """Return value converted to human readable units eg milli, micro, etc.
412 value -- number in base units
413 dp -- number of decimal places to display (rounded)
414 sigfigs -- number of significant figures to display (rounded)
415 This overrides dp if set.
416 suffix -- optional unit suffix to append to unit multiplier
417 space -- seperator between value and unit multiplier (default: ' ')
418 use_extra_prefixes -- use hecto, deka, deci and centi as well if set.
420 use_full_name -- use full name for multiplier symbol,
421 e.g. milli instead of m
423 mode -- 'si' for SI prefixes, 'bin' for binary multipliers (1024, etc.)
427 http://physics.nist.gov/cuu/Units/prefixes.html
428 (Greek mu changed to u.)
429 Binary prefixes based on:
430 http://physics.nist.gov/cuu/Units/binary.html
432 >>> nice_units(2e-11)
435 >>> nice_units(2e-11, space='')
439 si_prefixes = {1e24: ('Y', 'yotta'),
440 1e21: ('Z', 'zetta'),
447 1e-3: ('m', 'milli'),
448 1e-6: ('u', 'micro'),
450 1e-12: ('p', 'pico'),
451 1e-15: ('f', 'femto'),
452 1e-18: ('a', 'atto'),
453 1e-21: ('z', 'zepto'),
454 1e-24: ('y', 'yocto')
456 if use_extra_prefixes:
457 si_prefixes.update({1e2: ('h', 'hecto'),
462 bin_prefixes = {2**10: ('K', 'kilo'),
463 2**20: ('M', 'mega'),
464 2**30: ('G', 'mega'),
465 2**40: ('T', 'tera'),
466 2**50: ('P', 'peta'),
470 prefixes = bin_prefixes
472 prefixes = si_prefixes
473 prefixes[1] = ('', '') # Unity.
474 # Determine appropriate multiplier.
475 multipliers = prefixes.keys()
478 for i in range(len(multipliers) - 1):
479 lower_mult = multipliers[i]
480 upper_mult = multipliers[i+1]
481 if lower_mult <= value < upper_mult:
485 if value < multipliers[0]:
487 elif value >= multipliers[-1]:
488 mult_i = len(multipliers) - 1
489 mult = multipliers[mult_i]
490 # Convert value for this multiplier.
491 new_value = value / mult
492 # Deal with special case due to rounding.
494 if mult_i < (len(multipliers) - 1) and \
495 round(new_value, dp) == \
496 round((multipliers[mult_i+1] / mult), dp):
497 mult = multipliers[mult_i + 1]
498 new_value = value / mult
499 # Concatenate multiplier symbol.
504 # Round and truncate to appropriate precision.
506 str_value = eval('"%.'+str(dp)+'f" % new_value', locals(), {})
508 str_value = eval('"%.'+str(sigfigs)+'g" % new_value', locals(), {})
509 return str_value + space + prefixes[mult][label_type] + suffix
512 def uniquify(seq, preserve_order=False):
513 """Return sequence with duplicate items in sequence seq removed.
515 The code is based on usenet post by Tim Peters.
517 This code is O(N) if the sequence items are hashable, O(N**2) if not.
519 Peter Bengtsson has a blog post with an empirical comparison of other
521 http://www.peterbe.com/plog/uniqifiers-benchmark
523 If order is not important and the sequence items are hashable then
524 list(set(seq)) is readable and efficient.
526 If order is important and the sequence items are hashable generator
527 expressions can be used (in py >= 2.4) (useful for large sequences):
529 do_something(x for x in seq if x not in seen or seen.add(x))
533 preserve_order -- if not set the order will be arbitrary
534 Using this option will incur a speed penalty.
537 Example showing order preservation:
539 >>> uniquify(['a', 'aa', 'b', 'b', 'ccc', 'ccc', 'd'], preserve_order=True)
540 ['a', 'aa', 'b', 'ccc', 'd']
542 Example using a sequence of un-hashable items:
544 >>> uniquify([['z'], ['x'], ['y'], ['z']], preserve_order=True)
545 [['z'], ['x'], ['y']]
547 The sorted output or the non-order-preserving approach should equal
548 that of the sorted order-preserving approach output:
550 >>> unordered = uniquify([3, 3, 1, 2], preserve_order=False)
552 >>> ordered = uniquify([3, 3, 1, 2], preserve_order=True)
556 >>> int(ordered == unordered)
561 # Attempt fast algorithm.
564 # This is based on Dave Kirby's method (f8) noted in the post:
565 # http://www.peterbe.com/plog/uniqifiers-benchmark
566 return [x for x in seq if (x not in d) and not d.__setitem__(x, 0)]
572 # Have an unhashable object, so use slow algorithm.
580 # Alias to noun form for backward compatibility.
585 """Reverse a dictionary so the items become the keys and vice-versa.
587 Note: The results will be arbitrary if the items are not unique.
589 >>> d = reverse_dict({'a': 1, 'b': 2})
590 >>> d_items = d.items()
597 for key, value in d.items():
603 """Return the n least significant bits of x.
609 return x & ((2 ** n) - 1)
613 """Gray encode the given integer."""
618 def random_vec(bits, max_value=None):
619 """Generate a random binary vector of length bits and given max value."""
622 for _ in range(int(bits / 10) + 1):
623 i = int((2**10) * random.random())
624 vector += int2bin(i, 10)
626 if max_value and (max_value < 2 ** bits - 1):
627 vector = int2bin((int(vector, 2) / (2 ** bits - 1)) * max_value, bits)
629 return vector[0:bits]
632 def binary_range(bits):
633 """Return a list of all possible binary numbers in order with width=bits.
635 It would be nice to extend it to match the
636 functionality of python's range() built-in function.
642 toggle = [1] + [0] * bits
644 while toggle[bits] != 1:
647 l.append(''.join(v_copy))
649 toggle = [1] + [0]*bits
651 while i < bits and toggle[i] == 1:
663 def float_range(start, stop=None, step=None):
664 """Return a list containing an arithmetic progression of floats.
666 Return a list of floats between 0.0 (or start) and stop with an
669 This is in functionality to python's range() built-in function
670 but can accept float increments.
672 As with range(), stop is omitted from the list.
691 def find_common_fixes(s1, s2):
692 """Find common (prefix, suffix) of two strings.
694 >>> find_common_fixes('abc', 'def')
697 >>> find_common_fixes('abcelephantdef', 'abccowdef')
700 >>> find_common_fixes('abcelephantdef', 'abccow')
703 >>> find_common_fixes('elephantdef', 'abccowdef')
711 common_len = min(len(s1), len(s2))
712 while i < common_len:
720 while i < (common_len + 1):
724 suffix.append(s1[-i])
729 prefix = ''.join(prefix)
730 suffix = ''.join(suffix)
732 return (prefix, suffix)
735 def is_rotated(seq1, seq2):
736 """Return true if the first sequence is a rotation of the second sequence.
738 >>> seq1 = ['A', 'B', 'C', 'D']
739 >>> seq2 = ['C', 'D', 'A', 'B']
740 >>> int(is_rotated(seq1, seq2))
743 >>> seq2 = ['C', 'D', 'B', 'A']
744 >>> int(is_rotated(seq1, seq2))
747 >>> seq1 = ['A', 'B', 'C', 'A']
748 >>> seq2 = ['A', 'A', 'B', 'C']
749 >>> int(is_rotated(seq1, seq2))
752 >>> seq2 = ['A', 'B', 'C', 'A']
753 >>> int(is_rotated(seq1, seq2))
756 >>> seq2 = ['A', 'A', 'C', 'B']
757 >>> int(is_rotated(seq1, seq2))
762 if len(seq1) != len(seq2):
764 # Look for occurrences of second sequence head item in first sequence.
767 for index1 in range(len(seq1)):
768 if seq1[index1] == head_item:
769 start_indexes.append(index1)
770 # Check that wrapped sequence matches.
771 double_seq1 = seq1 + seq1
772 for index1 in start_indexes:
773 if double_seq1[index1:index1+len(seq1)] == seq2:
778 """Return the module that contains the object definition of obj.
780 Note: Use inspect.getmodule instead.
783 obj -- python obj, generally a class or a function
788 >>> module = getmodule(random.choice)
795 >>> module = getmodule(random.Random)
801 A class inheriting from a class in another module:
802 (note: The inheriting class must define at least one function.)
803 >>> class MyRandom(random.Random):
806 >>> module = getmodule(MyRandom)
807 >>> if __name__ == '__main__':
808 ... name = 'rad_util'
810 ... name = module.__name__
813 >>> module is sys.modules[__name__]
817 This approach is slightly hackish, and won't work in various situations.
818 However, this was the approach recommended by GvR, so it's as good as
821 See GvR's post in this thread:
822 http://groups.google.com.au/group/comp.lang.python/browse_thread/thread/966a7bdee07e3b34/c3cab3f41ea84236?lnk=st&q=python+determine+class+module&rnum=4&hl=en#c3cab3f41ea84236
825 if hasattr(obj, 'func_globals'):
830 for item in obj.__dict__.values():
831 if hasattr(item, 'func_globals'):
835 raise ValueError("No functions attached to object: %r" % obj)
836 module_name = func.func_globals['__name__']
838 module = sys.modules[module_name]
842 def round_grid(value, grid, mode=0):
843 """Round off the given value to the given grid size.
846 value -- value to be roudne
847 grid -- result must be a multiple of this
848 mode -- 0 nearest, 1 up, -1 down
852 >>> round_grid(7.5, 5)
855 >>> round_grid(7.5, 5, mode=-1)
858 >>> round_grid(7.3, 5, mode=1)
861 >>> round_grid(7.3, 5.0, mode=1)
865 off_grid = value % grid
867 add_one = int(off_grid >= (grid / 2.0))
868 elif mode == 1 and off_grid:
870 elif mode == -1 and off_grid:
872 result = ((int(value / grid) + add_one) * grid)
877 """Store command-line args in a dictionary.
879 -, -- prefixes are removed
880 Items not prefixed with - or -- are stored as a list, indexed by 'args'
882 For options that take a value use --option=value
884 Consider using optparse or getopt (in Python standard library) instead.
892 if arg.startswith('-'):
893 parts = re.sub(r'^-+', '', arg).split('=')
895 d[parts[0]] = parts[1]
906 if __name__ == '__main__':
908 doctest.testmod(sys.modules['__main__'])