516 lines
19 KiB
Python
516 lines
19 KiB
Python
|
# -*- coding: iso-8859-15 -*-
|
||
|
"""Immutable integer set type.
|
||
|
|
||
|
Integer set class.
|
||
|
|
||
|
Copyright (C) 2006, Heiko Wundram.
|
||
|
Released under the MIT license.
|
||
|
"""
|
||
|
import six
|
||
|
|
||
|
# Version information
|
||
|
# -------------------
|
||
|
|
||
|
__author__ = "Heiko Wundram <me@modelnine.org>"
|
||
|
__version__ = "0.2"
|
||
|
__revision__ = "6"
|
||
|
__date__ = "2006-01-20"
|
||
|
|
||
|
|
||
|
# Utility classes
|
||
|
# ---------------
|
||
|
|
||
|
class _Infinity(object):
|
||
|
"""Internal type used to represent infinity values."""
|
||
|
|
||
|
__slots__ = ["_neg"]
|
||
|
|
||
|
def __init__(self,neg):
|
||
|
self._neg = neg
|
||
|
|
||
|
def __lt__(self,value):
|
||
|
if not isinstance(value, _VALID_TYPES):
|
||
|
return NotImplemented
|
||
|
return ( self._neg and
|
||
|
not ( isinstance(value,_Infinity) and value._neg ) )
|
||
|
|
||
|
def __le__(self,value):
|
||
|
if not isinstance(value, _VALID_TYPES):
|
||
|
return NotImplemented
|
||
|
return self._neg
|
||
|
|
||
|
def __gt__(self,value):
|
||
|
if not isinstance(value, _VALID_TYPES):
|
||
|
return NotImplemented
|
||
|
return not ( self._neg or
|
||
|
( isinstance(value,_Infinity) and not value._neg ) )
|
||
|
|
||
|
def __ge__(self,value):
|
||
|
if not isinstance(value, _VALID_TYPES):
|
||
|
return NotImplemented
|
||
|
return not self._neg
|
||
|
|
||
|
def __eq__(self,value):
|
||
|
if not isinstance(value, _VALID_TYPES):
|
||
|
return NotImplemented
|
||
|
return isinstance(value,_Infinity) and self._neg == value._neg
|
||
|
|
||
|
def __ne__(self,value):
|
||
|
if not isinstance(value, _VALID_TYPES):
|
||
|
return NotImplemented
|
||
|
return not isinstance(value,_Infinity) or self._neg != value._neg
|
||
|
|
||
|
def __repr__(self):
|
||
|
return "None"
|
||
|
|
||
|
_VALID_TYPES = six.integer_types + (_Infinity,)
|
||
|
|
||
|
|
||
|
|
||
|
# Constants
|
||
|
# ---------
|
||
|
|
||
|
_MININF = _Infinity(True)
|
||
|
_MAXINF = _Infinity(False)
|
||
|
|
||
|
|
||
|
# Integer set class
|
||
|
# -----------------
|
||
|
|
||
|
class IntSet(object):
|
||
|
"""Integer set class with efficient storage in a RLE format of ranges.
|
||
|
Supports minus and plus infinity in the range."""
|
||
|
|
||
|
__slots__ = ["_ranges","_min","_max","_hash"]
|
||
|
|
||
|
def __init__(self,*args,**kwargs):
|
||
|
"""Initialize an integer set. The constructor accepts an unlimited
|
||
|
number of arguments that may either be tuples in the form of
|
||
|
(start,stop) where either start or stop may be a number or None to
|
||
|
represent maximum/minimum in that direction. The range specified by
|
||
|
(start,stop) is always inclusive (differing from the builtin range
|
||
|
operator).
|
||
|
|
||
|
Keyword arguments that can be passed to an integer set are min and
|
||
|
max, which specify the minimum and maximum number in the set,
|
||
|
respectively. You can also pass None here to represent minus or plus
|
||
|
infinity, which is also the default.
|
||
|
"""
|
||
|
|
||
|
# Special case copy constructor.
|
||
|
if len(args) == 1 and isinstance(args[0],IntSet):
|
||
|
if kwargs:
|
||
|
raise ValueError("No keyword arguments for copy constructor.")
|
||
|
self._min = args[0]._min
|
||
|
self._max = args[0]._max
|
||
|
self._ranges = args[0]._ranges
|
||
|
self._hash = args[0]._hash
|
||
|
return
|
||
|
|
||
|
# Initialize set.
|
||
|
self._ranges = []
|
||
|
|
||
|
# Process keyword arguments.
|
||
|
self._min = kwargs.pop("min",_MININF)
|
||
|
self._max = kwargs.pop("max",_MAXINF)
|
||
|
if self._min is None:
|
||
|
self._min = _MININF
|
||
|
if self._max is None:
|
||
|
self._max = _MAXINF
|
||
|
|
||
|
# Check keyword arguments.
|
||
|
if kwargs:
|
||
|
raise ValueError("Invalid keyword argument.")
|
||
|
if not ( isinstance(self._min, six.integer_types) or self._min is _MININF ):
|
||
|
raise TypeError("Invalid type of min argument.")
|
||
|
if not ( isinstance(self._max, six.integer_types) or self._max is _MAXINF ):
|
||
|
raise TypeError("Invalid type of max argument.")
|
||
|
if ( self._min is not _MININF and self._max is not _MAXINF and
|
||
|
self._min > self._max ):
|
||
|
raise ValueError("Minimum is not smaller than maximum.")
|
||
|
if isinstance(self._max, six.integer_types):
|
||
|
self._max += 1
|
||
|
|
||
|
# Process arguments.
|
||
|
for arg in args:
|
||
|
if isinstance(arg, six.integer_types):
|
||
|
start, stop = arg, arg+1
|
||
|
elif isinstance(arg,tuple):
|
||
|
if len(arg) != 2:
|
||
|
raise ValueError("Invalid tuple, must be (start,stop).")
|
||
|
|
||
|
# Process argument.
|
||
|
start, stop = arg
|
||
|
if start is None:
|
||
|
start = self._min
|
||
|
if stop is None:
|
||
|
stop = self._max
|
||
|
|
||
|
# Check arguments.
|
||
|
if not ( isinstance(start, six.integer_types) or start is _MININF ):
|
||
|
raise TypeError("Invalid type of tuple start.")
|
||
|
if not ( isinstance(stop, six.integer_types) or stop is _MAXINF ):
|
||
|
raise TypeError("Invalid type of tuple stop.")
|
||
|
if ( start is not _MININF and stop is not _MAXINF and
|
||
|
start > stop ):
|
||
|
continue
|
||
|
if isinstance(stop, six.integer_types):
|
||
|
stop += 1
|
||
|
else:
|
||
|
raise TypeError("Invalid argument.")
|
||
|
|
||
|
if start > self._max:
|
||
|
continue
|
||
|
elif start < self._min:
|
||
|
start = self._min
|
||
|
if stop < self._min:
|
||
|
continue
|
||
|
elif stop > self._max:
|
||
|
stop = self._max
|
||
|
self._ranges.append((start,stop))
|
||
|
|
||
|
# Normalize set.
|
||
|
self._normalize()
|
||
|
|
||
|
# Utility functions for set operations
|
||
|
# ------------------------------------
|
||
|
|
||
|
def _iterranges(self,r1,r2,minval=_MININF,maxval=_MAXINF):
|
||
|
curval = minval
|
||
|
curstates = {"r1":False,"r2":False}
|
||
|
imax, jmax = 2*len(r1), 2*len(r2)
|
||
|
i, j = 0, 0
|
||
|
while i < imax or j < jmax:
|
||
|
if i < imax and ( ( j < jmax and
|
||
|
r1[i>>1][i&1] < r2[j>>1][j&1] ) or
|
||
|
j == jmax ):
|
||
|
cur_r, newname, newstate = r1[i>>1][i&1], "r1", not (i&1)
|
||
|
i += 1
|
||
|
else:
|
||
|
cur_r, newname, newstate = r2[j>>1][j&1], "r2", not (j&1)
|
||
|
j += 1
|
||
|
if curval < cur_r:
|
||
|
if cur_r > maxval:
|
||
|
break
|
||
|
yield curstates, (curval,cur_r)
|
||
|
curval = cur_r
|
||
|
curstates[newname] = newstate
|
||
|
if curval < maxval:
|
||
|
yield curstates, (curval,maxval)
|
||
|
|
||
|
def _normalize(self):
|
||
|
self._ranges.sort()
|
||
|
i = 1
|
||
|
while i < len(self._ranges):
|
||
|
if self._ranges[i][0] < self._ranges[i-1][1]:
|
||
|
self._ranges[i-1] = (self._ranges[i-1][0],
|
||
|
max(self._ranges[i-1][1],
|
||
|
self._ranges[i][1]))
|
||
|
del self._ranges[i]
|
||
|
else:
|
||
|
i += 1
|
||
|
self._ranges = tuple(self._ranges)
|
||
|
self._hash = hash(self._ranges)
|
||
|
|
||
|
def __coerce__(self,other):
|
||
|
if isinstance(other,IntSet):
|
||
|
return self, other
|
||
|
elif isinstance(other, six.integer_types + (tuple,)):
|
||
|
try:
|
||
|
return self, self.__class__(other)
|
||
|
except TypeError:
|
||
|
# Catch a type error, in that case the structure specified by
|
||
|
# other is something we can't coerce, return NotImplemented.
|
||
|
# ValueErrors are not caught, they signal that the data was
|
||
|
# invalid for the constructor. This is appropriate to signal
|
||
|
# as a ValueError to the caller.
|
||
|
return NotImplemented
|
||
|
elif isinstance(other,list):
|
||
|
try:
|
||
|
return self, self.__class__(*other)
|
||
|
except TypeError:
|
||
|
# See above.
|
||
|
return NotImplemented
|
||
|
return NotImplemented
|
||
|
|
||
|
# Set function definitions
|
||
|
# ------------------------
|
||
|
|
||
|
def _make_function(name,type,doc,pall,pany=None):
|
||
|
"""Makes a function to match two ranges. Accepts two types: either
|
||
|
'set', which defines a function which returns a set with all ranges
|
||
|
matching pall (pany is ignored), or 'bool', which returns True if pall
|
||
|
matches for all ranges and pany matches for any one range. doc is the
|
||
|
dostring to give this function. pany may be none to ignore the any
|
||
|
match.
|
||
|
|
||
|
The predicates get a dict with two keys, 'r1', 'r2', which denote
|
||
|
whether the current range is present in range1 (self) and/or range2
|
||
|
(other) or none of the two, respectively."""
|
||
|
|
||
|
if type == "set":
|
||
|
def f(self,other):
|
||
|
coerced = self.__coerce__(other)
|
||
|
if coerced is NotImplemented:
|
||
|
return NotImplemented
|
||
|
other = coerced[1]
|
||
|
newset = self.__class__.__new__(self.__class__)
|
||
|
newset._min = min(self._min,other._min)
|
||
|
newset._max = max(self._max,other._max)
|
||
|
newset._ranges = []
|
||
|
for states, (start,stop) in \
|
||
|
self._iterranges(self._ranges,other._ranges,
|
||
|
newset._min,newset._max):
|
||
|
if pall(states):
|
||
|
if newset._ranges and newset._ranges[-1][1] == start:
|
||
|
newset._ranges[-1] = (newset._ranges[-1][0],stop)
|
||
|
else:
|
||
|
newset._ranges.append((start,stop))
|
||
|
newset._ranges = tuple(newset._ranges)
|
||
|
newset._hash = hash(self._ranges)
|
||
|
return newset
|
||
|
elif type == "bool":
|
||
|
def f(self,other):
|
||
|
coerced = self.__coerce__(other)
|
||
|
if coerced is NotImplemented:
|
||
|
return NotImplemented
|
||
|
other = coerced[1]
|
||
|
_min = min(self._min,other._min)
|
||
|
_max = max(self._max,other._max)
|
||
|
found = not pany
|
||
|
for states, (start,stop) in \
|
||
|
self._iterranges(self._ranges,other._ranges,_min,_max):
|
||
|
if not pall(states):
|
||
|
return False
|
||
|
found = found or pany(states)
|
||
|
return found
|
||
|
else:
|
||
|
raise ValueError("Invalid type of function to create.")
|
||
|
try:
|
||
|
f.func_name = name
|
||
|
except TypeError:
|
||
|
pass
|
||
|
f.func_doc = doc
|
||
|
return f
|
||
|
|
||
|
# Intersection.
|
||
|
__and__ = _make_function("__and__","set",
|
||
|
"Intersection of two sets as a new set.",
|
||
|
lambda s: s["r1"] and s["r2"])
|
||
|
__rand__ = _make_function("__rand__","set",
|
||
|
"Intersection of two sets as a new set.",
|
||
|
lambda s: s["r1"] and s["r2"])
|
||
|
intersection = _make_function("intersection","set",
|
||
|
"Intersection of two sets as a new set.",
|
||
|
lambda s: s["r1"] and s["r2"])
|
||
|
|
||
|
# Union.
|
||
|
__or__ = _make_function("__or__","set",
|
||
|
"Union of two sets as a new set.",
|
||
|
lambda s: s["r1"] or s["r2"])
|
||
|
__ror__ = _make_function("__ror__","set",
|
||
|
"Union of two sets as a new set.",
|
||
|
lambda s: s["r1"] or s["r2"])
|
||
|
union = _make_function("union","set",
|
||
|
"Union of two sets as a new set.",
|
||
|
lambda s: s["r1"] or s["r2"])
|
||
|
|
||
|
# Difference.
|
||
|
__sub__ = _make_function("__sub__","set",
|
||
|
"Difference of two sets as a new set.",
|
||
|
lambda s: s["r1"] and not s["r2"])
|
||
|
__rsub__ = _make_function("__rsub__","set",
|
||
|
"Difference of two sets as a new set.",
|
||
|
lambda s: s["r2"] and not s["r1"])
|
||
|
difference = _make_function("difference","set",
|
||
|
"Difference of two sets as a new set.",
|
||
|
lambda s: s["r1"] and not s["r2"])
|
||
|
|
||
|
# Symmetric difference.
|
||
|
__xor__ = _make_function("__xor__","set",
|
||
|
"Symmetric difference of two sets as a new set.",
|
||
|
lambda s: s["r1"] ^ s["r2"])
|
||
|
__rxor__ = _make_function("__rxor__","set",
|
||
|
"Symmetric difference of two sets as a new set.",
|
||
|
lambda s: s["r1"] ^ s["r2"])
|
||
|
symmetric_difference = _make_function("symmetric_difference","set",
|
||
|
"Symmetric difference of two sets as a new set.",
|
||
|
lambda s: s["r1"] ^ s["r2"])
|
||
|
|
||
|
# Containership testing.
|
||
|
__contains__ = _make_function("__contains__","bool",
|
||
|
"Returns true if self is superset of other.",
|
||
|
lambda s: s["r1"] or not s["r2"])
|
||
|
issubset = _make_function("issubset","bool",
|
||
|
"Returns true if self is subset of other.",
|
||
|
lambda s: s["r2"] or not s["r1"])
|
||
|
istruesubset = _make_function("istruesubset","bool",
|
||
|
"Returns true if self is true subset of other.",
|
||
|
lambda s: s["r2"] or not s["r1"],
|
||
|
lambda s: s["r2"] and not s["r1"])
|
||
|
issuperset = _make_function("issuperset","bool",
|
||
|
"Returns true if self is superset of other.",
|
||
|
lambda s: s["r1"] or not s["r2"])
|
||
|
istruesuperset = _make_function("istruesuperset","bool",
|
||
|
"Returns true if self is true superset of other.",
|
||
|
lambda s: s["r1"] or not s["r2"],
|
||
|
lambda s: s["r1"] and not s["r2"])
|
||
|
overlaps = _make_function("overlaps","bool",
|
||
|
"Returns true if self overlaps with other.",
|
||
|
lambda s: True,
|
||
|
lambda s: s["r1"] and s["r2"])
|
||
|
|
||
|
# Comparison.
|
||
|
__eq__ = _make_function("__eq__","bool",
|
||
|
"Returns true if self is equal to other.",
|
||
|
lambda s: not ( s["r1"] ^ s["r2"] ))
|
||
|
__ne__ = _make_function("__ne__","bool",
|
||
|
"Returns true if self is different to other.",
|
||
|
lambda s: True,
|
||
|
lambda s: s["r1"] ^ s["r2"])
|
||
|
|
||
|
# Clean up namespace.
|
||
|
del _make_function
|
||
|
|
||
|
# Define other functions.
|
||
|
def inverse(self):
|
||
|
"""Inverse of set as a new set."""
|
||
|
|
||
|
newset = self.__class__.__new__(self.__class__)
|
||
|
newset._min = self._min
|
||
|
newset._max = self._max
|
||
|
newset._ranges = []
|
||
|
laststop = self._min
|
||
|
for r in self._ranges:
|
||
|
if laststop < r[0]:
|
||
|
newset._ranges.append((laststop,r[0]))
|
||
|
laststop = r[1]
|
||
|
if laststop < self._max:
|
||
|
newset._ranges.append((laststop,self._max))
|
||
|
return newset
|
||
|
|
||
|
__invert__ = inverse
|
||
|
|
||
|
# Hashing
|
||
|
# -------
|
||
|
|
||
|
def __hash__(self):
|
||
|
"""Returns a hash value representing this integer set. As the set is
|
||
|
always stored normalized, the hash value is guaranteed to match for
|
||
|
matching ranges."""
|
||
|
|
||
|
return self._hash
|
||
|
|
||
|
# Iterating
|
||
|
# ---------
|
||
|
|
||
|
def __len__(self):
|
||
|
"""Get length of this integer set. In case the length is larger than
|
||
|
2**31 (including infinitely sized integer sets), it raises an
|
||
|
OverflowError. This is due to len() restricting the size to
|
||
|
0 <= len < 2**31."""
|
||
|
|
||
|
if not self._ranges:
|
||
|
return 0
|
||
|
if self._ranges[0][0] is _MININF or self._ranges[-1][1] is _MAXINF:
|
||
|
raise OverflowError("Infinitely sized integer set.")
|
||
|
rlen = 0
|
||
|
for r in self._ranges:
|
||
|
rlen += r[1]-r[0]
|
||
|
if rlen >= 2**31:
|
||
|
raise OverflowError("Integer set bigger than 2**31.")
|
||
|
return rlen
|
||
|
|
||
|
def len(self):
|
||
|
"""Returns the length of this integer set as an integer. In case the
|
||
|
length is infinite, returns -1. This function exists because of a
|
||
|
limitation of the builtin len() function which expects values in
|
||
|
the range 0 <= len < 2**31. Use this function in case your integer
|
||
|
set might be larger."""
|
||
|
|
||
|
if not self._ranges:
|
||
|
return 0
|
||
|
if self._ranges[0][0] is _MININF or self._ranges[-1][1] is _MAXINF:
|
||
|
return -1
|
||
|
rlen = 0
|
||
|
for r in self._ranges:
|
||
|
rlen += r[1]-r[0]
|
||
|
return rlen
|
||
|
|
||
|
def __nonzero__(self):
|
||
|
"""Returns true if this integer set contains at least one item."""
|
||
|
|
||
|
return bool(self._ranges)
|
||
|
|
||
|
def __iter__(self):
|
||
|
"""Iterate over all values in this integer set. Iteration always starts
|
||
|
by iterating from lowest to highest over the ranges that are bounded.
|
||
|
After processing these, all ranges that are unbounded (maximum 2) are
|
||
|
yielded intermixed."""
|
||
|
|
||
|
ubranges = []
|
||
|
for r in self._ranges:
|
||
|
if r[0] is _MININF:
|
||
|
if r[1] is _MAXINF:
|
||
|
ubranges.extend(([0,1],[-1,-1]))
|
||
|
else:
|
||
|
ubranges.append([r[1]-1,-1])
|
||
|
elif r[1] is _MAXINF:
|
||
|
ubranges.append([r[0],1])
|
||
|
else:
|
||
|
for val in xrange(r[0],r[1]):
|
||
|
yield val
|
||
|
if ubranges:
|
||
|
while True:
|
||
|
for ubrange in ubranges:
|
||
|
yield ubrange[0]
|
||
|
ubrange[0] += ubrange[1]
|
||
|
|
||
|
# Printing
|
||
|
# --------
|
||
|
|
||
|
def __repr__(self):
|
||
|
"""Return a representation of this integer set. The representation is
|
||
|
executable to get an equal integer set."""
|
||
|
|
||
|
rv = []
|
||
|
for start, stop in self._ranges:
|
||
|
if ( isinstance(start, six.integer_types) and isinstance(stop, six.integer_types)
|
||
|
and stop-start == 1 ):
|
||
|
rv.append("%r" % start)
|
||
|
elif isinstance(stop, six.integer_types):
|
||
|
rv.append("(%r,%r)" % (start,stop-1))
|
||
|
else:
|
||
|
rv.append("(%r,%r)" % (start,stop))
|
||
|
if self._min is not _MININF:
|
||
|
rv.append("min=%r" % self._min)
|
||
|
if self._max is not _MAXINF:
|
||
|
rv.append("max=%r" % self._max)
|
||
|
return "%s(%s)" % (self.__class__.__name__,",".join(rv))
|
||
|
|
||
|
if __name__ == "__main__":
|
||
|
# Little test script demonstrating functionality.
|
||
|
x = IntSet((10,20),30)
|
||
|
y = IntSet((10,20))
|
||
|
z = IntSet((10,20),30,(15,19),min=0,max=40)
|
||
|
print(x)
|
||
|
print(x&110)
|
||
|
print(x|110)
|
||
|
print(x^(15,25))
|
||
|
print(x-12)
|
||
|
print(12 in x)
|
||
|
print(x.issubset(x))
|
||
|
print(y.issubset(x))
|
||
|
print(x.istruesubset(x))
|
||
|
print(y.istruesubset(x))
|
||
|
for val in x:
|
||
|
print(val)
|
||
|
print(x.inverse())
|
||
|
print(x == z)
|
||
|
print(x == y)
|
||
|
print(x != y)
|
||
|
print(hash(x))
|
||
|
print(hash(z))
|
||
|
print(len(x))
|
||
|
print(x.len())
|