/usr/lib/python3/dist-packages/ClusterShell/RangeSet.py is in python3-clustershell 1.8-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 | #
# Copyright (C) 2012-2016 CEA/DAM
# Copyright (C) 2012-2016 Aurelien Degremont <aurelien.degremont@cea.fr>
# Copyright (C) 2015-2017 Stephane Thiell <sthiell@stanford.edu>
#
# This file is part of ClusterShell.
#
# ClusterShell is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# ClusterShell is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with ClusterShell; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""
Cluster range set module.
Instances of RangeSet provide similar operations than the builtin set type,
extended to support cluster ranges-like format and stepping support ("0-8/2").
"""
from functools import reduce
from itertools import product
from operator import mul
__all__ = ['RangeSetException',
'RangeSetParseError',
'RangeSetPaddingError',
'RangeSet',
'RangeSetND',
'AUTOSTEP_DISABLED']
# Special constant used to force turn off autostep feature.
# Note: +inf is 1E400, but a bug in python 2.4 makes it impossible to be
# pickled, so we use less. Later, we could consider sys.maxint here.
AUTOSTEP_DISABLED = 1E100
class RangeSetException(Exception):
"""Base RangeSet exception class."""
class RangeSetParseError(RangeSetException):
"""Raised when RangeSet parsing cannot be done properly."""
def __init__(self, part, msg):
if part:
msg = "%s : \"%s\"" % (msg, part)
RangeSetException.__init__(self, msg)
# faulty subrange; this allows you to target the error
self.part = part
class RangeSetPaddingError(RangeSetParseError):
"""Raised when a fatal padding incoherency occurs"""
def __init__(self, part, msg):
RangeSetParseError.__init__(self, part, "padding mismatch (%s)" % msg)
class RangeSet(set):
"""
Mutable set of cluster node indexes featuring a fast range-based API.
This class aims to ease the management of potentially large cluster range
sets and is used by the :class:`.NodeSet` class.
RangeSet basic constructors:
>>> rset = RangeSet() # empty RangeSet
>>> rset = RangeSet("5,10-42") # contains 5, 10 to 42
>>> rset = RangeSet("0-10/2") # contains 0, 2, 4, 6, 8, 10
Also any iterable of integers can be specified as first argument:
>>> RangeSet([3, 6, 8, 7, 1])
1,3,6-8
>>> rset2 = RangeSet(rset)
Padding of ranges (eg. "003-009") can be managed through a public RangeSet
instance variable named padding. It may be changed at any time. Padding is
a simple display feature per RangeSet object, thus current padding value is
not taken into account when computing set operations.
RangeSet is itself an iterator over its items as integers (instead of
strings). To iterate over string items with optional padding, you can use
the :meth:`RangeSet.striter`: method.
RangeSet provides methods like :meth:`RangeSet.union`,
:meth:`RangeSet.intersection`, :meth:`RangeSet.difference`,
:meth:`RangeSet.symmetric_difference` and their in-place versions
:meth:`RangeSet.update`, :meth:`RangeSet.intersection_update`,
:meth:`RangeSet.difference_update`,
:meth:`RangeSet.symmetric_difference_update` which conform to the Python
Set API.
"""
_VERSION = 3 # serial version number
def __init__(self, pattern=None, autostep=None):
"""Initialize RangeSet object.
:param pattern: optional string pattern
:param autostep: optional autostep threshold
"""
if pattern is None or isinstance(pattern, str):
set.__init__(self)
else:
set.__init__(self, pattern)
if isinstance(pattern, RangeSet):
self._autostep = pattern._autostep
self.padding = pattern.padding
else:
self._autostep = None
self.padding = None
self.autostep = autostep #: autostep threshold public instance attribute
if isinstance(pattern, str):
self._parse(pattern)
def _parse(self, pattern):
"""Parse string of comma-separated x-y/step -like ranges"""
# Comma separated ranges
for subrange in pattern.split(','):
if subrange.find('/') < 0:
baserange, step = subrange, 1
else:
baserange, step = subrange.split('/', 1)
try:
step = int(step)
except ValueError:
raise RangeSetParseError(subrange,
"cannot convert string to integer")
if baserange.find('-') < 0:
if step != 1:
raise RangeSetParseError(subrange, "invalid step usage")
begin = end = baserange
else:
begin, end = baserange.split('-', 1)
# compute padding and return node range info tuple
try:
pad = 0
if int(begin) != 0:
begins = begin.lstrip("0")
if len(begin) - len(begins) > 0:
pad = len(begin)
start = int(begins)
else:
if len(begin) > 1:
pad = len(begin)
start = 0
if int(end) != 0:
ends = end.lstrip("0")
else:
ends = end
stop = int(ends)
except ValueError:
if len(subrange) == 0:
msg = "empty range"
else:
msg = "cannot convert string to integer"
raise RangeSetParseError(subrange, msg)
# check preconditions
if stop > 1e100 or start > stop or step < 1:
raise RangeSetParseError(subrange, "invalid values in range")
self.add_range(start, stop + 1, step, pad)
@classmethod
def fromlist(cls, rnglist, autostep=None):
"""Class method that returns a new RangeSet with ranges from provided
list."""
inst = RangeSet(autostep=autostep)
inst.updaten(rnglist)
return inst
@classmethod
def fromone(cls, index, pad=0, autostep=None):
"""Class method that returns a new RangeSet of one single item or
a single range (from integer or slice object)."""
inst = RangeSet(autostep=autostep)
# support slice object with duck-typing
try:
inst.add(index, pad)
except TypeError:
if not index.stop:
raise ValueError("Invalid range upper limit (%s)" % index.stop)
inst.add_range(index.start or 0, index.stop, index.step or 1, pad)
return inst
def get_autostep(self):
"""Get autostep value (property)"""
if self._autostep >= AUTOSTEP_DISABLED:
return None
else:
# +1 as user wants node count but it means real steps here
return self._autostep + 1
def set_autostep(self, val):
"""Set autostep value (property)"""
if val is None:
# disabled by default for compat with other cluster tools
self._autostep = AUTOSTEP_DISABLED
else:
# - 1 because user means node count, but we mean real steps
# (this operation has no effect on AUTOSTEP_DISABLED value)
self._autostep = int(val) - 1
autostep = property(get_autostep, set_autostep)
def dim(self):
"""Get the number of dimensions of this RangeSet object. Common
method with RangeSetND. Here, it will always return 1 unless
the object is empty, in that case it will return 0."""
return int(len(self) > 0)
def _sorted(self):
"""Get sorted list from inner set."""
return sorted(set.__iter__(self))
def __iter__(self):
"""Iterate over each element in RangeSet."""
return iter(self._sorted())
def striter(self):
"""Iterate over each (optionally padded) string element in RangeSet."""
pad = self.padding or 0
for i in self._sorted():
yield "%0*d" % (pad, i)
def contiguous(self):
"""Object-based iterator over contiguous range sets."""
pad = self.padding or 0
for sli in self._contiguous_slices():
yield RangeSet.fromone(slice(sli.start, sli.stop, sli.step), pad)
def __reduce__(self):
"""Return state information for pickling."""
return self.__class__, (str(self),), \
{ 'padding': self.padding, \
'_autostep': self._autostep, \
'_version' : RangeSet._VERSION }
def __setstate__(self, dic):
"""called upon unpickling"""
self.__dict__.update(dic)
if getattr(self, '_version', 0) < RangeSet._VERSION:
# unpickle from old version?
if getattr(self, '_version', 0) <= 1:
# v1 (no object versioning) - CSv1.3
setattr(self, '_ranges', [(slice(start, stop + 1, step), pad) \
for start, stop, step, pad in getattr(self, '_ranges')])
elif hasattr(self, '_ranges'):
# v2 - CSv1.4-1.5
self_ranges = getattr(self, '_ranges')
if self_ranges and not isinstance(self_ranges[0][0], slice):
# workaround for object pickled from Python < 2.5
setattr(self, '_ranges', [(slice(start, stop, step), pad) \
for (start, stop, step), pad in self_ranges])
# convert to v3
for sli, pad in getattr(self, '_ranges'):
self.add_range(sli.start, sli.stop, sli.step, pad)
delattr(self, '_ranges')
delattr(self, '_length')
# add padding if unpickling old instances
if not hasattr(self, 'padding'):
setattr(self, 'padding', None)
def _strslices(self):
"""Stringify slices list (x-y/step format)"""
pad = self.padding or 0
for sli in self.slices():
if sli.start + 1 == sli.stop:
yield "%0*d" % (pad, sli.start)
else:
assert sli.step >= 0, "Internal error: sli.step < 0"
if sli.step == 1:
yield "%0*d-%0*d" % (pad, sli.start, pad, sli.stop - 1)
else:
yield "%0*d-%0*d/%d" % (pad, sli.start, pad, sli.stop - 1, \
sli.step)
def __str__(self):
"""Get comma-separated range-based string (x-y/step format)."""
return ','.join(self._strslices())
# __repr__ is the same as __str__ as it is a valid expression that
# could be used to recreate a RangeSet with the same value
__repr__ = __str__
def _contiguous_slices(self):
"""Internal iterator over contiguous slices in RangeSet."""
k = j = None
for i in self._sorted():
if k is None:
k = j = i
if i - j > 1:
yield slice(k, j + 1, 1)
k = i
j = i
if k is not None:
yield slice(k, j + 1, 1)
def _folded_slices(self):
"""Internal generator that is able to retrieve ranges organized by
step."""
if len(self) == 0:
return
prng = None # pending range
istart = None # processing starting indice
step = 0 # processing step
for sli in self._contiguous_slices():
start = sli.start
stop = sli.stop
unitary = (start + 1 == stop) # one indice?
if istart is None: # first loop
if unitary:
istart = start
else:
prng = [start, stop, 1]
istart = stop - 1
i = k = istart
elif step == 0: # istart is set but step is unknown
if not unitary:
if prng is not None:
# yield and replace pending range
yield slice(*prng)
else:
yield slice(istart, istart + 1, 1)
prng = [start, stop, 1]
istart = k = stop - 1
continue
i = start
else: # step > 0
assert step > 0
i = start
# does current range lead to broken step?
if step != i - k or not unitary:
#Python2.6+: j = i if step == i - k else k
if step == i - k:
j = i
else:
j = k
# stepped is True when autostep setting does apply
stepped = (j - istart >= self._autostep * step)
if prng: # yield pending range?
if stepped:
prng[1] -= 1
else:
istart += step
yield slice(*prng)
prng = None
if step != i - k:
# case: step value has changed
if stepped:
yield slice(istart, k + 1, step)
else:
for j in range(istart, k - step + 1, step):
yield slice(j, j + 1, 1)
if not unitary:
yield slice(k, k + 1, 1)
if unitary:
if stepped:
istart = i = k = start
else:
istart = k
else:
prng = [start, stop, 1]
istart = i = k = stop - 1
elif not unitary:
# case: broken step by contiguous range
if stepped:
# yield 'range/step' by taking first indice of new range
yield slice(istart, i + 1, step)
i += 1
else:
# autostep setting does not apply in that case
for j in range(istart, i - step + 1, step):
yield slice(j, j + 1, 1)
if stop > i + 1: # current->pending only if not unitary
prng = [i, stop, 1]
istart = i = k = stop - 1
step = i - k
k = i
# exited loop, process pending range or indice...
if step == 0:
if prng:
yield slice(*prng)
else:
yield slice(istart, istart + 1, 1)
else:
assert step > 0
stepped = (k - istart >= self._autostep * step)
if prng:
if stepped:
prng[1] -= 1
else:
istart += step
yield slice(*prng)
prng = None
if stepped:
yield slice(istart, i + 1, step)
else:
for j in range(istart, i + 1, step):
yield slice(j, j + 1, 1)
def slices(self):
"""
Iterate over RangeSet ranges as Python slice objects.
"""
# return an iterator
if self._autostep >= AUTOSTEP_DISABLED:
# autostep disabled: call simpler method to return only a:b slices
return self._contiguous_slices()
else:
# autostep enabled: call generic method to return a:b:step slices
return self._folded_slices()
def __getitem__(self, index):
"""
Return the element at index or a subrange when a slice is specified.
"""
if isinstance(index, slice):
inst = RangeSet()
inst._autostep = self._autostep
inst.padding = self.padding
inst.update(self._sorted()[index])
return inst
elif isinstance(index, int):
return self._sorted()[index]
else:
raise TypeError("%s indices must be integers" %
self.__class__.__name__)
def split(self, nbr):
"""
Split the rangeset into nbr sub-rangesets (at most). Each
sub-rangeset will have the same number of elements more or
less 1. Current rangeset remains unmodified. Returns an
iterator.
>>> RangeSet("1-5").split(3)
RangeSet("1-2")
RangeSet("3-4")
RangeSet("foo5")
"""
assert(nbr > 0)
# We put the same number of element in each sub-nodeset.
slice_size = len(self) // int(nbr)
left = len(self) % nbr
begin = 0
for i in range(0, min(nbr, len(self))):
length = slice_size + int(i < left)
yield self[begin:begin + length]
begin += length
def add_range(self, start, stop, step=1, pad=0):
"""
Add a range (start, stop, step and padding length) to RangeSet.
Like the Python built-in function *range()*, the last element
is the largest start + i * step less than stop.
"""
assert start < stop, "please provide ordered node index ranges"
assert step > 0
assert pad >= 0
assert stop - start < 1e9, "range too large"
# inherit padding info only if currently not defined
if pad is not None and pad > 0 and self.padding is None:
self.padding = pad
set.update(self, range(start, stop, step))
def copy(self):
"""Return a shallow copy of a RangeSet."""
cpy = self.__class__()
cpy._autostep = self._autostep
cpy.padding = self.padding
cpy.update(self)
return cpy
__copy__ = copy # For the copy module
def __eq__(self, other):
"""
RangeSet equality comparison.
"""
# Return NotImplemented instead of raising TypeError, to
# indicate that the comparison is not implemented with respect
# to the other type (the other comparand then gets a change to
# determine the result, then it falls back to object address
# comparison).
if not isinstance(other, RangeSet):
return NotImplemented
return len(self) == len(other) and self.issubset(other)
# Standard set operations: union, intersection, both differences.
# Each has an operator version (e.g. __or__, invoked with |) and a
# method version (e.g. union).
# Subtle: Each pair requires distinct code so that the outcome is
# correct when the type of other isn't suitable. For example, if
# we did "union = __or__" instead, then Set().union(3) would return
# NotImplemented instead of raising TypeError (albeit that *why* it
# raises TypeError as-is is also a bit subtle).
def __or__(self, other):
"""Return the union of two RangeSets as a new RangeSet.
(I.e. all elements that are in either set.)
"""
if not isinstance(other, set):
return NotImplemented
return self.union(other)
def union(self, other):
"""Return the union of two RangeSets as a new RangeSet.
(I.e. all elements that are in either set.)
"""
self_copy = self.copy()
self_copy.update(other)
return self_copy
def __and__(self, other):
"""Return the intersection of two RangeSets as a new RangeSet.
(I.e. all elements that are in both sets.)
"""
if not isinstance(other, set):
return NotImplemented
return self.intersection(other)
def intersection(self, other):
"""Return the intersection of two RangeSets as a new RangeSet.
(I.e. all elements that are in both sets.)
"""
self_copy = self.copy()
self_copy.intersection_update(other)
return self_copy
def __xor__(self, other):
"""Return the symmetric difference of two RangeSets as a new RangeSet.
(I.e. all elements that are in exactly one of the sets.)
"""
if not isinstance(other, set):
return NotImplemented
return self.symmetric_difference(other)
def symmetric_difference(self, other):
"""Return the symmetric difference of two RangeSets as a new RangeSet.
(ie. all elements that are in exactly one of the sets.)
"""
self_copy = self.copy()
self_copy.symmetric_difference_update(other)
return self_copy
def __sub__(self, other):
"""Return the difference of two RangeSets as a new RangeSet.
(I.e. all elements that are in this set and not in the other.)
"""
if not isinstance(other, set):
return NotImplemented
return self.difference(other)
def difference(self, other):
"""Return the difference of two RangeSets as a new RangeSet.
(I.e. all elements that are in this set and not in the other.)
"""
self_copy = self.copy()
self_copy.difference_update(other)
return self_copy
# Membership test
def __contains__(self, element):
"""Report whether an element is a member of a RangeSet.
Element can be either another RangeSet object, a string or an
integer.
Called in response to the expression ``element in self``.
"""
if isinstance(element, set):
return element.issubset(self)
return set.__contains__(self, int(element))
# Subset and superset test
def issubset(self, other):
"""Report whether another set contains this RangeSet."""
self._binary_sanity_check(other)
return set.issubset(self, other)
def issuperset(self, other):
"""Report whether this RangeSet contains another set."""
self._binary_sanity_check(other)
return set.issuperset(self, other)
# Inequality comparisons using the is-subset relation.
__le__ = issubset
__ge__ = issuperset
def __lt__(self, other):
self._binary_sanity_check(other)
return len(self) < len(other) and self.issubset(other)
def __gt__(self, other):
self._binary_sanity_check(other)
return len(self) > len(other) and self.issuperset(other)
# Assorted helpers
def _binary_sanity_check(self, other):
"""Check that the other argument to a binary operation is also a set,
raising a TypeError otherwise."""
if not isinstance(other, set):
raise TypeError("Binary operation only permitted between sets")
# In-place union, intersection, differences.
# Subtle: The xyz_update() functions deliberately return None,
# as do all mutating operations on built-in container types.
# The __xyz__ spellings have to return self, though.
def __ior__(self, other):
"""Update a RangeSet with the union of itself and another."""
self._binary_sanity_check(other)
set.__ior__(self, other)
return self
def union_update(self, other):
"""Update a RangeSet with the union of itself and another."""
self.update(other)
def __iand__(self, other):
"""Update a RangeSet with the intersection of itself and another."""
self._binary_sanity_check(other)
set.__iand__(self, other)
return self
def intersection_update(self, other):
"""Update a RangeSet with the intersection of itself and another."""
set.intersection_update(self, other)
def __ixor__(self, other):
"""Update a RangeSet with the symmetric difference of itself and
another."""
self._binary_sanity_check(other)
set.symmetric_difference_update(self, other)
return self
def symmetric_difference_update(self, other):
"""Update a RangeSet with the symmetric difference of itself and
another."""
set.symmetric_difference_update(self, other)
def __isub__(self, other):
"""Remove all elements of another set from this RangeSet."""
self._binary_sanity_check(other)
set.difference_update(self, other)
return self
def difference_update(self, other, strict=False):
"""Remove all elements of another set from this RangeSet.
If strict is True, raise KeyError if an element cannot be removed.
(strict is a RangeSet addition)"""
if strict and other not in self:
raise KeyError(set.difference(other, self).pop())
set.difference_update(self, other)
# Python dict-like mass mutations: update, clear
def update(self, iterable):
"""Add all integers from an iterable (such as a list)."""
if isinstance(iterable, RangeSet):
# keep padding unless it has not been defined yet
if self.padding is None and iterable.padding is not None:
self.padding = iterable.padding
assert not isinstance(iterable, str)
set.update(self, iterable)
def updaten(self, rangesets):
"""
Update a rangeset with the union of itself and several others.
"""
for rng in rangesets:
if isinstance(rng, set):
self.update(rng)
else:
self.update(RangeSet(rng))
# py2.5+
#self.update(rng if isinstance(rng, set) else RangeSet(rng))
def clear(self):
"""Remove all elements from this RangeSet."""
set.clear(self)
self.padding = None
# Single-element mutations: add, remove, discard
def add(self, element, pad=0):
"""Add an element to a RangeSet.
This has no effect if the element is already present.
"""
# inherit padding info only if currently not defined
if pad is not None and pad > 0 and self.padding is None:
self.padding = pad
set.add(self, int(element))
def remove(self, element):
"""Remove an element from a RangeSet; it must be a member.
:param element: the element to remove
:raises KeyError: element is not contained in RangeSet
:raises ValueError: element is not castable to integer
"""
set.remove(self, int(element))
def discard(self, element):
"""Remove element from the RangeSet if it is a member.
If the element is not a member, do nothing.
"""
try:
i = int(element)
set.discard(self, i)
except ValueError:
pass # ignore other object types
class RangeSetND(object):
"""
Build a N-dimensional RangeSet object.
.. warning:: You don't usually need to use this class directly, use
:class:`.NodeSet` instead that has ND support.
Empty constructor::
RangeSetND()
Build from a list of list of :class:`RangeSet` objects::
RangeSetND([[rs1, rs2, rs3, ...], ...])
Strings are also supported::
RangeSetND([["0-3", "4-10", ...], ...])
Integers are also supported::
RangeSetND([(0, 4), (0, 5), (1, 4), (1, 5), ...]
"""
def __init__(self, args=None, pads=None, autostep=None, copy_rangeset=True):
"""RangeSetND initializer
All parameters are optional.
:param args: generic "list of list" input argument (default is None)
:param pads: list of 0-padding length (default is to not pad any
dimensions)
:param autostep: autostep threshold (use range/step notation if more
than #autostep items meet the condition) - default is
off (None)
:param copy_rangeset: (advanced) if set to False, do not copy RangeSet
objects from args (transfer ownership), which is
faster. In that case, you should not modify these
objects afterwards (default is True).
"""
# RangeSetND are arranged as a list of N-dimensional RangeSet vectors
self._veclist = []
# Dirty flag to avoid doing veclist folding too often
self._dirty = True
# Initialize autostep through property
self._autostep = None
self.autostep = autostep #: autostep threshold public instance attribute
# Hint on whether several dimensions are varying or not
self._multivar_hint = False
if args is None:
return
for rgvec in args:
if rgvec:
if isinstance(rgvec[0], str):
self._veclist.append([RangeSet(rg, autostep=autostep) \
for rg in rgvec])
elif isinstance(rgvec[0], RangeSet):
if copy_rangeset:
self._veclist.append([rg.copy() for rg in rgvec])
else:
self._veclist.append(rgvec)
else:
if pads is None:
self._veclist.append( \
[RangeSet.fromone(rg, autostep=autostep) \
for rg in rgvec])
else:
self._veclist.append( \
[RangeSet.fromone(rg, pad, autostep) \
for rg, pad in zip(rgvec, pads)])
class precond_fold(object):
"""Decorator to ease internal folding management"""
def __call__(self, func):
def inner(*args, **kwargs):
rgnd, fargs = args[0], args[1:]
if rgnd._dirty:
rgnd._fold()
return func(rgnd, *fargs, **kwargs)
# modify the decorator meta-data for pydoc
# Note: should be later replaced by @wraps (functools)
# as of Python 2.5
inner.__name__ = func.__name__
inner.__doc__ = func.__doc__
inner.__dict__ = func.__dict__
inner.__module__ = func.__module__
return inner
@precond_fold()
def copy(self):
"""Return a new, mutable shallow copy of a RangeSetND."""
cpy = self.__class__()
# Shallow "to the extent possible" says the copy module, so here that
# means calling copy() on each sub-RangeSet to keep mutability.
cpy._veclist = [[rg.copy() for rg in rgvec] for rgvec in self._veclist]
cpy._dirty = self._dirty
return cpy
__copy__ = copy # For the copy module
def __eq__(self, other):
"""RangeSetND equality comparison."""
# Return NotImplemented instead of raising TypeError, to
# indicate that the comparison is not implemented with respect
# to the other type (the other comparand then gets a change to
# determine the result, then it falls back to object address
# comparison).
if not isinstance(other, RangeSetND):
return NotImplemented
return len(self) == len(other) and self.issubset(other)
def __bool__(self):
return bool(self._veclist)
__nonzero__ = __bool__ # Python 2 compat
def __len__(self):
"""Count unique elements in N-dimensional rangeset."""
return sum([reduce(mul, [len(rg) for rg in rgvec]) \
for rgvec in self.veclist])
@precond_fold()
def __str__(self):
"""String representation of N-dimensional RangeSet."""
result = ""
for rgvec in self._veclist:
result += "; ".join([str(rg) for rg in rgvec])
result += "\n"
return result
@precond_fold()
def __iter__(self):
return self._iter()
def _iter(self):
"""Iterate through individual items as tuples."""
for vec in self._veclist:
for ivec in product(*vec):
yield ivec
@precond_fold()
def iter_padding(self):
"""Iterate through individual items as tuples with padding info."""
for vec in self._veclist:
for ivec in product(*vec):
yield ivec, [rg.padding for rg in vec]
@precond_fold()
def _get_veclist(self):
"""Get folded veclist"""
return self._veclist
def _set_veclist(self, val):
"""Set veclist and set dirty flag for deferred folding."""
self._veclist = val
self._dirty = True
veclist = property(_get_veclist, _set_veclist)
def vectors(self):
"""Get underlying :class:`RangeSet` vectors"""
return iter(self.veclist)
def dim(self):
"""Get the current number of dimensions of this RangeSetND
object. Return 0 when object is empty."""
try:
return len(self._veclist[0])
except IndexError:
return 0
def pads(self):
"""Get a tuple of padding length info for each dimension."""
# return a tuple of max padding length for each axis
pad_veclist = ((rg.padding or 0 for rg in vec) for vec in self._veclist)
return tuple(max(pads) for pads in zip(*pad_veclist))
def get_autostep(self):
"""Get autostep value (property)"""
if self._autostep >= AUTOSTEP_DISABLED:
return None
else:
# +1 as user wants node count but _autostep means real steps here
return self._autostep + 1
def set_autostep(self, val):
"""Set autostep value (property)"""
# Must conform to RangeSet.autostep logic
if val is None:
self._autostep = AUTOSTEP_DISABLED
else:
# Like in RangeSet.set_autostep(): -1 because user means node count,
# but we mean real steps (this operation has no effect on
# AUTOSTEP_DISABLED value)
self._autostep = int(val) - 1
# Update our RangeSet objects
for rgvec in self._veclist:
for rg in rgvec:
rg._autostep = self._autostep
autostep = property(get_autostep, set_autostep)
@precond_fold()
def __getitem__(self, index):
"""
Return the element at index or a subrange when a slice is specified.
"""
if isinstance(index, slice):
iveclist = []
for rgvec in self._veclist:
iveclist += product(*rgvec)
assert(len(iveclist) == len(self))
rnd = RangeSetND(iveclist[index],
pads=[rg.padding for rg in self._veclist[0]],
autostep=self.autostep)
return rnd
elif isinstance(index, int):
# find a tuple of integer (multi-dimensional) at position index
if index < 0:
length = len(self)
if index >= -length:
index = length + index
else:
raise IndexError("%d out of range" % index)
length = 0
for rgvec in self._veclist:
cnt = reduce(mul, [len(rg) for rg in rgvec])
if length + cnt < index:
length += cnt
else:
for ivec in product(*rgvec):
if index == length:
return ivec
length += 1
raise IndexError("%d out of range" % index)
else:
raise TypeError("%s indices must be integers" %
self.__class__.__name__)
@precond_fold()
def contiguous(self):
"""Object-based iterator over contiguous range sets."""
veclist = self._veclist
try:
dim = len(veclist[0])
except IndexError:
return
for dimidx in range(dim):
new_veclist = []
for rgvec in veclist:
for rgsli in rgvec[dimidx].contiguous():
rgvec = list(rgvec)
rgvec[dimidx] = rgsli
new_veclist.append(rgvec)
veclist = new_veclist
for rgvec in veclist:
yield RangeSetND([rgvec])
# Membership test
@precond_fold()
def __contains__(self, element):
"""Report whether an element is a member of a RangeSetND.
Element can be either another RangeSetND object, a string or
an integer.
Called in response to the expression ``element in self``.
"""
if isinstance(element, RangeSetND):
rgnd_element = element
else:
rgnd_element = RangeSetND([[str(element)]])
return rgnd_element.issubset(self)
# Subset and superset test
def issubset(self, other):
"""Report whether another set contains this RangeSetND."""
self._binary_sanity_check(other)
return other.issuperset(self)
@precond_fold()
def issuperset(self, other):
"""Report whether this RangeSetND contains another RangeSetND."""
self._binary_sanity_check(other)
if self.dim() == 1 and other.dim() == 1:
return self._veclist[0][0].issuperset(other._veclist[0][0])
if not other._veclist:
return True
test = other.copy()
test.difference_update(self)
return not bool(test)
# Inequality comparisons using the is-subset relation.
__le__ = issubset
__ge__ = issuperset
def __lt__(self, other):
self._binary_sanity_check(other)
return len(self) < len(other) and self.issubset(other)
def __gt__(self, other):
self._binary_sanity_check(other)
return len(self) > len(other) and self.issuperset(other)
# Assorted helpers
def _binary_sanity_check(self, other):
"""Check that the other argument to a binary operation is also a
RangeSetND, raising a TypeError otherwise."""
if not isinstance(other, RangeSetND):
msg = "Binary operation only permitted between RangeSetND"
raise TypeError(msg)
def _sort(self):
"""N-dimensional sorting."""
def rgveckeyfunc(rgvec):
# key used for sorting purposes, based on the following
# conditions:
# (1) larger vector first (#elements)
# (2) larger dim first (#elements)
# (3) lower first index first
# (4) lower last index first
return (-reduce(mul, [len(rg) for rg in rgvec]), \
tuple((-len(rg), rg[0], rg[-1]) for rg in rgvec))
self._veclist.sort(key=rgveckeyfunc)
@precond_fold()
def fold(self):
"""Explicit folding call. Please note that folding of RangeSetND
nD vectors are automatically managed, so you should not have to
call this method. It may be still useful in some extreme cases
where the RangeSetND is heavily modified."""
pass
def _fold(self):
"""In-place N-dimensional folding."""
assert self._dirty
if len(self._veclist) > 1:
self._fold_univariate() or self._fold_multivariate()
else:
self._dirty = False
def _fold_univariate(self):
"""Univariate nD folding. Return True on success and False when
a multivariate folding is required."""
dim = self.dim()
vardim = dimdiff = 0
if dim > 1:
# We got more than one dimension, see if only one is changing...
for i in range(dim):
# Are all rangesets on this dimension the same?
slist = [vec[i] for vec in self._veclist]
if slist.count(slist[0]) != len(slist):
dimdiff += 1
if dimdiff > 1:
break
vardim = i
univar = (dim == 1 or dimdiff == 1)
if univar:
# Eligible for univariate folding (faster!)
for vec in self._veclist[1:]:
self._veclist[0][vardim].update(vec[vardim])
del self._veclist[1:]
self._dirty = False
self._multivar_hint = not univar
return univar
def _fold_multivariate(self):
"""Multivariate nD folding"""
# PHASE 1: expand with respect to uniqueness
self._fold_multivariate_expand()
self._sort()
# PHASE 2: merge
self._fold_multivariate_merge()
self._sort()
self._dirty = False
def _fold_multivariate_expand(self):
"""Multivariate nD folding: expand [phase 1]"""
max_length = sum([reduce(mul, [len(rg) for rg in rgvec]) \
for rgvec in self._veclist])
# Simple heuristic that makes us faster
if len(self._veclist) * (len(self._veclist) - 1) / 2 > max_length * 10:
# *** nD full expand is preferred ***
pads = self.pads()
self._veclist = [[RangeSet.fromone(i, pad=pads[axis])
for axis, i in enumerate(tvec)]
for tvec in set(self._iter())]
return
# *** nD compare algorithm is preferred ***
index1, index2 = 0, 1
while (index1 + 1) < len(self._veclist):
# use 2 references on iterator to compare items by couples
item1 = self._veclist[index1]
index2 = index1 + 1
index1 += 1
while index2 < len(self._veclist):
item2 = self._veclist[index2]
index2 += 1
new_item = None
disjoint = False
suppl = []
for pos, (rg1, rg2) in enumerate(zip(item1, item2)):
if not rg1 & rg2:
disjoint = True
break
if new_item is None:
new_item = [None] * len(item1)
if rg1 == rg2:
new_item[pos] = rg1
else:
assert rg1 & rg2
# intersection
new_item[pos] = rg1 & rg2
# create part 1
if rg1 - rg2:
item1_p = item1[0:pos] + [rg1 - rg2] + item1[pos+1:]
suppl.append(item1_p)
# create part 2
if rg2 - rg1:
item2_p = item2[0:pos] + [rg2 - rg1] + item2[pos+1:]
suppl.append(item2_p)
if not disjoint:
assert new_item is not None
assert suppl is not None
item1 = self._veclist[index1 - 1] = new_item
index2 -= 1
self._veclist.pop(index2)
self._veclist += suppl
def _fold_multivariate_merge(self):
"""Multivariate nD folding: merge [phase 2]"""
chg = True
while chg:
chg = False
index1, index2 = 0, 1
while (index1 + 1) < len(self._veclist):
# use 2 references on iterator to compare items by couples
item1 = self._veclist[index1]
index2 = index1 + 1
index1 += 1
while index2 < len(self._veclist):
item2 = self._veclist[index2]
index2 += 1
new_item = [None] * len(item1)
nb_diff = 0
# compare 2 rangeset vector, item by item, the idea being
# to merge vectors if they differ only by one item
for pos, (rg1, rg2) in enumerate(zip(item1, item2)):
if rg1 == rg2:
new_item[pos] = rg1
elif not rg1 & rg2: # merge on disjoint ranges
nb_diff += 1
if nb_diff > 1:
break
new_item[pos] = rg1 | rg2
# if fully contained, keep the largest one
elif (rg1 > rg2 or rg1 < rg2): # and nb_diff == 0:
nb_diff += 1
if nb_diff > 1:
break
new_item[pos] = max(rg1, rg2)
# otherwise, compute rangeset intersection and
# keep the two disjoint part to be handled
# later...
else:
# intersection but do nothing
nb_diff = 2
break
# one change has been done: use this new item to compare
# with other
if nb_diff <= 1:
chg = True
item1 = self._veclist[index1 - 1] = new_item
index2 -= 1
self._veclist.pop(index2)
def __or__(self, other):
"""Return the union of two RangeSetNDs as a new RangeSetND.
(I.e. all elements that are in either set.)
"""
if not isinstance(other, RangeSetND):
return NotImplemented
return self.union(other)
def union(self, other):
"""Return the union of two RangeSetNDs as a new RangeSetND.
(I.e. all elements that are in either set.)
"""
rgnd_copy = self.copy()
rgnd_copy.update(other)
return rgnd_copy
def update(self, other):
"""Add all RangeSetND elements to this RangeSetND."""
if isinstance(other, RangeSetND):
iterable = other._veclist
else:
iterable = other
for vec in iterable:
# copy rangesets and set custom autostep
assert isinstance(vec[0], RangeSet)
cpyvec = []
for rg in vec:
cpyrg = rg.copy()
cpyrg.autostep = self.autostep
cpyvec.append(cpyrg)
self._veclist.append(cpyvec)
self._dirty = True
if not self._multivar_hint:
self._fold_univariate()
union_update = update
def __ior__(self, other):
"""Update a RangeSetND with the union of itself and another."""
self._binary_sanity_check(other)
self.update(other)
return self
def __isub__(self, other):
"""Remove all elements of another set from this RangeSetND."""
self._binary_sanity_check(other)
self.difference_update(other)
return self
def difference_update(self, other, strict=False):
"""Remove all elements of another set from this RangeSetND.
If strict is True, raise KeyError if an element cannot be removed
(strict is a RangeSet addition)"""
if strict and not other in self:
raise KeyError(other.difference(self)[0])
ergvx = other._veclist # read only
rgnd_new = []
index1 = 0
while index1 < len(self._veclist):
rgvec1 = self._veclist[index1]
procvx1 = [ rgvec1 ]
nextvx1 = []
index2 = 0
while index2 < len(ergvx):
rgvec2 = ergvx[index2]
while len(procvx1) > 0: # refine diff for each resulting vector
rgproc1 = procvx1.pop(0)
tmpvx = []
for pos, (rg1, rg2) in enumerate(zip(rgproc1, rgvec2)):
if rg1 == rg2 or rg1 < rg2: # issubset
pass
elif rg1 & rg2: # intersect
tmpvec = list(rgproc1)
tmpvec[pos] = rg1.difference(rg2)
tmpvx.append(tmpvec)
else: # disjoint
tmpvx = [ rgproc1 ] # reset previous work
break
if tmpvx:
nextvx1 += tmpvx
if nextvx1:
procvx1 = nextvx1
nextvx1 = []
index2 += 1
if procvx1:
rgnd_new += procvx1
index1 += 1
self.veclist = rgnd_new
def __sub__(self, other):
"""Return the difference of two RangeSetNDs as a new RangeSetND.
(I.e. all elements that are in this set and not in the other.)
"""
if not isinstance(other, RangeSetND):
return NotImplemented
return self.difference(other)
def difference(self, other):
"""
``s.difference(t)`` returns a new object with elements in s
but not in t.
"""
self_copy = self.copy()
self_copy.difference_update(other)
return self_copy
def intersection(self, other):
"""
``s.intersection(t)`` returns a new object with elements common
to s and t.
"""
self_copy = self.copy()
self_copy.intersection_update(other)
return self_copy
def __and__(self, other):
"""
Implements the & operator. So ``s & t`` returns a new object
with elements common to s and t.
"""
if not isinstance(other, RangeSetND):
return NotImplemented
return self.intersection(other)
def intersection_update(self, other):
"""
``s.intersection_update(t)`` returns nodeset s keeping only
elements also found in t.
"""
if other is self:
return
tmp_rnd = RangeSetND()
empty_rset = RangeSet()
for rgvec in self._veclist:
for ergvec in other._veclist:
irgvec = [rg.intersection(erg) \
for rg, erg in zip(rgvec, ergvec)]
if not empty_rset in irgvec:
tmp_rnd.update([irgvec])
# substitute
self.veclist = tmp_rnd.veclist
def __iand__(self, other):
"""
Implements the &= operator. So ``s &= t`` returns object s
keeping only elements also found in t (Python 2.5+ required).
"""
self._binary_sanity_check(other)
self.intersection_update(other)
return self
def symmetric_difference(self, other):
"""
``s.symmetric_difference(t)`` returns the symmetric difference
of two objects as a new RangeSetND.
(ie. all items that are in exactly one of the RangeSetND.)
"""
self_copy = self.copy()
self_copy.symmetric_difference_update(other)
return self_copy
def __xor__(self, other):
"""
Implement the ^ operator. So ``s ^ t`` returns a new RangeSetND
with nodes that are in exactly one of the RangeSetND.
"""
if not isinstance(other, RangeSetND):
return NotImplemented
return self.symmetric_difference(other)
def symmetric_difference_update(self, other):
"""
``s.symmetric_difference_update(t)`` returns RangeSetND s
keeping all nodes that are in exactly one of the objects.
"""
diff2 = other.difference(self)
self.difference_update(other)
self.update(diff2)
def __ixor__(self, other):
"""
Implement the ^= operator. So ``s ^= t`` returns object s after
keeping all items that are in exactly one of the RangeSetND
(Python 2.5+ required).
"""
self._binary_sanity_check(other)
self.symmetric_difference_update(other)
return self
|