/usr/share/doc/python-kjbuckets/examples/relalg.py is in python-kjbuckets 1:1.0.0-15.1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | #!/usr/bin/python -O
# A simple implementation of the relational algebra
# using kjbuckets
from kjbuckets import *
def relFromDictSet(schemeseq, dictSet):
result = relation(schemeseq, [] )
result.rowset = dictSet
return result
class relation:
def __init__(self, schemeseq, listofrows):
self.schemeseq = schemeseq
self.scheme = kjSet(schemeseq)
rowset = kjSet()
for row in listofrows:
rowset.add(kjUndump(schemeseq, row))
self.rowset = rowset
def pprint(self):
print self.schemeseq
print "============"
for row in self.rowset.items():
print row.dump(self.schemeseq)
def addDicts(self, dictseq): # not used...
for dict in dictseq:
self.rowset.add(dict)
def checkUnionCompatible(self,other):
if self.scheme != other.scheme:
raise ValueError, "operands not union compatible"
# relational union
def __add__(self, other):
self.checkUnionCompatible(other)
return relFromDictSet(self.schemeseq, self.rowset + other.rowset)
# relational difference
def __sub__(self, other):
self.checkUnionCompatible(other)
return relFromDictSet(self.schemeseq, self.rowset - other.rowset)
# natural join (hash based algorithm)
def __mul__(self,other):
commonatts = self.scheme & other.scheme
resultset = kjSet()
if commonatts: # do a hash based join
dumper = tuple(commonatts.items())
selfgraph = kjGraph() # hash index for self
othergraph = kjGraph() # hash index for other
for row in self.rowset.items():
selfgraph[row] = row.dump(dumper)
for row in other.rowset.items():
othergraph[row.dump(dumper)] = row
for (selfrow, otherrow) in (selfgraph * othergraph).items():
resultset.add(selfrow + otherrow)
else: # no common attributes: do a cross product
otherrows = other.rowset.items()
for selfrow in self.rowset.items():
for otherrow in otherrows:
resultset.add(selfrow + otherrow)
return relFromDictSet( tuple((self.scheme + other.scheme).items()),
resultset )
# selection using a att->value pairs (as conjunction)
def vSel(pairs, rel):
selected = kjSet()
selector = kjDict(pairs)
if selector.Clean()!=None:
for row in rel.rowset.items():
if (row + selector).Clean() != None:
selected.add(row)
return relFromDictSet(rel.schemeseq, selected)
# selection using att = att pairs (as conjunction)
def eqSelect(pairs, rel):
selected = kjSet()
selector = kjGraph(pairs)
selector = (selector + ~selector).tclosure() # sym, trans closure
for row in rel.rowset.items():
if row.remap(selector) != None:
selected.add(row)
return relFromDictSet(rel.schemeseq, selected)
# projection on attribute sequence (as conjunction)
def proj(atts, rel):
attset = kjSet(atts)
resultset = kjSet()
for row in rel.rowset.items():
resultset.add(attset * row)
return relFromDictSet(atts, resultset)
# renaming using (new,old) pair sequence
def rename(pairs, rel):
renames = kjDict(pairs)
untouched = rel.scheme - kjSet(renames.values())
mapper = renames + untouched
resultset = kjSet()
for row in rel.rowset.items():
resultset.add(mapper * row)
return relFromDictSet(tuple(mapper.keys()), resultset)
#=========== end of simple.py
#
#Now let me show you the "simple" module in use. First we need some relations.
#I'll steal C.J.Date's canonical/soporific supplier/parts database:
#
## database of suppliers, parts and shipments
## from Date, page 79 (2nd ed) or page 92 (3rd ed) */
def test():
#suppliers
S = relation(
('snum', 'sname', 'status', 'city'),
[ (1, 'Smith', 20, 'London'),
(2, 'Jones', 10, 'Paris'),
(3, 'Blake', 30, 'Paris'),
(4, 'Clark', 20, 'London'),
(5, 'Adams', 30, 'Athens')
])
#parts
P = relation(
('pnum', 'pname', 'color', 'weight', 'pcity'),
[ (1, 'Nut', 'Red', 12, 'London'),
(2, 'Bolt', 'Green', 17, 'Paris' ),
(3, 'Screw', 'Blue', 17, 'Rome' ),
(4, 'Screw', 'Red', 14, 'London'),
(5, 'Cam', 'Blue', 12, 'Paris'),
(6, 'Cog', 'Red', 19, 'London')
])
# shipments
SP = relation(
('snum', 'pnum', 'qty',),
[ (1, 1, 300),
(1, 2, 200),
(1, 3, 400),
(1, 4, 200),
(1, 5, 100),
(1, 6, 100),
(2, 1, 300),
(2, 2, 400),
(3, 2, 200),
(4, 2, 200),
(4, 4, 300),
(4, 5, 400)
])
# names and cities of suppliers
proj(("sname","city"),S).pprint()
# part names of parts supplied by Blake
proj(("pname",),vSel( ( ("sname","Blake"), ), S*SP*P)).pprint()
# supplier names and numbers where the supplier doesn't supply screws
( proj( ("sname","snum"), S) -
proj( ("sname","snum"),
vSel( ( ("pname", "Screw"), ), P*SP*S )
) ).pprint()
if __name__=="__main__":
test()
|