/usr/lib/python2.7/dist-packages/scitools/EfficiencyTable.py is in python-scitools 0.9.0-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""
This module contains a class for managing efficiency/performance
experiments.
"""
class EfficiencyTable:
    """
    Manage the CPU times of efficiency experiments and make
    tabular reports with sorted results.

    Example::

        e = EfficiencyTable('some description of the experiments')
        e.add('about an experiment', cpu_time)
        e.add('about another experiment', cpu_time)
        e.add('about a third experiment', cpu_time)
        print(e)   # prints a sorted table with scaled CPU times
        e += e2    # add experiments from EfficiencyTable e2 to e
    """

    def __init__(self, description, normalization_time=None):
        """
        @param description: a string acting as headline for this test.
        @param normalization_time: all CPU times will be divided by this
        value (if not set, the class will find the smallest (best) CPU
        time and divide all others by this value).
        The normalization time can also be set with the
        set_normalization_time method. It is not used before an
        instance is printed (str method).
        """
        self.description = description
        # key=description, value=[CPU-time1, CPU-time2, ...]
        self.experiments = {}
        self._normalization_time = normalization_time

    def add(self, description, CPU_time, function_name=None):
        """
        Add the CPU time of an experiment, together with a description
        and an optional function_name (which is appended to the
        description string as ' (function_name)').

        Repeated calls with the same resulting description append to the
        list of CPU times stored for that description.
        """
        if function_name is not None:
            # include name of the tested function as part of the description:
            description = description + ' (%s)' % function_name
        self.experiments.setdefault(description, []).append(CPU_time)

    def __iadd__(self, other):
        """
        Add results in other to present results.

        New items (descriptions) are simply registered, while
        identical items (descriptions) will have a list of CPU times,
        corresponding to the different EfficiencyTable instances.
        The headline of other is appended to the present headline on a
        new line.

        The CPU times are copied, so the two tables never share a list:
        adding to one of them afterwards does not change the other.
        """
        self.description += '\n' + other.description
        for description, cpu_times in other.experiments.items():
            self.experiments.setdefault(description, []).extend(cpu_times)
        return self

    def __add__(self, other):
        """
        As e += table (see __iadd__), but return a new table and leave
        both operands untouched.

        The new table is created without a prescribed normalization time.
        """
        e = EfficiencyTable(self.description)
        # copy every list: dict.copy() alone would let the += below
        # extend the lists owned by self
        e.experiments = dict((description, list(cpu_times))
                             for description, cpu_times
                             in self.experiments.items())
        e.__iadd__(other)
        return e

    def set_normalization_time(self, t):
        """
        Set the CPU time by which all other CPU times will be divided.
        By default (no normalization time set), this is the smallest
        reliable CPU time encountered in the data.
        """
        self._normalization_time = t

    def _reference_CPU_time(self, experiment_idx=0):
        """
        Return (reference CPU time, description of the reference experiment).

        Only entry number experiment_idx of each experiment's list of CPU
        times is considered; experiments without such an entry are skipped.

        If a normalization time is prescribed, the description of an
        experiment whose CPU time coincides with it (within 1.0E-10) is
        returned. If there is none, the normalization time is registered in
        self.experiments under the description 'some external experiment'.

        Otherwise the smallest CPU time larger than 1.0E-9 is the reference.
        ValueError is raised if no CPU time is that large.
        """
        if self._normalization_time is not None:
            # try first to see if there is an experiment with the
            # given normalization time, and if so, use the corresponding
            # description, otherwise use a dummy description:
            for description, cpu_times in self.experiments.items():
                try:
                    cpu_time = cpu_times[experiment_idx]
                except IndexError:
                    continue
                if abs(cpu_time - self._normalization_time) < 1.0E-10:
                    return self._normalization_time, description
            # no experiment coincides with the given normalization time
            description = 'some external experiment'
            self.experiments[description] = [self._normalization_time]
            return self._normalization_time, description

        # no given normalization time, find best performance:
        # (only search among positive CPU times for an experiment with
        # index experiment_idx)
        cpu_eps = 1.0E-9  # smallest reliable CPU time (but many repetitions
                          # may produce small time per call while the
                          # measurements are reliable)
        candidates = []
        for description, cpu_times in self.experiments.items():
            try:
                cpu_time = cpu_times[experiment_idx]
            except IndexError:
                continue
            # drop counting very small (unreliable) or negative
            # (erroneous timings) CPU times:
            if cpu_time > cpu_eps:
                candidates.append((cpu_time, description))
        if not candidates:
            # did not find any CPU time > cpu_eps
            raise ValueError('too small CPU times (all less than %E)'
                             % cpu_eps)
        # min returns the first of several equal minima
        return min(candidates, key=lambda candidate: candidate[0])

    def __str__(self):
        """
        Return a table of the experiments, sorted with respect to CPU times.

        In case of multiple CPU times per description of an experiment,
        the table is sorted with respect to the first CPU time entry of each
        multiple CPU times list (equal times are ordered by description).
        Each CPU time is divided by the corresponding entry of the reference
        experiment. The reference is the experiment matching the
        normalization time given to the constructor or to the
        set_normalization_time method, or if not prescribed, the experiment
        with the smallest reliable CPU time (neglecting very small CPU
        times). A row has no more columns than the reference experiment has
        CPU times.

        The stored CPU times are never modified. The only state that is
        updated is self.best and self.best_key, plus the registration of
        'some external experiment' when the prescribed normalization time
        matches no experiment.
        """
        # Find the reference before the rows are collected: it may register
        # the external experiment, and the table must look the same no
        # matter how many times the instance is printed.
        self.best, self.best_key = self._reference_CPU_time(0)
        reference_times = self.experiments[self.best_key]

        # Sort the descriptions directly on their first CPU time; this
        # copes with equal (or all zero) CPU times without touching the data.
        descriptions = sorted(
            self.experiments,
            key=lambda description: (self.experiments[description][0],
                                     description))

        rule = '*'*80
        pieces = ['\n\n' + rule + '\n' + self.description + '\n' + rule + '\n']
        pieces.append('reference CPU time based on the experiment\n "%s"\n'
                      'with CPU time:\n %s\n\n' %
                      (self.best_key, str(reference_times)[1:-1]))

        max_length = max([len(description) for description in descriptions])
        label_format = '%%-%ds | ' % max_length
        for description in descriptions:
            pieces.append(label_format % description)
            for cpu_time, ref_time in zip(self.experiments[description],
                                          reference_times):
                # float(): avoid integer division of integer timings
                nc = float(cpu_time)/ref_time
                if abs(nc) > 9999.0:
                    pieces.append('%10.1e' % nc)
                else:
                    pieces.append('%8.2f' % nc)
            pieces.append('\n')
        return ''.join(pieces)
def plot(filename):
    """
    Turn a table of normalized CPU times stored in the file filename
    into input files for Gnuplot, written to the current directory.

    The table is assumed to start three lines below the first line
    containing the text 'CPU times', and each table row must have the
    form 'name | number number ...' (the name may itself contain '|').
    Lines without a '|', such as blank lines, are ignored.

    NOTE(review): the header written by EfficiencyTable.__str__ in this
    file reads 'CPU time:' (singular); confirm that the marker searched
    for here matches the files this function is meant to read.

    Files written, with rows numbered 01, 02, ... in file order:

    - tmp_plot_NN: lines of 'column-number log10(value)' for row NN.
      Values that are not positive count as 0, and only strictly
      positive logarithms are written.
    - tmp_plot_labels: one line 'NN: name' per row, numbered as the
      tmp_plot_NN files and the curve titles.
    - tmp_plot.gnuplot: a Gnuplot script plotting all the rows.

    @param filename: name of the file holding the table.
    @raise ValueError: if the 'CPU times' line or the table rows are
    missing, or if a table entry is not a number.
    """
    import math

    with open(filename, 'r') as infile:
        lines = infile.readlines()

    start = None
    for i, line in enumerate(lines):
        if 'CPU times' in line:
            start = i+3
            break
    if start is None:
        raise ValueError('no line containing "CPU times" in %s' % filename)

    counter = 0       # number of curves written so far
    max_columns = 0   # largest number of values in any row
    with open('tmp_plot_labels', 'w') as labels:
        for line in lines[start:]:
            # split at the last '|': the numbers never contain one
            name, separator, numbers = line.rpartition('|')
            if not separator:
                continue
            curve = [float(x) for x in numbers.split()]
            for i, value in enumerate(curve):
                try:
                    curve[i] = math.log10(value)
                except ValueError:
                    # log10 is undefined for zero and negative values
                    curve[i] = 0.0
            counter += 1
            max_columns = max(max_columns, len(curve))
            with open('tmp_plot_%02d' % counter, 'w') as datafile:
                for i, v in enumerate(curve):
                    if v > 0.0:
                        datafile.write('%2d %g\n' % (i+1, v))
            # same number as the data file and the title in the script
            labels.write('%2d: %s\n' % (counter, name))

    if counter == 0:
        raise ValueError('no table rows found in %s' % filename)

    # generate Gnuplot script:
    plotfiles = ['"tmp_plot_%02d" title "%d" with lines' % (i, i)
                 for i in range(1, counter+1)]
    cmd = 'plot ' + ', '.join(plotfiles)
    with open('tmp_plot.gnuplot', 'w') as script:
        script.write('\nset xrange [0:%d]\n%s\n' % (max_columns+1, cmd))
def _test(n):
    """
    Time an empty loop of length n driven by range, xrange and
    scitools.numpyutils.iseq (5 executions of each statement) and
    print the resulting EfficiencyTable.
    """
    import timeit

    table = EfficiencyTable('Empty loops, loop length = %d' % n)
    define_n = 'n=%d' % n
    # (statement to time, setup code executed before the timing)
    cases = [
        ('for i in range(n): pass', define_n),
        ('for i in xrange(n): pass', define_n),
        ('for i in iseq(stop=n-1): pass',
         'from scitools.numpyutils import iseq;' + define_n),
    ]
    for statement, setup in cases:
        elapsed = timeit.Timer(statement, setup=setup).timeit(5)
        table.add(statement, elapsed)
    print(table)
if __name__ == '__main__':
    import sys
    # The loop length is an optional command-line argument; use 100 when
    # it is missing (IndexError) or not an integer (ValueError). Other
    # exceptions, such as KeyboardInterrupt, are no longer swallowed.
    try:
        n = int(sys.argv[1])
    except (IndexError, ValueError):
        n = 100
    _test(n)
|