/usr/share/pyshared/pebl/learner/greedy.py is in python-pebl 1.0.2-2build1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""Learner that implements a greedy learning algorithm"""
import time
from pebl import network, result, evaluator
from pebl.util import *
from pebl.learner.base import *
class GreedyLearnerStatistics:
    """Counters and timing information for one greedy learner run.

    Attributes:
        restarts: number of restarts so far; initialised to -1.
        iterations: total number of iterations performed.
        unimproved_iterations: iterations since the score last improved.
        best_score: best score seen so far; initialised to 0.
        start_time: value of time.time() when this object was created.
    """

    def __init__(self):
        # Capture the clock reading that `runtime` is measured against.
        self.start_time = time.time()

        # Progress counters, all starting from their "nothing done" values.
        self.iterations = 0
        self.unimproved_iterations = 0
        self.restarts = -1
        self.best_score = 0

    @property
    def runtime(self):
        """Seconds elapsed since this statistics object was created."""
        elapsed = time.time() - self.start_time
        return elapsed
class GreedyLearner(Learner):
    """Learner that keeps a randomly altered network only if it scores better.

    See __init__ for a description of the algorithm and run() for the
    entry point.
    """

    #
    # Parameters
    #
    _params = (
        config.IntParameter(
            'greedy.max_iterations',
            """Maximum number of iterations to run.""",
            default=1000
        ),
        config.IntParameter(
            'greedy.max_time',
            """Maximum learner runtime in seconds.""",
            default=0
        ),
        config.IntParameter(
            'greedy.max_unimproved_iterations',
            """Maximum number of iterations without score improvement before
a restart.""",
            default=500
        ),
        config.StringParameter(
            'greedy.seed',
            'Starting network for a greedy search.',
            default=''
        )
    )

    def __init__(self, data_=None, prior_=None, **options):
        """Create a learner that uses a greedy learning algorithm.

        The algorithm works as follows:

            1. Start with a network (run() does not randomize the network
               on the first pass, only after a restart).
            2. Make a small, local change and rescore network.
            3. If new network scores better, accept it, otherwise reject.
            4. Steps 2-3 are repeated till the restarting criteria is met,
               at which point we begin again with a new random network
               (step 1).

        Parameters:
            data_: forwarded unchanged to Learner.__init__.
            prior_: forwarded unchanged to Learner.__init__.
            options: any config param for 'greedy' can be passed in via
                options. Use just the option part of the parameter name
                (e.g. max_iterations, max_time, seed).

        For more information about greedy learning algorithms, consult:

            1. http://en.wikipedia.org/wiki/Greedy_algorithm
            2. D. Heckerman. A Tutorial on Learning with Bayesian Networks.
               Microsoft Technical Report MSR-TR-95-06, 1995. p.35.
        """
        super(GreedyLearner, self).__init__(data_, prior_)
        # Kept so run() can tell which limits the caller passed explicitly.
        self.options = options
        config.setparams(self, options)

        # The seed may arrive as something other than a Network instance
        # (the parameter is declared as a string); wrap it in that case.
        if not isinstance(self.seed, network.Network):
            self.seed = network.Network(self.data.variables, self.seed)

    def run(self):
        """Run the learner.

        Returns a LearnerResult instance. Also sets self.result to that
        instance.
        """
        _stop = self._select_stop_criterion()

        self.stats = GreedyLearnerStatistics()
        self.result = result.LearnerResult(self)
        self.evaluator = evaluator.fromconfig(self.data, self.seed, self.prior)
        self.evaluator.score_network(self.seed.copy())

        # Only passes after the first one ask for a randomized network.
        first = True
        self.result.start_run()
        while not _stop():
            self._run_without_restarts(_stop, self._restart,
                                       randomize_net=(not first))
            first = False
        self.result.stop_run()
        return self.result

    def _select_stop_criterion(self):
        """Return the zero-argument callable that tells run() when to stop.

        max_time and max_iterations are mutually exclusive stopping
        criteria. A limit passed explicitly through the constructor options
        takes precedence (max_time first). When neither was passed
        explicitly, a positive self.max_time selects the time-based
        criterion so that a configured time limit is not silently ignored;
        otherwise the iteration-based criterion is used.
        """
        if 'max_time' in self.options:
            return self._stop_after_time
        if 'max_iterations' in self.options:
            return self._stop_after_iterations
        # The declared default for max_time is 0, which therefore keeps the
        # iteration-based criterion.
        if self.max_time > 0:
            return self._stop_after_time
        return self._stop_after_iterations

    def _run_without_restarts(self, _stop, _restart, randomize_net=True):
        """Perform one greedy pass until a restart or a stop is signalled.

        Parameters:
            _stop: zero-argument callable; a true result ends the pass.
            _restart: zero-argument callable; a true result ends the pass.
            randomize_net: when true, the evaluator's network is randomized
                before the pass begins.

        Returns None. Results are recorded on self.stats and self.result.
        """
        self.stats.restarts += 1
        self.stats.unimproved_iterations = 0

        if randomize_net:
            self.evaluator.randomize_network()

        # set the default best score
        self.stats.best_score = self.evaluator.score_network()

        # continue learning until time to stop or restart
        while not (_restart() or _stop()):
            self.stats.iterations += 1

            try:
                curscore = self._alter_network_randomly_and_score()
            except CannotAlterNetworkException:
                # Give up on this pass; the loop in run() decides whether
                # another pass is started.
                return

            # Every scored network is recorded, whether or not it is kept.
            self.result.add_network(self.evaluator.network, curscore)

            if curscore <= self.stats.best_score:
                # score did not improve (ties included), undo network
                # alteration
                self.stats.unimproved_iterations += 1
                self.evaluator.restore_network()
            else:
                self.stats.best_score = curscore
                self.stats.unimproved_iterations = 0

    #
    # Stopping and restarting criteria
    #
    def _stop_after_time(self):
        """Return True once the run has lasted at least max_time seconds."""
        return self.stats.runtime >= self.max_time

    def _stop_after_iterations(self):
        """Return True once at least max_iterations iterations were done."""
        return self.stats.iterations >= self.max_iterations

    def _restart(self):
        """Return True once max_unimproved_iterations passed unimproved."""
        return self.stats.unimproved_iterations >= self.max_unimproved_iterations
|