/usr/share/check_mk/modules/prediction.py is in check-mk-server 1.2.8p16-1ubuntu0.1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# | ____ _ _ __ __ _ __ |
# | / ___| |__ ___ ___| | __ | \/ | |/ / |
# | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
# | | |___| | | | __/ (__| < | | | | . \ |
# | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
# | |
# | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation in version 2. check_mk is distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more de-
# tails. You should have received a copy of the GNU General Public
# License along with GNU Make; see the file COPYING. If not, write
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.
# Code for predictive monitoring / anomaly detection

# Names such as g_hostname, g_service_description, opt_debug, var_dir,
# simple_livestatus_query and MKGeneralException used below are provided by
# the Check_MK core, which loads this module into its own namespace.
import math
import os
import sys
import time

# Export data from an RRD file. This requires an up-to-date
# version of the rrdtools.

# Fetch RRD historic metrics data of a specific service. Returns a tuple
# of (step, [value1, value2, ...])
# IMPORTANT: Until we have a central library, keep this function in sync with
# the function get_rrd_data() from web/prediction.py.
def get_rrd_data(hostname, service_description, varname, cf, fromtime, untiltime):
    step = 1
    rpn = "%s.%s" % (varname, cf.lower()) # "MAX" -> "max"
    lql = "GET services\n" \
          "Columns: rrddata:m1:%s:%d:%d:%d\n" \
          "OutputFormat: python\n" \
          "Filter: host_name = %s\n" \
          "Filter: description = %s\n" % (
          rpn, fromtime, untiltime, step, hostname, service_description)

    try:
        response = eval(simple_livestatus_query(lql))[0][0]
    except Exception, e:
        if opt_debug:
            raise
        raise MKGeneralException("Cannot get historic metrics via Livestatus: %s" % e)

    if not response:
        raise MKGeneralException("Got no historic metrics")

    real_fromtime, real_untiltime, step = response[:3]
    values = response[3:]
    return step, values
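
# Illustrative sketch (not part of the original module): for a call like
#   get_rrd_data("myhost", "CPU load", "load15", "MAX", 1500000000, 1500003600)
# the generated Livestatus query would look like this:
#
#   GET services
#   Columns: rrddata:m1:load15.max:1500000000:1500003600:1
#   OutputFormat: python
#   Filter: host_name = myhost
#   Filter: description = CPU load
#
# The response row has the form [from, until, step, value1, value2, ...],
# of which (step, [value1, value2, ...]) is returned. The host and service
# names above are made up for the example.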
daynames = [ "monday", "tuesday", "wednesday", "thursday",
"friday", "saturday", "sunday"]
# Check wether a certain time stamp lies with in daylight safing time (DST)
def is_dst(timestamp):
return time.localtime(timestamp).tm_isdst
# Returns the timezone *including* DST shift at a certain point of time
def timezone_at(timestamp):
if is_dst(timestamp):
return time.altzone
else:
return time.timezone
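
# Illustrative sketch: on a system configured for Europe/Berlin (CET/CEST),
# time.timezone is -3600 and time.altzone is -7200, so timezone_at() yields
# -7200 for a summer timestamp and -3600 for a winter one. Subtracting this
# offset from an epoch time, as the group_by_* functions below do, shifts it
# to "local epoch" seconds so that day boundaries fall on local midnight.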
def group_by_wday(t):
    wday = time.localtime(t).tm_wday
    day_of_epoch, rel_time = divmod(t - timezone_at(t), 86400)
    return daynames[wday], rel_time

def group_by_day(t):
    return "everyday", (t - timezone_at(t)) % 86400

def group_by_day_of_month(t):
    broken = time.localtime(t)
    mday = broken[2]
    return str(mday), (t - timezone_at(t)) % 86400

def group_by_everyhour(t):
    return "everyhour", (t - timezone_at(t)) % 3600

prediction_periods = {
    "wday" : {
        "slice"   : 86400, # 7 slices
        "groupby" : group_by_wday,
        "valid"   : 7,
    },
    "day" : {
        "slice"   : 86400, # 31 slices
        "groupby" : group_by_day_of_month,
        "valid"   : 28,
    },
    "hour" : {
        "slice"   : 86400, # 1 slice
        "groupby" : group_by_day,
        "valid"   : 1,
    },
    "minute" : {
        "slice"   : 3600, # 1 slice
        "groupby" : group_by_everyhour,
        "valid"   : 24,
    },
}
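
# Illustrative sketch of the grouping: for a timestamp falling on a Tuesday
# at 09:30 local time,
#   group_by_wday(t)         -> ("tuesday", 34200)   # 9.5h into the day
#   group_by_day(t)          -> ("everyday", 34200)
#   group_by_day_of_month(t) -> ("14", 34200)        # assuming the 14th
#   group_by_everyhour(t)    -> ("everyhour", 1800)  # 30min into the hour
# The "valid" entry states for how many slices a computed prediction may be
# reused before it is considered outdated.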
def get_prediction_timegroup(t, period_info):
    # Convert to local timezone
    timegroup, rel_time = period_info["groupby"](t)
    from_time = t - rel_time
    until_time = t - rel_time + period_info["slice"]
    return timegroup, from_time, until_time, rel_time
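
# Illustrative sketch: with period "wday" and a timestamp on a Tuesday at
# 09:30 local time, get_prediction_timegroup() returns
#   ("tuesday", <epoch of local midnight>, <epoch of local midnight + 86400>, 34200)
# i.e. the group name, the slice boundaries and the offset of t within the slice.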
def compute_prediction(pred_file, timegroup, params, period_info, from_time, dsname, cf):
    # Collect all slices back into the past until the time horizon
    # is reached
    begin = from_time
    slices = []
    absolute_begin = from_time - params["horizon"] * 86400

    # The resolutions of the different time ranges differ. We interpolate
    # to the best resolution. We assume that the youngest slice has the
    # finest resolution. We also assume that each step is always divisible
    # by the smallest step.

    # Note: due to the f**king DST, we can have several shifts between
    # DST and non-DST during our computation. We need to compensate for
    # those. DST swaps within slices are being ignored. The DST flag
    # is checked against the beginning of the slice.
    smallest_step = None
    while begin >= absolute_begin:
        tg, fr, un, rel = get_prediction_timegroup(begin, period_info)
        if tg == timegroup:
            step, data = get_rrd_data(g_hostname, g_service_description,
                                      dsname, cf, fr, un-1)
            if smallest_step is None:
                smallest_step = step
            slices.append((fr, step / smallest_step, data))
        begin -= period_info["slice"]

    # Now we have all the RRD data we need. The next step is to consolidate
    # all that data into one new array.
    num_points = len(slices[0][2])
    consolidated = []
    for i in xrange(num_points):
        point_line = []
        for from_time, scale, data in slices:
            idx = int(i / float(scale))
            if idx < len(data):
                d = data[idx]
                if d is not None:
                    point_line.append(d)
                # else:
                #     date_str = time.strftime("%Y-%m-%d %H:%M", time.localtime(fr + ((un - fr) * i / float(num_points))))
                #     print "No data for %s / %d/%s/ %.2f " % (date_str, i, float(scale), i/float(scale))

        if point_line:
            average = sum(point_line) / len(point_line)
            consolidated.append([
                average,
                min(point_line),
                max(point_line),
                stdev(point_line, average),
            ])
        else:
            consolidated.append([None, None, None, None])

    result = {
        "num_points" : num_points,
        "step"       : smallest_step,
        "columns"    : [ "average", "min", "max", "stdev" ],
        "points"     : consolidated,
    }
    return result
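
# Illustrative sketch of the consolidation, with made-up numbers: suppose the
# youngest slice has step 60 and an older one step 300, so its scale is
# 300 / 60 = 5. For consolidated point i = 7, the young slice contributes
# data[7] and the old slice data[int(7 / 5.0)] = data[1]; all contributions
# for a point are then reduced to [average, min, max, stdev].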
def stdev(point_line, average):
    return math.sqrt(sum([ (p-average)**2 for p in point_line ]) / len(point_line))
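
# Illustrative sketch: stdev() computes the population standard deviation,
# e.g. stdev([2, 4, 4, 4, 5, 5, 7, 9], 5.0) == 2.0.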
# cf: consolidation function (MAX, MIN, AVERAGE)
# levels_factor: this multiplies all absolute levels. Used for example
# by the cpu.loads check, which multiplies the levels by the number of
# CPU cores.
def get_predictive_levels(dsname, params, cf, levels_factor=1.0):
    # Compute timegroup
    now = time.time()
    period_info = prediction_periods[params["period"]]

    # timegroup: name of the group, like 'monday' or '12'
    # from_time: absolute epoch time of the first second of the
    #            current slice.
    # until_time: absolute epoch of the first second *not* in the slice
    # rel_time: seconds offset of now in the current slice
    timegroup, from_time, until_time, rel_time = \
        get_prediction_timegroup(now, period_info)

    # Compute directory for prediction data
    dir = "%s/prediction/%s/%s/%s" % (var_dir, g_hostname,
            pnp_cleanup(g_service_description), pnp_cleanup(dsname))
    if not os.path.exists(dir):
        os.makedirs(dir)

    pred_file = "%s/%s" % (dir, timegroup)
    info_file = pred_file + ".info"

    # In previous versions it could happen that the files were created with
    # 0 bytes of size, which was never handled correctly, so that the
    # prediction could never be used again until manual removal of the
    # files. Clean this up.
    for file_path in [ pred_file, info_file ]:
        if os.path.exists(file_path) and os.stat(file_path).st_size == 0:
            os.unlink(file_path)

    # Check if we need to (re-)compute the prediction file. This is
    # the case if:
    # - no prediction has been done yet for this time group
    # - the prediction from the last time is outdated
    # - the prediction from the last time was done with other parameters
    try:
        last_info = eval(file(info_file).read())
        for k, v in params.items():
            if last_info.get(k) != v:
                if opt_debug:
                    sys.stderr.write("Prediction parameters have changed.\n")
                last_info = None
                break
    except IOError:
        if opt_debug:
            sys.stderr.write("No previous prediction for group %s available.\n" % timegroup)
        last_info = None

    if last_info and last_info["time"] + period_info["valid"] * period_info["slice"] < now:
        if opt_debug:
            sys.stderr.write("Prediction of %s outdated.\n" % timegroup)
        last_info = None

    if last_info:
        # TODO: faster file format. Binary encoded?
        prediction = eval(file(pred_file).read())
    else:
        # Remove all prediction files that result from other
        # prediction periods. This is e.g. needed if the user switches
        # the parameter from 'wday' to 'day'.
        for f in os.listdir(dir):
            if f.endswith(".info"):
                try:
                    info = eval(file(dir + "/" + f).read())
                    if info["period"] != params["period"]:
                        if opt_debug:
                            sys.stderr.write("Removing obsolete prediction %s\n" % f[:-5])
                        os.remove(dir + "/" + f)
                        os.remove(dir + "/" + f[:-5])
                except:
                    pass

        if opt_debug:
            sys.stderr.write("Computing prediction for time group %s.\n" % timegroup)
        prediction = compute_prediction(pred_file, timegroup, params, period_info, from_time, dsname, cf)

        info = {
            "time"   : now,
            "range"  : (from_time, until_time),
            "cf"     : cf,
            "dsname" : dsname,
            "slice"  : period_info["slice"],
        }
        info.update(params)

        file(info_file, "w").write("%r\n" % info)
        file(pred_file, "w").write("%r\n" % prediction)

    # Find reference value in prediction
    index = int(rel_time / prediction["step"])
    # print "rel_time: %d, step: %d, Index: %d, num_points: %d" % (rel_time, prediction["step"], index, prediction["num_points"])
    # print prediction.keys()
    reference = dict(zip(prediction["columns"], prediction["points"][index]))
    # print "Reference: %s" % reference

    ref_value = reference["average"]
    stdev = reference["stdev"]
    levels = []
    if not ref_value: # No reference data available
        levels = ((None, None), (None, None))
    else:
        for what, sig in [ ( "upper", 1 ), ( "lower", -1 ) ]:
            p = "levels_" + what
            if p in params:
                how, (warn, crit) = params[p]
                if how == "absolute":
                    this_levels = (ref_value + (sig * warn * levels_factor),
                                   ref_value + (sig * crit * levels_factor))
                elif how == "relative":
                    this_levels = (ref_value + sig * (ref_value * warn / 100),
                                   ref_value + sig * (ref_value * crit / 100))
                else: # how == "stdev"
                    this_levels = (ref_value + sig * (stdev * warn),
                                   ref_value + sig * (stdev * crit))

                if what == "upper" and "levels_upper_min" in params:
                    limit_warn, limit_crit = params["levels_upper_min"]
                    this_levels = (max(limit_warn, this_levels[0]), max(limit_crit, this_levels[1]))
                levels.append(this_levels)
            else:
                levels.append((None, None))
    # print levels

    return ref_value, levels
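
# Illustrative sketch with made-up parameters: given a reference average of
# 10.0, a standard deviation of 2.0 and
#   params = { ..., "levels_upper": ("stdev", (2.0, 4.0)) }
# the upper levels become (10.0 + 2.0 * 2.0, 10.0 + 2.0 * 4.0) = (14.0, 18.0),
# and a missing "levels_lower" entry yields (None, None) for the lower pair.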
def pnp_cleanup(s):
    return s \
        .replace(' ', '_') \
        .replace(':', '_') \
        .replace('/', '_') \
        .replace('\\', '_')
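
# Illustrative sketch: pnp_cleanup("CPU load/total") == "CPU_load_total",
# making the name safe for use as a directory component above.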