/usr/share/check_mk/checks/df.include is in check-mk-server 1.2.8p16-1ubuntu0.1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# | ____ _ _ __ __ _ __ |
# | / ___| |__ ___ ___| | __ | \/ | |/ / |
# | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
# | | |___| | | | __/ (__| < | | | | . \ |
# | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
# | |
# | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation in version 2. check_mk is distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more de-
# tails. You should have received a copy of the GNU General Public
# License along with GNU Make; see the file COPYING. If not, write
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.
# Common include file for all filesystem checks (df, df_netapp, hr_fs, ...)
# Global settings shared by the filesystem checks (df, df_vms, df_netapp
# and maybe others).

# Obsolete. Only kept so that the configuration can be checked and a
# warning can be issued if it has been changed.
filesystem_levels = []

# Can also be dropped some day in the future.
filesystem_default_levels = {}

# Filesystem types to ignore (should not be sent by the agent anyway).
inventory_df_exclude_fs = ["tmpfs", "nfs", "smbfs", "cifs", "iso9660"]

# Mount points to ignore.
inventory_df_exclude_mountpoints = ["/dev"]
# Grouping of filesystems into groups that are monitored as one entity.
# Each rule carries a list of (group name, mount point pattern) pairs; the
# patterns are matched against mount points with fnmatch.
# Example:
# filesystem_groups = [
#     ( [ ( "Storage pool", "/data/pool*" ) ], [ 'linux', 'prod' ], ALL_HOSTS ),
#     ( [ ( "Backup space 1", "/usr/backup/*.xyz" ),
#         ( "Backup space 2", "/usr/backup2/*.xyz" ) ], ALL_HOSTS ),
# ]
filesystem_groups = []
# Alternative syntax for parameters (a dictionary):
# { "levels"         : (80, 90),  # levels in percent
#   "magic"          : 0.5,       # magic factor
#   "magic_normsize" : 20,        # normsize in GB
#   "levels_low"     : (50, 60),  # magic never lowers levels below this (percent)
#   "trend_range"    : 24,        # hours
#   "trend_mb"       : (10, 20),  # MB of change during trend_range
#   "trend_perc"     : (1, 2),    # Percent change during trend_range
#   "trend_timeleft" : (72, 48)   # run time left in hours until full
# }
# Factory defaults; get_filesystem_levels() starts from a copy of this
# dictionary and overlays the configured values.
factory_settings["filesystem_default_levels"] = {
    "levels"         : (80.0, 90.0), # warn/crit in percent
    "magic_normsize" : 20,           # Standard size is 20 GB
    "levels_low"     : (50.0, 60.0), # Never move warn level below 50% due to magic factor
    "trend_range"    : 24,           # hours
    "trend_perfdata" : True,         # do send performance data for trends
    "show_levels"    : "onmagic",    # show levels on problems or when a magic factor is set
    "inodes_levels"  : (10.0, 5.0),  # warn/crit if less than this percentage of inodes is available
    "show_inodes"    : "onlow",      # show inodes on problems or if less than 50% are available
    "show_reserved"  : False,        # do not show the space reserved for root
}
def df_inventory(mplist):
    """Build the inventory for a list of mount points.

    Every mount point that matches no pattern of any filesystem group
    configured for the current host yields one entry (mount point, {}).
    Every group with at least one matching mount point yields one entry
    (group name, {"patterns": [...]}) carrying all patterns of that group.
    """
    # Collect the patterns of all groups that are configured for this host
    patterns_by_group = {}
    for rule in host_extra_conf(g_hostname, filesystem_groups):
        for name, glob in rule:
            patterns_by_group.setdefault(name, []).append(glob)

    services = []
    matched_groups = set()
    for mountpoint in mplist:
        grouped = False
        # A mount point may belong to several groups, so all of them are tried
        for name, globs in patterns_by_group.items():
            if any(fnmatch.fnmatch(mountpoint, glob) for glob in globs):
                matched_groups.add(name)
                grouped = True
        if not grouped:
            services.append((mountpoint, {}))

    # One additional service per non-empty group
    services.extend(
        (name, {"patterns": patterns_by_group[name]}) for name in matched_groups)
    return services
# Users might have set filesystem_default_levels to the old format like (80, 90).
# Needed by df, df_netapp and vms_df, and maybe by others in the future:
# compute warning and critical levels. Takes into account the size of
# the filesystem and the magic number. Since the size is only known at
# check time, this function's result cannot be precompiled.
# Note: nowadays this function is only needed in order to support
# old-style params like (80, 90). As soon as we drop support for that
# (can happen any decade now), we can get rid of this function.
def _df_levels_for_size(level_spec, size_bytes):
    """Pick (warn, crit) from a list of size dependent levels.

    level_spec is a list of (size in bytes, (warn, crit)) entries. Among all
    entries whose size is smaller than size_bytes the one with the largest
    size wins, so the order of the entries is not important. If no entry
    applies, (100.0, 100.0) is returned.
    """
    warn, crit = 100.0, 100.0  # entry not found in list
    best_size = 0
    for min_size, these_levels in level_spec:
        if size_bytes > min_size and min_size >= best_size:
            warn, crit = these_levels
            best_size = min_size
    return warn, crit


def get_filesystem_levels(host, mountpoint, size_gb, params):
    """Compute the effective levels for one filesystem.

    Starts with the factory settings, overlays filesystem_default_levels and
    then params. Both may be a dictionary or an old style tuple
    (warn, crit[, magic]). The size of the filesystem (size_gb) is needed
    for size dependent levels and for the magic factor.

    host and mountpoint are not used; they are kept for compatibility with
    existing callers.

    Returns the merged dictionary with these keys set in addition:
      "levels_mb"     : (warn, crit) in MB after applying the magic factor
      "levels_text"   : human readable description of the levels
      "inodes_levels" : (warn, crit) for inodes, or (None, None) if params
                        does not contain "inodes_levels"
    """
    mega = 1024 * 1024
    giga = mega * 1024
    size_bytes = size_gb * giga

    def convert_legacy_levels(value):
        # Unless "flex_levels" is set, levels are always converted to floats
        if isinstance(params, tuple) or not params.get("flex_levels"):
            return tuple(map(float, value))
        return value

    def to_percent(level_mb):
        # Convert a level given in MB into percent of the filesystem size
        return savefloat(level_mb * mega / float(size_bytes)) * 100

    # Start with factory settings
    levels = factory_settings["filesystem_default_levels"].copy()

    # Convert default levels to a dictionary. This is in order to support
    # old style levels like (80, 90)
    if isinstance(filesystem_default_levels, dict):
        fs_default_levels = filesystem_default_levels.copy()
        fs_levels = fs_default_levels.get("levels")
        if fs_levels:
            fs_default_levels["levels"] = convert_legacy_levels(fs_levels)
        # Use the converted copy here. Updating with the unconverted
        # original would silently throw away the conversion done above.
        levels.update(fs_default_levels)
    else:
        levels["levels"] = convert_legacy_levels(filesystem_default_levels[:2])
        if len(filesystem_default_levels) == 2:
            levels["magic"] = None
        else:
            levels["magic"] = filesystem_default_levels[2]

    # If params is a dictionary, make that override the default values
    if isinstance(params, dict):
        levels.update(params)
    else:  # simple format - explicitly override levels and magic
        levels["levels"] = convert_legacy_levels(params[:2])
        if len(params) >= 3:
            levels["magic"] = params[2]

    # Determine real warn, crit levels
    if isinstance(levels["levels"], tuple):
        warn, crit = levels["levels"]
    else:
        # A list of levels. Choose the correct one depending on the
        # size of the current filesystem.
        warn, crit = _df_levels_for_size(levels["levels"], size_bytes)

    # If the magic factor is used, take disk size and magic factor
    # into account in order to move levels
    magic = levels.get("magic")

    # We need a way to disable the magic factor, so check
    # if magic is not 1.0
    if magic and magic != 1.0:
        # convert warn/crit to percentage
        if not isinstance(warn, float):
            warn = to_percent(warn)
        if not isinstance(crit, float):
            crit = to_percent(crit)

        normsize = levels["magic_normsize"]
        hgb_size = size_gb / float(normsize)
        felt_size = hgb_size ** magic
        scale = felt_size / hgb_size
        warn_scaled = 100 - ((100 - warn) * scale)
        crit_scaled = 100 - ((100 - crit) * scale)

        # Make sure levels never get too low due to the magic factor
        lowest_warning_level, lowest_critical_level = levels["levels_low"]
        if warn_scaled < lowest_warning_level:
            warn_scaled = lowest_warning_level
        if crit_scaled < lowest_critical_level:
            crit_scaled = lowest_critical_level
    else:
        # Non-float levels are absolute values in MB
        if not isinstance(warn, float):
            warn_scaled = to_percent(warn)
        else:
            warn_scaled = warn
        if not isinstance(crit, float):
            crit_scaled = to_percent(crit)
        else:
            crit_scaled = crit

    size_mb = size_gb * 1024
    warn_mb = savefloat(size_mb * warn_scaled / 100)
    crit_mb = savefloat(size_mb * crit_scaled / 100)
    levels["levels_mb"] = (warn_mb, crit_mb)

    if isinstance(warn, float):
        # Negative levels refer to the free space instead of the used space
        if warn_scaled < 0 and crit_scaled < 0:
            label = 'warn/crit at free space below'
            warn_scaled *= -1
            crit_scaled *= -1
        else:
            label = 'warn/crit at'
        levels["levels_text"] = "(%s %.2f/%.2f%%)" % (label, warn_scaled, crit_scaled)
    else:
        if warn * mega < 0 and crit * mega < 0:
            label = 'warn/crit at free space below'
            warn *= -1
            crit *= -1
        else:
            label = 'warn/crit at'
        warn_hr = get_bytes_human_readable(warn * mega)
        crit_hr = get_bytes_human_readable(crit * mega)
        levels["levels_text"] = "(%s %s/%s)" % (label, warn_hr, crit_hr)

    # Inode levels are only applied if they are present in params
    if "inodes_levels" in params:
        inodes_spec = levels["inodes_levels"]
        if isinstance(inodes_spec, tuple):
            inodes_warn, inodes_crit = inodes_spec
        else:
            # A list of inode levels, chosen by filesystem size as above
            inodes_warn, inodes_crit = _df_levels_for_size(inodes_spec, size_bytes)
        levels["inodes_levels"] = inodes_warn, inodes_crit
    else:
        levels["inodes_levels"] = (None, None)

    return levels
# Legacy function for checks that do not support groups yet
def df_check_filesystem(hostname, mountpoint, size_mb, avail_mb, reserved_mb, params):
    """Check a single filesystem; entry point for checks without group support.

    Wraps the values into a one-element filesystem list and hands it to
    df_check_filesystem_list() with the mount point as item. hostname is
    not used.
    """
    single_fs = (mountpoint, size_mb, avail_mb, reserved_mb)
    return df_check_filesystem_list(mountpoint, params, [single_fs])
# New function for checks that support groups.
def df_check_filesystem_list(item, params, fslist_blocks, fslist_inodes = None):
    """Check one filesystem or one filesystem group.

    fslist_blocks is a list of (mount point, size_mb, avail_mb, reserved_mb).
    fslist_inodes, if given, is indexed in parallel to fslist_blocks; the
    elements 1 and 2 of each entry are used as total and available inodes.

    If params contains "patterns", all filesystems whose mount point matches
    one of the patterns are summed up and checked as one entity named item.
    Otherwise the filesystem whose mount point equals item is checked.
    Returns the result of df_check_filesystem_single(), or a tuple with
    state 3 if nothing was found.
    """
    # Single filesystem
    if "patterns" not in params:
        for idx, (mp, size_mb, avail_mb, reserved_mb) in enumerate(fslist_blocks):
            if mp != item:
                continue
            if fslist_inodes:
                inodes_total, inodes_avail = fslist_inodes[idx][1], fslist_inodes[idx][2]
            else:
                inodes_total, inodes_avail = None, None
            return df_check_filesystem_single(g_hostname, mp, size_mb, avail_mb, reserved_mb,
                                              inodes_total, inodes_avail, params)
        return (3, "filesystem not found")

    # Filesystem group: add up all filesystems matching one of the patterns
    patterns = params["patterns"]
    count = 0
    total_size_mb = total_avail_mb = total_reserved_mb = 0
    total_inodes = total_inodes_avail = 0
    for idx, (mp, size_mb, avail_mb, reserved_mb) in enumerate(fslist_blocks):
        if not any(fnmatch.fnmatch(mp, pattern) for pattern in patterns):
            continue
        count += 1
        total_size_mb += size_mb
        total_avail_mb += avail_mb
        total_reserved_mb += reserved_mb
        if fslist_inodes:
            total_inodes += fslist_inodes[idx][1]
            total_inodes_avail += fslist_inodes[idx][2]

    # If no filesystem has been found we cannot do the
    # actual check since the size is zero.
    if count == 0:
        return (3, "No filesystem matching the patterns")

    status, infotext, perfdata = df_check_filesystem_single(
        g_hostname, item, total_size_mb, total_avail_mb, total_reserved_mb,
        total_inodes, total_inodes_avail, params)
    infotext += " (%d filesystems)" % count
    return status, infotext, perfdata
def _df_format_hours_left(hours):
    """Render a time span given in hours as a coarse, human readable text."""
    if hours > 365 * 24:
        return "more than a year"
    elif hours > 90 * 24:
        return "%0d months" % (hours / (30 * 24))
    elif hours > 4 * 7 * 24:  # 4 weeks
        return "%0d weeks" % (hours / (7 * 24))
    elif hours > 7 * 24:  # 1 week
        return "%0.1f weeks" % (hours / (7 * 24))
    elif hours > 2 * 24:  # 2 days
        return "%0.1f days" % (hours / 24)
    else:
        return "%d hours" % hours


def df_check_filesystem_single(hostname, mountpoint, size_mb, avail_mb, reserved_mb, inodes_total, inodes_avail, params, this_time = None):
    """Check usage, growth trend and inodes of one filesystem.

    hostname is not used (the levels are computed for g_hostname).
    mountpoint is used for the names of the performance variable and of the
    counters. size_mb, avail_mb and reserved_mb describe the filesystem in
    MB. inodes_total and inodes_avail may be None or 0, in which case no
    inode check is done. params is a dictionary or an old style tuple, see
    get_filesystem_levels(). this_time overrides the current time used for
    the trend computation.

    Returns a tuple (status, infotext, perfdata).
    """
    if size_mb == 0:
        return (1, "size of filesystem is 0 MB", [])

    used_mb = size_mb - avail_mb
    used_perc = 100.0 * (float(used_mb) / size_mb)
    size_gb = size_mb / 1024.0

    # Get warning and critical levels already with 'magic factor' applied
    levels = get_filesystem_levels(g_hostname, mountpoint, size_gb, params)
    warn_mb, crit_mb = levels["levels_mb"]

    used_hr = get_bytes_human_readable(used_mb * 1024 * 1024)
    size_hr = get_bytes_human_readable(size_mb * 1024 * 1024)
    # If both numbers end with the same unit (MB, GB, TB), then drop the first one
    if used_hr[-2:] == size_hr[-2:]:
        used_hr = used_hr[:-3]

    infotext = "%s used (%s of %s)" % (
        get_percent_human_readable(used_perc), used_hr, size_hr)

    if warn_mb < 0.0:
        # Negative levels, so the user configured thresholds based on space left.
        # Calculate the upper thresholds based on the size of the filesystem
        crit_mb = size_mb + crit_mb
        warn_mb = size_mb + warn_mb

    status = 0
    if used_mb >= crit_mb:
        status = 2
    elif used_mb >= warn_mb:
        status = 1

    # TODO: In some future version use a fixed name as perf variable
    perf_var = mountpoint.replace(" ", "_")
    perfdata = [(perf_var, str(used_mb) + 'MB', warn_mb, crit_mb, 0, size_mb)]
    perfdata.append(('fs_size', str(size_mb) + 'MB'))

    if isinstance(params, dict):
        show_levels = params.get("show_levels")
        if show_levels == "always" or \
           (show_levels == "onproblem" and status > 0) or \
           (show_levels == "onmagic" and (status > 0 or levels.get("magic", 1.0) != 1.0)):
            infotext += ", " + levels["levels_text"]

        # Read the option from the merged levels: params itself may lack the
        # key, in which case the default applies instead of a KeyError.
        if reserved_mb > 0 and levels.get("show_reserved"):
            reserved_perc = 100.0 * float(reserved_mb) / size_mb
            infotext += ", therein reserved for root: %.1f%% (%.2f MB)" % (reserved_perc, reserved_mb)
            perfdata.append(("reserved", reserved_mb))

    # Trends. The trends are computed in two steps. In the first step we
    # compute the delta to the last check, using a normal check_mk counter.
    # In the second step we compute an average over that counter and can
    # make a long-time prediction. If we have these values we can optionally
    # apply levels.
    # Trend parameters:
    # "trend_range"    : 24,       # hours
    # "trend_mb"       : (10, 20), # MB of change during trend_range
    # "trend_perc"     : (1, 2),   # Percent change during trend_range
    # "trend_timeleft" : (72, 48)  # run time left in hours until full
    problems = []
    MB = 1024 * 1024.0
    H24 = 60 * 60 * 24

    if levels.get("trend_range"):
        try:
            trend_range = levels["trend_range"]  # in hours
            range_sec = trend_range * 3600.0
            if not this_time:
                this_time = time.time()

            # first compute current rate in MB/s by computing delta since last check
            rate = get_rate("df.%s.delta" % mountpoint, this_time, used_mb, allow_negative=True, onwrap=ZERO)
            if levels.get("trend_perfdata"):
                # Change in 1.1.13i3: The trend perfdata always outputs
                # the growth in MB/24h, not any longer in MB/trendrange
                perfdata.append(("growth", rate * H24))

            # average trend, initialize with zero, rate_avg is in MB/s
            rate_avg = get_average("df.%s.trend" % mountpoint,
                                   this_time, rate, range_sec / 60.0, True)

            # rate_avg is growth in MB/s, trend is in MB per trend range hours
            trend = rate_avg * range_sec
            sign = "+" if trend > 0 else ""
            infotext += ", trend: %s%s / %g hours" % \
                (sign, get_bytes_human_readable(trend * MB), trend_range)

            # levels for performance data
            warn_perf, crit_perf = None, None

            # apply levels for absolute growth in MB / interval
            trend_mb = levels.get("trend_mb")
            if trend_mb:
                wa, cr = trend_mb
                warn_perf, crit_perf = wa, cr
                if trend >= wa:
                    problems.append("growing too fast (warn/crit at %s/%s per %.1f h)(!" %
                        (get_bytes_human_readable(wa * MB), get_bytes_human_readable(cr * MB), trend_range))
                    status = max(1, status)
                    if trend >= cr:
                        status = 2
                        problems[-1] += "!"
                    problems[-1] += ")"
            else:
                wa, cr = None, None

            # apply levels for growth relative to filesystem size
            trend_perc = levels.get("trend_perc")
            if trend_perc:
                wa_perc, cr_perc = trend_perc
                wa = wa_perc / 100.0 * size_mb
                cr = cr_perc / 100.0 * size_mb
                if warn_perf is not None:
                    warn_perf = min(warn_perf, wa)
                    crit_perf = min(crit_perf, cr)
                else:
                    warn_perf, crit_perf = wa, cr
                if trend >= wa:
                    problems.append("growing too fast (warn/crit at %.3f%%/%.3f%% per %.1f h)(!" %
                        (wa_perc, cr_perc, trend_range))
                    status = max(1, status)
                    if trend >= cr:
                        status = 2
                        problems[-1] += "!"
                    problems[-1] += ")"

            # compute time until filesystem is full (only for positive trend, of course)
            # The start value of hours_left is negative. The pnp graph and the perfometer
            # will interpret this as infinite -> not growing
            hours_left = -1
            if trend > 0:
                space_left = size_mb - used_mb
                hours_left = space_left / trend * trend_range
                timeleft = levels.get("trend_timeleft")
                if timeleft:
                    wa, cr = timeleft
                    if hours_left <= cr:
                        status = 2
                        problems.append("only %s until disk full(!!)" % _df_format_hours_left(hours_left))
                    elif hours_left <= wa:
                        status = max(status, 1)
                        problems.append("only %s until disk full(!)" % _df_format_hours_left(hours_left))
                    elif hours_left <= wa * 2 or levels.get("trend_showtimeleft"):
                        problems.append("time left until disk full: %s" % _df_format_hours_left(hours_left))
                elif levels.get("trend_showtimeleft"):
                    problems.append("time left until disk full: %s" % _df_format_hours_left(hours_left))

            if levels.get("trend_perfdata"):
                # New in 1.1.13i3: output trend not as MB / trend_range, but as
                # MB / 24 hours. The same holds for the warn and crit information.
                # It is configured in MB / trend range but in the performance data
                # it's sent as MB / 24h.
                # Explicit None checks so that a level of 0 is not turned into None.
                if warn_perf is not None:
                    warn_perf_24h = warn_perf / range_sec * H24
                else:
                    warn_perf_24h = None
                if crit_perf is not None:
                    crit_perf_24h = crit_perf / range_sec * H24
                else:
                    crit_perf_24h = None
                perfdata.append(("trend", rate_avg * H24,
                                 warn_perf_24h, crit_perf_24h,
                                 0, size_mb / trend_range))

            if levels.get("trend_showtimeleft"):
                perfdata.append(("trend_hoursleft", hours_left))

        except MKCounterWrapped:
            # need more data for computing a trend
            # In this case erase all perfdata to prevent an rrd file which has no space
            # for the trend information
            perfdata = []

    if problems:
        infotext += " - %s" % ", ".join(problems)
        problems = []

    # Check inode levels
    inode_status = 0
    if inodes_total:
        inodes_avail_perc = 100.0 * inodes_avail / inodes_total
        inodes_warn, inodes_crit = levels["inodes_levels"]
        if inodes_warn is not None:
            # Levels in absolute numbers (of available inodes)
            if isinstance(inodes_warn, int):
                if inodes_crit > inodes_avail:
                    inode_status = 2
                    problems.append("less than %dk inodes available(!!)" % (inodes_crit / 1000))
                elif inodes_warn > inodes_avail:
                    inode_status = 1
                    problems.append("less than %dk inodes available(!)" % (inodes_warn / 1000))
                # The performance value is the number of *used* inodes, so
                # convert the levels accordingly (as done for percentages below)
                inodes_warn_abs = inodes_total - inodes_warn
                inodes_crit_abs = inodes_total - inodes_crit

            # Levels in percent (of available inodes)
            else:
                if inodes_crit > inodes_avail_perc:
                    inode_status = 2
                    problems.append("less than %0.2f%% inodes available(!!)" % inodes_crit)
                elif inodes_warn > inodes_avail_perc:
                    inode_status = 1
                    problems.append("less than %.02f%% inodes available(!)" % inodes_warn)
                inodes_warn_abs = (100 - inodes_warn) / 100.0 * inodes_total
                inodes_crit_abs = (100 - inodes_crit) / 100.0 * inodes_total
        else:
            inodes_warn_abs = None
            inodes_crit_abs = None

        status = max(status, inode_status)

        # With "onlow" inodes are only shown if less than 50% are available
        show_inodes = levels["show_inodes"]
        if show_inodes == "always" or \
           (show_inodes == "onlow" and (inode_status or inodes_avail_perc < 50)) or \
           (show_inodes == "onproblem" and inode_status):
            infotext += ", inodes available: %dk/%0.2f%%" % (inodes_avail / 1000, inodes_avail_perc)

        perfdata += [("inodes_used", inodes_total - inodes_avail, inodes_warn_abs, inodes_crit_abs, 0, inodes_total)]

        if problems:
            infotext += " - %s" % ", ".join(problems)
            problems = []

    return (status, infotext, perfdata)
|