This file is indexed.

/usr/share/check_mk/checks/diskstat.include is in check-mk-server 1.2.8p16-1ubuntu0.1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2014             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software;  you can redistribute it and/or modify it
# under the  terms of the  GNU General Public License  as published by
# the Free Software Foundation in version 2.  check_mk is  distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
# out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
# PARTICULAR PURPOSE. See the  GNU General Public License for more de-
# tails. You should have  received  a copy of the  GNU  General Public
# License along with GNU Make; see the file  COPYING.  If  not,  write
# to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
# Boston, MA 02110-1301 USA.

# Global switch that decides which services inventory_diskstat_generic()
# creates:
#   "rule"    - look up the rule set diskstat_inventory for the host and use
#               the first matching entry as list of modes; if no rule
#               matches, fall back to the single "SUMMARY" service
#   "single"  - one service per device whose name contains no space
#   "summary" - only the "SUMMARY" service
#   any other value (e.g. "legacy") - the two old services "read" and "write"
diskstat_inventory_mode = "rule" # "summary", "single", "legacy"

# Default check parameters. The inventory does not copy this dictionary but
# hands out its *name* ("diskstat_default_levels") as parameter of the
# discovered services. All keys are optional and read with params.get();
# the commented-out entries below show the expected format of each key.
diskstat_default_levels = {
#    "read" :    (10, 20),   # MB/sec
#    "write" :   (20, 40),   # MB/sec
#    "average" : 15,         # min
#    "latency" : (10, 20),   # ms
#    "latency_perfdata" : True,
}

# Rule set for controlling the diskstat inventory in a more fine grained way.
# It is only consulted when diskstat_inventory_mode is "rule". The modes
# understood by the inventory are 'summary', 'legacy', 'physical', 'lvm'
# and 'vxvm'.
diskstat_inventory = []

# Example
# diskstat_inventory = [
#  ( [], [ 'linux' ], ALL_HOSTS ), --> No diskstat on this host
#  ( [ 'summary', 'physical', 'lvm', 'vxvm' ], ALL_HOSTS ),
# ]

def inventory_diskstat_generic(info):
    """Build the list of diskstat services for the current host.

    info is the list of agent lines; only line[1] (the device name) is
    looked at here. Returns None when info is empty, otherwise a list of
    (item, params) tuples where params is either the name of the default
    levels variable or None for the legacy read/write services.
    """
    # Nothing to discover without agent data
    if not info:
        return

    # Work out which inventory modes are active
    if diskstat_inventory_mode == "rule":
        # Rule based configuration: first matching rule wins, summary
        # mode is the default
        matches = host_extra_conf(g_hostname, diskstat_inventory)
        modes = matches[0] if len(matches) > 0 else [ "summary" ]
    elif diskstat_inventory_mode == "single":
        modes = [ "physical" ]
    elif diskstat_inventory_mode == "summary":
        modes = [ "summary" ]
    else:
        modes = [ "legacy" ]

    services = []
    if "summary" in modes:
        services.append(("SUMMARY", "diskstat_default_levels"))

    if "legacy" in modes:
        services.extend([("read", None), ("write", None)])

    # Per-device modes: each one selects devices by the form of their name.
    # The order of this table is the order of the resulting services.
    name_filters = (
        ("physical", lambda name: ' ' not in name),
        ("lvm",      lambda name: name.startswith("LVM ")),
        ("vxvm",     lambda name: name.startswith("VxVM ")),
    )
    for mode_name, name_matches in name_filters:
        if mode_name not in modes:
            continue
        for line in info:
            if name_matches(line[1]):
                services.append((line[1], "diskstat_default_levels"))

    return services



def check_diskstat_line(this_time, item, params, line, mode='sectors'):
    """Evaluate one row of disk counters and return a check result.

    Arguments:
      this_time -- timestamp that is passed on to get_rate()/get_average()
      item      -- service item; only used for building the counter names
      params    -- dictionary of check parameters. Keys read here:
                   "average", "read", "write", "latency",
                   "latency_perfdata", "read_ql", "write_ql", "ql_perfdata"
      line      -- sequence of columns:
                   [0]    node name (None or "" if there is none)
                   [1]    not used in this function
                   [2,3]  read / write counters
                   [4,5]  read / write IO counters (optional)
                   [6]    time counter, handled as milliseconds (optional)
                   [7,8]  read / write queue length counters (optional)
      mode      -- 'sectors': the rate of columns 2 and 3 is multiplied
                   by 512 to get bytes; 'bytes': the rate is used as it is

    Returns a tuple (status, infotext, perfdata).

    get_rate, get_average, check_levels and get_bytes_human_readable are
    not defined in this file; their exact behaviour (e.g. on the first
    sample of a counter) has to be looked up in the Check_MK core.
    """
    average_range = params.get("average")
    if average_range == 0:
        average_range = None # disable averaging when 0 is set

    perfdata = []
    infos = []
    status = 0
    node = line[0]
    if node != None and node != "":
        infos.append("Node %s" % node)
    # Extra perfdata returned by check_levels(), appended at the very end
    prediction_perf = []
    for what, ctr in [ ("read",  line[2]), ("write", line[3]) ]:
        # The node name becomes part of the counter name if there is one
        if node:
            countername = "diskstat.%s.%s.%s" % (node, item, what)
        else:
            countername = "diskstat.%s.%s" % (item, what)

        # unpack levels now, need also for perfdata
        # Only a plain tuple is unpacked into warn/crit; any other kind of
        # levels is still handed to check_levels() below, but the perfdata
        # entry then carries no warn/crit.
        levels = params.get(what)
        if type(levels) == tuple:
            warn, crit = levels
        else:
            warn, crit = None, None

        per_sec = get_rate(countername, this_time, int(ctr))
        # NOTE(review): for any mode other than 'sectors' or 'bytes'
        # bytes_per_sec is not assigned here - confirm no caller does that.
        if mode == 'sectors':
            # compute IO rate in bytes/sec
            bytes_per_sec = per_sec * 512
        elif mode == 'bytes':
            bytes_per_sec = per_sec

        infos.append("%s/sec %s" % (get_bytes_human_readable(bytes_per_sec), what))
        perfdata.append( (what, bytes_per_sec, warn, crit) )
        dsname = what

        # compute average of the rate over ___ minutes
        # With averaging the levels below are applied to the averaged value
        # and the name handed to check_levels() gets the suffix ".avg".
        if average_range != None:
            avg = get_average(countername + ".avg", this_time, bytes_per_sec, average_range)
            dsname = what + ".avg"
            perfdata.append( (dsname, avg) )
            bytes_per_sec = avg

        # check levels
        state, text, extraperf = check_levels(bytes_per_sec, dsname, levels,
                                              unit = "MB/s", scale = 1048576, statemarkers=True)
        if text:
            infos.append(text)
        status = max(state, status)
        prediction_perf += extraperf

    # Add performance data for averaged IO
    # The loop above appended read, read.avg, write, write.avg. Reorder to
    # read, write, read.avg, write.avg.
    if average_range != None:
        perfdata = [ perfdata[0], perfdata[2], perfdata[1], perfdata[3] ]

    # Process IOs when available
    # Note: countername still holds the value of the last loop iteration
    # (the "write" counter), so the counters below are named
    # "<write counter name>.ios" and "<write counter name>.time".
    # NOTE(review): the two comparisons differ (>= 0 versus > 0), and they
    # are applied before int() - confirm that this is intended and which
    # column types the callers pass in.
    ios_per_sec = None
    if len(line) >= 6 and line[4] >= 0 and line[5] > 0:
        reads, writes = map(int, line[4:6])
        ios = reads + writes
        ios_per_sec = get_rate(countername + ".ios", this_time, ios)
        infos.append("IOs: %.2f/sec" % ios_per_sec)

        if params.get("latency_perfdata"):
            perfdata.append(("ios", ios_per_sec))

    # Do Latency computation if this information is available:
    if len(line) >= 7 and line[6] >= 0:
        timems = int(line[6])
        timems_per_sec = get_rate(countername + ".time", this_time, timems)
        # Latency is the time rate divided by the IO rate. Without an IO
        # rate (None or 0) it is reported as 0.0.
        if not ios_per_sec:
            latency = 0.0
        else:
            latency = timems_per_sec / ios_per_sec
        infos.append("Latency: %.2fms" % latency)
        if "latency" in params:
            warn, crit = params["latency"]
            if latency >= crit:
                status = 2
                infos[-1] += "(!!)"
            elif latency >= warn:
                status = max(status, 1)
                infos[-1] += "(!)"
        else:
            warn, crit = None, None

        if params.get("latency_perfdata"):
            perfdata.append(("latency", latency, warn, crit))

    # Queue Lengths (currently only Windows). Windows uses counters here.
    # I have not understood, why....
    # Unlike above, these counter names never contain the node name.
    if len(line) >= 9:
        for what, ctr in [ ("read",  line[7]), ("write", line[8]) ]:
            countername = "diskstat.%s.ql.%s" % (item, what)
            levels = params.get(what + "_ql")
            if levels:
                warn, crit = levels
            else:
                warn, crit = None, None

            qlx = get_rate(countername, this_time, int(ctr))
            # The rate is scaled down by a fixed factor of 10,000,000
            ql = qlx / 10000000.0
            infos.append(what.title() + " Queue: %.2f" % ql)

            # check levels
            # NOTE(review): levels are unpacked above on truthiness but
            # tested here against None. For a value that is falsy but not
            # None, crit and warn are None at this point - confirm.
            if levels != None:
                if ql >= crit:
                    status = 2
                    infos[-1] += "(!!)"
                elif ql >= warn:
                    status = max(status, 1)
                    infos[-1] += "(!)"

            if params.get("ql_perfdata"):
                perfdata.append((what + "_ql", ql))

    perfdata += prediction_perf

    return (status, ", ".join(infos) , perfdata)


def check_diskstat_generic(item, params, this_time, info, mode='sectors'):
    """Sum up the counters of all lines belonging to item and check them.

    Items "read" and "write" are passed on to check_diskstat_old(). For
    "SUMMARY" all devices without a space in their name are added up, for
    any other item all lines with exactly that device name. It is not a
    bug if a disk appears more than once - this can for example happen in
    Windows clusters, even if they are no Check_MK clusters.

    Returns the result of check_diskstat_line() for the summed-up
    counters, or a 2-tuple (3, text) if nothing could be summed up.
    """
    # Services created by the legacy inventory
    if item in ('read', 'write'):
        return check_diskstat_old(item, params, this_time, info)

    is_summary = item == 'SUMMARY'
    # At most 13 counter columns (everything after node and device name)
    totals = [0] * 13
    num_matching = 0

    for line in info:
        if is_summary:
            if line[0] is not None:
                return 3, "summary mode not supported in a cluster"
            if ' ' in line[1]:
                continue # skip non-physical disks
        elif line[1] != item:
            continue

        num_matching += 1
        # zip() cuts the result down to the number of columns of this line
        totals = [ total + int(column)
                   for total, column in zip(totals, line[2:]) ]

    if num_matching == 0:
        return 3, "No matching disk found"

    return check_diskstat_line(this_time, item, params, [None, ''] + totals, mode)


# This is the legacy version of diskstat as used in <= 1.1.10.
# We keep it here for a while in order to be compatible with
# old installations.
def check_diskstat_old(item, params, this_time, info):
    """Legacy check: one service for "read" and one for "write".

    Adds up column 2 (sectors read) or column 3 (sectors written) of all
    devices without a space in their name and reports the rate in MB/s.
    The performance value is the rate in kB/s, formatted as a string.
    Not usable in a cluster, i.e. if a line carries a node name.
    """
    if item not in ('read', 'write'):
        return (3, "invalid item %s" % (item,))

    # column 2: sectors read, column 3: sectors written
    column = 2 if item == 'read' else 3

    # sum up over all devices
    total_sectors = 0
    for row in info:
        if row[0] is not None:
            return 3, "read/write mode not supported in a cluster"
        if ' ' in row[1]:
            continue
        total_sectors += int(row[column])

    sectors_per_sec = get_rate("diskstat." + item, this_time, total_sectors)

    # Diskstat output is in sectors a 512 Byte
    infotext = "%.1f MB/s" % (sectors_per_sec / 2048.0)
    kb_per_sec_text = "%f" % (sectors_per_sec / 2.0)
    return (0, infotext, [ (item, kb_per_sec_text) ])


#.
#   .--Dict based API------------------------------------------------------.
#   |  ____  _      _     _                        _      _    ____ ___    |
#   | |  _ \(_) ___| |_  | |__   __ _ ___  ___  __| |    / \  |  _ \_ _|   |
#   | | | | | |/ __| __| | '_ \ / _` / __|/ _ \/ _` |   / _ \ | |_) | |    |
#   | | |_| | | (__| |_  | |_) | (_| \__ \  __/ (_| |  / ___ \|  __/| |    |
#   | |____/|_|\___|\__| |_.__/ \__,_|___/\___|\__,_| /_/   \_\_|  |___|   |
#   |                                                                      |
#   +----------------------------------------------------------------------+
#   |  The newest generation of Disk IO checks parse all information into  |
#   |  a dictionary, where counters are already resolved. Look at diskstat |
#   |  (the Linux diskstat check) for an example.                          |
#   '----------------------------------------------------------------------'

def diskstat_select_disk(disks, item):
    """Return the metrics dictionary that belongs to item.

    disks maps device names to dictionaries of metrics. For a plain
    device name the entry of that device is returned as it is, or None if
    there is no such device.

    For the item "SUMMARY" a new dictionary is built: all values are
    added up over the devices whose name does not start with "LVM ", and
    afterwards the non-additive metrics (keys starting with "ave" as well
    as "utilization", "latency" and "queue_length") are divided by the
    number of devices that went into the sum. "node" is always None.

    Note: this is not very precise. Strictly speaking the summarization
    would have to be done directly in the parse function. But there we
    have no information about the physical multipath devices and would
    add up the traffic of the paths with the traffic of the device
    itself.
    """
    if item != "SUMMARY":
        if item in disks:
            return disks[item]
        return None

    # No metric is preset explicitly because some devices may not
    # provide all of them.
    totals = { "node" : None }
    if not disks:
        return totals

    num_summed = 0
    for name, metrics in disks.items():
        # Make sure that every metric of every device exists in the
        # result, even if the device itself is skipped below
        for metric in metrics:
            if metric != "node":
                totals.setdefault(metric, 0.0)

        if name.startswith("LVM "):
            continue # LVM devices do not take part in the summary

        # Idle disks are not skipped: they are counted like all others
        num_summed += 1
        for metric, value in metrics.items():
            if metric != "node":
                totals[metric] += value

    if num_summed:
        averaged_metrics = ("utilization", "latency", "queue_length")
        for metric in list(totals):
            if metric.startswith("ave") or metric in averaged_metrics:
                totals[metric] /= num_summed

    return totals

# New version for those diskstat checks that use the new dict
# format. The first one is "diskstat" - the Linux version of
# this check. Look there for examples of the format of the
# dictionary "disks". Example:
# disks = { "sda" : {
#       'node'                       : None,
#       'average_read_request_size'  : 0.0,
#       'average_read_wait'          : 0.0,
#       'average_request_size'       : 40569.90476190476,
#       'average_wait'               : 0.761904761904762,
#       'average_write_request_size' : 40569.90476190476,
#       'average_write_wait'         : 0.0007619047619047619,
#       'read_ios'                   : 0.0,
#       'read_throughput'            : 0.0,
#       'latency'                    : 0.00038095238095238096,
#       'utilization'                : 0.0006153846153846154,
#       'write_ios'                  : 1.6153846153846154,
#       'write_throughput'           : 65536.0,
#       'queue_length'               : 0.0,
# }}
def check_diskstat_dict(item, params, disks):
    """Check one device (or the "SUMMARY") from a dictionary of disk metrics.

    This is a generator yielding one (state, text, perfdata) result per
    output metric: utilization, read/write throughput, the average waits
    and the latency, each only if present for the selected device. The
    last result has no text and carries all numeric metrics as perfdata.
    Nothing is yielded if no metrics are found for the item.

    If params contains "average" (in seconds), all numeric metrics are
    *replaced* by their averaged values. There is no duplication of
    performance data or check output, because there are so many metrics.
    """
    # Take care of previously discovered services
    if item == "read" or item == "write":
        yield 3, ("Sorry, the new version of this check does not "
                  "support one service for read and one for write anymore.")
        return

    now = time.time()
    metrics = diskstat_select_disk(disks, item)
    if not metrics:
        return

    # Averaging: work on a copy in order not to modify our arguments
    avg_prefix = ""
    avg_seconds = params.get("average") # in seconds here!
    if avg_seconds:
        smoothed = {}
        for name, raw in metrics.items():
            if type(raw) in (int, float):
                smoothed[name] = get_average("diskstat.%s.%s.avg" % (item, name),
                                             now, raw, avg_seconds / 60.0)
            else:
                smoothed[name] = raw
        metrics = smoothed
        avg_prefix = "%s average: " % get_age_human_readable(avg_seconds)

    # Utilization - the only output that shows the averaging prefix
    if "utilization" in metrics:
        utilization = metrics["utilization"]
        state, text, extraperf = check_levels(
            utilization, "disk_utilization", params.get("utilization"),
            unit = "%", scale = 0.01, statemarkers=False)
        yield state, "%sUtilization: %.1f%%%s" % (avg_prefix, utilization * 100, text), extraperf

    # Throughput
    for direction in ("read", "write"):
        key = direction + "_throughput"
        if key not in metrics:
            continue
        rate = metrics[key]
        state, text, extraperf = check_levels(
            rate, "disk_" + direction + "_throughput", params.get(direction),
            unit = "MB/s", scale = 1048576, statemarkers=False)
        yield state, "%s: %s/s%s" % (direction.title(), get_bytes_human_readable(rate), text), extraperf

    # Average wait from end to end
    for kind in ("wait", "read_wait", "write_wait"):
        key = "average_" + kind
        if key not in metrics:
            continue
        seconds = metrics[key]
        state, text, extraperf = check_levels(
            seconds, kind, params.get(kind),
            unit = "ms", scale = 0.001, statemarkers=False)
        yield state, "Average %s: %.2f ms%s" % (kind.title().replace("_", " "), seconds * 1000, text), extraperf

    # Average disk latency
    if "latency" in metrics:
        seconds = metrics["latency"]
        state, text, extraperf = check_levels(
            seconds, "disk_latency", params.get("latency"),
            unit = "ms", scale = 0.001, statemarkers=False)
        yield state, "Latency: %.2f ms%s" % (seconds * 1000.0, text), extraperf

    # All the other metrics are not part of the plugin output - simply
    # because of their amount. They are sent as performance data, sorted
    # alphabetically by key. Levels are currently not part of the perfdata.
    perfdata = [ ("disk_" + name, metrics[name])
                 for name in sorted(metrics)
                 if type(metrics[name]) in (int, float) ]

    yield 0, None, perfdata