/usr/share/check_mk/checks/nvidia is in check-mk-server 1.2.8p16-1ubuntu0.1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2014             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation in version 2. check_mk is distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more de-
# tails. You should have received a copy of the GNU General Public
# License along with GNU Make; see the file COPYING. If not, write
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.
# Default levels for the "nvidia.temp" check (all sensors except the GPU
# core). The tuple is presumably (warn, crit) as interpreted by
# check_temperature -- TODO confirm against temperature.include.
factory_settings["nvidia_temp_default_levels"] = {"levels": (60, 65)}

# Default levels for the "nvidia.temp_core" check (the GPU core sensor),
# same layout as above but with higher thresholds.
factory_settings["nvidia_temp_core_default_levels"] = {"levels": (90, 95)}
def format_nvidia_name(identifier):
    """Turn a raw sensor identifier into the item name of the service.

    Every occurrence of "Temp" is removed from ``identifier``. If what
    remains is exactly "GPUCore" the item is "GPU NVIDIA"; any other
    sensor becomes "System NVIDIA <remainder>".
    """
    sensor = identifier.replace("Temp", "")
    if sensor != "GPUCore":
        # afaik temperature sensors can be GPU or Board, maybe memory
        return "System NVIDIA %s" % sensor
    return "GPU NVIDIA"
def inventory_nvidia_temp(core, info):
    """Yield (item, {}) for each temperature sensor found in ``info``.

    A line counts as a temperature sensor when its first field, with
    colons stripped from both ends, ends in "temp" (case-insensitive).

    core -- True: only report the "GPUCoreTemp" sensor;
            False: report every temperature sensor except that one.
    info -- agent section as a list of lines, each a list of fields.
    """
    for line in info:
        sensor = line[0].strip(":")
        if not sensor.lower().endswith("temp"):
            continue
        is_core_sensor = (sensor == "GPUCoreTemp")
        if core == is_core_sensor:
            yield format_nvidia_name(sensor), {}
def check_nvidia_temp(item, params, info):
    """Check the temperature of the sensor that belongs to ``item``.

    The first line of ``info`` whose sensor matches ``item`` is used: its
    second field is converted to int and handed to check_temperature
    together with ``params`` and the unique name "nvidia_<item>".
    Nothing (None) is returned when no line matches.
    """
    for line in info:
        sensor = line[0].strip(":")
        # The raw sensor name is accepted as well: compatibility code for
        # "old discovered" services.
        if item != format_nvidia_name(sensor) and item != sensor:
            continue
        return check_temperature(int(line[1]), params, "nvidia_%s" % item)
# Registration of the "nvidia.temp" sub-check. Its inventory passes
# core=False, i.e. it covers every temperature sensor except GPUCoreTemp.
check_info["nvidia.temp"] = {
    "check_function": check_nvidia_temp,
    "inventory_function": lambda info: inventory_nvidia_temp(False, info),
    "service_description": "Temperature %s",
    "has_perfdata": True,
    # presumably the include that provides check_temperature -- verify
    "includes": ["temperature.include"],
    "group": "temperature",
    "default_levels_variable": "nvidia_temp_default_levels",
}
# Registration of the "nvidia.temp_core" sub-check. Its inventory passes
# core=True, i.e. it only covers the GPUCoreTemp sensor, and it uses the
# separate (higher) core default levels.
check_info["nvidia.temp_core"] = {
    "check_function": check_nvidia_temp,
    "inventory_function": lambda info: inventory_nvidia_temp(True, info),
    "service_description": "Temperature %s",
    "has_perfdata": True,
    # presumably the include that provides check_temperature -- verify
    "includes": ["temperature.include"],
    "group": "temperature",
    "default_levels_variable": "nvidia_temp_core_default_levels",
}
def inventory_nvidia_errors(info):
    """Discover the item-less GPU error service.

    Returns [(None, None)] when some line of ``info`` starts with the
    field 'GPUErrors:'; otherwise nothing (None) is returned.
    """
    counter_present = any(row[0] == 'GPUErrors:' for row in info)
    if counter_present:
        return [(None, None)]
def check_nvidia_errors(_no_item, _no_params, info):
    """Report the GPU error counter from the agent output.

    _no_item, _no_params -- unused; the service has neither item nor
                            parameters.
    info -- agent section as a list of lines, each a list of fields.

    Returns a (state, text) tuple:
      (0, "No GPU errors")                 counter is zero
      (2, "<n> GPU errors")                counter is non-zero
      (3, "incomplete output from agent")  no usable 'GPUErrors:' line
    """
    for line in info:
        # Skip empty lines instead of failing on line[0].
        if not line or line[0] != "GPUErrors:":
            continue
        try:
            errors = int(line[1])
        except (IndexError, ValueError):
            # The counter line is there but its value is missing or not a
            # number: treat that as incomplete agent output rather than
            # letting the check crash.
            break
        if errors == 0:
            return (0, "No GPU errors")
        return (2, "%d GPU errors" % errors)
    return (3, "incomplete output from agent")
# Registration of the "nvidia.errors" sub-check: a single item-less
# service that reports the value of the 'GPUErrors:' agent line.
check_info["nvidia.errors"] = {
    "check_function": check_nvidia_errors,
    "inventory_function": inventory_nvidia_errors,
    "service_description": "NVIDIA GPU Errors",
    "group": "hw_errors",
}
|