/usr/share/pcp/examples/pmie/RAS is in pcp 3.10.8build1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
//
// Some System Reliability, Availability and Serviceability (RAS) Checks
//
// Sampling interval used by the rule below.
delta = 20 sec;

//
// For Origin systems, sequence number errors are not indicative of
// a problem, but persistent checkbit and/or retry errors may indicate
// a CrayLink interconnect problem.
//
// "Persistent" here means: for at least one router port (some_inst),
// the condition holds at every one of the three most recent samples
// (all_sample over @0..2), i.e. at each sample either the checkbit
// error metric or the retry error metric for that port is > 0.
//
// The alarm text names checkbit errors (not sequence number errors),
// matching the metrics actually tested.  "%i" is expanded by pmie to
// the instance(s) (router ports) for which the rule is true; the
// "30mins" qualifier limits how often the alarm is repeated (see
// pmie(1) for the exact action-interval semantics).
//
some_inst (
    all_sample (
        hw.router.perport.cb_errors @0..2 > 0 ||
        hw.router.perport.retry_errors @0..2 > 0
    )
)
-> alarm 30mins "CrayLink checkbit and/or Retry errors: " "%i ";
|