#!/usr/bin/python # -*- encoding: utf-8; py-indent-offset: 4 -*- # +------------------------------------------------------------------+ # | ____ _ _ __ __ _ __ | # | / ___| |__ ___ ___| | __ | \/ | |/ / | # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / | # | | |___| | | | __/ (__| < | | | | . \ | # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ | # | | # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de | # +------------------------------------------------------------------+ # # This file is part of Check_MK. # The official homepage is at http://mathias-kettner.de/check_mk. # # check_mk is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by # the Free Software Foundation in version 2. check_mk is distributed # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with- # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. See the GNU General Public License for more de- # tails. You should have received a copy of the GNU General Public # License along with GNU Make; see the file COPYING. If not, write # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, # Boston, MA 02110-1301 USA. # This agent plugin has been built to collect information from SAP R/3 systems # using RFC calls. It needs the python module sapnwrfc (available in Check_MK # git at agents/sap/sapnwrfc) and the nwrfcsdk (can be downloaded from SAP # download portal) installed to be working. You can configure the agent plugin # using the configuration file /etc/check_mk/sap.cfg (a sample file can be # found in Check_MK git at agents/sap/sap.cfg) to tell it how to connect to # your SAP instance and which values you want to fetch from your system to be # forwarded to and checked by Check_MK. # # This current agent has been developed and tested with: # python-sapnwrfc-0.19 # # During development the "CCMS_Doku.pdf" was really helpful. 
import os, sys, fcntl
import time, datetime

# sapnwrfc needs to know where the libs are located. During
# development the import failed, since the module did not
# find the libraries. So we try the import early and print a
# helpful hint when it fails for one of the known reasons.
try:
    import sapnwrfc
except ImportError as e:
    import_error = str(e)
    if 'sapnwrfc.so' in import_error:
        # The Python module was found, but its shared library could not
        # be loaded by the dynamic linker.
        sys.stderr.write(
            'Unable to find the library sapnwrfc.so. Maybe you need to put a file pointing to\n'
            'the sapnwrfc library directory into the /etc/ld.so.conf.d directory. For example\n'
            'create the file /etc/ld.so.conf.d/sapnwrfc.conf containing the path\n'
            '"/usr/sap/nwrfcsdk/lib" and run "ldconfig" afterwards.\n'
        )
        sys.exit(1)
    elif 'No module named' in import_error and 'sapnwrfc' in import_error:
        # Matches both the unquoted and the quoted module name, since the
        # exact wording of this message differs between interpreter versions.
        sys.stderr.write("Missing the Python module sapnwrfc.\n")
        sys.exit(1)
    else:
        # Unknown import problem: let the original traceback through.
        raise

# #############################################################################

# This sign is used to separate the path parts given in the config
SEPARATOR = '/'

# These are the different classes of monitoring objects which
# can be found in the tree.
#
# Summarizes information from several subnodes
MTE_SUMMARY = '050'
# A monitoring object which has several subnodes which lead to the status
# of this object.
# For example it is the "CPU" object on a host
MTE_MON_OBJ = '070'
# Carries performance values (usable as a source for graphs)
MTE_PERFORMANCE = '100'
# Container which may hold several messages
MTE_MSG_CONTAINER = '101'
# Holds exactly one status message
MTE_SINGLE_MSG = '102'
# Long text label, has no status
MTE_LONG_TXT = '110'
# Short text label, has no status
MTE_SHORT_TXT = '111'
# A "folder" without an own state; its state is computed from its children
MTE_VIRTUAL = '199'

# Translation table: SAP color code (key) -> (nagios state code, state name)
STATE_VALUE_MAP = {
    0: (0, 'OK'),    # GRAY (inactive or no current info available) -> OK
    1: (0, 'OK'),    # GREEN  -> OK
    2: (1, 'WARN'),  # YELLOW -> WARNING
    3: (2, 'CRIT'),  # RED    -> CRITICAL
}

# Indexed by the SAP color code as well; yields the logwatch state letter
STATE_LOGWATCH_MAP = [
    'O',
    'O',
    'W',
    'C',
]

# Nodes of these classes are not processed
SKIP_MTCLASSES = [
    MTE_VIRTUAL,
    MTE_SUMMARY,
    MTE_MON_OBJ,
    MTE_SHORT_TXT,
    MTE_LONG_TXT,
]

# Directories may be overridden through the environment; an unset or empty
# variable selects the built-in default.
MK_CONFDIR = os.environ.get("MK_CONFDIR") or "/etc/check_mk"
MK_VARDIR = os.environ.get("MK_VARDIR") or "/var/lib/check_mk_agent"

STATE_FILE = '%s/sap.state' % MK_VARDIR
state_file_changed = False

# #############################################################################

# Connection settings for the SAP R/3 host, used when no config file exists.
local_cfg = {
    'ashost':   'localhost',
    'sysnr':    '00',
    'client':   '100',
    'user':     '',
    'passwd':   '',
    'trace':    '3',
    'loglevel': 'warn',
    #'lang':     'EN',
}

# A list of strings, where each string must match the full path to one or
# several monitor objects. Unix shell patterns are used for matching, so
# the following placeholders are available:
#
#   *      matches everything
#   ?      matches any single character
#   [seq]  matches any character in seq
#   [!seq] matches any character not in seq
#
# The * matches the whole following string and does not end on next "/".
# For examples, take a look at the default config file (/etc/check_mk/sap.cfg).
monitor_paths = [
    'SAP CCMS Monitor Templates/Dialog Overview/*',
]
monitor_types = []
config_file = MK_CONFDIR + '/sap.cfg'

# The config file is executed as Python code in the namespace of this module,
# so it may assign cfg, monitor_paths and monitor_types.
cfg = {}
if os.path.exists(config_file):
    execfile(config_file)
    # A single connection may be configured as a plain dict. Normalize it to
    # a list of connection dicts.
    if isinstance(cfg, dict):
        cfg = [ cfg ]
else:
    cfg = [ local_cfg ]

# Load the state file into memory
# NOTE(review): the state file is evaluated as Python code (it is written as
# repr() of a dict holding datetime objects). It must only be writable by
# trusted users.
try:
    with open(STATE_FILE) as state_fh:
        states = eval(state_fh.read())
    if not isinstance(states, dict):
        raise ValueError('unexpected content of type %s' % type(states).__name__)
except IOError:
    states = {}
except (SyntaxError, NameError, TypeError, ValueError) as e:
    # An empty or damaged state file must not break every following run.
    # Starting with empty states only means already seen alerts are
    # reported once more.
    sys.stderr.write('ERROR: Ignoring unreadable state file %s (%s)\n' % (STATE_FILE, e))
    states = {}

# index of all logfiles which have been found in a run. This is used to
# remove logfiles which are not available anymore from the states dict.
logfiles = []

# #############################################################################

#
# HELPERS
#

import fnmatch

def to_be_monitored(path, toplevel_match = False):
    """Return True when path matches at least one rule of monitor_paths.

    With toplevel_match enabled, rules containing more than one "/" are cut
    down to their first two components before matching.
    """
    for rule in monitor_paths:
        if toplevel_match and rule.count('/') > 1:
            rule = '/'.join(rule.split('/')[:2])

        if fnmatch.fnmatch(path, rule):
            return True
    return False

def node_path(tree, node, path = ''):
    """Return the SEPARATOR joined short names from the root down to node.

    path holds the already collected lower part of the path and is used by
    the recursion. ALPARINTRE is treated as 1-based index of the parent node
    within tree; values <= 0 end the recursion.
    """
    if path:
        path = node['MTNAMESHRT'].rstrip() + SEPARATOR + path
    else:
        path = node['MTNAMESHRT'].rstrip()

    if node['ALPARINTRE'] > 0:
        parent_node = tree[node['ALPARINTRE'] - 1]
        return node_path(tree, parent_node, path)
    return path

#
# API ACCESS FUNCTIONS
#

def query(what, params, debug = False):
    """Invoke the function module named what through the global conn.

    Each key of params is set on the function call object by calling the
    attribute of that name with the value. An error reported in the RETURN
    structure (TYPE "E") is written to stderr, but the function call object
    is returned in any case.
    """
    fd = conn.discover(what)

    if debug:
        sys.stdout.write("Name: %s Params: %s\n" % (fd.name, fd.handle.parameters))
        sys.stdout.write("Given-Params: %s\n" % params)

    f = fd.create_function_call()
    for key, val in params.items():
        getattr(f, key)(val)
    f.invoke()

    ret = f.RETURN.value
    if ret['TYPE'] == 'E':
        sys.stderr.write("ERROR: %s\n" % ret['MESSAGE'].strip())

    return f

def login():
    """Log on to the XAL interface and return the SESSIONID value."""
    f = query('BAPI_XMI_LOGON', {
        'EXTCOMPANY': 'Mathias Kettner GmbH',
        'EXTPRODUCT': 'Check_MK SAP Agent',
        'INTERFACE':  'XAL',
        'VERSION':    '1.0',
    })
    #sys.stdout.write("%s\n" % f.RETURN)
    return f.SESSIONID.value

def logout():
    """Log off from the XAL interface."""
    query('BAPI_XMI_LOGOFF', {
        'INTERFACE': 'XAL',
    })

def mon_list(cfg):
    """Return a list of (monitor set name, monitor name) tuples."""
    f = query("BAPI_SYSTEM_MON_GETLIST", {
        'EXTERNAL_USER_NAME': cfg['user'],
    })
    return [ (mon["MS_NAME"].rstrip(), mon["MONI_NAME"].rstrip())
             for mon in f.MONITOR_NAMES.value ]

#def ms_list( cfg ):
#    f = query("BAPI_SYSTEM_MS_GETLIST", {
#        'EXTERNAL_USER_NAME': cfg['user'],
#    })
#    l = []
#    for ms in f.MONITOR_SETS.value:
#        l.append(ms['NAME'].rstrip())
#    return l

def mon_tree(cfg, ms_name, mon_name):
    """Return the nodes of one monitor, each extended by a 'PATH' entry.

    PATH is the monitor set name followed by the node path computed by
    node_path().
    """
    f = query("BAPI_SYSTEM_MON_GETTREE", {
        'EXTERNAL_USER_NAME': cfg['user'],
        'MONITOR_NAME': {"MS_NAME": ms_name, "MONI_NAME": mon_name},
    })
    tree = f.TREE_NODES.value
    for node in tree:
        node['PATH'] = ms_name + SEPARATOR + node_path(tree, node)
    return tree

def tid(node):
    """Return the TID dict of node, built from its stripped key fields."""
    return {
        'MTSYSID':    node['MTSYSID'].strip(),
        'MTMCNAME':   node['MTMCNAME'].strip(),
        'MTNUMRANGE': node['MTNUMRANGE'].strip(),
        'MTUID':      node['MTUID'].strip(),
        'MTCLASS':    node['MTCLASS'].strip(),
        'MTINDEX':    node['MTINDEX'].strip(),
        'EXTINDEX':   node['EXTINDEX'].strip(),
    }

def mon_perfdata(cfg, node):
    """Return (value, unit) of a performance node.

    The last reported value is divided by 10**DECIMALS when the properties
    of the node report a non-zero number of decimals.
    """
    f = query('BAPI_SYSTEM_MTE_GETPERFCURVAL', {
        'EXTERNAL_USER_NAME': cfg['user'],
        'TID': tid(node),
    })
    value = f.CURRENT_VALUE.value['LASTPERVAL']

    f = query('BAPI_SYSTEM_MTE_GETPERFPROP', {
        'EXTERNAL_USER_NAME': cfg['user'],
        'TID': tid(node),
    })
    if f.PROPERTIES.value['DECIMALS'] != 0:
        # "+ 0.0" forces a float division
        value = (value + 0.0) / 10**f.PROPERTIES.value['DECIMALS']
    uom = f.PROPERTIES.value['VALUNIT'].strip()

    return value, uom

def mon_msg(cfg, node):
    """Return (timestamp, message) of a single message node.

    The timestamp is None when the node reports no date or no time.
    """
    f = query('BAPI_SYSTEM_MTE_GETSMVALUE', {
        'EXTERNAL_USER_NAME': cfg['user'],
        'TID': tid(node),
    })
    data = f.VALUE.value
    dt = parse_dt(data['SMSGDATE'], data['SMSGTIME'])
    return (dt, data['MSG'].strip())

def parse_dt(d, t):
    """Convert a date (YYYYMMDD) and a time (HHMMSS) string to a datetime.

    Returns None when one of both strings is empty after stripping.
    """
    d = d.strip()
    t = t.strip()
    if not d or not t:
        return None
    return datetime.datetime(*time.strptime(d + t, '%Y%m%d%H%M%S')[:6])

def mon_alerts(cfg, node):
    """Return the alerts reported for node."""
    f = query('BAPI_SYSTEM_MTE_GETALERTS', {
        'EXTERNAL_USER_NAME': cfg['user'],
        'TID': tid(node),
    })
    return f.ALERTS.value

def aid(alert):
    """Return the AID dict of alert, built from its key fields."""
    return {
        "ALSYSID":   alert["ALSYSID"],
        "MSEGNAME":  alert["MSEGNAME"],
        "ALUNIQNUM": alert["ALUNIQNUM"],
        "ALINDEX":   alert["ALINDEX"],
        "ALERTDATE": alert["ALERTDATE"],
        "ALERTTIME": alert["ALERTTIME"],
    }

def alert_details(cfg, alert):
    """Return (state structure, stripped message text) of alert."""
    f = query('BAPI_SYSTEM_ALERT_GETDETAILS', {
        'EXTERNAL_USER_NAME': cfg['user'],
        'AID': aid(alert),
    })
    #prop  = f.PROPERTIES.value
    state = f.VALUE.value
    msg   = f.XMI_EXT_MSG.value['MSG'].strip()
    return state, msg

def process_alerts(cfg, logs, ms_name, mon_name, node, alerts):
    """Add the not yet seen alerts of node to logs in logwatch format.

    logs maps hostname -> logfile -> list of lines and is modified in place
    and returned. The logfile is registered in the global logfiles list and
    the timestamp of the newest processed alert is stored in the global
    states dict. ms_name and mon_name are not used.
    """
    global state_file_changed

    sid     = node["MTSYSID"].strip() or 'Other'
    context = node["MTMCNAME"].strip() or 'Other'
    path    = node["PATH"]

    # Use the sid as hostname for the logs
    hostname = sid
    logfile  = context + "/" + path
    state_key = (hostname, logfile)

    logfiles.append(state_key)

    lines = []
    logs.setdefault(hostname, {})[logfile] = lines

    newest_log_dt = None
    for alert in alerts:
        dt = parse_dt(alert['ALERTDATE'], alert['ALERTTIME'])

        if state_key in states and states[state_key] >= dt:
            continue # skip log messages which are older than the last cached date

        if not newest_log_dt or dt > newest_log_dt:
            newest_log_dt = dt # store the newest log of this run

        alert_state, alert_msg = alert_details(cfg, alert)
        # Format lines to "logwatch" format
        lines.append('%s %s %s' % (STATE_LOGWATCH_MAP[alert_state['VALUE']],
                                   dt.strftime("%Y-%m-%d %H:%M:%S"), alert_msg))

    if newest_log_dt:
        # Write newest log age to cache to prevent double processing of logs
        states[state_key] = newest_log_dt
        state_file_changed = True
    return logs

def _write_sections(sap_data, logs):
    """Print the collected status lines and log lines per host to stdout."""
    # NOTE(review): the section header lines are printed as '<<>>' without a
    # section name - confirm that this is intended.
    for host, host_sap in sap_data.items():
        sys.stdout.write('<<<<%s>>>>\n' % host)
        sys.stdout.write('<<>>\n')
        sys.stdout.write('%s\n' % '\n'.join(host_sap))
    sys.stdout.write('<<<<>>>>\n')

    for host, host_logs in logs.items():
        sys.stdout.write('<<<<%s>>>>\n' % host)
        sys.stdout.write('<<>>\n')
        for log, lines in host_logs.items():
            sys.stdout.write('[[[%s]]]\n' % log)
            if lines:
                sys.stdout.write('\n'.join(lines) + '\n')
    sys.stdout.write('<<<<>>>>\n')

def check(cfg):
    """Collect and print the data of one SAP connection.

    cfg is the dict of connection settings handed over to rfc_connect(); its
    'user' entry is also used for the queries. The connection is stored in
    the global conn and is closed again even when the processing fails.
    Exceptions are passed on to the caller. An unknown state value of a node
    terminates the whole program with exit code 1.
    """
    global conn
    conn = sapnwrfc.base.rfc_connect(cfg)
    try:
        login()

        logs = {}
        sap_data = {}

        # This loop is used to collect all information from SAP
        for ms_name, mon_name in mon_list(cfg):
            path = ms_name + SEPARATOR + mon_name
            if not to_be_monitored(path, True):
                continue

            tree = mon_tree(cfg, ms_name, mon_name)
            for node in tree:
                if not to_be_monitored(node['PATH']):
                    continue
                #sys.stdout.write("%s\n" % node["PATH"])

                status_details = ''
                perfvalue = '-'
                uom = '-'

                # Use precalculated states
                state = {
                    'VALUE':    node['ACTUALVAL'],
                    'SEVERITY': node['ACTUALSEV'],
                }

                if state['VALUE'] not in STATE_VALUE_MAP:
                    sys.stdout.write('UNHANDLED STATE VALUE\n')
                    sys.exit(1)

                #
                # Handle different object classes individually
                # to get details about them
                #

                mtclass = node['MTCLASS']

                if monitor_types and mtclass not in monitor_types:
                    continue # Skip unwanted classes if class filtering is enabled

                if mtclass == MTE_PERFORMANCE:
                    perfvalue, this_uom = mon_perfdata(cfg, node)
                    # Keep the '-' placeholder when no unit is reported
                    uom = this_uom or uom

                elif mtclass == MTE_SINGLE_MSG:
                    status_details = "%s: %s" % mon_msg(cfg, node)

                elif mtclass == MTE_MSG_CONTAINER:
                    alerts = mon_alerts(cfg, node)
                    logs = process_alerts(cfg, logs, ms_name, mon_name, node, alerts)
                    if len(alerts) > 0:
                        last_alert = alerts[-1]
                        dt = parse_dt(last_alert["ALERTDATE"], last_alert["ALERTTIME"])
                        alert_state, alert_msg = alert_details(cfg, last_alert)
                        last_msg = '%s: %s - %s' % (dt, STATE_VALUE_MAP[alert_state['VALUE']][1], alert_msg)

                        status_details = '%d Messages, Last: %s' % (len(alerts), last_msg)
                    else:
                        status_details = 'The log is empty'

                elif mtclass not in SKIP_MTCLASSES:
                    # Add an error to output on unhandled classes. This has to
                    # be a string; a tuple would be printed as tuple repr.
                    status_details = "UNHANDLED MTCLASS %s" % mtclass

                if mtclass not in SKIP_MTCLASSES:
                    sid     = node["MTSYSID"].strip() or 'Other'
                    context = node["MTMCNAME"].strip() or 'Other'
                    path    = node["PATH"]

                    sap_data.setdefault(sid, [])
                    sap_data[sid].append("%s\t%d\t%3d\t%s\t%s\t%s\t%s" % (context, state['VALUE'],
                                         state['SEVERITY'], path, perfvalue, uom, status_details))

        _write_sections(sap_data, logs)

        logout()
    finally:
        # Do not leak the RFC connection when one of the queries fails
        conn.close()

# It is possible to configure multiple SAP instances to monitor.
# Loop them all, but do not terminate when one connection failed
processed_all = True
try:
    # NOTE(review): the status section header is printed as '<<>>' without a
    # section name - confirm that this is intended.
    for entry in cfg:
        try:
            check(entry)
            sys.stdout.write('<<>>\n%s\tOK\n' % entry['ashost'])
        except sapnwrfc.RFCCommunicationError as e:
            sys.stderr.write('ERROR: Unable to connect (%s)\n' % e)
            sys.stdout.write('<<>>\n%s\tUnable to connect (%s)\n' %
                             (entry['ashost'], e))
            processed_all = False
        except Exception as e:
            sys.stderr.write('ERROR: Unhandled exception (%s)\n' % e)
            sys.stdout.write('<<>>\n%s\tUnhandled exception (%s)\n' %
                             (entry['ashost'], e))
            processed_all = False

    # Now check whether or not an old logfile needs to be removed. This can only
    # be done this way, when all hosts have been reached. Otherwise the cleanup
    # is skipped.
    if processed_all:
        seen_logfiles = set(logfiles)
        # Iterate over a copy of the keys, since entries are deleted on the way
        for key in list(states.keys()):
            if key not in seen_logfiles:
                state_file_changed = True
                del states[key]

    # Only write the state file once per run. And only when it has been changed
    if state_file_changed:
        new_file = STATE_FILE + '.new'
        fd = os.open(new_file, os.O_WRONLY | os.O_CREAT)
        try:
            fcntl.flock(fd, fcntl.LOCK_EX)
            # A left over .new file of an aborted run may be longer than the
            # new content. Truncate it (after the lock has been obtained) to
            # not keep trailing garbage, which would make the file unreadable.
            os.ftruncate(fd, 0)
            os.write(fd, repr(states))
        finally:
            os.close(fd)
        # Replace the old state file in one step
        os.rename(new_file, STATE_FILE)

except Exception as e:
    sys.stderr.write('ERROR: Unhandled exception (%s)\n' % e)

# Always exit with 0; problems are reported through the output and stderr.
sys.exit(0)