#!/usr/bin/python
# Monitor MongoDB on Linux

import sys
import time
import pprint
import os
import datetime

# This agent plugin creates various sections out of the MongoDB server status
# information.
# Important: 1) If MongoDB runs as a single instance, the agent data is assigned
#               to the same host where the plugin resides.
#
#            2) If MongoDB is deployed as a replica set, the agent data is
#               piggybacked to a different hostname, named after the replica set
#               name. You have to create a new host in the monitoring system
#               matching the replica set name, or use the piggyback translation
#               rule to modify the hostname according to your needs.
#               Only the primary emits the piggybacked data; any other replica
#               set member stops after its own instance section.
#
# NOTE(review): every section header in this file is the literal "<<>>". That
# looks like section names that were lost at some point -- confirm against the
# deployed plugin. The literals are intentionally left untouched here.

try:
    import pymongo
except ImportError:
    # Reported (message on stderr, exit code 2) by main(), so that merely
    # importing this module has no side effects.
    pymongo = None

# Timestamp layouts found at the start of startupWarnings log lines, e.g.
# "Fri Nov  6 13:44:09" (no year information) and "2015-10-17T05:35:24".
_TIMESTAMP_PATTERNS = ("%a %b %d %H:%M:%S", "%Y-%m-%dT%H:%M:%S")


def _out(text):
    """Write *text* to the agent output (stdout)."""
    sys.stdout.write(text)


def get_timestamp(text):
    """Return the epoch seconds for a log timestamp, or None if unparsable.

    *text* is tried against each known pattern in turn. Patterns without a
    year yield whatever time.mktime() makes of the strptime() default year.
    """
    for pattern in _TIMESTAMP_PATTERNS:
        try:
            return time.mktime(time.strptime(text, pattern))
        except (ValueError, OverflowError):
            # ValueError: text does not match; OverflowError: mktime() cannot
            # represent the date on this platform. Try the next pattern.
            continue
    return None


def is_newer_timestamp(candidate, last_timestamp):
    """Tell whether a log line stamped *candidate* is newer than the state.

    Lines whose timestamp could not be parsed (None) are never newer. Without
    a stored state (None) every parsable line is newer. This spells out the
    ordering the original Python 2 comparison with None produced.
    """
    if candidate is None:
        return False
    return last_timestamp is None or candidate > last_timestamp


def log_line_state(line):
    """Classify a startupWarnings log line as "W", "." or "C".

    "W": the line contains "** WARNING:".
    ".": the text after the first "] " is empty or starts with "** ".
    "C": everything else.
    """
    if "** WARNING:" in line:
        return "W"
    message_start = line.find("]") + 2
    if len(line) == message_start or line[message_start:].startswith("** "):
        return "."
    return "C"


def join_members(repl_info, key):
    """Return the member list stored under *key* as a space separated string.

    A missing (or None) entry yields an empty string instead of raising a
    TypeError inside str.join().
    """
    return " ".join(repl_info.get(key) or [])


def read_state_timestamp(state_file):
    """Return the integer timestamp stored in *state_file*, or None.

    None is returned when the file cannot be read or does not contain an
    integer, so a damaged state file no longer aborts the plugin.
    """
    try:
        with open(state_file) as handle:
            return int(handle.read())
    except (IOError, OSError, ValueError):
        return None


def connect(host, port):
    """Open a MongoClient and return it.

    The connection is probed by listing the database names. If that fails,
    the client is recreated with the SECONDARY read preference. The admin
    database is switched to SECONDARY in either case, as before.
    """
    con = pymongo.MongoClient(host, port)
    try:
        # Probe only: the result is discarded and `con` must stay the client.
        con.database_names()
    except Exception:
        con = pymongo.MongoClient(host, port,
                                  read_preference=pymongo.ReadPreference.SECONDARY)

    con.admin.read_preference = pymongo.ReadPreference.SECONDARY
    return con


def write_instance_section(server_status, repl_info):
    """Emit mode, address (replica set members only), version and pid."""
    _out("<<>>\n")
    if not repl_info:
        _out("mode\tSingle Instance\n")
    else:
        if repl_info.get("ismaster"):
            _out("mode\tPrimary\n")
        elif repl_info.get("secondary"):
            _out("mode\tSecondary\n")
        else:
            _out("mode\tArbiter\n")
        _out("address\t%s\n" % repl_info["me"])

    _out("version\t%s\n" % server_status["version"])
    _out("pid\t%s\n" % server_status["pid"])


def write_replica_sections(con, repl_info):
    """Open the piggyback block for the replica set and emit its sections."""
    _out("<<<<%s>>>>\n" % repl_info["setName"])
    _out("<<>>\n")
    _out("primary\t%s\n" % repl_info.get("primary"))
    _out("hosts\t%s\n" % join_members(repl_info, "hosts"))
    _out("arbiters\t%s\n" % join_members(repl_info, "arbiters"))

    _out("<<>>\n")
    # pformat() output has no trailing newline; terminate it so the following
    # section header starts on a line of its own.
    _out(pprint.pformat(con.admin.command("replSetGetStatus")) + "\n")


def write_asserts_section(server_status):
    """Emit one "<name> <value>" line per entry of the asserts document."""
    _out("<<>>\n")
    for key, value in server_status.get("asserts", {}).items():
        _out("%s %s\n" % (key, value))


def write_connections_section(server_status):
    """Emit one "<name> <value>" line per entry of the connections document."""
    _out("<<>>\n")
    lines = ["%s %s" % item for item in server_status["connections"].items()]
    _out("%s\n" % "\n".join(lines))


def collect_database_info(con):
    """Return {db name: {"collections", "stats", "collstats"}} for all databases."""
    databases = {}
    for name in con.database_names():
        database = con[name]
        collections = database.collection_names()
        stats = database.command("dbstats")
        collstats = {}
        for collection in collections:
            collstats[collection] = database.command("collstats", collection)
        databases[name] = {
            "collections": collections,
            "stats": stats,
            "collstats": collstats,
        }
    return databases


def write_chunks_section(con, databases):
    """Emit shard and chunk counts per namespace, read from config.chunks."""
    _out("<<>>\n")
    col = con.config.chunks
    # The shard list does not depend on the database; query it only once.
    shards = col.distinct("shard")
    for db_name, db_data in databases.items():
        _out("shardcount %d\n" % len(shards))
        for collection in db_data.get("collections"):
            nsfilter = "%s.%s" % (db_name, collection)
            _out("nscount %s %s\n" % (nsfilter, col.find({"ns": nsfilter}).count()))
            for shard in shards:
                matches = col.find({"ns": nsfilter, "shard": shard}).count()
                _out("shardmatches %s#%s %s\n" % (nsfilter, shard, matches))


def write_locks_section(server_status):
    """Emit the activeClients and currentQueue entries of globalLock."""
    _out("<<>>\n")
    global_lock_info = server_status.get("globalLock")
    if not global_lock_info:
        return
    for what in ["activeClients", "currentQueue"]:
        if what in global_lock_info:
            for key, value in global_lock_info[what].items():
                _out("%s %s %s\n" % (what, key, value))


def write_flushing_section(server_status):
    """Emit the backgroundFlushing figures, if the server reports them."""
    _out("<<>>\n")
    flushing = server_status.get("backgroundFlushing")
    if not flushing:
        # Without this document there is nothing to report; keep the section
        # empty instead of aborting all sections that follow.
        return
    _out("average_ms %s\n" % flushing["average_ms"])
    _out("last_ms %s\n" % flushing["last_ms"])
    _out("flushed %s\n" % flushing["flushes"])


def write_memory_section(server_status):
    """Emit the entries of the mem and extra_info documents."""
    _out("<<>>\n")
    for key, value in server_status["mem"].items():
        _out("%s %s\n" % (key, value))
    for key, value in server_status["extra_info"].items():
        _out("%s %s\n" % (key, value))


def write_counters_section(server_status):
    """Emit the opcounters and opcountersRepl entries."""
    _out("<<>>\n")
    for what in ["opcounters", "opcountersRepl"]:
        for key, value in server_status.get(what, {}).items():
            _out("%s %s %s\n" % (what, key, value))


def write_collections_section(databases):
    """Emit every collstats value as "db<TAB>collection<TAB>key<TAB>value"."""
    _out("<<>>\n")
    for dbname, dbdata in databases.items():
        for collname, colldata in dbdata.get("collstats", {}).items():
            for what, value in colldata.items():
                _out("%s\t%s\t%s\t%s\n" % (dbname, collname, what, value))


def write_startup_warnings_section(con):
    """Emit the startupWarnings log lines that are newer than the last run.

    The timestamp of the newest line is remembered in
    $MK_VARDIR/mongodb.state. Without MK_VARDIR only the headers are written.
    """
    _out("<<>>\n")
    _out("[[[MongoDB startupWarnings]]]\n")
    startup_warnings = con.admin.command({"getLog": "startupWarnings"})

    var_dir = os.environ.get("MK_VARDIR")
    if not var_dir:
        return

    state_file = "%s/mongodb.state" % var_dir
    last_timestamp = None
    output_all = False

    if os.path.exists(state_file):
        last_timestamp = read_state_timestamp(state_file)
        if last_timestamp is not None:
            year_available = time.localtime(last_timestamp).tm_year >= 2015
            # Note: there is no year information in these loglines.
            # As a workaround we look at the creation date (year) of the last
            # state file. If it differs, we start from the beginning.
            if not year_available:
                statefile_year = time.localtime(os.stat(state_file).st_ctime).tm_year
                if time.localtime().tm_year != statefile_year:
                    output_all = True

    log_lines = startup_warnings["log"]
    for line in log_lines:
        # The timestamp is everything before the first "." of the line.
        stamp = get_timestamp(line.split(".")[0])
        if output_all or is_newer_timestamp(stamp, last_timestamp):
            _out("%s %s\n" % (log_line_state(line), line))

    # Update the state file, but only with a timestamp that could be parsed;
    # otherwise the previous state is kept untouched.
    if log_lines:
        newest = get_timestamp(log_lines[-1].split(".")[0])
        if newest is not None:
            with open(state_file, "w") as handle:
                handle.write("%d" % newest)


def main():
    """Query the local MongoDB instance and print all agent sections."""
    if pymongo is None:
        sys.stderr.write("ERROR: Unable to import pymongo module\n")
        sys.exit(2)

    # TODO: might be implemented in the future..
    host = None
    port = None

    try:
        con = connect(host, port)

        # if user and passwd:
        #     db = con["admin"]
        #     if not db.authenticate(user, passwd):
        #         sys.exit("Username/Password incorrect")

        server_status = con.admin.command("serverStatus")
    except Exception:
        _out("<<>>\n")
        _out("error\tInstance is down\n")
        sys.exit(0)

    repl_info = server_status.get("repl")
    write_instance_section(server_status, repl_info)

    if repl_info:
        if not repl_info.get("ismaster"):
            # Only the primary reports the data of the replica set.
            sys.exit(0)
        write_replica_sections(con, repl_info)

    write_asserts_section(server_status)
    write_connections_section(server_status)

    databases = collect_database_info(con)
    write_chunks_section(con, databases)
    write_locks_section(server_status)
    write_flushing_section(server_status)

    # The indexCounters section is not emitted; it was already disabled
    # (commented out as "Unused") in the previous version of this script.

    write_memory_section(server_status)
    write_counters_section(server_status)
    write_collections_section(databases)
    write_startup_warnings_section(con)

    # Close the piggyback block.
    _out("<<<<>>>>\n")


if __name__ == "__main__":
    main()