def check_page_faults(con, sample_time, warning, critical, perf_data):
    """Nagios check: page faults per second, sampled over `sample_time` seconds.

    Takes two serverStatus snapshots `sample_time` seconds apart and reports
    the page-fault delta divided by the interval.  Thresholds default to
    10 (warning) / 20 (critical) faults per second.
    """
    warning = warning or 10
    critical = critical or 20
    try:
        try:
            # Newer pymongo exposes SON via pymongo.son_manipulator.
            set_read_preference(con.admin)
            data1 = con.admin.command(
                pymongo.son_manipulator.SON([('serverStatus', 1)]))
            time.sleep(sample_time)
            data2 = con.admin.command(
                pymongo.son_manipulator.SON([('serverStatus', 1)]))
        except:
            # Fallback for older drivers where SON comes from bson's son module.
            data1 = con.admin.command(son.SON([('serverStatus', 1)]))
            time.sleep(sample_time)
            data2 = con.admin.command(son.SON([('serverStatus', 1)]))
        try:
            # extra_info.page_faults is only reported by mongod on Linux.
            page_faults = (int(data2['extra_info']['page_faults']) - int(
                data1['extra_info']['page_faults'])) / sample_time
        except KeyError:
            # Nagios WARNING exit (code 1) when the counter is unavailable.
            print "WARNING - Can't get extra_info.page_faults counter from MongoDB"
            sys.exit(1)
        message = "Page Faults: %i" % (page_faults)
        message += performance_data(
            perf_data, [(page_faults, "page_faults", warning, critical)])
        check_levels(page_faults, warning, critical, message)
    except Exception, e:
        exit_with_general_critical(e)
def check_all_databases_size(con, warning, critical, perf_data):
    """Nagios check: cumulative storage size (in MB) of all databases.

    Lists every database, sums each one's dbstats storageSize, and reports
    the total plus one perf-data entry per database.  Thresholds default to
    100 MB (warning) / 1000 MB (critical).
    """
    warning = warning or 100
    critical = critical or 1000
    try:
        set_read_preference(con.admin)
        all_dbs_data = con.admin.command(
            pymongo.son_manipulator.SON([('listDatabases', 1)]))
    except:
        # Fallback for older pymongo where SON comes from bson.son.
        all_dbs_data = con.admin.command(son.SON([('listDatabases', 1)]))

    total_storage_size = 0
    message = ""
    perf_data_param = [()]  # slot 0 is reserved for the total, filled in below
    for db in all_dbs_data['databases']:
        database = db['name']
        data = con[database].command('dbstats')
        # BUG FIX: use float division; integer division truncated to whole MB
        # and made the round(..., 1) below a no-op (cf. the 1024.0 variant
        # elsewhere in this codebase).
        storage_size = round(data['storageSize'] / 1024.0 / 1024.0, 1)
        message += "; Database %s size: %.0f MB" % (database, storage_size)
        perf_data_param.append((storage_size, database + "_database_size"))
        total_storage_size += storage_size

    perf_data_param[0] = (total_storage_size, "total_size", warning, critical)
    message += performance_data(perf_data, perf_data_param)
    message = "Total size: %.0f MB" % total_storage_size + message
    return check_levels(total_storage_size, warning, critical, message)
def open(url=None, task=None):
    """Open a MongoDB cursor described by a JSON query document.

    `url` is a JSON string (with bson json_util extensions) carrying the
    input URI plus query/fields/skip/limit/timeout/sort/slave_ok options.
    Returns a MongoWrapper around the resulting cursor.

    NOTE(review): shadows the builtin ``open``; ``task`` is accepted but
    unused; ``uri_info`` is parsed but never read — presumably kept for
    validation side effects, confirm.  The find() keyword names
    (spec/fields/timeout/slave_okay) are pymongo-2.x era APIs.
    """
    from mongo_util import get_collection
    query = son.SON(json.loads(url, object_hook=json_util.object_hook))
    uri = query['inputURI']
    uri_info = uri_parser.parse_uri(uri)
    spec = query['query']
    fields = query['fields']
    skip = query['skip']
    limit = query['limit']
    timeout = query['timeout']
    sort = query['sort']
    slave_ok = query['slave_ok']
    # Workaround: connect via the connection URI, then the helper picks the
    # db by ['dbname'].
    collection = get_collection(uri)
    cursor = collection.find(spec=spec, fields=fields, skip=skip, limit=limit,
                             sort=sort, timeout=timeout, slave_okay=slave_ok)
    wrapper = MongoWrapper(cursor)
    return wrapper
def get_server_status(con):
    """Fetch the serverStatus document from the admin database.

    Prefers the modern pymongo SON type; on any failure, retries with the
    bson ``son.SON`` type used by older drivers.
    """
    admin = con.admin
    try:
        set_read_preference(admin)
        status = admin.command(pymongo.son_manipulator.SON([('serverStatus', 1)]))
    except:
        status = admin.command(son.SON([('serverStatus', 1)]))
    return status
def check_replset_state(con):
    """Populate dbStatus with replica-set lag and cluster-state metrics.

    Reads replSetGetStatus, classifies members by stateStr, and records a
    replication delay when any secondary trails the primary's optime by
    more than 300 seconds, plus the raw myState code.
    """
    try:
        if not check_ismaster(con):
            set_read_preference(con.admin)
            data = con.admin.command(
                pymongo.son_manipulator.SON([('replSetGetStatus', 1)]))
    except:
        # Older-driver fallback path.
        data = con.admin.command(son.SON([('replSetGetStatus', 1)]))
    # NOTE(review): if check_ismaster() returns True and nothing raises,
    # `data` is never assigned and the loop below raises NameError —
    # confirm whether that path can occur in practice.
    primary_node = None
    secondary_node = []
    arbiter_node = []
    dbStatus['mongo.replication.replication_deplay'] = 0
    dbStatus['mongo.replication.replication_cluster_state'] = 1
    for member in data['members']:
        if member['stateStr'] == 'PRIMARY':
            primary_node = member
        if member['stateStr'] == 'SECONDARY':
            secondary_node.append(member)
        if member['stateStr'] == 'ARBITER':
            arbiter_node.append(member)
    if primary_node is not None:
        for member in secondary_node:
            # Record the lag only when it exceeds 300 (units per convert_time
            # — presumably seconds, confirm).
            if convert_time(
                    primary_node['optime'].as_datetime()) - convert_time(
                        member['optime'].as_datetime()) > 300:
                dbStatus[
                    'mongo.replication.replication_deplay'] = convert_time(
                        primary_node['optime'].as_datetime()) - convert_time(
                            member['optime'].as_datetime())
    dbStatus['mongo.replication.replication_cluster_state'] = int(
        data['myState'])
def get_next_needed(self, nb_fallacies=1):
    """
    creates a probability-distribution over the fallacy-types and returns
    one or more fallacy-types with a probability given by this distribution.
    Fallacy-types linked by many arguments get proportionally lower
    probability, preventing unequal class-distributions.
    """
    print('starting minvoted')
    argP = self.plugins['arguments']
    pipeline = [
        {"$unwind": "$fallacyId"},
        {"$group": {"_id": "$fallacyId", "count": {"$sum": 1}}},
        {"$sort": son.SON([("count", 1), ("_id", -1)])}
    ]
    counts = list(argP.arguments.aggregate(pipeline))
    # Total argument count, and the sum of each type's "inverse" count —
    # the normalizer for the inverse-frequency distribution below.
    nb_args = sum(entry['count'] for entry in counts)
    inv_nb_args = sum(nb_args - entry['count'] for entry in counts)
    p = [(nb_args - entry['count']) / inv_nb_args for entry in counts]
    assert len(counts)
    # Draw without replacement so the same type is not returned twice.
    indices = np.random.choice(len(counts), nb_fallacies, p=p, replace=False)
    print(indices)
    return [counts[idx]['_id'] for idx in indices]
def delete_course_index(self, course_index):
    """
    Delete the course_index from the persistence mechanism whose id is the given course_index
    """
    index_id = son.SON([
        ('org', course_index['org']),
        ('offering', course_index['offering']),
    ])
    return self.course_index.remove(index_id)
def update_course_index(self, course_index):
    """
    Update the db record for course_index
    """
    # The (org, offering) pair identifies the index record to replace.
    index_id = son.SON([
        ('org', course_index['org']),
        ('offering', course_index['offering']),
    ])
    self.course_index.update(index_id, course_index)
def get_topic_by_ref_count(self, language):
    """
    returns the topic with the minimum number of linked sessions
    """
    topicP = self.plugins['topics']
    # Count sessions per referenced topic, least-referenced first.
    pipeline = [
        {"$unwind": "$out_refersTo"},
        {"$group": {"_id": "$out_refersTo", "count": {"$sum": 1}}},
        {"$sort": son.SON([("count", 1), ("_id", -1)])},
    ]
    referenced = list(self.sessions.aggregate(pipeline))
    ref_topics_id = [entry['_id'] for entry in referenced]
    for topic in topicP.get_all({'out_language': language}):
        # A topic with no sessions at all wins outright.
        if not topic['_id'] in ref_topics_id:
            return topic
    # Otherwise take the least-referenced topic (list is sorted ascending).
    t = topicP.get_by_id({'id': ref_topics_id[0]}, kraken=False)
    print('returning ref topic:', str(t))
    return t
def get_course_index(self, key, ignore_case=False): """ Get the course_index from the persistence mechanism whose id is the given key """ case_regex = ur"(?i)^{}$" if ignore_case else ur"{}" return self.course_index.find_one( son.SON([(key_attr, re.compile(case_regex.format(getattr(key, key_attr)))) for key_attr in ('org', 'course', 'run')]))
def get_server_status(self): try: data = self.connection['admin'].command(pymongo.son_manipulator.SON([('serverStatus', 1)])) except: try: data = self.connection['admin'].command(son.SON([('serverStatus', 1)])) except Exception, e: if type(e).__name__ == "OperationFailure": sys.exit("UNKNOWN - Not authorized!") else: sys.exit("UNKNOWN - Unable to run serverStatus: %s::%s" % (type(e).__name__, unicode_truncate(e.message, 45)))
def check_oplog(con, warning, critical, perf_data): """ Checking the oplog time - the time of the log currntly saved in the oplog collection defaults: critical 4 hours warning 24 hours those can be changed as usual with -C and -W parameters""" warning = warning or 24 critical = critical or 4 try: db = con.local ol = db.system.namespaces.find_one({"name": "local.oplog.rs"}) if (db.system.namespaces.find_one({"name": "local.oplog.rs"}) != None): oplog = "oplog.rs" else: ol = db.system.namespaces.find_one({"name": "local.oplog.$main"}) if (db.system.namespaces.find_one({"name": "local.oplog.$main"}) != None): oplog = "oplog.$main" else: message = "neither master/slave nor replica set replication detected" return check_levels(None, warning, critical, message) try: set_read_preference(con.admin) data = con.local.command( pymongo.son_manipulator.SON([('collstats', oplog)])) except: data = con.admin.command(son.SON([('collstats', oplog)])) ol_size = data['size'] ol_storage_size = data['storageSize'] ol_used_storage = int(float(ol_size) / ol_storage_size * 100 + 1) ol = con.local[oplog] firstc = ol.find().sort("$natural", pymongo.ASCENDING).limit(1)[0]['ts'] lastc = ol.find().sort("$natural", pymongo.DESCENDING).limit(1)[0]['ts'] time_in_oplog = (lastc.as_datetime() - firstc.as_datetime()) message = "Oplog saves " + str( time_in_oplog) + " %d%% used" % ol_used_storage try: #work starting from python2.7 hours_in_oplog = time_in_oplog.total_seconds() / 60 / 60 except: hours_in_oplog = float(time_in_oplog.seconds + time_in_oplog.days * 24 * 3600) / 60 / 60 approx_level = hours_in_oplog * 100 / ol_used_storage message += performance_data( perf_data, [("%.2f" % hours_in_oplog, 'oplog_time', warning, critical), ("%.2f " % approx_level, 'oplog_time_100_percent_used')]) return check_levels(-approx_level, -warning, -critical, message) except Exception, e: return exit_with_general_critical(e)
def get_server_status(mongo_connection):
    """Read server information from a mongoDB connection.

    Falls back to the legacy bson son.SON type if the modern pymongo SON
    path fails for any reason.
    """
    admin_db = mongo_connection.admin
    try:
        set_read_preference(admin_db)
        server_info = admin_db.command(
            pymongo.son_manipulator.SON([('serverStatus', 1)]))
    except:
        server_info = admin_db.command(son.SON([('serverStatus', 1)]))
    return server_info
def check_databases(con):
    """Record the number of databases on the server into dbStatus."""
    try:
        try:
            if not check_ismaster(con):
                set_read_preference(con.admin)
                data = con.admin.command(
                    pymongo.son_manipulator.SON([('listDatabases', 1)]))
        except:
            # Older-driver fallback path.
            data = con.admin.command(son.SON([('listDatabases', 1)]))
        # NOTE(review): if check_ismaster() is True and nothing raises,
        # `data` is unassigned here and len(...) raises NameError, which is
        # then swallowed by the outer handler — confirm intent.
        dbStatus['mongo.databases.count'] = len(data['databases'])
    except Exception, e:
        # NOTE(review): prints the *return value* of the exit helper;
        # presumably exit_with_general_critical returns a message here.
        print exit_with_general_critical(e)
def _tops(self, fields, top=5, hours_ago=None, **kwargs):
    """Return the `top` most frequent value-combinations of `fields`.

    Extra kwargs become $match filters: a ne__/gt__/lt__/gte__/lte__ prefix
    maps to the corresponding mongo operator, anything else matches
    equality.  `hours_ago` restricts to recent records via `timestamp`.
    """
    if isinstance(fields, basestring):
        fields = [fields]
    match_query = {}
    for field in fields:
        match_query[field] = {'$ne': None}
    # Prefix -> mongo operator dispatch (prefixes are non-overlapping).
    operator_prefixes = (('ne__', '$ne'), ('gte__', '$gte'), ('lte__', '$lte'),
                         ('gt__', '$gt'), ('lt__', '$lt'))
    for name, value in kwargs.items():
        for prefix, op in operator_prefixes:
            if name.startswith(prefix):
                match_query[name[len(prefix):]] = {op: value}
                break
        else:
            match_query[name] = value
    if hours_ago:
        match_query['timestamp'] = {
            '$gte': datetime.datetime.now() - datetime.timedelta(hours=hours_ago)
        }
    group_id = {}
    for field in fields:
        group_id[field] = '${}'.format(field)
    pipeline = [
        {'$match': match_query},
        {'$group': {'_id': group_id, 'count': {'$sum': 1}}},
        {'$sort': son.SON([('count', -1)])},
    ]
    res = self.collection.aggregate(pipeline)
    # Legacy pymongo returns {'ok': 1, 'result': [...]} from aggregate().
    if 'ok' in res:
        rows = []
        for r in res.get('result', [])[:top]:
            entry = dict(r['_id'])
            entry['count'] = r['count']
            rows.append(entry)
        return rows
def on_inserted_inventories_event(items):
    """Eve hook: after inventories are inserted, seed each one with the
    current Odoo product list.

    On any failure the freshly inserted inventories are rolled back and the
    exception re-raised.
    """
    db = app.data.driver.db
    col_products = db["products"]
    try:
        products = odoo_products()
        for item in items:
            inventory_id = item["_id"]
            # BUG FIX: insert fresh copies per inventory.  insert_many()
            # mutates its argument (adds '_id'), so reusing the same dicts
            # for a second item would raise a duplicate-key error and leak
            # state between iterations.
            docs = [dict(p, inventory=inventory_id) for p in products]
            col_products.insert_many(docs)
    except Exception:
        # Roll back: remove the inventories this hook was fired for.
        for item in items:
            db["inventories"].delete_one(filter=son.SON({"_id": item["_id"]}))
        raise
def check_databases(con, warning, critical,perf_data=None): try: try: set_read_preference(con.admin) data = con.admin.command(pymongo.son_manipulator.SON([('listDatabases', 1)])) except: data = con.admin.command(son.SON([('listDatabases', 1)])) count = len(data['databases']) message="Number of DBs: %.0f" % count message+=performance_data(perf_data,[(count,"databases",warning,critical,message)]) return check_levels(count,warning,critical,message) except Exception, e: return exit_with_general_critical(e)
def update_course_index(self, course_index, from_index=None):
    """
    Update the db record for course_index.

    Arguments:
        from_index: If set, only update an index if it matches the one
            specified in `from_index`.
    """
    # Default selector: the (org, course, run) triple of the new record.
    selector = from_index or son.SON([
        ('org', course_index['org']),
        ('course', course_index['course']),
        ('run', course_index['run']),
    ])
    self.course_index.update(selector, course_index, upsert=False)
def check_replset_state(con, perf_data, warning="", critical=""): try: warning = [int(x) for x in warning.split(",")] except: warning = [0, 3, 5] try: critical = [int(x) for x in critical.split(",")] except: critical = [8, 4, -1] ok = range(-1, 8) #should include the range of all posiible values try: try: try: set_read_preference(con.admin) data = con.admin.command( pymongo.son_manipulator.SON([('replSetGetStatus', 1)])) except: data = con.admin.command(son.SON([('replSetGetStatus', 1)])) state = int(data['myState']) except pymongo.errors.OperationFailure, e: if e.code == None and str(e).find( 'failed: not running with --replSet"'): state = -1 if state == 8: message = "State: %i (Down)" % state elif state == 4: message = "State: %i (Fatal error)" % state elif state == 0: message = "State: %i (Starting up, phase1)" % state elif state == 3: message = "State: %i (Recovering)" % state elif state == 5: message = "State: %i (Starting up, phase2)" % state elif state == 1: message = "State: %i (Primary)" % state elif state == 2: message = "State: %i (Secondary)" % state elif state == 7: message = "State: %i (Arbiter)" % state elif state == -1: message = "Not running with replSet" else: message = "State: %i (Unknown state)" % state message += performance_data(perf_data, [(state, "state")]) return check_levels(state, warning, critical, message, ok)
def format_uri_with_query(self):
    """Serialize this query spec as a compact JSON string for use as a
    disco input URI.

    Returns the JSON document with fields in a fixed order (son.SON
    preserves insertion order), encoded via bson's json_util.
    """
    # Removed unused local `base` (was assigned self.inputURI, never read).
    queryObj = son.SON()
    for attr in ('inputURI', 'keyField', 'query', 'fields', 'sort',
                 'limit', 'skip', 'timeout', 'slave_ok'):
        queryObj[attr] = getattr(self, attr)
    return json.dumps(queryObj, default=json_util.default,
                      separators=(',', ':'))
def check_all_databases_size(con):
    """Record the cumulative storage size (MB) of all databases into dbStatus."""
    try:
        if not check_ismaster(con):
            set_read_preference(con.admin)
            all_dbs_data = con.admin.command(
                pymongo.son_manipulator.SON([('listDatabases', 1)]))
    except:
        # Older-driver fallback path.
        all_dbs_data = con.admin.command(son.SON([('listDatabases', 1)]))
    # NOTE(review): if check_ismaster() is True and nothing raises,
    # `all_dbs_data` is unassigned and the loop below raises NameError —
    # confirm whether that path can occur.
    total_storage_size = 0
    for db in all_dbs_data['databases']:
        database = db['name']
        data = con[database].command('dbstats')
        # Float division keeps fractional MB (dbstats storageSize is bytes).
        storage_size = data['storageSize'] / 1024.0 / 1024.0
        total_storage_size += storage_size
    dbStatus['mongo.databases.total_storage_size'] = total_storage_size
def find_matching_course_indexes(self, branch=None, search_targets=None):
    """
    Find the course_index matching particular conditions.

    Arguments:
        branch: If specified, this branch must exist in the returned courses
        search_targets: If specified, this must be a dictionary specifying
            field values that must exist in the search_targets of the
            returned courses
    """
    criteria = son.SON()
    if branch is not None:
        criteria['versions.{}'.format(branch)] = {'$exists': True}
    if search_targets:
        for target_key, target_value in search_targets.iteritems():
            criteria['search_targets.{}'.format(target_key)] = target_value
    return self.course_index.find(criteria)
def get_grouped_ads(**kwargs):
    """Return ads grouped by (brand, model, year), newest year first.

    kwargs are passed to build_query() to form the $match stage.  When the
    `filter_outliers` request arg is truthy (default), each group's ads are
    price-filtered through several outlier tests before being returned.
    """
    agg = g.db.ads.aggregate([{
        '$match': build_query(**kwargs)
    }, {
        '$group': {
            '_id': {'brand': '$brand', 'model': '$model', 'year': '$year'},
            'ads': {
                '$push': {
                    'version': '$version',
                    'title': '$title',
                    'price': '$price',
                    'currency': '$currency',
                    'km': '$km',
                    'href': '$href',
                    'img': '$img',
                    'source': '$source'
                }
            }
        }
    }, {
        '$sort': son.SON([('_id.year', -1), ('_id.brand', 1), ('_id.model', 1)])
    }])
    filter_outliers = request.args.get('filter_outliers', 1, type=int)
    if filter_outliers:
        output = []
        for group in agg:
            ads = pd.DataFrame(group['ads'])
            # Sequential filters — order matters: each test sees the rows
            # surviving the previous one.
            ads = ads.loc[~outliers_modified_z_score(ads.price)]
            ads = ads.loc[~outliers_iqr(ads.price)]
            # Finally clamp to the 15th-95th price percentile of survivors.
            ads = ads[(ads.price >= ads.price.quantile(0.15)) &
                      (ads.price <= ads.price.quantile(0.95))]
            output.append({'_id': group['_id'], 'ads': ads.to_dict('records')})
        return jsonify({'groups': output})
    # Unfiltered path: serialize with bson's json_util (handles ObjectId etc).
    return Response(json_util.dumps({'groups': agg}), status=200,
                    mimetype='application/json')
def _top(collecion, fields, top=10, tsgt="20151101", **kwargs):
    """Return the `top` most frequent value-combinations of `fields`
    over roughly the last two years of records.

    Keyword filters: a ne__/gt__/lt__/gte__/lte__ prefix maps to the
    corresponding mongo operator; anything else matches equality.
    NOTE(review): `tsgt` is accepted but never used — the timestamp cutoff
    is hard-coded to 730 days; confirm whether it should drive the filter.
    (`collecion` spelling is part of the public signature, kept as-is.)
    """
    if isinstance(fields, basestring):
        fields = [fields]
    match_query = {}
    for field in fields:
        match_query[field] = {'$ne': None}
    operator_prefixes = (('ne__', '$ne'), ('gte__', '$gte'), ('lte__', '$lte'),
                         ('gt__', '$gt'), ('lt__', '$lt'))
    for name, value in kwargs.items():
        for prefix, op in operator_prefixes:
            if name.startswith(prefix):
                match_query[name[len(prefix):]] = {op: value}
                break
        else:
            match_query[name] = value
    match_query['timestamp'] = {
        '$gte': datetime.datetime.now() - datetime.timedelta(days=730)
    }
    group_id = {}
    for field in fields:
        group_id[field] = '${}'.format(field)
    pipeline = [
        {'$match': match_query},
        {'$group': {'_id': group_id, 'count': {'$sum': 1}}},
        {'$sort': son.SON([('count', -1)])},
    ]
    return list(collecion.aggregate(pipeline))[:top]
def compact_database(con, database): try: olddata = get_stats_db(con, database) collections = get_db_collections(con, database) for collection in collections: if collection.find('system') == -1: print('Compact %s' % collection) con[database].command(son.SON([('compact', collection)])) newdata = get_stats_db(con, database) print ( u"Srhink: %.0f MB" % ( olddata['storageSize'] - newdata['storageSize'] )) return True except Exception, e: return exit_with_general_critical(e)
def check_collections(con, warning, critical,perf_data=None): try: try: set_read_preference(con.admin) data = con.admin.command(pymongo.son_manipulator.SON([('listDatabases', 1)])) except: data = con.admin.command(son.SON([('listDatabases', 1)])) count = 0 for db in data['databases']: dbname = db['name'] count += len(con[dbname].collection_names()) message="Number of collections: %.0f" % count message+=performance_data(perf_data,[(count,"collections",warning,critical,message)]) return check_levels(count,warning,critical,message) except Exception, e: return exit_with_general_critical(e)
def check_collections(con):
    """Record the total number of collections across all databases into dbStatus."""
    try:
        try:
            if not check_ismaster(con):
                set_read_preference(con.admin)
                data = con.admin.command(
                    pymongo.son_manipulator.SON([('listDatabases', 1)]))
        except:
            # Older-driver fallback path.
            data = con.admin.command(son.SON([('listDatabases', 1)]))
        # NOTE(review): if check_ismaster() is True and nothing raises,
        # `data` is unassigned and the loop below raises NameError, which is
        # swallowed by the outer handler — confirm intent.
        count = 0
        for db in data['databases']:
            dbname = db['name']
            count += len(con[dbname].collection_names())
        dbStatus['mongo.collections.count'] = count
    except Exception, e:
        # NOTE(review): prints the *return value* of the exit helper.
        print exit_with_general_critical(e)
def _top_for_ipinfo(collecion, fields, top=10, **kwargs):
    """Group `fields` and return the `top` most frequent combinations.

    NOTE(review): `match_query` is built from `fields` and the kwargs
    filters but is never added to the pipeline — the filters are silently
    ignored.  Possibly intentional for the ipinfo variant; confirm before
    adding a $match stage.
    """
    if isinstance(fields, basestring):
        fields = [
            fields,
        ]
    match_query = dict([(field, {'$ne': None}) for field in fields])
    # Prefixes map to mongo comparison operators; plain names match equality.
    for name, value in kwargs.items():
        if name.startswith('ne__'):
            match_query[name[4:]] = {'$ne': value}
        elif name.startswith('gt__'):
            match_query[name[4:]] = {'$gt': value}
        elif name.startswith('lt__'):
            match_query[name[4:]] = {'$lt': value}
        elif name.startswith('gte__'):
            match_query[name[5:]] = {'$gte': value}
        elif name.startswith('lte__'):
            match_query[name[5:]] = {'$lte': value}
        else:
            match_query[name] = value
    query = [{
        '$group': {
            '_id': dict([(field, '${}'.format(field)) for field in fields]),
            'count': {
                '$sum': 1
            }
        }
    }, {
        '$sort': son.SON([('count', -1)])
    }]
    result_list = list(collecion.aggregate(query))[:top]
    return result_list
def _tops(self, fields, top=5, hours_ago=None):
    """Return the `top` most frequent value-combinations of `fields`.

    When `hours_ago` is given, only records with a recent `timestamp` are
    considered.
    """
    if isinstance(fields, basestring):
        fields = [fields]
    match_query = {}
    for field in fields:
        match_query[field] = {'$ne': None}
    if hours_ago:
        match_query['timestamp'] = {
            '$gte': datetime.datetime.now() - datetime.timedelta(hours=hours_ago)
        }
    group_id = {}
    for field in fields:
        group_id[field] = '${}'.format(field)
    pipeline = [
        {'$match': match_query},
        {'$group': {'_id': group_id, 'count': {'$sum': 1}}},
        {'$sort': son.SON([('count', -1)])},
    ]
    res = self.collection.aggregate(pipeline)
    # Legacy pymongo returns {'ok': 1, 'result': [...]} from aggregate().
    if 'ok' in res:
        rows = []
        for r in res.get('result', [])[:top]:
            entry = dict(r['_id'])
            entry['count'] = r['count']
            rows.append(entry)
        return rows
def get_info_dbs(con):
    """Return the raw listDatabases command result from the admin database.

    Falls back to bson's son.SON type for older pymongo versions.
    """
    try:
        listing = con.admin.command(
            pymongo.son_manipulator.SON([('listDatabases', 1)]))
    except:
        listing = con.admin.command(son.SON([('listDatabases', 1)]))
    return listing