def command(self, command, value=1, check=True, allowable_errors=None,
            codec_options=DEFAULT_CODEC_OPTIONS, _deadline=None, **kwargs):
    """command(command, value=1, check=True, allowable_errors=None, codec_options=DEFAULT_CODEC_OPTIONS)"""
    if isinstance(command, (bytes, unicode)):
        command = SON([(command, value)])
    options = kwargs.copy()
    command.update(options)

    def on_ok(response):
        if check:
            msg = "TxMongo: command {0} on namespace {1} failed with '%s'".format(repr(command), ns)
            _check_command_response(response, msg, allowable_errors)
        return response

    ns = self["$cmd"].with_options(codec_options=codec_options)
    return ns.find_one(command, _deadline=_deadline).addCallback(on_ok)
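# A minimal standalone sketch (not part of the snippet above) of why command
# documents are built with SON rather than a plain dict: the server expects
# the command name to be the first key, and SON preserves insertion order.
# The "filemd5"/"root" values here are illustrative only.
from bson.son import SON

cmd = SON([("filemd5", "my-file-id")])  # command verb must come first
cmd.update({"root": "fs"})              # extra options are appended after it
assert list(cmd.keys())[0] == "filemd5"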
def test_only_secondary_ok_commands_have_read_prefs(self): c = get_connection(read_preference=ReadPreference.SECONDARY) is_mongos = utils.is_mongos(c) if not is_mongos: raise SkipTest("Only mongos have read_prefs added to the spec") # Ensure secondary_ok_commands have readPreference for cmd in secondary_ok_commands: if cmd == "mapreduce": # map reduce is a special case continue command = SON([(cmd, 1)]) cursor = c.pymongo_test["$cmd"].find(command.copy()) # White-listed commands also have to be wrapped in $query command = SON([("$query", command)]) command["$readPreference"] = {"mode": "secondary"} self.assertEqual(command, cursor._Cursor__query_spec()) # map_reduce inline should have read prefs command = SON([("mapreduce", "test"), ("out", {"inline": 1})]) cursor = c.pymongo_test["$cmd"].find(command.copy()) # White-listed commands also have to be wrapped in $query command = SON([("$query", command)]) command["$readPreference"] = {"mode": "secondary"} self.assertEqual(command, cursor._Cursor__query_spec()) # map_reduce that outputs to a collection shouldn't have read prefs command = SON([("mapreduce", "test"), ("out", {"mrtest": 1})]) cursor = c.pymongo_test["$cmd"].find(command.copy()) self.assertEqual(command, cursor._Cursor__query_spec()) # Other commands shouldn't be changed for cmd in ("drop", "create", "any-future-cmd"): command = SON([(cmd, 1)]) cursor = c.pymongo_test["$cmd"].find(command.copy()) self.assertEqual(command, cursor._Cursor__query_spec())
def _gen_find_command(coll, spec, projection, skip, limit, batch_size, options):
    """Generate a find command document."""
    cmd = SON([('find', coll)])
    if '$query' in spec:
        cmd.update([(_MODIFIERS[key], val) for key, val in spec.items()])
    else:
        cmd['filter'] = spec
    if projection:
        cmd['projection'] = projection
    if skip:
        cmd['skip'] = skip
    if limit:
        cmd['limit'] = limit
    if batch_size:
        cmd['batchSize'] = batch_size
    # XXX: Should the check for 1 be here?
    if limit < 0 or limit == 1:
        cmd['singleBatch'] = True
    if options:
        cmd.update([(opt, True) for opt, val in _OPTIONS.items() if options & val])
    return cmd
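# A small usage sketch for the helper above, assuming it is importable in the
# current scope; the collection name and filter are made up. With a plain spec
# (no '$query' wrapper) and options=0, the result is just find plus filter.
cmd = _gen_find_command('users', {'age': {'$gt': 21}}, None, 5, 10, 0, 0)
# -> SON([('find', 'users'), ('filter', {'age': {'$gt': 21}}),
#         ('skip', 5), ('limit', 10)])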
def test_copying(self):
    simple_son = SON([])
    complex_son = SON([('son', simple_son),
                       ('list', [simple_son, simple_son])])
    regex_son = SON([("x", re.compile("^hello.*"))])
    reflexive_son = SON([('son', simple_son)])
    reflexive_son["reflexive"] = reflexive_son

    simple_son1 = copy.copy(simple_son)
    self.assertEqual(simple_son, simple_son1)

    complex_son1 = copy.copy(complex_son)
    self.assertEqual(complex_son, complex_son1)

    regex_son1 = copy.copy(regex_son)
    self.assertEqual(regex_son, regex_son1)

    reflexive_son1 = copy.copy(reflexive_son)
    self.assertEqual(reflexive_son, reflexive_son1)

    # Test deepcopying
    simple_son1 = copy.deepcopy(simple_son)
    self.assertEqual(simple_son, simple_son1)

    regex_son1 = copy.deepcopy(regex_son)
    self.assertEqual(regex_son, regex_son1)

    complex_son1 = copy.deepcopy(complex_son)
    self.assertEqual(complex_son, complex_son1)

    reflexive_son1 = copy.deepcopy(reflexive_son)
    self.assertEqual(reflexive_son.keys(), reflexive_son1.keys())
    self.assertEqual(id(reflexive_son1), id(reflexive_son1["reflexive"]))
def as_command(self, sock_info): """Return a find command document for this query.""" # We use the command twice: on the wire and for command monitoring. # Generate it once, for speed and to avoid repeating side-effects. if self._as_command is not None: return self._as_command explain = '$explain' in self.spec cmd = _gen_find_command( self.coll, self.spec, self.fields, self.ntoskip, self.limit, self.batch_size, self.flags, self.read_concern, self.collation) if explain: self.name = 'explain' cmd = SON([('explain', cmd)]) session = self.session if session: session._apply_to(cmd, False, self.read_preference) # Explain does not support readConcern. if (not explain and session.options.causal_consistency and session.operation_time is not None and not session._in_transaction): cmd.setdefault( 'readConcern', {})[ 'afterClusterTime'] = session.operation_time sock_info.send_cluster_time(cmd, session, self.client) self._as_command = cmd, self.db return self._as_command
def __last_error(namespace, args):
    """Data to send to do a lastError.
    """
    cmd = SON([("getlasterror", 1)])
    cmd.update(args)
    splitns = namespace.split(".", 1)
    return query(0, splitns[0] + ".$cmd", 0, -1, cmd, None, DEFAULT_CODEC_OPTIONS)
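# A standalone sketch of the command document the helper above builds before
# handing it to query(); the write-concern options shown are illustrative.
from bson.son import SON

cmd = SON([("getlasterror", 1)])
cmd.update({"w": "majority", "wtimeout": 1000})
# The verb stays first; the merged options follow it in insertion order.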
class SortStage(object):
    key = '$sort'

    def __init__(self):
        self._sorts = SON()

    def by(self, *args):
        if not args:
            raise RuntimeError('This method needs at least one argument')
        if isinstance(args[0], list):
            return self.by(*args[0])
        if isinstance(args[0], tuple):
            sort_by = SON(args)
        else:
            raise ValueError('The arguments to this method must be tuples')
        self._sorts.update(sort_by)
        return self

    def sort(self, field, direction=1):
        if isinstance(direction, basestring):
            if direction.lower() == 'asc':
                direction = 1
            else:
                direction = -1
        self._sorts[field] = direction
        return self

    def build(self):
        return {self.key: self._sorts}
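# A usage sketch for the stage builder above (assuming Python 2, since it
# relies on basestring); the field names are invented.
stage = SortStage().sort('age', 'desc').by(('name', 1), ('city', -1)).build()
# -> {'$sort': SON([('age', -1), ('name', 1), ('city', -1)])}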
def find_and_modify(self, query={}, update=None, upsert=False, **kwargs):
    def wrapper(result):
        no_obj_error = "No matching object found"
        if not result['ok']:
            if result["errmsg"] == no_obj_error:
                return None
            else:
                raise ValueError("Unexpected Error: %s" % (result,))
        return result.get('value')

    if (not update and not kwargs.get('remove', None)):
        raise ValueError("Must either update or remove")
    if (update and kwargs.get('remove', None)):
        raise ValueError("Can't do both update and remove")

    cmd = SON([("findAndModify", self._collection_name)])
    cmd.update(kwargs)
    # No need to include empty args
    if query:
        cmd['query'] = query
    if update:
        cmd['update'] = update
    if upsert:
        cmd['upsert'] = upsert

    d = self._database["$cmd"].find_one(cmd)
    d.addCallback(wrapper)
    return d
def _new_find_and_modify(self, filter, projection, sort, upsert=None,
                         return_document=ReturnDocument.BEFORE, **kwargs):
    validate_is_mapping("filter", filter)
    if not isinstance(return_document, bool):
        raise ValueError("TxMongo: return_document must be ReturnDocument.BEFORE "
                         "or ReturnDocument.AFTER")

    cmd = SON([("findAndModify", self._collection_name),
               ("query", filter),
               ("new", return_document)])
    cmd.update(kwargs)

    if projection is not None:
        cmd["fields"] = self._normalize_fields_projection(projection)

    if sort is not None:
        cmd["sort"] = dict(sort["orderby"])
    if upsert is not None:
        validate_boolean("upsert", upsert)
        cmd["upsert"] = upsert

    no_obj_error = "No matching object found"

    result = yield self._database.command(cmd, allowable_errors=[no_obj_error], **kwargs)
    defer.returnValue(result.get("value"))
def __last_error(namespace, args):
    """Data to send to do a lastError.
    """
    cmd = SON([("getlasterror", 1)])
    cmd.update(args)
    splitns = namespace.split('.', 1)
    return query(0, splitns[0] + '.$cmd', 0, -1, cmd)
def __query_spec(self): """Get the spec to use for a query. """ operators = {} if self.__ordering: operators["$orderby"] = self.__ordering if self.__explain: operators["$explain"] = True if self.__hint: operators["$hint"] = self.__hint if self.__snapshot: operators["$snapshot"] = True if self.__max_scan: operators["$maxScan"] = self.__max_scan if operators: # Make a shallow copy so we can cleanly rewind or clone. spec = self.__spec.copy() if "$query" not in spec: # $query has to come first spec = SON({"$query": spec}) spec.update(operators) return spec # Have to wrap with $query if "query" is the first key. # We can't just use $query anytime "query" is a key as # that breaks commands like count and find_and_modify. # Checking spec.keys()[0] covers the case that the spec # was passed as an instance of SON or OrderedDict. elif ("query" in self.__spec and (len(self.__spec) == 1 or self.__spec.keys()[0] == "query")): return SON({"$query": self.__spec}) return self.__spec
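# A standalone illustration (not part of the cursor code above) of the
# wrapping that __query_spec performs: when query modifiers such as $orderby
# are present, the filter is nested under $query, and $query must stay first.
from bson.son import SON

filter_doc = {"status": "active"}
spec = SON([("$query", filter_doc)])
spec.update({"$orderby": SON([("created_at", -1)])})
assert list(spec.keys())[0] == "$query"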
def test_only_secondary_ok_commands_have_read_prefs(self): c = get_client(read_preference=ReadPreference.SECONDARY) is_mongos = utils.is_mongos(c) if not is_mongos: raise SkipTest("Only mongos have read_prefs added to the spec") # Ensure secondary_ok_commands have readPreference for cmd in secondary_ok_commands: if cmd == 'mapreduce': # map reduce is a special case continue command = SON([(cmd, 1)]) cursor = c.pymongo_test["$cmd"].find(command.copy()) # White-listed commands also have to be wrapped in $query command = SON([('$query', command)]) command['$readPreference'] = {'mode': 'secondary'} self.assertEqual(command, cursor._Cursor__query_spec()) # map_reduce inline should have read prefs command = SON([('mapreduce', 'test'), ('out', {'inline': 1})]) cursor = c.pymongo_test["$cmd"].find(command.copy()) # White-listed commands also have to be wrapped in $query command = SON([('$query', command)]) command['$readPreference'] = {'mode': 'secondary'} self.assertEqual(command, cursor._Cursor__query_spec()) # map_reduce that outputs to a collection shouldn't have read prefs command = SON([('mapreduce', 'test'), ('out', {'mrtest': 1})]) cursor = c.pymongo_test["$cmd"].find(command.copy()) self.assertEqual(command, cursor._Cursor__query_spec()) # Other commands shouldn't be changed for cmd in ('drop', 'create', 'any-future-cmd'): command = SON([(cmd, 1)]) cursor = c.pymongo_test["$cmd"].find(command.copy()) self.assertEqual(command, cursor._Cursor__query_spec())
def find_and_modify(self, spec, document, upsert=False, manipulate=False, safe=True, multi=False, callback=None, **kwargs): if self._key_ in spec: spec[MONGODB_ID] = spec[self._key_] del spec[self._key_] # add conditions delete_flag is not 1 spec[DELETE_FLAG] = {'$ne': '1'} old_update_data = document.get("$set", None) if old_update_data: old_update_data["last_modify"] = int(time.time()) document['$set'] = old_update_data from bson.son import SON command = SON( [('findAndModify', self._table_), ('query', spec), ('update', document), ('upsert', False), ('new', True)]) command.update(kwargs) result = yield tornado.gen.Task(self.async_client.connection("$cmd", self._db_).find_one, command, _must_use_master=True, _is_command=True) flag = result[0][0]['value'] if flag and self.need_sync: self.sync_update_data(spec, document) callback(flag)
def main():
    persistence = mpl.MongoPersistenceLayer()
    current_epoch = long(time.mktime(time.gmtime()))
    users = persistence.find_records(
        'user',
        {'access_token': {"$exists": True}, 'access_token_expires': {"$gt": current_epoch}},
        {'_id': 1, 'access_token': 1, 'uname': 1, 'friends': 1})
    for user in users:
        client_longitude = -122.063796997
        client_latitude = 37.2538986206
        friend_ids = user['friends']
        uid = user['_id']
        me_friend_ids = [user['_id']] + friend_ids
        logger.debug("uid: %s", uid)
        logger.debug("num of associated users: %s", len(me_friend_ids))
        logger.debug("associated users: %s", me_friend_ids)
        #me_friend_checkins = persistence.CHECKIN.find({"loc" : {"$near": [client_longitude, client_latitude]}, "author_uid": {"$in": me_friend_ids}})
        #me_friend_checkins = persistence.CHECKIN.find({"loc" : {"$near": [client_longitude, client_latitude], "$maxDistance": 2.5}, "author_uid": {"$in": me_friend_ids}})
        s = SON({'$near': [client_longitude, client_latitude]})
        s.update({'$maxDistance': 0.5})
        me_friend_checkins = persistence.CHECKIN.find({"loc": s, "author_uid": {"$in": me_friend_ids}})
        #me_friend_checkins = persistence.CHECKIN.find({"loc" : s}).limit(20)
        count = me_friend_checkins.count()
        logger.debug("length of no max distance checkins: %s", count)
        for c in me_friend_checkins:
            logger.debug("%s", c)
def transform_incoming(self, son, collection):
    """Move _id to the front if it's there.
    """
    if not "_id" in son:
        return son
    transformed = SON({"_id": son["_id"]})
    transformed.update(son)
    return transformed
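# A quick standalone check of the manipulator logic above: seeding a SON with
# _id and then updating it with the original document keeps _id first without
# duplicating it. The document contents are made up.
from bson.son import SON

doc = {"name": "ada", "_id": 42}
transformed = SON({"_id": doc["_id"]})
transformed.update(doc)
# list(transformed.items()) -> [('_id', 42), ('name', 'ada')]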
def _create_user(authdb, user, pwd=None, roles=None, **kwargs):
    cmd = SON([('createUser', user)])
    # X509 doesn't use a password
    if pwd:
        cmd['pwd'] = pwd
    cmd['roles'] = roles or ['root']
    cmd.update(**kwargs)
    return authdb.command(cmd)
def _touch_query(self):
    if self._query_additions:
        spec = SON({'$query': self.spec or {}})
        for k, v in self._query_additions:
            if k == 'sort':
                ordering = spec.setdefault('$orderby', SON())
                ordering.update(v)
        self.spec = spec
def to_mongo(self, use_db_field=True, fields=None): """ Return as SON data ready for use with MongoDB. """ if not fields: fields = [] data = SON() data['_id'] = None data['_cls'] = self._class_name # only root fields ['test1.a', 'test2'] => ['test1', 'test2'] root_fields = set([f.split('.')[0] for f in fields]) for field_name in self: if root_fields and field_name not in root_fields: continue value = self._data.get(field_name, None) field = self._fields.get(field_name) if field is None and self._dynamic: field = self._dynamic_fields.get(field_name) if value is not None: f_inputs = field.to_mongo.__code__.co_varnames ex_vars = {} if fields and 'fields' in f_inputs: key = '%s.' % field_name embedded_fields = [ i.replace(key, '') for i in fields if i.startswith(key)] ex_vars['fields'] = embedded_fields if 'use_db_field' in f_inputs: ex_vars['use_db_field'] = use_db_field value = field.to_mongo(value, **ex_vars) # Handle self generating fields if value is None and field._auto_gen: value = field.generate() self._data[field_name] = value if value is not None: if use_db_field: data[field.db_field] = value else: data[field.name] = value # Only add _cls if allow_inheritance is True if not self._meta.get('allow_inheritance'): data.pop('_cls') return data
def as_doc(self):
    """Get the SON document representation of this DBRef.

    Generally not needed by application developers
    """
    doc = SON([("$ref", self.collection),
               ("$id", self.id)])
    if self.database is not None:
        doc["$db"] = self.database
    doc.update(self.__kwargs)
    return doc
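# A brief usage sketch for the method above, via bson's public DBRef class;
# the collection name, id, database, and extra field are illustrative.
from bson.dbref import DBRef

ref = DBRef("users", 42, database="app", extra_field="x")
ref.as_doc()
# -> SON([('$ref', 'users'), ('$id', 42), ('$db', 'app'), ('extra_field', 'x')])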
def test_clears(self):
    """
    Test clear()
    """
    test_son = SON([(1, 100), (2, 200), (3, 300)])
    test_son.clear()
    self.assertFalse(1 in test_son)
    self.assertEqual(0, len(test_son))
    self.assertEqual(0, len(test_son.keys()))
    self.assertEqual({}, test_son.to_dict())
def test_contains_has(self):
    """
    has_key and __contains__
    """
    test_son = SON([(1, 100), (2, 200), (3, 300)])
    self.assertTrue(1 in test_son)
    self.assertTrue(2 in test_son, "in failed")
    self.assertFalse(22 in test_son, "in succeeded when it shouldn't")
    self.assertTrue(test_son.has_key(2), "has_key failed")
    self.assertFalse(test_son.has_key(22), "has_key succeeded when it shouldn't")
def test_len(self):
    """
    Test len
    """
    test_son = SON()
    self.assertEqual(0, len(test_son))
    test_son = SON([(1, 100), (2, 200), (3, 300)])
    self.assertEqual(3, len(test_son))
    test_son.popitem()
    self.assertEqual(2, len(test_son))
def test_ordered_dict(self):
    a1 = SON()
    a1["hello"] = "world"
    a1["mike"] = "awesome"
    a1["hello_"] = "mike"
    self.assertEqual(a1.items(),
                     [("hello", "world"), ("mike", "awesome"), ("hello_", "mike")])

    b2 = SON({"hello": "world"})
    self.assertEqual(b2["hello"], "world")
    self.assertRaises(KeyError, lambda: b2["goodbye"])
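# A standalone sketch of the ordering behavior exercised by the test above:
# SON remembers insertion order and still compares equal to a plain dict.
from bson.son import SON

s = SON()
s["b"] = 2
s["a"] = 1
assert s == {"a": 1, "b": 2}                     # equality ignores order, like a dict
assert list(s.items()) == [("b", 2), ("a", 1)]   # iteration preserves insertion order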
def _gen_find_command(coll, spec, projection, skip, limit, batch_size, options, read_concern=DEFAULT_READ_CONCERN): """Generate a find command document.""" cmd = SON([("find", coll)]) if "$query" in spec: cmd.update([(_MODIFIERS[key], val) if key in _MODIFIERS else (key, val) for key, val in spec.items()]) if "$explain" in cmd: cmd.pop("$explain") if "$readPreference" in cmd: cmd.pop("$readPreference") else: cmd["filter"] = spec if projection: cmd["projection"] = projection if skip: cmd["skip"] = skip if limit: cmd["limit"] = abs(limit) if limit < 0: cmd["singleBatch"] = True if batch_size: cmd["batchSize"] = batch_size if read_concern.level: cmd["readConcern"] = read_concern.document if options: cmd.update([(opt, True) for opt, val in _OPTIONS.items() if options & val]) return cmd
def _gen_find_command(coll, spec, projection, skip, limit, batch_size, options, read_concern=DEFAULT_READ_CONCERN): """Generate a find command document.""" cmd = SON([('find', coll)]) if '$query' in spec: cmd.update([(_MODIFIERS[key], val) if key in _MODIFIERS else (key, val) for key, val in spec.items()]) if '$explain' in cmd: cmd.pop('$explain') if '$readPreference' in cmd: cmd.pop('$readPreference') else: cmd['filter'] = spec if projection: cmd['projection'] = projection if skip: cmd['skip'] = skip if limit: cmd['limit'] = abs(limit) if limit < 0: cmd['singleBatch'] = True if batch_size: cmd['batchSize'] = batch_size if read_concern.level: cmd['readConcern'] = read_concern.document if options: cmd.update([(opt, True) for opt, val in _OPTIONS.items() if options & val]) return cmd
def __query_spec(self): """Get the spec to use for a query. """ operators = self.__modifiers.copy() if self.__ordering: operators["$orderby"] = self.__ordering if self.__explain: operators["$explain"] = True if self.__hint: operators["$hint"] = self.__hint if self.__comment: operators["$comment"] = self.__comment if self.__max_scan: operators["$maxScan"] = self.__max_scan if self.__max_time_ms is not None: operators["$maxTimeMS"] = self.__max_time_ms if self.__max: operators["$max"] = self.__max if self.__min: operators["$min"] = self.__min if self.__return_key: operators["$returnKey"] = self.__return_key if self.__show_record_id: # This is upgraded to showRecordId for MongoDB 3.2+ "find" command. operators["$showDiskLoc"] = self.__show_record_id if self.__snapshot: operators["$snapshot"] = self.__snapshot if operators: # Make a shallow copy so we can cleanly rewind or clone. spec = self.__spec.copy() # White-listed commands must be wrapped in $query. if "$query" not in spec: # $query has to come first spec = SON([("$query", spec)]) if not isinstance(spec, SON): # Ensure the spec is SON. As order is important this will # ensure its set before merging in any extra operators. spec = SON(spec) spec.update(operators) return spec # Have to wrap with $query if "query" is the first key. # We can't just use $query anytime "query" is a key as # that breaks commands like count and find_and_modify. # Checking spec.keys()[0] covers the case that the spec # was passed as an instance of SON or OrderedDict. elif ("query" in self.__spec and (len(self.__spec) == 1 or next(iter(self.__spec)) == "query")): return SON({"$query": self.__spec}) return self.__spec
def command(self, command, value=1, callback=None, check=True, allowable_errors=[], **kwargs): """Issue a MongoDB command. Send command `command` to the database and return the response. If `command` is an instance of :class:`basestring` then the command {`command`: `value`} will be sent. Otherwise, `command` must be an instance of :class:`dict` and will be sent as is. Any additional keyword arguments will be added to the final command document before it is sent. For example, a command like ``{buildinfo: 1}`` can be sent using: >>> db.command("buildinfo") For a command where the value matters, like ``{collstats: collection_name}`` we can do: >>> db.command("collstats", collection_name) For commands that take additional arguments we can use kwargs. So ``{filemd5: object_id, root: file_root}`` becomes: >>> db.command("filemd5", object_id, root=file_root) :Parameters: - `command`: document representing the command to be issued, or the name of the command (for simple commands only). .. note:: the order of keys in the `command` document is significant (the "verb" must come first), so commands which require multiple keys (e.g. `findandmodify`) should use an instance of :class:`~bson.son.SON` or a string and kwargs instead of a Python `dict`. - `value` (optional): value to use for the command verb when `command` is passed as a string - `**kwargs` (optional): additional keyword arguments will be added to the command document before it is sent .. mongodoc:: commands """ if isinstance(command, str): command = SON([(command, value)]) command.update(kwargs) self.connection("$cmd").find_one(command,callback=callback, _must_use_master=True, _is_command=True)
def map_reduce(self, map, reduce, full_response=False, **kwargs):
    def wrapper(result, full_response):
        if full_response:
            return result
        return result.get("result")

    cmd = SON([("mapreduce", self._collection_name),
               ("map", map), ("reduce", reduce)])
    cmd.update(**kwargs)
    d = self._database["$cmd"].find_one(cmd)
    d.addCallback(wrapper, full_response)
    return d
def test_ordered_dict(self):
    a = SON()
    a["hello"] = "world"
    a["mike"] = "awesome"
    a["hello_"] = "mike"
    self.assertEqual(list(a.items()),
                     [("hello", "world"), ("mike", "awesome"), ("hello_", "mike")])

    b = SON({"hello": "world"})
    self.assertEqual(b["hello"], "world")
    self.assertRaises(KeyError, lambda: b["goodbye"])
def command(self, command, value=1, check=True, allowable_errors=None, **kwargs):
    if isinstance(command, basestring):
        command = SON([(command, value)])

    command.update(kwargs)

    ns = self["$cmd"]
    response = yield ns.find_one(command)

    if check:
        msg = "command {0} on namespace {1} failed: %s".format(repr(command), ns)
        _check_command_response(response, msg, allowable_errors)

    defer.returnValue(response)
def tearDown(self):
    if (client_context.version.at_least(3, 5) and
            client_context.is_rs and
            client_context.test_commands_enabled):
        self.client.admin.command(SON([
            ('configureFailPoint', 'onPrimaryTransactionalWrite'),
            ('mode', 'off')]))
pat = u'^' + pat pipeline.append({u"$match": {u"NameLast": {u"$regex": Regex(pat, u"i")}}}) try: pat2 = u'^' + sys.argv[2] pipeline.append( {u"$match": { u'NameFirst': { u"$regex": Regex(pat2, u"i") } }}) except IndexError: pass # Append additional operations to the pipeline # Sort pipeline.append({u"$sort": SON([(u"CAPID", 1)])}) # Lookup phone and email contacts pipeline.append({ u"$lookup": { u"from": u"MbrContact", u"localField": u"CAPID", u"foreignField": u"CAPID", u"as": u"Contacts" } }) # Lookup postal addresses pipeline.append({ u"$lookup": { u"from": u"MbrAddresses", u"localField": u"CAPID", u"foreignField": u"CAPID",
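# A standalone note on the SON([(u"CAPID", 1)]) sort stage above: with a
# single key a plain dict would do, but multi-key sorts need a guaranteed key
# order, which SON provides. The compound sort below is illustrative only.
from bson.son import SON

sort_stage = {u"$sort": SON([(u"NameLast", 1), (u"NameFirst", 1)])}
# Sorts by NameLast first, then NameFirst; a plain dict could not guarantee
# that precedence on older Python versions.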
def __query_spec(self): """Get the spec to use for a query. """ operators = {} if self.__ordering: operators["$orderby"] = self.__ordering if self.__explain: operators["$explain"] = True if self.__hint: operators["$hint"] = self.__hint if self.__snapshot: operators["$snapshot"] = True if self.__max_scan: operators["$maxScan"] = self.__max_scan # Only set $readPreference if it's something other than # PRIMARY to avoid problems with mongos versions that # don't support read preferences. if (self.__collection.database.connection.is_mongos and self.__read_preference != ReadPreference.PRIMARY): has_tags = self.__tag_sets and self.__tag_sets != [{}] # For maximum backwards compatibility, don't set $readPreference # for SECONDARY_PREFERRED unless tags are in use. Just rely on # the slaveOkay bit (set automatically if read preference is not # PRIMARY), which has the same behavior. if (self.__read_preference != ReadPreference.SECONDARY_PREFERRED or has_tags): read_pref = { 'mode': read_preferences.mongos_mode(self.__read_preference) } if has_tags: read_pref['tags'] = self.__tag_sets operators['$readPreference'] = read_pref if operators: # Make a shallow copy so we can cleanly rewind or clone. spec = self.__spec.copy() # Only commands that can be run on secondaries should have any # operators added to the spec. Command queries can be issued # by db.command or calling find_one on $cmd directly if self.collection.name == "$cmd": # Don't change commands that can't be sent to secondaries command_name = spec and spec.keys()[0].lower() or "" if command_name not in secondary_ok_commands: return spec elif command_name == 'mapreduce': # mapreduce shouldn't be changed if its not inline out = spec.get('out') if not isinstance(out, dict) or not out.get('inline'): return spec # White-listed commands must be wrapped in $query. if "$query" not in spec: # $query has to come first spec = SON([("$query", spec)]) if not isinstance(spec, SON): # Ensure the spec is SON. As order is important this will # ensure its set before merging in any extra operators. spec = SON(spec) spec.update(operators) return spec # Have to wrap with $query if "query" is the first key. # We can't just use $query anytime "query" is a key as # that breaks commands like count and find_and_modify. # Checking spec.keys()[0] covers the case that the spec # was passed as an instance of SON or OrderedDict. elif ("query" in self.__spec and (len(self.__spec) == 1 or self.__spec.keys()[0] == "query")): return SON({"$query": self.__spec}) return self.__spec
def clean(grouped):
    return SON((k, SON((kk, remove_np(vv)) for kk, vv in v.items()))
               for k, v in grouped.items())
def test_command(self): # Test generic 'command' method. Some commands obey read preference, # most don't. # Disobedient commands, always go to primary self._test_fn(False, lambda: self.c.pymongo_test.command('ping')) self._test_fn(False, lambda: self.c.admin.command('buildinfo')) # Obedient commands. self._test_fn( True, lambda: self.c.pymongo_test.command( 'group', { 'ns': 'test', 'key': { 'a': 1 }, '$reduce': 'function(obj, prev) { }', 'initial': {} })) self._test_fn(True, lambda: self.c.pymongo_test.command('dbStats')) # collStats fails if no collection self.c.pymongo_test.test.insert({}, w=self.w) self._test_fn(True, lambda: self.c.pymongo_test.command('collStats', 'test')) # Count self._test_fn(True, lambda: self.c.pymongo_test.command('count', 'test')) self._test_fn( True, lambda: self.c.pymongo_test.command( 'count', 'test', query={'a': 1})) self._test_fn( True, lambda: self.c.pymongo_test.command( SON([('count', 'test'), ('query', { 'a': 1 })]))) # Distinct self._test_fn( True, lambda: self.c.pymongo_test.command( 'distinct', 'test', key={'a': 1})) self._test_fn( True, lambda: self.c.pymongo_test.command( 'distinct', 'test', key={'a': 1}, query={'a': 1})) self._test_fn( True, lambda: self.c.pymongo_test.command( SON([('distinct', 'test'), ('key', { 'a': 1 }), ('query', { 'a': 1 })]))) # Geo stuff. Make sure a 2d index is created and replicated self.c.pymongo_test.system.indexes.insert( { 'key': { 'location': '2d' }, 'ns': 'pymongo_test.test', 'name': 'location_2d' }, w=self.w) self.c.pymongo_test.system.indexes.insert(SON([ ('ns', 'pymongo_test.test'), ('key', SON([('location', 'geoHaystack'), ('key', 1)])), ('bucketSize', 100), ('name', 'location_geoHaystack'), ]), w=self.w) self._test_fn( True, lambda: self.c.pymongo_test.command( 'geoNear', 'test', near=[0, 0])) self._test_fn( True, lambda: self.c.pymongo_test.command( SON([('geoNear', 'test'), ('near', [0, 0])]))) self._test_fn( True, lambda: self.c.pymongo_test.command('geoSearch', 'test', near=[33, 33], maxDistance=6, search={'type': 'restaurant'}, limit=30)) self._test_fn( True, lambda: self.c.pymongo_test.command( SON([('geoSearch', 'test'), ('near', [33, 33]), ('maxDistance', 6), ('search', { 'type': 'restaurant' }), ('limit', 30)]))) if version.at_least(self.c, (2, 1, 0)): self._test_fn( True, lambda: self.c.pymongo_test.command( SON([('aggregate', 'test'), ('pipeline', [])]))) # Text search. if version.at_least(self.c, (2, 3, 2)): utils.enable_text_search(self.c) db = self.c.pymongo_test # Only way to create an index and wait for all members to build it. index = { 'ns': 'pymongo_test.test', 'name': 't_text', 'key': { 't': 'text' } } db.system.indexes.insert(index, manipulate=False, check_keys=False, w=self.w) self._test_fn( True, lambda: self.c.pymongo_test.command( SON([('text', 'test'), ('search', 'foo')]))) self.c.pymongo_test.test.drop_indexes()
class BaseDocument(object): __slots__ = ('_changed_fields', '_initialised', '_created', '_data', '_dynamic_fields', '_auto_id_field', '_db_field_map', '__weakref__') _dynamic = False _dynamic_lock = True STRICT = False def __init__(self, *args, **values): """ Initialise a document or embedded document :param __auto_convert: Try and will cast python objects to Object types :param values: A dictionary of values for the document """ self._initialised = False self._created = True if args: # Combine positional arguments with named arguments. # We only want named arguments. field = iter(self._fields_ordered) # If its an automatic id field then skip to the first defined field if getattr(self, '_auto_id_field', False): next(field) for value in args: name = next(field) if name in values: raise TypeError("Multiple values for keyword argument '" + name + "'") values[name] = value __auto_convert = values.pop("__auto_convert", True) # 399: set default values only to fields loaded from DB __only_fields = set(values.pop("__only_fields", values)) _created = values.pop("_created", True) signals.pre_init.send(self.__class__, document=self, values=values) # Check if there are undefined fields supplied to the constructor, # if so raise an Exception. if not self._dynamic and (self._meta.get('strict', True) or _created): _undefined_fields = set(values.keys()) - set( list(self._fields.keys()) + ['id', 'pk', '_cls', '_text_score']) if _undefined_fields: msg = ("The fields '{0}' do not exist on the document '{1}'" ).format(_undefined_fields, self._class_name) raise FieldDoesNotExist(msg) if self.STRICT and not self._dynamic: self._data = StrictDict.create(allowed_keys=self._fields_ordered)() else: self._data = SemiStrictDict.create( allowed_keys=self._fields_ordered)() self._dynamic_fields = SON() # Assign default values to instance for key, field in self._fields.items(): if self._db_field_map.get(key, key) in __only_fields: continue value = getattr(self, key, None) setattr(self, key, value) if "_cls" not in values: self._cls = self._class_name # Set passed values after initialisation if self._dynamic: dynamic_data = {} for key, value in values.items(): if key in self._fields or key == '_id': setattr(self, key, value) elif self._dynamic: dynamic_data[key] = value else: FileField = _import_class('FileField') for key, value in values.items(): if key == '__auto_convert': continue key = self._reverse_db_field_map.get(key, key) if key in self._fields or key in ('id', 'pk', '_cls'): if __auto_convert and value is not None: field = self._fields.get(key) if field and not isinstance(field, FileField): value = field.to_python(value) setattr(self, key, value) else: self._data[key] = value # Set any get_<field>_display methods self.__set_field_display() if self._dynamic: self._dynamic_lock = False for key, value in dynamic_data.items(): setattr(self, key, value) # Flag initialised self._initialised = True self._created = _created signals.post_init.send(self.__class__, document=self) def __delattr__(self, *args, **kwargs): """Handle deletions of fields""" field_name = args[0] if field_name in self._fields: default = self._fields[field_name].default if callable(default): default = default() setattr(self, field_name, default) else: super(BaseDocument, self).__delattr__(*args, **kwargs) def __setattr__(self, name, value): # Handle dynamic data only if an initialised dynamic document if self._dynamic and not self._dynamic_lock: if not hasattr(self, name) and not name.startswith('_'): DynamicField = _import_class("DynamicField") 
field = DynamicField(db_field=name) field.name = name self._dynamic_fields[name] = field self._fields_ordered += (name, ) if not name.startswith('_'): value = self.__expand_dynamic_values(name, value) # Handle marking data as changed if name in self._dynamic_fields: self._data[name] = value if hasattr(self, '_changed_fields'): self._mark_as_changed(name) try: self__created = self._created except AttributeError: self__created = True if (self._is_document and not self__created and name in self._meta.get('shard_key', tuple()) and self._data.get(name) != value): OperationError = _import_class('OperationError') msg = "Shard Keys are immutable. Tried to update %s" % name raise OperationError(msg) try: self__initialised = self._initialised except AttributeError: self__initialised = False # Check if the user has created a new instance of a class if (self._is_document and self__initialised and self__created and name == self._meta.get('id_field')): super(BaseDocument, self).__setattr__('_created', False) super(BaseDocument, self).__setattr__(name, value) def __getstate__(self): data = {} for k in ('_changed_fields', '_initialised', '_created', '_dynamic_fields', '_fields_ordered'): if hasattr(self, k): data[k] = getattr(self, k) data['_data'] = self.to_mongo() return data def __setstate__(self, data): if isinstance(data["_data"], SON): data["_data"] = self.__class__._from_son(data["_data"])._data for k in ('_changed_fields', '_initialised', '_created', '_data', '_dynamic_fields'): if k in data: setattr(self, k, data[k]) if '_fields_ordered' in data: if self._dynamic: setattr(self, '_fields_ordered', data['_fields_ordered']) else: _super_fields_ordered = type(self)._fields_ordered setattr(self, '_fields_ordered', _super_fields_ordered) dynamic_fields = data.get('_dynamic_fields') or SON() for k in list(dynamic_fields.keys()): setattr(self, k, data["_data"].get(k)) def __iter__(self): return iter(self._fields_ordered) def __getitem__(self, name): """Dictionary-style field access, return a field's value if present. """ try: if name in self._fields_ordered: return getattr(self, name) except AttributeError: pass raise KeyError(name) def __setitem__(self, name, value): """Dictionary-style field access, set a field's value. 
""" # Ensure that the field exists before settings its value if not self._dynamic and name not in self._fields: raise KeyError(name) return setattr(self, name, value) def __contains__(self, name): try: val = getattr(self, name) return val is not None except AttributeError: return False def __len__(self): return len(self._data) def __repr__(self): try: u = self.__str__() except (UnicodeEncodeError, UnicodeDecodeError): u = '[Bad Unicode data]' repr_type = str if u is None else type(u) return repr_type('<%s: %s>' % (self.__class__.__name__, u)) def __str__(self): if hasattr(self, '__unicode__'): if PY3: return self.__unicode__() else: return str(self).encode('utf-8') return txt_type('%s object' % self.__class__.__name__) def __eq__(self, other): if isinstance(other, self.__class__) and hasattr( other, 'id') and other.id is not None: return self.id == other.id if isinstance(other, DBRef): return self._get_collection_name( ) == other.collection and self.id == other.id if self.id is None: return self is other return False def __ne__(self, other): return not self.__eq__(other) def __hash__(self): if getattr(self, 'pk', None) is None: # For new object return super(BaseDocument, self).__hash__() else: return hash(self.pk) def clean(self): """ Hook for doing document level data cleaning before validation is run. Any ValidationError raised by this method will not be associated with a particular field; it will have a special-case association with the field defined by NON_FIELD_ERRORS. """ pass def get_text_score(self): """ Get text score from text query """ if '_text_score' not in self._data: raise InvalidDocumentError( 'This document is not originally built from a text query') return self._data['_text_score'] def to_mongo(self, use_db_field=True, fields=None): """ Return as SON data ready for use with MongoDB. """ if not fields: fields = [] data = SON() data["_id"] = None data['_cls'] = self._class_name # only root fields ['test1.a', 'test2'] => ['test1', 'test2'] root_fields = set([f.split('.')[0] for f in fields]) for field_name in self: if root_fields and field_name not in root_fields: continue value = self._data.get(field_name, None) field = self._fields.get(field_name) if field is None and self._dynamic: field = self._dynamic_fields.get(field_name) if value is not None: f_inputs = field.to_mongo.__code__.co_varnames ex_vars = {} if fields and 'fields' in f_inputs: key = '%s.' % field_name embedded_fields = [ i.replace(key, '') for i in fields if i.startswith(key) ] ex_vars['fields'] = embedded_fields if 'use_db_field' in f_inputs: ex_vars['use_db_field'] = use_db_field value = field.to_mongo(value, **ex_vars) # Handle self generating fields if value is None and field._auto_gen: value = field.generate() self._data[field_name] = value if value is not None: if use_db_field: data[field.db_field] = value else: data[field.name] = value # If "_id" has not been set, then try and set it Document = _import_class("Document") if isinstance(self, Document): if data["_id"] is None: data["_id"] = self._data.get("id", None) if data['_id'] is None: data.pop('_id') # Only add _cls if allow_inheritance is True if (not hasattr(self, '_meta') or not self._meta.get('allow_inheritance', ALLOW_INHERITANCE)): data.pop('_cls') return data def validate(self, clean=True): """Ensure that all fields' values are valid and that required fields are present. 
""" # Ensure that each field is matched to a valid value errors = {} if clean: try: self.clean() except ValidationError as error: errors[NON_FIELD_ERRORS] = error # Get a list of tuples of field names and their current values fields = [(self._fields.get(name, self._dynamic_fields.get(name)), self._data.get(name)) for name in self._fields_ordered] EmbeddedDocumentField = _import_class("EmbeddedDocumentField") GenericEmbeddedDocumentField = _import_class( "GenericEmbeddedDocumentField") for field, value in fields: if value is not None: try: if isinstance( field, (EmbeddedDocumentField, GenericEmbeddedDocumentField)): field._validate(value, clean=clean) else: field._validate(value) except ValidationError as error: errors[field.name] = error.errors or error except (ValueError, AttributeError, AssertionError) as error: errors[field.name] = error elif field.required and not getattr(field, '_auto_gen', False): errors[field.name] = ValidationError('Field is required', field_name=field.name) if errors: pk = "None" if hasattr(self, 'pk'): pk = self.pk elif self._instance and hasattr(self._instance, 'pk'): pk = self._instance.pk message = "ValidationError (%s:%s) " % (self._class_name, pk) raise ValidationError(message, errors=errors) def to_json(self, *args, **kwargs): """Converts a document to JSON. :param use_db_field: Set to True by default but enables the output of the json structure with the field names and not the mongodb store db_names in case of set to False """ use_db_field = kwargs.pop('use_db_field', True) return json_util.dumps(self.to_mongo(use_db_field), *args, **kwargs) @classmethod def from_json(cls, json_data, created=False): """Converts json data to an unsaved document instance""" return cls._from_son(json_util.loads(json_data), created=created) def __expand_dynamic_values(self, name, value): """expand any dynamic values to their correct types / values""" if not isinstance(value, (dict, list, tuple)): return value EmbeddedDocumentListField = _import_class('EmbeddedDocumentListField') is_list = False if not hasattr(value, 'items'): is_list = True value = dict([(k, v) for k, v in enumerate(value)]) if not is_list and '_cls' in value: cls = get_document(value['_cls']) return cls(**value) data = {} for k, v in list(value.items()): key = name if is_list else k data[k] = self.__expand_dynamic_values(key, v) if is_list: # Convert back to a list data_items = sorted(list(data.items()), key=operator.itemgetter(0)) value = [v for k, v in data_items] else: value = data # Convert lists / values so we can watch for any changes on them if (isinstance(value, (list, tuple)) and not isinstance(value, BaseList)): if issubclass(type(self), EmbeddedDocumentListField): value = EmbeddedDocumentList(value, self, name) else: value = BaseList(value, self, name) elif isinstance(value, dict) and not isinstance(value, BaseDict): value = BaseDict(value, self, name) return value def _mark_as_changed(self, key): """Marks a key as explicitly changed by the user """ if not key: return if not hasattr(self, '_changed_fields'): return if '.' in key: key, rest = key.split('.', 1) key = self._db_field_map.get(key, key) key = '%s.%s' % (key, rest) else: key = self._db_field_map.get(key, key) if key not in self._changed_fields: levels, idx = key.split('.'), 1 while idx <= len(levels): if '.'.join(levels[:idx]) in self._changed_fields: break idx += 1 else: self._changed_fields.append(key) # remove lower level changed fields level = '.'.join(levels[:idx]) + '.' 
remove = self._changed_fields.remove for field in self._changed_fields[:]: if field.startswith(level): remove(field) def _clear_changed_fields(self): """Using get_changed_fields iterate and remove any fields that are marked as changed""" for changed in self._get_changed_fields(): parts = changed.split(".") data = self for part in parts: if isinstance(data, list): try: data = data[int(part)] except IndexError: data = None elif isinstance(data, dict): data = data.get(part, None) else: data = getattr(data, part, None) if hasattr(data, "_changed_fields"): if hasattr(data, "_is_document") and data._is_document: continue data._changed_fields = [] self._changed_fields = [] def _nestable_types_changed_fields(self, changed_fields, key, data, inspected): # Loop list / dict fields as they contain documents # Determine the iterator to use if not hasattr(data, 'items'): iterator = enumerate(data) else: iterator = iter(data.items()) for index, value in iterator: list_key = "%s%s." % (key, index) # don't check anything lower if this key is already marked # as changed. if list_key[:-1] in changed_fields: continue if hasattr(value, '_get_changed_fields'): changed = value._get_changed_fields(inspected) changed_fields += [ "%s%s" % (list_key, k) for k in changed if k ] elif isinstance(value, (list, tuple, dict)): self._nestable_types_changed_fields(changed_fields, list_key, value, inspected) def _get_changed_fields(self, inspected=None): """Returns a list of all fields that have explicitly been changed. """ EmbeddedDocument = _import_class("EmbeddedDocument") DynamicEmbeddedDocument = _import_class("DynamicEmbeddedDocument") ReferenceField = _import_class("ReferenceField") SortedListField = _import_class("SortedListField") changed_fields = [] changed_fields += getattr(self, '_changed_fields', []) inspected = inspected or set() if hasattr(self, 'id') and isinstance(self.id, Hashable): if self.id in inspected: return changed_fields inspected.add(self.id) for field_name in self._fields_ordered: db_field_name = self._db_field_map.get(field_name, field_name) key = '%s.' % db_field_name data = self._data.get(field_name, None) field = self._fields.get(field_name) if hasattr(data, 'id'): if data.id in inspected: continue if isinstance(field, ReferenceField): continue elif (isinstance(data, (EmbeddedDocument, DynamicEmbeddedDocument)) and db_field_name not in changed_fields): # Find all embedded fields that have been changed changed = data._get_changed_fields(inspected) changed_fields += ["%s%s" % (key, k) for k in changed if k] elif (isinstance(data, (list, tuple, dict)) and db_field_name not in changed_fields): if (hasattr(field, 'field') and isinstance(field.field, ReferenceField)): continue elif isinstance(field, SortedListField) and field._ordering: # if ordering is affected whole list is changed if any( [field._ordering in d._changed_fields for d in data]): changed_fields.append(db_field_name) continue self._nestable_types_changed_fields(changed_fields, key, data, inspected) return changed_fields def _delta(self): """Returns the delta (set, unset) of the changes for a document. Gets any values that have been explicitly changed. 
""" # Handles cases where not loaded from_son but has _id doc = self.to_mongo() set_fields = self._get_changed_fields() unset_data = {} parts = [] if hasattr(self, '_changed_fields'): set_data = {} # Fetch each set item from its path for path in set_fields: parts = path.split('.') d = doc new_path = [] for p in parts: if isinstance(d, (ObjectId, DBRef)): break elif isinstance(d, list) and p.lstrip('-').isdigit(): if p[0] == '-': p = str(len(d) + int(p)) try: d = d[int(p)] except IndexError: d = None elif hasattr(d, 'get'): d = d.get(p) new_path.append(p) path = '.'.join(new_path) set_data[path] = d else: set_data = doc if '_id' in set_data: del set_data['_id'] # Determine if any changed items were actually unset. for path, value in list(set_data.items()): if value or isinstance(value, (numbers.Number, bool)): continue # If we've set a value that ain't the default value don't unset it. default = None if (self._dynamic and len(parts) and parts[0] in self._dynamic_fields): del set_data[path] unset_data[path] = 1 continue elif path in self._fields: default = self._fields[path].default else: # Perform a full lookup for lists / embedded lookups d = self parts = path.split('.') db_field_name = parts.pop() for p in parts: if isinstance(d, list) and p.lstrip('-').isdigit(): if p[0] == '-': p = str(len(d) + int(p)) d = d[int(p)] elif (hasattr(d, '__getattribute__') and not isinstance(d, dict)): real_path = d._reverse_db_field_map.get(p, p) d = getattr(d, real_path) else: d = d.get(p) if hasattr(d, '_fields'): field_name = d._reverse_db_field_map.get( db_field_name, db_field_name) if field_name in d._fields: default = d._fields.get(field_name).default else: default = None if default is not None: if callable(default): default = default() if default != value: continue del set_data[path] unset_data[path] = 1 return set_data, unset_data @classmethod def _get_collection_name(cls): """Returns the collection name for this class. None for abstract class """ return cls._meta.get('collection', None) @classmethod def _from_son(cls, son, _auto_dereference=True, only_fields=None, created=False): """Create an instance of a Document (subclass) from a PyMongo SON. 
""" if not only_fields: only_fields = [] # get the class name from the document, falling back to the given # class if unavailable class_name = son.get('_cls', cls._class_name) data = dict(("%s" % key, value) for key, value in son.items()) # Return correct subclass for document type if class_name != cls._class_name: cls = get_document(class_name) changed_fields = [] errors_dict = {} fields = cls._fields if not _auto_dereference: fields = copy.copy(fields) for field_name, field in fields.items(): field._auto_dereference = _auto_dereference if field.db_field in data: value = data[field.db_field] try: data[field_name] = (value if value is None else field.to_python(value)) if field_name != field.db_field: del data[field.db_field] except (AttributeError, ValueError) as e: errors_dict[field_name] = e if errors_dict: errors = "\n".join( ["%s - %s" % (k, v) for k, v in list(errors_dict.items())]) msg = ("Invalid data to create a `%s` instance.\n%s" % (cls._class_name, errors)) raise InvalidDocumentError(msg) if cls.STRICT: data = dict((k, v) for k, v in data.items() if k in cls._fields) obj = cls(__auto_convert=False, _created=created, __only_fields=only_fields, **data) obj._changed_fields = changed_fields if not _auto_dereference: obj._fields = fields return obj @classmethod def _build_index_specs(cls, meta_indexes): """Generate and merge the full index specs """ geo_indices = cls._geo_indices() unique_indices = cls._unique_with_indexes() index_specs = [cls._build_index_spec(spec) for spec in meta_indexes] def merge_index_specs(index_specs, indices): if not indices: return index_specs spec_fields = [v['fields'] for k, v in enumerate(index_specs)] # Merge unique_indexes with existing specs for k, v in enumerate(indices): if v['fields'] in spec_fields: index_specs[spec_fields.index(v['fields'])].update(v) else: index_specs.append(v) return index_specs index_specs = merge_index_specs(index_specs, geo_indices) index_specs = merge_index_specs(index_specs, unique_indices) return index_specs @classmethod def _build_index_spec(cls, spec): """Build a PyMongo index spec from a MongoEngine index spec. 
""" if isinstance(spec, str): spec = {'fields': [spec]} elif isinstance(spec, (list, tuple)): spec = {'fields': list(spec)} elif isinstance(spec, dict): spec = dict(spec) index_list = [] direction = None # Check to see if we need to include _cls allow_inheritance = cls._meta.get('allow_inheritance', ALLOW_INHERITANCE) include_cls = (allow_inheritance and not spec.get('sparse', False) and spec.get('cls', True) and '_cls' not in spec['fields']) # 733: don't include cls if index_cls is False unless there is an explicit cls with the index include_cls = include_cls and (spec.get('cls', False) or cls._meta.get('index_cls', True)) if "cls" in spec: spec.pop('cls') for key in spec['fields']: # If inherited spec continue if isinstance(key, (list, tuple)): continue # ASCENDING from + # DESCENDING from - # TEXT from $ # HASHED from # # GEOSPHERE from ( # GEOHAYSTACK from ) # GEO2D from * direction = pymongo.ASCENDING if key.startswith("-"): direction = pymongo.DESCENDING elif key.startswith("$"): direction = pymongo.TEXT elif key.startswith("#"): direction = pymongo.HASHED elif key.startswith("("): direction = pymongo.GEOSPHERE elif key.startswith(")"): direction = pymongo.GEOHAYSTACK elif key.startswith("*"): direction = pymongo.GEO2D if key.startswith(("+", "-", "*", "$", "#", "(", ")")): key = key[1:] # Use real field name, do it manually because we need field # objects for the next part (list field checking) parts = key.split('.') if parts in (['pk'], ['id'], ['_id']): key = '_id' else: fields = cls._lookup_field(parts) parts = [] for field in fields: try: if field != "_id": field = field.db_field except AttributeError: pass parts.append(field) key = '.'.join(parts) index_list.append((key, direction)) # Don't add cls to a geo index if include_cls and direction not in (pymongo.GEO2D, pymongo.GEOHAYSTACK, pymongo.GEOSPHERE): index_list.insert(0, ('_cls', 1)) if index_list: spec['fields'] = index_list return spec @classmethod def _unique_with_indexes(cls, namespace=""): """ Find and set unique indexes """ unique_indexes = [] for field_name, field in list(cls._fields.items()): sparse = field.sparse # Generate a list of indexes needed by uniqueness constraints if field.unique: unique_fields = [field.db_field] # Add any unique_with fields to the back of the index spec if field.unique_with: if isinstance(field.unique_with, str): field.unique_with = [field.unique_with] # Convert unique_with field names to real field names unique_with = [] for other_name in field.unique_with: parts = other_name.split('.') # Lookup real name parts = cls._lookup_field(parts) name_parts = [part.db_field for part in parts] unique_with.append('.'.join(name_parts)) # Unique field should be required parts[-1].required = True sparse = (not sparse and parts[-1].name not in cls.__dict__) unique_fields += unique_with # Add the new index to the list fields = [("%s%s" % (namespace, f), pymongo.ASCENDING) for f in unique_fields] index = {'fields': fields, 'unique': True, 'sparse': sparse} unique_indexes.append(index) if field.__class__.__name__ == "ListField": field = field.field # Grab any embedded document field unique indexes if (field.__class__.__name__ == "EmbeddedDocumentField" and field.document_type != cls): field_namespace = "%s." 
% field_name doc_cls = field.document_type unique_indexes += doc_cls._unique_with_indexes(field_namespace) return unique_indexes @classmethod def _geo_indices(cls, inspected=None, parent_field=None): inspected = inspected or [] geo_indices = [] inspected.append(cls) geo_field_type_names = [ "EmbeddedDocumentField", "GeoPointField", "PointField", "LineStringField", "PolygonField" ] geo_field_types = tuple( [_import_class(field) for field in geo_field_type_names]) for field in list(cls._fields.values()): if not isinstance(field, geo_field_types): continue if hasattr(field, 'document_type'): field_cls = field.document_type if field_cls in inspected: continue if hasattr(field_cls, '_geo_indices'): geo_indices += field_cls._geo_indices( inspected, parent_field=field.db_field) elif field._geo_index: field_name = field.db_field if parent_field: field_name = "%s.%s" % (parent_field, field_name) geo_indices.append( {'fields': [(field_name, field._geo_index)]}) return geo_indices @classmethod def _lookup_field(cls, parts): """Lookup a field based on its attribute and return a list containing the field's parents and the field. """ ListField = _import_class("ListField") DynamicField = _import_class('DynamicField') if not isinstance(parts, (list, tuple)): parts = [parts] fields = [] field = None for field_name in parts: # Handle ListField indexing: if field_name.isdigit() and isinstance(field, ListField): fields.append(field_name) continue if field is None: # Look up first field from the document if field_name == 'pk': # Deal with "primary key" alias field_name = cls._meta['id_field'] if field_name in cls._fields: field = cls._fields[field_name] elif cls._dynamic: field = DynamicField(db_field=field_name) elif cls._meta.get("allow_inheritance", False) or cls._meta.get("abstract", False): # 744: in case the field is defined in a subclass for subcls in cls.__subclasses__(): try: field = subcls._lookup_field([field_name])[0] except LookUpError: continue if field is not None: break else: raise LookUpError('Cannot resolve field "%s"' % field_name) else: raise LookUpError('Cannot resolve field "%s"' % field_name) else: ReferenceField = _import_class('ReferenceField') GenericReferenceField = _import_class('GenericReferenceField') if isinstance(field, (ReferenceField, GenericReferenceField)): raise LookUpError('Cannot perform join in mongoDB: %s' % '__'.join(parts)) if hasattr(getattr(field, 'field', None), 'lookup_member'): new_field = field.field.lookup_member(field_name) elif cls._dynamic and (isinstance(field, DynamicField) or getattr( getattr(field, 'document_type', None), '_dynamic', None)): new_field = DynamicField(db_field=field_name) else: # Look up subfield on the previous field or raise try: new_field = field.lookup_member(field_name) except AttributeError: raise LookUpError( 'Cannot resolve subfield or operator {} ' 'on the field {}'.format(field_name, field.name)) if not new_field and isinstance(field, ComplexBaseField): fields.append(field_name) continue elif not new_field: raise LookUpError('Cannot resolve field "%s"' % field_name) field = new_field # update field to the new field type fields.append(field) return fields @classmethod def _translate_field_name(cls, field, sep='.'): """Translate a field attribute name to a database field name. """ parts = field.split(sep) parts = [f.db_field for f in cls._lookup_field(parts)] return '.'.join(parts) def __set_field_display(self): """For each field that specifies choices, create a get_<field>_display method. 
""" fields_with_choices = [(n, f) for n, f in list(self._fields.items()) if f.choices] for attr_name, field in fields_with_choices: setattr(self, 'get_%s_display' % attr_name, partial(self.__get_field_display, field=field)) def __get_field_display(self, field): """Return the display value for a choice field""" value = getattr(self, field.name) if field.choices and isinstance(field.choices[0], (list, tuple)): return dict(field.choices).get(value, value) return value
def searchMongoAlerts(mozdefdb): attackers=mozdefdb['attackers'] alerts=mozdefdb['alerts'] # search the last X alerts for IP addresses # aggregated by CIDR mask/24 # aggregate IPv4 addresses in the most recent alerts # to find common attackers. ipv4TopHits = alerts.aggregate([ {"$sort": {"utcepoch":-1}}, # reverse sort the current alerts {"$limit": 100}, #most recent 100 {"$match": {"events.documentsource.details.sourceipaddress":{"$exists": True}}}, # must have an ip address {"$match": {"attackerid":{"$exists": False}}}, # must not be already related to an attacker {"$group": {"_id": {"ipaddress":"$events.documentsource.details.sourceipaddress"}}}, # grab ip address from the events {"$unwind": "$_id.ipaddress"}, # separate all ips from their alerts {"$group": {"_id": "$_id.ipaddress", "hitcount": {"$sum": 1}}}, # count by ip {"$match":{"hitcount":{"$gt":5}}}, # limit to those with X observances {"$sort": SON([("hitcount", -1), ("_id", -1)])}, # sort {"$limit": 10} # top 10 ]) for ip in ipv4TopHits['result']: if netaddr.valid_ipv4(ip['_id']): ipcidr = netaddr.IPNetwork(ip['_id']) # expand it to a /24 CIDR # todo: lookup ipwhois for asn_cidr value # potentially with a max mask value (i.e. asn is /8, limit attackers to /24) ipcidr.prefixlen = 24 # append to or create attacker. # does this match an existing attacker's indicators if not ipcidr.ip.is_loopback() and not ipcidr.ip.is_private() and not ipcidr.ip.is_reserved(): logger.debug('searching for alert ip ' + str(ipcidr)) attacker = attackers.find_one({'indicators.ipv4address': str(ipcidr)}) if attacker is None: # new attacker # generate a meteor-compatible ID # save the ES document type, index, id # and add a sub list for future events logger.debug('new attacker from alerts') newAttacker = genNewAttacker() # str to get the ip/cidr rather than netblock cidr. # i.e. '1.2.3.4/24' not '1.2.3.0/24' newAttacker['indicators'].append(dict(ipv4address=str(ipcidr))) matchingalerts = alerts.find( {"events.documentsource.details.sourceipaddress": str(ipcidr.ip), }) if matchingalerts is not None: # update list of alerts this attacker matched. for alert in matchingalerts: newAttacker['alerts'].append( dict(alertid=alert['_id']) ) # update alert with attackerID alert['attackerid'] = newAttacker['_id'] alerts.save(alert) #add the events from this alert: #add the events from this alert: for e in alert['events']: newAttacker['events'].append(e) newAttacker['alertscount'] = len(newAttacker['alerts']) newAttacker['eventscount'] = len(newAttacker['events']) if newAttacker['eventscount'] > 0: newAttacker['lastseentimestamp'] = toUTC(newAttacker['events'][-1]['documentsource']['utctimestamp'], 'UTC') attackers.insert(newAttacker) #upate geoIP info latestGeoIP = [a['events'] for a in alerts.find( {"events.documentsource.details.sourceipaddress": str(ipcidr.ip), })][-1][0]['documentsource'] updateAttackerGeoIP(mozdefdb, newAttacker['_id'], latestGeoIP) else: logger.debug('found existing attacker in alerts') # if alert not present in this attackers list # append this to the list # todo: trim the list at X (i.e. last 100) # search alerts without attackerid matchingalerts = alerts.find( {"events.documentsource.details.sourceipaddress": str(ipcidr.ip), "attackerid":{"$exists": False} }) if matchingalerts is not None: #attacker['eventscount'] = len(attacker['events']) logger.debug('matched alert with attacker') # update list of alerts this attacker matched. 
for alert in matchingalerts: attacker['alerts'].append( dict(alertid=alert['_id']) ) # update alert with attackerID alert['attackerid'] = attacker['_id'] alerts.save(alert) #add the events from this alert: for e in alert['events']: attacker['events'].append(e) # geo ip could have changed, update it # to the latest updateAttackerGeoIP(mozdefdb, attacker['_id'], alert['events'][-1]['documentsource']) # update last seen time attacker['lastseentimestamp'] = toUTC(attacker['events'][-1]['documentsource']['utctimestamp'], 'UTC') # update counts attacker['alertscount'] = len(attacker['alerts']) attacker['eventscount'] = len(attacker['events']) attackers.save(attacker)
from pymongo import MongoClient
from datetime import datetime
import pprint
from bson.son import SON

# setup connection
client = MongoClient('localhost', 27017)
db = client.project_4.tweets

# set up the pipeline variable; I usually wouldn't bother, but mongodb queries get messy quickly
pipelines = [[
    {"$unwind": "$entities.hashtags"},
    {"$group": {"_id": "$entities.hashtags.text", "count": {"$sum": 1}}},
    {"$sort": SON([("count", -1)])}
]]

# save the output to variable res
res = [[x for x in db.aggregate(pipelines[-1])]]

# print only those that had a count of 50 or more
for r in res[-1]:
    if r['count'] >= 50:
        print r

# dates are now stored as dates in MongoDB so we can query on dates
# although it would be trivial to step through the months in python,
# we'll use the capabilities of mongodb's query language to group
# counts by month
for htag in ('DataSciBowl', 'Data4Good'):
    pipelines.append([
        {"$match": {"entities.hashtags.text": htag}},
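# Hedged sketch of how the per-month pipeline started above might be
# completed, continuing the snippet (db, pipelines and SON are defined there).
# The stages after $match and the "created_at" date field are assumptions,
# not the original author's code.
for htag in ('DataSciBowl', 'Data4Good'):
    pipelines.append([
        {"$match": {"entities.hashtags.text": htag}},
        {"$group": {"_id": {"year": {"$year": "$created_at"},
                            "month": {"$month": "$created_at"}},
                    "count": {"$sum": 1}}},
        {"$sort": SON([("_id.year", 1), ("_id.month", 1)])}
    ])
    for r in db.aggregate(pipelines[-1]):
        print r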
def validate_collection(self, name_or_collection, scandata=False, full=False, session=None, background=None): """Validate a collection. Returns a dict of validation info. Raises CollectionInvalid if validation fails. See also the MongoDB documentation on the `validate command`_. :Parameters: - `name_or_collection`: A Collection object or the name of a collection to validate. - `scandata`: Do extra checks beyond checking the overall structure of the collection. - `full`: Have the server do a more thorough scan of the collection. Use with `scandata` for a thorough scan of the structure of the collection and the individual documents. - `session` (optional): a :class:`~pymongo.client_session.ClientSession`. - `background` (optional): A boolean flag that determines whether the command runs in the background. Requires MongoDB 4.4+. .. versionchanged:: 3.11 Added ``background`` parameter. .. versionchanged:: 3.6 Added ``session`` parameter. .. _validate command: https://docs.mongodb.com/manual/reference/command/validate/ """ name = name_or_collection if isinstance(name, Collection): name = name.name if not isinstance(name, str): raise TypeError("name_or_collection must be an instance of str or " "Collection") cmd = SON([("validate", name), ("scandata", scandata), ("full", full)]) if background is not None: cmd["background"] = background result = self.command(cmd, session=session) valid = True # Pre 1.9 results if "result" in result: info = result["result"] if info.find("exception") != -1 or info.find("corrupt") != -1: raise CollectionInvalid("%s invalid: %s" % (name, info)) # Sharded results elif "raw" in result: for _, res in result["raw"].items(): if "result" in res: info = res["result"] if (info.find("exception") != -1 or info.find("corrupt") != -1): raise CollectionInvalid("%s invalid: " "%s" % (name, info)) elif not res.get("valid", False): valid = False break # Post 1.9 non-sharded results. elif not result.get("valid", False): valid = False if not valid: raise CollectionInvalid("%s invalid: %r" % (name, result)) return result
def register_entry(self, id_detected, date=None, time=None, override=False):
    # resolve the time here rather than in the default argument, so it is not
    # frozen at function-definition time
    if time is None:
        time = datetime.now().time()
    if date is None:
        date = self.date
    attendence_find = SON({
        "employee id": id_detected,
        "date": SON({
            "year": date.year,
            "month": date.month,
            "day": date.day
        })
    })
    attendence_query = self.db.attendance_collection.find(attendence_find)
    if len(list(attendence_query)) > 0:
        if override:
            date_query = {
                "employee id": id_detected,
                "date": SON({
                    "year": date.year,
                    "month": date.month,
                    "day": date.day
                })
            }
            update_entry = {
                "$set": {
                    "entry": SON({
                        "hour": time.hour,
                        "minute": time.minute,
                        "second": time.second
                    })
                }
            }
            self.db.attendance_collection.update_one(
                date_query, update_entry)
            print("database entry updated for employee id=" + str(id_detected))
        else:
            raise QueryError(' '.join(
                ["employee number", str(id_detected),
                 "already registered entry at date", str(date) +
                 "\nmust allow override in order to update entry"]))
    else:
        attendence_insert = SON({
            # unique per employee per calendar day: id-year-month-day
            "_id": '-'.join([
                str(id_detected),
                str(date.year),
                str(date.month),
                str(date.day)
            ]),
            "employee id": id_detected,
            "date": SON({
                "year": date.year,
                "month": date.month,
                "day": date.day
            }),
            "entry": SON({
                "hour": time.hour,
                "minute": time.minute,
                "second": time.second
            }),
            "exit": None,
            "total": None
        })
        self.db.attendance_collection.insert_one(attendence_insert)
        print("database entry registered for employee id=" + str(id_detected))
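# Hedged usage sketch for register_entry above. `attendance` stands in for
# self.db.attendance_collection; the database/collection names and the sample
# values are assumptions, and the commented document shape mirrors
# attendence_insert.
import datetime
from bson.son import SON
from pymongo import MongoClient

attendance = MongoClient()['attendance_db']['attendance_collection']
day = datetime.date(2021, 3, 1)
doc = attendance.find_one({
    "employee id": 42,
    "date": SON({"year": day.year, "month": day.month, "day": day.day}),
})
# A registered entry looks roughly like:
# {"_id": "42-2021-3-1", "employee id": 42,
#  "date": {"year": 2021, "month": 3, "day": 1},
#  "entry": {"hour": 9, "minute": 15, "second": 0},
#  "exit": None, "total": None}
print(doc)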
def _execute_command(self, generator, write_concern, session, sock_info, op_id, retryable, full_result): if sock_info.max_wire_version < 5 and self.uses_collation: raise ConfigurationError( 'Must be connected to MongoDB 3.4+ to use a collation.') if sock_info.max_wire_version < 6 and self.uses_array_filters: raise ConfigurationError( 'Must be connected to MongoDB 3.6+ to use arrayFilters.') db_name = self.collection.database.name client = self.collection.database.client listeners = client._event_listeners if not self.current_run: self.current_run = next(generator) run = self.current_run # sock_info.command validates the session, but we use # sock_info.write_command. sock_info.validate_session(client, session) while run: cmd = SON([(_COMMANDS[run.op_type], self.collection.name), ('ordered', self.ordered)]) if not write_concern.is_server_default: cmd['writeConcern'] = write_concern.document if self.bypass_doc_val and sock_info.max_wire_version >= 4: cmd['bypassDocumentValidation'] = True bwc = _BulkWriteContext(db_name, cmd, sock_info, op_id, listeners, session) while run.idx_offset < len(run.ops): if session: # Start a new retryable write unless one was already # started for this command. if retryable and not self.started_retryable_write: session._start_retryable_write() self.started_retryable_write = True session._apply_to(cmd, retryable, ReadPreference.PRIMARY, sock_info) sock_info.send_cluster_time(cmd, session, client) check_keys = run.op_type == _INSERT ops = islice(run.ops, run.idx_offset, None) # Run as many ops as possible. request_id, msg, to_send = _do_bulk_write_command( self.namespace, run.op_type, cmd, ops, check_keys, self.collection.codec_options, bwc) if not to_send: raise InvalidOperation("cannot do an empty bulk write") result = bwc.write_command(request_id, msg, to_send) client._receive_cluster_time(result, session) # Retryable writeConcernErrors halt the execution of this run. wce = result.get('writeConcernError', {}) if wce.get('code', 0) in _RETRYABLE_ERROR_CODES: # Synthesize the full bulk result without modifying the # current one because this write operation may be retried. full = copy.deepcopy(full_result) _merge_command(run, full, run.idx_offset, result) _raise_bulk_write_error(full) _merge_command(run, full_result, run.idx_offset, result) # We're no longer in a retry once a command succeeds. self.retrying = False self.started_retryable_write = False if self.ordered and "writeErrors" in result: break run.idx_offset += len(to_send) # We're supposed to continue if errors are # at the write concern level (e.g. wtimeout) if self.ordered and full_result['writeErrors']: break # Reset our state self.current_run = run = next(generator, None)
def add_recipe(self, name, desc): oid = self.db.recipes.insert( SON([('name', name), ('desc', desc)]) ) self.report("add", oid, "Recipe Name: {}".format(name))
def command(self, command, value=1, check=True, allowable_errors=[], uuid_subtype=OLD_UUID_SUBTYPE, **kwargs): """Issue a MongoDB command. Send command `command` to the database and return the response. If `command` is an instance of :class:`basestring` (:class:`str` in python 3) then the command {`command`: `value`} will be sent. Otherwise, `command` must be an instance of :class:`dict` and will be sent as is. Any additional keyword arguments will be added to the final command document before it is sent. For example, a command like ``{buildinfo: 1}`` can be sent using: >>> db.command("buildinfo") For a command where the value matters, like ``{collstats: collection_name}`` we can do: >>> db.command("collstats", collection_name) For commands that take additional arguments we can use kwargs. So ``{filemd5: object_id, root: file_root}`` becomes: >>> db.command("filemd5", object_id, root=file_root) :Parameters: - `command`: document representing the command to be issued, or the name of the command (for simple commands only). .. note:: the order of keys in the `command` document is significant (the "verb" must come first), so commands which require multiple keys (e.g. `findandmodify`) should use an instance of :class:`~bson.son.SON` or a string and kwargs instead of a Python `dict`. - `value` (optional): value to use for the command verb when `command` is passed as a string - `check` (optional): check the response for errors, raising :class:`~pymongo.errors.OperationFailure` if there are any - `allowable_errors`: if `check` is ``True``, error messages in this list will be ignored by error-checking - `uuid_subtype` (optional): The BSON binary subtype to use for a UUID used in this command. - `read_preference`: The read preference for this connection. See :class:`~pymongo.read_preferences.ReadPreference` for available options. - `tag_sets`: Read from replica-set members with these tags. To specify a priority-order for tag sets, provide a list of tag sets: ``[{'dc': 'ny'}, {'dc': 'la'}, {}]``. A final, empty tag set, ``{}``, means "read from any member that matches the mode, ignoring tags." ReplicaSetConnection tries each set of tags in turn until it finds a set of tags with at least one matching member. - `secondary_acceptable_latency_ms`: Any replica-set member whose ping time is within secondary_acceptable_latency_ms of the nearest member may accept reads. Default 15 milliseconds. **Ignored by mongos** and must be configured on the command line. See the localThreshold_ option for more information. - `**kwargs` (optional): additional keyword arguments will be added to the command document before it is sent .. note:: ``command`` ignores the ``network_timeout`` parameter. .. versionchanged:: 2.3 Added `tag_sets` and `secondary_acceptable_latency_ms` options. .. versionchanged:: 2.2 Added support for `as_class` - the class you want to use for the resulting documents .. versionchanged:: 1.6 Added the `value` argument for string commands, and keyword arguments for additional command options. .. versionchanged:: 1.5 `command` can be a string in addition to a full document. .. versionadded:: 1.4 .. mongodoc:: commands .. 
_localThreshold: http://docs.mongodb.org/manual/reference/mongos/#cmdoption-mongos--localThreshold """ if isinstance(command, basestring): command = SON([(command, value)]) command_name = command.keys()[0].lower() must_use_master = kwargs.pop('_use_master', False) if command_name not in rp.secondary_ok_commands: must_use_master = True # Special-case: mapreduce can go to secondaries only if inline if command_name == 'mapreduce': out = command.get('out') or kwargs.get('out') if not isinstance(out, dict) or not out.get('inline'): must_use_master = True extra_opts = { 'as_class': kwargs.pop('as_class', None), 'slave_okay': kwargs.pop('slave_okay', self.slave_okay), '_must_use_master': must_use_master, '_uuid_subtype': uuid_subtype } extra_opts['read_preference'] = kwargs.pop('read_preference', self.read_preference) extra_opts['tag_sets'] = kwargs.pop('tag_sets', self.tag_sets) extra_opts['secondary_acceptable_latency_ms'] = kwargs.pop( 'secondary_acceptable_latency_ms', self.secondary_acceptable_latency_ms) fields = kwargs.get('fields') if fields is not None and not isinstance(fields, dict): kwargs['fields'] = helpers._fields_list_to_dict(fields) command.update(kwargs) result = self["$cmd"].find_one(command, **extra_opts) if check: msg = "command %s failed: %%s" % repr(command).replace("%", "%%") helpers._check_command_response(result, self.connection.disconnect, msg, allowable_errors) return result
# sort, skip and limit are quite similar to shell
res = dbconn.writers.find().sort('name', pymongo.DESCENDING).skip(3).limit(1)
print list(res)

# you can use them as kw arguments
res = dbconn.writers.find(skip=3).sort('name', pymongo.DESCENDING).limit(1)
print list(res)

# to sort by more than one parameter we use a list of tuples, not a dict
res = dbconn.writers.find().sort([('name', pymongo.DESCENDING), ('_id', pymongo.ASCENDING)]).skip(3).limit(1)
print list(res)

# if you want a query whose key order is preserved (e.g. to line up with a
# compound index), use SON built from a list of tuples instead of a plain dict
from bson.son import SON
ordered_query = SON([('name', re.compile('^Miguel')), ('age', {'$lt': 200})])
res = dbconn.writers.find(ordered_query)
print list(res)

#==========================================================================================
#
#  Explain plans
#
#==========================================================================================
from pprint import pprint

pprint(dbconn.writers.find({"name": "Pablo Neruda"}).explain())
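# Hedged follow-up to the SON query above, continuing the same snippet
# (dbconn, pymongo, pprint and ordered_query are defined there). If 'name'
# and 'age' are queried together often, a compound index in the same key
# order lets the planner use it; the index keys here are an assumption.
dbconn.writers.create_index([('name', pymongo.ASCENDING), ('age', pymongo.ASCENDING)])
pprint(dbconn.writers.find(ordered_query).explain())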
def __init__(self, *args, **values): """ Initialise a document or embedded document :param __auto_convert: Try and will cast python objects to Object types :param values: A dictionary of values for the document """ self._initialised = False self._created = True if args: # Combine positional arguments with named arguments. # We only want named arguments. field = iter(self._fields_ordered) # If its an automatic id field then skip to the first defined field if getattr(self, '_auto_id_field', False): next(field) for value in args: name = next(field) if name in values: raise TypeError("Multiple values for keyword argument '" + name + "'") values[name] = value __auto_convert = values.pop("__auto_convert", True) # 399: set default values only to fields loaded from DB __only_fields = set(values.pop("__only_fields", values)) _created = values.pop("_created", True) signals.pre_init.send(self.__class__, document=self, values=values) # Check if there are undefined fields supplied to the constructor, # if so raise an Exception. if not self._dynamic and (self._meta.get('strict', True) or _created): _undefined_fields = set(values.keys()) - set( list(self._fields.keys()) + ['id', 'pk', '_cls', '_text_score']) if _undefined_fields: msg = ("The fields '{0}' do not exist on the document '{1}'" ).format(_undefined_fields, self._class_name) raise FieldDoesNotExist(msg) if self.STRICT and not self._dynamic: self._data = StrictDict.create(allowed_keys=self._fields_ordered)() else: self._data = SemiStrictDict.create( allowed_keys=self._fields_ordered)() self._dynamic_fields = SON() # Assign default values to instance for key, field in self._fields.items(): if self._db_field_map.get(key, key) in __only_fields: continue value = getattr(self, key, None) setattr(self, key, value) if "_cls" not in values: self._cls = self._class_name # Set passed values after initialisation if self._dynamic: dynamic_data = {} for key, value in values.items(): if key in self._fields or key == '_id': setattr(self, key, value) elif self._dynamic: dynamic_data[key] = value else: FileField = _import_class('FileField') for key, value in values.items(): if key == '__auto_convert': continue key = self._reverse_db_field_map.get(key, key) if key in self._fields or key in ('id', 'pk', '_cls'): if __auto_convert and value is not None: field = self._fields.get(key) if field and not isinstance(field, FileField): value = field.to_python(value) setattr(self, key, value) else: self._data[key] = value # Set any get_<field>_display methods self.__set_field_display() if self._dynamic: self._dynamic_lock = False for key, value in dynamic_data.items(): setattr(self, key, value) # Flag initialised self._initialised = True self._created = _created signals.post_init.send(self.__class__, document=self)
def _set_fail_point(self, client, command_args): cmd = SON([('configureFailPoint', 'failCommand')]) cmd.update(command_args) client.admin.command(cmd)
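# Hedged usage sketch for _set_fail_point above: the argument shape follows
# the server's "failCommand" fail point, but the specific command name and
# error code are illustrative, not taken from the original tests.
self._set_fail_point(client, {
    'mode': {'times': 1},
    'data': {'failCommands': ['insert'], 'errorCode': 91},
})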
def count(self, with_limit_and_skip=False): """**DEPRECATED** - Get the size of the results set for this query. The :meth:`count` method is deprecated and **not** supported in a transaction. Please use :meth:`~pymongo.collection.Collection.count_documents` instead. Returns the number of documents in the results set for this query. Does not take :meth:`limit` and :meth:`skip` into account by default - set `with_limit_and_skip` to ``True`` if that is the desired behavior. Raises :class:`~pymongo.errors.OperationFailure` on a database error. When used with MongoDB >= 2.6, :meth:`~count` uses any :meth:`~hint` applied to the query. In the following example the hint is passed to the count command: collection.find({'field': 'value'}).hint('field_1').count() The :meth:`count` method obeys the :attr:`~pymongo.collection.Collection.read_preference` of the :class:`~pymongo.collection.Collection` instance on which :meth:`~pymongo.collection.Collection.find` was called. :Parameters: - `with_limit_and_skip` (optional): take any :meth:`limit` or :meth:`skip` that has been applied to this cursor into account when getting the count .. note:: The `with_limit_and_skip` parameter requires server version **>= 1.1.4-** .. versionchanged:: 3.7 Deprecated. .. versionchanged:: 2.8 The :meth:`~count` method now supports :meth:`~hint`. """ warnings.warn( "count is deprecated. Use Collection.count_documents " "instead.", DeprecationWarning, stacklevel=2) validate_boolean("with_limit_and_skip", with_limit_and_skip) cmd = SON([("count", self.__collection.name), ("query", self.__spec)]) if self.__max_time_ms is not None: cmd["maxTimeMS"] = self.__max_time_ms if self.__comment: cmd["comment"] = self.__comment if self.__hint is not None: cmd["hint"] = self.__hint if with_limit_and_skip: if self.__limit: cmd["limit"] = self.__limit if self.__skip: cmd["skip"] = self.__skip return self.__collection._count(cmd, self.__collation, session=self.__session)
def set_fail_point(self, command_args): cmd = SON([('configureFailPoint', 'onPrimaryTransactionalWrite')]) cmd.update(command_args) client_context.client.admin.command(cmd)
def find_near_neighbor(self, x, y, limit=100): result_list = [] cursor = self.neighbors.find({"position": SON([("$near", {"$geometry": SON([("type", "Point"), ("coordinates", [x, y])])})])}).limit(limit) while (yield cursor.fetch_next): result_list.append(Neighbor.from_mongo(cursor.next_object())) raise tornado.gen.Return(result_list)
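# Hedged note for find_near_neighbor above: a $near query on GeoJSON points
# needs a geospatial index on "position". One way to create it with a plain
# synchronous pymongo client; the database and collection names are
# assumptions.
from pymongo import MongoClient, GEOSPHERE

neighbors = MongoClient()['app_db']['neighbors']
neighbors.create_index([("position", GEOSPHERE)])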
def searchMongoAlerts(mozdefdb): attackers = mozdefdb['attackers'] alerts = mozdefdb['alerts'] # search the last X alerts for IP addresses # aggregated by CIDR mask/24 # aggregate IPv4 addresses in the most recent alerts # to find common attackers. ipv4TopHits = alerts.aggregate([ {"$sort": {"utcepoch":-1}}, # reverse sort the current alerts {"$limit": 100}, #most recent 100 {"$match": {"events.documentsource.details.sourceipaddress":{"$exists": True}}}, # must have an ip address {"$match": {"attackerid":{"$exists": False}}}, # must not be already related to an attacker {"$unwind":"$events"}, #make each event into it's own doc {"$project":{"_id":0, "sourceip":"$events.documentsource.details.sourceipaddress"}}, #emit the source ip only {"$group": {"_id": "$sourceip", "hitcount": {"$sum": 1}}}, # count by ip {"$match":{"hitcount":{"$gt":5}}}, # limit to those with X observances {"$sort": SON([("hitcount", -1), ("_id", -1)])}, # sort {"$limit": 10} # top 10 ]) for ip in ipv4TopHits: # sanity check ip['_id'] which should be the ipv4 address if isIPv4(ip['_id']) and ip['_id'] not in netaddr.IPSet(['0.0.0.0']): ipcidr = netaddr.IPNetwork(ip['_id']) # set CIDR # todo: lookup ipwhois for asn_cidr value # potentially with a max mask value (i.e. asn is /8, limit attackers to /24) ipcidr.prefixlen = 32 # append to or create attacker. # does this match an existing attacker's indicators if not ipcidr.ip.is_loopback() and not ipcidr.ip.is_private() and not ipcidr.ip.is_reserved(): logger.debug('Searching for existing attacker with ip ' + str(ipcidr)) attacker = attackers.find_one({'indicators.ipv4address': str(ipcidr)}) if attacker is None: logger.debug('Attacker not found, creating new one') # new attacker # generate a meteor-compatible ID # save the ES document type, index, id newAttacker = genNewAttacker() # str to get the ip/cidr rather than netblock cidr. # i.e. '1.2.3.4/24' not '1.2.3.0/24' newAttacker['indicators'].append(dict(ipv4address=str(ipcidr))) matchingalerts = alerts.find( {"events.documentsource.details.sourceipaddress": str(ipcidr.ip), }) total_events = 0 if matchingalerts is not None: # update list of alerts this attacker matched. for alert in matchingalerts: newAttacker['alerts'].append( dict(alertid=alert['_id']) ) # update alert with attackerID alert['attackerid'] = newAttacker['_id'] alerts.save(alert) total_events += len(alert['events']) if len(alert['events']) > 0: newAttacker['lastseentimestamp'] = toUTC(alert['events'][-1]['documentsource']['utctimestamp']) newAttacker['alertscount'] = len(newAttacker['alerts']) newAttacker['eventscount'] = total_events attackers.insert(newAttacker) # update geoIP info latestGeoIP = [a['events'] for a in alerts.find( {"events.documentsource.details.sourceipaddress": str(ipcidr.ip), })][-1][0]['documentsource'] updateAttackerGeoIP(mozdefdb, newAttacker['_id'], latestGeoIP) if options.broadcastattackers: broadcastAttacker(newAttacker) else: logger.debug('Found existing attacker') # if alert not present in this attackers list # append this to the list # todo: trim the list at X (i.e. last 100) # search alerts without attackerid matchingalerts = alerts.find( {"events.documentsource.details.sourceipaddress": str(ipcidr.ip), "attackerid":{"$exists": False} }) if matchingalerts is not None: logger.debug('Matched alert with attacker') # update list of alerts this attacker matched. 
for alert in matchingalerts: attacker['alerts'].append( dict(alertid=alert['_id']) ) # update alert with attackerID alert['attackerid'] = attacker['_id'] alerts.save(alert) attacker['eventscount'] += len(alert['events']) attacker['lastseentimestamp'] = toUTC(alert['events'][-1]['documentsource']['utctimestamp']) # geo ip could have changed, update it to the latest updateAttackerGeoIP(mozdefdb, attacker['_id'], alert['events'][-1]['documentsource']) # update counts attacker['alertscount'] = len(attacker['alerts']) attackers.save(attacker) # should we autocategorize the attacker # based on their alerts? if attacker['category'] == 'unknown' and options.autocategorize: # take a look at recent alerts for this attacker # and if they are all the same category # auto-categorize the attacker matchingalerts = alerts.find( {"attackerid":attacker['_id']} ).sort('utcepoch', -1).limit(50) # summarize the alert categories # returns list of tuples: [(u'bruteforce', 8)] categoryCounts= mostCommon(matchingalerts,'category') #are the alerts all the same category? if len(categoryCounts) == 1: #is the alert category mapped to an attacker category? for category in options.categorymapping: if category.keys()[0] == categoryCounts[0][0]: attacker['category'] = category[category.keys()[0]] attackers.save(attacker)
import os

try:
    _SEEK_SET = os.SEEK_SET
    _SEEK_CUR = os.SEEK_CUR
    _SEEK_END = os.SEEK_END
# before 2.5
except AttributeError:
    _SEEK_SET = 0
    _SEEK_CUR = 1
    _SEEK_END = 2

EMPTY = b""
NEWLN = b"\n"

# Slightly under a power of 2, to work well with server's record allocations.
DEFAULT_CHUNK_SIZE = 255 * 1024
"""Default chunk size, in bytes."""

_C_INDEX = SON([("files_id", ASCENDING), ("n", ASCENDING)])
_F_INDEX = SON([("filename", ASCENDING), ("uploadDate", ASCENDING)])


def _grid_in_property(field_name, docstring, read_only=False,
                      closed_only=False):
    """Create a GridIn property."""
    def getter(self):
        if closed_only and not self._closed:
            raise AttributeError("can only get %r on a closed file" %
                                 field_name)
        # Protect against PHP-237
        if field_name == 'length':
            return self._file.get(field_name, 0)
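# Hedged sketch of how the two SON index specs above are typically applied.
# "fs" is the default GridFS prefix and the unique flag on the chunks index
# follows standard GridFS behaviour; the database name is an assumption.
from pymongo import MongoClient

db = MongoClient().gridfs_example
db.fs.chunks.create_index(list(_C_INDEX.items()), unique=True)
db.fs.files.create_index(list(_F_INDEX.items()))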
def tearDown(self): client_context.client.admin.command( SON([('configureFailPoint', 'onPrimaryTransactionalWrite'), ('mode', 'off')]))
def _element_to_bson(key, value, check_keys, uuid_subtype): if not isinstance(key, basestring): raise InvalidDocument("documents must have only string keys, " "key was %r" % key) if check_keys: if key.startswith("$"): raise InvalidDocument("key %r must not start with '$'" % key) if "." in key: raise InvalidDocument("key %r must not contain '.'" % key) name = _make_c_string(key, True) if isinstance(value, float): return "\x01" + name + struct.pack("<d", value) # Use Binary w/ subtype 3 for UUID instances if _use_uuid: if isinstance(value, uuid.UUID): value = Binary(value.bytes, subtype=uuid_subtype) if isinstance(value, Binary): subtype = value.subtype if subtype == 2: value = struct.pack("<i", len(value)) + value return "\x05%s%s%s%s" % (name, struct.pack( "<i", len(value)), chr(subtype), value) if isinstance(value, Code): cstring = _make_c_string(value) if not value.scope: length = struct.pack("<i", len(cstring)) return "\x0D" + name + length + cstring scope = _dict_to_bson(value.scope, False, uuid_subtype, False) full_length = struct.pack("<i", 8 + len(cstring) + len(scope)) length = struct.pack("<i", len(cstring)) return "\x0F" + name + full_length + length + cstring + scope if isinstance(value, str): cstring = _make_c_string(value) length = struct.pack("<i", len(cstring)) return "\x02" + name + length + cstring if isinstance(value, unicode): cstring = _make_c_string(value) length = struct.pack("<i", len(cstring)) return "\x02" + name + length + cstring if isinstance(value, dict): return "\x03" + name + _dict_to_bson(value, check_keys, uuid_subtype, False) if isinstance(value, (list, tuple)): as_dict = SON(zip([str(i) for i in range(len(value))], value)) return "\x04" + name + _dict_to_bson(as_dict, check_keys, uuid_subtype, False) if isinstance(value, ObjectId): return "\x07" + name + value.binary if value is True: return "\x08" + name + "\x01" if value is False: return "\x08" + name + "\x00" if isinstance(value, int): # TODO this is an ugly way to check for this... 
if value > MAX_INT64 or value < MIN_INT64: raise OverflowError("BSON can only handle up to 8-byte ints") if value > MAX_INT32 or value < MIN_INT32: return "\x12" + name + struct.pack("<q", value) return "\x10" + name + struct.pack("<i", value) if isinstance(value, long): # XXX No long type in Python 3 if value > MAX_INT64 or value < MIN_INT64: raise OverflowError("BSON can only handle up to 8-byte ints") return "\x12" + name + struct.pack("<q", value) if isinstance(value, datetime.datetime): if value.utcoffset() is not None: value = value - value.utcoffset() millis = int( calendar.timegm(value.timetuple()) * 1000 + value.microsecond / 1000) return "\x09" + name + struct.pack("<q", millis) if isinstance(value, Timestamp): time = struct.pack("<I", value.time) inc = struct.pack("<I", value.inc) return "\x11" + name + inc + time if value is None: return "\x0A" + name if isinstance(value, RE_TYPE): pattern = value.pattern flags = "" if value.flags & re.IGNORECASE: flags += "i" if value.flags & re.LOCALE: flags += "l" if value.flags & re.MULTILINE: flags += "m" if value.flags & re.DOTALL: flags += "s" if value.flags & re.UNICODE: flags += "u" if value.flags & re.VERBOSE: flags += "x" return "\x0B" + name + _make_c_string(pattern, True) + \ _make_c_string(flags) if isinstance(value, DBRef): return _element_to_bson(key, value.as_doc(), False, uuid_subtype) if isinstance(value, MinKey): return "\xFF" + name if isinstance(value, MaxKey): return "\x7F" + name raise InvalidDocument("cannot convert value of type %s to bson" % type(value))
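# Hedged illustration of the public entry point that drives the element
# encoder above in this PyMongo era: BSON.encode preserves SON key order,
# which matters for commands such as findandmodify where the verb must come
# first. Collection and field names are illustrative.
from bson import BSON
from bson.son import SON

raw = BSON.encode(SON([("findandmodify", "things"), ("query", {"x": 1})]))
print(BSON(raw).decode())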
def register_exit(self, id_detected, date=None, time=datetime.now().time(), override=False): if date is None: date = self.date entry_find = SON({ "employee id": id_detected, "date": SON({ "year": int(date.year), "month": int(date.month), "day": int(date.day) }), "entry": { "$ne": None } }), SON({ "entry": 1, "_id": 0 }) entry_query = self.db.attendance_collection.find( entry_find[0], entry_find[1]) entry_query_list = list(entry_query) if len(entry_query_list) == 0: raise QueryError(' '.join([ "employee id=", str(id_detected), "didn't register entry at date", str(date), "and therefore cannot register exit" ])) already_registered_exit_find = SON({ "employee id": id_detected, "date": SON({ "year": date.year, "month": date.month, "day": date.day }), "exit": { "$ne": None } }) already_registered_exit_query = self.db.attendance_collection.find( already_registered_exit_find) if override == False and len(list(already_registered_exit_query)) > 0: raise QueryError(' '.join([ "employee id=", str(id_detected), "already registered exit at date", str(date), "\nmust allow override in order to update exit" ])) else: date_query = { "employee id": id_detected, "date": SON({ "year": date.year, "month": date.month, "day": date.day }) } update_entry_date_and_time = [ entry_query_list[0]["entry"]["hour"], entry_query_list[0]["entry"]["minute"], entry_query_list[0]["entry"]["second"] ] update_exit_date_and_time = [time.hour, time.minute, time.second] hours, minutes, seconds = calculate_total( update_entry_date_and_time, update_exit_date_and_time) update_exit_and_total = { "$set": { "exit": SON({ "hour": time.hour, "minute": time.minute, "second": time.second }), "total": SON({ "hours": hours, "minutes": minutes, "seconds": seconds }) } } self.db.attendance_collection.update_one(date_query, update_exit_and_total) if override: print("".join([ "database exit and total updated for employee id=", str(id_detected) ])) else: print("".join([ "database exit and total registered for employee id=", str(id_detected) ]))
def to_mongo(self, use_db_field=True, fields=None): """ Return as SON data ready for use with MongoDB. """ if not fields: fields = [] data = SON() data["_id"] = None data['_cls'] = self._class_name # only root fields ['test1.a', 'test2'] => ['test1', 'test2'] root_fields = set([f.split('.')[0] for f in fields]) for field_name in self: if root_fields and field_name not in root_fields: continue value = self._data.get(field_name, None) field = self._fields.get(field_name) if field is None and self._dynamic: field = self._dynamic_fields.get(field_name) if value is not None: f_inputs = field.to_mongo.__code__.co_varnames ex_vars = {} if fields and 'fields' in f_inputs: key = '%s.' % field_name embedded_fields = [ i.replace(key, '') for i in fields if i.startswith(key) ] ex_vars['fields'] = embedded_fields if 'use_db_field' in f_inputs: ex_vars['use_db_field'] = use_db_field value = field.to_mongo(value, **ex_vars) # Handle self generating fields if value is None and field._auto_gen: value = field.generate() self._data[field_name] = value if value is not None: if use_db_field: data[field.db_field] = value else: data[field.name] = value # If "_id" has not been set, then try and set it Document = _import_class("Document") if isinstance(self, Document): if data["_id"] is None: data["_id"] = self._data.get("id", None) if data['_id'] is None: data.pop('_id') # Only add _cls if allow_inheritance is True if (not hasattr(self, '_meta') or not self._meta.get('allow_inheritance', ALLOW_INHERITANCE)): data.pop('_cls') return data
def _command(self, command, value=1, check=True, allowable_errors=None, uuid_subtype=OLD_UUID_SUBTYPE, compile_re=True, **kwargs): """Internal command helper. """ if isinstance(command, str): command = SON([(command, value)]) command_name = list(command.keys())[0].lower() must_use_master = kwargs.pop('_use_master', False) if command_name not in secondary_ok_commands: must_use_master = True # Special-case: mapreduce can go to secondaries only if inline if command_name == 'mapreduce': out = command.get('out') or kwargs.get('out') if not isinstance(out, dict) or not out.get('inline'): must_use_master = True # Special-case: aggregate with $out cannot go to secondaries. if command_name == 'aggregate': for stage in kwargs.get('pipeline', []): if '$out' in stage: must_use_master = True break extra_opts = { 'as_class': kwargs.pop('as_class', None), 'slave_okay': kwargs.pop('slave_okay', self.slave_okay), '_must_use_master': must_use_master, '_uuid_subtype': uuid_subtype } extra_opts['read_preference'] = kwargs.pop('read_preference', self.read_preference) extra_opts['tag_sets'] = kwargs.pop('tag_sets', self.tag_sets) extra_opts['secondary_acceptable_latency_ms'] = kwargs.pop( 'secondary_acceptable_latency_ms', self.secondary_acceptable_latency_ms) extra_opts['compile_re'] = compile_re fields = kwargs.get('fields') if fields is not None and not isinstance(fields, dict): kwargs['fields'] = helpers._fields_list_to_dict(fields) command.update(kwargs) # Warn if must_use_master will override read_preference. if (extra_opts['read_preference'] != ReadPreference.PRIMARY and extra_opts['_must_use_master']): warnings.warn("%s does not support %s read preference " "and will be routed to the primary instead." % (command_name, modes[extra_opts['read_preference']]), UserWarning, stacklevel=3) cursor = self["$cmd"].find(command, **extra_opts).limit(-1) for doc in cursor: result = doc if check: msg = "command %s failed: %%s" % repr(command).replace("%", "%%") helpers._check_command_response(result, self.connection.disconnect, msg, allowable_errors) return result, cursor.conn_id
def run_query(self, query, user): db = self._get_db() logger.debug("mongodb connection string: %s", self.configuration['connectionString']) logger.debug("mongodb got query: %s", query) try: query_data = parse_query_json(query) except ValueError: return None, "Invalid query format. The query is not a valid JSON." if "collection" not in query_data: return None, "'collection' must have a value to run a query" else: collection = query_data["collection"] q = query_data.get("query", None) f = None aggregate = query_data.get("aggregate", None) if aggregate: for step in aggregate: if "$sort" in step: sort_list = [] for sort_item in step["$sort"]: sort_list.append((sort_item["name"], sort_item["direction"])) step["$sort"] = SON(sort_list) if not aggregate: s = None if "sort" in query_data and query_data["sort"]: s = [] for field in query_data["sort"]: s.append((field["name"], field["direction"])) if "fields" in query_data: f = query_data["fields"] s = None if "sort" in query_data and query_data["sort"]: s = [] for field_data in query_data["sort"]: s.append((field_data["name"], field_data["direction"])) columns = [] rows = [] cursor = None if q or (not q and not aggregate): if s: cursor = db[collection].find(q, f).sort(s) else: cursor = db[collection].find(q, f) if "skip" in query_data: cursor = cursor.skip(query_data["skip"]) if "limit" in query_data: cursor = cursor.limit(query_data["limit"]) if "count" in query_data: cursor = cursor.count() elif aggregate: allow_disk_use = query_data.get('allowDiskUse', False) r = db[collection].aggregate(aggregate, allowDiskUse=allow_disk_use) # Backwards compatibility with older pymongo versions. # # Older pymongo version would return a dictionary from an aggregate command. # The dict would contain a "result" key which would hold the cursor. # Newer ones return pymongo.command_cursor.CommandCursor. if isinstance(r, dict): cursor = r["result"] else: cursor = r if "count" in query_data: columns.append({ "name" : "count", "friendly_name" : "count", "type" : TYPE_INTEGER }) rows.append({ "count" : cursor }) else: rows, columns = parse_results(cursor) if f: ordered_columns = [] for k in sorted(f, key=f.get): column = _get_column_by_name(columns, k) if column: ordered_columns.append(column) columns = ordered_columns data = { "columns": columns, "rows": rows } error = None json_data = json.dumps(data, cls=MongoDBJSONEncoder) return json_data, error
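# Hedged example of the query document run_query above parses; the collection
# and field names are illustrative. Note that aggregate "$sort" stages are
# given as a list of {"name": ..., "direction": ...} pairs, which the runner
# converts into a SON so key order is preserved.
example_query = '''
{
    "collection": "events",
    "aggregate": [
        {"$group": {"_id": "$user", "count": {"$sum": 1}}},
        {"$sort": [{"name": "count", "direction": -1}]}
    ]
}
'''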
def test_maybe_add_read_preference(self): # Primary doesn't add $readPreference out = _maybe_add_read_preference({}, Primary()) self.assertEqual(out, {}) pref = PrimaryPreferred() out = _maybe_add_read_preference({}, pref) self.assertEqual( out, SON([("$query", {}), ("$readPreference", pref.document)])) pref = PrimaryPreferred(tag_sets=[{'dc': 'nyc'}]) out = _maybe_add_read_preference({}, pref) self.assertEqual( out, SON([("$query", {}), ("$readPreference", pref.document)])) pref = Secondary() out = _maybe_add_read_preference({}, pref) self.assertEqual( out, SON([("$query", {}), ("$readPreference", pref.document)])) pref = Secondary(tag_sets=[{'dc': 'nyc'}]) out = _maybe_add_read_preference({}, pref) self.assertEqual( out, SON([("$query", {}), ("$readPreference", pref.document)])) # SecondaryPreferred without tag_sets doesn't add $readPreference pref = SecondaryPreferred() out = _maybe_add_read_preference({}, pref) self.assertEqual(out, {}) pref = SecondaryPreferred(tag_sets=[{'dc': 'nyc'}]) out = _maybe_add_read_preference({}, pref) self.assertEqual( out, SON([("$query", {}), ("$readPreference", pref.document)])) pref = Nearest() out = _maybe_add_read_preference({}, pref) self.assertEqual( out, SON([("$query", {}), ("$readPreference", pref.document)])) pref = Nearest(tag_sets=[{'dc': 'nyc'}]) out = _maybe_add_read_preference({}, pref) self.assertEqual( out, SON([("$query", {}), ("$readPreference", pref.document)])) criteria = SON([("$query", {}), ("$orderby", SON([("_id", 1)]))]) pref = Nearest() out = _maybe_add_read_preference(criteria, pref) self.assertEqual( out, SON([("$query", {}), ("$orderby", SON([("_id", 1)])), ("$readPreference", pref.document)])) pref = Nearest(tag_sets=[{'dc': 'nyc'}]) out = _maybe_add_read_preference(criteria, pref) self.assertEqual( out, SON([("$query", {}), ("$orderby", SON([("_id", 1)])), ("$readPreference", pref.document)]))
def get_market_price_history(asset1, asset2, start_ts=None, end_ts=None, as_dict=False): """Return block-by-block aggregated market history data for the specified asset pair, within the specified date range. @returns List of lists (or list of dicts, if as_dict is specified). * If as_dict is False, each embedded list has 8 elements [block time (epoch in MS), open, high, low, close, volume, # trades in block, block index] * If as_dict is True, each dict in the list has the keys: block_time (epoch in MS), block_index, open, high, low, close, vol, count Aggregate on an an hourly basis """ now_ts = calendar.timegm(time.gmtime()) if not end_ts: # default to current datetime end_ts = now_ts if not start_ts: # default to 180 days before the end date start_ts = end_ts - (180 * 24 * 60 * 60) base_asset, quote_asset = util.assets_to_asset_pair(asset1, asset2) # get ticks -- open, high, low, close, volume result = config.mongo_db.trades.aggregate([ { "$match": { "base_asset": base_asset, "quote_asset": quote_asset, "block_time": { "$gte": datetime.datetime.utcfromtimestamp(start_ts) } if end_ts == now_ts else { "$gte": datetime.datetime.utcfromtimestamp(start_ts), "$lte": datetime.datetime.utcfromtimestamp(end_ts) } } }, { "$project": { "year": { "$year": "$block_time" }, "month": { "$month": "$block_time" }, "day": { "$dayOfMonth": "$block_time" }, "hour": { "$hour": "$block_time" }, "block_index": 1, "unit_price": 1, "base_quantity_normalized": 1 # to derive volume } }, { "$group": { "_id": { "year": "$year", "month": "$month", "day": "$day", "hour": "$hour" }, "open": { "$first": "$unit_price" }, "high": { "$max": "$unit_price" }, "low": { "$min": "$unit_price" }, "close": { "$last": "$unit_price" }, "vol": { "$sum": "$base_quantity_normalized" }, "count": { "$sum": 1 }, } }, { "$sort": SON([("_id.year", pymongo.ASCENDING), ("_id.month", pymongo.ASCENDING), ("_id.day", pymongo.ASCENDING), ("_id.hour", pymongo.ASCENDING)]) }, ]) result = list(result) if not len(result): return False midline = [((r['high'] + r['low']) / 2.0) for r in result] if as_dict: for i in range(len(result)): result[i]['interval_time'] = int( calendar.timegm( datetime.datetime( result[i]['_id']['year'], result[i]['_id']['month'], result[i]['_id']['day'], result[i]['_id']['hour']).timetuple()) * 1000) result[i]['midline'] = midline[i] del result[i]['_id'] return result else: list_result = [] for i in range(len(result)): list_result.append([ int( calendar.timegm( datetime.datetime( result[i]['_id']['year'], result[i]['_id']['month'], result[i]['_id']['day'], result[i]['_id']['hour']).timetuple()) * 1000), result[i]['open'], result[i]['high'], result[i]['low'], result[i]['close'], result[i]['vol'], result[i]['count'], midline[i] ]) return list_result
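# Hedged usage sketch for get_market_price_history above; the asset pair is an
# illustrative placeholder. In as_dict mode each bucket carries interval_time,
# open/high/low/close, vol, count and midline, per the code above.
history = get_market_price_history('XCP', 'BTC', as_dict=True)
if history:
    for bucket in history[:3]:
        print(bucket['interval_time'], bucket['open'], bucket['close'], bucket['vol'])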