def execute(self, write_concern):
    """Execute operations.
    """
    if not self.ops:
        raise InvalidOperation('No operations to execute')
    if self.executed:
        raise InvalidOperation('Bulk operations can '
                               'only be executed once.')
    self.executed = True
    write_concern = (WriteConcern(**write_concern) if
                     write_concern else self.collection.write_concern)

    if self.ordered:
        generator = self.gen_ordered()
    else:
        generator = self.gen_unordered()

    client = self.collection.database.client
    with client._socket_for_writes() as sock_info:
        if sock_info.max_wire_version < 5 and self.uses_collation:
            raise ConfigurationError(
                'Must be connected to MongoDB 3.4+ to use a collation.')
        if not write_concern.acknowledged:
            if self.uses_collation:
                raise ConfigurationError(
                    'Collation is unsupported for unacknowledged writes.')
            self.execute_no_results(sock_info, generator)
        elif sock_info.max_wire_version > 1:
            return self.execute_command(sock_info, generator, write_concern)
        else:
            return self.execute_legacy(sock_info, generator, write_concern)
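# A minimal usage sketch (not library code), assuming a mongod on
# localhost and the PyMongo 3.x bulk builder API, which funnels into
# the execute() method above:
from pymongo import MongoClient

coll = MongoClient().test.example
bulk = coll.initialize_ordered_bulk_op()
bulk.insert({'_id': 1})
bulk.find({'_id': 1}).update({'$set': {'x': 1}})
result = bulk.execute()   # a second execute() raises InvalidOperation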
def _raise_if_unacknowledged(self, property_name):
    """Raise an exception on property access if unacknowledged."""
    if not self.__acknowledged:
        raise InvalidOperation("A value for %s is not available when "
                               "the write is unacknowledged. Check the "
                               "acknowledged attribute to avoid this "
                               "error." % (property_name,))
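# A hedged sketch of the behavior above: with w=0 the server sends no
# reply, so result properties guarded by _raise_if_unacknowledged are
# unavailable. Assumes a local mongod.
from pymongo import MongoClient
from pymongo.errors import InvalidOperation
from pymongo.write_concern import WriteConcern

unacked = MongoClient().test.example.with_options(
    write_concern=WriteConcern(w=0))
result = unacked.delete_one({'x': 1})
assert result.acknowledged is False
try:
    result.deleted_count          # goes through _raise_if_unacknowledged
except InvalidOperation:
    pass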
def add_option(self, mask):
    """Set arbitrary query flags using a bitmask.

    To set the tailable flag:
    cursor.add_option(2)
    """
    if not isinstance(mask, int):
        raise TypeError("mask must be an int")
    self.__check_okay_to_chain()

    if mask & _QUERY_OPTIONS["exhaust"]:
        if self.__limit:
            raise InvalidOperation("Can't use limit and exhaust together.")
        if self.__collection.database.client.is_mongos:
            raise InvalidOperation('Exhaust cursors are '
                                   'not supported by mongos')
        self.__exhaust = True

    self.__query_flags |= mask
    return self
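# A short chaining sketch, assuming a `coll` handle as in the sketches
# above. The masks are the OP_QUERY wire-protocol bits (tailable is
# bit 1 == 2, await-data is bit 5 == 32); setting both by hand is
# equivalent to using CursorType.TAILABLE_AWAIT.
cursor = coll.find().add_option(2).add_option(32)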
def insert(collection_name, docs, check_keys,
           safe, last_error_args, continue_on_error, opts):
    """Get an **insert** message."""
    options = 0
    if continue_on_error:
        options += 1
    data = struct.pack("<i", options)
    data += bson._make_c_string(collection_name)
    encoded = [bson.BSON.encode(doc, check_keys, opts) for doc in docs]
    if not encoded:
        raise InvalidOperation("cannot do an empty bulk insert")
    max_bson_size = max(map(len, encoded))
    data += _EMPTY.join(encoded)
    if safe:
        (_, insert_message) = __pack_message(2002, data)
        (request_id, error_message, _) = __last_error(collection_name,
                                                      last_error_args)
        return (request_id, insert_message + error_message, max_bson_size)
    else:
        (request_id, insert_message) = __pack_message(2002, data)
        return (request_id, insert_message, max_bson_size)
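# A hedged illustration of the header this builder emits. Per the
# MongoDB wire protocol, the OP_INSERT body starts with a little-endian
# int32 flag word (bit 0 is ContinueOnError) followed by the
# NUL-terminated collection name; `_op_insert_prefix` is a hypothetical
# helper written only to show that layout.
import struct

def _op_insert_prefix(collection_name, continue_on_error):
    flags = 1 if continue_on_error else 0
    return (struct.pack("<i", flags) +
            collection_name.encode("utf-8") + b"\x00")

assert _op_insert_prefix("db.coll", True)[:4] == b"\x01\x00\x00\x00"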
def limit(self, limit):
    """Limits the number of results to be returned by this cursor.

    Raises :exc:`TypeError` if `limit` is not an integer. Raises
    :exc:`~pymongo.errors.InvalidOperation` if this :class:`Cursor`
    has already been used. The last `limit` applied to this cursor
    takes precedence. A limit of ``0`` is equivalent to no limit.

    :Parameters:
      - `limit`: the number of results to return

    .. mongodoc:: limit
    """
    if not isinstance(limit, integer_types):
        raise TypeError("limit must be an integer")
    if self.__exhaust:
        raise InvalidOperation("Can't use limit and exhaust together.")
    self.__check_okay_to_chain()

    self.__empty = False
    self.__limit = limit
    return self
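# Usage sketch, assuming the `coll` handle from above: the last limit
# applied wins, and limit(0) means "no limit", as the docstring states.
first_ten = list(coll.find().limit(100).limit(10))   # at most 10 docs
everything = coll.find().limit(0)                    # no limit at all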
def __check_okay_to_chain(self):
    """Check if it is okay to chain more options onto this cursor.
    """
    if self.__retrieved or self.__id is not None:
        raise InvalidOperation("cannot set options after executing query")
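# Demonstration sketch (assumes `coll` from above): once a batch has
# been retrieved, every chaining method fails through this check.
from pymongo.errors import InvalidOperation

cursor = coll.find()
next(cursor)                      # executes the query
try:
    cursor.limit(5)               # __check_okay_to_chain raises here
except InvalidOperation:
    pass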
def __init__(self, collection, filter=None, projection=None, skip=0,
             limit=0, no_cursor_timeout=False,
             cursor_type=CursorType.NON_TAILABLE,
             sort=None, allow_partial_results=False, oplog_replay=False,
             modifiers=None, batch_size=0, manipulate=True,
             collation=None, hint=None, max_scan=None, max_time_ms=None,
             max=None, min=None, return_key=False, show_record_id=False,
             snapshot=False, comment=None):
    """Create a new cursor.

    Should not be called directly by application developers - see
    :meth:`~pymongo.collection.Collection.find` instead.

    .. mongodoc:: cursors
    """
    self.__id = None
    self.__exhaust = False
    self.__exhaust_mgr = None

    spec = filter
    if spec is None:
        spec = {}

    validate_is_mapping("filter", spec)
    if not isinstance(skip, int):
        raise TypeError("skip must be an instance of int")
    if not isinstance(limit, int):
        raise TypeError("limit must be an instance of int")
    validate_boolean("no_cursor_timeout", no_cursor_timeout)
    if cursor_type not in (CursorType.NON_TAILABLE, CursorType.TAILABLE,
                           CursorType.TAILABLE_AWAIT, CursorType.EXHAUST):
        raise ValueError("not a valid value for cursor_type")
    validate_boolean("allow_partial_results", allow_partial_results)
    validate_boolean("oplog_replay", oplog_replay)
    if modifiers is not None:
        warnings.warn("the 'modifiers' parameter is deprecated",
                      DeprecationWarning, stacklevel=2)
        validate_is_mapping("modifiers", modifiers)
    if not isinstance(batch_size, integer_types):
        raise TypeError("batch_size must be an integer")
    if batch_size < 0:
        raise ValueError("batch_size must be >= 0")

    if projection is not None:
        if not projection:
            projection = {"_id": 1}
        projection = helpers._fields_list_to_dict(projection, "projection")

    self.__collection = collection
    self.__spec = spec
    self.__projection = projection
    self.__skip = skip
    self.__limit = limit
    self.__batch_size = batch_size
    self.__modifiers = modifiers and modifiers.copy() or {}
    self.__ordering = sort and helpers._index_document(sort) or None
    self.__max_scan = max_scan
    self.__explain = False
    self.__comment = comment
    self.__max_time_ms = max_time_ms
    self.__max_await_time_ms = None
    self.__max = max
    self.__min = min
    self.__manipulate = manipulate
    self.__collation = validate_collation_or_none(collation)
    self.__return_key = return_key
    self.__show_record_id = show_record_id
    self.__snapshot = snapshot
    self.__set_hint(hint)

    # Exhaust cursor support
    if cursor_type == CursorType.EXHAUST:
        if self.__collection.database.client.is_mongos:
            raise InvalidOperation('Exhaust cursors are '
                                   'not supported by mongos')
        if limit:
            raise InvalidOperation("Can't use limit and exhaust together.")
        self.__exhaust = True

    # This is ugly. People want to be able to do cursor[5:5] and
    # get an empty result set (old behavior was an
    # exception). It's hard to do that right, though, because the
    # server uses limit(0) to mean 'no limit'. So we set __empty
    # in that case and check for it when iterating. We also unset
    # it anytime we change __limit.
    self.__empty = False

    self.__data = deque()
    self.__address = None
    self.__retrieved = 0
    self.__killed = False

    self.__codec_options = collection.codec_options
    self.__read_preference = collection.read_preference
    self.__read_concern = collection.read_concern

    self.__query_flags = cursor_type
    if self.__read_preference != ReadPreference.PRIMARY:
        self.__query_flags |= _QUERY_OPTIONS["slave_okay"]
    if no_cursor_timeout:
        self.__query_flags |= _QUERY_OPTIONS["no_timeout"]
    if allow_partial_results:
        self.__query_flags |= _QUERY_OPTIONS["partial"]
    if oplog_replay:
        self.__query_flags |= _QUERY_OPTIONS["oplog_replay"]
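# Application code normally reaches this constructor through
# Collection.find. A hedged example of the parameters validated above,
# again assuming the `coll` handle from the earlier sketches:
from pymongo.cursor import CursorType

cursor = coll.find(
    {'status': 'active'},         # filter: must be a mapping
    ['name'],                     # projection: a list becomes a dict
    skip=5, limit=10,             # both must be ints
    cursor_type=CursorType.NON_TAILABLE,
    no_cursor_timeout=False)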
def _do_batched_write_command(namespace, operation, command,
                              docs, check_keys, opts, ctx):
    """Execute a batch of insert, update, or delete commands.
    """
    max_bson_size = ctx.max_bson_size
    max_write_batch_size = ctx.max_write_batch_size
    # Max BSON object size + 16k - 2 bytes for ending NUL bytes.
    # Server guarantees there is enough room: SERVER-10643.
    max_cmd_size = max_bson_size + _COMMAND_OVERHEAD

    ordered = command.get('ordered', True)

    buf = StringIO()
    # Save space for message length and request id
    buf.write(_ZERO_64)
    # responseTo, opCode
    buf.write(b"\x00\x00\x00\x00\xd4\x07\x00\x00")
    # No options
    buf.write(_ZERO_32)
    # Namespace as C string
    buf.write(b(namespace))
    buf.write(_ZERO_8)
    # Skip: 0, Limit: -1
    buf.write(_SKIPLIM)

    # Where to write command document length
    command_start = buf.tell()
    buf.write(bson.BSON.encode(command))

    # Start of payload
    buf.seek(-1, 2)
    # Work around some Jython weirdness.
    buf.truncate()
    try:
        buf.write(_OP_MAP[operation])
    except KeyError:
        raise InvalidOperation('Unknown command')

    if operation in (_UPDATE, _DELETE):
        check_keys = False

    # Where to write list document length
    list_start = buf.tell() - 4
    to_send = []

    def send_message():
        """Finalize and send the current OP_QUERY message.
        """
        # Close list and command documents
        buf.write(_ZERO_16)

        # Write document lengths and request id
        length = buf.tell()
        buf.seek(list_start)
        buf.write(struct.pack('<i', length - list_start - 1))
        buf.seek(command_start)
        buf.write(struct.pack('<i', length - command_start))
        buf.seek(4)
        request_id = _randint()
        buf.write(struct.pack('<i', request_id))
        buf.seek(0)
        buf.write(struct.pack('<i', length))

        return ctx.write_command(request_id, buf.getvalue(), to_send)

    # If there are multiple batches we'll
    # merge results in the caller.
    results = []

    idx = 0
    idx_offset = 0
    has_docs = False
    for doc in docs:
        has_docs = True
        # Encode the current operation
        key = b(str(idx))
        value = bson.BSON.encode(doc, check_keys, opts)
        # Send a batch?
        enough_data = (buf.tell() + len(key) + len(value) + 2) >= max_cmd_size
        enough_documents = (idx >= max_write_batch_size)
        if enough_data or enough_documents:
            if not idx:
                write_op = "insert" if operation == _INSERT else None
                _raise_document_too_large(
                    write_op, len(value), max_bson_size)
            result = send_message()
            results.append((idx_offset, result))
            if ordered and "writeErrors" in result:
                return results

            # Truncate back to the start of list elements
            buf.seek(list_start + 4)
            buf.truncate()
            idx_offset += idx
            idx = 0
            key = b'0'
            to_send = []
        buf.write(_BSONOBJ)
        buf.write(key)
        buf.write(_ZERO_8)
        buf.write(value)
        to_send.append(doc)
        idx += 1

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk write")

    results.append((idx_offset, send_message()))
    return results
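# The split condition above, restated as a standalone sketch for
# clarity (`_should_flush` is hypothetical, not library code): a batch
# is flushed once the buffer plus the next "<key> NUL <value>" pair
# would reach max_cmd_size (the max BSON size plus the 16KB headroom
# the server guarantees, SERVER-10643), or once max_write_batch_size
# operations have been queued.
def _should_flush(buffered_bytes, key, value, max_cmd_size,
                  idx, max_write_batch_size):
    enough_data = (buffered_bytes + len(key) + len(value) + 2) >= max_cmd_size
    enough_documents = idx >= max_write_batch_size
    return enough_data or enough_documents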
def _do_batched_insert(collection_name, docs, check_keys,
                       safe, last_error_args, continue_on_error, opts,
                       ctx):
    """Insert `docs` using multiple batches.
    """
    def _insert_message(insert_message, send_safe):
        """Build the insert message with header and GLE.
        """
        request_id, final_message = __pack_message(2002, insert_message)
        if send_safe:
            request_id, error_message, _ = __last_error(collection_name,
                                                        last_error_args)
            final_message += error_message
        return request_id, final_message

    send_safe = safe or not continue_on_error
    last_error = None
    data = StringIO()
    data.write(struct.pack("<i", int(continue_on_error)))
    data.write(bson._make_c_string(collection_name))
    message_length = begin_loc = data.tell()
    has_docs = False
    to_send = []
    for doc in docs:
        encoded = bson.BSON.encode(doc, check_keys, opts)
        encoded_length = len(encoded)
        too_large = (encoded_length > ctx.max_bson_size)

        message_length += encoded_length
        if message_length < ctx.max_message_size and not too_large:
            data.write(encoded)
            to_send.append(doc)
            has_docs = True
            continue

        if has_docs:
            # We have enough data, send this message.
            try:
                request_id, msg = _insert_message(data.getvalue(), send_safe)
                ctx.legacy_write(request_id, msg, 0, send_safe, to_send)
            # Exception type could be OperationFailure or a subtype
            # (e.g. DuplicateKeyError)
            except OperationFailure as exc:
                # Like it says, continue on error...
                if continue_on_error:
                    # Store exception details to re-raise after the
                    # final batch.
                    last_error = exc
                # With unacknowledged writes just return at the first error.
                elif not safe:
                    return
                # With acknowledged writes raise immediately.
                else:
                    raise

        if too_large:
            _raise_document_too_large(
                "insert", encoded_length, ctx.max_bson_size)

        message_length = begin_loc + encoded_length
        data.seek(begin_loc)
        data.truncate()
        data.write(encoded)
        to_send = [doc]

    if not has_docs:
        raise InvalidOperation("cannot do an empty bulk insert")

    request_id, msg = _insert_message(data.getvalue(), safe)
    ctx.legacy_write(request_id, msg, 0, safe, to_send)

    # Re-raise any exception stored due to continue_on_error
    if last_error is not None:
        raise last_error
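# A sketch of the user-visible contract, via the deprecated legacy
# Collection.insert API that drives this helper (assumes a local mongod
# and an empty `coll` from the earlier sketches):
from pymongo.errors import DuplicateKeyError

try:
    coll.insert([{'_id': 1}, {'_id': 1}, {'_id': 2}],
                continue_on_error=True)
except DuplicateKeyError:
    pass                           # error re-raised after the final batch
assert coll.find_one({'_id': 2}) is not None   # later doc still inserted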