Пример #1
0
    def insert_many(self, entries):
        """
        Insert an iterable of documents.

        In the event that a BulkWriteError occurs (using an azure server where the throughput (RU/s) is exceeded),
        this method will attempt to continue in a slower batched mode, resuming at the index of the
        first failed document.

        :param entries: iterable of documents; anything that is not a list or tuple is copied into a
                        list first because the batched fallback needs ``len()`` and slicing
        :return: pymongo.results.InsertManyResult; an empty, unacknowledged result when no collection
                 is available. After a batched retry the ids of the documents written before the
                 error are reported as ``None`` because they cannot be recovered.
        :raises BulkWriteError: when the error is not the throughput error handled here, or when a
                                batch fails again during the retry
        :raises ValueError: when the number of documents added to the collection after the batched
                            retry does not match ``len(entries)``
        """
        result = InsertManyResult((), False)
        collection = self.get_collection()
        if collection is None:
            return result

        # The fallback below slices and measures `entries`; a one-shot iterable would already be
        # consumed by the first insert attempt.
        if not isinstance(entries, (list, tuple)):
            entries = list(entries)

        initial_num_docs = collection.count_documents({})
        try:
            result = collection.insert_many(entries)
        except BulkWriteError as bwe:
            logging.warning(f'BulkWriteError: {bwe.details}')

            write_errors = bwe.details.get('writeErrors') or []
            if not write_errors:
                # nothing to resume from (e.g. only write concern errors); surface the original error
                raise

            write_err = write_errors[0]
            err_index = write_err['index']
            err_code = write_err['code']

            if err_code != 16500 or 'mongo.cosmos.azure' not in self['server']:
                raise

            # looks like an azure server is being used and the number of requests exceeded capacity
            # there is a RetryAfterMs value in errmsg but skip it for now and use a big value
            sleep(0.5)  # wait 500ms

            # lets try it in batches; never let the batch size drop to 0, range() rejects a zero step
            batch_size = max(1, int(err_index * 0.25))
            logging.info(f'Attempting to continue in batches of {batch_size}')

            try:
                inserted_ids = [None] * err_index   # can't get ObjectIds of uploaded before BulkWriteError
                count = err_index
                pause = 0.25     # wait 250ms between batches
                for idx in range(err_index, len(entries), batch_size):
                    result = collection.insert_many(entries[idx:idx + batch_size])
                    # keep the id list flat: one entry per document
                    inserted_ids.extend(result.inserted_ids)
                    count += len(result.inserted_ids)
                    estimate = int(((len(entries)-idx)/batch_size) * pause)
                    logging.info(f'Uploaded {count} of {len(entries)}, ETC {str(timedelta(seconds=estimate))}')
                    sleep(pause)

                num_docs = collection.count_documents({}) - initial_num_docs
                if num_docs != len(entries):
                    raise ValueError(f'Batch inserted document count {num_docs} '
                                     f'does not match document size of document collection {len(entries)}')
                result = InsertManyResult(inserted_ids, result.acknowledged)
            except BulkWriteError as bweb:
                logging.warning(f'BulkWriteError: {bweb.details}')
                raise
        return result
Пример #2
0
    def insert_many(self, documents, ordered=True, **kwargs):
        """Insert an iterable of documents into collection

        Every document that has no ``_id`` gets a freshly generated
        ``ObjectId`` assigned in place before the bulk is executed.

        :param documents:
            An iterable of documents to insert (``list``,
            ``tuple``, ...). Other iterables are copied into a list
            because the documents are walked more than once.

        :param ordered:
            If ``True`` (the default) documents will be inserted on the server
            serially, in the order provided. If an error occurs, all remaining
            inserts are aborted. If ``False``, documents will be inserted on
            the server in arbitrary order, possibly in parallel, and all
            document inserts will be attempted.

        :raises TypeError:
            if any element of ``documents`` is not a mapping

        :returns:
            :class:`Deferred` that is called back with
            :class:`pymongo.results.InsertManyResult`
        """
        # ``collections.Mapping`` was removed in Python 3.10; the ABC lives in
        # ``collections.abc``. Fall back for interpreters without that module.
        try:
            from collections.abc import Mapping
        except ImportError:  # pragma: no cover - Python 2
            from collections import Mapping

        # The documents are iterated twice (id assignment, then bulk ops), so
        # a one-shot iterable must be materialised first.
        if not isinstance(documents, (list, tuple)):
            documents = list(documents)

        inserted_ids = []
        for doc in documents:
            if isinstance(doc, Mapping):
                inserted_ids.append(doc.setdefault("_id", ObjectId()))
            else:
                raise TypeError(
                    "TxMongo: insert_many takes list of documents.")

        bulk = _Bulk(self, ordered, bypass_document_validation=False)
        bulk.ops = [(_INSERT, doc) for doc in documents]
        yield self._execute_bulk(bulk)
        defer.returnValue(
            InsertManyResult(inserted_ids, self.write_concern.acknowledged))
 def test_format_calledWithValidOperationResult_returnCorrectResult(self):
     """Check the text ``self.sut.format`` produces for an acknowledged two-id InsertManyResult."""
     first_id = ObjectId("4d128b6ea794fc13a8000002")
     second_id = ObjectId("4d128b6ea794fc13a8000003")
     # Expected output, one list element per output line.
     expected = "\n".join([
         '{',
         '\t"acknowledged" : true,',
         '\t"insertedIds" : [',
         '\t\tObjectId("4d128b6ea794fc13a8000002"),',
         '\t\tObjectId("4d128b6ea794fc13a8000003")',
         '\t]',
         '}',
     ])
     operation_result = InsertManyResult([first_id, second_id], True)
     actual = self.sut.format(operation_result)
     self.assertEqual(actual, expected)
Пример #4
0
def cart_remove(item_name: str) -> InsertManyResult:
    """Remove an item from a shopping cart.

    The first cart document whose ``name`` equals ``item_name`` is deleted
    and, when one was found, passed to ``items_add``.

    :param item_name: value matched against the ``name`` field of cart documents
    :return: the result of ``items_add`` when an item was found, otherwise an
             unacknowledged ``InsertManyResult`` with no ids
    :raises Exception: generic 'There was a DB problem' error, chained to the
                       underlying exception, when anything in the DB calls fails
    """
    # Lazy %-style arguments: the message is only built when DEBUG is enabled.
    LOGGER.debug('removing item from shopping cart: %s', item_name)
    db = connect_to_db()  # pylint: disable=invalid-name
    result = InsertManyResult(None, False)
    try:
        item = db.cart.find_one_and_delete({'name': item_name})
        if item:
            result = items_add([item])
    except Exception as exception:
        # TODO: use specific exceptions. Create a custom error exception
        LOGGER.error(exception)
        # Chain explicitly so the root cause stays attached to the generic error.
        raise Exception('There was a DB problem') from exception
    return result
Пример #5
0
    def insert_many(self, documents, ordered=True):
        """Validate ``documents``, make sure each has an ``_id`` and report the ids.

        :param documents: non-empty iterable of documents
        :param ordered: accepted but not used by the code in this method
        :return: ``InsertManyResult`` built from the collected ``_id`` values
        :raises TypeError: if ``documents`` is not iterable or is empty
        """
        if not isinstance(documents, abc.Iterable) or not documents:
            raise TypeError("documents must be a non-empty list")
        inserted_ids = []

        def gen():
            """A generator that validates documents and handles _ids."""
            for document in documents:
                common.validate_is_document_type("document", document)
                if not isinstance(document, RawBSONDocument):
                    if "_id" not in document:
                        document["_id"] = ObjectId()
                    inserted_ids.append(document["_id"])
                yield (message._INSERT, document)

        # The generator does its work lazily: unless it is consumed, nothing is
        # validated, no _id is assigned and inserted_ids stays empty.
        # NOTE(review): the yielded (op, document) pairs are discarded here because
        # this method shows no consumer for them - confirm whether a bulk execute
        # step is missing.
        for _ in gen():
            pass

        return InsertManyResult(inserted_ids)
Пример #6
0
 def insert_many(self, documents, ordered=True, **kwargs):
     """Delegate to ``_insert_one_or_many`` and wrap the yielded ids in an ``InsertManyResult``.

     The result's acknowledged flag is taken from ``self.write_concern``.
     """
     ids = yield self._insert_one_or_many(documents, ordered, **kwargs)
     acknowledged = self.write_concern.acknowledged
     outcome = InsertManyResult(ids, acknowledged)
     defer.returnValue(outcome)
Пример #7
0
            "nUpserted": 0,
            "nMatched": 0,
            "nModified": 0,
            "nRemoved": 0,
            "upserted": [],
        },
        f"Duplicated documents: 1",
        f"Non-duplicated documents count: 0",
    )
]


# Field names for the entries of SAVE_TEST_CASES: every case below is a 3-tuple
# in this order. NOTE(review): presumably consumed by a parametrized test -
# confirm at the use site.
SAVE_PARAMS = ("data", "result", "expected_result")
SAVE_TEST_CASES = [
    # One document: the InsertManyResult carries that document's id and the
    # expected result is 1.
    (
        [
            {
                "id": "opsmatters_uk.ThelatestupdateforBroadcomincludesDXNetOps202Netwo.opentracing"
            }
        ],
        InsertManyResult(
            [
                "opsmatters_uk.ThelatestupdateforBroadcomincludesDXNetOps202Netwo.opentracing"
            ],
            True,
        ),
        1,
    ),
    # No documents: empty InsertManyResult and an expected result of 0.
    ([], InsertManyResult([], True), 0),
]
Пример #8
0
 def insert_many(self, documents, ordered=True, session=None):
     """Pass ``documents`` to the private ``__insert`` helper and wrap its return value.

     ``ordered`` and ``session`` are accepted but not used here; the returned
     ``InsertManyResult`` is always flagged as acknowledged.
     """
     return InsertManyResult(self.__insert(documents), True)
Пример #9
0
    def insert_many(self, documents, ordered=True, **kwargs):
        """Insert several documents, sending them to the server in batches.

        Every document that has no ``_id`` gets a freshly generated
        ``ObjectId`` assigned in place before anything is sent.

        :param documents:
            iterable of mapping documents; anything that is not a list or
            tuple is copied into a list because the documents are iterated,
            batched and indexed by position
        :param ordered:
            if true, each batch is awaited before the next one is sent and
            sending stops at the first reported error; otherwise all batches
            are sent without waiting in between
        :raises TypeError:
            if any element of ``documents`` is not a mapping
        :raises BulkWriteError:
            if the write concern is acknowledged and any batch reported
            write errors or a write concern error
        :returns:
            ``InsertManyResult`` with the collected ids and the acknowledged
            flag of ``self.write_concern`` (delivered via ``defer.returnValue``)
        """
        # ``collections.Mapping`` was removed in Python 3.10; the ABC lives in
        # ``collections.abc``. Fall back for interpreters without that module.
        try:
            from collections.abc import Mapping
        except ImportError:  # pragma: no cover - Python 2
            from collections import Mapping

        # The documents are walked here, again while batching, and indexed in
        # raise_error(), so a one-shot iterable must be materialised first.
        if not isinstance(documents, (list, tuple)):
            documents = list(documents)

        inserted_ids = []
        for doc in documents:
            if isinstance(doc, Mapping):
                inserted_ids.append(doc.setdefault("_id", ObjectId()))
            else:
                raise TypeError("TxMongo: insert_many takes list of documents.")

        cmd_collname = str(self._database["$cmd"])
        proto = yield self._database.connection.getprotocol()

        # Aggregated outcome over all batches; doubles as the BulkWriteError payload.
        error = {
            "nInserted": 0,
            "writeErrors": [],
            "writeConcernErrors": []
        }

        def accumulate_response(reply):
            """Fold one batch reply into the aggregated ``error`` dict."""
            response = reply.documents[0].decode()
            error["nInserted"] += response.get('n', 0)
            error["writeErrors"].extend(response.get("writeErrors", []))
            if "writeConcernError" in response:
                error["writeConcernErrors"].append(response["writeConcernError"])

        def has_errors():
            """Return a truthy value once any batch has reported an error."""
            return error["writeErrors"] or error["writeConcernErrors"]

        def raise_error():
            """Attach the offending document to each write error and raise."""
            error["writeErrors"].sort(key=lambda write_err: write_err["index"])
            for write_error in error["writeErrors"]:
                write_error[u"op"] = documents[write_error["index"]]
            raise BulkWriteError(error)

        # There are four major cases with different behavior of insert_many:
        #   * Unack, Unordered:  sending all batches and not handling responses at all
        #                        so ignoring any errors
        #
        #   * Ack, Unordered:    sending all batches, accumulating all responses and
        #                        returning aggregated response
        #
        #   * Unack, Ordered:    handling DB responses despite unacknowledged write_concern
        #                        because we must stop on first error (not raising it though)
        #
        #   * Ack, Ordered:      stopping on first error and raising BulkWriteError

        actual_write_concern = self.write_concern
        if ordered and self.write_concern.acknowledged is False:
            # Ordered inserts need a reply per batch to know when to stop.
            actual_write_concern = WriteConcern(w=1)

        batches = self._generate_insert_many_batches(self._collection_name, documents, ordered,
                                                     actual_write_concern, proto.max_bson_size,
                                                     proto.max_write_batch_size)

        all_responses = []
        for batch in batches:
            batch_result = proto.send_QUERY(Query(collection=cmd_collname, query=batch))
            if self.write_concern.acknowledged or ordered:
                batch_result.addCallback(accumulate_response)
                if ordered:
                    # Wait for this batch before deciding whether to send the next one.
                    yield batch_result
                    if has_errors():
                        if self.write_concern.acknowledged:
                            raise_error()
                        else:
                            break
                else:
                    all_responses.append(batch_result)

        if self.write_concern.acknowledged and not ordered:
            # Unordered + acknowledged: wait for every batch, then report all errors at once.
            yield defer.DeferredList(all_responses)
            if has_errors():
                raise_error()

        defer.returnValue(InsertManyResult(inserted_ids, self.write_concern.acknowledged))