    def test_write_oplog_progress(self):
        """Test write_oplog_progress under several circumstances
        """
        os.system('touch %s' % (TEMP_CONFIG))
        config_file_path = TEMP_CONFIG
        conn = Connector(MAIN_ADDRESS, config_file_path, None, ['test.test'],
                         '_id', None, None)

        # test that None is returned if there is no config file specified.
        self.assertEqual(conn.write_oplog_progress(), None)

        conn.oplog_progress.get_dict()[1] = Timestamp(12, 34)
        # pretend to insert a thread/timestamp pair
        conn.write_oplog_progress()

        data = json.load(open(config_file_path, 'r'))
        self.assertEqual(1, int(data[0]))
        self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(12, 34))

        # ensure the temp file was deleted
        self.assertFalse(os.path.exists(config_file_path + '~'))

        # ensure that updates work properly
        conn.oplog_progress.get_dict()[1] = Timestamp(44, 22)
        conn.write_oplog_progress()

        config_file = open(config_file_path, 'r')
        data = json.load(config_file)
        self.assertEqual(1, int(data[0]))
        self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(44, 22))

        config_file.close()
        os.unlink(config_file_path)
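
A minimal standalone sketch of the write path this test exercises: dump the thread/timestamp pairs to a scratch file named "<checkpoint>~", then atomically replace the real checkpoint so no "~" file survives. The function name, signature, and flat [name, long_ts] layout are inferred from the assertions above, not copied from Connector's source.

import json
import os

def write_progress(checkpoint_path, oplog_dict, ts_to_long):
    if checkpoint_path is None:
        return None
    # Flatten {name: Timestamp} into [name, long_ts, name, long_ts, ...],
    # the layout the test reads back with json.load().
    items = []
    for name, ts in oplog_dict.items():
        items.extend([name, ts_to_long(ts)])
    scratch = checkpoint_path + '~'
    with open(scratch, 'w') as dest:
        json.dump(items, dest)
    os.rename(scratch, checkpoint_path)  # atomic on POSIX; no "~" file remains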
Example #3
    def read_oplog_progress(self):
        """Reads oplog progress from file provided by user.
        This method is only called once before any threads are spawned.
        """

        if self.oplog_checkpoint is None:
            return None

        # Check for empty file
        try:
            if os.stat(self.oplog_checkpoint).st_size == 0:
                logging.info("MongoConnector: Empty oplog progress file.")
                return None
        except OSError:
            return None

        source = open(self.oplog_checkpoint, 'r')
        try:
            data = json.load(source)
        except ValueError:  # corrupt or partially written file
            reason = "It may be empty or corrupt."
            logging.info("MongoConnector: Can't read oplog progress file. %s" %
                         (reason))
            source.close()
            return None

        source.close()

        oplog_dict = self.oplog_progress.get_dict()
        for count in range(0, len(data), 2):
            oplog_str = data[count]
            time_stamp = data[count + 1]
            oplog_dict[oplog_str] = util.long_to_bson_ts(time_stamp)
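
For reference, a round-trip through the flat [name, long_ts, name, long_ts, ...] layout the loop above walks two entries at a time. The file name and values are invented; util is mongo_connector.util, as in these examples.

import json
from bson.timestamp import Timestamp
from mongo_connector import util

with open("oplog.timestamp", "w") as f:
    json.dump(["rs0", util.bson_ts_to_long(Timestamp(12, 34))], f)

with open("oplog.timestamp") as f:
    data = json.load(f)
progress = {data[i]: util.long_to_bson_ts(data[i + 1])
            for i in range(0, len(data), 2)}
print(progress)  # {'rs0': Timestamp(12, 34)}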
Example #5
    def test_dump_collection(self):
        """Test dump_collection in oplog_manager. Assertion failure if it
            doesn't pass
        """

        test_oplog, primary_conn, search_ts = self.get_oplog_thread()

        # with documents
        primary_conn['test']['test'].insert({'name': 'paulie'})
        search_ts = test_oplog.get_last_oplog_timestamp()
        test_oplog.dump_collection()

        doc_manager = test_oplog.doc_managers[0]
        doc_manager.commit()
        solr_results = doc_manager._search()
        self.assertEqual(len(solr_results), 1)
        solr_doc = solr_results[0]
        self.assertEqual(long_to_bson_ts(solr_doc['_ts']), search_ts)
        self.assertEqual(solr_doc['name'], 'paulie')
        self.assertEqual(solr_doc['ns'], 'test.test')

        # test multiple targets
        doc_managers = [DocManager(), DocManager(), DocManager()]
        test_oplog.doc_managers = doc_managers
        primary_conn["test"]["test"].remove()
        for i in range(1000):
            primary_conn["test"]["test"].insert({"i": i})
        test_oplog.dump_collection()
        for dm in doc_managers:
            self.assertEqual(len(dm._search()), 1000)
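
For orientation, a rough sketch of what dump_collection is being exercised to do: copy every document in each watched namespace into every doc manager, stamped with the dump-time timestamp and the namespace that the assertions check. The names here are assumptions for illustration, not OplogThread's actual internals.

from mongo_connector.util import bson_ts_to_long

def dump_collection(primary_conn, namespaces, doc_managers, dump_ts):
    # dump_ts: the last oplog Timestamp, captured before the dump begins
    long_ts = bson_ts_to_long(dump_ts)
    for namespace in namespaces:
        database, coll = namespace.split('.', 1)
        for doc in primary_conn[database][coll].find():
            doc['ns'] = namespace   # the tests assert both of these fields
            doc['_ts'] = long_ts
            for dm in doc_managers:
                dm.upsert(doc)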
Example #7
    def test_bson_ts_to_long(self):
        """Test bson_ts_to_long and long_to_bson_ts
        """

        tstamp = timestamp.Timestamp(0x12345678, 0x90abcdef)

        self.assertEqual(0x1234567890abcdef, bson_ts_to_long(tstamp))
        self.assertEqual(long_to_bson_ts(0x1234567890abcdef), tstamp)
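
The hex constants make the packing visible: a BSON Timestamp holds a 32-bit seconds value and a 32-bit increment, and the long form stores them high/low. A sketch of the conversion pair consistent with these assertions (mongo_connector.util provides the real implementations):

from bson.timestamp import Timestamp

def bson_ts_to_long(timestamp):
    # High 32 bits carry the seconds, low 32 bits the increment:
    # Timestamp(0x12345678, 0x90abcdef) -> 0x1234567890abcdef
    return (timestamp.time << 32) + timestamp.inc

def long_to_bson_ts(val):
    return Timestamp(val >> 32, val & 0xFFFFFFFF)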
Example #8
    def test_bson_ts_to_long(self):
        """Test bson_ts_to_long and long_to_bson_ts
        """

        tstamp = timestamp.Timestamp(0x12345678, 0x90ABCDEF)

        self.assertEqual(0x1234567890ABCDEF, bson_ts_to_long(tstamp))
        self.assertEqual(long_to_bson_ts(0x1234567890ABCDEF), tstamp)
    def test_write_oplog_progress(self):
        """Test write_oplog_progress under several circumstances
        """
        try:
            os.unlink("temp_config.txt")
        except OSError:
            pass
        open("temp_config.txt", "w").close()
        conn = Connector(
            address='%s:%d' % (mongo_host, self.primary_p),
            oplog_checkpoint="temp_config.txt",
            target_url=None,
            ns_set=['test.test'],
            u_key='_id',
            auth_key=None
        )

        # test that None is returned if there is no config file specified.
        self.assertEqual(conn.write_oplog_progress(), None)

        conn.oplog_progress.get_dict()[1] = Timestamp(12, 34)
        # pretend to insert a thread/timestamp pair
        conn.write_oplog_progress()

        data = json.load(open("temp_config.txt", 'r'))
        self.assertEqual(1, int(data[0]))
        self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(12, 34))

        # ensure the temp file was deleted
        self.assertFalse(os.path.exists("temp_config.txt" + '~'))

        # ensure that updates work properly
        conn.oplog_progress.get_dict()[1] = Timestamp(44, 22)
        conn.write_oplog_progress()

        config_file = open("temp_config.txt", 'r')
        data = json.load(config_file)
        self.assertEqual(1, int(data[0]))
        self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(44, 22))

        config_file.close()
        os.unlink("temp_config.txt")
Example #10
    def read_oplog_progress(self):
        """Reads oplog progress from file provided by user.
        This method is only called once before any threads are spanwed.
        """
        min_value = None

        for dm in self.doc_managers:
            value = dm.get_last_doc(self.ns_set or None)
            if value and (not min_value or value['_ts'] < min_value):
                min_value = value['_ts']

        return util.long_to_bson_ts(min_value) if min_value is not None else None
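
Resuming from the minimum is the safe choice: with several targets, the smallest last-seen _ts guarantees no doc manager misses events, while targets that were already further ahead simply re-apply idempotent upserts. In miniature, with invented values:

# Three targets report how far they have replicated; resume from the
# slowest one so that nothing is skipped.
last_seen = [51539607586, 51539607580, 51539607600]
resume_from = min(last_seen)  # 51539607580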
    def test_write_oplog_progress(self):
        """Test write_oplog_progress under several circumstances
        """
        try:
            os.unlink("temp_oplog.timestamp")
        except OSError:
            pass
        open("temp_oplog.timestamp", "w").close()
        conn = Connector(
            mongo_address=self.repl_set.uri,
            oplog_checkpoint="temp_oplog.timestamp",
            **connector_opts
        )

        # test that None is returned if there is no config file specified.
        self.assertEqual(conn.write_oplog_progress(), None)

        conn.oplog_progress.get_dict()[1] = Timestamp(12, 34)
        # pretend to insert a thread/timestamp pair
        conn.write_oplog_progress()

        data = json.load(open("temp_oplog.timestamp", "r"))
        self.assertEqual(1, int(data[0]))
        self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(12, 34))

        # ensure the temp file was deleted
        self.assertFalse(os.path.exists("temp_oplog.timestamp" + "~"))

        # ensure that updates work properly
        conn.oplog_progress.get_dict()[1] = Timestamp(44, 22)
        conn.write_oplog_progress()

        config_file = open("temp_oplog.timestamp", "r")
        data = json.load(config_file)
        self.assertEqual(1, int(data[0]))
        self.assertEqual(long_to_bson_ts(int(data[1])), Timestamp(44, 22))

        config_file.close()
        os.unlink("temp_oplog.timestamp")
Example #14
    def read_oplog_progress(self):
        """Reads oplog progress from file provided by user.
        This method is only called once before any threads are spawned.
        """

        if self.oplog_checkpoint is None:
            return None

        # Check for empty file
        try:
            if os.stat(self.oplog_checkpoint).st_size == 0:
                LOG.info("MongoConnector: Empty oplog progress file.")
                return None
        except OSError:
            return None

        with open(self.oplog_checkpoint, 'r') as progress_file:
            try:
                data = json.load(progress_file)
            except ValueError:
                LOG.exception(
                    'Cannot read oplog progress file "%s". '
                    'It may be corrupt after Mongo Connector was shut down '
                    'uncleanly. You can try to recover from a backup file '
                    '(may be called "%s.backup") or create a new progress file '
                    'starting at the current moment in time by running '
                    'mongo-connector --no-dump <other options>. '
                    'You may also be trying to read an oplog progress file '
                    'created with the old format for sharded clusters. '
                    'See https://github.com/10gen-labs/mongo-connector/wiki'
                    '/Oplog-Progress-File for complete documentation.'
                    % (self.oplog_checkpoint, self.oplog_checkpoint))
                return
            # data format:
            # [name, timestamp] = replica set
            # [[name, timestamp], [name, timestamp], ...] = sharded cluster
            if not isinstance(data[0], list):
                data = [data]
            with self.oplog_progress:
                self.oplog_progress.dict = dict(
                    (name, util.long_to_bson_ts(timestamp))
                    for name, timestamp in data)
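
An illustrative pass over the two layouts the code above distinguishes. Shard names and values are invented; 51539607586 is bson_ts_to_long(Timestamp(12, 34)).

import json
from mongo_connector import util

# replica set:     ["rs0", 51539607586]
# sharded cluster: [["shard0000", 51539607586], ["shard0001", 51539607620]]
data = json.loads('[["shard0000", 51539607586], ["shard0001", 51539607620]]')
if not isinstance(data[0], list):
    data = [data]
progress = dict((name, util.long_to_bson_ts(ts)) for name, ts in data)
print(progress)  # {'shard0000': Timestamp(12, 34), 'shard0001': Timestamp(12, 68)}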
Example #15
    def test_dump_collection(self):
        """Test dump_collection in oplog_manager. Assertion failure if it
            doesn't pass
        """

        test_oplog, primary_conn, search_ts = self.get_oplog_thread()
        solr = DocManager()
        test_oplog.doc_manager = solr

        # with documents
        primary_conn["test"]["test"].insert({"name": "paulie"})
        search_ts = test_oplog.get_last_oplog_timestamp()
        test_oplog.dump_collection()

        test_oplog.doc_manager.commit()
        solr_results = solr._search()
        self.assertEqual(len(solr_results), 1)
        solr_doc = solr_results[0]
        self.assertEqual(long_to_bson_ts(solr_doc["_ts"]), search_ts)
        self.assertEqual(solr_doc["name"], "paulie")
        self.assertEqual(solr_doc["ns"], "test.test")
Example #17
    def test_dump_collection(self):
        """Test dump_collection in oplog_manager.

        Assertion failure if it doesn't pass
        """

        test_oplog, search_ts, solr, mongos = self.get_oplog_thread()

        # with documents
        safe_mongo_op(mongos['alpha']['foo'].insert, {'name': 'paulie'})
        search_ts = test_oplog.get_last_oplog_timestamp()
        test_oplog.dump_collection()

        docman = test_oplog.doc_managers[0]
        docman.commit()
        solr_results = docman._search()
        assert len(solr_results) == 1
        solr_doc = solr_results[0]
        assert long_to_bson_ts(solr_doc['_ts']) == search_ts
        assert solr_doc['name'] == 'paulie'
        assert solr_doc['ns'] == 'alpha.foo'
Example #19
    def rollback(self):
        """Rollback target system to consistent state.

        The strategy is to find the latest timestamp in the target system and
        the largest timestamp in the oplog less than the latest target system
        timestamp. This defines the rollback window and we just roll these
        back until the oplog and target system are in consistent states.
        """
        # Find the most recently inserted document in each target system
        LOG.debug(
            "OplogThread: Initiating rollback sequence to bring "
            "system into a consistent state."
        )
        last_docs = []
        for dm in self.doc_managers:
            dm.commit()
            last_docs.append(dm.get_last_doc())

        # Of these documents, which is the most recent?
        last_inserted_doc = max(
            last_docs, key=lambda x: x["_ts"] if x else float("-inf")
        )

        # Nothing has been replicated. No need to rollback target systems
        if last_inserted_doc is None:
            return None

        # Find the oplog entry that touched the most recent document.
        # We'll use this to figure where to pick up the oplog later.
        target_ts = util.long_to_bson_ts(last_inserted_doc["_ts"])
        last_oplog_entry = util.retry_until_ok(
            self.oplog.find_one,
            {"ts": {"$lte": target_ts}, "op": {"$ne": "n"}},
            sort=[("$natural", pymongo.DESCENDING)],
        )

        LOG.debug("OplogThread: last oplog entry is %s" % str(last_oplog_entry))

        # The oplog entry for the most recent document doesn't exist anymore.
        # If we've fallen behind in the oplog, this will be caught later
        if last_oplog_entry is None:
            return None

        # rollback_cutoff_ts happened *before* the rollback
        rollback_cutoff_ts = last_oplog_entry["ts"]
        start_ts = util.bson_ts_to_long(rollback_cutoff_ts)
        # timestamp of the most recent document on any target system
        end_ts = last_inserted_doc["_ts"]

        for dm in self.doc_managers:
            rollback_set = {}  # this is a dictionary of ns:list of docs

            # group potentially conflicted documents by namespace
            for doc in dm.search(start_ts, end_ts):
                if doc["ns"] in rollback_set:
                    rollback_set[doc["ns"]].append(doc)
                else:
                    rollback_set[doc["ns"]] = [doc]

            # retrieve these documents from MongoDB, either updating
            # or removing them in each target system
            for namespace, doc_list in rollback_set.items():
                # Get the original namespace
                original_namespace = self.namespace_config.unmap_namespace(namespace)
                if not original_namespace:
                    original_namespace = namespace

                database, coll = original_namespace.split(".", 1)
                obj_id = bson.objectid.ObjectId
                bson_obj_id_list = [obj_id(doc["_id"]) for doc in doc_list]

                # Use connection to whole cluster if in sharded environment.
                client = self.mongos_client or self.primary_client
                to_update = util.retry_until_ok(
                    client[database][coll].find,
                    {"_id": {"$in": bson_obj_id_list}},
                    projection=self.namespace_config.projection(original_namespace),
                )
                # Doc list are docs in target system, to_update are
                # Docs in mongo
                doc_hash = {}  # Hash by _id
                for doc in doc_list:
                    doc_hash[bson.objectid.ObjectId(doc["_id"])] = doc

                to_index = []

                def collect_existing_docs():
                    for doc in to_update:
                        if doc["_id"] in doc_hash:
                            del doc_hash[doc["_id"]]
                            to_index.append(doc)

                retry_until_ok(collect_existing_docs)

                # Delete the inconsistent documents
                LOG.debug("OplogThread: Rollback, removing inconsistent " "docs.")
                remov_inc = 0
                for document_id in doc_hash:
                    try:
                        dm.remove(
                            document_id,
                            namespace,
                            util.bson_ts_to_long(rollback_cutoff_ts),
                        )
                        remov_inc += 1
                        LOG.debug("OplogThread: Rollback, removed %r " % doc)
                    except errors.OperationFailed:
                        LOG.warning(
                            "Could not delete document during rollback: %r "
                            "This can happen if this document was already "
                            "removed by another rollback happening at the "
                            "same time." % doc
                        )

                LOG.debug("OplogThread: Rollback, removed %d docs." % remov_inc)

                # Insert the ones from mongo
                LOG.debug("OplogThread: Rollback, inserting documents " "from mongo.")
                insert_inc = 0
                fail_insert_inc = 0
                for doc in to_index:
                    try:
                        insert_inc += 1
                        dm.upsert(
                            doc, namespace, util.bson_ts_to_long(rollback_cutoff_ts)
                        )
                    except errors.OperationFailed:
                        fail_insert_inc += 1
                        LOG.exception(
                            "OplogThread: Rollback, Unable to insert %r" % doc
                        )

        LOG.debug(
            "OplogThread: Rollback, Successfully inserted %d "
            "documents and failed to insert %d "
            "documents. Returning a rollback cutoff time of %s"
            % (insert_inc, fail_insert_inc, str(rollback_cutoff_ts))
        )

        return rollback_cutoff_ts
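
The window arithmetic above in miniature, with invented values: the newest replicated document fixes end_ts, the newest surviving oplog entry at or below it fixes the cutoff, and whatever a target holds in between is a reconciliation candidate.

from bson.timestamp import Timestamp
from mongo_connector.util import bson_ts_to_long

end_ts = bson_ts_to_long(Timestamp(44, 22))  # newest _ts on any target
rollback_cutoff_ts = Timestamp(44, 20)       # newest oplog entry <= end_ts
start_ts = bson_ts_to_long(rollback_cutoff_ts)
# Documents with start_ts <= _ts <= end_ts exist in a target but may no
# longer exist (or not in that form) in MongoDB; dm.search(start_ts, end_ts)
# in the code above yields exactly these candidates.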
Example #20
    def rollback(self):
        """Rollback target system to consistent state.

        The strategy is to find the latest timestamp in the target system and
        the largest timestamp in the oplog less than the latest target system
        timestamp. This defines the rollback window and we just roll these
        back until the oplog and target system are in consistent states.
        """
        # Find the most recently inserted document in each target system
        logging.debug("OplogThread: Initiating rollback sequence to bring "
                      "system into a consistent state.")
        last_docs = []
        for dm in self.doc_managers:
            dm.commit()
            last_docs.append(dm.get_last_doc())

        # Of these documents, which is the most recent?
        last_inserted_doc = max(last_docs,
                                key=lambda x: x["_ts"] if x else float("-inf"))

        # Nothing has been replicated. No need to rollback target systems
        if last_inserted_doc is None:
            return None

        # Find the oplog entry that touched the most recent document.
        # We'll use this to figure where to pick up the oplog later.
        target_ts = util.long_to_bson_ts(last_inserted_doc['_ts'])
        last_oplog_entry = util.retry_until_ok(
            self.oplog.find_one,
            {'ts': {'$lte': target_ts}},
            sort=[('$natural', pymongo.DESCENDING)]
        )

        logging.debug("OplogThread: last oplog entry is %s"
                      % str(last_oplog_entry))

        # The oplog entry for the most recent document doesn't exist anymore.
        # If we've fallen behind in the oplog, this will be caught later
        if last_oplog_entry is None:
            return None

        # rollback_cutoff_ts happened *before* the rollback
        rollback_cutoff_ts = last_oplog_entry['ts']
        start_ts = util.bson_ts_to_long(rollback_cutoff_ts)
        # timestamp of the most recent document on any target system
        end_ts = last_inserted_doc['_ts']

        for dm in self.doc_managers:
            rollback_set = {}   # this is a dictionary of ns:list of docs

            # group potentially conflicted documents by namespace
            for doc in dm.search(start_ts, end_ts):
                if doc['ns'] in rollback_set:
                    rollback_set[doc['ns']].append(doc)
                else:
                    rollback_set[doc['ns']] = [doc]

            # retrieve these documents from MongoDB, either updating
            # or removing them in each target system
            for namespace, doc_list in rollback_set.items():
                # Get the original namespace
                original_namespace = namespace
                for source_name, dest_name in self.dest_mapping.items():
                    if dest_name == namespace:
                        original_namespace = source_name

                database, coll = original_namespace.split('.', 1)
                obj_id = bson.objectid.ObjectId
                bson_obj_id_list = [obj_id(doc['_id']) for doc in doc_list]

                to_update = util.retry_until_ok(
                    self.main_connection[database][coll].find,
                    {'_id': {'$in': bson_obj_id_list}},
                    fields=self._fields
                )
                # doc list are docs in target system, to_update are
                # docs in mongo
                doc_hash = {}  # hash by _id
                for doc in doc_list:
                    doc_hash[bson.objectid.ObjectId(doc['_id'])] = doc

                to_index = []

                def collect_existing_docs():
                    for doc in to_update:
                        if doc['_id'] in doc_hash:
                            del doc_hash[doc['_id']]
                            to_index.append(doc)
                retry_until_ok(collect_existing_docs)

                # delete the inconsistent documents
                logging.debug("OplogThread: Rollback, removing inconsistent "
                              "docs.")
                remov_inc = 0
                for doc in doc_hash.values():
                    try:
                        dm.remove(doc)
                        remov_inc += 1
                        logging.debug("OplogThread: Rollback, removed %s " %
                                      str(doc))
                    except errors.OperationFailed:
                        logging.warning(
                            "Could not delete document during rollback: %s "
                            "This can happen if this document was already "
                            "removed by another rollback happening at the "
                            "same time." % str(doc)
                        )

                logging.debug("OplogThread: Rollback, removed %d docs." %
                              remov_inc)

                # insert the ones from mongo
                logging.debug("OplogThread: Rollback, inserting documents "
                              "from mongo.")
                insert_inc = 0
                fail_insert_inc = 0
                for doc in to_index:
                    doc['_ts'] = util.bson_ts_to_long(rollback_cutoff_ts)
                    doc['ns'] = self.dest_mapping.get(namespace, namespace)
                    try:
                        insert_inc += 1
                        dm.upsert(doc)
                    except errors.OperationFailed as e:
                        fail_insert_inc += 1
                        logging.error("OplogThread: Rollback, Unable to "
                                      "insert %s with exception %s"
                                      % (doc, str(e)))

        logging.debug("OplogThread: Rollback, Successfully inserted %d "
                      " documents and failed to insert %d"
                      " documents.  Returning a rollback cutoff time of %s "
                      % (insert_inc, fail_insert_inc, str(rollback_cutoff_ts)))

        return rollback_cutoff_ts
Example #21
def read_ts_file(ts_file):
    # Parse a flat [replica_set_name, long_timestamp] checkpoint file.
    with open(ts_file) as source:
        content = json.load(source)
    rs = content[0]
    ts = util.long_to_bson_ts(content[1])
    return rs, ts
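
Hypothetical usage, given a checkpoint file in the flat layout (contents invented; 51539607586 unpacks to Timestamp(12, 34)):

# Given oplog.timestamp containing: ["rs0", 51539607586]
rs, ts = read_ts_file("oplog.timestamp")
print(rs, ts)  # rs0 Timestamp(12, 34)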
Example #22
    def rollback(self):
        """Rollback target system to consistent state.

        The strategy is to find the latest timestamp in the target system and
        the largest timestamp in the oplog less than the latest target system
        timestamp. This defines the rollback window and we just roll these
        back until the oplog and target system are in consistent states.
        """
        self.doc_manager.commit()
        last_inserted_doc = self.doc_manager.get_last_doc()

        if last_inserted_doc is None:
            return None

        target_ts = util.long_to_bson_ts(last_inserted_doc['_ts'])
        last_oplog_entry = self.oplog.find_one(
            {'ts': {'$lte': target_ts}},
            sort=[('$natural', pymongo.DESCENDING)])
        if last_oplog_entry is None:
            return None

        rollback_cutoff_ts = last_oplog_entry['ts']
        start_ts = util.bson_ts_to_long(rollback_cutoff_ts)
        end_ts = last_inserted_doc['_ts']

        rollback_set = {}   # this is a dictionary of ns:list of docs
        for doc in self.doc_manager.search(start_ts, end_ts):
            if doc['ns'] in rollback_set:
                rollback_set[doc['ns']].append(doc)
            else:
                rollback_set[doc['ns']] = [doc]

        for namespace, doc_list in rollback_set.items():
            database, coll = namespace.split('.', 1)
            obj_id = bson.objectid.ObjectId
            bson_obj_id_list = [obj_id(doc['_id']) for doc in doc_list]

            to_update = util.retry_until_ok(
                self.main_connection[database][coll].find,
                {'_id': {'$in': bson_obj_id_list}})
            # doc list are docs in target system, to_update are docs in mongo
            doc_hash = {}  # hash by _id
            for doc in doc_list:
                doc_hash[bson.objectid.ObjectId(doc['_id'])] = doc

            to_index = []
            count = 0
            while True:
                try:
                    for doc in to_update:
                        if doc['_id'] in doc_hash:
                            del doc_hash[doc['_id']]
                            to_index.append(doc)
                    break
                except (pymongo.errors.OperationFailure,
                        pymongo.errors.AutoReconnect):
                    count += 1
                    if count > 60:
                        sys.exit(1)
                    time.sleep(1)

            # delete the inconsistent documents
            for doc in doc_hash.values():
                self.doc_manager.remove(doc)

            # insert the ones from mongo
            for doc in to_index:
                doc['_ts'] = util.bson_ts_to_long(rollback_cutoff_ts)
                doc['ns'] = namespace
                try:
                    self.doc_manager.upsert(doc)
                except errors.OperationFailed:
                    logging.error("Unable to insert %s" % (doc))

        return rollback_cutoff_ts
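
Later revisions (the rollback variants above) factor this inline retry loop out into util.retry_until_ok. A sketch modeled directly on the loop here, with sys.exit swapped for a re-raise; an assumption, not the library's exact code.

import time

import pymongo.errors

def retry_until_ok(func, *args, **kwargs):
    # Retry transient MongoDB failures once per second, up to 60 attempts.
    count = 0
    while True:
        try:
            return func(*args, **kwargs)
        except (pymongo.errors.OperationFailure,
                pymongo.errors.AutoReconnect):
            count += 1
            if count > 60:
                raise
            time.sleep(1)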