def dump_collection(self):
    """Dumps collection into the target system.

    This method is called when we're initializing the cursor and have no
    configs i.e. when we're starting for the first time.
    """
    timestamp = retry_until_ok(self.get_last_oplog_timestamp)
    if timestamp is None:
        return None
    long_ts = util.bson_ts_to_long(timestamp)
    # Flag if this oplog thread was cancelled during the collection dump.
    # Use a list to workaround python scoping.
    dump_cancelled = [False]

    def get_all_ns():
        ns_set = []
        gridfs_ns_set = []
        db_list = self.namespace_config.get_included_databases()
        if not db_list:
            # Only use listDatabases when the configured databases are not
            # explicit.
            db_list = retry_until_ok(self.primary_client.database_names)
        for database in db_list:
            if database == "config" or database == "local":
                continue
            coll_list = retry_until_ok(
                self.primary_client[database].collection_names
            )
            for coll in coll_list:
                # ignore system collections
                if coll.startswith("system."):
                    continue
                # ignore gridfs chunks collections
                if coll.endswith(".chunks"):
                    continue
                if coll.endswith(".files"):
                    namespace = "%s.%s" % (database, coll)
                    namespace = namespace[: -len(".files")]
                    if self.namespace_config.gridfs_namespace(namespace):
                        gridfs_ns_set.append(namespace)
                else:
                    namespace = "%s.%s" % (database, coll)
                    if self.namespace_config.map_namespace(namespace):
                        ns_set.append(namespace)
        return ns_set, gridfs_ns_set

    dump_set, gridfs_dump_set = get_all_ns()

    LOG.debug("OplogThread: Dumping set of collections %s " % dump_set)

    def docs_to_dump(from_coll):
        last_id = None
        attempts = 0
        projection = self.namespace_config.projection(from_coll.full_name)
        # Loop to handle possible AutoReconnect
        while attempts < 60:
            if last_id is None:
                cursor = retry_until_ok(
                    from_coll.find,
                    projection=projection,
                    sort=[("_id", pymongo.ASCENDING)],
                )
            else:
                cursor = retry_until_ok(
                    from_coll.find,
                    {"_id": {"$gt": last_id}},
                    projection=projection,
                    sort=[("_id", pymongo.ASCENDING)],
                )
            try:
                for doc in cursor:
                    if not self.running:
                        # Thread was joined while performing the
                        # collection dump.
                        dump_cancelled[0] = True
                        # Return rather than raise StopIteration: PEP 479
                        # turns StopIteration raised inside a generator into
                        # a RuntimeError on Python 3.7+.
                        return
                    last_id = doc["_id"]
                    yield doc
                break
            except (pymongo.errors.AutoReconnect, pymongo.errors.OperationFailure):
                attempts += 1
                time.sleep(1)

    def upsert_each(dm):
        num_failed = 0
        for namespace in dump_set:
            from_coll = self.get_collection(namespace)
            mapped_ns = self.namespace_config.map_namespace(namespace)
            total_docs = retry_until_ok(from_coll.count)
            num = None
            for num, doc in enumerate(docs_to_dump(from_coll)):
                try:
                    dm.upsert(doc, mapped_ns, long_ts)
                except Exception:
                    if self.continue_on_error:
                        LOG.exception("Could not upsert document: %r" % doc)
                        num_failed += 1
                    else:
                        raise
                if num % 10000 == 0:
                    LOG.info(
                        "Upserted %d out of approximately %d docs "
                        "from collection '%s'",
                        num + 1,
                        total_docs,
                        namespace,
                    )
            if num is not None:
                LOG.info(
                    "Upserted %d out of approximately %d docs from "
                    "collection '%s'",
                    num + 1,
                    total_docs,
                    namespace,
                )
        if num_failed > 0:
            LOG.error("Failed to upsert %d docs" % num_failed)

    def upsert_all(dm):
        try:
            for namespace in dump_set:
                from_coll = self.get_collection(namespace)
                total_docs = retry_until_ok(from_coll.count)
                mapped_ns = self.namespace_config.map_namespace(namespace)
                LOG.info(
                    "Bulk upserting approximately %d docs from "
                    "collection '%s'",
                    total_docs,
                    namespace,
                )
                dm.bulk_upsert(docs_to_dump(from_coll), mapped_ns, long_ts)
        except Exception:
            if self.continue_on_error:
                LOG.exception(
                    "OplogThread: caught exception"
                    " during bulk upsert, re-upserting"
                    " documents serially"
                )
                upsert_each(dm)
            else:
                raise

    def do_dump(dm, error_queue):
        try:
            LOG.debug(
                "OplogThread: Using bulk upsert function for " "collection dump"
            )
            upsert_all(dm)

            if gridfs_dump_set:
                LOG.info(
                    "OplogThread: dumping GridFS collections: %s", gridfs_dump_set
                )

            # Dump GridFS files
            for gridfs_ns in gridfs_dump_set:
                mongo_coll = self.get_collection(gridfs_ns)
                from_coll = self.get_collection(gridfs_ns + ".files")
                dest_ns = self.namespace_config.map_namespace(gridfs_ns)
                for doc in docs_to_dump(from_coll):
                    gridfile = GridFSFile(mongo_coll, doc)
                    dm.insert_file(gridfile, dest_ns, long_ts)
        except Exception:
            # Likely exceptions:
            # pymongo.errors.OperationFailure,
            # mongo_connector.errors.ConnectionFailed
            # mongo_connector.errors.OperationFailed
            error_queue.put(sys.exc_info())

    # Extra threads (if any) that assist with collection dumps
    dumping_threads = []
    # Did the dump succeed for all target systems?
    dump_success = True
    # Holds any exceptions we can't recover from
    errors = queue.Queue()

    if len(self.doc_managers) == 1:
        do_dump(self.doc_managers[0], errors)
    else:
        # Slight performance gain breaking dump into separate
        # threads if > 1 replication target
        for dm in self.doc_managers:
            t = threading.Thread(target=do_dump, args=(dm, errors))
            dumping_threads.append(t)
            t.start()
        # cleanup
        for t in dumping_threads:
            t.join()

    # Print caught exceptions
    try:
        while True:
            LOG.critical(
                "Exception during collection dump", exc_info=errors.get_nowait()
            )
            dump_success = False
    except queue.Empty:
        pass

    if not dump_success:
        err_msg = "OplogThread: Failed during dump collection"
        effect = "cannot recover!"
        LOG.error("%s %s %s" % (err_msg, effect, self.oplog))
        self.running = False
        return None

    if dump_cancelled[0]:
        LOG.warning(
            "Initial collection dump was interrupted. "
            "Will re-run the collection dump on next startup."
        )
        return None

    return timestamp
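
# Aside (illustrative only, not part of the connector): the ``dump_cancelled = [False]``
# flag above uses a one-element list so the nested ``docs_to_dump`` generator can set
# it. Python 2 has no ``nonlocal``, so rebinding a plain boolean inside the closure
# would only create a new local name. The same pattern in isolation, with made-up names:

def make_counter():
    count = [0]  # mutable cell shared with the closure

    def bump():
        count[0] += 1  # mutates the shared list instead of rebinding a name
        return count[0]

    return bump


bump = make_counter()
assert bump() == 1 and bump() == 2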

def dump_collection(self):
    """Dumps collection into the target system.

    This method is called when we're initializing the cursor and have no
    configs i.e. when we're starting for the first time.
    """
    dump_set = self.namespace_set or []
    logging.debug("OplogThread: Dumping set of collections %s " % dump_set)

    # No namespaces specified
    if not self.namespace_set:
        db_list = retry_until_ok(self.main_connection.database_names)
        for database in db_list:
            if database == "config" or database == "local":
                continue
            coll_list = retry_until_ok(
                self.main_connection[database].collection_names)
            for coll in coll_list:
                if coll.startswith("system"):
                    continue
                namespace = "%s.%s" % (database, coll)
                dump_set.append(namespace)

    timestamp = util.retry_until_ok(self.get_last_oplog_timestamp)
    if timestamp is None:
        return None
    long_ts = util.bson_ts_to_long(timestamp)

    def docs_to_dump():
        for namespace in dump_set:
            logging.info("OplogThread: dumping collection %s" % namespace)
            database, coll = namespace.split('.', 1)
            last_id = None
            attempts = 0

            # Loop to handle possible AutoReconnect
            while attempts < 60:
                target_coll = self.main_connection[database][coll]
                if not last_id:
                    cursor = util.retry_until_ok(
                        target_coll.find,
                        fields=self._fields,
                        sort=[("_id", pymongo.ASCENDING)]
                    )
                else:
                    cursor = util.retry_until_ok(
                        target_coll.find,
                        {"_id": {"$gt": last_id}},
                        fields=self._fields,
                        sort=[("_id", pymongo.ASCENDING)]
                    )
                try:
                    for doc in cursor:
                        if not self.running:
                            return  # stop the dump; the thread was joined
                        doc["ns"] = self.dest_mapping.get(
                            namespace, namespace)
                        doc["_ts"] = long_ts
                        last_id = doc["_id"]
                        yield doc
                    break
                except pymongo.errors.AutoReconnect:
                    attempts += 1
                    time.sleep(1)

    # Extra threads (if any) that assist with collection dumps
    dumping_threads = []
    # Did the dump succeed for all target systems?
    dump_success = True
    # Holds any exceptions we can't recover from
    errors = queue.Queue()

    try:
        for dm in self.doc_managers:
            # Bulk upsert if possible
            if hasattr(dm, "bulk_upsert"):
                logging.debug("OplogThread: Using bulk upsert function for "
                              "collection dump")
                # Slight performance gain breaking dump into separate
                # threads, only if > 1 replication target
                if len(self.doc_managers) == 1:
                    dm.bulk_upsert(docs_to_dump())
                else:
                    # Pass dm explicitly so each worker thread keeps its own
                    # doc manager (avoids the late-binding pitfall of closing
                    # over the loop variable).
                    def do_dump(dm, error_queue):
                        all_docs = docs_to_dump()
                        try:
                            dm.bulk_upsert(all_docs)
                        except Exception:
                            # Likely exceptions:
                            # pymongo.errors.OperationFailure,
                            # mongo_connector.errors.ConnectionFailed
                            # mongo_connector.errors.OperationFailed
                            error_queue.put(sys.exc_info())

                    t = threading.Thread(target=do_dump, args=(dm, errors))
                    dumping_threads.append(t)
                    t.start()
            else:
                logging.debug("OplogThread: DocManager %s has no "
                              "bulk_upsert method. Upserting documents "
                              "serially for collection dump." % str(dm))
                num = 0
                for num, doc in enumerate(docs_to_dump()):
                    if num % 10000 == 0:
                        logging.debug("Upserted %d docs." % num)
                    dm.upsert(doc)
                logging.debug("Upserted %d docs" % num)

        # cleanup
        for t in dumping_threads:
            t.join()
    except Exception:
        # See "likely exceptions" comment above
        errors.put(sys.exc_info())

    # Print caught exceptions
    try:
        while True:
            klass, value, trace = errors.get_nowait()
            dump_success = False
            traceback.print_exception(klass, value, trace)
    except queue.Empty:
        pass

    if not dump_success:
        err_msg = "OplogThread: Failed during dump collection"
        effect = "cannot recover!"
        logging.error('%s %s %s' % (err_msg, effect, self.oplog))
        self.running = False
        return None

    return timestamp
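
# Aside (illustrative sketch, hypothetical names): docs_to_dump above survives a
# dropped connection by remembering the last ``_id`` it yielded and re-issuing the
# find() with ``{"_id": {"$gt": last_id}}``, so an AutoReconnect resumes the scan
# instead of restarting it. The same idea in isolation, assuming ``coll`` is any
# pymongo Collection:

import time

import pymongo
import pymongo.errors


def resumable_scan(coll, max_attempts=60):
    """Yield every document in _id order, resuming after AutoReconnect."""
    last_id = None
    attempts = 0
    while attempts < max_attempts:
        spec = {} if last_id is None else {"_id": {"$gt": last_id}}
        try:
            for doc in coll.find(spec, sort=[("_id", pymongo.ASCENDING)]):
                last_id = doc["_id"]
                yield doc
            return  # full pass completed
        except pymongo.errors.AutoReconnect:
            attempts += 1
            time.sleep(1)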

def dump_collection(self):
    """Dumps collection into the target system.

    This method is called when we're initializing the cursor and have no
    configs i.e. when we're starting for the first time.
    """
    timestamp = util.retry_until_ok(self.get_last_oplog_timestamp)
    if timestamp is None:
        return None
    long_ts = util.bson_ts_to_long(timestamp)

    dump_set = self.namespace_set or []
    LOG.debug("OplogThread: Dumping set of collections %s " % dump_set)

    # No namespaces specified
    if not self.namespace_set:
        db_list = retry_until_ok(self.primary_client.database_names)
        for database in db_list:
            if database == "config" or database == "local":
                continue
            coll_list = retry_until_ok(
                self.primary_client[database].collection_names)
            for coll in coll_list:
                # ignore system collections
                if coll.startswith("system."):
                    continue
                # ignore gridfs collections
                if coll.endswith(".files") or coll.endswith(".chunks"):
                    continue
                namespace = "%s.%s" % (database, coll)
                dump_set.append(namespace)

    def docs_to_dump(namespace):
        database, coll = namespace.split('.', 1)
        last_id = None
        attempts = 0

        # Loop to handle possible AutoReconnect
        while attempts < 60:
            target_coll = self.primary_client[database][coll]
            if not last_id:
                cursor = util.retry_until_ok(
                    target_coll.find,
                    projection=self._projection,
                    sort=[("_id", pymongo.ASCENDING)]
                )
            else:
                cursor = util.retry_until_ok(
                    target_coll.find,
                    {"_id": {"$gt": last_id}},
                    projection=self._projection,
                    sort=[("_id", pymongo.ASCENDING)]
                )
            try:
                for doc in cursor:
                    if not self.running:
                        # Stop the generator; the oplog thread was joined.
                        return
                    last_id = doc["_id"]
                    yield doc
                break
            except (pymongo.errors.AutoReconnect,
                    pymongo.errors.OperationFailure):
                attempts += 1
                time.sleep(1)

    def upsert_each(dm):
        num_inserted = 0
        num_failed = 0
        for namespace in dump_set:
            for num, doc in enumerate(docs_to_dump(namespace)):
                if num % 10000 == 0:
                    LOG.debug("Upserted %d docs." % num)
                try:
                    mapped_ns = self.dest_mapping.get(namespace, namespace)
                    dm.upsert(doc, mapped_ns, long_ts)
                    num_inserted += 1
                except Exception:
                    if self.continue_on_error:
                        LOG.exception(
                            "Could not upsert document: %r" % doc)
                        num_failed += 1
                    else:
                        raise
        LOG.debug("Upserted %d docs" % num_inserted)
        if num_failed > 0:
            LOG.error("Failed to upsert %d docs" % num_failed)

    def upsert_all(dm):
        try:
            for namespace in dump_set:
                mapped_ns = self.dest_mapping.get(namespace, namespace)
                dm.bulk_upsert(docs_to_dump(namespace), mapped_ns, long_ts)
        except Exception:
            if self.continue_on_error:
                LOG.exception("OplogThread: caught exception"
                              " during bulk upsert, re-upserting"
                              " documents serially")
                upsert_each(dm)
            else:
                raise

    def do_dump(dm, error_queue):
        try:
            # Dump the documents, bulk upsert if possible
            if hasattr(dm, "bulk_upsert"):
                LOG.debug("OplogThread: Using bulk upsert function for "
                          "collection dump")
                upsert_all(dm)
            else:
                LOG.debug(
                    "OplogThread: DocManager %s has no "
                    "bulk_upsert method. Upserting documents "
                    "serially for collection dump." % str(dm))
                upsert_each(dm)

            # Dump GridFS files
            for gridfs_ns in self.gridfs_set:
                db, coll = gridfs_ns.split('.', 1)
                mongo_coll = self.primary_client[db][coll]
                dest_ns = self.dest_mapping.get(gridfs_ns, gridfs_ns)
                for doc in docs_to_dump(gridfs_ns + '.files'):
                    gridfile = GridFSFile(mongo_coll, doc)
                    dm.insert_file(gridfile, dest_ns, long_ts)
        except Exception:
            # Likely exceptions:
            # pymongo.errors.OperationFailure,
            # mongo_connector.errors.ConnectionFailed
            # mongo_connector.errors.OperationFailed
            error_queue.put(sys.exc_info())

    # Extra threads (if any) that assist with collection dumps
    dumping_threads = []
    # Did the dump succeed for all target systems?
    dump_success = True
    # Holds any exceptions we can't recover from
    errors = queue.Queue()

    if len(self.doc_managers) == 1:
        do_dump(self.doc_managers[0], errors)
    else:
        # Slight performance gain breaking dump into separate
        # threads if > 1 replication target
        for dm in self.doc_managers:
            t = threading.Thread(target=do_dump, args=(dm, errors))
            dumping_threads.append(t)
            t.start()
        # cleanup
        for t in dumping_threads:
            t.join()

    # Print caught exceptions
    try:
        while True:
            LOG.critical('Exception during collection dump',
                         exc_info=errors.get_nowait())
            dump_success = False
    except queue.Empty:
        pass

    if not dump_success:
        err_msg = "OplogThread: Failed during dump collection"
        effect = "cannot recover!"
        LOG.error('%s %s %s' % (err_msg, effect, self.oplog))
        self.running = False
        return None

    return timestamp
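
# Aside (illustrative sketch, hypothetical worker): the dump above fans out one
# thread per DocManager and funnels unrecoverable exceptions through a
# queue.Queue of sys.exc_info() tuples, which the main thread drains with
# get_nowait(). The skeleton of that pattern, independent of mongo-connector:

import queue
import sys
import threading


def worker(n, error_queue):
    try:
        if n == 2:
            raise ValueError("simulated failure in worker %d" % n)
    except Exception:
        error_queue.put(sys.exc_info())


errors = queue.Queue()
threads = [threading.Thread(target=worker, args=(n, errors)) for n in range(3)]
for t in threads:
    t.start()
for t in threads:
    t.join()

success = True
try:
    while True:
        exc_info = errors.get_nowait()  # raises queue.Empty once drained
        success = False
        print("worker failed:", exc_info[1])
except queue.Empty:
    pass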

def dump_collection(self):
    """Dumps collection into the target system.

    This method is called when we're initializing the cursor and have no
    configs i.e. when we're starting for the first time.
    """
    timestamp = retry_until_ok(self.get_last_oplog_timestamp)
    if timestamp is None:
        return None
    long_ts = util.bson_ts_to_long(timestamp)
    # Flag if this oplog thread was cancelled during the collection dump.
    # Use a list to workaround python scoping.
    dump_cancelled = [False]

    def get_all_ns():
        ns_set = []
        gridfs_ns_set = []
        db_list = retry_until_ok(self.primary_client.database_names)
        for database in db_list:
            if database == "config" or database == "local":
                continue
            coll_list = retry_until_ok(
                self.primary_client[database].collection_names)
            for coll in coll_list:
                # ignore system collections
                if coll.startswith("system."):
                    continue
                # ignore gridfs chunks collections
                if coll.endswith(".chunks"):
                    continue
                if coll.endswith(".files"):
                    namespace = "%s.%s" % (database, coll)
                    namespace = namespace[:-len(".files")]
                    if self.namespace_config.gridfs_namespace(namespace):
                        gridfs_ns_set.append(namespace)
                else:
                    namespace = "%s.%s" % (database, coll)
                    if self.namespace_config.map_namespace(namespace):
                        ns_set.append(namespace)
        return ns_set, gridfs_ns_set

    dump_set, gridfs_dump_set = get_all_ns()

    LOG.debug("OplogThread: Dumping set of collections %s " % dump_set)

    def docs_to_dump(from_coll):
        last_id = None
        attempts = 0
        projection = self.namespace_config.projection(from_coll.full_name)
        # Loop to handle possible AutoReconnect
        while attempts < 60:
            if last_id is None:
                cursor = retry_until_ok(
                    from_coll.find,
                    projection=projection,
                    sort=[("_id", pymongo.ASCENDING)]
                )
            else:
                cursor = retry_until_ok(
                    from_coll.find,
                    {"_id": {"$gt": last_id}},
                    projection=projection,
                    sort=[("_id", pymongo.ASCENDING)]
                )
            try:
                for doc in cursor:
                    if not self.running:
                        # Thread was joined while performing the
                        # collection dump.
                        dump_cancelled[0] = True
                        return  # not raise StopIteration; see PEP 479
                    last_id = doc["_id"]
                    yield doc
                break
            except (pymongo.errors.AutoReconnect,
                    pymongo.errors.OperationFailure):
                attempts += 1
                time.sleep(1)

    def upsert_each(dm):
        num_failed = 0
        for namespace in dump_set:
            from_coll = self.get_collection(namespace)
            mapped_ns = self.namespace_config.map_namespace(namespace)
            total_docs = retry_until_ok(from_coll.count)
            num = None
            for num, doc in enumerate(docs_to_dump(from_coll)):
                try:
                    dm.upsert(doc, mapped_ns, long_ts)
                except Exception:
                    if self.continue_on_error:
                        LOG.exception(
                            "Could not upsert document: %r" % doc)
                        num_failed += 1
                    else:
                        raise
                if num % 10000 == 0:
                    LOG.info("Upserted %d out of approximately %d docs "
                             "from collection '%s'",
                             num + 1, total_docs, namespace)
            if num is not None:
                LOG.info("Upserted %d out of approximately %d docs from "
                         "collection '%s'",
                         num + 1, total_docs, namespace)
        if num_failed > 0:
            LOG.error("Failed to upsert %d docs" % num_failed)

    def upsert_all(dm):
        try:
            for namespace in dump_set:
                from_coll = self.get_collection(namespace)
                total_docs = retry_until_ok(from_coll.count)
                mapped_ns = self.namespace_config.map_namespace(namespace)
                LOG.info("Bulk upserting approximately %d docs from "
                         "collection '%s'", total_docs, namespace)
                dm.bulk_upsert(docs_to_dump(from_coll), mapped_ns, long_ts)
        except Exception:
            if self.continue_on_error:
                LOG.exception("OplogThread: caught exception"
                              " during bulk upsert, re-upserting"
                              " documents serially")
                upsert_each(dm)
            else:
                raise

    def do_dump(dm, error_queue):
        try:
            LOG.debug("OplogThread: Using bulk upsert function for "
                      "collection dump")
            upsert_all(dm)

            if gridfs_dump_set:
                LOG.info("OplogThread: dumping GridFS collections: %s",
                         gridfs_dump_set)

            # Dump GridFS files
            for gridfs_ns in gridfs_dump_set:
                mongo_coll = self.get_collection(gridfs_ns)
                from_coll = self.get_collection(gridfs_ns + '.files')
                dest_ns = self.namespace_config.map_namespace(gridfs_ns)
                for doc in docs_to_dump(from_coll):
                    gridfile = GridFSFile(mongo_coll, doc)
                    dm.insert_file(gridfile, dest_ns, long_ts)
        except Exception:
            # Likely exceptions:
            # pymongo.errors.OperationFailure,
            # mongo_connector.errors.ConnectionFailed
            # mongo_connector.errors.OperationFailed
            error_queue.put(sys.exc_info())

    # Extra threads (if any) that assist with collection dumps
    dumping_threads = []
    # Did the dump succeed for all target systems?
    dump_success = True
    # Holds any exceptions we can't recover from
    errors = queue.Queue()

    if len(self.doc_managers) == 1:
        do_dump(self.doc_managers[0], errors)
    else:
        # Slight performance gain breaking dump into separate
        # threads if > 1 replication target
        for dm in self.doc_managers:
            t = threading.Thread(target=do_dump, args=(dm, errors))
            dumping_threads.append(t)
            t.start()
        # cleanup
        for t in dumping_threads:
            t.join()

    # Print caught exceptions
    try:
        while True:
            LOG.critical('Exception during collection dump',
                         exc_info=errors.get_nowait())
            dump_success = False
    except queue.Empty:
        pass

    if not dump_success:
        err_msg = "OplogThread: Failed during dump collection"
        effect = "cannot recover!"
        LOG.error('%s %s %s' % (err_msg, effect, self.oplog))
        self.running = False
        return None

    if dump_cancelled[0]:
        LOG.warning('Initial collection dump was interrupted. '
                    'Will re-run the collection dump on next startup.')
        return None

    return timestamp
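
# Aside (hedged sketch): long_ts above comes from util.bson_ts_to_long, which --
# assuming the usual mongo-connector encoding -- packs a BSON Timestamp's seconds
# and increment counter into a single integer so it can be compared and stored
# easily. An equivalent pair of helpers:

from bson.timestamp import Timestamp


def bson_ts_to_long(ts):
    # high 32 bits: seconds since the epoch; low 32 bits: ordinal in that second
    return (ts.time << 32) + ts.inc


def long_to_bson_ts(val):
    return Timestamp(val >> 32, val & 0xFFFFFFFF)


ts = Timestamp(1500000000, 7)
assert long_to_bson_ts(bson_ts_to_long(ts)) == ts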

def dump_collection(self):
    """Dumps collection into the target system.

    This method is called when we're initializing the cursor and have no
    configs i.e. when we're starting for the first time.
    """
    dump_set = self.get_dump_set()
    shard_doc_count = self.shard_doc_count()
    timestamp = None

    LOG.info("OplogThread: Dumping set of collections %s from shard %s "
             "with %d docs" % (dump_set, self.shard_name, shard_doc_count))

    if shard_doc_count > 0:
        timestamp = util.retry_until_ok(self.get_last_oplog_timestamp)
        long_ts = util.bson_ts_to_long(timestamp)
    else:
        long_ts = 0

    def query_handle_id(query):
        if '_id' in query and query['_id']:
            for key in query['_id']:
                query['_id'][key] = ObjectId(query['_id'][key])

    def docs_to_dump(namespace):
        database, coll = namespace.split('.', 1)
        last_id = None
        attempts = 0

        # Loop to handle possible AutoReconnect
        while attempts < 60:
            target_coll = self.primary_client[database][coll]
            fields_to_fetch = None
            if 'include' in self._fields:
                fields_to_fetch = self._fields['include']

            query = {}
            if self.initial_import['query']:
                query_handle_id(self.initial_import['query'])
                query = self.initial_import['query']

            if not last_id:
                cursor = util.retry_until_ok(
                    target_coll.find,
                    query,
                    fields=fields_to_fetch,
                    sort=[("_id", pymongo.ASCENDING)]
                )
            else:
                # dict.update() returns None, so mutate the query first and
                # then pass the query itself to find().
                query.update({"_id": {"$gt": last_id}})
                cursor = util.retry_until_ok(
                    target_coll.find,
                    query,
                    fields=fields_to_fetch,
                    sort=[("_id", pymongo.ASCENDING)]
                )
            try:
                for doc in cursor:
                    if not self.running:
                        LOG.error("Stopped iterating over Cursor while "
                                  "initial import")
                        return
                    self.pop_excluded_fields(doc)
                    last_id = doc["_id"]
                    yield doc
                break
            except (pymongo.errors.AutoReconnect,
                    pymongo.errors.OperationFailure):
                attempts += 1
                time.sleep(1)

    # Extra threads (if any) that assist with collection dumps
    dumping_threads = []
    # Did the dump succeed for all target systems?
    dump_success = True
    # Holds any exceptions we can't recover from
    errors = queue.Queue()

    if len(self.doc_managers) == 1:
        do_dump(self.doc_managers[0], errors)
    else:
        # Slight performance gain breaking dump into separate
        # threads if > 1 replication target
        for dm in self.doc_managers:
            t = threading.Thread(target=do_dump, args=(dm, errors))
            dumping_threads.append(t)
            t.start()
        # cleanup
        for t in dumping_threads:
            t.join()

    # Print caught exceptions
    try:
        while True:
            LOG.critical('Exception during collection dump',
                         exc_info=errors.get_nowait())
            dump_success = False
    except queue.Empty:
        pass

    if not dump_success:
        err_msg = "OplogThread: Failed during dump collection"
        effect = "cannot recover!"
        LOG.error('%s %s %s' % (err_msg, effect, self.oplog))
        self.running = False
        return None
    else:
        LOG.info('OplogThread: Successfully dumped collection')
        LOG.warning('Fields with errors: %r' % self.error_fields)
        if not self.reimporting:
            LOG.info("Waiting for other threads to finish collection dump")
            self.barrier.wait()

    return timestamp
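
# Aside (illustrative, hypothetical input): query_handle_id above rewrites an _id
# clause whose operator values arrived as strings (e.g. from a JSON config) into
# real bson.ObjectId values, since MongoDB compares ObjectIds rather than their
# hex strings. For example:

from bson import ObjectId

initial_query = {"_id": {"$gte": "5f43a87e2c8b9a1d4c8a9e01"}}
for key in initial_query["_id"]:
    initial_query["_id"][key] = ObjectId(initial_query["_id"][key])

assert isinstance(initial_query["_id"]["$gte"], ObjectId)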

def dump_collection(self):
    """Dumps collection into the target system.

    This method is called when we're initializing the cursor and have no
    configs i.e. when we're starting for the first time.
    """
    dump_set = self.get_dump_set()
    shard_doc_count = self.shard_doc_count()
    timestamp = None
    long_ts = 0

    LOG.info("OplogThread: Dumping set of collections %s from shard %s "
             "with %d docs" % (dump_set, self.shard_name, shard_doc_count))

    if shard_doc_count > 0:
        timestamp = util.retry_until_ok(self.get_last_oplog_timestamp)
        if timestamp:
            long_ts = util.bson_ts_to_long(timestamp)

    def query_handle_id(query):
        if '_id' in query and query['_id']:
            for key in query['_id']:
                query['_id'][key] = ObjectId(query['_id'][key])

    def docs_to_dump(namespace):
        database, coll = namespace.split('.', 1)
        last_id = None
        attempts = 0

        # Loop to handle possible AutoReconnect
        while attempts < 60:
            target_coll = self.primary_client[database][coll]
            fields_to_fetch = None
            if 'include' in self._fields:
                fields_to_fetch = self._fields['include']

            query = {}
            if self.initial_import['query']:
                query_handle_id(self.initial_import['query'])
                query = self.initial_import['query']

            if not last_id:
                cursor = util.retry_until_ok(
                    target_coll.find,
                    query,
                    fields=fields_to_fetch,
                    sort=[("_id", pymongo.ASCENDING)]
                )
            else:
                # dict.update() returns None, so mutate the query first and
                # then pass the query itself to find().
                query.update({"_id": {"$gt": last_id}})
                cursor = util.retry_until_ok(
                    target_coll.find,
                    query,
                    fields=fields_to_fetch,
                    sort=[("_id", pymongo.ASCENDING)]
                )
            try:
                for doc in cursor:
                    if not self.running:
                        LOG.error("Stopped iterating over Cursor while "
                                  "initial import")
                        return
                    self.pop_excluded_fields(doc)
                    last_id = doc["_id"]
                    yield doc
                break
            except (pymongo.errors.AutoReconnect,
                    pymongo.errors.OperationFailure):
                attempts += 1
                time.sleep(1)

    def upsert_all_failed_docs(dm, namespace, errors):
        if errors:
            for _id, field in errors:
                self.upsert_doc(dm, namespace, long_ts, _id, field)

    def upsert_each(dm):
        num_inserted = 0
        num_failed = 0
        for namespace in dump_set:
            mapped_ns = self.dest_mapping.get(namespace, namespace)
            dm.disable_refresh(mapped_ns)
            for num, doc in enumerate(docs_to_dump(namespace)):
                if num % 10000 == 0:
                    LOG.info("Upserted %d docs." % num)
                try:
                    self.upsert_doc(dm, namespace, long_ts, None, None, doc)
                    num_inserted += 1
                except Exception:
                    if self.continue_on_error:
                        LOG.exception("Could not upsert document: %r" % doc)
                        num_failed += 1
                    else:
                        raise
            dm.enable_refresh(mapped_ns)
        LOG.info("Upserted %d docs" % num_inserted)
        if num_failed > 0:
            LOG.error("Failed to upsert %d docs" % num_failed)

    def upsert_all(dm):
        try:
            for namespace in dump_set:
                mapped_ns = self.dest_mapping.get(namespace, namespace)
                dm.disable_refresh(mapped_ns)
                errors = dm.bulk_upsert(docs_to_dump(namespace),
                                        mapped_ns, long_ts)
                upsert_all_failed_docs(dm, namespace, errors)
                dm.enable_refresh(mapped_ns)
        except Exception:
            if self.continue_on_error:
                LOG.exception("OplogThread: caught exception"
                              " during bulk upsert, re-upserting"
                              " documents serially")
                upsert_each(dm)
            else:
                raise

    def do_dump(dm, error_queue):
        try:
            # Dump the documents, bulk upsert if possible
            if hasattr(dm, "bulk_upsert"):
                LOG.debug("OplogThread: Using bulk upsert function for "
                          "collection dump")
                upsert_all(dm)
            else:
                LOG.debug(
                    "OplogThread: DocManager %s has no "
                    "bulk_upsert method. Upserting documents "
                    "serially for collection dump."
                    % str(dm))
                upsert_each(dm)

            # Dump GridFS files
            for gridfs_ns in self.gridfs_set:
                db, coll = gridfs_ns.split('.', 1)
                mongo_coll = self.primary_client[db][coll]
                dest_ns = self.dest_mapping.get(gridfs_ns, gridfs_ns)
                for doc in docs_to_dump(gridfs_ns + '.files'):
                    gridfile = GridFSFile(mongo_coll, doc)
                    dm.insert_file(gridfile, dest_ns, long_ts)
        except Exception:
            # Likely exceptions:
            # pymongo.errors.OperationFailure,
            # mongo_connector.errors.ConnectionFailed
            # mongo_connector.errors.OperationFailed
            error_queue.put(sys.exc_info())

    # Extra threads (if any) that assist with collection dumps
    dumping_threads = []
    # Did the dump succeed for all target systems?
    dump_success = True
    # Holds any exceptions we can't recover from
    errors = queue.Queue()

    if len(self.doc_managers) == 1:
        do_dump(self.doc_managers[0], errors)
    else:
        # Slight performance gain breaking dump into separate
        # threads if > 1 replication target
        for dm in self.doc_managers:
            t = threading.Thread(target=do_dump, args=(dm, errors))
            dumping_threads.append(t)
            t.start()
        # cleanup
        for t in dumping_threads:
            t.join()

    # Print caught exceptions
    try:
        while True:
            LOG.critical('Exception during collection dump',
                         exc_info=errors.get_nowait())
            dump_success = False
    except queue.Empty:
        pass

    if not dump_success:
        err_msg = "OplogThread: Failed during dump collection"
        effect = "cannot recover!"
        LOG.error('%s %s %s' % (err_msg, effect, self.oplog))
        self.running = False
        return None
    else:
        LOG.info('OplogThread: Successfully dumped collection')
        LOG.warning('Fields with errors: %r' % self.error_fields)
        if not self.reimporting:
            LOG.info("Waiting for other threads to finish collection dump")
            self.barrier.wait()

    return timestamp
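
# Aside (illustrative sketch, hypothetical shard count): self.barrier.wait() above
# presumably lets each per-shard OplogThread block until every shard has finished
# its dump before any of them continues to tail the oplog. The standard-library
# primitive behind that pattern:

import threading

NUM_SHARDS = 3  # assumption for the sketch
barrier = threading.Barrier(NUM_SHARDS)


def dump_shard(name):
    print("%s: dump finished, waiting for the other shards" % name)
    barrier.wait()  # releases only once all NUM_SHARDS threads arrive
    print("%s: all shards dumped, resume tailing the oplog" % name)


threads = [threading.Thread(target=dump_shard, args=("shard%d" % i,))
           for i in range(NUM_SHARDS)]
for t in threads:
    t.start()
for t in threads:
    t.join()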