def test_upgrade_oplog_progress(self):
    first_oplog_ts1 = self.opman1.oplog.find_one()['ts']
    first_oplog_ts2 = self.opman2.oplog.find_one()['ts']
    # Old format oplog progress file:
    progress = {
        str(self.opman1.oplog): bson_ts_to_long(first_oplog_ts1),
        str(self.opman2.oplog): bson_ts_to_long(first_oplog_ts2)
    }
    # Set up oplog managers to use the old format.
    oplog_progress = LockingDict()
    oplog_progress.dict = progress
    self.opman1.oplog_progress = oplog_progress
    self.opman2.oplog_progress = oplog_progress
    # Cause the oplog managers to update their checkpoints.
    self.opman1.update_checkpoint(first_oplog_ts1)
    self.opman2.update_checkpoint(first_oplog_ts2)
    # New format should be in place now.
    new_format = {
        self.opman1.replset_name: first_oplog_ts1,
        self.opman2.replset_name: first_oplog_ts2
    }
    self.assertEqual(new_format, self.opman1.oplog_progress.get_dict())
    self.assertEqual(new_format, self.opman2.oplog_progress.get_dict())
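# The snippets in this file share checkpoint state between threads through a
# LockingDict. The real class ships with mongo_connector; the version below is
# a minimal sketch reconstructed only from the behavior these tests exercise:
# a plain dict guarded by a lock, read via get_dict(), and usable as a context
# manager (`with self.oplog_progress as prog:`).
import threading


class LockingDict(object):
    """Minimal sketch of a dict protected by a mutex."""

    def __init__(self):
        self.dict = {}
        self.lock = threading.Lock()

    def get_dict(self):
        # Callers are expected to hold the lock (via `with`) when they need a
        # consistent view across multiple operations.
        return self.dict

    def __enter__(self):
        self.lock.acquire()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.lock.release()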
def test_init_cursor(self):
    """Test init_cursor in oplog_manager.

    Assertion failure if it doesn't pass
    """
    test_oplog, primary_conn, search_ts = self.get_oplog_thread()
    test_oplog.checkpoint = None  # needed for these tests

    # initial tests with no config file and empty oplog
    self.assertEqual(test_oplog.init_cursor(), None)

    # no config, single oplog entry
    primary_conn['test']['test'].insert({'name': 'paulie'})
    search_ts = test_oplog.get_last_oplog_timestamp()
    cursor = test_oplog.init_cursor()
    self.assertEqual(cursor.count(), 1)
    self.assertEqual(test_oplog.checkpoint, search_ts)

    # with config file, assert that size != 0
    os.system('touch temp_config.txt')
    cursor = test_oplog.init_cursor()
    oplog_dict = test_oplog.oplog_progress.get_dict()
    self.assertEqual(cursor.count(), 1)
    self.assertTrue(str(test_oplog.oplog) in oplog_dict)
    self.assertTrue(
        oplog_dict[str(test_oplog.oplog)] == test_oplog.checkpoint)
    os.system('rm temp_config.txt')

    # test init_cursor when OplogThread created with/without no-dump option
    # insert some documents (will need to be dumped)
    primary_conn['test']['test'].remove()
    primary_conn['test']['test'].insert(({"_id": i} for i in range(100)))

    # test no-dump option
    docman = test_oplog.doc_managers[0]
    docman._delete()
    test_oplog.collection_dump = False
    test_oplog.oplog_progress = LockingDict()
    # init_cursor normally has the side-effect of causing a collection dump;
    # with the no-dump option, nothing should be dumped
    test_oplog.init_cursor()
    self.assertEqual(len(docman._search()), 0)

    # test w/o no-dump option
    docman._delete()
    test_oplog.collection_dump = True
    test_oplog.oplog_progress = LockingDict()
    test_oplog.init_cursor()
    self.assertEqual(len(docman._search()), 100)
def get_oplog_thread(cls):
    """ Set up connection with mongo.

    Returns oplog, the connection and oplog collection.
    This function clears the oplog.
    """
    is_sharded = True
    primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
    if primary_conn['admin'].command("isMaster")['ismaster'] is False:
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

    primary_conn['test']['test'].drop()
    mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])

    if PORTS_ONE["MAIN"] == PORTS_ONE["PRIMARY"]:
        mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])
        is_sharded = False

    oplog_coll = primary_conn['local']['oplog.rs']
    oplog_coll.drop()           # reset the oplog
    primary_conn['local'].create_collection('oplog.rs', capped=True,
                                            size=1000000)

    namespace_set = ['test.test']
    doc_manager = DocManager()
    oplog = OplogThread(primary_conn, mongos_addr, oplog_coll, is_sharded,
                        doc_manager, LockingDict(), namespace_set,
                        cls.AUTH_KEY, AUTH_USERNAME, repl_set="demo-repl")

    return (oplog, primary_conn, oplog_coll)
def setUp(self):
    # Create a new oplog progress file
    try:
        os.unlink("oplog.timestamp")
    except OSError:
        pass
    open("oplog.timestamp", "w").close()

    # Start a replica set
    self.repl_set = ReplicaSet().start()
    # Connection to the replica set as a whole
    self.main_conn = self.repl_set.client()
    # Connection to the primary specifically
    self.primary_conn = self.repl_set.primary.client()
    # Connection to the secondary specifically
    self.secondary_conn = self.repl_set.secondary.client(
        read_preference=ReadPreference.SECONDARY_PREFERRED)

    # Wipe any test data
    self.main_conn["test"]["mc"].drop()

    # Oplog thread
    doc_manager = DocManager()
    oplog_progress = LockingDict()
    self.opman = OplogThread(primary_client=self.main_conn,
                             doc_managers=(doc_manager,),
                             oplog_progress_dict=oplog_progress,
                             ns_set=["test.mc"])
def get_oplog_thread(cls):
    """ Set up connection with mongo.

    Returns oplog, the connection and oplog collection.
    This function clears the oplog.
    """
    primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
    if primary_conn['admin'].command("isMaster")['ismaster'] is False:
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

    mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE["MONGOS"])
    mongos = Connection(mongos_addr)
    mongos['alpha']['foo'].drop()

    oplog_coll = primary_conn['local']['oplog.rs']
    oplog_coll.drop()           # reset the oplog
    primary_conn['local'].create_collection('oplog.rs', capped=True,
                                            size=1000000)

    namespace_set = ['test.test', 'alpha.foo']
    doc_manager = DocManager()
    oplog = OplogThread(primary_conn, mongos_addr, oplog_coll, True,
                        doc_manager, LockingDict(), namespace_set,
                        cls.AUTH_KEY, AUTH_USERNAME)

    return (oplog, primary_conn, oplog_coll, mongos)
def get_new_oplog(cls):
    """ Set up connection with mongo.

    Returns oplog, the connection and oplog collection.
    This function does not clear the oplog.
    """
    is_sharded = True
    primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
    if primary_conn['admin'].command("isMaster")['ismaster'] is False:
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

    mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])
    if PORTS_ONE["MAIN"] == PORTS_ONE["PRIMARY"]:
        mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])
        is_sharded = False

    oplog_coll = primary_conn['local']['oplog.rs']

    namespace_set = ['test.test']
    doc_manager = DocManager()
    oplog = OplogThread(primary_conn=primary_conn,
                        main_address=mongos_addr,
                        oplog_coll=oplog_coll,
                        is_sharded=is_sharded,
                        doc_manager=doc_manager,
                        oplog_progress_dict=LockingDict(),
                        namespace_set=namespace_set,
                        auth_key=cls.AUTH_KEY,
                        auth_username=AUTH_USERNAME,
                        repl_set="demo-repl")

    return (oplog, primary_conn, oplog.main_connection, oplog_coll)
def test_skipped_oplog_entry_updates_checkpoint(self):
    repl_set = ReplicaSetSingle().start()
    conn = repl_set.client()
    opman = OplogThread(
        primary_client=conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        namespace_config=NamespaceConfig(namespace_set=["test.test"]),
    )
    opman.start()

    # Insert a document into an included collection
    conn["test"]["test"].insert_one({"test": 1})
    last_ts = opman.get_last_oplog_timestamp()
    assert_soon(
        lambda: last_ts == opman.checkpoint,
        "OplogThread never updated checkpoint to non-skipped entry.",
    )
    self.assertEqual(len(opman.doc_managers[0]._search()), 1)

    # Make sure that the oplog thread updates its checkpoint on every
    # oplog entry.
    conn["test"]["ignored"].insert_one({"test": 1})
    last_ts = opman.get_last_oplog_timestamp()
    assert_soon(
        lambda: last_ts == opman.checkpoint,
        "OplogThread never updated checkpoint to skipped entry.",
    )
    opman.join()
    conn.close()
    repl_set.stop()
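# assert_soon is a test helper, not part of the standard library. Judging
# only from how it is called in these tests (a zero-argument callable, an
# optional failure message, and an optional max_tries), a plausible sketch:
import time


def assert_soon(condition, message=None, max_tries=60):
    """Poll `condition` roughly once per second until it returns True,
    raising AssertionError if it never does within max_tries attempts."""
    for _ in range(max_tries):
        if condition():
            return
        time.sleep(1)
    raise AssertionError(message or "condition was never satisfied")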
def setUp(self):
    self.repl_set = ReplicaSet().start()
    self.primary_conn = self.repl_set.client()
    self.oplog_coll = self.primary_conn.local['oplog.rs']
    self.opman = OplogThread(primary_client=self.primary_conn,
                             doc_managers=(DocManager(),),
                             oplog_progress_dict=LockingDict())
def setUp(self):
    self.namespace_config = NamespaceConfig()
    self.opman = OplogThread(
        primary_client=self.primary_conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        namespace_config=self.namespace_config,
    )
def reset_opman(self, include_ns=None, exclude_ns=None, dest_mapping=None):
    self.namespace_config = NamespaceConfig(namespace_set=include_ns,
                                            ex_namespace_set=exclude_ns,
                                            namespace_options=dest_mapping)
    self.opman = OplogThread(primary_client=self.primary_conn,
                             doc_managers=(DocManager(),),
                             oplog_progress_dict=LockingDict(),
                             namespace_config=self.namespace_config)
def test_fields_and_exclude(self):
    fields = ['a', 'b', 'c', '_id']
    exclude_fields = ['x', 'y', 'z']

    # Test setting both to None in constructor
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(),),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        fields=None,
                        exclude_fields=None)
    self._check_fields(opman, [], [], None)
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(),),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        fields=None,
                        exclude_fields=exclude_fields)
    self._check_fields(opman, [], exclude_fields,
                       dict((f, 0) for f in exclude_fields))

    # Test setting fields when exclude_fields is set
    self.assertRaises(errors.InvalidConfiguration,
                      setattr, opman, "fields", fields)
    self.assertRaises(errors.InvalidConfiguration,
                      setattr, opman, "fields", None)

    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(),),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        exclude_fields=None,
                        fields=fields)
    self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
    self.assertRaises(errors.InvalidConfiguration,
                      setattr, opman, "exclude_fields", exclude_fields)
    self.assertRaises(errors.InvalidConfiguration,
                      setattr, opman, "exclude_fields", None)
    self.assertRaises(errors.InvalidConfiguration,
                      OplogThread, self.primary_conn, (DocManager(),),
                      LockingDict(), self.dest_mapping_stru,
                      fields=fields, exclude_fields=exclude_fields)
def setUp(self):
    self.repl_set = ReplicaSetSingle().start()
    self.primary_conn = self.repl_set.client()
    self.oplog_coll = self.primary_conn.local['oplog.rs']
    self.dest_mapping_stru = DestMapping([], [], {})
    self.opman = OplogThread(
        primary_client=self.primary_conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        dest_mapping_stru=self.dest_mapping_stru,
    )
def setUp(self):
    self.repl_set = ReplicaSetSingle().start()
    self.primary_conn = self.repl_set.client()
    self.oplog_coll = self.primary_conn.local["oplog.rs"]
    self.opman = OplogThread(
        primary_client=self.primary_conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        namespace_config=NamespaceConfig(
            namespace_options={"test.*": True, "gridfs.*": {"gridfs": True}}
        ),
    )
def test_dump_collection(self):
    """Test the dump_collection method

    Cases:
    1. empty oplog
    2. non-empty oplog, with gridfs collections
    3. non-empty oplog, specified a namespace-set, none of the oplog
       entries are for collections in the namespace-set
    """
    # Test with empty oplog
    self.opman.oplog = self.primary_conn["test"]["emptycollection"]
    last_ts = self.opman.dump_collection()
    self.assertEqual(last_ts, None)

    # Test with non-empty oplog with gridfs collections
    self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
    # Insert 10 gridfs files
    for i in range(10):
        fs = gridfs.GridFS(self.primary_conn["gridfs"],
                           collection="test" + str(i))
        fs.put(b"hello world")
    # Insert 1000 documents
    for i in range(1000):
        self.primary_conn["test"]["test"].insert_one({"i": i + 500})
    last_ts = self.opman.get_last_oplog_timestamp()
    self.assertEqual(last_ts, self.opman.dump_collection())
    self.assertEqual(len(self.opman.doc_managers[0]._search()), 1010)

    # Case 3
    # 1MB oplog so that we can rollover quickly
    repl_set = ReplicaSetSingle(oplogSize=1).start()
    conn = repl_set.client()
    opman = OplogThread(
        primary_client=conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        namespace_config=NamespaceConfig(namespace_set=["test.test"]),
    )
    # Insert a document into an included collection
    conn["test"]["test"].insert_one({"test": 1})
    # Cause the oplog to rollover on a non-included collection
    while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}):
        conn["test"]["ignored"].insert_many(
            [{"test": "1" * 1024} for _ in range(1024)])
    last_ts = opman.get_last_oplog_timestamp()
    self.assertEqual(last_ts, opman.dump_collection())
    self.assertEqual(len(opman.doc_managers[0]._search()), 1)
    conn.close()
    repl_set.stop()
def test_dump_collection(self):
    """Test the dump_collection method

    Cases:
    1. empty oplog
    2. non-empty oplog
    3. non-empty oplog, specified a namespace-set, none of the oplog
       entries are for collections in the namespace-set
    """
    # Test with empty oplog
    self.opman.oplog = self.primary_conn["test"]["emptycollection"]
    last_ts = self.opman.dump_collection()
    self.assertEqual(last_ts, None)

    # Test with non-empty oplog
    self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
    for i in range(1000):
        self.primary_conn["test"]["test"].insert_one({"i": i + 500})
    last_ts = self.opman.get_last_oplog_timestamp()
    self.assertEqual(last_ts, self.opman.dump_collection())
    self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000)

    # Case 3
    # 1MB oplog so that we can rollover quickly
    repl_set = ReplicaSetSingle(oplogSize=1).start()
    conn = repl_set.client()
    dest_mapping_stru = DestMapping(["test.test"], [], {})
    opman = OplogThread(
        primary_client=conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        dest_mapping_stru=dest_mapping_stru,
        ns_set=set(["test.test"])
    )
    # Insert a document into a ns_set collection
    conn["test"]["test"].insert_one({"test": 1})
    # Cause the oplog to rollover on a non-ns_set collection
    while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}):
        conn["test"]["ignored"].insert_many(
            [{"test": "1" * 1024} for _ in range(1024)])
    last_ts = opman.get_last_oplog_timestamp()
    self.assertEqual(last_ts, opman.dump_collection())
    self.assertEqual(len(opman.doc_managers[0]._search()), 1)
    conn.close()
    repl_set.stop()
def setUp(self):
    _, _, self.primary_p = start_replica_set('test-oplog-manager')
    self.primary_conn = pymongo.MongoClient(mongo_host, self.primary_p)
    self.oplog_coll = self.primary_conn.local['oplog.rs']
    self.opman = OplogThread(
        primary_conn=self.primary_conn,
        main_address='%s:%d' % (mongo_host, self.primary_p),
        oplog_coll=self.oplog_coll,
        is_sharded=False,
        doc_manager=DocManager(),
        oplog_progress_dict=LockingDict(),
        namespace_set=None,
        auth_key=None,
        auth_username=None,
        repl_set='test-oplog-manager')
def reset_opman(self, include_ns=None, exclude_ns=None, dest_mapping=None):
    if include_ns is None:
        include_ns = []
    if exclude_ns is None:
        exclude_ns = []
    if dest_mapping is None:
        dest_mapping = {}

    # include_ns must not exist together with exclude_ns;
    # dest_mapping must exist together with include_ns.
    # Those checks have been tested in test_config.py, so we skip them here.
    self.dest_mapping_stru = DestMapping(include_ns, exclude_ns,
                                         dest_mapping)
    self.opman = OplogThread(primary_client=self.primary_conn,
                             doc_managers=(DocManager(),),
                             oplog_progress_dict=LockingDict(),
                             dest_mapping_stru=self.dest_mapping_stru,
                             ns_set=include_ns,
                             ex_ns_set=exclude_ns)
def get_new_oplog(cls):
    """ Set up connection with mongo.

    Returns oplog, the connection and oplog collection.
    This function does not clear the oplog.
    """
    primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
    if primary_conn['admin'].command("isMaster")['ismaster'] is False:
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

    mongos = "%s:%s" % (HOSTNAME, PORTS_ONE["MONGOS"])
    oplog_coll = primary_conn['local']['oplog.rs']

    namespace_set = ['test.test', 'alpha.foo']
    doc_manager = DocManager()
    oplog = OplogThread(primary_conn, mongos, oplog_coll, True,
                        doc_manager, LockingDict(), namespace_set,
                        cls.AUTH_KEY, AUTH_USERNAME)

    return (oplog, primary_conn, oplog_coll, oplog.main_connection)
def setUp(self):
    # Create a new oplog progress file
    try:
        os.unlink("config.txt")
    except OSError:
        pass
    open("config.txt", "w").close()

    # Start a replica set
    _, self.secondary_p, self.primary_p = start_replica_set('rollbacks')
    # Connection to the replica set as a whole
    self.main_conn = MongoClient('%s:%d' % (mongo_host, self.primary_p),
                                 replicaSet='rollbacks')
    # Connection to the primary specifically
    self.primary_conn = MongoClient('%s:%d' % (mongo_host, self.primary_p))
    # Connection to the secondary specifically
    self.secondary_conn = MongoClient(
        '%s:%d' % (mongo_host, self.secondary_p),
        read_preference=ReadPreference.SECONDARY_PREFERRED
    )

    # Wipe any test data
    self.main_conn["test"]["mc"].drop()

    # Oplog thread
    doc_manager = DocManager()
    oplog_progress = LockingDict()
    self.opman = OplogThread(
        primary_conn=self.main_conn,
        main_address='%s:%d' % (mongo_host, self.primary_p),
        oplog_coll=self.main_conn["local"]["oplog.rs"],
        is_sharded=False,
        doc_manager=doc_manager,
        oplog_progress_dict=oplog_progress,
        namespace_set=["test.mc"],
        auth_key=None,
        auth_username=None,
        repl_set="rollbacks"
    )
def setUp(self):
    # Create a new oplog progress file
    try:
        os.unlink("config.txt")
    except OSError:
        pass
    open("config.txt", "w").close()

    # Start a replica set
    start_cluster(sharded=False, use_mongos=False)
    # Connection to the replica set as a whole
    self.main_conn = Connection("localhost:%s" % PORTS_ONE["PRIMARY"],
                                replicaSet="demo-repl")
    # Connection to the primary specifically
    self.primary_conn = Connection("localhost:%s" % PORTS_ONE["PRIMARY"])
    # Connection to the secondary specifically
    self.secondary_conn = Connection(
        "localhost:%s" % PORTS_ONE["SECONDARY"],
        read_preference=ReadPreference.SECONDARY_PREFERRED)

    # Wipe any test data
    self.main_conn["test"]["mc"].drop()

    # Oplog thread
    doc_manager = DocManager()
    oplog_progress = LockingDict()
    self.opman = OplogThread(
        primary_conn=self.main_conn,
        main_address="localhost:%s" % PORTS_ONE["PRIMARY"],
        oplog_coll=self.main_conn["local"]["oplog.rs"],
        is_sharded=False,
        doc_manager=doc_manager,
        oplog_progress_dict=oplog_progress,
        namespace_set=["test.mc"],
        auth_key=None,
        auth_username=None,
        repl_set="demo-repl")
def setUp(self):
    self.dest_mapping_stru = DestMapping([], [], {})
    self.opman = OplogThread(primary_client=self.primary_conn,
                             doc_managers=(DocManager(),),
                             oplog_progress_dict=LockingDict(),
                             dest_mapping_stru=self.dest_mapping_stru)
class Connector(threading.Thread):
    """Checks the cluster for shards to tail.
    """
    def __init__(self, address, oplog_checkpoint, target_url, ns_set,
                 u_key, auth_key, doc_manager=None, auth_username=None):
        if doc_manager is not None:
            doc_manager = imp.load_source('DocManager', doc_manager)
        else:
            from mongo_connector.doc_manager import DocManager
        time.sleep(1)
        super(Connector, self).__init__()

        # can_run is set to false when we join the thread
        self.can_run = True

        # The name of the file that stores the progress of the OplogThreads
        self.oplog_checkpoint = oplog_checkpoint

        # main address - either mongos for sharded setups or a primary
        # otherwise
        self.address = address

        # The URL of the target system
        self.target_url = target_url

        # The set of relevant namespaces to consider
        self.ns_set = ns_set

        # The key that is a unique document identifier for the target system.
        # Not necessarily the mongo unique key.
        self.u_key = u_key

        # Password for authentication
        self.auth_key = auth_key

        # Username for authentication
        self.auth_username = auth_username

        # The set of OplogThreads created
        self.shard_set = {}

        # Dict of OplogThread/timestamp pairs to record progress
        self.oplog_progress = LockingDict()

        try:
            if target_url is None:
                if doc_manager is None:  # imported using from ... import
                    self.doc_manager = DocManager(unique_key=u_key)
                else:  # imported using load_source
                    self.doc_manager = doc_manager.DocManager(
                        unique_key=u_key)
            else:
                if doc_manager is None:
                    self.doc_manager = DocManager(self.target_url,
                                                  unique_key=u_key)
                else:
                    self.doc_manager = doc_manager.DocManager(
                        self.target_url, unique_key=u_key)
        except errors.ConnectionFailed:
            err_msg = "MongoConnector: Could not connect to target system"
            logging.critical(err_msg)
            self.can_run = False
            return

        if self.oplog_checkpoint is not None:
            if not os.path.exists(self.oplog_checkpoint):
                info_str = ("MongoConnector: Can't find %s, "
                            "attempting to create an empty progress log" %
                            self.oplog_checkpoint)
                logging.info(info_str)
                try:
                    # Create oplog progress file
                    open(self.oplog_checkpoint, "w").close()
                except IOError as e:
                    logging.critical("MongoConnector: Could not "
                                     "create a progress log: %s" % str(e))
                    sys.exit(1)
            else:
                if (not os.access(self.oplog_checkpoint, os.W_OK)
                        and not os.access(self.oplog_checkpoint, os.R_OK)):
                    logging.critical("Invalid permissions on %s! Exiting" %
                                     (self.oplog_checkpoint))
                    sys.exit(1)

    def join(self):
        """ Joins thread, stops it from running
        """
        self.can_run = False
        self.doc_manager.stop()
        threading.Thread.join(self)

    def write_oplog_progress(self):
        """ Writes oplog progress to file provided by user
        """
        if self.oplog_checkpoint is None:
            return None

        # write to temp file
        backup_file = self.oplog_checkpoint + '.backup'
        os.rename(self.oplog_checkpoint, backup_file)

        # for each of the threads write to file
        with open(self.oplog_checkpoint, 'w') as dest:
            with self.oplog_progress as oplog_prog:
                oplog_dict = oplog_prog.get_dict()
                for oplog, time_stamp in oplog_dict.items():
                    oplog_str = str(oplog)
                    timestamp = util.bson_ts_to_long(time_stamp)
                    json_str = json.dumps([oplog_str, timestamp])
                    try:
                        dest.write(json_str)
                    except IOError:
                        # Basically wipe the file, copy from backup
                        dest.truncate()
                        with open(backup_file, 'r') as backup:
                            shutil.copyfileobj(backup, dest)
                        break

        os.remove(self.oplog_checkpoint + '.backup')

    def read_oplog_progress(self):
        """Reads oplog progress from file provided by user.

        This method is only called once before any threads are spawned.
        """
        if self.oplog_checkpoint is None:
            return None

        # Check for empty file
        try:
            if os.stat(self.oplog_checkpoint).st_size == 0:
                logging.info("MongoConnector: Empty oplog progress file.")
                return None
        except OSError:
            return None

        source = open(self.oplog_checkpoint, 'r')
        try:
            data = json.load(source)
        except ValueError:  # empty file
            reason = "It may be empty or corrupt."
            logging.info("MongoConnector: Can't read oplog progress file. %s"
                         % (reason))
            source.close()
            return None

        source.close()

        count = 0
        oplog_dict = self.oplog_progress.get_dict()
        for count in range(0, len(data), 2):
            oplog_str = data[count]
            time_stamp = data[count + 1]
            oplog_dict[oplog_str] = util.long_to_bson_ts(time_stamp)
            # stored as bson_ts

    def run(self):
        """Discovers the mongo cluster and creates a thread for each primary.
        """
        main_conn = Connection(self.address)
        if self.auth_key is not None:
            main_conn['admin'].authenticate(self.auth_username,
                                            self.auth_key)
        self.read_oplog_progress()
        conn_type = None

        try:
            main_conn.admin.command("isdbgrid")
        except pymongo.errors.OperationFailure:
            conn_type = "REPLSET"

        if conn_type == "REPLSET":
            # non-sharded configuration
            oplog_coll = main_conn['local']['oplog.rs']

            prim_admin = main_conn.admin
            repl_set = prim_admin.command("replSetGetStatus")['set']

            oplog = oplog_manager.OplogThread(
                main_conn, (main_conn.host + ":" + str(main_conn.port)),
                oplog_coll, False, self.doc_manager, self.oplog_progress,
                self.ns_set, self.auth_key, self.auth_username,
                repl_set=repl_set)

            self.shard_set[0] = oplog
            logging.info('MongoConnector: Starting connection thread %s' %
                         main_conn)
            oplog.start()

            while self.can_run:
                if not self.shard_set[0].running:
                    logging.error("MongoConnector: OplogThread"
                                  " %s unexpectedly stopped! Shutting down" %
                                  (str(self.shard_set[0])))
                    self.oplog_thread_join()
                    self.doc_manager.stop()
                    return

                self.write_oplog_progress()
                time.sleep(1)

        else:       # sharded cluster
            while self.can_run is True:
                for shard_doc in main_conn['config']['shards'].find():
                    shard_id = shard_doc['_id']
                    if shard_id in self.shard_set:
                        if not self.shard_set[shard_id].running:
                            logging.error(
                                "MongoConnector: OplogThread"
                                " %s unexpectedly stopped! Shutting down" %
                                (str(self.shard_set[shard_id])))
                            self.oplog_thread_join()
                            self.doc_manager.stop()
                            return

                        self.write_oplog_progress()
                        time.sleep(1)
                        continue
                    try:
                        repl_set, hosts = shard_doc['host'].split('/')
                    except ValueError:
                        cause = "The system only uses replica sets!"
                        logging.error("MongoConnector: %s", cause)
                        self.oplog_thread_join()
                        self.doc_manager.stop()
                        return

                    shard_conn = Connection(hosts, replicaset=repl_set)
                    oplog_coll = shard_conn['local']['oplog.rs']
                    oplog = oplog_manager.OplogThread(
                        shard_conn, self.address, oplog_coll, True,
                        self.doc_manager, self.oplog_progress, self.ns_set,
                        self.auth_key, self.auth_username)
                    self.shard_set[shard_id] = oplog
                    msg = "Starting connection thread"
                    logging.info("MongoConnector: %s %s" % (msg, shard_conn))
                    oplog.start()

        self.oplog_thread_join()
        self.write_oplog_progress()

    def oplog_thread_join(self):
        """Stops all the OplogThreads
        """
        logging.info('MongoConnector: Stopping all OplogThreads')
        for thread in self.shard_set.values():
            thread.join()
def test_exclude_fields_constructor(self):
    # Test with the "_id" field in exclude_fields
    exclude_fields = ["_id", "title", "content", "author"]
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(),),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        exclude_fields=exclude_fields)
    exclude_fields.remove('_id')
    self._check_fields(opman, [], exclude_fields,
                       dict((f, 0) for f in exclude_fields))
    extra_fields = exclude_fields + ['extra1', 'extra2']
    filtered = opman.filter_oplog_entry(
        {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o']
    self.assertEqual(dict((f, 1) for f in ['extra1', 'extra2']), filtered)

    # Test without "_id" field included in exclude_fields
    exclude_fields = ["title", "content", "author"]
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(),),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        exclude_fields=exclude_fields)
    self._check_fields(opman, [], exclude_fields,
                       dict((f, 0) for f in exclude_fields))
    extra_fields = exclude_fields + ['extra1', 'extra2']
    filtered = opman.filter_oplog_entry(
        {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o']
    self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

    # Test with only "_id" field in exclude_fields
    exclude_fields = ["_id"]
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(),),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        exclude_fields=exclude_fields)
    self._check_fields(opman, [], [], None)
    extra_fields = exclude_fields + ['extra1', 'extra2']
    filtered = opman.filter_oplog_entry(
        {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o']
    self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

    # Test with nothing set for exclude_fields
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(),),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        exclude_fields=None)
    self._check_fields(opman, [], [], None)
    extra_fields = ['_id', 'extra1', 'extra2']
    filtered = opman.filter_oplog_entry(
        {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o']
    self.assertEqual(dict((f, 1) for f in extra_fields), filtered)
def test_fields_constructor(self):
    # Test with "_id" field in constructor
    fields = ["_id", "title", "content", "author"]
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(),),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        fields=fields)
    self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
    extra_fields = fields + ['extra1', 'extra2']
    filtered = opman.filter_oplog_entry(
        {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o']
    self.assertEqual(dict((f, 1) for f in fields), filtered)

    # Test without "_id" field in constructor
    fields = ["title", "content", "author"]
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(),),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        fields=fields)
    fields.append('_id')
    self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
    extra_fields = fields + ['extra1', 'extra2']
    filtered = opman.filter_oplog_entry(
        {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o']
    self.assertEqual(dict((f, 1) for f in fields), filtered)

    # Test with only "_id" field
    fields = ["_id"]
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(),),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        fields=fields)
    self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
    extra_fields = fields + ['extra1', 'extra2']
    filtered = opman.filter_oplog_entry(
        {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o']
    self.assertEqual({'_id': 1}, filtered)

    # Test with no fields set
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(),),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru)
    self._check_fields(opman, [], [], None)
    extra_fields = ['_id', 'extra1', 'extra2']
    filtered = opman.filter_oplog_entry(
        {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o']
    self.assertEqual(dict((f, 1) for f in extra_fields), filtered)
def setUp(self):
    self.repl_set = ReplicaSet().start()
    self.primary_conn = self.repl_set.client()
    self.oplog_progress = LockingDict()
    self.opman = None
def __init__(self, mongo_address, doc_managers=None, **kwargs):
    super(Connector, self).__init__()

    # can_run is set to false when we join the thread
    self.can_run = True

    # The signal that caused the connector to stop, or None
    self.signal = None

    # main address - either mongos for sharded setups or a primary otherwise
    self.address = mongo_address

    # connection to the main address
    self.main_conn = None

    # List of DocManager instances
    if doc_managers:
        self.doc_managers = doc_managers
    else:
        LOG.warning('No doc managers specified, using simulator.')
        self.doc_managers = (simulator.DocManager(),)

    # Password for authentication
    self.auth_key = kwargs.pop('auth_key', None)

    # Username for authentication
    self.auth_username = kwargs.pop('auth_username', None)

    # The name of the file that stores the progress of the OplogThreads
    self.oplog_checkpoint = kwargs.pop('oplog_checkpoint',
                                       'oplog.timestamp')

    # The set of OplogThreads created
    self.shard_set = {}

    # Dict of OplogThread/timestamp pairs to record progress
    self.oplog_progress = LockingDict()

    # Timezone awareness
    self.tz_aware = kwargs.get('tz_aware', False)

    # SSL keyword arguments to MongoClient.
    ssl_certfile = kwargs.pop('ssl_certfile', None)
    ssl_ca_certs = kwargs.pop('ssl_ca_certs', None)
    ssl_keyfile = kwargs.pop('ssl_keyfile', None)
    ssl_cert_reqs = kwargs.pop('ssl_cert_reqs', None)
    self.ssl_kwargs = {}
    if ssl_certfile:
        self.ssl_kwargs['ssl_certfile'] = ssl_certfile
    if ssl_ca_certs:
        self.ssl_kwargs['ssl_ca_certs'] = ssl_ca_certs
    if ssl_keyfile:
        self.ssl_kwargs['ssl_keyfile'] = ssl_keyfile
    if ssl_cert_reqs:
        self.ssl_kwargs['ssl_cert_reqs'] = ssl_cert_reqs

    # Save the rest of kwargs.
    self.kwargs = kwargs

    # Replace the original dest_mapping
    self.dest_mapping = DestMapping(kwargs.get('ns_set', []),
                                    kwargs.get('ex_ns_set', []),
                                    kwargs.get('dest_mapping', {}))

    # Initialize and set the command helper
    command_helper = CommandHelper(self.dest_mapping)
    for dm in self.doc_managers:
        dm.command_helper = command_helper

    if self.oplog_checkpoint is not None:
        if not os.path.exists(self.oplog_checkpoint):
            info_str = ("MongoConnector: Can't find %s, "
                        "attempting to create an empty progress log" %
                        self.oplog_checkpoint)
            LOG.warning(info_str)
            try:
                # Create oplog progress file
                open(self.oplog_checkpoint, "w").close()
            except IOError as e:
                LOG.critical("MongoConnector: Could not "
                             "create a progress log: %s" % str(e))
                sys.exit(2)
        else:
            if (not os.access(self.oplog_checkpoint, os.W_OK)
                    and not os.access(self.oplog_checkpoint, os.R_OK)):
                LOG.critical("Invalid permissions on %s! Exiting" %
                             (self.oplog_checkpoint))
                sys.exit(2)
def test_init_cursor(self):
    """Test the init_cursor method

    Cases:
    1. no last checkpoint, no collection dump
    2. no last checkpoint, collection dump ok and stuff to dump
    3. no last checkpoint, nothing to dump, stuff in oplog
    4. no last checkpoint, nothing to dump, nothing in oplog
    5. no last checkpoint, no collection dump, stuff in oplog
    6. last checkpoint exists
    7. last checkpoint is behind
    """
    # N.B. these sub-cases build off of each other and cannot be re-ordered
    # without side-effects

    # No last checkpoint, no collection dump, nothing in oplog
    # "change oplog collection" to put nothing in oplog
    self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
    self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
    self.opman1.collection_dump = False
    self.opman2.collection_dump = False
    self.assertTrue(
        all(doc['op'] == 'n' for doc in self.opman1.init_cursor()[0]))
    self.assertEqual(self.opman1.checkpoint, None)
    self.assertTrue(
        all(doc['op'] == 'n' for doc in self.opman2.init_cursor()[0]))
    self.assertEqual(self.opman2.checkpoint, None)

    # No last checkpoint, empty collections, nothing in oplog
    self.opman1.collection_dump = self.opman2.collection_dump = True
    cursor, cursor_len = self.opman1.init_cursor()
    self.assertEqual(cursor, None)
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman1.checkpoint, None)
    cursor, cursor_len = self.opman2.init_cursor()
    self.assertEqual(cursor, None)
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman2.checkpoint, None)

    # No last checkpoint, empty collections, something in oplog
    self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
    self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
    oplog_startup_ts = self.opman2.get_last_oplog_timestamp()
    collection = self.mongos_conn["test"]["mcsharded"]
    collection.insert({"i": 1})
    collection.remove({"i": 1})
    time.sleep(3)
    last_ts1 = self.opman1.get_last_oplog_timestamp()
    cursor, cursor_len = self.opman1.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman1.checkpoint, last_ts1)
    with self.opman1.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                         last_ts1)
    # init_cursor should point to startup message in shard2 oplog
    cursor, cursor_len = self.opman2.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman2.checkpoint, oplog_startup_ts)

    # No last checkpoint, no collection dump, stuff in oplog
    progress = LockingDict()
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    self.opman1.collection_dump = self.opman2.collection_dump = False
    collection.insert({"i": 1200})
    last_ts2 = self.opman2.get_last_oplog_timestamp()
    self.opman1.init_cursor()
    self.assertEqual(self.opman1.checkpoint, last_ts1)
    with self.opman1.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                         last_ts1)
    cursor, cursor_len = self.opman2.init_cursor()
    for i in range(cursor_len - 1):
        next(cursor)
    self.assertEqual(next(cursor)["o"]["i"], 1200)
    self.assertEqual(self.opman2.checkpoint, last_ts2)
    with self.opman2.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                         last_ts2)

    # Last checkpoint exists
    progress = LockingDict()
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    for i in range(1000):
        collection.insert({"i": i + 500})
    entry1 = list(
        self.shard1_conn["local"]["oplog.rs"].find(skip=200, limit=2))
    entry2 = list(
        self.shard2_conn["local"]["oplog.rs"].find(skip=200, limit=2))
    progress.get_dict()[str(self.opman1.oplog)] = entry1[0]["ts"]
    progress.get_dict()[str(self.opman2.oplog)] = entry2[0]["ts"]
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    self.opman1.checkpoint = self.opman2.checkpoint = None
    cursor1, cursor_len1 = self.opman1.init_cursor()
    cursor2, cursor_len2 = self.opman2.init_cursor()
    self.assertEqual(entry1[1]["ts"], next(cursor1)["ts"])
    self.assertEqual(entry2[1]["ts"], next(cursor2)["ts"])
    self.assertEqual(self.opman1.checkpoint, entry1[0]["ts"])
    self.assertEqual(self.opman2.checkpoint, entry2[0]["ts"])
    with self.opman1.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                         entry1[0]["ts"])
    with self.opman2.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                         entry2[0]["ts"])

    # Last checkpoint is behind
    progress = LockingDict()
    progress.get_dict()[str(self.opman1.oplog)] = bson.Timestamp(1, 0)
    progress.get_dict()[str(self.opman2.oplog)] = bson.Timestamp(1, 0)
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    self.opman1.checkpoint = self.opman2.checkpoint = None
    cursor, cursor_len = self.opman1.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(cursor, None)
    self.assertIsNotNone(self.opman1.checkpoint)
    cursor, cursor_len = self.opman2.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(cursor, None)
    self.assertIsNotNone(self.opman2.checkpoint)
def __init__(self, address, oplog_checkpoint, target_url, ns_set,
             u_key, auth_key, doc_manager=None, auth_username=None,
             collection_dump=True, batch_size=constants.DEFAULT_BATCH_SIZE,
             fields=None, dest_mapping={},
             auto_commit_interval=constants.DEFAULT_COMMIT_INTERVAL,
             continue_on_error=False):
    if target_url and not doc_manager:
        raise errors.ConnectorError("Cannot create a Connector with a "
                                    "target URL but no doc manager!")

    def is_string(s):
        try:
            return isinstance(s, basestring)
        except NameError:
            return isinstance(s, str)

    def load_doc_manager(path):
        name, _ = os.path.splitext(os.path.basename(path))
        try:
            import importlib.machinery
            loader = importlib.machinery.SourceFileLoader(name, path)
            module = loader.load_module(name)
        except ImportError:
            module = imp.load_source(name, path)
        return module

    doc_manager_modules = None

    if doc_manager is not None:
        # backwards compatibility: doc_manager may be a string
        if is_string(doc_manager):
            doc_manager_modules = [load_doc_manager(doc_manager)]
        # doc_manager is a list
        else:
            doc_manager_modules = []
            for dm in doc_manager:
                doc_manager_modules.append(load_doc_manager(dm))

    super(Connector, self).__init__()

    # can_run is set to false when we join the thread
    self.can_run = True

    # The name of the file that stores the progress of the OplogThreads
    self.oplog_checkpoint = oplog_checkpoint

    # main address - either mongos for sharded setups or a primary otherwise
    self.address = address

    # The URLs of each target system, respectively
    if is_string(target_url):
        self.target_urls = [target_url]
    elif target_url:
        self.target_urls = list(target_url)
    else:
        self.target_urls = None

    # The set of relevant namespaces to consider
    self.ns_set = ns_set

    # The dict of source namespace to destination namespace
    self.dest_mapping = dest_mapping

    # Whether the collection dump gracefully handles exceptions
    self.continue_on_error = continue_on_error

    # The key that is a unique document identifier for the target system.
    # Not necessarily the mongo unique key.
    self.u_key = u_key

    # Password for authentication
    self.auth_key = auth_key

    # Username for authentication
    self.auth_username = auth_username

    # The set of OplogThreads created
    self.shard_set = {}

    # Boolean chooses whether to dump the entire collection if no timestamp
    # is present in the config file
    self.collection_dump = collection_dump

    # Num entries to process before updating config file with current pos
    self.batch_size = batch_size

    # Dict of OplogThread/timestamp pairs to record progress
    self.oplog_progress = LockingDict()

    # List of fields to export
    self.fields = fields

    try:
        docman_kwargs = {"unique_key": u_key,
                         "namespace_set": ns_set,
                         "auto_commit_interval": auto_commit_interval}

        # No doc managers specified, using simulator
        if doc_manager is None:
            self.doc_managers = [simulator.DocManager(**docman_kwargs)]
        else:
            self.doc_managers = []
            for i, d in enumerate(doc_manager_modules):
                # self.target_urls may be shorter than
                # self.doc_managers, or left as None
                if self.target_urls and i < len(self.target_urls):
                    target_url = self.target_urls[i]
                else:
                    target_url = None

                if target_url:
                    self.doc_managers.append(
                        d.DocManager(self.target_urls[i],
                                     **docman_kwargs))
                else:
                    self.doc_managers.append(
                        d.DocManager(**docman_kwargs))
            # If more target URLs were given than doc managers, may need
            # to create additional doc managers
            for url in self.target_urls[i + 1:]:
                self.doc_managers.append(
                    doc_manager_modules[-1].DocManager(url,
                                                       **docman_kwargs))
    except errors.ConnectionFailed:
        err_msg = "MongoConnector: Could not connect to target system"
        logging.critical(err_msg)
        self.can_run = False
        return

    if self.oplog_checkpoint is not None:
        if not os.path.exists(self.oplog_checkpoint):
            info_str = ("MongoConnector: Can't find %s, "
                        "attempting to create an empty progress log" %
                        self.oplog_checkpoint)
            logging.info(info_str)
            try:
                # Create oplog progress file
                open(self.oplog_checkpoint, "w").close()
            except IOError as e:
                logging.critical("MongoConnector: Could not "
                                 "create a progress log: %s" % str(e))
                sys.exit(2)
        else:
            if (not os.access(self.oplog_checkpoint, os.W_OK)
                    and not os.access(self.oplog_checkpoint, os.R_OK)):
                logging.critical("Invalid permissions on %s! Exiting" %
                                 (self.oplog_checkpoint))
                sys.exit(2)
def test_init_cursor(self):
    """Test the init_cursor method

    Cases:
    1. no last checkpoint, no collection dump
    2. no last checkpoint, collection dump ok and stuff to dump
    3. no last checkpoint, nothing to dump, stuff in oplog
    4. no last checkpoint, nothing to dump, nothing in oplog
    5. last checkpoint exists
    """
    # N.B. these sub-cases build off of each other and cannot be re-ordered
    # without side-effects

    # No last checkpoint, no collection dump, nothing in oplog
    # "change oplog collection" to put nothing in oplog
    self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
    self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
    self.opman1.collection_dump = False
    self.opman2.collection_dump = False
    self.assertEqual(self.opman1.init_cursor(), None)
    self.assertEqual(self.opman1.checkpoint, None)
    self.assertEqual(self.opman2.init_cursor(), None)
    self.assertEqual(self.opman2.checkpoint, None)

    # No last checkpoint, empty collections, nothing in oplog
    self.opman1.collection_dump = True
    self.opman2.collection_dump = True
    self.assertEqual(self.opman1.init_cursor(), None)
    self.assertEqual(self.opman1.checkpoint, None)
    self.assertEqual(self.opman2.init_cursor(), None)
    self.assertEqual(self.opman2.checkpoint, None)

    # No last checkpoint, empty collections, something in oplog
    self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
    self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
    oplog_startup_ts = self.opman2.get_last_oplog_timestamp()
    collection = self.mongos_conn["test"]["mcsharded"]
    collection.insert({"i": 1})
    collection.remove({"i": 1})
    time.sleep(3)
    last_ts1 = self.opman1.get_last_oplog_timestamp()
    self.assertEqual(next(self.opman1.init_cursor())["ts"], last_ts1)
    self.assertEqual(self.opman1.checkpoint, last_ts1)
    with self.opman1.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                         last_ts1)
    # init_cursor should point to startup message in shard2 oplog
    cursor = self.opman2.init_cursor()
    self.assertEqual(next(cursor)["ts"], oplog_startup_ts)
    self.assertEqual(self.opman2.checkpoint, oplog_startup_ts)

    # No last checkpoint, non-empty collections, stuff in oplog
    progress = LockingDict()
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    collection.insert({"i": 1200})
    last_ts2 = self.opman2.get_last_oplog_timestamp()
    self.assertEqual(next(self.opman1.init_cursor())["ts"], last_ts1)
    self.assertEqual(self.opman1.checkpoint, last_ts1)
    with self.opman1.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                         last_ts1)
    self.assertEqual(next(self.opman2.init_cursor())["ts"], last_ts2)
    self.assertEqual(self.opman2.checkpoint, last_ts2)
    with self.opman2.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                         last_ts2)

    # Last checkpoint exists
    progress = LockingDict()
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    for i in range(1000):
        collection.insert({"i": i + 500})
    entry1 = list(
        self.shard1_conn["local"]["oplog.rs"].find(skip=200, limit=2))
    entry2 = list(
        self.shard2_conn["local"]["oplog.rs"].find(skip=200, limit=2))
    progress.get_dict()[str(self.opman1.oplog)] = entry1[0]["ts"]
    progress.get_dict()[str(self.opman2.oplog)] = entry2[0]["ts"]
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    self.opman1.checkpoint = self.opman2.checkpoint = None
    cursor1 = self.opman1.init_cursor()
    cursor2 = self.opman2.init_cursor()
    self.assertEqual(entry1[1]["ts"], next(cursor1)["ts"])
    self.assertEqual(entry2[1]["ts"], next(cursor2)["ts"])
    self.assertEqual(self.opman1.checkpoint, entry1[0]["ts"])
    self.assertEqual(self.opman2.checkpoint, entry2[0]["ts"])
    with self.opman1.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                         entry1[0]["ts"])
    with self.opman2.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                         entry2[0]["ts"])
def test_init_cursor(self):
    """Test the init_cursor method

    Cases:
    1. no last checkpoint, no collection dump
    2. no last checkpoint, collection dump ok and stuff to dump
    3. no last checkpoint, nothing to dump, stuff in oplog
    4. no last checkpoint, nothing to dump, nothing in oplog
    5. no last checkpoint, no collection dump, stuff in oplog
    6. last checkpoint exists
    7. last checkpoint is behind
    """
    # N.B. these sub-cases build off of each other and cannot be re-ordered
    # without side-effects

    # No last checkpoint, no collection dump, nothing in oplog
    # "change oplog collection" to put nothing in oplog
    self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
    self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
    self.opman1.collection_dump = False
    self.opman2.collection_dump = False
    self.assertTrue(
        all(doc['op'] == 'n' for doc in self.opman1.init_cursor()[0]))
    self.assertEqual(self.opman1.checkpoint, None)
    self.assertTrue(
        all(doc['op'] == 'n' for doc in self.opman2.init_cursor()[0]))
    self.assertEqual(self.opman2.checkpoint, None)

    # No last checkpoint, empty collections, nothing in oplog
    self.opman1.collection_dump = self.opman2.collection_dump = True
    cursor, cursor_len = self.opman1.init_cursor()
    self.assertEqual(cursor, None)
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman1.checkpoint, None)
    cursor, cursor_len = self.opman2.init_cursor()
    self.assertEqual(cursor, None)
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman2.checkpoint, None)

    # No last checkpoint, empty collections, something in oplog
    self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
    self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
    oplog_startup_ts = self.opman2.get_last_oplog_timestamp()
    collection = self.mongos_conn["test"]["mcsharded"]
    collection.insert_one({"i": 1})
    collection.delete_one({"i": 1})
    time.sleep(3)
    last_ts1 = self.opman1.get_last_oplog_timestamp()
    cursor, cursor_len = self.opman1.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman1.checkpoint, last_ts1)
    with self.opman1.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                         last_ts1)
    # init_cursor should point to startup message in shard2 oplog
    cursor, cursor_len = self.opman2.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman2.checkpoint, oplog_startup_ts)

    # No last checkpoint, no collection dump, stuff in oplog
    progress = LockingDict()
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    self.opman1.collection_dump = self.opman2.collection_dump = False
    collection.insert_one({"i": 1200})
    last_ts2 = self.opman2.get_last_oplog_timestamp()
    self.opman1.init_cursor()
    self.assertEqual(self.opman1.checkpoint, last_ts1)
    with self.opman1.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                         last_ts1)
    cursor, cursor_len = self.opman2.init_cursor()
    for i in range(cursor_len - 1):
        next(cursor)
    self.assertEqual(next(cursor)["o"]["i"], 1200)
    self.assertEqual(self.opman2.checkpoint, last_ts2)
    with self.opman2.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                         last_ts2)

    # Last checkpoint exists
    progress = LockingDict()
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    for i in range(1000):
        collection.insert_one({"i": i + 500})
    entry1 = list(
        self.shard1_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
    entry2 = list(
        self.shard2_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
    progress.get_dict()[str(self.opman1.oplog)] = entry1[0]["ts"]
    progress.get_dict()[str(self.opman2.oplog)] = entry2[0]["ts"]
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    self.opman1.checkpoint = self.opman2.checkpoint = None
    cursor1, cursor_len1 = self.opman1.init_cursor()
    cursor2, cursor_len2 = self.opman2.init_cursor()
    self.assertEqual(entry1[1]["ts"], next(cursor1)["ts"])
    self.assertEqual(entry2[1]["ts"], next(cursor2)["ts"])
    self.assertEqual(self.opman1.checkpoint, entry1[0]["ts"])
    self.assertEqual(self.opman2.checkpoint, entry2[0]["ts"])
    with self.opman1.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                         entry1[0]["ts"])
    with self.opman2.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                         entry2[0]["ts"])

    # Last checkpoint is behind
    progress = LockingDict()
    progress.get_dict()[str(self.opman1.oplog)] = bson.Timestamp(1, 0)
    progress.get_dict()[str(self.opman2.oplog)] = bson.Timestamp(1, 0)
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    self.opman1.checkpoint = self.opman2.checkpoint = None
    cursor, cursor_len = self.opman1.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(cursor, None)
    self.assertIsNotNone(self.opman1.checkpoint)
    cursor, cursor_len = self.opman2.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(cursor, None)
    self.assertIsNotNone(self.opman2.checkpoint)
def test_init_cursor(self):
    """Test the init_cursor method

    Cases:
    1. no last checkpoint, no collection dump
    2. no last checkpoint, collection dump ok and stuff to dump
    3. no last checkpoint, nothing to dump, stuff in oplog
    4. no last checkpoint, nothing to dump, nothing in oplog
    5. no last checkpoint, no collection dump, stuff in oplog
    6. last checkpoint exists
    7. last checkpoint is behind
    """
    # N.B. these sub-cases build off of each other and cannot be re-ordered
    # without side-effects

    # No last checkpoint, no collection dump, nothing in oplog
    # "change oplog collection" to put nothing in oplog
    self.opman.oplog = self.primary_conn["test"]["emptycollection"]
    self.opman.collection_dump = False
    self.assertTrue(
        all(doc["op"] == "n" for doc in self.opman.init_cursor()[0]))
    self.assertEqual(self.opman.checkpoint, None)

    # No last checkpoint, empty collections, nothing in oplog
    self.opman.collection_dump = True
    cursor, cursor_len = self.opman.init_cursor()
    self.assertEqual(cursor, None)
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman.checkpoint, None)

    # No last checkpoint, empty collections, something in oplog
    self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
    collection = self.primary_conn["test"]["test"]
    collection.insert({"i": 1})
    collection.remove({"i": 1})
    time.sleep(3)
    last_ts = self.opman.get_last_oplog_timestamp()
    cursor, cursor_len = self.opman.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman.checkpoint, last_ts)
    with self.opman.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts)

    # No last checkpoint, no collection dump, something in oplog
    self.opman.oplog_progress = LockingDict()
    self.opman.collection_dump = False
    collection.insert({"i": 2})
    last_ts = self.opman.get_last_oplog_timestamp()
    cursor, cursor_len = self.opman.init_cursor()
    for i in range(cursor_len - 1):
        next(cursor)
    self.assertEqual(next(cursor)["o"]["i"], 2)
    self.assertEqual(self.opman.checkpoint, last_ts)

    # Last checkpoint exists
    progress = LockingDict()
    self.opman.oplog_progress = progress
    for i in range(1000):
        collection.insert({"i": i + 500})
    entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200,
                                                             limit=2))
    progress.get_dict()[str(self.opman.oplog)] = entry[0]["ts"]
    self.opman.oplog_progress = progress
    self.opman.checkpoint = None
    cursor, cursor_len = self.opman.init_cursor()
    self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
    self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
    with self.opman.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman.oplog)],
                         entry[0]["ts"])

    # Last checkpoint is behind
    progress = LockingDict()
    progress.get_dict()[str(self.opman.oplog)] = bson.Timestamp(1, 0)
    self.opman.oplog_progress = progress
    self.opman.checkpoint = None
    cursor, cursor_len = self.opman.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(cursor, None)
    self.assertIsNotNone(self.opman.checkpoint)
def test_init_cursor(self):
    """Test the init_cursor method

    Cases:
    1. no last checkpoint, no collection dump
    2. no last checkpoint, collection dump ok and stuff to dump
    3. no last checkpoint, nothing to dump, stuff in oplog
    4. no last checkpoint, nothing to dump, nothing in oplog
    5. no last checkpoint, no collection dump, stuff in oplog
    6. last checkpoint exists
    7. last checkpoint is behind
    """
    # N.B. these sub-cases build off of each other and cannot be re-ordered
    # without side-effects
    self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})

    # No last checkpoint, no collection dump, nothing in oplog
    # "change oplog collection" to put nothing in oplog
    self.opman.oplog = self.primary_conn["includedb1"]["emptycollection"]
    self.opman.collection_dump = False
    self.assertTrue(
        all(doc['op'] == 'n' for doc in self.opman.init_cursor()[0]))
    self.assertEqual(self.opman.checkpoint, None)

    # No last checkpoint, empty collections, nothing in oplog
    self.opman.collection_dump = True
    cursor, cursor_empty = self.opman.init_cursor()
    self.assertEqual(cursor, None)
    self.assertTrue(cursor_empty)
    self.assertEqual(self.opman.checkpoint, None)

    # No last checkpoint, empty collections, something in oplog
    self.opman.oplog = self.primary_conn['local']['oplog.rs']
    collection = self.primary_conn["includedb1"]["includecol1"]
    collection.insert_one({"idb1col1": 1})
    collection.delete_one({"idb1col1": 1})
    time.sleep(3)
    last_ts = self.opman.get_last_oplog_timestamp()
    cursor, cursor_empty = self.opman.init_cursor()
    self.assertFalse(cursor_empty)
    self.assertEqual(self.opman.checkpoint, last_ts)
    self.assertEqual(self.opman.read_last_checkpoint(), last_ts)

    # No last checkpoint, no collection dump, something in oplog
    # If collection dump is false the checkpoint should not be set
    self.opman.checkpoint = None
    self.opman.oplog_progress = LockingDict()
    self.opman.collection_dump = False
    collection.insert_one({"idb1col1": 2})
    cursor, cursor_empty = self.opman.init_cursor()
    for doc in cursor:
        last_doc = doc
    self.assertEqual(last_doc['o']['idb1col1'], 2)
    self.assertIsNone(self.opman.checkpoint)

    # Last checkpoint exists
    collection.insert_many([{"idb1col1": i + 500} for i in range(1000)])
    entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200,
                                                             limit=-2))
    self.opman.update_checkpoint(entry[0]["ts"])
    cursor, cursor_empty = self.opman.init_cursor()
    self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
    self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
    self.assertEqual(self.opman.read_last_checkpoint(), entry[0]["ts"])

    # Last checkpoint is behind
    self.opman.update_checkpoint(bson.Timestamp(1, 0))
    cursor, cursor_empty = self.opman.init_cursor()
    self.assertTrue(cursor_empty)
    self.assertEqual(cursor, None)
    self.assertEqual(self.opman.checkpoint, bson.Timestamp(1, 0))
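# The test above exercises update_checkpoint/read_last_checkpoint on the
# OplogThread. Their implementation is not shown in this file; the pair below
# is a speculative sketch, assuming checkpoints live in the shared
# LockingDict keyed by replica-set name (as the newer progress format in
# test_upgrade_oplog_progress suggests).
def update_checkpoint(self, checkpoint):
    """Record a new checkpoint on the thread and in shared progress."""
    if checkpoint is not None:
        self.checkpoint = checkpoint
        with self.oplog_progress as prog:
            # Hypothetical keying by replica-set name, mirroring the "new
            # format" asserted in test_upgrade_oplog_progress.
            prog.get_dict()[self.replset_name] = checkpoint


def read_last_checkpoint(self):
    """Return the most recently recorded checkpoint, if any."""
    with self.oplog_progress as prog:
        return prog.get_dict().get(self.replset_name)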
def test_init_cursor(self):
    """Test the init_cursor method

    Cases:

    1. no last checkpoint, no collection dump
    2. no last checkpoint, collection dump ok and stuff to dump
    3. no last checkpoint, nothing to dump, stuff in oplog
    4. no last checkpoint, nothing to dump, nothing in oplog
    5. no last checkpoint, no collection dump, stuff in oplog
    6. last checkpoint exists
    7. last checkpoint is behind
    """
    # N.B. these sub-cases build off of each other and cannot be re-ordered
    # without side-effects

    # No last checkpoint, no collection dump, nothing in oplog
    # "change oplog collection" to put nothing in oplog
    self.opman.oplog = self.primary_conn["test"]["emptycollection"]
    self.opman.collection_dump = False
    self.assertTrue(
        all(doc['op'] == 'n' for doc in self.opman.init_cursor()[0]))
    self.assertEqual(self.opman.checkpoint, None)

    # No last checkpoint, empty collections, nothing in oplog
    self.opman.collection_dump = True
    cursor, cursor_len = self.opman.init_cursor()
    self.assertEqual(cursor, None)
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman.checkpoint, None)

    # No last checkpoint, empty collections, something in oplog
    self.opman.oplog = self.primary_conn['local']['oplog.rs']
    collection = self.primary_conn["test"]["test"]
    collection.insert_one({"i": 1})
    collection.delete_one({"i": 1})
    time.sleep(3)
    last_ts = self.opman.get_last_oplog_timestamp()
    cursor, cursor_len = self.opman.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman.checkpoint, last_ts)
    with self.opman.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts)

    # No last checkpoint, no collection dump, something in oplog
    self.opman.oplog_progress = LockingDict()
    self.opman.collection_dump = False
    collection.insert_one({"i": 2})
    last_ts = self.opman.get_last_oplog_timestamp()
    cursor, cursor_len = self.opman.init_cursor()
    for i in range(cursor_len - 1):
        next(cursor)
    self.assertEqual(next(cursor)['o']['i'], 2)
    self.assertEqual(self.opman.checkpoint, last_ts)

    # Last checkpoint exists
    progress = LockingDict()
    self.opman.oplog_progress = progress
    for i in range(1000):
        collection.insert_one({"i": i + 500})
    entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200,
                                                             limit=-2))
    progress.get_dict()[str(self.opman.oplog)] = entry[0]["ts"]
    self.opman.oplog_progress = progress
    self.opman.checkpoint = None
    cursor, cursor_len = self.opman.init_cursor()
    self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
    self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
    with self.opman.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman.oplog)],
                         entry[0]["ts"])

    # Last checkpoint is behind
    progress = LockingDict()
    progress.get_dict()[str(self.opman.oplog)] = bson.Timestamp(1, 0)
    self.opman.oplog_progress = progress
    self.opman.checkpoint = None
    cursor, cursor_len = self.opman.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(cursor, None)
    self.assertIsNotNone(self.opman.checkpoint)
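# Both versions of the test mark a stale checkpoint with bson.Timestamp(1, 0),
# which sorts before any real oplog entry: timestamps compare by (time, inc).
# A small worked example; the packing round trip is what
# mongo_connector.util is expected to do with these values:
import bson
from mongo_connector import util

old_ts = bson.Timestamp(1, 0)
new_ts = bson.Timestamp(1417995740, 2)
assert old_ts < new_ts                         # (time, inc) ordering
packed = util.bson_ts_to_long(new_ts)          # form stored in progress file
assert util.long_to_bson_ts(packed) == new_ts  # round-trips losslessly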
def setUp(self):
    """Initialize the cluster:

    Clean out the databases used by the tests
    Make connections to mongos, mongods
    Create and shard test collections
    Create OplogThreads
    """
    self.cluster = ShardedCluster().start()

    # Connection to mongos
    self.mongos_conn = self.cluster.client()

    # Connections to the shards
    self.shard1_conn = self.cluster.shards[0].client()
    self.shard2_conn = self.cluster.shards[1].client()
    self.shard1_secondary_conn = self.cluster.shards[0].secondary.client(
        readPreference=ReadPreference.SECONDARY_PREFERRED)
    self.shard2_secondary_conn = self.cluster.shards[1].secondary.client(
        readPreference=ReadPreference.SECONDARY_PREFERRED)

    # Wipe any test data
    self.mongos_conn["test"]["mcsharded"].drop()

    # Create and shard the collection test.mcsharded on the "i" field
    self.mongos_conn["test"]["mcsharded"].ensure_index("i")
    self.mongos_conn.admin.command("enableSharding", "test")
    self.mongos_conn.admin.command("shardCollection", "test.mcsharded",
                                   key={"i": 1})

    # Pre-split the collection so that:
    #   i < 1000 lives on shard1
    #   i >= 1000 lives on shard2
    self.mongos_conn.admin.command(
        bson.SON([("split", "test.mcsharded"), ("middle", {"i": 1000})]))

    # Disable the balancer
    self.mongos_conn.config.settings.update({"_id": "balancer"},
                                            {"$set": {"stopped": True}},
                                            upsert=True)

    # Move chunks to their proper places
    try:
        self.mongos_conn["admin"].command("moveChunk", "test.mcsharded",
                                          find={"i": 1}, to='demo-set-0')
    except pymongo.errors.OperationFailure:
        pass
    try:
        self.mongos_conn["admin"].command("moveChunk", "test.mcsharded",
                                          find={"i": 1000}, to='demo-set-1')
    except pymongo.errors.OperationFailure:
        pass

    # Make sure chunks are distributed correctly
    self.mongos_conn["test"]["mcsharded"].insert({"i": 1})
    self.mongos_conn["test"]["mcsharded"].insert({"i": 1000})

    def chunks_moved():
        doc1 = self.shard1_conn.test.mcsharded.find_one()
        doc2 = self.shard2_conn.test.mcsharded.find_one()
        if None in (doc1, doc2):
            return False
        return doc1['i'] == 1 and doc2['i'] == 1000

    assert_soon(chunks_moved, max_tries=120,
                message='chunks not moved? doc1=%r, doc2=%r' % (
                    self.shard1_conn.test.mcsharded.find_one(),
                    self.shard2_conn.test.mcsharded.find_one()))
    self.mongos_conn.test.mcsharded.remove()

    # Create a new oplog progress file
    try:
        os.unlink("oplog.timestamp")
    except OSError:
        pass
    open("oplog.timestamp", "w").close()

    # Oplog threads (oplog manager) for each shard
    doc_manager = DocManager()
    oplog_progress = LockingDict()
    self.opman1 = OplogThread(
        primary_client=self.shard1_conn,
        doc_managers=(doc_manager,),
        oplog_progress_dict=oplog_progress,
        namespace_set=["test.mcsharded", "test.mcunsharded"],
        mongos_client=self.mongos_conn)
    self.opman2 = OplogThread(
        primary_client=self.shard2_conn,
        doc_managers=(doc_manager,),
        oplog_progress_dict=oplog_progress,
        namespace_set=["test.mcsharded", "test.mcunsharded"],
        mongos_client=self.mongos_conn)
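# setUp polls the cluster with assert_soon until the chunks land on the right
# shards. A minimal sketch of a compatible helper, inferred only from the
# call sites above and below (the real one lives in the test utilities):
import time


def assert_soon(predicate, message=None, max_tries=60):
    """Call predicate once per second; fail if it never returns True."""
    for _ in range(max_tries):
        if predicate():
            return
        time.sleep(1)
    raise AssertionError(message or "condition was never true")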
def setUp(self):
    """Initialize the cluster:

    Clean out the databases used by the tests
    Make connections to mongos, mongods
    Create and shard test collections
    Create OplogThreads
    """
    # Start the cluster with a mongos on port 27217
    self.mongos_p = start_cluster()

    # Connection to mongos
    mongos_address = '%s:%d' % (mongo_host, self.mongos_p)
    self.mongos_conn = MongoClient(mongos_address)

    # Connections to the shards
    shard1_ports = get_shard(self.mongos_p, 0)
    shard2_ports = get_shard(self.mongos_p, 1)
    self.shard1_prim_p = shard1_ports['primary']
    self.shard1_scnd_p = shard1_ports['secondaries'][0]
    self.shard2_prim_p = shard2_ports['primary']
    self.shard2_scnd_p = shard2_ports['secondaries'][0]
    self.shard1_conn = MongoClient('%s:%d' % (mongo_host,
                                              self.shard1_prim_p),
                                   replicaSet="demo-set-0")
    self.shard2_conn = MongoClient('%s:%d' % (mongo_host,
                                              self.shard2_prim_p),
                                   replicaSet="demo-set-1")
    self.shard1_secondary_conn = MongoClient(
        '%s:%d' % (mongo_host, self.shard1_scnd_p),
        read_preference=ReadPreference.SECONDARY_PREFERRED
    )
    self.shard2_secondary_conn = MongoClient(
        '%s:%d' % (mongo_host, self.shard2_scnd_p),
        read_preference=ReadPreference.SECONDARY_PREFERRED
    )

    # Wipe any test data
    self.mongos_conn["test"]["mcsharded"].drop()

    # Create and shard the collection test.mcsharded on the "i" field
    self.mongos_conn["test"]["mcsharded"].ensure_index("i")
    self.mongos_conn.admin.command("enableSharding", "test")
    self.mongos_conn.admin.command("shardCollection", "test.mcsharded",
                                   key={"i": 1})

    # Pre-split the collection so that:
    #   i < 1000 lives on shard1
    #   i >= 1000 lives on shard2
    self.mongos_conn.admin.command(bson.SON([
        ("split", "test.mcsharded"),
        ("middle", {"i": 1000})
    ]))

    # Disable the balancer
    self.mongos_conn.config.settings.update(
        {"_id": "balancer"},
        {"$set": {"stopped": True}},
        upsert=True
    )

    # Move chunks to their proper places
    try:
        self.mongos_conn["admin"].command(
            "moveChunk", "test.mcsharded", find={"i": 1}, to="demo-set-0"
        )
    except pymongo.errors.OperationFailure:
        pass  # chunk may already be on the correct shard
    try:
        self.mongos_conn["admin"].command(
            "moveChunk", "test.mcsharded", find={"i": 1000}, to="demo-set-1"
        )
    except pymongo.errors.OperationFailure:
        pass  # chunk may already be on the correct shard

    # Make sure chunks are distributed correctly
    self.mongos_conn["test"]["mcsharded"].insert({"i": 1})
    self.mongos_conn["test"]["mcsharded"].insert({"i": 1000})

    def chunks_moved():
        doc1 = self.shard1_conn.test.mcsharded.find_one()
        doc2 = self.shard2_conn.test.mcsharded.find_one()
        if None in (doc1, doc2):
            return False
        return doc1['i'] == 1 and doc2['i'] == 1000

    assert_soon(chunks_moved)
    self.mongos_conn.test.mcsharded.remove()

    # Create a new oplog progress file
    try:
        os.unlink("config.txt")
    except OSError:
        pass
    open("config.txt", "w").close()

    # Oplog threads (oplog manager) for each shard
    doc_manager = DocManager()
    oplog_progress = LockingDict()
    self.opman1 = OplogThread(
        primary_conn=self.shard1_conn,
        main_address='%s:%d' % (mongo_host, self.mongos_p),
        oplog_coll=self.shard1_conn["local"]["oplog.rs"],
        is_sharded=True,
        doc_manager=doc_manager,
        oplog_progress_dict=oplog_progress,
        namespace_set=["test.mcsharded", "test.mcunsharded"],
        auth_key=None,
        auth_username=None
    )
    self.opman2 = OplogThread(
        primary_conn=self.shard2_conn,
        main_address='%s:%d' % (mongo_host, self.mongos_p),
        oplog_coll=self.shard2_conn["local"]["oplog.rs"],
        is_sharded=True,
        doc_manager=doc_manager,
        oplog_progress_dict=oplog_progress,
        namespace_set=["test.mcsharded", "test.mcunsharded"],
        auth_key=None,
        auth_username=None
    )
class Connector(threading.Thread):
    """Checks the cluster for shards to tail.
    """
    def __init__(self, address, oplog_checkpoint, target_url, ns_set,
                 u_key, auth_key, doc_manager=None, auth_username=None,
                 collection_dump=True,
                 batch_size=constants.DEFAULT_BATCH_SIZE,
                 fields=None, dest_mapping={},
                 auto_commit_interval=constants.DEFAULT_COMMIT_INTERVAL,
                 continue_on_error=False):
        if target_url and not doc_manager:
            raise errors.ConnectorError("Cannot create a Connector with a "
                                        "target URL but no doc manager!")

        def is_string(s):
            try:
                return isinstance(s, basestring)
            except NameError:
                return isinstance(s, str)

        def load_doc_manager(path):
            name, _ = os.path.splitext(os.path.basename(path))
            try:
                import importlib.machinery
                loader = importlib.machinery.SourceFileLoader(name, path)
                module = loader.load_module(name)
            except ImportError:
                module = imp.load_source(name, path)
            return module

        doc_manager_modules = None

        if doc_manager is not None:
            # Backwards compatibility: doc_manager may be a string
            if is_string(doc_manager):
                doc_manager_modules = [load_doc_manager(doc_manager)]
            # doc_manager is a list
            else:
                doc_manager_modules = []
                for dm in doc_manager:
                    doc_manager_modules.append(load_doc_manager(dm))

        super(Connector, self).__init__()

        # can_run is set to false when we join the thread
        self.can_run = True

        # The name of the file that stores the progress of the OplogThreads
        self.oplog_checkpoint = oplog_checkpoint

        # main address - either mongos for sharded setups or a primary
        # otherwise
        self.address = address

        # The URLs of each target system, respectively
        if is_string(target_url):
            self.target_urls = [target_url]
        elif target_url:
            self.target_urls = list(target_url)
        else:
            self.target_urls = None

        # The set of relevant namespaces to consider
        self.ns_set = ns_set

        # The dict of source namespace to destination namespace
        self.dest_mapping = dest_mapping

        # Whether the collection dump gracefully handles exceptions
        self.continue_on_error = continue_on_error

        # The key that is a unique document identifier for the target
        # system. Not necessarily the mongo unique key.
        self.u_key = u_key

        # Password for authentication
        self.auth_key = auth_key

        # Username for authentication
        self.auth_username = auth_username

        # The set of OplogThreads created
        self.shard_set = {}

        # Boolean chooses whether to dump the entire collection if no
        # timestamp is present in the config file
        self.collection_dump = collection_dump

        # Num entries to process before updating config file with current pos
        self.batch_size = batch_size

        # Dict of OplogThread/timestamp pairs to record progress
        self.oplog_progress = LockingDict()

        # List of fields to export
        self.fields = fields

        try:
            docman_kwargs = {"unique_key": u_key,
                             "namespace_set": ns_set,
                             "auto_commit_interval": auto_commit_interval}

            # No doc managers specified, using simulator
            if doc_manager is None:
                self.doc_managers = [simulator.DocManager(**docman_kwargs)]
            else:
                self.doc_managers = []
                for i, d in enumerate(doc_manager_modules):
                    # self.target_urls may be shorter than
                    # self.doc_managers, or left as None
                    if self.target_urls and i < len(self.target_urls):
                        target_url = self.target_urls[i]
                    else:
                        target_url = None

                    if target_url:
                        self.doc_managers.append(
                            d.DocManager(self.target_urls[i],
                                         **docman_kwargs))
                    else:
                        self.doc_managers.append(
                            d.DocManager(**docman_kwargs))
                # If more target URLs were given than doc managers, may need
                # to create additional doc managers (guard against
                # target_urls being None)
                if self.target_urls:
                    for url in self.target_urls[i + 1:]:
                        self.doc_managers.append(
                            doc_manager_modules[-1].DocManager(
                                url, **docman_kwargs))
        except errors.ConnectionFailed:
            err_msg = "MongoConnector: Could not connect to target system"
            logging.critical(err_msg)
            self.can_run = False
            return

        if self.oplog_checkpoint is not None:
            if not os.path.exists(self.oplog_checkpoint):
                info_str = ("MongoConnector: Can't find %s, "
                            "attempting to create an empty progress log" %
                            self.oplog_checkpoint)
                logging.info(info_str)
                try:
                    # Create oplog progress file
                    open(self.oplog_checkpoint, "w").close()
                except IOError as e:
                    logging.critical("MongoConnector: Could not "
                                     "create a progress log: %s" % str(e))
                    sys.exit(2)
            else:
                if (not os.access(self.oplog_checkpoint, os.W_OK)
                        and not os.access(self.oplog_checkpoint, os.R_OK)):
                    logging.critical("Invalid permissions on %s! Exiting" %
                                     self.oplog_checkpoint)
                    sys.exit(2)

    def join(self):
        """ Joins thread, stops it from running
        """
        self.can_run = False
        for dm in self.doc_managers:
            dm.stop()
        threading.Thread.join(self)

    def write_oplog_progress(self):
        """ Writes oplog progress to file provided by user
        """
        if self.oplog_checkpoint is None:
            return None

        # write to temp file
        backup_file = self.oplog_checkpoint + '.backup'
        os.rename(self.oplog_checkpoint, backup_file)

        # for each of the threads write to file
        with open(self.oplog_checkpoint, 'w') as dest:
            with self.oplog_progress as oplog_prog:
                oplog_dict = oplog_prog.get_dict()
                for oplog, time_stamp in oplog_dict.items():
                    oplog_str = str(oplog)
                    timestamp = util.bson_ts_to_long(time_stamp)
                    json_str = json.dumps([oplog_str, timestamp])
                    try:
                        dest.write(json_str)
                    except IOError:
                        # Basically wipe the file, copy from backup
                        dest.truncate()
                        with open(backup_file, 'r') as backup:
                            shutil.copyfileobj(backup, dest)
                        break

        os.remove(self.oplog_checkpoint + '.backup')

    def read_oplog_progress(self):
        """Reads oplog progress from file provided by user.
        This method is only called once before any threads are spawned.
""" if self.oplog_checkpoint is None: return None # Check for empty file try: if os.stat(self.oplog_checkpoint).st_size == 0: logging.info("MongoConnector: Empty oplog progress file.") return None except OSError: return None source = open(self.oplog_checkpoint, 'r') try: data = json.load(source) except ValueError: # empty file reason = "It may be empty or corrupt." logging.info("MongoConnector: Can't read oplog progress file. %s" % (reason)) source.close() return None source.close() count = 0 oplog_dict = self.oplog_progress.get_dict() for count in range(0, len(data), 2): oplog_str = data[count] time_stamp = data[count + 1] oplog_dict[oplog_str] = util.long_to_bson_ts(time_stamp) #stored as bson_ts def run(self): """Discovers the mongo cluster and creates a thread for each primary. """ main_conn = MongoClient(self.address) if self.auth_key is not None: main_conn['admin'].authenticate(self.auth_username, self.auth_key) self.read_oplog_progress() conn_type = None try: main_conn.admin.command("isdbgrid") except pymongo.errors.OperationFailure: conn_type = "REPLSET" if conn_type == "REPLSET": # Make sure we are connected to a replica set is_master = main_conn.admin.command("isMaster") if not "setName" in is_master: logging.error( 'No replica set at "%s"! A replica set is required ' 'to run mongo-connector. Shutting down...' % self.address ) return # Establish a connection to the replica set as a whole main_conn.disconnect() main_conn = MongoClient(self.address, replicaSet=is_master['setName']) if self.auth_key is not None: main_conn.admin.authenticate(self.auth_username, self.auth_key) #non sharded configuration oplog_coll = main_conn['local']['oplog.rs'] oplog = OplogThread( primary_conn=main_conn, main_address=self.address, oplog_coll=oplog_coll, is_sharded=False, doc_manager=self.doc_managers, oplog_progress_dict=self.oplog_progress, namespace_set=self.ns_set, auth_key=self.auth_key, auth_username=self.auth_username, repl_set=is_master['setName'], collection_dump=self.collection_dump, batch_size=self.batch_size, fields=self.fields, dest_mapping=self.dest_mapping, continue_on_error=self.continue_on_error ) self.shard_set[0] = oplog logging.info('MongoConnector: Starting connection thread %s' % main_conn) oplog.start() while self.can_run: if not self.shard_set[0].running: logging.error("MongoConnector: OplogThread" " %s unexpectedly stopped! Shutting down" % (str(self.shard_set[0]))) self.oplog_thread_join() for dm in self.doc_managers: dm.stop() return self.write_oplog_progress() time.sleep(1) else: # sharded cluster while self.can_run is True: for shard_doc in main_conn['config']['shards'].find(): shard_id = shard_doc['_id'] if shard_id in self.shard_set: if not self.shard_set[shard_id].running: logging.error("MongoConnector: OplogThread " "%s unexpectedly stopped! Shutting " "down" % (str(self.shard_set[shard_id]))) self.oplog_thread_join() for dm in self.doc_managers: dm.stop() return self.write_oplog_progress() time.sleep(1) continue try: repl_set, hosts = shard_doc['host'].split('/') except ValueError: cause = "The system only uses replica sets!" 
logging.error("MongoConnector: %s", cause) self.oplog_thread_join() for dm in self.doc_managers: dm.stop() return shard_conn = MongoClient(hosts, replicaSet=repl_set) oplog_coll = shard_conn['local']['oplog.rs'] oplog = OplogThread( primary_conn=shard_conn, main_address=self.address, oplog_coll=oplog_coll, is_sharded=True, doc_manager=self.doc_managers, oplog_progress_dict=self.oplog_progress, namespace_set=self.ns_set, auth_key=self.auth_key, auth_username=self.auth_username, collection_dump=self.collection_dump, batch_size=self.batch_size, fields=self.fields, dest_mapping=self.dest_mapping, continue_on_error=self.continue_on_error ) self.shard_set[shard_id] = oplog msg = "Starting connection thread" logging.info("MongoConnector: %s %s" % (msg, shard_conn)) oplog.start() self.oplog_thread_join() self.write_oplog_progress() def oplog_thread_join(self): """Stops all the OplogThreads """ logging.info('MongoConnector: Stopping all OplogThreads') for thread in self.shard_set.values(): thread.join()