def _await_mongod_sharding_initialization(self):
    if self.enable_sharding and self.num_rs_nodes_per_shard is not None:
        deadline = time.time() + ShardedClusterFixture.AWAIT_SHARDING_INITIALIZATION_TIMEOUT_SECS
        timeout_occurred = lambda: deadline - time.time() <= 0.0

        mongod_clients = [(mongod.mongo_client(), mongod.port) for shard in self.shards
                          for mongod in shard.nodes]

        for client, port in mongod_clients:
            interface.authenticate(client, self.auth_options)

            while True:
                # The choice of namespace (local.fooCollection) does not affect the output.
                get_shard_version_result = client.admin.command(
                    "getShardVersion", "local.fooCollection", check=False)
                if get_shard_version_result["ok"]:
                    break

                if timeout_occurred():
                    # Report the timeout that was actually applied to the deadline above.
                    raise self.fixturelib.ServerFailure(
                        "mongod on port: {} failed waiting for getShardVersion success after {} seconds"
                        .format(port,
                                ShardedClusterFixture.AWAIT_SHARDING_INITIALIZATION_TIMEOUT_SECS))
                time.sleep(0.1)
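# Illustrative sketch (not part of the fixture): the deadline/poll pattern above recurs
# throughout this module. A minimal generic version of it, with the hypothetical names
# `poll_until` and `predicate`:
def poll_until(predicate, timeout_secs, interval_secs=0.1):
    """Call `predicate` repeatedly until it returns a truthy value or the timeout elapses."""
    deadline = time.time() + timeout_secs
    while True:
        result = predicate()
        if result:
            return result
        if time.time() >= deadline:
            raise TimeoutError(
                "predicate did not become truthy within {} seconds".format(timeout_secs))
        time.sleep(interval_secs)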
def _await_stable_recovery_timestamp(self):
    """
    Await stable recovery timestamps on all nodes in the replica set.

    Performs some writes and then waits for all nodes in this replica set to establish a stable
    recovery timestamp. The writes are necessary to prompt storage engines to quickly establish
    stable recovery timestamps.

    A stable recovery timestamp ensures recoverable rollback is possible, as well as startup
    recovery without re-initial syncing in the case of durable storage engines. By waiting for
    all nodes to report having a stable recovery timestamp, we ensure a degree of stability for
    our tests to run as expected.
    """
    # Since this method is called at startup we expect the first node to be primary even when
    # self.all_nodes_electable is True.
    primary_client = self.nodes[0].mongo_client()
    interface.authenticate(primary_client, self.auth_options)

    # All nodes must be in primary/secondary state prior to this point. Perform a majority
    # write to ensure there is a committed operation on the set. The commit point will
    # propagate to all members and trigger a stable checkpoint on all nodes with persistent
    # storage engines.
    admin = primary_client.get_database(
        "admin", write_concern=pymongo.write_concern.WriteConcern(w="majority"))
    admin.command("appendOplogNote", data={"await_stable_recovery_timestamp": 1})

    for node in self.nodes:
        self.logger.info("Waiting for node on port %d to have a stable recovery timestamp.",
                         node.port)
        client = node.mongo_client(read_preference=pymongo.ReadPreference.SECONDARY)
        interface.authenticate(client, self.auth_options)
        client_admin = client["admin"]

        while True:
            status = client_admin.command("replSetGetStatus")

            # The `lastStableRecoveryTimestamp` field contains a stable timestamp guaranteed
            # to exist on storage engine recovery to a stable timestamp.
            last_stable_recovery_timestamp = status.get("lastStableRecoveryTimestamp", None)

            # A missing `lastStableRecoveryTimestamp` field indicates that the storage engine
            # does not support "recover to a stable timestamp".
            if not last_stable_recovery_timestamp:
                break

            # A null `lastStableRecoveryTimestamp` indicates that the storage engine supports
            # "recover to a stable timestamp" but does not have a stable recovery timestamp
            # yet.
            if last_stable_recovery_timestamp.time:
                self.logger.info(
                    "Node on port %d now has a stable timestamp for recovery. Time: %s",
                    node.port, last_stable_recovery_timestamp)
                break
            time.sleep(0.1)  # Wait a little bit before trying again.
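# Illustrative standalone probe (assumes the `pymongo` driver and a reachable replica set
# member): the `lastStableRecoveryTimestamp` check above, condensed into a single
# hypothetical helper.
def has_stable_recovery_timestamp(client):
    """Return True if the node reports a non-null stable recovery timestamp."""
    status = client.admin.command("replSetGetStatus")
    ts = status.get("lastStableRecoveryTimestamp")
    # A missing field means the storage engine does not support "recover to a stable
    # timestamp"; a null (zero-time) value means it does, but has not established one yet.
    return ts is not None and ts.time > 0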
def await_ready(self):
    """Block until the fixture can be used for testing."""
    # Wait for the config server.
    if self.configsvr is not None:
        self.configsvr.await_ready()

    # Wait for each of the shards.
    for shard in self.shards:
        shard.await_ready()

    # We call mongos.setup() in self.await_ready() instead of self.setup() because mongos
    # routers have to connect to a running cluster.
    for mongos in self.mongos:
        # Start up the mongos.
        mongos.setup()

        # Wait for the mongos.
        mongos.await_ready()

    client = self.mongo_client()
    interface.authenticate(client, self.auth_options)

    # Turn off the balancer if it is not meant to be enabled.
    if not self.enable_balancer:
        self.stop_balancer()

    # Turn off autosplit if it is not meant to be enabled.
    if not self.enable_autosplit:
        wc = pymongo.WriteConcern(w="majority", wtimeout=30000)
        coll = client.config.get_collection("settings", write_concern=wc)
        coll.update_one({"_id": "autosplit"}, {"$set": {"enabled": False}}, upsert=True)

    # Inform mongos about each of the shards.
    for shard in self.shards:
        self._add_shard(client, shard)

    # Ensure that all CSRS nodes are up to date. This is strictly needed for tests that use
    # multiple mongoses. In those cases, the first mongos initializes the contents of the
    # config database, but without waiting for those writes to replicate to all the config
    # servers, the secondary mongoses risk reading from a stale config server and seeing an
    # empty config database.
    self.configsvr.await_last_op_committed()

    # Enable sharding on each of the specified databases.
    for db_name in self.enable_sharding:
        self.logger.info("Enabling sharding for '%s' database...", db_name)
        client.admin.command({"enablesharding": db_name})

    # Wait for the mongods to be ready.
    self._await_mongod_sharding_initialization()

    # Ensure that the sessions collection gets auto-sharded by the config server.
    if self.configsvr is not None:
        self.refresh_logical_session_cache(self.configsvr)

    for shard in self.shards:
        self.refresh_logical_session_cache(shard)
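# `refresh_logical_session_cache` is used above but defined outside this section. A minimal
# sketch of its plausible shape, assuming the test-only `refreshLogicalSessionCacheNow`
# server command; treat this as illustrative rather than the actual implementation.
def refresh_logical_session_cache(self, target):
    """Flush the logical session cache on the primary of `target`."""
    primary_client = target.get_primary().mongo_client()
    interface.authenticate(primary_client, self.auth_options)
    primary_client.admin.command({"refreshLogicalSessionCacheNow": 1})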
def _await_newly_added_removals(self):
    """
    Wait for all 'newlyAdded' fields to be removed from the replica set config.

    Additionally, wait for that config to be committed, and for the in-memory and on-disk
    configs to match.
    """
    self.logger.info("Waiting to remove all 'newlyAdded' fields")
    primary = self.get_primary()
    client = primary.mongo_client()
    interface.authenticate(client, self.auth_options)
    while self._should_await_newly_added_removals_longer(client):
        time.sleep(0.1)  # Wait a little bit before trying again.
    self.logger.info("All 'newlyAdded' fields removed")
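# `_should_await_newly_added_removals_longer` is referenced above but defined elsewhere. A
# hedged sketch of what it plausibly checks, assuming the internal
# `$_internalIncludeNewlyAdded` option of replSetGetConfig exposes 'newlyAdded' fields:
def _should_await_newly_added_removals_longer(self, client):
    """Return True while any member of the current config still has a 'newlyAdded' field."""
    config = client.admin.command({
        "replSetGetConfig": 1,
        "commitmentStatus": True,
        "$_internalIncludeNewlyAdded": True,
    })["config"]
    return any("newlyAdded" in member for member in config["members"])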
def _await_cmd_all_nodes(self, fn, msg, timeout_secs=30):
    """Run `fn` on all nodes until it returns a truthy value.

    Return the node for which `fn` becomes truthy.

    Two arguments are passed to `fn`: the client for a node and the MongoDFixture
    corresponding to that node.
    """
    start = time.time()
    clients = {}
    while True:
        for node in self.nodes:
            now = time.time()
            if (now - start) >= timeout_secs:
                msg = "Timed out while {} for replica set '{}'.".format(msg, self.replset_name)
                self.logger.error(msg)
                raise self.fixturelib.ServerFailure(msg)

            try:
                if node.port not in clients:
                    clients[node.port] = interface.authenticate(node.mongo_client(),
                                                                self.auth_options)

                if fn(clients[node.port], node):
                    return node
            except pymongo.errors.AutoReconnect:
                # AutoReconnect exceptions may occur if the primary stepped down since PyMongo
                # last contacted it. We'll just try contacting the node again in the next
                # round of isMaster requests.
                continue
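# Illustrative usage of `_await_cmd_all_nodes` (the helper name `_await_all_nodes_healthy`
# is hypothetical): waiting for every node to report a PRIMARY/SECONDARY state. Like
# `check_rcmaj_optime` below, the predicate accumulates ports in a set because the helper
# returns as soon as `fn` is truthy for any one node.
def _await_all_nodes_healthy(self):
    healthy_ports = set()

    def is_healthy(client, node):
        status = client.admin.command({"replSetGetStatus": 1})
        if status["myState"] in (1, 2):  # 1 == PRIMARY, 2 == SECONDARY.
            healthy_ports.add(node.port)
        return len(healthy_ports) == len(self.nodes)

    self._await_cmd_all_nodes(is_healthy, "waiting for all nodes to become healthy")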
def _create_tenant_migration_donor_and_recipient_roles(self, rs):
    """Create the tenant migration donor and recipient roles."""
    primary = rs.get_primary()
    primary_client = interface.authenticate(primary.mongo_client(), self.auth_options)

    try:
        primary_client.admin.command({
            "createRole": "tenantMigrationDonorRole",
            "privileges": [{
                "resource": {"cluster": True},
                "actions": ["runTenantMigration"]
            }, {
                "resource": {"db": "admin", "collection": "system.keys"},
                "actions": ["find"]
            }],
            "roles": []
        })
    except:
        self.logger.exception(
            "Error creating tenant migration donor role on primary on port %d of replica"
            " set '%s'.", primary.port, rs.replset_name)
        raise

    try:
        primary_client.admin.command({
            "createRole": "tenantMigrationRecipientRole",
            "privileges": [{
                "resource": {"cluster": True},
                "actions": ["listDatabases", "useUUID", "advanceClusterTime"]
            }, {
                "resource": {"db": "", "collection": ""},
                "actions": ["listCollections"]
            }, {
                "resource": {"anyResource": True},
                "actions": ["dbStats", "collStats", "find", "listIndexes"]
            }],
            "roles": []
        })
    except:
        self.logger.exception(
            "Error creating tenant migration recipient role on primary on port %d of replica"
            " set '%s'.", primary.port, rs.replset_name)
        raise
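# Illustrative follow-up (the helper name and `username` parameter are hypothetical): once
# the two roles above exist, they can be attached to a test user with the standard
# `grantRolesToUser` command.
def _grant_tenant_migration_roles(self, primary_client, username):
    """Grant the donor and recipient roles to `username` in the admin database."""
    primary_client.admin.command({
        "grantRolesToUser": username,
        "roles": [
            {"role": "tenantMigrationDonorRole", "db": "admin"},
            {"role": "tenantMigrationRecipientRole", "db": "admin"},
        ],
    })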
def await_last_op_committed(self):
    """Wait for the last majority committed op to be visible."""
    primary_client = self.get_primary().mongo_client()
    interface.authenticate(primary_client, self.auth_options)

    primary_optime = get_last_optime(primary_client, self.fixturelib)
    up_to_date_nodes = set()

    def check_rcmaj_optime(client, node):
        """Return True if all nodes have caught up with the primary."""
        res = client.admin.command({"replSetGetStatus": 1})
        read_concern_majority_optime = res["optimes"]["readConcernMajorityOpTime"]

        if (read_concern_majority_optime["t"] == primary_optime["t"]
                and read_concern_majority_optime["ts"] >= primary_optime["ts"]):
            up_to_date_nodes.add(node.port)

        return len(up_to_date_nodes) == len(self.nodes)

    self._await_cmd_all_nodes(check_rcmaj_optime, "waiting for last committed optime")
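# `get_last_optime` is imported from elsewhere in the fixture library. A hedged sketch of
# its likely shape: read the connected node's own optime out of replSetGetStatus. The exact
# field handling is an assumption.
def get_last_optime(client, fixturelib):
    """Return the optime dict ({'ts': ..., 't': ...}) of the node `client` is connected to."""
    status = client.admin.command({"replSetGetStatus": 1})
    for member in status["members"]:
        if member.get("self", False):
            return member["optime"]
    raise fixturelib.ServerFailure("replSetGetStatus output contained no 'self' member.")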
def stepup_node(self, node, auth_options):
    """Try to step up the given node; return whether the attempt was successful."""
    try:
        self.logger.info(
            "Attempting to step up the chosen secondary on port %d of replica set '%s'.",
            node.port, self.replset_name)
        client = interface.authenticate(node.mongo_client(), auth_options)
        client.admin.command("replSetStepUp")
        return True
    except pymongo.errors.OperationFailure:
        # OperationFailure exceptions are expected when the election attempt fails due to
        # not receiving enough votes. This can happen when the 'chosen' secondary's opTime
        # is behind that of other secondaries. We handle this by attempting to elect a
        # different secondary.
        self.logger.info("Failed to step up the secondary on port %d of replica set '%s'.",
                         node.port, self.replset_name)
        return False
    except pymongo.errors.AutoReconnect:
        # It is possible for a replSetStepUp to fail with AutoReconnect if that node goes
        # into Rollback (which causes it to close any open connections).
        return False
def await_ready(self):
    """Block until the fixture can be used for testing."""
    # Wait for the config server.
    self.configsvr.await_ready()

    # Wait for each of the shards.
    for replica_set in self.replica_sets:
        replica_set.await_ready()

    # Add all the shards.
    for replica_set in self.replica_sets:
        connection_string = replica_set.get_internal_connection_string()
        self.logger.info("Adding %s as a shard...", connection_string)

        config_primary = self.configsvr.get_primary()
        config_primary_client = interface.authenticate(config_primary.mongo_client(),
                                                       self.auth_options)

        try:
            config_primary_client.admin.command(
                {"_configsvrAddShard": connection_string},
                write_concern=pymongo.write_concern.WriteConcern(w="majority"))
        except:
            self.logger.exception("Error calling addShard for replica set '%s'",
                                  connection_string)
            raise

    # Ensure that the sessions collection gets auto-sharded by the config server.
    self.logger.info("Sending refresh logical session cache to configsvr")
    self.refresh_logical_session_cache(self.configsvr)

    for replica_set in self.replica_sets:
        self.logger.info("Sending refresh logical session cache to shards")
        self.refresh_logical_session_cache(replica_set)
def _create_client(self, node):
    return fixture_interface.authenticate(node.mongo_client(), self._auth_options)
def start_balancer(self, timeout_ms=60000):
    """Start the balancer."""
    client = self.mongo_client()
    interface.authenticate(client, self.auth_options)
    client.admin.command({"balancerStart": 1}, maxTimeMS=timeout_ms)
    self.logger.info("Started the balancer")
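# `stop_balancer` is called from await_ready() above but not shown in this section. A
# minimal sketch, assuming it mirrors start_balancer() with the `balancerStop` command:
def stop_balancer(self, timeout_ms=60000):
    """Stop the balancer."""
    client = self.mongo_client()
    interface.authenticate(client, self.auth_options)
    client.admin.command({"balancerStop": 1}, maxTimeMS=timeout_ms)
    self.logger.info("Stopped the balancer")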
def change_version_and_restart_node(self, primary, auth_options):
    """
    Select a secondary for step-up.

    Ensure its version is different from that of the old primary; change the version of the
    secondary if needed.
    """

    def get_chosen_node_from_replsetstatus(status_member_infos):
        max_optime = None
        chosen_index = None
        # We always select the secondary with the highest optime to step up.
        for member_info in status_member_infos:
            if member_info.get("self", False):
                # Ignore self, which is the old primary and not eligible to be re-elected
                # in a downgraded multiversion cluster.
                continue
            optime_dict = member_info["optime"]
            if max_optime is None:
                chosen_index = member_info["_id"]
                max_optime = optime_dict
            elif compare_optime(optime_dict, max_optime) > 0:
                chosen_index = member_info["_id"]
                max_optime = optime_dict

        if chosen_index is None or max_optime is None:
            raise self.fixturelib.ServerFailure(
                "Failed to find a secondary eligible for "
                f"election; index: {chosen_index}, optime: {max_optime}")

        return self.nodes[chosen_index]

    primary_client = interface.authenticate(primary.mongo_client(), auth_options)

    retry_time_secs = self.AWAIT_REPL_TIMEOUT_MINS * 60
    retry_start_time = time.time()

    while True:
        member_infos = primary_client.admin.command({"replSetGetStatus": 1})["members"]
        chosen_node = get_chosen_node_from_replsetstatus(member_infos)

        if chosen_node.change_version_if_needed(primary):
            self.logger.info(
                "Waiting for the chosen secondary on port %d of replica set '%s' to exit.",
                chosen_node.port, self.replset_name)

            teardown_mode = interface.TeardownMode.TERMINATE
            chosen_node.mongod.stop(mode=teardown_mode)
            chosen_node.mongod.wait()

            self.logger.info(
                "Attempting to restart the chosen secondary on port %d of replica set '%s'.",
                chosen_node.port, self.replset_name)

            chosen_node.setup()
            self.logger.info(interface.create_fixture_table(self))
            chosen_node.await_ready()

        if self.stepup_node(chosen_node, auth_options):
            break

        if time.time() - retry_start_time > retry_time_secs:
            raise self.fixturelib.ServerFailure(
                "The chosen secondary on port {} of replica set {} did not step up in"
                " {} seconds.".format(chosen_node.port, self.replset_name, retry_time_secs))

    return chosen_node
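# `compare_optime` is imported from elsewhere. A hedged sketch of the comparison it
# plausibly performs: order optimes by term first, then by timestamp.
def compare_optime(optime1, optime2):
    """Return 1, 0, or -1 as `optime1` is greater than, equal to, or less than `optime2`."""
    if optime1["t"] != optime2["t"]:
        return 1 if optime1["t"] > optime2["t"] else -1
    if optime1["ts"] != optime2["ts"]:
        return 1 if optime1["ts"] > optime2["ts"] else -1
    return 0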
def setup(self):  # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    """Set up the replica set."""
    # Version-agnostic options for mongod/s can be set here.
    # Version-specific options should be set in get_version_specific_options_for_mongod()
    # to avoid options for old versions being applied to new ReplicaSet fixtures.
    for i in range(self.num_nodes):
        self.nodes[i].setup()

    if self.initial_sync_node:
        self.initial_sync_node.setup()
        self.initial_sync_node.await_ready()

    # Legacy multiversion check.
    if self.mixed_bin_versions:
        for i in range(self.num_nodes):
            self.logger.info("node[%d] version: %s, mixed_bin_versions[%d]: %s", i,
                             self.nodes[i].mongod_executable, i, self.mixed_bin_versions[i])
            if self.nodes[i].mongod_executable != self.mixed_bin_versions[i]:
                msg = (f"Executable of node{i}: {self.nodes[i].mongod_executable} does not "
                       f"match the executable assigned by mixedBinVersions: "
                       f"{self.mixed_bin_versions[i]}.")
                raise self.fixturelib.ServerFailure(msg)

    # We only need to wait to connect to the first node of the replica set because we first
    # initiate it as a single-node replica set.
    self.nodes[0].await_ready()

    # Initiate the replica set.
    members = []
    for (i, node) in enumerate(self.nodes):
        member_info = {"_id": i, "host": node.get_internal_connection_string()}
        if i > 0:
            if not self.all_nodes_electable:
                member_info["priority"] = 0
            if i >= 7 or not self.voting_secondaries:
                # Only 7 nodes in a replica set can vote, so the other members must still be
                # non-voting when this fixture is configured to have voting secondaries.
                member_info["votes"] = 0
        members.append(member_info)

    if self.initial_sync_node:
        members.append({
            "_id": self.initial_sync_node_idx,
            "host": self.initial_sync_node.get_internal_connection_string(),
            "priority": 0,
            "hidden": 1,
            "votes": 0
        })

    repl_config = {"_id": self.replset_name, "protocolVersion": 1}
    client = self.nodes[0].mongo_client()
    interface.authenticate(client, self.auth_options)

    if client.local.system.replset.count_documents(filter={}):
        # Skip initializing the replset if there is an existing configuration.
        self.logger.info("Configuration exists. Skipping initializing the replset.")
        return

    if self.write_concern_majority_journal_default is not None:
        repl_config[
            "writeConcernMajorityJournalDefault"] = self.write_concern_majority_journal_default
    else:
        server_status = client.admin.command({"serverStatus": 1})
        cmd_line_opts = client.admin.command({"getCmdLineOpts": 1})
        if not (server_status["storageEngine"]["persistent"] and cmd_line_opts["parsed"].get(
                "storage", {}).get("journal", {}).get("enabled", True)):
            repl_config["writeConcernMajorityJournalDefault"] = False

    if self.replset_config_options.get("configsvr", False):
        repl_config["configsvr"] = True
    if self.replset_config_options.get("settings"):
        replset_settings = self.replset_config_options["settings"]
        repl_config["settings"] = replset_settings

    # Increase the election timeout to 24 hours to prevent spurious elections.
    repl_config.setdefault("settings", {})
    if "electionTimeoutMillis" not in repl_config["settings"]:
        repl_config["settings"]["electionTimeoutMillis"] = 24 * 60 * 60 * 1000

    # Start up a single-node replica set, then reconfigure to the correct size (if the config
    # contains more than one node) so the primary is elected more quickly.
    repl_config["members"] = [members[0]]
    self.logger.info("Issuing replSetInitiate command: %s", repl_config)
    self._initiate_repl_set(client, repl_config)
    self._await_primary()

    if self.fcv is not None:
        # Initiating a replica set with a single node will use "latest" FCV. This will
        # cause IncompatibleServerVersion errors if additional "last-lts" binary version
        # nodes are subsequently added to the set, since such nodes cannot set their FCV to
        # "latest". Therefore, we make sure the primary is "last-lts" FCV before adding in
        # nodes of different binary versions to the replica set.
        client.admin.command({"setFeatureCompatibilityVersion": self.fcv})

    if self.nodes[1:]:
        # Wait to connect to each of the secondaries before running the replSetReconfig
        # command.
        for node in self.nodes[1:]:
            node.await_ready()
        # Add the members one at a time, since non-force reconfigs can only add/remove a
        # single voting member at a time.
        for ind in range(2, len(members) + 1):
            self._add_node_to_repl_set(client, repl_config, ind, members)

    self._await_secondaries()
    self._await_newly_added_removals()
def get_shard_ids(self):
    """Get the list of shard ids in the cluster."""
    client = self.mongo_client()
    interface.authenticate(client, self.auth_options)
    res = client.admin.command("listShards")
    return [shard_info["_id"] for shard_info in res["shards"]]
def setup(self):  # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    """Set up the replica set."""
    self.replset_name = self.mongod_options.get("replSet", "rs")
    if not self.nodes:
        for i in range(self.num_nodes):
            node = self._new_mongod(i, self.replset_name)
            self.nodes.append(node)

    for i in range(self.num_nodes):
        steady_state_constraint_param = "oplogApplicationEnforcesSteadyStateConstraints"
        # TODO (SERVER-52985): Set steady state constraint parameters on last-lts nodes.
        if (steady_state_constraint_param not in self.nodes[i].mongod_options["set_parameters"]
                and self.mixed_bin_versions is not None
                and self.mixed_bin_versions[i] == "new"):
            self.nodes[i].mongod_options["set_parameters"][
                steady_state_constraint_param] = True
        if self.linear_chain and i > 0:
            self.nodes[i].mongod_options["set_parameters"][
                "failpoint.forceSyncSourceCandidate"] = self.fixturelib.make_historic({
                    "mode": "alwaysOn",
                    "data": {
                        "hostAndPort": self.nodes[i - 1].get_internal_connection_string()
                    }
                })
        self.nodes[i].setup()

    if self.start_initial_sync_node:
        if not self.initial_sync_node:
            self.initial_sync_node_idx = len(self.nodes)
            self.initial_sync_node = self._new_mongod(self.initial_sync_node_idx,
                                                      self.replset_name)
        self.initial_sync_node.setup()
        self.initial_sync_node.await_ready()

    if self.mixed_bin_versions:
        for i in range(self.num_nodes):
            if self.nodes[i].mongod_executable != self.mixed_bin_versions[i]:
                msg = (f"Executable of node{i}: {self.nodes[i].mongod_executable} does not "
                       f"match the executable assigned by mixedBinVersions: "
                       f"{self.mixed_bin_versions[i]}.")
                raise self.fixturelib.ServerFailure(msg)

    # We only need to wait to connect to the first node of the replica set because we first
    # initiate it as a single-node replica set.
    self.nodes[0].await_ready()

    # Initiate the replica set.
    members = []
    for (i, node) in enumerate(self.nodes):
        member_info = {"_id": i, "host": node.get_internal_connection_string()}
        if i > 0:
            if not self.all_nodes_electable:
                member_info["priority"] = 0
            if i >= 7 or not self.voting_secondaries:
                # Only 7 nodes in a replica set can vote, so the other members must still be
                # non-voting when this fixture is configured to have voting secondaries.
                member_info["votes"] = 0
        members.append(member_info)

    if self.initial_sync_node:
        members.append({
            "_id": self.initial_sync_node_idx,
            "host": self.initial_sync_node.get_internal_connection_string(),
            "priority": 0,
            "hidden": 1,
            "votes": 0
        })

    repl_config = {"_id": self.replset_name, "protocolVersion": 1}
    client = self.nodes[0].mongo_client()
    interface.authenticate(client, self.auth_options)

    if client.local.system.replset.count_documents(filter={}):
        # Skip initializing the replset if there is an existing configuration.
        return

    if self.write_concern_majority_journal_default is not None:
        repl_config[
            "writeConcernMajorityJournalDefault"] = self.write_concern_majority_journal_default
    else:
        server_status = client.admin.command({"serverStatus": 1})
        cmd_line_opts = client.admin.command({"getCmdLineOpts": 1})
        if not (server_status["storageEngine"]["persistent"] and cmd_line_opts["parsed"].get(
                "storage", {}).get("journal", {}).get("enabled", True)):
            repl_config["writeConcernMajorityJournalDefault"] = False

    if self.replset_config_options.get("configsvr", False):
        repl_config["configsvr"] = True
    if self.replset_config_options.get("settings"):
        replset_settings = self.replset_config_options["settings"]
        repl_config["settings"] = replset_settings

    # Increase the election timeout to 24 hours to prevent spurious elections.
    repl_config.setdefault("settings", {})
    if "electionTimeoutMillis" not in repl_config["settings"]:
        repl_config["settings"]["electionTimeoutMillis"] = 24 * 60 * 60 * 1000

    # Start up a single-node replica set, then reconfigure to the correct size (if the config
    # contains more than one node) so the primary is elected more quickly.
    repl_config["members"] = [members[0]]
    self.logger.info("Issuing replSetInitiate command: %s", repl_config)
    self._initiate_repl_set(client, repl_config)
    self._await_primary()

    if self.mixed_bin_versions is not None:
        if self.mixed_bin_versions[0] == "new":
            fcv_response = client.admin.command({
                "getParameter": 1,
                "featureCompatibilityVersion": 1
            })
            fcv = fcv_response["featureCompatibilityVersion"]["version"]
            if fcv != ReplicaSetFixture._LATEST_FCV:
                msg = "Server returned FCV{} when we expected FCV{}.".format(
                    fcv, ReplicaSetFixture._LATEST_FCV)
                raise self.fixturelib.ServerFailure(msg)

        # Initiating a replica set with a single node will use "latest" FCV. This will
        # cause IncompatibleServerVersion errors if additional "last-lts" binary version
        # nodes are subsequently added to the set, since such nodes cannot set their FCV to
        # "latest". Therefore, we make sure the primary is "last-lts" FCV before adding in
        # nodes of different binary versions to the replica set.
        client.admin.command(
            {"setFeatureCompatibilityVersion": ReplicaSetFixture._LAST_LTS_FCV})

    if self.nodes[1:]:
        # Wait to connect to each of the secondaries before running the replSetReconfig
        # command.
        for node in self.nodes[1:]:
            node.await_ready()
        # Add the members one at a time, since non-force reconfigs can only add/remove a
        # single voting member at a time.
        for ind in range(2, len(members) + 1):
            self._add_node_to_repl_set(client, repl_config, ind, members)

    self._await_secondaries()
    self._await_newly_added_removals()
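# `_add_node_to_repl_set` is referenced in both setup() variants above but defined
# elsewhere. A hedged sketch without the retry logic the real helper would likely need:
# refresh the config version (since concurrent 'newlyAdded' removals can bump it), extend
# the member list, and issue a non-force replSetReconfig.
def _add_node_to_repl_set(self, client, repl_config, member_count, members):
    self.logger.info("Adding in node %d: %s", member_count, members[member_count - 1])
    curr_version = client.admin.command({"replSetGetConfig": 1})["config"]["version"]
    repl_config["version"] = curr_version + 1
    repl_config["members"] = members[:member_count]
    client.admin.command({
        "replSetReconfig": repl_config,
        "maxTimeMS": self.AWAIT_REPL_TIMEOUT_MINS * 60 * 1000,
    })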
def add_recipient_nodes(self, recipient_set_name, recipient_tag_name=None):
    """Build recipient nodes, and reconfig them into the donor as non-voting members."""
    recipient_tag_name = recipient_tag_name or "recipientNode"

    self.logger.info(
        f"Adding {self.num_nodes_per_replica_set} recipient nodes to donor replica set.")
    with self.__lock:
        self._port_index ^= 1  # Toggle the set of mongod ports between index 0 and 1.
        for i in range(self.num_nodes_per_replica_set):
            mongod_logger = self.fixturelib.new_fixture_node_logger(
                "MongoDFixture", self.job_num, f"{recipient_set_name}:node{i}")

            mongod_options = self.common_mongod_options.copy()
            # Even though these nodes are not starting in a replica set, we structure their
            # files on disk as if they were already part of the new recipient set. This
            # makes logging and cleanup easier.
            mongod_options["dbpath"] = os.path.join(self._dbpath_prefix, recipient_set_name,
                                                    "node{}".format(i))
            mongod_options["set_parameters"] = mongod_options.get(
                "set_parameters", self.fixturelib.make_historic({})).copy()
            mongod_options["serverless"] = True
            mongod_port = self._ports[self._port_index][i]

            self.fixtures.append(
                self.fixturelib.make_fixture(
                    "MongoDFixture", mongod_logger, self.job_num,
                    mongod_options=mongod_options, dbpath_prefix=self.dbpath_prefix,
                    preserve_dbpath=self.preserve_dbpath, port=mongod_port))

    recipient_nodes = self.get_recipient_nodes()
    for recipient_node in recipient_nodes:
        recipient_node.setup()
        recipient_node.await_ready()

    # Reconfig the donor to add the recipient nodes as non-voting members.
    donor_client = self.get_donor_rs().get_primary().mongo_client()
    interface.authenticate(donor_client, self.auth_options)

    repl_config = donor_client.admin.command({"replSetGetConfig": 1})["config"]
    repl_members = repl_config["members"]
    for recipient_node in recipient_nodes:
        repl_members.append({
            "host": recipient_node.get_internal_connection_string(),
            "votes": 0,
            "priority": 0,
            "tags": {recipient_tag_name: str(ObjectId())}
        })

    # Re-index all members from 0.
    for idx, member in enumerate(repl_members):
        member["_id"] = idx

    # Prepare the new config.
    repl_config["version"] = repl_config["version"] + 1
    repl_config["members"] = repl_members

    self.logger.info(
        f"Reconfiguring donor replica set to add non-voting recipient nodes: {repl_config}")
    donor_client.admin.command({
        "replSetReconfig": repl_config,
        "maxTimeMS": self.AWAIT_REPL_TIMEOUT_MINS * 60 * 1000
    })

    # Wait for recipient nodes to become secondaries.
    self._await_recipient_nodes()
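# `_await_recipient_nodes` is called above but defined outside this section. A hedged
# sketch: poll replSetGetStatus on each recipient until it reports SECONDARY state.
def _await_recipient_nodes(self):
    """Block until every recipient node reports itself as a secondary."""
    for node in self.get_recipient_nodes():
        client = node.mongo_client(read_preference=pymongo.ReadPreference.SECONDARY)
        interface.authenticate(client, self.auth_options)
        while True:
            status = client.admin.command("replSetGetStatus")
            if status["myState"] == 2:  # 2 == SECONDARY.
                break
            time.sleep(0.1)  # Wait a little bit before trying again.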