def _promote_to_replica_source(old_master, master_candidate,
                               replica_models):
    """Make ``master_candidate`` the replication source in place of
    ``old_master``.

    The old master is made read-only, the candidate is brought up to the
    old master's latest transaction, detached and enabled as master, and
    the public IPs of the two instances are swapped. Every other replica
    in ``replica_models`` is then re-pointed at the candidate and the old
    master is demoted.

    ``self`` is not a parameter: it is resolved from the surrounding
    scope (presumably the enclosing manager method -- confirm).

    :param old_master: instance currently acting as replication source.
    :param master_candidate: replica to be promoted.
    :param replica_models: replicas of ``old_master``; the entry whose
        ``id`` equals ``master_candidate.id`` is not re-attached.
    :raises ReplicationSlaveAttachError: if migrating any replica raised
        ``exception.TroveError`` or demoting the old master raised; the
        affected instances are flagged ``PROMOTION_ERROR`` first.
    """
    # First, we transition from the old master to new as quickly as
    # possible to minimize the scope of unrecoverable error
    old_master.make_read_only(True)
    master_ips = old_master.detach_public_ips()
    slave_ips = master_candidate.detach_public_ips()
    latest_txn_id = old_master.get_latest_txn_id()
    master_candidate.wait_for_txn(latest_txn_id)
    master_candidate.detach_replica(old_master, for_failover=True)
    master_candidate.enable_as_master()
    old_master.attach_replica(master_candidate)
    master_candidate.attach_public_ips(master_ips)
    master_candidate.make_read_only(False)
    old_master.attach_public_ips(slave_ips)

    # At this point, should something go wrong, there
    # should be a working master with some number of working slaves,
    # and possibly some number of "orphaned" slaves

    # Holds instance objects (never bare ids): the list is handed to
    # _set_task_status below, which is otherwise always called with
    # instance objects.
    exception_replicas = []
    for replica in replica_models:
        try:
            replica.wait_for_txn(latest_txn_id)
            if replica.id != master_candidate.id:
                replica.detach_replica(old_master, for_failover=True)
                replica.attach_replica(master_candidate)
        except exception.TroveError:
            msg = _("promote-to-replica-source: Unable to migrate "
                    "replica %(slave)s from old replica source "
                    "%(old_master)s to new source %(new_master)s.")
            msg_values = {
                "slave": replica.id,
                "old_master": old_master.id,
                "new_master": master_candidate.id
            }
            LOG.exception(msg % msg_values)
            exception_replicas.append(replica)

    try:
        old_master.demote_replication_master()
    except Exception:
        # Best effort: a failed demotion is reported together with the
        # replica failures instead of aborting the status reset below.
        LOG.exception(_("Exception demoting old replica source"))
        exception_replicas.append(old_master)

    self._set_task_status([old_master] + replica_models,
                          InstanceTasks.NONE)
    if exception_replicas:
        self._set_task_status(exception_replicas,
                              InstanceTasks.PROMOTION_ERROR)
        msg = _("promote-to-replica-source %(id)s: The following "
                "replicas may not have been switched: %(replicas)s")
        msg_values = {
            "id": master_candidate.id,
            # Report ids rather than object reprs.
            "replicas": [repl.id for repl in exception_replicas]
        }
        raise ReplicationSlaveAttachError(msg % msg_values)
def _eject_replica_source(old_master, replica_models):
    """Eject ``old_master`` and promote its most current replica.

    The new source is chosen with ``self._most_current_replica``. It is
    detached from the old master, enabled as master and made writable,
    and the public IPs of the two instances are swapped. The remaining
    replicas are then re-pointed at the new source, followed by any
    post-processing the new source requests.

    ``self`` is not a parameter: it is resolved from the surrounding
    scope (presumably the enclosing manager method -- confirm).

    :param old_master: replication source being ejected.
    :param replica_models: replicas of ``old_master``.
    :raises ReplicationSlaveAttachError: if migrating any replica raised
        ``exception.TroveError``; the affected replicas are flagged
        ``EJECTION_ERROR`` first.
    """
    master_candidate = self._most_current_replica(old_master,
                                                  replica_models)

    master_ips = old_master.detach_public_ips()
    slave_ips = master_candidate.detach_public_ips()
    master_candidate.detach_replica(old_master, for_failover=True)
    master_candidate.enable_as_master()
    master_candidate.attach_public_ips(master_ips)
    master_candidate.make_read_only(False)
    old_master.attach_public_ips(slave_ips)

    # Holds instance objects (never bare ids): the list is handed to
    # _set_task_status below, which is otherwise always called with
    # instance objects.
    exception_replicas = []
    for replica in replica_models:
        try:
            if replica.id != master_candidate.id:
                replica.detach_replica(old_master, for_failover=True)
                replica.attach_replica(master_candidate)
        except exception.TroveError:
            msg = _("eject-replica-source: Unable to migrate "
                    "replica %(slave)s from old replica source "
                    "%(old_master)s to new source %(new_master)s.")
            msg_values = {
                "slave": replica.id,
                "old_master": old_master.id,
                "new_master": master_candidate.id
            }
            LOG.exception(msg % msg_values)
            exception_replicas.append(replica)

    if master_candidate.post_processing_required_for_replication():
        # Hand the new source the replication details of every replica
        # except itself.
        new_slaves = list(replica_models)
        new_slaves.remove(master_candidate)
        new_slaves_detail = [
            slave.get_replication_detail() for slave in new_slaves
        ]
        master_candidate.complete_master_setup(new_slaves_detail)

    self._set_task_status([old_master] + replica_models,
                          InstanceTasks.NONE)
    if exception_replicas:
        self._set_task_status(exception_replicas,
                              InstanceTasks.EJECTION_ERROR)
        msg = _("eject-replica-source %(id)s: The following "
                "replicas may not have been switched: %(replicas)s")
        msg_values = {
            "id": master_candidate.id,
            # Report ids rather than object reprs.
            "replicas": [repl.id for repl in exception_replicas]
        }
        raise ReplicationSlaveAttachError(msg % msg_values)
def _eject_replica_source(old_master, replica_models):
    """Replace an ejected replication source with its freshest replica.

    ``self._most_current_replica`` picks the new source. That instance
    is detached from ``old_master``, enabled as master, given the old
    master's public IPs and made writable; the old master receives the
    new source's former public IPs. All other replicas are then moved
    over to the new source.

    ``self`` comes from the surrounding scope, not from the parameter
    list.

    :param old_master: replication source being ejected.
    :param replica_models: replicas of ``old_master``.
    :raises ReplicationSlaveAttachError: when at least one replica
        failed to migrate with ``exception.TroveError``; those replicas
        are marked ``EJECTION_ERROR`` before raising.
    """
    master_candidate = self._most_current_replica(old_master,
                                                  replica_models)

    # Swap roles and public IPs between the old and the new source.
    master_ips = old_master.detach_public_ips()
    slave_ips = master_candidate.detach_public_ips()
    master_candidate.detach_replica(old_master, for_failover=True)
    master_candidate.enable_as_master()
    master_candidate.attach_public_ips(master_ips)
    master_candidate.make_read_only(False)
    old_master.attach_public_ips(slave_ips)

    failed_replicas = []
    failure_lines = []
    for replica in replica_models:
        try:
            # The new source itself needs no migration.
            if replica.id == master_candidate.id:
                continue
            replica.detach_replica(old_master, for_failover=True)
            replica.attach_replica(master_candidate)
        except exception.TroveError as ex:
            details = {
                "slave": replica.id,
                "old_master": old_master.id,
                "new_master": master_candidate.id,
            }
            # The log line stays untranslated; only the text that ends
            # up in the raised error goes through _().
            LOG.exception("Unable to migrate replica %(slave)s from "
                          "old replica source %(old_master)s to "
                          "new source %(new_master)s on eject.",
                          details)
            failed_replicas.append(replica)
            translated = _("Unable to migrate replica %(slave)s from "
                           "old replica source %(old_master)s to "
                           "new source %(new_master)s on eject.")
            failure_lines.append(
                "%s (%s)\n" % (translated % details, ex))

    # Clear the task status on everything first, then flag the failures.
    self._set_task_status([old_master] + replica_models,
                          InstanceTasks.NONE)
    if not failed_replicas:
        return

    self._set_task_status(failed_replicas, InstanceTasks.EJECTION_ERROR)
    summary = _("eject-replica-source %(id)s: The following "
                "replicas may not have been switched: %(replicas)s:"
                "\n%(err)s")
    raise ReplicationSlaveAttachError(summary % {
        "id": master_candidate.id,
        "replicas": [failed.id for failed in failed_replicas],
        "err": "".join(failure_lines),
    })
def _promote_to_replica_source(old_master, master_candidate,
                               replica_models):
    """Promote ``master_candidate`` to replication source.

    The switch from the old master to the new one is done as quickly
    as possible to minimize the scope of unrecoverable error. Steps
    run in this order:

    1. make the old master read only (and detach floating ips)
    2. make sure the new master is up-to-date
    3. detach the new master from the old one
    4. enable the new master (and attach floating ips)
    5. attach the other replicas to the new master
    6. attach the old master to the new one (and attach floating ips)
    7. demote the old master

    NOTE(zhaochao): step 6 used to run right after step 4, which broke
    MariaDB replication. While the other replicas are still connected
    to the old master, re-attaching the old master as a slave may
    create a new GTID that gets synced to them. Attaching those
    replicas to the new master afterwards makes 'START SLAVE' fail
    with 'fatal error 1236' if their binlog diverged from the new
    master. The old master must therefore be re-attached only after
    all other replicas have been migrated.

    ``self`` comes from the surrounding scope, not from the parameter
    list.

    :param old_master: instance currently acting as replication source.
    :param master_candidate: replica to be promoted.
    :param replica_models: replicas of ``old_master``; the entry whose
        ``id`` equals ``master_candidate.id`` is skipped.
    :raises ReplicationSlaveAttachError: when a replica failed to
        migrate with ``exception.TroveError`` or demoting the old
        master raised; the affected instances are marked
        ``PROMOTION_ERROR`` before raising.
    """
    # Steps 1-4: freeze the old master and bring up the new one.
    old_master.make_read_only(True)
    master_ips = old_master.detach_public_ips()
    slave_ips = master_candidate.detach_public_ips()
    latest_txn_id = old_master.get_latest_txn_id()
    master_candidate.wait_for_txn(latest_txn_id)
    master_candidate.detach_replica(old_master, for_failover=True)
    master_candidate.enable_as_master()
    master_candidate.attach_public_ips(master_ips)
    master_candidate.make_read_only(False)

    # From here on a failure should still leave a working master with
    # some working slaves, and possibly some "orphaned" slaves.
    failed_instances = []
    failure_lines = []

    # Step 5: move the remaining replicas over to the new master.
    for replica in replica_models:
        try:
            if replica.id == master_candidate.id:
                continue
            replica.detach_replica(old_master, for_failover=True)
            replica.attach_replica(master_candidate)
        except exception.TroveError as ex:
            details = {
                "slave": replica.id,
                "old_master": old_master.id,
                "new_master": master_candidate.id,
            }
            LOG.exception("Unable to migrate replica %(slave)s from "
                          "old replica source %(old_master)s to "
                          "new source %(new_master)s on promote.",
                          details)
            failed_instances.append(replica)
            translated = _("Unable to migrate replica %(slave)s from "
                           "old replica source %(old_master)s to "
                           "new source %(new_master)s on promote.")
            failure_lines.append(
                "%s (%s)\n" % (translated % details, ex))

    # Step 6: only after every other replica has been handled is the
    # old master attached to the new one.
    old_master.attach_replica(master_candidate)
    old_master.attach_public_ips(slave_ips)

    # Step 7: a failed demotion is recorded, not propagated directly.
    try:
        old_master.demote_replication_master()
    except Exception as ex:
        LOG.exception("Exception demoting old replica source %s.",
                      old_master.id)
        failed_instances.append(old_master)
        translated = _("Exception demoting old replica source %s.")
        failure_lines.append(
            "%s (%s)\n" % (translated % old_master.id, ex))

    # Clear the task status on everything first, then flag the failures.
    self._set_task_status([old_master] + replica_models,
                          InstanceTasks.NONE)
    if not failed_instances:
        return

    self._set_task_status(failed_instances, InstanceTasks.PROMOTION_ERROR)
    summary = _("promote-to-replica-source %(id)s: The following "
                "replicas may not have been switched: %(replicas)s:"
                "\n%(err)s")
    raise ReplicationSlaveAttachError(summary % {
        "id": master_candidate.id,
        "replicas": [failed.id for failed in failed_instances],
        "err": "".join(failure_lines),
    })
def _promote_to_replica_source(old_master, master_candidate,
                               replica_models):
    """Promote ``master_candidate`` to replication source.

    The old master is made read-only and the candidate is brought up
    to the old master's latest transaction, detached and enabled as
    master. The old master is re-attached as a replica of the
    candidate and the two instances' public IPs are swapped. The
    remaining replicas are then migrated and the old master is demoted.

    ``self`` comes from the surrounding scope, not from the parameter
    list.

    :param old_master: instance currently acting as replication source.
    :param master_candidate: replica to be promoted.
    :param replica_models: replicas of ``old_master``; the entry whose
        ``id`` equals ``master_candidate.id`` is skipped.
    :raises ReplicationSlaveAttachError: when a replica failed to
        migrate with ``exception.TroveError`` or demoting the old
        master raised; the affected instances are marked
        ``PROMOTION_ERROR`` before raising.
    """
    # Switch from the old master to the new one as quickly as possible
    # to keep the window for unrecoverable errors small.
    old_master.make_read_only(True)
    master_ips = old_master.detach_public_ips()
    slave_ips = master_candidate.detach_public_ips()
    latest_txn_id = old_master.get_latest_txn_id()
    master_candidate.wait_for_txn(latest_txn_id)
    master_candidate.detach_replica(old_master, for_failover=True)
    master_candidate.enable_as_master()
    # NOTE(review): the old master is re-attached here, before the
    # other replicas are migrated -- confirm this ordering is safe for
    # every supported datastore.
    old_master.attach_replica(master_candidate)
    master_candidate.attach_public_ips(master_ips)
    master_candidate.make_read_only(False)
    old_master.attach_public_ips(slave_ips)

    # From here on a failure should still leave a working master with
    # some working slaves, and possibly some "orphaned" slaves.
    failed_instances = []
    failure_lines = []
    for replica in replica_models:
        try:
            if replica.id == master_candidate.id:
                continue
            replica.detach_replica(old_master, for_failover=True)
            replica.attach_replica(master_candidate)
        except exception.TroveError as ex:
            details = {
                "slave": replica.id,
                "old_master": old_master.id,
                "new_master": master_candidate.id,
            }
            LOG.exception("Unable to migrate replica %(slave)s from "
                          "old replica source %(old_master)s to "
                          "new source %(new_master)s on promote.",
                          details)
            failed_instances.append(replica)
            translated = _("Unable to migrate replica %(slave)s from "
                           "old replica source %(old_master)s to "
                           "new source %(new_master)s on promote.")
            failure_lines.append(
                "%s (%s)\n" % (translated % details, ex))

    # A failed demotion is recorded alongside the replica failures.
    try:
        old_master.demote_replication_master()
    except Exception as ex:
        LOG.exception("Exception demoting old replica source %s.",
                      old_master.id)
        failed_instances.append(old_master)
        translated = _("Exception demoting old replica source %s.")
        failure_lines.append(
            "%s (%s)\n" % (translated % old_master.id, ex))

    # Clear the task status on everything first, then flag the failures.
    self._set_task_status([old_master] + replica_models,
                          InstanceTasks.NONE)
    if not failed_instances:
        return

    self._set_task_status(failed_instances, InstanceTasks.PROMOTION_ERROR)
    summary = _("promote-to-replica-source %(id)s: The following "
                "replicas may not have been switched: %(replicas)s:"
                "\n%(err)s")
    raise ReplicationSlaveAttachError(summary % {
        "id": master_candidate.id,
        "replicas": [failed.id for failed in failed_instances],
        "err": "".join(failure_lines),
    })