def create_backup(self):
    """Take a snapshot backup of a read replica and verify it.

    Stores the id returned by ``self._get_rid()`` in ``self.inst_id``,
    checks the instance status as a READ_REPLI, requests a backup with
    ``backup_type='snapshot'`` and then checks the backup's status and path.
    """
    rid = self._get_rid()
    self.inst_id = rid
    utils.check_server_status(rid, type=DBInstanceType.READ_REPLI)

    snapshot = utils.create_backup_byclient(rid, backup_type='snapshot')
    utils.check_backup_status(snapshot.id)
    utils.check_backup_path(snapshot.id)
def create(self):
    """Reuse the tenant's MASTER instance, or create an "HA" one, and validate.

    Populates ``self.inst_id``, ``self.vip_id``, ``self.group_id`` and
    ``self.dbslave_id`` and finishes with ``self.validate()``.

    Raises:
        AssertionError: if the instance id differs from the origin id
            resolved from ``self.inst_id``.
    """
    found_id = None
    try:
        found_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
    except Exception as e:
        # Best effort: a failed lookup only means a new instance is created.
        print(e)

    self.inst_id = found_id
    if not self.inst_id:
        self.inst_id = utils.create_rds_byclient("HA")

    utils.check_server_status(
        self.inst_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.MASTER,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=240)
    self.vip_id = utils.get_vip_id(self.inst_id)

    master = utils.get_instance(self.inst_id, deleted=False)
    self.group_id = master.group_id
    origin_id = inst_utils.virtual_instid_2_origin_instid(self.inst_id)
    assert master.id == origin_id, (master.id, origin_id)

    self.dbslave_id = utils.get_instance_id(self.group_id,
                                            DBInstanceType.STANDBY)
    LOG.info('master_id:%s slave_id:%s' % (self.inst_id, self.dbslave_id))
    self.validate()
def resize(self):
    """Resize a read replica to the next flavor and validate the result.

    The target flavor is the current flavor id plus one, capped at 5. After
    the resize request the method checks for the RESIZING task on the
    virtual instance id, takes the first id returned by
    ``utils.check_resize_status`` as the new replica, checks it reaches
    RUNNING, and validates it against ``self.master_id``.

    Raises:
        AssertionError: if the flavor id did not change.
    """
    rid = self._get_rid()
    replica = utils.get_builtin_instance(rid)
    old_flavor_id = replica.flavor_id
    virtual_id = replica.virtual_instance_id

    # NOTE(review): when the replica is already at flavor 5 the target equals
    # the current flavor, so the final assertion cannot pass - confirm intent.
    if old_flavor_id < 5:
        target_flavor = old_flavor_id + 1
    else:
        target_flavor = 5

    utils.resize_flavor_byclient(rid, target_flavor)
    utils.check_server_status(
        virtual_id,
        type=DBInstanceType.READ_REPLI,
        expected_task=utils.tasks.InstanceTasks.RESIZING)

    resized_ids = utils.check_resize_status(self.group_id)
    new_rr_id = resized_ids[0]
    utils.check_server_status(
        new_rr_id,
        type=DBInstanceType.READ_REPLI,
        expected_task=utils.tasks.InstanceTasks.NONE,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=120)

    new_flavor_id = utils.get_instance(new_rr_id, deleted=False).flavor_id
    assert old_flavor_id != new_flavor_id
    self._validate(self.master_id, new_rr_id)
def _delete_RRs(self):
    """Delete every read replica and check VIP mappings after each deletion.

    The VIP id of each replica is recorded up front. Each replica is then
    checked as RUNNING, deleted, checked as DELETED, and its VIP is checked
    with ``deleted=True``. After each deletion the recorded VIPs of the
    replicas not yet deleted are re-checked with ``deleted=False``.
    """
    rids = self._get_rids()
    remaining_vips = dict(
        (rid, utils.get_vip_id(rid, deleted=False)) for rid in rids)

    for rid in rids:
        utils.check_server_status(
            rid,
            expected_task=utils.tasks.InstanceTasks.NONE,
            type=DBInstanceType.READ_REPLI,
            expected_svr_status=utils.ServiceStatuses.RUNNING,
            deleted=False,
            timeout=10)
        utils.delete_rds_byclient(rid)
        utils.check_server_status(
            rid,
            expected_task=utils.tasks.InstanceTasks.NONE,
            type=DBInstanceType.READ_REPLI,
            expected_svr_status=utils.ServiceStatuses.DELETED,
            deleted=True,
            timeout=CONF.nova_delete_timeout)

        deleted_vip = utils.get_vip_id(rid)
        utils.check_vip(rid, vip_id=deleted_vip, deleted=True)

        # Fore. 20150615. Deleting one RR instance must not change the
        # VIP-RIP mapping of the other RR instances.
        del remaining_vips[rid]
        for other_rid, other_vip in remaining_vips.items():
            utils.check_vip(id=other_rid, vip_id=other_vip, deleted=False)
def migrate(self, instance_id=None):
    """Migrate a SINGLE instance through the taskmanager and verify it.

    Args:
        instance_id: instance to migrate; when falsy, the tenant's SINGLE
            instance is looked up instead.

    Raises:
        AssertionError: if ``compute_instance_id`` is unchanged after the
            migration.
    """
    self.inst_id = instance_id or utils.get_instance_id_bytenant(
        DBInstanceType.SINGLE)

    def _check_running():
        # Same status check is done before and after the migration.
        utils.check_server_status(
            self.inst_id,
            expected_task=utils.tasks.InstanceTasks.NONE,
            type=DBInstanceType.SINGLE,
            expected_svr_status=utils.ServiceStatuses.RUNNING,
            deleted=False)

    _check_running()
    db_record = utils.get_instance(self.inst_id, deleted=False)
    old_nova_server_id = db_record.compute_instance_id

    ran_count = 45
    utils.generate_databases(self.inst_id, count=ran_count)

    rpc.call(utils.get_context(), "taskmanager",
             {"method": "migrate", "args": {'migrate_id': db_record.id}})
    _check_running()

    migrated = utils.get_instance(self.inst_id, deleted=False)
    new_nova_server_id = migrated.compute_instance_id
    assert old_nova_server_id != new_nova_server_id

    utils.check_generated_databases(self.inst_id, count=ran_count)
    self.validate()
def _lock(self):
    """Lock the tenant's MASTER instance and check the lock is in place.

    Sets ``self.inst_id`` to the master id, issues the lock, checks the
    instance shows the LOCKED task while RUNNING, then calls
    ``utils.check_instance_is_lock``.
    """
    master_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
    self.inst_id = master_id
    utils.lock_byclient(master_id)
    utils.check_server_status(
        master_id,
        expected_task=utils.tasks.InstanceTasks.LOCKED,
        type=DBInstanceType.MASTER,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=240)
    utils.check_instance_is_lock(master_id)
def __prepare_HA_innstance(cls, backup_id):
    """Create an "HA" instance from a backup and return the new master id.

    Args:
        backup_id: passed as ``backup`` to ``test_utils.create_rds_byclient``.

    Returns:
        The id returned by the create call, after it has been checked as a
        RUNNING MASTER.
    """
    new_master_id = test_utils.create_rds_byclient("HA", backup=backup_id)
    test_utils.check_server_status(
        new_master_id,
        expected_task=tasks.InstanceTasks.NONE,
        type=DBInstanceType.MASTER,
        expected_svr_status=test_utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=10)
    return new_master_id
def __prepare_RR_instance(cls, master_id):
    """Create an "RR" instance attached to ``master_id`` and return its id.

    Args:
        master_id: passed as ``instance_id`` to
            ``test_utils.create_rds_byclient``.

    Returns:
        The id returned by the create call, after it has been checked as a
        RUNNING READ_REPLI.
    """
    replica_id = test_utils.create_rds_byclient("RR", instance_id=master_id)
    test_utils.check_server_status(
        replica_id,
        expected_task=tasks.InstanceTasks.NONE,
        type=DBInstanceType.READ_REPLI,
        expected_svr_status=test_utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=300)
    return replica_id
def migrate_when_backup_fail(self):
    """Run ``self.migrate()`` after forcing a replica backup to 'FAILED'.

    A backup of the read replica is created and checked, then its DBBackup
    record is rewritten with ``state = 'FAILED'`` and saved before the
    migration is started.
    """
    rid = self._get_rid()
    self.inst_id = rid
    utils.check_server_status(rid, type=DBInstanceType.READ_REPLI)

    created = utils.create_backup_byclient(rid)
    utils.check_backup_status(created.id)

    # Overwrite the stored state so the migration sees a failed backup.
    record = DBBackup.find_by(utils.get_context(), id=created.id)
    record.state = 'FAILED'
    record.save()

    self.migrate()
def delete(self):
    """Delete the tenant's SINGLE instance and verify the cleanup.

    After the delete request, checks that the compute VM reaches "DELETED",
    that the instance reports the DELETED service status, and that its VIP
    check passes with ``deleted=True``.
    """
    self.inst_id = utils.get_instance_id_bytenant(DBInstanceType.SINGLE)
    db_record = utils.get_instance(self.inst_id)
    vm_id = db_record.compute_instance_id

    utils.delete_rds_byclient(self.inst_id)

    delete_timeout = CONF.nova_delete_timeout
    utils.check_vm_is_running(vm_id, expect_status="DELETED",
                              timeout=delete_timeout)
    utils.check_server_status(
        self.inst_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        expected_svr_status=utils.ServiceStatuses.DELETED,
        deleted=True,
        timeout=CONF.nova_delete_timeout)
    utils.check_vip(self.inst_id, deleted=True)
def upgradeha(self, instance_id=None):
    """Upgrade a SINGLE instance to HA and run the HA ``create()`` flow.

    Args:
        instance_id: instance to upgrade; when falsy, the tenant's SINGLE
            instance is looked up instead.
    """
    self.inst_id = instance_id or utils.get_instance_id_bytenant(
        DBInstanceType.SINGLE)
    self.group_id = utils.get_instance(self.inst_id).group_id

    utils.upgrade_ha_byclient(self.inst_id)
    utils.check_server_status(
        self.inst_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.MASTER,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False)

    # Hand over to a fresh HAInstance and run its create() flow.
    HAInstance().create()
def restore_to_point_in_time(self):
    """Restore to a point in time, before and after deleting the source.

    Flow:
      1. ``self.create()``, generate 100 databases, record a timestamp,
         then clear the databases.
      2. Poll ``utils.get_restorable_time`` until its ``end`` is later than
         the moment taken after clearing.
      3. Restore to the recorded timestamp, check the restored SINGLE
         instance holds the generated databases, then delete it.
      4. Delete the original instance and repeat step 3.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'

    self.create()
    master_vid = utils.get_builtin_instance(self.inst_id).virtual_instance_id

    db_count = 100
    utils.generate_databases(self.inst_id, count=db_count)
    utils.check_generated_databases(self.inst_id, count=db_count)
    time.sleep(3)
    time1 = datetime.datetime.now().strftime(stamp_format)

    utils.clear_generated_databases(self.inst_id, count=db_count)
    time.sleep(3)
    cleared_at = datetime.datetime.now()

    # NOTE(review): this poll has no upper bound; it spins until the
    # restorable window moves past ``cleared_at``.
    while True:
        window = utils.get_restorable_time(self.inst_id)
        window_end = datetime.datetime.strptime(window.end, stamp_format)
        if window_end > cleared_at:
            break
        time.sleep(3)

    def _restore_verify_and_drop():
        # Restore to ``time1``, check the data came back, delete the copy.
        utils.restore_to_point_in_time_byclient(master_vid, time1)
        time.sleep(10)  # wait for compute instance appear
        restored_id = utils.get_restore_instance()
        utils.check_server_status(restored_id, DBInstanceType.SINGLE,
                                  timeout=RESTORE_TIME_OUT)
        utils.check_generated_databases(restored_id, count=db_count)
        utils.delete_rds_byclient(restored_id)
        utils.check_server_status(
            restored_id,
            expected_task=utils.tasks.InstanceTasks.NONE,
            type=DBInstanceType.SINGLE,
            expected_svr_status=utils.ServiceStatuses.DELETED,
            deleted=True,
            timeout=CONF.trove_delete_timeout)

    _restore_verify_and_drop()

    # delete original instance
    utils.delete_rds_byclient(master_vid)
    utils.check_server_status(
        self.inst_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.MASTER,
        expected_svr_status=utils.ServiceStatuses.DELETED,
        deleted=True,
        timeout=CONF.trove_delete_timeout)

    # restore to point in time after deleting
    _restore_verify_and_drop()
def _unlock(self):
    """Unlock the tenant's MASTER instance and validate the HA pair.

    Populates ``self.inst_id``, ``self.vip_id``, ``self.group_id`` and
    ``self.dbslave_id`` and finishes with ``self.validate()``.

    Raises:
        AssertionError: if ``self.inst_id`` differs from the builtin
            instance's ``virtual_instance_id``.
    """
    self.inst_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
    utils.unlock_byclient(self.inst_id)
    utils.check_server_status(
        self.inst_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.MASTER,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=240)
    self.vip_id = utils.get_vip_id(self.inst_id)

    master = utils.get_builtin_instance(self.inst_id)
    self.group_id = master.group_id
    assert self.inst_id == master.virtual_instance_id, (
        self.inst_id, master.virtual_instance_id)

    self.dbslave_id = utils.get_instance_id(self.group_id,
                                            DBInstanceType.STANDBY)
    LOG.info('master_id:%s slave_id:%s' % (self.inst_id, self.dbslave_id))
    self.validate()
def _create_with_backup(self, backup, admin_user=None, admin_password=None):
    """Create an "HA" instance from ``backup`` and validate it.

    Args:
        backup: object whose ``id`` is passed as the ``backup`` argument.
        admin_user: forwarded to ``utils.create_rds_byclient``.
        admin_password: forwarded to ``utils.create_rds_byclient``.

    Returns:
        ``self.inst_id`` of the created master.
    """
    self.inst_id = utils.create_rds_byclient(
        "HA",
        backup=backup.id,
        admin_user=admin_user,
        admin_password=admin_password)
    utils.check_server_status(
        self.inst_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.MASTER,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=600)
    self.vip_id = utils.get_vip_id(self.inst_id)

    self.group_id = utils.get_builtin_instance(self.inst_id).group_id
    self.dbslave_id = utils.get_instance_id(self.group_id,
                                            DBInstanceType.STANDBY)
    LOG.info('master_id:%s slave_id:%s' % (self.inst_id, self.dbslave_id))

    self.validate()
    return self.inst_id
def create_with_master_config(self):
    """Create an "RR" instance that takes its configuration from the master.

    Raises:
        Exception: if the master's ``db_info.configuration_id`` is falsy.
    """
    self.master_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
    utils.check_server_status(
        self.master_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.MASTER,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=10)

    master = utils.get_builtin_instance(self.master_id)
    self.group_id = master.group_id
    if not master.db_info.configuration_id:
        raise Exception("master configuration_id is None")

    self.inst_id = utils.create_rds_byclient(
        "RR",
        instance_id=self.master_id,
        config_id_from_master=master.db_info.configuration_id)
    self.validate()
    utils.check_instancedb_mysql_variables(self.inst_id)
def create_with_config(self):
    """Create an "RR" instance using the tenant's configuration.

    Raises:
        Exception: if ``utils.get_config_bytenant()`` returns a falsy value.
    """
    self.master_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
    utils.check_server_status(
        self.master_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.MASTER,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=10)
    self.group_id = utils.get_builtin_instance(self.master_id).group_id

    template = utils.get_config_bytenant()
    if not template:
        raise Exception("not found template config")

    self.inst_id = utils.create_rds_byclient(
        "RR", instance_id=self.master_id, config_id=template.id)
    self.validate()
    utils.check_instancedb_mysql_variables(self.inst_id)
def configuration_patch(self):
    """Patch the MASTER instance's configuration and validate the HA pair.

    After patching, ``self.inst_id`` and ``self.dbslave_id`` are re-read
    from the group, the pair is validated, and
    ``utils.check_instace_config_is_same`` is called for the master.

    Raises:
        Exception: if no MASTER instance id is found for the tenant.
    """
    self.inst_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
    if self.inst_id is None:
        raise Exception("not found HA instance")

    utils.patch_instance_config_and_check(self.inst_id)
    utils.check_server_status(
        self.inst_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.MASTER,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=240)
    self.vip_id = utils.get_vip_id(self.inst_id)

    self.group_id = utils.get_builtin_instance(self.inst_id).group_id
    self.inst_id = utils.get_instance_id(self.group_id,
                                         DBInstanceType.MASTER)
    self.dbslave_id = utils.get_instance_id(self.group_id,
                                            DBInstanceType.STANDBY)
    LOG.info('master_id:%s slave_id:%s' % (self.inst_id, self.dbslave_id))

    self.validate()
    utils.check_instace_config_is_same(self.inst_id)
def _validate(self, master_id, validated_inst_id):
    """Run the read-replica checks for one replica against its master.

    Args:
        master_id: id of the master the replica is compared with.
        validated_inst_id: id of the replica under validation.

    Checks, in order: server status (RUNNING READ_REPLI), the MySQL admin
    user, the master's and the replica's VIP, replication delay,
    replication consistency, and the group's RR replication topology
    (uses ``self.group_id``).
    """
    rid = validated_inst_id
    utils.check_server_status(
        rid,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.READ_REPLI,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=300)
    utils.check_mysql_adminuser(rid)

    utils.check_vip(master_id)
    replica_vip = utils.get_vip_id(rid)
    utils.check_vip(rid, vip_id=replica_vip)

    utils.check_rpl_delay(rid)

    master_server = utils.get_builtin_instance(master_id).server
    master_ip = utils.check_allocate_ip(master_server)
    replica_server = utils.get_builtin_instance(rid).server
    rr_ip = utils.check_allocate_ip(replica_server)
    utils.check_rpl_consist(master_id, [rid], master_ip, [rr_ip])

    utils.check_rpl_topo_rr(self.group_id)
def migrate(self):
    """Migrate a read replica through the taskmanager and validate it.

    Raises:
        AssertionError: unless the instance now behind the virtual id keeps
            that virtual id and has a different ``id`` than before.
    """
    before = utils.get_builtin_instance(self._get_rid())
    old_rr_id = before.id
    rr_vid = before.virtual_instance_id

    rpc.call(utils.get_context(), "taskmanager",
             {"method": "migrate", "args": {'migrate_id': before.id}})
    utils.check_server_status(rr_vid, DBInstanceType.READ_REPLI,
                              InstanceTasks.NONE,
                              utils.ServiceStatuses.RUNNING,
                              timeout=600)

    after = utils.get_builtin_instance(rr_vid)
    assert (after.virtual_instance_id == rr_vid
            and after.id != old_rr_id), (rr_vid, after.id)

    current_master = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
    self._validate(current_master, rr_vid)
def validate(self):
    """Run the single-instance checks for ``self.inst_id``.

    Checks server status, the MySQL admin user, that MySQL is running, the
    VIP, the group's backup (status and path) and the single-instance
    replication topology. Sets ``self.group_id``, ``self.nova_instance``
    and ``self.backup_id`` along the way.
    """
    utils.check_server_status(
        self.inst_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False)

    db_record = utils.get_instance(self.inst_id)
    self.group_id = db_record.group_id
    utils.check_mysql_adminuser(db_record.id)

    builtin = utils.get_builtin_instance(db_record.id)
    self.nova_instance = builtin.server

    utils.check_mysql_is_running(db_record.id)
    utils.check_vip(db_record.id)

    self.backup_id = utils.check_backup(self.group_id)
    utils.check_backup_status(self.backup_id)
    utils.check_backup_path(self.backup_id)

    utils.check_rpl_topo_single(self.group_id)
def restart(self, instance_id=None):
    """Restart MySQL on an HA instance and validate the pair afterwards.

    Args:
        instance_id: instance to restart; when ``None``, the tenant's
            MASTER instance is looked up instead.

    Raises:
        AssertionError: if ``self.inst_id`` differs from the builtin
            instance's ``virtual_instance_id``.
    """
    self.inst_id = instance_id
    if self.inst_id is None:
        self.inst_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)

    utils.restart_mysql(self.inst_id)
    # NOTE(review): the restarted instance is checked with type=STANDBY even
    # though it defaults to the tenant's MASTER - confirm this is intended.
    utils.check_server_status(
        self.inst_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.STANDBY,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=240)
    self.vip_id = utils.get_vip_id(self.inst_id)

    builtin = utils.get_builtin_instance(self.inst_id)
    self.group_id = builtin.group_id
    assert self.inst_id == builtin.virtual_instance_id, (
        self.inst_id, builtin.virtual_instance_id)

    self.dbslave_id = utils.get_instance_id(self.group_id,
                                            DBInstanceType.STANDBY)
    LOG.info('master_id:%s slave_id:%s' % (self.inst_id, self.dbslave_id))

    # The standby is expected to show REBOOTING first and then return to NONE.
    for expected_task in (utils.tasks.InstanceTasks.REBOOTING,
                          utils.tasks.InstanceTasks.NONE):
        utils.check_server_status(
            self.dbslave_id,
            expected_task=expected_task,
            type=DBInstanceType.STANDBY,
            expected_svr_status=utils.ServiceStatuses.RUNNING,
            deleted=False,
            timeout=240)

    self.validate()
def validate(self):
    """Run the HA checks for the master/standby pair.

    Reads ``self.inst_id`` (master), ``self.dbslave_id`` (standby),
    ``self.group_id`` and ``self.vip_id``; sets ``self.backup_id``.

    Checks, in order: both instances are RUNNING, the admin user exists on
    both, MySQL is running on both, the master's VIP, the group's backup
    (status and path), replication delay on the standby and every
    non-deleted read replica of the group, replication consistency, and the
    HA replication topology.
    """
    utils.check_server_status(
        self.inst_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.MASTER,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=600)
    utils.check_server_status(
        self.dbslave_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.STANDBY,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=600)

    # Replication is checked on every read replica plus the standby.
    rr_items = InstanceGroupItem.get_by_gid_type(
        utils.get_context(), self.group_id, DBInstanceType.READ_REPLI,
        deleted=False)
    slave_ids = [rr.instance_id for rr in rr_items]
    slave_ids.append(self.dbslave_id)

    utils.check_mysql_adminuser(self.inst_id)
    utils.check_mysql_adminuser(self.dbslave_id)

    # Fix: the loop used to pass self.inst_id on every iteration, so the
    # standby's mysqld was never checked.
    for _id in [self.inst_id, self.dbslave_id]:
        utils.check_mysql_is_running(_id)

    utils.check_vip(self.inst_id, vip_id=self.vip_id)

    self.backup_id = utils.check_backup(self.group_id)
    utils.check_backup_status(self.backup_id)
    utils.check_backup_path(self.backup_id)

    for slave_id in slave_ids:
        utils.check_rpl_delay(slave_id)

    master_inst = utils.get_builtin_instance(self.inst_id)
    slave_inst = utils.get_builtin_instance(self.dbslave_id)
    master_ip = utils.check_allocate_ip(master_inst.server)
    slave_ip = utils.check_allocate_ip(slave_inst.server)
    LOG.info("master_ip:%s slave_ip:%s" % (master_ip, slave_ip))

    utils.check_rpl_consist(self.inst_id, slave_ids, master_ip, [slave_ip])
    utils.check_rpl_topo_ha(self.group_id)
def _create_RRs(self, rr_count=2, attach_same_vip=False):
    """Ensure read replicas exist for the tenant's master and validate them.

    If replicas already exist they are only validated. Otherwise one
    replica is created and validated first, then ``rr_count - 1`` more are
    created, every replica is validated and the first replica's VIP is
    checked with ``utils.check_lbs_vip``.

    Args:
        rr_count: total number of replicas to create when none exist.
        attach_same_vip: when true, the additional replicas are created
            with the first replica's VIP and the VIP is checked against all
            replicas; otherwise against the first replica's origin id only.
    """
    self.master_id = utils.get_instance_id_bytenant(DBInstanceType.MASTER)
    utils.check_server_status(
        self.master_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.MASTER,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=10)
    self.group_id = utils.get_builtin_instance(self.master_id).group_id

    existing = self._get_rids()
    if len(existing) != 0:
        # Replicas already exist: nothing to create, just validate them.
        for rid in existing:
            self._validate(self.master_id, rid)
        return

    first_vid = utils.create_rds_byclient(
        "RR", instance_id=self.master_id, attached_vip=None)
    self._validate(self.master_id, first_vid)
    first_origin_id = inst_utils.virtual_instid_2_origin_instid(
        first_vid, get_deleted=False)
    vip_id = utils.get_vip_id(first_vid, deleted=False)

    extra_vip = vip_id if attach_same_vip else None
    for _ in range(rr_count - 1):
        utils.create_rds_byclient(
            "RR", instance_id=self.master_id, attached_vip=extra_vip)

    all_rids = self._get_rids()
    for rid in all_rids:
        self._validate(self.master_id, rid)

    expected_members = all_rids if attach_same_vip else [first_origin_id]
    utils.check_lbs_vip(vip_id, expected_members)
def create_with_config(self):
    """Create an "HA" instance with the tenant's configuration and verify it.

    After validation, MySQL variables are checked on both the master and
    the standby, followed by ``utils.check_instace_config_is_same``.

    Raises:
        Exception: if ``utils.get_config_bytenant()`` returns a falsy value.
    """
    template = utils.get_config_bytenant()
    if not template:
        raise Exception("not found template config")

    self.inst_id = utils.create_rds_byclient("HA", config_id=template.id)
    utils.check_server_status(
        self.inst_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.MASTER,
        expected_svr_status=utils.ServiceStatuses.RUNNING,
        deleted=False,
        timeout=240)
    self.vip_id = utils.get_vip_id(self.inst_id)

    self.group_id = utils.get_builtin_instance(self.inst_id).group_id
    self.inst_id = utils.get_instance_id(self.group_id,
                                         DBInstanceType.MASTER)
    self.dbslave_id = utils.get_instance_id(self.group_id,
                                            DBInstanceType.STANDBY)
    LOG.info('master_id:%s slave_id:%s' % (self.inst_id, self.dbslave_id))
    self.validate()

    for member_id in (self.inst_id, self.dbslave_id):
        utils.check_instancedb_mysql_variables(member_id)
    utils.check_instace_config_is_same(self.inst_id)
def restart(self):
    """Restart MySQL on a read replica and validate afterwards.

    The replica is checked as RUNNING with no task before the restart, then
    with the REBOOTING task, then with no task again, before
    ``self.validate()`` is called.
    """
    self.inst_id = self._get_rid()
    rr_vid = utils.get_builtin_instance(self.inst_id).virtual_instance_id

    def _check(expected_task, timeout):
        utils.check_server_status(rr_vid, DBInstanceType.READ_REPLI,
                                  expected_task,
                                  utils.ServiceStatuses.RUNNING,
                                  timeout=timeout)

    _check(InstanceTasks.NONE, 600)
    utils.restart_mysql(rr_vid)
    _check(InstanceTasks.REBOOTING, 60)
    _check(InstanceTasks.NONE, 60)

    self.validate()
def restorable_time_failover_twice(self):
    """Check the restorable time window across two consecutive failovers.

    The window (``begin``/``end`` from ``utils.get_restorable_time``) is
    read before the first failover, after each failover, and once more
    after a 60 second sleep.

    Raises:
        AssertionError: if the window does not advance as asserted below.
    """
    def _trigger_failover(origin_id):
        rpc.call(utils.get_context(), "taskmanager",
                 {"method": "failover", "args": {'instance_id': origin_id}})

    def _wait_roles_swapped(demoted_id):
        # The virtual id must serve as MASTER again; the old master
        # becomes the STANDBY.
        utils.check_server_status(master_vid, DBInstanceType.MASTER,
                                  timeout=360)
        utils.check_server_status(demoted_id, DBInstanceType.STANDBY,
                                  timeout=360)

    self.create()
    first_master = utils.get_builtin_instance(self.inst_id)
    master_vid = first_master.virtual_instance_id
    rt_1 = utils.get_restorable_time(master_vid)

    # case1: failover only
    failover1_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    _trigger_failover(first_master.id)
    _wait_roles_swapped(first_master.id)
    rt_2 = utils.get_restorable_time(master_vid)
    assert rt_2.end > rt_1.end and rt_1.begin <= rt_2.begin, (
        "before first failover rt: [%s, %s], after first failover rt: [%s, %s]"
        % (rt_1.begin, rt_1.end, rt_2.begin, rt_2.end))

    # case2: restart & failover
    second_master = utils.get_builtin_instance(master_vid)
    _trigger_failover(second_master.id)
    _wait_roles_swapped(second_master.id)
    rt_3 = utils.get_restorable_time(master_vid)
    assert rt_3.begin > failover1_time and rt_3.end > rt_2.end, (
        rt_3.begin, failover1_time, rt_3.end, rt_2.end)

    for window in (rt_1, rt_2, rt_3):
        print("%s %s" % (window.begin, window.end))
    print("the first failover time: %s" % (failover1_time))

    time.sleep(60)
    rt_4 = utils.get_restorable_time(master_vid)
    print("%s %s" % (rt_4.begin, rt_4.end))
    assert rt_4.end > rt_3.end
def _failover(self, stop_mysqld=False, rm_mysql_data=False):
    """Break a read replica, trigger a failover and verify the recovery.

    Args:
        stop_mysqld: when true, ``utils.stop_mysqld`` is run against the
            replica's IP.
        rm_mysql_data: when true, ``utils.mysql_data_lost`` is run against
            the replica's IP.

    56 databases are generated on ``self.master_id`` beforehand and checked
    on the replica after the failover, followed by ``self.validate()``.
    """
    def _check_replica(status, timeout):
        utils.check_server_status(
            self.inst_id,
            expected_task=utils.tasks.InstanceTasks.NONE,
            type=DBInstanceType.READ_REPLI,
            expected_svr_status=status,
            deleted=False,
            timeout=timeout)

    self.inst_id = self._get_rid()
    _check_replica(utils.ServiceStatuses.RUNNING, 10)
    self.vip_id = utils.get_vip_id(self.inst_id)

    replica = utils.get_builtin_instance(self.inst_id)
    nova_instance = replica.server
    self.group_id = replica.db_info.group_id
    instance_id = self.inst_id

    ran_count = 56
    utils.generate_databases(self.master_id, count=ran_count)

    # Inject the requested faults, then wait for the replica to report
    # SHUTDOWN before asking the taskmanager to fail it over.
    ip = utils.check_allocate_ip(nova_instance)
    if stop_mysqld:
        utils.stop_mysqld(ip)
    if rm_mysql_data:
        utils.mysql_data_lost(ip)
    _check_replica(utils.ServiceStatuses.SHUTDOWN, 120)

    rpc.call(utils.get_context(), "taskmanager",
             {"method": "failover", "args": {'instance_id': instance_id}})
    _check_replica(utils.ServiceStatuses.RUNNING, 120)

    self.master_id = utils.get_instance_id(self.group_id,
                                           DBInstanceType.MASTER)
    self.inst_id = self.inst_id
    utils.check_generated_databases(self.inst_id, count=ran_count)
    self.validate()
def migrate(self, instance_id=None, _strategy=None):
    """Migrate the master or the standby of an HA pair and validate it.

    Which member is migrated is read from ``CONF.migrate_strategy``
    ('master' or 'standby'); the ``_strategy`` argument is not used.

    Args:
        instance_id: master instance id; when falsy, the tenant's MASTER
            instance is looked up instead.
        _strategy: unused.

    Raises:
        Exception: if ``CONF.migrate_strategy`` is neither 'master' nor
            'standby'.
        AssertionError: for the 'master' strategy, if the server id is
            unchanged after the migration.
    """
    def _check_running(inst_id, inst_type, timeout=120):
        utils.check_server_status(
            inst_id,
            expected_task=utils.tasks.InstanceTasks.NONE,
            type=inst_type,
            expected_svr_status=utils.ServiceStatuses.RUNNING,
            deleted=False,
            timeout=timeout)

    self.inst_id = instance_id or utils.get_instance_id_bytenant(
        DBInstanceType.MASTER)
    master_record = utils.get_instance(id=self.inst_id, deleted=0)
    self.group_id = master_record.group_id
    self.dbslave_id = utils.get_instance_id(self.group_id,
                                            DBInstanceType.STANDBY)

    _check_running(self.inst_id, DBInstanceType.MASTER)
    _check_running(self.dbslave_id, DBInstanceType.STANDBY)
    self.vip_id = utils.get_vip_id(master_record.id)

    # Pick the member to migrate according to the configured strategy.
    strategy = CONF.migrate_strategy
    if strategy == 'master':
        migrate_id = master_record.id
    elif strategy == 'standby':
        migrate_id = self.dbslave_id
    else:
        raise Exception("not found migrate_strategy ss%s" % strategy)

    target = utils.get_builtin_instance(migrate_id)
    nova_instance = target.server
    virtual_instance_id = None
    if strategy == 'master':
        virtual_instance_id = target.virtual_instance_id

    ran_count = random.randint(50, 100)
    utils.generate_databases(self.inst_id, count=ran_count)

    rpc_result = rpc.call(
        utils.get_context(), "taskmanager",
        {"method": "migrate", "args": {'migrate_id': migrate_id}})

    if strategy == 'master':
        raw_instance_id = inst_utils.virtual_instid_2_origin_instid(
            virtual_instance_id)
        new_server_id = utils.get_builtin_instance(raw_instance_id).server_id
        _check_running(raw_instance_id, DBInstanceType.MASTER)
        assert new_server_id != nova_instance.id
        self.inst_id = raw_instance_id
    elif strategy == "standby":
        # The RPC result carries the id of the migrated standby.
        _check_running(rpc_result['id'], DBInstanceType.STANDBY, timeout=123)
        self.dbslave_id = rpc_result['id']

    utils.check_generated_databases(self.inst_id, count=ran_count)
    utils.check_generated_databases(self.dbslave_id, count=ran_count)
    utils.clear_generated_databases(self.inst_id, count=ran_count)
    self.validate()
def _failover_test(self, group_id, trigger_inst_id, do_workload=False,
                   do_prepare=False, mysqld_killed=False,
                   host_rebooted=False, remove_tmp_initsql=False,
                   mysql_data_lost=False, check_vip=False,
                   check_rpl_consist=True, check_binlog_range=False):
    """Inject the requested faults, trigger a failover and verify the group.

    Args:
        group_id: replication group whose items are compared before/after.
        trigger_inst_id: instance on which the failover is triggered.
        do_workload: run the workload first (only when the instance is a
            MASTER).
        do_prepare: forwarded to the workload runner.
        mysqld_killed: crash mysqld and wait for the SHUTDOWN status.
        host_rebooted: reboot the host.
        remove_tmp_initsql: remove the temporary SQL file on the VM.
        mysql_data_lost: trigger MySQL data loss.
        check_vip: not used by this method.
        check_rpl_consist: check group size and member status afterwards.
        check_binlog_range: assert the restorable time end keeps advancing.

    Raises:
        AssertionError: if any of the post-failover checks fails.
    """
    LOG.info("Doing Failover Test, group_id:%s, instance_id:%s, do_workload:%s, do_prepare:%s." %
             (group_id, trigger_inst_id, do_workload, do_prepare))

    before_group_items = InstanceGroupItem.list_by_gid(
        test_utils.get_context(), group_id, deleted=False)
    before_items = set(item.type + "_" + item.instance_id
                       for item in before_group_items)

    before_instance = test_utils.get_builtin_instance(trigger_inst_id)
    before_rip = test_utils.check_allocate_ip(before_instance.server)
    before_origin_instid = before_instance.id
    before_type = before_instance.type

    rt_before = None
    if check_binlog_range:
        rt_before = test_utils.get_restorable_time(trigger_inst_id)

    # --- fault injection ---
    if do_workload and before_type == DBInstanceType.MASTER:
        FAILOVERInstance.__run_workload(do_prepare=do_prepare)

    if remove_tmp_initsql:
        FAILOVERInstance.__trigger_vm_remove_tmp_sql_file(trigger_inst_id)

    if mysqld_killed:
        FAILOVERInstance.__trigger_mysqld_crash(trigger_inst_id)
        test_utils.check_server_status(
            trigger_inst_id,
            expected_task=tasks.InstanceTasks.NONE,
            type=before_instance.type,
            expected_svr_status=test_utils.ServiceStatuses.SHUTDOWN,
            deleted=False,
            timeout=120)

    if host_rebooted:
        # When the host machine is rebooted no guestagent updates the
        # service status, so no SHUTDOWN status check is done here.
        FAILOVERInstance.__trigger_host_reboot(trigger_inst_id)

    if mysql_data_lost:
        FAILOVERInstance.__trigger_mysql_data_lost(trigger_inst_id)

    rpc.call(test_utils.get_context(), "taskmanager",
             {"method": "failover",
              "args": {'instance_id': before_origin_instid}},
             timeout=3600)

    ## check vip <--> rip mapping.
    ## vip should be changed in 10 seconds.
    if before_instance.type in (DBInstanceType.MASTER,
                                DBInstanceType.READ_REPLI):
        after_instance = test_utils.get_builtin_instance(trigger_inst_id)
        after_rip = test_utils.check_allocate_ip(after_instance.server)
        assert after_instance.vip == before_instance.vip and before_rip != after_rip

    if before_instance.type == DBInstanceType.MASTER:
        test_utils.check_server_status(
            before_instance.id,
            expected_task=tasks.InstanceTasks.NONE,
            type=DBInstanceType.MASTER,
            expected_svr_status=test_utils.ServiceStatuses.RUNNING,
            deleted=False,
            timeout=120)

    ## check replication topo
    after_group_items = InstanceGroupItem.list_by_gid(
        test_utils.get_context(), group_id, deleted=False)
    after_items = set(item.type + "_" + item.instance_id
                      for item in after_group_items)
    LOG.info("before " + str(before_items))
    LOG.info("after " + str(after_items))

    if check_rpl_consist:
        assert len(before_group_items) == len(after_group_items), "size of mysql cluster should be the same."
        for group_item in after_group_items:
            is_demoted_trigger = (
                group_item.type == DBInstanceType.STANDBY
                and group_item.instance_id == before_instance.id)
            if is_demoted_trigger:
                # Only require that the group item still exists; its
                # server status is not checked.
                item = InstanceGroupItem.get_by_instance_id(
                    test_utils.get_context(), group_item.instance_id,
                    deleted=False)
                assert item != None
                continue
            test_utils.check_server_status(
                group_item.instance_id,
                expected_task=tasks.InstanceTasks.NONE,
                type=group_item.type,
                expected_svr_status=test_utils.ServiceStatuses.RUNNING,
                deleted=False,
                timeout=120)

    if check_binlog_range:
        rt_after = test_utils.get_restorable_time(trigger_inst_id)
        assert rt_after.end > rt_before.end, (rt_after.end, rt_before.end)
        time.sleep(60)
        rt_after2 = test_utils.get_restorable_time(trigger_inst_id)
        assert rt_after2.end > rt_after.end, (rt_after2.end, rt_after.end)
def failover(self, instance_id=None, _strategy=None):
    """Stop mysqld on one HA member, trigger a failover and validate.

    Which member is failed is read from ``CONF.ha_failover_strategy``
    ('master' or 'standby'); the ``_strategy`` argument is not used.

    Args:
        instance_id: master instance id; when falsy, the tenant's MASTER
            instance is looked up instead.
        _strategy: unused.

    Raises:
        Exception: if ``CONF.ha_failover_strategy`` is neither 'master' nor
            'standby'.
    """
    def _check(inst_id, inst_type, status):
        utils.check_server_status(
            inst_id,
            expected_task=utils.tasks.InstanceTasks.NONE,
            type=inst_type,
            expected_svr_status=status,
            deleted=False,
            timeout=120)

    running = utils.ServiceStatuses.RUNNING

    self.inst_id = instance_id or utils.get_instance_id_bytenant(
        DBInstanceType.MASTER)
    self.group_id = utils.get_builtin_instance(self.inst_id).group_id
    self.dbslave_id = utils.get_instance_id(self.group_id,
                                            DBInstanceType.STANDBY)

    _check(self.inst_id, DBInstanceType.MASTER, running)
    _check(self.dbslave_id, DBInstanceType.STANDBY, running)
    self.vip_id = utils.get_vip_id(self.inst_id)

    # Pick the member to fail according to the configured strategy.
    strategy = CONF.ha_failover_strategy
    if strategy == 'master':
        lookup_id = self.inst_id
        victim_type = DBInstanceType.MASTER
    elif strategy == 'standby':
        lookup_id = self.dbslave_id
        victim_type = DBInstanceType.STANDBY
    else:
        raise Exception("not found ha_failover_strategy %s" % strategy)

    victim = utils.get_builtin_instance(lookup_id)
    nova_instance = victim.server
    victim_id = victim.id
    virtual_instance_id = None
    if strategy == 'master':
        virtual_instance_id = victim.virtual_instance_id

    rancount = random.randint(50, 100)
    utils.generate_databases(self.inst_id, count=rancount)

    ip = utils.check_allocate_ip(nova_instance)
    utils.stop_mysqld(ip, stop_ga=True)
    _check(victim_id, victim_type, utils.ServiceStatuses.SHUTDOWN)

    rpc.call(utils.get_context(), "taskmanager",
             {"method": "failover", "args": {'instance_id': victim_id}})

    if strategy == 'master':
        # The virtual id now resolves to the promoted instance.
        origin_inst_id = inst_utils.virtual_instid_2_origin_instid(
            virtual_instance_id)
        self.inst_id = origin_inst_id
        _check(origin_inst_id, DBInstanceType.MASTER, running)

    new_slave_id = utils.get_instance_id(self.group_id,
                                         DBInstanceType.STANDBY)
    _check(new_slave_id, DBInstanceType.STANDBY, running)
    self.dbslave_id = new_slave_id

    utils.check_generated_databases(self.inst_id, count=rancount)
    utils.clear_generated_databases(self.dbslave_id, count=rancount)
    self.validate()