def restorable_time_failover_twice(self):
    """Check that the restorable-time window keeps growing across two failovers.

    Creates an instance with ``self.create()``, then twice asks the
    "taskmanager" RPC endpoint to fail over the current master.  After each
    failover the window returned by ``utils.get_restorable_time`` is compared
    with the previous one, and after a final 60 second pause the end of the
    window must have advanced once more.

    Raises:
        AssertionError: if any of the restorable-time expectations fails.
    """

    def failover(instance_id):
        # Trigger a failover of the given instance through the task manager.
        rpc.call(utils.get_context(), "taskmanager",
                 {"method": "failover", "args": {'instance_id': instance_id}})

    def show_range(rt):
        # Single-argument print(...) emits the same text whether print is a
        # statement (Python 2) or a function (Python 3).
        print("%s %s" % (rt.begin, rt.end))

    self.create()
    master = utils.get_builtin_instance(self.inst_id)
    master_vid = master.virtual_instance_id
    rt_1 = utils.get_restorable_time(master_vid)

    # case1: failover only
    failover1_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    failover(master.id)
    utils.check_server_status(master_vid, DBInstanceType.MASTER, timeout=360)
    # The instance that used to be the master is expected to become standby.
    new_slave_id = master.id
    utils.check_server_status(new_slave_id, DBInstanceType.STANDBY, timeout=360)

    rt_2 = utils.get_restorable_time(master_vid)
    assert rt_2.end > rt_1.end and rt_1.begin <= rt_2.begin, \
        ("before first failover rt: [%s, %s], after first failover rt: [%s, %s]"
         % (rt_1.begin, rt_1.end, rt_2.begin, rt_2.end))

    # case2: second failover, issued against whichever instance the virtual
    # id resolves to now.
    master2 = utils.get_builtin_instance(master_vid)
    failover(master2.id)
    utils.check_server_status(master_vid, DBInstanceType.MASTER, timeout=360)
    new_slave_id2 = master2.id
    utils.check_server_status(new_slave_id2, DBInstanceType.STANDBY, timeout=360)

    rt_3 = utils.get_restorable_time(master_vid)
    # failover1_time is a formatted string, so this is a text comparison.
    # NOTE(review): assumes rt.begin/rt.end use the same
    # 'YYYY-mm-dd HH:MM:SS' layout -- confirm.
    assert rt_3.begin > failover1_time and rt_3.end > rt_2.end, \
        (rt_3.begin, failover1_time, rt_3.end, rt_2.end)

    show_range(rt_1)
    show_range(rt_2)
    show_range(rt_3)
    print("the first failover time: %s" % (failover1_time))

    # Give the window time to move before sampling it one last time.
    time.sleep(60)
    rt_4 = utils.get_restorable_time(master_vid)
    show_range(rt_4)
    assert rt_4.end > rt_3.end, (rt_4.end, rt_3.end)
def restore_to_point_in_time(self):
    """Restore an instance to a point in time, before and after deleting it.

    Steps:
      1. Create an instance and generate ``db_count`` databases on it.
      2. Record ``time1`` and then clear the generated databases.
      3. Wait until the restorable window ends after the clear.
      4. Restore to ``time1``, verify the databases are present on the
         restored instance, then delete the restored instance.
      5. Delete the original instance and repeat step 4.

    Raises:
        AssertionError: if the restorable window does not advance in time
            (other failures come from the ``utils`` checkers).
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    def restore_verify_delete():
        # Restore master_vid to time1, check the restored single instance
        # holds the generated databases, then delete it and wait for the
        # DELETED status.
        utils.restore_to_point_in_time_byclient(master_vid, time1)
        time.sleep(10)  # wait for compute instance appear
        restored = utils.get_restore_instance()
        utils.check_server_status(restored, DBInstanceType.SINGLE,
                                  timeout=RESTORE_TIME_OUT)
        utils.check_generated_databases(restored, count=db_count)
        utils.delete_rds_byclient(restored)
        utils.check_server_status(
            restored,
            expected_task=utils.tasks.InstanceTasks.NONE,
            type=DBInstanceType.SINGLE,
            expected_svr_status=utils.ServiceStatuses.DELETED,
            deleted=True,
            timeout=CONF.trove_delete_timeout)

    self.create()
    old_master_inst = utils.get_builtin_instance(self.inst_id)
    master_vid = old_master_inst.virtual_instance_id

    db_count = 100
    utils.generate_databases(self.inst_id, count=db_count)
    utils.check_generated_databases(self.inst_id, count=db_count)
    time.sleep(3)
    # time1 lies between "databases generated" and "databases cleared".
    time1 = datetime.datetime.now().strftime(time_format)
    utils.clear_generated_databases(self.inst_id, count=db_count)
    time.sleep(3)
    dt2 = datetime.datetime.now()

    # Poll until the restorable window reaches past dt2.  The loop used to be
    # unbounded and could hang the whole run; RESTORE_TIME_OUT is reused as
    # the limit.
    # NOTE(review): assumes RESTORE_TIME_OUT is in seconds, as its use as a
    # `timeout=` argument suggests -- confirm.
    deadline = time.time() + RESTORE_TIME_OUT
    while True:
        restorable_time = utils.get_restorable_time(self.inst_id)
        window_end = datetime.datetime.strptime(restorable_time.end,
                                                time_format)
        if window_end > dt2:
            break
        if time.time() >= deadline:
            raise AssertionError(
                "restorable time end %s did not pass %s within %s seconds"
                % (restorable_time.end, dt2.strftime(time_format),
                   RESTORE_TIME_OUT))
        time.sleep(3)

    restore_verify_delete()

    # delete original instance
    utils.delete_rds_byclient(master_vid)
    utils.check_server_status(
        self.inst_id,
        expected_task=utils.tasks.InstanceTasks.NONE,
        type=DBInstanceType.MASTER,
        expected_svr_status=utils.ServiceStatuses.DELETED,
        deleted=True,
        timeout=CONF.trove_delete_timeout)

    # restore to point in time after deleting
    restore_verify_delete()
def override_to_point_in_time(self):
    """Override an instance in place with two different points in time.

    Steps:
      1. Create an instance and generate ``db_count`` databases on it.
      2. Record ``time1``, clear the databases, then record ``time2``.
      3. Wait until the restorable window ends after ``time2`` was taken.
      4. Override with ``time1`` and check the databases are present.
      5. Override with ``time2`` and check that no generated database is left.
      6. Refresh ``self.inst_id`` / ``self.dbslave_id`` from the group and
         run ``self.validate()``.

    Raises:
        AssertionError: if the restorable window does not advance in time
            (other failures come from the ``utils`` checkers).
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    self.create()
    old_master_inst = utils.get_builtin_instance(self.inst_id)
    master_vid = old_master_inst.virtual_instance_id

    db_count = 100
    utils.generate_databases(self.inst_id, count=db_count)
    utils.check_generated_databases(self.inst_id, count=db_count)
    time.sleep(3)
    # time1: databases exist.  time2: databases have been cleared.
    time1 = datetime.datetime.now().strftime(time_format)
    utils.clear_generated_databases(self.inst_id, count=db_count)
    time.sleep(3)
    dt2 = datetime.datetime.now()
    time2 = dt2.strftime(time_format)

    # Poll until the restorable window reaches past dt2.  The loop used to be
    # unbounded and could hang the whole run; RESTORE_TIME_OUT is reused as
    # the limit.
    # NOTE(review): assumes RESTORE_TIME_OUT is in seconds, as its use as a
    # `timeout=` argument suggests -- confirm.
    deadline = time.time() + RESTORE_TIME_OUT
    while True:
        restorable_time = utils.get_restorable_time(self.inst_id)
        window_end = datetime.datetime.strptime(restorable_time.end,
                                                time_format)
        if window_end > dt2:
            break
        if time.time() >= deadline:
            raise AssertionError(
                "restorable time end %s did not pass %s within %s seconds"
                % (restorable_time.end, time2, RESTORE_TIME_OUT))
        time.sleep(3)

    # First override: back to the moment the databases still existed.
    utils.override_with_backup_byclient(master_vid, None, time1)
    utils.check_server_deleted(old_master_inst.id, DBInstanceType.PENDING,
                               timeout=RESTORE_TIME_OUT)
    new_master_inst1 = utils.get_builtin_instance(master_vid)
    self.inst_id = new_master_inst1.id
    utils.check_generated_databases(self.inst_id, count=db_count)
    time.sleep(3)

    # Second override: forward to the moment after the databases were cleared.
    utils.override_with_backup_byclient(master_vid, None, time2)
    utils.check_server_deleted(self.inst_id, DBInstanceType.PENDING,
                               timeout=RESTORE_TIME_OUT)
    utils.check_generated_databases(master_vid, count=0)

    # Re-read the current master/standby ids from the group before validating.
    self.inst_id = utils.get_instance_id(self.group_id, DBInstanceType.MASTER)
    self.dbslave_id = utils.get_instance_id(self.group_id,
                                            DBInstanceType.STANDBY)
    self.validate()
def _failover_test(self, group_id, trigger_inst_id, do_workload = False, do_prepare = False,
                   mysqld_killed = False, host_rebooted = False, remove_tmp_initsql = False,
                   mysql_data_lost = False, check_vip = False, check_rpl_consist = True,
                   check_binlog_range = False):
    """Inject the requested faults, trigger a failover and verify the group.

    Args:
        group_id: id of the instance group whose members are compared before
            and after the failover.
        trigger_inst_id: id used to look up the instance that is failed over.
        do_workload: run the workload first (only when the instance is a
            MASTER).
        do_prepare: forwarded to the workload runner.
        mysqld_killed: crash mysqld and wait for the SHUTDOWN status before
            failing over.
        host_rebooted: reboot the host before failing over.
        remove_tmp_initsql: remove the temporary init SQL file on the VM
            before failing over.
        mysql_data_lost: trigger the "mysql data lost" fault before failing
            over.
        check_vip: accepted for interface compatibility; this method does not
            read it.
        check_rpl_consist: verify group size and member statuses afterwards.
        check_binlog_range: verify that the end of the restorable time
            advances after the failover and again 60 seconds later.

    Raises:
        AssertionError: if any post-failover expectation fails.
    """
    LOG.info("Doing Failover Test, group_id:%s, instance_id:%s, do_workload:%s, do_prepare:%s." %
             (group_id, trigger_inst_id, do_workload, do_prepare))

    # Snapshot of the group before the failover.
    before_group_items = InstanceGroupItem.list_by_gid(test_utils.get_context(), group_id, deleted = False)
    before_items = set(x.type + "_" + x.instance_id for x in before_group_items)
    before_instance = test_utils.get_builtin_instance(trigger_inst_id)
    before_rip = test_utils.check_allocate_ip(before_instance.server)
    before_origin_instid = before_instance.id

    rt_before = None
    if check_binlog_range:
        rt_before = test_utils.get_restorable_time(trigger_inst_id)

    # Optional workload and fault injection.  The `FAILOVERInstance.__x`
    # spellings are kept exactly as in the original.
    if do_workload and before_instance.type == DBInstanceType.MASTER:
        FAILOVERInstance.__run_workload(do_prepare = do_prepare)

    if remove_tmp_initsql:
        FAILOVERInstance.__trigger_vm_remove_tmp_sql_file(trigger_inst_id)

    if mysqld_killed:
        FAILOVERInstance.__trigger_mysqld_crash(trigger_inst_id)
        test_utils.check_server_status(trigger_inst_id,
                                       expected_task=tasks.InstanceTasks.NONE,
                                       type=before_instance.type,
                                       expected_svr_status=test_utils.ServiceStatuses.SHUTDOWN,
                                       deleted=False, timeout=120)

    if host_rebooted:
        FAILOVERInstance.__trigger_host_reboot(trigger_inst_id)
        # when host-machine rebooted, no guestagent update service's status,
        # so no SHUTDOWN status check is made here.

    if mysql_data_lost:
        FAILOVERInstance.__trigger_mysql_data_lost(trigger_inst_id)

    rpc.call(test_utils.get_context(), "taskmanager",
             {"method": "failover", "args": {'instance_id': before_origin_instid}},
             timeout = 3600)

    ## check vip <--> rip mapping.
    ## vip should be changed in 10 seconds.
    if before_instance.type in (DBInstanceType.MASTER, DBInstanceType.READ_REPLI):
        after_instance = test_utils.get_builtin_instance(trigger_inst_id)
        after_nova_inst = after_instance.server
        after_rip = test_utils.check_allocate_ip(after_nova_inst)
        # Same vip, but it must now resolve to a different real ip.
        assert after_instance.vip == before_instance.vip and before_rip != after_rip, \
            (after_instance.vip, before_instance.vip, before_rip, after_rip)

    if before_instance.type == DBInstanceType.MASTER:
        test_utils.check_server_status(before_instance.id,
                                       expected_task = tasks.InstanceTasks.NONE,
                                       type=DBInstanceType.MASTER,
                                       expected_svr_status = test_utils.ServiceStatuses.RUNNING,
                                       deleted=False, timeout=120)

    ## check replication topo
    after_group_items = InstanceGroupItem.list_by_gid(test_utils.get_context(), group_id, deleted = False)
    after_items = set(x.type + "_" + x.instance_id for x in after_group_items)
    LOG.info("before " + str(before_items))
    LOG.info("after " + str(after_items))

    # NOTE(review): the source indentation was lost; the size check and the
    # per-member status checks are taken to be guarded by check_rpl_consist
    # -- confirm against the original layout.
    if check_rpl_consist:
        assert len(before_group_items) == len(after_group_items), \
            "size of mysql cluster should be the same."

        for group_item in after_group_items:
            if (group_item.type == DBInstanceType.STANDBY
                    and group_item.instance_id == before_instance.id):
                # For the failed-over instance now listed as STANDBY only the
                # group record is checked, not its running status.
                item = InstanceGroupItem.get_by_instance_id(test_utils.get_context(),
                                                            group_item.instance_id,
                                                            deleted = False)
                assert item is not None
                continue

            test_utils.check_server_status(group_item.instance_id,
                                           expected_task = tasks.InstanceTasks.NONE,
                                           type = group_item.type,
                                           expected_svr_status = test_utils.ServiceStatuses.RUNNING,
                                           deleted = False, timeout = 120)

    if check_binlog_range:
        rt_after = test_utils.get_restorable_time(trigger_inst_id)
        assert rt_after.end > rt_before.end, (rt_after.end, rt_before.end)
        # The window must keep moving after the failover as well.
        time.sleep(60)
        rt_after2 = test_utils.get_restorable_time(trigger_inst_id)
        assert rt_after2.end > rt_after.end, (rt_after2.end, rt_after.end)
def _get_restorable_time(sleep = None):
    """Return ``utils.get_restorable_time(master_vid)``, optionally sleeping first.

    Args:
        sleep: number of seconds to pass to ``time.sleep`` before the lookup;
            ``None`` (the default) skips the pause.

    ``master_vid`` is not defined in this function; it is resolved from a
    surrounding scope that is not visible here.
    """
    # Identity test: only a missing value skips the pause (0 still calls sleep).
    if sleep is not None:
        time.sleep(sleep)
    return utils.get_restorable_time(master_vid)