示例#1
0
文件: brick_app.py 项目: zezhen/brick
 def process(self, args):
     action = args.get('action')
     
     result = brick.execute(self.config, action, args)
     
     ret = util.json_to_str(result)
     print(ret[:1000])
     return ret
示例#2
0
    def _gateway_load_info(self, api):
        time.sleep(3)

        log_reader = LogReader(api.conf.log_file_prefix)

        gw_list = copy.deepcopy(self.GW_LIST)
        while True:
            line = log_reader.readline()
            if line == None:
                break

            if line.find(MSG_GATEWAY_ADD_ZK) != -1:
                gw = line.split('data:')[1]
                gw = ast.literal_eval(gw)

                if gw not in gw_list:
                    self.fail("FAIL, unexpected gateway information. gw:%s" % util.json_to_str(gw))
                else:
                    print gw
                    gw_list.remove(gw)

                continue
            else:
                for gw in gw_list:
                    find_str = MSG_GATEWAY_ADD_GW_PREFIX_FMT % (gw['ip'], gw['port'])

                    if line.find(find_str) != -1:
                        if line.find(MSG_GATEWAY_ADD_GW_POSTFIX) != -1:
                            print gw
                            gw_list.remove(gw)

        if len(gw_list) != 0:
            self.fail("FAIL, load gateway information. gw:%s" % util.json_to_str(gw_list))
        else:
            util.log('SUCCESS, load gateway information.')

        api.destroy()
示例#3
0
    def test_4_mgmt_is_isolated_with_red_failover(self):
        util.print_frame()

        util.iptables_print_list()

        cluster = filter(lambda x: x['cluster_name'] == 'network_isolation_cluster_1', config.clusters)[0]
        util.log(util.json_to_str(cluster))

        self.leader_cm = cluster['servers'][0]

        # MGMT
        mgmt_ip = cluster['servers'][0]['real_ip']
        mgmt_port = cluster['servers'][0]['cm_port']

        # Create cluster
        conf_checker = default_cluster.initialize_starting_up_smr_before_redis( cluster )
        self.assertIsNotNone(conf_checker, 'failed to initialize cluster')

        util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port)

        # Master must be the first pgs, cluster['servers'][0].
        to_be_master = cluster['servers'][0]
        m = util.get_server_by_role_and_pg(cluster['servers'], 'master', to_be_master['pg_id'])
        master_id = -1
        if m['id'] != to_be_master['id']:
            try_cnt = 0
            while master_id != to_be_master['id'] and try_cnt < 20:
                master_id = util.role_change(cluster['servers'][0], cluster['cluster_name'], to_be_master['id'])
                try_cnt += 1
                time.sleep(1)
            self.assertEquals(master_id, to_be_master['id'], 'change %d to a master fail' % to_be_master['id'])

        # Print initial state of cluster
        util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
        initial_state = []
        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, initial_state, check_quorum=True), 'failed to check cluster state')

        # Set SMR option (slave_idle_timeout)
        util.log('\n\n\n ### Set SMR option ###')
        for s in cluster['servers']:
            t = telnet.Telnet('SMR%d' % s['id'])
            self.assertEqual(t.connect(s['ip'], s['smr_mgmt_port']), 0,
                    'Failed to connect to smr. ADDR=%s:%d' % (s['ip'], s['smr_mgmt_port']))
            cmd = 'confset slave_idle_timeout_msec 18000'
            util.log('[%s:%d] >> %s' % (s['ip'], s['smr_mgmt_port'], cmd))
            t.write('confset slave_idle_timeout_msec 18000\r\n')
            reply = t.read_until('\r\n').strip()
            util.log('[%s:%d] << %s' % (s['ip'], s['smr_mgmt_port'], reply))
            self.assertEqual(reply, '+OK', 'Failed to set slave_idle_timeout, REPLY=%s' % reply)

        # Network isolation test
        for loop_cnt in range(3):
            # Block network
            util.log('\n\n\n ### BLOCK NETWORK, %d ### ' % loop_cnt)
            for s in cluster['servers']:
                self.assertTrue(util.iptables_drop('A', '127.0.0.100', s['smr_mgmt_port']), 'add a bloking role to iptables fail.')

            for i in range(4):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Check cluster state
            ok = False
            for i in range(7):
                isolated_states = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                time.sleep(1)

                state_transition_done = True
                for s in isolated_states:
                    if s['ip'] != '127.0.0.100':
                        continue

                    if s['active_role'] != '?' or s['mgmt_role'] != 'N':
                        state_transition_done = False

                if state_transition_done :
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state transition')

            pgs_list = util.get_pgs_info_list(mgmt_ip, mgmt_port, cluster)
            reds = filter(lambda x: x['color'] == 'RED', pgs_list)

            # Shutdown
            server = cluster['servers'][random.choice(reds)['pgs_id']]
            util.log( 'shutdown pgs%d while hanging.' % server['id'] )
            ret = testbase.request_to_shutdown_smr( server )
            self.assertEqual( ret, 0, 'failed to shutdown smr. id:%d' % server['id'] )
            ret = testbase.request_to_shutdown_redis( server )
            self.assertEqual( ret, 0, 'failed to shutdown redis. id:%d' % server['id'] )

            # Check state F
            max_try = 20
            expected = 'F'
            for i in range( 0, max_try):
                util.log('MGMT_IP:%s, MGMT_PORT:%d' % (mgmt_ip, mgmt_port))
                state = util._get_smr_state( server['id'], cluster['cluster_name'], mgmt_ip, mgmt_port )
                if expected == state:
                    break;
                time.sleep( 1 )
            self.assertEqual( expected , state,
                               'server%d - state:%s, expected:%s' % (server['id'], state, expected) )
            util.log( 'succeeded : pgs%d state changed to F.' % server['id'] )

            # Unblock network
            for s in cluster['servers']:
                self.assertTrue(util.iptables_drop('D', '127.0.0.100', s['smr_mgmt_port']), 'delete a bloking role to iptables fail.')

            # Check cluster state
            ok = False
            for i in range(10):
                final_state = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, final_state, check_quorum=True)

                state_consistency = True
                for s in final_state:
                    if s['pgs_id'] == server['id']:
                        continue

                    if is_pgs_normal(s) == False:
                        state_consistency = False

                if state_consistency:
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state consistency')

            # Recovery
            util.log( 'restart pgs%d.' % server['id'] )
            ret = testbase.request_to_start_smr( server )
            self.assertEqual( ret, 0, 'failed to start smr. id:%d' % server['id'] )

            ret = testbase.request_to_start_redis( server )
            self.assertEqual( ret, 0, 'failed to start redis. id:%d' % server['id'] )

            wait_count = 20
            ret = testbase.wait_until_finished_to_set_up_role( server, wait_count )
            self.assertEqual( ret, 0, 'failed to role change. smr_id:%d' % (server['id']) )

            redis = redis_mgmt.Redis( server['id'] )
            ret = redis.connect( server['ip'], server['redis_port'] )
            self.assertEqual( ret, 0, 'failed to connect to redis' )

            ok = False
            for i in xrange(5):
                ok = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, check_quorum=True)
                if ok:
                    break
                else:
                    time.sleep(1)
            self.assertTrue(ok, 'failed to check cluster state')

            # Reset SMR option (slave_idle_timeout)
            t = telnet.Telnet('SMR%d' % server['id'])
            self.assertEqual(t.connect(server['ip'], server['smr_mgmt_port']), 0,
                    'Failed to connect to smr. ADDR=%s:%d' % (server['ip'], server['smr_mgmt_port']))
            cmd = 'confset slave_idle_timeout_msec 18000'
            util.log('[%s:%d] >> %s' % (server['ip'], server['smr_mgmt_port'], cmd))
            t.write('confset slave_idle_timeout_msec 18000\r\n')
            reply = t.read_until('\r\n').strip()
            util.log('[%s:%d] << %s' % (server['ip'], server['smr_mgmt_port'], reply))
            self.assertEqual(reply, '+OK', 'Failed to set slave_idle_timeout, REPLY=%s' % reply)

        # Check state
        self.assertNotEqual(initial_state, None, 'initial_state is None')
        self.assertNotEqual(final_state, None, 'final_state is None')

        initial_state = sorted(initial_state, key=lambda x: int(x['pgs_id']))
        final_state = sorted(final_state, key=lambda x: int(x['pgs_id']))
        for i in range(len(final_state)):
            msg = 'ts (%d)%d -> (%d)%d' % (initial_state[i]['pgs_id'], initial_state[i]['active_ts'], final_state[i]['pgs_id'], final_state[i]['active_ts'])
            util.log(msg)
            if initial_state[i]['pgs_id'] == 1:
                self.assertNotEqual(initial_state[i]['active_ts'], final_state[i]['active_ts'], msg)

        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, check_quorum=True), 'failed to check cluster state')

        self.assertTrue(conf_checker.final_check())

        # Shutdown cluster
        default_cluster.finalize(cluster)
示例#4
0
    def test_3_some_pgs_is_isolated_2copy(self):
        util.print_frame()

        out = util.sudo('iptables -L')
        util.log('====================================================================')
        util.log('out : %s' % out)
        util.log('out.return_code : %d' % out.return_code)
        util.log('out.stderr : %s' % out.stderr)
        util.log('out.succeeded : %s' % out.succeeded)

        # Add forwarding role (127.0.0.100 -> 127.0.0.1)
        self.assertTrue(util.iptables_redirect('A', '127.0.0.100', '127.0.0.1'), 'add a forwarding role to iptables fail.')

        cluster = filter(lambda x: x['cluster_name'] == 'network_isolation_cluster_1_2copy', config.clusters)[0]
        util.log(util.json_to_str(cluster))

        # MGMT
        mgmt_ip = cluster['servers'][0]['ip']
        mgmt_port = cluster['servers'][0]['cm_port']

        # Create cluster
        conf_checker = default_cluster.initialize_starting_up_smr_before_redis( cluster )
        self.assertIsNotNone(conf_checker, 'failed to initialize cluster')

        # Place master on real ip address
        for pg_id in [0, 1]:
            m = util.get_server_by_role_and_pg(cluster['servers'], 'master', pg_id)
            s = util.get_server_by_role_and_pg(cluster['servers'], 'slave', pg_id)
            if m.has_key('ip') and m.has_key('real_ip'):
                if m['ip'] != m['real_ip']:
                    ret = util.role_change(cluster['servers'][0], cluster['cluster_name'], s['id'])
                    self.assertNotEquals(ret, -1, 'change %d to a master fail' % s['id'])

        # Print initial state of cluster
        util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
        initial_state = []
        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, initial_state, check_quorum=True), 'failed to check cluster state')

        # Network isolation test
        for cnt in range(3):
            # Block network
            util.log('\n\n\n ### BLOCK NETWORK, %d ### ' % cnt)
            self.assertTrue(util.iptables_drop('A', '127.0.0.100'), 'add a bloking role to iptables fail.')

            for i in range(4):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Check cluster state
            ok = False
            for i in range(7):
                isolated_states = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                time.sleep(1)

                state_transition_done = True
                for s in isolated_states:
                    if s['pgs_id'] == 0 or s['pgs_id'] == 1:
                        continue

                    if s['active_role'] != 'M' or s['mgmt_role'] != 'M':
                        state_transition_done = False

                    if s['quorum'] != 0:
                        state_transition_done = False

                if state_transition_done:
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state transition')

            # Unblock network
            util.log('\n\n\n ### UNBLOCK NETWORK, %d ### ' % cnt)
            self.assertTrue(util.iptables_drop('D', '127.0.0.100'), 'delete a bloking role to iptables fail.')

            # Check cluster state
            ok = False
            for i in range(7):
                final_state = []
                if util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, final_state, check_quorum=True) == False:
                    time.sleep(1)
                    continue

                state_consistency = True
                for s in final_state:
                    if s['pgs_id'] == 1:
                        continue

                    if is_pgs_normal(s) == False:
                        state_consistency = False

                if state_consistency:
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state consistency')

        # Check state
        self.assertNotEqual(initial_state, None, 'initial_state is None')
        self.assertNotEqual(final_state, None, 'final_state is None')

        # Delete forwarding role (127.0.0.100 -> 127.0.0.1)
        self.assertTrue(util.iptables_redirect('D', '127.0.0.100', '127.0.0.1'), 'delete a forwarding role to iptables fail.')

        self.assertTrue(conf_checker.final_check())

        # Shutdown cluster
        default_cluster.finalize(cluster)
示例#5
0
    def test_2_some_pgs_is_isolated(self):
        util.print_frame()

        util.iptables_print_list()

        # Add forwarding role (127.0.0.100 -> 127.0.0.1)
        self.assertTrue(util.iptables_redirect('A', '127.0.0.100', '127.0.0.1'), 'add a forwarding role to iptables fail.')

        cluster = filter(lambda x: x['cluster_name'] == 'network_isolation_cluster_2', config.clusters)[0]
        util.log(util.json_to_str(cluster))

        # MGMT
        mgmt_ip = cluster['servers'][0]['real_ip']
        mgmt_port = cluster['servers'][0]['cm_port']

        # Create cluster
        conf_checker = default_cluster.initialize_starting_up_smr_before_redis( cluster )
        self.assertIsNotNone(conf_checker, 'failed to initialize cluster')

        # Place master on virtual ip address in order to cause master election.
        pg_id = 0
        m = util.get_server_by_role_and_pg(cluster['servers'], 'master', pg_id)
        s = util.get_server_by_role_and_pg(cluster['servers'], 'slave', pg_id)
        if m.has_key('ip') == True and m.has_key('real_ip') == False:
            ret = util.role_change(cluster['servers'][0], cluster['cluster_name'], s['id'])
            self.assertNotEquals(ret, -1, 'change %d to a master fail' % s['id'])

        # Print initial state of cluster
        util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
        initial_state = []
        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, initial_state, check_quorum=True), 'failed to check cluster state')

        # Network isolation test
        for cnt in range(3):
            # Block network
            util.log('\n\n\n ### BLOCK NETWORK, %d ### ' % cnt)
            self.assertTrue(util.iptables_drop('A', '127.0.0.100'), 'add a bloking role to iptables fail.')

            for i in range(4):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Check cluster state
            ok = False
            for i in range(7):
                isolated_states = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                time.sleep(1)

                state_transition_done = True
                for s in isolated_states:
                    if s['pgs_id'] == 1:
                        continue

                    if s['active_role'] != '?' or s['mgmt_role'] != 'N':
                        state_transition_done = False

                if state_transition_done :
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state transition')

            # Unblock network
            util.log('\n\n\n ### UNBLOCK NETWORK, %d ### ' % cnt)
            self.assertTrue(util.iptables_drop('D', '127.0.0.100'), 'delete a bloking role to iptables fail.')

            # Check cluster state
            ok = False
            for i in range(7):
                final_state = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, final_state, check_quorum=True)

                state_consistency = True
                for s in final_state:
                    if s['pgs_id'] == 1:
                        continue

                    if is_pgs_normal(s) == False:
                        state_consistency = False

                if state_consistency:
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state consistency')

        # Check state
        self.assertNotEqual(initial_state, None, 'initial_state is None')
        self.assertNotEqual(final_state, None, 'final_state is None')

        initial_state = sorted(initial_state, key=lambda x: int(x['pgs_id']))
        final_state = sorted(final_state, key=lambda x: int(x['pgs_id']))
        for i in range(len(final_state)):
            msg = 'ts (%d)%d -> (%d)%d' % (initial_state[i]['pgs_id'], initial_state[i]['active_ts'], final_state[i]['pgs_id'], final_state[i]['active_ts'])
            util.log(msg)
            self.assertNotEqual(initial_state[i]['active_ts'], final_state[i]['active_ts'], msg)

        # Delete forwarding role (127.0.0.100 -> 127.0.0.1)
        self.assertTrue(util.iptables_redirect('D', '127.0.0.100', '127.0.0.1'), 'delete a forwarding role to iptables fail')

        self.assertTrue(conf_checker.final_check())

        # Shutdown cluster
        default_cluster.finalize(cluster)
示例#6
0
    def test_delete_smrlog_after_scaleout(self):
        util.print_frame()

        # start load generator
        util.log("start load_generator")
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            self.load_gen_thrd_list[i] = load_generator.LoadGenerator(i, ip, port)
            self.load_gen_thrd_list[i].start()

        time.sleep(5) # generate load for 5 sec
        util.log("started load_generator")

        # servers for scale out
        servers = [config.server4, config.server5, config.server6]
        leader_cm = self.cluster['servers'][0]

        # Scale out
        cluster = config.clusters[0]
        ret = util.pg_add(cluster, servers, leader_cm)
        self.assertEqual(True, ret, 'Scale out fail. util.pg_add returns false')

        time.sleep(5)
        # pg0 -> pg1
        cluster = config.clusters[1]
        ret = util.migration(cluster, 0, 1, 8000, 8191, 40000)
        self.assertEqual(True, ret, 'Migration Fail 0 -> 1')

        # get log file
        old_logs = {}
        for s in config.clusters[0]['servers']:
            parent_dir, log_dir = util.smr_log_dir(s['id'])
            path = '%s/%s' % (parent_dir, log_dir)
            old_logs[s['id']] = util.ls(path)

        # bgsave in order to make smrlogs deleted.
        for s in config.clusters[0]['servers']:
            bgsave_ret = util.bgsave(s)
            self.assertTrue(bgsave_ret, 'failed to bgsave. pgs%d' % s['id'])
            util.log('bgsave pgs%d is done.')

        # check consistency
        ok = True
        for j in range(len(self.load_gen_thrd_list)):
            self.assertTrue(self.load_gen_thrd_list[j].isConsistent(),
                    'Inconsistent after migration')

        # is smr-replicator delete smrlogs?
        i = 0
        while i < 20:
            i += 1
            # get current log files
            cur_logs = {}
            for s in config.clusters[0]['servers']:
                parent_dir, log_dir = util.smr_log_dir(s['id'])
                path = '%s/%s' % (parent_dir, log_dir)
                cur_logs[s['id']] = util.ls(path)

            # compare old and new
            temp_old_logs = copy.deepcopy(old_logs)
            for id, nl in cur_logs.items():
                ol = temp_old_logs.get(id)
                self.assertNotEqual(ol, None, "failed to check logfiles. old logs for smr-replicator '%d' is not exist." % id)

                for log in nl:
                    if log in ol:
                        ol.remove(log)

            ok = True
            for id, ol in temp_old_logs.items():
                if len(ol) == 0:
                    ok = False

            util.log('Loop %d ---------------------------------------------------------' % i)
            util.log('deleted smrlog files: %s' % util.json_to_str(temp_old_logs))

            if ok:
                break

            time.sleep(10)

        self.assertTrue(ok, 'smr-replicator does not delete smrlogs.')
        util.log('smr-replicator deletes smrlogs.')

        # check consistency of load_generator
        for i in range(len(self.load_gen_thrd_list)):
            self.load_gen_thrd_list[i].quit()
        for i in range(len(self.load_gen_thrd_list)):
            self.load_gen_thrd_list[i].join()
            self.assertTrue(self.load_gen_thrd_list[i].isConsistent(), 'Inconsistent after migration')
示例#7
0
    def test_delete_smrlog_after_scaleout(self):
        util.print_frame()

        # start load generator
        util.log("start load_generator")
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            self.load_gen_thrd_list[i] = load_generator.LoadGenerator(
                i, ip, port)
            self.load_gen_thrd_list[i].start()

        time.sleep(5)  # generate load for 5 sec
        util.log("started load_generator")

        # servers for scale out
        servers = [config.server4, config.server5, config.server6]
        leader_cm = self.cluster['servers'][0]

        # Scale out
        cluster = config.clusters[0]
        ret = util.pg_add(cluster, servers, leader_cm)
        self.assertEqual(True, ret,
                         'Scale out fail. util.pg_add returns false')

        time.sleep(5)
        # pg0 -> pg1
        cluster = config.clusters[1]
        ret = util.migration(cluster, 0, 1, 8000, 8191, 40000)
        self.assertEqual(True, ret, 'Migration Fail 0 -> 1')

        # get log file
        old_logs = {}
        for s in config.clusters[0]['servers']:
            parent_dir, log_dir = util.smr_log_dir(s['id'])
            path = '%s/%s' % (parent_dir, log_dir)
            old_logs[s['id']] = util.ls(path)

        # bgsave in order to make smrlogs deleted.
        for s in config.clusters[0]['servers']:
            bgsave_ret = util.bgsave(s)
            self.assertTrue(bgsave_ret, 'failed to bgsave. pgs%d' % s['id'])
            util.log('bgsave pgs%d is done.')

        # check consistency
        ok = True
        for j in range(len(self.load_gen_thrd_list)):
            self.assertTrue(self.load_gen_thrd_list[j].isConsistent(),
                            'Inconsistent after migration')

        # is smr-replicator delete smrlogs?
        i = 0
        while i < 20:
            i += 1
            # get current log files
            cur_logs = {}
            for s in config.clusters[0]['servers']:
                parent_dir, log_dir = util.smr_log_dir(s['id'])
                path = '%s/%s' % (parent_dir, log_dir)
                cur_logs[s['id']] = util.ls(path)

            # compare old and new
            temp_old_logs = copy.deepcopy(old_logs)
            for id, nl in cur_logs.items():
                ol = temp_old_logs.get(id)
                self.assertNotEqual(
                    ol, None,
                    "failed to check logfiles. old logs for smr-replicator '%d' is not exist."
                    % id)

                for log in nl:
                    if log in ol:
                        ol.remove(log)

            ok = True
            for id, ol in temp_old_logs.items():
                if len(ol) == 0:
                    ok = False

            util.log(
                'Loop %d ---------------------------------------------------------'
                % i)
            util.log('deleted smrlog files: %s' %
                     util.json_to_str(temp_old_logs))

            if ok:
                break

            time.sleep(10)

        self.assertTrue(ok, 'smr-replicator does not delete smrlogs.')
        util.log('smr-replicator deletes smrlogs.')

        # check consistency of load_generator
        for i in range(len(self.load_gen_thrd_list)):
            self.load_gen_thrd_list[i].quit()
        for i in range(len(self.load_gen_thrd_list)):
            self.load_gen_thrd_list[i].join()
            self.assertTrue(self.load_gen_thrd_list[i].isConsistent(),
                            'Inconsistent after migration')
示例#8
0
    def test_2_some_pgs_is_isolated(self):
        util.print_frame()

        out = util.sudo('iptables -L')
        util.log('====================================================================')
        util.log('out : %s' % out)
        util.log('out.return_code : %d' % out.return_code)
        util.log('out.stderr : %s' % out.stderr)
        util.log('out.succeeded : %s' % out.succeeded)

        # Add forwarding role (127.0.0.100 -> 127.0.0.1)
        out = util.sudo('iptables -t nat -A OUTPUT -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)

        out = util.sudo('iptables -t nat -A PREROUTING -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)

        cluster = filter(lambda x: x['cluster_name'] == 'network_isolation_cluster_2', config.clusters)[0]
        util.log(util.json_to_str(cluster))

        # MGMT
        mgmt_ip = cluster['servers'][0]['real_ip']
        mgmt_port = cluster['servers'][0]['cm_port']

        # Create cluster
        ret = default_cluster.initialize_starting_up_smr_before_redis( cluster )
        self.assertEqual(0, ret, 'failed to TestMaintenance.initialize')

        # Place master on virtual ip address in order to cause master election.
        pg_id = 0
        m = util.get_server_by_role_and_pg(cluster['servers'], 'master', pg_id)
        s = util.get_server_by_role_and_pg(cluster['servers'], 'slave', pg_id)
        if m.has_key('ip') == True and m.has_key('real_ip') == False:
            ret = util.role_change(cluster['servers'][0], cluster['cluster_name'], s['id'])
            self.assertNotEquals(ret, -1, 'change %d to a master fail' % s['id'])

        # Print initial state of cluster
        util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
        initial_state = []
        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, initial_state, check_quorum=True), 'failed to check cluster state')

        # Network isolation test
        for cnt in range(3):
            # Block network
            util.log('\n\n\n ### BLOCK NETWORK, %d ### ' % cnt)
            out = util.sudo('iptables -A OUTPUT -d 127.0.0.100 -j DROP')
            self.assertTrue(out.succeeded, 'add a bloking role to iptables fail. output:%s' % out)

            for i in range(4):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Check cluster state
            ok = False
            for i in range(7):
                isolated_states = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                time.sleep(1)

                state_transition_done = True
                for s in isolated_states:
                    if s['pgs_id'] == 1:
                        continue

                    if s['active_role'] != '?' or s['mgmt_role'] != 'N':
                        state_transition_done = False

                if state_transition_done :
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state transition')

            # Unblock network
            util.log('\n\n\n ### UNBLOCK NETWORK, %d ### ' % cnt)
            out = util.sudo('iptables -D OUTPUT -d 127.0.0.100 -j DROP')
            self.assertTrue(out.succeeded, 'delete a bloking role to iptables fail. output:%s' % out)

            # Check cluster state
            ok = False
            for i in range(7):
                final_state = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, final_state, check_quorum=True)

                state_consistency = True
                for s in final_state:
                    if s['pgs_id'] == 1:
                        continue

                    if s['active_role'] != s['mgmt_role']:
                        state_consistency = False

                if state_consistency:
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state consistency')

        # Check state
        self.assertNotEqual(initial_state, None, 'initial_state is None')
        self.assertNotEqual(final_state, None, 'final_state is None')

        initial_state = sorted(initial_state, key=lambda x: int(x['pgs_id']))
        final_state = sorted(final_state, key=lambda x: int(x['pgs_id']))
        for i in range(len(final_state)):
            msg = 'ts (%d)%d -> (%d)%d' % (initial_state[i]['pgs_id'], initial_state[i]['active_ts'], final_state[i]['pgs_id'], final_state[i]['active_ts'])
            util.log(msg)
            self.assertNotEqual(initial_state[i]['active_ts'], final_state[i]['active_ts'], msg)

        # Shutdown cluster
        ret = default_cluster.finalize( cluster )
        self.assertEqual(ret, 0, 'failed to TestMaintenance.finalize')

        # Delete forwarding role (127.0.0.100 -> 127.0.0.1)
        out = util.sudo('iptables -t nat -D OUTPUT -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)

        out = util.sudo('iptables -t nat -D PREROUTING -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)
示例#9
0
    def test_7_dirty_network_fi(self):
        util.print_frame()
        clnts = []

        try:
            util.iptables_print_list()

            # Add forwarding role
            self.assertTrue(util.iptables_redirect('A', '127.0.0.100', '127.0.0.1'), 'add a forwarding role to iptables fail.')

            cluster_name = 'network_isolation_cluster_1'
            cluster = filter(lambda x: x['cluster_name'] == cluster_name, config.clusters)[0]
            util.log(util.json_to_str(cluster))

            self.leader_cm = cluster['servers'][0]

            # MGMT
            mgmt_ip = cluster['servers'][0]['real_ip']
            mgmt_port = cluster['servers'][0]['cm_port']

            # Create cluster
            conf_checker = default_cluster.initialize_starting_up_smr_before_redis( cluster, 
                    conf={'cm_context':'applicationContext-fi.xml'})
            self.assertIsNotNone(conf_checker, 'failed to initialize cluster')

            # Print initial state of cluster
            util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
            initial_state = []
            self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, initial_state, check_quorum=True), 'failed to check cluster state')

            # Start crc16 client
            for s in cluster['servers']:
                c = load_generator_crc16.Crc16Client(s['id'], s['ip'], s['redis_port'], 600, verbose=False)
                c.start()
                clnts.append(c)

            # Network isolation test
            cmfi = fi_confmaster.ConfmasterWfFi(['ra', 'me', 'yj', 'bj', 'mg'], 
                                                ['lconn', 'slave', 'master', 'setquorum'], [True, False], 1)

            for fi in cmfi:
                # Block network
                util.log('\n\n\n ### BLOCK NETWORK, %s ### ' % str(fi))
                ret = block_network(cluster, mgmt_ip, mgmt_port)
                self.assertTrue(ret, '[%s] failed to block network.' % str(fi))

                for i in xrange(4):
                    util.log('waiting... %d' % (i + 1))
                    time.sleep(1)

                # Check cluster state
                ok = False
                for i in xrange(10):
                    isolated_states = []
                    util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)

                    state_transition_done = True
                    for s in isolated_states:
                        if s['ip'] != '127.0.0.100':
                            continue

                        if s['active_role'] != '?' or s['mgmt_role'] != 'N':
                            state_transition_done = False

                    if state_transition_done:
                        ok = True
                        break
                    time.sleep(1)
                self.assertTrue(ok, 'Fail, state transition')

                # Fault injection
                try:
                    self.assertTrue(fi_confmaster.fi_add(fi, 1, mgmt_ip, mgmt_port), 
                            "Confmaster command fail. fi: %s" % str(fi))
                except ValueError as e:
                    self.fail("Confmaster command error. cmd: \"%s\", reply: \"%s\"" % (cmd, reply))

                # Unblock network
                util.log('\n\n\n ### UNBLOCK NETWORK, %s ### ' % str(fi))
                ret = unblock_network(cluster, mgmt_ip, mgmt_port, None)
                self.assertTrue(ret, '[%s] failed to unblock network.' % str(fi))

                for i in xrange(4):
                    util.log('waiting... %d' % (i + 1))
                    time.sleep(1)

                # Check cluster state
                ok = False
                for i in xrange(10):
                    isolated_states = []
                    ok = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                    if ok:
                        break
                    time.sleep(1)
                self.assertTrue(ok, '[%s] Fail. unstable cluster.' % str(fi))

                check_cluster = False

                # 'bj', 'slave'
                if fi[0] == 'bj' and fi[1] == 'slave':
                    m, s1, s2 = util.get_mss(cluster)
                    ret = util.role_lconn(s1)
                    self.assertEqual("+OK\r\n", ret, '[%s] role lconn fail.' % str(fi))
                    check_cluster = True
                # 'me', 'lconn'
                elif fi[0] == 'me':
                    m, s1, s2 = util.get_mss(cluster)
                    ret = util.role_lconn(m)
                    self.assertEqual("+OK\r\n", ret, '[%s] role lconn fail.' % str(fi))
                    check_cluster = True
                # 'setquorum'
                elif fi[1] == 'setquorum':
                    m, s1, s2 = util.get_mss(cluster)
                    ret = util.cmd_to_smr_addr(s1['ip'], s1['smr_mgmt_port'], 'fi delay sleep 1 8000\r\n', timeout=20)
                    self.assertEqual("+OK\r\n", ret, '[%s] "fi delay sleep 1 8000" fail. ret: "%s"' % (str(fi), ret))
                    check_cluster = True

                if check_cluster:
                    # Check cluster state
                    ok = False
                    for i in xrange(20):
                        isolated_states = []
                        ok = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                        if ok:
                            break
                        time.sleep(1)
                    self.assertTrue(ok, '[%s] Fail. unstable cluster.' % str(fi))

                # Check fault injection
                ok = False
                for i in xrange(10):
                    count = fi_confmaster.fi_count(fi, mgmt_ip, mgmt_port)
                    if count == 0:
                        ok = True
                        break
                    time.sleep(0.5)
                self.assertTrue(ok, "[%s] fail. failt injection had not been triggered." % str(fi))

                for c in clnts:
                    self.assertTrue(c.is_consistency(), '[%s] data consistency error!' % str(fi))

            for c in clnts:
                self.assertTrue(c.is_consistency(), '[%s] data consistency error!' % str(fi))

            # Go back to initial configuration
            cmfi.init()
            for fi in cmfi:
                try:
                    self.assertTrue(fi_confmaster.fi_add(fi, 0, mgmt_ip, mgmt_port),
                            "Confmaster command fail. fi: %s" % str(fi))
                except ValueError as e:
                    self.fail("Confmaster command error. cmd: \"%s\", reply: \"%s\"" % (cmd, reply))

            # Wait until workflows done
            ret = util.await(60, True)(
                    lambda cinfo: cinfo['wf'] == 0,
                    lambda : util.cluster_info(mgmt_ip, mgmt_port, cluster['cluster_name']))
            self.assertTrue(ret, 'There are still some workflows.')

            self.assertTrue(conf_checker.final_check())

            # Shutdown cluster
            default_cluster.finalize(cluster)

        finally:
            for c in clnts:
                c.quit()
            for c in clnts:
                c.join()

            # Delete forwarding role
            self.assertTrue(util.iptables_redirect('D', '127.0.0.100', '127.0.0.1'), 'add a forwarding role to iptables fail.')
示例#10
0
    def test_gateway_add_del(self):
        util.print_frame()

        api = ARC_API(ZK_ADDR, CLUSTER_NAME, logFilePrefix = self.arcci_log, so_path = self.so_path)

        # Add gateway
        gw_port = 10000
        gw_id = 10
        cmd = 'gw_add %s %d %s %s %d' % (CLUSTER_NAME, gw_id, HOST_NAME, HOST_IP, gw_port)
        ret = util.cm_command(MGMT_IP, MGMT_PORT, cmd)
        if ret != None and len(ret) >= 2:
            ret = ret[:-2]
        util.log('cmd:"%s", ret:"%s"' % (cmd, ret))
        if not ret.startswith('{"state":"success","msg":"+OK"}'):
            self.fail('failed to add gateway')

        # Deploy gateway
        server = self.cluster['servers'][0]
        ret = util.deploy_gateway(gw_id)
        self.assertTrue(ret, 'failed to deploy_gateway')

        # Start gateway
        ret = util.start_gateway( gw_id, server['ip'], MGMT_PORT, server['cluster_name'], gw_port)
        self.assertEqual( ret, 0, 'failed : start gateawy%d' % gw_id )

        time.sleep(5)

        # Check if gateway is added
        added_gw = {"ip":HOST_IP,"port":gw_port}

        log_reader = LogReader(api.conf.log_file_prefix)
        found = False
        while True:
            line = log_reader.readline()
            if line == None:
                break

            if line.find(MSG_GATEWAY_ADD_ZK) == -1:
                continue

            gw = line.split('data:')[1]
            gw = ast.literal_eval(gw)

            if gw['ip'] == added_gw['ip'] and gw['port'] == added_gw['port']:
                found = True

        if not found:
            self.fail('FAIL, load gateway information, gw:%s' % util.json_to_str(added_gw))
        else:
            util.log('SUCCESS, load gateway information.')

        # Delete gateway
        cmd = 'gw_del %s %d' % (CLUSTER_NAME, gw_id)
        ret = util.cm_command(MGMT_IP, MGMT_PORT, cmd)
        if ret != None and len(ret) >= 2:
            ret = ret[:-2]
        util.log('cmd:"%s", ret:"%s"' % (cmd, ret))
        if not ret.startswith('{"state":"success","msg":"+OK"}'):
            self.fail('failed to delete gateway')

        # Check if gateway is deleted
        deleted_gw = {"ip":HOST_IP,"port":gw_port}

        found = check_gateway_deleted(deleted_gw, api)
        if not found:
            self.fail('FAIL, delete gateway information, gw:%s' % util.json_to_str(deleted_gw))
        else:
            util.log('SUCCESS, delete gateway information.')

        # Stop gateway
        ret = util.shutdown_gateway(gw_id, gw_port)
        self.assertEqual(ret, 0, 'failed : shutdown gateawy%d' % gw_id)

        api.destroy()
    def test_6_repeat_isolation_and_no_opinion_linepay(self):
        util.print_frame()

        out = util.sudo('iptables -L')
        util.log('====================================================================')
        util.log('out : %s' % out)
        util.log('out.return_code : %d' % out.return_code)
        util.log('out.stderr : %s' % out.stderr)
        util.log('out.succeeded : %s' % out.succeeded)

        # Add forwarding role
        out = util.sudo('iptables -t nat -A OUTPUT -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)
        out = util.sudo('iptables -t nat -A PREROUTING -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)

        out = util.sudo('iptables -t nat -A OUTPUT -d 127.0.0.101 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)
        out = util.sudo('iptables -t nat -A PREROUTING -d 127.0.0.101 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)

        out = util.sudo('iptables -t nat -A OUTPUT -d 127.0.0.102 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)
        out = util.sudo('iptables -t nat -A PREROUTING -d 127.0.0.102 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)

        cluster_name = 'no_opinion'
        cluster = filter(lambda x: x['cluster_name'] == cluster_name, config.clusters)[0]
        util.log(util.json_to_str(cluster))

        self.leader_cm = cluster['servers'][0]

        # MGMT
        mgmt_ip = cluster['servers'][0]['real_ip']
        mgmt_port = cluster['servers'][0]['cm_port']

        # Create cluster
        ret = default_cluster.initialize_starting_up_smr_before_redis( cluster )
        self.assertEqual(0, ret, 'failed to TestMaintenance.initialize')

        # Print initial state of cluster
        util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
        initial_state = []
        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, initial_state, check_quorum=True), 'failed to check cluster state')

        # Network isolation test
        loop_cnt = 0
        while (loop_cnt < 20):
            loop_cnt += 1
            # Block network
            util.log('\n\n\n ### BLOCK NETWORK, %d ### ' % loop_cnt)
            out = util.sudo('iptables -A OUTPUT -d 127.0.0.102 -j DROP')
            self.assertTrue(out.succeeded, 'add a bloking role to iptables fail. output:%s' % out)

            for i in range(1):
                util.log('waiting... %d' % (i + 1))
                time.sleep(0.1)

            out = util.sudo('iptables -A OUTPUT -d 127.0.0.100 -j DROP')
            self.assertTrue(out.succeeded, 'add a bloking role to iptables fail. output:%s' % out)

            for i in range(3):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1.2)

            out = util.sudo('iptables -A OUTPUT -d 127.0.0.101 -j DROP')
            self.assertTrue(out.succeeded, 'add a bloking role to iptables fail. output:%s' % out)

            for i in range(1):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Unblock network
            util.log('\n\n\n ### UNBLOCK NETWORK, %d ### ' % loop_cnt)
            out = util.sudo('iptables -D OUTPUT -d 127.0.0.102 -j DROP')
            self.assertTrue(out.succeeded, 'delete a bloking role to iptables fail. output:%s' % out)
            for i in range(0):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)
            out = util.sudo('iptables -D OUTPUT -d 127.0.0.100 -j DROP')
            self.assertTrue(out.succeeded, 'delete a bloking role to iptables fail. output:%s' % out)
            for i in range(0):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)
            out = util.sudo('iptables -D OUTPUT -d 127.0.0.101 -j DROP')
            self.assertTrue(out.succeeded, 'delete a bloking role to iptables fail. output:%s' % out)
            for i in range(3):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Print state of cluster
            util.log('\n ### STATE OF CLUSTER ### ')
            cluster_state = False
            for i in range(10):
                cluster_state = util.check_cluster(cluster_name, mgmt_ip, mgmt_port, initial_state, check_quorum=True)
                if cluster_state == True:
                    break
                else:
                    time.sleep(1)
            self.assertTrue(cluster_state, 'failed to check cluster state')

            all_in_f = True
            for s in cluster['servers']:
                if checkLastState(mgmt_ip, s['cm_port'], cluster_name, 0, 'F') == False:
                    all_in_f = False
                    break

            self.assertFalse(all_in_f, 'PGS0`s last state remains in F')

        # Shutdown cluster
        ret = default_cluster.finalize( cluster )
        self.assertEqual(ret, 0, 'failed to TestMaintenance.finalize')

        # Delete forwarding role
        out = util.sudo('iptables -t nat -D OUTPUT -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)
        out = util.sudo('iptables -t nat -D PREROUTING -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)

        out = util.sudo('iptables -t nat -D OUTPUT -d 127.0.0.101 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)
        out = util.sudo('iptables -t nat -D PREROUTING -d 127.0.0.101 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)

        out = util.sudo('iptables -t nat -D OUTPUT -d 127.0.0.102 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)
        out = util.sudo('iptables -t nat -D PREROUTING -d 127.0.0.102 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)
示例#12
0
    def test_7_dirty_network_fi(self):
        util.print_frame()
        clnts = []

        try:
            out = util.sudo('iptables -L')
            util.log('====================================================================')
            util.log('out : %s' % out)
            util.log('out.return_code : %d' % out.return_code)
            util.log('out.stderr : %s' % out.stderr)
            util.log('out.succeeded : %s' % out.succeeded)

            # Add forwarding role
            out = util.sudo('iptables -t nat -A OUTPUT -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
            self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)
            out = util.sudo('iptables -t nat -A PREROUTING -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
            self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)

            cluster_name = 'network_isolation_cluster_1'
            cluster = filter(lambda x: x['cluster_name'] == cluster_name, config.clusters)[0]
            util.log(util.json_to_str(cluster))

            self.leader_cm = cluster['servers'][0]

            # MGMT
            mgmt_ip = cluster['servers'][0]['real_ip']
            mgmt_port = cluster['servers'][0]['cm_port']

            # Create cluster
            ret = default_cluster.initialize_starting_up_smr_before_redis( cluster, 
                    conf={'cm_context':'applicationContext-fi.xml'})
            self.assertEqual(0, ret, 'failed to TestMaintenance.initialize')

            # Print initial state of cluster
            util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
            initial_state = []
            self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, initial_state, check_quorum=True), 'failed to check cluster state')

            # Start crc16 client
            for s in cluster['servers']:
                c = load_generator_crc16.Crc16Client(s['id'], s['ip'], s['gateway_port'], 3000, verbose=False)
                c.start()
                clnts.append(c)

            # Network isolation test
            cmfi = fi_confmaster.ConfmasterWfFi(['ra', 'qa', 'me', 'yj', 'bj', 'mg'], 
                                                ['lconn', 'slave', 'master', 'setquorum'], [True, False], 1)

            for fi in cmfi:
                # Block network
                util.log('\n\n\n ### BLOCK NETWORK, %s ### ' % str(fi))
                ret = block_network(cluster, mgmt_ip, mgmt_port)
                self.assertTrue(ret, '[%s] failed to block network.' % str(fi))

                for i in xrange(4):
                    util.log('waiting... %d' % (i + 1))
                    time.sleep(1)

                # Check cluster state
                ok = False
                for i in xrange(10):
                    isolated_states = []
                    util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)

                    state_transition_done = True
                    for s in isolated_states:
                        if s['ip'] != '127.0.0.100':
                            continue

                        if s['active_role'] != '?' or s['mgmt_role'] != 'N':
                            state_transition_done = False

                    if state_transition_done:
                        ok = True
                        break
                    time.sleep(1)
                self.assertTrue(ok, 'Fail, state transition')

                # Fault injection
                try:
                    self.assertTrue(fi_confmaster.fi_add(fi, 1, mgmt_ip, mgmt_port), 
                            "Confmaster command fail. fi: %s" % str(fi))
                except ValueError as e:
                    self.fail("Confmaster command error. cmd: \"%s\", reply: \"%s\"" % (cmd, reply))

                # Unblock network
                util.log('\n\n\n ### UNBLOCK NETWORK, %s ### ' % str(fi))
                ret = unblock_network(cluster, mgmt_ip, mgmt_port, None)
                self.assertTrue(ret, '[%s] failed to unblock network.' % str(fi))

                for i in xrange(4):
                    util.log('waiting... %d' % (i + 1))
                    time.sleep(1)

                # Check cluster state
                ok = False
                for i in xrange(10):
                    isolated_states = []
                    ok = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                    if ok:
                        break
                    time.sleep(1)
                self.assertTrue(ok, '[%s] Fail. unstable cluster.' % str(fi))

                check_cluster = False

                # 'bj', 'slave'
                if fi[0] == 'bj' and fi[1] == 'slave':
                    m, s1, s2 = util.get_mss(cluster)
                    ret = util.role_lconn(s1)
                    self.assertEqual("+OK\r\n", ret, '[%s] role lconn fail.' % str(fi))
                    check_cluster = True
                # 'me', 'lconn'
                elif fi[0] == 'me' and fi[1] == 'lconn':
                    m, s1, s2 = util.get_mss(cluster)
                    ret = util.role_lconn(m)
                    self.assertEqual("+OK\r\n", ret, '[%s] role lconn fail.' % str(fi))
                    check_cluster = True
                # 'qa', 'setquorum'
                elif fi[0] == 'qa' and fi[1] == 'setquorum':
                    m, s1, s2 = util.get_mss(cluster)

                    # shutdown
                    ret = testbase.request_to_shutdown_smr(s1)
                    self.assertEqual(0, ret, '[%s] failed to shutdown smr%d' % (str(fi), s1['id']))
                    ret = testbase.request_to_shutdown_redis(s1)
                    self.assertEqual(0, ret, '[%s] failed to shutdown redis%d' % (str(fi), s1['id']))

                    # Check quorum
                    q = -1
                    for q_cnt in xrange(20):
                        q = util.get_quorum(m)
                        if q == 1:
                            break
                        time.sleep(1)
                    self.assertEquals(1, q, "[%s] check quorum fail." % str(fi))

                    # Modify quorum
                    ret = util.cmd_to_smr_addr(m['ip'], m['smr_mgmt_port'], 'setquorum 0\r\n')
                    self.assertEqual("+OK\r\n", ret, '[%s] "setquorum 0" fail.' % str(fi))

                    # Check quorum
                    q = -1
                    for q_cnt in xrange(20):
                        q = util.get_quorum(m)
                        if q == 1:
                            break
                        time.sleep(1)
                    self.assertEquals(1, q, "[%s] check quorum fail." % str(fi))

                    # recovery
                    ret = testbase.request_to_start_smr(s1)
                    self.assertEqual(0, ret, '[%s] failed to start smr' % str(fi))
                    ret = testbase.request_to_start_redis(s1, max_try=120)
                    self.assertEqual(0, ret, '[%s] failed to start redis' % str(fi))
                    ret = testbase.wait_until_finished_to_set_up_role(s1, 11)
                    self.assertEqual(0, ret, '[%s] failed to role change. smr_id:%d' % (str(fi), s1['id']))

                    check_cluster = True

                # 'setquorum'
                elif fi[1] == 'setquorum':
                    m, s1, s2 = util.get_mss(cluster)
                    ret = util.cmd_to_smr_addr(s1['ip'], s1['smr_mgmt_port'], 'fi delay sleep 1 8000\r\n', timeout=20)
                    self.assertEqual("+OK\r\n", ret, '[%s] "fi delay sleep 1 8000" fail. ret: "%s"' % (str(fi), ret))
                    check_cluster = True

                if check_cluster:
                    # Check cluster state
                    ok = False
                    for i in xrange(20):
                        isolated_states = []
                        ok = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                        if ok:
                            break
                        time.sleep(1)
                    self.assertTrue(ok, '[%s] Fail. unstable cluster.' % str(fi))

                # Check fault injection
                ok = False
                for i in xrange(10):
                    count = fi_confmaster.fi_count(fi, mgmt_ip, mgmt_port)
                    if count == 0:
                        ok = True
                        break
                    time.sleep(0.5)
                self.assertTrue(ok, "[%s] fail. failt injection had not been triggered." % str(fi))

            # Shutdown cluster
            ret = default_cluster.finalize( cluster )
            self.assertEqual(ret, 0, '[%s] failed to TestMaintenance.finalize' % str(fi))

            # Delete forwarding role
            out = util.sudo('iptables -t nat -D OUTPUT -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
            self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)
            out = util.sudo('iptables -t nat -D PREROUTING -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
            self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)

            for c in clnts:
                self.assertTrue(c.is_consistency(), '[%s] data consistency error!' % str(fi))

        finally:
            for c in clnts:
                c.quit()
            for c in clnts:
                c.join()
示例#13
0
    def test_5_mgmt_is_isolated_with_master_failover(self):
        util.print_frame()

        out = util.sudo('iptables -L')
        util.log('====================================================================')
        util.log('out : %s' % out)
        util.log('out.return_code : %d' % out.return_code)
        util.log('out.stderr : %s' % out.stderr)
        util.log('out.succeeded : %s' % out.succeeded)

        # Add forwarding role (127.0.0.100 -> 127.0.0.1)
        out = util.sudo('iptables -t nat -A OUTPUT -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)

        out = util.sudo('iptables -t nat -A PREROUTING -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'add a forwarding role to iptables fail. output:%s' % out)

        cluster = filter(lambda x: x['cluster_name'] == 'network_isolation_cluster_1', config.clusters)[0]
        util.log(util.json_to_str(cluster))

        self.leader_cm = cluster['servers'][0]

        # MGMT
        mgmt_ip = cluster['servers'][0]['real_ip']
        mgmt_port = cluster['servers'][0]['cm_port']

        # Create cluster
        ret = default_cluster.initialize_starting_up_smr_before_redis( cluster )
        self.assertEqual(0, ret, 'failed to TestMaintenance.initialize')

        # Print initial state of cluster
        util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
        initial_state = []
        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, initial_state, check_quorum=True), 'failed to check cluster state')

        # Network isolation test
        for loop_cnt in range(3):
            master, slave1, slave2 = util.get_mss(cluster)
            self.assertNotEquals(master, None, 'there is no master')
            self.assertNotEquals(slave1, None, 'there is no slave1')
            self.assertNotEquals(slave2, None, 'there is no slave2')

            # Block network
            util.log('\n\n\n ### BLOCK NETWORK, %d ### ' % loop_cnt)
            out = util.sudo('iptables -A OUTPUT -d 127.0.0.100 -j DROP')
            self.assertTrue(out.succeeded, 'add a bloking role to iptables fail. output:%s' % out)

            for i in range(4):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Check cluster state
            ok = False
            for i in range(10):
                isolated_states = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                time.sleep(1)

                state_transition_done = True
                for s in isolated_states:
                    if s['ip'] != '127.0.0.100':
                        continue

                    if s['active_role'] != '?' or s['mgmt_role'] != 'N':
                        state_transition_done = False

                if state_transition_done :
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state transition')

            # Shutdown master
            util.log( 'shutdown pgs%d while hanging.' % master['id'] )
            ret = testbase.request_to_shutdown_smr( master )
            self.assertEqual( ret, 0, 'failed to shutdown smr. id:%d' % master['id'] )
            ret = testbase.request_to_shutdown_redis( master )
            self.assertEqual( ret, 0, 'failed to shutdown redis. id:%d' % master['id'] )

            # Check state F
            max_try = 20
            expected = 'F'
            for i in range( 0, max_try):
                util.log('MGMT_IP:%s, MGMT_PORT:%d' % (mgmt_ip, mgmt_port))
                state = util._get_smr_state( master['id'], cluster['cluster_name'], mgmt_ip, mgmt_port )
                if expected == state:
                    break;
                time.sleep( 1 )
            self.assertEqual( expected , state,
                               'master%d - state:%s, expected:%s' % (master['id'], state, expected) )
            util.log( 'succeeded : pgs%d state changed to F.' % master['id'] )

            # Unblock network
            util.log('\n\n\n ### UNBLOCK NETWORK, %d ### ' % loop_cnt)
            out = util.sudo('iptables -D OUTPUT -d 127.0.0.100 -j DROP')
            self.assertTrue(out.succeeded, 'delete a bloking role to iptables fail. output:%s' % out)

            # Check cluster state
            ok = False
            for i in range(7):
                final_state = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, final_state, check_quorum=True)

                state_consistency = True
                for s in final_state:
                    if s['pgs_id'] == master['id']:
                        continue

                    if s['active_role'] != s['mgmt_role']:
                        state_consistency = False

                if state_consistency:
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state consistency')

            # Recovery
            util.log( 'restart pgs%d.' % master['id'] )
            ret = testbase.request_to_start_smr( master )
            self.assertEqual( ret, 0, 'failed to start smr. id:%d' % master['id'] )

            ret = testbase.request_to_start_redis( master )
            self.assertEqual( ret, 0, 'failed to start redis. id:%d' % master['id'] )

            wait_count = 20
            ret = testbase.wait_until_finished_to_set_up_role( master, wait_count )
            self.assertEqual( ret, 0, 'failed to role change. smr_id:%d' % (master['id']) )

            redis = redis_mgmt.Redis( master['id'] )
            ret = redis.connect( master['ip'], master['redis_port'] )
            self.assertEqual( ret, 0, 'failed to connect to redis' )

            ok = False
            for i in xrange(5):
                ok = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, check_quorum=True)
                if ok:
                    break
                else:
                    time.sleep(1)

            self.assertTrue(ok, 'failed to check cluster state')

        # Check state
        self.assertNotEqual(initial_state, None, 'initial_state is None')
        self.assertNotEqual(final_state, None, 'final_state is None')

        initial_state = sorted(initial_state, key=lambda x: int(x['pgs_id']))
        final_state = sorted(final_state, key=lambda x: int(x['pgs_id']))
        for i in range(0, 3):
            msg = 'ts (%d)%d -> (%d)%d' % (initial_state[i]['pgs_id'], initial_state[i]['active_ts'], final_state[i]['pgs_id'], final_state[i]['active_ts'])
            util.log(msg)
            self.assertNotEqual(initial_state[i]['active_ts'], final_state[i]['active_ts'], msg)

        for i in range(3, 6):
            msg = 'ts (%d)%d -> (%d)%d' % (initial_state[i]['pgs_id'], initial_state[i]['active_ts'], final_state[i]['pgs_id'], final_state[i]['active_ts'])
            util.log(msg)
            self.assertEqual(initial_state[i]['active_ts'], final_state[i]['active_ts'], msg)

        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, check_quorum=True), 'failed to check cluster state')

        # Shutdown cluster
        ret = default_cluster.finalize( cluster )
        self.assertEqual(ret, 0, 'failed to TestMaintenance.finalize')

        # Delete forwarding role (127.0.0.100 -> 127.0.0.1)
        out = util.sudo('iptables -t nat -D OUTPUT -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)

        out = util.sudo('iptables -t nat -D PREROUTING -d 127.0.0.100 -p tcp -j DNAT --to-destination 127.0.0.1')
        self.assertTrue(out.succeeded, 'delete a forwarding role to iptables fail. output:%s' % out)
示例#14
0
    def test_5_mgmt_is_isolated_with_lconn(self):
        util.print_frame()

        util.iptables_print_list()

        cluster = filter(lambda x: x['cluster_name'] == 'network_isolation_cluster_1', config.clusters)[0]
        util.log(util.json_to_str(cluster))

        self.leader_cm = cluster['servers'][0]

        # MGMT
        mgmt_ip = cluster['servers'][0]['real_ip']
        mgmt_port = cluster['servers'][0]['cm_port']

        # Create cluster
        conf_checker = default_cluster.initialize_starting_up_smr_before_redis( cluster )
        self.assertIsNotNone(conf_checker, 'failed to initialize cluster')

        # Print initial state of cluster
        util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
        initial_state = []
        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, initial_state, check_quorum=True), 'failed to check cluster state')

        # Set SMR option (slave_idle_timeout)
        util.log('\n\n\n ### Set SMR option ###')
        for s in cluster['servers']:
            t = telnet.Telnet('SMR%d' % s['id'])
            self.assertEqual(t.connect(s['ip'], s['smr_mgmt_port']), 0,
                    'Failed to connect to smr. ADDR=%s:%d' % (s['ip'], s['smr_mgmt_port']))
            cmd = 'confset slave_idle_timeout_msec 18000'
            util.log('[%s:%d] >> %s' % (s['ip'], s['smr_mgmt_port'], cmd))
            t.write('confset slave_idle_timeout_msec 18000\r\n')
            reply = t.read_until('\r\n').strip()
            util.log('[%s:%d] << %s' % (s['ip'], s['smr_mgmt_port'], reply))
            self.assertEqual(reply, '+OK', 'Failed to set slave_idle_timeout, REPLY=%s' % reply)

        # Network isolation test
        for loop_cnt in range(3):
            # Get master
            master = util.get_server_by_role_and_pg( cluster['servers'], 'master', 0 )

            first_slave = None
            for s in cluster['servers']:
                if s == master:
                    continue

                # Skip non-virtual host
                if s.has_key('real_ip') == False:
                    continue

                if first_slave == None:
                    first_slave = s

                # 'role lconn'
                util.log( 'role lconn pgs%d while hanging.' % s['id'] )

                ret = util.role_lconn_addr( s['real_ip'], s['smr_mgmt_port']  )
                self.assertEqual( ret, '+OK\r\n', 'role lconn failed. reply="%s"' % (ret[:-2]) )
                util.log( 'succeeded : cmd="role lconn", reply="%s"' % (ret[:-2]) )
                time.sleep(0.5)

            # Block network
            util.log('\n\n\n ### BLOCK NETWORK, %d ### ' % loop_cnt)
            self.assertTrue(util.iptables_drop('A', '127.0.0.100', first_slave['smr_mgmt_port']), 'add a bloking role to iptables fail.')

            for i in range(6):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Check cluster state
            ok = False
            for i in range(10):
                isolated_states = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                time.sleep(1)

                state_transition_done = True
                for s in isolated_states:
                    if s['pgs_id'] != first_slave['id']:
                        continue

                    if s['active_role'] != '?' or s['mgmt_role'] != 'N':
                        state_transition_done = False

                if state_transition_done :
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state transition')

            # Unblock network
            self.assertTrue(util.iptables_drop('D', '127.0.0.100', first_slave['smr_mgmt_port']), 'delete a bloking role to iptables fail.')

            # Check cluster state
            ok = False
            for i in range(7):
                final_state = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, final_state, check_quorum=True)

                state_consistency = True
                for s in final_state:
                    if s['pgs_id'] == 1:
                        continue

                    if is_pgs_normal(s) == False:
                        state_consistency = False

                if state_consistency:
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state consistency')

            ok = False
            for i in xrange(5):
                ok = util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, check_quorum=True)
                if ok:
                    break
                else:
                    time.sleep(1)
            self.assertTrue(ok, 'failed to check cluster state')

        # Check state
        self.assertNotEqual(initial_state, None, 'initial_state is None')
        self.assertNotEqual(final_state, None, 'final_state is None')

        initial_state = sorted(initial_state, key=lambda x: int(x['pgs_id']))
        final_state = sorted(final_state, key=lambda x: int(x['pgs_id']))
        for i in range(len(final_state)):
            msg = 'ts (%d)%d -> (%d)%d' % (initial_state[i]['pgs_id'], initial_state[i]['active_ts'], final_state[i]['pgs_id'], final_state[i]['active_ts'])
            util.log(msg)

        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, check_quorum=True), 'failed to check cluster state')

        self.assertTrue(conf_checker.final_check())

        # Shutdown cluster
        default_cluster.finalize(cluster)
示例#15
0
    def _gateway_fault_failback(self, api):
        # Set up arguments
        server = self.cluster['servers'][0]
        gw_id = server['id']
        gw_port = server['gateway_port']

        # Stop gateway
        ret = util.shutdown_gateway(gw_id, gw_port, True)
        self.assertEqual(ret, 0, 'failed : shutdown gateawy%d' % gw_id)
        time.sleep(3)

        # Check error
        saved_err_cnt = {}
        for i in range(len(self.load_gen_list)):
            saved_err_cnt[i] = 0

        no_error_cnt = 0
        for i in range(10):
            util.log('check error count loop:%d' % i)
            no_err = True
            for j in range(len(self.load_gen_list)):
                err_cnt = self.load_gen_list[j].get_err_cnt()
                if err_cnt != saved_err_cnt[j]:
                    no_err = False
                util.log('saved_err_cnt:%d, err_cnt:%d' % (saved_err_cnt[j], err_cnt))
                saved_err_cnt[j] = err_cnt

            if no_err:
                no_error_cnt += 1

            if no_error_cnt >= 3:
                break

            time.sleep(1)
        self.assertTrue(no_error_cnt >= 3, 'failed to get replys from well working gateways')

        # Check if gateway is deleted
        deleted_gw = {"ip":HOST_IP,"port":gw_port}

        found = check_gateway_deleted(deleted_gw, api)
        if not found:
            self.fail('FAIL, delete gateway information, gw:%s' % util.json_to_str(deleted_gw))
        else:
            util.log('SUCCESS, delete gateway information.')

        # Start gateway
        ret = util.start_gateway( gw_id, server['ip'], MGMT_PORT, server['cluster_name'], gw_port)
        self.assertEqual( ret, 0, 'failed : start gateawy%d' % gw_id )
        time.sleep(3)

        # Check if gateway is added
        found = check_gateway_added(deleted_gw, api)
        if not found:
            self.fail('FAIL, load gateway information, gw:%s' % util.json_to_str(added_gw))
        else:
            util.log('SUCCESS, load gateway information.')

        # Check loadbalancing
        for i in range(3):
            ok = True
            for s in self.cluster['servers']:
                tps = util.get_tps(s['ip'], s['gateway_port'], 'gw')
                if tps < 50:
                    ok = False

            if ok:
                break

            time.sleep(1)

        if not ok:
            self.fail('FAIL, loadbalancing,')
        else:
            util.log('SUCCESS, loadbalancing.')
示例#16
0
    def test_6_repeat_isolation_and_no_opinion_linepay(self):
        util.print_frame()

        util.iptables_print_list()

        # Add forwarding role
        self.assertTrue(util.iptables_redirect('A', '127.0.0.100', '127.0.0.1'), 'add a forwarding role to iptables fail.')
        self.assertTrue(util.iptables_redirect('A', '127.0.0.101', '127.0.0.1'), 'add a forwarding role to iptables fail.')
        self.assertTrue(util.iptables_redirect('A', '127.0.0.102', '127.0.0.1'), 'add a forwarding role to iptables fail.')

        cluster_name = 'no_opinion'
        cluster = filter(lambda x: x['cluster_name'] == cluster_name, config.clusters)[0]
        util.log(util.json_to_str(cluster))

        self.leader_cm = cluster['servers'][0]

        # MGMT
        mgmt_ip = cluster['servers'][0]['real_ip']
        mgmt_port = cluster['servers'][0]['cm_port']

        # Create cluster
        conf_checker = default_cluster.initialize_starting_up_smr_before_redis( cluster )
        self.assertIsNotNone(conf_checker, 'failed to initialize cluster')

        # Print initial state of cluster
        util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
        initial_state = []
        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, initial_state, check_quorum=True), 'failed to check cluster state')

        # Network isolation test
        loop_cnt = 0
        while (loop_cnt < 20):
            loop_cnt += 1
            # Block network
            util.log('\n\n\n ### BLOCK NETWORK, %d ### ' % loop_cnt)
            self.assertTrue(util.iptables_drop('A', '127.0.0.102'), 'add a bloking role to iptables fail.')

            for i in range(1):
                util.log('waiting... %d' % (i + 1))
                time.sleep(0.1)

            self.assertTrue(util.iptables_drop('A', '127.0.0.100'), 'add a bloking role to iptables fail.')

            for i in range(3):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1.2)

            self.assertTrue(util.iptables_drop('A', '127.0.0.101'), 'add a bloking role to iptables fail.')

            for i in range(1):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Unblock network
            util.log('\n\n\n ### UNBLOCK NETWORK, %d ### ' % loop_cnt)
            self.assertTrue(util.iptables_drop('D', '127.0.0.102'), 'delete a bloking role to iptables fail.')
            for i in range(0):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)
            self.assertTrue(util.iptables_drop('D', '127.0.0.100'), 'delete a bloking role to iptables fail.')
            for i in range(0):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)
            self.assertTrue(util.iptables_drop('D', '127.0.0.101'), 'delete a bloking role to iptables fail.')
            for i in range(3):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Print state of cluster
            util.log('\n ### STATE OF CLUSTER ### ')
            cluster_state = False
            for i in range(10):
                cluster_state = util.check_cluster(cluster_name, mgmt_ip, mgmt_port, initial_state, check_quorum=True)
                if cluster_state == True:
                    break
                else:
                    time.sleep(1)
            self.assertTrue(cluster_state, 'failed to check cluster state')

            all_in_f = True
            for s in cluster['servers']:
                if checkLastState(mgmt_ip, s['cm_port'], cluster_name, 0, 'F') == False:
                    all_in_f = False
                    break

            self.assertFalse(all_in_f, 'PGS0`s last state remains in F')

        # Delete forwarding role
        self.assertTrue(util.iptables_redirect('D', '127.0.0.100', '127.0.0.1'), 'delete a forwarding role to iptables fail.')
        self.assertTrue(util.iptables_redirect('D', '127.0.0.101', '127.0.0.1'), 'delete a forwarding role to iptables fail.')
        self.assertTrue(util.iptables_redirect('D', '127.0.0.102', '127.0.0.1'), 'delete a forwarding role to iptables fail.')

        self.assertTrue(conf_checker.final_check())

        # Shutdown cluster
        default_cluster.finalize(cluster)
示例#17
0
    def test_gateway_upgrade(self):
        util.print_frame()

        api = ARC_API(ZK_ADDR, CLUSTER_NAME, logFilePrefix = self.arcci_log, so_path = self.so_path)

        # Start load generation
        self.load_gen_list = {}
        for i in range(len(self.cluster['servers'])):
            arc_api = ARC_API(ZK_ADDR, CLUSTER_NAME, logFilePrefix = self.arcci_log, so_path = self.so_path)
            server = self.cluster['servers'][i]
            load_gen = LoadGenerator_ARCCI(server['id'], arc_api, timeout_second=10)
            load_gen.start()
            self.load_gen_list[i] = load_gen

        # Set up arguments
        server = self.cluster['servers'][0]
        gw_id = server['id']
        gw_port = server['gateway_port']

        # Check load
        for i in range(5):
            ok = True
            for s in self.cluster['servers']:
                tps = util.get_tps(s['ip'], s['gateway_port'], 'gw')
                util.log('%s:%d TPS:%d' % (s['ip'], s['gateway_port'], tps))
                if tps < 50:
                    ok = False

            if ok:
                break

            time.sleep(1)
        self.assertTrue(ok, 'failed to send requests')

        # Delete gateway
        self.assertTrue(
                util.gw_del(CLUSTER_NAME, gw_id, MGMT_IP, MGMT_PORT),
                'failed to delete gateway')

        # Check load
        for i in range(5):
            ok = True
            tps = util.get_tps(server['ip'], server['gateway_port'], 'gw')
            util.log('%s:%d TPS:%d' % (server['ip'], server['gateway_port'], tps))
            if tps > 10:
                ok = False

            if ok:
                break

            time.sleep(1)
        self.assertTrue(ok, 'failed to send requests')

        # Stop gateway
        ret = util.shutdown_gateway(gw_id, gw_port, True)
        self.assertEqual(ret, 0, 'failed : shutdown gateawy%d' % gw_id)

        # Check if gateway is deleted
        deleted_gw = {"ip":HOST_IP,"port":gw_port}

        found = check_gateway_deleted(deleted_gw, api)
        if not found:
            self.fail('FAIL, delete gateway information, gw:%s' % util.json_to_str(deleted_gw))
        else:
            util.log('SUCCESS, delete gateway information.')

        # Start gateway
        ret = util.start_gateway( gw_id, server['ip'], MGMT_PORT, server['cluster_name'], gw_port)
        self.assertEqual( ret, 0, 'failed : start gateawy%d' % gw_id )
        time.sleep(3)

        # Add gateway
        self.assertTrue(
                util.gw_add(CLUSTER_NAME, gw_id, HOST_NAME, HOST_IP, gw_port, MGMT_IP, MGMT_PORT),
                'failed to add gateway')

        # Check if gateway is added
        added_gw = {"ip":HOST_IP,"port":gw_port}

        log_reader = LogReader(api.conf.log_file_prefix)
        found = False
        while True:
            line = log_reader.readline()
            if line == None:
                break

            if line.find(MSG_GATEWAY_ADD_ZK) == -1:
                continue

            gw = line.split('data:')[1]
            gw = ast.literal_eval(gw)

            if gw['ip'] == added_gw['ip'] and gw['port'] == added_gw['port']:
                found = True

        if not found:
            self.fail('FAIL, load gateway information, gw:%s' % util.json_to_str(added_gw))
        else:
            util.log('SUCCESS, load gateway information.')

        # Check loadbalancing
        for i in range(5):
            ok = True
            for s in self.cluster['servers']:
                tps = util.get_tps(s['ip'], s['gateway_port'], 'gw')
                util.log('%s:%d TPS:%d' % (s['ip'], s['gateway_port'], tps))
                if tps < 50:
                    ok = False

            if ok:
                break

            time.sleep(1)

        if not ok:
            self.fail('FAIL, loadbalancing,')
        else:
            util.log('SUCCESS, loadbalancing.')

        api.destroy()
示例#18
0
    def test_1_mgmt_is_isolated(self):
        util.print_frame()

        util.iptables_print_list()

        cluster = filter(lambda x: x['cluster_name'] == 'network_isolation_cluster_1', config.clusters)[0]
        util.log(util.json_to_str(cluster))

        # MGMT
        mgmt_ip = cluster['servers'][0]['real_ip']
        mgmt_port = cluster['servers'][0]['cm_port']

        # Create cluster
        conf_checker = default_cluster.initialize_starting_up_smr_before_redis( cluster )
        self.assertIsNotNone(conf_checker, 'failed to initialize cluster')

        # Print initial state of cluster
        util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
        initial_state = []
        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, initial_state, check_quorum=True), 'failed to check cluster state')

        # Set SMR option (slave_idle_timeout)
        util.log('\n\n\n ### Set SMR option ###')
        for s in cluster['servers']:
            t = telnet.Telnet('SMR%d' % s['id'])
            self.assertEqual(t.connect(s['ip'], s['smr_mgmt_port']), 0,
                    'Failed to connect to smr. ADDR=%s:%d' % (s['ip'], s['smr_mgmt_port']))
            cmd = 'confset slave_idle_timeout_msec 18000'
            util.log('[%s:%d] >> %s' % (s['ip'], s['smr_mgmt_port'], cmd))
            t.write('confset slave_idle_timeout_msec 18000\r\n')
            reply = t.read_until('\r\n').strip()
            util.log('[%s:%d] << %s' % (s['ip'], s['smr_mgmt_port'], reply))
            self.assertEqual(reply, '+OK', 'Failed to set slave_idle_timeout, REPLY=%s' % reply)

        # Network isolation test
        for cnt in range(5):
            # Block network
            util.log('\n\n\n ### BLOCK NETWORK, %d ### ' % cnt)
            for s in cluster['servers']:
                """Loopback Address Range (Reference : RFC3330)

                127.0.0.0/8 - This block is assigned for use as the Internet host
                  loopback address.  A datagram sent by a higher level protocol to an
                  address anywhere within this block should loop back inside the host.
                  This is ordinarily implemented using only 127.0.0.1/32 for loopback,
                  but no addresses within this block should ever appear on any network
                  anywhere [RFC1700, page 5].
                """
                self.assertTrue(util.iptables_drop('A', '127.0.0.100', s['smr_mgmt_port']), 'add a bloking role to iptables fail.')

            for i in range(4):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Check cluster state
            ok = False
            for i in range(7):
                isolated_states = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                time.sleep(1)

                state_transition_done = True
                for s in isolated_states:
                    if s['ip'] != '127.0.0.100':
                        continue

                    if s['active_role'] != '?' or s['mgmt_role'] != 'N':
                        state_transition_done = False

                if state_transition_done :
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state transition')

            # Unblock network
            util.log('\n\n\n ### UNBLOCK NETWORK, %d ### ' % cnt)
            for s in cluster['servers']:
                self.assertTrue(util.iptables_drop('D', '127.0.0.100', s['smr_mgmt_port']), 'delete a bloking role to iptables fail.')

            # Check cluster state
            ok = False
            for i in range(7):
                final_state = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, final_state, check_quorum=True)

                all_green = True
                for s in final_state:
                    if is_pgs_normal(s) == False:
                        all_green = False

                if all_green:
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state consistency')

        # Check state
        self.assertNotEqual(initial_state, None, 'initial_state is None')
        self.assertNotEqual(final_state, None, 'final_state is None')

        self.assertTrue(conf_checker.final_check())

        # Shutdown cluster
        default_cluster.finalize(cluster)
示例#19
0
    def test_1_mgmt_is_isolated(self):
        util.print_frame()

        out = util.sudo('iptables -L')
        util.log('====================================================================')
        util.log('out : %s' % out)
        util.log('out.return_code : %d' % out.return_code)
        util.log('out.stderr : %s' % out.stderr)
        util.log('out.succeeded : %s' % out.succeeded)

        cluster = filter(lambda x: x['cluster_name'] == 'network_isolation_cluster_1', config.clusters)[0]
        util.log(util.json_to_str(cluster))

        # MGMT
        mgmt_ip = cluster['servers'][0]['real_ip']
        mgmt_port = cluster['servers'][0]['cm_port']

        # Create cluster
        ret = default_cluster.initialize_starting_up_smr_before_redis( cluster )
        self.assertEqual(0, ret, 'failed to TestMaintenance.initialize')

        # Print initial state of cluster
        util.log('\n\n\n ### INITIAL STATE OF CLUSTER ### ')
        initial_state = []
        self.assertTrue(util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, initial_state, check_quorum=True), 'failed to check cluster state')

        # Set SMR option (slave_idle_timeout)
        util.log('\n\n\n ### Set SMR option ###')
        for s in cluster['servers']:
            t = telnet.Telnet('SMR%d' % s['id'])
            self.assertEqual(t.connect(s['ip'], s['smr_mgmt_port']), 0,
                    'Failed to connect to smr. ADDR=%s:%d' % (s['ip'], s['smr_mgmt_port']))
            cmd = 'confset slave_idle_timeout_msec 18000'
            util.log('[%s:%d] >> %s' % (s['ip'], s['smr_mgmt_port'], cmd))
            t.write('confset slave_idle_timeout_msec 18000\r\n')
            reply = t.read_until('\r\n').strip()
            util.log('[%s:%d] << %s' % (s['ip'], s['smr_mgmt_port'], reply))
            self.assertEqual(reply, '+OK', 'Failed to set slave_idle_timeout, REPLY=%s' % reply)

        # Network isolation test
        for cnt in range(5):
            # Block network
            util.log('\n\n\n ### BLOCK NETWORK, %d ### ' % cnt)
            for s in cluster['servers']:
                out = util.sudo('iptables -A OUTPUT -d 127.0.0.100 -p tcp --dport %d -j DROP' % (s['smr_mgmt_port']))
                """Loopback Address Range (Reference : RFC3330)

                127.0.0.0/8 - This block is assigned for use as the Internet host
                  loopback address.  A datagram sent by a higher level protocol to an
                  address anywhere within this block should loop back inside the host.
                  This is ordinarily implemented using only 127.0.0.1/32 for loopback,
                  but no addresses within this block should ever appear on any network
                  anywhere [RFC1700, page 5].
                """
                self.assertTrue(out.succeeded, 'add a bloking role to iptables fail. output:%s' % out)

            for i in range(4):
                util.log('waiting... %d' % (i + 1))
                time.sleep(1)

            # Check cluster state
            ok = False
            for i in range(7):
                isolated_states = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, isolated_states, check_quorum=True)
                time.sleep(1)

                state_transition_done = True
                for s in isolated_states:
                    if s['ip'] != '127.0.0.100':
                        continue

                    if s['active_role'] != '?' or s['mgmt_role'] != 'N':
                        state_transition_done = False

                if state_transition_done :
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state transition')

            # Unblock network
            util.log('\n\n\n ### UNBLOCK NETWORK, %d ### ' % cnt)
            for s in cluster['servers']:
                out = util.sudo('iptables -D OUTPUT -d 127.0.0.100 -p tcp --dport %d -j DROP' % (s['smr_mgmt_port']))
                self.assertTrue(out.succeeded, 'delete a bloking role to iptables fail. output:%s' % out)

            # Check cluster state
            ok = False
            for i in range(7):
                final_state = []
                util.check_cluster(cluster['cluster_name'], mgmt_ip, mgmt_port, final_state, check_quorum=True)

                state_consistency = True
                for s in final_state:
                    if s['active_role'] != s['mgmt_role']:
                        state_consistency = False

                if state_consistency:
                    ok = True
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Fail, state consistency')

        # Check state
        self.assertNotEqual(initial_state, None, 'initial_state is None')
        self.assertNotEqual(final_state, None, 'final_state is None')

        initial_state = sorted(initial_state, key=lambda x: int(x['pgs_id']))
        final_state = sorted(final_state, key=lambda x: int(x['pgs_id']))
        for i in range(len(final_state)):
            msg = 'ts (%d)%d -> (%d)%d' % (initial_state[i]['pgs_id'], initial_state[i]['active_ts'], final_state[i]['pgs_id'], final_state[i]['active_ts'])
            util.log(msg)
            self.assertEqual(initial_state[i]['active_ts'], final_state[i]['active_ts'], msg)

        # Shutdown cluster
        ret = default_cluster.finalize( cluster )
        self.assertEqual(ret, 0, 'failed to TestMaintenance.finalize')