예제 #1
0
 def get_expected_smr_state(self, server, expected, max_try=60):
     for i in range(0, max_try):
         state = util.get_smr_state(server, self.leader_cm)
         if state == expected:
             break
         time.sleep(1)
     return state
예제 #2
0
    def failover(self, server):
        # shutdown
        ret = testbase.request_to_shutdown_smr(server)
        self.assertEqual(ret, 0, 'failed to shutdown smr')
        ret = testbase.request_to_shutdown_redis(server)
        self.assertEquals(ret, 0, 'failed to shutdown redis')

        # check state F
        max_try = 20
        expected = 'F'
        for i in range(0, max_try):
            state = util.get_smr_state(server, self.leader_cm)
            if expected == state:
                break
            time.sleep(1)
        self.assertEquals(
            expected, state, 'server%d - state:%s, expected:%s' %
            (server['id'], state, expected))

        # recovery
        ret = testbase.request_to_start_smr(server)
        self.assertEqual(ret, 0, 'failed to start smr')

        ret = testbase.request_to_start_redis(server)
        self.assertEqual(ret, 0, 'failed to start redis')

        ret = testbase.wait_until_finished_to_set_up_role(server, 10)
        self.assertEquals(ret, 0,
                          'failed to role change. smr_id:%d' % (server['id']))

        redis = redis_mgmt.Redis(server['id'])
        ret = redis.connect(server['ip'], server['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')

        # check state N
        max_try = 20
        expected = 'N'
        for i in range(0, max_try):
            state = util.get_smr_state(server, self.leader_cm)
            if expected == state:
                break
            time.sleep(1)
        role = util.get_role_of_server(server)
        self.assertEquals(
            expected, state, 'server%d - state:%s, expected:%s, role:%s' %
            (server['id'], state, expected, role))
예제 #3
0
    def failover_while_hang(self, server):
        # timestamp before hang
        ts_before = util.get_timestamp_of_pgs(server)
        self.assertNotEqual(
            ts_before, -1,
            'failed to get a timestamp of pgs(%d), ts_before:%d' %
            (server['id'], ts_before))

        # hang
        util.log('pgs(id:%d, ip:%s, port:%d) is going to hang.' %
                 (server['id'], server['ip'], server['smr_mgmt_port']))
        smr = smr_mgmt.SMR(server['id'])
        ret = smr.connect(server['ip'], server['smr_mgmt_port'])
        self.assertEqual(
            ret, 0, 'failed to connect to master. %s:%d' %
            (server['ip'], server['smr_mgmt_port']))
        smr.write('fi delay sleep 1 10000\r\n')
        reply = smr.read_until('\r\n', 1)
        if reply != None and reply.find('-ERR not supported') != -1:
            self.assertEqual(
                0, 1, 'make sure that smr has compiled with gcov option.')

        time.sleep(4)

        # check state F
        max_try = 20
        expected = 'F'
        for i in range(0, max_try):
            state = util.get_smr_state(server, self.leader_cm)
            if expected == state:
                break
            time.sleep(1)
        self.assertEquals(
            expected, state, 'server%d - state:%s, expected:%s' %
            (server['id'], state, expected))
        util.log('succeeded : pgs%d state changed to F.' % server['id'])

        # shutdown
        util.log('shutdown pgs%d while hanging.' % server['id'])
        ret = testbase.request_to_shutdown_smr(server)
        self.assertEqual(ret, 0,
                         'failed to shutdown smr. id:%d' % server['id'])
        ret = testbase.request_to_shutdown_redis(server)
        self.assertEquals(ret, 0,
                          'failed to shutdown redis. id:%d' % server['id'])

        # check state F
        max_try = 20
        expected = 'F'
        for i in range(0, max_try):
            state = util.get_smr_state(server, self.leader_cm)
            if expected == state:
                break
            time.sleep(1)
        self.assertEquals(
            expected, state, 'server%d - state:%s, expected:%s' %
            (server['id'], state, expected))
        util.log('succeeded : pgs%d state changed to F.' % server['id'])

        # recovery
        util.log('restart pgs%d.' % server['id'])
        ret = testbase.request_to_start_smr(server)
        self.assertEqual(ret, 0, 'failed to start smr. id:%d' % server['id'])

        ret = testbase.request_to_start_redis(server)
        self.assertEqual(ret, 0, 'failed to start redis. id:%d' % server['id'])

        wait_count = 20
        ret = testbase.wait_until_finished_to_set_up_role(server, wait_count)
        self.assertEquals(ret, 0,
                          'failed to role change. smr_id:%d' % (server['id']))

        redis = redis_mgmt.Redis(server['id'])
        ret = redis.connect(server['ip'], server['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')

        # check state N
        max_try = 20
        expected = 'N'
        for i in range(0, max_try):
            state = util.get_smr_state(server, self.leader_cm)
            if expected == state:
                break
            time.sleep(1)
        self.assertEquals(
            expected, state, 'server%d - state:%s, expected:%s' %
            (server['id'], state, expected))
        util.log('succeeded : pgs%d state changed to N.' % server['id'])

        # wait for rejoin as a slave
        success = False
        for i in range(20):
            role = util.get_role_of_server(server)
            if role == c.ROLE_SLAVE:
                ts_after = util.get_timestamp_of_pgs(server)
                if ts_after != -1 and ts_before != ts_after:
                    success = True
                    break
            time.sleep(1)
        self.assertEqual(success, True, 'failed to rejoin as a slave')
        util.log('succeeded : pgs%d joined as a slave.' % server['id'])

        return 0
예제 #4
0
    def failure_recovery(self, role, wait_count=10, redis_only=False):
        time.sleep(2)

        # get gateway info
        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])
        ret = gw.connect(ip, port)
        self.assertEqual(ret, 0,
                         'failed to connect to gateway, %s:%d' % (ip, port))

        # set value
        key = 'new_key_haha'
        cmd = 'set %s 12345\r\n' % (key)
        gw.write(cmd)
        res = gw.read_until('\r\n')
        self.assertEquals(res, '+OK\r\n')

        # shutdown
        server = util.get_server_by_role(self.cluster['servers'], role)

        if redis_only == False:
            ret = testbase.request_to_shutdown_smr(server)
            self.assertEqual(ret, 0, 'failed to shutdown smr')

        ret = testbase.request_to_shutdown_redis(server)
        self.assertEquals(ret, 0, 'failed to shutdown redis')

        # check state F
        max_try = 20
        expected = 'F'
        for i in range(0, max_try):
            state = util.get_smr_state(server, self.leader_cm)
            if expected == state:
                break
            time.sleep(1)
        self.assertEquals(
            expected, state, 'server%d - state:%s, expected:%s' %
            (server['id'], state, expected))

        # set value
        check_value = '54321'
        cmd = 'set %s %s\r\n' % (key, check_value)
        gw.write(cmd)
        res = gw.read_until('\r\n')
        self.assertEquals(res, '+OK\r\n')
        gw.disconnect()

        # recovery
        if redis_only == False:
            ret = testbase.request_to_start_smr(server)
            self.assertEqual(ret, 0, 'failed to start smr')

        ret = testbase.request_to_start_redis(server)
        self.assertEqual(ret, 0, 'failed to start redis')

        ret = testbase.wait_until_finished_to_set_up_role(server, wait_count)
        self.assertEquals(ret, 0,
                          'failed to role change. smr_id:%d' % (server['id']))

        redis = redis_mgmt.Redis(server['id'])
        ret = redis.connect(server['ip'], server['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')

        # check state N
        max_try = 20
        expected = 'N'
        for i in range(0, max_try):
            state = util.get_smr_state(server, self.leader_cm)
            if expected == state:
                break
            time.sleep(1)
        role = util.get_role_of_server(server)
        self.assertEquals(
            expected, state, 'server%d - state:%s, expected:%s, role:%s' %
            (server['id'], state, expected, role))

        # check value
        cmd = 'get %s\r\n' % (key)
        redis.write(cmd)
        redis.read_until('\r\n')
        response = redis.read_until('\r\n')
        self.assertEqual(response, '%s\r\n' % (check_value),
                         'inconsistent %s, %s' % (response, check_value))
예제 #5
0
    def deprecated_test_5_PGS_commit_is_greater_than_PG_commit(self):
        util.print_frame()

        # get gateway info
        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])
        ret = gw.connect(ip, port)
        self.assertEqual(ret, 0,
                         'failed to connect to gateway, %s:%d' % (ip, port))

        # initial data
        util.put_some_data(self.cluster)

        master, s1, s2 = util.get_mss(self.cluster)

        server_to_join = [s1, s2]
        # shutdown slaves
        for i in range(0, 2):
            ret = testbase.request_to_shutdown_smr(server_to_join[i])
            self.assertEqual(
                ret, 0, 'failed to shutdown smr%d' % server_to_join[i]['id'])
            util.log('succeeded to shutdown smr%d' % server_to_join[i]['id'])

            ret = testbase.request_to_shutdown_redis(server_to_join[i])
            self.assertEquals(ret, 0, 'failed to shutdown redis')
            util.log('succeeded to shutdown redis%d' % server_to_join[i]['id'])

            # check state F
            max_try = 20
            expected = 'F'
            for j in range(0, max_try):
                state = util.get_smr_state(server_to_join[i], self.leader_cm)
                if expected == state:
                    break
                time.sleep(1)
            self.assertEquals(
                expected, state, 'server%d - state:%s, expected:%s' %
                (server_to_join[i]['id'], state, expected))

        # put more data
        util.put_some_data(self.cluster, 10, 256)

        # bgsave
        ret = util.bgsave(master)
        self.assertTrue(ret, 'failed to bgsave. pgs%d' % master['id'])

        # shutdown master
        ret = testbase.request_to_shutdown_smr(master)
        self.assertEqual(ret, 0, 'failed to shutdown smr')
        util.log('succeeded to shutdown master smr, id=%d' % master['id'])
        ret = testbase.request_to_shutdown_redis(master)
        self.assertEquals(ret, 0, 'failed to shutdown redis')
        util.log('succeeded to shutdown master redis, id=%d' % master['id'])

        # check state F
        max_try = 20
        expected = 'F'
        for i in range(0, max_try):
            state = util.get_smr_state(master, self.leader_cm)
            if expected == state:
                break
            time.sleep(1)
        self.assertEquals(
            expected, state, 'server%d - state:%s, expected:%s' %
            (master['id'], state, expected))

        # recovery slaves
        for i in range(0, 2):
            ret = testbase.request_to_start_smr(server_to_join[i])
            self.assertEqual(ret, 0, 'failed to start smr')

            ret = testbase.request_to_start_redis(server_to_join[i])
            self.assertEqual(ret, 0, 'failed to start redis')

            ret = testbase.wait_until_finished_to_set_up_role(
                server_to_join[i], 10)
            self.assertEquals(
                ret, 0,
                'failed to role change. smr_id:%d' % (server_to_join[i]['id']))

            # check state N
            max_try = 20
            expected = 'N'
            for j in range(0, max_try):
                state = util.get_smr_state(server_to_join[i], self.leader_cm)
                if expected == state:
                    break
                time.sleep(1)
            role = util.get_role_of_server(server_to_join[i])
            self.assertEquals(
                expected, state, 'server%d - state:%s, expected:%s, role:%s' %
                (server_to_join[i]['id'], state, expected, role))

        # set value
        s = random.choice(server_to_join)
        redis = redis_mgmt.Redis(['id'])
        ret = redis.connect(s['ip'], s['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')

        key_base = 'key_test'
        for i in range(0, 10000):
            cmd = 'set %s%d %d\r\n' % (key_base, i, i)
            redis.write(cmd)
            res = redis.read_until('\r\n')
            self.assertEquals(res, '+OK\r\n')
        redis.disconnect()

        for i in range(0, 2):
            redis = redis_mgmt.Redis(server_to_join[i]['id'])
            ret = redis.connect(server_to_join[i]['ip'],
                                server_to_join[i]['redis_port'])
            self.assertEquals(ret, 0, 'failed to connect to redis')

            # check value
            for j in range(0, 10000):
                cmd = 'get %s%d\r\n' % (key_base, j)
                redis.write(cmd)
                redis.read_until('\r\n')
                response = redis.read_until('\r\n')
                self.assertEqual(response, '%d\r\n' % (j),
                                 'inconsistent %s, %d' % (response[:-2], j))

        # try to recover master, but failed
        ret = testbase.request_to_start_smr(master)
        self.assertEqual(ret, 0, 'failed to start smr')

        ret = testbase.request_to_start_redis(master, False)
        self.assertEqual(ret, 0, 'failed to start redis')

        max_try = 3
        expected = 'N'
        for i in range(0, max_try):
            state = util.get_smr_state(master, self.leader_cm)
            if expected == state:
                break
            time.sleep(1)
        role = util.get_role_of_server(master)
        self.assertNotEqual(
            expected, state, 'server%d - state:%s, expected:not %s, role:%s' %
            (master['id'], state, expected, role))
        util.log(
            'success : the old master that has a greater commit-seq than the current master tried to join as a slave, but it is blocked successfully.'
        )

        gw.disconnect()
        return 0
예제 #6
0
    def test_4_PGS_mgen_is_less_than_PG_mgen(self):
        util.print_frame()

        # get gateway info
        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])
        ret = gw.connect(ip, port)
        self.assertEqual(ret, 0,
                         'failed to connect to gateway, %s:%d' % (ip, port))

        # initial data
        util.put_some_data(self.cluster)

        # shutdown
        server_to_join = util.get_server_by_role(self.cluster['servers'],
                                                 'master')
        ret = testbase.request_to_shutdown_smr(server_to_join)
        self.assertEqual(ret, 0, 'failed to shutdown smr')
        ret = testbase.request_to_shutdown_redis(server_to_join)
        self.assertEquals(ret, 0, 'failed to shutdown redis')

        # check state F
        max_try = 20
        expected = 'F'
        for i in range(0, max_try):
            state = util.get_smr_state(server_to_join, self.leader_cm)
            if expected == state:
                break
            time.sleep(1)
        self.assertEquals(
            expected, state, 'server%d - state:%s, expected:%s' %
            (server_to_join['id'], state, expected))

        # set value
        key_base = 'mw'
        for i in range(0, 10000):
            cmd = 'set %s%d %d\r\n' % (key_base, i, i)
            gw.write(cmd)
            res = gw.read_until('\r\n')
            self.assertEquals(res, '+OK\r\n')

        # master failover 1 (master generation + 1)
        util.log('master failover 1')
        server = util.get_server_by_role(self.cluster['servers'], 'master')
        self.failover(server)

        # check quorum (copy:3, quorum:1, available:2)
        ok = False
        for i in xrange(10):
            ok = util.check_quorum(self.cluster['cluster_name'],
                                   self.leader_cm['ip'],
                                   self.leader_cm['cm_port'])
            if ok:
                break
            else:
                time.sleep(1)
        self.assertTrue(ok, 'Check quorum fail.')

        # master failover 2 (master generation + 1)
        util.log('master failover 2')
        server = util.get_server_by_role(self.cluster['servers'], 'master')
        self.failover(server)

        # recovery
        util.log('master recovery start.')
        ret = testbase.request_to_start_smr(server_to_join)
        self.assertEqual(ret, 0, 'failed to start smr')

        ret = testbase.request_to_start_redis(server_to_join)
        self.assertEqual(ret, 0, 'failed to start redis')

        ret = testbase.wait_until_finished_to_set_up_role(server_to_join, 10)
        self.assertEquals(
            ret, 0,
            'failed to role change. smr_id:%d' % (server_to_join['id']))
        util.log('master recovery end successfully.')

        # check state N
        max_try = 20
        expected = 'N'
        for i in range(0, max_try):
            state = util.get_smr_state(server, self.leader_cm)
            if expected == state:
                break
            time.sleep(1)
        role = util.get_role_of_server(server)
        self.assertEquals(
            expected, state, 'server%d - state:%s, expected:%s, role:%s' %
            (server['id'], state, expected, role))

        time.sleep(5)

        # set value
        for i in range(10000, 20000):
            cmd = 'set %s%d %d\r\n' % (key_base, i, i)
            gw.write(cmd)
            res = gw.read_until('\r\n')
            self.assertEquals(res, '+OK\r\n')

        server = util.get_server_by_role(self.cluster['servers'], 'master')

        redis = redis_mgmt.Redis(server_to_join['id'])
        ret = redis.connect(server_to_join['ip'], server_to_join['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')

        # check value
        for i in range(0, 20000):
            cmd = 'get %s%d\r\n' % (key_base, i)
            redis.write(cmd)
            redis.read_until('\r\n')
            response = redis.read_until('\r\n')
            self.assertEqual(response, '%d\r\n' % (i),
                             'inconsistent %s, %d' % (response[:-2], i))

        gw.disconnect()
        return 0