Пример #1
0
    def get_mss( self ):
        """Return the cluster's servers as a ``(master, slave1, slave2)`` tuple.

        ``master`` and ``slave1`` are whatever ``util.get_server_by_role``
        returns for the roles 'master' and 'slave'.  ``slave2`` is the first
        entry of ``self.cluster['servers']`` whose 'id' differs from both.

        Returns:
            tuple: ``(master, slave1, slave2)`` server entries.

        Raises:
            AssertionError: (via the test-case assertions) if any of the three
                servers cannot be found.
        """
        servers = self.cluster['servers']

        master = util.get_server_by_role( servers, 'master' )
        self.assertNotEqual( master, None, 'failed to get master' )

        slave1 = util.get_server_by_role( servers, 'slave' )
        self.assertNotEqual( slave1, None, 'failed to get slave1' )

        # slave2: first server that is neither the master nor slave1
        taken_ids = ( master['id'], slave1['id'] )
        slave2 = next( ( server for server in servers
                         if server['id'] not in taken_ids ), None )
        self.assertNotEqual( slave2, None, 'failed to get slave2' )

        return master, slave1, slave2
Пример #2
0
    def get_mss(self):
        """Pick a master and two further servers out of ``self.cluster``.

        The master and the first slave come from ``util.get_server_by_role``;
        the second slave is the first listed server whose id matches neither
        of them.  Each lookup is asserted to be non-None.

        Returns:
            tuple: ``(master, slave1, slave2)``.
        """
        pgs_list = self.cluster['servers']

        master = util.get_server_by_role(pgs_list, 'master')
        self.assertNotEquals(master, None, 'failed to get master')

        slave1 = util.get_server_by_role(pgs_list, 'slave')
        self.assertNotEquals(slave1, None, 'failed to get slave1')

        # Walk the list and stop at the first server not already chosen.
        for candidate in pgs_list:
            candidate_id = candidate['id']
            if candidate_id == master['id'] or candidate_id == slave1['id']:
                continue
            slave2 = candidate
            break
        else:
            slave2 = None
        self.assertNotEquals(slave2, None, 'failed to get slave2')

        return master, slave1, slave2
    def setUpClass( cls ):
        """Bring up the cluster and start load generators against a slave.

        Stores the result of
        ``default_cluster.initialize_starting_up_smr_before_redis`` in
        ``cls.conf_checker``, then creates ``cls.max_load_generator``
        ``LoadGenerator`` objects pointed at the ip/redis_port of the server
        returned by ``util.get_server_by_role`` for the role 'slave'.  Each
        generator is stored in ``cls.load_gen_thrd_list`` under its index and
        started.

        Raises:
            AssertionError: if the cluster initialization returned None or no
                slave server could be found.
        """
        cls.conf_checker = default_cluster.initialize_starting_up_smr_before_redis( cls.cluster )
        # Raise explicitly: a bare ``assert`` statement is stripped under -O.
        if cls.conf_checker is None:
            raise AssertionError( 'failed to initialize cluster' )

        slave = util.get_server_by_role( cls.cluster['servers'], 'slave' )
        # Fail with a clear message instead of a TypeError on slave['ip'].
        if slave is None:
            raise AssertionError( 'failed to get slave' )

        for i in range( cls.max_load_generator ):
            load_gen = load_generator.LoadGenerator( i, slave['ip'], slave['redis_port'] )
            cls.load_gen_thrd_list[i] = load_gen
            load_gen.start()
Пример #4
0
    def setUpClass( cls ):
        """Initialize the cluster and start load generators against a slave.

        Calls ``default_cluster.initialize_starting_up_smr_before_redis`` on
        ``cls.cluster``.  On a non-zero result the cluster is finalized and -1
        is returned.  Otherwise ``cls.max_load_generator`` ``LoadGenerator``
        objects are created for the ip/redis_port of the server that
        ``util.get_server_by_role`` returns for 'slave', stored in
        ``cls.load_gen_thrd_list`` by index, and started.

        Returns:
            -1 if the cluster initialization failed, otherwise None.
        """
        ret = default_cluster.initialize_starting_up_smr_before_redis( cls.cluster )
        # Compare by value: ``is not 0`` tests object identity, which only
        # happens to work for small ints and is a SyntaxWarning on Python 3.8+.
        if ret != 0:
            default_cluster.finalize( cls.cluster )
            # NOTE(review): if this class is a unittest.TestCase the runner
            # ignores this return value - consider raising instead.
            return -1

        slave = util.get_server_by_role( cls.cluster['servers'], 'slave' )

        for i in range( cls.max_load_generator ):
            cls.load_gen_thrd_list[i] = load_generator.LoadGenerator( i, slave['ip'], slave['redis_port'] )
            cls.load_gen_thrd_list[i].start()
Пример #5
0
    def setUpClass(cls):
        """Prepare the cluster and launch the load generator threads.

        The value returned by
        ``default_cluster.initialize_starting_up_smr_before_redis`` is kept in
        ``cls.conf_checker`` and asserted to be non-None.  One
        ``LoadGenerator`` per index below ``cls.max_load_generator`` is then
        registered in ``cls.load_gen_thrd_list`` and started, all aimed at the
        server ``util.get_server_by_role`` returns for the role 'slave'.
        """
        checker = default_cluster.initialize_starting_up_smr_before_redis(
            cls.cluster)
        cls.conf_checker = checker
        assert cls.conf_checker != None, 'failed to initialize cluster'

        target = util.get_server_by_role(cls.cluster['servers'], 'slave')

        for gen_id in range(cls.max_load_generator):
            worker = load_generator.LoadGenerator(
                gen_id, target['ip'], target['redis_port'])
            # register first, then start - same order as before
            cls.load_gen_thrd_list[gen_id] = worker
            cls.load_gen_thrd_list[gen_id].start()
Пример #6
0
    def setUpClass(cls):
        """Initialize the cluster and start load generators against a slave.

        If ``default_cluster.initialize_starting_up_smr_before_redis`` returns
        anything other than 0, the cluster is finalized and -1 is returned.
        Otherwise one ``LoadGenerator`` per index below
        ``cls.max_load_generator`` is stored in ``cls.load_gen_thrd_list`` and
        started, each connected to the ip/redis_port of the server returned by
        ``util.get_server_by_role`` for the role 'slave'.

        Returns:
            -1 if the cluster initialization failed, otherwise None.
        """
        ret = default_cluster.initialize_starting_up_smr_before_redis(
            cls.cluster)
        # ``is not 0`` compared object identity with an int literal; the
        # intended check is on the value.
        if ret != 0:
            default_cluster.finalize(cls.cluster)
            # NOTE(review): if this class is a unittest.TestCase the runner
            # ignores this return value - consider raising instead.
            return -1

        slave = util.get_server_by_role(cls.cluster['servers'], 'slave')

        for i in range(cls.max_load_generator):
            cls.load_gen_thrd_list[i] = load_generator.LoadGenerator(
                i, slave['ip'], slave['redis_port'])
            cls.load_gen_thrd_list[i].start()
    def failure_recovery( self, role, wait_count=10, redis_only=False ):
        """Shut a server down, keep writing via a gateway, restart it, verify.

        Steps, in order:
          1. write a key through a randomly chosen gateway;
          2. shut down the server holding `role` (smr and redis, or only redis
             when `redis_only` is set) and wait for its SMR state to be 'F';
          3. overwrite the key through the gateway and disconnect;
          4. restart the server, wait for its role set-up and SMR state 'N';
          5. read the key directly from the restarted redis and compare it
             with the last written value.

        Args:
            role: role name handed to ``util.get_server_by_role``.
            wait_count: forwarded to
                ``testbase.wait_until_finished_to_set_up_role``.
            redis_only: when it compares equal to False, smr is shut down and
                restarted too; otherwise only redis is.
        """
        def poll_state( wanted, attempts ):
            # Query the SMR state up to `attempts` times, one second apart,
            # and hand back the last value seen.
            seen = None
            for _ in range( 0, attempts ):
                seen = util.get_smr_state( victim, self.leader_cm )
                if wanted == seen:
                    break
                time.sleep( 1 )
            return seen

        key = 'new_key_haha'
        check_value = '54321'
        with_smr = ( redis_only == False )

        time.sleep( 2 )

        # open a gateway connection
        gw_ip, gw_port = util.get_rand_gateway( self.cluster )
        gateway = gateway_mgmt.Gateway( self.cluster['servers'][0]['id'] )
        self.assertEqual( gateway.connect( gw_ip, gw_port ), 0,
                          'failed to connect to gateway, %s:%d' % (gw_ip, gw_port) )

        # first write
        gateway.write( 'set %s 12345\r\n' % key )
        self.assertEquals( gateway.read_until( '\r\n' ), '+OK\r\n' )

        # take the server down
        victim = util.get_server_by_role( self.cluster['servers'], role )

        if with_smr:
            self.assertEqual( testbase.request_to_shutdown_smr( victim ), 0,
                              'failed to shutdown smr' )

        self.assertEquals( testbase.request_to_shutdown_redis( victim ), 0,
                           'failed to shutdown redis' )

        # the server must reach state F
        wanted = 'F'
        seen = poll_state( wanted, 20 )
        self.assertEquals( wanted, seen,
                           'server%d - state:%s, expected:%s' % (victim['id'], seen, wanted) )

        # second write, while the server is down
        gateway.write( 'set %s %s\r\n' % (key, check_value) )
        self.assertEquals( gateway.read_until( '\r\n' ), '+OK\r\n' )
        gateway.disconnect()

        # bring the server back
        if with_smr:
            self.assertEqual( testbase.request_to_start_smr( victim ), 0,
                              'failed to start smr' )

        self.assertEqual( testbase.request_to_start_redis( victim ), 0,
                          'failed to start redis' )

        self.assertEquals( testbase.wait_until_finished_to_set_up_role( victim, wait_count ), 0,
                           'failed to role change. smr_id:%d' % (victim['id']) )

        revived = redis_mgmt.Redis( victim['id'] )
        self.assertEquals( revived.connect( victim['ip'], victim['redis_port'] ), 0,
                           'failed to connect to redis' )

        # the server must be back in state N
        wanted = 'N'
        seen = poll_state( wanted, 20 )
        current_role = util.get_role_of_server( victim )
        self.assertEquals( wanted, seen,
                           'server%d - state:%s, expected:%s, role:%s' % (victim['id'], seen, wanted, current_role) )

        # the restarted redis must hold the value of the second write
        revived.write( 'get %s\r\n' % key )
        revived.read_until( '\r\n' )   # first reply line is read and dropped
        answer = revived.read_until( '\r\n' )
        self.assertEqual( answer, '%s\r\n' % (check_value),
                          'inconsistent %s, %s' % (answer, check_value) )
Пример #8
0
    def test_1_role_change(self):
        """Repeatedly change roles under load and check timestamps/consistency.

        Load generators are started against random gateways and kept in
        ``self.load_gen_list``.  Then, 30 times with all servers ("3 copy")
        and 30 times after removing ``self.cluster['servers'][0]``
        ("2 copy"):

          * pick the server ``util.get_server_by_role`` returns for 'slave';
          * record ``util.get_timestamp_of_pgs`` for every server involved;
          * call ``util.role_change`` for the picked server (-1 is an error);
          * wait until every server answers '+PONG\\r\\n' to a ping;
          * assert every server's timestamp changed and every load generator
            reports ``isConsistent()``.

        Finally the removed server is re-installed with ``util.install_pgs``.
        """
        util.print_frame()

        self.load_gen_list = {}

        # Start load generator
        util.log("Start load_generator")
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            load_gen = load_generator.LoadGenerator(i, ip, port)
            load_gen.start()
            self.load_gen_list[i] = load_gen

        # Loop (smr: 3 copy)
        for i in range(30):
            target_server = util.get_server_by_role(self.cluster['servers'],
                                                    'slave')
            self.assertNotEqual(target_server, None, 'Get slave fail.')
            target = target_server['id']

            # print('') emits an empty line on both Python 2 and Python 3
            print('')
            util.log("(3 copy) Loop:%d, target pgs:%d" % (i, target))

            # Get old timestamp
            util.log_server_state(self.cluster)
            old_timestamp_list = [util.get_timestamp_of_pgs(s)
                                  for s in self.cluster['servers']]

            # Role change.  The returned id is only checked against the error
            # value; the target is looked up afresh on every iteration.
            master = util.role_change(self.leader_cm,
                                      self.cluster['cluster_name'], target)
            self.assertNotEqual(master, -1, 'role_change error.')
            util.log('Change role success.')

            # Wait until role change finished
            for s in self.cluster['servers']:
                ok = False
                for _ in range(20):
                    try:
                        if util.pingpong(s['ip'], s['redis_port']) == '+PONG\r\n':
                            ok = True
                            break
                    except Exception:
                        # Best effort: a failed ping is simply retried.  Only
                        # Exception is caught so Ctrl-C still gets through.
                        pass
                    time.sleep(0.2)
                self.assertTrue(ok, 'redis state error.')

            # Get new timestamp
            util.log_server_state(self.cluster)
            new_timestamp_list = [util.get_timestamp_of_pgs(s)
                                  for s in self.cluster['servers']]

            # Compare old timestamps and new timestamps
            for old_ts, new_ts in zip(old_timestamp_list, new_timestamp_list):
                self.assertNotEqual(
                    old_ts, new_ts,
                    'Timestamp is not changed. %d->%d' % (old_ts, new_ts))

            # Check consistency
            for load_gen in self.load_gen_list.values():
                self.assertTrue(load_gen.isConsistent(),
                                'Data inconsistency after role_change')

        # Loop (smr: 2 copy)
        self.__del_server(self.cluster['servers'][0])
        servers = [self.cluster['servers'][1], self.cluster['servers'][2]]

        normal_state = False
        for _ in range(20):
            normal_state = util.check_cluster(self.cluster['cluster_name'],
                                              self.leader_cm['ip'],
                                              self.leader_cm['cm_port'],
                                              check_quorum=True)
            if normal_state:
                break
            time.sleep(0.5)
        self.assertTrue(normal_state, "Unstable cluster state")

        for i in range(30):
            # Choose the target before logging it, so the log line shows the
            # server used in this iteration rather than a stale id.
            slave = util.get_server_by_role(servers, 'slave')
            self.assertNotEqual(slave, None, 'Get slave fail.')
            target = slave['id']

            print('')
            util.log("(2 copy) Loop:%d, target pgs:%d" % (i, target))

            # Get old timestamp
            util.log_server_state(self.cluster)
            old_timestamp_list = [util.get_timestamp_of_pgs(s)
                                  for s in servers]

            # Role change
            master = util.role_change(self.leader_cm,
                                      self.cluster['cluster_name'], target)
            self.assertNotEqual(master, -1, 'role_change error.')
            util.log('Change role success.')

            # Wait until role change finished
            for s in servers:
                ok = False
                for _ in range(20):
                    if util.pingpong(s['ip'], s['redis_port']) == '+PONG\r\n':
                        ok = True
                        break
                    time.sleep(0.1)
                self.assertTrue(ok, 'redis state error.')

            # Get new timestamp
            util.log_server_state(self.cluster)
            new_timestamp_list = [util.get_timestamp_of_pgs(s)
                                  for s in servers]

            # Compare old timestamps and new timestamps
            for old_ts, new_ts in zip(old_timestamp_list, new_timestamp_list):
                self.assertNotEqual(
                    old_ts, new_ts,
                    'Timestamp is not changed. %d->%d' % (old_ts, new_ts))

            # Check consistency
            for load_gen in self.load_gen_list.values():
                self.assertTrue(load_gen.isConsistent(),
                                'Data inconsistency after role_change')

        # Go back to initial configuration
        self.assertTrue(
            util.install_pgs(self.cluster,
                             self.cluster['servers'][0],
                             self.leader_cm,
                             rm_ckpt=False), 'failed to recover pgs.')
Пример #9
0
    def test_5_transfer_pgs_to_another_machine(self):
        """Move the partition group onto two new servers while under load.

        Sequence:
          1. write 5 x 10000 keys through a gateway, calling
             ``util.failover`` on the current master after each batch;
          2. start ``self.max_load_generator`` load generators;
          3. ``util.bgsave`` the three current servers;
          4. for ``config.server4`` and ``config.server5``: delete the local
             dump, fetch a dump from the master
             (``util.cluster_util_getdump``) and install the server
             (``util.install_pgs``);
          5. delete the three original servers one by one;
          6. assert that the new servers provide a master and a slave, stop
             the load generators and assert each is consistent;
          7. re-install the original servers and uninstall the new ones.
        """
        util.print_frame()

        self.load_gen_list = {}

        # get gateway info
        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])
        ret = gw.connect(ip, port)
        self.assertEqual(ret, 0,
                         'failed to connect to gateway, %s:%d' % (ip, port))

        # increase master generation number
        util.log('failover in order to increase master generation number.')
        key_base = 'key'
        batch_size = 10000
        next_key = 0
        try:
            for _ in range(5):
                for key_no in range(next_key, next_key + batch_size):
                    cmd = 'set %s%d %d\r\n' % (key_base, key_no, key_no)
                    gw.write(cmd)
                    res = gw.read_until('\r\n')
                    self.assertEqual(res, '+OK\r\n')
                next_key += batch_size

                m = util.get_server_by_role(self.cluster['servers'], 'master')
                util.log('failover pgs%d' % m['id'])
                ret = util.failover(m, self.leader_cm)
                self.assertTrue(ret, 'failed to failover pgs%d' % m['id'])
        finally:
            # The gateway connection is not used past this point; release it
            # even when one of the assertions above fails.
            gw.disconnect()

        # start load generator
        util.log("start load_generator")
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            self.load_gen_list[i] = load_generator.LoadGenerator(i, ip, port)
            self.load_gen_list[i].start()

        time.sleep(5)  # generate load for 5 sec
        util.log("started load_generator")

        m, s1, s2 = util.get_mss(self.cluster)
        servers = [m, s1, s2]

        # bgsave
        for s in servers:
            ret = util.bgsave(s)
            self.assertTrue(ret, 'failed to bgsave. pgs%d' % s['id'])

        new_servers = [config.server4, config.server5]

        # add new slaves
        for s in new_servers:
            util.log('delete pgs%d`s check point.' % s['id'])
            util.del_dumprdb(s['id'])

            ret = util.cluster_util_getdump(s['id'], m['ip'], m['redis_port'],
                                            'dump.rdb', 0, 8191)
            self.assertEqual(
                True, ret,
                'failed : util.cluster_util_getdump returns false, src=%s:%d dest_pgsid=%d'
                % (m['ip'], m['redis_port'], s['id']))

            ret = util.install_pgs(self.cluster,
                                   s,
                                   self.leader_cm,
                                   0,
                                   rm_ckpt=False)
            self.assertEqual(
                True, ret,
                'failed : util.pgs_add returns false, pgsid=%d' % s['id'])
            util.log('succeeeded : add a new slave, pgsid=%d' % s['id'])

            # check consistency: stop adding servers at the first load
            # generator that reports an inconsistency.
            # NOTE(review): the inconsistency is not asserted here; it only
            # surfaces in the final isConsistent() assertion - confirm intent.
            if any(not self.load_gen_list[j].isConsistent()
                   for j in range(self.max_load_generator)):
                break

        for server_to_del in servers:
            for s in servers:
                util.pingpong(s['ip'], s['smr_mgmt_port'])
            for s in new_servers:
                util.pingpong(s['ip'], s['smr_mgmt_port'])
            self.__del_server(server_to_del)
            util.log('succeeded : delete pgs%d' % server_to_del['id'])

        new_m = util.get_server_by_role(new_servers, 'master')
        new_s = util.get_server_by_role(new_servers, 'slave')
        self.assertNotEqual(new_m, None, 'master is None.')
        self.assertNotEqual(new_s, None, 'slave is None.')

        for s in new_servers:
            util.pingpong(s['ip'], s['smr_mgmt_port'])

        time.sleep(5)  # generate load for 5 sec
        # check consistency of load_generator
        for i in range(self.max_load_generator):
            self.load_gen_list[i].quit()
        for i in range(self.max_load_generator):
            self.load_gen_list[i].join()
            self.assertTrue(self.load_gen_list[i].isConsistent(),
                            'Inconsistent after migration')
            self.load_gen_list.pop(i, None)

        # Go back to initial configuration
        # recover pgs
        for s in servers:
            self.assertTrue(
                util.install_pgs(self.cluster,
                                 s,
                                 self.leader_cm,
                                 rm_ckpt=False),
                'failed to recover pgs. (install_pgs)')

        # cleanup new slaves
        for s in new_servers:
            self.assertTrue(
                util.uninstall_pgs(self.cluster, s, self.leader_cm),
                'failed to cleanup pgs. (uninstall_pgs)')
Пример #10
0
    def test_quorum(self):
        """Check the master's quorum as the two slaves go down and come back.

        Expected quorum values, in order: 2 with all servers up, 1 after the
        first slave's smr is shut down, 0 after the second one's, 1 after the
        first slave is restarted and 2 after the second one is.  While slaves
        are down the master is looked up again on every poll; otherwise the
        last known master is queried.
        """
        def expect_quorum(current_master, wanted, refresh_master):
            # Poll util.get_quorum up to 20 times, one second apart, then
            # assert on the last value.  Returns the master last queried.
            observed = None
            for _ in range(0, 20):
                if refresh_master:
                    current_master = util.get_server_by_role(
                        self.cluster['servers'], 'master')
                observed = util.get_quorum(current_master)
                if observed == wanted:
                    break
                time.sleep(1)
            self.assertEquals(observed, wanted,
                              'quorum:%d, expected:%d' % (observed, wanted))
            return current_master

        def stop_smr(pgs):
            # Shut down the smr of `pgs` and give the cluster a second.
            rc = testbase.request_to_shutdown_smr(pgs)
            self.assertEqual(rc, 0,
                             'failed to shutdown smr, server:%d' % pgs['id'])
            time.sleep(1)

        def revive(pgs):
            # Start smr and redis of `pgs` and wait for its role set-up.
            rc = testbase.request_to_start_smr(pgs)
            self.assertEqual(rc, 0, 'failed to start smr')

            rc = testbase.request_to_start_redis(pgs)
            self.assertEqual(rc, 0, 'failed to start redis')

            rc = testbase.wait_until_finished_to_set_up_role(pgs)
            self.assertEquals(rc, 0,
                              'failed to role change. smr_id:%d' % (pgs['id']))
            time.sleep(1)

        util.print_frame()

        master, slave1, slave2 = util.get_mss(self.cluster)

        # everything up
        master = expect_quorum(master, 2, False)

        # slaves go down one after the other
        stop_smr(slave1)
        master = expect_quorum(master, 1, True)

        stop_smr(slave2)
        master = expect_quorum(master, 0, True)

        # recovery, in the same order
        revive(slave1)
        master = expect_quorum(master, 1, False)

        revive(slave2)
        expect_quorum(master, 2, False)
Пример #11
0
    def state_transition(self):
        """Follow a slave's SMR state through shutdown (F) and restart (N).

        The slave must start in state 'N'.  Its smr and redis are shut down
        and the state must become 'F'.  While it is down, 100 timestamped
        writes are sent through a gateway.  After smr and redis are started
        again and the role set-up finished, a redis connection to the slave
        must succeed and its state must return to 'N'.
        """
        slave = util.get_server_by_role(self.cluster['servers'], 'slave')
        self.assertNotEquals(slave, None,
                             'failed to get_server_by_role-slave')

        # gateway address and handle (connected later)
        gw_ip, gw_port = util.get_rand_gateway(self.cluster)
        gateway = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])

        # initial state must be N
        observed = self.get_expected_smr_state(slave, 'N')
        current_role = util.get_role_of_server(slave)
        self.assertEquals(
            'N', observed, 'server%d - state:%s, role:%s, expected:N' %
            (slave['id'], observed, current_role))

        # take smr and redis down
        rc = testbase.request_to_shutdown_smr(slave)
        self.assertEquals(rc, 0, 'failed to shutdown smr')
        rc = testbase.request_to_shutdown_redis(slave)
        self.assertEquals(rc, 0, 'failed to shutdown redis')
        time.sleep(3)

        # state must now be F
        wanted = 'F'
        observed = self.get_expected_smr_state(slave, wanted)
        self.assertEquals(
            wanted, observed, 'server%d - state:%s, but expected:%s' %
            (slave['id'], observed, wanted))

        # write through the gateway while the slave is down
        rc = gateway.connect(gw_ip, gw_port)
        self.assertEquals(rc, 0,
                          'failed to connect to gateway, %s:%d' % (gw_ip, gw_port))
        key = 'new_key_haha'
        for _ in range(0, 100):
            gateway.write('set %s %f\r\n' % (key, time.time()))
            reply = gateway.read_until('\r\n')
            self.assertEquals(reply, '+OK\r\n')
        gateway.disconnect()

        # bring smr and redis back
        rc = testbase.request_to_start_smr(slave)
        self.assertEquals(rc, 0, 'failed to start smr')

        rc = testbase.request_to_start_redis(slave)
        self.assertEquals(rc, 0, 'failed to start redis')

        rc = testbase.wait_until_finished_to_set_up_role(slave, 10)
        self.assertEquals(rc, 0,
                          'failed to role change. smr_id:%d' % (slave['id']))
        time.sleep(5)

        revived = redis_mgmt.Redis(slave['id'])
        rc = revived.connect(slave['ip'], slave['redis_port'])
        self.assertEquals(rc, 0, 'failed to connect to redis')

        # state must be back to N within 20 polls
        wanted = 'N'
        attempts_left = 20
        while attempts_left > 0:
            attempts_left -= 1
            observed = self.get_expected_smr_state(slave, wanted)
            if observed == wanted:
                break
            time.sleep(1)
        current_role = util.get_role_of_server(slave)
        self.assertEquals(
            wanted, observed, 'server%d - state:%s, role:%s, but expected:%s' %
            (slave['id'], observed, current_role, wanted))
Пример #12
0
    def test_5_transfer_pgs_to_another_machine(self):
        """Move the partition group onto two new servers while under load.

        Writes 5 x 10000 keys through a gateway with a ``util.failover`` of
        the current master after each batch, starts the load generators,
        bgsaves the three current servers, adds ``config.server4`` and
        ``config.server5`` from a dump of the master, deletes the three
        original servers, and finally stops the load generators and asserts
        that each of them is consistent.
        """
        util.print_frame()

        self.load_gen_list = {}

        # get gateway info
        # NOTE(review): gw is connected here but never disconnected in this
        # method - confirm whether that is intentional.
        ip, port = util.get_rand_gateway( self.cluster )
        gw = gateway_mgmt.Gateway( self.cluster['servers'][0]['id'] )
        ret = gw.connect( ip, port )
        self.assertEqual( ret, 0, 'failed to connect to gateway, %s:%d' % (ip, port) )

        # increase master generation number
        util.log('failover in order to increase master generation number.')
        # NOTE(review): `max` shadows the builtin and the inner loop reuses
        # the outer loop variable `i`; harmless here, but easy to trip over.
        max = 0
        for i in range(5):
            key_base = 'key'
            # write the next batch of 10000 keys: key<N> -> N
            for i in range(max, max+10000):
                cmd = 'set %s%d %d\r\n' % (key_base, i, i)
                gw.write( cmd )
                res = gw.read_until( '\r\n' )
                self.assertEquals( res, '+OK\r\n' )
            max = max + 10000

            # fail over whichever server is currently the master
            m = util.get_server_by_role(self.cluster['servers'], 'master')
            util.log('failover pgs%d' %  m['id'])
            ret = util.failover(m, self.leader_cm)
            self.assertTrue(ret, 'failed to failover pgs%d' % m['id'])

        # start load generator
        util.log("start load_generator")
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            self.load_gen_list[i] = load_generator.LoadGenerator(i, ip, port)
            self.load_gen_list[i].start()

        time.sleep(5) # generate load for 5 sec
        util.log("started load_generator")

        # current master and the two other servers
        m, s1, s2 = util.get_mss(self.cluster)
        servers = [m, s1, s2]

        # bgsave
        for s in servers:
            ret = util.bgsave(s)
            self.assertTrue(ret, 'failed to bgsave. pgs%d' % s['id'])

        new_servers = [config.server4, config.server5]

        # add new slaves
        for s in new_servers:
            util.log('delete pgs%d`s check point.' % s['id'])
            util.del_dumprdb(s['id'])

            # fetch a dump from the master for the new server
            # (0 and 8191 are presumably a slot range - TODO confirm)
            ret = util.cluster_util_getdump(s['id'], m['ip'], m['redis_port'], 'dump.rdb', 0, 8191)
            self.assertEqual(True, ret,
                'failed : util.cluster_util_getdump returns false, src=%s:%d dest_pgsid=%d' % (
                m['ip'], m['redis_port'], s['id']))

            ret = util.pgs_add(self.cluster, s, self.leader_cm, 0, rm_ckpt=False)
            self.assertEqual(True, ret, 'failed : util.pgs_add returns false, pgsid=%d' % s['id'])
            util.log('succeeeded : add a new slave, pgsid=%d' % s['id'])

            # check consistency
            # NOTE(review): an inconsistency only stops adding further
            # servers; `ok` is never asserted, so the failure surfaces only
            # in the final isConsistent() assertion below - confirm intent.
            ok = True
            for j in range(self.max_load_generator):
                if self.load_gen_list[j].isConsistent() == False:
                    ok = False
                    break
            if not ok:
                break;

        # remove the original servers one by one, pinging the smr management
        # port of every old and new server before each removal
        for server_to_del in servers:
            for s in servers:
                util.pingpong( s['ip'], s['smr_mgmt_port'] )
            for s in new_servers:
                util.pingpong( s['ip'], s['smr_mgmt_port'] )
            self.__del_server(server_to_del)
            util.log('succeeded : delete pgs%d' % server_to_del['id'])

        # the new servers must now provide both a master and a slave
        new_m = util.get_server_by_role(new_servers, 'master')
        new_s = util.get_server_by_role(new_servers, 'slave')
        self.assertNotEqual( new_m, None, 'master is None.' )
        self.assertNotEqual( new_s, None, 'slave is None.' )

        for s in new_servers:
            util.pingpong( s['ip'], s['smr_mgmt_port'] )

        time.sleep(5) # generate load for 5 sec
        # check consistency of load_generator
        # ask every generator to quit first, then join and verify each one
        for i in range(self.max_load_generator):
            self.load_gen_list[i].quit()
        for i in range(self.max_load_generator):
            self.load_gen_list[i].join()
            self.assertTrue(self.load_gen_list[i].isConsistent(), 'Inconsistent after migration')
            self.load_gen_list.pop(i, None)
    def test_restart_recovery_with_remote_checkpoint_and_remote_log( self ):
        """Restart a slave that has no local checkpoint and verify its data.

        Keys key0..key99 are written through a gateway, the slave's local
        dump is deleted, the master is bgsaved and the slave's smr is shut
        down.  Keys key100..key199 are written while it is down.  After the
        slave's smr and redis are started again and its role set-up finished,
        all 200 keys are read directly from the slave and compared with the
        values that were written.
        """
        def store_range( first, last ):
            # Write key<N> -> N for first <= N < last through the gateway.
            for number in range( first, last ):
                gateway.write( 'set %s%d %d\r\n' % (key_base, number, number) )
                reply = gateway.read_until( '\r\n' )
                self.assertEquals( reply, '+OK\r\n' )

        util.print_frame()
        key_base = 'key'
        slave = util.get_server_by_role( self.cluster['servers'], 'slave' )
        master = util.get_server_by_role( self.cluster['servers'], 'master' )

        gw_ip, gw_port = util.get_rand_gateway( self.cluster )
        gateway = gateway_mgmt.Gateway( master['id'] )
        rc = gateway.connect( gw_ip, gw_port )
        self.assertEqual( rc, 0, 'failed to connect to gateway' )

        # set initial data in order to make an elapsed time for bgsave longer
        self.put_some_data()

        # first batch of keys
        store_range( 0, 100 )
        gateway.disconnect()

        # drop the slave's local checkpoint
        util.log('delete pgs%d`s check point.' % slave['id'])
        util.del_dumprdb( slave['id'] )

        # create a checkpoint on the master instead
        saved = util.bgsave( master )
        self.assertTrue( saved, 'failed to bgsave. pgs%d' % master['id'] )

        # stop the slave's smr
        util.log('shutdown target')
        rc = testbase.request_to_shutdown_smr( slave )
        self.assertEqual( rc, 0, 'failed to shutdown smr' )

        time.sleep( 10 )

        # second batch of keys, written while the slave is down
        rc = gateway.connect( gw_ip, gw_port )
        self.assertEqual( rc, 0, 'failed to connect to gateway' )
        store_range( 100, 200 )
        gateway.disconnect()

        # restart the slave
        util.log('recovery target')
        rc = testbase.request_to_start_smr( slave )
        self.assertEqual( rc, 0, 'failed to start smr' )

        rc = testbase.request_to_start_redis( slave )
        self.assertEqual( rc, 0, 'failed to start redis' )
        time.sleep( 5 )

        rc = testbase.wait_until_finished_to_set_up_role( slave )
        self.assertEquals( rc, 0, 'failed to role change. smr_id:%d' % (slave['id']) )

        # read every key back from the restarted slave
        revived = redis_mgmt.Redis( slave['id'] )
        rc = revived.connect( slave['ip'], slave['redis_port'] )
        self.assertEquals( rc, 0, 'failed to connect to redis' )

        for number in range( 0, 200 ):
            revived.write( 'get %s%d\r\n' % (key_base, number) )
            revived.read_until( '\r\n' )   # first reply line is read and dropped
            answer = revived.read_until( '\r\n' )
            self.assertEqual( answer, '%d\r\n' % number,
                              'inconsistent %s, %d' % (answer, number) )
Пример #14
0
    def failure_recovery(self, role, wait_count=10, redis_only=False):
        """Shut down the server with `role`, restart it and verify its data.

        A key is written through a gateway, the server is shut down and must
        reach SMR state 'F', the key is overwritten, the server is restarted
        and must reach state 'N' again.  The key is then read directly from
        the restarted redis and must hold the overwritten value.

        Args:
            role: role name passed to ``util.get_server_by_role`` to choose
                the server to shut down.
            wait_count: forwarded to
                ``testbase.wait_until_finished_to_set_up_role``.
            redis_only: if true, only redis is shut down and restarted;
                otherwise smr is shut down and restarted as well.

        Raises:
            AssertionError: (via the test-case assertions) when any step
                fails.
        """
        def wait_for_state(pgs, expected, max_try=20):
            # Poll the SMR state once per second until it matches `expected`
            # or `max_try` polls were made; return the last state seen.
            state = None
            for _ in range(max_try):
                state = util.get_smr_state(pgs, self.leader_cm)
                if state == expected:
                    break
                time.sleep(1)
            return state

        time.sleep(2)

        # get gateway info
        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])
        ret = gw.connect(ip, port)
        self.assertEqual(ret, 0,
                         'failed to connect to gateway, %s:%d' % (ip, port))

        key = 'new_key_haha'
        check_value = '54321'
        try:
            # set value
            gw.write('set %s 12345\r\n' % key)
            res = gw.read_until('\r\n')
            self.assertEqual(res, '+OK\r\n')

            # shutdown
            server = util.get_server_by_role(self.cluster['servers'], role)
            self.assertNotEqual(server, None,
                                'failed to get server by role:%s' % role)

            if not redis_only:
                ret = testbase.request_to_shutdown_smr(server)
                self.assertEqual(ret, 0, 'failed to shutdown smr')

            ret = testbase.request_to_shutdown_redis(server)
            self.assertEqual(ret, 0, 'failed to shutdown redis')

            # check state F
            expected = 'F'
            state = wait_for_state(server, expected)
            self.assertEqual(
                expected, state, 'server%d - state:%s, expected:%s' %
                (server['id'], state, expected))

            # set value
            gw.write('set %s %s\r\n' % (key, check_value))
            res = gw.read_until('\r\n')
            self.assertEqual(res, '+OK\r\n')
        finally:
            # Release the gateway connection even when an assertion above
            # fails; previously it stayed open in that case.
            gw.disconnect()

        # recovery
        if not redis_only:
            ret = testbase.request_to_start_smr(server)
            self.assertEqual(ret, 0, 'failed to start smr')

        ret = testbase.request_to_start_redis(server)
        self.assertEqual(ret, 0, 'failed to start redis')

        ret = testbase.wait_until_finished_to_set_up_role(server, wait_count)
        self.assertEqual(ret, 0,
                         'failed to role change. smr_id:%d' % (server['id']))

        # NOTE(review): this redis connection is never closed here - confirm
        # whether redis_mgmt.Redis offers a disconnect() and call it.
        redis = redis_mgmt.Redis(server['id'])
        ret = redis.connect(server['ip'], server['redis_port'])
        self.assertEqual(ret, 0, 'failed to connect to redis')

        # check state N
        expected = 'N'
        state = wait_for_state(server, expected)
        # separate name so the `role` argument is not overwritten
        current_role = util.get_role_of_server(server)
        self.assertEqual(
            expected, state, 'server%d - state:%s, expected:%s, role:%s' %
            (server['id'], state, expected, current_role))

        # check value
        redis.write('get %s\r\n' % key)
        redis.read_until('\r\n')  # first reply line is read and dropped
        response = redis.read_until('\r\n')
        self.assertEqual(response, '%s\r\n' % (check_value),
                         'inconsistent %s, %s' % (response, check_value))
Пример #15
0
    def test_1_role_change(self):
        """Change roles repeatedly under load and verify every change.

        Starts ``self.max_load_generator`` load generators against random
        gateways, then performs 30 role changes over all cluster servers
        ("3 copy") and 30 more over servers[1] and servers[2] after
        ``servers[0]`` has been removed ("2 copy").

        After each role change every server must answer PING, the timestamp
        read for each server must differ from the one read before the change,
        and every load generator must report consistent data.
        """
        util.print_frame()

        self.load_gen_list = {}

        # Start load generator
        util.log("Start load_generator")
        for i in range(self.max_load_generator):
            ip, port = util.get_rand_gateway(self.cluster)
            load_gen = load_generator.LoadGenerator(i, ip, port)
            load_gen.start()
            self.load_gen_list[i] = load_gen

        def snapshot_timestamps(servers):
            # Log the cluster state, then read one timestamp per server.
            util.log_server_state(self.cluster)
            return [util.get_timestamp_of_pgs(s) for s in servers]

        def wait_for_pong(servers):
            # Every redis must answer PING within 20 tries, 0.1s apart.
            for s in servers:
                ok = False
                for _ in range(20):
                    if util.pingpong(s['ip'], s['redis_port']) == '+PONG\r\n':
                        ok = True
                        break
                    time.sleep(0.1)
                self.assertTrue(ok, 'redis state error.')

        def run_role_changes(label, servers, target, next_target):
            # One phase of the test: 30 role changes over `servers`.
            # `next_target` maps a pgs id to the next candidate id.
            for loop_cnt in range(30):
                print('')
                util.log("(%s) Loop:%d, target pgs:%d"
                         % (label, loop_cnt, target))

                # Get old timestamp
                old_timestamps = snapshot_timestamps(servers)

                # Role change
                master = util.role_change(
                    self.leader_cm, self.cluster['cluster_name'], target)
                self.assertNotEqual(master, -1, 'role_change error.')
                # The next target must differ from the value just returned
                # by role_change.
                while target == master:
                    target = next_target(target)
                util.log('Change role success.')

                # Wait until role change finished
                wait_for_pong(servers)

                # Get new timestamp and compare with the old one
                new_timestamps = snapshot_timestamps(servers)
                for old_ts, new_ts in zip(old_timestamps, new_timestamps):
                    self.assertNotEqual(
                        old_ts, new_ts,
                        'Timestamp is not changed. %d->%d' % (old_ts, new_ts))

                # Check consistency
                for load_gen in self.load_gen_list.values():
                    self.assertTrue(load_gen.isConsistent(),
                                    'Data inconsistency after role_change')

        # Loop (smr: 3 copy)
        target_server = util.get_server_by_role(self.cluster['servers'], 'slave')
        self.assertNotEquals(target_server, None, 'Get slave fail.')
        run_role_changes('3 copy', self.cluster['servers'],
                         target_server['id'],
                         lambda pgs_id: (pgs_id + 1) % 3)

        # Loop (smr: 2 copy)
        self.__del_server(self.cluster['servers'][0])
        servers = [self.cluster['servers'][1], self.cluster['servers'][2]]
        slave = util.get_server_by_role(servers, 'slave')
        # Fail with a clear message instead of a TypeError on slave['id'].
        self.assertNotEquals(slave, None, 'Get slave fail.')
        run_role_changes('2 copy', servers, slave['id'],
                         lambda pgs_id: pgs_id % 2 + 1)
Пример #16
0
    def test_quorum( self ):
        """Check the master's quorum while slaves are stopped and restarted.

        Expected quorum sequence: 2 initially, 1 after slave1's smr is shut
        down, 0 after slave2's smr is shut down, 1 after slave1 is restarted
        and 2 after slave2 is restarted. Each value is polled for up to 20
        tries, one second apart.
        """
        util.print_frame()

        master, slave1, slave2 = util.get_mss(self.cluster)

        def await_quorum( pgs, wanted, refresh_master ):
            # Poll util.get_quorum until it equals `wanted`. When
            # `refresh_master` is set the master is looked up again before
            # every poll; the server polled last is returned.
            for _ in range( 0, 20 ):
                if refresh_master:
                    pgs = util.get_server_by_role( self.cluster['servers'], 'master' )
                quorum = util.get_quorum( pgs )
                if quorum == wanted:
                    break
                time.sleep( 1 )
            self.assertEquals( quorum, wanted,
                               'quorum:%d, expected:%d' % (quorum, wanted) )
            return pgs

        def stop_smr( pgs ):
            # Shut down only the smr of `pgs`, then give it one second.
            ret = testbase.request_to_shutdown_smr( pgs )
            self.assertEqual( ret, 0, 'failed to shutdown smr, server:%d' % pgs['id'] )
            time.sleep( 1 )

        def restart( pgs ):
            # Start smr and redis of `pgs` and wait for its role to be set up.
            ret = testbase.request_to_start_smr( pgs )
            self.assertEqual( ret, 0, 'failed to start smr' )

            ret = testbase.request_to_start_redis( pgs )
            self.assertEqual( ret, 0, 'failed to start redis' )

            ret = testbase.wait_until_finished_to_set_up_role( pgs )
            self.assertEquals( ret, 0, 'failed to role change. smr_id:%d' % (pgs['id']) )
            time.sleep( 1 )

        # all three servers alive
        master = await_quorum( master, 2, False )

        # lose the slaves one by one
        stop_smr( slave1 )
        master = await_quorum( master, 1, True )

        stop_smr( slave2 )
        master = await_quorum( master, 0, True )

        # recovery, one slave at a time
        restart( slave1 )
        master = await_quorum( master, 1, False )

        restart( slave2 )
        await_quorum( master, 2, False )
    def test_4_PGS_mgen_is_less_than_PG_mgen( self ):
        """Rejoin a PGS after the master has been failed over twice.

        The current master (``server_to_join``) is shut down, 10000 keys are
        written through a gateway and ``self.failover`` is run on the current
        master twice. ``server_to_join`` is then restarted, 10000 more keys
        are written, and all 20000 keys are read back directly from the redis
        of ``server_to_join``.

        The gateway connection is closed even when an assertion fails.

        Returns:
            0 when every check has passed.
        """
        util.print_frame()

        # get gateway info
        ip, port = util.get_rand_gateway( self.cluster )
        gw = gateway_mgmt.Gateway( self.cluster['servers'][0]['id'] )
        ret = gw.connect( ip, port )
        self.assertEqual( ret, 0, 'failed to connect to gateway, %s:%d' % (ip, port) )

        try:
            # initial data
            util.put_some_data(self.cluster)

            # shutdown
            server_to_join = util.get_server_by_role( self.cluster['servers'], 'master' )
            ret = testbase.request_to_shutdown_smr( server_to_join )
            self.assertEqual( ret, 0, 'failed to shutdown smr' )
            ret = testbase.request_to_shutdown_redis( server_to_join )
            self.assertEquals( ret, 0, 'failed to shutdown redis' )

            # check state F
            max_try = 20
            expected = 'F'
            for i in range( 0, max_try ):
                state = util.get_smr_state( server_to_join, self.leader_cm )
                if expected == state:
                    break
                time.sleep( 1 )
            self.assertEquals( expected, state,
                               'server%d - state:%s, expected:%s' % (server_to_join['id'], state, expected) )

            # set value
            key_base = 'mw'
            for i in range( 0, 10000 ):
                cmd = 'set %s%d %d\r\n' % (key_base, i, i)
                gw.write( cmd )
                res = gw.read_until( '\r\n' )
                self.assertEquals( res, '+OK\r\n' )

            # master failover 1 (master generation + 1)
            util.log('master failover 1')
            server = util.get_server_by_role( self.cluster['servers'], 'master' )
            self.failover( server )

            # check quorum (copy:3, quorum:1, available:2)
            ok = False
            for i in range( 10 ):
                ok = util.check_quorum( self.cluster['cluster_name'],
                                        self.leader_cm['ip'], self.leader_cm['cm_port'] )
                if ok:
                    break
                time.sleep( 1 )
            self.assertTrue( ok, 'Check quorum fail.' )

            # master failover 2 (master generation + 1)
            util.log('master failover 2')
            server = util.get_server_by_role( self.cluster['servers'], 'master' )
            self.failover( server )

            # recovery
            util.log('master recovery start.')
            ret = testbase.request_to_start_smr( server_to_join )
            self.assertEqual( ret, 0, 'failed to start smr' )

            ret = testbase.request_to_start_redis( server_to_join )
            self.assertEqual( ret, 0, 'failed to start redis' )

            ret = testbase.wait_until_finished_to_set_up_role( server_to_join, 10 )
            self.assertEquals( ret, 0, 'failed to role change. smr_id:%d' % (server_to_join['id']) )
            util.log('master recovery end successfully.')

            # check state N
            # NOTE(review): this polls `server` (the master picked for
            # failover 2), not `server_to_join`, which is the PGS that was
            # just restarted -- confirm this is intended.
            max_try = 20
            expected = 'N'
            for i in range( 0, max_try ):
                state = util.get_smr_state( server, self.leader_cm )
                if expected == state:
                    break
                time.sleep( 1 )
            role = util.get_role_of_server( server )
            self.assertEquals( expected, state,
                               'server%d - state:%s, expected:%s, role:%s' % (server['id'], state, expected, role) )

            time.sleep( 5 )

            # set value
            for i in range( 10000, 20000 ):
                cmd = 'set %s%d %d\r\n' % (key_base, i, i)
                gw.write( cmd )
                res = gw.read_until( '\r\n' )
                self.assertEquals( res, '+OK\r\n' )

            # The looked-up master is not used below; the call is kept so the
            # sequence of cluster queries stays the same.
            server = util.get_server_by_role( self.cluster['servers'], 'master' )

            redis = redis_mgmt.Redis( server_to_join['id'] )
            ret = redis.connect( server_to_join['ip'], server_to_join['redis_port'] )
            self.assertEquals( ret, 0, 'failed to connect to redis' )

            # check value
            for i in range( 0, 20000 ):
                cmd = 'get %s%d\r\n' % (key_base, i)
                redis.write( cmd )
                # The first reply line is discarded; the second is compared.
                redis.read_until( '\r\n' )
                response = redis.read_until( '\r\n' )
                self.assertEqual( response, '%d\r\n' % (i), 'inconsistent %s, %d' % (response[:-2], i) )
        finally:
            # Release the gateway connection on failure as well as success.
            gw.disconnect()

        return 0
Пример #18
0
    def recovery_with_local_checkpoint_and_remote_log(self, role):
        """Recover a PGS whose replication logs were deleted after a bgsave.

        Writes 50000 timestamped keys through a gateway, runs bgsave on the
        server holding ``role``, shuts that server down and deletes its smr
        logs. 50000 more keys are written while it is down, the server is
        restarted, and all 100000 keys are read back directly from its redis.

        Each gateway connection is closed once its batch of writes is done,
        so the second connect does not replace a connection that is still
        open.

        Args:
            role: role name passed to ``util.get_server_by_role`` to pick the
                server under test.
        """
        server = util.get_server_by_role(self.cluster['servers'], role)

        # set initial data in order to make an elapsed time for bgsave longer
        self.put_some_data()

        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway(server['id'])
        timestamp = {}
        key_base = 'key0000000000111111111122222222223333333333444444444455555555556666666666777777777788888888889999999999'

        def set_range(begin, end, connect_error):
            # Connect, store time.time() under keys begin..end-1, disconnect.
            ret = gw.connect(ip, port)
            self.assertEqual(ret, 0, connect_error)
            try:
                for i in range(begin, end):
                    timestamp[i] = time.time()
                    k = '%s_%d' % (key_base, i)
                    cmd = 'set %s %f\r\n' % (k, timestamp[i])
                    gw.write(cmd)
                    response = gw.read_until('\r\n')
                    self.assertNotEqual(
                        response.find('+OK'), -1,
                        'failed to set key value through gateway')
            finally:
                gw.disconnect()

        # set value
        set_range(0, 50000,
                  'failed to connect to gateway, id:%d' % server['id'])

        # generate a check point
        bgsave_ret = util.bgsave(server)
        self.assertTrue(bgsave_ret, 'failed to bgsave. pgs%d' % server['id'])

        # shutdown
        ret = testbase.request_to_shutdown_smr(server)
        self.assertEqual(ret, 0, 'failed to shutdown smr')
        ret = testbase.request_to_shutdown_redis(server)
        self.assertEqual(ret, 0, 'failed to shutdown redis')
        util.log('succeeded : shutdown pgs%d' % (server['id']))

        # delete smr_logs
        ret = util.delete_smr_logs(server['id'])
        self.assertEqual(ret, 0,
                         'failed to delete smr log, id:%d' % server['id'])
        util.log('succeeded : delete replication logs')

        time.sleep(5)

        # set value
        set_range(50000, 100000, 'failed to connect to gateway')

        # recovery
        ret = testbase.request_to_start_smr(server)
        self.assertEqual(ret, 0, 'failed to start smr')

        ret = testbase.request_to_start_redis(server)
        self.assertEqual(ret, 0, 'failed to start redis')
        time.sleep(5)

        ret = testbase.wait_until_finished_to_set_up_role(server)
        self.assertEquals(ret, 0,
                          'failed to role change. smr_id:%d' % (server['id']))
        util.log('succeeded : recover pgs%d' % server['id'])

        # check value
        recovered_redis = redis_mgmt.Redis(server['id'])
        ret = recovered_redis.connect(server['ip'], server['redis_port'])
        self.assertEquals(ret, 0, 'failed to connect to redis')

        for i in range(0, 100000):
            k = '%s_%d' % (key_base, i)
            cmd = 'get %s\r\n' % (k)
            recovered_redis.write(cmd)
            # The first reply line is discarded; the second is compared.
            recovered_redis.read_until('\r\n')
            response = recovered_redis.read_until('\r\n')
            self.assertEqual(response, '%f\r\n' % (timestamp[i]),
                             'inconsistent %s, %f' % (response, timestamp[i]))
Пример #19
0
    def test_restart_recovery_with_remote_checkpoint_and_remote_log(self):
        """Recover a slave that has no local checkpoint.

        Writes keys 0..99 through a gateway, deletes the slave's dump file,
        runs bgsave on the master and shuts down the slave's smr. Keys
        100..199 are written while it is down, the slave is restarted, and
        all 200 keys are read back directly from the slave's redis.
        """
        util.print_frame()
        prefix = 'key'
        target = util.get_server_by_role(self.cluster['servers'], 'slave')
        master = util.get_server_by_role(self.cluster['servers'], 'master')

        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway(master['id'])

        def open_gateway():
            status = gw.connect(ip, port)
            self.assertEqual(status, 0, 'failed to connect to gateway')

        def store(begin, end):
            # Write "<prefix><n>" -> n for n in [begin, end), then disconnect.
            for n in range(begin, end):
                gw.write('set %s%d %d\r\n' % (prefix, n, n))
                reply = gw.read_until('\r\n')
                self.assertEquals(reply, '+OK\r\n')
            gw.disconnect()

        open_gateway()

        # set initial data in order to make an elapsed time for bgsave longer
        self.put_some_data()

        # generate some data
        store(0, 100)

        # delete a local checkpoint
        util.log('delete pgs%d`s check point.' % target['id'])
        util.del_dumprdb(target['id'])

        # generate a remote check point
        saved = util.bgsave(master)
        self.assertTrue(saved, 'failed to bgsave. pgs%d' % master['id'])

        # shutdown
        util.log('shutdown target')
        status = testbase.request_to_shutdown_smr(target)
        self.assertEqual(status, 0, 'failed to shutdown smr')

        time.sleep(10)

        # generate some data
        open_gateway()
        store(100, 200)

        # recovery
        util.log('recovery target')
        status = testbase.request_to_start_smr(target)
        self.assertEqual(status, 0, 'failed to start smr')

        status = testbase.request_to_start_redis(target)
        self.assertEqual(status, 0, 'failed to start redis')
        time.sleep(5)

        status = testbase.wait_until_finished_to_set_up_role(target)
        self.assertEquals(status, 0,
                          'failed to role change. smr_id:%d' % (target['id']))

        # check value
        recovered_redis = redis_mgmt.Redis(target['id'])
        status = recovered_redis.connect(target['ip'], target['redis_port'])
        self.assertEquals(status, 0, 'failed to connect to redis')

        for n in range(0, 200):
            recovered_redis.write('get %s%d\r\n' % (prefix, n))
            # The first reply line is discarded; the second is compared.
            recovered_redis.read_until('\r\n')
            response = recovered_redis.read_until('\r\n')
            self.assertEqual(response, '%d\r\n' % n,
                             'inconsistent %s, %d' % (response, n))
    def recovery_with_local_checkpoint_and_remote_log( self, role ):
        """Recover a PGS from its own checkpoint after its smr logs are gone.

        Steps: write 50000 timestamped keys through a gateway, bgsave on the
        server holding ``role``, shut it down, delete its smr logs, write
        50000 more keys, restart it, then read all 100000 keys back directly
        from its redis and compare them with the values written.

        The gateway is disconnected after each batch of writes, so it is not
        reconnected while a previous connection is still open and it is not
        left open when the method returns.

        Args:
            role: role name passed to ``util.get_server_by_role`` to pick the
                server under test.
        """
        server = util.get_server_by_role( self.cluster['servers'], role )

        # set initial data in order to make an elapsed time for bgsave longer
        self.put_some_data()

        # set value
        ip, port = util.get_rand_gateway( self.cluster )
        gw = gateway_mgmt.Gateway( server['id'] )
        ret = gw.connect( ip, port )
        self.assertEqual( ret, 0, 'failed to connect to gateway, id:%d' % server['id'] )
        timestamp = {}
        key_base = 'key0000000000111111111122222222223333333333444444444455555555556666666666777777777788888888889999999999'
        try:
            for i in range( 0, 50000 ):
                timestamp[i] = time.time()
                k = '%s_%d' % (key_base, i)
                cmd = 'set %s %f\r\n' % (k, timestamp[i])
                gw.write( cmd )
                response = gw.read_until( '\r\n' )
                self.assertNotEqual( response.find( '+OK' ), -1, 'failed to set key value through gateway' )
        finally:
            # Close the first connection before the gateway is reused below.
            gw.disconnect()

        # generate a check point
        bgsave_ret = util.bgsave( server )
        self.assertTrue( bgsave_ret, 'failed to bgsave. pgs%d' % server['id'] )

        # shutdown
        ret = testbase.request_to_shutdown_smr( server )
        self.assertEqual( ret, 0, 'failed to shutdown smr' )
        ret = testbase.request_to_shutdown_redis( server )
        self.assertEqual( ret, 0, 'failed to shutdown redis' )
        util.log('succeeded : shutdown pgs%d' % (server['id']))

        # delete smr_logs
        ret = util.delete_smr_logs( server['id'] )
        self.assertEqual( ret, 0, 'failed to delete smr log, id:%d' % server['id'] )
        util.log('succeeded : delete replication logs')

        time.sleep( 5 )

        # set value
        ret = gw.connect( ip, port )
        self.assertEqual( ret, 0, 'failed to connect to gateway' )
        try:
            for i in range( 50000, 100000 ):
                timestamp[i] = time.time()
                k = '%s_%d' % (key_base, i)
                cmd = 'set %s %f\r\n' % (k, timestamp[i])
                gw.write( cmd )
                response = gw.read_until( '\r\n' )
                self.assertNotEqual( response.find( '+OK' ), -1, 'failed to set key value through gateway' )
        finally:
            gw.disconnect()

        # recovery
        ret = testbase.request_to_start_smr( server )
        self.assertEqual( ret, 0, 'failed to start smr' )

        ret = testbase.request_to_start_redis( server )
        self.assertEqual( ret, 0, 'failed to start redis' )
        time.sleep( 5 )

        ret = testbase.wait_until_finished_to_set_up_role( server )
        self.assertEquals( ret, 0, 'failed to role change. smr_id:%d' % (server['id']) )
        util.log('succeeded : recover pgs%d' % server['id'])

        # check value
        recovered_redis = redis_mgmt.Redis( server['id'] )
        ret = recovered_redis.connect( server['ip'], server['redis_port'] )
        self.assertEquals( ret, 0, 'failed to connect to redis' )

        for i in range( 0, 100000 ):
            k = '%s_%d' % (key_base, i)
            cmd = 'get %s\r\n' % (k)
            recovered_redis.write( cmd )
            # The first reply line is discarded; the second is compared.
            recovered_redis.read_until( '\r\n' )
            response = recovered_redis.read_until( '\r\n' )
            self.assertEqual( response, '%f\r\n' % (timestamp[i]), 'inconsistent %s, %f' % (response, timestamp[i]) )
Пример #21
0
    def state_transition( self ):
        """Drive one slave through the smr states N -> F -> N.

        Verifies that a slave starts in state 'N', reaches 'F' after its smr
        and redis are shut down, and is back in 'N' after both are restarted.
        While the slave is down, one key is overwritten 100 times through a
        gateway.
        """
        pgs = util.get_server_by_role( self.cluster['servers'], 'slave' )
        self.assertNotEquals( pgs, None, 'failed to get_server_by_role-slave' )

        # get gateway info
        ip, port = util.get_rand_gateway( self.cluster )
        gw = gateway_mgmt.Gateway( self.cluster['servers'][0]['id'] )

        # check initial state
        state = self.get_expected_smr_state( pgs, 'N' )
        role = util.get_role_of_server( pgs )
        self.assertEquals( 'N', state,
                           'server%d - state:%s, role:%s, expected:N' % (pgs['id'], state, role) )

        # shutdown
        self.assertEquals( testbase.request_to_shutdown_smr( pgs ), 0,
                           'failed to shutdown smr' )
        self.assertEquals( testbase.request_to_shutdown_redis( pgs ), 0,
                           'failed to shutdown redis' )
        time.sleep( 3 )

        # check state F
        wanted = 'F'
        state = self.get_expected_smr_state( pgs, wanted )
        self.assertEquals( wanted, state,
                           'server%d - state:%s, but expected:%s' % (pgs['id'], state, wanted) )

        # set value
        self.assertEquals( gw.connect( ip, port ), 0,
                           'failed to connect to gateway, %s:%d' % (ip, port) )
        key = 'new_key_haha'
        for _ in range( 0, 100 ):
            # the same key is overwritten with the current time on each pass
            gw.write( 'set %s %f\r\n' % (key, time.time()) )
            reply = gw.read_until( '\r\n' )
            self.assertEquals( reply, '+OK\r\n' )
        gw.disconnect()

        # recovery
        self.assertEquals( testbase.request_to_start_smr( pgs ), 0,
                           'failed to start smr' )
        self.assertEquals( testbase.request_to_start_redis( pgs ), 0,
                           'failed to start redis' )

        status = testbase.wait_until_finished_to_set_up_role( pgs, 10 )
        self.assertEquals( status, 0, 'failed to role change. smr_id:%d' % (pgs['id']) )
        time.sleep( 5 )

        # the connection is only checked for success; it is not used afterwards
        redis = redis_mgmt.Redis( pgs['id'] )
        status = redis.connect( pgs['ip'], pgs['redis_port'] )
        self.assertEquals( status, 0, 'failed to connect to redis' )

        # check state N
        wanted = 'N'
        for _ in range( 0, 20 ):
            state = self.get_expected_smr_state( pgs, wanted )
            if state == wanted:
                break
            time.sleep( 1 )
        role = util.get_role_of_server( pgs )
        self.assertEquals( wanted, state,
                           'server%d - state:%s, role:%s, but expected:%s' % (pgs['id'], state, role, wanted) )
Пример #22
0
    def test_4_PGS_mgen_is_less_than_PG_mgen(self):
        """Rejoin a PGS after two master failovers and verify its data.

        The current master (``server_to_join``) is shut down and 10000 keys
        are written through a gateway. ``self.failover`` is then run on the
        current master twice, ``server_to_join`` is restarted, 10000 more keys
        are written, and all 20000 keys are read back directly from the redis
        of ``server_to_join``.

        The gateway connection is released in a ``finally`` clause so that a
        failed assertion does not leave it open.

        Returns:
            0 when every check has passed.
        """
        util.print_frame()

        # get gateway info
        ip, port = util.get_rand_gateway(self.cluster)
        gw = gateway_mgmt.Gateway(self.cluster['servers'][0]['id'])
        ret = gw.connect(ip, port)
        self.assertEqual(ret, 0,
                         'failed to connect to gateway, %s:%d' % (ip, port))

        try:
            # initial data
            util.put_some_data(self.cluster)

            # shutdown
            server_to_join = util.get_server_by_role(self.cluster['servers'],
                                                     'master')
            ret = testbase.request_to_shutdown_smr(server_to_join)
            self.assertEqual(ret, 0, 'failed to shutdown smr')
            ret = testbase.request_to_shutdown_redis(server_to_join)
            self.assertEquals(ret, 0, 'failed to shutdown redis')

            # check state F
            max_try = 20
            expected = 'F'
            for i in range(0, max_try):
                state = util.get_smr_state(server_to_join, self.leader_cm)
                if expected == state:
                    break
                time.sleep(1)
            self.assertEquals(
                expected, state, 'server%d - state:%s, expected:%s' %
                (server_to_join['id'], state, expected))

            # set value
            key_base = 'mw'
            for i in range(0, 10000):
                cmd = 'set %s%d %d\r\n' % (key_base, i, i)
                gw.write(cmd)
                res = gw.read_until('\r\n')
                self.assertEquals(res, '+OK\r\n')

            # master failover 1 (master generation + 1)
            util.log('master failover 1')
            server = util.get_server_by_role(self.cluster['servers'], 'master')
            self.failover(server)

            # check quorum (copy:3, quorum:1, available:2)
            ok = False
            for i in range(10):
                ok = util.check_quorum(self.cluster['cluster_name'],
                                       self.leader_cm['ip'],
                                       self.leader_cm['cm_port'])
                if ok:
                    break
                time.sleep(1)
            self.assertTrue(ok, 'Check quorum fail.')

            # master failover 2 (master generation + 1)
            util.log('master failover 2')
            server = util.get_server_by_role(self.cluster['servers'], 'master')
            self.failover(server)

            # recovery
            util.log('master recovery start.')
            ret = testbase.request_to_start_smr(server_to_join)
            self.assertEqual(ret, 0, 'failed to start smr')

            ret = testbase.request_to_start_redis(server_to_join)
            self.assertEqual(ret, 0, 'failed to start redis')

            ret = testbase.wait_until_finished_to_set_up_role(
                server_to_join, 10)
            self.assertEquals(
                ret, 0,
                'failed to role change. smr_id:%d' % (server_to_join['id']))
            util.log('master recovery end successfully.')

            # check state N
            # NOTE(review): this polls `server` (the master picked for
            # failover 2), not `server_to_join`, which is the PGS that was
            # just restarted -- confirm this is intended.
            max_try = 20
            expected = 'N'
            for i in range(0, max_try):
                state = util.get_smr_state(server, self.leader_cm)
                if expected == state:
                    break
                time.sleep(1)
            role = util.get_role_of_server(server)
            self.assertEquals(
                expected, state, 'server%d - state:%s, expected:%s, role:%s' %
                (server['id'], state, expected, role))

            time.sleep(5)

            # set value
            for i in range(10000, 20000):
                cmd = 'set %s%d %d\r\n' % (key_base, i, i)
                gw.write(cmd)
                res = gw.read_until('\r\n')
                self.assertEquals(res, '+OK\r\n')

            # The looked-up master is not used below; the call is kept so the
            # sequence of cluster queries stays the same.
            server = util.get_server_by_role(self.cluster['servers'], 'master')

            redis = redis_mgmt.Redis(server_to_join['id'])
            ret = redis.connect(server_to_join['ip'],
                                server_to_join['redis_port'])
            self.assertEquals(ret, 0, 'failed to connect to redis')

            # check value
            for i in range(0, 20000):
                cmd = 'get %s%d\r\n' % (key_base, i)
                redis.write(cmd)
                # The first reply line is discarded; the second is compared.
                redis.read_until('\r\n')
                response = redis.read_until('\r\n')
                self.assertEqual(response, '%d\r\n' % (i),
                                 'inconsistent %s, %d' % (response[:-2], i))
        finally:
            # Release the gateway connection on failure as well as success.
            gw.disconnect()

        return 0