Пример #1
0
class GpRecoversegRegressionTests(unittest.TestCase):

    def setUp(self):
        self.gprec = GpRecover()
        self.gphome = os.environ.get('GPHOME')

    def failover(self, type = 'mirror'):
        if type == 'mirror':
            fault_str = 'source %s/greenplum_path.sh;gpfaultinjector -f filerep_consumer  -m async -y fault -r mirror -H ALL' % self.gphome
        else:
            fault_str = 'source %s/greenplum_path.sh;gpfaultinjector -f postmaster -m async -y panic -r primary -H ALL' % self.gphome
        return run_shell_command(fault_str, cmdname = 'Run fault injector to failover')
    
    def test_incr_gprecoverseg(self):
        self.gprec.wait_till_insync_transition()
        if(self.failover()):
            self.assertTrue(self.gprec.incremental())

    def test_full_gprecoverseg(self):
        self.gprec.wait_till_insync_transition()
        if(self.failover()):
            self.assertTrue(self.gprec.full())

    def test_gprecoverseg_rebalance(self):
        self.gprec.wait_till_insync_transition()
        if(self.failover('primary')):
            PSQL.run_sql_file(local_path('mirror_failover_trigger.sql'))
            self.gprec.incremental()
            if (self.gprec.wait_till_insync_transition()):
                self.assertTrue(self.gprec.rebalance())
    
    def test_wait_till_insync(self):
        self.gprec.wait_till_insync_transition()
        if(self.failover()):
            self.gprec.incremental()
            self.assertTrue(self.gprec.wait_till_insync_transition())
Пример #2
0
class GpRecoversegRegressionTests(unittest.TestCase):
    def setUp(self):
        self.gprec = GpRecover()
        self.gphome = os.environ.get('GPHOME')

    def failover(self, type='mirror'):
        if type == 'mirror':
            fault_str = 'source %s/greenplum_path.sh;gpfaultinjector -f filerep_consumer  -m async -y fault -r mirror -H ALL' % self.gphome
        else:
            fault_str = 'source %s/greenplum_path.sh;gpfaultinjector -f postmaster -m async -y panic -r primary -H ALL' % self.gphome
        return run_shell_command(fault_str,
                                 cmdname='Run fault injector to failover')

    def test_incr_gprecoverseg(self):
        self.gprec.wait_till_insync_transition()
        if (self.failover()):
            self.assertTrue(self.gprec.incremental())

    def test_full_gprecoverseg(self):
        self.gprec.wait_till_insync_transition()
        if (self.failover()):
            self.assertTrue(self.gprec.full())

    def test_gprecoverseg_rebalance(self):
        self.gprec.wait_till_insync_transition()
        if (self.failover('primary')):
            PSQL.run_sql_file(local_path('mirror_failover_trigger.sql'))
            self.gprec.incremental()
            if (self.gprec.wait_till_insync_transition()):
                self.assertTrue(self.gprec.rebalance())

    def test_wait_till_insync(self):
        self.gprec.wait_till_insync_transition()
        if (self.failover()):
            self.gprec.incremental()
            self.assertTrue(self.gprec.wait_till_insync_transition())
Пример #3
0
    def test_resync_ct_blocks_per_query(self):
        '''Catch a bug in resync that manifests only after rebalance.
        The logic used by a resync worker to obtain changed blocks
        from CT log had a bug.  The SQL query used to obtain a batch
        of changed blocks from CT log was incorrectly using LSN to
        filter out changed blocks.  All of the following must be true
        for the bug to occur:

         * More than gp_filerep_ct_batch_size blocks of a relation
           are changed on a segment in changetracking.

         * A block with a higher number is changed earlier (lower
           LSN) than lower numbered blocks.

         * The first batch of changed blocks obtained by resync worker
           from CT log for this relation contains only lower
           (according to block number) blocks.  The higher block with
           lower LSN is not included in this batch.  Another query
           must be run against CT log to obtain this block.

         * The SQL query used to obtain next batch of changed blocks
           for this relation contains incorrect WHERE clause involving
           a filter based on LSN of previously obtained blocks.  The
           higher numbered block is missed out - not returned by the
           query as changed block for the relation.  The block is
           never shipped from primary to mirror, resulting in data
           loss.  The test aims to verify that this doesn't happen as
           the bug is now fixed.
        '''
        config = GPDBConfig()
        assert (config.is_not_insync_segments()
                & config.is_balanced_segments()
                ), 'cluster not in-sync and balanced'

        # Create table and insert data so that adequate number of
        # blocks are occupied.
        self.run_sql('resync_bug_setup')
        # Bring down primaries and transition mirrors to
        # changetracking.
        filerep = Filerepe2e_Util()
        filerep.inject_fault(y='fault',
                             f='segment_probe_response',
                             r='primary')
        # Trigger the fault by running a sql file.
        PSQL.run_sql_file(local_path('test_ddl.sql'))
        filerep.wait_till_change_tracking_transition()

        # Set gp_filerep_ct_batch_size = 3.
        cmd = Command('reduce resync batch size',
                      'gpconfig -c gp_filerep_ct_batch_size -v 3')
        cmd.run()
        assert cmd.get_results().rc == 0, 'gpconfig failed'
        cmd = Command('load updated config', 'gpstop -au')
        cmd.run()
        assert cmd.get_results().rc == 0, '"gpstop -au" failed'

        self.run_sql('change_blocks_in_ct')

        # Capture change tracking log contents from the segment of
        # interest for debugging, in case the test fails.
        (host, port) = GPDBConfig().get_hostandport_of_segment(0, 'p')
        assert PSQL.run_sql_file_utility_mode(
            sql_file=local_path('sql/ct_log_contents.sql'),
            out_file=local_path('output/ct_log_contents.out'),
            host=host,
            port=port), sql_file

        gprecover = GpRecover(GPDBConfig())
        gprecover.incremental(False)
        gprecover.wait_till_insync_transition()

        # Rebalance, so that original primary is back in the role
        gprecover = GpRecover(GPDBConfig())
        gprecover.rebalance()
        gprecover.wait_till_insync_transition()

        # Reset gp_filerep_ct_batch_size
        cmd = Command('reset resync batch size',
                      'gpconfig -r gp_filerep_ct_batch_size')
        cmd.run()
        assert cmd.get_results().rc == 0, 'gpconfig failed'
        cmd = Command('load updated config', 'gpstop -au')
        cmd.run()
        assert cmd.get_results().rc == 0, '"gpstop -au" failed'

        self.run_sql('select_after_rebalance')
    def test_resync_ct_blocks_per_query(self):
        '''Catch a bug in resync that manifests only after rebalance.
        The logic used by a resync worker to obtain changed blocks
        from CT log had a bug.  The SQL query used to obtain a batch
        of changed blocks from CT log was incorrectly using LSN to
        filter out changed blocks.  All of the following must be true
        for the bug to occur:

         * More than gp_filerep_ct_batch_size blocks of a relation
           are changed on a segment in changetracking.

         * A block with a higher number is changed earlier (lower
           LSN) than lower numbered blocks.

         * The first batch of changed blocks obtained by resync worker
           from CT log for this relation contains only lower
           (according to block number) blocks.  The higher block with
           lower LSN is not included in this batch.  Another query
           must be run against CT log to obtain this block.

         * The SQL query used to obtain next batch of changed blocks
           for this relation contains incorrect WHERE clause involving
           a filter based on LSN of previously obtained blocks.  The
           higher numbered block is missed out - not returned by the
           query as changed block for the relation.  The block is
           never shipped from primary to mirror, resulting in data
           loss.  The test aims to verify that this doesn't happen as
           the bug is now fixed.
        '''
        config = GPDBConfig()
        assert (config.is_not_insync_segments() &
                config.is_balanced_segments()), 'cluster not in-sync and balanced'

        # Create table and insert data so that adequate number of
        # blocks are occupied.
        self.run_sql('resync_bug_setup')
        # Bring down primaries and transition mirrors to
        # changetracking.
        filerep = Filerepe2e_Util()
        filerep.inject_fault(y='fault', f='segment_probe_response',
                             r='primary')
        # Trigger the fault by running a sql file.
        PSQL.run_sql_file(local_path('test_ddl.sql'))
        filerep.wait_till_change_tracking_transition()

        # Set gp_filerep_ct_batch_size = 3.
        cmd = Command('reduce resync batch size',
                      'gpconfig -c gp_filerep_ct_batch_size -v 3')
        cmd.run()
        assert cmd.get_results().rc == 0, 'gpconfig failed'
        cmd = Command('load updated config', 'gpstop -au')
        cmd.run()
        assert cmd.get_results().rc == 0, '"gpstop -au" failed'

        self.run_sql('change_blocks_in_ct')

        # Capture change tracking log contents from the segment of
        # interest for debugging, in case the test fails.
        (host, port) = GPDBConfig().get_hostandport_of_segment(0, 'p')
        assert PSQL.run_sql_file_utility_mode(
            sql_file=local_path('sql/ct_log_contents.sql'),
            out_file=local_path('output/ct_log_contents.out'),
            host=host, port=port), sql_file

        gprecover = GpRecover(GPDBConfig())
        gprecover.incremental(False)
        gprecover.wait_till_insync_transition()

        # Rebalance, so that original primary is back in the role
        gprecover = GpRecover(GPDBConfig())
        gprecover.rebalance()
        gprecover.wait_till_insync_transition()

        # Reset gp_filerep_ct_batch_size
        cmd = Command('reset resync batch size',
                      'gpconfig -r gp_filerep_ct_batch_size')
        cmd.run()
        assert cmd.get_results().rc == 0, 'gpconfig failed'
        cmd = Command('load updated config', 'gpstop -au')
        cmd.run()
        assert cmd.get_results().rc == 0, '"gpstop -au" failed'

        self.run_sql('select_after_rebalance')