Example #1
 def test_with_concurrent_workload(self):
     """
     Add new mirrors while a concurrent workload is in progress, check that the mirrors
     are added and that the running workload is not affected, and run checkmirrorseg at the end.
     Note: adding mirrors while a workload is running hits the known checkmirrorseg issue MPP-24311.
     """
     gprecover = GpRecover()
     self._setup_gpaddmirrors()
     self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)
     sql_setup_file = local_path('sql/ao_heap_table_setup.sql') 
     sql_file = local_path('sql/ao_heap_table.sql')
     pg_stat_activity = 'SELECT * FROM pg_stat_activity;'
     PSQL.run_sql_file(sql_setup_file)
     subprocess.Popen(["psql", "-f", sql_file])
     time.sleep(15)
     subprocess.Popen(["gpaddmirrors", "-ai", self.mirror_config_file, "-d", self.mdd])
     time.sleep(15)
     result = PSQL.run_sql_command(pg_stat_activity, flags='-q -t', dbname='template1')
     result = result.strip()
     rows = result.split('\n')
     self.assertTrue(len(rows) > 1)
     while len(rows) > 1:
         result = PSQL.run_sql_command(pg_stat_activity, flags='-q -t', dbname='template1')
         result = result.strip()
         rows = result.split('\n')
         time.sleep(3)
     gprecover.wait_till_insync_transition()
     self.verify_config_file_with_gp_config()
Example #2
 def test_mirror_spread(self):
     """
     Mirror spreading will place each mirror on a different host within the Greenplum  Database array
     """
     gprecover = GpRecover()
     if self.number_of_segments_per_host > len(self.hosts):
         self.skipTest(
             'skipping test since the number of hosts is less than the number of segments per host'
         )
     self._setup_gpaddmirrors()
     self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)
     res = {'rc': 0, 'stdout': '', 'stderr': ''}
     run_shell_command(
         "gpaddmirrors -a -i %s -s -d %s --verbose" %
         (self.mirror_config_file, self.mdd),
         'run gpaddmirrors with mirror spreading', res)
     self.assertEqual(0, res['rc'])
     check_mirror_spreading = '''SELECT A.hostname, B.hostname 
                               FROM gp_segment_configuration A, gp_segment_configuration B 
                               WHERE A.preferred_role = \'p\' AND B.preferred_role = \'m\' AND A.content = B.content AND A.hostname <> B.hostname;'''
     result = PSQL.run_sql_command(check_mirror_spreading,
                                   flags='-q -t',
                                   dbname='template1')
     result = result.strip()
     self.assertNotEqual(0, len(result))
     rows = result.split('\n')
     self.assertEqual(self.number_of_segments, len(rows))
     gprecover.wait_till_insync_transition()
     self.verify_config_file_with_gp_config()
     self.check_mirror_seg()
Example #3
 def test_with_concurrent_workload(self):
     """
     Add new mirrors while a concurrent workload is in progress, check that the mirrors
     are added and that the running workload is not affected, and run checkmirrorseg at the end.
     Note: adding mirrors while a workload is running hits the known checkmirrorseg issue MPP-24311.
     """
     gprecover = GpRecover()
     self._setup_gpaddmirrors()
     self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)
     sql_setup_file = local_path('sql/ao_heap_table_setup.sql')
     sql_file = local_path('sql/ao_heap_table.sql')
     pg_stat_activity = 'SELECT * FROM pg_stat_activity;'
     PSQL.run_sql_file(sql_setup_file)
     subprocess.Popen(["psql", "-f", sql_file])
     time.sleep(15)
     subprocess.Popen(
         ["gpaddmirrors", "-ai", self.mirror_config_file, "-d", self.mdd])
     time.sleep(15)
     result = PSQL.run_sql_command(pg_stat_activity,
                                   flags='-q -t',
                                   dbname='template1')
     result = result.strip()
     rows = result.split('\n')
     self.assertTrue(len(rows) > 1)
     while len(rows) > 1:
         result = PSQL.run_sql_command(pg_stat_activity,
                                       flags='-q -t',
                                       dbname='template1')
         result = result.strip()
         rows = result.split('\n')
         time.sleep(3)
     gprecover.wait_till_insync_transition()
     self.verify_config_file_with_gp_config()
Example #4
    def test_option_port_offset(self):
        """
        primary port + offset = mirror database port
        primary port + (2 * offset) = mirror replication port
        primary port + (3 * offset) = primary replication port
        """
        gprecover = GpRecover()
        port_offset = 500
        self._setup_gpaddmirrors(port_offset = port_offset)
        self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

        res = {'rc': 0, 'stdout' : '', 'stderr': ''}
        run_shell_command("gpaddmirrors -a -i %s -d %s --verbose" % (self.mirror_config_file, self.mdd), 'run gpaddmirrors with non-default port_offset', res)
        self.assertEqual(0, res['rc'])
        query_ports = 'SELECT port, replication_port FROM gp_segment_configuration WHERE content = 0 ORDER BY preferred_role DESC;'
        result = PSQL.run_sql_command(query_ports, flags='-q -t', dbname='template1')
        ports = result.strip().split('\n')
        primary_ports = ports[0]
        mirror_ports = ports[1]
        primary_ports = primary_ports.split('|')
        primary_ports = [port.strip() for port in primary_ports]
        primary_db_port = int(primary_ports[0])
        primary_replic_port = int(primary_ports[1])
        mirror_ports = mirror_ports.split('|')
        mirror_ports = [port.strip() for port in mirror_ports]
        mirror_db_port = int(mirror_ports[0])
        mirror_replic_port = int(mirror_ports[1])  
        self.assertEqual(primary_db_port + port_offset, mirror_db_port)
        self.assertEqual(primary_db_port + 2*port_offset, mirror_replic_port)
        self.assertEqual(primary_db_port + 3*port_offset, primary_replic_port)
        gprecover.wait_till_insync_transition()
        self.verify_config_file_with_gp_config()
        self.check_mirror_seg()
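
The assertions above encode the documented port arithmetic. As a small, self-contained sketch of the same parsing and offset check on a hypothetical two-row psql -q -t result for content 0 (the port numbers are made up for illustration):

    # Hypothetical unaligned psql output: primary row first (preferred_role DESC), then mirror row.
    sample_output = " 40000 | 41500 \n 40500 | 41000 \n"
    port_offset = 500
    rows = [line.split('|') for line in sample_output.strip().split('\n')]
    primary_db_port, primary_replic_port = [int(col.strip()) for col in rows[0]]
    mirror_db_port, mirror_replic_port = [int(col.strip()) for col in rows[1]]
    assert mirror_db_port == primary_db_port + port_offset           # 40000 + 500  = 40500
    assert mirror_replic_port == primary_db_port + 2 * port_offset   # 40000 + 1000 = 41000
    assert primary_replic_port == primary_db_port + 3 * port_offset  # 40000 + 1500 = 41500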
Example #5
    def test_option_port_offset(self):
        """
        primary port + offset = mirror database port
        primary port + (2 * offset) = mirror replication port
        primary port + (3 * offset) = primary replication port
        """
        gprecover = GpRecover()
        port_offset = 500
        self._setup_gpaddmirrors(port_offset = port_offset)
        self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

        res = {'rc': 0, 'stdout' : '', 'stderr': ''}
        run_shell_command("gpaddmirrors -a -i %s -d %s --verbose" % (self.mirror_config_file, self.mdd), 'run gpaddmirrors with non-default port_offset', res)
        self.assertEqual(0, res['rc'])
        query_ports = 'SELECT port, replication_port FROM gp_segment_configuration WHERE content = 0 ORDER BY preferred_role DESC;'
        result = PSQL.run_sql_command(query_ports, flags='-q -t', dbname='template1')
        ports = result.strip().split('\n')
        primary_ports = ports[0]
        mirror_ports = ports[1]
        primary_ports = primary_ports.split('|')
        primary_ports = [port.strip() for port in primary_ports]
        primary_db_port = int(primary_ports[0])
        primary_replic_port = int(primary_ports[1])
        mirror_ports = mirror_ports.split('|')
        mirror_ports = [port.strip() for port in mirror_ports]
        mirror_db_port = int(mirror_ports[0])
        mirror_replic_port = int(mirror_ports[1])  
        self.assertEqual(primary_db_port + port_offset, mirror_db_port)
        self.assertEqual(primary_db_port + 2*port_offset, mirror_replic_port)
        self.assertEqual(primary_db_port + 3*port_offset, primary_replic_port)
        gprecover.wait_till_insync_transition()
        self.verify_config_file_with_gp_config()
        self.check_mirror_seg()
Example #6
    def test_batch_size_4(self):
        """
        check the batch size option -B of gpaddmirrors; the number of parallel workers depends on how many mirror segments are being set up, and defaults to at most 10
        """
        gprecover = GpRecover()
        self._setup_gpaddmirrors()
        self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

        workers = Set()
        batch_size = 4
        res = {'rc': 0, 'stdout': '', 'stderr': ''}
        run_shell_command(
            "gpaddmirrors -a -i %s -B %s -d %s --verbose" %
            (self.mirror_config_file, batch_size, self.mdd),
            'run gpaddmirrors batch size %s' % batch_size, res)
        self.assertEqual(0, res['rc'])
        lines = res['stdout'].split('\n')
        for line in lines:
            if 'worker' in line and 'haltWork' in line:
                elems = line.split(' ')[1]
                worker = elems.split('-')[-1]
                workers.add(worker)
        self.assertEquals(len(workers), batch_size)
        gprecover.wait_till_insync_transition()
        self.verify_config_file_with_gp_config()
        self.check_mirror_seg()
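
The batch-size assertion counts distinct worker ids scraped from the --verbose log: for lines mentioning both 'worker' and 'haltWork', the second whitespace-separated token is split on '-' and its last piece is treated as the worker id. A minimal sketch of that extraction on hypothetical log lines (the real gpaddmirrors log wording may differ):

    # Hypothetical --verbose lines; only the token layout matters for the parsing.
    lines = [
        "20230101:00:00:01 gpaddmirrors-worker-0 [INFO] haltWork called",
        "20230101:00:00:02 gpaddmirrors-worker-1 [INFO] haltWork called",
    ]
    workers = set()   # the test above uses sets.Set(), the Python 2 equivalent
    for line in lines:
        if 'worker' in line and 'haltWork' in line:
            elems = line.split(' ')[1]          # second whitespace-separated token
            workers.add(elems.split('-')[-1])   # id after the last '-'
    assert len(workers) == 2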
Example #7
 def __init__(self, methodName):
     self.pgport = os.environ.get('PGPORT')
     self.fileutil = Filerepe2e_Util()
     self.gpconfig = GPDBConfig()
     self.gprecover = GpRecover(self.gpconfig)
     self.gpstate = Gpstate()
     self.gpprimarymirror = Gpprimarymirror()
     self.base = GPDBStorageBaseTestCase(self.gpconfig)
     super(FtsTransitions, self).__init__(methodName)
Example #8
 def check_insync_transition(self, dbname='template1'):
     """ 
     Confirm that the current mode is in sync before running gpcheckmirrorseg.
     The resync interval increases by 10 seconds for each new query, sleeping at most 75 seconds in total; this can be tuned.
     """
     recoverseg = GpRecover()
     is_synchronized = recoverseg.wait_till_insync_transition()
     if not is_synchronized:
         self.fail('Segments are not in sync')
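
wait_till_insync_transition comes from the GpRecover utility; per the docstring it polls with a growing interval. A rough sketch of such a wait loop (an assumption about the approach, not the library's actual code), built on gp_segment_configuration where mode 's' marks a synchronized segment:

    import time

    def wait_till_all_segments_synced(run_sql, max_attempts=5, base_interval=10):
        # run_sql is any callable that returns psql -q -t output as a string.
        query = ("SELECT count(*) FROM gp_segment_configuration "
                 "WHERE content > -1 AND mode <> 's';")
        for attempt in range(1, max_attempts + 1):
            if int(run_sql(query).strip()) == 0:
                return True                       # every segment reports mode 's'
            time.sleep(base_interval * attempt)   # sleep 10s, 20s, 30s, ... between polls
        return False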
Example #9
 def check_insync_transition(self, dbname='template1'):
     """ 
     Confirm that the current mode is in sync before running gpcheckmirrorseg.
     The resync interval increases by 10 seconds for each new query, sleeping at most 75 seconds in total; this can be tuned.
     """
     recoverseg = GpRecover()
     is_synchronized = recoverseg.wait_till_insync_transition()
     if not is_synchronized:
         self.fail('Segments are not in sync')    
Example #10
 def __init__(self, methodName):
     self.pgport = os.environ.get('PGPORT')
     self.util = Filerepe2e_Util()
     self.gpconfig = GpConfig()
     self.config = GPDBConfig()
     self.gpr = GpRecover(self.config)
     self.dbstate = DbStateClass('run_validation', self.config)
     self.gpstart = GpStart()
     self.gpstop = GpStop()
     super(FilerepTestCase, self).__init__(methodName)
Example #11
 def __init__(self, methodName):
     self.filereputil = Filerepe2e_Util()
     self.config = GPDBConfig()
     self.gprecover = GpRecover(self.config)
     self.gpstop = GpStop()
     self.gpstart = GpStart()
     self.gpverify = GpdbVerify(config=self.config)
     self.dbstate = DbStateClass('run_validation', self.config)
     self.port = os.getenv('PGPORT')
     super(PgtwoPhaseClass, self).__init__(methodName)
Example #12
File: base.py Project: 50wu/gpdb
class BaseClass(MPPTestCase):
    '''
    Base Class for Storage test-suites 
    '''

    def __init__(self,methodName):
        self.filereputil = Filerepe2e_Util()
        self.gprecover = GpRecover()
        super(BaseClass,self).__init__(methodName)
        

    def inject_fault(self, fault_name, type, role='mirror', port=None, occurence=None, sleeptime=None, seg_id=None):
        ''' Reset the fault and then issue the fault with the given type'''
        self.filereputil.inject_fault(f=fault_name, y='reset', r=role, p=port , o=occurence, sleeptime=sleeptime, seg_id=seg_id)
        self.filereputil.inject_fault(f=fault_name, y=type, r=role, p=port , o=occurence, sleeptime=sleeptime, seg_id=seg_id)
        tinctest.logger.info('Successfully injected fault_name : %s fault_type : %s  occurence : %s ' % (fault_name, type, occurence))
   
    def reset_fault(self, fault_name, role='mirror', port=None, occurence=None, sleeptime=None, seg_id=None):
        ''' Reset the fault '''
        self.filereputil.inject_fault(f=fault_name, y='reset', r=role, p=port , o=occurence, sleeptime=sleeptime, seg_id=seg_id)
        tinctest.logger.info('Successfully reset fault_name : %s occurence : %s ' % (fault_name, occurence))

    def check_fault_status(self, fault_name, seg_id=None, role=None):
        status = self.filereputil.check_fault_status(fault_name = fault_name, status ='triggered', max_cycle=20, role=role, seg_id=seg_id)
        self.assertTrue(status, 'The fault is not triggered in the time expected')

    def incremental_recoverseg(self):
        self.gprecover.incremental()

    def wait_till_change_tracking(self):
        self.filereputil.wait_till_change_tracking_transition()

    def run_sql_in_background(self, sql_cmd):
        PSQL.run_sql_command(sql_cmd, background=True)

    def wait_till_insync(self):
        self.gprecover.wait_till_insync_transition()

    def set_gpconfig(self, param, value):
        ''' Set the configuration parameter using gpconfig '''
        command = "gpconfig -c %s -v \"\'%s\'\" --skipvalidation" % (param, value)
        rc = run_shell_command(command)
        if not rc:
            raise Exception('Unable to set the configuration parameter %s ' % param)
        gpstop = GpStop()
        gpstop.run_gpstop_cmd(restart=True)

    def reset_gpconfig(self,param):
        ''' Reset the configuration parameter '''
        command = "gpconfig -r %s " % (param)
        rc = run_shell_command(command)
        if not rc:
            raise Exception('Unable to reset the configuration parameter %s ' % param)
        gpstop = GpStop()
        gpstop.run_gpstop_cmd(restart=True)
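
These helpers are usually chained from a test: inject a fault on the mirror, wait for change tracking, recover, then wait for resync. A hedged usage sketch composed only of the BaseClass methods above (the test name is illustrative; 'filerep_consumer' is the fault used elsewhere in these suites):

    def test_mirror_fault_and_recover(self):
        self.inject_fault('filerep_consumer', 'fault', role='mirror')
        self.check_fault_status('filerep_consumer', role='mirror')
        self.wait_till_change_tracking()     # cluster transitions to change tracking
        self.incremental_recoverseg()        # incremental gprecoverseg
        self.wait_till_insync()              # wait until primary/mirror pairs resync
        self.reset_fault('filerep_consumer', role='mirror')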
Example #13
 def __init__(self,methodName):
     self.fileutil = Filerepe2e_Util()
     self.config = GPDBConfig()
     self.gprecover = GpRecover(self.config)
     self.gpstart = GpStart()
     self.gpstop = GpStop()
     self.gpfile = Gpfilespace(self.config)
     self.dbstate = DbStateClass('run_validation', self.config)
     self.port = os.getenv('PGPORT')
     self.base = GPDBStorageBaseTestCase()
     super(SuspendCheckpointCrashRecovery,self).__init__(methodName)
Example #14
 def test_option_d(self):
     """
     check the -d option of gpaddmirrors
     """
     gprecover = GpRecover()
     self._setup_gpaddmirrors()
     self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)
     del os.environ['MASTER_DATA_DIRECTORY']
     Command('run gpaddmirrors -i -d', 'gpaddmirrors -a -i %s -d %s' % (self.mirror_config_file, self.mdd)).run(validateAfter=True)
     os.environ['MASTER_DATA_DIRECTORY']=self.mdd
     gprecover.wait_till_insync_transition()
     self.verify_config_file_with_gp_config()
     self.check_mirror_seg() 
Example #15
 def test_option_d(self):
     """
     check the -d option of gpaddmirrors
     """
     gprecover = GpRecover()
     self._setup_gpaddmirrors()
     self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)
     del os.environ['MASTER_DATA_DIRECTORY']
     Command('run gpaddmirrors -i -d', 'gpaddmirrors -a -i %s -d %s' % (self.mirror_config_file, self.mdd)).run(validateAfter=True)
     os.environ['MASTER_DATA_DIRECTORY']=self.mdd
     gprecover.wait_till_insync_transition()
     self.verify_config_file_with_gp_config()
     self.check_mirror_seg() 
Example #16
    def test_with_fault_injection(self):
        """
        add new mirrors and run a workload to verify that the cluster functions correctly, then
        inject a fault on the mirror to bring the cluster into change tracking, and finally run recoverseg
        """
        filerepUtil = Filerepe2e_Util()
        gprecover = GpRecover()
        self._setup_gpaddmirrors()
        self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

        res = {'rc': 0, 'stdout' : '', 'stderr': ''}
        run_shell_command("gpaddmirrors -a -i %s -d %s --verbose" % (self.mirror_config_file, self.mdd), 'run gpaddmirrors with fault injection', res)
        gprecover.wait_till_insync_transition()
        self.assertEqual(0, res['rc'])
        self.run_simple_ddl_dml()

        # after adding new mirrors, check the integrity between primary and mirror
        self.check_mirror_seg()
        out_file = local_path('inject_fault_into_ct')
        filerepUtil.inject_fault(f='filerep_consumer', m='async', y='fault', r='mirror', H='ALL', outfile=out_file)
        # trigger the transition to change tracking
        PSQL.run_sql_command('drop table if exists foo;', dbname = 'template1')
        filerepUtil.wait_till_change_tracking_transition()
        gprecover.incremental()
        gprecover.wait_till_insync_transition()
        out_file=local_path('reset_fault')
        filerepUtil.inject_fault(f='filerep_consumer', m='async', y='reset', r='mirror', H='ALL', outfile=out_file)
Example #17
    def run_gprecoverseg(self, recover_option):
        '''
        @summary : Call gprecoverseg full or incremental to bring the cluster back in sync
        '''
        self.gpr = GpRecover()

        tinctest.logger.info("[STLRTest] Running run_gprecoverseg")

        if recover_option == 'full':
            self.gpr.full()
        else:
            self.gpr.incremental()

        self.gpr.wait_till_insync_transition()
Example #18
 def test_interview(self):
     gprecover = GpRecover()
     child = pexpect.spawn('gpaddmirrors')
     #child.logfile = sys.stdout
     for i in range(0, self.number_of_segments_per_host):
         child.expect('Enter mirror segment data directory location.*.\r\n')
         child.sendline(self.mirror_data_dir)
     child.expect('Continue with add mirrors procedure Yy|Nn (default=N):')
     child.sendline('Y')
     child.expect(pexpect.EOF)
     # wait until the cluster is fully synced, then run gpcheckmirrorseg
     gprecover.wait_till_insync_transition()
     self.check_mirror_seg()
     self._do_gpdeletesystem()
     self._do_gpinitsystem()
Example #19
 def test_interview(self):
     gprecover = GpRecover()
     child = pexpect.spawn('gpaddmirrors')
     #child.logfile = sys.stdout
     for i in range(0, self.number_of_segments_per_host):
         child.expect('Enter mirror segment data directory location.*.\r\n')        
         child.sendline(self.mirror_data_dir)
     child.expect('Continue with add mirrors procedure Yy|Nn (default=N):')
     child.sendline('Y')
     child.expect(pexpect.EOF)
     # wait until the cluster is fully synced, then run gpcheckmirrorseg
     gprecover.wait_till_insync_transition()
     self.check_mirror_seg()
     self._do_gpdeletesystem()
     self._do_gpinitsystem()
Example #20
    def test_with_fault_injection(self):
        """
        add new mirrors and run a workload to verify that the cluster functions correctly, then
        inject a fault on the mirror to bring the cluster into change tracking, and finally run recoverseg
        """
        filerepUtil = Filerepe2e_Util()
        gprecover = GpRecover()
        self._setup_gpaddmirrors()
        self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

        res = {'rc': 0, 'stdout' : '', 'stderr': ''}
        run_shell_command("gpaddmirrors -a -i %s -d %s --verbose" % (self.mirror_config_file, self.mdd), 'run gpaddmirrors with fault injection', res)
        gprecover.wait_till_insync_transition()
        self.assertEqual(0, res['rc'])
        self.run_simple_ddl_dml()

        # after adding new mirrors, check the integrity between primary and mirror
        self.check_mirror_seg()
        out_file = local_path('inject_fault_into_ct')
        filerepUtil.inject_fault(f='filerep_consumer', m='async', y='fault', r='mirror', H='ALL', outfile=out_file)
        # trigger the transition to change tracking
        PSQL.run_sql_command('drop table if exists foo;', dbname = 'template1')
        filerepUtil.wait_till_change_tracking_transition()
        gprecover.incremental()
        gprecover.wait_till_insync_transition()
        out_file=local_path('reset_fault')
        filerepUtil.inject_fault(f='filerep_consumer', m='async', y='reset', r='mirror', H='ALL', outfile=out_file)
Example #21
 def __init__(self, methodName):
     self.pgport = os.environ.get('PGPORT')
     self.fileutil = Filerepe2e_Util()
     self.gpconfig = GPDBConfig()
     self.gprecover = GpRecover(self.gpconfig)
     self.gpstate = Gpstate()
     self.gpprimarymirror = Gpprimarymirror()
     self.base = GPDBStorageBaseTestCase(self.gpconfig)
     super(FtsTransitions,self).__init__(methodName)
Example #22
 def test_gpaddmirrors_with_workload(self):
     """
     Add new mirrors after creating some workload, check that the mirrors are added
     and that checkmirrorseg passes.
     """
     gprecover = GpRecover()
     self._setup_gpaddmirrors()
     self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)
     sql_setup_file = local_path('sql/ao_heap_table_setup.sql')
     sql_file = local_path('sql/ao_heap_table.sql')
     pg_stat_activity = 'SELECT * FROM pg_stat_activity;'
     PSQL.run_sql_file(sql_setup_file)
     PSQL.run_sql_file(sql_file)
     res = {'rc': 0, 'stdout' : '', 'stderr': ''}
     run_shell_command("gpaddmirrors -a -i %s -d %s --verbose" % (self.mirror_config_file, self.mdd), 'run gpaddmirrors with workload', res)
     self.assertEqual(0, res['rc'])
     gprecover.wait_till_insync_transition()
     self.verify_config_file_with_gp_config()
     self.check_mirror_seg()
Example #23
 def test_gpaddmirrors_with_workload(self):
     """
     Add new mirrors after creating some workload, check that the mirrors are added
     and that checkmirrorseg passes.
     """
     gprecover = GpRecover()
     self._setup_gpaddmirrors()
     self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)
     sql_setup_file = local_path('sql/ao_heap_table_setup.sql')
     sql_file = local_path('sql/ao_heap_table.sql')
     pg_stat_activity = 'SELECT * FROM pg_stat_activity;'
     PSQL.run_sql_file(sql_setup_file)
     PSQL.run_sql_file(sql_file)
     res = {'rc': 0, 'stdout' : '', 'stderr': ''}
     run_shell_command("gpaddmirrors -a -i %s -d %s --verbose" % (self.mirror_config_file, self.mdd), 'run gpaddmirrors with workload', res)
     self.assertEqual(0, res['rc'])
     gprecover.wait_till_insync_transition()
     self.verify_config_file_with_gp_config()
     self.check_mirror_seg()
Example #24
 def __init__(self, methodName):    
     self.pgport = os.environ.get('PGPORT')
     self.util = Filerepe2e_Util()
     self.gpconfig = GpConfig()
     self.config = GPDBConfig()
     self.gpr = GpRecover(self.config)
     self.dbstate = DbStateClass('run_validation',self.config)
     self.gpstart = GpStart()
     self.gpstop = GpStop()
     super(FilerepTestCase,self).__init__(methodName)
Example #25
 def __init__(self,methodName):
     self.filereputil = Filerepe2e_Util()
     self.config = GPDBConfig()
     self.gprecover = GpRecover(self.config)
     self.gpstop = GpStop()
     self.gpstart = GpStart()
     self.gpfile = Gpfilespace(self.config)
     self.gpverify = GpdbVerify(config=self.config)
     self.dbstate = DbStateClass('run_validation',self.config)
     self.port = os.getenv('PGPORT')
     super(PgtwoPhaseClass,self).__init__(methodName)
Example #26
    def __init__(self, config=None):
        if config is not None:
            self.config = config
        else:
            self.config = GPDBConfig()

        self.filereputil = Filerepe2e_Util()
        self.gprecover = GpRecover(self.config)
        self.gpstop = GpStop()
        self.gpstart = GpStart()
        self.gpverify = GpdbVerify(config=self.config)
        self.dbstate = DbStateClass('run_validation', self.config)
        self.port = os.getenv('PGPORT')
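
Because the constructor falls back to building its own GPDBConfig, callers can either share an existing config object or let the helper create one. A short usage sketch (the class name is not shown in this excerpt, so Helper is a placeholder):

    shared_config = GPDBConfig()
    helper_a = Helper(config=shared_config)   # reuse a config already built by the caller
    helper_b = Helper()                       # falls back to its own GPDBConfig()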
Example #27
 def test_mirror_spread(self):
     """
     Mirror spreading will place each mirror on a different host within the Greenplum  Database array
     """
     gprecover = GpRecover()
     if self.number_of_segments_per_host > len(self.hosts):
         self.skipTest('skipping test since the number of hosts is less than the number of segments per host')
     self._setup_gpaddmirrors()
     self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)
     res = {'rc': 0, 'stdout' : '', 'stderr': ''}
     run_shell_command("gpaddmirrors -a -i %s -s -d %s --verbose" % (self.mirror_config_file, self.mdd), 'run gpaddmirrors with mirror spreading', res)
     self.assertEqual(0, res['rc'])
     check_mirror_spreading = '''SELECT A.hostname, B.hostname 
                               FROM gp_segment_configuration A, gp_segment_configuration B 
                               WHERE A.preferred_role = \'p\' AND B.preferred_role = \'m\' AND A.content = B.content AND A.hostname <> B.hostname;'''
     result = PSQL.run_sql_command(check_mirror_spreading, flags='-q -t', dbname='template1')
     result = result.strip()
     self.assertNotEqual(0, len(result))
     rows = result.split('\n')
     self.assertEqual(self.number_of_segments, len(rows))
     gprecover.wait_till_insync_transition()
     self.verify_config_file_with_gp_config()
     self.check_mirror_seg()
Example #28
    def test_batch_size_4(self):
        """
        check the batch size option -B of gpaddmirrors; the number of parallel workers depends on how many mirror segments are being set up, and defaults to at most 10
        """
        gprecover = GpRecover()
        self._setup_gpaddmirrors()
        self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

        workers = Set()
        batch_size = 4
        res = {'rc': 0, 'stdout' : '', 'stderr': ''}
        run_shell_command("gpaddmirrors -a -i %s -B %s -d %s --verbose" % (self.mirror_config_file, batch_size, self.mdd), 'run gpaddmirrors batch size %s' % batch_size, res)
        self.assertEqual(0, res['rc'])
        lines = res['stdout'].split('\n')
        for line in lines:
            if 'worker' in line and 'haltWork' in line:
                elems = line.split(' ')[1]
                worker = elems.split('-')[-1]
                workers.add(worker)
        self.assertEquals(len(workers), batch_size)            
        gprecover.wait_till_insync_transition()
        self.verify_config_file_with_gp_config()
        self.check_mirror_seg()
Example #29
    def run_gprecoverseg(self,recover_option):
        '''
        @summary : Call gprecoverseg full or incremental to bring the cluster back in sync
        '''
        self.gpr = GpRecover()

        tinctest.logger.info("[STLRTest] Running run_gprecoverseg")   

        if recover_option == 'full':
            self.gpr.full()
        else:
            self.gpr.incremental()

        self.gpr.wait_till_insync_transition()
Example #30
    def run_gprecoverseg(self,recover_option):
        '''
        @summary : Call gprecoverseg full or incremental to bring the cluster back in sync
        '''
        self.gpr = GpRecover()

        tinctest.logger.info("[STLRTest] Running run_gprecoverseg")   
        tinctest.logger.info("[STLRTest] START printing gp segment configuration")
        (gp_seg_conf) = PSQL.run_sql_command("select * from gp_segment_configuration order by dbid")
        tinctest.logger.info(gp_seg_conf)

        if recover_option == 'full':
            self.gpr.full()
        else:
            self.gpr.incremental()
        #Wait till the primary and mirror are in sync
        tinctest.logger.info("[STLRTest] Middle printing gp segment configuration")
        (gp_seg_conf) = PSQL.run_sql_command("select * from gp_segment_configuration order by dbid")
        tinctest.logger.info(gp_seg_conf)

        self.gpr.wait_till_insync_transition()
        tinctest.logger.info("[STLRTest] END printing gp segment configuration")
        (gp_seg_conf) = PSQL.run_sql_command("select * from gp_segment_configuration order by dbid")
        tinctest.logger.info(gp_seg_conf)
Example #31
class AOCOAlterColumn(MPPTestCase):
    
    def __init__(self):
        self.fileutil = Filerepe2e_Util()
        self.gprecover = GpRecover()
        self.config = GpConfig()
        self.base_dir = os.path.dirname(sys.modules[self.__class__.__module__].__file__)


    def get_sql_files(self, sql_file_name):
        sql_file = os.path.join( self.base_dir, "sql", sql_file_name + ".sql");    
        return  sql_file

    def validate_sql(self, ans_file, out_file):
        ''' Compare the out and ans files '''
        init_file=os.path.join( self.base_dir, "sql",'init_file')
        result1 = Gpdiff.are_files_equal(out_file, ans_file, match_sub =[init_file])
        self.assertTrue(result1 ,'Gpdiff.are_files_equal')        

    def run_sql(self, filename, out_file,background=False):
        ''' Run the provided sql and validate it '''
        out_file = local_path(filename.replace(".sql", ".out"))
        PSQL.run_sql_file(filename,out_file=out_file,background=background)


    def run_test_CatalogCheck(self, action,storage):
        file_name =action+'_'+storage
        sql_file = self.get_sql_files(file_name)
        out_file = self.base_dir+ "/sql/"+file_name+'.out'
        tinctest.logger.info( 'sql-file == %s \n' % sql_file)
        tinctest.logger.info( 'out-file == %s \n' % out_file)
        # Run Add/Drop Column script
        self.run_sql(sql_file, out_file=out_file)

    def validate_test_CatalogCheck(self, action,storage):
        file_name =action+'_'+storage
        out_file = self.base_dir+ "/sql/"+file_name+'.out'
        ans_file = self.base_dir+ "/expected/"+file_name+'.ans'
        tinctest.logger.info( 'out-file == %s \n' % out_file)
        tinctest.logger.info( 'ans-file == %s \n' % ans_file)
        # Validate Ans file
        self.validate_sql(ans_file,out_file)
        if storage == 'multisegfiles':
            ''' check if multi_segfile_tab file has  multiple segfiles per column '''
            tablename='multi_segfile_tab'
            relid = self.get_relid(file_name=tablename )
            utilitymodeinfo=self.get_utilitymode_conn_info( relid=relid)
            u_port=utilitymodeinfo[0]
            u_host=utilitymodeinfo[1]
            assert(1 < int(self.get_segment_cnt(relid=relid,host=u_host,port= u_port)))
        # Check Correctness of the catalog
        self.dbstate = DbStateClass('run_validation')
        outfile = local_path("gpcheckcat_"+datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d%H%M%S')+".out")
        self.dbstate.check_catalog(outputFile=outfile)

    def run_test_ChangeTracking(self,filename):
        # Log the segment state before starting the test
        # Expectation is a SYNC state
        self.log_segment_state()
        primary_dbid=self.get_dbid()
        # Run the 'alter table add column' command in the background
        self.run_sql_ChangeTracking(filename,stage='fail',validate=False,background=True)
        # Inject Fault to put one primary in panic
        self.fileutil.inject_fault(f='postmaster', y='reset',  seg_id=primary_dbid)
        self.fileutil.inject_fault(f='postmaster', y='panic',  seg_id=primary_dbid)
        state=self.fileutil.check_fault_status(fault_name='postmaster', status='triggered')
        self.log_segment_state()
        # Recover the down segments
        self.recover_seg()
        self.log_segment_state()
        # Validate that the previous alter failed because primary segment went down as the alter was taking place
        self.run_sql_ChangeTracking(filename,stage='failvalidate',validate=True,background=False) 
        # Now the system is in change tracking so the next alter should pass
        self.run_sql_ChangeTracking(filename,stage='pass',validate=True,background=False) 
        self.log_segment_state()


    def recover_seg(self):
        result=self.get_segcount_state(state='d')
        if result > 0:
            if not self.gprecover.incremental():
                raise Exception('Gprecoverseg failed')
            if not self.gprecover.wait_till_insync_transition():
                raise Exception('Segments not in sync')
        tinctest.logger.info('Segments recovered and back in sync')
        

    def run_sql_ChangeTracking(self,filename,stage,validate=False,background=False):
        fname=filename+'-'+stage
        sql_file = self.get_sql_files(fname)
        out_file = self.base_dir+ "/sql/"+fname +'.out'
        ans_file = self.base_dir+ "/expected/"+fname+'.ans'
        tinctest.logger.info( '\n==============stage = %s ================' % (stage))
        tinctest.logger.info( sql_file)
        tinctest.logger.info( out_file)
        tinctest.logger.info( ans_file)
        tinctest.logger.info( '==============================')
        result=self.run_sql(sql_file,out_file=out_file,background=background)
        if validate == True:
           self.validate_sql(ans_file,out_file)

 
    def get_dbid(self):
        sql_cmd = "select min(dbid) dbid from gp_segment_configuration where role = 'p' and status = 'u' and content > -1"
        dbid=PSQL.run_sql_command(sql_cmd= sql_cmd,flags='-q -t')
        tinctest.logger.info('Segments %s chosen for fault injection' % (dbid))
        return dbid
     
    def log_segment_state(self):
        sql_cmd = "select * from gp_segment_configuration order by dbid"
        result=PSQL.run_sql_command(sql_cmd= sql_cmd)
        tinctest.logger.info('==========================')
        tinctest.logger.info('State of Segments ')
        tinctest.logger.info(result)
        tinctest.logger.info('==========================')

    def get_segcount_state(self,state):
        sql_cmd = "select count(*) from gp_segment_configuration where status = '%s'" % (state)
        result=PSQL.run_sql_command(sql_cmd= sql_cmd,flags='-q -t')
        tinctest.logger.info('Number of segments in %s State == %d' % (state,(int(result))))
        return int(result)

    def get_utilitymode_conn_info(self, relid=0):
        # get the segment to connect to in utility mode, then get its hostname and port
        sql_cmd="select port, hostname from gp_segment_configuration sc  where dbid > 1 and role = 'p' limit 1;"
        utilitymodeinfo=PSQL.run_sql_command(sql_cmd=sql_cmd,  flags='-q -t')
        u_port=utilitymodeinfo.strip().split('|')[0]
        u_host=utilitymodeinfo.strip().split('|')[1]
        return [u_port,u_host]

    def get_relid(self,file_name=None):
        sql_cmd="SELECT oid FROM pg_class WHERE relname='%s';\n" % file_name
        relid= PSQL.run_sql_command(sql_cmd=sql_cmd,  flags='-q -t')
        return relid;

    def get_segment_cnt(self, relid=0,host=None,port=None):
        sql_cmd="select count(*) from gp_toolkit.__gp_aocsseg(%s) group by column_num having count(*) > 1 limit 1" % (relid)
        segcnt=PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd,host=host, port=port,flags='-q -t')
        if (len(segcnt.strip()) == 0):
            segcnt='0'
        return segcnt

    def run_test_utility_mode(self,filename):
        #alter_aoco_tab_utilitymode
        relid = self.get_relid(file_name=filename )
        utilitymodeinfo=self.get_utilitymode_conn_info( relid=relid)
        u_port=utilitymodeinfo[0]
        u_host=utilitymodeinfo[1]
        self.run_sql_utility_mode(filename,host=u_host,port=u_port)

    
    def run_sql_utility_mode(self,filename,host=None,port=None):
        fname=filename
        sql_file = self.get_sql_files(fname)
        out_file = self.base_dir+ "/sql/"+fname +'.out'
        ans_file = self.base_dir+ "/expected/"+fname+'.ans'
        tinctest.logger.info( '\n==============================')
        tinctest.logger.info( sql_file)
        tinctest.logger.info( out_file)
        tinctest.logger.info( ans_file)
        tinctest.logger.info( '==============================')
        result=PSQL.run_sql_file_utility_mode(sql_file,out_file=out_file,host=host, port=port)
        self.validate_sql(ans_file,out_file)
Example #32
 def wait_till_insync_transition(self):
     self.gpr = GpRecover()
     self.gpr.wait_till_insync_transition()
Example #33
 def test_recovery(self):
     gprecover = GpRecover()
     gprecover.incremental()
     gprecover.wait_till_insync_transition()
Example #34
 def setUpClass(cls):
     super(mpp23395, cls).setUpClass()
     recoverseg = GpRecover()
     recoverseg.recover_rebalance_segs()
Example #35
 def __init__(self):
     self.fileutil = Filerepe2e_Util()
     self.gprecover = GpRecover()
     self.config = GpConfig()
     self.base_dir = os.path.dirname(sys.modules[self.__class__.__module__].__file__)
Example #36
class SuspendCheckpointCrashRecovery(MPPTestCase):
    
    def __init__(self,methodName):
        self.fileutil = Filerepe2e_Util()
        self.config = GPDBConfig()
        self.gprecover = GpRecover(self.config)
        self.gpstart = GpStart()
        self.gpstop = GpStop()
        self.gpfile = Gpfilespace(self.config)
        self.dbstate = DbStateClass('run_validation', self.config)
        self.port = os.getenv('PGPORT')
        self.base = GPDBStorageBaseTestCase()
        super(SuspendCheckpointCrashRecovery,self).__init__(methodName)

    def check_system(self):
        ''' 
        @summary: Check whether the system is up and in sync. Exit out if not 
        '''
        cmd ="select count(*) from gp_segment_configuration where content<> -1 ;"
        count_all = PSQL.run_sql_command(cmd, flags ='-q -t', dbname='postgres')
        cmd ="select count(*) from gp_segment_configuration where content<> -1 and mode = 's' and status = 'u';"
        count_up_and_sync = PSQL.run_sql_command(cmd, flags ='-q -t', dbname='postgres')
        if count_all.strip() != count_up_and_sync.strip() :
            os._exit(1)
        else:
            tinctest.logger.info("\n Starting New Test: System is up and in sync .........")

    def get_items_list(self, tests):
        ''' Get file contents to a list '''
        test_file = local_path(tests)
        with open(test_file, 'r') as f:
            test_list = [line.strip() for line in f]
        return test_list

    def checkPSQLRun(self, test):
        '''Check if the psql run started in background is over before running the _post.sql '''
        cmd_str = "ps -ef|grep '%s'|grep [p]sql" % test
        while(1):
            is_running = 0 
            cmd = Command('Check psql run', cmd_str)
            cmd.run()
            result = cmd.get_results()
            for line in result.stdout.splitlines():
                if '%s' %test in line:
                    tinctest.logger.info(line)
                    is_running = 1 
            if is_running == 0:
                return True
            else:
                sleep(5)
        return False

    def modify_sql_file(self, filename):
        ans_file = local_path(filename.replace('.sql' , '.ans'))
        for sfile in (filename, ans_file):
            for line in fileinput.FileInput(sfile,inplace=1):
                line = re.sub('gptest', os.getenv('PGDATABASE'), line)
                print str(re.sub('\n','',line))

    def validate_sql(self, filename):
        ''' Compare the out and ans files '''
        out_file = local_path(filename.replace(".sql", ".out"))
        ans_file = local_path(filename.replace('.sql' , '.ans'))
        assert Gpdiff.are_files_equal(out_file, ans_file)

    def run_sql(self, filename):
        ''' Run the provided sql and validate it '''
        out_file = local_path(filename.replace(".sql", ".out"))
        PSQL.run_sql_file(sql_file = filename, out_file = out_file)
        self.validate_sql(filename)

    def set_faults_before_executing_pre_sqls(self, cluster_state):
        ''' Set the checkpoint skip fault '''
        if cluster_state == 'change_tracking':
           self.cluster_in_change_tracking()
        self.fileutil.inject_fault(f='checkpoint', y='reset', r='primary', p=self.port)
        self.fileutil.inject_fault(f='checkpoint', y='skip', r='primary', p=self.port, o='0')
        tinctest.logger.info('Successfully injected fault to skip checkpointing') 
        if(cluster_state == 'resync'):
            self.fileutil.inject_fault(f='filerep_consumer', y='reset')
            self.fileutil.inject_fault(f='filerep_consumer', y='fault')
            self.fileutil.wait_till_change_tracking_transition()

    def suspend_fault(self, fault_name):
        ''' Suspend the provided fault_name '''
        self.fileutil.inject_fault(f='%s' % fault_name, y='reset', o='0', r='primary', p=self.port)
        self.fileutil.inject_fault(f='%s' % fault_name, y='suspend', o='0', r='primary', p=self.port)
        tinctest.logger.info('Successfully injected fault to suspend %s' % fault_name)

    def get_faults_before_executing_trigger_sqls(self, pass_num,cluster_state, test_type, ddl_type, aborting_create_needed=False):
        ''' Get the fault before trigger sqls are executed '''
        fault_name=''
        tinctest.logger.info('Fault Conditions: pass_num = [%s], cluster_state = [%s], test_type =  [%s], ddl_type = [%s], aborting_create_needed = [%s]' % (pass_num, cluster_state, test_type, ddl_type, aborting_create_needed)) 

        if pass_num == 1 and test_type == 'commit' and ddl_type == 'create':
            if aborting_create_needed:
                fault_name = 'finish_prepared_transaction_commit_pass1_aborting_create_needed'
            else:
                fault_name = 'finish_prepared_transaction_commit_pass1_from_create_pending_to_created'
                
        elif pass_num == 2 and test_type == 'commit' and ddl_type == 'create':
            if aborting_create_needed:
                fault_name = 'finish_prepared_transaction_commit_pass2_aborting_create_needed'
            else:
                fault_name = 'finish_prepared_transaction_commit_pass2_from_create_pending_to_created'

        elif pass_num == 1 and test_type == 'commit' and ddl_type == 'drop':
            fault_name = 'finish_prepared_transaction_commit_pass1_from_drop_in_memory_to_drop_pending'

        elif pass_num == 2 and test_type == 'commit' and ddl_type == 'drop':
            fault_name = 'finish_prepared_transaction_commit_pass2_from_drop_in_memory_to_drop_pending'

        elif pass_num == 1 and test_type == 'abort':
            if aborting_create_needed:
                fault_name = 'finish_prepared_transaction_abort_pass1_aborting_create_needed'
            else:
                fault_name = 'finish_prepared_transaction_abort_pass1_from_create_pending_to_aborting_create'

        elif pass_num == 2 and test_type == 'abort':
            if aborting_create_needed:
                fault_name = 'finish_prepared_transaction_abort_pass2_aborting_create_needed'
            else:
                fault_name = 'finish_prepared_transaction_abort_pass2_from_create_pending_to_aborting_create'

        elif pass_num == 0 and (test_type == 'abort' or test_type == 'commit'):
            pass # We already set the fault error_txn_abort_after_dist_prepare_on_master above for abort tests and for commit tests skip checkpoint is done by default for all tests.
        return fault_name

    def set_faults_before_executing_trigger_sqls(self, pass_num,cluster_state, test_type, ddl_type, aborting_create_needed=False):
        ''' Set the fault before trigger sqls are executed '''
        if (cluster_state == 'resync'):
            self.cluster_in_resync()
        fault_name=''
        fault_name = self.get_faults_before_executing_trigger_sqls(pass_num,cluster_state, test_type, ddl_type, aborting_create_needed=False);

        if (test_type == 'abort'):
            self.fileutil.inject_fault(f='transaction_abort_after_distributed_prepared', y='reset', p=self.port, o='0', seg_id=1)
            self.fileutil.inject_fault(f='transaction_abort_after_distributed_prepared', y='error', p=self.port, o='0', seg_id=1)
            tinctest.logger.info('Successfully injected fault to error out after distributed prepare for abort tests')

        if pass_num !=0 :
            self.suspend_fault(fault_name)
        elif pass_num == 0 : 
            fault_name = None
        if (cluster_state == 'resync'):
            self.fileutil.inject_fault(f='filerep_transition_to_sync_begin', y = 'reset', r = 'primary')
            self.fileutil.inject_fault(f='filerep_transition_to_sync_begin', y = 'suspend', r = 'primary')
            tinctest.logger.info('Successfully suspended filerep_transition_to_sync_begin')
            #Resume resync so that trigger sql can execute while resync is in progress
            self.fileutil.inject_fault(f='filerep_resync', y = 'resume', r = 'primary')
        return fault_name

    def cluster_in_resync(self):
        '''
        1. Suspend filerep_resync, 2. Suspend filerep_transition_to_sync_before_checkpoint, 3. Run gprecoverseg
        '''
        self.base.invoke_fault('filerep_resync', 'suspend', role='primary')
        self.base.invoke_fault('filerep_transition_to_sync_before_checkpoint', 'suspend', role='primary', port=self.port , occurence='0')
        rc = self.gprecover.incremental()
        if not rc:
            raise Exception('Gprecoverseg failed')
        tinctest.logger.info('Cluster in resync state')

    def switch_primary_mirror_role_in_utility_mode(self):
        '''Utility routine to start the master, connect in utility mode, switch the roles of primary and mirrors and shutdown the master '''
        cmd = Command('Start master in utility mode', 'export GPSTART_INTERNAL_MASTER_ONLY=1;gpstart -m')
        cmd.run(validateAfter=True)
        result = cmd.get_results()
        if result.rc != 0:
            raise Exception('Unable to start master in utility mode')
        tinctest.logger.info('Started master in utility mode')
    
        sql_cmd_list = ["update gp_segment_configuration set role='t' where role ='p' and content <> -1", "update gp_segment_configuration set role='p',mode='c' where role ='m' and content <> -1", "update gp_segment_configuration set role='m',status='d' where role ='t' and content <> -1"]
        for sql_cmd in sql_cmd_list:
            PSQL.run_sql_command(sql_cmd, PGOPTIONS="-c gp_session_role=utility -c allow_system_table_mods=dml")
        tinctest.logger.info('Updated the catalog to reverse the roles')
        rc = self.gpstop.run_gpstop_cmd(masteronly = True)
        if not rc:
            raise Exception('Failure to shut down the master')

    def stop_db(self):
        ''' gpstop immediate'''
        rc = self.gpstop.run_gpstop_cmd(immediate = True)
        if not rc:
            raise Exception('Failed to stop the cluster')
        tinctest.logger.info('Stopped cluster immediately')
    
    def start_db(self, down_segments=False):
        ''' Gpstart -a '''
        rc = self.gpstart.run_gpstart_cmd()
        if not rc:
            raise Exception('Failed to start the cluster')
        tinctest.logger.info('Started the cluster successfully')
       
        if not down_segments:
            if self.config.is_down_segments():
                raise Exception('Segments got marked down')

    ''' This is sleep free version based on fault triggered status '''
    def run_crash_and_recovery_fast(self,test_dir, pass_num, cluster_state, test_type, ddl_type, aborting_create_needed=False):
        if pass_num == 0:
            self.wait_till_all_sqls_done()
        else:
            mydir=local_path(test_dir)+'/trigger_sql/sql/'
            tinctest.logger.info('mydir = %s ' % mydir)
            trigger_count = len(glob.glob1(mydir,"*trigger.sql"))
            tinctest.logger.info('*** Count of trigger : %s *** ' % (trigger_count))
            if test_dir == "abort_create_tests":
               ''' vacuum full sql doesn't hit the suspend fault.'''
               trigger_count = trigger_count - 1
            if test_dir == "abort_create_needed_tests":
                ''' Not all SQLs hit the fault for this case, hence wait for them to complete and then others to hit the fault'''
                self.wait_till_all_sqls_done(8 + 1)
                trigger_count = 8
            if test_dir == "abort_abort_create_needed_tests":
                ''' Not all SQLs hit the fault for this case, hence wait for them to complete and then others to hit the fault'''
                self.wait_till_all_sqls_done(6 + 1)
                trigger_count = 6
            fault_type = self.get_faults_before_executing_trigger_sqls(pass_num, cluster_state, test_type, ddl_type, aborting_create_needed=False)
            fault_hit = self.fileutil.check_fault_status(fault_name=fault_type, status="triggered", num_times_hit=trigger_count)
            if not fault_hit:
               raise Exception('Fault not hit expected number of times')

        self.stop_start_validate(cluster_state)

    def wait_till_all_sqls_done(self, count=1):
        ''' 500 here is just an arbitrarily long time "if-we-exceed-this-then-oh-crap-lets-error-out" value '''
        for i in range(1,500):
            psql_count = PSQL.run_sql_command("select count(*) from pg_stat_activity where current_query <> '<IDLE>'", flags='-q -t', dbname='postgres')
            if int(psql_count.strip()) <= count :
                return
            sleep(1)
        raise Exception('SQLs expected to complete but are still running')

    def stop_start_validate(self, cluster_state):
        ''' Do gpstop immediate, gpstart and see if all segments come back up fine '''
        if cluster_state == 'sync' :
            self.stop_db()
            self.switch_primary_mirror_role_in_utility_mode()
            tinctest.logger.info('Successfully switched roles of primary and mirrors in gp_segment_configuration')
            self.start_db(down_segments=True)
            rc = self.gprecover.incremental()
            if not rc:
                raise Exception('Gprecoverseg failed')
            if not self.gprecover.wait_till_insync_transition():
                raise Exception('Segments not in sync')
        if cluster_state == 'change_tracking':
            self.stop_db()
            self.start_db(down_segments=True)

        if cluster_state == 'resync':
            #Resume the filerep_resync filerep_transition_to_sync_begin before stop-start
            self.fileutil.inject_fault(f='filerep_transition_to_sync_begin', y='resume', r='primary')
            self.stop_db()
            self.start_db()
            if not self.gprecover.wait_till_insync_transition():
                raise Exception('Segments not in sync')
        self.dbstate.check_catalog(alldb=False)

    def cluster_in_change_tracking(self):
        '''
        Put Cluster into change_tracking
        '''
        self.base.invoke_fault('filerep_consumer', 'fault', role='primary')
        self.fileutil.wait_till_change_tracking_transition()
        tinctest.logger.info('Change_tracking transition complete')


    def validate_system(self, cluster_state):
        # Validate the system's integrity
        if (cluster_state == 'change_tracking'):
            if not self.gprecover.incremental():
                raise Exception('Gprecoverseg failed')
            if not self.gprecover.wait_till_insync_transition():
                raise Exception('Segments not in sync')
            tinctest.logger.info('Segments recovered and back in sync')

        self.dbstate.check_mirrorintegrity()
        if self.config.has_master_mirror():
            self.dbstate.check_mirrorintegrity(master=True)

    def run_fault_injector_to_skip_checkpoint(self):
        tinctest.logger.info('Skip Checkpointing using fault injector.')
        self.fileutil.inject_fault(y = 'reset', f = 'checkpoint', r ='primary', H='ALL', m ='async', o = '0', p=self.port)
        (ok, out) = self.fileutil.inject_fault(y = 'skip', f = 'checkpoint', r ='primary', H='ALL', m ='async', o = '0', p=self.port)
        if not ok:
           raise Exception('Problem with injecting fault.')

    def backup_output_dir(self,test_dir, test_id):
        indir=local_path(test_dir)
        outdir = indir+'_'+test_id
        cmdstr="cp -r "+ indir + " " + outdir
        cmd = Command(name='run cp -r ', cmdStr=cmdstr)
        tinctest.logger.info("Taking a backup of SQL directory: %s" %cmd)
        try:
            cmd.run()
        except:
            self.fail("cp -r failed.")
        tinctest.logger.info("Test SQL directory Backup Done!!")

    def do_post_run_checks(self):
        self.stop_start_validate('sync')

        rc = self.gprecover.incremental()
        if not rc:
            raise Exception('Gprecoverseg failed')

        self.gprecover.wait_till_insync_transition()

        tinctest.logger.info("Done going from resync to insync")
        self.dbstate.check_catalog(alldb=False)
        self.dbstate.check_mirrorintegrity()

        if self.config.has_master_mirror():
            self.dbstate.check_mirrorintegrity(master=True)
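
The pieces of this class are normally stitched together in a fixed order: verify the cluster, set the pre-sql faults, run the pre sqls, set the trigger-phase faults, run the trigger sqls in the background, crash and restart, then validate. A hedged end-to-end sketch using only the methods defined above (the sql file names and test_dir are illustrative, not taken from the real suite):

    def test_commit_create_in_sync(self):
        cluster_state, test_type, ddl_type = 'sync', 'commit', 'create'
        self.check_system()
        self.set_faults_before_executing_pre_sqls(cluster_state)
        self.run_sql('create_tests_pre.sql')                        # hypothetical pre sql
        self.set_faults_before_executing_trigger_sqls(1, cluster_state, test_type, ddl_type)
        PSQL.run_sql_file(local_path('create_tests_trigger.sql'), background=True)  # hypothetical trigger sql
        self.run_crash_and_recovery_fast('create_tests', 1, cluster_state, test_type, ddl_type)
        self.validate_system(cluster_state)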
Example #37
class FilerepTestCase(MPPTestCase):

    def __init__(self, methodName):    
        self.pgport = os.environ.get('PGPORT')
        self.util = Filerepe2e_Util()
        self.gpconfig = GpConfig()
        self.config = GPDBConfig()
        self.gpr = GpRecover(self.config)
        self.dbstate = DbStateClass('run_validation',self.config)
        self.gpstart = GpStart()
        self.gpstop = GpStop()
        super(FilerepTestCase,self).__init__(methodName)

    def sleep(self, seconds=60):
        time.sleep(seconds)

    def create_file_in_datadir(self, content, role, filename):
        dbid = self.config.get_dbid(content=content, seg_role=role)
        host, datadir = self.config.get_host_and_datadir_of_segment(dbid=dbid)
        file_path = os.path.join(datadir, filename)
        cmd = Command('create a file', 'touch %s' % file_path, ctxt=REMOTE, remoteHost=host)
        cmd.run(validateAfter=True)

    def remove_file_in_datadir(self, content, role, filename):
        dbid = self.config.get_dbid(content=content, seg_role=role)
        host, datadir = self.config.get_host_and_datadir_of_segment(dbid=dbid)
        file_path = os.path.join(datadir, filename)
        cmd = Command('remove a file', 'rm %s' % file_path, ctxt=REMOTE, remoteHost=host)
        cmd.run(validateAfter=True)

    def get_timestamp_of_file_in_datadir(self, content, role, filename):
        dbid = self.config.get_dbid(content=content, seg_role=role)
        host, datadir = self.config.get_host_and_datadir_of_segment(dbid=dbid)
        file_path = os.path.join(datadir, filename)
        cmd = Command('check timestamp', """ python -c "import os; print os.stat('%s').st_mtime" """ %
                      file_path, ctxt=REMOTE, remoteHost=host)
        cmd.run(validateAfter=True)
        res = cmd.get_results().stdout.strip()
        return res

    def verify_file_exists(self, content, role, filename):
        dbid = self.config.get_dbid(content=content, seg_role=role)
        host, datadir = self.config.get_host_and_datadir_of_segment(dbid=dbid)
        file_path = os.path.join(datadir, filename)
        cmd = Command('check if file exists', 'test -f %s' % file_path, ctxt=REMOTE, remoteHost=host)
        cmd.run(validateAfter=True)

    def handle_ext_cases(self,file):
        """
        @file: wet sql file to replace with specific machine env.
        """

        host = str(socket.gethostbyname(socket.gethostname())) #Must be an IP
        querystring = "gpfdist://"+host+":8088"
        
        if os.path.isfile(file):
            for line in fileinput.FileInput(file,inplace=1):
               line = re.sub('gpfdist.+8088',querystring,line)
               print str(re.sub('\n','',line))

    def handle_hybrid_part_cases(self, file):
        """
        @file: hybrid sql file to replace with specific machine env
        """

        querystring = "FROM '"+local_path('hybrid_part.data')+"'" 
        if os.path.isfile(file):
            for line in fileinput.FileInput(file,inplace=1):
                line = re.sub('FROM\s\'.+hybrid_part.data\'',querystring,line)
                print str(re.sub('\n','',line))


    def preprocess(self):
        """ 
        Replace the hard-coded information from sql files with correct hostname and ip address,etc 
        """

        list_workload_dir = ['set_sync1','sync1','set_ck_sync1','ck_sync1',
                        'set_ct','ct','set_resync','resync','set_sync2','sync2']
        for dir in list_workload_dir:
            sql_path = os.path.join(local_path(dir),'sql')
            ans_path = os.path.join(local_path(dir),'expected')
            for file in os.listdir(sql_path):
                    if (file.find('wet_ret')>=0):
                       self.handle_ext_cases(os.path.join(sql_path,file))
                    if (file.find('hybrid_part')>=0):
                       self.handle_hybrid_part_cases(os.path.join(sql_path,file))  
            for file in os.listdir(ans_path):
                    if (file.find('wet_ret')>=0):
                       self.handle_ext_cases(os.path.join(ans_path,file))
                    if (file.find('hybrid_part')>=0):
                       self.handle_hybrid_part_cases(os.path.join(ans_path,file)) 


    def clean_data(self):
        """ 
        Clean the data directory by removing the external table data files; otherwise more data
        would be appended to the same external table when multiple sql files are run. 
        """  

        test = local_path("")
        test = str(test) +"data/*.*"
    
        cmd = 'rm -rfv '+test
        run_shell_command(cmd)       

    def anydownsegments(self):
        """
        Returns True if no segments are down, False otherwise
        """        

        tinctest.logger.info("Checking if any segments are down")
        num_segments_down = self.count_of_nodes_down()
        return int(num_segments_down) == 0

    def stop_start_validate(self, stopValidate=True):
        """
        Do gpstop -i, gpstart and see if all segments come back up fine 
        """        

        tinctest.logger.info("Performing stop start validate")
        tinctest.logger.info("Shutting down the cluster")
        ok = self.gpstop.run_gpstop_cmd(immediate = 'i', validate=stopValidate)
        if not ok and stopValidate:
           raise Exception('Problem while shutting down the cluster')
        tinctest.logger.info("Successfully shutdown the cluster.")

        tinctest.logger.info("Restarting the cluster.")
        ok = self.gpstart.run_gpstart_cmd()
        if not ok:
            raise Exception('Failed to bring the cluster back up')
        tinctest.logger.info("Successfully restarted the cluster.")
        # anydownsegments() returns True only when no segments are down
        if not self.anydownsegments():
            raise Exception("segments were marked down")
        else:
            return (True, "All segments are up")


    def method_reset_fault_injection(self):
        """
        Resets fault injection
        Return: (True, [result]) on success; raises an Exception if the fault injection fails
        """        

        tinctest.logger.info("Resetting fault injection")
        
        (ok1,out1) = self.util.inject_fault(f='filerep_resync', m = 'async', y = 'reset', r = 'primary', H ='ALL')
        if not ok1:
            raise Exception("Fault injection failed")   
        tinctest.logger.info("Done Injecting Fault  to reset resync")

        return (True, str(out1))


    def method_resume_filerep_resync(self):
        """
        Resumes the process of resync
        """

        tinctest.logger.info("Resuming Resync")
        (ok, out) = self.util.inject_fault(f='filerep_resync', m='async',y='resume', r='primary', H='ALL')
        if not ok:
            raise Exception("Fault injection failed")   
        tinctest.logger.info("Done resuming resync")
        return (ok, out)

    def run_method_suspendresync(self):
        """
        Stops the cluster from going to resync
        """

        tinctest.logger.info("Suspending resync")
        (ok,out) = self.util.inject_fault(f='filerep_resync', m='async' , y='suspend', r ='primary', H='ALL')
        tinctest.logger.info('output from suspend resync %s'%out)
        if not ok:
            raise Exception("Fault injection failed")   
        tinctest.logger.info("Done Injecting Fault to suspend resync")
        return (ok, out)
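
    # Sketch of how the three filerep_resync fault helpers above are typically ordered in a
    # scenario (a sketch, not an original test): suspend resync, perform the failover and
    # workload while the cluster is held out of sync, then resume and wait for insync.
    #
    #   self.run_method_suspendresync()
    #   ...failover / workload steps...
    #   self.method_resume_filerep_resync()
    #   self.wait_till_insync_transition()
    #   self.method_reset_fault_injection()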
      

    def count_of_masters(self):
        """
        Returns the number of master nodes in the cluster
        """

        tinctest.logger.info("Count the number of masters")
        cmd = "select count(*) from gp_segment_configuration where content = -1"
        (out) = PSQL.run_sql_command(cmd)
        num_master = out.split('\n')[3].strip()
        return num_master 


    def count_of_nodes(self):
        """
        Returns the total number of nodes in the cluster
        """

        tinctest.logger.info("Counting number of nodes")
        cmd = "select count(*) from gp_segment_configuration"
        (num_cl) = PSQL.run_sql_command(cmd)
        total_num_rows = num_cl.split('\n')[3].strip()
        return total_num_rows


    def count_of_nodes_in_ct(self):
        """
        Returns the number of nodes in change tracking
        """

        tinctest.logger.info("Counting number of nodes in ct")
        sqlcmd = "select count(*) from gp_segment_configuration where mode = 'c'"
        (num_cl) = PSQL.run_sql_command(sqlcmd)
        num_cl = num_cl.split('\n')[3].strip()
        return num_cl


    def count_of_nodes_down(self):
        """
        Returns the number of nodes marked as down
        """

        tinctest.logger.info("Counting the number of nodes down")
        sqlcmd = "select count(*) from gp_segment_configuration where status = 'd'"
        (num_down) = PSQL.run_sql_command(sqlcmd)
        num_down = num_down.split('\n')[3].strip()
        return num_down    


    def count_of_nodes_sync(self):
        """
        Returns the number of nodes in sync
        """

        tinctest.logger.info("Counting the number of nodes in sync")        
        sqlcmd = "select count(*) from gp_segment_configuration where mode = 's'"
        (num_sync) = PSQL.run_sql_command(sqlcmd)
        num_sync = num_sync.split('\n')[3].strip()
        return num_sync


    def count_of_nodes_not_sync(self):
        """
        Returns the number of nodes not in sync
        """

        tinctest.logger.info("Counting number of nodes not in sync")
        sqlcmd = "select count(*) from gp_segment_configuration where mode <> 's'"
        (num_sync) = PSQL.run_sql_command(sqlcmd)
        num_sync = num_sync.split('\n')[3].strip()
        return num_sync

    def inject_fault_on_first_primary(self):
        """
	@product_version gpdb:[4.3.3.0-], gpdb:[4.2.8.1-4.2]
        """
        tinctest.logger.info("\n Injecting faults on first primary")
        (ok,out) = self.util.inject_fault(f='filerep_immediate_shutdown_request', m='async' , y='infinite_loop', r ='primary', seg_id=2, sleeptime=300)
        if not ok:
            raise Exception("Fault filerep_immediate_shutdown_request injection failed")   

        (ok,out) = self.util.inject_fault(f='fileRep_is_operation_completed', m='async' , y='infinite_loop', r ='primary', seg_id=2)
        if not ok:
            raise Exception("Fault fileRep_is_operation_completed injection failed")   
        tinctest.logger.info("\n Done Injecting Fault")


    def inject_fault_on_first_mirror(self):
        """
	@product_version gpdb:[4.3.3.0-], gpdb:[4.2.8.1-4.2]
        """
        sqlcmd = "select dbid from gp_segment_configuration where content=0 and role='m'"
        (first_mirror_dbid) = PSQL.run_sql_command(sqlcmd)
        first_mirror_dbid = first_mirror_dbid.split('\n')[3].strip()

        tinctest.logger.info("\n Injecting faults on first mirror")
        flag = self.util.check_fault_status(fault_name='fileRep_is_operation_completed', status='triggered', max_cycle=100);
        if not flag:
            raise Exception("Fault fileRep_is_operation_completed didn't trigger")   
 
        (ok,out) = self.util.inject_fault(f='filerep_consumer', m='async' , y='panic', r ='mirror', seg_id=first_mirror_dbid)
        if not ok:
            raise Exception("Fault filerep_consumer injection failed")   
        tinctest.logger.info("\n Done Injecting Fault")
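
    # Sketch of how the two injection helpers above are meant to be paired (hypothetical caller):
    # the primary-side faults are armed first, and the mirror-side panic is only injected after
    # fileRep_is_operation_completed has been observed to trigger.
    #
    #   self.inject_fault_on_first_primary()
    #   self.inject_fault_on_first_mirror()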

    def setupGpfdist(self, port, path):
        gpfdist = Gpfdist(port , self.hostIP())
        gpfdist.killGpfdist()
        gpfdist.startGpfdist(' -t 30 -m 1048576 -d '+path)
        return True

    def cleanupGpfdist(self, port,path):
        gpfdist = Gpfdist(port , self.hostIP())
        gpfdist.killGpfdist()
        return True
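
    # Usage sketch for the gpfdist helpers above; the port and data path are hypothetical:
    #
    #   self.setupGpfdist(8088, local_path('data'))
    #   ...run queries against external tables served by gpfdist...
    #   self.cleanupGpfdist(8088, local_path('data'))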

    def hostIP(self):
        ok = run_shell_command('which gpfdist')
        if not ok:
            raise GPtestError("Error:'which gpfdist' command failed.")
        hostname = socket.gethostname()
        if hostname.find('mdw') >= 0:   # matches hostnames that are, or contain, 'mdw'
            host = 'mdw'
        else:
            host = str(socket.gethostbyname(socket.gethostname())) #Must be an IP
        tinctest.logger.info('current host is %s'%host)
        return host

    def method_setup(self):
        tinctest.logger.info("Performing setup tasks")
        gpfs=Gpfilespace()
        gpfs.create_filespace('filerep_fs_a')
        gpfs.create_filespace('filerep_fs_b')
        gpfs.create_filespace('filerep_fs_c')
        gpfs.create_filespace('filerep_fs_z')
        gpfs.create_filespace('sync1_fs_1') 
 
        # Set max_resource_queues to 100 
        cmd = 'gpconfig -c max_resource_queues -v 100 '
        ok = run_shell_command(cmd)
        if not ok:
            raise Exception('Failure during setting the max_resource_queues value to 100 using gpconfig tool')
        #Restart the cluster
        self.gpstop.run_gpstop_cmd(immediate = 'i')
        ok = self.gpstart.run_gpstart_cmd()
        if not ok:
            raise Exception('Failure during restarting the cluster')
        return True


    def get_ext_table_query_from_gpstate(self):
        outfile = local_path("gpstate_tmp")
        ok = run_shell_command("gpstate --printSampleExternalTableSql >"+ outfile)
        querystring = ""
        flag = False
        out = open(outfile, 'r').readlines()
        for line in out:
            if line.find('DROP EXTERNAL TABLE IF EXISTS gpstate_segment_status') >= 0:
                flag = True
            if flag:
                querystring = querystring + line
        return querystring

    def check_gpstate(self, type, phase):
        """ 
        Perform gpstate for each different transition state
        @type: failover type
        @phase: transition stage, can be sync1, ck_sync1, ct, resync, sync2
        """       

        if phase == 'sync1':
            state_num = self.query_select_count("select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Synchronized' and status_in_config='Up' and instance_status='Up'")
            sync1_num = self.query_select_count("select count(*) from gp_segment_configuration where content <> -1")
            if int(sync1_num) != int(state_num):
                raise Exception("gpstate in Sync state failed")
            tinctest.logger.info("Done Running gpstate in %s phase " %(phase))

        elif phase == 'ct':
            p_num = self.query_select_count("select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Change Tracking'  and role = 'Primary' and status_in_config='Up' and instance_status='Up'")
            m_num = self.query_select_count("select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Out of Sync'  and role = 'Mirror' and status_in_config='Down' and instance_status='Down in configuration' ")

            if int(p_num) != int(m_num):
                raise Exception("gpstate in CT state failed")
            tinctest.logger.info("Done Running gpstate in %s phase " %(phase))

        elif phase == 'resync_incr':
            
            if type == 'primary':
                query = "select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Resynchronizing' and  status_in_config='Up' and instance_status='Up'  and resync_mode= 'Incremental'"
                resync_incr_num = self.query_select_count(query)
            else:
                query = "select count(*) from gpstate_segment_status where mirror_status ='Resynchronizing' and  status_in_config='Up' and instance_status='Up' and resync_mode= 'Incremental'"
                resync_incr_num = self.query_select_count(query)
            
            query_num_rows = "select count(*) from gp_segment_configuration where content <> -1"
            num_rows = self.query_select_count(query_num_rows)
            
            if int(resync_incr_num) != int(num_rows):
                tinctest.logger.info("resync_incr_num query run %s" % query)
                tinctest.logger.info("num_rows query run %s" % query_num_rows)
                raise Exception("gpstate in Resync Incremental  state failed. resync_incr_num %s <> num_rows %s" % (resync_incr_num, num_rows))
            tinctest.logger.info("Done Running gpstate in %s phase " %(phase))

        elif phase == 'resync_full':
            num_rows = self.query_select_count("select count(*) from gp_segment_configuration where content <> -1")
          
            if type == 'primary':
                resync_full_num = self.query_select_count("select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Resynchronizing'  and  status_in_config='Up' and instance_status='Up'  and resync_mode= 'Full'")
            else:
                resync_full_num = self.query_select_count("select count(*) from gpstate_segment_status where mirror_status ='Resynchronizing'  and  status_in_config='Up' and instance_status='Up'  and resync_mode= 'Full'")

            if int(resync_full_num) != int(num_rows):
                raise Exception("gpstate in Resync Full state failed")
            tinctest.logger.info("Done Running gpstate in %s phase " %(phase))
        
        return True
    
    def trigger_transition(self):
        PSQL.run_sql_file(local_path('mirrors.sql'))
        

    def run_gpstate(self, type, phase):            
        """
        Perform gpstate for each different transition state
        @type: failover type
        @phase: transition stage, can be sync1, ck_sync1, ct, resync, sync2
        """

        tinctest.logger.info("running gpstate")
        querystring = self.get_ext_table_query_from_gpstate()
        file1 = local_path('create_table_gpstate.sql')
        f1 = open(file1,'w')
        f1.write(querystring)
        f1.write('\n')
        f1.close()
        PSQL.run_sql_file(local_path('create_table_gpstate.sql'))

        gpstate_outfile = local_path('gpstate_out')
        cmd = 'gpstate -s -a > %s 2>&1' % (gpstate_outfile)

        ok  = run_shell_command(cmd)
        self.check_gpstate(type, phase)
        return ok
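
    # The two methods above work together: run_gpstate() materialises the
    # gpstate_segment_status external table from the SQL emitted by
    # "gpstate --printSampleExternalTableSql", and check_gpstate() then compares row counts
    # from that table against gp_segment_configuration for the given phase, e.g.:
    #
    #   self.run_gpstate('primary', 'ct')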


    def check_mirror_seg(self, master=False):
        tinctest.logger.info("running check mirror")
        self.dbstate.check_mirrorintegrity()

    def do_gpcheckcat(self, dbname=None, alldb=False, online=False, outputFile='checkcat.out', outdir=None):
        tinctest.logger.info("running gpcheckcat")
        self.dbstate.check_catalog(outputFile=outputFile)

    def query_select_count(self,sqlcmd):
        (num) = PSQL.run_sql_command(sqlcmd)
        num = num.split('\n')[3].strip()
        return num
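
    # query_select_count() above depends on the fixed layout of default psql output (the value
    # sitting on the fourth line). A sketch of a less position-dependent variant using psql's
    # quiet/tuples-only flags, which this suite already passes elsewhere as flags='-q -t':
    #
    #   def query_select_count(self, sqlcmd):
    #       out = PSQL.run_sql_command(sqlcmd, flags='-q -t')
    #       return out.strip()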
    
    def method_run_failover(self,type):
        """
        Inject fault to failover nodes
        @type: primary [induces fault in mirror] mirror [creates panic in primary]   
        Return: (True, [result of fault injection]) if OK, or (False, [result of fault injection]) otherwise
        """

        if type == 'primary':
            tinctest.logger.info("\n primary failover")
            (ok,out) = self.util.inject_fault(f='filerep_consumer', m='async' , y='fault', r ='mirror', H='ALL')
            tinctest.logger.info("\n Done Injecting Fault")

        elif type == 'mirror':
            tinctest.logger.info("\n Mirror failover")
            (ok,out) = self.util.inject_fault(f='postmaster', m='async' , y='panic', r ='primary', H='ALL')
            tinctest.logger.info("\n Done Injecting Fault")
        return True

    def wait_till_change_tracking_transition(self):
        self.util.wait_till_change_tracking_transition()

    def wait_till_insync_transition(self):
        self.gpr.wait_till_insync_transition()
   
    def run_gprecoverseg(self,recover_mode):
        if recover_mode == 'full':
            self.gpr.full()
        else:
            self.gpr.incremental()
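
    # A sketch (not an original test) of how the transition helpers above are typically chained
    # in a failover scenario:
    #
    #   self.method_run_failover('primary')          # fault the mirrors
    #   self.wait_till_change_tracking_transition()  # primaries enter change tracking
    #   self.run_gprecoverseg('incr')                # recover the mirrors
    #   self.wait_till_insync_transition()           # wait for resync to complete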

    def run_gpconfig(self, parameter, master_value, segment_value):
        if (parameter is not None):
            self.gpconfig.setParameter(parameter, master_value, segment_value)
            self.gpstop.run_gpstop_cmd(restart='r')

    def inject_fault(self, fault = None, mode = None, operation = None, prim_mirr = None, host = 'All', table = None, database = None, seg_id = None, sleeptime = None, occurence = None):
        if fault is None or mode is None or operation is None or prim_mirr is None:
            raise Exception('Incorrect parameters provided for inject fault')

        (ok,out) = self.util.inject_fault(f=fault, m=mode , y=operation, r=prim_mirr, H='ALL', table=table, database=database, sleeptime=sleeptime, o=occurence, seg_id=seg_id)
Example #38
0
class PersistentTables(ScenarioTestCase):

    """
    
    @description Test Cases for Persistent Table testing QA-2417 - Crash RecoveryTest
    @created 2013-03-29 10:10:10
    @modified 2013-05-24 17:10:15
    @tags persistent tables schedule_persistent_tables 
    @product_version gpdb:
    """

    @classmethod
    def setUpClass(cls):
        super(PersistentTables,cls).setUpClass()
        tinctest.logger.info('Setup Database ...')
        setdb = Fault()
        setdb.create_db()

        tinctest.logger.info('Running the initial setup sql files')
        setup = InitialSetup()
        setup.createSQLFiles()
        setup.runSQLFiles()
        tinctest.logger.info('Generating the load - sql files to be run concurrently')
        sqldatagen = GenerateSqls()
        sqldatagen.generate_sqls()

    # Replacing the setUp method with the following one, as setUp method is called twice redundantly
    def setUp(self):
        ''' Need to rebalance cluster as primary segments are killed during test'''
        super(PersistentTables,self).setUp()
        tinctest.logger.info('***Rebalancing cluster state***')
        fault_recovery = Fault()
        if not fault_recovery.rebalance_cluster():
            raise ClusterStateException("**FATAL!! Cluster rebalancing failed - segments went down after \
                                       gprecoverseg -ar, even incremental recovery couldn't bring the segments up. \
                                       Cannot proceed with the tests!! ")
    def wait_till_insync_transition(self):
        self.gpr = GpRecover()
        self.gpr.wait_till_insync_transition()


    def test_drop_db_when_rel_file_moved_out(self):
        ''' Test drop_db by moving relfile to other location '''
        #1. Create objects and load data
        #Done in setUpClass

        #2. Do fault test
        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.storage.persistent_tables.fault.fault.DropDBTest.test_drop_db')
        self.test_case_scenario.append(test_case_list1)

        #Check the State of DB and Cluster
        test_case_list2 = []
        test_case_list2.append("mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_catalog")
        self.test_case_scenario.append(test_case_list2)
        
        test_case_list3 = []
        test_case_list3.append("mpp.gpdb.tests.storage.persistent_tables.test_PT_DropDB.PersistentTables.wait_till_insync_transition")
        self.test_case_scenario.append(test_case_list3)
        
        test_case_list4 = []
        test_case_list4.append("mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity")
        self.test_case_scenario.append(test_case_list4)
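
    # Note on the ScenarioTestCase pattern used here and in the classes that follow: each inner
    # test_case_list appended to self.test_case_scenario is one ordered step of the scenario,
    # and (as the inline comments in the later classes indicate) entries placed in the same list
    # run concurrently, while the lists themselves run one after another (fault first, then
    # catalog check, insync wait and mirror-integrity check).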
Example #39
0
class PersistentTables(ScenarioTestCase):
    """
    
    @description Test Cases for Persistent Table testing QA-2417 - Crash RecoveryTest
    @created 2013-03-29 10:10:10
    @modified 2013-05-24 17:10:15
    @tags persistent tables schedule_persistent_tables 
    @product_version gpdb:
    """
    def __init__(self, methodName):
        super(PersistentTables, self).__init__(methodName)

    @classmethod
    def setUpClass(cls):
        super(PersistentTables, cls).setUpClass()
        tinctest.logger.info('Setup Database ...')
        setdb = Fault()
        setdb.create_db()

        tinctest.logger.info('Running the initial setup sql files')
        setup = InitialSetup()
        setup.createSQLFiles()
        setup.runSQLFiles()
        tinctest.logger.info('Running the initial setup sql files - Done')
        tinctest.logger.info(
            'Generating the load - sql files to be run concurrently')
        sqldatagen = GenerateSqls()
        sqldatagen.generate_sqls()
        tinctest.logger.info(
            'Generating the load - sql files to be run concurrently - Done')
        tinctest.logger.info(
            'Generating the sql files required for Partition table scenario')
        sql_generation = GeneratePartitionSqls()
        sql_generation.generate_sqls()
        tinctest.logger.info('Partition SQL files created.')

    # Replacing the setUp method with the following one, as setUp method is called twice redundantly
    def setUp(self):
        ''' Need to rebalance cluster as primary segments are killed during test'''
        super(PersistentTables, self).setUp()
        tinctest.logger.info('***Rebalancing cluster state***')
        fault_recovery = Fault()
        if not fault_recovery.rebalance_cluster():
            raise ClusterStateException(
                "**FATAL!! Cluster rebalancing failed - segments went down after \
                                       gprecoverseg -ar, even incremental recovery couldn't bring the segments up. \
                                       Cannot proceed with the tests!! ")

    def wait_till_insync_transition(self):
        self.gpr = GpRecover()
        self.gpr.wait_till_insync_transition()

    def kill_primaries_while_in_transaction(self, skip_checkpoint=False):
        # Run skip checkpoint if needed
        if skip_checkpoint:
            test_case_list1 = []
            test_case_list1.append(
                'mpp.gpdb.tests.storage.persistent_tables.fault.fault.CheckpointTest.test_skip_checkpoint'
            )
            self.test_case_scenario.append(test_case_list1)
        ''' Following scenarios are run concurrently'''
        test_case_list2 = []
        # Start the Failover to mirror scenario
        test_case_list2.append(
            'mpp.gpdb.tests.storage.persistent_tables.fault.fault.FaultInjectionTests.test_failover_to_mirror_during_transactions'
        )
        # Start the load generation
        test_case_list2.append(
            'mpp.gpdb.tests.storage.persistent_tables.sqls.run_sqls_Concurrently.SQLLoadTest'
        )
        self.test_case_scenario.append(test_case_list2)

        # Recover the segments if down
        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.storage.persistent_tables.fault.fault.RecoveryTest.test_recovery'
        )
        self.test_case_scenario.append(test_case_list3)

        #Check the State of DB and Cluster
        test_case_list4 = []
        test_case_list4.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_catalog")
        self.test_case_scenario.append(test_case_list4)

        test_case_list5 = []
        test_case_list5.append(
            "mpp.gpdb.tests.storage.persistent_tables.test_PT_CrashRecovery.PersistentTables.wait_till_insync_transition"
        )
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity"
        )
        self.test_case_scenario.append(test_case_list6)

    def postmaster_reset_while_in_transaction(self,
                                              in_sync=True,
                                              kill_all_segment_processes=True):
        if not in_sync:
            test_case_list = []
            test_case_list.append(
                'mpp.gpdb.tests.storage.persistent_tables.fault.fault.FaultInjectionTests.test_put_system_in_change_tracking_mode'
            )
            self.test_case_scenario.append(test_case_list)
        ''' Following scenarios are run concurrently'''
        test_case_list1 = []
        if kill_all_segment_processes:
            test_case_list1.append(
                'mpp.gpdb.tests.storage.persistent_tables.fault.fault.FaultInjectionTests.test_postmaster_reset_during_transaction_kill_all_segment_processes'
            )
        else:
            test_case_list1.append(
                'mpp.gpdb.tests.storage.persistent_tables.fault.fault.FaultInjectionTests.test_post_master_reset_during_transaction_kill_few_segment_processes'
            )
        # Start the load generation
        test_case_list1.append(
            'mpp.gpdb.tests.storage.persistent_tables.sqls.run_sqls_Concurrently.SQLLoadTest'
        )
        self.test_case_scenario.append(test_case_list1)

        # Recover the segments if down
        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.storage.persistent_tables.fault.fault.RecoveryTest.test_recovery'
        )
        self.test_case_scenario.append(test_case_list3)

        #Check the State of DB and Cluster
        test_case_list4 = []
        test_case_list4.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_catalog")
        self.test_case_scenario.append(test_case_list4)

        test_case_list5 = []
        test_case_list5.append(
            "mpp.gpdb.tests.storage.persistent_tables.test_PT_CrashRecovery.PersistentTables.wait_till_insync_transition"
        )
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity"
        )
        self.test_case_scenario.append(test_case_list6)

    def test_partition_table_scenario(self):
        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.storage.persistent_tables.sqls.partition_tables.partitionTables.PartitionTableQueries'
        )
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.storage.persistent_tables.fault.fault.PartitionTableScenario'
        )
        self.test_case_scenario.append(test_case_list2)

        # Recover the segments if down
        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.storage.persistent_tables.fault.fault.RecoveryTest.test_recovery'
        )
        self.test_case_scenario.append(test_case_list3)

        #Check the State of DB and Cluster
        test_case_list4 = []
        test_case_list4.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_catalog")
        self.test_case_scenario.append(test_case_list4)

        test_case_list5 = []
        test_case_list5.append(
            "mpp.gpdb.tests.storage.persistent_tables.test_PT_CrashRecovery.PersistentTables.wait_till_insync_transition"
        )
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity"
        )
        self.test_case_scenario.append(test_case_list6)

    def periodic_failover(self, skipckt='yes'):
        '''
          Test to simulate periodic fault injection on the system.
        '''
        #skip checkpoint,
        if skipckt == 'yes':
            test_case_list0 = []
            test_case_list0.append(
                'mpp.gpdb.tests.storage.persistent_tables.fault.fault.CheckpointTest.test_skip_checkpoint'
            )
            self.test_case_scenario.append(test_case_list0)

        #start load on database
        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.storage.persistent_tables.fault.fault.FaultInjectionTests.test_kill_segments_periodically'
        )
        test_case_list1.append(
            'mpp.gpdb.tests.storage.persistent_tables.sqls.run_sqls_Concurrently.SQLLoadTest'
        )
        self.test_case_scenario.append(test_case_list1)

        #recover segments
        test_case_list2 = []
        if skipckt == 'yes':
            # we are using full recovery for the test "test_periodic_failover_with_skip_checkpoint"
            # since we hit "inconsistent lsn issue" (Ref. MPP-23631)
            test_case_list2.append(
                'mpp.gpdb.tests.storage.persistent_tables.fault.fault.RecoveryTest.test_recovery_full'
            )
        else:
            test_case_list2.append(
                'mpp.gpdb.tests.storage.persistent_tables.fault.fault.RecoveryTest.test_recovery'
            )
        self.test_case_scenario.append(test_case_list2)

        #Check the State of DB and Cluster
        test_case_list3 = []
        test_case_list3.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_catalog")
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append(
            "mpp.gpdb.tests.storage.persistent_tables.test_PT_CrashRecovery.PersistentTables.wait_till_insync_transition"
        )
        self.test_case_scenario.append(test_case_list4)

        test_case_list5 = []
        test_case_list5.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity"
        )
        self.test_case_scenario.append(test_case_list5)

    def crash_system_immediately_after_checkpoint(self, mode='InSync'):
        '''
           Test to simulate system Crash Immediately After Checkpoint
        '''
        #skip checkpoint,
        test_case_list0 = []
        test_case_list0.append(
            'mpp.gpdb.tests.storage.persistent_tables.fault.fault.CheckpointTest.test_skip_checkpoint'
        )
        self.test_case_scenario.append(test_case_list0)

        #start load on database
        test_case_list1 = []
        if mode == 'InSync':
            testMethod = 'mpp.gpdb.tests.storage.persistent_tables.fault.fault.FaultInjectionTests.test_crash_after_checkpoint_insync'
        else:
            testMethod = 'mpp.gpdb.tests.storage.persistent_tables.fault.fault.FaultInjectionTests.test_crash_after_checkpoint_ct'
        test_case_list1.append(testMethod)
        test_case_list1.append(
            'mpp.gpdb.tests.storage.persistent_tables.sqls.run_sqls_Concurrently.SQLLoadTest'
        )
        self.test_case_scenario.append(test_case_list1)

        if mode == 'CT':
            #recover segments
            test_case_list2 = []
            test_case_list2.append(
                'mpp.gpdb.tests.storage.persistent_tables.fault.fault.RecoveryTest.test_recovery'
            )
            self.test_case_scenario.append(test_case_list2)

        #Check the State of DB and Cluster
        test_case_list3 = []
        test_case_list3.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_catalog")
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append(
            "mpp.gpdb.tests.storage.persistent_tables.test_PT_CrashRecovery.PersistentTables.wait_till_insync_transition"
        )
        self.test_case_scenario.append(test_case_list4)

        test_case_list5 = []
        test_case_list5.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity"
        )
        self.test_case_scenario.append(test_case_list5)

    def test_kill_primaries_while_in_transaction_with_checkpoint(self):
        """
        @fail_fast False
        """
        self.kill_primaries_while_in_transaction(False)

    def test_kill_primaries_while_in_transaction_with_skip_checkpoint(self):
        """
        @fail_fast False
        """
        ''' Invokes kill_primary scenario with skip checkpoint'''
        self.kill_primaries_while_in_transaction(True)

    def test_postmaster_reset_while_in_transaction_insync_kill_all(self):
        """
        @fail_fast False
        """
        ''' Sync Mode - Kill segment processes of ongoing transactions from all segments '''
        self.postmaster_reset_while_in_transaction(
            in_sync=True, kill_all_segment_processes=True)

    def test_postmaster_reset_while_in_transaction_insync_kill_few(self):
        """
        @fail_fast False
        """
        ''' Sync Mode - Kill segment processes of ongoing transactions from few segments '''
        self.postmaster_reset_while_in_transaction(
            in_sync=True, kill_all_segment_processes=False)

    def test_postmaster_reset_while_in_transaction_out_of_sync_kill_all(self):
        """
        @fail_fast False
        """
        ''' Out of Sync Mode - Kill segment processes of ongoing transactions from all segments '''
        self.postmaster_reset_while_in_transaction(
            in_sync=False, kill_all_segment_processes=True)

    def test_postmaster_reset_while_in_transaction_out_of_sync_kill_few(self):
        """
        @fail_fast False
        """
        ''' Out of Sync Mode - Kill segment processes of ongoing transactions from few segments '''
        self.postmaster_reset_while_in_transaction(
            in_sync=False, kill_all_segment_processes=False)

    def test_periodic_failover_with_checkpoint(self):
        """
        @fail_fast False
        """
        ''' Test without skipping checkpoint '''
        self.periodic_failover('no')

    def test_periodic_failover_with_skip_checkpoint(self):
        """
        @fail_fast False
        """
        ''' Test with skip checkpoint '''
        self.periodic_failover('yes')

    def test_crash_system_immediately_after_checkpoint_insync(self):
        """
        @fail_fast False
        """
        ''' Test to simulate system Crash Immediately After Checkpoint in Sync state '''
        self.crash_system_immediately_after_checkpoint('InSync')

    def test_crash_system_immediately_after_checkpoint_in_ct(self):
        """
        @fail_fast False
        """
        ''' Test to simulate system Crash Immediately After Checkpoint in CT state '''
        self.crash_system_immediately_after_checkpoint('CT')
Example #40
0
    def test_with_standby_and_filespace(self):
        """
        check that each mirror's host address is the same whether the cluster is configured with or without a standby
        """
        if not self.config.is_multinode():
            self.skipTest('skipping test since the cluster is not multinode')

        gprecover = GpRecover()

        # adding mirrors first
        self._setup_gpaddmirrors()
        self._generate_gpinit_config_files()
        self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)

        res = {'rc': 0, 'stdout' : '', 'stderr': ''}
        run_shell_command("gpaddmirrors -a -i %s -s -d %s --verbose" % (self.mirror_config_file, self.mdd), 'run gpaddmirrros with mirror spreading', res)
        self.assertEqual(0, res['rc'])
        gprecover.wait_till_insync_transition()
        
        get_mirror_address = 'SELECT content, address FROM gp_segment_configuration WHERE preferred_role = \'m\';'
        rows = self.format_sql_result(get_mirror_address)
        # create a dictionary for mirror and its host address
        mirror_hosts_wo_stdby = {}
        for row in rows:
            content = row[0]
            address = row[1]
            mirror_hosts_wo_stdby[content] = address

        # delete and reinitialize cluster again
        self._do_gpdeletesystem()
        self._do_gpinitsystem()
        gprecover.wait_till_insync_transition()
        res = {'rc': 0, 'stdout' : '', 'stderr': ''}
        # create filespace and standby, needs to get a new config_info instance for new cluster
        config_info = GPDBConfig()
        if not config_info.has_master_mirror():
            self._do_gpinitstandby()
        self._create_filespace('user_filespace')

        self._setup_gpaddmirrors()
        self._generate_gpinit_config_files()
        self._cleanup_segment_data_dir(self.host_file, self.mirror_data_dir)
        for fs_location in self.fs_location:
            self._cleanup_segment_data_dir(self.host_file, fs_location)        

        # add mirror for the new cluster which has standby and user filespace configured        
        res = {'rc': 0, 'stdout' : '', 'stderr': ''}
        run_shell_command("gpaddmirrors -a -i %s -s -d %s --verbose" % (self.mirror_config_file, self.mdd), 'run gpaddmirrros with mirror spreading', res)
        self.assertEqual(0, res['rc'])
        gprecover.wait_till_insync_transition()
        # verify that when there is filespace configured, the configuration will be same as mirror_config_file specified
        self.verify_config_file_with_gp_config()
        self.check_mirror_seg()

        rows = self.format_sql_result(get_mirror_address)
        mirror_hosts_with_stdby = {}
        for row in rows:
            content = row[0]
            address = row[1]
            mirror_hosts_with_stdby[content] = address
        for key in mirror_hosts_wo_stdby:
            self.assertEqual(mirror_hosts_wo_stdby[key], mirror_hosts_with_stdby[key])

        res = {'rc': 0, 'stdout' : '', 'stderr': ''}
        run_shell_command("gpinitstandby -ar", 'remove standby', res)
        if res['rc'] > 0:
           raise GPAddmirrorsTestCaseException("Failed to remove the standby")
        self._drop_filespace()
Example #41
0
class FilerepResync(ScenarioTestCase):
    """
    
    @description test cases for MPP-11167
    @created 2013-03-15 10:10:10
    @modified 2013-05-07 17:10:15
    @tags persistent tables schedule_filerep
    @product_version gpdb:
    """

    @classmethod
    def setUpClass(cls):
        super(FilerepResync,cls).setUpClass()
        tinctest.logger.info('Setting up the filerep resync test.')

    def wait_till_insync_transition(self):
        self.gpr = GpRecover()
        self.gpr.wait_till_insync_transition()
        
    def test_filerep_resysnc(self):
        
        #Step 1: Create an append-only table
        test_case_list1 = []
        test_case_list1.append("mpp.gpdb.tests.storage.filerep.Filerep_Resync.schema.SchemaTest.AOTable")
        self.test_case_scenario.append(test_case_list1)
        
        #Step 2:1 Begin a transaction & insert values into created table
        test_case_list2 = []
        test_case_list2.append("mpp.gpdb.tests.storage.filerep.Filerep_Resync.runsql.TransactionTest.Transaction")
        #Step 2:2 Start a concurrent process to kill all the mirror processes.
        #            It should start only after the begin & insert are performed    
        test_case_list2.append("mpp.gpdb.tests.storage.filerep.Filerep_Resync.fault.FaultTest.ProcessKill")
        self.test_case_scenario.append(test_case_list2)
        
        #Step 3: Check the persistent table for duplicate entries
        test_case_list3 = []
        test_case_list3.append("mpp.gpdb.tests.storage.filerep.Filerep_Resync.schema.SchemaTest.DuplicateEntries.test_duplicate_entries_after_hitting_fault")
        self.test_case_scenario.append(test_case_list3)
        
        #Step 4: Perform incremental recovery 
        test_case_list4 = []
        test_case_list4.append("mpp.gpdb.tests.storage.filerep.Filerep_Resync.fault.FaultTest.Recovery")
        self.test_case_scenario.append(test_case_list4)
        
        #Step 5: Check if the mirror segments are up or not
        test_case_list5 = []
        test_case_list5.append("mpp.gpdb.tests.storage.filerep.Filerep_Resync.fault.FaultTest.Health")
        self.test_case_scenario.append(test_case_list5)
        
        #Step 6: Re-check the persistent table for duplicate entries
        test_case_list6 = []
        test_case_list6.append("mpp.gpdb.tests.storage.filerep.Filerep_Resync.schema.SchemaTest.DuplicateEntries.test_duplicate_entries_after_recovery")
        self.test_case_scenario.append(test_case_list6)
        
        #Step 7: Check the State of DB and Cluster
        test_case_list7 = []
        test_case_list7.append("mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_catalog")
        self.test_case_scenario.append(test_case_list7)
        
        test_case_list8 = []
        test_case_list8.append("mpp.gpdb.tests.storage.filerep.Filerep_Resync.test_filerep_resync.FilerepResync.wait_till_insync_transition")
        self.test_case_scenario.append(test_case_list8)
        
        test_case_list9 = []
        test_case_list9.append("mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity")
        self.test_case_scenario.append(test_case_list9)
Example #42
0
    def wait_till_insync_transition(self):
        self.gpr = GpRecover()
        self.gpr.wait_till_insync_transition()
Example #43
0
class GprecoversegTest(ScenarioTestCase):
    """
    
    @description This test-suite contains the automation for 'gprecoverseg' tests
    @created 2009-01-27 14:00:00
    @modified 2013-09-12 17:10:15
    @tags storage schema_topology 
    @product_version gpdb:4.2.x,gpdb:main
    """

    def __init__(self, methodName, should_fail=False):
        super(GprecoversegTest, self).__init__(methodName)

    def get_version(self):
        cmdStr = 'gpssh --version'
        cmd = Command('get product version', cmdStr=cmdStr)
        cmd.run(validateAfter=True)
        return cmd.get_results().stdout.strip().split()[2]

    def recover_segments(self,option,max_rtrycnt):
        """
        @summary: Recovers the segments and returns the status of recovery process.
        
        @param option: represents different gprecoverseg command options
        @param max_rtrycnt: the max no. of times state of cluster should be checked
        @return: Boolean value representing the status of recovery process
        """
        
        config = GPDBConfig()
        recoverseg = GpRecoverseg()
        tinctest.logger.info("Running gprecoverseg with '%s' option..."%option)
        recoverseg.run(option)
        rtrycnt = 0
        while not config.is_not_insync_segments() and rtrycnt <= max_rtrycnt:
            tinctest.logger.info("Waiting [%s] for DB to recover" % rtrycnt)
            sleep(10)
            rtrycnt = rtrycnt + 1
        return rtrycnt <= max_rtrycnt

    def wait_till_insync_transition(self):
        self.gpr = GpRecover()
        self.gpr.wait_till_insync_transition()
            
    def check_segment_roles(self):
        """
        @summary: Checks if the segments are in preferred roles or not.
                    If not, rebalances the cluster.
        
        @return: None
        """
        
        newfault = Fault()
        # If the segments are not in preferred roles, go for rebalancing the cluster
        if newfault.check_if_not_in_preferred_role():
            tinctest.logger.warn("***** Segments not in their preferred roles : rebalancing the segments...")
            # If rebalancing downs the segments, go for incremental recovery - this is observed sometimes
            if not self.recover_segments('-r',10):
                tinctest.logger.warn("***** Segments down after rebalance : Tests cannot proceed further!!")
            # If rebalancing passes proceed for tests
            else:
                tinctest.logger.info("***** Segments successfully rebalanced : Proceeding with the tests")
        # If segments in preferred roles, proceed for the tests
        else:
            tinctest.logger.info("***** Segments in preferred roles : Proceeding with the tests")

    def check_cluster_health(self, doFullRecovery = False):
        """
        @summary: Checks for the cluster health, tries to recover and rebalance the cluster, 
                    fails the test if not able to do so 
        
        @param doFullRecovery: Boolean value which decides whether to go for full 
                                recovery or not
        @return: None
        """
        
        tinctest.logger.info("***** Checking the cluster health before starting tests")
        config = GPDBConfig()
        # If the segments are not up, go for recovery
        if not config.is_not_insync_segments():
            tinctest.logger.info("***** Starting the recovery process")
            # if incremental didn't work, go for full recovery
            if not self.recover_segments(' ',10):
                tinctest.logger.warn("***** Segments not recovered after incremental recovery")
                if doFullRecovery:                    
                    # if full also fails, the tests cannot proceed, so fail it
                    if not self.recover_segments('-F',20):
                        tinctest.logger.error("***** Segments not recovered even after full recovery - Tests cannot proceed further!!")
                        self.fail("Segments are down - Tests cannot proceed further!!")
                    # if full recovery passes, check for rebalancing the cluster
                    else:
                        tinctest.logger.info("***** Segments up after full recovery : validating their roles...")
                        self.check_segment_roles()
                else:
                    self.fail("Segments are down - Tests cannot proceed!!")
            # if incremental recovery passes, check for rebalancing the cluster
            else:
                tinctest.logger.info("***** Segments up after incremental recovery : validating their roles...")
                self.check_segment_roles()
        # If the segments are up, check for rebalancing the cluster
        else:
            tinctest.logger.info("***** Segments are up : validating their roles...")
            self.check_segment_roles()

    def setUp(self):
        super(GprecoversegTest,self).setUp()
        self.check_cluster_health()


    def test_01_run_schema_topology(self):
        """
        [feature]: Test to run the schema_topology test-suite
        
        @fail_fast False
        """
        tinctest.logger.info('***Running the schema topology test-suite before starting the tests...')
        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.catalog.schema_topology.test_ST_AllSQLsTest.AllSQLsTest')
        self.test_case_scenario.append(test_case_list1)
        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.catalog.schema_topology.test_ST_OSSpecificSQLsTest.OSSpecificSQLsTest')
        self.test_case_scenario.append(test_case_list2)
        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.catalog.schema_topology.test_ST_GPFilespaceTablespaceTest.GPFilespaceTablespaceTest')
        self.test_case_scenario.append(test_case_list3)
        test_case_list4 = []
        test_case_list4.append('mpp.gpdb.tests.catalog.schema_topology.test_ST_EnhancedTableFunctionTest.EnhancedTableFunctionTest')
        self.test_case_scenario.append(test_case_list4)


    def test_gprecoverseg_config_validation(self):
        """
        [feature]: Test to check if gprecoverseg sets the config to invalid state or not
        
        """
        
        # Create a fault
        test_case_list1 = []
        #fault
        test_case_list1.append("mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_kill_primary")
        self.test_case_scenario.append(test_case_list1)
        
        # Run the invalid state recovery process
        test_case_list2 = []
        test_case_list2.append("mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_invalid_state_recoverseg")
        test_case_list2.append("mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.SegmentConfigurations")
        self.test_case_scenario.append(test_case_list2)
        
        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.gprestartdb')
        self.test_case_scenario.append(test_case_list3)
    
        test_case_list4 = []
        test_case_list4.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.check_if_not_in_preferred_role')
        self.test_case_scenario.append(test_case_list4)
        
        # Check the State of DB and Cluster
        test_case_list5 = []
        test_case_list5.append("mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition")
        self.test_case_scenario.append(test_case_list5)
        
        test_case_list6 = []
        test_case_list6.append("mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity")
        self.test_case_scenario.append(test_case_list6)


    def test_gprecoverseg_incr_newDir(self):
        """
        [feature]: Incremental gprecoverseg to new location: Same host, with filespaces
        
        """
        
        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_kill_primary')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_recovery_with_new_loc')
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.gprestartdb')
        self.test_case_scenario.append(test_case_list3)
    
        test_case_list4 = []
        test_case_list4.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.check_if_not_in_preferred_role')
        self.test_case_scenario.append(test_case_list4)
        
        # Check the State of DB and Cluster
        test_case_list5 = []
        test_case_list5.append("mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition")
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append("mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity")
        self.test_case_scenario.append(test_case_list6)


    def test_failover_to_mirror(self):
        """
        [feature]:  System failover to mirror and do incremental recovery
        
        """

        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_kill_primary')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_incremental_recovery')
        self.test_case_scenario.append(test_case_list2) 

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.gprestartdb')
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.check_if_not_in_preferred_role')
        self.test_case_scenario.append(test_case_list4)
        
        # Check the State of DB and Cluster
        test_case_list5 = []
        test_case_list5.append("mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition")
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append("mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity")
        self.test_case_scenario.append(test_case_list6)

    def test_failover_to_primary(self):
        """
        [feature]:  System failover to Primary and do incremental recovery
        
        """
        
        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_kill_mirror')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_incremental_recovery')
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.gprestartdb')
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.check_if_not_in_preferred_role')
        self.test_case_scenario.append(test_case_list4)
        
        # Check the State of DB and Cluster
        test_case_list5 = []
        test_case_list5.append("mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition")
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append("mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity")
        self.test_case_scenario.append(test_case_list6)

    def test_drop_pg_dirs_on_primary(self):
        """
        [feature]:   System Failover to Mirror due to Drop pg_* dir on Primary
        
        """
        
        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_drop_pg_dirs_on_primary')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_full_recovery')
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.gprestartdb')
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.check_if_not_in_preferred_role')
        self.test_case_scenario.append(test_case_list4)
        
        # Check the State of DB and Cluster
        test_case_list5 = []
        test_case_list5.append("mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition")
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append("mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity")
        self.test_case_scenario.append(test_case_list6)

    def test_incremental_recovery_with_symlinks(self):
        """
        [feature]: Incremental recovery when the data directory is a symlink instead of a normal directory
        
        """

        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_create_symlink_for_seg')
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_incremental_recovery')
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition')
        self.test_case_scenario.append(test_case_list4)

        test_case_list5 = []
        test_case_list5.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_remove_symlink_for_seg')
        self.test_case_scenario.append(test_case_list5)

    def test_incremental_recovery_without_symlinks(self):
        """
        [feature]: Incremental recovery when the data directory is a symlink instead of a regular directory
        
        """

        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_incremental_recovery')
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition')
        self.test_case_scenario.append(test_case_list3)

    def test_full_recovery_with_symlinks(self):
        """
        [feature]: Full recovery when data directory is a symlink instead of a regular directory
        
        """

        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_create_symlink_for_seg')
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_full_recovery')
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition')
        self.test_case_scenario.append(test_case_list4)

        test_case_list5 = []
        test_case_list5.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_remove_symlink_for_seg')
        self.test_case_scenario.append(test_case_list5)
        
    def test_full_recovery_without_symlinks(self):
        """
        [feature]: Full recovery when the data directory is a regular directory (no symlinks)
        
        """

        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_full_recovery')
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition')
        self.test_case_scenario.append(test_case_list3)

    def test_skip_persistent_check_with_incremental_recovery(self):
        """
        [feature]: skip the persistent table check when the user does not supply the --persistent-check option 
        
        @product_version gpdb: [4.3.5.0 -]
        """

        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_incremental_recovery_skip_persistent_tables_check')
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition')
        self.test_case_scenario.append(test_case_list3)

    def test_skip_persistent_check_with_full_recovery(self):
        """
        [feature]: skip the persistent table check when the user does not supply the --persistent-check option 
        
        @product_version gpdb: [4.3.5.0 -]
        """

        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_full_recovery_skip_persistent_tables_check')
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition')
        self.test_case_scenario.append(test_case_list3)

    def test_persistent_check_with_incremental_recovery(self):
        """
        [feature]: Perform the persistent table check when the persistent tables have been corrupted 
        
        @product_version gpdb: [4.3.5.0 -]
        """
        
        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_corrupt_persistent_tables')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down')
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_incremental_recovery_with_persistent_tables_corruption')
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_rebuild_persistent_tables')
        self.test_case_scenario.append(test_case_list4)

        test_case_list5 = []
        test_case_list5.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_incremental_recovery_with_persistent_tables_corruption')
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition')
        self.test_case_scenario.append(test_case_list6)
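
        # Scenario outline (shared with the full-recovery variant below): the
        # persistent tables are corrupted and a segment is marked down, recovery
        # is run against the corruption to exercise the persistent-table check,
        # the tables are rebuilt, and a second recovery pass followed by
        # wait_till_insync_transition confirms the cluster heals.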

    def test_persistent_check_with_full_recovery(self):
        """
        [feature]: Perform the persistent table check when the persistent tables have been corrupted 
        
        @product_version gpdb: [4.3.5.0 -]
        """
        
        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_corrupt_persistent_tables')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down')
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_full_recovery_with_persistent_tables_corruption')
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_rebuild_persistent_tables')
        self.test_case_scenario.append(test_case_list4)

        test_case_list5 = []
        test_case_list5.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_full_recovery_with_persistent_tables_corruption')
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition')
        self.test_case_scenario.append(test_case_list6)

    def test_clean_shared_mem(self):
        """
        [feature]: Check the shared memory cleanup for gprecoverseg 
        
        @product_version gpdb: [4.3.5.0 -]
        """

        version = self.get_version()
        if version.startswith('4.2'):
            self.skipTest('Skipping test for 4.2')

        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_kill_primary')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_wait_till_segments_in_change_tracking')
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_incremental_recovery')
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition')
        self.test_case_scenario.append(test_case_list4)

        test_case_list5 = []
        test_case_list5.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_shared_mem_is_cleaned')
        self.test_case_scenario.append(test_case_list5)

    def test_start_transaction(self):
        """
        [feature]: Check if we can start a transaction successfully before establishing a connection to the database
        
        @product_version gpdb: [4.3.4.1 -]
        """

        test_case_list1 = []
        test_case_list1.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_kill_primary_group')
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_wait_till_segments_in_change_tracking')
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_incremental_recovery')
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append('mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition')
        self.test_case_scenario.append(test_case_list4)
Example #44
0
 def test_recovery_full(self):
     gprecover = GpRecover()
     gprecover.full()
     gprecover.wait_till_insync_transition()
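
Example #44 above drives a full recovery and then waits for the mirrors to come back in sync. For contrast, here is a minimal sketch of the incremental path, assembled only from calls that appear verbatim in Examples #46 and #50 below; the wrapper name recover_incremental_and_wait is hypothetical and used purely for illustration.

 def recover_incremental_and_wait(self):
     # hypothetical wrapper: run incremental gprecoverseg, then block until
     # all mirrors report in-sync again
     gprecover = GpRecover(GPDBConfig())
     gprecover.incremental()
     gprecover.wait_till_insync_transition()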
Example #45
0
 def __init__(self):
     self.fileutil = Filerepe2e_Util()
     self.gprecover = GpRecover()
     self.config = GpConfig()
     self.base_dir = os.path.dirname(sys.modules[self.__class__.__module__].__file__)
Example #46
0
 def full_recoverseg(self):
     gprecover = GpRecover(GPDBConfig())
     gprecover.full()
Example #47
0
class GprecoversegTest(ScenarioTestCase):
    """
    
    @description This test-suite contains the automation for 'gprecoverseg' tests
    @created 2009-01-27 14:00:00
    @modified 2013-09-12 17:10:15
    @tags storage schema_topology 
    @product_version gpdb:4.2.x,gpdb:main
    """
    def __init__(self, methodName, should_fail=False):
        super(GprecoversegTest, self).__init__(methodName)

    def get_version(self):
        cmdStr = 'gpssh --version'
        cmd = Command('get product version', cmdStr=cmdStr)
        cmd.run(validateAfter=True)
        return cmd.get_results().stdout.strip().split()[2]

    def recover_segments(self, option, max_rtrycnt):
        """
        @summary: Recovers the segments and returns the status of the recovery process.

        @param option: gprecoverseg command option to run with (e.g. ' ', '-F' or '-r')
        @param max_rtrycnt: the maximum number of times the cluster state should be checked
        @return: Boolean value representing the status of the recovery process
        """

        config = GPDBConfig()
        recoverseg = GpRecoverseg()
        tinctest.logger.info("Running gprecoverseg with '%s' option..." %
                             option)
        recoverseg.run(option)
        rtrycnt = 0
        while ((config.is_not_insync_segments()) == False
               and rtrycnt <= max_rtrycnt):
            tinctest.logger.info("Waiting [%s] for DB to recover" % rtrycnt)
            sleep(10)
            rtrycnt = rtrycnt + 1
        if rtrycnt > max_rtrycnt:
            return False
        else:
            return True
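
    # Usage sketch for recover_segments(): the option string is passed straight
    # through to GpRecoverseg.run(), and the second argument caps how many
    # 10-second waits are made before giving up. The combinations used later in
    # this class are:
    #   self.recover_segments(' ', 10)    # incremental recovery
    #   self.recover_segments('-F', 20)   # full recovery
    #   self.recover_segments('-r', 10)   # rebalance to preferred roles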

    def wait_till_insync_transition(self):
        self.gpr = GpRecover()
        self.gpr.wait_till_insync_transition()

    def check_segment_roles(self):
        """
        @summary: Checks if the segments are in preferred roles or not.
                    If not, rebalances the cluster.
        
        @return: None
        """

        newfault = Fault()
        # If the segments are not in preferred roles, go for rebalancing the cluster
        if newfault.check_if_not_in_preferred_role():
            tinctest.logger.warn(
                "***** Segments not in their preferred roles : rebalancing the segments..."
            )
            # If rebalancing downs the segments, go for incremental recovery - this is observed sometimes
            if not self.recover_segments('-r', 10):
                tinctest.logger.warn(
                    "***** Segments down after rebalance : Tests cannot proceed further!!"
                )
            # If rebalancing passes proceed for tests
            else:
                tinctest.logger.info(
                    "***** Segments successfully rebalanced : Proceeding with the tests"
                )
        # If segments in preferred roles, proceed for the tests
        else:
            tinctest.logger.info(
                "***** Segments in preferred roles : Proceeding with the tests"
            )

    def check_cluster_health(self, doFullRecovery=False):
        """
        @summary: Checks for the cluster health, tries to recover and rebalance the cluster, 
                    fails the test if not able to do so 
        
        @param doFullRecovery: Boolean value which decides whether to go for full 
                                recovery or not
        @return: None
        """

        tinctest.logger.info(
            "***** Checking the cluster health before starting tests")
        config = GPDBConfig()
        # If the segments are not up, go for recovery
        if not config.is_not_insync_segments():
            tinctest.logger.info("***** Starting the recovery process")
            # if incremental didn't work, go for full recovery
            if not self.recover_segments(' ', 10):
                tinctest.logger.warn(
                    "***** Segments not recovered after incremental recovery")
                if doFullRecovery:
                    # if full also fails, the tests cannot proceed, so fail it
                    if not self.recover_segments('-F', 20):
                        tinctest.logger.error(
                            "***** Segments not recovered even after full recovery - Tests cannot proceed further!!"
                        )
                        self.fail(
                            "Segments are down - Tests cannot proceed further!!"
                        )
                    # if full recovery passes, check for rebalancing the cluster
                    else:
                        tinctest.logger.info(
                            "***** Segments up after full recovery : validating their roles..."
                        )
                        self.check_segment_roles()
                else:
                    self.fail("Segments are down - Tests cannot proceed!!")
            # if incremental recovery passes, check for rebalancing the cluster
            else:
                tinctest.logger.info(
                    "***** Segments up after incremental recovery : validating their roles..."
                )
                self.check_segment_roles()
        # If the segments are up, check for rebalancing the cluster
        else:
            tinctest.logger.info(
                "***** Segments are up : validating their roles...")
            self.check_segment_roles()

    def setUp(self):
        super(GprecoversegTest, self).setUp()
        self.check_cluster_health()

    def test_01_run_schema_topology(self):
        """
        [feature]: Test to run the schema_topology test-suite
        
        @fail_fast False
        """
        tinctest.logger.info(
            '***Running the schema topology test-suite before starting the tests...'
        )
        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.catalog.schema_topology.test_ST_AllSQLsTest.AllSQLsTest'
        )
        self.test_case_scenario.append(test_case_list1)
        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.catalog.schema_topology.test_ST_OSSpecificSQLsTest.OSSpecificSQLsTest'
        )
        self.test_case_scenario.append(test_case_list2)
        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.catalog.schema_topology.test_ST_GPFilespaceTablespaceTest.GPFilespaceTablespaceTest'
        )
        self.test_case_scenario.append(test_case_list3)
        test_case_list4 = []
        test_case_list4.append(
            'mpp.gpdb.tests.catalog.schema_topology.test_ST_EnhancedTableFunctionTest.EnhancedTableFunctionTest'
        )
        self.test_case_scenario.append(test_case_list4)

    def test_gprecoverseg_config_validation(self):
        """
        [feature]: Test to check if gprecoverseg sets the config to invalid state or not
        
        """

        # Create a fault
        test_case_list1 = []
        #fault
        test_case_list1.append(
            "mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_kill_primary"
        )
        self.test_case_scenario.append(test_case_list1)

        # Run the invalid state recovery process
        test_case_list2 = []
        test_case_list2.append(
            "mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_invalid_state_recoverseg"
        )
        test_case_list2.append(
            "mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.SegmentConfigurations"
        )
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.gprestartdb'
        )
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.check_if_not_in_preferred_role'
        )
        self.test_case_scenario.append(test_case_list4)

        # Check the state of the DB and cluster
        test_case_list5 = []
        test_case_list5.append(
            "mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition"
        )
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity"
        )
        self.test_case_scenario.append(test_case_list6)

    def test_gprecoverseg_incr_newDir(self):
        """
        [feature]: Incremental gprecoverseg to new location: Same host, with filespaces
        
        """

        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_kill_primary'
        )
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_recovery_with_new_loc'
        )
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.gprestartdb'
        )
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.check_if_not_in_preferred_role'
        )
        self.test_case_scenario.append(test_case_list4)

        # Check the state of the DB and cluster
        test_case_list5 = []
        test_case_list5.append(
            "mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition"
        )
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity"
        )
        self.test_case_scenario.append(test_case_list6)

    def test_failover_to_mirror(self):
        """
        [feature]:  System failover to mirror and do incremental recovery
        
        """

        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_kill_primary'
        )
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_incremental_recovery'
        )
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.gprestartdb'
        )
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.check_if_not_in_preferred_role'
        )
        self.test_case_scenario.append(test_case_list4)

        # Check the state of the DB and cluster
        test_case_list5 = []
        test_case_list5.append(
            "mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition"
        )
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity"
        )
        self.test_case_scenario.append(test_case_list6)

    def test_failover_to_primary(self):
        """
        [feature]:  System failover to Primary and do incremental recovery
        
        """

        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_kill_mirror'
        )
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_incremental_recovery'
        )
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.gprestartdb'
        )
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.check_if_not_in_preferred_role'
        )
        self.test_case_scenario.append(test_case_list4)

        # Check the state of the DB and cluster
        test_case_list5 = []
        test_case_list5.append(
            "mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition"
        )
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity"
        )
        self.test_case_scenario.append(test_case_list6)

    def test_drop_pg_dirs_on_primary(self):
        """
        [feature]: System failover to mirror due to dropping pg_* dirs on the primary
        
        """

        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_drop_pg_dirs_on_primary'
        )
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_full_recovery'
        )
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.gprestartdb'
        )
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GPDBdbOps.check_if_not_in_preferred_role'
        )
        self.test_case_scenario.append(test_case_list4)

        # Check the state of the DB and cluster
        test_case_list5 = []
        test_case_list5.append(
            "mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition"
        )
        self.test_case_scenario.append(test_case_list5)

        test_case_list6 = []
        test_case_list6.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity"
        )
        self.test_case_scenario.append(test_case_list6)

    def test_incremental_recovery_with_symlinks(self):
        """
        [feature]: Incremental recovery when the data directory is a symlink instead of a regular directory
        
        """

        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down'
        )
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_create_symlink_for_seg'
        )
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_incremental_recovery'
        )
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition'
        )
        self.test_case_scenario.append(test_case_list4)

        test_case_list5 = []
        test_case_list5.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_remove_symlink_for_seg'
        )
        self.test_case_scenario.append(test_case_list5)

    def test_incremental_recovery_without_symlinks(self):
        """
        [feature]: Incremental recovery when the data directory is a regular directory (no symlinks)
        
        """

        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down'
        )
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_incremental_recovery'
        )
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition'
        )
        self.test_case_scenario.append(test_case_list3)

    def test_full_recovery_with_symlinks(self):
        """
        [feature]: Full recovery when data directory is a symlink instead of a regular directory
        
        """

        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down'
        )
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_create_symlink_for_seg'
        )
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_full_recovery'
        )
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition'
        )
        self.test_case_scenario.append(test_case_list4)

        test_case_list5 = []
        test_case_list5.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_remove_symlink_for_seg'
        )
        self.test_case_scenario.append(test_case_list5)

    def test_full_recovery_without_symlinks(self):
        """
        [feature]: Full recovery when the data directory is a regular directory (no symlinks)
        
        """

        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down'
        )
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_full_recovery'
        )
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition'
        )
        self.test_case_scenario.append(test_case_list3)

    def test_incremental_recovery(self):
        """
        [feature]: Incremental recovery
        
        @product_version gpdb: [4.3.5.0 -]
        """

        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down'
        )
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_incremental_recovery'
        )
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition'
        )
        self.test_case_scenario.append(test_case_list3)

    def test_full_recovery(self):
        """
        [feature]: Full recovery
        
        @product_version gpdb: [4.3.5.0 -]
        """

        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_use_gpfaultinjector_to_mark_segment_down'
        )
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_full_recovery'
        )
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition'
        )
        self.test_case_scenario.append(test_case_list3)

    def test_clean_shared_mem(self):
        """
        [feature]: Check the shared memory cleanup for gprecoverseg 
        
        @product_version gpdb: [4.3.5.0 -]
        """

        version = self.get_version()
        if version.startswith('4.2'):
            self.skipTest('Skipping test for 4.2')

        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_kill_primary'
        )
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_wait_till_segments_in_change_tracking'
        )
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_incremental_recovery'
        )
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition'
        )
        self.test_case_scenario.append(test_case_list4)

        test_case_list5 = []
        test_case_list5.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_shared_mem_is_cleaned'
        )
        self.test_case_scenario.append(test_case_list5)

    def test_start_transaction(self):
        """
        [feature]: Check if we can start a transaction successfully before establishing a connection to the database
        
        @product_version gpdb: [4.3.4.1 -]
        """

        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_kill_primary_group'
        )
        self.test_case_scenario.append(test_case_list1)

        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.FaultInjectorTestCase.test_wait_till_segments_in_change_tracking'
        )
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.fault.fault.GprecoversegClass.test_do_incremental_recovery'
        )
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append(
            'mpp.gpdb.tests.utilities.recoverseg.gprecoverseg_tests.test_gprecoverseg.GprecoversegTest.wait_till_insync_transition'
        )
        self.test_case_scenario.append(test_case_list4)
Example #48
0
File: base.py Project: 50wu/gpdb
 def __init__(self,methodName):
     self.filereputil = Filerepe2e_Util()
     self.gprecover = GpRecover()
     super(BaseClass,self).__init__(methodName)
Example #49
0
class FilerepTestCase(MPPTestCase):
    def __init__(self, methodName):
        self.pgport = os.environ.get('PGPORT')
        self.util = Filerepe2e_Util()
        self.gpconfig = GpConfig()
        self.config = GPDBConfig()
        self.gpr = GpRecover(self.config)
        self.dbstate = DbStateClass('run_validation', self.config)
        self.gpstart = GpStart()
        self.gpstop = GpStop()
        super(FilerepTestCase, self).__init__(methodName)

    def sleep(self, seconds=60):
        time.sleep(seconds)

    def create_file_in_datadir(self, content, role, filename):
        dbid = self.config.get_dbid(content=content, seg_role=role)
        host, datadir = self.config.get_host_and_datadir_of_segment(dbid=dbid)
        file_path = os.path.join(datadir, filename)
        cmd = Command('create a file',
                      'touch %s' % file_path,
                      ctxt=REMOTE,
                      remoteHost=host)
        cmd.run(validateAfter=True)

    def remove_file_in_datadir(self, content, role, filename):
        dbid = self.config.get_dbid(content=content, seg_role=role)
        host, datadir = self.config.get_host_and_datadir_of_segment(dbid=dbid)
        file_path = os.path.join(datadir, filename)
        cmd = Command('remove a file',
                      'rm %s' % file_path,
                      ctxt=REMOTE,
                      remoteHost=host)
        cmd.run(validateAfter=True)

    def get_timestamp_of_file_in_datadir(self, content, role, filename):
        dbid = self.config.get_dbid(content=content, seg_role=role)
        host, datadir = self.config.get_host_and_datadir_of_segment(dbid=dbid)
        file_path = os.path.join(datadir, filename)
        cmd = Command(
            'check timestamp',
            """ python -c "import os; print os.stat('%s').st_mtime" """ %
            file_path,
            ctxt=REMOTE,
            remoteHost=host)
        cmd.run(validateAfter=True)
        res = cmd.get_results().stdout.strip()
        return res

    def verify_file_exists(self, content, role, filename):
        dbid = self.config.get_dbid(content=content, seg_role=role)
        host, datadir = self.config.get_host_and_datadir_of_segment(dbid=dbid)
        file_path = os.path.join(datadir, filename)
        cmd = Command('check if file exists',
                      'test -f %s' % file_path,
                      ctxt=REMOTE,
                      remoteHost=host)
        cmd.run(validateAfter=True)

    def handle_ext_cases(self, file):
        """
        @file: wet_ret sql file in which the gpfdist location is rewritten for the current machine
        """

        host = str(socket.gethostbyname(socket.gethostname()))  #Must be an IP
        querystring = "gpfdist://" + host + ":8088"

        if os.path.isfile(file):
            for line in fileinput.FileInput(file, inplace=1):
                line = re.sub('gpfdist.+8088', querystring, line)
                print str(re.sub('\n', '', line))

    def handle_hybrid_part_cases(self, file):
        """
        @file: hybrid_part sql file in which the data file path is rewritten for the current machine
        """

        querystring = "FROM '" + local_path('hybrid_part.data') + "'"
        if os.path.isfile(file):
            for line in fileinput.FileInput(file, inplace=1):
                line = re.sub('FROM\s\'.+hybrid_part.data\'', querystring,
                              line)
                print str(re.sub('\n', '', line))

    def preprocess(self):
        """ 
        Replace the hard-coded information in the sql files with the correct hostname, IP address, etc.
        """

        list_workload_dir = [
            'set_sync1', 'sync1', 'set_ck_sync1', 'ck_sync1', 'set_ct', 'ct',
            'set_resync', 'resync', 'set_sync2', 'sync2'
        ]
        for dir in list_workload_dir:
            sql_path = os.path.join(local_path(dir), 'sql')
            ans_path = os.path.join(local_path(dir), 'expected')
            for file in os.listdir(sql_path):
                if (file.find('wet_ret') >= 0):
                    self.handle_ext_cases(os.path.join(sql_path, file))
                if (file.find('hybrid_part') >= 0):
                    self.handle_hybrid_part_cases(os.path.join(sql_path, file))
            for file in os.listdir(ans_path):
                if (file.find('wet_ret') >= 0):
                    self.handle_ext_cases(os.path.join(ans_path, file))
                if (file.find('hybrid_part') >= 0):
                    self.handle_hybrid_part_cases(os.path.join(ans_path, file))

    def clean_data(self):
        """ 
        Clean up the external table data files; otherwise, running multiple sql files
        keeps appending data to the same external table.
        """

        test = local_path("")
        test = str(test) + "data/*.*"

        cmd = 'rm -rfv ' + test
        run_shell_command(cmd)

    def anydownsegments(self):
        """
        Returns True if no segments are down, False otherwise
        """

        tinctest.logger.info("Checking if any segments are down")
        num_segments_down = self.count_of_nodes_down()
        if int(num_segments_down) == 0:
            return True
        else:
            return False

    def stop_start_validate(self, stopValidate=True):
        """
        Do gpstop -i, gpstart and see if all segments come back up fine 
        """

        tinctest.logger.info("Performing stop start validate")
        tinctest.logger.info("Shutting down the cluster")
        ok = self.gpstop.run_gpstop_cmd(immediate='i', validate=stopValidate)
        if not ok and stopValidate:
            raise Exception('Problem while shutting down the cluster')
        tinctest.logger.info("Successfully shutdown the cluster.")

        tinctest.logger.info("Restarting the cluster.")
        ok = self.gpstart.run_gpstart_cmd()
        if not ok:
            raise Exception('Failed to bring the cluster back up')
        tinctest.logger.info("Successfully restarted the cluster.")
        if not self.anydownsegments():
            raise Exception("segments were marked down")
        else:
            return (True, "All segments are up")

    def method_reset_fault_injection(self):
        """
        Resets fault injection
        Return: (True, [result]) if OK, or (False, [result]) otherwise
        """

        tinctest.logger.info("Resetting fault injection")

        (ok1, out1) = self.util.inject_fault(f='filerep_resync',
                                             m='async',
                                             y='reset',
                                             r='primary',
                                             H='ALL')
        if not ok1:
            raise Exception("Fault injection failed")
        tinctest.logger.info("Done Injecting Fault  to reset resync")

        return (True, str(out1))

    def method_resume_filerep_resync(self):
        """
        Resumes the process of resync
        """

        tinctest.logger.info("Resuming Resync")
        (ok, out) = self.util.inject_fault(f='filerep_resync',
                                           m='async',
                                           y='resume',
                                           r='primary',
                                           H='ALL')
        if not ok:
            raise Exception("Fault injection failed")
        tinctest.logger.info("Done resuming resync")
        return (ok, out)

    def run_method_suspendresync(self):
        """
        Stops the cluster from going to resync
        """

        tinctest.logger.info("Suspending resync")
        (ok, out) = self.util.inject_fault(f='filerep_resync',
                                           m='async',
                                           y='suspend',
                                           r='primary',
                                           H='ALL')
        tinctest.logger.info('output from suspend resync %s' % out)
        if not ok:
            raise Exception("Fault injection failed")
        tinctest.logger.info("Done Injecting Fault to suspend resync")
        return (ok, out)
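
    # A typical ordering of the three filerep_resync helpers above, inferred
    # from their docstrings: run_method_suspendresync() keeps segments from
    # entering resync, method_resume_filerep_resync() lets resync proceed
    # again, and method_reset_fault_injection() clears the injected fault.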

    def count_of_masters(self):
        """
        Returns the number of master nodes in the cluster
        """

        tinctest.logger.info("Count the number of masters")
        cmd = "select count(*) from gp_segment_configuration where content = -1"
        (out) = PSQL.run_sql_command(cmd)
        num_master = out.split('\n')[3].strip()
        return num_master

    def count_of_nodes(self):
        """
        Returns the total number of nodes in the cluster
        """

        tinctest.logger.info("Counting number of nodes")
        cmd = "select count(*) from gp_segment_configuration"
        (num_cl) = PSQL.run_sql_command(cmd)
        total_num_rows = num_cl.split('\n')[3].strip()
        return total_num_rows

    def count_of_nodes_in_ct(self):
        """
        Returns the number of nodes in change tracking
        """

        tinctest.logger.info("Counting number of nodes in ct")
        sqlcmd = "select count(*) from gp_segment_configuration where mode = 'c'"
        (num_cl) = PSQL.run_sql_command(sqlcmd)
        num_cl = num_cl.split('\n')[3].strip()
        return num_cl

    def count_of_nodes_down(self):
        """
        Returns the number of nodes marked as down
        """

        tinctest.logger.info("Counting the number of nodes down")
        sqlcmd = "select count(*) from gp_segment_configuration where status = 'd'"
        (num_down) = PSQL.run_sql_command(sqlcmd)
        num_down = num_down.split('\n')[3].strip()
        return num_down

    def count_of_nodes_sync(self):
        """
        Returns the number of nodes in sync
        """

        tinctest.logger.info("Counting the number of nodes in sync")
        sqlcmd = "select count(*) from gp_segment_configuration where mode = 's'"
        (num_sync) = PSQL.run_sql_command(sqlcmd)
        num_sync = num_sync.split('\n')[3].strip()
        return num_sync

    def count_of_nodes_not_sync(self):
        """
        Returns the number of nodes not in sync
        """

        tinctest.logger.info("Counting number of nodes not in sync")
        sqlcmd = "select count(*) from gp_segment_configuration where mode <> 's'"
        (num_sync) = PSQL.run_sql_command(sqlcmd)
        num_sync = num_sync.split('\n')[3].strip()
        return num_sync

    def inject_fault_on_first_primary(self):
        """
	@product_version gpdb:[4.3.3.0-], gpdb:[4.2.8.1-4.2]
        """
        tinctest.logger.info("\n Injecting faults on first primary")
        (ok,
         out) = self.util.inject_fault(f='filerep_immediate_shutdown_request',
                                       m='async',
                                       y='infinite_loop',
                                       r='primary',
                                       seg_id=2,
                                       sleeptime=300)
        if not ok:
            raise Exception(
                "Fault filerep_immediate_shutdown_request injection failed")

        (ok, out) = self.util.inject_fault(f='fileRep_is_operation_completed',
                                           m='async',
                                           y='infinite_loop',
                                           r='primary',
                                           seg_id=2)
        if not ok:
            raise Exception(
                "Fault fileRep_is_operation_completed injection failed")
        tinctest.logger.info("\n Done Injecting Fault")

    def inject_fault_on_first_mirror(self):
        """
	@product_version gpdb:[4.3.3.0-], gpdb:[4.2.8.1-4.2]
        """
        sqlcmd = "select dbid from gp_segment_configuration where content=0 and role='m'"
        (first_mirror_dbid) = PSQL.run_sql_command(sqlcmd)
        first_mirror_dbid = first_mirror_dbid.split('\n')[3].strip()

        tinctest.logger.info("\n Injecting faults on first mirror")
        flag = self.util.check_fault_status(
            fault_name='fileRep_is_operation_completed',
            status='triggered',
            max_cycle=100)
        if not flag:
            raise Exception(
                "Fault fileRep_is_operation_completed didn't trigger")

        (ok, out) = self.util.inject_fault(f='filerep_consumer',
                                           m='async',
                                           y='panic',
                                           r='mirror',
                                           seg_id=first_mirror_dbid)
        if not ok:
            raise Exception("Fault filerep_consumer injection failed")
        tinctest.logger.info("\n Done Injecting Fault")

    def setupGpfdist(self, port, path):
        gpfdist = Gpfdist(port, self.hostIP())
        gpfdist.killGpfdist()
        gpfdist.startGpfdist(' -t 30 -m 1048576 -d ' + path)
        return True

    def cleanupGpfdist(self, port, path):
        gpfdist = Gpfdist(port, self.hostIP())
        gpfdist.killGpfdist()
        return True

    def hostIP(self):
        ok = run_shell_command('which gpfdist')
        if not ok:
            raise GPtestError("Error:'which gpfdist' command failed.")
        hostname = socket.gethostname()
        if hostname.find('mdw') > 0:
            host = 'mdw'
        else:
            host = str(socket.gethostbyname(
                socket.gethostname()))  #Must be an IP
        tinctest.logger.info('current host is %s' % host)
        return host

    def method_setup(self):
        tinctest.logger.info("Performing setup tasks")
        gpfs = Gpfilespace()
        gpfs.create_filespace('filerep_fs_a')
        gpfs.create_filespace('filerep_fs_b')
        gpfs.create_filespace('filerep_fs_c')
        gpfs.create_filespace('filerep_fs_z')
        gpfs.create_filespace('sync1_fs_1')

        # Set max_resource_queues to 100
        cmd = 'gpconfig -c max_resource_queues -v 100 '
        ok = run_shell_command(cmd)
        if not ok:
            raise Exception(
                'Failure during setting the max_resource_queues value to 100 using gpconfig tool'
            )
        #Restart the cluster
        self.gpstop.run_gpstop_cmd(immediate='i')
        ok = self.gpstart.run_gpstart_cmd()
        if not ok:
            raise Exception('Failure during restarting the cluster')
        return True

    def get_ext_table_query_from_gpstate(self):
        outfile = local_path("gpstate_tmp")
        ok = run_shell_command("gpstate --printSampleExternalTableSql >" +
                               outfile)
        querystring = ""
        flag = 'false'
        out = open(outfile, 'r').readlines()
        for line in out:
            line.strip()
            if (line.find(
                    'DROP EXTERNAL TABLE IF EXISTS gpstate_segment_status') >=
                    0):
                flag = 'true'
            if flag == 'true':
                querystring = querystring + line
        return querystring  # this SQL is later written to a file and run by run_gpstate()

    def check_gpstate(self, type, phase):
        """ 
        Perform gpstate for each different transition state
        @type: failover type
        @phase: transition stage, can be sync1, ck_sync1, ct, resync, sync2
        """

        if phase == 'sync1':
            state_num = self.query_select_count(
                "select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Synchronized' and status_in_config='Up' and instance_status='Up'"
            )
            sync1_num = self.query_select_count(
                "select count(*) from gp_segment_configuration where content <> -1"
            )
            if int(sync1_num) <> int(state_num):
                raise Exception("gpstate in Sync state failed")
            tinctest.logger.info("Done Running gpstate in %s phase " % (phase))

        elif phase == 'ct':
            p_num = self.query_select_count(
                "select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Change Tracking'  and role = 'Primary' and status_in_config='Up' and instance_status='Up'"
            )
            m_num = self.query_select_count(
                "select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Out of Sync'  and role = 'Mirror' and status_in_config='Down' and instance_status='Down in configuration' "
            )

            if int(p_num) <> int(m_num):
                raise Exception("gpstate in CT state failed")
            tinctest.logger.info("Done Running gpstate in %s phase " % (phase))

        elif phase == 'resync_incr':

            if type == 'primary':
                query = "select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Resynchronizing' and  status_in_config='Up' and instance_status='Up'  and resync_mode= 'Incremental'"
                resync_incr_num = self.query_select_count(query)
            else:
                query = "select count(*) from gpstate_segment_status where mirror_status ='Resynchronizing' and  status_in_config='Up' and instance_status='Up' and resync_mode= 'Incremental'"
                resync_incr_num = self.query_select_count(query)

            query_num_rows = "select count(*) from gp_segment_configuration where content <> -1"
            num_rows = self.query_select_count(query_num_rows)

            if int(resync_incr_num) <> int(num_rows):
                tinctest.logger.info("resync_incr_num query run %s" % query)
                tinctest.logger.info("num_rows query run %s" % query_num_rows)
                raise Exception(
                    "gpstate in Resync Incremental  state failed. resync_incr_num %s <> num_rows %s"
                    % (resync_incr_num, num_rows))
            tinctest.logger.info("Done Running gpstate in %s phase " % (phase))

        elif phase == 'resync_full':
            num_rows = self.query_select_count(
                "select count(*) from gp_segment_configuration where content <> -1"
            )

            if type == 'primary':
                resync_full_num = self.query_select_count(
                    "select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Resynchronizing'  and  status_in_config='Up' and instance_status='Up'  and resync_mode= 'Full'"
                )
            else:
                resync_full_num = self.query_select_count(
                    "select count(*) from gpstate_segment_status where mirror_status ='Resynchronizing'  and  status_in_config='Up' and instance_status='Up'  and resync_mode= 'Full'"
                )

            if int(resync_full_num) <> int(num_rows):
                raise Exception("gpstate in Resync Full state failed")
            tinctest.logger.info("Done Running gpstate in %s phase " % (phase))

        return True

    def trigger_transition(self):
        PSQL.run_sql_file(local_path('mirrors.sql'))

    def run_gpstate(self, type, phase):
        """
        Perform gpstate for each different transition state
        @type: failover type
        @phase: transition stage, can be sync1, ck_sync1, ct, resync, sync2
        """

        tinctest.logger.info("running gpstate")
        querystring = self.get_ext_table_query_from_gpstate()
        file1 = local_path('create_table_gpstate.sql')
        f1 = open(file1, 'w')
        f1.write(querystring)
        f1.write('\n')
        f1.close()
        PSQL.run_sql_file(local_path('create_table_gpstate.sql'))

        gpstate_outfile = local_path('gpstate_out')
        cmd = 'gpstate -s -a > %s 2>&1' % (gpstate_outfile)

        ok = run_shell_command(cmd)
        self.check_gpstate(type, phase)
        return ok

    def check_mirror_seg(self, master=False):
        tinctest.logger.info("running check mirror")
        self.dbstate.check_mirrorintegrity()

    def do_gpcheckcat(self,
                      dbname=None,
                      alldb=False,
                      online=False,
                      outputFile='checkcat.out',
                      outdir=None):
        tinctest.logger.info("running gpcheckcat")
        self.dbstate.check_catalog(outputFile=outputFile)

    def query_select_count(self, sqlcmd):
        # The count value is expected on the fourth line (index 3) of the
        # default psql text output returned by PSQL.run_sql_command.
        (num) = PSQL.run_sql_command(sqlcmd)
        num = num.split('\n')[3].strip()
        return num

    def method_run_failover(self, type):
        """
        Inject a fault to fail over nodes
        @type: primary [induces fault in mirror], mirror [induces panic in primary]
        Return: True once the fault has been injected
        """

        if type == 'primary':
            tinctest.logger.info("\n primary failover")
            (ok, out) = self.util.inject_fault(f='filerep_consumer',
                                               m='async',
                                               y='fault',
                                               r='mirror',
                                               H='ALL')
            tinctest.logger.info("\n Done Injecting Fault")

        elif type == 'mirror':
            tinctest.logger.info("\n Mirror failover")
            (ok, out) = self.util.inject_fault(f='postmaster',
                                               m='async',
                                               y='panic',
                                               r='primary',
                                               H='ALL')
            tinctest.logger.info("\n Done Injecting Fault")
        return True

    def wait_till_change_tracking_transition(self):
        self.util.wait_till_change_tracking_transition()

    def wait_till_insync_transition(self):
        self.gpr.wait_till_insync_transition()

    def run_gprecoverseg(self, recover_mode):
        if recover_mode == 'full':
            self.gpr.full()
        else:
            self.gpr.incremental()

    def run_gpconfig(self, parameter, master_value, segment_value):
        if (parameter is not None):
            self.gpconfig.setParameter(parameter, master_value, segment_value)
            self.gpstop.run_gpstop_cmd(restart='r')

    def inject_fault(self,
                     fault=None,
                     mode=None,
                     operation=None,
                     prim_mirr=None,
                     host='All',
                     table=None,
                     database=None,
                     seg_id=None,
                     sleeptime=None,
                     occurence=None):
        if (fault == None or mode == None or operation == None
                or prim_mirr == None):
            raise Exception('Incorrect parameters provided for inject fault')

        (ok, out) = self.util.inject_fault(f=fault,
                                           m=mode,
                                           y=operation,
                                           r=prim_mirr,
                                           H='ALL',
                                           table=table,
                                           database=database,
                                           sleeptime=sleeptime,
                                           o=occurence,
                                           seg_id=seg_id)
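
Taken together, the wrappers in this example form a small fault-and-recovery API. The sketch below is not part of the original example; it only illustrates one plausible call order, assuming `helper` is an instance of the class shown above and that 'ct' is a valid phase name for run_gpstate.

# Hypothetical driver, assuming `helper` is an instance of the class above.
def run_failover_and_recover(helper):
    helper.method_run_failover('primary')            # fault the mirrors
    helper.wait_till_change_tracking_transition()    # cluster drops to change tracking
    helper.run_gpstate('primary', 'ct')              # verify gpstate output in this phase
    helper.run_gprecoverseg('incr')                  # incremental recovery
    helper.wait_till_insync_transition()             # wait until segments are back in sync
    helper.check_mirror_seg()                        # final mirror integrity check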
Example #50
0
 def incremental_recoverseg(self, workerPool=False):
     gprecover = GpRecover(GPDBConfig())
     gprecover.incremental(workerPool)
Example #51
0
class PgtwoPhaseClass(MPPTestCase):
    '''Helper class for pg_twophase supporting functions '''
    def __init__(self, methodName):
        self.filereputil = Filerepe2e_Util()
        self.config = GPDBConfig()
        self.gprecover = GpRecover(self.config)
        self.gpstop = GpStop()
        self.gpstart = GpStart()
        self.gpfile = Gpfilespace(self.config)
        self.gpverify = GpdbVerify(config=self.config)
        self.dbstate = DbStateClass('run_validation', self.config)
        self.port = os.getenv('PGPORT')
        super(PgtwoPhaseClass, self).__init__(methodName)

    def invoke_fault(self,
                     fault_name,
                     type,
                     role='mirror',
                     port=None,
                     occurence=None,
                     sleeptime=None,
                     seg_id=None):
        ''' Reset the fault and then issue the fault with the given type'''
        self.filereputil.inject_fault(f=fault_name,
                                      y='reset',
                                      r=role,
                                      p=port,
                                      o=occurence,
                                      sleeptime=sleeptime,
                                      seg_id=seg_id)
        self.filereputil.inject_fault(f=fault_name,
                                      y=type,
                                      r=role,
                                      p=port,
                                      o=occurence,
                                      sleeptime=sleeptime,
                                      seg_id=seg_id)
        tinctest.logger.info(
            'Successfully injected fault_name : %s fault_type : %s' %
            (fault_name, type))

    def inject_fault(self, fault_type):
        '''
        @param fault_type : type of fault to be suspended
        '''
        if fault_type == 'end_prepare_two_phase_sleep':
            self.filereputil.inject_fault(f='end_prepare_two_phase_sleep',
                                          sleeptime='1000',
                                          y='sleep',
                                          r='primary',
                                          p=self.port)
            tinctest.logger.info(
                'Injected fault to sleep in end_prepare_two_phase')

        elif fault_type == 'abort':
            # In the abort case we also need this error-type fault, to fake a situation where
            # one of the segments is not responding, which makes the master trigger an abort of the transaction
            self.invoke_fault('transaction_abort_after_distributed_prepared',
                              'error',
                              port=self.port,
                              occurence='0',
                              seg_id='1')

            self.invoke_fault('twophase_transaction_abort_prepared',
                              'suspend',
                              role='primary',
                              port=self.port,
                              occurence='0')

        elif fault_type == 'commit':
            self.invoke_fault('twophase_transaction_commit_prepared',
                              'suspend',
                              role='primary',
                              port=self.port,
                              occurence='0')

        elif fault_type == 'dtm_broadcast_prepare':
            self.invoke_fault('dtm_broadcast_prepare',
                              'suspend',
                              seg_id='1',
                              port=self.port,
                              occurence='0')

        elif fault_type == 'dtm_broadcast_commit_prepared':
            self.invoke_fault('dtm_broadcast_commit_prepared',
                              'suspend',
                              seg_id='1',
                              port=self.port,
                              occurence='0')

        elif fault_type == 'dtm_xlog_distributed_commit':
            self.invoke_fault('dtm_xlog_distributed_commit',
                              'suspend',
                              seg_id='1',
                              port=self.port,
                              occurence='0')

    def resume_faults(self, fault_type, cluster_state='sync'):
        '''
        @param fault_type : commit/abort/end_prepare_two_phase_sleep/dtm_broadcast_prepare/dtm_broadcast_commit_prepared/dtm_xlog_distributed_commit
        @description : Resume the suspended faults 
        '''
        tinctest.logger.info('coming to resume faults with xact %s' %
                             fault_type)
        if fault_type == 'abort':
            self.filereputil.inject_fault(
                f='twophase_transaction_abort_prepared',
                y='resume',
                r='primary',
                p=self.port,
                o='0')
            if cluster_state != 'resync':
                self.filereputil.inject_fault(
                    f='transaction_abort_after_distributed_prepared',
                    y='reset',
                    p=self.port,
                    o='0',
                    seg_id='1')
        elif fault_type == 'commit':
            self.filereputil.inject_fault(
                f='twophase_transaction_commit_prepared',
                y='resume',
                r='primary',
                p=self.port,
                o='0')

        elif fault_type == 'dtm_broadcast_prepare':
            self.filereputil.inject_fault(f='dtm_broadcast_prepare',
                                          y='resume',
                                          seg_id='1',
                                          p=self.port,
                                          o='0')

        elif fault_type == 'dtm_broadcast_commit_prepared':
            tinctest.logger.info('coming to if dtm_broadcast_commit_prepared')
            self.filereputil.inject_fault(f='dtm_broadcast_commit_prepared',
                                          y='resume',
                                          seg_id='1',
                                          p=self.port,
                                          o='0')

        elif fault_type == 'dtm_xlog_distributed_commit':
            self.filereputil.inject_fault(f='dtm_xlog_distributed_commit',
                                          y='resume',
                                          seg_id='1',
                                          p=self.port,
                                          o='0')

        else:
            tinctest.logger.info('No faults to resume')
        tinctest.logger.info('Resumed the suspended transaction fault')

        #Wait till all the trigger_sqls are complete before returning
        sql_count = PSQL.run_sql_command(
            'select count(*) from pg_stat_activity;',
            flags='-q -t',
            dbname='postgres')
        while (sql_count.strip() != '1'):
            sleep(5)
            sql_count = PSQL.run_sql_command(
                'select count(*) from pg_stat_activity;',
                flags='-q -t',
                dbname='postgres')
            tinctest.logger.info('stat_activity count %s ' % sql_count)
        return

    def start_db(self):
        '''Gpstart '''
        rc = self.gpstart.run_gpstart_cmd()
        if not rc:
            raise Exception('Failed to start the cluster')
        tinctest.logger.info('Started the cluster successfully')

    def stop_db(self):
        ''' Gpstop and dont check for rc '''
        cmd = Command('Gpstop_a', 'gpstop -a')
        tinctest.logger.info('Executing command: gpstop -a')
        cmd.run()

    def crash_and_recover(self,
                          crash_type,
                          fault_type,
                          checkpoint='noskip',
                          cluster_state='sync'):
        '''
        @param crash_type : gpstop_i/gpstop_a/failover_to_primary/failover_to_mirror
        @note: when skip checkpoint is enabled, gpstop -a returns a non-zero return code and fails in the library; to work around this, the local stop_db function is used
        '''
        if crash_type == 'gpstop_i':
            rc = self.gpstop.run_gpstop_cmd(immediate=True)
            if not rc:
                raise Exception('Failed to stop the cluster')
            tinctest.logger.info('Stopped cluster immediately')
            self.start_db()
        elif crash_type == 'gpstop_a':
            self.resume_faults(fault_type, cluster_state)
            if checkpoint == 'skip':
                self.stop_db()
            else:
                rc = self.gpstop.run_gpstop_cmd()
                if not rc:
                    raise Exception('Failed to stop the cluster')
            tinctest.logger.info('Smart stop completed')
            self.start_db()
        elif crash_type == 'failover_to_primary':
            self.invoke_fault('filerep_consumer', 'fault')
            self.resume_faults(fault_type, cluster_state)
            (rc, num) = self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Value of rc and num_down %s, %s, %s' %
                                 (rc, num, fault_type))

        elif crash_type == 'failover_to_mirror':
            self.invoke_fault('postmaster', 'panic', role='primary')
            if fault_type in ('dtm_broadcast_prepare',
                              'dtm_broadcast_commit_prepared',
                              'dtm_xlog_distributed_commit'):
                self.resume_faults(fault_type, cluster_state)
            (rc, num) = self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Value of rc and num_down %s, %s' % (rc, num))
            if fault_type == 'abort':
                self.filereputil.inject_fault(
                    f='transaction_abort_after_distributed_prepared',
                    y='reset',
                    p=self.port,
                    o='0',
                    seg_id='1')

        if cluster_state == 'resync':
            if not self.gprecover.wait_till_insync_transition():
                raise Exception('Segments not in sync')

    def get_trigger_status_old(self, trigger_count):
        '''Compare the pg_stat_activity count with the total number of trigger_sqls executed '''
        for i in range(1, 50):
            psql_count = PSQL.run_sql_command(
                'select count(*) from pg_stat_activity;',
                flags='-q -t',
                dbname='postgres')
        tinctest.logger.info('Count of trigger sqls %s' % psql_count)
        if int(psql_count.strip()) < trigger_count:
            tinctest.logger.info('coming to the if loop in get_trigger_status')
            return False
        return True

    def get_trigger_status(self, trigger_count, fault_type):
        if fault_type == None:
            return self.get_trigger_status_old(trigger_count)

        return self.filereputil.check_fault_status(fault_name=fault_type,
                                                   status="triggered",
                                                   seg_id='1',
                                                   num_times_hit=trigger_count)

    def check_trigger_sql_hang(self, test_dir, fault_type=None):
        '''
        @description : Return the status of the trigger sqls: whether they are waiting on the fault.
        Since gpfaultinjector has no way to check whether all the sqls are triggered, we run
        a count(*) on pg_stat_activity and compare it against the total number of trigger_sqls.
        '''
        trigger_count = 0
        for dir in test_dir.split(","):
            trigger_dir = local_path('%s/trigger_sql/sql/' % (dir))
            trigger_count += len(glob.glob1(trigger_dir, "*.sql"))
        tinctest.logger.info('Total number of sqls to trigger %d in %s' %
                             (trigger_count, test_dir))
        return self.get_trigger_status(trigger_count, fault_type)

    def run_faults_before_pre(self, cluster_state):
        '''
        @param cluster_state : sync/change_tracking/resync
        @description: 1. Put the cluster into change_tracking in case of resync/change_tracking.
        '''
        if cluster_state == 'resync':
            self.invoke_fault('filerep_consumer', 'fault')
            self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Change_tracking transition complete')

    def run_faults_before_trigger(self, checkpoint, cluster_state, fault_type):
        '''
        @param checkpoint : skip/noskip
        @param cluster_state : sync/change_tracking/resync
        @param fault_type : commit/abort
        @param end_prepare_two_phase_sleep : True/False
        @description : 1. Suspend resync faults. 2. Issue Checkpoint before the skip checkpoint, so that the bufferpool is cleared. 3. If skip issue 'skip checkpoint'. 4. Suspend transaction_faults based on test_type.
        '''
        if cluster_state == 'change_tracking':
            self.invoke_fault('filerep_consumer', 'fault')
            self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Change_tracking transition complete')

        if cluster_state == 'resync':
            self.invoke_fault('filerep_resync', 'suspend', role='primary')

            if checkpoint == 'skip':
                self.invoke_fault(
                    'filerep_transition_to_sync_before_checkpoint',
                    'suspend',
                    role='primary',
                    port=self.port,
                    occurence='0')
            rc = self.gprecover.incremental()
            if not rc:
                raise Exception('Gprecoverseg failed')
            tinctest.logger.info('Cluster in resync state')

        PSQL.run_sql_command('CHECKPOINT;', dbname='postgres')
        if checkpoint == 'skip':
            self.invoke_fault('checkpoint',
                              'skip',
                              role='primary',
                              port=self.port,
                              occurence='0')
        self.inject_fault(fault_type)

        if cluster_state == 'resync':
            self.filereputil.inject_fault(f='filerep_resync',
                                          y='resume',
                                          r='primary')

        PSQL.wait_for_database_up()

    def run_crash_and_recover(self,
                              crash_type,
                              fault_type,
                              test_dir,
                              cluster_state='sync',
                              checkpoint='noskip'):
        '''
        @param crash_type : gpstop_i/gpstop_a/failover_to_mirror/failover_to_primary
        @param fault_type : commit/abort/end_prepare_two_phase_sleep
        @param test_dir : dir of the trigger sqls
        @description : Execute the specified crash type before/after resuming the suspended fault and recover
        '''
        trigger_status = self.check_trigger_sql_hang(test_dir)
        tinctest.logger.info('trigger_status %s' % trigger_status)
        sleep(50)  # This sleep is needed till we get a way to find the state of all suspended sqls
        if trigger_status == True:
            if cluster_state == 'resync':
                self.filereputil.inject_fault(
                    f='filerep_transition_to_sync_before_checkpoint',
                    y='resume',
                    r='primary')
                sleep(15)  # wait little before crash
            self.crash_and_recover(crash_type, fault_type, checkpoint,
                                   cluster_state)
        else:
            tinctest.logger.info('The fault_status is not triggered')

    def gprecover_rebalance(self):
        '''
        @description: Rebalance by restarting the cluster (gpstop -air), which is much faster than gprecoverseg -r for test purposes.
        '''
        rc = self.gpstop.run_gpstop_cmd(immediate=True)
        if not rc:
            raise Exception('Failed to stop the cluster')
        tinctest.logger.info('Stopped cluster immediately')
        self.start_db()

    def run_gprecover(self, crash_type, cluster_state='sync'):
        '''Recover the cluster if required. '''
        if crash_type in ('failover_to_primary', 'failover_to_mirror'
                          ) or cluster_state == 'change_tracking':
            rc = self.gprecover.incremental()
            if not rc:
                raise Exception('Gprecoverseg failed')
            if not self.gprecover.wait_till_insync_transition():
                raise Exception('Segments not in sync')
            tinctest.logger.info('Cluster in sync state')
            if crash_type == 'failover_to_mirror':
                self.gprecover_rebalance()
                tinctest.logger.info('Successfully Rebalanced the cluster')
        else:
            tinctest.logger.info(
                'No need to run gprecoverseg. The cluster should be already in sync'
            )

    def switch_ckpt_faults_before_trigger(self, cluster_state, fault_type):
        '''
        @param cluster_state : sync/change_tracking/resync
        @param fault_type : dtm_broadcast_prepare/dtm_broadcast_commit_prepared/dtm_xlog_distributed_commit
        '''
        if cluster_state in ('change_tracking', 'resync'):
            self.invoke_fault('filerep_consumer', 'fault')
            self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Change_tracking transition complete')

        if cluster_state == 'resync':
            self.invoke_fault('filerep_resync', 'suspend', role='primary')
            rc = self.gprecover.incremental()
            if not rc:
                raise Exception('Gprecoverseg failed')
            tinctest.logger.info('Cluster in resync state')
        self.inject_fault(fault_type)

    def switch_ckpt_switch_xlog(self):
        '''
        @description: pg_switch_xlog on segments
        '''
        sql_cmd = 'select * from pg_switch_xlog();'
        num_primary = self.config.get_countprimarysegments()
        for i in range(num_primary):
            (host,
             port) = self.config.get_hostandport_of_segment(psegmentNumber=i)
            PSQL.run_sql_command_utility_mode(sql_cmd, host=host, port=port)

    def switch_checkpoint_loop(self, fault_type):
        '''     
        @description: Run switch_xlog and checkpoint based on the fault_type
        '''
        if fault_type == 'dtm_xlog_distributed_commit':
            self.switch_ckpt_switch_xlog()
        else:
            for i in range(5):
                self.switch_ckpt_switch_xlog()

    def switch_ckpt_crash_and_recover(self,
                                      crash_type,
                                      fault_type,
                                      test_dir,
                                      cluster_state='sync',
                                      checkpoint='noskip'):
        '''
        @param crash_type : gpstop_i/gpstop_a/failover_to_mirror/failover_to_primary
        @param fault_type : dtm_broadcast_prepare/dtm_broadcast_commit_prepared/dtm_xlog_distributed_commit
        @param test_dir : dir of the trigger_sqls
        '''
        trigger_status = self.check_trigger_sql_hang(test_dir, fault_type)
        tinctest.logger.info('trigger_status %s' % trigger_status)

        if trigger_status == True:
            if cluster_state == 'resync':
                self.filereputil.inject_fault(f='filerep_resync',
                                              y='resume',
                                              r='primary')
                sleep(30)  #Give a little time before crash.
            self.crash_and_recover(crash_type, fault_type, checkpoint,
                                   cluster_state)
        else:
            tinctest.logger.info('The fault_status is not triggered')

    def cleanup_dangling_processes(self):
        '''
        @description: Since the test suspends transactions at different stages and does immediate shutdowns,
        a few processes will not be cleaned up and will eventually eat up system resources.
        This method takes care of killing them at the end of each test, if such processes exist.
        '''

        num_primary = self.config.get_countprimarysegments()
        for i in range(num_primary):
            (host,
             port) = self.config.get_hostandport_of_segment(psegmentNumber=i)
            grep_cmd = "ps -ef|grep %s|grep 'Distributed'" % port
            cmd = Command('Check for dangling process',
                          cmdStr='gpssh -h %s -e "%s" ' % (host, grep_cmd))
            cmd.run()
            result = cmd.get_results()
            if len(result.stdout.splitlines()) > 2:
                grep_and_kill_cmd = "ps -ef|grep %s|grep 'Distributed'|awk '{print \$2}'|xargs kill -9" % port
                cmd = Command('Kill dangling processes',
                              cmdStr='gpssh -h %s -e "%s" ' %
                              (host, grep_and_kill_cmd))
                cmd.run()
                tinctest.logger.info('Killing the dangling processes')
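
The PgtwoPhaseClass helpers above are meant to be chained in a fixed order: pre-faults, trigger-time faults, the trigger SQLs themselves, crash and recovery, then cleanup. The driver below is only a sketch of that order; `pt` stands for an instance of PgtwoPhaseClass, the 'trigger_sql' directory name is an assumption, and launching the trigger SQLs is elided.

# Hypothetical sequence, assuming `pt` is a PgtwoPhaseClass instance and the
# trigger SQLs under 'trigger_sql/sql/' are started in the background elsewhere.
def run_commit_crash_scenario(pt, test_dir='trigger_sql'):
    pt.run_faults_before_pre(cluster_state='sync')
    pt.run_faults_before_trigger(checkpoint='noskip',
                                 cluster_state='sync',
                                 fault_type='commit')
    # ... start the trigger SQLs in the background here ...
    pt.run_crash_and_recover(crash_type='gpstop_i',
                             fault_type='commit',
                             test_dir=test_dir,
                             cluster_state='sync')
    pt.run_gprecover(crash_type='gpstop_i', cluster_state='sync')
    pt.cleanup_dangling_processes()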
Example #52
0
 def wait_till_insync(self):
     gprecover = GpRecover(GPDBConfig())
     gprecover.wait_till_insync_transition()
Example #53
0
class AOCOAlterColumn(MPPTestCase):
    
    def __init__(self):
        self.fileutil = Filerepe2e_Util()
        self.gprecover = GpRecover()
        self.config = GpConfig()
        self.base_dir = os.path.dirname(sys.modules[self.__class__.__module__].__file__)


    def get_sql_files(self, sql_file_name):
        sql_file = os.path.join(self.base_dir, "sql", sql_file_name + ".sql")
        return sql_file

    def validate_sql(self, ans_file, out_file):
        ''' Compare the out and ans files '''
        init_file=os.path.join( self.base_dir, "sql",'init_file')
        result1 = Gpdiff.are_files_equal(out_file, ans_file, match_sub =[init_file])
        self.assertTrue(result1 ,'Gpdiff.are_files_equal')        

    def run_sql(self, filename, out_file=None, background=False):
        ''' Run the provided sql file; derive the out file from the sql file name when not given '''
        if out_file is None:
            out_file = local_path(filename.replace(".sql", ".out"))
        PSQL.run_sql_file(filename, out_file=out_file, background=background)


    def run_test_CatalogCheck(self, action,storage):
        file_name =action+'_'+storage
        sql_file = self.get_sql_files(file_name)
        out_file = self.base_dir+ "/sql/"+file_name+'.out'
        tinctest.logger.info( 'sql-file == %s \n' % sql_file)
        tinctest.logger.info( 'out-file == %s \n' % out_file)
        # Run Add/Drop Column script
        self.run_sql(sql_file, out_file=out_file)

    def validate_test_CatalogCheck(self, action,storage):
        file_name =action+'_'+storage
        out_file = self.base_dir+ "/sql/"+file_name+'.out'
        ans_file = self.base_dir+ "/expected/"+file_name+'.ans'
        tinctest.logger.info( 'out-file == %s \n' % out_file)
        tinctest.logger.info( 'ans-file == %s \n' % ans_file)
        # Validate Ans file
        self.validate_sql(ans_file,out_file)
        if storage == 'multisegfiles':
            ''' check if multi_segfile_tab file has  multiple segfiles per column '''
            tablename='multi_segfile_tab'
            relid = self.get_relid(file_name=tablename )
            utilitymodeinfo=self.get_utilitymode_conn_info( relid=relid)
            u_port=utilitymodeinfo[0]
            u_host=utilitymodeinfo[1]
            assert(1 < int(self.get_segment_cnt(relid=relid,host=u_host,port= u_port)))
        # Check Correctness of the catalog
        self.dbstate = DbStateClass('run_validation')
        outfile = local_path("gpcheckcat_"+datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d%H%M%S')+".out")
        self.dbstate.check_catalog(outputFile=outfile)

    def run_test_ChangeTracking(self,filename):
        # Log the segment state before starting the test
        # Expectation is a SYNC state
        self.log_segment_state()
        primary_dbid=self.get_dbid()
        # Run the 'alter table add column' command in the background
        self.run_sql_ChangeTracking(filename,stage='fail',validate=False,background=True)
        # Inject Fault to put one primary in panic
        self.fileutil.inject_fault(f='postmaster', y='reset',  seg_id=primary_dbid)
        self.fileutil.inject_fault(f='postmaster', y='panic',  seg_id=primary_dbid)
        state=self.fileutil.check_fault_status(fault_name='postmaster', status='triggered')
        self.log_segment_state()
        # Recover the down segments
        self.recover_seg()
        self.log_segment_state()
        # Validate that the previous alter failed because primary segment went down as the alter was taking place
        self.run_sql_ChangeTracking(filename,stage='failvalidate',validate=True,background=False) 
        # Now the system is in change tracking so the next alter should pass
        self.run_sql_ChangeTracking(filename,stage='pass',validate=True,background=False) 
        self.log_segment_state()


    def recover_seg(self):
        result=self.get_segcount_state(state='d')
        if result > 0:
            if not self.gprecover.incremental():
                raise Exception('Gprecoverseg failed')
            if not self.gprecover.wait_till_insync_transition():
                raise Exception('Segments not in sync')
        tinctest.logger.info('Segments recovered and back in sync')
        

    def run_sql_ChangeTracking(self,filename,stage,validate=False,background=False):
        fname=filename+'-'+stage
        sql_file = self.get_sql_files(fname)
        out_file = self.base_dir+ "/sql/"+fname +'.out'
        ans_file = self.base_dir+ "/expected/"+fname+'.ans'
        tinctest.logger.info( '\n==============stage = %s ================' % (stage))
        tinctest.logger.info( sql_file)
        tinctest.logger.info( out_file)
        tinctest.logger.info( ans_file)
        tinctest.logger.info( '==============================')
        result=self.run_sql(sql_file,out_file=out_file,background=background)
        if validate == True:
           self.validate_sql(ans_file,out_file)

 
    def get_dbid(self):
        sql_cmd = "select min(dbid) dbid from gp_segment_configuration where role = 'p' and status = 'u' and content > -1"
        dbid=PSQL.run_sql_command(sql_cmd= sql_cmd,flags='-q -t')
        tinctest.logger.info('Segments %s chosen for fault injection' % (dbid))
        return dbid
     
    def log_segment_state(self):
        sql_cmd = "select * from gp_segment_configuration order by dbid"
        result=PSQL.run_sql_command(sql_cmd= sql_cmd)
        tinctest.logger.info('==========================')
        tinctest.logger.info('State of Segments ')
        tinctest.logger.info(result)
        tinctest.logger.info('==========================')

    def get_segcount_state(self,state):
        sql_cmd = "select count(*) from gp_segment_configuration where status = '%s'" % (state)
        result=PSQL.run_sql_command(sql_cmd= sql_cmd,flags='-q -t')
        tinctest.logger.info('Number of segments in %s State == %d' % (state,(int(result))))
        return int(result)

    def get_utilitymode_conn_info(self, relid=0):
        # Get the segment to log in to in utility mode, then get the hostname and port for that segment
        sql_cmd="select port, hostname from gp_segment_configuration sc  where dbid > 1 and role = 'p' limit 1;"
        utilitymodeinfo=PSQL.run_sql_command(sql_cmd=sql_cmd,  flags='-q -t')
        u_port=utilitymodeinfo.strip().split('|')[0]
        u_host=utilitymodeinfo.strip().split('|')[1]
        return [u_port,u_host]

    def get_relid(self,file_name=None):
        sql_cmd="SELECT oid FROM pg_class WHERE relname='%s';\n" % file_name
        relid= PSQL.run_sql_command(sql_cmd=sql_cmd,  flags='-q -t')
        return relid

    def get_segment_cnt(self, relid=0,host=None,port=None):
        sql_cmd="select count(*) from gp_toolkit.__gp_aocsseg(%s) group by column_num having count(*) > 1 limit 1" % (relid)
        segcnt=PSQL.run_sql_command_utility_mode(sql_cmd=sql_cmd,host=host, port=port,flags='-q -t')
        if (len(segcnt.strip()) == 0):
            segcnt='0'
        return segcnt

    def run_test_utility_mode(self,filename):
        #alter_aoco_tab_utilitymode
        relid = self.get_relid(file_name=filename )
        utilitymodeinfo=self.get_utilitymode_conn_info( relid=relid)
        u_port=utilitymodeinfo[0]
        u_host=utilitymodeinfo[1]
        self.run_sql_utility_mode(filename,host=u_host,port=u_port)

    
    def run_sql_utility_mode(self,filename,host=None,port=None):
        fname=filename
        sql_file = self.get_sql_files(fname)
        out_file = self.base_dir+ "/sql/"+fname +'.out'
        ans_file = self.base_dir+ "/expected/"+fname+'.ans'
        tinctest.logger.info( '\n==============================')
        tinctest.logger.info( sql_file)
        tinctest.logger.info( out_file)
        tinctest.logger.info( ans_file)
        tinctest.logger.info( '==============================')
        result=PSQL.run_sql_file_utility_mode(sql_file,out_file=out_file,host=host, port=port)
        self.validate_sql(ans_file,out_file)
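
A minimal usage sketch for the AOCOAlterColumn helpers follows; it is not part of the scraped example, and the 'addcol'/'co' file-name components are hypothetical values chosen only to match the action_storage naming pattern used above.

# Hypothetical usage; assumes sql/addcol_co.sql and expected/addcol_co.ans exist.
checker = AOCOAlterColumn()
checker.run_test_CatalogCheck('addcol', 'co')       # runs sql/addcol_co.sql into sql/addcol_co.out
checker.validate_test_CatalogCheck('addcol', 'co')  # diffs the .out file against expected/addcol_co.ans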
Example #54
0
class GpRecoversegRegressionTests(unittest.TestCase):

    def setUp(self):
        self.gprec = GpRecover()
        self.gphome = os.environ.get('GPHOME')

    def failover(self, type = 'mirror'):
        if type == 'mirror':
            fault_str = 'source %s/greenplum_path.sh;gpfaultinjector -f filerep_consumer  -m async -y fault -r mirror -H ALL' % self.gphome
        else:
            fault_str = 'source %s/greenplum_path.sh;gpfaultinjector -f postmaster -m async -y panic -r primary -H ALL' % self.gphome
        return run_shell_command(fault_str, cmdname = 'Run fault injector to failover')
    
    def test_incr_gprecoverseg(self):
        self.gprec.wait_till_insync_transition()
        if(self.failover()):
            self.assertTrue(self.gprec.incremental())

    def test_full_gprecoverseg(self):
        self.gprec.wait_till_insync_transition()
        if(self.failover()):
            self.assertTrue(self.gprec.full())

    def test_gprecoverseg_rebalance(self):
        self.gprec.wait_till_insync_transition()
        if(self.failover('primary')):
            PSQL.run_sql_file(local_path('mirror_failover_trigger.sql'))
            self.gprec.incremental()
            if (self.gprec.wait_till_insync_transition()):
                self.assertTrue(self.gprec.rebalance())
    
    def test_wait_till_insync(self):
        self.gprec.wait_till_insync_transition()
        if(self.failover()):
            self.gprec.incremental()
            self.assertTrue(self.gprec.wait_till_insync_transition())
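
Since this example is an ordinary unittest.TestCase, it can presumably be driven with the standard runner; the snippet below is a generic sketch, not part of the original example.

# Hypothetical runner for the regression tests above.
import unittest

if __name__ == '__main__':
    suite = unittest.TestLoader().loadTestsFromTestCase(GpRecoversegRegressionTests)
    unittest.TextTestRunner(verbosity=2).run(suite)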
Example #55
0
class PersistentTables(ScenarioTestCase):
    """
    
    @description Test Cases for Persistent Table testing QA-2417 - Crash Recovery Test
    @created 2013-03-29 10:10:10
    @modified 2013-05-24 17:10:15
    @tags persistent tables schedule_persistent_tables 
    @product_version gpdb:
    """
    def __init__(self, methodName):
        super(PersistentTables, self).__init__(methodName)

    @classmethod
    def setUpClass(cls):
        super(PersistentTables, cls).setUpClass()
        tinctest.logger.info('Setup Database ...')
        setdb = Fault()
        setdb.create_db()

        tinctest.logger.info('Running the initial setup sql files')
        setup = InitialSetup()
        setup.createSQLFiles()
        setup.runSQLFiles()
        tinctest.logger.info(
            'Generating the load - sql files to be run concurrently')
        sqldatagen = GenerateSqls()
        sqldatagen.generate_sqls()

    # Replacing the setUp method with the following one, as setUp method is called twice redundantly
    def setUp(self):
        ''' Need to rebalance cluster as primary segments are killed during test'''
        super(PersistentTables, self).setUp()
        tinctest.logger.info('***Rebalancing cluster state***')
        fault_recovery = Fault()
        if not fault_recovery.rebalance_cluster():
            raise ClusterStateException(
                "**FATAL!! Cluster rebalancing failed - segments went down after \
                                       gprecoverseg -ar, even incremental recovery couldn't bring the segments up. \
                                       Cannot proceed with the tests!! ")

    ''' Global Persistent Table Rebuild - Test Enhancement ParisTX-PT '''

    def rebuild_persistent_table_objects(self, type='master'):
        ''' Rebuild Persistent Object '''
        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.storage.persistent_tables.PTRebuild.persistent_rebuild_scenario.RebuildPersistentObjectsTest.test_rebuild_persistent_objects_%s'
            % type)
        self.test_case_scenario.append(test_case_list1)

        #Check the State of DB and Cluster
        test_case_list2 = []
        test_case_list2.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_catalog")
        self.test_case_scenario.append(test_case_list2)

        test_case_list3 = []
        test_case_list3.append(
            "mpp.gpdb.tests.storage.persistent_tables.test_PT_RebuildPT.PersistentTables.wait_till_insync_transition"
        )
        self.test_case_scenario.append(test_case_list3)

        test_case_list4 = []
        test_case_list4.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity"
        )
        self.test_case_scenario.append(test_case_list4)

    def wait_till_insync_transition(self):
        self.gpr = GpRecover()
        self.gpr.wait_till_insync_transition()

    def test_rebuild_persistent_objects_master(self):
        ''' Rebuild Persistent Object on Master '''
        self.rebuild_persistent_table_objects('master')

    def test_rebuild_persistent_objects_segment(self):
        ''' Rebuild Persistent Object on Segment '''
        self.rebuild_persistent_table_objects('segment')

    def test_abort_pt_rebuild(self):
        ''' Abort Persistent Object Rebuild '''
        test_case_list1 = []
        test_case_list1.append(
            'mpp.gpdb.tests.storage.persistent_tables.PTRebuild.persistent_rebuild_scenario.AbortRebuildPersistentObjectsTest.test_stop_db'
        )
        test_case_list1.append(
            'mpp.gpdb.tests.storage.persistent_tables.PTRebuild.persistent_rebuild_scenario.AbortRebuildPersistentObjectsTest.test_rebuild_persistent_objects'
        )
        self.test_case_scenario.append(test_case_list1)

        #Start Database
        test_case_list2 = []
        test_case_list2.append(
            'mpp.gpdb.tests.storage.persistent_tables.fault.fault.GPDBdbOps.gpstart_db'
        )
        self.test_case_scenario.append(test_case_list2)

        #Issue gpcheckcat
        test_case_list3 = []
        test_case_list3.append(
            'mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_catalog')
        self.test_case_scenario.append(test_case_list3)

        #Do recovery
        #gprecoverseg Incr
        test_case_list4 = []
        test_case_list4.append(
            'mpp.gpdb.tests.storage.persistent_tables.fault.fault.RecoveryTest.test_recovery'
        )
        self.test_case_scenario.append(test_case_list4)

        #Re-balance segments to rebuild PT
        test_case_list5 = []
        test_case_list5.append(
            'mpp.gpdb.tests.storage.persistent_tables.fault.fault.RecoveryTest.test_rebalance_segment'
        )
        self.test_case_scenario.append(test_case_list5)

        #Do PT rebuild
        test_case_list6 = []
        test_case_list6.append(
            'mpp.gpdb.tests.storage.persistent_tables.PTRebuild.persistent_rebuild_scenario.RebuildPersistentObjectsTest.test_rebuild_persistent_objects_segment'
        )
        self.test_case_scenario.append(test_case_list6)

        #Check the State of DB and Cluster
        test_case_list7 = []
        test_case_list7.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_catalog")
        self.test_case_scenario.append(test_case_list7)

        test_case_list8 = []
        test_case_list8.append(
            "mpp.gpdb.tests.storage.persistent_tables.test_PT_RebuildPT.PersistentTables.wait_till_insync_transition"
        )
        self.test_case_scenario.append(test_case_list8)

        test_case_list9 = []
        test_case_list9.append(
            "mpp.gpdb.tests.storage.lib.dbstate.DbStateClass.check_mirrorintegrity"
        )
        self.test_case_scenario.append(test_case_list9)
Example #56
0
 def setUp(self):
     self.gprec = GpRecover()
     self.gphome = os.environ.get('GPHOME')
Example #57
0
 def setUpClass(cls):
     super(mpp23395, cls).setUpClass()
     recoverseg = GpRecover()
     recoverseg.recover_rebalance_segs()
Example #58
0
    def test_resync_ct_blocks_per_query(self):
        '''Catch a bug in resync that manifests only after rebalance.
        The logic used by a resync worker to obtain changed blocks
        from CT log had a bug.  The SQL query used to obtain a batch
        of changed blocks from CT log was incorrectly using LSN to
        filter out changed blocks.  All of the following must be true
        for the bug to occur:

         * More than gp_filerep_ct_batch_size blocks of a relation
           are changed on a segment in changetracking.

         * A block with a higher number is changed earlier (lower
           LSN) than lower numbered blocks.

         * The first batch of changed blocks obtained by resync worker
           from CT log for this relation contains only lower
           (according to block number) blocks.  The higher block with
           lower LSN is not included in this batch.  Another query
           must be run against CT log to obtain this block.

         * The SQL query used to obtain next batch of changed blocks
           for this relation contains incorrect WHERE clause involving
           a filter based on LSN of previously obtained blocks.  The
           higher numbered block is missed out - not returned by the
           query as changed block for the relation.  The block is
           never shipped from primary to mirror, resulting in data
           loss.  The test aims to verify that this doesn't happen as
           the bug is now fixed.
        '''
        config = GPDBConfig()
        assert (config.is_not_insync_segments()
                & config.is_balanced_segments()
                ), 'cluster not in-sync and balanced'

        # Create table and insert data so that adequate number of
        # blocks are occupied.
        self.run_sql('resync_bug_setup')
        # Bring down primaries and transition mirrors to
        # changetracking.
        filerep = Filerepe2e_Util()
        filerep.inject_fault(y='fault',
                             f='segment_probe_response',
                             r='primary')
        # Trigger the fault by running a sql file.
        PSQL.run_sql_file(local_path('test_ddl.sql'))
        filerep.wait_till_change_tracking_transition()

        # Set gp_filerep_ct_batch_size = 3.
        cmd = Command('reduce resync batch size',
                      'gpconfig -c gp_filerep_ct_batch_size -v 3')
        cmd.run()
        assert cmd.get_results().rc == 0, 'gpconfig failed'
        cmd = Command('load updated config', 'gpstop -au')
        cmd.run()
        assert cmd.get_results().rc == 0, '"gpstop -au" failed'

        self.run_sql('change_blocks_in_ct')

        # Capture change tracking log contents from the segment of
        # interest for debugging, in case the test fails.
        (host, port) = GPDBConfig().get_hostandport_of_segment(0, 'p')
        assert PSQL.run_sql_file_utility_mode(
            sql_file=local_path('sql/ct_log_contents.sql'),
            out_file=local_path('output/ct_log_contents.out'),
            host=host,
            port=port), 'failed to run ct_log_contents.sql'

        gprecover = GpRecover(GPDBConfig())
        gprecover.incremental(False)
        gprecover.wait_till_insync_transition()

        # Rebalance, so that original primary is back in the role
        gprecover = GpRecover(GPDBConfig())
        gprecover.rebalance()
        gprecover.wait_till_insync_transition()

        # Reset gp_filerep_ct_batch_size
        cmd = Command('reset resync batch size',
                      'gpconfig -r gp_filerep_ct_batch_size')
        cmd.run()
        assert cmd.get_results().rc == 0, 'gpconfig failed'
        cmd = Command('load updated config', 'gpstop -au')
        cmd.run()
        assert cmd.get_results().rc == 0, '"gpstop -au" failed'

        self.run_sql('select_after_rebalance')
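
The test above relies on a run_sql helper that is not included in this snippet. A sketch of what such a helper might look like is given below; the method name is taken from the calls above, but the sql/ and output/ directory layout and the PSQL call are assumptions.

# Hypothetical helper assumed by the test above; the directory layout is a guess
# based on the local_path('sql/...') and local_path('output/...') calls it makes.
def run_sql(self, name):
    sql_file = local_path('sql/%s.sql' % name)
    out_file = local_path('output/%s.out' % name)
    assert PSQL.run_sql_file(sql_file, out_file=out_file), \
        'failed to run %s' % sql_file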