示例#1
0
    def setUpClass(cls):
        """
        Build the class-wide fixtures for the WET tests.

        Recreates the 'wet' database, drops the roles, runs the parent
        class set-up, starts gpfdist on the suite's 'data' directory and
        leaves an empty 'output' directory under the gpfdist directory.
        Stores the GPDBConfig instance on ``cls.config`` and the gpfdist
        handle on ``cls.gpfdist``.
        """
        # The tests need an empty db, so drop and recreate it up front.
        tinctest.logger.info("recreate database wet using dropdb/createdb")
        # NOTE(review): validateAfter=False presumably tolerates a failing
        # dropdb when 'wet' does not exist yet -- confirm.
        Command('recreatedb', 'dropdb wet; createdb wet').run(
            validateAfter=False)

        cls.drop_roles()

        super(LegacyWETTestCase, cls).setUpClass()

        src_root = cls.get_source_dir()
        gpdb_config = GPDBConfig()
        # Only the host of segment 0 is needed; gpfdist gets its own port.
        gpfdist_host, _ = gpdb_config.get_hostandport_of_segment(0)
        gpfdist_port = mppUtil.getOpenPort(8080)
        tinctest.logger.info(
            "gpfdist host = {0}, port = {1}".format(gpfdist_host,
                                                    gpfdist_port))

        cls.config = gpdb_config

        cls.gpfdist = GPFDIST(gpfdist_port, gpfdist_host,
                              directory=os.path.join(src_root, 'data'))
        cls.gpfdist.startGpfdist()

        # WET writes into this directory; every run starts with it empty.
        output_dir = os.path.join(cls.gpfdist.getdir(), 'output')
        shutil.rmtree(output_dir, ignore_errors=True)
        os.mkdir(output_dir)
示例#2
0
 def get_substitutions(self):
     """
     Return the placeholder -> value map with the keys '@host@' and
     '@script@' ('@script@' is datagen.py inside the SQL directory).
     """
     # NOTE(review): the host of segment 0 is looked up but never used;
     # '@host@' is pinned to a literal host name below -- confirm that
     # this is intended before relying on it.
     segment_host, _ = GPDBConfig().get_hostandport_of_segment(0)
     datagen_script = os.path.join(self.get_sql_dir(), 'datagen.py')
     return {
         '@host@': 'rh55-qavm44',
         '@script@': datagen_script,
     }
 def get_substitutions(self):
     """
     Return the placeholder -> value map with the keys '@host@' and
     '@script@' ('@script@' is datagen.py inside the SQL directory).
     """
     # NOTE(review): the host of segment 0 is looked up but never used;
     # '@host@' is pinned to a literal host name below -- confirm that
     # this is intended before relying on it.
     segment_host, _ = GPDBConfig().get_hostandport_of_segment(0)
     datagen_script = os.path.join(self.get_sql_dir(), 'datagen.py')
     return {
         '@host@': 'rh55-qavm44',
         '@script@': datagen_script,
     }
示例#4
0
    def test_insert_commit_before_truncate(self):
        '''
        @description We suspend the vacuum on master after the first
                     transaction, and connect to segment.  Modify the
                     relation in vacuum and commit the segment local
                     transaction before the truncate transaction starts.
        '''
        # Flow: suspend the fault, start the 'conn1' script in a background
        # thread, run the 'conn2' script in utility mode against segment 0,
        # reset the fault, then diff both outputs against their answers.
        fault_name = 'vacuum_relation_end_of_first_round'

        gpdbconfig = GPDBConfig()
        seghost, segport = gpdbconfig.get_hostandport_of_segment(0, 'p')
        filereputil = Filerepe2e_Util()
        # NOTE(review): seg_id='1' presumably addresses the master (the
        # description says the vacuum is suspended on master) -- confirm.
        filereputil.inject_fault(f=fault_name, y='suspend', seg_id='1')

        # run vacuum in background, it'll be blocked.
        sql_file1, ans_file1, out_file1 = self.get_file_names('conn1')
        psql1 = PSQL(sql_file=sql_file1, out_file=out_file1)
        thread1 = threading.Thread(target=self.run_psql, args=(psql1,))
        thread1.start()

        try:
            self.check_fault_triggered(fault_name)

            sql_file2, ans_file2, out_file2 = self.get_file_names('conn2')
            # utility to seg0
            psql2 = PSQL(sql_file=sql_file2, out_file=out_file2,
                         host=seghost, port=segport,
                         PGOPTIONS='-c gp_session_role=utility')
            self.run_psql(psql2)
        finally:
            # resume vacuum.  This lives in `finally` so that a failure
            # above cannot leave the fault suspended and the non-daemon
            # background thread blocked, which would hang the test run.
            filereputil.inject_fault(f=fault_name, y='reset', seg_id='1')
            thread1.join()

        self.assertTrue(Gpdiff.are_files_equal(out_file1, ans_file1))
        self.assertTrue(Gpdiff.are_files_equal(out_file2, ans_file2))
示例#5
0
    def test_insert_unlock_before_truncate(self):
        '''
        @description This is rather complicated.  We suspend the vacuum on
                     master after the first transaction, and connect to
                     segment, modify the relation in question, and release the
                     lock, keep the transaction.  To release the lock, we need
                     a special UDF.  Vacuum is supposed to skip truncate if it
                     sees such in-progress transaction.  Usually this should
                     not happen, but it rather simulates catalog DDL.
        '''
        fault_name = 'vacuum_relation_end_of_first_round'

        gpdbconfig = GPDBConfig()
        seghost, segport = gpdbconfig.get_hostandport_of_segment(0, 'p')
        filereputil = Filerepe2e_Util()
        # NOTE(review): seg_id='1' presumably addresses the master (the
        # description says the vacuum is suspended on master) -- confirm.
        filereputil.inject_fault(f=fault_name, y='suspend', seg_id='1')

        PSQL.run_sql_command(
            sql_cmd=
            'drop table if exists sync_table; create table sync_table(a int)')
        # Use pygresql to keep the connection and issue commands separately.
        # thread2 will wait on sync_table before finish its work, so we
        # can keep the transaction open until the vacuum completes its work.
        conn = pygresql.pg.connect(host=seghost,
                                   port=int(segport),
                                   opt='-c gp_session_role=utility')
        thread2 = None
        try:
            conn.query('begin')
            conn.query('lock sync_table in access exclusive mode')

            # run vacuum background, it'll be blocked.
            sql_file1, ans_file1, out_file1 = self.get_file_names('conn1')
            psql1 = PSQL(sql_file=sql_file1, out_file=out_file1)
            thread1 = threading.Thread(target=self.run_psql, args=(psql1, ))
            thread1.start()

            try:
                self.check_fault_triggered(fault_name)

                sql_file2, ans_file2, out_file2 = self.get_file_names('conn2')
                # utility to seg0
                psql2 = PSQL(sql_file=sql_file2,
                             out_file=out_file2,
                             host=seghost,
                             port=segport,
                             PGOPTIONS='-c gp_session_role=utility')
                thread2 = threading.Thread(target=self.run_psql,
                                           args=(psql2, ))
                thread2.start()
            finally:
                # resume vacuum.  Done in `finally` so a failure above
                # cannot leave the fault suspended and thread1 blocked.
                filereputil.inject_fault(f=fault_name, y='reset', seg_id='1')
                thread1.join()

            # Once thread1 finishes, we can now release the lock on
            # sync_table, so that thread2 can proceed.
            conn.query('commit')
        finally:
            # Always release the connection.  If the commit above was not
            # reached, closing the session is expected to drop the lock on
            # sync_table as well, so thread2 is not left waiting.
            conn.close()
            if thread2 is not None:
                thread2.join()

        self.assertTrue(Gpdiff.are_files_equal(out_file1, ans_file1))
        self.assertTrue(Gpdiff.are_files_equal(out_file2, ans_file2))
示例#6
0
 def get_substitutions(self):
     """
     Returns substitution variables.

     The result is a dict with the single key 'HOST', holding the host
     that GPDBConfig reports for segment 0.
     """
     segment_host, _ = GPDBConfig().get_hostandport_of_segment(0)
     return {'HOST': segment_host}
示例#7
0
 def get_substitutions(self):
     """
     Returns substitution variables.

     The result is a dict with the single key 'HOST', holding the host
     that GPDBConfig reports for segment 0.
     """
     segment_host, _ = GPDBConfig().get_hostandport_of_segment(0)
     return {'HOST': segment_host}
示例#8
0
 def kill_first_mirror(self):
     """
     Kill, with SIGKILL, every process on the content-0 mirror's host whose
     `ps -ef` line mentions that mirror's data location, then log the
     command, its return code and its stdout.
     """
     data_location = self.get_default_fs_loc(role='m', content=0)
     # Only the host is used; the port of the mirror is ignored.
     (mirror_host, _port) = GPDBConfig().get_hostandport_of_segment(
         psegmentNumber=0, pRole='m')
     # Pipeline: drop the grep process itself, keep the lines naming the
     # data location, take field 2 of each line and hand it to `kill -9`.
     kill_pipeline = (
         "ps -ef|grep -v grep|grep '%s'|awk '{print $2}'|xargs kill -9"
         % data_location)
     # NOTE(review): ctxt=2 together with remoteHost presumably makes the
     # command run on the mirror host -- confirm against Command.
     killer = Command(name='kill first mirror', cmdStr=kill_pipeline,
                      ctxt=2, remoteHost=mirror_host)
     killer.run()
     tinctest.logger.info('run command %s' % kill_pipeline)
     exit_code = killer.get_results().rc
     output = killer.get_results().stdout
     tinctest.logger.info(
         'Command returning, rc: %s, result: %s' % (exit_code, output))
示例#9
0
 def test_pg_aocsseg_corruption(self):
     """
     Run corrupt_pg_aocsseg.sql in utility mode against the segment that
     GPDBConfig returns by default and diff the output against the
     expected answer file.

     Raises Exception when the output and the answer file differ.
     """
     self.create_appendonly_tables(row=False)
     seg_host, seg_port = GPDBConfig().get_hostandport_of_segment()

     # NOTE(review): transform_sql_file presumably renders the '.sql'
     # file from this '.t' template for table 'co1' -- confirm.
     template = os.path.join(self.sql_dir, 'corrupt_pg_aocsseg.sql.t')
     self.transform_sql_file(template, 'co1')

     actual = os.path.join(self.output_dir, 'corrupt_pg_aocsseg.out')
     expected = os.path.join(self.ans_dir, 'corrupt_pg_aocsseg.ans')
     script = os.path.join(self.sql_dir, 'corrupt_pg_aocsseg.sql')
     PSQL.run_sql_file_utility_mode(
         script,
         out_file=actual,
         host=seg_host,
         port=seg_port,
         dbname=os.environ['PGDATABASE'])

     files_match = Gpdiff.are_files_equal(
         actual, expected, match_sub=[local_path('sql/init_file')])
     if files_match:
         return
     raise Exception('Corruption test of pg_aocsseg failed for appendonly tables !')
示例#10
0
    def test_insert_unlock_before_truncate(self):
        '''
        @description This is rather complicated.  We suspend the vacuum on
                     master after the first transaction, and connect to
                     segment, modify the relation in question, and release the
                     lock, keep the transaction.  To release the lock, we need
                     a special UDF.  Vacuum is supposed to skip truncate if it
                     sees such in-progress transaction.  Usually this should
                     not happen, but it rather simulates catalog DDL.
        '''
        fault_name = 'vacuum_relation_end_of_first_round'

        gpdbconfig = GPDBConfig()
        seghost, segport = gpdbconfig.get_hostandport_of_segment(0, 'p')
        filereputil = Filerepe2e_Util()
        # NOTE(review): seg_id='1' presumably addresses the master (the
        # description says the vacuum is suspended on master) -- confirm.
        filereputil.inject_fault(f=fault_name, y='suspend', seg_id='1')

        PSQL.run_sql_command(sql_cmd='drop table if exists sync_table; create table sync_table(a int)')
        # Use pygresql to keep the connection and issue commands separately.
        # thread2 will wait on sync_table before finish its work, so we
        # can keep the transaction open until the vacuum completes its work.
        conn = pygresql.pg.connect(host=seghost, port=int(segport), opt='-c gp_session_role=utility')
        thread2 = None
        try:
            conn.query('begin')
            conn.query('lock sync_table in access exclusive mode')

            # run vacuum background, it'll be blocked.
            sql_file1, ans_file1, out_file1 = self.get_file_names('conn1')
            psql1 = PSQL(sql_file=sql_file1, out_file=out_file1)
            thread1 = threading.Thread(target=self.run_psql, args=(psql1,))
            thread1.start()

            try:
                self.check_fault_triggered(fault_name)

                sql_file2, ans_file2, out_file2 = self.get_file_names('conn2')
                # utility to seg0
                psql2 = PSQL(sql_file=sql_file2, out_file=out_file2,
                             host=seghost, port=segport,
                             PGOPTIONS='-c gp_session_role=utility')
                thread2 = threading.Thread(target=self.run_psql, args=(psql2,))
                thread2.start()
            finally:
                # resume vacuum.  Done in `finally` so a failure above
                # cannot leave the fault suspended and thread1 blocked.
                filereputil.inject_fault(f=fault_name, y='reset', seg_id='1')
                thread1.join()

            # Once thread1 finishes, we can now release the lock on
            # sync_table, so that thread2 can proceed.
            conn.query('commit')
        finally:
            # Always release the connection.  If the commit above was not
            # reached, closing the session is expected to drop the lock on
            # sync_table as well, so thread2 is not left waiting.
            conn.close()
            if thread2 is not None:
                thread2.join()

        self.assertTrue(Gpdiff.are_files_equal(out_file1, ans_file1))
        self.assertTrue(Gpdiff.are_files_equal(out_file2, ans_file2))
示例#11
0
文件: test_runsqls.py 项目: 50wu/gpdb
    def setUpClass(cls):
        """
        Class-level fixture: after the parent set-up, start gpfdist on the
        suite's 'data' directory (handle kept on ``cls.gpfdist``) and
        recreate an empty 'data/output' directory.
        """
        super(other_tests, cls).setUpClass()

        src_root = cls.get_source_dir()
        # Only the host of segment 0 is needed; gpfdist gets its own port.
        gpfdist_host, _ = GPDBConfig().get_hostandport_of_segment(0)
        gpfdist_port = mppUtil.getOpenPort(8080)
        tinctest.logger.info(
            "gpfdist host = {0}, port = {1}".format(gpfdist_host,
                                                    gpfdist_port))

        data_root = os.path.join(src_root, 'data')
        cls.gpfdist = GPFDIST(gpfdist_port, gpfdist_host, directory=data_root)
        cls.gpfdist.startGpfdist()

        # Start every run with an empty output directory.
        output_dir = os.path.join(data_root, 'output')
        shutil.rmtree(output_dir, ignore_errors=True)
        os.mkdir(output_dir)
示例#12
0
    def setUpClass(cls):
        """
        Class-level fixture: after the parent set-up, start gpfdist on the
        suite's 'data' directory (handle kept on ``cls.gpfdist``) and
        recreate an empty 'data/output' directory.
        """
        super(other_tests, cls).setUpClass()

        src_root = cls.get_source_dir()
        # Only the host of segment 0 is needed; gpfdist gets its own port.
        gpfdist_host, _ = GPDBConfig().get_hostandport_of_segment(0)
        gpfdist_port = mppUtil.getOpenPort(8080)
        tinctest.logger.info(
            "gpfdist host = {0}, port = {1}".format(gpfdist_host,
                                                    gpfdist_port))

        data_root = os.path.join(src_root, 'data')
        cls.gpfdist = GPFDIST(gpfdist_port, gpfdist_host, directory=data_root)
        cls.gpfdist.startGpfdist()

        # Start every run with an empty output directory.
        output_dir = os.path.join(data_root, 'output')
        shutil.rmtree(output_dir, ignore_errors=True)
        os.mkdir(output_dir)
示例#13
0
 def get_host_port_mapping(self, role):
     """
     Group segment ports by host for the given role.

     Walks the content ids 0 .. N-1, where N is the primary segment count
     reported by GPDBConfig, and looks up the (host, port) of each content
     id for `role`.

     Returns a dictionary having key as hostname and value as a list of
     port nos, in content-id order.
     For e.g {'vm9': ['22001', '22000'], 'vm10': ['42000', '42001'], ...}
     """
     config = GPDBConfig()
     hosts_dict = {}
     for content_id in range(config.get_countprimarysegments()):
         (host, port) = config.get_hostandport_of_segment(content_id, role)
         # setdefault() replaces dict.has_key(), which Python 3 removed.
         hosts_dict.setdefault(host, []).append(port)
     return hosts_dict
示例#14
0
文件: genFault.py 项目: LJoNe/gpdb
 def get_host_port_mapping(self,role):
     """
     Group segment ports by host for the given role.

     Walks the content ids 0 .. N-1, where N is the primary segment count
     reported by GPDBConfig, and looks up the (host, port) of each content
     id for `role`.

     Returns a dictionary having key as hostname and value as a list of
     port nos, in content-id order.
     For e.g {'vm9': ['22001', '22000'], 'vm10': ['42000', '42001'], ...}
     """
     config = GPDBConfig()
     hosts_dict = {}
     for content_id in range(config.get_countprimarysegments()):
         (host, port) = config.get_hostandport_of_segment(content_id, role)
         # setdefault() replaces dict.has_key(), which Python 3 removed.
         hosts_dict.setdefault(host, []).append(port)
     return hosts_dict
示例#15
0
 def test_pg_aocsseg_corruption(self):
     """
     Run corrupt_pg_aocsseg.sql in utility mode against the segment that
     GPDBConfig returns by default and diff the output against the
     expected answer file.

     Raises Exception when the output and the answer file differ.
     """
     self.create_appendonly_tables(row=False)
     seg_host, seg_port = GPDBConfig().get_hostandport_of_segment()

     # NOTE(review): transform_sql_file presumably renders the '.sql'
     # file from this '.t' template for table 'co1' -- confirm.
     template = os.path.join(self.sql_dir, 'corrupt_pg_aocsseg.sql.t')
     self.transform_sql_file(template, 'co1')

     actual = os.path.join(self.output_dir, 'corrupt_pg_aocsseg.out')
     expected = os.path.join(self.ans_dir, 'corrupt_pg_aocsseg.ans')
     script = os.path.join(self.sql_dir, 'corrupt_pg_aocsseg.sql')
     PSQL.run_sql_file_utility_mode(
         script,
         out_file=actual,
         host=seg_host,
         port=seg_port,
         dbname=os.environ['PGDATABASE'])

     files_match = Gpdiff.are_files_equal(
         actual, expected, match_sub=[local_path('sql/init_file')])
     if files_match:
         return
     raise Exception(
         'Corruption test of pg_aocsseg failed for appendonly tables !')
示例#16
0
    def setUpClass(cls):
        """
        Class-level fixture: after the parent set-up and ``split_tbl()``,
        start gpfdist on the suite's 'data' directory (handle kept on
        ``cls.gpfdist``) and recreate an empty 'data/output' directory.
        """
        super(LegacyRETTestCase, cls).setUpClass()

        cls.split_tbl()

        src_root = cls.get_source_dir()
        # Only the host of segment 0 is needed; gpfdist gets its own port.
        gpfdist_host, _ = GPDBConfig().get_hostandport_of_segment(0)
        gpfdist_port = mppUtil.getOpenPort(8080)
        tinctest.logger.info(
            "gpfdist host = {0}, port = {1}".format(gpfdist_host,
                                                    gpfdist_port))

        data_root = os.path.join(src_root, 'data')
        cls.gpfdist = GPFDIST(gpfdist_port, gpfdist_host, directory=data_root)
        cls.gpfdist.startGpfdist()

        # Some test writes data into disk temporarily; start with it empty.
        output_dir = os.path.join(data_root, 'output')
        shutil.rmtree(output_dir, ignore_errors=True)
        os.mkdir(output_dir)
示例#17
0
    def setUpClass(cls):
        """
        Class-level fixture: after the parent set-up and ``split_tbl()``,
        start gpfdist on the suite's 'data' directory (handle kept on
        ``cls.gpfdist``) and recreate an empty 'data/output' directory.
        """
        super(LegacyRETTestCase, cls).setUpClass()

        cls.split_tbl()

        src_root = cls.get_source_dir()
        # Only the host of segment 0 is needed; gpfdist gets its own port.
        gpfdist_host, _ = GPDBConfig().get_hostandport_of_segment(0)
        gpfdist_port = mppUtil.getOpenPort(8080)
        tinctest.logger.info(
            "gpfdist host = {0}, port = {1}".format(gpfdist_host,
                                                    gpfdist_port))

        data_root = os.path.join(src_root, 'data')
        cls.gpfdist = GPFDIST(gpfdist_port, gpfdist_host, directory=data_root)
        cls.gpfdist.startGpfdist()

        # Some test writes data into disk temporarily; start with it empty.
        output_dir = os.path.join(data_root, 'output')
        shutil.rmtree(output_dir, ignore_errors=True)
        os.mkdir(output_dir)
示例#18
0
class GPDBConfigRegressionTests(unittest.TestCase):
    def __init__(self, methodName):
        self.gpconfig = GPDBConfig()
        super(GPDBConfigRegressionTests, self).__init__(methodName)

    def test_get_countprimarysegments(self):
        nprimary = self.gpconfig.get_countprimarysegments()
        self.assertTrue(nprimary > 0)

    def test_get_hostandport_of_segment(self):
        (host,
         port) = self.gpconfig.get_hostandport_of_segment(psegmentNumber=-1,
                                                          pRole='p')
        myhost = socket.gethostname()
        self.assertEquals(host, myhost)

    def test_get_count_segments(self):
        seg_count = self.gpconfig.get_count_segments()
        self.assertTrue(seg_count.strip() > 0)

    def test_seghostnames(self):
        hostlist = self.gpconfig.get_hosts()
        self.assertTrue(len(hostlist) > 0)

    def test_hostnames(self):
        hostlist = self.gpconfig.get_hosts(segments=False)
        self.assertTrue(len(hostlist) > 0)

    def tes_get_masterhost(self):
        master_host = self.gpconfig.get_masterhost()
        myhost = socket.gethostname()
        self.assertEquals(master_host, myhost)

    def test_get_masterdata_directory(self):
        master_dd = self.gpconfig.get_masterdata_directory()
        my_mdd = os.getenv("MASTER_DATA_DIRECTORY")
        self.assertEquals(master_dd, my_mdd)
示例#19
0
class GPDBConfigRegressionTests(unittest.TestCase):

    def __init__(self, methodName):
        self.gpconfig = GPDBConfig()
        super(GPDBConfigRegressionTests,self).__init__(methodName)

    def test_get_countprimarysegments(self):
        nprimary = self.gpconfig.get_countprimarysegments()
        self.assertTrue(nprimary > 0)

    def test_get_hostandport_of_segment(self):
        (host,port) = self.gpconfig.get_hostandport_of_segment(psegmentNumber = -1, pRole = 'p')
        myhost = socket.gethostname()
        self.assertEquals(host, myhost)

    def test_get_count_segments(self):
        seg_count = self.gpconfig.get_count_segments()
        self.assertTrue(seg_count.strip() >0)

    def test_seghostnames(self):
        hostlist = self.gpconfig.get_hosts()
        self.assertTrue(len(hostlist) >0)
 
    def test_hostnames(self):
        hostlist = self.gpconfig.get_hosts(segments=False)
        self.assertTrue(len(hostlist) >0)

    def tes_get_masterhost(self):
        master_host = self.gpconfig.get_masterhost()
        myhost = socket.gethostname()
        self.assertEquals(master_host, myhost)

    def test_get_masterdata_directory(self):
        master_dd = self.gpconfig.get_masterdata_directory()
        my_mdd = os.getenv("MASTER_DATA_DIRECTORY")
        self.assertEquals(master_dd, my_mdd)
示例#20
0
class GpCheckcatTests(MPPTestCase):
    """
    @description gpcheckcat test suite
    @tags gpcheckcat
    @product_version gpdb: [4.3.5.1 -]
    """

    def __init__(self, methodName):
        # Port handed to gpcheckcat via -p; falls back to '5432'.
        self.master_port = os.environ.get('PGPORT', '5432')
        super(GpCheckcatTests, self).__init__(methodName)

    def setUp(self):
        super(GpCheckcatTests, self).setUp()
        self.config = GPDBConfig()
        # gpcheckcat is run from this directory in every test.
        self.gpcheckcat_test_dir = local_path('gpcheckcat_dir')
        # The 0o prefix is the octal spelling valid on Python 2.6+ and 3.
        if not os.path.exists(self.gpcheckcat_test_dir):
            os.makedirs(self.gpcheckcat_test_dir, 0o777)
        else:
            os.chmod(self.gpcheckcat_test_dir, 0o777)

    def tearDown(self):
        super(GpCheckcatTests, self).tearDown()

    def _create_database(self, dbname):
        """Drop and recreate `dbname`; fail the test if creation fails."""
        PSQL.run_sql_command('DROP DATABASE IF EXISTS %s' % dbname)
        stdout = PSQL.run_sql_command('CREATE DATABASE %s' % dbname)
        if not stdout.endswith('CREATE DATABASE\n'):
            self.fail('failed to create database: %s' % stdout)

    def _create_tables(self, dbname, failure_msg='failed to create tables'):
        """Run sql/create_tables.sql in `dbname`; fail with `failure_msg`."""
        sql_file = local_path('sql/create_tables.sql')
        if not PSQL.run_sql_file(sql_file, dbname=dbname,
                                 output_to_file=False):
            self.fail(failure_msg)

    def _corrupt_catalog(self, dbname, host, port, failure_msg):
        """Run sql/catalog_corruption.sql in utility mode on host:port."""
        sql_file = local_path('sql/catalog_corruption.sql')
        if not PSQL.run_sql_file_utility_mode(
                sql_file, dbname=dbname, host=host, port=port,
                output_to_file=False):
            self.fail(failure_msg)

    def _run_gpcheckcat(self, dbname, extra_args=''):
        """
        Run gpcheckcat on `dbname` from the test directory and return its
        return code.  `extra_args`, when given, must end with a space; it
        is inserted right before the database name.
        """
        res = {'rc': 0, 'stdout' : '', 'stderr': ''}
        run_shell_command(
            "cd %s && $GPHOME/bin/lib/gpcheckcat -p %s %s%s" %
            (self.gpcheckcat_test_dir, self.master_port, extra_args, dbname),
            results=res)
        return res['rc']

    def _verify_files(self, dbname):
        """Return the names of the verify files present for `dbname`."""
        return fnmatch.filter(os.listdir(self.gpcheckcat_test_dir),
                              'gpcheckcat.verify.%s.*' % dbname)

    def _run_latest_verify_file(self, dbname):
        """
        Run the most recently modified verify file of `dbname`.

        Only checks that the file can be run.  It is difficult to assert
        the SQL output against an expected answer file because the output
        mostly has OIDs, so that level of assertion is skipped for now.
        """
        mtime = lambda f: os.stat(
            os.path.join(self.gpcheckcat_test_dir, f)).st_mtime
        fname = sorted(self._verify_files(dbname), key=mtime)[-1]
        if not PSQL.run_sql_file(os.path.join(self.gpcheckcat_test_dir, fname), output_to_file=False):
            self.fail('failed to run verify file for database %s' % dbname)

    def test_error(self):
        """
        Test for errors during the generation of verify file
        """
        dbname = 'test_error'
        self._create_database(dbname)

        # Remove old verify files before running the test.
        if not run_shell_command('rm -f %s/gpcheckcat.verify.%s.*' %
                                 (self.gpcheckcat_test_dir, dbname)):
            self.fail('failed to remove old verify files')

        self._create_tables(dbname)

        host, port = self.config.get_hostandport_of_segment()
        self._corrupt_catalog(dbname, host, port,
                              'failed to introduce catalog corruption')

        # Take write permission away for the duration of the run
        # (presumably so that the verify file cannot be written), and
        # always give it back so the directory is not left read-only.
        os.chmod(self.gpcheckcat_test_dir, 0o555)
        try:
            rc = self._run_gpcheckcat(dbname)
        finally:
            os.chmod(self.gpcheckcat_test_dir, 0o777)
        self.assertEqual(3, rc)
        if self._verify_files(dbname):
            self.fail('found verify file when not expecting it')

    def test_no_corruption(self):
        """
        Test that gpcheckcat does not report any errors and it does
        not generate the verify file if the gpcheckcat test succeeds.
        We choose missing_extraneous test for this purpose.

        """
        dbname = 'test_no_corruption'
        self._create_database(dbname)
        self._create_tables(dbname)

        rc = self._run_gpcheckcat(dbname, '-R missing_extraneous ')
        self.assertEqual(0, rc)
        if self._verify_files(dbname):
            self.fail('found verify file when not expecting it')

    def test_singledb_corruption(self):
        """
        Test that gpcheckcat reports errors and it generates
        the verify file
        """
        dbname = 'test_singledb_corruption'
        self._create_database(dbname)
        self._create_tables(dbname)

        host, port = self.config.get_hostandport_of_segment()
        self._corrupt_catalog(dbname, host, port,
                              'failed to introduce catalog corruption')

        self.assertEqual(3, self._run_gpcheckcat(dbname))
        self.assertTrue(len(self._verify_files(dbname)) > 0)

        self._run_latest_verify_file(dbname)

    def test_multidb_corruption(self):
        """
        Test that gpcheckcat reports errors and it generates
        the verify file
        """
        dbname1 = 'test_multidb_corruption1'
        dbname2 = 'test_multidb_corruption2'
        databases = (dbname1, dbname2)

        for dbname in databases:
            self._create_database(dbname)
        for dbname in databases:
            self._create_tables(
                dbname, 'failed to create tables in database %s' % dbname)

        host, port = self.config.get_hostandport_of_segment()
        for dbname in databases:
            self._corrupt_catalog(
                dbname, host, port,
                'failed to introduce corruption in database %s' % dbname)

        for dbname in databases:
            self.assertTrue(self._run_gpcheckcat(dbname) > 0)

        for dbname in databases:
            self.assertTrue(len(self._verify_files(dbname)) > 0)

        # Run the most recent verify file of each database in turn.
        for dbname in databases:
            self._run_latest_verify_file(dbname)
示例#21
0
    def initial_setup(self):
        """
        Generate the environment-specific DML test inputs and stage the
        external-table data file.

        Every '<name>.in' template under dml/sql, dml/sql/config and
        dml/expected is copied to '<name>' with its placeholders replaced
        (segment host, gpl.yaml path, database/user/host/port settings,
        data file paths).  Afterwards /tmp/quote.csv is removed on the
        host of primary segment 0 and a fresh copy is sent there with scp;
        a failing scp is logged, not raised.
        """
        keyword = 'rh55-qavm65'
        config = GPDBConfig()
        # Only the host is used; the segment port is not needed here.
        (seg_host,
         _) = config.get_hostandport_of_segment(psegmentNumber=0,
                                                pRole='p')
        cur_path = local_path('')
        sql_dir = os.path.join(cur_path, 'dml', 'sql')
        expected_dir = os.path.join(cur_path, 'dml', 'expected')
        config_dir = os.path.join(cur_path, 'dml', 'sql', 'config')
        yaml_path = local_path('dml/sql/config/gpl.yaml')

        def render(directory, name, transform):
            """Write directory/name from directory/name.in, line by line."""
            # `with` closes both files even when a write or a transform
            # raises.  The blocks are nested, not comma-joined, so that
            # Python 2.6 can still parse them.
            with open(os.path.join(directory, name + '.in'), 'r') as src:
                with open(os.path.join(directory, name), 'w') as dst:
                    for line in src:
                        dst.write(transform(line))

        def use_segment_host(line):
            return line.replace(keyword, seg_host)

        def use_yaml_path(line):
            return line.replace('gpl.yaml', yaml_path)

        def fill_yaml(line):
            # The first matching rule wins, in this order.
            if 'DATABASE' in line:
                return line.replace('tangp3', os.environ.get('PGDATABASE'))
            if 'USER' in line:
                return line.replace('tangp3', os.environ.get('USER'))
            if 'HOST' in line:
                return line.replace('rh55-qavm61', socket.gethostname())
            if 'PORT' in line and '5432' in line:
                return line.replace('5432', os.environ.get('PGPORT'))
            if 'mydata' in line:
                return line.replace('mydata',
                                    local_path('dml/sql/gpload/mydata'))
            return line

        def use_tenk_path(line):
            if 'tenk.data' in line:
                return line.replace('tenk.data',
                                    local_path('dml/sql/_data/tenk.data'))
            return line

        render(sql_dir, 'insert_from_external.sql', use_segment_host)
        render(expected_dir, 'insert_from_external.ans', use_segment_host)
        render(sql_dir, 'insert_with_gpload.sql', use_yaml_path)
        render(config_dir, 'gpl.yaml', fill_yaml)
        render(expected_dir, 'insert_with_gpload.ans', use_yaml_path)
        render(sql_dir, 'select_from_copy_table.sql', use_tenk_path)
        render(expected_dir, 'select_from_copy_table.ans', use_tenk_path)

        external_table = local_path('dml/sql/_data/quote.csv')
        clean_file = 'rm -rf /tmp/quote.csv'
        rmt_cmd = "gpssh -h %s -e '%s' " % (seg_host, clean_file)
        cmd = Command(name='Running a remote command', cmdStr=rmt_cmd)
        cmd.run(validateAfter=False)
        command = 'scp %s %s:/tmp' % (external_table, seg_host)
        cmd = Command(name='run %s' % command, cmdStr='%s' % command)
        try:
            cmd.run(validateAfter=True)
        except Exception as e:
            # Deliberately best-effort: a failed copy is only logged.
            tinctest.logger.error("Error running command %s\n" % e)
示例#22
0
    def initial_setup(self):
        """Generate the runnable dml test files from their ``.in`` templates.

        Every template is copied line by line to the file of the same name
        without the ``.in`` suffix, with the placeholder tokens replaced by
        values for the current environment (first primary segment host,
        absolute local paths, PGDATABASE / USER / PGPORT and the local host
        name).  Afterwards any old /tmp/quote.csv is removed on the segment
        host and a fresh copy is sent there with scp; a failure of the scp
        command is logged and not re-raised.
        """
        def render(template, target, transform):
            # Copy template to target, passing each line through transform.
            # The with blocks guarantee both handles are closed even when a
            # read, write or substitution fails part way through.
            with open(template, "r") as src:
                with open(target, "w") as dst:
                    for line in src:
                        dst.write(transform(line))

        def substitute_gpload_config(line):
            # Only the first matching rule is applied to a line; the
            # environment is read lazily, i.e. only when a rule matches.
            if "DATABASE" in line:
                return line.replace("tangp3", os.environ.get("PGDATABASE"))
            if "USER" in line:
                return line.replace("tangp3", os.environ.get("USER"))
            if "HOST" in line:
                return line.replace("rh55-qavm61", socket.gethostname())
            if "PORT" in line and "5432" in line:
                return line.replace("5432", os.environ.get("PGPORT"))
            if "mydata" in line:
                return line.replace("mydata", mydata_path)
            return line

        keyword = "rh55-qavm65"
        config = GPDBConfig()
        (seg_host, _) = config.get_hostandport_of_segment(psegmentNumber=0, pRole="p")
        cur_path = local_path("")
        sql_dir = os.path.join(cur_path, "dml", "sql")
        expected_dir = os.path.join(cur_path, "dml", "expected")

        # External table test: point the hard-coded host at the segment host.
        for directory, base in ((sql_dir, "insert_from_external.sql"),
                                (expected_dir, "insert_from_external.ans")):
            render(os.path.join(directory, base + ".in"),
                   os.path.join(directory, base),
                   lambda line: line.replace(keyword, seg_host))

        # gpload test: reference the yaml control file by its absolute path.
        yaml_path = local_path("dml/sql/config/gpl.yaml")
        render(os.path.join(sql_dir, "insert_with_gpload.sql.in"),
               os.path.join(sql_dir, "insert_with_gpload.sql"),
               lambda line: line.replace("gpl.yaml", yaml_path))

        # gpload control file: fill in connection settings and the data path.
        mydata_path = local_path("dml/sql/gpload/mydata")
        render(os.path.join(sql_dir, "config", "gpl.yaml.in"),
               os.path.join(sql_dir, "config", "gpl.yaml"),
               substitute_gpload_config)

        render(os.path.join(expected_dir, "insert_with_gpload.ans.in"),
               os.path.join(expected_dir, "insert_with_gpload.ans"),
               lambda line: line.replace("gpl.yaml", yaml_path))

        # COPY test: reference tenk.data by its absolute path.
        tenk_path = local_path("dml/sql/_data/tenk.data")
        for directory, base in ((sql_dir, "select_from_copy_table.sql"),
                                (expected_dir, "select_from_copy_table.ans")):
            render(os.path.join(directory, base + ".in"),
                   os.path.join(directory, base),
                   lambda line: line.replace("tenk.data", tenk_path))

        # Stage quote.csv on the segment host: remove a stale copy first
        # (result not validated), then copy the current one over.
        external_table = local_path("dml/sql/_data/quote.csv")
        clean_file = "rm -rf /tmp/quote.csv"
        rmt_cmd = "gpssh -h %s -e '%s' " % (seg_host, clean_file)
        cmd = Command(name="Running a remote command", cmdStr=rmt_cmd)
        cmd.run(validateAfter=False)
        command = "scp %s %s:/tmp" % (external_table, seg_host)
        cmd = Command(name="run %s" % command, cmdStr=command)
        try:
            cmd.run(validateAfter=True)
        except Exception as e:
            # Best effort: a failed copy is reported but does not abort setup.
            tinctest.logger.error("Error running command %s\n" % e)
示例#23
0
class PgtwoPhaseClass(MPPTestCase):
    '''Helper class for pg_twophase supporting functions.

    Bundles fault injection (through Filerepe2e_Util), cluster stop/start,
    segment recovery and trigger-sql bookkeeping behind a few entry points.
    '''

    # Faults that are suspended/resumed with identical arguments
    # (seg_id '1', occurrence '0'); the fault name equals the fault_type.
    _DTM_FAULTS = ('dtm_broadcast_prepare',
                   'dtm_broadcast_commit_prepared',
                   'dtm_xlog_distributed_commit')

    def __init__(self, methodName):
        '''Create the utility objects used by the helpers and remember PGPORT.

        @param methodName : forwarded unchanged to MPPTestCase.__init__
        '''
        self.filereputil = Filerepe2e_Util()
        self.config = GPDBConfig()
        self.gprecover = GpRecover(self.config)
        self.gpstop = GpStop()
        self.gpstart = GpStart()
        self.gpfile = Gpfilespace(self.config)
        self.gpverify = GpdbVerify(config=self.config)
        self.dbstate = DbStateClass('run_validation', self.config)
        self.port = os.getenv('PGPORT')
        super(PgtwoPhaseClass, self).__init__(methodName)

    def _count_stat_activity(self):
        '''Return the raw psql output of count(*) on pg_stat_activity.'''
        return PSQL.run_sql_command('select count(*) from pg_stat_activity;',
                                    flags='-q -t',
                                    dbname='postgres')

    def _reset_abort_error_fault(self):
        '''Reset the error fault that inject_fault('abort') sets on seg_id 1.'''
        self.filereputil.inject_fault(
            f='transaction_abort_after_distributed_prepared',
            y='reset',
            p=self.port,
            o='0',
            seg_id='1')

    def invoke_fault(self,
                     fault_name,
                     type,
                     role='mirror',
                     port=None,
                     occurence=None,
                     sleeptime=None,
                     seg_id=None):
        ''' Reset the fault and then issue the fault with the given type

        @param fault_name : fault name, passed to inject_fault as f
        @param type : fault type issued after the reset, passed as y
        @param role, port, occurence, sleeptime, seg_id : passed through to
               both inject_fault calls as r, p, o, sleeptime and seg_id
        '''
        for fault_action in ('reset', type):
            self.filereputil.inject_fault(f=fault_name,
                                          y=fault_action,
                                          r=role,
                                          p=port,
                                          o=occurence,
                                          sleeptime=sleeptime,
                                          seg_id=seg_id)
        tinctest.logger.info(
            'Successfully injected fault_name : %s fault_type : %s' %
            (fault_name, type))

    def inject_fault(self, fault_type):
        '''
        @param fault_type : type of fault to be suspended; one of
               end_prepare_two_phase_sleep/abort/commit/dtm_broadcast_prepare/
               dtm_broadcast_commit_prepared/dtm_xlog_distributed_commit.
               Any other value injects nothing.
        '''
        if fault_type == 'end_prepare_two_phase_sleep':
            self.filereputil.inject_fault(f='end_prepare_two_phase_sleep',
                                          sleeptime='1000',
                                          y='sleep',
                                          r='primary',
                                          p=self.port)
            tinctest.logger.info(
                'Injected fault to sleep in end_prepare_two_phase')

        elif fault_type == 'abort':
            # In case of abort fault we need to include this error type fault
            # also, to fake a situation where one of the segment is not
            # responding back, which can make the master to trigger an abort
            # transaction
            self.invoke_fault('transaction_abort_after_distributed_prepared',
                              'error',
                              port=self.port,
                              occurence='0',
                              seg_id='1')

            self.invoke_fault('twophase_transaction_abort_prepared',
                              'suspend',
                              role='primary',
                              port=self.port,
                              occurence='0')

        elif fault_type == 'commit':
            self.invoke_fault('twophase_transaction_commit_prepared',
                              'suspend',
                              role='primary',
                              port=self.port,
                              occurence='0')

        elif fault_type in self._DTM_FAULTS:
            self.invoke_fault(fault_type,
                              'suspend',
                              seg_id='1',
                              port=self.port,
                              occurence='0')

    def resume_faults(self, fault_type, cluster_state='sync'):
        '''
        @param fault_type : commit/abort/end_prepare_two_phase_sleep/dtm_broadcast_prepare/dtm_broadcast_commit_prepared/dtm_xlog_distributed_commit
        @param cluster_state : for 'abort', the error fault on seg_id 1 is
               reset unless the state is 'resync'
        @description : Resume the suspended faults, then block until
               pg_stat_activity reports exactly one row (no timeout).
        '''
        tinctest.logger.info('coming to resume faults with xact %s' %
                             fault_type)
        if fault_type == 'abort':
            self.filereputil.inject_fault(
                f='twophase_transaction_abort_prepared',
                y='resume',
                r='primary',
                p=self.port,
                o='0')
            if cluster_state != 'resync':
                self._reset_abort_error_fault()
        elif fault_type == 'commit':
            self.filereputil.inject_fault(
                f='twophase_transaction_commit_prepared',
                y='resume',
                r='primary',
                p=self.port,
                o='0')
        elif fault_type in self._DTM_FAULTS:
            if fault_type == 'dtm_broadcast_commit_prepared':
                tinctest.logger.info(
                    'coming to if dtm_broadcast_commit_prepared')
            self.filereputil.inject_fault(f=fault_type,
                                          y='resume',
                                          seg_id='1',
                                          p=self.port,
                                          o='0')
        else:
            tinctest.logger.info('No faults to resume')
        tinctest.logger.info('Resumed the suspended transaction fault')

        #Wait till all the trigger_sqls are complete before returning
        sql_count = self._count_stat_activity()
        while sql_count.strip() != '1':
            sleep(5)
            sql_count = self._count_stat_activity()
            tinctest.logger.info('stat_activity count %s ' % sql_count)
        return

    def start_db(self):
        '''Gpstart; raises Exception when run_gpstart_cmd reports failure.'''
        rc = self.gpstart.run_gpstart_cmd()
        if not rc:
            raise Exception('Failed to start the cluster')
        tinctest.logger.info('Started the cluster successfully')

    def stop_db(self):
        ''' Gpstop and dont check for rc '''
        cmd = Command('Gpstop_a', 'gpstop -a')
        tinctest.logger.info('Executing command: gpstop -a')
        cmd.run()

    def crash_and_recover(self,
                          crash_type,
                          fault_type,
                          checkpoint='noskip',
                          cluster_state='sync'):
        '''
        @param crash_type : gpstop_i/gpstop_a/failover_to_primary/failover_to_mirror
               (any other value skips the crash step)
        @param fault_type : fault to resume as part of the crash sequence
        @param checkpoint : skip/noskip
        @param cluster_state : when 'resync', wait for the segments to get
               back in sync afterwards and raise if they do not
        @note: when skip checkpoint is enabled, gpstop -a returns a non-rc return code and fails in the library. To workaround, using a local function
        '''
        if crash_type == 'gpstop_i':
            rc = self.gpstop.run_gpstop_cmd(immediate=True)
            if not rc:
                raise Exception('Failed to stop the cluster')
            tinctest.logger.info('Stopped cluster immediately')
            self.start_db()
        elif crash_type == 'gpstop_a':
            self.resume_faults(fault_type, cluster_state)
            if checkpoint == 'skip':
                self.stop_db()
            else:
                rc = self.gpstop.run_gpstop_cmd()
                if not rc:
                    raise Exception('Failed to stop the cluster')
            tinctest.logger.info('Smart stop completed')
            self.start_db()
        elif crash_type == 'failover_to_primary':
            self.invoke_fault('filerep_consumer', 'fault')
            self.resume_faults(fault_type, cluster_state)
            (rc, num) = self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Value of rc and num_down %s, %s, %s' %
                                 (rc, num, fault_type))

        elif crash_type == 'failover_to_mirror':
            self.invoke_fault('postmaster', 'panic', role='primary')
            # Only the dtm_* faults are resumed on this path.
            if fault_type in self._DTM_FAULTS:
                self.resume_faults(fault_type, cluster_state)
            (rc, num) = self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Value of rc and num_down %s, %s' % (rc, num))
            if fault_type == 'abort':
                self._reset_abort_error_fault()

        if cluster_state == 'resync':
            if not self.gprecover.wait_till_insync_transition():
                raise Exception('Segments not in sync')

    def get_trigger_status_old(self, trigger_count):
        '''Compare the pg_stat_activity count with the total number of trigger_sqls executed

        @param trigger_count : number of trigger sqls expected
        @return : False when the last observed count is below trigger_count,
                  True otherwise
        '''
        # NOTE(review): the query runs 49 times but only the final result is
        # compared below; unclear whether the repetition is a deliberate
        # delay or a mis-indented polling loop. Behaviour kept as is.
        for _ in range(1, 50):
            psql_count = self._count_stat_activity()
        tinctest.logger.info('Count of trigger sqls %s' % psql_count)
        if int(psql_count.strip()) < trigger_count:
            tinctest.logger.info('coming to the if loop in get_trigger_status')
            return False
        return True

    def get_trigger_status(self, trigger_count, fault_type):
        '''Report whether the trigger sqls have reached the fault.

        @param trigger_count : number of trigger sqls expected
        @param fault_type : fault whose 'triggered' status is checked on
               seg_id 1; when None the pg_stat_activity based check is used
        '''
        if fault_type is None:
            return self.get_trigger_status_old(trigger_count)

        return self.filereputil.check_fault_status(fault_name=fault_type,
                                                   status="triggered",
                                                   seg_id='1',
                                                   num_times_hit=trigger_count)

    def check_trigger_sql_hang(self, test_dir, fault_type=None):
        '''
        @param test_dir : comma separated list of test directories; the *.sql
               files under <dir>/trigger_sql/sql/ of each are counted
        @param fault_type : passed on to get_trigger_status
        @description : Return the status of the trigger sqls: whether they are waiting on the fault 
        Since gpfaultinjector has no way to check if all the sqls are triggered, we are using 
        a count(*) on pg_stat_activity and compare the total number of trigger_sqls
        '''
        trigger_count = 0
        for sql_dir in test_dir.split(","):
            trigger_dir = local_path('%s/trigger_sql/sql/' % (sql_dir))
            trigger_count += len(glob.glob1(trigger_dir, "*.sql"))
        tinctest.logger.info('Total number of sqls to trigger %d in %s' %
                             (trigger_count, test_dir))
        return self.get_trigger_status(trigger_count, fault_type)

    def run_faults_before_pre(self, cluster_state):
        '''
        @param cluster_state : sync/change_tracking/resync
        @description: 1. Cluster into change_tracking in case of resync.
                      Other cluster states are left untouched here.
        '''
        if cluster_state == 'resync':
            self.invoke_fault('filerep_consumer', 'fault')
            self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Change_tracking transition complete')

    def run_faults_before_trigger(self, checkpoint, cluster_state, fault_type):
        '''
        @param checkpoint : skip/noskip
        @param cluster_state : sync/change_tracking/resync
        @param fault_type : commit/abort (handed to inject_fault)
        @description : 1. Suspend resync faults. 2. Issue Checkpoint before the skip checkpoint, so that the bufferpool is cleared. 3. If skip issue 'skip checkpoint'. 4. Suspend transaction_faults based on test_type.
        '''
        if cluster_state == 'change_tracking':
            self.invoke_fault('filerep_consumer', 'fault')
            self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Change_tracking transition complete')

        if cluster_state == 'resync':
            self.invoke_fault('filerep_resync', 'suspend', role='primary')

            if checkpoint == 'skip':
                self.invoke_fault(
                    'filerep_transition_to_sync_before_checkpoint',
                    'suspend',
                    role='primary',
                    port=self.port,
                    occurence='0')
            rc = self.gprecover.incremental()
            if not rc:
                raise Exception('Gprecvoerseg failed')
            tinctest.logger.info('Cluster in resync state')

        PSQL.run_sql_command('CHECKPOINT;', dbname='postgres')
        if checkpoint == 'skip':
            self.invoke_fault('checkpoint',
                              'skip',
                              role='primary',
                              port=self.port,
                              occurence='0')
        self.inject_fault(fault_type)

        if cluster_state == 'resync':
            self.filereputil.inject_fault(f='filerep_resync',
                                          y='resume',
                                          r='primary')

        PSQL.wait_for_database_up()

    def run_crash_and_recover(self,
                              crash_type,
                              fault_type,
                              test_dir,
                              cluster_state='sync',
                              checkpoint='noskip'):
        '''
        @param crash_type : gpstop_i/gpstop_a/failover_to_mirror/failover_to_primary
        @param fault_type : commit/abort/end_prepare_two_phase_sleep
        @param test_dir : dir of the trigger sqls
        @description : Execute the specified crash type before/after resuming the suspended fault and recover
        '''
        trigger_status = self.check_trigger_sql_hang(test_dir)
        tinctest.logger.info('trigger_status %s' % trigger_status)
        # This sleep is needed till we get a way to find the state of all
        # suspended sqls
        sleep(50)
        if trigger_status == True:
            if cluster_state == 'resync':
                self.filereputil.inject_fault(
                    f='filerep_transition_to_sync_before_checkpoint',
                    y='resume',
                    r='primary')
                sleep(15)  # wait little before crash
            self.crash_and_recover(crash_type, fault_type, checkpoint,
                                   cluster_state)
        else:
            tinctest.logger.info('The fault_status is not triggered')

    def gprecover_rebalance(self):
        '''
        @description: Run rebalance through gpstop -air is much faster than gprecoverseg -r for test purpose.
        '''
        rc = self.gpstop.run_gpstop_cmd(immediate=True)
        if not rc:
            raise Exception('Failed to stop the cluster')
        tinctest.logger.info('Stopped cluster immediately')
        self.start_db()

    def run_gprecover(self, crash_type, cluster_state='sync'):
        '''Recover the cluster if required.

        Incremental recovery runs after either failover crash type or when
        the cluster state is change_tracking; failover_to_mirror is followed
        by a rebalance.
        '''
        needs_recovery = (crash_type in ('failover_to_primary',
                                         'failover_to_mirror')
                          or cluster_state == 'change_tracking')
        if needs_recovery:
            rc = self.gprecover.incremental()
            if not rc:
                raise Exception('Gprecvoerseg failed')
            if not self.gprecover.wait_till_insync_transition():
                raise Exception('Segments not in sync')
            tinctest.logger.info('Cluster in sync state')
            if crash_type == 'failover_to_mirror':
                self.gprecover_rebalance()
                tinctest.logger.info('Successfully Rebalanced the cluster')
        else:
            tinctest.logger.info(
                'No need to run gprecoverseg. The cluster should be already in sync'
            )

    def switch_ckpt_faults_before_trigger(self, cluster_state, fault_type):
        '''
        @param cluster_state : sync/change_tracking/resync
        @param fault_type : dtm_broadcast_prepare/dtm_broadcast_commit_prepared/dtm_xlog_distributed_commit
        '''
        if cluster_state in ('change_tracking', 'resync'):
            self.invoke_fault('filerep_consumer', 'fault')
            self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Change_tracking transition complete')

        if cluster_state == 'resync':
            self.invoke_fault('filerep_resync', 'suspend', role='primary')
            rc = self.gprecover.incremental()
            if not rc:
                raise Exception('Gprecvoerseg failed')
            tinctest.logger.info('Cluster in resync state')
        self.inject_fault(fault_type)

    def switch_ckpt_switch_xlog(self):
        '''
        @description: pg_switch_xlog on segments
        '''
        sql_cmd = 'select * from pg_switch_xlog();'
        num_primary = self.config.get_countprimarysegments()
        for i in range(num_primary):
            (host,
             port) = self.config.get_hostandport_of_segment(psegmentNumber=i)
            PSQL.run_sql_command_utility_mode(sql_cmd, host=host, port=port)

    def switch_checkpoint_loop(self, fault_type):
        '''     
        @description: Run switch_xlog and checkpoint based on the fault_type
        '''
        # One xlog switch for dtm_xlog_distributed_commit, five otherwise.
        rounds = 1 if fault_type == 'dtm_xlog_distributed_commit' else 5
        for _ in range(rounds):
            self.switch_ckpt_switch_xlog()

    def switch_ckpt_crash_and_recover(self,
                                      crash_type,
                                      fault_type,
                                      test_dir,
                                      cluster_state='sync',
                                      checkpoint='noskip'):
        '''
        @param crash_type : gpstop_i/gpstop_a/failover_to_mirror/failover_to_primary
        @param fault_type : dtm_broadcast_prepare/dtm_broadcast_commit_prepared/dtm_xlog_distributed_commit
        @param test_dir : dir of the trigger_sqls
        '''
        trigger_status = self.check_trigger_sql_hang(test_dir, fault_type)
        tinctest.logger.info('trigger_status %s' % trigger_status)

        if trigger_status == True:
            if cluster_state == 'resync':
                self.filereputil.inject_fault(f='filerep_resync',
                                              y='resume',
                                              r='primary')
                sleep(30)  #Give a little time before crash.
            self.crash_and_recover(crash_type, fault_type, checkpoint,
                                   cluster_state)
        else:
            tinctest.logger.info('The fault_status is not triggered')

    def cleanup_dangling_processes(self):
        '''
        @description: Since the test suspend transactions at different stages and does immediate shutdown, 
        few processes will not be cleaned up and eventually will eat up on the system resources
        This methods takes care of killing them at the end of each test, if such processes exists
        '''

        num_primary = self.config.get_countprimarysegments()
        for i in range(num_primary):
            (host,
             port) = self.config.get_hostandport_of_segment(psegmentNumber=i)
            grep_cmd = "ps -ef|grep %s|grep 'Distributed'" % port
            cmd = Command('Check for dangling process',
                          cmdStr='gpssh -h %s -e "%s" ' % (host, grep_cmd))
            cmd.run()
            result = cmd.get_results()
            # NOTE(review): presumably the first two output lines are not
            # real matches (e.g. command echo / the grep itself) - confirm.
            if len(result.stdout.splitlines()) > 2:
                # The backslash is doubled so the shell still receives \$2
                # without relying on an invalid Python escape sequence.
                grep_and_kill_cmd = grep_cmd + "|awk '{print \\$2}'|xargs kill -9"
                cmd = Command('Kill dangling processes',
                              cmdStr='gpssh -h %s -e "%s" ' %
                              (host, grep_and_kill_cmd))
                cmd.run()
                tinctest.logger.info('Killing the dangling processes')
示例#24
0
文件: __init__.py 项目: LJoNe/gpdb
class FtsTransitions(MPPTestCase):
    '''Helper class wrapping fault injection, segment recovery, gpstate and
    cluster restart utilities behind simple methods.'''

    def __init__(self, methodName):
        '''Create the utility objects used by the helpers and remember PGPORT.

        @param methodName : forwarded unchanged to MPPTestCase.__init__
        '''
        self.pgport = os.environ.get('PGPORT')
        self.fileutil = Filerepe2e_Util()
        self.gpconfig = GPDBConfig()
        self.gprecover = GpRecover(self.gpconfig)
        self.gpstate = Gpstate()
        self.gpprimarymirror = Gpprimarymirror()
        self.base = GPDBStorageBaseTestCase(self.gpconfig)
        super(FtsTransitions, self).__init__(methodName)

    def kill_first_mirror(self):
        '''Run kill -9, on the mirror's host, for every process whose ps line
        contains the data location of the content-0 mirror.'''
        mirror_data_loc = self.get_default_fs_loc(role='m', content=0)
        (host, _) = self.gpconfig.get_hostandport_of_segment(psegmentNumber=0, pRole='m')
        cmdString = "ps -ef|grep -v grep|grep '%s'|awk '{print $2}'|xargs kill -9" % mirror_data_loc
        # NOTE(review): ctxt=2 presumably selects remote execution - confirm.
        remote = Command(name='kill first mirror', cmdStr=cmdString, ctxt=2, remoteHost=host)
        remote.run()
        tinctest.logger.info('run command %s' % cmdString)
        results = remote.get_results()
        tinctest.logger.info('Command returning, rc: %s, result: %s' % (results.rc, results.stdout))

    def kill_master_process(self, ProcName=None):
        '''Run kill -9 locally for postgres processes whose ps line contains
        both PGPORT and ProcName.

        @param ProcName : text to grep for; when left as None the literal
               string 'None' ends up in the grep pattern.
        '''
        cmdString = "ps -ef|grep postgres| grep %s | grep '%s'| awk '{print $2}'|xargs kill -9" % (self.pgport, ProcName)
        cmd = Command('kill process on master', cmdStr=cmdString)
        cmd.run()
        tinctest.logger.info('run command %s' % cmdString)
        results = cmd.get_results()
        tinctest.logger.info('Command returning, rc: %s, result: %s' % (results.rc, results.stdout))

    def get_default_fs_loc(self, role='m', content=0):
        '''Return the fselocation of filespace oid 3052 for the segment with
        the given role and content id (first line of the query output).'''
        fs_sql = '''select fselocation from pg_filespace_entry
                    where fsefsoid = 3052 and fsedbid = (select dbid from gp_segment_configuration
                    where role = \'%s\' and content = %s);''' % (role, content)
        result = PSQL.run_sql_command(fs_sql, flags='-q -t', dbname='template1')
        return result.strip().split('\n')[0]

    def gpconfig_alter(self, type, bool):
        ''' Alter postgres configuration

        Appends filerep_inject_listener_fault=<bool> to postgresql.conf of
        every non-master (content != -1) primary or mirror segment via ssh.

        @param type : 'primary' or 'mirror'
        @param bool : 'true' or 'false' (as strings)
        @raise ValueError : for any other type/bool value; previously such
               values surfaced as an UnboundLocalError inside the loop.
        '''
        if bool not in ('true', 'false'):
            raise ValueError("bool must be 'true' or 'false', got %r" % (bool,))
        if type not in ('primary', 'mirror'):
            raise ValueError("type must be 'primary' or 'mirror', got %r" % (type,))

        fault_string = "filerep_inject_listener_fault=%s" % bool
        for record in self.gpconfig.record:
            if type == 'primary':
                selected = record.role and record.content != -1
            else:
                selected = (not record.role) and record.content != -1
            if not selected:
                continue
            fse_location = record.datadir
            run_shell_command("ssh %s 'echo %s >> %s/postgresql.conf'" % (record.hostname, fault_string, fse_location))
            tinctest.logger.info("\n ssh   %s   'echo %s  >>   %s/postgresql.conf'" % (record.hostname, fault_string, fse_location))
            tinctest.logger.info("\n  Done set %s in postgresql.conf on all primary segments" % fault_string)

    def set_faults(self, fault_name, type, role='mirror', port=None, occurence=None, sleeptime=None, seg_id=None):
        ''' Issue the fault with the given type (no reset is done first) '''
        self.fileutil.inject_fault(f=fault_name, y=type, r=role, p=port, o=occurence, sleeptime=sleeptime, seg_id=seg_id)

    def resume_faults(self, fault_name, role='mirror'):
        ''' Resume the fault issues '''
        self.fileutil.inject_fault(f=fault_name, y='resume', r=role)

    def run_validation(self):
        '''Run the mirror integrity check through a fresh DbStateClass.'''
        tinctest.logger.info('Veriy the integrity between primary and mirror ...')
        self.dbstate = DbStateClass('run_validation')
        self.dbstate.check_mirrorintegrity()

    def incremental_recoverseg(self, workerPool=False):
        '''Run incremental segment recovery; the result is not checked.'''
        self.gprecover.incremental(workerPool)

    def run_recoverseg_if_ct(self):
        '''Run incremental recovery when any node is reported in mode 'c'.'''
        num_down = self.gpconfig.count_of_nodes_in_mode('c')
        if int(num_down) > 0:
            self.incremental_recoverseg()

    def wait_till_change_tracking(self):
        '''Wait for the change tracking transition; the result is discarded.'''
        self.fileutil.wait_till_change_tracking_transition()

    def wait_till_insync(self):
        '''Wait for the in-sync transition; the result is discarded.'''
        self.gprecover.wait_till_insync_transition()

    def run_gpstate(self, type, phase):
        '''Delegate to Gpstate.run_gpstate(type, phase).'''
        self.gpstate.run_gpstate(type, phase)

    def run_gpprimarymirror(self):
        '''Delegate to Gpprimarymirror.run_gpprimarymirror().'''
        self.gpprimarymirror.run_gpprimarymirror()

    def verify_gpprimarymirror_output(self, total_resync=0, cur_resync=0):
        '''Assert that the gpprimarymirror output check succeeds for the given
        total and current resync counts.'''
        status = self.gpprimarymirror.verify_gpprimarymirror_output(total_resync, cur_resync)
        self.assertTrue(status, 'Total and Cur resync object count mismatch')

    def run_gpstate_shell_cmd(self, options):
        '''Delegate to Gpstate.run_gpstate_shell_cmd(options).'''
        self.gpstate.run_gpstate_shell_cmd(options)

    def verify_gpstate_output(self):
        '''Assert that the gpstate output check succeeds.'''
        status = self.gpstate.verify_gpstate_output()
        self.assertTrue(status, 'Total and Cur resync object count mismatch')

    def run_trigger_sql(self):
        ''' Run a sql statement to trigger postmaster reset '''
        PSQL.run_sql_file(local_path('test_ddl.sql'))

    def run_fts_test_ddl_dml(self):
        '''Run fts_test_ddl_dml.sql.'''
        PSQL.run_sql_file(local_path('fts_test_ddl_dml.sql'))

    def run_fts_test_ddl_dml_before_ct(self):
        '''Run fts_test_ddl_dml_before_ct.sql.'''
        PSQL.run_sql_file(local_path('fts_test_ddl_dml_before_ct.sql'))

    def run_fts_test_ddl_dml_ct(self):
        '''Run fts_test_ddl_dml_ct.sql.'''
        PSQL.run_sql_file(local_path('fts_test_ddl_dml_ct.sql'))

    def run_sql_in_background(self):
        '''Drop and recreate table bar with background=True.'''
        PSQL.run_sql_command('drop table if exists bar; create table bar(i int);', background=True)

    def sleep_for_transition(self):
        '''Sleep 100 seconds to let the transition to change tracking finish.'''
        #gp_segment_connect_timeout is set to 10s , still need a little more time than that to complete the transition to ct
        sleep(100)

    def restart_db(self):
        '''Stop and start the database through GPDBStorageBaseTestCase.'''
        self.base.stop_db()
        self.base.start_db()

    def stop_db_with_no_rc_check(self):
        ''' Gpstop and dont check for rc '''
        cmd = Command('Gpstop_a', 'gpstop -a')
        tinctest.logger.info('Executing command: gpstop -a')
        cmd.run()

    def start_db_with_no_rc_check(self):
        ''' Gpstart and dont check for rc '''
        cmd = Command('Gpstart_a', 'gpstart -a')
        tinctest.logger.info('Executing command: gpstart -a')
        cmd.run()

    def restart_db_with_no_rc_check(self):
        '''gpstop -a followed by gpstart -a, ignoring both return codes.'''
        self.stop_db_with_no_rc_check()
        self.start_db_with_no_rc_check()

    def set_gpconfig(self, param, value):
        ''' Set the configuration parameter using gpconfig '''
        command = "gpconfig -c %s -v %s --skipvalidation " % (param, value)
        run_shell_command(command)
        # Restart the database after changing the parameter.
        self.restart_db()

    def check_db(self):
        '''Delegate to checkDBUp().'''
        checkDBUp()

    def check_fault_status(self, fault_name, seg_id=None, role=None):
        '''Assert that the fault reaches status 'triggered' (max_cycle=20).'''
        status = self.fileutil.check_fault_status(fault_name=fault_name, status='triggered', max_cycle=20, role=role, seg_id=seg_id)
        self.assertTrue(status, 'The fault is not triggered in the time expected')

    def cluster_state(self):
        '''Assert that GPDBConfig.is_not_insync_segments() returns a true value.'''
        # NOTE(review): despite its name the config call is asserted to be
        # true for a healthy cluster - confirm its return convention.
        state = self.gpconfig.is_not_insync_segments()
        self.assertTrue(state, 'The cluster is not up and in sync')
示例#25
0
文件: __init__.py 项目: shwu/gpdb
class PgtwoPhaseClass(MPPTestCase):
    '''Helper class for pg_twophase supporting functions '''

    def __init__(self,methodName):
        '''
        Create the helper objects used by the pg_twophase tests, read PGPORT
        from the environment, then initialise the base test case.

        @param methodName : forwarded to the base class constructor
        '''
        self.filereputil = Filerepe2e_Util()
        cluster_config = GPDBConfig()
        self.config = cluster_config
        # The helpers below share the single GPDBConfig instance created above.
        self.gprecover = GpRecover(cluster_config)
        self.gpstop = GpStop()
        self.gpstart = GpStart()
        self.gpfile = Gpfilespace(cluster_config)
        self.gpverify = GpdbVerify(config=cluster_config)
        self.dbstate = DbStateClass('run_validation', cluster_config)
        self.port = os.getenv('PGPORT')
        super(PgtwoPhaseClass,self).__init__(methodName)

    def invoke_fault(self, fault_name, type, role='mirror', port=None, occurence=None, sleeptime=None, seg_id=None):
        '''
        @param fault_name : name of the fault to inject
        @param type : fault type to issue after the reset
        @description : Issue the fault with type 'reset' first and then with the given type;
                       role, port, occurence, sleeptime and seg_id are forwarded to both calls
        '''
        for fault_mode in ('reset', type):
            self.filereputil.inject_fault(f=fault_name, y=fault_mode, r=role, p=port, o=occurence,
                                          sleeptime=sleeptime, seg_id=seg_id)
        tinctest.logger.info('Successfully injected fault_name : %s fault_type : %s' % (fault_name, type))

    def inject_fault(self, fault_type):
        '''
        @param fault_type : end_prepare_two_phase_sleep/abort/commit/dtm_broadcast_prepare/
                            dtm_broadcast_commit_prepared/dtm_xlog_distributed_commit
        @description : Inject the fault(s) that belong to fault_type. Any other value is ignored.
        '''
        dtm_faults = ('dtm_broadcast_prepare',
                      'dtm_broadcast_commit_prepared',
                      'dtm_xlog_distributed_commit')

        if fault_type == 'end_prepare_two_phase_sleep':
            self.filereputil.inject_fault(f='end_prepare_two_phase_sleep', sleeptime='1000', y='sleep', r='primary', p=self.port)
            tinctest.logger.info('Injected fault to sleep in end_prepare_two_phase')
            return

        if fault_type == 'abort':
            # For the abort case an additional 'error' fault is injected to fake a segment
            # that does not respond, which can make the master trigger an abort transaction.
            self.invoke_fault('transaction_abort_after_distributed_prepared', 'error', port=self.port, occurence='0', seg_id='1')
            self.invoke_fault('twophase_transaction_abort_prepared', 'suspend', role='primary', port=self.port, occurence='0')
            return

        if fault_type == 'commit':
            self.invoke_fault('twophase_transaction_commit_prepared', 'suspend', role='primary', port=self.port, occurence='0')
            return

        if fault_type in dtm_faults:
            # The dtm faults are named exactly like their fault_type.
            self.invoke_fault(fault_type, 'suspend', seg_id='1', port=self.port, occurence='0')

    def resume_faults(self, fault_type, cluster_state='sync'):
        '''
        @param fault_type : commit/abort/end_prepare_two_phase_sleep/dtm_broadcast_prepare/dtm_broadcast_commit_prepared/dtm_xlog_distributed_commit
        @param cluster_state : for 'abort', the extra error fault is reset unless this is 'resync'
        @description : Resume the suspended fault for fault_type, then block until
                       pg_stat_activity reports a single row
        '''
        dtm_faults = ('dtm_broadcast_prepare',
                      'dtm_broadcast_commit_prepared',
                      'dtm_xlog_distributed_commit')
        tinctest.logger.info('coming to resume faults with xact %s' % fault_type) 

        if fault_type == 'abort':
            self.filereputil.inject_fault(f='twophase_transaction_abort_prepared', y='resume', r='primary', p=self.port, o='0')
            if cluster_state != 'resync':
                self.filereputil.inject_fault(f='transaction_abort_after_distributed_prepared', y='reset', p=self.port, o='0', seg_id='1')
        elif fault_type == 'commit':
            self.filereputil.inject_fault(f='twophase_transaction_commit_prepared', y='resume', r='primary', p=self.port, o='0')
        elif fault_type in dtm_faults:
            if fault_type == 'dtm_broadcast_commit_prepared':
                tinctest.logger.info('coming to if dtm_broadcast_commit_prepared')
            # The dtm faults are named exactly like their fault_type.
            self.filereputil.inject_fault(f=fault_type, y='resume', seg_id='1', p=self.port, o='0')
        else:
            tinctest.logger.info('No faults to resume')
        tinctest.logger.info('Resumed the suspended transaction fault')

        # Wait till all the trigger_sqls are complete before returning.
        # NOTE(review): this loop has no upper bound on the number of polls.
        activity_sql = 'select count(*) from pg_stat_activity;'
        remaining = PSQL.run_sql_command(activity_sql, flags='-q -t', dbname='postgres')
        while remaining.strip() != '1':
            sleep(5)
            remaining = PSQL.run_sql_command(activity_sql, flags='-q -t', dbname='postgres')
            tinctest.logger.info('stat_activity count %s ' % remaining)
        return

    def start_db(self):
        '''
        @description : Start the cluster through the GpStart helper
        @raise Exception : when run_gpstart_cmd() returns a false value
        '''
        if not self.gpstart.run_gpstart_cmd():
            raise Exception('Failed to start the cluster')
        tinctest.logger.info('Started the cluster successfully')

    def stop_db(self):
        ''' Run "gpstop -a"; the command result is never inspected '''
        gpstop = Command('Gpstop_a', 'gpstop -a')
        tinctest.logger.info('Executing command: gpstop -a')
        gpstop.run()

    def crash_and_recover(self, crash_type, fault_type, checkpoint='noskip', cluster_state='sync'):
        '''
        @param crash_type : gpstop_i/gpstop_a/failover_to_primary/failover_to_mirror (other values do nothing)
        @param fault_type : fault that was suspended earlier and is resumed here where applicable
        @param checkpoint : skip/noskip
        @param cluster_state : when 'resync', wait for the segments to get in sync at the end
        @note: when skip checkpoint is enabled, gpstop -a returns a non-rc return code and fails in the library. To workaround, using a local function
        '''
        dtm_faults = ('dtm_broadcast_prepare', 'dtm_broadcast_commit_prepared', 'dtm_xlog_distributed_commit')

        if crash_type == 'gpstop_i':
            if not self.gpstop.run_gpstop_cmd(immediate = True):
                raise Exception('Failed to stop the cluster')
            tinctest.logger.info('Stopped cluster immediately')
            self.start_db()

        elif crash_type == 'gpstop_a':
            self.resume_faults(fault_type, cluster_state)
            if checkpoint == 'skip':
                # Local stop helper: it does not look at the gpstop return code.
                self.stop_db()
            elif not self.gpstop.run_gpstop_cmd():
                raise Exception('Failed to stop the cluster')
            tinctest.logger.info('Smart stop completed')
            self.start_db()

        elif crash_type == 'failover_to_primary':
            self.invoke_fault('filerep_consumer', 'fault')
            self.resume_faults(fault_type, cluster_state)
            (rc, num_down) = self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Value of rc and num_down %s, %s, %s' % (rc, num_down, fault_type))

        elif crash_type == 'failover_to_mirror':
            self.invoke_fault('postmaster', 'panic', role='primary')
            if fault_type in dtm_faults:
                self.resume_faults(fault_type, cluster_state)
            (rc, num_down) = self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Value of rc and num_down %s, %s' % (rc, num_down))
            if fault_type == 'abort':
                self.filereputil.inject_fault(f='transaction_abort_after_distributed_prepared', y='reset', p=self.port, o='0', seg_id='1')

        if cluster_state == 'resync' and not self.gprecover.wait_till_insync_transition():
            raise Exception('Segments not in sync')

    def get_trigger_status_old(self, trigger_count):
        '''
        @param trigger_count : number of trigger sqls that are expected to be running
        @return : True as soon as the pg_stat_activity row count is at least trigger_count,
                  False when that is still not the case after the last attempt
        @description : Compare the pg_stat_activity count with the total number of trigger_sqls executed.
                       The count is polled up to 49 times. Previously the comparison sat outside the
                       loop, so the query was repeated 49 times and only the last result was checked.
        '''
        reached = False
        psql_count = ''
        for _ in range(1, 50):
            psql_count = PSQL.run_sql_command('select count(*) from pg_stat_activity;', flags='-q -t', dbname='postgres')
            if int(psql_count.strip()) >= trigger_count:
                reached = True
                break
        tinctest.logger.info('Count of trigger sqls %s' % psql_count)
        if not reached:
            tinctest.logger.info('coming to the if loop in get_trigger_status')
        return reached

    def get_trigger_status(self, trigger_count, fault_type):
        '''
        @param trigger_count : number of trigger sqls expected
        @param fault_type : fault to check, or None to fall back to the pg_stat_activity count
        @return : result of get_trigger_status_old() when fault_type is None, otherwise the result
                  of check_fault_status() for fault_type on seg_id '1' with num_times_hit=trigger_count
        '''
        # Identity test: None is a singleton, "== None" can be fooled by a custom __eq__.
        if fault_type is None:
            return self.get_trigger_status_old(trigger_count)

        return self.filereputil.check_fault_status(fault_name=fault_type, status="triggered", seg_id='1', num_times_hit=trigger_count)

    def check_trigger_sql_hang(self, test_dir, fault_type = None):
        '''
        @param test_dir : comma separated list of test directories; the *.sql files under
                          <dir>/trigger_sql/sql/ of each one are counted
        @param fault_type : forwarded to get_trigger_status() together with the count
        @return : whatever get_trigger_status() returns for the counted trigger sqls
        @description : Return the status of the trigger sqls: whether they are waiting on the fault.
        When no fault_type is given, gpfaultinjector has no way to check if all the sqls are
        triggered, so get_trigger_status() compares a count(*) on pg_stat_activity with the
        total number of trigger_sqls.
        '''
        trigger_count = 0
        for test_name in test_dir.split(","):
            trigger_dir = local_path('%s/trigger_sql/sql/' % (test_name))
            # Public glob API instead of the undocumented glob.glob1 helper;
            # only the number of matches is needed.
            trigger_count += len(glob.glob(os.path.join(trigger_dir, "*.sql")))
        tinctest.logger.info('Total number of sqls to trigger %d in %s' % (trigger_count, test_dir))
        return self.get_trigger_status(trigger_count, fault_type)


    def run_faults_before_pre(self, cluster_state):
        '''
        @param cluster_state : sync/change_tracking/resync
        @description: For 'resync' only, push the cluster into change_tracking by injecting the
                      filerep_consumer fault and waiting for the transition. Other states are a no-op.
        '''
        if cluster_state != 'resync':
            return
        self.invoke_fault('filerep_consumer', 'fault')
        self.filereputil.wait_till_change_tracking_transition()
        tinctest.logger.info('Change_tracking transition complete')

    def run_faults_before_trigger(self, checkpoint, cluster_state, fault_type):
        '''
        @param checkpoint : skip/noskip
        @param cluster_state : sync/change_tracking/resync
        @param fault_type : value handed to inject_fault() (e.g. commit/abort)
        @description : 1. For change_tracking, move the cluster into change tracking. 2. For resync, suspend
                       filerep_resync (and the transition-to-sync checkpoint when skipping) and start an
                       incremental recovery. 3. Issue Checkpoint before the skip checkpoint, so that the
                       bufferpool is cleared. 4. If skip issue 'skip checkpoint'. 5. Inject the transaction
                       fault for fault_type. 6. For resync, resume filerep_resync. 7. Wait for the database.
        @raise Exception : when the incremental recovery reports failure
        '''
        # change_tracking: fail the mirror-side consumer and wait for the transition.
        if cluster_state == 'change_tracking':
            self.invoke_fault('filerep_consumer', 'fault')
            self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Change_tracking transition complete')

        # resync: suspend resync first so that the recovery started below stays in resync.
        if cluster_state == 'resync':
            self.invoke_fault('filerep_resync', 'suspend', role='primary')

            if checkpoint == 'skip':
                self.invoke_fault('filerep_transition_to_sync_before_checkpoint', 'suspend', role='primary', port=self.port, occurence='0')
            rc = self.gprecover.incremental()
            if not rc:
                raise Exception('Gprecvoerseg failed')
            tinctest.logger.info('Cluster in resync state')

        # A regular checkpoint is always issued before the optional 'skip' fault.
        PSQL.run_sql_command('CHECKPOINT;', dbname='postgres')
        if checkpoint == 'skip':
            self.invoke_fault('checkpoint', 'skip', role='primary', port= self.port, occurence='0')
        self.inject_fault(fault_type)

        # Let resync continue now that the transaction fault is in place.
        if cluster_state == 'resync':
            self.filereputil.inject_fault(f='filerep_resync', y='resume', r='primary')

        PSQL.wait_for_database_up();

    def run_crash_and_recover(self, crash_type, fault_type, test_dir, cluster_state='sync', checkpoint='noskip'):
        '''
        @param crash_type : gpstop_i/gpstop_a/failover_to_mirror/failover_to_primary
        @param fault_type : commit/abort/end_prepare_two_phase_sleep
        @param test_dir : dir of the trigger sqls
        @param cluster_state : for 'resync' the transition-to-sync fault is resumed before the crash
        @param checkpoint : skip/noskip, forwarded to crash_and_recover()
        @description : Execute the specified crash type before/after resuming the suspended fault and recover.
                       Nothing is crashed when the trigger sqls are not reported as hanging.
        '''
        hung = self.check_trigger_sql_hang(test_dir)
        tinctest.logger.info('trigger_status %s' % hung)
        sleep(50) # This sleep is needed till we get a way to find the state of all suspended sqls
        if not (hung == True):
            tinctest.logger.info('The fault_status is not triggered')
            return

        if cluster_state == 'resync':
            self.filereputil.inject_fault(f='filerep_transition_to_sync_before_checkpoint', y='resume', r='primary')
            sleep(15) # wait little before crash
        self.crash_and_recover(crash_type, fault_type, checkpoint, cluster_state)
    
    def gprecover_rebalance(self):
        '''
        @description: Run gprecoverseg -r -a. If its rc is not 0, fall back to an incremental
                      recovery through self.gprecover (workaround for known open issues).
        @return : True when either step succeeded, False otherwise
        '''
        rebalance = Command(name='Run gprecoverseg', cmdStr='gprecoverseg -r -a')
        tinctest.logger.info('Running %s' % rebalance.cmdStr)
        rebalance.run(validateAfter=False)
        if rebalance.get_results().rc == 0:
            return True
        # Rebalance failed: success now depends on the incremental recovery.
        if self.gprecover.incremental():
            return True
        return False

    def run_gprecover(self, crash_type, cluster_state='sync'):
        '''
        @param crash_type : recovery runs for failover_to_primary/failover_to_mirror
        @param cluster_state : recovery also runs when this is 'change_tracking'
        @description : Recover the cluster if required; after failover_to_mirror also rebalance it.
        @raise Exception : when recovery, rebalance or the wait for in-sync fails
        '''
        needs_recovery = (crash_type in ('failover_to_primary', 'failover_to_mirror')
                          or cluster_state == 'change_tracking')
        if not needs_recovery:
            tinctest.logger.info('No need to run gprecoverseg. The cluster should be already in sync')
            return

        if not self.gprecover.incremental():
            raise Exception('Gprecvoerseg failed')
        if not self.gprecover.wait_till_insync_transition():
            raise Exception('Segments not in sync')
        tinctest.logger.info('Cluster in sync state')

        if crash_type != 'failover_to_mirror':
            return

        # gprecover.rebalance() is not used here: -r has issues occasionally and may
        # need another gprecoverseg, so the local helper is called instead.
        if not self.gprecover_rebalance():
            raise Exception('Rebalance failed')
        if not self.gprecover.wait_till_insync_transition():
            raise Exception('Segments not in sync')
        tinctest.logger.info('Successfully Rebalanced the cluster')


    def switch_ckpt_faults_before_trigger(self, cluster_state, fault_type):
        '''
        @param cluster_state : sync/change_tracking/resync
        @param fault_type : dtm_broadcast_prepare/dtm_broadcast_commit_prepared/dtm_xlog_distributed_commit
        @description : Bring the cluster to the requested state and then inject the fault for fault_type
        @raise Exception : when the incremental recovery for 'resync' reports failure
        '''
        wants_resync = (cluster_state == 'resync')

        if wants_resync or cluster_state == 'change_tracking':
            self.invoke_fault('filerep_consumer', 'fault')
            self.filereputil.wait_till_change_tracking_transition()
            tinctest.logger.info('Change_tracking transition complete') 

        if wants_resync:
            # Suspend resync before recovery is started.
            self.invoke_fault('filerep_resync', 'suspend', role='primary')
            if not self.gprecover.incremental():
                raise Exception('Gprecvoerseg failed')
            tinctest.logger.info('Cluster in resync state')

        self.inject_fault(fault_type)

    def switch_ckpt_switch_xlog(self):
        '''
        @description: Run pg_switch_xlog() in utility mode on every primary segment
        '''
        switch_sql = 'select * from pg_switch_xlog();'
        for seg_no in range(self.config.get_countprimarysegments()):
            seg_host, seg_port = self.config.get_hostandport_of_segment(psegmentNumber=seg_no)
            PSQL.run_sql_command_utility_mode(switch_sql, host=seg_host, port=seg_port)

    def switch_checkpoint_loop(self, fault_type):
        '''
        @param fault_type : 'dtm_xlog_distributed_commit' switches the xlog once, anything else five times
        @description: Run switch_ckpt_switch_xlog() a number of times that depends on fault_type
        '''
        rounds = 1 if fault_type == 'dtm_xlog_distributed_commit' else 5
        for _ in range(rounds):
            self.switch_ckpt_switch_xlog()

    def switch_ckpt_crash_and_recover(self, crash_type, fault_type, test_dir, cluster_state='sync', checkpoint='noskip'):
        '''
        @param crash_type : gpstop_i/gpstop_a/failover_to_mirror/failover_to_primary
        @param fault_type : dtm_broadcast_prepare/dtm_broadcast_commit_prepared/dtm_xlog_distributed_commit
        @param test_dir : dir of the trigger_sqls
        @param cluster_state : for 'resync' the filerep_resync fault is resumed before the crash
        @param checkpoint : skip/noskip, forwarded to crash_and_recover()
        @description : Crash and recover only when the trigger sqls are reported as hanging on the fault
        '''
        hung = self.check_trigger_sql_hang(test_dir, fault_type)
        tinctest.logger.info('trigger_status %s' % hung)

        if not (hung == True):
            tinctest.logger.info('The fault_status is not triggered')
            return

        if cluster_state == 'resync':
            self.filereputil.inject_fault(f='filerep_resync', y='resume', r='primary')
            sleep(30) #Give a little time before crash.
        self.crash_and_recover(crash_type, fault_type, checkpoint, cluster_state)
    
   
    def cleanup_dangling_processes(self):
        '''
        @description: Since the test suspends transactions at different stages and does immediate shutdown,
        few processes will not be cleaned up and eventually will eat up the system resources.
        This method kills them at the end of each test, if such processes exist.
        For every primary segment it greps the ps listing on the segment host (through gpssh) for
        the segment port and 'Distributed', and kill -9s the matches when the listing has more than two lines.
        '''
        for seg_no in range(self.config.get_countprimarysegments()):
            (host, port) = self.config.get_hostandport_of_segment(psegmentNumber=seg_no)

            probe_pipeline = "ps -ef|grep %s|grep 'Distributed'" % port
            probe = Command('Check for dangling process', cmdStr = 'gpssh -h %s -e "%s" ' % (host, probe_pipeline))
            probe.run()
            listing = probe.get_results().stdout.splitlines()
            # NOTE(review): the two tolerated lines are presumably gpssh/grep noise - confirm.
            if len(listing) <= 2:
                continue

            # The backslash keeps $2 away from the shell inside gpssh's double quotes.
            kill_pipeline = "ps -ef|grep %s|grep 'Distributed'|awk '{print \\$2}'|xargs kill -9" % port
            killer = Command('Kill dangling processes', cmdStr='gpssh -h %s -e "%s" ' % (host, kill_pipeline))
            killer.run()
            tinctest.logger.info('Killing the dangling processes') 
示例#26
0
   def initial_setup(self):
       '''
       @description : Generate the dml sql, expected-answer and gpload yaml files from their
                      '.in' templates by substituting host, database, user, port and path
                      placeholders. Afterwards remove /tmp/quote.csv on the first primary
                      segment host and scp a fresh copy of quote.csv there. A failure of
                      the scp is logged, not raised.
       '''
       def rewrite(template, target, transform):
           # Copy template to target line by line through transform(). The with
           # blocks close both files even when a read, write or transform fails.
           with open(template, 'r') as src:
               with open(target, 'w') as dst:
                   for line in src:
                       dst.write(transform(line))

       def gpl_yaml_line(line):
           # Substitution rules for gpl.yaml; the first matching rule wins.
           if 'DATABASE' in line:
               return line.replace('tangp3', os.environ.get('PGDATABASE'))
           if 'USER' in line:
               return line.replace('tangp3', os.environ.get('USER'))
           if 'HOST' in line:
               return line.replace('rh55-qavm61', socket.gethostname())
           if 'PORT' in line and '5432' in line:
               return line.replace('5432', os.environ.get('PGPORT'))
           if 'mydata' in line:
               return line.replace('mydata', local_path('dml/sql/gpload/mydata'))
           return line

       keyword = 'rh55-qavm65'
       config = GPDBConfig()
       (seg_host, _) = config.get_hostandport_of_segment(psegmentNumber = 0, pRole = 'p')
       cur_path = local_path('')
       sql_dir = os.path.join(cur_path, 'dml', 'sql')
       expected_dir = os.path.join(cur_path, 'dml', 'expected')
       yaml_path = local_path('dml/sql/config/gpl.yaml')
       tenk_path = local_path('dml/sql/_data/tenk.data')

       def use_seg_host(line):
           return line.replace(keyword, seg_host)

       def use_yaml_path(line):
           return line.replace('gpl.yaml', yaml_path)

       def use_tenk_path(line):
           # replace() leaves lines without 'tenk.data' untouched.
           return line.replace('tenk.data', tenk_path)

       # External table tests: point the sql and the answer file at the segment host.
       rewrite(os.path.join(sql_dir, 'insert_from_external.sql.in'),
               os.path.join(sql_dir, 'insert_from_external.sql'), use_seg_host)
       rewrite(os.path.join(expected_dir, 'insert_from_external.ans.in'),
               os.path.join(expected_dir, 'insert_from_external.ans'), use_seg_host)

       # gpload tests: sql, yaml control file and answer file.
       rewrite(os.path.join(sql_dir, 'insert_with_gpload.sql.in'),
               os.path.join(sql_dir, 'insert_with_gpload.sql'), use_yaml_path)
       rewrite(os.path.join(sql_dir, 'config', 'gpl.yaml.in'),
               os.path.join(sql_dir, 'config', 'gpl.yaml'), gpl_yaml_line)
       rewrite(os.path.join(expected_dir, 'insert_with_gpload.ans.in'),
               os.path.join(expected_dir, 'insert_with_gpload.ans'), use_yaml_path)

       # copy tests: sql and answer file get the local path of tenk.data.
       rewrite(os.path.join(sql_dir, 'select_from_copy_table.sql.in'),
               os.path.join(sql_dir, 'select_from_copy_table.sql'), use_tenk_path)
       rewrite(os.path.join(expected_dir, 'select_from_copy_table.ans.in'),
               os.path.join(expected_dir, 'select_from_copy_table.ans'), use_tenk_path)

       external_table = local_path('dml/sql/_data/quote.csv')
       clean_file = 'rm -rf /tmp/quote.csv'
       rmt_cmd = "gpssh -h %s -e '%s' " % (seg_host, clean_file)
       cmd = Command(name='Running a remote command', cmdStr = rmt_cmd)
       cmd.run(validateAfter=False)
       command = 'scp %s %s:/tmp'%(external_table,seg_host)
       cmd = Command(name='run %s'%command, cmdStr = '%s' % command)
       try:
           cmd.run(validateAfter=True)
       except Exception as e:
           # Best effort: a failed copy is only logged.
           tinctest.logger.error("Error running command %s\n" % e)
示例#27
0
class FtsTransitions(MPPTestCase):
    def __init__(self, methodName):
        '''
        Read PGPORT from the environment, create the helper objects used by
        the FTS transition tests, then initialise the base test case.

        @param methodName : forwarded to the base class constructor
        '''
        self.pgport = os.environ.get('PGPORT')
        self.fileutil = Filerepe2e_Util()
        cluster_config = GPDBConfig()
        self.gpconfig = cluster_config
        # The recover and storage helpers share the GPDBConfig instance created above.
        self.gprecover = GpRecover(cluster_config)
        self.gpstate = Gpstate()
        self.gpprimarymirror = Gpprimarymirror()
        self.base = GPDBStorageBaseTestCase(cluster_config)
        super(FtsTransitions, self).__init__(methodName)

    def kill_first_mirror(self):
        '''
        kill -9 every process on the content-0 mirror host whose ps entry
        contains the mirror's default filespace location; rc and stdout of
        the remote command are logged.
        '''
        mirror_data_loc = self.get_default_fs_loc(role='m', content=0)
        host, _ = self.gpconfig.get_hostandport_of_segment(psegmentNumber=0, pRole='m')
        kill_cmd = "ps -ef|grep -v grep|grep '%s'|awk '{print $2}'|xargs kill -9" % mirror_data_loc
        remote = Command(name='kill first mirror', cmdStr=kill_cmd, ctxt=2, remoteHost=host)
        remote.run()
        tinctest.logger.info('run command %s' % kill_cmd)
        outcome = remote.get_results()
        tinctest.logger.info('Command returning, rc: %s, result: %s' % (outcome.rc, outcome.stdout))

    def kill_master_process(self, ProcName=None):
        '''
        kill -9 the local postgres processes whose ps entry contains both the
        master port (PGPORT) and ProcName; rc and stdout are logged.

        @param ProcName : text to grep for in the ps listing
        '''
        kill_cmd = "ps -ef|grep postgres| grep %s | grep '%s'| awk '{print $2}'|xargs kill -9" % (self.pgport, ProcName)
        killer = Command('kill process on master', cmdStr=kill_cmd)
        killer.run()
        tinctest.logger.info('run command %s' % kill_cmd)
        outcome = killer.get_results()
        tinctest.logger.info('Command returning, rc: %s, result: %s' % (outcome.rc, outcome.stdout))

    def get_default_fs_loc(self, role='m', content=0):
        '''
        Query pg_filespace_entry (fsefsoid 3052) for the location that belongs
        to the segment with the given role and content id.

        @param role : value matched against gp_segment_configuration.role
        @param content : value matched against gp_segment_configuration.content
        @return : first line of the stripped psql output
        '''
        fs_sql = '''select fselocation from pg_filespace_entry
                    where fsefsoid = 3052 and fsedbid = (select dbid from gp_segment_configuration
                    where role = \'%s\' and content = %s);''' % (role, content)
        output = PSQL.run_sql_command(fs_sql, flags='-q -t', dbname='template1')
        return output.strip().split('\n')[0]

    def gpconfig_alter(self, type, bool):
        '''
        Append filerep_inject_listener_fault=<bool> to postgresql.conf of the
        selected segments (over ssh, one segment at a time).

        @param type : 'primary' selects records with a true role, 'mirror' records with a
                      false role; records with content -1 are always skipped
        @param bool : 'true' or 'false' (as strings)
        @raise ValueError : when type or bool is not one of the accepted values. Previously
                            such values surfaced as an UnboundLocalError inside the loop.
        '''
        # NOTE: the parameter names shadow the builtins, so type()/bool() are not usable here.
        if bool not in ('true', 'false'):
            raise ValueError("bool must be 'true' or 'false', got %r" % (bool,))
        if type not in ('primary', 'mirror'):
            raise ValueError("type must be 'primary' or 'mirror', got %r" % (type,))

        fault_string = "filerep_inject_listener_fault=%s" % bool
        want_primary = (type == 'primary')
        updated = 0
        for record in self.gpconfig.record:
            if record.content == -1:
                continue
            record_is_primary = True if record.role else False
            if record_is_primary != want_primary:
                continue
            fse_location = record.datadir
            run_shell_command('ssh ' + record.hostname + ' \'echo ' +
                              fault_string + ' >> ' + fse_location +
                              '/postgresql.conf\'')
            tinctest.logger.info(
                "\n ssh   %s   'echo %s  >>   %s/postgresql.conf'" %
                (record.hostname, fault_string, fse_location))
            updated += 1
        # Report completion once, and for the segment type that was actually changed.
        if updated:
            tinctest.logger.info(
                "\n  Done set %s in postgresql.conf on all %s segments" %
                (fault_string, type))

    def set_faults(self,
                   fault_name,
                   type,
                   role='mirror',
                   port=None,
                   occurence=None,
                   sleeptime=None,
                   seg_id=None):
        '''
        Issue the named fault with the given type through the fault utility.
        No reset is issued first; all arguments are forwarded as they are.
        '''
        fault_args = dict(f=fault_name, y=type, r=role, p=port, o=occurence,
                          sleeptime=sleeptime, seg_id=seg_id)
        self.fileutil.inject_fault(**fault_args)

    def resume_faults(self, fault_name, role='mirror'):
        ''' Issue the named fault with type 'resume' for the given role '''
        resume_args = {'f': fault_name, 'y': 'resume', 'r': role}
        self.fileutil.inject_fault(**resume_args)

    def run_validation(self):
        '''
        Create a fresh DbStateClass, keep it on self.dbstate and run its
        mirror integrity check.
        '''
        tinctest.logger.info('Veriy the integrity between primary and mirror ...')
        validator = DbStateClass('run_validation')
        self.dbstate = validator
        validator.check_mirrorintegrity()

    def incremental_recoverseg(self, workerPool=False):
        '''
        Run an incremental recovery through self.gprecover; its result is discarded.

        @param workerPool : forwarded to GpRecover.incremental()
        '''
        recoverer = self.gprecover
        recoverer.incremental(workerPool)

    def run_recoverseg_if_ct(self):
        ''' Run an incremental recovery when at least one node is counted in mode 'c' '''
        ct_nodes = int(self.gpconfig.count_of_nodes_in_mode('c'))
        if ct_nodes <= 0:
            return
        self.incremental_recoverseg()

    def wait_till_change_tracking(self):
        ''' Delegate to the fault utility's change tracking wait; its result is discarded '''
        fault_util = self.fileutil
        fault_util.wait_till_change_tracking_transition()

    def wait_till_insync(self):
        ''' Delegate to GpRecover's in-sync wait; its result is discarded '''
        recoverer = self.gprecover
        recoverer.wait_till_insync_transition()

    def run_gpstate(self, type, phase):
        ''' Forward type and phase to Gpstate.run_gpstate(); its result is discarded '''
        state_runner = self.gpstate
        state_runner.run_gpstate(type, phase)

    def run_gpprimarymirror(self):
        ''' Delegate to Gpprimarymirror.run_gpprimarymirror(); its result is discarded '''
        runner = self.gpprimarymirror
        runner.run_gpprimarymirror()

    def verify_gpprimarymirror_output(self, total_resync=0, cur_resync=0):
        '''
        Assert that Gpprimarymirror.verify_gpprimarymirror_output() accepts
        the given resync object counts.

        @param total_resync : forwarded as first argument
        @param cur_resync : forwarded as second argument
        '''
        self.assertTrue(
            self.gpprimarymirror.verify_gpprimarymirror_output(total_resync, cur_resync),
            'Total and Cur resync object count mismatch')

    def run_gpstate_shell_cmd(self, options):
        ''' Forward options to Gpstate.run_gpstate_shell_cmd(); its result is discarded '''
        state_runner = self.gpstate
        state_runner.run_gpstate_shell_cmd(options)

    def verify_gpstate_output(self):
        ''' Assert that Gpstate.verify_gpstate_output() returns a true value '''
        self.assertTrue(self.gpstate.verify_gpstate_output(),
                        'Total and Cur resync object count mismatch')

    def run_trigger_sql(self):
        ''' Run test_ddl.sql through PSQL to trigger a postmaster reset '''
        sql_file = local_path('test_ddl.sql')
        PSQL.run_sql_file(sql_file)

    def run_fts_test_ddl_dml(self):
        ''' Run the fts_test_ddl_dml.sql file through PSQL '''
        sql_file = local_path('fts_test_ddl_dml.sql')
        PSQL.run_sql_file(sql_file)

    def run_fts_test_ddl_dml_before_ct(self):
        ''' Run the fts_test_ddl_dml_before_ct.sql file through PSQL '''
        sql_file = local_path('fts_test_ddl_dml_before_ct.sql')
        PSQL.run_sql_file(sql_file)

    def run_fts_test_ddl_dml_ct(self):
        ''' Run the fts_test_ddl_dml_ct.sql file through PSQL '''
        sql_file = local_path('fts_test_ddl_dml_ct.sql')
        PSQL.run_sql_file(sql_file)

    def run_sql_in_background(self):
        ''' Drop and recreate table bar through PSQL with background=True '''
        ddl = 'drop table if exists bar; create table bar(i int);'
        PSQL.run_sql_command(ddl, background=True)

    def sleep_for_transition(self):
        ''' Sleep for a fixed 100 seconds while the cluster transitions to change tracking '''
        # gp_segment_connect_timeout is set to 10s, but the transition to ct
        # still needs a little more time than that to complete.
        transition_wait_secs = 100
        sleep(transition_wait_secs)

    def restart_db(self):
        ''' Stop and then start the database through the self.base helper '''
        storage = self.base
        storage.stop_db()
        storage.start_db()

    def stop_db_with_no_rc_check(self):
        ''' Run "gpstop -a"; the command result is never inspected '''
        gpstop = Command('Gpstop_a', 'gpstop -a')
        tinctest.logger.info('Executing command: gpstop -a')
        gpstop.run()

    def start_db_with_no_rc_check(self):
        ''' Run "gpstart -a"; the command result is never inspected '''
        gpstart = Command('Gpstart_a', 'gpstart -a')
        tinctest.logger.info('Executing command: gpstart -a')
        gpstart.run()

    def restart_db_with_no_rc_check(self):
        ''' Bounce the database with the stop/start helpers that do not check rc '''
        for step in (self.stop_db_with_no_rc_check, self.start_db_with_no_rc_check):
            step()

    def set_gpconfig(self, param, value):
        '''
        Set a configuration parameter with gpconfig (validation skipped) and
        restart the database afterwards.

        @param param : name of the configuration parameter
        @param value : value passed to gpconfig -v
        '''
        gpconfig_cmd = "gpconfig -c %s -v %s --skipvalidation " % (param, value)
        run_shell_command(gpconfig_cmd)
        self.restart_db()

    def check_db(self):
        ''' Delegate to checkDBUp(); its return value is discarded '''
        checkDBUp()

    def check_fault_status(self, fault_name, seg_id=None, role=None):
        '''
        Assert that the given fault is reported as 'triggered'.

        @param fault_name : name of the fault to look up
        @param seg_id : forwarded unchanged to the fault utility
        @param role : forwarded unchanged to the fault utility
        '''
        lookup = dict(fault_name=fault_name, status='triggered', max_cycle=20,
                      role=role, seg_id=seg_id)
        triggered = self.fileutil.check_fault_status(**lookup)
        self.assertTrue(triggered, 'The fault is not triggered in the time expected')

    def cluster_state(self):
        ''' Assert that gpconfig.is_not_insync_segments() returns a true value '''
        self.assertTrue(self.gpconfig.is_not_insync_segments(),
                        'The cluster is not up and in sync')