class GpRecoversegRegressionTests(unittest.TestCase):

    def setUp(self):
        self.gprec = GpRecover()
        self.gphome = os.environ.get('GPHOME')

    def failover(self, type='mirror'):
        """Inject a fault so that segments fail over.

        type='mirror' faults the filerep consumer on all mirrors;
        anything else panics the postmaster on all primaries.
        """
        if type == 'mirror':
            fault_str = 'source %s/greenplum_path.sh;gpfaultinjector -f filerep_consumer -m async -y fault -r mirror -H ALL' % self.gphome
        else:
            fault_str = 'source %s/greenplum_path.sh;gpfaultinjector -f postmaster -m async -y panic -r primary -H ALL' % self.gphome
        return run_shell_command(fault_str, cmdname='Run fault injector to failover')

    def test_incr_gprecoverseg(self):
        self.gprec.wait_till_insync_transition()
        if self.failover():
            self.assertTrue(self.gprec.incremental())

    def test_full_gprecoverseg(self):
        self.gprec.wait_till_insync_transition()
        if self.failover():
            self.assertTrue(self.gprec.full())

    def test_gprecoverseg_rebalance(self):
        self.gprec.wait_till_insync_transition()
        if self.failover('primary'):
            PSQL.run_sql_file(local_path('mirror_failover_trigger.sql'))
            self.gprec.incremental()
            if self.gprec.wait_till_insync_transition():
                self.assertTrue(self.gprec.rebalance())

    def test_wait_till_insync(self):
        self.gprec.wait_till_insync_transition()
        if self.failover():
            self.gprec.incremental()
            self.assertTrue(self.gprec.wait_till_insync_transition())
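
# Hedged runner sketch, not part of the original suite: the class above is a
# stock unittest.TestCase, so it can be driven with the standard loader.
# Assumes GPHOME and PGPORT point at a running, mirrored Greenplum cluster.
if __name__ == '__main__':
    suite = unittest.TestLoader().loadTestsFromTestCase(GpRecoversegRegressionTests)
    unittest.TextTestRunner(verbosity=2).run(suite)
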
class FilerepTestCase(MPPTestCase):

    def __init__(self, methodName):
        self.pgport = os.environ.get('PGPORT')
        self.util = Filerepe2e_Util()
        self.gpconfig = GpConfig()
        self.config = GPDBConfig()
        self.gpr = GpRecover(self.config)
        self.dbstate = DbStateClass('run_validation', self.config)
        self.gpstart = GpStart()
        self.gpstop = GpStop()
        super(FilerepTestCase, self).__init__(methodName)

    def sleep(self, seconds=60):
        time.sleep(seconds)

    def create_file_in_datadir(self, content, role, filename):
        dbid = self.config.get_dbid(content=content, seg_role=role)
        host, datadir = self.config.get_host_and_datadir_of_segment(dbid=dbid)
        file_path = os.path.join(datadir, filename)
        cmd = Command('create a file', 'touch %s' % file_path,
                      ctxt=REMOTE, remoteHost=host)
        cmd.run(validateAfter=True)

    def remove_file_in_datadir(self, content, role, filename):
        dbid = self.config.get_dbid(content=content, seg_role=role)
        host, datadir = self.config.get_host_and_datadir_of_segment(dbid=dbid)
        file_path = os.path.join(datadir, filename)
        cmd = Command('remove a file', 'rm %s' % file_path,
                      ctxt=REMOTE, remoteHost=host)
        cmd.run(validateAfter=True)

    def get_timestamp_of_file_in_datadir(self, content, role, filename):
        dbid = self.config.get_dbid(content=content, seg_role=role)
        host, datadir = self.config.get_host_and_datadir_of_segment(dbid=dbid)
        file_path = os.path.join(datadir, filename)
        cmd = Command('check timestamp',
                      """ python -c "import os; print os.stat('%s').st_mtime" """ % file_path,
                      ctxt=REMOTE, remoteHost=host)
        cmd.run(validateAfter=True)
        res = cmd.get_results().stdout.strip()
        return res

    def verify_file_exists(self, content, role, filename):
        dbid = self.config.get_dbid(content=content, seg_role=role)
        host, datadir = self.config.get_host_and_datadir_of_segment(dbid=dbid)
        file_path = os.path.join(datadir, filename)
        cmd = Command('check if file exists', 'test -f %s' % file_path,
                      ctxt=REMOTE, remoteHost=host)
        cmd.run(validateAfter=True)

    def handle_ext_cases(self, file):
        """
        @file: wet (writable external table) sql file to rewrite with this machine's env
        """
        host = str(socket.gethostbyname(socket.gethostname()))  # must be an IP
        querystring = "gpfdist://" + host + ":8088"
        if os.path.isfile(file):
            for line in fileinput.FileInput(file, inplace=1):
                line = re.sub('gpfdist.+8088', querystring, line)
                print str(re.sub('\n', '', line))

    def handle_hybrid_part_cases(self, file):
        """
        @file: hybrid sql file to rewrite with this machine's env
        """
        querystring = "FROM '" + local_path('hybrid_part.data') + "'"
        if os.path.isfile(file):
            for line in fileinput.FileInput(file, inplace=1):
                line = re.sub(r"FROM\s'.+hybrid_part.data'", querystring, line)
                print str(re.sub('\n', '', line))

    def preprocess(self):
        """
        Replace the hard-coded information in the sql files with the
        correct hostname, ip address, etc. for this machine.
        """
        list_workload_dir = ['set_sync1', 'sync1', 'set_ck_sync1', 'ck_sync1',
                             'set_ct', 'ct', 'set_resync', 'resync',
                             'set_sync2', 'sync2']
        for dir in list_workload_dir:
            sql_path = os.path.join(local_path(dir), 'sql')
            ans_path = os.path.join(local_path(dir), 'expected')
            for file in os.listdir(sql_path):
                if file.find('wet_ret') >= 0:
                    self.handle_ext_cases(os.path.join(sql_path, file))
                if file.find('hybrid_part') >= 0:
                    self.handle_hybrid_part_cases(os.path.join(sql_path, file))
            for file in os.listdir(ans_path):
                if file.find('wet_ret') >= 0:
                    self.handle_ext_cases(os.path.join(ans_path, file))
                if file.find('hybrid_part') >= 0:
                    self.handle_hybrid_part_cases(os.path.join(ans_path, file))

    def clean_data(self):
        """
        Remove the external table data files; otherwise each sql file run
        appends more data to the same external table.
        """
        test = local_path("")
        test = str(test) + "data/*.*"
        cmd = 'rm -rfv ' + test
        run_shell_command(cmd)

    def anydownsegments(self):
        """
        Returns True if no segments are down.
        """
        tinctest.logger.info("Checking if any segments are down")
        num_segments_down = self.count_of_nodes_down()
        return int(num_segments_down) == 0

    def stop_start_validate(self, stopValidate=True):
        """
        Do gpstop -i, gpstart and check whether all segments come back up.
        """
        tinctest.logger.info("Performing stop start validate")
        tinctest.logger.info("Shutting down the cluster")
        ok = self.gpstop.run_gpstop_cmd(immediate='i', validate=stopValidate)
        if not ok and stopValidate:
            raise Exception('Problem while shutting down the cluster')
        tinctest.logger.info("Successfully shutdown the cluster.")
        tinctest.logger.info("Restarting the cluster.")
        ok = self.gpstart.run_gpstart_cmd()
        if not ok:
            raise Exception('Failed to bring the cluster back up')
        tinctest.logger.info("Successfully restarted the cluster.")
        if not self.anydownsegments():
            raise Exception("segments were marked down")
        return (True, "All segments are up")

    def method_reset_fault_injection(self):
        """
        Resets fault injection
        Return: (True, [result]) if OK; raises on failure
        """
        tinctest.logger.info("Resetting fault injection")
        (ok1, out1) = self.util.inject_fault(f='filerep_resync', m='async',
                                             y='reset', r='primary', H='ALL')
        if not ok1:
            raise Exception("Fault injection failed")
        tinctest.logger.info("Done Injecting Fault to reset resync")
        return (True, str(out1))

    def method_resume_filerep_resync(self):
        """
        Resumes the resync process.
        """
        tinctest.logger.info("Resuming Resync")
        (ok, out) = self.util.inject_fault(f='filerep_resync', m='async',
                                           y='resume', r='primary', H='ALL')
        if not ok:
            raise Exception("Fault injection failed")
        tinctest.logger.info("Done resuming resync")
        return (ok, out)

    def run_method_suspendresync(self):
        """
        Stops the cluster from going into resync.
        """
        tinctest.logger.info("Suspending resync")
        (ok, out) = self.util.inject_fault(f='filerep_resync', m='async',
                                           y='suspend', r='primary', H='ALL')
        tinctest.logger.info('output from suspend resync %s' % out)
        if not ok:
            raise Exception("Fault injection failed")
        tinctest.logger.info("Done Injecting Fault to suspend resync")
        return (ok, out)

    def count_of_masters(self):
        """
        Returns the number of master nodes in the cluster.
        """
        tinctest.logger.info("Count the number of masters")
        cmd = "select count(*) from gp_segment_configuration where content = -1"
        out = PSQL.run_sql_command(cmd)
        num_master = out.split('\n')[3].strip()
        return num_master

    def count_of_nodes(self):
        """
        Returns the total number of nodes in the cluster.
        """
        tinctest.logger.info("Counting number of nodes")
        cmd = "select count(*) from gp_segment_configuration"
        num_cl = PSQL.run_sql_command(cmd)
        total_num_rows = num_cl.split('\n')[3].strip()
        return total_num_rows

    def count_of_nodes_in_ct(self):
        """
        Returns the number of nodes in change tracking.
        """
        tinctest.logger.info("Counting number of nodes in ct")
        sqlcmd = "select count(*) from gp_segment_configuration where mode = 'c'"
        num_cl = PSQL.run_sql_command(sqlcmd)
        num_cl = num_cl.split('\n')[3].strip()
        return num_cl

    def count_of_nodes_down(self):
        """
        Returns the number of nodes marked as down.
        """
        tinctest.logger.info("Counting the number of nodes down")
        sqlcmd = "select count(*) from gp_segment_configuration where status = 'd'"
        num_down = PSQL.run_sql_command(sqlcmd)
        num_down = num_down.split('\n')[3].strip()
        return num_down

    def count_of_nodes_sync(self):
        """
        Returns the number of nodes in sync.
        """
        tinctest.logger.info("Counting the number of nodes in sync")
        sqlcmd = "select count(*) from gp_segment_configuration where mode = 's'"
        num_sync = PSQL.run_sql_command(sqlcmd)
        num_sync = num_sync.split('\n')[3].strip()
        return num_sync

    def count_of_nodes_not_sync(self):
        """
        Returns the number of nodes not in sync.
        """
        tinctest.logger.info("Counting number of nodes not in sync")
        sqlcmd = "select count(*) from gp_segment_configuration where mode <> 's'"
        num_sync = PSQL.run_sql_command(sqlcmd)
        num_sync = num_sync.split('\n')[3].strip()
        return num_sync

    def inject_fault_on_first_primary(self):
        """
        @product_version gpdb:[4.3.3.0-], gpdb:[4.2.8.1-4.2]
        """
        tinctest.logger.info("\n Injecting faults on first primary")
        (ok, out) = self.util.inject_fault(f='filerep_immediate_shutdown_request',
                                           m='async', y='infinite_loop',
                                           r='primary', seg_id=2, sleeptime=300)
        if not ok:
            raise Exception("Fault filerep_immediate_shutdown_request injection failed")
        (ok, out) = self.util.inject_fault(f='fileRep_is_operation_completed',
                                           m='async', y='infinite_loop',
                                           r='primary', seg_id=2)
        if not ok:
            raise Exception("Fault fileRep_is_operation_completed injection failed")
        tinctest.logger.info("\n Done Injecting Fault")

    def inject_fault_on_first_mirror(self):
        """
        @product_version gpdb:[4.3.3.0-], gpdb:[4.2.8.1-4.2]
        """
        sqlcmd = "select dbid from gp_segment_configuration where content=0 and role='m'"
        first_mirror_dbid = PSQL.run_sql_command(sqlcmd)
        first_mirror_dbid = first_mirror_dbid.split('\n')[3].strip()
        tinctest.logger.info("\n Injecting faults on first mirror")
        flag = self.util.check_fault_status(fault_name='fileRep_is_operation_completed',
                                            status='triggered', max_cycle=100)
        if not flag:
            raise Exception("Fault fileRep_is_operation_completed didn't trigger")
        (ok, out) = self.util.inject_fault(f='filerep_consumer', m='async',
                                           y='panic', r='mirror',
                                           seg_id=first_mirror_dbid)
        if not ok:
            raise Exception("Fault filerep_consumer injection failed")
        tinctest.logger.info("\n Done Injecting Fault")

    def setupGpfdist(self, port, path):
        gpfdist = Gpfdist(port, self.hostIP())
        gpfdist.killGpfdist()
        gpfdist.startGpfdist(' -t 30 -m 1048576 -d ' + path)
        return True

    def cleanupGpfdist(self, port, path):
        gpfdist = Gpfdist(port, self.hostIP())
        gpfdist.killGpfdist()
        return True

    def hostIP(self):
        ok = run_shell_command('which gpfdist')
        if not ok:
            raise GPtestError("Error:'which gpfdist' command failed.")
        hostname = socket.gethostname()
        if hostname.find('mdw') > 0:
            host = 'mdw'
        else:
            host = str(socket.gethostbyname(socket.gethostname()))  # must be an IP
        tinctest.logger.info('current host is %s' % host)
        return host

    def method_setup(self):
        tinctest.logger.info("Performing setup tasks")
        gpfs = Gpfilespace()
        gpfs.create_filespace('filerep_fs_a')
        gpfs.create_filespace('filerep_fs_b')
        gpfs.create_filespace('filerep_fs_c')
        gpfs.create_filespace('filerep_fs_z')
        gpfs.create_filespace('sync1_fs_1')
        # Set max_resource_queues to 100
        cmd = 'gpconfig -c max_resource_queues -v 100 '
        ok = run_shell_command(cmd)
        if not ok:
            raise Exception('Failed to set max_resource_queues to 100 using gpconfig')
        # Restart the cluster so the setting takes effect
        self.gpstop.run_gpstop_cmd(immediate='i')
        ok = self.gpstart.run_gpstart_cmd()
        if not ok:
            raise Exception('Failure during restarting the cluster')
        return True

    def get_ext_table_query_from_gpstate(self):
        outfile = local_path("gpstate_tmp")
        ok = run_shell_command("gpstate --printSampleExternalTableSql >" + outfile)
        querystring = ""
        flag = 'false'
        out = open(outfile, 'r').readlines()
        for line in out:
            if line.find('DROP EXTERNAL TABLE IF EXISTS gpstate_segment_status') >= 0:
                flag = 'true'
            if flag == 'true':
                querystring = querystring + line
        return querystring

    ############ RUN QUERY ############

    def check_gpstate(self, type, phase):
        """
        Perform gpstate checks for each transition state.
        @type: failover type
        @phase: transition stage, can be sync1, ck_sync1, ct, resync, sync2
        """
        if phase == 'sync1':
            state_num = self.query_select_count("select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Synchronized' and status_in_config='Up' and instance_status='Up'")
            sync1_num = self.query_select_count("select count(*) from gp_segment_configuration where content <> -1")
            if int(sync1_num) != int(state_num):
                raise Exception("gpstate in Sync state failed")
            tinctest.logger.info("Done Running gpstate in %s phase " % (phase))
        elif phase == 'ct':
            p_num = self.query_select_count("select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Change Tracking' and role = 'Primary' and status_in_config='Up' and instance_status='Up'")
            m_num = self.query_select_count("select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Out of Sync' and role = 'Mirror' and status_in_config='Down' and instance_status='Down in configuration' ")
            if int(p_num) != int(m_num):
                raise Exception("gpstate in CT state failed")
            tinctest.logger.info("Done Running gpstate in %s phase " % (phase))
        elif phase == 'resync_incr':
            if type == 'primary':
                query = "select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Resynchronizing' and status_in_config='Up' and instance_status='Up' and resync_mode= 'Incremental'"
            else:
                query = "select count(*) from gpstate_segment_status where mirror_status ='Resynchronizing' and status_in_config='Up' and instance_status='Up' and resync_mode= 'Incremental'"
            resync_incr_num = self.query_select_count(query)
            query_num_rows = "select count(*) from gp_segment_configuration where content <> -1"
            num_rows = self.query_select_count(query_num_rows)
            if int(resync_incr_num) != int(num_rows):
                tinctest.logger.info("resync_incr_num query run %s" % query)
                tinctest.logger.info("num_rows query run %s" % query_num_rows)
                raise Exception("gpstate in Resync Incremental state failed. resync_incr_num %s != num_rows %s" % (resync_incr_num, num_rows))
            tinctest.logger.info("Done Running gpstate in %s phase " % (phase))
        elif phase == 'resync_full':
            num_rows = self.query_select_count("select count(*) from gp_segment_configuration where content <> -1")
            if type == 'primary':
                resync_full_num = self.query_select_count("select count(*) from gpstate_segment_status where role = preferred_role and mirror_status ='Resynchronizing' and status_in_config='Up' and instance_status='Up' and resync_mode= 'Full'")
            else:
                resync_full_num = self.query_select_count("select count(*) from gpstate_segment_status where mirror_status ='Resynchronizing' and status_in_config='Up' and instance_status='Up' and resync_mode= 'Full'")
            if int(resync_full_num) != int(num_rows):
                raise Exception("gpstate in Resync Full state failed")
            tinctest.logger.info("Done Running gpstate in %s phase " % (phase))
        return True

    def trigger_transition(self):
        PSQL.run_sql_file(local_path('mirrors.sql'))

    def run_gpstate(self, type, phase):
        """
        Run gpstate and verify its output for the given transition state.
        @type: failover type
        @phase: transition stage, can be sync1, ck_sync1, ct, resync, sync2
        """
        tinctest.logger.info("running gpstate")
        querystring = self.get_ext_table_query_from_gpstate()
        file1 = local_path('create_table_gpstate.sql')
        f1 = open(file1, 'w')
        f1.write(querystring)
        f1.write('\n')
        f1.close()
        PSQL.run_sql_file(local_path('create_table_gpstate.sql'))
        gpstate_outfile = local_path('gpstate_out')
        cmd = 'gpstate -s -a > %s 2>&1' % (gpstate_outfile)
        ok = run_shell_command(cmd)
        self.check_gpstate(type, phase)
        return ok

    def check_mirror_seg(self, master=False):
        tinctest.logger.info("running check mirror")
        self.dbstate.check_mirrorintegrity()

    def do_gpcheckcat(self, dbname=None, alldb=False, online=False,
                      outputFile='checkcat.out', outdir=None):
        tinctest.logger.info("running gpcheckcat")
        self.dbstate.check_catalog(outputFile=outputFile)

    def query_select_count(self, sqlcmd):
        num = PSQL.run_sql_command(sqlcmd)
        num = num.split('\n')[3].strip()
        return num

    def method_run_failover(self, type):
        """
        Inject a fault to fail over nodes.
        @type: primary [induces fault in mirror]
               mirror  [creates panic in primary]
        Return: True (fault injection output is logged, not returned)
        """
        if type == 'primary':
            tinctest.logger.info("\n primary failover")
            (ok, out) = self.util.inject_fault(f='filerep_consumer', m='async',
                                               y='fault', r='mirror', H='ALL')
            tinctest.logger.info("\n Done Injecting Fault")
        elif type == 'mirror':
            tinctest.logger.info("\n Mirror failover")
            (ok, out) = self.util.inject_fault(f='postmaster', m='async',
                                               y='panic', r='primary', H='ALL')
            tinctest.logger.info("\n Done Injecting Fault")
        return True

    def wait_till_change_tracking_transition(self):
        self.util.wait_till_change_tracking_transition()

    def wait_till_insync_transition(self):
        self.gpr.wait_till_insync_transition()

    def run_gprecoverseg(self, recover_mode):
        if recover_mode == 'full':
            self.gpr.full()
        else:
            self.gpr.incremental()

    def run_gpconfig(self, parameter, master_value, segment_value):
        if parameter is not None:
            self.gpconfig.setParameter(parameter, master_value, segment_value)
            self.gpstop.run_gpstop_cmd(restart='r')

    def inject_fault(self, fault=None, mode=None, operation=None,
                     prim_mirr=None, host='All', table=None, database=None,
                     seg_id=None, sleeptime=None, occurence=None):
        # Note: faults are always injected on all hosts (H='ALL'); the host
        # parameter is accepted but not used.
        if fault is None or mode is None or operation is None or prim_mirr is None:
            raise Exception('Incorrect parameters provided for inject fault')
        (ok, out) = self.util.inject_fault(f=fault, m=mode, y=operation,
                                           r=prim_mirr, H='ALL', table=table,
                                           database=database, sleeptime=sleeptime,
                                           o=occurence, seg_id=seg_id)
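
# Hedged usage sketch, not part of the original suite: a minimal failover and
# recovery pass composed from the FilerepTestCase helpers above. The function
# name is illustrative; it assumes a mirrored cluster that starts in sync.
def example_failover_and_recover(test):
    """test is a FilerepTestCase instance (illustrative only)."""
    test.method_run_failover('primary')          # fault the mirrors so they go down
    test.wait_till_change_tracking_transition()  # primaries enter change tracking ('c')
    test.run_gprecoverseg('incr')                # anything but 'full' runs incremental recovery
    test.wait_till_insync_transition()           # wait until every segment reports mode 's'
    test.check_mirror_seg()                      # verify primary/mirror integrity
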
def test_recovery_full(self):
    gprecover = GpRecover()
    gprecover.full()
    gprecover.wait_till_insync_transition()
def full_recoverseg(self):
    gprecover = GpRecover(GPDBConfig())
    gprecover.full()
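
# Hedged companion sketch (not in the original source): the incremental variant
# of full_recoverseg above. GpRecover.incremental() and
# wait_till_insync_transition() are both used elsewhere in this suite.
def incr_recoverseg(self):
    gprecover = GpRecover(GPDBConfig())
    gprecover.incremental()
    gprecover.wait_till_insync_transition()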
class SubTransactionLimitRemovalTestCase(MPPTestCase):

    def __init__(self, methodName):
        super(SubTransactionLimitRemovalTestCase, self).__init__(methodName)

    def check_system(self):
        '''
        @summary: Check whether the system is up and in sync. Raise an exception if not.
        '''
        tinctest.logger.info("[STLRTest] Running check_system")
        tinctest.logger.info("[STLRTest] Check whether the system is up and sync")
        cmd = "select count(*) from gp_segment_configuration where content <> -1;"
        num_cl = PSQL.run_sql_command(cmd)
        count_all = num_cl.split('\n')[3].strip()
        cmd = "select count(*) from gp_segment_configuration where content <> -1 and mode = 's' and status = 'u';"
        num_cl = PSQL.run_sql_command(cmd)
        count_up_and_sync = num_cl.split('\n')[3].strip()
        tinctest.logger.info("[STLRTest] printing gp segment configuration")
        gp_seg_conf = PSQL.run_sql_command("select * from gp_segment_configuration order by dbid")
        tinctest.logger.info(gp_seg_conf)
        if count_all != count_up_and_sync:
            raise Exception("[STLRTest] System not in sync and up. Exiting test")
        else:
            tinctest.logger.info("[STLRTest] Starting New Test: System is up and in sync...")

    def run_sqls(self, test):
        '''
        @summary: Run the given sql file (usually from a worker thread)
        @param test: the sql file to run
        '''
        tinctest.logger.info("[STLRTest] Running run_sqls")
        tinctest.logger.info("[STLRTest]Starting new thread to run sql %s" % (test))
        PSQL.run_sql_file(local_path(test))

    def suspend_faults(self, fault_name):
        '''
        @summary: Suspend the specified fault; reset it before issuing suspend
        @param fault_name: Name of the fault to suspend
        '''
        tinctest.logger.info("[STLRTest] Running suspend_faults")
        self.util = Filerepe2e_Util()
        (ok1, out1) = self.util.inject_fault(f=fault_name, m='async', y='reset',
                                             r='primary', H='ALL')
        if not ok1:
            raise Exception("[STLRTest]Fault injection failed")
        tinctest.logger.info("[STLRTest]Done resetting the %s fault" % (fault_name))
        (ok1, out1) = self.util.inject_fault(f=fault_name, m='async', y='suspend',
                                             r='primary', H='ALL')
        if not ok1:
            raise Exception("[STLRTest]Fault injection failed")
        tinctest.logger.info("[STLRTest]Done suspending the %s fault" % (fault_name))

    def check_fault_status(self, fault_name=None, status=None, max_cycle=10):
        '''
        Poll until the fault reaches the given status.
        @param fault_name: Fault name
        @param status: Status to be checked - triggered/completed
        '''
        self.util = Filerepe2e_Util()
        tinctest.logger.info("[STLRTest] Running check_fault_status %s", status)
        if (not fault_name) or (not status):
            raise Exception("[STLRTest]Need a value for fault_name and status to continue")
        poll = 0
        while poll < max_cycle:
            (ok1, out1) = self.util.inject_fault(f=fault_name, m='async', y='status',
                                                 r='primary', H='ALL')
            poll += 1
            for line in out1.splitlines():
                if line.find(fault_name) > 0 and line.find(status) > 0:
                    tinctest.logger.info("[STLRTest]Fault %s is %s " % (fault_name, status))
                    poll = 0
                    tinctest.logger.info("[STLRTest] Running check_fault_status %s TRUE", status)
                    return True
            # sleep a while before polling again
            sleep(10)
        tinctest.logger.info("[STLRTest] Running check_fault_status %s FALSE", status)
        return False

    def filerep_fault(self, trans_state):
        '''
        @summary: Inject the filerep fault supplied
        @param trans_state: type of transition
        '''
        tinctest.logger.info("[STLRTest] Running filerep_fault")
        self.util = Filerepe2e_Util()
        if trans_state == 'failover_to_primary':
            tinctest.logger.info("[STLRTest] primary failover")
            (ok1, out1) = self.util.inject_fault(f='filerep_consumer', m='async',
                                                 y='fault', r='mirror', H='ALL')
            if not ok1:
                raise Exception("[STLRTest]Fault injection failed")
            tinctest.logger.info("[STLRTest]Done primary failover fault")
        elif trans_state == 'failover_to_mirror':
            tinctest.logger.info("[STLRTest] fault for postmaster panic")
            (ok1, out1) = self.util.inject_fault(f='postmaster', m='async',
                                                 y='panic', r='primary', H='ALL')
            if not ok1:
                raise Exception("[STLRTest]Fault injection failed")
            tinctest.logger.info("[STLRTest]Done postmaster panic fault")
        elif trans_state == 'postmaster_reset':
            tinctest.logger.info("[STLRTest] fault for filerep_sender panic")
            (ok1, out1) = self.util.inject_fault(f='filerep_sender', m='async',
                                                 y='panic', r='primary', H='ALL')
            if not ok1:
                raise Exception("[STLRTest]Fault injection failed")
            tinctest.logger.info("[STLRTest]Done filerep_sender panic fault")
        tinctest.logger.info("[STLRTest] Done Injecting Fault")

    def resume_faults(self, fault_name, trans_state):
        '''
        @summary: Resume the fault and check its status
        '''
        self.util = Filerepe2e_Util()
        tinctest.logger.info("[STLRTest] Running resume_faults")
        if not trans_state == 'failover_to_mirror':
            tinctest.logger.info("[STLRTest] fault for %s resume" % fault_name)
            (ok1, out1) = self.util.inject_fault(f=fault_name, m='async', y='resume',
                                                 r='primary', H='ALL')
            if not ok1:
                raise Exception("[STLRTest]Fault resume failed")
            tinctest.logger.info("[STLRTest]Done fault for %s resume" % fault_name)
        if trans_state == 'postmaster_reset':
            (ok1, out1) = self.util.inject_fault(f=fault_name, m='async', y='resume',
                                                 r='mirror', H='ALL')
            if not ok1:
                tinctest.logger.info("[STLRTest]Failed fault for %s resume on mirror" % fault_name)
        if trans_state == 'failover_to_primary':
            self.check_fault_status(fault_name, 'completed')

    def checkPSQLRun(self, test):
        '''
        @summary: Check if the psql run started in parallel is over before
                  running the _post.sql
        '''
        tinctest.logger.info("[STLRTest] Running checkPSQLRun")
        cmd_str = 'ps -ef|grep sub_transaction_limit_removal|grep psql'
        while True:
            is_running = 0
            (rc, out) = shell.run(cmd_str)
            for line in out:
                if '%s' % test in line:
                    is_running = 1
            if is_running == 0:
                return True
            else:
                sleep(10)

    def resume_filerep_resync(self):
        self.util = Filerepe2e_Util()
        tinctest.logger.info("[STLRTest] Running resume_filerep_resync")
        tinctest.logger.info("[STLRTest] fault for failover_to_mirror resume")
        (ok1, out1) = self.util.inject_fault(f='filerep_resync', m='async',
                                             y='resume', r='primary', H='ALL')
        if not ok1:
            raise Exception("[STLRTest]Fault injection failed")
        tinctest.logger.info("[STLRTest]Done fault for failover_to_mirror resume")
        sleep(10)

    def stop_start_validate(self, expect_down_segments=False):
        """
        Do gpstop -i, gpstart and check whether all segments come back up.
        """
        self.gpstart = GpStart()
        self.gpstop = GpStop()
        tinctest.logger.info("[STLRTest] Running stop_start_validate")
        tinctest.logger.info("[STLRTest]Shutting down the cluster")
        ok = self.gpstop.run_gpstop_cmd(immediate='i')
        if not expect_down_segments:
            if not ok:
                raise Exception('[STLRTest]Problem while shutting down the cluster')
            tinctest.logger.info("[STLRTest]Successfully shutdown the cluster.")
        tinctest.logger.info("[STLRTest]Restarting the cluster.")
        ok = self.gpstart.run_gpstart_cmd()
        if not ok:
            raise Exception('[STLRTest]Failed to bring the cluster back up')
        tinctest.logger.info("[STLRTest]Successfully restarted the cluster.")
        # anydownsegments() is expected to be provided by the concrete test
        # class (see FilerepTestCase above).
        if not self.anydownsegments():
            raise Exception("[STLRTest]segments were marked down")
        return (True, "All segments are up")

    def run_gprecoverseg(self, recover_option):
        '''
        @summary: Call gprecoverseg full or incremental to bring the cluster back in sync
        '''
        self.gpr = GpRecover()
        tinctest.logger.info("[STLRTest] Running run_gprecoverseg")
        if recover_option == 'full':
            self.gpr.full()
        else:
            self.gpr.incremental()
        self.gpr.wait_till_insync_transition()

    def run_restart_database(self):
        '''
        @summary: Restart the database
        '''
        self.gpstart = GpStart()
        self.gpstop = GpStop()
        tinctest.logger.info("[STLRTest] Running run_restart_database")
        ok = self.gpstop.run_gpstop_cmd(immediate='i')
        tinctest.logger.info(ok)
        ok = self.gpstart.run_gpstart_cmd()
        tinctest.logger.info(ok)

    def reset_faults(self, fault_name, current_cluster_state):
        '''
        @summary: Reset the faults at the end of test
        '''
        self.util = Filerepe2e_Util()
        tinctest.logger.info("[STLRTest] Running reset_faults")
        tinctest.logger.info("[STLRTest] Resetting fault before ending test")
        (ok1, out1) = self.util.inject_fault(f=fault_name, m='async', y='reset',
                                             r='primary', H='ALL')
        if not ok1:
            raise Exception("[STLRTest]Fault injection failed")
        tinctest.logger.info("[STLRTest]Done resetting %s fault" % (fault_name))
        if current_cluster_state == 'resync':
            (ok1, out1) = self.util.inject_fault(f='filerep_resync', m='async',
                                                 y='reset', r='primary', H='ALL')
            if not ok1:
                raise Exception("[STLRTest]Fault injection failed")
            tinctest.logger.info("[STLRTest]Done resetting filerep_resync fault")
        (ok1, out1) = self.util.inject_fault(f='checkpoint', m='async', y='reset',
                                             r='primary', H='ALL')
        if not ok1:
            raise Exception("[STLRTest]Fault injection failed")
        tinctest.logger.info("[STLRTest]Done resetting checkpoint fault")

    def do_gpcheckcat(self, dbname=None, alldb=False, online=False,
                      outputFile='checkcat.out', outdir=None):
        self.dbstate = DbStateClass('run_validation')
        tinctest.logger.info("[STLRTest] Running do_gpcheckcat")
        self.dbstate.check_catalog()
        return True

    def _validation(self):
        '''
        @summary: gpcheckcat and gpcheckmirrorintegrity
        '''
        ###psql.run_shell_command("CHECKPOINT; CHECKPOINT; CHECKPOINT;CHECKPOINT; CHECKPOINT;")
        ###sleep(30) # sleep for some time for the segments to be in sync before validation
        self.dbstate = DbStateClass('run_validation')
        tinctest.logger.info("[STLRTest] Running _validation")
        outfile = local_path("subt_checkcat.out")
        self.dbstate.check_catalog(outputFile=outfile)
        self.dbstate.check_mirrorintegrity()

    def inject_and_resume_fault(self, fault_name, trans_state):
        self.check_fault_status(fault_name, 'triggered')
        self.filerep_fault(trans_state)
        if trans_state == 'failover_to_mirror':
            PSQL.run_sql_file(local_path('test_while_ct.sql'))
        self.resume_faults(fault_name, trans_state)

    def run_post_sqls(self, fault_name='', trans_state=''):
        PSQL.wait_for_database_up()
        if trans_state == 'failover_to_primary' or trans_state == '':
            post_sql = "failover_sql/subt_create_table_ao_post_commit"
        else:
            post_sql = "failover_sql/subt_create_table_ao_post_abort"
        sql_file = post_sql + ".sql"
        ans_file = post_sql + ".ans"
        out_file = post_sql + ".out"
        PSQL.run_sql_file(sql_file=local_path(sql_file), out_file=local_path(out_file))
        diff_res = Gpdiff.are_files_equal(local_path(out_file), local_path(ans_file))
        if not diff_res:
            self.fail("[STLRTest]Gpdiff failed for : %s %s" % (fault_name, trans_state))

    def reset_all_faults(self):
        '''
        @summary: Reset all faults on primary and mirror
        '''
        tinctest.logger.info("[STLRTest] Running reset_all_faults")
        self.util = Filerepe2e_Util()
        (ok1, out1) = self.util.inject_fault(f='all', m='async', y='reset',
                                             r='primary', H='ALL')
        if not ok1:
            raise Exception("[STLRTest]Fault injection failed")
        tinctest.logger.info("[STLRTest]Done resetting all faults on primary")
        (ok1, out1) = self.util.inject_fault(f='all', m='async', y='reset',
                                             r='mirror', H='ALL')
        if not ok1:
            raise Exception("[STLRTest]Fault injection failed")
        tinctest.logger.info("[STLRTest]Done resetting all faults on mirror")

    def kill_zombies(self):
        '''
        @summary: There are stray zombie processes running after each test.
                  This method clears them.
        '''
        tinctest.logger.info("[STLRTest] Running kill_zombies")
        # List "ppid#pid" pairs and keep the ones whose parent is init (ppid 1),
        # i.e. orphaned processes left behind by the test.
        cmd_str = "ps -ef | grep \"port\" | awk '{print $3 \"#\" $2}' | grep -w 1"
        cmd = Command("shell_command", cmd_str)
        tinctest.logger.info('Executing command: %s : %s' % ("shell command", cmd_str))
        cmd.run()
        result = cmd.get_results()
        out = result.stdout
        lines = out.split('\n')
        for line in lines:
            pids = line.split('#')
            if pids[0] == '1':
                kill_str = "kill -9 %s" % (pids[1])
                cmd2 = Command("kill_command", kill_str)
                cmd2.run()

    def skip_checkpoint(self):
        '''
        @summary: Inject a fault that makes checkpoints get skipped
        '''
        self.util = Filerepe2e_Util()
        tinctest.logger.info("[STLRTest] Running skip_checkpoint")
        (ok1, out1) = self.util.inject_fault(f='checkpoint', m='async', y='reset',
                                             r='primary', H='ALL')
        if not ok1:
            raise Exception("[STLRTest]Fault injection failed")
        tinctest.logger.info("[STLRTest]Done resetting the checkpoint fault")
        (ok1, out1) = self.util.inject_fault(f='checkpoint', m='async', y='skip',
                                             r='primary', H='ALL')
        if not ok1:
            raise Exception("[STLRTest]Fault injection failed")
        tinctest.logger.info("[STLRTest]Done skipping the checkpoint fault")

    def method_setup(self):
        tinctest.logger.info("Performing setup tasks")
        gpfs = Gpfilespace()
        gpfs.create_filespace('subt_filespace_a')

    def cleandb(self):
        db = Database()
        db.setupDatabase('gptest')
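
# Hedged usage sketch, not part of the original suite: one way the helpers above
# compose into a failover scenario. The workload sql path and the wrapper name
# are hypothetical; 'filerep_resync' and 'failover_to_primary' come from the
# helpers themselves. The workload runs on a thread (it blocks on the suspended
# fault) while the main thread drives the fault machinery.
def example_subtransaction_failover(case):
    """case is a SubTransactionLimitRemovalTestCase instance (illustrative only)."""
    import threading
    case.check_system()                    # abort unless all segments are up and in sync
    case.skip_checkpoint()                 # keep checkpoints from closing the fault window
    case.suspend_faults('filerep_resync')  # reset, then suspend the fault under test
    worker = threading.Thread(
        target=case.run_sqls,
        args=('sub_transaction_limit_removal/workload.sql',))  # hypothetical sql file
    worker.start()
    case.inject_and_resume_fault('filerep_resync', 'failover_to_primary')
    worker.join()
    case.run_post_sqls('filerep_resync', 'failover_to_primary')  # diff .out vs .ans
    case._validation()                     # gpcheckcat + mirror integrity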