Example #1
    def start(self):
        """
        Start the standby postmaster.  The options to pg_ctl need to be
        determined by gppylib logic.
        """

        dburl = dbconn.DbURL()
        gparray = GpArray.initFromCatalog(dburl, utility=True)
        numcontent = gparray.getNumSegmentContents()
        standby = gparray.standbyMaster
        master = gp.MasterStart("Starting Master Standby",
                                self.datadir, self.port, standby.dbid,
                                0, numcontent, None, None, None)
        # -w option would wait forever.
        master.cmdStr = master.cmdStr.replace(' -w', '')
        master.run(validateAfter=True)

        return master.get_results()
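
A minimal usage sketch of this start() helper, assuming it lives on a Standby-style wrapper object that also provides datadir/port attributes and a create() method, as the test in Example #2 uses (the class name, datadir, and port below are illustrative assumptions, not part of this snippet):

# Hypothetical usage; the Standby class, datadir, and port are illustrative.
standby = Standby('/data/standby_datadir', 5433)
assert standby.create() == 0     # initialize the standby data directory first
res = standby.start()            # starts via gp.MasterStart with ' -w' stripped
assert res.wasSuccessful()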
Example #2
    def test_fail_back(self):
        """
        This test verifies that the fail-back mode is not allowed.
        Fail-back means the original master acting as the new standby.
        """

        # Verify that the database is up by running some SQL.
        PSQL.run_sql_command('DROP table if exists foo')
        Command('remove standby', 'gpinitstandby -ra').run()
        self.assertEqual(self.standby.create(), 0)
        res = self.standby.start()
        self.assertTrue(res.wasSuccessful())

        # Wait for the walsender to start (i.e., the standby's walreceiver has connected)
        num_walsender = self.wait_for_walsender()
        self.assertEqual(num_walsender, 1)

        logger.info('Activated WAL Receiver...')

        # Promote the standby & shutdown the old Master
        # Generate a recovery.conf file for the old Master so
        # to make him the new standby that connects to the new
        # master (originally standby)

        logger.info('Promoting the standby...')
        self.standby.promote()

        dburl = dbconn.DbURL()
        gparray = GpArray.initFromCatalog(dburl, utility=True)
        numcontent = gparray.getNumSegmentContents()
        orig_master = gparray.master

        self.standby.remove_catalog_standby(dburl)

        if (os.path.exists(os.path.join(orig_master.datadir, 'wal_rcv.pid'))):
            os.remove(os.path.join(orig_master.datadir, 'wal_rcv.pid'))

        logger.info('Stop the original master...')
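        # gpstop flags: -a (do not prompt), -i (immediate shutdown), -m (master instance only).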
        cmd = Command("gpstop", "gpstop -aim")
        cmd.run()
        self.assertEqual(cmd.get_results().rc, 0, str(cmd))

        logger.info(
            'Generate recovery.conf for original master to make a new standby...'
        )
        master_recv_conf = open(
            os.path.join(orig_master.datadir, 'recovery.conf'), 'w')
        standby_recv_done = open(
            os.path.join(self.standby.datadir, 'recovery.done'))
        for line in standby_recv_done:
            master_recv_conf.write(
                line.replace("port=" + str(os.environ.get('PGPORT')),
                             "port=" + str(self.standby.port)))

        master_recv_conf.close()
        standby_recv_done.close()

        logger.info(
            'Start the old master again (to act as the new standby)...')
        master = gp.MasterStart("Starting orig Master in standby mode",
                                orig_master.datadir, orig_master.port,
                                orig_master.dbid, 0, numcontent, None, None,
                                None)

        # -w option would wait forever.
        master.cmdStr = master.cmdStr.replace(' -w', '')
        master.run(validateAfter=True)
        self.assertTrue((master.get_results()).wasSuccessful())

        # Run some SQL against the new master (the promoted standby) to give
        # the new standby some time to become active.
        subprocess.check_call("psql -c 'create database foo' -p " +
                              str(self.standby.port),
                              shell=True,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
        subprocess.check_call("psql -c 'drop database foo' -p " +
                              str(self.standby.port),
                              shell=True,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)

        time.sleep(3)

        # The new standby can restart, but it should not be able to connect to the
        # new master (originally the standby). That is what this test verifies.
        self.assertTrue(
            os.path.exists(os.path.join(orig_master.datadir, 'wal_rcv.pid')))
        logger.info(
            'The WAL receiver pid file exists, which means the new standby started\n'
            'but could not connect to the new master (originally the standby), so the\n'
            'pid file was not cleared')

        # Remove the recovery.conf file from the new standby directory
        # as it is no longer needed
        os.remove(os.path.join(orig_master.datadir, 'recovery.conf'))

        logger.info('Stop the original master again...')
        # Use subprocess.call so the test waits for pg_ctl to finish
        # before bringing the cluster back with gpstart.
        rc = subprocess.call('pg_ctl stop -D ' + orig_master.datadir +
                             ' -m immediate',
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)

        # Perform gpstart to get the original master (& cluster) back again
        cmd = Command("gpstart", "gpstart -a")
        cmd.run()
        self.assertTrue(cmd.get_results().rc in (0, 1), str(cmd))

        logger.info('Pass')
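
The time.sleep(3) above only gives the new standby a moment to write wal_rcv.pid before the assertion; a short sketch of a polling alternative (the helper name and timeout are assumptions, not part of the original test):

import os
import time

# Hypothetical helper: poll for wal_rcv.pid instead of sleeping a fixed time.
def wait_for_wal_rcv_pid(datadir, timeout=30):
    pid_file = os.path.join(datadir, 'wal_rcv.pid')
    for _ in range(timeout):
        if os.path.exists(pid_file):
            return True
        time.sleep(1)
    return False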
Example #3
class GpactivateStandby(object):
    '''Class for gpactivatestandby operations '''

    standby_port = '5656'
    db_name = 'walrepl'

    def __init__(self):
        self.gpinit = GpinitStandby()
        self.pgutil = GpUtility()
        self.runmixin = StandbyRunMixin()
        self.runmixin.createdb(dbname='walrepl')
        self.gphome = os.environ.get('GPHOME')
        self.pgport = os.environ.get('PGPORT')
        self.mdd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.config = GPDBConfig()
        self.host = socket.gethostname()

        dburl = dbconn.DbURL()
        gparray = GpArray.initFromCatalog(dburl, utility=True)
        self.numcontent = gparray.getNumSegmentContents()
        self.orig_master = gparray.master

    def run_remote(self, standbyhost, rmt_cmd, pgport='', standbydd=''):
        '''Runs a remote command and returns (rc, stdout) '''
        export_cmd = "source %s/greenplum_path.sh;export PGPORT=%s;export MASTER_DATA_DIRECTORY=%s" % (
            self.gphome, pgport, standbydd)
        remote_cmd = "gpssh -h %s -e '%s; %s'" % (standbyhost, export_cmd,
                                                  rmt_cmd)
        cmd = Command(name='Running Remote command', cmdStr='%s' % remote_cmd)
        tinctest.logger.info(" %s" % cmd)
        cmd.run(validateAfter=False)
        result = cmd.get_results()
        return result.rc, result.stdout

    def activate(self, option=''):
        ''' Stop the master and activate the current standby as the new master'''
        standby_host = self.get_current_standby()
        standby_port = self.get_standby_port()
        standby_loc = self.get_standby_dd()

        self.run_remote(self.host,
                        'gpstop -aim',
                        pgport=self.pgport,
                        standbydd=self.mdd)

        gpactivate_cmd = 'gpactivatestandby -a -d %s %s' % (standby_loc,
                                                            option)
        (rc, result) = self.run_remote(standby_host,
                                       gpactivate_cmd,
                                       pgport=standby_port,
                                       standbydd=standby_loc)
        tinctest.logger.info(
            'Result of activating standby without force option: %s' % result)
        if (rc != 0) and result.find('Force activation required') != -1:
            tinctest.logger.info(
                'activating standby failed, try force activation...')
            gpactivate_cmd = 'gpactivatestandby -a -f -d %s %s' % (standby_loc,
                                                                   option)
            (rc, result) = self.run_remote(standby_host,
                                           gpactivate_cmd,
                                           pgport=standby_port,
                                           standbydd=standby_loc)
            if (rc != 0):
                tinctest.logger.error('Force activating standby failed!')
                return False
        tinctest.logger.info('standby activated on host %s' % standby_host)
        return True

    def remove_standby(self):
        return self.gpinit.run(option='-r')

    def failback_to_original_master(self):
        # Check if master is running.
        bashCmd = (self.gphome +
                   "/bin/pg_ctl status -D $MASTER_DATA_DIRECTORY"
                   " | grep 'pg_ctl: server is running'")
        cmd = Command(name='Running cmd %s' % bashCmd,
                      cmdStr="source %s/greenplum_path.sh; %s" %
                      (self.gphome, bashCmd))
        try:
            cmd.run()
        except Exception as e:
            tinctest.logger.error("Error running command %s\n" % e)
            return

        result = cmd.get_results()
        out = result.stdout
        if not out:
            tinctest.logger.info('Start the old master again ...')
            master = gp.MasterStart("Starting orig Master",
                                    self.orig_master.datadir,
                                    self.orig_master.port,
                                    self.orig_master.dbid, 0, self.numcontent,
                                    None, None, None)
            master.run(validateAfter=True)
            result = master.get_results()
            tinctest.logger.info('orig Master started result : %s' %
                                 result.stdout)
            if result.rc != 0:
                raise WalReplException(
                    'Unable to start original master process')
            Command('gpinitstandby -ra', 'gpinitstandby -ra').run()
            # Failing back to the old master; it takes a little while for the
            # cluster to be ready for connections.
            if os.path.exists(local_path('drop_filespace.sql')):
                PSQL.run_sql_file(local_path('drop_filespace.sql'),
                                  dbname=self.db_name)
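
A minimal usage sketch of GpactivateStandby, assuming a standby has already been created (for example via GpinitStandby) and that PGPORT and MASTER_DATA_DIRECTORY point at the running master (both assumptions about the surrounding test setup):

# Hypothetical usage in a fail-over / fail-back scenario.
gpact = GpactivateStandby()
if gpact.activate():                      # stop the master and promote the standby
    # ... run verification queries against the promoted standby here ...
    gpact.failback_to_original_master()   # restart the original master and clean up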