def invoke_sigterm_and_verify(self):
    '''
    Invoke SIGTERM on the standby's wal receiver and verify that a
    different wal receiver process is found afterwards.

    Returns True when the kill command exits with rc 0 and the wal
    receiver PID looked up after the signal differs from the PID looked
    up before it; returns False otherwise.
    '''
    gpact_stdby = GpactivateStandby()
    standby_host = gpact_stdby.get_current_standby()
    standby_port = gpact_stdby.get_standby_port()

    def lookup_wal_receiver_pid():
        # The identical lookup is needed before and after the signal.
        return self.pgutil.get_pid_by_keyword(
            host=standby_host, pgport=standby_port,
            keyword='wal receiver process', option='')

    pid_before = lookup_wal_receiver_pid()

    # kill -15 == SIGTERM, delivered on the standby host through gpssh.
    sig_cmd = "gpssh -h %s -e 'kill -15 %s'" % (standby_host, pid_before)
    # Description previously read "wam receiver process" (typo for "wal").
    cmd = Command('Issue SIGTERM to wal receiver process', cmdStr=sig_cmd)
    tinctest.logger.info('%s' % cmd)
    cmd.run(validateAfter=True)
    if cmd.get_results().rc != 0:
        return False

    # An unchanged PID means the signalled process was not replaced.
    return pid_before != lookup_wal_receiver_pid()
def test_inject_primary_after_promote(self):
    """
    Activate the standby, panic the primary segments, then run
    gprecoverseg (recover, then rebalance) from the new master.
    """
    tinctest.logger.info("-failover to standby, inject primary segments, and recoverseg")

    # Record the standby's host, data directory and port, then promote it.
    standby = GpactivateStandby()
    self.standby_host = standby.get_current_standby()
    self.standby_mdd = standby.get_standby_dd()
    self.standby_port = standby.get_standby_port()
    standby.activate()

    def on_new_master(remote_cmd):
        # Every remote command in this test targets the promoted standby.
        return standby.run_remote(self.standby_host, remote_cmd,
                                  self.standby_port, self.standby_mdd)

    # Bring down the primary segments.
    on_new_master('gpfaultinjector -f postmaster -m async -y panic -r primary -H ALL')

    # Wait until the segments report change tracking.
    self.wait_till_changetracking_transition(self.standby_host, self.standby_port)

    # Recover the segments from the new master.
    (_, recover_out) = on_new_master('gprecoverseg -a')
    tinctest.logger.info("in test_inject_primary_after_promote: gprecoverseg -a: %s" % recover_out)
    self.wait_till_insync_transition(standby, self.standby_host, self.standby_port,
                                     self.standby_mdd,
                                     'Segment Pairs in Resynchronization', False)

    # Rebalance from the new master.
    (_, rebalance_out) = on_new_master('gprecoverseg -ra')
    tinctest.logger.info("in test_inject_primary_after_promote: gprecoverseg -ar: %s" % rebalance_out)

    # Finally wait for every segment to be reported as running normally.
    self.wait_till_insync_transition(standby, self.standby_host, self.standby_port,
                                     self.standby_mdd,
                                     'All segments are running normally', True)
def test_inject_mirror_after_promote(self):
    """
    Activate the standby, fault the mirror segments, then run
    gprecoverseg from the new master.
    """
    tinctest.logger.info("-failover to standby, inject mirror segments, and recoverseg")

    # Record the standby's host, data directory and port, then promote it.
    standby = GpactivateStandby()
    self.standby_host = standby.get_current_standby()
    self.standby_mdd = standby.get_standby_dd()
    self.standby_port = standby.get_standby_port()
    standby.activate()

    def on_new_master(remote_cmd):
        # Every remote command in this test targets the promoted standby.
        return standby.run_remote(self.standby_host, remote_cmd,
                                  self.standby_port, self.standby_mdd)

    # Fault the mirror segments from the new master.
    on_new_master('gpfaultinjector -f filerep_consumer -m async -y fault -r mirror -H ALL')

    # Wait until the segments report change tracking.
    self.wait_till_changetracking_transition(self.standby_host, self.standby_port)

    # Recover the segments from the new master.
    (_, recover_out) = on_new_master('gprecoverseg -a')
    tinctest.logger.info("in test_inject_mirror_after_promote: gprecoverseg -a: %s" % recover_out)

    # Wait for every segment to be reported as running normally.
    self.wait_till_insync_transition(standby, self.standby_host, self.standby_port,
                                     self.standby_mdd,
                                     'All segments are running normally', True)
def test_failover_insync(self):
    """
    Fault the mirror segments, suspend resync, activate the standby,
    then resume and reset the resync fault from the new master.
    """
    tinctest.logger.info("-failover to standby in resync and recoverseg")

    # Record the standby's host, data directory and port (not promoted yet).
    standby = GpactivateStandby()
    self.standby_host = standby.get_current_standby()
    self.standby_mdd = standby.get_standby_dd()
    self.standby_port = standby.get_standby_port()

    # Steps issued locally, before the standby is activated.
    mirror_fault = Command(
        'fault inject mirror segment',
        'gpfaultinjector -f filerep_consumer -m async -y fault -r mirror -H ALL')
    mirror_fault.run()
    self.wait_till_changetracking_transition('localhost', os.environ['PGPORT'])

    suspend_resync = Command(
        'Injecting fault to suspend resync',
        'gpfaultinjector -f filerep_resync -m async -y suspend -r primary -H ALL')
    suspend_resync.run()
    recover = Command('recover and suspend in insync', 'gprecoverseg -a')
    recover.run()

    standby.activate()

    # Resume, then reset, the resync fault from the promoted standby.
    resync_faults = (
        'gpfaultinjector -f filerep_resync -m async -y resume -r primary -H ALL',
        'gpfaultinjector -f filerep_resync -m async -y reset -r primary -H ALL',
    )
    for fault_cmd in resync_faults:
        standby.run_remote(self.standby_host, fault_cmd,
                           self.standby_port, self.standby_mdd)

    # Wait for every segment to be reported as running normally.
    self.wait_till_insync_transition(standby, self.standby_host, self.standby_port,
                                     self.standby_mdd,
                                     'All segments are running normally', True)
def test_failover_in_change_track(self):
    """
    Fault the mirror segments, activate the standby, then run
    gprecoverseg from the new master.
    """
    tinctest.logger.info(
        "-failover to standby in change tracking and recoverseg")

    # Record the standby's host, data directory and port (not promoted yet).
    standby = GpactivateStandby()
    self.standby_host = standby.get_current_standby()
    self.standby_mdd = standby.get_standby_dd()
    self.standby_port = standby.get_standby_port()

    # Fault the mirror segments before the standby is activated.
    mirror_fault = Command(
        'fault inject mirror segment',
        'gpfaultinjector -f filerep_consumer -m async -y fault -r mirror -H ALL')
    mirror_fault.run()

    standby.activate()

    # Wait until the segments report change tracking on the new master.
    self.wait_till_changetracking_transition(self.standby_host, self.standby_port)

    (_, recover_out) = standby.run_remote(
        self.standby_host, 'gprecoverseg -a', self.standby_port, self.standby_mdd)
    tinctest.logger.info(
        "in test_failover_in_change_track: gprecoverseg -a: %s" % recover_out)

    # Wait for every segment to be reported as running normally.
    self.wait_till_insync_transition(
        standby, self.standby_host, self.standby_port, self.standby_mdd,
        'All segments are running normally', True)
def test_gpstop_after_failover(self):
    """
    Activate the standby, check that a fast gpstop issued through it
    returns 0, restart the cluster and fail back to the original master.
    """
    tinctest.logger.info("test gpstop from new master after failover")

    standby = GpactivateStandby()
    host = standby.get_current_standby()
    port = standby.get_standby_port()
    mdd = standby.get_standby_dd()
    standby.activate()

    # Stop the cluster from the promoted standby; only the return code matters.
    (stop_rc, _) = standby.run_remote(host, rmt_cmd='gpstop -a -M fast',
                                      pgport=port, standbydd=mdd)
    self.assertEqual(0, stop_rc)

    # Bring the cluster back up before restoring the original master.
    standby.run_remote(host, rmt_cmd='gpstart -a', pgport=port, standbydd=mdd)
    self.gputil.failback_to_original_master(self.origin_mdd, host, mdd, port)
def test_gpstop_after_failover(self):
    """
    After activating the standby, run 'gpstop -a -M fast' and then
    'gpstart -a' through it, asserting the stop returns 0, and finally
    fail back to the original master.
    """
    tinctest.logger.info("test gpstop from new master after failover")

    promoted = GpactivateStandby()
    new_master_host = promoted.get_current_standby()
    new_master_port = promoted.get_standby_port()
    new_master_dd = promoted.get_standby_dd()
    promoted.activate()

    # Keyword arguments shared by both remote invocations.
    remote_target = dict(pgport=new_master_port, standbydd=new_master_dd)

    (rc, _unused_out) = promoted.run_remote(
        new_master_host, rmt_cmd='gpstop -a -M fast', **remote_target)
    self.assertEqual(0, rc)

    promoted.run_remote(new_master_host, rmt_cmd='gpstart -a', **remote_target)

    self.gputil.failback_to_original_master(
        self.origin_mdd, new_master_host, new_master_dd, new_master_port)
def test_inject_primary_after_promote(self):
    """
    Promote the standby, panic the primary segments, and drive the
    cluster back to normal with gprecoverseg from the new master.
    """
    tinctest.logger.info(
        "-failover to standby, inject primary segments, and recoverseg")

    # Standby host, master data directory and port, captured before promotion.
    promoted = GpactivateStandby()
    self.standby_host = promoted.get_current_standby()
    self.standby_mdd = promoted.get_standby_dd()
    self.standby_port = promoted.get_standby_port()
    promoted.activate()

    # Bring down the primary segments.
    promoted.run_remote(
        self.standby_host,
        'gpfaultinjector -f postmaster -m async -y panic -r primary -H ALL',
        self.standby_port, self.standby_mdd)

    # Wait until the segments report change tracking.
    self.wait_till_changetracking_transition(self.standby_host,
                                             self.standby_port)

    # Step 1: recover the segments from the new master.
    recover_result = promoted.run_remote(
        self.standby_host, 'gprecoverseg -a',
        self.standby_port, self.standby_mdd)
    (rc, stdout) = recover_result
    tinctest.logger.info(
        "in test_inject_primary_after_promote: gprecoverseg -a: %s" % stdout)
    resync_keyword = 'Segment Pairs in Resynchronization'
    self.wait_till_insync_transition(
        promoted, self.standby_host, self.standby_port, self.standby_mdd,
        resync_keyword, False)

    # Step 2: rebalance from the new master.
    rebalance_result = promoted.run_remote(
        self.standby_host, 'gprecoverseg -ra',
        self.standby_port, self.standby_mdd)
    (rc, stdout) = rebalance_result
    tinctest.logger.info(
        "in test_inject_primary_after_promote: gprecoverseg -ar: %s" % stdout)

    # Step 3: wait for every segment to be reported as running normally.
    normal_keyword = 'All segments are running normally'
    self.wait_till_insync_transition(
        promoted, self.standby_host, self.standby_port, self.standby_mdd,
        normal_keyword, True)
def test_gpstart_master_after_failover(self):
    """
    failover, restart the cluster from the new master, then recover the
    cluster back to have the old master active.

    The gpstart output from the new master must contain neither FATAL
    nor ERROR.
    """
    tinctest.logger.info("failover, and run gpstart master test")
    self.gputil.check_and_start_gpdb()

    activatestdby = GpactivateStandby()
    standby_host = activatestdby.get_current_standby()
    standby_mdd = activatestdby.get_standby_dd()
    standby_port = activatestdby.get_standby_port()
    activatestdby.activate()

    # Restart the cluster from the promoted standby.
    self.stdby._run_remote_command(standby_host, command='gpstop -a')
    stdout = self.stdby._run_remote_command(standby_host, command='gpstart -a')

    # The third positional argument of assertNotRegexpMatches is the
    # failure message, so the previous (stdout, "FATAL", "ERROR") call
    # only ever searched for "FATAL".  Search for both words explicitly.
    self.assertNotRegexpMatches(
        stdout, "FATAL|ERROR",
        "gpstart on the new master reported FATAL or ERROR")

    self.assertTrue(self.gputil.gpstart_and_verify(master_dd=standby_mdd,
                                                   host=standby_host))
    self.gputil.failback_to_original_master(self.origin_mdd, standby_host,
                                            standby_mdd, standby_port)
def invoke_sigterm_and_verify(self):
    '''
    Invoke SIGTERM on the standby's wal receiver and verify that a
    different wal receiver process is found afterwards.

    Returns True when the kill command exits with rc 0 and the wal
    receiver PID looked up after the signal differs from the PID looked
    up before it; returns False otherwise.
    '''
    gpact_stdby = GpactivateStandby()
    standby_host = gpact_stdby.get_current_standby()
    standby_port = gpact_stdby.get_standby_port()

    # Arguments of the PID lookup, reused for the before/after queries.
    pid_query = dict(host=standby_host, pgport=standby_port,
                     keyword='wal receiver process', option='')

    wal_rec_pid_1 = self.pgutil.get_pid_by_keyword(**pid_query)

    # kill -15 == SIGTERM, delivered on the standby host through gpssh.
    sig_cmd = "gpssh -h %s -e 'kill -15 %s'" % (standby_host, wal_rec_pid_1)
    # Description previously read "wam receiver process" (typo for "wal").
    cmd = Command('Issue SIGTERM to wal receiver process', cmdStr=sig_cmd)
    tinctest.logger.info('%s' % cmd)
    cmd.run(validateAfter=True)
    result = cmd.get_results()
    if result.rc != 0:
        return False

    wal_rec_pid_2 = self.pgutil.get_pid_by_keyword(**pid_query)
    # An unchanged PID means the signalled process was not replaced.
    return wal_rec_pid_1 != wal_rec_pid_2
def test_gpstart_original_master_after_promote(self):
    """
    Activate the standby, then check that 'gpstart -a -v' run through
    gputil fails with the "Standby activated" message, and finally fail
    back to the original master.
    """
    tinctest.logger.info("activate and run gpstart for original master")

    promoted = GpactivateStandby()
    new_master_host = promoted.get_current_standby()
    new_master_dd = promoted.get_standby_dd()
    new_master_port = promoted.get_standby_port()
    promoted.activate()

    (gpstart_rc, gpstart_out) = self.gputil.run('gpstart -a -v')

    # Run pg_controldata against both data directories; results are not checked here.
    self.gputil.run('pg_controldata %s' % self.origin_mdd)
    self.stdby._run_remote_command(
        new_master_host, command='pg_controldata %s' % new_master_dd)

    self.assertNotEqual(gpstart_rc, 0)
    # This error message comes from gpstart product code
    # (if it is modified there, change it here as well).
    expected_error = "Standby activated, this node no more can act as master."
    self.assertRegexpMatches(gpstart_out, expected_error)

    self.gputil.failback_to_original_master(
        self.origin_mdd, new_master_host, new_master_dd, new_master_port)
def test_gpstart_master_only_after_failover(self):
    """
    Activate the standby, restart it in master-only mode (gpstart -m)
    and check the output has no ERROR, then fail back to the original
    master.
    """
    tinctest.logger.info("start master only with -m option after failover")

    promoted = GpactivateStandby()
    new_master_host = promoted.get_current_standby()
    new_master_dd = promoted.get_standby_dd()
    new_master_port = promoted.get_standby_port()
    promoted.activate()

    def on_new_master(shell_cmd):
        # Remote commands in this test all go to the promoted standby host.
        return self.stdby._run_remote_command(new_master_host, command=shell_cmd)

    on_new_master('gpstop -a')
    master_only_out = on_new_master(
        'export GPSTART_INTERNAL_MASTER_ONLY=1; gpstart -a -m')
    self.assertNotRegexpMatches(master_only_out, "ERROR",
                                "Start master only after failover failed")

    verified = self.gputil.gpstart_and_verify(master_dd=new_master_dd,
                                              host=new_master_host)
    self.assertTrue(verified)

    # Stop the master-only instance, restart the cluster, then fail back.
    on_new_master('gpstop -a -m')
    self.gputil.run(command='gpstop -ar')
    self.gputil.failback_to_original_master(
        self.origin_mdd, new_master_host, new_master_dd, new_master_port)
def test_failover_insync(self):
    """
    Fault the mirrors, suspend resync and start recovery, activate the
    standby, then resume and reset the resync fault from the new master.
    """
    tinctest.logger.info("-failover to standby in resync and recoverseg")

    # Standby host, master data directory and port (standby not promoted yet).
    promoted = GpactivateStandby()
    self.standby_host = promoted.get_current_standby()
    self.standby_mdd = promoted.get_standby_dd()
    self.standby_port = promoted.get_standby_port()

    def run_local(description, command_line):
        # Local commands issued before the standby is activated.
        Command(description, command_line).run()

    def run_on_new_master(command_line):
        promoted.run_remote(self.standby_host, command_line,
                            self.standby_port, self.standby_mdd)

    # Bring down the mirror segments.
    run_local(
        'fault inject mirror segment',
        'gpfaultinjector -f filerep_consumer -m async -y fault -r mirror -H ALL')

    # Wait until the segments report change tracking.
    self.wait_till_changetracking_transition('localhost',
                                             os.environ['PGPORT'])

    # Suspend resync, then start recovery.
    run_local(
        'Injecting fault to suspend resync',
        'gpfaultinjector -f filerep_resync -m async -y suspend -r primary -H ALL')
    run_local('recover and suspend in insync', 'gprecoverseg -a')

    promoted.activate()

    # Resume the suspended resync from the new master ...
    run_on_new_master(
        'gpfaultinjector -f filerep_resync -m async -y resume -r primary -H ALL')
    # ... and reset the resync fault.
    run_on_new_master(
        'gpfaultinjector -f filerep_resync -m async -y reset -r primary -H ALL')

    # Wait for every segment to be reported as running normally.
    self.wait_till_insync_transition(
        promoted, self.standby_host, self.standby_port, self.standby_mdd,
        'All segments are running normally', True)
class GpstateTestCase(MPPTestCase):
    '''Test cases for gpstate, run before and after activating the standby master.'''

    # Master data directory of the original master, used when failing back.
    origin_mdd = os.environ.get('MASTER_DATA_DIRECTORY')

    def __init__(self, methodName):
        self.gputil = GpUtility()
        # Flat list of mirror port / hostname / data directory values,
        # filled in by setUp and cleared by tearDown.
        self.mirrorConfig = []
        self.master_port = os.environ.get('PGPORT')
        self.masterdd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.activatestdby = ""
        super(GpstateTestCase, self).__init__(methodName)

    def setUp(self):
        '''Start the cluster, (re)install the standby and record the mirror configuration.'''
        self.gputil.check_and_start_gpdb()
        if self.gputil.check_standby_presence():
            self.gputil.remove_standby()
        self.gputil.install_standby()

        get_mirror_sql = '''select port, hostname, fselocation from gp_segment_configuration, pg_filespace_entry where dbid = fsedbid and content != -1 and preferred_role=\'m\' ;'''
        segments = self.gputil.run_SQLQuery(get_mirror_sql, dbname='template1')
        for seg in segments:
            # Row layout follows the select list: port, hostname, fselocation.
            self.mirrorConfig.extend((seg[0], seg[1], seg[2]))
        self.activatestdby = GpactivateStandby()

    def tearDown(self):
        '''Forget the recorded mirror configuration and remove the standby.'''
        del self.mirrorConfig[:]
        self.gputil.remove_standby()

    @staticmethod
    def _iter_settings(stdout):
        '''Yield (line, value) for every output line that contains "=".

        value is the stripped text between the first and second "=".
        '''
        for line in stdout.split('\n'):
            if "=" not in line:
                continue
            yield line, line.split('=')[1].strip()

    def test_gpstate_disp_recovery(self):
        ''' run gpstate with -f option and compare the reported standby details'''
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        standby_pid = self.gputil.get_pid_by_keyword(
            host=standby_host, user=os.environ.get('USER'),
            pgport=standby_port, keyword='master', option='bin')

        (rc, stdout) = self.gputil.run('gpstate -f')
        self.assertEqual(rc, 0)

        for line, value in self._iter_settings(stdout):
            if "Standby address" in line:
                self.assertEqual(value, standby_host)
            elif "Standby data directory" in line:
                self.assertEqual(value, standby_dir)
            elif "Standby port" in line:
                self.assertEqual(int(value), int(standby_port))
            elif "Standby PID" in line:
                self.assertEqual(value, standby_pid)

    def test_gpstate_disp_failover(self):
        '''test if the master configuration detail changed after activating standby'''
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        standby_pid = self.gputil.get_pid_by_keyword(
            host=standby_host, user=os.environ.get('USER'),
            pgport=standby_port, keyword='master', option='bin')
        self.activatestdby.activate()

        (rc, stdout) = self.activatestdby.run_remote(
            standby_host, rmt_cmd='gpstate -s',
            pgport=standby_port, standbydd=standby_dir)
        self.assertIn(rc, (0, 1))

        for line, value in self._iter_settings(stdout):
            if "Master host" in line:
                self.assertEqual(value, standby_host)
            elif "Master postgres process ID" in line:
                self.assertEqual(value, standby_pid)
            elif "Master data directory" in line:
                self.assertEqual(value, standby_dir)
            elif "Master port" in line:
                self.assertEqual(int(value), int(standby_port))

        self.gputil.failback_to_original_master(
            self.origin_mdd, standby_host, standby_dir, standby_port)

    def test_gpstate_active_segment_failover(self):
        ''' test if gpstate show correct # of up and down nodes after failover'''
        count_up_seg = '''select count(*) from gp_segment_configuration where content != -1 and status = \'u\';'''
        count_down_seg = '''select count(*) from gp_segment_configuration where content != -1 and status = \'d\';'''
        # Counts are taken before the standby is activated.
        number_up_segment = PSQL.run_sql_command(
            count_up_seg, flags='-q -t', dbname='template1')
        number_down_segment = PSQL.run_sql_command(
            count_down_seg, flags='-q -t', dbname='template1')

        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        self.activatestdby.activate()

        (rc, stdout) = self.activatestdby.run_remote(
            standby_host, rmt_cmd='gpstate -Q',
            pgport=standby_port, standbydd=standby_dir)
        self.assertIn(rc, (0, 1))

        for line, value in self._iter_settings(stdout):
            if "up segments" in line:
                self.assertEqual(number_up_segment.strip(), value)
            elif "down segments" in line:
                self.assertEqual(number_down_segment.strip(), value)

        self.gputil.failback_to_original_master(
            self.origin_mdd, standby_host, standby_dir, standby_port)

    def test_gpstate_disp_mirror_failover(self):
        ''' check if new master is able to get correct mirror configuration with gpstate -m'''
        keywords = ("Mirror", "Datadir", "Port")
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        self.activatestdby.activate()

        (rc, stdout) = self.activatestdby.run_remote(
            standby_host, rmt_cmd='gpstate -m',
            pgport=standby_port, standbydd=standby_dir)
        self.assertEqual(rc, 0)

        # gpstate output is text, so compare against stringified config
        # values regardless of the types returned by the SQL query.
        known_values = set(str(value) for value in self.mirrorConfig)

        # The previous loop iterated over stdout character by character
        # and used line.split(''), which raises ValueError; the header
        # was therefore never found and nothing was verified.
        inside_block = False
        rows_checked = 0
        for line in stdout.split('\n'):
            if not inside_block:
                # Skip everything up to and including the table header.
                inside_block = all(word in line for word in keywords)
                continue
            fields = line.split()
            if len(fields) < 5:
                # First line that is not a table row: the table is over.
                break
            # Fields 0-1 are presumably the log prefix; the original code
            # read host, data directory and port from fields 2, 3 and 4.
            mirror_host, mirror_dir, mirror_port = fields[2:5]
            self.assertIn(mirror_host, known_values)
            self.assertIn(mirror_dir, known_values)
            self.assertIn(mirror_port, known_values)
            rows_checked += 1

        if self.mirrorConfig:
            # Mirrors exist, so an empty table would be a vacuous pass.
            self.assertTrue(rows_checked > 0,
                            "no mirror rows found in gpstate -m output")

        self.gputil.failback_to_original_master(
            self.origin_mdd, standby_host, standby_dir, standby_port)