def test_inject_primary_after_promote(self):
    """
    Promote the standby, bring down the primary segments, run gprecoverseg.

    After the first recovery the segments are rebalanced with
    ``gprecoverseg -ra``; both commands are issued on the promoted standby.
    """
    tinctest.logger.info("-failover to standby, inject primary segments, and recoverseg")

    # Record where the standby lives, then promote it.
    stdby = GpactivateStandby()
    self.standby_host = stdby.get_current_standby()
    self.standby_mdd = stdby.get_standby_dd()
    self.standby_port = stdby.get_standby_port()
    stdby.activate()

    def on_new_master(command):
        # Every remote command in this test targets the promoted standby.
        return stdby.run_remote(self.standby_host, command,
                                self.standby_port, self.standby_mdd)

    def wait_for(keyword, flag):
        self.wait_till_insync_transition(stdby, self.standby_host,
                                         self.standby_port, self.standby_mdd,
                                         keyword, flag)

    # Panic the postmaster of every primary segment.
    on_new_master('gpfaultinjector -f postmaster -m async -y panic -r primary -H ALL')

    # Wait till segments come up in change tracking.
    self.wait_till_changetracking_transition(self.standby_host, self.standby_port)

    # Recover the segments from the new master.
    _, recover_out = on_new_master('gprecoverseg -a')
    tinctest.logger.info("in test_inject_primary_after_promote: gprecoverseg -a: %s" % recover_out)
    wait_for('Segment Pairs in Resynchronization', False)

    # Rebalance from the new master.
    _, rebalance_out = on_new_master('gprecoverseg -ra')
    tinctest.logger.info("in test_inject_primary_after_promote: gprecoverseg -ar: %s" % rebalance_out)

    # Check that all segments are up and in sync.
    wait_for('All segments are running normally', True)
def test_inject_mirror_after_promote(self):
    """
    Promote the standby, bring down the mirror segments, run gprecoverseg.
    """
    tinctest.logger.info("-failover to standby, inject mirror segments, and recoverseg")

    # Record where the standby lives, then promote it.
    stdby = GpactivateStandby()
    self.standby_host = stdby.get_current_standby()
    self.standby_mdd = stdby.get_standby_dd()
    self.standby_port = stdby.get_standby_port()
    stdby.activate()

    def on_new_master(command):
        # Every remote command in this test targets the promoted standby.
        return stdby.run_remote(self.standby_host, command,
                                self.standby_port, self.standby_mdd)

    # Inject the fault into the mirror segments from the new master.
    on_new_master('gpfaultinjector -f filerep_consumer -m async -y fault -r mirror -H ALL')

    # Wait till segments come up in change tracking.
    self.wait_till_changetracking_transition(self.standby_host, self.standby_port)

    # Recover the segments from the new master.
    _, recover_out = on_new_master('gprecoverseg -a')
    tinctest.logger.info("in test_inject_mirror_after_promote: gprecoverseg -a: %s" % recover_out)

    # Check that all segments are up and in sync.
    healthy_keyword = 'All segments are running normally'
    self.wait_till_insync_transition(stdby, self.standby_host, self.standby_port,
                                     self.standby_mdd, healthy_keyword, True)
def test_failover_insync(self):
    """
    Bring down the mirror segments, suspend them in resync mode, fail over
    to the standby, then resume resync from the new master.
    """
    tinctest.logger.info("-failover to standby in resync and recoverseg")

    # Record where the standby lives; it is promoted further down.
    stdby = GpactivateStandby()
    self.standby_host = stdby.get_current_standby()
    self.standby_mdd = stdby.get_standby_dd()
    self.standby_port = stdby.get_standby_port()

    # Bring down the mirror segments (run locally, before the failover).
    mirror_fault = Command('fault inject mirror segment',
                           'gpfaultinjector -f filerep_consumer -m async -y fault -r mirror -H ALL')
    mirror_fault.run()

    # Wait till segments come up in change tracking.
    self.wait_till_changetracking_transition('localhost', os.environ['PGPORT'])

    # Suspend resync first so that the recovery below stalls in resync.
    suspend_resync = Command('Injecting fault to suspend resync',
                             'gpfaultinjector -f filerep_resync -m async -y suspend -r primary -H ALL')
    suspend_resync.run()
    recover = Command('recover and suspend in insync', 'gprecoverseg -a')
    recover.run()

    stdby.activate()

    # From the new master: resume the suspended resync, then reset the fault.
    for action in ('resume', 'reset'):
        fault_cmd = 'gpfaultinjector -f filerep_resync -m async -y %s -r primary -H ALL' % action
        stdby.run_remote(self.standby_host, fault_cmd, self.standby_port, self.standby_mdd)

    # Check that all segments are up and in sync.
    healthy_keyword = 'All segments are running normally'
    self.wait_till_insync_transition(stdby, self.standby_host, self.standby_port,
                                     self.standby_mdd, healthy_keyword, True)
def test_gpstop_after_failover(self):
    """
    Stop and restart the cluster from the promoted standby, then fail back
    to the original master.
    """
    tinctest.logger.info("test gpstop from new master after failover")

    promoter = GpactivateStandby()
    standby_host = promoter.get_current_standby()
    standby_port = promoter.get_standby_port()
    standby_mdd = promoter.get_standby_dd()
    promoter.activate()

    # Both remote commands share the same connection details.
    remote_args = dict(pgport=standby_port, standbydd=standby_mdd)

    stop_rc, _ = promoter.run_remote(standby_host, rmt_cmd='gpstop -a -M fast', **remote_args)
    self.assertEqual(0, stop_rc)

    # Bring the cluster back up before failing back.
    promoter.run_remote(standby_host, rmt_cmd='gpstart -a', **remote_args)
    self.gputil.failback_to_original_master(self.origin_mdd, standby_host,
                                            standby_mdd, standby_port)
def test_gpstop_after_failover(self):
    """
    Run gpstop and gpstart on the promoted standby, then fail back to the
    original master.
    """
    tinctest.logger.info("test gpstop from new master after failover")

    activator = GpactivateStandby()
    standby_host = activator.get_current_standby()
    standby_port = activator.get_standby_port()
    standby_mdd = activator.get_standby_dd()
    activator.activate()

    def run_on_standby(command):
        # Remote execution always uses the standby's port and data directory.
        return activator.run_remote(standby_host, rmt_cmd=command,
                                    pgport=standby_port, standbydd=standby_mdd)

    stop_result = run_on_standby('gpstop -a -M fast')
    (rc, stdout) = stop_result
    self.assertEqual(0, rc)

    # Restart so the fail-back below has a running cluster to work with.
    run_on_standby('gpstart -a')
    self.gputil.failback_to_original_master(self.origin_mdd, standby_host,
                                            standby_mdd, standby_port)
def test_inject_primary_after_promote(self):
    """
    Promote the standby, bring down the primary segments, run gprecoverseg.

    The recovery is followed by a rebalance (``gprecoverseg -ra``); both are
    executed on the promoted standby.
    """
    tinctest.logger.info("-failover to standby, inject primary segments, and recoverseg")

    # Remember the standby's host, data directory and port, then promote it.
    promoter = GpactivateStandby()
    self.standby_host = promoter.get_current_standby()
    self.standby_mdd = promoter.get_standby_dd()
    self.standby_port = promoter.get_standby_port()
    promoter.activate()

    def remote(command):
        # All commands below run against the promoted standby.
        return promoter.run_remote(self.standby_host, command,
                                   self.standby_port, self.standby_mdd)

    def await_state(keyword, flag):
        self.wait_till_insync_transition(promoter, self.standby_host,
                                         self.standby_port, self.standby_mdd,
                                         keyword, flag)

    # Bring down the primary segments.
    remote('gpfaultinjector -f postmaster -m async -y panic -r primary -H ALL')

    # Wait till segments come up in change tracking.
    self.wait_till_changetracking_transition(self.standby_host, self.standby_port)

    # Recover from the new master.
    _, recover_out = remote('gprecoverseg -a')
    tinctest.logger.info("in test_inject_primary_after_promote: gprecoverseg -a: %s" % recover_out)
    await_state('Segment Pairs in Resynchronization', False)

    # Rebalance from the new master.
    _, rebalance_out = remote('gprecoverseg -ra')
    tinctest.logger.info("in test_inject_primary_after_promote: gprecoverseg -ar: %s" % rebalance_out)

    # Check that all segments are up and in sync.
    await_state('All segments are running normally', True)
def test_failover_in_change_track(self):
    """
    Bring down the mirror segments, fail over to the standby, then run
    gprecoverseg from the new master.
    """
    tinctest.logger.info("-failover to standby in change tracking and recoverseg")

    # Record where the standby lives; it is promoted after the fault.
    promoter = GpactivateStandby()
    self.standby_host = promoter.get_current_standby()
    self.standby_mdd = promoter.get_standby_dd()
    self.standby_port = promoter.get_standby_port()

    # Bring down the mirror segments (run locally, before the failover).
    mirror_fault = Command(
        'fault inject mirror segment',
        'gpfaultinjector -f filerep_consumer -m async -y fault -r mirror -H ALL')
    mirror_fault.run()

    promoter.activate()

    # Wait till segments come up in change tracking.
    self.wait_till_changetracking_transition(self.standby_host, self.standby_port)

    _, recover_out = promoter.run_remote(self.standby_host, 'gprecoverseg -a',
                                         self.standby_port, self.standby_mdd)
    tinctest.logger.info("in test_failover_in_change_track: gprecoverseg -a: %s" % recover_out)

    # Check that all segments are up and in sync.
    healthy_keyword = 'All segments are running normally'
    self.wait_till_insync_transition(promoter, self.standby_host, self.standby_port,
                                     self.standby_mdd, healthy_keyword, True)
def test_failover_insync(self):
    """
    Bring down the mirror segments, suspend them in resync mode, fail over
    to the standby, then resume resync from the new master.
    """
    tinctest.logger.info("-failover to standby in resync and recoverseg")

    # Remember the standby's host, data directory and port for later.
    promoter = GpactivateStandby()
    self.standby_host = promoter.get_current_standby()
    self.standby_mdd = promoter.get_standby_dd()
    self.standby_port = promoter.get_standby_port()

    # Bring down the mirror segments (run locally, before the failover).
    Command('fault inject mirror segment',
            'gpfaultinjector -f filerep_consumer -m async -y fault -r mirror -H ALL').run()

    # Wait till segments come up in change tracking.
    self.wait_till_changetracking_transition('localhost', os.environ['PGPORT'])

    # The suspend fault goes in before gprecoverseg so the recovery stalls
    # in resync.
    pre_failover_steps = (
        ('Injecting fault to suspend resync',
         'gpfaultinjector -f filerep_resync -m async -y suspend -r primary -H ALL'),
        ('recover and suspend in insync', 'gprecoverseg -a'),
    )
    for step_name, step_cmd in pre_failover_steps:
        Command(step_name, step_cmd).run()

    promoter.activate()

    def remote(command):
        # After the failover every command runs on the promoted standby.
        return promoter.run_remote(self.standby_host, command,
                                   self.standby_port, self.standby_mdd)

    # Resume the suspended resync, then reset the fault.
    remote('gpfaultinjector -f filerep_resync -m async -y resume -r primary -H ALL')
    remote('gpfaultinjector -f filerep_resync -m async -y reset -r primary -H ALL')

    # Check that all segments are up and in sync.
    self.wait_till_insync_transition(promoter, self.standby_host, self.standby_port,
                                     self.standby_mdd,
                                     'All segments are running normally', True)
def test_promote_incomplete_stdby(self):
    '''
    Remove the standby's base directory, attempt a promote, and check
    that no master process is found afterwards.
    '''
    activator = GpactivateStandby()
    initializer = GpinitStandby()
    standby_dd = activator.get_standby_dd()
    standby_host = initializer.get_standbyhost()
    standby_port = activator.get_standby_port()

    # Make the standby data directory incomplete by emptying <mdd>/base.
    base_dir = os.path.join(standby_dd, 'base')
    self.pgutil.clean_dir(standby_host, base_dir)

    promote_cmd = "pg_ctl promote -D %s" % standby_dd
    promote_result = activator.run_remote(standby_host, promote_cmd,
                                          standby_port, standby_dd)
    (rc, output) = promote_result
    self.assertEqual(rc, 0)

    # A pid of -1 is treated as "no matching process".
    master_pid = self.pgutil.get_pid_by_keyword(host=standby_host, pgport=standby_port,
                                                keyword='master', option='bin')
    no_master_running = int(master_pid) == -1
    self.assertTrue(no_master_running,
                    'incomplete standby data directory promote succeeds.')
def test_promote_incomplete_stdby(self):
    '''
    Wipe the standby's base directory, try to promote it, and verify that
    no master process shows up.
    '''
    promoter = GpactivateStandby()
    standby_dd = promoter.get_standby_dd()
    standby_host = GpinitStandby().get_standbyhost()
    standby_port = promoter.get_standby_port()

    # Emptying <mdd>/base leaves the standby with an incomplete data directory.
    self.pgutil.clean_dir(standby_host, os.path.join(standby_dd, 'base'))

    rc, _ = promoter.run_remote(standby_host,
                                "pg_ctl promote -D %s" % standby_dd,
                                standby_port, standby_dd)
    self.assertEqual(rc, 0)

    # The promote must not leave a master process behind; -1 is treated as
    # "no such process".
    found_pid = self.pgutil.get_pid_by_keyword(host=standby_host,
                                               pgport=standby_port,
                                               keyword='master',
                                               option='bin')
    self.assertTrue(int(found_pid) == -1,
                    'incomplete standby data directory promote succeeds.')
class OODClass(MPPTestCase):
    """
    Helpers for out-of-disk scenarios on the master standby host.

    The helpers fill the ``/data`` mount of the standby host with files
    written by ``dd``, remove those files again, and (re)start or check the
    standby around such a scenario.
    """

    def __init__(self, methodName):
        self.gp = GpactivateStandby()
        self.verify = StandbyVerify()
        self.config = GPDBConfig()
        self.disk = Disk()
        self.sdby_mdd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.pgport = os.environ.get('PGPORT')
        super(OODClass, self).__init__(methodName)

    def initiate_standby(self):
        """Create a standby through GpactivateStandby.create_standby(local='no')."""
        self.gp.create_standby(local='no')

    def check_standby(self):
        """Assert that check_standby_processes() reports a false value."""
        self.assertFalse(self.verify.check_standby_processes())

    def get_standby_dbid(self):
        """Return the dbid of the standby (content -1, role 'm') as a stripped string."""
        std_sql = "select dbid from gp_segment_configuration where content='-1' and role='m';"
        standby_dbid = PSQL.run_sql_command(std_sql, flags='-q -t', dbname='template1')
        return standby_dbid.strip()

    def restart_standby(self):
        """Start the standby postmaster with pg_ctl and assert its processes are up."""
        sdby_host = self.config.get_master_standbyhost()
        stdby_dbid = self.get_standby_dbid()
        cmd = "pg_ctl -D %s -o '-p %s --gp_dbid=%s --gp_num_contents_in_cluster=2 --silent-mode=true -i -M master --gp_contentid=-1 -x 0 -E' start &" % (
            self.sdby_mdd, self.pgport, stdby_dbid)
        # NOTE(review): elsewhere in this file run_remote() is unpacked as
        # (rc, stdout); if that holds here the tuple is always truthy and this
        # assertion cannot fail -- confirm and assert on rc instead.
        self.assertTrue(self.gp.run_remote(sdby_host, cmd, self.pgport, self.sdby_mdd))
        self.assertTrue(self.verify.check_standby_processes())

    def check_diskusage(self, host):
        """
        Return what Disk.get_disk_usage() reports for ``/data`` on ``host``.

        Raises:
            Exception: if get_disk_usage() returns a non-zero return code.
        """
        # This now checks for only /data
        (rc, result) = self.disk.get_disk_usage(host, '/data')
        if rc != 0:
            raise Exception("The specified mount /data is not present for the device")
        return result

    def _fill(self, filename, host):
        """
        Write one fill file ``/data/diskfill/<filename>`` on ``host`` with dd.

        A failing command is logged, not raised.
        """
        location = '/data'
        fill_dir = '%s/diskfill' % location
        # The directory has to exist on the remote host, where dd runs, so it
        # is created inside the same ssh command. The previous local
        # os.path.isdir/os.makedirs only ever touched the machine running the
        # test.
        cmd_str = 'ssh %s "mkdir -p %s && dd if=/dev/zero bs=16384K count=2000 of=%s/%s"' % (
            host, fill_dir, fill_dir, filename)
        cmd = Command(name='Fill Disk', cmdStr=cmd_str)
        tinctest.logger.info(" %s" % cmd)
        cmd.run(validateAfter=False)
        result = cmd.get_results()
        if result.rc != 0:
            tinctest.logger.error('disk fill not working. \nIts already full')

    def filldisk(self):
        """Keep writing fill files on the standby host until /data usage drops to the threshold."""
        host = self.config.get_master_standbyhost()
        disk_usage = self.check_diskusage(host)
        i = 0
        # NOTE(review): the value looks like remaining space and its unit is
        # whatever Disk.get_disk_usage() returns -- confirm.
        while int(disk_usage.strip()) > 1000000:
            filename = 'new_space_%s' % i
            self._fill(filename, host)
            i += 1
            disk_usage = self.check_diskusage(host)

    def remove_fillfiles(self, filename, host):
        """
        Remove ``/data/diskfill/<filename>*`` on ``host``.

        Raises:
            Exception: if the remote rm returns a non-zero return code.
        """
        location = '/data'
        cmd_str = "ssh %s rm %s/diskfill/%s*" % (host, location, filename)
        cmd = Command(name='Remove fill files', cmdStr=cmd_str)
        tinctest.logger.info(" %s" % cmd)
        cmd.run(validateAfter=False)
        result = cmd.get_results()
        if result.rc != 0:
            raise Exception('Unable to delete the fill files')

    def cleanup(self):
        """
        Remove the fill files, run gprecoverseg and wait for the segments.

        Raises:
            Exception: if the fill files cannot be removed or gprecoverseg fails.
        """
        import time  # local import: the file's import block is not visible from here

        host = self.config.get_master_standbyhost()
        self.remove_fillfiles('new_space', host)
        # Recover segments in case segments and standby were on the same host
        cmd = Command(name='gprecoverseg', cmdStr='gprecoverseg -a')
        tinctest.logger.info(" %s" % cmd)
        cmd.run(validateAfter=False)
        result = cmd.get_results()
        if result.rc != 0:
            raise Exception('gprecoverseg failed')
        poll_seconds = 5
        # The comparison with False is kept as-is so that a None result still
        # ends the wait exactly as before.
        while self.config.is_not_insync_segments() == False:
            tinctest.logger.info('Waiting for DB to be in sync')
            # Pause between polls; without it the loop spins the CPU and
            # floods the log.
            time.sleep(poll_seconds)
class OODClass(MPPTestCase):
    """
    Helpers for out-of-disk scenarios on the master standby host.

    The helpers fill the ``/data`` mount of the standby host with files
    written by ``dd``, remove those files again, and (re)start or check the
    standby around such a scenario.
    """

    def __init__(self, methodName):
        self.gp = GpactivateStandby()
        self.verify = StandbyVerify()
        self.config = GPDBConfig()
        self.disk = Disk()
        self.sdby_mdd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.pgport = os.environ.get('PGPORT')
        super(OODClass, self).__init__(methodName)

    def initiate_standby(self):
        """Create a standby through GpactivateStandby.create_standby(local='no')."""
        self.gp.create_standby(local='no')

    def check_standby(self):
        """Assert that check_standby_processes() reports a false value."""
        self.assertFalse(self.verify.check_standby_processes())

    def get_standby_dbid(self):
        """Return the dbid of the standby (content -1, role 'm') as a stripped string."""
        std_sql = "select dbid from gp_segment_configuration where content='-1' and role='m';"
        standby_dbid = PSQL.run_sql_command(std_sql,
                                            flags='-q -t',
                                            dbname='template1')
        return standby_dbid.strip()

    def restart_standby(self):
        """Start the standby postmaster with pg_ctl and assert its processes are up."""
        sdby_host = self.config.get_master_standbyhost()
        stdby_dbid = self.get_standby_dbid()
        cmd = "pg_ctl -D %s -o '-p %s -b %s -z 2 --silent-mode=true -i -M master -C -1 -x 0 -E' start &" % (
            self.sdby_mdd, self.pgport, stdby_dbid)
        # NOTE(review): elsewhere in this file run_remote() is unpacked as
        # (rc, stdout); if that holds here the tuple is always truthy and this
        # assertion cannot fail -- confirm and assert on rc instead.
        self.assertTrue(
            self.gp.run_remote(sdby_host, cmd, self.pgport, self.sdby_mdd))
        self.assertTrue(self.verify.check_standby_processes())

    def check_diskusage(self, host):
        """
        Return what Disk.get_disk_usage() reports for ``/data`` on ``host``.

        Raises:
            Exception: if get_disk_usage() returns a non-zero return code.
        """
        # This now checks for only /data
        (rc, result) = self.disk.get_disk_usage(host, '/data')
        if rc != 0:
            raise Exception(
                "The specified mount /data is not present for the device")
        return result

    def _fill(self, filename, host):
        """
        Write one fill file ``/data/diskfill/<filename>`` on ``host`` with dd.

        A failing command is logged, not raised.
        """
        location = '/data'
        fill_dir = '%s/diskfill' % location
        # The directory has to exist on the remote host, where dd runs, so it
        # is created inside the same ssh command. The previous local
        # os.path.isdir/os.makedirs only ever touched the machine running the
        # test.
        cmd_str = 'ssh %s "mkdir -p %s && dd if=/dev/zero bs=16384K count=2000 of=%s/%s"' % (
            host, fill_dir, fill_dir, filename)
        cmd = Command(name='Fill Disk', cmdStr=cmd_str)
        tinctest.logger.info(" %s" % cmd)
        cmd.run(validateAfter=False)
        result = cmd.get_results()
        if result.rc != 0:
            tinctest.logger.error('disk fill not working. \nIts already full')

    def filldisk(self):
        """Keep writing fill files on the standby host until /data usage drops to the threshold."""
        host = self.config.get_master_standbyhost()
        disk_usage = self.check_diskusage(host)
        i = 0
        # NOTE(review): the value looks like remaining space and its unit is
        # whatever Disk.get_disk_usage() returns -- confirm.
        while int(disk_usage.strip()) > 1000000:
            filename = 'new_space_%s' % i
            self._fill(filename, host)
            i += 1
            disk_usage = self.check_diskusage(host)

    def remove_fillfiles(self, filename, host):
        """
        Remove ``/data/diskfill/<filename>*`` on ``host``.

        Raises:
            Exception: if the remote rm returns a non-zero return code.
        """
        location = '/data'
        cmd_str = "ssh %s rm %s/diskfill/%s*" % (host, location, filename)
        cmd = Command(name='Remove fill files', cmdStr=cmd_str)
        tinctest.logger.info(" %s" % cmd)
        cmd.run(validateAfter=False)
        result = cmd.get_results()
        if result.rc != 0:
            raise Exception('Unable to delete the fill files')

    def cleanup(self):
        """
        Remove the fill files, run gprecoverseg and wait for the segments.

        Raises:
            Exception: if the fill files cannot be removed or gprecoverseg fails.
        """
        import time  # local import: the file's import block is not visible from here

        host = self.config.get_master_standbyhost()
        self.remove_fillfiles('new_space', host)
        # Recover segments in case segments and standby were on the same host
        cmd = Command(name='gprecoverseg', cmdStr='gprecoverseg -a')
        tinctest.logger.info(" %s" % cmd)
        cmd.run(validateAfter=False)
        result = cmd.get_results()
        if result.rc != 0:
            raise Exception('gprecoverseg failed')
        poll_seconds = 5
        # The comparison with False is kept as-is so that a None result still
        # ends the wait exactly as before.
        while self.config.is_not_insync_segments() == False:
            tinctest.logger.info('Waiting for DB to be in sync')
            # Pause between polls; without it the loop spins the CPU and
            # floods the log.
            time.sleep(poll_seconds)
class GpstateTestCase(MPPTestCase):
    '''Test cases for gpstate output with a standby, before and after failover.'''

    origin_mdd = os.environ.get('MASTER_DATA_DIRECTORY')

    def __init__(self, methodName):
        self.gputil = GpUtility()
        # Flat list of port, hostname, data directory for every mirror.
        self.mirrorConfig = []
        self.master_port = os.environ.get('PGPORT')
        self.masterdd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.activatestdby = ""
        super(GpstateTestCase, self).__init__(methodName)

    def setUp(self):
        '''Start the cluster, install a fresh standby and record the mirror configuration.'''
        self.gputil.check_and_start_gpdb()
        stdby_presence = self.gputil.check_standby_presence()
        if stdby_presence:
            self.gputil.remove_standby()
        self.gputil.install_standby()
        get_mirror_sql = '''select port, hostname, fselocation from gp_segment_configuration, pg_filespace_entry where dbid = fsedbid and content != -1 and preferred_role=\'m\' ;'''
        segments = self.gputil.run_SQLQuery(get_mirror_sql, dbname='template1')
        for seg in segments:
            # Column order follows the select list: port, hostname, fselocation.
            self.mirrorConfig.extend((seg[0], seg[1], seg[2]))
        self.activatestdby = GpactivateStandby()

    def tearDown(self):
        '''Forget the recorded mirror configuration and remove the standby.'''
        del self.mirrorConfig[:]
        self.gputil.remove_standby()

    @staticmethod
    def _labelled_values(output):
        '''Yield (line, value) for every "label = value" line of output.

        The value is the stripped text between the first and second "=".
        '''
        for line in output.split('\n'):
            if "=" not in line:
                continue
            yield line, line.split('=')[1].strip()

    def test_gpstate_disp_recovery(self):
        ''' run gpstate with -f option'''
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        standby_pid = self.gputil.get_pid_by_keyword(host=standby_host, user=os.environ.get('USER'), pgport=standby_port, keyword='master', option='bin')
        (rc, stdout) = self.gputil.run('gpstate -f')
        self.assertEqual(rc, 0)
        for line, value in self._labelled_values(stdout):
            if "Standby address" in line:
                self.assertEqual(value, standby_host)
            elif "Standby data directory" in line:
                self.assertEqual(value, standby_dir)
            elif "Standby port" in line:
                self.assertEqual(int(value), int(standby_port))
            elif "Standby PID" in line:
                self.assertEqual(value, standby_pid)

    def test_gpstate_disp_failover(self):
        '''test if the master configuration detail changed after activating standby'''
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        # Looked up before activation and compared with the master pid
        # reported afterwards.
        standby_pid = self.gputil.get_pid_by_keyword(host=standby_host, user=os.environ.get('USER'), pgport=standby_port, keyword='master', option='bin')
        self.activatestdby.activate()
        (rc, stdout) = self.activatestdby.run_remote(standby_host, rmt_cmd='gpstate -s', pgport=standby_port, standbydd=standby_dir)
        self.assertIn(rc, (0, 1))
        for line, value in self._labelled_values(stdout):
            if "Master host" in line:
                self.assertEqual(value, standby_host)
            elif "Master postgres process ID" in line:
                self.assertEqual(value, standby_pid)
            elif "Master data directory" in line:
                self.assertEqual(value, standby_dir)
            elif "Master port" in line:
                self.assertEqual(int(value), int(standby_port))
        self.gputil.failback_to_original_master(self.origin_mdd, standby_host, standby_dir, standby_port)

    def test_gpstate_active_segment_failover(self):
        ''' test if gpstate show correct # of up and down nodes after failover'''
        count_up_seg = '''select count(*) from gp_segment_configuration where content != -1 and status = \'u\';'''
        count_down_seg = '''select count(*) from gp_segment_configuration where content != -1 and status = \'d\';'''
        # Counts are taken before the failover and compared with gpstate -Q after it.
        number_up_segment = PSQL.run_sql_command(count_up_seg, flags='-q -t', dbname='template1')
        number_down_segment = PSQL.run_sql_command(count_down_seg, flags='-q -t', dbname='template1')
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        self.activatestdby.activate()
        (rc, stdout) = self.activatestdby.run_remote(standby_host, rmt_cmd='gpstate -Q', pgport=standby_port, standbydd=standby_dir)
        self.assertIn(rc, (0, 1))
        for line, value in self._labelled_values(stdout):
            if "up segments" in line:
                self.assertEqual(number_up_segment.strip(), value)
            elif "down segments" in line:
                self.assertEqual(number_down_segment.strip(), value)
        self.gputil.failback_to_original_master(self.origin_mdd, standby_host, standby_dir, standby_port)

    def test_gpstate_disp_mirror_failover(self):
        ''' check if new master is able to get correct mirror configuration with gpstate -m'''
        inside_block = False
        keywords = ("Mirror", "Datadir", "Port")
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        self.activatestdby.activate()
        (rc, stdout) = self.activatestdby.run_remote(standby_host, rmt_cmd='gpstate -m', pgport=standby_port, standbydd=standby_dir)
        self.assertEqual(rc, 0)
        # gpstate prints text, the catalog may return ports as numbers:
        # compare everything as strings.
        expected = set(str(value) for value in self.mirrorConfig)
        # Iterate over lines. Iterating the string itself walks it character
        # by character, so the header was never found and nothing was checked.
        for line in stdout.split('\n'):
            if inside_block:
                # split() without a separator drops the empty fields;
                # split('') raises ValueError.
                fields = line.split()
                if len(fields) < 5:
                    # Not a mirror row (e.g. a separator line after the table).
                    continue
                # NOTE(review): assumes a two-token log prefix before the
                # Mirror / Datadir / Port columns -- confirm against real
                # gpstate -m output.
                mirror_host, mirror_dir, mirror_port = fields[2:5]
                self.assertIn(mirror_host, expected)
                self.assertIn(mirror_dir, expected)
                self.assertIn(mirror_port, expected)
            elif all(s in line for s in keywords):
                # Header row found; the rows that follow describe the mirrors.
                inside_block = True
        self.gputil.failback_to_original_master(self.origin_mdd, standby_host, standby_dir, standby_port)