def test_failover_run__workload(self):
    """Activate the standby, queue the workload scenario, then fail back.

    Only a local standby is supported at the moment; the workload cannot
    be run remotely.
    """
    GpactivateStandby().activate()

    workload_step = [
        "mpp.gpdb.tests.storage.walrepl.crash.dml.test_dml.DMLTestCase",
        "mpp.gpdb.tests.storage.walrepl.crash.ddl.test_ddl.DDLTestCase",
    ]
    mirror_check_step = [
        "mpp.gpdb.tests.storage.walrepl.crash.WalReplKillProcessTestCase.check_mirror_seg",
    ]
    verify_step = [
        "mpp.gpdb.tests.storage.walrepl.crash.verify.verify.DataVerifyTestCase",
    ]

    # The scenario steps are registered with the environment pointing at
    # the promoted standby.
    with walrepl.NewEnv(MASTER_DATA_DIRECTORY=self.standby_dir,
                        PGPORT=WalReplKillProcessTestCase.stdby_port,
                        PGDATABASE=self.pgdatabase):
        for step in (workload_step, mirror_check_step, verify_step):
            self.test_case_scenario.append(step)

    # NOTE(review): the failback is assumed to run outside the NewEnv
    # block, i.e. with the original environment restored -- confirm.
    pgutil.failback_to_original_master(
        self.origin_mdd,
        WalReplKillProcessTestCase.stdby_host,
        self.standby_dir,
        WalReplKillProcessTestCase.stdby_port)
def test_failover_in_change_track(self):
    """Fault the mirrors, fail over to the standby, then run gprecoverseg."""
    tinctest.logger.info(
        "-failover to standby in change tracking and recoverseg")

    # Record where the standby lives before it gets promoted.
    standby = GpactivateStandby()
    self.standby_host = standby.get_current_standby()
    self.standby_mdd = standby.get_standby_dd()
    self.standby_port = standby.get_standby_port()

    # Take the mirror segments down through the fault injector.
    mirror_fault = Command(
        'fault inject mirror segment',
        'gpfaultinjector -f filerep_consumer -m async -y fault -r mirror -H ALL')
    mirror_fault.run()

    standby.activate()

    # Block until the segments report change tracking.
    self.wait_till_changetracking_transition(self.standby_host,
                                             self.standby_port)

    # Recover the segments from the newly promoted master.
    (rc, stdout) = standby.run_remote(self.standby_host,
                                      'gprecoverseg -a',
                                      self.standby_port,
                                      self.standby_mdd)
    tinctest.logger.info(
        "in test_failover_in_change_track: gprecoverseg -a: %s" % stdout)

    # Finally wait for every segment to be up and in sync.
    self.wait_till_insync_transition(standby,
                                     self.standby_host,
                                     self.standby_port,
                                     self.standby_mdd,
                                     'All segments are running normally',
                                     True)
def invoke_sigterm_and_verify(self):
    """Send SIGTERM to the standby's wal receiver and check its pid changes.

    Returns:
        True when the kill command succeeded and the pid looked up
        afterwards differs from the one that was signalled; False when
        no wal receiver was found, the kill command returned a non-zero
        code, or the pid is unchanged.
    """
    gpact_stdby = GpactivateStandby()
    standby_host = gpact_stdby.get_current_standby()
    standby_port = gpact_stdby.get_standby_port()

    wal_rec_pid_1 = self.pgutil.get_pid_by_keyword(
        host=standby_host,
        pgport=standby_port,
        keyword='wal receiver process',
        option='')

    # A pid of -1 must never reach `kill`: `kill -15 -1` signals every
    # process the user is allowed to signal on the standby host.
    # NOTE(review): -1 is assumed to be the helper's "not found" value,
    # as other callers of get_pid_by_keyword test for it -- confirm.
    if int(wal_rec_pid_1) == -1:
        tinctest.logger.error(
            'wal receiver process not found on %s; SIGTERM not sent'
            % standby_host)
        return False

    sig_cmd = "gpssh -h %s -e 'kill -15 %s'" % (standby_host, wal_rec_pid_1)
    cmd = Command('Issue SIGTERM to wam receiver process', cmdStr=sig_cmd)
    tinctest.logger.info('%s' % cmd)
    cmd.run(validateAfter=True)
    if cmd.get_results().rc != 0:
        return False

    wal_rec_pid_2 = self.pgutil.get_pid_by_keyword(
        host=standby_host,
        pgport=standby_port,
        keyword='wal receiver process',
        option='')

    # NOTE(review): a lookup that finds no process at all also differs
    # from the first pid and therefore still counts as success here;
    # tightening that would need a retry/wait policy.
    return wal_rec_pid_1 != wal_rec_pid_2
def test_run_workload_before_activate_standby(self):
    """Queue the workload, activate the standby, queue checks, fail back."""
    standby = GpactivateStandby()

    # First scenario step: the DML and DDL workload.
    self.test_case_scenario.append([
        "mpp.gpdb.tests.storage.walrepl.crash.dml.test_dml.DMLTestCase",
        "mpp.gpdb.tests.storage.walrepl.crash.ddl.test_ddl.DDLTestCase",
    ])

    standby.activate()

    # Remaining steps: mirror segment check, then data verification.
    self.test_case_scenario.append([
        "mpp.gpdb.tests.storage.walrepl.crash.WalReplKillProcessTestCase.check_mirror_seg",
    ])
    self.test_case_scenario.append([
        "mpp.gpdb.tests.storage.walrepl.crash.verify.verify.DataVerifyTestCase",
    ])

    pgutil.failback_to_original_master(
        self.origin_mdd,
        WalReplKillProcessTestCase.stdby_host,
        self.standby_dir,
        WalReplKillProcessTestCase.stdby_port)
def setUp(self):
    """Create the test database and a fresh standby on this host."""
    database = self.__class__.db_name
    self.db_name = database
    self.createdb(database)

    # Every test case starts from a newly installed standby.
    gputil.remove_standby()
    local_host = socket.gethostname()
    gputil.install_standby(new_stdby_host=local_host)

    self.gpinit_stdby = GpinitStandby()
    self.activatestdby = GpactivateStandby()
def check_stdby_stop(self):
    """Raise if a master process is still found on the standby.

    Raises:
        Exception: when the pid lookup returns anything other than -1.
    """
    stdby_host = GpinitStandby().get_standbyhost()
    stdby_port = GpactivateStandby().get_standby_port()

    master_pid = self.pgutil.get_pid_by_keyword(
        host=stdby_host,
        pgport=stdby_port,
        keyword="master",
        option="bin")

    if int(master_pid) == -1:
        return
    raise Exception("standby should stop but failed!")
def __init__(self, methodName):
    """Build the helper objects and read the master settings from the env.

    Args:
        methodName: forwarded unchanged to the parent constructor.
    """
    # Helper objects used by the tests.
    self.gp = GpactivateStandby()
    self.verify = StandbyVerify()
    self.config = GPDBConfig()
    self.disk = Disk()

    # Either value is None when the variable is not set.
    environment = os.environ
    self.sdby_mdd = environment.get('MASTER_DATA_DIRECTORY')
    self.pgport = environment.get('PGPORT')

    super(OODClass, self).__init__(methodName)
def start_stdby(self):
    """Start the standby postmaster remotely with pg_ctl, in the background."""
    stdby_host = GpinitStandby().get_standbyhost()
    stdby_dbid = self.get_standby_dbid()

    standby_info = GpactivateStandby()
    stdby_mdd = standby_info.get_standby_dd()
    stdby_port = standby_info.get_standby_port()

    # Placeholders: data directory, port, dbid.
    pg_ctl_template = "pg_ctl -l postmaster.log -D %s -o '-p %s --gp_dbid=%s -i --gp_contentid=-1 -E' start &"
    start_cmd = pg_ctl_template % (stdby_mdd, stdby_port, stdby_dbid)

    self.run_remote(stdby_host, start_cmd, stdby_port, stdby_mdd)
def start_stdby(self):
    """Start the standby postmaster remotely with pg_ctl, in the background."""
    stdby_host = GpinitStandby().get_standbyhost()
    stdby_dbid = self.get_standby_dbid()

    standby_info = GpactivateStandby()
    stdby_mdd = standby_info.get_standby_dd()
    stdby_port = standby_info.get_standby_port()

    # Placeholders: data directory, port, dbid.
    pg_ctl_template = "pg_ctl -D %s -o '-p %s --gp_dbid=%s --gp_num_contents_in_cluster=2 --silent-mode=true -i -M master --gp_contentid=-1 -x 0 -E' start &"
    start_cmd = pg_ctl_template % (stdby_mdd, stdby_port, stdby_dbid)

    self.run_remote(stdby_host, start_cmd, stdby_port, stdby_mdd)
def setUp(self):
    """Start GPDB, recreate the standby and publish its host and port."""
    super(WalReplKillProcessScenarioTestCase, self).setUp()
    pgutil.check_and_start_gpdb()

    # The standby may have been promoted by an earlier test, so it is
    # always removed and installed again.
    pgutil.remove_standby()
    pgutil.install_standby()

    activator = GpactivateStandby()
    initializer = GpinitStandby()

    # Stored on the class so other code can read them from there.
    shared = WalReplKillProcessTestCase
    shared.stdby_port = activator.get_standby_port()
    shared.stdby_host = initializer.get_standbyhost()
def setUp(self):
    """Start GPDB, recreate a local standby and record its location."""
    pgutil.check_and_start_gpdb()

    # The standby may have been promoted by an earlier test, so it is
    # always removed and installed again. It has to be installed on the
    # local host, otherwise remote SQL cannot be run.
    pgutil.remove_standby()
    local_host = socket.gethostname()
    pgutil.install_standby(new_stdby_host=local_host)

    activator = GpactivateStandby()
    initializer = GpinitStandby()

    shared = WalReplKillProcessTestCase
    shared.stdby_port = activator.get_standby_port()
    shared.stdby_host = initializer.get_standbyhost()
    self.standby_dir = activator.get_standby_dd()
def __init__(self, methodName):
    """Set up helpers, create the database and read environment settings.

    Args:
        methodName: forwarded unchanged to the parent constructor.
    """
    self.gpact = GpactivateStandby()
    self.gpinit = GpinitStandby()

    mixin = StandbyRunMixin()
    self.runmixin = mixin
    mixin.createdb(dbname=self.dbname)

    self.bkup_timestamp = ""

    environment = os.environ
    self.gphome = environment.get('GPHOME')
    self.mdd = environment.get('MASTER_DATA_DIRECTORY')
    self.pgport = environment.get('PGPORT')
    self.host = socket.gethostname()

    # The standby location is the parent directory of the master data
    # directory with standby_dirname appended directly to it.
    parent_dir = os.path.dirname(self.mdd)
    self.standby_loc = parent_dir + self.standby_dirname

    super(BkupRestore, self).__init__(methodName)
def test_gpactivatestandby_on_same_host(self):
    """Activate a standby installed on localhost, verify it, fail back.

    Doesn't work due to STO-374.
    """
    standby_host = 'localhost'
    gputil.install_standby(new_stdby_host=standby_host)

    # Instantiated as in the original flow; the object itself is unused.
    initstdby = GpinitStandby()
    activator = GpactivateStandby()

    self.mdd = activator.get_standby_dd()
    self.port = activator.get_standby_port()
    self.standby_pid = activator.get_standby_pid(standby_host,
                                                 self.port,
                                                 self.mdd)

    PSQL.run_sql_file(local_path('create_tables.sql'), dbname=self.db_name)

    activated = activator.activate()
    self.assertTrue(activated)
    verified = activator.verify_gpactivatestandby(self.standby_pid,
                                                  standby_host,
                                                  self.port,
                                                  self.mdd)
    self.assertTrue(verified)

    gputil.failback_to_original_master(self.origin_mdd,
                                       socket.gethostname(),
                                       self.mdd,
                                       self.port)
def test_gpactivatestandby_on_new_host(self):
    """Activate a standby installed with default options, verify, fail back."""
    gputil.install_standby()

    initializer = GpinitStandby()
    activator = GpactivateStandby()

    # Standby coordinates are stored on the instance.
    self.mdd = activator.get_standby_dd()
    self.host = initializer.get_standbyhost()
    self.port = activator.get_standby_port()
    self.standby_pid = activator.get_standby_pid(self.host,
                                                 self.port,
                                                 self.mdd)

    PSQL.run_sql_file(local_path('create_tables.sql'), dbname=self.db_name)

    activated = activator.activate()
    self.assertTrue(activated)
    verified = activator.verify_gpactivatestandby(self.standby_pid,
                                                  self.host,
                                                  self.port,
                                                  self.mdd)
    self.assertTrue(verified)

    gputil.failback_to_original_master(self.origin_mdd,
                                       self.host,
                                       self.mdd,
                                       self.port)
def test_backup_restore(self):
    """Back up with a standby present, activate it, then restore."""
    # Recreate the bkdb database, then make sure a standby exists.
    recreate_db = Command('createdb bkdb', 'dropdb bkdb; createdb bkdb')
    recreate_db.run()
    self.create_standby()

    # Run the workload and take the backup.
    self.run_workload('sql')
    backup_ok = self.run_backup()
    self.assertTrue(backup_ok)

    # Promote the standby.
    GpactivateStandby().activate()

    # Restore from the new master.
    restore_ok = self.run_restore()
    self.assertTrue(restore_ok)
def kill_walreceiver(self):
    """Kill the wal receiver process on the standby host.

    Returns:
        False when no wal receiver process is found (an error is logged);
        otherwise nothing is returned explicitly.
    """
    stdby_host = GpinitStandby().get_standbyhost()
    stdby_port = GpactivateStandby().get_standby_port()

    walreceiver_pid = self.pgutil.get_pid_by_keyword(
        host=stdby_host,
        pgport=stdby_port,
        keyword="wal receiver process")

    # -1 is handled as "process not found".
    if int(walreceiver_pid) == -1:
        tinctest.logger.error("error: wal receiver process does not exist!")
        return False

    self.killProcess_byPid(pid_toKill=[walreceiver_pid], host=stdby_host)
def test_inject_primary_after_promote(self):
    """Promote the standby, panic the primaries, recover and rebalance."""
    tinctest.logger.info(
        "-failover to standby, inject primary segments, and recoverseg")

    # Record where the standby lives before promoting it.
    standby = GpactivateStandby()
    self.standby_host = standby.get_current_standby()
    self.standby_mdd = standby.get_standby_dd()
    self.standby_port = standby.get_standby_port()
    standby.activate()

    def on_new_master(command):
        # Run `command` remotely against the promoted standby.
        return standby.run_remote(self.standby_host,
                                  command,
                                  self.standby_port,
                                  self.standby_mdd)

    def wait_for(keyword, flag):
        # `flag` is passed through as the last positional argument.
        self.wait_till_insync_transition(standby,
                                         self.standby_host,
                                         self.standby_port,
                                         self.standby_mdd,
                                         keyword,
                                         flag)

    # Bring the primary segments down.
    on_new_master(
        'gpfaultinjector -f postmaster -m async -y panic -r primary -H ALL')

    # Wait for the segments to come up in change tracking.
    self.wait_till_changetracking_transition(self.standby_host,
                                             self.standby_port)

    # Recover the segments from the new master.
    (rc, stdout) = on_new_master('gprecoverseg -a')
    tinctest.logger.info(
        "in test_inject_primary_after_promote: gprecoverseg -a: %s" % stdout)
    wait_for('Segment Pairs in Resynchronization', False)

    # Rebalance from the new master.
    (rc, stdout) = on_new_master('gprecoverseg -ra')
    tinctest.logger.info(
        "in test_inject_primary_after_promote: gprecoverseg -ar: %s" % stdout)

    # Finally wait for every segment to be up and in sync.
    wait_for('All segments are running normally', True)
def test_gpstop_after_failover(self):
    """After failover, gpstop from the new master must return 0."""
    tinctest.logger.info("test gpstop from new master after failover")

    standby = GpactivateStandby()
    standby_host = standby.get_current_standby()
    standby_port = standby.get_standby_port()
    standby_mdd = standby.get_standby_dd()
    standby.activate()

    # Stop the cluster from the promoted standby and check the exit code.
    (rc, stdout) = standby.run_remote(standby_host,
                                      rmt_cmd='gpstop -a -M fast',
                                      pgport=standby_port,
                                      standbydd=standby_mdd)
    self.assertEqual(0, rc)

    # Start it again (the result is not checked), then fail back.
    standby.run_remote(standby_host,
                       rmt_cmd='gpstart -a',
                       pgport=standby_port,
                       standbydd=standby_mdd)
    self.gputil.failback_to_original_master(self.origin_mdd,
                                            standby_host,
                                            standby_mdd,
                                            standby_port)
def __init__(self, methodName):
    """Capture the environment and the segment layout read from the catalog.

    Args:
        methodName: forwarded unchanged to the parent constructor.
    """
    self.gpact = GpactivateStandby()
    self.host = socket.gethostname()

    environment = os.environ
    self.mdd = environment.get('MASTER_DATA_DIRECTORY')
    # pgport and port both hold the same PGPORT value.
    self.pgport = environment.get('PGPORT')
    self.port = self.pgport

    # Read the array configuration over a utility-mode connection.
    gparray = GpArray.initFromCatalog(dbconn.DbURL(), utility=True)
    self.numcontent = gparray.getNumSegmentContents()
    self.orig_master = gparray.master

    self.standby_pid = ''
    super(GpactivateStandbyTestCase, self).__init__(methodName)
def run_test(self):
    """Override of SQLTestCase.run_test for promotion tests.

    Create a base backup and start the standby, run the setup SQL on the
    primary side, promote the standby, check the timeline ids, then run
    the parent test with the environment pointing at the standby.

    Returns:
        Whatever the parent class's run_test() returns.

    Once the standby has been promoted, the failback to the original
    master runs on every exit path, whether the rest of the test returns
    or raises.
    """
    sql_file = self.sql_file

    Command('gpinitstandby -r', 'gpinitstandby -ra').run()
    self.assertEqual(self.standby.create(), 0)
    gpact_stdby = GpactivateStandby()
    res = self.standby.start()
    self.assertTrue(res.wasSuccessful())

    # Wait for the walreceiver to start.
    num_walsender = self.wait_for_walsender()
    self.assertEqual(num_walsender, 1)

    # The setup script is run on the primary while the standby is running.
    # A '.in' template, if present, is preprocessed first.
    setup_file = sql_file.replace('.sql', '_setup.sql')
    if os.path.exists(setup_file + '.in'):
        self.preprocess_file(setup_file + '.in')
    self.assertTrue(PSQL.run_sql_file(setup_file, dbname=self.db_name))

    if self.promote_using_pg_ctl:
        self.assertTrue(self.standby.promote())
    else:
        self.assertTrue(self.standby.promote_manual())

    # The failback used to sit after `return result` and never ran;
    # try/finally makes it run as the original comment intended.
    # NOTE(review): failback_to_original_master() has never run from
    # this test before -- confirm it behaves here. An exception raised
    # by it will replace any earlier test failure.
    try:
        # Fetch the timeline ids of primary and standby (post-promote).
        primary_tli = self.fetch_tli(os.environ.get('MASTER_DATA_DIRECTORY'))
        standby_tli = self.fetch_tli(self.standby_datadir)

        logger.info("primary tli = " + primary_tli)
        logger.info("standby tli after promote = " + standby_tli)

        # The standby timeline must be exactly one ahead of the primary.
        self.assertEqual(int(primary_tli) + 1, int(standby_tli))

        # SQLTestCase doesn't allow setting the port, so pass it through
        # the environment.
        with NewEnv(PGPORT=self.standby_port,
                    MASTER_DATA_DIRECTORY=self.standby_datadir):
            return super(PromoteTestCase, self).run_test()
    finally:
        # Always fail back to the old master once the test is complete.
        gpact_stdby.failback_to_original_master()
def test_gpactivatestandby_new_host_with_filespace(self):
    """Activate a standby installed with default options, verify, fail back.

    The filespace setup (creating 'fs_walrepl_a' through Gpfilespace and
    running filespace.sql) is currently disabled, so no filespace is
    created here.
    """
    gputil.install_standby()

    initializer = GpinitStandby()
    activator = GpactivateStandby()

    # Standby coordinates are stored on the instance.
    self.mdd = activator.get_standby_dd()
    self.host = initializer.get_standbyhost()
    self.port = activator.get_standby_port()
    self.standby_pid = activator.get_standby_pid(self.host,
                                                 self.port,
                                                 self.mdd)

    PSQL.run_sql_file(local_path('create_tables.sql'), dbname=self.db_name)

    activated = activator.activate()
    self.assertTrue(activated)
    verified = activator.verify_gpactivatestandby(self.standby_pid,
                                                  self.host,
                                                  self.port,
                                                  self.mdd)
    self.assertTrue(verified)

    gputil.failback_to_original_master(self.origin_mdd,
                                       self.host,
                                       self.mdd,
                                       self.port)
def test_run_five(self):
    """Install and activate a standby five times in a row.

    Each round runs with the environment pointing at the current master
    and then stores the activated standby's directory and port for the
    next round.
    """
    for round_no in xrange(5):
        with NewEnv(PGPORT=self.port, MASTER_DATA_DIRECTORY=self.mdd):
            installer = GpUtility()
            # From the second round on, the standby is initialised from
            # the new master, so each round nests another 'newstandby'
            # directory under the previous one.
            if round_no == 0:
                standby_dir = self.mdd
            else:
                standby_dir = os.path.join(self.mdd, 'newstandby')
            installer.install_standby(socket.gethostname(), standby_dir)

            activator = GpactivateStandby()
            self.mdd = activator.get_standby_dd()
            self.port = activator.get_standby_port()
            activator.activate()
            tinctest.logger.info(
                "self.mdd is %s, self.port is %s" % (self.mdd, self.port))
def setUp(self):
    """Start GPDB, reinstall the standby and collect the mirror config."""
    self.gputil.check_and_start_gpdb()

    # Remove an existing standby first, then install a new one.
    # NOTE(review): install_standby() is assumed to run unconditionally,
    # not only when a standby was present -- confirm.
    if self.gputil.check_standby_presence():
        self.gputil.remove_standby()
    self.gputil.install_standby()

    get_mirror_sql = '''select port, hostname, fselocation from gp_segment_configuration, pg_filespace_entry where dbid = fsedbid and content != -1 and preferred_role=\'m\' ;'''
    mirror_rows = self.gputil.run_SQLQuery(get_mirror_sql, dbname='template1')

    # mirrorConfig is kept flat: port, host, location for each row.
    for row in mirror_rows:
        port, host, location = row[0], row[1], row[2]
        for value in (port, host, location):
            self.mirrorConfig.append(value)

    self.activatestdby = GpactivateStandby()
def test_failover_insync(self):
    """Fault the mirrors, suspend resync, fail over, then resume resync."""
    tinctest.logger.info("-failover to standby in resync and recoverseg")

    # Record where the standby lives before promoting it.
    standby = GpactivateStandby()
    self.standby_host = standby.get_current_standby()
    self.standby_mdd = standby.get_standby_dd()
    self.standby_port = standby.get_standby_port()

    # Bring the mirror segments down.
    mirror_fault = Command(
        'fault inject mirror segment',
        'gpfaultinjector -f filerep_consumer -m async -y fault -r mirror -H ALL')
    mirror_fault.run()

    # Wait on the current master for the change tracking transition.
    self.wait_till_changetracking_transition('localhost',
                                             os.environ['PGPORT'])

    # Suspend resync, then start recovery.
    suspend_resync = Command(
        'Injecting fault to suspend resync',
        'gpfaultinjector -f filerep_resync -m async -y suspend -r primary -H ALL')
    suspend_resync.run()
    recover = Command('recover and suspend in insync', 'gprecoverseg -a')
    recover.run()

    standby.activate()

    # From the new master: resume the resync fault, then reset it.
    resync_template = 'gpfaultinjector -f filerep_resync -m async -y %s -r primary -H ALL'
    for fault_action in ('resume', 'reset'):
        standby.run_remote(self.standby_host,
                           resync_template % fault_action,
                           self.standby_port,
                           self.standby_mdd)

    # Finally wait for every segment to be up and in sync.
    self.wait_till_insync_transition(standby,
                                     self.standby_host,
                                     self.standby_port,
                                     self.standby_mdd,
                                     'All segments are running normally',
                                     True)
def test_promote_incomplete_stdby(self):
    """Remove the standby's base dir, promote, and expect no master process."""
    activator = GpactivateStandby()
    initializer = GpinitStandby()
    stdby_mdd = activator.get_standby_dd()
    stdby_host = initializer.get_standbyhost()
    stdby_port = activator.get_standby_port()

    # Make the standby data directory incomplete.
    base_dir = os.path.join(stdby_mdd, 'base')
    self.pgutil.clean_dir(stdby_host, base_dir)

    # The promote command itself is expected to return 0 ...
    (rc, output) = activator.run_remote(stdby_host,
                                        "pg_ctl promote -D %s" % stdby_mdd,
                                        stdby_port,
                                        stdby_mdd)
    self.assertEqual(rc, 0)

    # ... but no master process may be found afterwards (pid of -1).
    pid = self.pgutil.get_pid_by_keyword(host=stdby_host,
                                         pgport=stdby_port,
                                         keyword='master',
                                         option='bin')
    no_master_running = int(pid) == -1
    self.assertTrue(no_master_running,
                    'incomplete standby data directory promote succeeds.')
def __init__(self, methodName):
    """Create the GpactivateStandby helper, then initialise the parent.

    Args:
        methodName: forwarded unchanged to the parent constructor.
    """
    activator = GpactivateStandby()
    self.gpact_stdby = activator
    super(FilerepTestCase, self).__init__(methodName)