class WalRecTestCase(MPPTestCase):
    '''Testcases for SIGTERM on wal receiver'''

    def __init__(self, methodName):
        self.gp = GpinitStandby()
        self.mdd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.pgutil = GpUtility()
        super(WalRecTestCase, self).__init__(methodName)

    def setUp(self):
        # Remove standby if present
        self.gp.run(option='-r')

    def _wal_receiver_pid(self, standby_host, standby_port):
        '''Look up the pid of the wal receiver process on the standby.

        The value is whatever GpUtility.get_pid_by_keyword reports; callers
        in this class treat int(pid) == -1 as "process not found".
        '''
        return self.pgutil.get_pid_by_keyword(host=standby_host,
                                              pgport=standby_port,
                                              keyword='wal receiver process',
                                              option='')

    def invoke_sigterm_and_verify(self):
        '''Invoke sigterm on wal receiver and verify that a new process is
        spawned after.

        Returns True only when a wal receiver with a different, valid pid is
        observed after the signal; returns False when there was no wal
        receiver to signal, when the kill command fails, or when no
        replacement process shows up within the polling window.
        '''
        # Function-scope import: the module's import block is not visible
        # from this class, so do not rely on a module-level name.
        import time

        gpact_stdby = GpactivateStandby()
        standby_host = gpact_stdby.get_current_standby()
        standby_port = gpact_stdby.get_standby_port()

        wal_rec_pid_1 = self._wal_receiver_pid(standby_host, standby_port)
        if int(wal_rec_pid_1) == -1:
            # Never build "kill -15 -1": that would signal every process the
            # user owns on the standby host.
            tinctest.logger.error('wal receiver process not found on standby')
            return False

        sig_cmd = "gpssh -h %s -e 'kill -15 %s'" % (standby_host, wal_rec_pid_1)
        cmd = Command('Issue SIGTERM to wal receiver process', cmdStr=sig_cmd)
        tinctest.logger.info('%s' % cmd)
        cmd.run(validateAfter=True)
        result = cmd.get_results()
        if result.rc != 0:
            return False

        # The replacement process is not guaranteed to exist immediately
        # after the signal, so poll briefly. A "not found" pid (-1) must not
        # be mistaken for a newly spawned process.
        for _attempt in range(10):
            wal_rec_pid_2 = self._wal_receiver_pid(standby_host, standby_port)
            if int(wal_rec_pid_2) not in (-1, int(wal_rec_pid_1)):
                return True
            time.sleep(1)
        return False

    @unittest.skipIf(not config.is_multinode(), "Test applies only to a multinode cluster")
    def test_sigterm_on_walreceiver(self):
        '''Install a standby, SIGTERM its wal receiver, expect a respawn.'''
        self.pgutil.install_standby()
        self.assertTrue(self.invoke_sigterm_and_verify())
class WalRecTestCase(MPPTestCase):
    '''Testcases for SIGTERM on wal receiver'''

    def __init__(self, methodName):
        self.gp = GpinitStandby()
        self.mdd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.pgutil = GpUtility()
        super(WalRecTestCase, self).__init__(methodName)

    def setUp(self):
        # Remove standby if present
        self.gp.run(option='-r')

    def _wal_receiver_pid(self, standby_host, standby_port):
        '''Look up the pid of the wal receiver process on the standby.

        The value is whatever GpUtility.get_pid_by_keyword reports; callers
        in this class treat int(pid) == -1 as "process not found".
        '''
        return self.pgutil.get_pid_by_keyword(host=standby_host,
                                              pgport=standby_port,
                                              keyword='wal receiver process',
                                              option='')

    def invoke_sigterm_and_verify(self):
        '''Invoke sigterm on wal receiver and verify that a new process is
        spawned after.

        Returns True only when a wal receiver with a different, valid pid is
        observed after the signal; returns False when there was no wal
        receiver to signal, when the kill command fails, or when no
        replacement process shows up within the polling window.
        '''
        # Function-scope import: the module's import block is not visible
        # from this class, so do not rely on a module-level name.
        import time

        gpact_stdby = GpactivateStandby()
        standby_host = gpact_stdby.get_current_standby()
        standby_port = gpact_stdby.get_standby_port()

        wal_rec_pid_1 = self._wal_receiver_pid(standby_host, standby_port)
        if int(wal_rec_pid_1) == -1:
            # Never build "kill -15 -1": that would signal every process the
            # user owns on the standby host.
            tinctest.logger.error('wal receiver process not found on standby')
            return False

        sig_cmd = "gpssh -h %s -e 'kill -15 %s'" % (standby_host, wal_rec_pid_1)
        cmd = Command('Issue SIGTERM to wal receiver process', cmdStr=sig_cmd)
        tinctest.logger.info('%s' % cmd)
        cmd.run(validateAfter=True)
        result = cmd.get_results()
        if result.rc != 0:
            return False

        # The replacement process is not guaranteed to exist immediately
        # after the signal, so poll briefly. A "not found" pid (-1) must not
        # be mistaken for a newly spawned process.
        for _attempt in range(10):
            wal_rec_pid_2 = self._wal_receiver_pid(standby_host, standby_port)
            if int(wal_rec_pid_2) not in (-1, int(wal_rec_pid_1)):
                return True
            time.sleep(1)
        return False

    @unittest.skipIf(not config.is_multinode(), "Test applies only to a multinode cluster")
    def test_sigterm_on_walreceiver(self):
        '''Install a standby, SIGTERM its wal receiver, expect a respawn.'''
        self.pgutil.install_standby()
        self.assertTrue(self.invoke_sigterm_and_verify())
class PromoteTestCase(MPPTestCase):
    '''Test cases around promoting a standby master with pg_ctl.'''

    def __init__(self, methodName):
        self.pgutil = GpUtility()
        super(PromoteTestCase, self).__init__(methodName)

    def setUp(self):
        '''Make sure the cluster is up and a fresh standby exists.'''
        self.pgutil.check_and_start_gpdb()
        # Always rebuild the standby: a previous test may have promoted it.
        self.pgutil.remove_standby()
        self.pgutil.install_standby()

    def tearDown(self):
        '''Drop the standby created in setUp.'''
        self.pgutil.remove_standby()

    def test_promote_incomplete_stdby(self):
        '''
        remove the standby base dir, try promote and check if fail
        '''
        activator = GpactivateStandby()
        initializer = GpinitStandby()

        standby_datadir = activator.get_standby_dd()
        standby_hostname = initializer.get_standbyhost()
        standby_pgport = activator.get_standby_port()

        # Wipe the 'base' directory so the standby data directory is
        # incomplete before the promote attempt.
        base_dir = os.path.join(standby_datadir, 'base')
        self.pgutil.clean_dir(standby_hostname, base_dir)

        promote_cmd = "pg_ctl promote -D %s" % standby_datadir
        promote_result = activator.run_remote(standby_hostname, promote_cmd,
                                              standby_pgport, standby_datadir)
        rc = promote_result[0]
        self.assertEqual(rc, 0)

        # No 'master' process may be found on the standby afterwards.
        pid = self.pgutil.get_pid_by_keyword(host=standby_hostname,
                                             pgport=standby_pgport,
                                             keyword='master',
                                             option='bin')
        self.assertTrue(int(pid) == -1,
                        'incomplete standby data directory promote succeeds.')
class PromoteTestCase(MPPTestCase):
    '''Test cases around promoting a standby master with pg_ctl.'''

    def __init__(self, methodName):
        self.pgutil = GpUtility()
        super(PromoteTestCase, self).__init__(methodName)

    def setUp(self):
        '''Make sure the cluster is up and a fresh standby exists.'''
        self.pgutil.check_and_start_gpdb()
        # Always rebuild the standby: a previous test may have promoted it.
        self.pgutil.remove_standby()
        self.pgutil.install_standby()

    def tearDown(self):
        '''Drop the standby created in setUp.'''
        self.pgutil.remove_standby()

    def test_promote_incomplete_stdby(self):
        '''
        remove the standby base dir, try promote and check if fail
        '''
        activator = GpactivateStandby()
        initializer = GpinitStandby()

        standby_datadir = activator.get_standby_dd()
        standby_hostname = initializer.get_standbyhost()
        standby_pgport = activator.get_standby_port()

        # Wipe the 'base' directory so the standby data directory is
        # incomplete before the promote attempt.
        base_dir = os.path.join(standby_datadir, 'base')
        self.pgutil.clean_dir(standby_hostname, base_dir)

        promote_cmd = "pg_ctl promote -D %s" % standby_datadir
        promote_result = activator.run_remote(standby_hostname, promote_cmd,
                                              standby_pgport, standby_datadir)
        rc = promote_result[0]
        self.assertEqual(rc, 0)

        # No 'master' process may be found on the standby afterwards.
        pid = self.pgutil.get_pid_by_keyword(host=standby_hostname,
                                             pgport=standby_pgport,
                                             keyword='master',
                                             option='bin')
        self.assertTrue(int(pid) == -1,
                        'incomplete standby data directory promote succeeds.')
class WalReplKillProcessTestCase(TINCTestCase):
    '''Scenario-test steps that kill WAL replication related processes and
    check how the master/standby react.'''

    # this is not hard code, will be updated
    stdby_host = 'localhost'
    stdby_port = '5432'

    def __init__(self, methodName):
        self.gphome = os.environ.get('GPHOME')
        self.pgport = os.environ.get('PGPORT')
        self.pgdatabase = os.environ.get('PGDATABASE')
        self.stdby_host = 'localhost'
        self.master_dd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.pgutil = GpUtility()
        self.stdby = StandbyVerify()
        super(WalReplKillProcessTestCase, self).__init__(methodName)

    def killProcess_byPid(self, signal=9, pid_toKill=None, host="localhost"):
        '''Send `signal` to every pid in `pid_toKill` on `host` via gpssh.

        Returns True when the gpssh command exits with status 0, False
        otherwise (including when no pid was supplied).
        '''
        # None instead of a shared mutable default list.
        pids = " ".join(str(pid).strip() for pid in (pid_toKill or []))
        if not pids:
            tinctest.logger.error("Killing process error, no pid given\n")
            return False
        kill_cmd = "%s/bin/gpssh -h %s -e 'kill -%s %s'" % (
            os.environ.get('GPHOME'), host, signal, pids)
        (rc, result) = self.pgutil.run(kill_cmd)
        if rc == 0:
            tinctest.logger.info("Process killed, %s" % result)
            return True
        tinctest.logger.error(
            "Killing process error, Status Code non zero, cmd: %s\n" % kill_cmd)
        return False

    def _kill_standby_process(self, keyword, missing_msg):
        '''Kill the process matching `keyword` on the current standby.

        Logs `missing_msg` and returns False when the process is not found;
        otherwise returns the result of killProcess_byPid.
        '''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_port = GpactivateStandby().get_standby_port()
        pid = self.pgutil.get_pid_by_keyword(host=stdby_host,
                                             pgport=stdby_port,
                                             keyword=keyword)
        if int(pid) == -1:
            tinctest.logger.error(missing_msg)
            return False
        return self.killProcess_byPid(pid_toKill=[pid], host=stdby_host)

    def kill_walstartup(self):
        '''Kill the startup process on the standby; True on success.'''
        return self._kill_standby_process(
            "startup process", "error:startup process does not exist!")

    def kill_walreceiver(self):
        '''Kill the wal receiver process on the standby; True on success.'''
        return self._kill_standby_process(
            "wal receiver process",
            "error: wal receiver process does not exist!")

    def kill_walsender_check_postmaster_reset(self):
        '''Kill the wal sender on the master and expect a new wal sender pid.

        Returns False if no wal sender exists, True when the pid changed;
        raises Exception when the pid is unchanged two seconds after the kill.
        '''
        walsender_old_pid = self.pgutil.get_pid_by_keyword(
            pgport=self.pgport, keyword="wal sender process")
        if int(walsender_old_pid) == -1:
            tinctest.logger.error(
                "error: process wal sender does not exist on host")
            return False
        self.killProcess_byPid(pid_toKill=[walsender_old_pid])
        sleep(2)
        walsender_new_pid = self.pgutil.get_pid_by_keyword(
            pgport=self.pgport, keyword="wal sender process")
        if walsender_old_pid == walsender_new_pid:
            raise Exception(
                "Killing walsender failed to force postmaster reset")
        return True

    def kill_transc_backend_check_reset(self):
        '''Kill a transaction backend and verify every postmaster child
        listed below comes back with a different pid.

        Raises Exception when a child still has its old pid after polling.
        '''
        dict_process = {
            'stats collector process': -1,
            'writer process': -1,
            'checkpointer process': -1,
            'seqserver process': -1,
            'ftsprobe process': -1,
            'sweeper process': -1,
            'wal sender process': -1,
        }
        for process in dict_process:
            dict_process[process] = self.pgutil.get_pid_by_keyword(
                pgport=self.pgport, keyword=process)
        self.kill_transc_backend()
        for process in dict_process:
            pid = self.pgutil.get_pid_by_keyword(pgport=self.pgport,
                                                 keyword=process)
            delay = 1
            while dict_process[process] == pid and delay < 5:
                sleep(1)
                delay = delay + 1
                pid = self.pgutil.get_pid_by_keyword(pgport=self.pgport,
                                                     keyword=process)
            # Decide on the pid itself, not on the retry counter: a pid that
            # changed on the very last poll is a success.
            if dict_process[process] == pid:
                tinctest.logger.error(
                    "Killing transaction backend process failed to force postmaster reset: %s"
                    % process)
                raise Exception(
                    "Killing transaction backend process failed to force postmaster reset child process"
                )

    def kill_transc_backend(self):
        '''Find backend(s) running an 'INSERT INTO' query and kill them.

        Polls pg_stat_activity up to five times, one second apart; logs an
        error when no such backend shows up.
        '''
        sql = "SELECT procpid FROM pg_stat_activity WHERE datname='{0}' AND current_query like 'INSERT INTO%'".format(
            self.pgdatabase)
        tinctest.logger.info(
            "running sql command to get transaction backend process: --- %s"
            % sql)
        procid = PSQL.run_sql_command(sql, flags='-q -t',
                                      dbname=self.pgdatabase)
        count = 1
        while not procid.strip() and count < 5:
            sleep(1)
            count += 1
            procid = PSQL.run_sql_command(sql, flags='-q -t',
                                          dbname=self.pgdatabase)
        if procid.strip():
            tinctest.logger.info("got procid to kill: %s " % procid)
            # psql output may hold several rows and padding whitespace;
            # split it so only bare pids reach the kill command.
            self.killProcess_byPid(pid_toKill=procid.split())
        else:
            tinctest.logger.error("There is no active backend process")

    def check_stdby_stop(self):
        '''Raise Exception if a 'master' process is still found on the standby.'''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_port = GpactivateStandby().get_standby_port()
        master_pid = self.pgutil.get_pid_by_keyword(host=stdby_host,
                                                    pgport=stdby_port,
                                                    keyword="master",
                                                    option="bin")
        if int(master_pid) != -1:
            raise Exception("standby should stop but failed!")

    def start_stdby(self):
        '''Start the standby postmaster remotely with pg_ctl (in background).'''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_dbid = self.get_standby_dbid()
        activate_stdby = GpactivateStandby()
        stdby_mdd = activate_stdby.get_standby_dd()
        stdby_port = activate_stdby.get_standby_port()
        cmd = "pg_ctl -D %s -o '-p %s --gp_dbid=%s --gp_num_contents_in_cluster=2 --silent-mode=true -i -M master --gp_contentid=-1 -E' start &" % (
            stdby_mdd, stdby_port, stdby_dbid)
        self.run_remote(stdby_host, cmd, stdby_port, stdby_mdd)

    def run_remote(self, standbyhost, rmt_cmd, pgport='', standbydd=''):
        '''Runs remote command and returns rc, result '''
        export_cmd = "source %s/greenplum_path.sh;export PGPORT=%s;export MASTER_DATA_DIRECTORY=%s" % (
            self.gphome, pgport, standbydd)
        remote_cmd = "gpssh -h %s -e \"%s; %s\"" % (standbyhost, export_cmd,
                                                    rmt_cmd)
        cmd = Command(name='Running Remote command', cmdStr='%s' % remote_cmd)
        tinctest.logger.info(" %s" % cmd)
        cmd.run(validateAfter=False)
        result = cmd.get_results()
        return result.rc, result.stdout

    def check_mirror_seg(self):
        '''Run the mirror integrity check, including the master.'''
        dbstate = DbStateClass('run_validation')
        dbstate.check_mirrorintegrity(master=True)

    def get_standby_dbid(self):
        '''Return the dbid of the standby master as a stripped string.'''
        std_sql = "select dbid from gp_segment_configuration where content='-1' and role='m';"
        standby_dbid = PSQL.run_sql_command(std_sql, flags='-q -t',
                                            dbname='template1')
        return standby_dbid.strip()

    def run_transaction_backend(self):
        '''Run backend.sql from the local test directory, if present.'''
        tinctest.logger.info("local path for backend.sql is %s" %
                             local_path(''))
        for entry in os.listdir(local_path('')):
            if fnmatch.fnmatch(entry, 'backend.sql'):
                PSQL.run_sql_file(local_path(entry))

    def get_down_segment(self):
        '''Return the rows of segments that are not in sync and not up.'''
        query = 'select * from gp_segment_configuration where mode <> \'s\' and status <>\'u\''
        result = PSQL.run_sql_command(query, flags='-q -t',
                                      dbname='template1')
        return result.strip()

    def check_gpdb_status(self):
        '''Assert that get_down_segment reports nothing.'''
        down_segments = self.get_down_segment()
        self.assertEqual(down_segments, '')

    def gpstart_helper(self):
        '''helper method to run in scenario test'''
        (rc, result) = self.pgutil.run('gpstart -a')
        self.assertIn(rc, (0, 1))

    def gpstop_helper(self):
        '''helper method to run in scenario test'''
        cmd = Command('run gpstop', cmdStr='gpstop -a')
        cmd.run(validateAfter=True)

    def gpinitstandby_helper(self):
        '''helper method to create a new standby'''
        self.pgutil.install_standby()

    def removestandby_helper(self):
        ''' helper method to remove standby'''
        self.pgutil.remove_standby()

    def verify_standby_sync(self):
        '''Return True when all standby checks pass, else raise Exception.'''
        if (self.stdby.check_gp_segment_config()
                and self.stdby.check_pg_stat_replication()
                and self.stdby.check_standby_processes()):
            return True
        raise Exception('standby and master out of sync!')

    def kill_standby_postmaster(self):
        '''Wait (up to 20 one-second retries) for the standby postmaster to
        appear, then kill it.

        Returns False if it never appears, else the result of
        killProcess_byPid.
        '''
        host = WalReplKillProcessTestCase.stdby_host
        port = WalReplKillProcessTestCase.stdby_port
        postmaster_pid = self.pgutil.get_pid_by_keyword(
            host=host, pgport=port, keyword="master", option="bin")
        delay = 0
        while int(postmaster_pid) == -1 and delay < 20:
            sleep(1)
            delay = delay + 1
            postmaster_pid = self.pgutil.get_pid_by_keyword(
                host=host, pgport=port, keyword="master", option="bin")
        # Only the pid decides: a postmaster found on the last retry is valid.
        if int(postmaster_pid) == -1:
            tinctest.logger.error(
                "error: standby postmaster process does not exist!")
            return False
        return self.killProcess_byPid(pid_toKill=[postmaster_pid], host=host)

    @staticmethod
    def _render_template(src_path, dst_path, transform):
        '''Write dst_path from src_path, passing each line through transform.'''
        with open(src_path, 'r') as src:
            with open(dst_path, 'w') as dst:
                for line in src:
                    dst.write(transform(line))

    def initial_setup(self):
        '''Generate the dml sql/ans/yaml files from their '.in' templates and
        copy quote.csv to /tmp on the first primary segment host.'''
        keyword = 'rh55-qavm65'
        config = GPDBConfig()
        (seg_host, _seg_port) = config.get_hostandport_of_segment(
            psegmentNumber=0, pRole='p')
        cur_path = local_path('')
        sql_dir = os.path.join(cur_path, 'dml', 'sql')
        ans_dir = os.path.join(cur_path, 'dml', 'expected')
        cfg_dir = os.path.join(sql_dir, 'config')
        yaml_path = local_path('dml/sql/config/gpl.yaml')

        def use_segment_host(line):
            return line.replace(keyword, seg_host)

        def use_yaml_path(line):
            return line.replace('gpl.yaml', yaml_path)

        def use_tenk_path(line):
            return line.replace('tenk.data',
                                local_path('dml/sql/_data/tenk.data'))

        def fill_gpload_config(line):
            # First matching marker wins, mirroring the template layout.
            if 'DATABASE' in line:
                return line.replace('tangp3', os.environ.get('PGDATABASE'))
            if 'USER' in line:
                return line.replace('tangp3', os.environ.get('USER'))
            if 'HOST' in line:
                return line.replace('rh55-qavm61', socket.gethostname())
            if 'PORT' in line and '5432' in line:
                return line.replace('5432', os.environ.get('PGPORT'))
            if 'mydata' in line:
                return line.replace('mydata',
                                    local_path('dml/sql/gpload/mydata'))
            return line

        templates = [
            (sql_dir, 'insert_from_external.sql', use_segment_host),
            (ans_dir, 'insert_from_external.ans', use_segment_host),
            (sql_dir, 'insert_with_gpload.sql', use_yaml_path),
            (cfg_dir, 'gpl.yaml', fill_gpload_config),
            (ans_dir, 'insert_with_gpload.ans', use_yaml_path),
            (sql_dir, 'select_from_copy_table.sql', use_tenk_path),
            (ans_dir, 'select_from_copy_table.ans', use_tenk_path),
        ]
        for directory, filename, transform in templates:
            target = os.path.join(directory, filename)
            self._render_template(target + '.in', target, transform)

        external_table = local_path('dml/sql/_data/quote.csv')
        clean_file = 'rm -rf /tmp/quote.csv'
        rmt_cmd = "gpssh -h %s -e '%s' " % (seg_host, clean_file)
        cmd = Command(name='Running a remote command', cmdStr=rmt_cmd)
        cmd.run(validateAfter=False)
        command = 'scp %s %s:/tmp' % (external_table, seg_host)
        cmd = Command(name='run %s' % command, cmdStr='%s' % command)
        try:
            cmd.run(validateAfter=True)
        except Exception as e:
            # Best effort: the copy failure is logged, not propagated.
            tinctest.logger.error("Error running command %s\n" % e)
class GpinitStandby(object):
    '''Class for gpinitstandby operations
    Disclaimer: Some of these may repeat with the mpp/lib version'''

    def __init__(self):
        self.stdby = StandbyVerify()
        self.runmixin = StandbyRunMixin()
        self.runmixin.createdb(dbname='walrepl')
        self.mdd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.config = GPDBConfig()
        self.pgutil = GpUtility()
        self.host = socket.gethostname()

    def run(self, option=''):
        '''Runs gpinitstandby and returns True if successful '''
        gpinitstandby_cmd = 'gpinitstandby -a %s' % option
        cmd = Command(name='Running Gpinitstandby',
                      cmdStr="%s" % gpinitstandby_cmd)
        tinctest.logger.info(" %s" % cmd)
        cmd.run(validateAfter=False)
        result = cmd.get_results()
        return result.rc == 0

    def verify_gpinitstandby(self, primary_pid):
        '''Verify the presence of standby in recovery mode.

        Returns True only when all StandbyVerify checks pass and the primary
        master pid still equals `primary_pid`.
        '''
        return bool(self.stdby.check_gp_segment_config()
                    and self.stdby.check_pg_stat_replication()
                    and self.stdby.check_standby_processes()
                    and self.compare_primary_pid(primary_pid))

    def get_masterhost(self):
        '''Return the hostname of the primary master (content=-1, role='p').'''
        std_sql = "select hostname from gp_segment_configuration where content=-1 and role='p';"
        master_host = PSQL.run_sql_command(std_sql, flags='-q -t',
                                           dbname='postgres')
        return master_host.strip()

    def get_standbyhost(self):
        '''Return the hostname of the standby master (content=-1, role='m').'''
        std_sql = "select hostname from gp_segment_configuration where content='-1' and role='m';"
        standby_host = PSQL.run_sql_command(std_sql, flags='-q -t',
                                            dbname='postgres')
        return standby_host.strip()

    def get_filespace_location(self):
        '''Return the fs_walrepl_a filespace location recorded for dbid 1.'''
        fs_sql = "select fselocation from pg_filespace_entry where fselocation like '%fs_walrepl_a%' and fsedbid=1;"
        filespace_loc = PSQL.run_sql_command(fs_sql, flags='-q -t',
                                             dbname='postgres')
        return filespace_loc.strip()

    def get_standbyhostnode(self):
        '''
        Function used to obtain the hostname of one of the segment node inorder to use it as the standby master node
        @return : returns the hostname of the segment node which can be used as the standby master node
                  (the last host in the configuration that differs from this
                  host); None, after logging an error, when there is none
        '''
        standby = ''
        for host in self.config.get_hosts():
            if host.strip() != self.host:
                standby = host.strip()
        if standby:
            return standby
        tinctest.logger.error(
            'No segment host other than master available to have remote standby'
        )
        return None

    def get_primary_pid(self):
        '''Return the primary master pid as int.

        Raises WalReplException when the pid lookup reports -1.
        '''
        pid = self.pgutil.get_pid_by_keyword(pgport=os.environ.get('PGPORT'),
                                             keyword=self.mdd)
        if int(pid) == -1:
            raise WalReplException(
                'Unable to get pid of primary master process')
        return int(pid)

    def compare_primary_pid(self, initial_pid):
        '''Return True when the current primary pid equals `initial_pid`.'''
        return initial_pid == self.get_primary_pid()

    def create_dir_on_standby(self, standby, location):
        '''Recreate `location` (rm -rf, then mkdir -p) on host `standby`.

        Returns True on success; raises WalReplException on a non-zero rc.
        '''
        fs_cmd = "gpssh -h %s -e 'rm -rf %s; mkdir -p %s' " % (
            standby, location, location)
        cmd = Command(
            name='Make directory on standby before running the command',
            cmdStr=fs_cmd)
        tinctest.logger.info('%s' % cmd)
        cmd.run(validateAfter=True)
        result = cmd.get_results()
        if result.rc != 0:
            raise WalReplException('Unable to create directory on standby')
        return True

    def initstand_by_with_default(self):
        '''Run gpinitstandby against the master host itself.

        Returns True when the output contains 'Data directory already
        exists', False otherwise.
        '''
        master_host = self.get_masterhost()
        gp_cmd = "/bin/bash -c 'gpinitstandby -s %s'" % (master_host)
        cmd = Command(name='Running the command', cmdStr=gp_cmd)
        tinctest.logger.info('%s' % cmd)
        cmd.run(validateAfter=False)
        sleep(2)
        result = cmd.get_results()
        return any('Data directory already exists' in line
                   for line in result.stdout.splitlines())

    def init_with_prompt(self, filespace_loc):
        '''Run gpinitstandby interactively, answering the filespace prompt
        with `filespace_loc`.

        Returns True once the success message is matched. The session log is
        written to install2.log; the log file and the spawned child are
        always closed, also when pexpect raises.
        '''
        standby = self.get_standbyhostnode()
        gp_cmd = "/bin/bash -c 'gpinitstandby -s %s -a'" % (standby)
        logfile = open(local_path('install2.log'), 'w')
        try:
            child = pexpect.spawn(gp_cmd, timeout=400)
            try:
                child.logfile = logfile
                sleep(5)
                child.expect([
                    '.* Enter standby filespace location for filespace.*',
                    ' '
                ])
                child.sendline(filespace_loc)
                sleep(10)
                check = child.expect(
                    ['.*Successfully created standby master.*'])
                if check != 0:
                    tinctest.logger.error('gpinitstandby failed')
                    return False
                return True
            finally:
                child.close()
        finally:
            logfile.close()
class GpstateTestCase(MPPTestCase):
    '''testcase for gpstate'''

    origin_mdd = os.environ.get('MASTER_DATA_DIRECTORY')

    def __init__(self, methodName):
        self.gputil = GpUtility()
        self.mirrorConfig = []
        self.master_port = os.environ.get('PGPORT')
        self.masterdd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.activatestdby = ""
        super(GpstateTestCase, self).__init__(methodName)

    def setUp(self):
        '''Start the cluster, (re)create the standby and record the mirror
        port/host/directory values in self.mirrorConfig.'''
        self.gputil.check_and_start_gpdb()
        stdby_presence = self.gputil.check_standby_presence()
        if stdby_presence:
            self.gputil.remove_standby()
        self.gputil.install_standby()
        get_mirror_sql = '''select port, hostname, fselocation
                       from gp_segment_configuration, pg_filespace_entry
                       where dbid = fsedbid and content != -1 and preferred_role=\'m\' ;'''
        segments = self.gputil.run_SQLQuery(get_mirror_sql, dbname='template1')
        for seg in segments:
            port = seg[0]
            host = seg[1]
            datadir = seg[2]
            self.mirrorConfig.extend((port, host, datadir))
        self.activatestdby = GpactivateStandby()

    def tearDown(self):
        del self.mirrorConfig[:]
        self.gputil.remove_standby()

    def test_gpstate_disp_recovery(self):
        ''' run gpstate with -f option'''
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        standby_pid = self.gputil.get_pid_by_keyword(
            host=standby_host, user=os.environ.get('USER'),
            pgport=standby_port, keyword='master', option='bin')
        (rc, stdout) = self.gputil.run('gpstate -f')
        self.assertEqual(rc, 0)
        for line in stdout.split('\n'):
            if "=" not in line:
                continue
            items = line.split('=')
            if "Standby address" in line:
                stdby_addr = items[1].strip()
                self.assertEqual(stdby_addr, standby_host)
            elif "Standby data directory" in line:
                stdby_dir = items[1].strip()
                self.assertEqual(stdby_dir, standby_dir)
            elif "Standby port" in line:
                stdby_port = int(items[1].strip())
                self.assertEqual(stdby_port, int(standby_port))
            elif "Standby PID" in line:
                pid = items[1].strip()
                self.assertEqual(pid, standby_pid)

    def test_gpstate_disp_failover(self):
        '''test if the master configuration detail changed after activating standby'''
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        standby_pid = self.gputil.get_pid_by_keyword(
            host=standby_host, user=os.environ.get('USER'),
            pgport=standby_port, keyword='master', option='bin')
        self.activatestdby.activate()
        try:
            (rc, stdout) = self.activatestdby.run_remote(
                standby_host, rmt_cmd='gpstate -s', pgport=standby_port,
                standbydd=standby_dir)
            self.assertIn(rc, (0, 1))
            for line in stdout.split('\n'):
                if "=" not in line:
                    continue
                items = line.strip().split('=')
                if "Master host" in line:
                    master_host = items[1].strip()
                    self.assertEqual(master_host, standby_host)
                elif "Master postgres process ID" in line:
                    master_pid = items[1].strip()
                    self.assertEqual(master_pid, standby_pid)
                elif "Master data directory" in line:
                    master_dir = items[1].strip()
                    self.assertEqual(master_dir, standby_dir)
                elif "Master port" in line:
                    master_port = int(items[1].strip())
                    self.assertEqual(master_port, int(standby_port))
        finally:
            # Fail back even when an assertion fails, so later tests do not
            # inherit a failed-over cluster.
            self.gputil.failback_to_original_master(
                self.origin_mdd, standby_host, standby_dir, standby_port)

    def test_gpstate_active_segment_failover(self):
        ''' test if gpstate show correct # of up and down nodes after failover'''
        count_up_seg = '''select count(*) from gp_segment_configuration where content != -1 and status = \'u\';'''
        count_down_seg = '''select count(*) from gp_segment_configuration where content != -1 and status = \'d\';'''
        number_up_segment = PSQL.run_sql_command(
            count_up_seg, flags='-q -t', dbname='template1')
        number_down_segment = PSQL.run_sql_command(
            count_down_seg, flags='-q -t', dbname='template1')
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        self.activatestdby.activate()
        try:
            (rc, stdout) = self.activatestdby.run_remote(
                standby_host, rmt_cmd='gpstate -Q', pgport=standby_port,
                standbydd=standby_dir)
            self.assertIn(rc, (0, 1))
            for line in stdout.split('\n'):
                if "=" not in line:
                    continue
                items = line.strip().split('=')
                if "up segments" in line:
                    self.assertEqual(number_up_segment.strip(),
                                     items[1].strip())
                elif "down segments" in line:
                    self.assertEqual(number_down_segment.strip(),
                                     items[1].strip())
        finally:
            # Fail back even when an assertion fails.
            self.gputil.failback_to_original_master(
                self.origin_mdd, standby_host, standby_dir, standby_port)

    def test_gpstate_disp_mirror_failover(self):
        ''' check if new master is able to get correct mirror configuration with gpstate -m'''
        keywords = ("Mirror", "Datadir", "Port")
        standby_host = self.activatestdby.get_current_standby()
        standby_port = self.activatestdby.get_standby_port()
        standby_dir = self.activatestdby.get_standby_dd()
        self.activatestdby.activate()
        try:
            (rc, stdout) = self.activatestdby.run_remote(
                standby_host, rmt_cmd='gpstate -m', pgport=standby_port,
                standbydd=standby_dir)
            self.assertEqual(rc, 0)
            # NOTE(review): values from run_SQLQuery may not be strings
            # (e.g. the port); compare on their text form - confirm.
            expected = [str(item).strip() for item in self.mirrorConfig]
            inside_block = False
            # Walk the output line by line; iterating the string itself
            # would yield single characters.
            for line in stdout.split('\n'):
                if not inside_block:
                    # The table starts after the header naming all columns.
                    inside_block = all(s in line for s in keywords)
                    continue
                # split() with no argument drops empty fields;
                # str.split('') raises ValueError.
                fields = line.split()
                if len(fields) < 5:
                    # Blank/separator lines carry no mirror row.
                    continue
                mirror_host = fields[2]
                mirror_dir = fields[3]
                mirror_port = fields[4]
                self.assertTrue(mirror_host in expected)
                self.assertTrue(mirror_dir in expected)
                self.assertTrue(mirror_port in expected)
        finally:
            # Fail back even when an assertion fails.
            self.gputil.failback_to_original_master(
                self.origin_mdd, standby_host, standby_dir, standby_port)
class WalReplKillProcessTestCase(TINCTestCase):
    '''Scenario-test steps that kill WAL replication related processes and
    check how the master/standby react.'''

    # this is not hard code, will be updated
    stdby_host = 'localhost'
    stdby_port = '5432'

    def __init__(self, methodName):
        self.gphome = os.environ.get('GPHOME')
        self.pgport = os.environ.get('PGPORT')
        self.pgdatabase = os.environ.get('PGDATABASE')
        self.stdby_host = 'localhost'
        self.master_dd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.pgutil = GpUtility()
        self.stdby = StandbyVerify()
        super(WalReplKillProcessTestCase, self).__init__(methodName)

    def killProcess_byPid(self, signal=9, pid_toKill=None, host="localhost"):
        '''Send `signal` to every pid in `pid_toKill` on `host` via gpssh.

        Returns True when the gpssh command exits with status 0, False
        otherwise (including when no pid was supplied).
        '''
        # None instead of a shared mutable default list.
        pids = " ".join(str(pid).strip() for pid in (pid_toKill or []))
        if not pids:
            tinctest.logger.error("Killing process error, no pid given\n")
            return False
        kill_cmd = "%s/bin/gpssh -h %s -e 'kill -%s %s'" % (
            os.environ.get('GPHOME'), host, signal, pids)
        (rc, result) = self.pgutil.run(kill_cmd)
        if rc == 0:
            tinctest.logger.info("Process killed, %s" % result)
            return True
        tinctest.logger.error(
            "Killing process error, Status Code non zero, cmd: %s\n" % kill_cmd)
        return False

    def _kill_standby_process(self, keyword, missing_msg):
        '''Kill the process matching `keyword` on the current standby.

        Logs `missing_msg` and returns False when the process is not found;
        otherwise returns the result of killProcess_byPid.
        '''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_port = GpactivateStandby().get_standby_port()
        pid = self.pgutil.get_pid_by_keyword(host=stdby_host,
                                             pgport=stdby_port,
                                             keyword=keyword)
        if int(pid) == -1:
            tinctest.logger.error(missing_msg)
            return False
        return self.killProcess_byPid(pid_toKill=[pid], host=stdby_host)

    def kill_walstartup(self):
        '''Kill the startup process on the standby; True on success.'''
        return self._kill_standby_process(
            "startup process", "error:startup process does not exist!")

    def kill_walreceiver(self):
        '''Kill the wal receiver process on the standby; True on success.'''
        return self._kill_standby_process(
            "wal receiver process",
            "error: wal receiver process does not exist!")

    def kill_walsender_check_postmaster_reset(self):
        '''Kill the wal sender on the master and expect a new wal sender pid.

        Returns False if no wal sender exists, True when the pid changed;
        raises Exception when the pid is unchanged two seconds after the kill.
        '''
        walsender_old_pid = self.pgutil.get_pid_by_keyword(
            pgport=self.pgport, keyword="wal sender process")
        if int(walsender_old_pid) == -1:
            tinctest.logger.error(
                "error: process wal sender does not exist on host")
            return False
        self.killProcess_byPid(pid_toKill=[walsender_old_pid])
        sleep(2)
        walsender_new_pid = self.pgutil.get_pid_by_keyword(
            pgport=self.pgport, keyword="wal sender process")
        if walsender_old_pid == walsender_new_pid:
            raise Exception(
                "Killing walsender failed to force postmaster reset")
        return True

    def kill_transc_backend_check_reset(self):
        '''Kill a transaction backend and verify every postmaster child
        listed below comes back with a different pid.

        Raises Exception when a child still has its old pid after polling.
        '''
        dict_process = {
            'stats collector process': -1,
            'writer process': -1,
            'checkpointer process': -1,
            'seqserver process': -1,
            'ftsprobe process': -1,
            'sweeper process': -1,
            'wal sender process': -1,
        }
        for process in dict_process:
            dict_process[process] = self.pgutil.get_pid_by_keyword(
                pgport=self.pgport, keyword=process)
        self.kill_transc_backend()
        for process in dict_process:
            pid = self.pgutil.get_pid_by_keyword(pgport=self.pgport,
                                                 keyword=process)
            delay = 1
            while dict_process[process] == pid and delay < 5:
                sleep(1)
                delay = delay + 1
                pid = self.pgutil.get_pid_by_keyword(pgport=self.pgport,
                                                     keyword=process)
            # Decide on the pid itself, not on the retry counter: a pid that
            # changed on the very last poll is a success.
            if dict_process[process] == pid:
                tinctest.logger.error(
                    "Killing transaction backend process failed to force postmaster reset: %s"
                    % process)
                raise Exception(
                    "Killing transaction backend process failed to force postmaster reset child process"
                )

    def kill_transc_backend(self):
        '''Find backend(s) running an 'INSERT INTO' query and kill them.

        Polls pg_stat_activity up to five times, one second apart; logs an
        error when no such backend shows up.
        '''
        sql = "SELECT procpid FROM pg_stat_activity WHERE datname='{0}' AND current_query like 'INSERT INTO%'".format(
            self.pgdatabase)
        tinctest.logger.info(
            "running sql command to get transaction backend process: --- %s"
            % sql)
        procid = PSQL.run_sql_command(sql, flags='-q -t',
                                      dbname=self.pgdatabase)
        count = 1
        while not procid.strip() and count < 5:
            sleep(1)
            count += 1
            procid = PSQL.run_sql_command(sql, flags='-q -t',
                                          dbname=self.pgdatabase)
        if procid.strip():
            tinctest.logger.info("got procid to kill: %s " % procid)
            # psql output may hold several rows and padding whitespace;
            # split it so only bare pids reach the kill command.
            self.killProcess_byPid(pid_toKill=procid.split())
        else:
            tinctest.logger.error("There is no active backend process")

    def check_stdby_stop(self):
        '''Raise Exception if a 'master' process is still found on the standby.'''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_port = GpactivateStandby().get_standby_port()
        master_pid = self.pgutil.get_pid_by_keyword(host=stdby_host,
                                                    pgport=stdby_port,
                                                    keyword="master",
                                                    option="bin")
        if int(master_pid) != -1:
            raise Exception("standby should stop but failed!")

    def start_stdby(self):
        '''Start the standby postmaster remotely with pg_ctl (in background).'''
        stdby_host = GpinitStandby().get_standbyhost()
        stdby_dbid = self.get_standby_dbid()
        activate_stdby = GpactivateStandby()
        stdby_mdd = activate_stdby.get_standby_dd()
        stdby_port = activate_stdby.get_standby_port()
        cmd = "pg_ctl -D %s -o '-p %s --gp_dbid=%s --gp_num_contents_in_cluster=2 --silent-mode=true -i -M master --gp_contentid=-1 -x 0 -E' start &" % (
            stdby_mdd, stdby_port, stdby_dbid)
        self.run_remote(stdby_host, cmd, stdby_port, stdby_mdd)

    def run_remote(self, standbyhost, rmt_cmd, pgport='', standbydd=''):
        '''Runs remote command and returns rc, result '''
        export_cmd = "source %s/greenplum_path.sh;export PGPORT=%s;export MASTER_DATA_DIRECTORY=%s" % (
            self.gphome, pgport, standbydd)
        remote_cmd = "gpssh -h %s -e \"%s; %s\"" % (standbyhost, export_cmd,
                                                    rmt_cmd)
        cmd = Command(name='Running Remote command', cmdStr='%s' % remote_cmd)
        tinctest.logger.info(" %s" % cmd)
        cmd.run(validateAfter=False)
        result = cmd.get_results()
        return result.rc, result.stdout

    def check_mirror_seg(self):
        '''Run the mirror integrity check, including the master.'''
        dbstate = DbStateClass('run_validation')
        dbstate.check_mirrorintegrity(master=True)

    def get_standby_dbid(self):
        '''Return the dbid of the standby master as a stripped string.'''
        std_sql = "select dbid from gp_segment_configuration where content='-1' and role='m';"
        standby_dbid = PSQL.run_sql_command(std_sql, flags='-q -t',
                                            dbname='template1')
        return standby_dbid.strip()

    def run_transaction_backend(self):
        '''Run backend.sql from the local test directory, if present.'''
        tinctest.logger.info("local path for backend.sql is %s" %
                             local_path(''))
        for entry in os.listdir(local_path('')):
            if fnmatch.fnmatch(entry, 'backend.sql'):
                PSQL.run_sql_file(local_path(entry))

    def get_down_segment(self):
        '''Return the rows of segments that are not in sync and not up.'''
        query = 'select * from gp_segment_configuration where mode <> \'s\' and status <>\'u\''
        result = PSQL.run_sql_command(query, flags='-q -t',
                                      dbname='template1')
        return result.strip()

    def check_gpdb_status(self):
        '''Assert that get_down_segment reports nothing.'''
        down_segments = self.get_down_segment()
        self.assertEqual(down_segments, '')

    def gpstart_helper(self):
        '''helper method to run in scenario test'''
        (rc, result) = self.pgutil.run('gpstart -a')
        self.assertIn(rc, (0, 1))

    def gpstop_helper(self):
        '''helper method to run in scenario test'''
        cmd = Command('run gpstop', cmdStr='gpstop -a')
        cmd.run(validateAfter=True)

    def gpinitstandby_helper(self):
        '''helper method to create a new standby'''
        self.pgutil.install_standby()

    def removestandby_helper(self):
        ''' helper method to remove standby'''
        self.pgutil.remove_standby()

    def verify_standby_sync(self):
        '''Return True when all standby checks pass, else raise Exception.'''
        if (self.stdby.check_gp_segment_config()
                and self.stdby.check_pg_stat_replication()
                and self.stdby.check_standby_processes()):
            return True
        raise Exception('standby and master out of sync!')

    def kill_standby_postmaster(self):
        '''Wait (up to 20 one-second retries) for the standby postmaster to
        appear, then kill it.

        Returns False if it never appears, else the result of
        killProcess_byPid.
        '''
        host = WalReplKillProcessTestCase.stdby_host
        port = WalReplKillProcessTestCase.stdby_port
        postmaster_pid = self.pgutil.get_pid_by_keyword(
            host=host, pgport=port, keyword="master", option="bin")
        delay = 0
        while int(postmaster_pid) == -1 and delay < 20:
            sleep(1)
            delay = delay + 1
            postmaster_pid = self.pgutil.get_pid_by_keyword(
                host=host, pgport=port, keyword="master", option="bin")
        # Only the pid decides: a postmaster found on the last retry is valid.
        if int(postmaster_pid) == -1:
            tinctest.logger.error(
                "error: standby postmaster process does not exist!")
            return False
        return self.killProcess_byPid(pid_toKill=[postmaster_pid], host=host)

    @staticmethod
    def _render_template(src_path, dst_path, transform):
        '''Write dst_path from src_path, passing each line through transform.'''
        with open(src_path, 'r') as src:
            with open(dst_path, 'w') as dst:
                for line in src:
                    dst.write(transform(line))

    def initial_setup(self):
        '''Generate the dml sql/ans/yaml files from their '.in' templates and
        copy quote.csv to /tmp on the first primary segment host.'''
        keyword = 'rh55-qavm65'
        config = GPDBConfig()
        (seg_host, _seg_port) = config.get_hostandport_of_segment(
            psegmentNumber=0, pRole='p')
        cur_path = local_path('')
        sql_dir = os.path.join(cur_path, 'dml', 'sql')
        ans_dir = os.path.join(cur_path, 'dml', 'expected')
        cfg_dir = os.path.join(sql_dir, 'config')
        yaml_path = local_path('dml/sql/config/gpl.yaml')

        def use_segment_host(line):
            return line.replace(keyword, seg_host)

        def use_yaml_path(line):
            return line.replace('gpl.yaml', yaml_path)

        def use_tenk_path(line):
            return line.replace('tenk.data',
                                local_path('dml/sql/_data/tenk.data'))

        def fill_gpload_config(line):
            # First matching marker wins, mirroring the template layout.
            if 'DATABASE' in line:
                return line.replace('tangp3', os.environ.get('PGDATABASE'))
            if 'USER' in line:
                return line.replace('tangp3', os.environ.get('USER'))
            if 'HOST' in line:
                return line.replace('rh55-qavm61', socket.gethostname())
            if 'PORT' in line and '5432' in line:
                return line.replace('5432', os.environ.get('PGPORT'))
            if 'mydata' in line:
                return line.replace('mydata',
                                    local_path('dml/sql/gpload/mydata'))
            return line

        templates = [
            (sql_dir, 'insert_from_external.sql', use_segment_host),
            (ans_dir, 'insert_from_external.ans', use_segment_host),
            (sql_dir, 'insert_with_gpload.sql', use_yaml_path),
            (cfg_dir, 'gpl.yaml', fill_gpload_config),
            (ans_dir, 'insert_with_gpload.ans', use_yaml_path),
            (sql_dir, 'select_from_copy_table.sql', use_tenk_path),
            (ans_dir, 'select_from_copy_table.ans', use_tenk_path),
        ]
        for directory, filename, transform in templates:
            target = os.path.join(directory, filename)
            self._render_template(target + '.in', target, transform)

        external_table = local_path('dml/sql/_data/quote.csv')
        clean_file = 'rm -rf /tmp/quote.csv'
        rmt_cmd = "gpssh -h %s -e '%s' " % (seg_host, clean_file)
        cmd = Command(name='Running a remote command', cmdStr=rmt_cmd)
        cmd.run(validateAfter=False)
        command = 'scp %s %s:/tmp' % (external_table, seg_host)
        cmd = Command(name='run %s' % command, cmdStr='%s' % command)
        try:
            cmd.run(validateAfter=True)
        except Exception as e:
            # Best effort: the copy failure is logged, not propagated.
            tinctest.logger.error("Error running command %s\n" % e)
class GpinitStandby(object):
    '''Class for gpinitstandby operations.

    Disclaimer: Some of these may repeat with the mpp/lib version.
    '''

    def __init__(self):
        '''Build the helper objects used by the methods below.

        Besides constructing the helpers, this calls
        StandbyRunMixin.createdb(dbname='walrepl') and records the
        MASTER_DATA_DIRECTORY environment value and the local hostname.
        '''
        self.stdby = StandbyVerify()
        self.runmixin = StandbyRunMixin()
        self.runmixin.createdb(dbname='walrepl')
        self.mdd = os.environ.get('MASTER_DATA_DIRECTORY')
        self.config = GPDBConfig()
        self.pgutil = GpUtility()
        self.host = socket.gethostname()

    def _query_scalar(self, sql):
        '''Run sql against the 'postgres' database in quiet, tuples-only
        mode and return the output with surrounding whitespace removed.'''
        output = PSQL.run_sql_command(sql, flags = '-q -t', dbname= 'postgres')
        return output.strip()

    def run(self, option = ''):
        '''Runs "gpinitstandby -a <option>".

        @param option : extra command line text appended after "-a"
        @return : True when the command exits with rc 0, False otherwise
        '''
        gpinitstandby_cmd = 'gpinitstandby -a %s' % option
        cmd = Command(name='Running Gpinitstandby', cmdStr="%s" % gpinitstandby_cmd)
        tinctest.logger.info(" %s" % cmd)
        # validateAfter=False: a non-zero rc is reported through the return
        # value instead of being left to Command's own validation.
        cmd.run(validateAfter=False)
        return cmd.get_results().rc == 0

    def verify_gpinitstandby(self, primary_pid):
        '''Verify the presence of standby in recovery mode.

        @param primary_pid : pid of the primary master recorded earlier; it
                             must still be the current primary pid
        @return : True only when all StandbyVerify checks pass and the
                  primary pid is unchanged, False otherwise
        '''
        # The checks short-circuit in this order; later ones are not run
        # once one of them fails.
        return bool(self.stdby.check_gp_segment_config()
                    and self.stdby.check_pg_stat_replication()
                    and self.stdby.check_standby_processes()
                    and self.compare_primary_pid(primary_pid))

    def get_masterhost(self):
        '''Return the hostname of the content=-1, role='p' entry in
        gp_segment_configuration.'''
        std_sql = "select hostname from gp_segment_configuration where content=-1 and role='p';"
        return self._query_scalar(std_sql)

    def get_standbyhost(self):
        '''Return the hostname of the content=-1, role='m' entry in
        gp_segment_configuration (empty string when the query prints nothing).'''
        std_sql = "select hostname from gp_segment_configuration where content='-1' and role='m';"
        return self._query_scalar(std_sql)

    def get_filespace_location(self):
        '''Return the fselocation of the pg_filespace_entry row for dbid 1
        whose location contains "fs_walrepl_a".'''
        fs_sql = "select fselocation from pg_filespace_entry where fselocation like '%fs_walrepl_a%' and fsedbid=1;"
        return self._query_scalar(fs_sql)

    def get_standbyhostnode(self):
        '''
        Function used to obtain the hostname of one of the segment nodes in
        order to use it as the standby master node.
        @return : the hostname of a segment node other than the local host,
                  or None (after logging an error) when there is none
        '''
        standby = ''
        for host in self.config.get_hosts():
            candidate = host.strip()
            # No early exit: when several hosts qualify, the last one wins.
            if candidate != self.host:
                standby = candidate
        if standby:
            return standby
        tinctest.logger.error('No segment host other than master available to have remote standby')
        return None

    def get_primary_pid(self):
        '''Return the pid (as int) found by GpUtility.get_pid_by_keyword for
        the master data directory on $PGPORT.

        @raise WalReplException : when the lookup reports -1
        '''
        pid = int(self.pgutil.get_pid_by_keyword(pgport=os.environ.get('PGPORT'), keyword=self.mdd))
        if pid == -1:
            raise WalReplException('Unable to get pid of primary master process')
        return pid

    def compare_primary_pid(self, initial_pid):
        '''Return True when the current primary pid equals initial_pid.'''
        return initial_pid == self.get_primary_pid()

    def create_dir_on_standby(self, standby, location):
        '''Recreate an empty directory on the standby host through gpssh.

        Any existing content at location is removed first (rm -rf).

        @param standby : host to run the command on
        @param location : directory path to remove and recreate
        @return : True on success
        @raise WalReplException : when the command reports a non-zero rc
        '''
        fs_cmd = "gpssh -h %s -e 'rm -rf %s; mkdir -p %s' " % (standby, location, location)
        cmd = Command(name='Make dierctory on standby before running the command', cmdStr = fs_cmd)
        tinctest.logger.info('%s' % cmd)
        cmd.run(validateAfter=True)
        if cmd.get_results().rc != 0:
            raise WalReplException('Unable to create directory on standby')
        return True

    def initstand_by_with_default(self):
        '''Start an interactive gpinitstandby against the master host and
        check what it printed up to the filespace prompt.

        The session output is captured in install.log.

        @return : True when a captured line contains "default: NA",
                  False otherwise
        '''
        master_host = self.get_masterhost()
        gp_cmd = "/bin/bash -c 'gpinitstandby -s %s'" % (master_host)
        log_path = local_path('install.log')
        logfile = open(log_path, 'w')
        try:
            child = pexpect.spawn(gp_cmd, timeout=400)
            try:
                child.logfile = logfile
                sleep(2)
                child.expect(['.* Enter standby filespace location for filespace pg_system .*', ' '])
            finally:
                # Only the captured output matters from here on, so the
                # child is always closed, also when expect() raises.
                child.close()
        finally:
            # Close the write handle before re-reading the log.
            logfile.close()
        with open(log_path, 'r') as l_file:
            for line in l_file:
                if 'default: NA' in line:
                    return True
        return False

    def init_with_prompt(self, filespace_loc):
        '''Run gpinitstandby on a remote segment host and answer the
        filespace prompt with filespace_loc.

        The session output is captured in install2.log.

        @param filespace_loc : text sent in reply to the filespace prompt
        @return : True when the success message is seen; False when no
                  remote host is available, or the command ends or times
                  out without printing the success message
        '''
        standby = self.get_standbyhostnode()
        if not standby:
            # get_standbyhostnode already logged the reason; do not run
            # gpinitstandby with a bogus "-s None".
            return False
        gp_cmd = "/bin/bash -c 'gpinitstandby -s %s -a'" % (standby)
        logfile = open(local_path('install2.log'), 'w')
        try:
            child = pexpect.spawn(gp_cmd, timeout=400)
            try:
                child.logfile = logfile
                sleep(5)
                child.expect(['.* Enter standby filespace location for filespace.*', ' '])
                child.sendline(filespace_loc)
                sleep(10)
                # EOF and TIMEOUT are listed as patterns so that a failed
                # run yields a non-zero index instead of raising.
                check = child.expect(['.*Successfully created standby master.*',
                                      pexpect.EOF, pexpect.TIMEOUT])
            finally:
                child.close()
        finally:
            logfile.close()
        if check != 0:
            tinctest.logger.error('gpinitstandy failed')
            return False
        return True