def __ensureStopped(self, gpEnv, directives):
    """
    Stop the segments named by the given directives, one parallel remote
    stop command per host.

    @param directives a list of the GpStopSegmentDirectoryDirective values
                      indicating which segments to stop
    """
    if not directives:
        return

    logger.info("Ensuring %d failed segment(s) are stopped" % (len(directives)))
    toStop = [directive.getSegment() for directive in directives]

    # One GpSegStopCmd per host so the stops run in parallel across the cluster.
    cmds = []
    for host, hostSegments in GpArray.getSegmentsByHostName(toStop).iteritems():
        cmds.append(gp.GpSegStopCmd("remote segment stop on host '%s'" % host,
                                    gpEnv.getGpHome(), gpEnv.getGpVersion(),
                                    mode='fast', dbs=hostSegments,
                                    verbose=logging_is_verbose(),
                                    ctxt=base.REMOTE, remoteHost=host))

    # we suppress checking for the error. This is because gpsegstop will actually error
    # in many cases where the stop is actually done (that is, for example, the segment is
    # running but slow to shutdown so gpsegstop errors after whacking it with a kill)
    #
    # Perhaps we should make it so that it checks if the seg is running and only attempts
    # the stop if it's running? In that case, we could propagate the error
    #
    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "stopping segments", suppressErrorCheck=True)
def execute(self):
    """
    Build and return the list of segments whose mirrors should be verified,
    honoring the optional content-id and primaries-only filters.
    """
    dburl = dbconn.DbURL()
    gparray = GpArray.initFromCatalog(dburl)

    # Mirror verification is only meaningful under file replication.
    my_fault_strategy = gparray.getFaultStrategy()
    if my_fault_strategy != FAULT_STRATEGY_FILE_REPLICATION:
        raise NoMirroringError('Fault strategy %s does not support mirror verification.' % FAULT_STRATEGY_LABELS[my_fault_strategy])

    # A requested content id must actually exist in the array.
    if self.content is not None:
        known_contents = set([seg.getSegmentContentId() for seg in gparray.getDbList()])
        if self.content not in known_contents:
            raise InvalidContentIDError(self.content)

    logger.info('Validating target contents...')

    to_verify = [seg for seg in gparray.getDbList() if seg.isSegmentQE()]
    if self.content is not None:
        to_verify = [seg for seg in to_verify if seg.getSegmentContentId() == self.content]
    if self.primaries_only:
        to_verify = [seg for seg in to_verify if seg.isSegmentPrimary(current_role=True)]
    return to_verify
def execute(self):
    """
    Verify that every (up) primary segment holds the dump file for the
    restore timestamp; raises if a segment is down or a file is missing.

    TODO: Improve with grouping by host and ParallelOperation dispatch.
    """
    gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port),
                                      utility=True)
    primaries = [seg for seg in gparray.getDbList()
                 if seg.isSegmentPrimary(current_role=True)]
    for seg in primaries:
        # A down segment cannot be checked at all; fail fast.
        if seg.isSegmentDown():
            raise ExceptionNoStackTraceNeeded(
                "Host %s dir %s dbid %d marked as invalid"
                % (seg.getSegmentHostName(), seg.getSegmentDataDirectory(), seg.getSegmentDbId()))

        host = seg.getSegmentHostName()
        dump_dir = os.path.join(seg.getSegmentDataDirectory(), DUMP_DIR,
                                self.restore_timestamp[0:8])
        dump_file = os.path.join(dump_dir,
                                 "%s0_%d_%s" % (DBDUMP_PREFIX, seg.getSegmentDbId(), self.restore_timestamp))
        if self.compress:
            dump_file += ".gz"
        if not CheckRemoteFile(dump_file, host).run():
            raise ExceptionNoStackTraceNeeded(
                "No dump file on %s at %s" % (seg.getSegmentHostName(), dump_file))
class DeleteCurrentDump(Operation): def __init__(self, timestamp, master_datadir, master_port, ddboost): self.timestamp = timestamp self.master_datadir = master_datadir self.master_port = master_port self.ddboost = ddboost def execute(self): try: DeleteCurrentSegDump(self.timestamp, self.master_datadir).run() except OSError, e: logger.warn("Error encountered during deletion of %s on master" % self.timestamp) gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True) segs = [ seg for seg in gparray.getDbList() if seg.isSegmentPrimary(current_role=True) ] for seg in segs: try: RemoteOperation( DeleteCurrentSegDump(self.timestamp, seg.getSegmentDataDirectory()), seg.getSegmentHostName()).run() except OSError, e: logger.warn("Error encountered during deletion of %s on %s" % (self.timestamp, seg.getSegmentHostName()))
def __ensureMarkedDown(self, gpEnv, toEnsureMarkedDown): """Waits for FTS prober to mark segments as down""" wait_time = 60 * 30 # Wait up to 30 minutes to handle very large, busy # clusters that may have faults. In most cases the # actual time to wait will be small and this operation # is only needed when moving mirrors that are up and # needed to be stopped, an uncommon operation. dburl = dbconn.DbURL(port=gpEnv.getMasterPort(), dbname='template1') time_elapsed = 0 seg_up_count = 0 initial_seg_up_count = len(toEnsureMarkedDown) last_seg_up_count = initial_seg_up_count if initial_seg_up_count == 0: # Nothing to wait on return logger.info("Waiting for segments to be marked down.") logger.info("This may take up to %d seconds on large clusters." % wait_time) # wait for all needed segments to be marked down by the prober. We'll wait # a max time of double the interval while wait_time > time_elapsed: seg_up_count = 0 current_gparray = GpArray.initFromCatalog(dburl, True) seg_db_map = current_gparray.getSegDbMap() # go through and get the status of each segment we need to be marked down for segdb in toEnsureMarkedDown: if segdb.getSegmentDbId() in seg_db_map and seg_db_map[segdb.getSegmentDbId()].isSegmentUp() == True: seg_up_count += 1 if seg_up_count == 0: break else: if last_seg_up_count != seg_up_count: print "\n", logger.info("%d of %d segments have been marked down." % (initial_seg_up_count - seg_up_count, initial_seg_up_count)) last_seg_up_count = seg_up_count for _i in range(1,5): time.sleep(1) sys.stdout.write(".") sys.stdout.flush() time_elapsed += 5 if seg_up_count == 0: print "\n", logger.info("%d of %d segments have been marked down." % (initial_seg_up_count, initial_seg_up_count)) else: raise Exception("%d segments were not marked down by FTS" % seg_up_count)
def __sendPrimaryMirrorTransition(self, targetMode, segments, convertUsingFullResync, gpArray, resultOut):
    """
    Dispatch a parallel primary/mirror mode transition to every host owning
    one of the given segments, then collect per-segment results into resultOut.

    @param segments the segments to convert
    @param convertUsingFullResync in parallel with segments, may be None, gives true/false
           for whether fullResync flag should be passed to the transition
    """
    if len(segments) == 0:
        logger.debug("%s conversion of zero segments...skipping" % targetMode)
        return

    logger.info("Commencing parallel %s conversion of %s segments, please wait..." % (targetMode, len(segments)))

    # for each host, create + transfer the transition arguments file
    dispatchCount = 0
    dbIdToPeerMap = gpArray.getDbIdToPeerMap()

    for hostName, hostSegments in GpArray.getSegmentsByHostName(segments).iteritems():
        assert len(hostSegments) > 0
        logger.debug("Dispatching command to convert segments on host: %s " % (hostName))

        modePerSegment = [targetMode for seg in hostSegments]
        # NOTE(review): convertUsingFullResync is documented as parallel to the
        # full segment list, but here the per-host subset is pickled with it --
        # preserved as-is; confirm intended pairing.
        pickledParams = self.__createPickledTransitionParameters(hostSegments, modePerSegment,
                                                                 convertUsingFullResync, dbIdToPeerMap)

        address = hostSegments[0].getSegmentAddress()
        self.__workerPool.addCommand(gp.GpSegChangeMirrorModeCmd(
            "remote segment mirror mode conversion on host '%s' using address '%s'" % (hostName, address),
            self.__gpHome, self.__localeData, self.__gpVersion,
            hostSegments, targetMode, pickledParams,
            verbose=logging_is_verbose(),
            ctxt=base.REMOTE, remoteHost=address))
        dispatchCount += 1

    self.__workerPool.wait_and_printdots(dispatchCount, self.__quiet)

    # process results
    self.__processStartOrConvertCommands(resultOut)
    self.__workerPool.empty_completed_items()
def execute(self):
    """
    Tar up *.conf files from the master and from every primary segment,
    optionally copying each archive to DDBoost afterwards.
    """
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")

    # --- master configuration files ---
    config_backup_file = "gp_master_config_files_%s.tar" % timestamp
    base_dir = self.backup_dir if self.backup_dir is not None else self.master_datadir
    path = os.path.join(base_dir, DUMP_DIR, DUMP_DATE, config_backup_file)

    logger.info("Dumping master config files")
    Command("Dumping master configuration files",
            "tar cf %s %s/*.conf" % (path, self.master_datadir)).run(validateAfter=True)

    if self.ddboost:
        abspath = path
        relpath = os.path.join(DUMP_DIR, DUMP_DATE, config_backup_file)
        logger.debug('Copying %s to DDBoost' % abspath)
        Command('DDBoost copy of %s' % abspath,
                'gpddboost --copyToDDBoost --from-file=%s --to-file=%s'
                % (abspath, relpath)).run(validateAfter=True)

    # --- per-segment configuration files ---
    logger.info("Dumping segment config files")
    gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True)
    primaries = [seg for seg in gparray.getDbList()
                 if seg.isSegmentPrimary(current_role=True)]
    for seg in primaries:
        config_backup_file = "gp_segment_config_files_0_%d_%s.tar" % (seg.getSegmentDbId(), timestamp)
        base_dir = self.backup_dir if self.backup_dir is not None else seg.getSegmentDataDirectory()
        path = os.path.join(base_dir, DUMP_DIR, DUMP_DATE, config_backup_file)
        host = seg.getSegmentHostName()
        Command("Dumping segment config files",
                "tar cf %s %s/*.conf" % (path, seg.getSegmentDataDirectory()),
                ctxt=REMOTE, remoteHost=host).run(validateAfter=True)

        if self.ddboost:
            abspath = path
            relpath = os.path.join(DUMP_DIR, DUMP_DATE, config_backup_file)
            logger.debug('Copying %s to DDBoost' % abspath)
            Command('DDBoost copy of %s' % abspath,
                    'gpddboost --copyToDDBoost --from-file=%s --to-file=%s'
                    % (abspath, relpath),
                    ctxt=REMOTE, remoteHost=host).run(validateAfter=True)
def execute(self):
    """
    Raise ExceptionNoStackTraceNeeded if any primary segment is marked down;
    otherwise return None.
    """
    gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port),
                                      utility=True)
    failed_segs = [seg for seg in gparray.getDbList()
                   if seg.isSegmentPrimary(current_role=True) and seg.isSegmentDown()]
    if len(failed_segs) != 0:
        logger.warn("Failed primary segment instances detected")
        # BUG FIX: this called seg.getSegmentDbid() (wrong casing -> AttributeError;
        # the accessor used everywhere else in this file is getSegmentDbId), and
        # then joined the resulting ints directly, which raises TypeError.
        # Stringify the dbids before joining.
        failed_dbids = [str(seg.getSegmentDbId()) for seg in failed_segs]
        raise ExceptionNoStackTraceNeeded(
            "Detected failed segment(s) with dbid=%s" % ",".join(failed_dbids))
def __cleanUpSegmentDirectories(self, directives):
    """Remove data files from every segment named in the directives, per-host in parallel."""
    if not directives:
        return

    logger.info("Cleaning files from %d segment(s)" % (len(directives)))
    toClean = [directive.getSegment() for directive in directives]

    cmds = []
    for host, hostSegments in GpArray.getSegmentsByHostName(toClean).iteritems():
        cmds.append(gp.GpCleanSegmentDirectories("clean segment directories on %s" % host,
                                                 hostSegments, gp.REMOTE, host))
    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "cleaning existing directories")
def checkForPortAndDirectoryConflicts(self, gpArray):
    """
    Check gpArray for internal consistency -- no duplicate ports or directories on the same host, for example

    A detected problem causes an Exception to be raised
    """

    # Conflicts only matter between segments living on the same host, so
    # examine the array one host at a time.
    for hostName, segmentArr in GpArray.getSegmentsByHostName(gpArray.getDbList()).iteritems():
        usedPorts = {}              # port number -> dbid of the segment that claimed it
        usedDataDirectories = {}    # directory path -> dbid of the segment that claimed it
        for segment in segmentArr:

            # check for port conflict
            replicationPort = segment.getSegmentReplicationPort()
            port = segment.getSegmentPort()
            dbid = segment.getSegmentDbId()
            if port in usedPorts:
                raise Exception("On host %s, a port for segment with dbid %s conflicts with a port for segment dbid %s" \
                                % (hostName, dbid, usedPorts.get(port)))

            # Replication-port rules only apply to query-executor segments;
            # it must be set, unique on the host, and distinct from the
            # segment's primary port.
            if segment.isSegmentQE():
                if replicationPort is None:
                    raise Exception("On host %s, the replication port is not set for segment with dbid %s" \
                                    % (hostName, dbid))

                if replicationPort in usedPorts:
                    raise Exception("On host %s, a port for segment with dbid %s conflicts with a port for segment dbid %s" \
                                    % (hostName, dbid, usedPorts.get(replicationPort)))

                if port == replicationPort:
                    raise Exception("On host %s, segment with dbid %s has equal port and replication port" \
                                    % (hostName, dbid))

            usedPorts[port] = dbid
            # NOTE(review): for non-QE segments replicationPort may be None,
            # which records a None key here; appears harmless since primary
            # ports are never None -- confirm if that invariant changes.
            usedPorts[replicationPort] = dbid

            # check for directory conflict; could improve this by reporting nicer the conflicts
            # Both the base data directory and every non-system filespace
            # location must be unique on the host.
            paths = [path for oid, path in segment.getSegmentFilespaces().items() if oid != gparray.SYSTEM_FILESPACE]
            paths.append(segment.getSegmentDataDirectory())

            for path in paths:
                if path in usedDataDirectories:
                    raise Exception("On host %s, directory (base or filespace) for segment with dbid %s conflicts with a " \
                                    "directory (base or filespace) for segment dbid %s; directory: %s" % \
                                    (hostName, dbid, usedDataDirectories.get(path), path))
                usedDataDirectories[path] = dbid
def _get_gpdb_host_list(self):
    """
    TODO: AK: Get rid of this. Program logic should not be driving host list building .

    This method gets the host names
    of all hosts in the gpdb array.
    It sets the following variables
        GpPkgProgram.master_host to master
        GpPkgProgram.standby_host to standby
        GpPkgProgram.segment_host_list to segment hosts
    """
    logger.debug('_get_gpdb_host_list')

    # Get host list
    gparr = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port),
                                    utility=True)
    master_host = None
    standby_host = None
    segment_host_list = []

    segs = gparr.getDbList()

    for seg in segs:
        if seg.isSegmentMaster(current_role=True):
            master_host = seg.getSegmentHostName()
        elif seg.isSegmentStandby(current_role=True):
            standby_host = seg.getSegmentHostName()
        else:
            segment_host_list.append(seg.getSegmentHostName())

    # Deduplicate the hosts so that we
    # dont install multiple times on the same host
    segment_host_list = list(set(segment_host_list))

    # Segments might exist on the master host. Since we store the
    # master host separately in self.master_host, storing the master_host
    # in the segment_host_list is redundant.
    #
    # BUG FIX: the original removed entries from segment_host_list while
    # iterating over it, which skips the element that follows each removal
    # (e.g. standby_host could survive the filter when it directly followed
    # master_host in the list).  Build a filtered copy instead.
    segment_host_list = [host for host in segment_host_list
                         if host != master_host and host != standby_host]

    self.master_host = master_host
    self.standby_host = standby_host
    self.segment_host_list = segment_host_list
def mirrorlayout_test(self, hostlist, interface_list, primary_list, primary_portbase, mirror_type,
                      mirror_list, mirror_portbase, dir_prefix, primary_replication_portbase,
                      mirror_replication_portbase):
    """Build a GpArray from a synthetic master plus generated segment rows and validate its layout."""
    allrows = [GpDB(content=-1, preferred_role='p', dbid=0, role='p', mode='s', status='u',
                    hostname='masterhost', address='masterhost-1', port=5432,
                    datadir='/masterdir', replicationPort=5433)]

    rows = createSegmentRows(hostlist, interface_list, primary_list, primary_portbase, mirror_type,
                             mirror_list, mirror_portbase, dir_prefix,
                             primary_replication_portbase, mirror_replication_portbase)
    for row in rows:
        # Primary rows become 'p', mirror rows become 'm' for both role fields.
        role = 'p' if convert_bool(row.isprimary) else 'm'
        allrows.append(GpDB(content=row.content,
                            preferred_role=role,
                            dbid=row.dbid,
                            role=role,
                            mode='s',
                            status='u',
                            hostname=row.host,
                            address=row.address,
                            port=row.port,
                            datadir=row.fulldir,
                            replicationPort=row.prPort))

    self._validate_array(GpArray(allrows))
def __updateGpIdFile(self, gpEnv, gpArray, segments):
    """Rewrite the gp_id file on each given segment via ConfigureNewSegment in write-gpid-only mode."""
    hostToSegments = GpArray.getSegmentsByHostName(segments)
    newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(segments)

    cmds = []
    for hostName in hostToSegments.keys():
        segmentInfo = newSegmentInfo[hostName]
        checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
        cmds.append(gp.ConfigureNewSegment("update gpid file",
                                           segmentInfo,
                                           newSegments=False,
                                           verbose=gplog.logging_is_verbose(),
                                           batchSize=self.__parallelDegree,
                                           ctxt=gp.REMOTE,
                                           remoteHost=hostName,
                                           validationOnly=False,
                                           writeGpIdFileOnly=True))
    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "writing updated gpid files")
def execute(self): gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True) from_host, from_path = self.host, self.path logger.info( "Commencing remote database dump file recovery process, please wait..." ) segs = [ seg for seg in gparray.getDbList() if seg.isSegmentPrimary( current_role=True) or seg.isSegmentMaster() ] pool = WorkerPool(numWorkers=min(len(segs), self.batch_default)) for seg in segs: if seg.isSegmentMaster(): file = '%s%s' % (MASTER_DBDUMP_PREFIX, self.restore_timestamp) else: file = '%s0_%d_%s' % (DBDUMP_PREFIX, seg.getSegmentDbId(), self.restore_timestamp) if self.compress: file += '.gz' to_host = seg.getSegmentHostName() to_path = os.path.join(seg.getSegmentDataDirectory(), DUMP_DIR, self.restore_timestamp[0:8]) if not CheckRemoteDir(to_path, to_host).run(): logger.info('Creating directory %s on %s' % (to_path, to_host)) try: MakeRemoteDir(to_path, to_host).run() except OSError, e: raise ExceptionNoStackTraceNeeded( "Failed to create directory %s on %s" % (to_path, to_host)) logger.info("Commencing remote copy from %s to %s:%s" % (from_host, to_host, to_path)) pool.addCommand( Scp('Copying dump for seg %d' % seg.getSegmentDbId(), srcFile=os.path.join(from_path, file), dstFile=os.path.join(to_path, file), srcHost=from_host, dstHost=to_host))
def testReadPostmasterTempFile(self):
    """Exercise ReadPostmasterTempFile against the live master port and then a bogus one."""
    logger.info("testReadPostmasterTempFile")
    gpdb = GpArray.initFromCatalog(dbconn.DbURL())

    # The real master port should have a readable postmaster temp file.
    logger.info("Search for valid master port: %s" % gpdb.master.port)
    cmd = pg.ReadPostmasterTempFile.local('test pg tempfile read', gpdb.master.port)
    (exists, PID, datadir) = cmd.getResults()
    logger.info("exists:=%s PID=%d datadir='%s'" % (exists, PID, datadir))
    self.assertTrue(exists)
    self.assertTrue(PID > 0)
    self.assertEquals(datadir, gpdb.master.datadir)

    # A port nothing is listening on should report no temp file.
    gpdb.master.port = 4000
    logger.info("Search for bogus master port: %s" % gpdb.master.port)
    cmd = pg.ReadPostmasterTempFile.local('test pg tempfile read', gpdb.master.port)
    (exists, PID, datadir) = cmd.getResults()
    logger.info("exists:=%s PID=%d datadir='%s'" % (exists, PID, datadir))
    self.assertFalse(exists)
def get_host_list():
    '''
    Returns a tuple which consists of the standby
    and segment hosts comprising the cluster
    '''
    gparr = GpArray.initFromCatalog(dbconn.DbURL(port=MASTER_PORT), utility=True)

    standby_host = None
    segment_host_list = []
    for seg in gparr.getDbList():
        if seg.isSegmentStandby(current_role=True):
            standby_host = seg.getSegmentHostName()
        elif not seg.isSegmentMaster(current_role=True):
            segment_host_list.append(seg.getSegmentHostName())

    # Deduplicate the hosts so that we
    # dont install multiple times on the same host
    return (standby_host, list(set(segment_host_list)))
def execute(self): fake_timestamp = PickDumpTimestamp( restore_timestamp=self.restore_timestamp, compress=self.compress, master_datadir=self.master_datadir).run() gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True) primaries = [ seg for seg in gparray.getDbList() if seg.isSegmentPrimary(current_role=True) ] operations = [] for seg in primaries: real_filename = os.path.join( seg.getSegmentDataDirectory(), DUMP_DIR, self.restore_timestamp[0:8], "%s0_%d_%s" % (DBDUMP_PREFIX, seg.getSegmentDbId(), self.restore_timestamp)) fake_filename = os.path.join( seg.getSegmentDataDirectory(), DUMP_DIR, fake_timestamp[0:8], "%s0_%d_%s" % (DBDUMP_PREFIX, seg.getSegmentDbId(), fake_timestamp)) operations.append( BuildRemoteTableDump(self.restore_tables, real_filename, fake_filename, self.compress, seg.getSegmentHostName())) ParallelOperation(operations, self.batch_default).run() for operation in operations: try: operation.get_ret() except Exception, e: logger.exception('Parallel table dump file build failed.') raise ExceptionNoStackTraceNeeded( 'Parallel table dump file build failed, review log file for details' )
def execute(self): ValidateGpToolkit(database=self.dump_database, master_port=self.master_port).run() operations = [] gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True) segs = [ seg for seg in gparray.getDbList() if seg.isSegmentPrimary(current_role=True) ] for seg in segs: operations.append( RemoteOperation( ValidateSegDiskSpace( free_space_percent=self.free_space_percent, compress=self.compress, dump_database=self.dump_database, include_dump_tables=self.include_dump_tables, datadir=seg.getSegmentDataDirectory(), segport=seg.getSegmentPort()), seg.getSegmentHostName())) ParallelOperation(operations, self.batch_default).run() success = 0 for remote in operations: host = remote.host try: remote.get_ret() except NotEnoughDiskSpace, e: logger.error( "%s has insufficient disk space. [Need: %dK, Free %dK]" % (host, e.needed_space, e.free_space)) else: success += 1
def __runStartCommand(self, segments, startMethod, numContentsInCluster, resultOut, gpArray, era):
    """
    Start the given segments, dispatching one remote start command per host.

    Put results into the resultOut object
    """
    if len(segments) == 0:
        return

    if startMethod == START_AS_PRIMARY_OR_MIRROR:
        logger.info("Commencing parallel primary and mirror segment instance startup, please wait...")
    else:
        logger.info("Commencing parallel segment instance startup, please wait...")
    dispatchCount = 0

    dbIdToPeerMap = gpArray.getDbIdToPeerMap()

    # Segments started mirrorless come up directly; otherwise they start
    # quiescent and are transitioned to primary/mirror afterwards.
    mirroringModePreTransition = MIRROR_MODE_MIRRORLESS if startMethod == START_AS_MIRRORLESS else MIRROR_MODE_QUIESCENT

    # launch the start
    for hostName, segments in GpArray.getSegmentsByHostName(segments).iteritems():
        logger.debug("Dispatching command to start segments on host: %s, " \
                     "with %s contents in cluster" % (hostName, numContentsInCluster))

        # Only the primary-or-mirror start needs per-segment transition data
        # (each segment's target mode plus its peer information).
        pickledTransitionData = None
        if startMethod == START_AS_PRIMARY_OR_MIRROR:
            mirroringModePerSegment = []
            for seg in segments:
                modeThisSegment = MIRROR_MODE_PRIMARY if seg.isSegmentPrimary(True) else MIRROR_MODE_MIRROR
                mirroringModePerSegment.append(modeThisSegment)
            pickledTransitionData = self.__createPickledTransitionParameters(segments, mirroringModePerSegment, None, dbIdToPeerMap)

        #
        # This will call sbin/gpsegstart.py
        #
        cmd = gp.GpSegStartCmd("remote segment starts on host '%s'" % hostName,
                               self.__gpHome,
                               segments,
                               self.__localeData,
                               self.__gpVersion,
                               mirroringModePreTransition,
                               numContentsInCluster,
                               era,
                               self.__timeout,
                               verbose=logging_is_verbose(),
                               ctxt=base.REMOTE,
                               remoteHost=segments[0].getSegmentAddress(),
                               pickledTransitionData=pickledTransitionData,
                               specialMode=self.__specialMode,
                               wrapper=self.__wrapper,
                               wrapper_args=self.__wrapper_args)
        self.__workerPool.addCommand(cmd)
        dispatchCount += 1
    self.__workerPool.wait_and_printdots(dispatchCount, self.__quiet)

    # process results
    self.__processStartOrConvertCommands(resultOut)
    self.__workerPool.empty_completed_items()
def execute(self):
    """
    Build doctored dump files on DDBoost for a partial (table-level) restore:
    per-segment filtered dumps, the master dump, the master cdatabase file,
    and the _post_data file, all stamped with a freshly picked fake timestamp.

    Returns the fake timestamp used for the generated files.
    """
    fake_timestamp = PickDumpTimestamp(restore_timestamp=self.restore_timestamp,
                                       compress=self.compress,
                                       master_datadir=self.master_datadir).run()

    gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port),
                                      utility=True)
    primaries = [seg for seg in gparray.getDbList()
                 if seg.isSegmentPrimary(current_role=True)]

    # BUG FIX: this was min(primaries, self.batch_default) -- comparing a list
    # against an int.  Under Python 2 the int always compares smaller, so the
    # pool was always sized at batch_default even when there were fewer
    # primaries.  Size the pool by the primary count, as done elsewhere.
    pool = WorkerPool(numWorkers=min(len(primaries), self.batch_default))

    for seg in primaries:
        real_filename = os.path.join(DUMP_DIR, self.restore_timestamp[0:8],
                                     "%s0_%d_%s" % (DBDUMP_PREFIX, seg.getSegmentDbId(), self.restore_timestamp))
        fake_filename = os.path.join(seg.getSegmentDataDirectory(), DUMP_DIR, fake_timestamp[0:8],
                                     "%s0_%d_%s" % (DBDUMP_PREFIX, seg.getSegmentDbId(), fake_timestamp))
        if self.compress:
            real_filename += '.gz'
            fake_filename += '.gz'
        table_str = ' '.join(['--table=%s' % table for table in self.restore_tables])
        # TODO: Escaping. Low priority due to MPP-12880, et al
        cmd = Command('DDBoost building dump file for dbid %s' % seg.getSegmentDbId(),
                      'gpddboost --from-file=%s --to-file=%s %s' % (real_filename, fake_filename, table_str),
                      ctxt=REMOTE,
                      remoteHost=seg.getSegmentHostName())
        pool.addCommand(cmd)
    pool.join()
    pool.check_results()

    # Build the doctored master dump on DDBoost.
    BuildDDBoostMasterTableDump(restore_timestamp=self.restore_timestamp,
                                fake_timestamp=fake_timestamp,
                                compress=self.compress,
                                master_datadir=self.master_datadir).run()

    # Build master cdatabase file
    real_createdb = os.path.join(DUMP_DIR, self.restore_timestamp[0:8],
                                 "%s%s" % (CREATEDB_PREFIX, self.restore_timestamp))
    fake_createdb = os.path.join(self.master_datadir, DUMP_DIR, fake_timestamp[0:8],
                                 "%s%s" % (CREATEDB_PREFIX, fake_timestamp))
    Command('Copying cdatabase file from DDBoost',
            'gpddboost --copyFromDDBoost --from-file=%s --to-file=%s'
            % (real_createdb, fake_createdb)).run(validateAfter=True)

    # Build master _post_data file:
    CopyPostData(self.restore_timestamp, fake_timestamp, self.compress,
                 self.master_datadir).run()
    return fake_timestamp
def __copySegmentDirectories(self, gpEnv, gpArray, directives):
    """
    Build a template segment directory from a source segment and copy it
    to every destination segment host: validate target directories, transfer
    the template tarball, unpack/configure the new segments, then clean up.

    directives should be composed of GpCopySegmentDirectoryDirective values
    """
    if len(directives) == 0:
        return

    srcSegments = [d.getSrcSegment() for d in directives]
    destSegments = [d.getDestSegment() for d in directives]
    isTargetReusedLocation = [d.isTargetReusedLocation() for d in directives]
    destSegmentByHost = GpArray.getSegmentsByHostName(destSegments)
    newSegmentInfo = gp.ConfigureNewSegment.buildSegmentInfoForNewSegment(destSegments, isTargetReusedLocation)

    # The template is built once, from the first source segment, and reused
    # for every destination.
    logger.info('Building template directory')
    (tempDir, blankTarFile, tarFileName) = self.__buildTarFileForTransfer(gpEnv, gpArray.master, srcSegments[0], destSegments)

    def createConfigureNewSegmentCommand(hostName, cmdLabel, validationOnly):
        # Shared factory for the validate pass (validationOnly=True) and the
        # real configure pass (validationOnly=False).
        segmentInfo = newSegmentInfo[hostName]
        checkNotNone("segmentInfo for %s" % hostName, segmentInfo)
        return gp.ConfigureNewSegment(cmdLabel,
                                      segmentInfo,
                                      tarFile=tarFileName,
                                      newSegments=True,
                                      verbose=gplog.logging_is_verbose(),
                                      batchSize=self.__parallelDegree,
                                      ctxt=gp.REMOTE,
                                      remoteHost=hostName,
                                      validationOnly=validationOnly)
    #
    # validate directories for target segments
    #
    logger.info('Validating remote directories')
    cmds = []
    for hostName in destSegmentByHost.keys():
        cmds.append(createConfigureNewSegmentCommand(hostName, 'validate blank segments', True))
    for cmd in cmds:
        self.__pool.addCommand(cmd)
    self.__pool.wait_and_printdots(len(cmds), self.__quiet)
    validationErrors = []
    for item in self.__pool.getCompletedItems():
        results = item.get_results()
        if not results.wasSuccessful():
            # rc == 1 carries per-directory failure detail on stderr;
            # anything else is reported wholesale.
            if results.rc == 1:
                # stdoutFromFailure = results.stdout.replace("\n", " ").strip()
                lines = results.stderr.split("\n")
                for line in lines:
                    if len(line.strip()) > 0:
                        validationErrors.append("Validation failure on host %s %s" % (item.remoteHost, line))
            else:
                validationErrors.append(str(item))
    self.__pool.empty_completed_items()
    if validationErrors:
        raise ExceptionNoStackTraceNeeded("\n" + ("\n".join(validationErrors)))

    #
    # copy tar from master to target hosts
    #
    logger.info('Copying template directory file')
    cmds = []
    for hostName in destSegmentByHost.keys():
        cmds.append(gp.RemoteCopy("copy segment tar", blankTarFile, hostName, tarFileName))
    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "building and transferring basic segment directory")

    #
    # unpack and configure new segments
    #
    logger.info('Configuring new segments')
    cmds = []
    for hostName in destSegmentByHost.keys():
        cmds.append(createConfigureNewSegmentCommand(hostName, 'configure blank segments', False))
    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "unpacking basic segment directory")

    #
    # Clean up copied tar from each remote host
    #
    logger.info('Cleaning files')
    cmds = []
    for hostName, segments in destSegmentByHost.iteritems():
        cmds.append(unix.RemoveFiles('remove tar file', tarFileName, ctxt=gp.REMOTE, remoteHost=hostName))
    self.__runWaitAndCheckWorkerPoolForErrorsAndClear(cmds, "cleaning up tar file on segment hosts")

    #
    # clean up the local temp directory
    #
    unix.RemoveFiles.local('remove temp directory', tempDir)
logger.warn('Failed to remove %s on master' % path) # Remove master _post_data file path = os.path.join( self.master_datadir, DUMP_DIR, self.fake_timestamp[0:8], "%s%s_post_data" % (MASTER_DBDUMP_PREFIX, self.fake_timestamp)) if self.compress: path += '.gz' try: RemoveFile(path).run() except OSError, e: logger.warn('Failed to remove %s on master' % path) # Remove segment dump files operations = [] gparray = GpArray.initFromCatalog(dbconn.DbURL(port=self.master_port), utility=True) primaries = [ seg for seg in gparray.getDbList() if seg.isSegmentPrimary(current_role=True) ] for seg in primaries: path = os.path.join( seg.getSegmentDataDirectory(), DUMP_DIR, self.fake_timestamp[0:8], "%s0_%d_%s" % (DBDUMP_PREFIX, seg.getSegmentDbId(), self.fake_timestamp)) if self.compress: path += '.gz' host = seg.getSegmentHostName() operations.append(RemoveRemoteFile(path, host)) ParallelOperation(operations, self.batch_default).run()
class DeleteOldestDumps(Operation): # TODO: This Operation isn't consuming backup_dir. Should it? def __init__(self, master_datadir, master_port, ddboost): self.master_datadir = master_datadir self.master_port = master_port self.ddboost = ddboost def execute(self): dburl = dbconn.DbURL(port=self.master_port) if self.ddboost: cmd = Command('List directories in DDBoost db_dumps dir', 'gpddboost --listDir --dir=db_dumps/ | grep ^[0-9]') cmd.run(validateAfter=False) rc = cmd.get_results().rc if rc != 0: logger.info("Cannot find old backup sets to remove on DDboost") return old_dates = cmd.get_results().stdout.splitlines() else: old_dates = ListFiles(os.path.join(self.master_datadir, DUMP_DIR)).run() try: old_dates.remove(DUMP_DATE) except ValueError, e: # DUMP_DATE was not found in old_dates pass if len(old_dates) == 0: logger.info("No old backup sets to remove") return old_dates.sort() old_date = old_dates[0] # Remove the directories on DDBoost only. This will avoid the problem # where we might accidently end up deleting local backup files, but # the intention was to delete only the files on DDboost. 
if self.ddboost: logger.info("Preparing to remove dump %s from DDBoost" % old_date) cmd = Command( 'DDBoost cleanup', 'gpddboost --del-dir=%s' % os.path.join(DUMP_DIR, old_date)) cmd.run(validateAfter=False) rc = cmd.get_results().rc if rc != 0: logger.info( "Error encountered during deletion of %s on DDBoost" % os.path.join(DUMP_DIR, old_date)) logger.debug(cmd.get_results().stdout) logger.debug(cmd.get_results().stderr) else: logger.info("Preparing to remove dump %s from all hosts" % old_date) path = os.path.join(self.master_datadir, DUMP_DIR, old_date) try: RemoveTree(path).run() except OSError, e: logger.warn("Error encountered during deletion of %s" % path) gparray = GpArray.initFromCatalog( dbconn.DbURL(port=self.master_port), utility=True) primaries = [ seg for seg in gparray.getDbList() if seg.isSegmentPrimary(current_role=True) ] for seg in primaries: path = os.path.join(seg.getSegmentDataDirectory(), DUMP_DIR, old_date) try: RemoveRemoteTree(path, seg.getSegmentHostName()).run() except ExecutionError, e: logger.warn( "Error encountered during deletion of %s on %s" % (path, seg.getSegmentHostName()))
def rebalance(self):
    """
    Rebalance the cluster: stop the unbalanced primary segments, trigger
    fault detection so their peers take over, then run gprecoverseg to
    resynchronize.
    """
    # Get the unbalanced primary segments grouped by hostname
    # These segments are what we will shutdown.
    logger.info("Getting unbalanced segments")
    unbalanced_primary_segs = GpArray.getSegmentsByHostName(self.gpArray.get_unbalanced_primary_segdbs())
    pool = WorkerPool()

    count = 0
    try:
        # Disable ctrl-c while segments are being stopped.
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        logger.info("Stopping unbalanced primary segments...")
        for hostname in unbalanced_primary_segs.keys():
            cmd = GpSegStopCmd("stop unbalanced primary segs",
                               self.gpEnv.getGpHome(),
                               self.gpEnv.getGpVersion(),
                               'fast',
                               unbalanced_primary_segs[hostname],
                               ctxt=REMOTE,
                               remoteHost=hostname,
                               timeout=600)
            pool.addCommand(cmd)
            count += 1

        pool.wait_and_printdots(count, False)

        failed_count = 0
        completed = pool.getCompletedItems()
        for res in completed:
            if not res.get_results().wasSuccessful():
                failed_count += 1

        if failed_count > 0:
            # BUG FIX: the first warning contained a "%d" placeholder but never
            # interpolated failed_count, so the literal text "%d segments..."
            # was logged.  Supply the count.
            logger.warn("%d segments failed to stop. A full rebalance of the" % failed_count)
            logger.warn("system is not possible at this time. Please check the")
            logger.warn("log files, correct the problem, and run gprecoverseg -r")
            logger.warn("again.")
            logger.info("gprecoverseg will continue with a partial rebalance.")

        pool.empty_completed_items()

        # issue a distributed query to make sure we pick up the fault
        # that we just caused by shutting down segments
        conn = None
        try:
            logger.info("Triggering segment reconfiguration")
            dburl = dbconn.DbURL()
            conn = dbconn.connect(dburl)
            cmd = ReconfigDetectionSQLQueryCommand(conn)
            pool.addCommand(cmd)
            pool.wait_and_printdots(1, False)
        except Exception:
            # This exception is expected
            pass
        finally:
            if conn:
                conn.close()

        # Final step is to issue a recoverseg operation to resync segments
        logger.info("Starting segment synchronization")
        cmd = GpRecoverseg("rebalance recoverseg")
        pool.addCommand(cmd)
        pool.wait_and_printdots(1, False)
    except Exception:
        # BUG FIX: was "except Exception, ex: raise ex", which truncates the
        # original traceback under Python 2; a bare raise preserves it.
        raise