def deletePFN(self, pfn, lfn, command):
    """
    Delete the given PFN

    Looks up the stage-out plugin named ``command``, copies this object's
    ``numberOfRetries`` / ``retryPauseTime`` onto it as ``numRetries`` /
    ``retryPause`` and calls its ``removeFile`` with ``pfn``.

    :param pfn: value handed to the plugin's ``removeFile``
    :param lfn: only used to annotate the raised errors
    :param command: plugin name given to ``retrieveStageOutImpl``
    :returns: ``pfn`` unchanged
    :raises StageOutFailure: if the plugin cannot be retrieved or if
        ``removeFile`` raises
    """
    try:
        plugin = retrieveStageOutImpl(command)
    except Exception as lookupError:
        detail = "".join([
            "Unable to retrieve impl for file deletion in:\n",
            "Error retrieving StageOutImpl for command named: %s\n" % (command,),
        ])
        raise StageOutFailure(detail, Command=command, LFN=lfn,
                              ExceptionDetail=str(lookupError))

    plugin.numRetries = self.numberOfRetries
    plugin.retryPause = self.retryPauseTime

    try:
        plugin.removeFile(pfn)
    except Exception as removeError:
        detail = "Failure for file deletion in:\n" + str(removeError)
        try:
            import traceback
            detail += traceback.format_exc()
        except AttributeError:
            detail += "Traceback unavailable\n"
        raise StageOutFailure(detail, Command=command, Protocol=command,
                              LFN=lfn, TargetPFN=pfn)
    return pfn
def localStageOut(self, lfn, localPfn, checksums):
    """
    _localStageOut_

    Given the lfn and local stage out params, invoke the local stage out

    Reads 'se-name', 'command' and the optional 'option' from
    ``self.siteCfg.localStageOut``, maps ``lfn`` to a PFN with
    ``self.searchTFC`` and resolves the plugin named by 'command'.

    :raises StageOutFailure: if the LFN cannot be mapped to a PFN or the
        plugin cannot be retrieved
    """
    seName = self.siteCfg.localStageOut['se-name']
    command = self.siteCfg.localStageOut['command']
    options = self.siteCfg.localStageOut.get('option', None)
    pfn = self.searchTFC(lfn)
    protocol = self.tfc.preferredProtocol
    if pfn is None:
        msg = "Unable to match lfn to pfn: \n  %s" % lfn
        raise StageOutFailure(msg, LFN=lfn, TFC=str(self.tfc))

    try:
        impl = retrieveStageOutImpl(command)
    except Exception as ex:  # "as" form parses on Python 2.6+ and 3
        msg = "Unable to retrieve impl for local stage out:\n"
        msg += "Error retrieving StageOutImpl for command named: %s\n" % (
            command, )
        raise StageOutFailure(msg, Command=command,
                              LFN=lfn, ExceptionDetail=str(ex))
    # NOTE(review): the visible definition stops here; seName, options,
    # protocol, localPfn, checksums and impl are not used in this excerpt.
def localStageIn(self, lfn, override=None):
    """
    _localStageIn_

    Given the lfn and local stage out params, invoke the local stage in
    i.e. stage in lfn to pfn

    The file is staged into the current working directory under the
    basename of ``lfn``.

    if override is used the following params should be defined:
    command - the stage out impl plugin name to be used
    option - the option values to be passed to that command (None is allowed)
    lfn-prefix - the LFN prefix to generate the PFN
    se-name - the Name of the SE to which the file is being xferred

    :returns: the local PFN the file was staged to
    :raises StageOutFailure: if the LFN cannot be mapped, the plugin cannot
        be retrieved, or the plugin call raises
    """
    localPfn = os.path.join(os.getcwd(), os.path.basename(lfn))

    if override:
        # seName is not used below; the lookup is kept so a missing key
        # still raises KeyError exactly as before.
        seName = override['se-name']
        command = override['command']
        options = override['option']
        pfn = "%s%s" % (override['lfn-prefix'], lfn)
        protocol = command
    else:
        seName = self.siteCfg.localStageOut['se-name']
        command = self.siteCfg.localStageOut['command']
        options = self.siteCfg.localStageOut.get('option', None)
        pfn = self.searchTFC(lfn)
        protocol = self.tfc.preferredProtocol

    if pfn is None:
        msg = "Unable to match lfn to pfn: \n  %s" % lfn
        raise StageOutFailure(msg, LFN=lfn, TFC=str(self.tfc))

    try:
        impl = retrieveStageOutImpl(command, stagein=True)
    except Exception as ex:
        msg = "Unable to retrieve impl for local stage in:\n"
        msg += "Error retrieving StageOutImpl for command named: %s\n" % (
            command, )
        raise StageOutFailure(msg, Command=command,
                              LFN=lfn, ExceptionDetail=str(ex))

    impl.numRetries = self.numberOfRetries
    impl.retryPause = self.retryPauseTime

    try:
        # source is the remote pfn, target is the local file
        impl(protocol, pfn, localPfn, options)
    except Exception as ex:
        msg = "Failure for local stage in:\n"
        msg += str(ex)
        raise StageOutFailure(msg, Command=command, Protocol=protocol,
                              LFN=lfn, InputPFN=localPfn, TargetPFN=pfn)
    return localPfn
def localStageOut(self, lfn, localPfn, checksums):
    """
    _localStageOut_

    Given the lfn and local stage out params, invoke the local stage out

    The plugin named by the site's 'command' is called with the TFC's
    preferred protocol, the local PFN, the mapped target PFN, the optional
    'option' value and ``checksums``.

    :returns: the target PFN produced by ``self.searchTFC(lfn)``
    :raises StageOutFailure: if the LFN cannot be mapped, the plugin cannot
        be retrieved, or the plugin call raises (the traceback text is
        appended to the message)
    """
    # seName and pnn are not used below; the lookups are kept so a missing
    # key still raises KeyError exactly as before.
    seName = self.siteCfg.localStageOut['se-name']
    pnn = self.siteCfg.localStageOut['phedex-node']
    command = self.siteCfg.localStageOut['command']
    options = self.siteCfg.localStageOut.get('option', None)
    pfn = self.searchTFC(lfn)
    protocol = self.tfc.preferredProtocol
    if pfn is None:
        msg = "Unable to match lfn to pfn: \n  %s" % lfn
        raise StageOutFailure(msg, LFN=lfn, TFC=str(self.tfc))

    try:
        impl = retrieveStageOutImpl(command)
    except Exception as ex:
        msg = "Unable to retrieve impl for local stage out:\n"
        msg += "Error retrieving StageOutImpl for command named: %s\n" % (
            command, )
        raise StageOutFailure(msg, Command=command,
                              LFN=lfn, ExceptionDetail=str(ex))

    impl.numRetries = self.numberOfRetries
    impl.retryPause = self.retryPauseTime

    try:
        impl(protocol, localPfn, pfn, options, checksums)
    except Exception as ex:
        msg = "Failure for local stage out:\n"
        msg += str(ex)
        try:
            import traceback
            msg += traceback.format_exc()
        except AttributeError:
            # no "as ex" here: rebinding would clobber the outer exception
            msg += "Traceback unavailable\n"
        raise StageOutFailure(msg, Command=command, Protocol=protocol,
                              LFN=lfn, InputPFN=localPfn, TargetPFN=pfn)
    return pfn
def deleteLFN(self, lfn, override=None):
    """
    deleteLFN

    Given the lfn and local stage out params, invoke the delete

    if override is used the following params should be defined:
    command - the stage out impl plugin name to be used
    option - the option values to be passed to that command (None is allowed)
    lfn-prefix - the LFN prefix to generate the PFN
    phedex-node - the Name of the PNN to which the file is being xferred

    :returns: whatever ``self.deletePFN(pfn, lfn, command)`` returns
    :raises StageOutFailure: if no PFN could be derived for ``lfn``
    """
    if override:
        # pnn, options and protocol are not used below; the lookups are
        # kept so a missing key still raises KeyError exactly as before.
        pnn = override['phedex-node']
        command = override['command']
        options = override['option']
        pfn = "%s%s" % (override['lfn-prefix'], lfn)
        protocol = command
    else:
        pnn = self.siteCfg.localStageOut['phedex-node']
        command = self.siteCfg.localStageOut['command']
        options = self.siteCfg.localStageOut.get('option', None)
        pfn = self.searchTFC(lfn)
        protocol = self.tfc.preferredProtocol

    if pfn is None:
        msg = "Unable to match lfn to pfn: \n  %s" % lfn
        raise StageOutFailure(msg, LFN=lfn, TFC=str(self.tfc))

    return self.deletePFN(pfn, lfn, command)
def deletePFN(self, pfn, lfn, command):
    """
    Delete the given PFN

    Resolves the plugin named ``command``, copies ``numberOfRetries`` /
    ``retryPauseTime`` onto it and calls its ``removeFile`` with ``pfn``.

    :param pfn: value handed to the plugin's ``removeFile``
    :param lfn: only used to annotate errors
    :param command: plugin name given to ``retrieveStageOutImpl``
    :returns: ``pfn`` unchanged
    :raises StageOutFailure: if the plugin cannot be retrieved
    :raises Exception: whatever ``removeFile`` raised, re-raised after
        logging; annotated via ``addInfo`` when the exception offers it
    """
    try:
        impl = retrieveStageOutImpl(command)
    except Exception as ex:
        msg = "Unable to retrieve impl for file deletion in:\n"
        msg += "Error retrieving StageOutImpl for command named: %s\n" % (
            command, )
        raise StageOutFailure(msg, Command=command,
                              LFN=lfn, ExceptionDetail=str(ex))

    impl.numRetries = self.numberOfRetries
    impl.retryPause = self.retryPauseTime

    try:
        impl.removeFile(pfn)
    except Exception as ex:
        self.logger.error("Failed to delete file: %s", pfn)
        # Only some exception classes provide addInfo; calling it blindly on
        # e.g. an OSError would raise AttributeError and hide the real error.
        addInfo = getattr(ex, 'addInfo', None)
        if callable(addInfo):
            addInfo(Protocol=command, LFN=lfn, TargetPFN=pfn)
        raise
    return pfn
def __call__(self, fileToStage):
    """
    _operator()_

    Use call to invoke transfers

    ``fileToStage`` is indexed with 'LFN' and 'PFN' and queried for
    'Checksums'; on a successful local stage out it is updated in place
    ('PFN', 'SEName', 'StageOutCommand'), recorded in
    ``self.completedFiles`` and returned.
    """
    lastException = None

    print("==>Working on file: %s" % fileToStage['LFN'])
    lfn = fileToStage['LFN']

    #  //
    # // No override => use local-stage-out from site conf
    # //  invoke for all files and check failures/successes
    if not self.override:
        print("===> Attempting Local Stage Out.")
        try:
            pfn = self.localStageOut(lfn, fileToStage['PFN'],
                                     fileToStage.get('Checksums'))
            fileToStage['PFN'] = pfn
            fileToStage['SEName'] = self.siteCfg.localStageOut['se-name']
            fileToStage['StageOutCommand'] = self.siteCfg.localStageOut['command']
            self.completedFiles[fileToStage['LFN']] = fileToStage

            print("===> Stage Out Successful: %s" % fileToStage)
            return fileToStage
        except WMException as ex:
            lastException = ex
            print("===> Local Stage Out Failure for file:")
            print("======>  %s\n" % fileToStage['LFN'])
        except Exception as ex:
            lastException = StageOutFailure("Error during local stage out",
                                            error = str(ex))
            print("===> Local Stage Out Failure for file:\n")
            print("======>  %s\n" % fileToStage['LFN'])
    # NOTE(review): the visible definition stops here, so lastException is
    # recorded but never raised within this excerpt.
def doTransfer(self, fromPfn, toPfn, stageOut, pnn, command, options, protocol, checksum):
    """
    Copy ``fromPfn`` to ``toPfn`` on the local filesystem.

    ``self.createOutputDirectory`` is called with the target first, the
    file is copied with ``shutil.copy`` and the sizes of both paths are
    compared afterwards.  The remaining parameters are accepted but not
    read by this implementation.

    :returns: ``toPfn``
    :raises StageOutFailure: if source and copy differ in size
    """
    self.createOutputDirectory(toPfn)
    shutil.copy(fromPfn, toPfn)

    sourceSize = os.path.getsize(fromPfn)
    copiedSize = os.path.getsize(toPfn)
    if sourceSize == copiedSize:
        return toPfn
    raise StageOutFailure("Invalid file size")
def fallbackStageOut(self, lfn, localPfn, fbParams, checksums):
    """
    _fallbackStageOut_

    Given the lfn and parameters for a fallback stage out, invoke it

    parameters should contain:

    command - the stage out impl plugin name to be used
    option - the option values to be passed to that command (None is allowed)
    lfn-prefix - the LFN prefix to generate the PFN
    se-name - the Name of the SE to which the file is being xferred

    :raises StageOutFailure: if the plugin named by 'command' cannot be
        retrieved
    """
    pfn = "%s%s" % (fbParams['lfn-prefix'], lfn)

    try:
        impl = retrieveStageOutImpl(fbParams['command'])
    except Exception as ex:  # "as" form parses on Python 2.6+ and 3
        msg = "Unable to retrieve impl for fallback stage out:\n"
        msg += "Error retrieving StageOutImpl for command named: "
        msg += "%s\n" % fbParams['command']
        raise StageOutFailure(msg, Command=fbParams['command'],
                              LFN=lfn, ExceptionDetail=str(ex))
    # NOTE(review): the visible definition stops here; pfn, impl, localPfn
    # and checksums are not used within this excerpt.
def doTransfer(self, sourcePFN, targetPFN, stageOut, pnn, command, options, protocol, checksum):
    """
    performs a transfer. stageOut tells you which way to go. returns the new pfn or
    raises on failure. StageOutError (and inherited exceptions) are for expected errors
    such as temporary connection failures. Anything else will be handled as an unexpected
    error and skip retrying with this plugin

    The copy is done with ``xrdcp -s3`` to ``root://lxgate39.cern.ch/<targetPFN>``
    and then checked by comparing the local size with the "Size" line of
    ``rfstat`` for the target.

    :returns: ``targetPFN``
    :raises StageOutFailure: if local and remote sizes differ (a delete of
        the target is attempted first)
    """
    original_size = os.stat(sourcePFN).st_size
    logging.info("Local File Size is: %s" % original_size)

    self.doWrapped(
        self.generateCommandFromPreAndPostParts(
            ['xrdcp', '-s3'],
            [sourcePFN, 'root://lxgate39.cern.ch/%s' % targetPFN],
            options))

    # rfstat <target> | grep Size | cut -f2 -d:
    p1 = Popen(["rfstat", '-recursion_depth=0', '-retry_num=0', targetPFN],
               stdout=PIPE)
    p2 = Popen(["grep", 'Size'], stdout=PIPE, stdin=p1.stdout)
    p3 = Popen(["cut", '-f2', '-d:'], stdout=PIPE, stdin=p2.stdout)
    remoteSize = p3.communicate()[0]

    if int(original_size) != int(remoteSize):
        try:
            self.doDelete(targetPFN, None, None, None, None)
        except Exception as ex:
            # best-effort cleanup: report it, but let the size mismatch win
            logging.error("Could not delete partial file %s: %s", targetPFN, ex)
        raise StageOutFailure("File sizes don't match")

    # The docstring promises the new pfn; previously None fell out here.
    return targetPFN
def __call__(self, fileToStage):
    """
    _operator()_

    Use call to invoke transfers

    Tries the site's local stage out first (unless ``self.override`` is
    set), then each entry of ``self.fallbacks`` in order.  On success
    ``fileToStage`` is updated in place ('PFN', 'SEName',
    'StageOutCommand'), stored in ``self.completedFiles`` and returned.

    :raises Exception: the last failure seen, or a StageOutFailure when no
        stage out was attempted at all
    """
    lastException = None

    print("==>Working on file: %s" % fileToStage['LFN'])
    lfn = fileToStage['LFN']

    #  //
    # // No override => use local-stage-out from site conf
    # //  invoke for all files and check failures/successes
    if not self.override:
        print("===> Attempting Local Stage Out.")
        try:
            pfn = self.localStageOut(lfn, fileToStage['PFN'],
                                     fileToStage.get('Checksums'))
            fileToStage['PFN'] = pfn
            fileToStage['SEName'] = self.siteCfg.localStageOut['se-name']
            fileToStage['StageOutCommand'] = self.siteCfg.localStageOut['command']
            self.completedFiles[fileToStage['LFN']] = fileToStage

            print("===> Stage Out Successful: %s" % fileToStage)
            return fileToStage
        except WMException as ex:
            lastException = ex
            print("===> Local Stage Out Failure for file:")
            print("======>  %s\n" % fileToStage['LFN'])
        except Exception as ex:
            lastException = StageOutFailure("Error during local stage out",
                                            error = str(ex))
            print("===> Local Stage Out Failure for file:\n")
            print("======>  %s\n" % fileToStage['LFN'])

    #  //
    # // Still here => failure, start using the fallback stage outs
    # //  If override is set, then that will be the only fallback available
    print("===> Attempting %s Fallback Stage Outs" % len(self.fallbacks))
    for fallback in self.fallbacks:
        try:
            pfn = self.fallbackStageOut(lfn, fileToStage['PFN'],
                                        fallback, fileToStage.get('Checksums'))
            fileToStage['PFN'] = pfn
            fileToStage['SEName'] = fallback['se-name']
            fileToStage['StageOutCommand'] = fallback['command']
            print("attempting fallback")
            self.completedFiles[fileToStage['LFN']] = fileToStage
            if lfn in self.failed:
                del self.failed[lfn]

            print("===> Stage Out Successful: %s" % fileToStage)
            return fileToStage
        except Exception as ex:
            lastException = ex
            continue

    if lastException is None:
        # override set and no fallbacks: nothing ran, so there is no
        # exception to re-raise ("raise None" would be a TypeError)
        lastException = StageOutFailure("No stage out was attempted for file",
                                        LFN=lfn)
    raise lastException
def __call__(self, fileToDelete):
    """
    Delete the file described by ``fileToDelete``.

    ``fileToDelete`` must contain the key 'LFN'; its value is passed to
    ``self.deleteLFN`` and that call's result is returned.

    :raises StageOutFailure: if 'LFN' is not in ``fileToDelete``
    """
    if 'LFN' in fileToDelete:
        return self.deleteLFN(fileToDelete['LFN'])
    raise StageOutFailure('LFN not provided to deleteLFN')
def testFail(self):
    """
    Record the commands the plugin is expected to issue for failing
    transfers and deletes, replay them, and check that ``doTransfer``
    raises StageOutFailure for each recorded transfer.

    The mox expectations below are recorded in order; the calls made after
    ``mox.Replay`` are checked against them by ``mox.Verify``.
    """
    # first try to make a nonexistent file (regular)
    # recorded to return ("1", ...) -- presumably a non-zero exit status
    self.runMocker.runCommand(
        [self.commandPrepend,'dccp',
         '-o',
         '86400',
         '-d',
         '0',
         '-X',
         '-role=cmsprod',
         '/store/NONEXISTANTSOURCE',
         '/store/NONEXISTANTTARGET' ]
        ).AndReturn(("1", "This was a test of the fail system"))

    # then try to make a nonexistent file on lustre
    # -- fake making a directory
    self.runMocker.runCommand(
        [self.commandPrepend,
         'mkdir',
         '-m',
         '755',
         '-p',
         '/store/unmerged']
        ).AndReturn(("0", "we made a directory, yay"))
    # -- fake the actual copy
    self.copyMocker.doTransfer(
        '/store/unmerged/lustre/NONEXISTANTSOURCE',
        '/store/unmerged/lustre/NONEXISTANTTARGET',
        True, None, None, None, None
        ).AndRaise(StageOutFailure("testFailure"))

    # do one with a real pfn
    self.runMocker.runCommand(
        [self.commandPrepend,
         'mkdir',
         '-m',
         '755',
         '-p',
         '/pnfs/cms/WAX/11/store/temp/WMAgent/unmerged/RECO/WMAgentCommissioning10-v7newstageout']).AndReturn(("0",""))
    # the target string carries a trailing space inside the quotes; it is
    # part of the recorded argument and must match the call made below
    self.runMocker.runCommand([
        self.commandPrepend,
        'dccp',
        '-o',
        '86400',
        '-d',
        '0',
        '-X',
        '-role=cmsprod',
        'file:///etc/hosts',
        'dcap://cmsdca.fnal.gov:24037/pnfs/fnal.gov/usr/cms/WAX/11/store/temp/WMAgent/unmerged/RECO/WMAgentCommissioning10-v7newstageout/0000/0661D749-DD95-DF11-8A0F-00261894387C.root ' ]).AndReturn(("0", ""))

    # now try to delete it (pnfs)
    self.runMocker.runCommand(
        ['rm', '-fv', '/pnfs/cms/WAX/11/store/tmp/testfile' ]
        ).AndReturn(("1", "This was a test of the fail system"))
    # try to delete it (lustre)
    self.runMocker.runCommand(
        ['/bin/rm', '/lustre/unmerged/NOTAFILE']
        ).AndReturn(("1", "This was a test of the fail system"))

    # switch both mocks from record mode to replay mode
    mox.Replay(self.runMocker)
    mox.Replay(self.copyMocker)
    #ourPlugin.runCommand = runMocker.runCommand()
    testObject = ourPlugin()

    # each transfer recorded above is expected to surface as StageOutFailure
    self.assertRaises(StageOutFailure,
                      testObject.doTransfer,
                      '/store/NONEXISTANTSOURCE',
                      '/store/NONEXISTANTTARGET',
                      True, None, None, None, None)
    self.assertRaises(StageOutFailure,
                      testObject.doTransfer,
                      '/store/unmerged/lustre/NONEXISTANTSOURCE',
                      '/store/unmerged/lustre/NONEXISTANTTARGET',
                      True, None, None, None, None)
    self.assertRaises(
        StageOutFailure,
        testObject.doTransfer,
        'file:///etc/hosts',
        'dcap://cmsdca.fnal.gov:24037/pnfs/fnal.gov/usr/cms/WAX/11/store/temp/WMAgent/unmerged/RECO/WMAgentCommissioning10-v7newstageout/0000/0661D749-DD95-DF11-8A0F-00261894387C.root ',
        True, None, None, None, None)

    # the deletes are called directly: no exception is asserted for them
    testObject.doDelete('/store/tmp/testfile', None, None, None, None)
    testObject.doDelete('/store/unmerged/lustre/NOTAFILE', None, None, None, None)

    # fail the test if the recorded expectations were not all consumed
    mox.Verify(self.runMocker)
    mox.Verify(self.copyMocker)
def fallbackStageOut(self, lfn, localPfn, fbParams, checksums):
    """
    _fallbackStageOut_

    Stage ``localPfn`` out through a fallback described by ``fbParams``.

    ``fbParams`` is read for:

    command - the stage out impl plugin name to be used
    option - the option values to be passed to that command (None is allowed)
    lfn-prefix - the LFN prefix to generate the PFN
    phedex-node - the Name of the PNN to which the file is being xferred

    :returns: the target PFN, i.e. 'lfn-prefix' followed by ``lfn``
    :raises StageOutFailure: if the plugin cannot be retrieved or the
        plugin call raises
    """
    targetPfn = "%s%s" % (fbParams['lfn-prefix'], lfn)

    try:
        plugin = retrieveStageOutImpl(fbParams['command'])
    except Exception as lookupError:
        detail = ("Unable to retrieve impl for fallback stage out:\n"
                  "Error retrieving StageOutImpl for command named: "
                  "%s\n" % fbParams['command'])
        raise StageOutFailure(detail, Command=fbParams['command'],
                              LFN=lfn, ExceptionDetail=str(lookupError))

    plugin.numRetries = self.numberOfRetries
    plugin.retryPause = self.retryPauseTime

    try:
        plugin(fbParams['command'], localPfn, targetPfn,
               fbParams.get("option", None), checksums)
    except Exception as transferError:
        detail = "Failure for fallback stage out:\n" + str(transferError)
        raise StageOutFailure(detail, Command=fbParams['command'],
                              LFN=lfn, InputPFN=localPfn, TargetPFN=targetPfn)
    return targetPfn
def __call__(self, fileToDelete):
    """
    _operator()_

    Use call to delete a file

    Sets 'SEName' and 'PNN' on ``fileToDelete``, tries ``self.deleteLFN``
    with the site configuration (unless ``self.override`` is set) and then
    with each entry of ``self.fallbacks`` until one succeeds.

    :returns: ``fileToDelete`` with 'PFN' filled in
    :raises StageOutFailure: if no attempt succeeded
    """
    print("==>Working on file: %s" % fileToDelete['LFN'])

    lfn = fileToDelete['LFN']
    fileToDelete['SEName'] = self.seName
    fileToDelete['PNN'] = self.pnn

    deleteSuccess = False

    #  //
    # // No override => use local-stage-out from site conf
    # //  invoke for all files and check failures/successes
    if not self.override:
        print("===> Attempting To Delete.")
        try:
            fileToDelete['PFN'] = self.deleteLFN(lfn)
            deleteSuccess = True
        except StageOutFailure as ex:
            msg = "===> Local Stage Out Failure for file:\n"
            msg += "======>  %s\n" % fileToDelete['LFN']
            msg += str(ex)
            print(msg)

    if not deleteSuccess and len(self.fallbacks) > 0:
        #  //
        # // Still here => override start using the fallback stage outs
        # //  If override is set, then that will be the only fallback available
        print("===> Attempting To Delete with Override.")
        for fallback in self.fallbacks:
            if not deleteSuccess:
                try:
                    fileToDelete['PFN'] = self.deleteLFN(lfn, fallback)
                    deleteSuccess = True
                except StageOutFailure as ex:
                    continue

    if deleteSuccess:
        msg = "===> Delete Successful:\n"
        msg += "====> LFN: %s\n" % fileToDelete['LFN']
        msg += "====> PFN: %s\n" % fileToDelete['PFN']
        msg += "====> SE:  %s\n" % fileToDelete['SEName']
        msg += "====> PNN:  %s\n" % fileToDelete['PNN']
        print(msg)
        return fileToDelete
    else:
        msg = "Unable to delete file:\n"
        msg += fileToDelete['LFN']
        raise StageOutFailure(msg, **fileToDelete)
def doTransfer(self, fromPfn, toPfn, stageOut, seName, command, options, protocol, checksum):
    """
    performs a transfer. stageOut tells you which way to go. returns the new pfn or
    raises on failure. StageOutError (and inherited exceptions) are for expected errors
    such as temporary connection failures. Anything else will be handled as an unexpected
    error and skip retrying with this plugin

    The copy is run with ``lcg-cp`` through ``self.runCommandFailOnNonZero``
    and verified by comparing the local file size with the fifth
    whitespace-separated field of ``lcg-ls -l`` for the remote file.

    :returns: ``toPfn``
    :raises StageOutFailure: if the sizes differ (a delete of ``toPfn`` is
        attempted first)
    """
    if stageOut:
        fromPfn2 = self.prependFileProtocol(fromPfn)
        toPfn2 = toPfn
        localFileName = fromPfn
        remoteFileName = toPfn
    else:
        fromPfn2 = fromPfn
        toPfn2 = self.prependFileProtocol(toPfn)
        localFileName = toPfn
        remoteFileName = fromPfn

    localDir = os.path.dirname(localFileName)
    # dirname is '' for a bare file name; os.makedirs('') would raise
    if localDir and not os.path.exists(localDir):
        logging.info("Making local directory %s" % localDir)
        os.makedirs(localDir)

    if not options:
        options = ""

    transferCommand = "lcg-cp -b -D srmv2 --vo cms --srm-timeout 2400 --sendreceive-timeout 2400 --connect-timeout 300 --verbose %s %s %s " %\
        ( options, fromPfn2, toPfn2 )

    logging.info("Staging out with lcg-cp")
    logging.info("  commandline: %s" % transferCommand)
    self.runCommandFailOnNonZero(transferCommand)

    logging.info("Verifying file sizes")
    localSize = os.path.getsize(localFileName)
    remoteSize = subprocess.Popen(
        ['lcg-ls', '-l', '-b', '-D', 'srmv2', remoteFileName],
        stdout=subprocess.PIPE).communicate()[0]
    logging.info("got the following from lcg-ls %s" % remoteSize)
    remoteSize = remoteSize.split()[4]
    logging.info("Localsize: %s Remotesize: %s" % (localSize, remoteSize))

    if int(localSize) != int(remoteSize):
        try:
            logging.error("Transfer failed, deleting partial file")
            self.doDelete(toPfn, None, None, None, None)
        except Exception as ex:
            # best-effort cleanup: report it, but let the size mismatch win
            logging.error("Could not delete partial file %s: %s", toPfn, ex)
        raise StageOutFailure("File sizes don't match")

    return toPfn
def __call__(self, fileToDelete):
    """
    _operator()_

    Delete the file whose LFN is stored under ``fileToDelete['LFN']``.

    'PNN' is set from ``self.pnn``.  Without an override the site
    configuration is tried first through ``self.deleteLFN``; if that did
    not succeed, each entry of ``self.fallbacks`` is tried until one does.

    :returns: ``fileToDelete`` with 'PFN' filled in
    :raises StageOutFailure: if no attempt succeeded
    """
    self.logger.info("==>Working on file: %s" % fileToDelete['LFN'])

    lfn = fileToDelete['LFN']
    fileToDelete['PNN'] = self.pnn
    deleted = False

    # No override => use local-stage-out from site conf
    if not self.override:
        self.logger.info("===> Attempting To Delete.")
        try:
            fileToDelete['PFN'] = self.deleteLFN(lfn)
        except Exception as localError:
            self.logger.error(
                "===> Local file deletion failure. Exception:\n%s",
                str(localError))
        else:
            deleted = True

    # Still not deleted => walk the fallbacks; the first success wins
    if not deleted and len(self.fallbacks) > 0:
        self.logger.info("===> Attempting To Delete files with fallback.")
        for fallback in self.fallbacks:
            if deleted:
                continue
            try:
                fileToDelete['PFN'] = self.deleteLFN(lfn, fallback)
            except Exception:
                continue
            deleted = True

    if not deleted:
        failure = "Unable to delete file:\n"
        failure += fileToDelete['LFN']
        raise StageOutFailure(failure, **fileToDelete)

    summary = "===> Delete Successful:\n"
    summary += "====> LFN: %s\n" % fileToDelete['LFN']
    summary += "====> PFN: %s\n" % fileToDelete['PFN']
    summary += "====> PNN:  %s\n" % fileToDelete['PNN']
    self.logger.info(summary)
    return fileToDelete
def doTransfer(self, fromPfn, toPfn, stageOut, seName, command, options, protocol, checksum):
    """
    Copy a file with ``srmcp`` and verify it by size.

    Both PFNs are first rewritten with ``self.createSourceName``.  After
    the copy, the local size is compared with the ``l=`` value read from
    the ``.(use)(2)(<name>)`` file next to the target's pnfs path.

    :param stageOut: when true ``toPfn`` is the remote side, otherwise
        ``fromPfn`` is
    :returns: the rewritten ``toPfn``
    :raises StageOutError: from the optional exit-code check
    :raises StageOutFailure: if the sizes differ (a delete of ``toPfn`` is
        attempted first)
    """
    toPfn = self.createSourceName(protocol, toPfn)
    fromPfn = self.createSourceName(protocol, fromPfn)

    # mkstemp hands back an open descriptor; only the path is needed
    reportFd, reportFile = tempfile.mkstemp()
    os.close(reportFd)
    ourCommand = \
        self.generateCommandFromPreAndPostParts(
            ['srmcp', '-report=%s' % reportFile, '-retry_num=0'],
            [fromPfn, toPfn],
            options)
    self.runCommandWarnOnNonZero(ourCommand)

    # use the stageOut argument (this used to read self.stageOut, leaving
    # the parameter unused)
    if not stageOut:
        remotePFN, localPFN = fromPfn, toPfn.replace("file://", "", 1)
    else:
        remotePFN, localPFN = toPfn, fromPfn.replace("file://", "", 1)

    targetPnfsPath = self.createPnfsPath(remotePFN)

    if _CheckExitCodeOption:
        # NOTE(review): '-d" "' reaches cut with the quote characters
        # included and the input is rfstat output, not the srmcp report --
        # left untouched because correcting it would change when this
        # check fires; confirm the intent.
        p1 = Popen(["rfstat", remotePFN], stdout=PIPE)
        p3 = Popen(['cut', '-f3', '-d" "'], stdin=p1.stdout, stdout=PIPE)
        exitCode = p3.communicate()[0]
        if exitCode:
            raise StageOutError("srmcp failed! Error code: %s" % exitCode)

    localSize = os.path.getsize(localPFN)
    logging.info("Local Size %s" % localSize)

    # filesize() { cat "`dirname $1`/.(use)(2)(`basename $1`)'" | grep l= | sed -e's/.*;l=\([0-9]*\).*/\\1/'; }
    # the following replaces the above
    targetDirName = os.path.dirname(targetPnfsPath)
    targetBaseName = os.path.basename(targetPnfsPath)
    p1 = Popen(
        ["cat", "%s/.(use)(2)(%s)" % (targetDirName, targetBaseName)],
        stdout=PIPE)
    p2 = Popen(["grep", "l="], stdout=PIPE, stdin=p1.stdout)
    # No shell is involved, so the script must not carry shell quotes:
    # with them sed rejects the expression and prints nothing.
    p3 = Popen(["sed", "-e", r"s/.*;l=\([0-9]*\).*/\1/"],
               stdout=PIPE, stdin=p2.stdout)
    remoteSize = p3.communicate()[0]
    logging.info("Localsize: %s Remotesize: %s" % (localSize, remoteSize))

    if int(localSize) != int(remoteSize):
        try:
            self.doDelete(toPfn, None, None, None, None)
        except Exception as ex:
            # best-effort cleanup: report it, but let the size mismatch win
            logging.error("Could not delete partial file %s: %s", toPfn, ex)
        raise StageOutFailure("File sizes don't match")

    return toPfn
def deletePFN(self, pfn, lfn, command):
    """
    Delete the given PFN

    Resolves the stage-out plugin named ``command``.

    :raises StageOutFailure: if the plugin cannot be retrieved
    """
    try:
        impl = retrieveStageOutImpl(command)
    except Exception as ex:  # "as" form parses on Python 2.6+ and 3
        msg = "Unable to retrieve impl for file deletion in:\n"
        msg += "Error retrieving StageOutImpl for command named: %s\n" % (
            command, )
        raise StageOutFailure(msg, Command=command,
                              LFN=lfn, ExceptionDetail=str(ex))
    # NOTE(review): the visible definition stops here; impl and pfn are not
    # used within this excerpt.
def doTransfer(self, fromPfn, toPfn, stageOut, pnn, command, options, protocol, checksum):
    """
    performs a transfer. stageOut tells you which way to go. returns the new pfn or
    raises on failure. StageOutError (and inherited exceptions) are for expected errors
    such as temporary connection failures. Anything else will be handled as an unexpected
    error and skip retrying with this plugin

    The copy is run with ``gfal-copy``.  ``--nochecksum`` in ``options``
    drops the ``-K adler32`` flag; every other option is passed through
    to gfal-copy unchanged.

    :returns: ``toPfn``
    :raises StageOutFailure: if the command result's first element is not 0
        (a delete of ``toPfn`` is attempted first)
    """
    # the local side gets the file protocol prefix, the remote side is used as is
    if stageOut:
        fromPfn2 = self.prependFileProtocol(fromPfn)
        toPfn2 = toPfn
    else:
        fromPfn2 = fromPfn
        toPfn2 = self.prependFileProtocol(toPfn)

    if not options:
        options = ''

    # parse the options; add_help=False so a "-h" meant for gfal-copy is
    # passed through instead of making argparse print help and exit
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('--nochecksum', action='store_true')
    args, unknown = parser.parse_known_args(options.split())

    baseTransferCommand = "env -i X509_USER_PROXY=$X509_USER_PROXY gfal-copy -t 2400 -T 2400 -p "
    if args.nochecksum:
        transferCommand = "%s -vvv %s %s %s " % (
            baseTransferCommand, ' '.join(unknown), fromPfn2, toPfn2)
    else:
        transferCommand = "%s -K adler32 -vvv %s %s %s " % (
            baseTransferCommand, ' '.join(unknown), fromPfn2, toPfn2)

    logging.info("Staging out with gfal-copy")
    logging.info("  commandline: %s", transferCommand)
    commandExec = self.runCommandFailOnNonZero(transferCommand)

    if commandExec[0] != 0:
        try:
            logging.error("Transfer failed, deleting partial file")
            self.doDelete(toPfn, None, None, None, None)
        except Exception as ex:
            # best-effort cleanup: report it, but let the transfer failure win
            logging.error("Could not delete partial file %s: %s", toPfn, ex)
        raise StageOutFailure("File transfer failed")

    return toPfn
def doTransfer(self, fromPfn, toPfn, stageOut, pnn, command, options, protocol, checksum):
    """
    performs a transfer. stageOut tells you which way to go. returns the new pfn or
    raises on failure. StageOutError (and inherited exceptions) are for expected errors
    such as temporary connection failures. Anything else will be handled as an unexpected
    error and skip retrying with this plugin

    if stageOut is true:
        The fromPfn is the LOCAL FILE NAME on the node, without file://
        the toPfn is the target PFN, mapped from the LFN using the TFC or overrides
    if stageOut is false:
        The toPfn is the LOCAL FILE NAME on the node, without file://
        the fromPfn is the target PFN, mapped from the LFN using the TFC or overrides

    this behavior is because most transfer commands will switch their direction
    simply by swapping the order of the arguments. the stageOut flag is provided
    however, because sometimes you want to pass different command line args

    :returns: ``toPfn``
    :raises StageOutFailure: if the local size differs from the size
        reported by ``rfstat`` (a delete of ``toPfn`` is attempted first)
    """
    ourCommand = \
        self.generateCommandFromPreAndPostParts(
            ["rfcp"],
            [fromPfn, toPfn],
            options)
    self.runCommandFailOnNonZero(ourCommand)

    # keeping this logic though I don't believe in it
    # AMM -7/13/2010
    if not stageOut:
        remotePFN, localPFN = fromPfn, toPfn
    else:
        remotePFN, localPFN = toPfn, fromPfn

    localSize = os.path.getsize(localPFN)

    # rfstat <remote> | grep Size | cut -f2 -d:
    p1 = Popen(["rfstat", remotePFN], stdout=PIPE)
    p2 = Popen(["grep", "Size"], stdin=p1.stdout, stdout=PIPE)
    # cut's -d needs its delimiter; a bare '-d' makes cut fail and leaves
    # remoteSize empty
    p3 = Popen(['cut', '-f2', '-d:'], stdin=p2.stdout, stdout=PIPE)
    remoteSize = p3.communicate()[0]
    logging.info("Localsize: %s Remotesize: %s" % (localSize, remoteSize))

    if int(localSize) != int(remoteSize):
        try:
            self.doDelete(toPfn, None, None, None, None)
        except Exception as ex:
            # best-effort cleanup: report it, but let the size mismatch win
            logging.error("Could not delete partial file %s: %s", toPfn, ex)
        raise StageOutFailure("File sizes don't match")

    return toPfn
def __call__(self, **fileToStage):
    """
    _operator()_

    Use call to invoke transfers

    Stages in the file named by the 'LFN' keyword: first through
    ``self.localStageIn(lfn)`` (unless ``self.override`` is set), then
    through ``self.localStageIn(lfn, fallback)`` for each entry of
    ``self.fallbacks``.  StageInSuccess is raised internally to jump out
    of the attempts as soon as one works.

    :returns: the keyword dict with 'PFN' filled in
    :raises StageOutFailure: if no attempt succeeded
    """
    try:
        print("==>Working on file: %s" % fileToStage['LFN'])

        lfn = fileToStage['LFN']

        #  //
        # // No override => use local-stage-out from site conf
        # //  invoke for all files and check failures/successes
        if not self.override:
            print("===> Attempting Local Stage In.")
            try:
                pfn = self.localStageIn(lfn)
                fileToStage['PFN'] = pfn
                raise StageInSuccess
            except StageOutFailure as ex:
                msg = "===> Local Stage Out Failure for file:\n"
                msg += "======>  %s\n" % fileToStage['LFN']
                msg += str(ex)
                print(msg)

        #  //
        # // Still here => override start using the fallback stage outs
        # //  If override is set, then that will be the only fallback available
        print("===> Attempting %s Override Stage Outs" % len(
            self.fallbacks))
        for fallback in self.fallbacks:
            try:
                pfn = self.localStageIn(lfn, fallback)
                fileToStage['PFN'] = pfn
                raise StageInSuccess
            except StageOutFailure as ex:
                continue

    except StageInSuccess:
        msg = "===> Stage In Successful:\n"
        msg += "====> LFN: %s\n" % fileToStage['LFN']
        msg += "====> PFN: %s\n" % fileToStage['PFN']
        print(msg)
        return fileToStage

    msg = "Unable to stage out file:\n"
    msg += fileToStage['LFN']
    raise StageOutFailure(msg, **fileToStage)
def doTransfer(self, fromPfn, toPfn, stageOut, pnn, command, options, protocol, checksum):
    """
    Copy a file with ``srmcp`` and verify it by size.

    Both PFNs are first rewritten with ``self.createSourceName``.  After
    the copy, the local size is compared with the ``size :`` line printed
    by ``srm-get-metadata`` for the remote PFN.

    :param stageOut: when true ``toPfn`` is the remote side, otherwise
        ``fromPfn`` is
    :returns: the rewritten ``toPfn``
    :raises StageOutError: from the optional exit-code check
    :raises StageOutFailure: if the sizes differ (a delete of ``toPfn`` is
        attempted first)
    """
    toPfn = self.createSourceName(protocol, toPfn)
    fromPfn = self.createSourceName(protocol, fromPfn)

    # mkstemp hands back an open descriptor; only the path is needed
    reportFd, reportFile = tempfile.mkstemp()
    os.close(reportFd)
    ourCommand = \
        self.generateCommandFromPreAndPostParts(
            ['srmcp', '-report=%s' % reportFile, '-retry_num=0'],
            [fromPfn, toPfn],
            options)
    self.runCommandWarnOnNonZero(ourCommand)

    # use the stageOut argument (this used to read self.stageOut, leaving
    # the parameter unused)
    if not stageOut:
        remotePFN, localPFN = fromPfn, toPfn.replace("file://", "", 1)
    else:
        remotePFN, localPFN = toPfn, fromPfn.replace("file://", "", 1)

    # result is not used below; the call is kept as it was
    targetPnfsPath = self.createPnfsPath(remotePFN)

    if _CheckExitCodeOption:
        # NOTE(review): '-d" "' reaches cut with the quote characters
        # included and the input is rfstat output, not the srmcp report --
        # left untouched because correcting it would change when this
        # check fires; confirm the intent.
        p1 = Popen(["rfstat", remotePFN], stdout=PIPE)
        p3 = Popen(['cut', '-f3', '-d" "'], stdin=p1.stdout, stdout=PIPE)
        exitCode = p3.communicate()[0]
        if exitCode:
            raise StageOutError("srmcp failed! Error code: %s" % exitCode)

    localSize = os.path.getsize(localPFN)
    logging.info("Local Size %s" % localSize)

    # filesize() { `srm-get-metadata -retry_num=0 %s 2>/dev/null | grep 'size :[0-9]' | cut -f2 -d":"`}
    # the following replaces the above
    p1 = Popen(["srm-get-metadata", '-retry_num=0', remotePFN], stdout=PIPE)
    p2 = Popen(["grep", "size :[0-9]"], stdout=PIPE, stdin=p1.stdout)
    # the shell snippet above uses cut; the previous "sed -f2 -d":"" asked
    # sed to read a script file named "2" and produced no size at all
    p3 = Popen(["cut", "-f2", "-d:"], stdout=PIPE, stdin=p2.stdout)
    remoteSize = p3.communicate()[0]
    logging.info("Localsize: %s Remotesize: %s" % (localSize, remoteSize))

    if int(localSize) != int(remoteSize):
        try:
            self.doDelete(toPfn, None, None, None, None)
        except Exception as ex:
            # best-effort cleanup: report it, but let the size mismatch win
            logging.error("Could not delete partial file %s: %s", toPfn, ex)
        raise StageOutFailure("File sizes don't match")

    return toPfn
def doTransfer(self, fromPfn, toPfn, stageOut, seName, command, options, protocol, checksum):
    """
    performs a transfer. stageOut tells you which way to go. returns the new pfn or
    raises on failure. StageOutError (and inherited exceptions) are for expected errors
    such as temporary connection failures. Anything else will be handled as an unexpected
    error and skip retrying with this plugin

    The copy is run with ``gfal-copy ... -K adler32`` through
    ``self.runCommandFailOnNonZero``; ``options`` is inserted verbatim
    into the command line.

    :returns: ``toPfn``
    :raises StageOutFailure: if the command result's first element is not 0
        (a delete of ``toPfn`` is attempted first)
    """
    # the local side gets the file protocol prefix, the remote side is used as is
    if stageOut:
        fromPfn2 = self.prependFileProtocol(fromPfn)
        toPfn2 = toPfn
    else:
        fromPfn2 = fromPfn
        toPfn2 = self.prependFileProtocol(toPfn)

    if not options:
        options = ""

    transferCommand = "env -i X509_USER_PROXY=$X509_USER_PROXY gfal-copy -t 2400 -T 2400 -p -K adler32 -vvv %s %s %s " %\
        (options, fromPfn2, toPfn2)

    logging.info("Staging out with gfal-copy")
    logging.info("  commandline: %s" % transferCommand)
    commandExec = self.runCommandFailOnNonZero(transferCommand)

    if commandExec[0] != 0:
        try:
            logging.error("Transfer failed, deleting partial file")
            self.doDelete(toPfn, None, None, None, None)
        except Exception as ex:
            # best-effort cleanup: report it, but let the transfer failure win
            logging.error("Could not delete partial file %s: %s", toPfn, ex)
        raise StageOutFailure("File transfer failed")

    return toPfn
Command=fbParams['command'], LFN=lfn, ExceptionDetail=str(ex)) impl.numRetries = self.numberOfRetries impl.retryPause = self.retryPauseTime try: impl(fbParams['command'], localPfn, pfn, fbParams.get("option", None), checksums) except Exception, ex: msg = "Failure for fallback stage out:\n" msg += str(ex) raise StageOutFailure(msg, Command=fbParams['command'], LFN=lfn, InputPFN=localPfn, TargetPFN=pfn) return pfn def localStageOut(self, lfn, localPfn, checksums): """ _localStageOut_ Given the lfn and local stage out params, invoke the local stage out """ seName = self.siteCfg.localStageOut['se-name'] command = self.siteCfg.localStageOut['command'] options = self.siteCfg.localStageOut.get('option', None)
def __call__(self, fileToStage):
    """
    _operator()_

    Use call to invoke transfers

    Tries the site's local stage out first (unless ``self.override`` is
    set), then each entry of ``self.fallbacks`` in order.  Every attempt
    is passed through ``stageoutPolicyReport`` (exit code 0 on success,
    60311 for a local failure, 60310 for a fallback failure).  On success
    ``fileToStage`` carries 'PFN', 'PNN' and 'StageOutCommand' and is
    stored in ``self.completedFiles``.

    :returns: the value returned by ``stageoutPolicyReport`` for the
        successful attempt
    :raises Exception: the last failure seen, or a StageOutFailure when no
        stage out was attempted at all
    """
    lastException = None

    logging.info("==>Working on file: %s", fileToStage['LFN'])
    lfn = fileToStage['LFN']

    fileToStage['StageOutReport'] = []

    #  //
    # // No override => use local-stage-out from site conf
    # //  invoke for all files and check failures/successes
    if not self.override:
        logging.info("===> Attempting Local Stage Out.")
        try:
            pfn = self.localStageOut(lfn, fileToStage['PFN'],
                                     fileToStage.get('Checksums'))
            fileToStage['PFN'] = pfn
            fileToStage['PNN'] = self.siteCfg.localStageOut['phedex-node']
            fileToStage['StageOutCommand'] = self.siteCfg.localStageOut[
                'command']
            self.completedFiles[fileToStage['LFN']] = fileToStage

            logging.info("===> Stage Out Successful: %s", fileToStage)
            fileToStage = stageoutPolicyReport(fileToStage, None, None,
                                               'LOCAL', 0)
            return fileToStage
        except WMException as ex:
            lastException = ex
            logging.info("===> Local Stage Out Failure for file:")
            logging.info("======>  %s\n", fileToStage['LFN'])
            fileToStage = stageoutPolicyReport(
                fileToStage,
                self.siteCfg.localStageOut.get('phedex-node', None),
                self.siteCfg.localStageOut['command'],
                'LOCAL', 60311)
        except Exception as ex:
            lastException = StageOutFailure("Error during local stage out",
                                            error=str(ex))
            logging.info("===> Local Stage Out Failure for file:\n")
            logging.info("======>  %s\n", fileToStage['LFN'])
            fileToStage = stageoutPolicyReport(
                fileToStage,
                self.siteCfg.localStageOut.get('phedex-node', None),
                self.siteCfg.localStageOut['command'],
                'LOCAL', 60311)

    #  //
    # // Still here => failure, start using the fallback stage outs
    # //  If override is set, then that will be the only fallback available
    logging.info("===> Attempting %s Fallback Stage Outs",
                 len(self.fallbacks))
    for fallback in self.fallbacks:
        try:
            pfn = self.fallbackStageOut(lfn, fileToStage['PFN'],
                                        fallback, fileToStage.get('Checksums'))
            fileToStage['PFN'] = pfn
            fileToStage['PNN'] = fallback['phedex-node']
            fileToStage['StageOutCommand'] = fallback['command']
            logging.info("attempting fallback")
            self.completedFiles[fileToStage['LFN']] = fileToStage
            if lfn in self.failed:
                del self.failed[lfn]

            logging.info("===> Stage Out Successful: %s", fileToStage)
            fileToStage = stageoutPolicyReport(fileToStage, None, None,
                                               'FALLBACK', 0)
            return fileToStage
        except Exception as ex:
            fileToStage = stageoutPolicyReport(
                fileToStage,
                fallback.get('phedex-node', None),
                fallback['command'],
                'FALLBACK', 60310)
            lastException = ex
            continue

    if lastException is None:
        # override set and no fallbacks: nothing ran.  The old initial
        # value "" ended in `raise ""`, i.e. a TypeError.
        lastException = StageOutFailure("No stage out was attempted for file",
                                        LFN=lfn)
    raise lastException
def doTransfer(self, fromPfn, toPfn, stageOut, pnn, command, options, protocol, checksum):
    """
    Copy a file with srmcp and verify the result by comparing file sizes.

    srmcp is run up to self.numRetries times. After the copy loop the local
    file size (os.path.getsize) is compared with the size parsed from the
    output of srmls on the remote PFN.

    :param fromPfn: source PFN, passed through self.createSourceName()
    :param toPfn: destination PFN, passed through self.createSourceName()
    :param stageOut: true when toPfn is the remote end, false when fromPfn is
    :param pnn: not used by this method
    :param command: not used by this method
    :param options: extra options handed to generateCommandFromPreAndPostParts()
    :param protocol: handed to self.createSourceName()
    :param checksum: not used by this method
    :returns: toPfn as returned by self.createSourceName()
    :raises StageOutFailure: srmcp reported a non-zero exit status, or the
        local and remote sizes differ
    :raises StageOutError: the remote PFN does not match the srm:// pattern
    """
    toPfn = self.createSourceName(protocol, toPfn)
    fromPfn = self.createSourceName(protocol, fromPfn)

    # Which end is remote depends only on the direction, so decide it once
    # here; this also keeps both names bound when no copy attempt is made.
    # The local PFN has its first "file://" removed.
    if not stageOut:
        remotePFN, localPFN = fromPfn, toPfn.replace("file://", "", 1)
    else:
        remotePFN, localPFN = toPfn, fromPfn.replace("file://", "", 1)

    # TODO tee the output to another file
    # attempt copy
    for attempt in range(self.numRetries):
        (reportFd, reportFile) = tempfile.mkstemp()
        # The report is handed to srmcp and read back by path only, so the
        # descriptor opened by mkstemp is released immediately.
        os.close(reportFd)
        try:
            ourCommand = self.generateCommandFromPreAndPostParts(
                ['srmcp', '-2', '-report=%s' % reportFile, '-retry_num=0'],
                [fromPfn, toPfn],
                options)
            self.runCommandWarnOnNonZero(ourCommand)

            if _CheckExitCodeOption:
                # third space-separated field of the report file
                p1 = Popen(["cat", reportFile], stdout=PIPE)
                p3 = Popen(['cut', '-f3', '-d', ' '], stdin=p1.stdout, stdout=PIPE)
                p1.stdout.close()  # the parent does not read this pipe end
                exitCode = p3.communicate()[0].rstrip()
                logging.info("srmcp exit status: %s" % exitCode)
                p2 = Popen(['grep', '-c', 'SRM_INVALID_PATH', reportFile], stdout=PIPE)
                invalidPathCount = p2.communicate()[0]
                logging.info("got this for SRM_INVALID_PATH: %s" % invalidPathCount)
                # NOTE(review): grep -c prints a count even when it is 0, so
                # invalidPathCount is presumably always a non-empty string and
                # this branch is effectively decided by the empty exit code
                # alone -- confirm before tightening the condition.
                if (invalidPathCount and (exitCode == '')):
                    logging.warning(
                        "Directory doesn't exist in srmv2 stageout...creating and retrying"
                    )
                    self.createOutputDirectory(toPfn, stageOut)
                    continue
                elif (str(exitCode) != "0"):
                    logging.error("Couldn't stage out! Error code: %s" % exitCode)
                    self.doDelete(toPfn, None, None, None, None)
                    raise StageOutFailure("srmcp failed! Error code: %s" % exitCode)
                else:
                    logging.info(
                        "Tentatively succeeded transfer, will check metadata")
                    break
        finally:
            # mkstemp leaves removal to the caller; runs on continue/break/raise too
            try:
                os.remove(reportFile)
            except OSError:
                pass

    localSize = os.path.getsize(localPFN)
    logging.info("Local Size %s" % localSize)

    # An explicit "?SFN=" part of the PFN wins over the path matched by the regex
    remotePath = None
    SFN = '?SFN='
    sfn_idx = remotePFN.find(SFN)
    if sfn_idx >= 0:
        remotePath = remotePFN[sfn_idx + len(SFN):]
    r = re.compile(r'srm://([A-Za-z\-\.0-9]*)(:[0-9]*)?(/.*)')
    m = r.match(remotePFN)
    if not m:
        raise StageOutError("Unable to determine path from PFN for " \
                            "target %s." % remotePFN)
    if remotePath is None:
        remotePath = m.group(3)
    remoteHost = m.group(1)
    # filesize() { `srm-get-metadata -retry_num=0 %s 2>/dev/null | grep 'size :[0-9]' | cut -f2 -d":"`}
    # the following replaces the above
    logging.info("remote path: %s" % remotePath)
    logging.info("remote host: %s" % remoteHost)
    # srmls | grep <path> | grep -v <host> | awk '{print $1;}'
    p1 = Popen(["srmls", '-recursion_depth=0', '-retry_num=0', remotePFN], stdout=PIPE)
    p2 = Popen(["grep", remotePath], stdout=PIPE, stdin=p1.stdout)
    p1.stdout.close()
    p3 = Popen(["grep", '-v', remoteHost], stdout=PIPE, stdin=p2.stdout)
    p2.stdout.close()
    p4 = Popen(["awk", "{print $1;}"], stdout=PIPE, stdin=p3.stdout)
    p3.stdout.close()
    remoteSize = p4.communicate()[0]
    logging.info("Localsize: %s Remotesize: %s" % (localSize, remoteSize))
    if int(localSize) != int(remoteSize):
        # Best-effort cleanup: a failing delete must not hide the size mismatch
        try:
            self.doDelete(toPfn, None, None, None, None)
        except Exception as ex:
            logging.warning("Could not remove %s after size mismatch: %s", toPfn, ex)
        raise StageOutFailure("File sizes don't match")

    return toPfn
def deleteLFN(self, lfn):
    """
    attempts to delete a file. will raise if none of the methods work, returns details otherwise

    self.defaultMethod is tried first, then every entry of self.fallbacks in
    order; the first method that deletes without raising wins.

    :param lfn: the LFN to delete; handed to self.getTransferDetails() for
        each method to obtain the pnn, command, options, pfn and protocol
    :returns: dict with the keys 'LFN', 'PFN' and 'PNN' for the method that worked
    :raises: self.firstException when it is set after all methods failed,
        otherwise StageOutFailure. Errors from the old-style removeFile()
        path are not caught here.
    """
    log.info("Beginning to delete %s" % lfn)
    retval = {}
    # generate list of stageout methods we will try
    stageOutMethods = [self.defaultMethod]
    stageOutMethods.extend(self.fallbacks)

    # loop over all the different methods. This unifies regular and fallback stuff. Nice.
    for methodCounter, currentMethod in enumerate(stageOutMethods, 1):
        (pnn, command, options, pfn, protocol) = \
            self.getTransferDetails(lfn, currentMethod)

        retval = {'LFN': lfn, 'PFN': pfn, 'PNN': pnn}

        log.info("Attempting deletion method %s" % (methodCounter, ))
        log.info("Current method information: %s" % currentMethod)

        try:
            deleteSlave = retrieveStageOutImpl(command, useNewVersion=True)
        except RegistryError:
            # No new-style plugin registered for this command: use the old
            # implementation, whose entry point is removeFile() rather than
            # doDelete(), and return straight away.
            deleteSlave = retrieveStageOutImpl(command, useNewVersion=False)
            logging.error(
                "Tried to load stageout backend %s, a new version isn't there yet"
                % command)
            logging.error(
                "Will try to fall back to the oldone, but it's really best to redo it"
            )
            logging.error("Here goes...")
            deleteSlave.removeFile(pfn)
            return retval

        # do the delete. The implementation is responsible for its own verification
        try:
            deleteSlave.doDelete(pfn, pnn, command, options, protocol)
        except StageOutError as ex:
            log.info("Delete failed in an expected manner. Exception is:")
            log.info("%s" % str(ex))
            log.info(traceback.format_exc())
            # only the first failure is kept for re-raising at the end
            if not self.firstException:
                self.firstException = ex
            continue
        # note to people who think it's cheeky to catch exception after ranting against it:
        # this makes sense because no matter what the exception, we want to keep going
        # additionally, it prints out the proper backtrace so we can diagnose issues
        # AMM - 6/30/2010
        except Exception as ex:
            log.critical(
                "Delete failed in an unexpected manner. Exception is:")
            log.critical("%s" % str(ex))
            log.info(traceback.format_exc())
            if not self.firstException:
                self.firstException = ex
            continue

        # successful deletions make it here
        return retval

    # unsuccessful deletions make it here
    if self.firstException:
        raise self.firstException
    else:
        raise StageOutFailure("Could not delete", **retval)
def doTransfer(self, sourcePFN, targetPFN, stageOut, seName, command, options, protocol, checksum):
    """
    performs a transfer. stageOut tells you which way to go. returns the new pfn or
    raises on failure. StageOutError (and inherited exceptions) are for expected errors
    such as temporary connection failures. Anything else will be handled as an unexpected
    error and skip retrying with this plugin

    Targets whose PFN contains 'lustre' are handed to the 'cp' plugin;
    everything else is copied with dccp and then checked with
    check_dCachefilecksum.sh.

    :param sourcePFN: source PFN, passed through self.createSourceName()
    :param targetPFN: target PFN, passed through self.createSourceName()
    :param stageOut: true selects the dccp command line with the extra
        options and the plain source PFN; false uses bare dccp with
        pnfsPfn(sourcePFN)
    :param seName: only forwarded to the 'cp' plugin
    :param command: only forwarded to the 'cp' plugin
    :param options: extra dccp options; a false value is replaced by "" on
        the dccp path
    :param protocol: handed to self.createSourceName()
    :param checksum: only forwarded to the 'cp' plugin
    :returns: the target PFN on the dccp path, or whatever the 'cp' plugin's
        doTransfer() returns
    :raises StageOutFailure: dccp or the checksum script exited non-zero
    """
    targetPFN = self.createSourceName(protocol, targetPFN)
    sourcePFN = self.createSourceName(protocol, sourcePFN)

    # make directories
    self.createOutputDirectory(os.path.dirname(targetPFN))

    if 'lustre' in targetPFN:
        # looks like lustre -- do a regular CP
        copyGuy = retrieveStageOutImpl('cp', useNewVersion=True)
        return copyGuy.doTransfer(sourcePFN, targetPFN, stageOut, seName,
                                  command, options, protocol, checksum)

    if not options:
        options = ""

    if stageOut:
        copyCommand = self.generateCommandFromPreAndPostParts(
            ["dccp", "-o", "86400", "-d", "0", "-X", "-role=cmsprod"],
            [sourcePFN, targetPFN],
            options)
    else:
        copyCommand = self.generateCommandFromPreAndPostParts(
            ["dccp"],
            [pnfsPfn(sourcePFN), targetPFN],
            options)

    logging.info("Staging out with DCCPFNAL")
    # Values are passed as logging arguments (or wrapped in a 1-tuple) so
    # that a tuple value is formatted as a single item.
    logging.info(" commandline: %s", copyCommand)
    # call form of print is valid on both Python 2 and Python 3
    print("command is %s" % (copyCommand,))
    (exitCode, output) = self.doWrapped(copyCommand)
    if exitCode:
        logging.error("Transfer failed")
        raise StageOutFailure("DCCP failed - No good")
    # '"...%s" % output' used to fail here with "not all arguments converted
    # during string formatting" -- presumably because doWrapped can hand back
    # a tuple as output; passing it as a logging argument avoids that.
    logging.info(" output from dccp: %s", output)
    logging.info(" complete. #")

    logging.info("Verifying file")
    (exitCode, output) = self.doWrapped([
        '/opt/d-cache/dcap/bin/check_dCachefilecksum.sh',
        pnfsPfn(targetPFN), sourcePFN
    ])
    if exitCode:
        logging.error("Checksum verify failed")
        # Best-effort cleanup: a failing delete must not hide the checksum failure
        try:
            self.doDelete(targetPFN, None, None, None, None)
        except Exception as ex:
            logging.warning("Could not remove %s after failed verification: %s",
                            targetPFN, ex)
        raise StageOutFailure("DCCP failed - No good")

    return targetPFN
def executeCommand(self, command):
    """
    Always fail: raise StageOutFailure without looking at command.

    :param command: ignored
    :raises StageOutFailure: on every call
    """
    raise StageOutFailure("FailImpl returns FAIL!!!")