def __updateSandBox(self, jobid, output):
    jobInfo = BigDataDB.getJobIDInfo(jobid)

    result = self.sshConnect.scpCall(100, output, output, False)
    if not result['OK']:
        self.log.error('Error getting the data from the BigData software DFS:', result)

    outputSandbox = [output]

    resolvedSandbox = self.__resolveOutputSandboxFiles(outputSandbox)
    if not resolvedSandbox['OK']:
        self.log.warn('Output sandbox file resolution failed:')
        self.log.warn(resolvedSandbox['Message'])
        self.__report('Failed', 'Resolving Output Sandbox')

    fileList = resolvedSandbox['Value']['Files']
    missingFiles = resolvedSandbox['Value']['Missing']
    if missingFiles:
        self.jobReport.setJobParameter('OutputSandboxMissingFiles',
                                       ', '.join(missingFiles), sendFlag=False)

    if fileList and jobid:
        self.outputSandboxSize = getGlobbedTotalSize(fileList)
        self.log.info('Attempting to upload Sandbox with limit:', self.sandboxSizeLimit)
        result = self.sandboxClient.uploadFilesAsSandboxForJob(fileList, jobid, 'Output',
                                                               self.sandboxSizeLimit)  # 1024*1024*10
        if not result['OK']:
            self.log.error('Output sandbox upload failed with message', result['Message'])
            if 'SandboxFileName' in result:
                outputSandboxData = result['SandboxFileName']
                self.log.info('Attempting to upload %s as output data' % outputSandboxData)
                outputData.append(outputSandboxData)  # outputData must be defined by the enclosing workflow
                self.jobReport.setJobParameter('OutputSandbox',
                                               'Sandbox uploaded to grid storage', sendFlag=False)
                self.jobReport.setJobParameter('OutputSandboxLFN',
                                               self.__getLFNfromOutputFile(outputSandboxData)[0],
                                               sendFlag=False)
            else:
                self.log.info('Could not get SandboxFileName to attempt upload to Grid storage')
                return S_ERROR('Output sandbox upload failed and no file name supplied '
                               'for failover to Grid storage')
        else:
            # Do not overwrite in case of Error
            if not self.failedFlag:
                self.__report('Completed', 'Output Sandbox Uploaded')
            self.log.info('Sandbox uploaded successfully')

    return "OK"
def sendFiles(self, jobID, fileList, sizeLimit=0):
    """ Send files in the fileList to a Sandbox service for the given jobID.
        This is the preferable method to upload sandboxes. fileList can contain
        both files and directories.
    """
    print "sendFiles: sizeLimit =", sizeLimit
    error_files = []
    files_to_send = []
    for sFile in fileList:
        if re.search('^lfn:', sFile) or re.search('^LFN:', sFile):
            # LFNs are resolved by the service, nothing to send
            pass
        else:
            if os.path.exists(sFile):
                files_to_send.append(sFile)
            else:
                error_files.append(sFile)
    if error_files:
        return S_ERROR('Failed to locate files: \n' + ','.join(error_files))
    if sizeLimit > 0:
        # Evaluate the compressed size of the sandbox
        if getGlobbedTotalSize(files_to_send) > sizeLimit:
            tname = 'Sandbox_' + str(jobID) + '.tar.gz'
            import tarfile
            tarFile = tarfile.open(tname, 'w:gz')
            for sFile in files_to_send:
                tarFile.add(sFile)
            tarFile.close()
            result = S_ERROR('Size over the limit')
            result['SandboxFileName'] = tname
            return result
    sendName = str(jobID) + "::Job__Sandbox__"
    sandbox = TransferClient('WorkloadManagement/%sSandbox' % self.sandbox_type)
    result = sandbox.sendBulk(files_to_send, sendName)
    return result
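# A minimal usage sketch for sendFiles (illustrative only): it assumes a client
# object exposing the method, e.g. an old-style SandboxClient constructed with a
# sandbox_type of 'Input', and an already registered job ID. SandboxClient,
# jobScript.sh and inputData/ are assumptions, not part of the code above.
client = SandboxClient('Input')
result = client.sendFiles(1234, ['jobScript.sh', 'inputData/'], sizeLimit=10 * 1024 * 1024)
if not result['OK']:
    # On an oversized sandbox the method returns the local tarball name,
    # so the caller can fail over to an upload via grid storage
    print result['Message'], result.get('SandboxFileName', '')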
def uploadFilesAsSandbox(self, fileList, sizeLimit=0, assignTo=None):
    """ Send files in the fileList to a Sandbox service for the given jobID.
        This is the preferable method to upload sandboxes.

        a fileList item can be:
          - a string, which is an LFN name
          - a file name (real), that is supposed to be on disk, in the current directory
          - a fileObject that should be a StringIO.StringIO type of object

        Parameters:
          - assignTo : Dict containing { 'Job:<jobid>' : '<sbType>', ... }
    """
    errorFiles = []
    files2Upload = []

    if assignTo is None:
        assignTo = {}

    for key in assignTo:
        if assignTo[key] not in self.__validSandboxTypes:
            return S_ERROR("Invalid sandbox type %s" % assignTo[key])

    if not isinstance(fileList, (list, tuple)):
        return S_ERROR("fileList must be a list or tuple!")

    for sFile in fileList:
        if isinstance(sFile, basestring):
            if re.search('^lfn:', sFile, flags=re.IGNORECASE):
                # LFNs are passed through, nothing to upload
                pass
            else:
                if os.path.exists(sFile):
                    files2Upload.append(sFile)
                else:
                    errorFiles.append(sFile)
        elif isinstance(sFile, StringIO.StringIO):
            files2Upload.append(sFile)
        else:
            return S_ERROR("Objects of type %s can't be part of InputSandbox" % type(sFile))

    if errorFiles:
        return S_ERROR("Failed to locate files: %s" % ", ".join(errorFiles))

    try:
        fd, tmpFilePath = tempfile.mkstemp(prefix="LDSB.")
        os.close(fd)
    except Exception as e:
        return S_ERROR("Cannot create temporary file: %s" % repr(e))

    with tarfile.open(name=tmpFilePath, mode="w|bz2") as tf:
        for sFile in files2Upload:
            if isinstance(sFile, basestring):
                tf.add(os.path.realpath(sFile), os.path.basename(sFile), recursive=True)
            elif isinstance(sFile, StringIO.StringIO):
                tarInfo = tarfile.TarInfo(name='jobDescription.xml')
                tarInfo.size = len(sFile.buf)
                tf.addfile(tarinfo=tarInfo, fileobj=sFile)

    if sizeLimit > 0:
        # Evaluate the compressed size of the sandbox
        if getGlobbedTotalSize(tmpFilePath) > sizeLimit:
            result = S_ERROR("Size over the limit")
            result['SandboxFileName'] = tmpFilePath
            return result

    oMD5 = hashlib.md5()
    with open(tmpFilePath, "rb") as fd:
        bData = fd.read(10240)
        while bData:
            oMD5.update(bData)
            bData = fd.read(10240)

    transferClient = self.__getTransferClient()
    result = transferClient.sendFile(tmpFilePath, ("%s.tar.bz2" % oMD5.hexdigest(), assignTo))
    result['SandboxFileName'] = tmpFilePath
    try:
        if result['OK']:
            os.unlink(tmpFilePath)
    except OSError:
        pass
    return result
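# A usage sketch for the newer uploadFilesAsSandbox (illustrative): it mixes a
# real on-disk file with an in-memory jobDescription.xml passed as a
# StringIO.StringIO object. The sbClient instance, the job ID 1234 and the file
# names are assumptions made for the example.
import StringIO
xmlBuffer = StringIO.StringIO('<job id="1234"/>')
result = sbClient.uploadFilesAsSandbox(['jobScript.sh', xmlBuffer],
                                       sizeLimit=10 * 1024 * 1024,
                                       assignTo={'Job:1234': 'Input'})
if not result['OK'] and 'SandboxFileName' in result:
    # The bz2 tarball is kept on disk so the caller can attempt a failover upload
    print "Sandbox over the limit, failover file:", result['SandboxFileName']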
class SandboxStoreClient(object):

    __validSandboxTypes = ('Input', 'Output')
    __smdb = None

    def __init__(self, rpcClient=None, transferClient=None, **kwargs):
        self.__serviceName = "WorkloadManagement/SandboxStore"
        self.__rpcClient = rpcClient
        self.__transferClient = transferClient
        self.__kwargs = kwargs
        if SandboxStoreClient.__smdb is None:
            try:
                from DIRAC.WorkloadManagementSystem.DB.SandboxMetadataDB import SandboxMetadataDB
                SandboxStoreClient.__smdb = SandboxMetadataDB()
                result = SandboxStoreClient.__smdb._getConnection()
                if not result['OK']:
                    SandboxStoreClient.__smdb = False
                else:
                    result['Value'].close()
            except (ImportError, RuntimeError, AttributeError):
                SandboxStoreClient.__smdb = False

    def __getRPCClient(self):
        if self.__rpcClient:
            return self.__rpcClient
        return RPCClient(self.__serviceName, **self.__kwargs)

    def __getTransferClient(self):
        if self.__transferClient:
            return self.__transferClient
        return TransferClient(self.__serviceName, **self.__kwargs)

    # Upload sandbox for jobs and pilots

    def uploadFilesAsSandboxForJob(self, fileList, jobId, sbType, sizeLimit=0):
        if sbType not in self.__validSandboxTypes:
            return S_ERROR("Invalid Sandbox type %s" % sbType)
        return self.uploadFilesAsSandbox(fileList, sizeLimit, assignTo={"Job:%s" % jobId: sbType})

    def uploadFilesAsSandboxForPilot(self, fileList, jobId, sbType, sizeLimit=0):
        if sbType not in self.__validSandboxTypes:
            return S_ERROR("Invalid Sandbox type %s" % sbType)
        return self.uploadFilesAsSandbox(fileList, sizeLimit, assignTo={"Pilot:%s" % jobId: sbType})

    # Upload generic sandbox

    def uploadFilesAsSandbox(self, fileList, sizeLimit=0, assignTo=None):
        """ Send files in the fileList to a Sandbox service for the given jobID.
            This is the preferable method to upload sandboxes. fileList can contain
            both files and directories.

            Parameters:
              - assignTo : Dict containing { 'Job:<jobid>' : '<sbType>', ... }
        """
        errorFiles = []
        files2Upload = []
        if assignTo is None:  # avoid a shared mutable default argument
            assignTo = {}
        for key in assignTo:
            if assignTo[key] not in self.__validSandboxTypes:
                return S_ERROR("Invalid sandbox type %s" % assignTo[key])
        if type(fileList) not in (types.TupleType, types.ListType):
            return S_ERROR("fileList must be a list or tuple!")
        for sFile in fileList:
            if re.search('^lfn:', sFile) or re.search('^LFN:', sFile):
                pass
            else:
                if os.path.exists(sFile):
                    files2Upload.append(sFile)
                else:
                    errorFiles.append(sFile)
        if errorFiles:
            return S_ERROR("Failed to locate files: %s" % ", ".join(errorFiles))
        try:
            fd, tmpFilePath = tempfile.mkstemp(prefix="LDSB.")
            os.close(fd)
        except Exception as e:
            return S_ERROR("Cannot create temporary file: %s" % str(e))
        tf = tarfile.open(name=tmpFilePath, mode="w|bz2")
        for sFile in files2Upload:
            tf.add(os.path.realpath(sFile), os.path.basename(sFile), recursive=True)
        tf.close()
        if sizeLimit > 0:
            # Evaluate the compressed size of the sandbox
            if getGlobbedTotalSize(tmpFilePath) > sizeLimit:
                result = S_ERROR("Size over the limit")
                result['SandboxFileName'] = tmpFilePath
                return result
        oMD5 = hashlib.md5()
        fd = open(tmpFilePath, "rb")
        bData = fd.read(10240)
        while bData:
            oMD5.update(bData)
            bData = fd.read(10240)
        fd.close()
        transferClient = self.__getTransferClient()
        result = transferClient.sendFile(tmpFilePath, ("%s.tar.bz2" % oMD5.hexdigest(), assignTo))
        result['SandboxFileName'] = tmpFilePath
        try:
            if result['OK']:
                os.unlink(tmpFilePath)
        except OSError:
            pass
        return result
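# A usage sketch for the job-level wrapper (illustrative): uploadFilesAsSandboxForJob
# builds the assignTo mapping internally, so callers only pass the file list, the
# job ID and the sandbox type. The file names and job ID are assumptions.
sbClient = SandboxStoreClient()
result = sbClient.uploadFilesAsSandboxForJob(['std.out', 'std.err'], 1234,
                                             'Output', sizeLimit=1024 * 1024 * 10)
if not result['OK']:
    print "Output sandbox upload failed:", result['Message']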
def __updateSandBox(self, jobid, software, version, hll, hllversion, cli):
    jobInfo = BigDataDB.getJobIDInfo(jobid)
    source = (self.__tmpSandBoxDir + str(jobid) + "/InputSandbox" + str(jobid) + "/" +
              self.__getJobName(jobInfo[0][0]).replace(" ", "") + "_" + str(jobid))
    dest = (self.__tmpSandBoxDir + str(jobid) + "/" +
            self.__getJobName(jobInfo[0][0]).replace(" ", "") + "_" + str(jobid))

    result = 0
    if (software == "hadoop") and (version == "hdv1") and (hll == "none"):
        result = cli.getData(source, dest)
    if (software == "hadoop") and (version == "hdv2") and (hll == "none"):
        result = cli.getData(source, dest)
    # result stays 0 (and the check below raises) unless one of the branches above ran
    if not result["OK"]:
        self.log.error("Error getting the data from the BigData software DFS:", result)

    result = cli.getdata(dest, dest)
    if not result["OK"]:
        self.log.error("Error getting the data from the BigData cluster to DIRAC:", result)

    outputSandbox = self.get_filepaths(dest)

    resolvedSandbox = self.__resolveOutputSandboxFiles(outputSandbox)
    if not resolvedSandbox["OK"]:
        self.log.warn("Output sandbox file resolution failed:")
        self.log.warn(resolvedSandbox["Message"])
        self.__report("Failed", "Resolving Output Sandbox")

    self.fileList = resolvedSandbox["Value"]["Files"]
    missingFiles = resolvedSandbox["Value"]["Missing"]
    if missingFiles:
        self.jobReport.setJobParameter("OutputSandboxMissingFiles",
                                       ", ".join(missingFiles), sendFlag=False)

    if self.fileList and jobid:
        self.outputSandboxSize = getGlobbedTotalSize(self.fileList)
        self.log.info("Attempting to upload Sandbox with limit:", self.sandboxSizeLimit)
        result = self.sandboxClient.uploadFilesAsSandboxForJob(self.fileList, jobid, "Output",
                                                               self.sandboxSizeLimit)  # 1024*1024*10
        if not result["OK"]:
            self.log.error("Output sandbox upload failed with message", result["Message"])
            if "SandboxFileName" in result:
                outputSandboxData = result["SandboxFileName"]
                self.log.info("Attempting to upload %s as output data" % outputSandboxData)
                outputData.append(outputSandboxData)  # outputData must be defined by the enclosing workflow
                self.jobReport.setJobParameter("OutputSandbox",
                                               "Sandbox uploaded to grid storage", sendFlag=False)
                self.jobReport.setJobParameter("OutputSandboxLFN",
                                               self.__getLFNfromOutputFile(outputSandboxData)[0],
                                               sendFlag=False)
            else:
                self.log.info("Could not get SandboxFileName to attempt upload to Grid storage")
                return S_ERROR("Output sandbox upload failed and no file name supplied "
                               "for failover to Grid storage")
        else:
            # Do not overwrite in case of Error
            if not self.failedFlag:
                self.__report("Completed", "Output Sandbox Uploaded")
            self.log.info("Sandbox uploaded successfully")

    return "OK"
def __updateInteractiveSandBox(self, jobid, software, version, hll, hllversion, cli):
    # Delete the content of the InputSandbox
    jobInfo = BigDataDB.getJobIDInfo(jobid)
    source = self.__tmpSandBoxDir + str(jobid) + "/*_out"
    dest = self.__tmpSandBoxDir + str(jobid)

    result = cli.delHadoopData(self.__tmpSandBoxDir + str(jobid) + "/InputSandbox" + str(jobid))
    self.log.debug("ATTENTION: deleting InputSandbox content:", result)
    result = cli.getdata(dest, source)
    self.log.debug("Step 0: getting data from Hadoop:", result)
    if not result["OK"]:
        self.log.error("Error getting the data from the BigData cluster to DIRAC:", result)

    self.log.debug("Step 1: GetFilePaths:")
    outputSandbox = self.get_filepaths(self.__tmpSandBoxDir + str(jobid))
    self.log.debug("Step 2: OutputSandbox:", self.__tmpSandBoxDir + str(jobid))
    self.log.debug("Step 2: OutputSandbox:", outputSandbox)
    resolvedSandbox = self.__resolveOutputSandboxFiles(outputSandbox)
    self.log.debug("Step 3: ResolvedSandbox:", resolvedSandbox)
    if not resolvedSandbox["OK"]:
        self.log.warn("Output sandbox file resolution failed:")
        self.log.warn(resolvedSandbox["Message"])
        self.__report("Failed", "Resolving Output Sandbox")

    self.fileList = resolvedSandbox["Value"]["Files"]
    missingFiles = resolvedSandbox["Value"]["Missing"]
    if missingFiles:
        self.jobReport.setJobParameter("OutputSandboxMissingFiles",
                                       ", ".join(missingFiles), sendFlag=False)

    if self.fileList and jobid:
        self.outputSandboxSize = getGlobbedTotalSize(self.fileList)
        self.log.info("Attempting to upload Sandbox with limit:", self.sandboxSizeLimit)
        result = self.sandboxClient.uploadFilesAsSandboxForJob(self.fileList, jobid, "Output",
                                                               self.sandboxSizeLimit)  # 1024*1024*10
        if not result["OK"]:
            self.log.error("Output sandbox upload failed with message", result["Message"])
            if "SandboxFileName" in result:
                outputSandboxData = result["SandboxFileName"]
                self.log.info("Attempting to upload %s as output data" % outputSandboxData)
                outputData.append(outputSandboxData)  # outputData must be defined by the enclosing workflow
                self.jobReport.setJobParameter("OutputSandbox",
                                               "Sandbox uploaded to grid storage", sendFlag=False)
                self.jobReport.setJobParameter("OutputSandboxLFN",
                                               self.__getLFNfromOutputFile(outputSandboxData)[0],
                                               sendFlag=False)
            else:
                self.log.info("Could not get SandboxFileName to attempt upload to Grid storage")
                return S_ERROR("Output sandbox upload failed and no file name supplied "
                               "for failover to Grid storage")
        else:
            # Do not overwrite in case of Error
            if not self.failedFlag:
                self.__report("Completed", "Output Sandbox Uploaded")
            self.log.info("Sandbox uploaded successfully")

    return "OK"
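# The __updateSandBox and __updateInteractiveSandBox variants above repeat one
# pattern: resolve the output sandbox, upload it with a size limit, and fall back
# to grid storage when the upload fails but a local tarball name is available.
# A minimal sketch of that shared pattern, with a hypothetical helper name
# (uploadOutputSandboxWithFailover); not part of the original code:
def uploadOutputSandboxWithFailover(fileList, jobid, sandboxClient, sizeLimit):
    result = sandboxClient.uploadFilesAsSandboxForJob(fileList, jobid, 'Output', sizeLimit)
    if result['OK']:
        return S_OK('Output Sandbox Uploaded')
    if 'SandboxFileName' in result:
        # Oversized or failed upload: hand the tarball over for output-data failover
        return S_OK({'FailoverFile': result['SandboxFileName']})
    return S_ERROR('Output sandbox upload failed and no file name supplied '
                   'for failover to Grid storage')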