def __updateSandBox(self, jobid, output):
    jobInfo = BigDataDB.getJobIDInfo(jobid)

    result = self.sshConnect.scpCall(100, output, output, False)
    if not result['OK']:
        self.log.error('Error getting the data from the BigData software DFS:', result)

    outputSandbox = [output]

    resolvedSandbox = self.__resolveOutputSandboxFiles(outputSandbox)
    if not resolvedSandbox['OK']:
        self.log.warn('Output sandbox file resolution failed:')
        self.log.warn(resolvedSandbox['Message'])
        self.__report('Failed', 'Resolving Output Sandbox')

    fileList = resolvedSandbox['Value']['Files']
    missingFiles = resolvedSandbox['Value']['Missing']
    if missingFiles:
        self.jobReport.setJobParameter('OutputSandboxMissingFiles',
                                       ', '.join(missingFiles), sendFlag=False)

    if fileList and jobid:
        self.outputSandboxSize = getGlobbedTotalSize(fileList)
        self.log.info('Attempting to upload Sandbox with limit:', self.sandboxSizeLimit)
        result = self.sandboxClient.uploadFilesAsSandboxForJob(fileList, jobid, 'Output',
                                                               self.sandboxSizeLimit)  # 1024*1024*10
        if not result['OK']:
            self.log.error('Output sandbox upload failed with message', result['Message'])
            if 'SandboxFileName' in result:
                outputSandboxData = result['SandboxFileName']
                self.log.info('Attempting to upload %s as output data' % outputSandboxData)
                outputData.append(outputSandboxData)  # outputData must be defined by the enclosing workflow
                self.jobReport.setJobParameter('OutputSandbox',
                                               'Sandbox uploaded to grid storage', sendFlag=False)
                self.jobReport.setJobParameter('OutputSandboxLFN',
                                               self.__getLFNfromOutputFile(outputSandboxData)[0],
                                               sendFlag=False)
            else:
                self.log.info('Could not get SandboxFileName to attempt upload to Grid storage')
                return S_ERROR('Output sandbox upload failed and no file name supplied '
                               'for failover to Grid storage')
        else:
            # Do not overwrite in case of Error
            if not self.failedFlag:
                self.__report('Completed', 'Output Sandbox Uploaded')
            self.log.info('Sandbox uploaded successfully')

    return "OK"
def sendFiles(self, jobID, fileList, sizeLimit=0):
    """ Send files in the fileList to a Sandbox service for the given jobID.
        This is the preferable method to upload sandboxes. fileList can contain
        both files and directories.
    """
    print "sendFiles: sizeLimit =", sizeLimit
    error_files = []
    files_to_send = []
    for sFile in fileList:
        if re.search('^lfn:', sFile) or re.search('^LFN:', sFile):
            # LFNs are resolved by the service, nothing to send
            pass
        else:
            if os.path.exists(sFile):
                files_to_send.append(sFile)
            else:
                error_files.append(sFile)
    if error_files:
        return S_ERROR('Failed to locate files: \n' + ','.join(error_files))
    if sizeLimit > 0:
        # Evaluate the compressed size of the sandbox
        if getGlobbedTotalSize(files_to_send) > sizeLimit:
            tname = 'Sandbox_' + str(jobID) + '.tar.gz'
            import tarfile
            tarFile = tarfile.open(tname, 'w:gz')
            for sFile in files_to_send:
                tarFile.add(sFile)
            tarFile.close()
            result = S_ERROR('Size over the limit')
            result['SandboxFileName'] = tname
            return result
    sendName = str(jobID) + "::Job__Sandbox__"
    sandbox = TransferClient('WorkloadManagement/%sSandbox' % self.sandbox_type)
    result = sandbox.sendBulk(files_to_send, sendName)
    return result
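# A minimal usage sketch for sendFiles (illustrative only): it assumes a client
# object exposing the method, e.g. an old-style SandboxClient constructed with a
# sandbox_type of 'Input', and an already registered job ID. SandboxClient,
# jobScript.sh and inputData/ are assumptions, not part of the code above.
client = SandboxClient('Input')
result = client.sendFiles(1234, ['jobScript.sh', 'inputData/'], sizeLimit=10 * 1024 * 1024)
if not result['OK']:
    # On an oversized sandbox the method returns the local tarball name,
    # so the caller can fail over to an upload via grid storage
    print result['Message'], result.get('SandboxFileName', '')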
def uploadFilesAsSandbox(self, fileList, sizeLimit=0, assignTo=None):
    """ Send files in the fileList to a Sandbox service for the given jobID.
        This is the preferable method to upload sandboxes.

        a fileList item can be:
          - a string, which is an LFN name
          - a file name (real), that is supposed to be on disk, in the current directory
          - a fileObject that should be a StringIO.StringIO type of object

        Parameters:
          - assignTo : Dict containing { 'Job:<jobid>' : '<sbType>', ... }
    """
    errorFiles = []
    files2Upload = []

    if assignTo is None:
        assignTo = {}

    for key in assignTo:
        if assignTo[key] not in self.__validSandboxTypes:
            return S_ERROR("Invalid sandbox type %s" % assignTo[key])

    if not isinstance(fileList, (list, tuple)):
        return S_ERROR("fileList must be a list or tuple!")

    for sFile in fileList:
        if isinstance(sFile, basestring):
            if re.search('^lfn:', sFile, flags=re.IGNORECASE):
                # LFNs are passed through, nothing to upload
                pass
            else:
                if os.path.exists(sFile):
                    files2Upload.append(sFile)
                else:
                    errorFiles.append(sFile)
        elif isinstance(sFile, StringIO.StringIO):
            files2Upload.append(sFile)
        else:
            return S_ERROR("Objects of type %s can't be part of InputSandbox" % type(sFile))

    if errorFiles:
        return S_ERROR("Failed to locate files: %s" % ", ".join(errorFiles))

    try:
        fd, tmpFilePath = tempfile.mkstemp(prefix="LDSB.")
        os.close(fd)
    except Exception as e:
        return S_ERROR("Cannot create temporary file: %s" % repr(e))

    with tarfile.open(name=tmpFilePath, mode="w|bz2") as tf:
        for sFile in files2Upload:
            if isinstance(sFile, basestring):
                tf.add(os.path.realpath(sFile), os.path.basename(sFile), recursive=True)
            elif isinstance(sFile, StringIO.StringIO):
                tarInfo = tarfile.TarInfo(name='jobDescription.xml')
                tarInfo.size = len(sFile.buf)
                tf.addfile(tarinfo=tarInfo, fileobj=sFile)

    if sizeLimit > 0:
        # Evaluate the compressed size of the sandbox
        if getGlobbedTotalSize(tmpFilePath) > sizeLimit:
            result = S_ERROR("Size over the limit")
            result['SandboxFileName'] = tmpFilePath
            return result

    oMD5 = hashlib.md5()
    with open(tmpFilePath, "rb") as fd:
        bData = fd.read(10240)
        while bData:
            oMD5.update(bData)
            bData = fd.read(10240)

    transferClient = self.__getTransferClient()
    result = transferClient.sendFile(tmpFilePath, ("%s.tar.bz2" % oMD5.hexdigest(), assignTo))
    result['SandboxFileName'] = tmpFilePath
    try:
        if result['OK']:
            os.unlink(tmpFilePath)
    except OSError:
        pass
    return result
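# A usage sketch for the newer uploadFilesAsSandbox (illustrative): it mixes a
# real on-disk file with an in-memory jobDescription.xml passed as a
# StringIO.StringIO object. The sbClient instance, the job ID 1234 and the file
# names are assumptions made for the example.
import StringIO
xmlBuffer = StringIO.StringIO('<job id="1234"/>')
result = sbClient.uploadFilesAsSandbox(['jobScript.sh', xmlBuffer],
                                       sizeLimit=10 * 1024 * 1024,
                                       assignTo={'Job:1234': 'Input'})
if not result['OK'] and 'SandboxFileName' in result:
    # The bz2 tarball is kept on disk so the caller can attempt a failover upload
    print "Sandbox over the limit, failover file:", result['SandboxFileName']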
class SandboxStoreClient(object):

    __validSandboxTypes = ('Input', 'Output')
    __smdb = None

    def __init__(self, rpcClient=None, transferClient=None, **kwargs):
        self.__serviceName = "WorkloadManagement/SandboxStore"
        self.__rpcClient = rpcClient
        self.__transferClient = transferClient
        self.__kwargs = kwargs
        if SandboxStoreClient.__smdb is None:
            try:
                from DIRAC.WorkloadManagementSystem.DB.SandboxMetadataDB import SandboxMetadataDB
                SandboxStoreClient.__smdb = SandboxMetadataDB()
                result = SandboxStoreClient.__smdb._getConnection()
                if not result['OK']:
                    SandboxStoreClient.__smdb = False
                else:
                    result['Value'].close()
            except (ImportError, RuntimeError, AttributeError):
                SandboxStoreClient.__smdb = False

    def __getRPCClient(self):
        if self.__rpcClient:
            return self.__rpcClient
        return RPCClient(self.__serviceName, **self.__kwargs)

    def __getTransferClient(self):
        if self.__transferClient:
            return self.__transferClient
        return TransferClient(self.__serviceName, **self.__kwargs)

    # Upload sandbox for jobs and pilots

    def uploadFilesAsSandboxForJob(self, fileList, jobId, sbType, sizeLimit=0):
        if sbType not in self.__validSandboxTypes:
            return S_ERROR("Invalid Sandbox type %s" % sbType)
        return self.uploadFilesAsSandbox(fileList, sizeLimit, assignTo={"Job:%s" % jobId: sbType})

    def uploadFilesAsSandboxForPilot(self, fileList, jobId, sbType, sizeLimit=0):
        if sbType not in self.__validSandboxTypes:
            return S_ERROR("Invalid Sandbox type %s" % sbType)
        return self.uploadFilesAsSandbox(fileList, sizeLimit, assignTo={"Pilot:%s" % jobId: sbType})

    # Upload generic sandbox

    def uploadFilesAsSandbox(self, fileList, sizeLimit=0, assignTo=None):
        """ Send files in the fileList to a Sandbox service for the given jobID.
            This is the preferable method to upload sandboxes. fileList can contain
            both files and directories.

            Parameters:
              - assignTo : Dict containing { 'Job:<jobid>' : '<sbType>', ... }
        """
        errorFiles = []
        files2Upload = []
        if assignTo is None:  # avoid a shared mutable default argument
            assignTo = {}
        for key in assignTo:
            if assignTo[key] not in self.__validSandboxTypes:
                return S_ERROR("Invalid sandbox type %s" % assignTo[key])
        if type(fileList) not in (types.TupleType, types.ListType):
            return S_ERROR("fileList must be a list or tuple!")
        for sFile in fileList:
            if re.search('^lfn:', sFile) or re.search('^LFN:', sFile):
                pass
            else:
                if os.path.exists(sFile):
                    files2Upload.append(sFile)
                else:
                    errorFiles.append(sFile)
        if errorFiles:
            return S_ERROR("Failed to locate files: %s" % ", ".join(errorFiles))
        try:
            fd, tmpFilePath = tempfile.mkstemp(prefix="LDSB.")
            os.close(fd)
        except Exception as e:
            return S_ERROR("Cannot create temporary file: %s" % str(e))
        tf = tarfile.open(name=tmpFilePath, mode="w|bz2")
        for sFile in files2Upload:
            tf.add(os.path.realpath(sFile), os.path.basename(sFile), recursive=True)
        tf.close()
        if sizeLimit > 0:
            # Evaluate the compressed size of the sandbox
            if getGlobbedTotalSize(tmpFilePath) > sizeLimit:
                result = S_ERROR("Size over the limit")
                result['SandboxFileName'] = tmpFilePath
                return result
        oMD5 = hashlib.md5()
        fd = open(tmpFilePath, "rb")
        bData = fd.read(10240)
        while bData:
            oMD5.update(bData)
            bData = fd.read(10240)
        fd.close()
        transferClient = self.__getTransferClient()
        result = transferClient.sendFile(tmpFilePath, ("%s.tar.bz2" % oMD5.hexdigest(), assignTo))
        result['SandboxFileName'] = tmpFilePath
        try:
            if result['OK']:
                os.unlink(tmpFilePath)
        except OSError:
            pass
        return result
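# A usage sketch for the job-level wrapper (illustrative): uploadFilesAsSandboxForJob
# builds the assignTo mapping internally, so callers only pass the file list, the
# job ID and the sandbox type. The file names and job ID are assumptions.
sbClient = SandboxStoreClient()
result = sbClient.uploadFilesAsSandboxForJob(['std.out', 'std.err'], 1234,
                                             'Output', sizeLimit=1024 * 1024 * 10)
if not result['OK']:
    print "Output sandbox upload failed:", result['Message']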
def __updateSandBox(self, jobid, software, version, hll, hllversion, cli):
    jobInfo = BigDataDB.getJobIDInfo(jobid)
    source = (self.__tmpSandBoxDir + str(jobid) + "/InputSandbox" + str(jobid) + "/" +
              self.__getJobName(jobInfo[0][0]).replace(" ", "") + "_" + str(jobid))
    dest = (self.__tmpSandBoxDir + str(jobid) + "/" +
            self.__getJobName(jobInfo[0][0]).replace(" ", "") + "_" + str(jobid))

    result = 0
    if (software == "hadoop") and (version == "hdv1") and (hll == "none"):
        result = cli.getData(source, dest)
    if (software == "hadoop") and (version == "hdv2") and (hll == "none"):
        result = cli.getData(source, dest)
    # result stays 0 (and the check below raises) unless one of the branches above ran
    if not result["OK"]:
        self.log.error("Error getting the data from the BigData software DFS:", result)

    result = cli.getdata(dest, dest)
    if not result["OK"]:
        self.log.error("Error getting the data from the BigData cluster to DIRAC:", result)

    outputSandbox = self.get_filepaths(dest)

    resolvedSandbox = self.__resolveOutputSandboxFiles(outputSandbox)
    if not resolvedSandbox["OK"]:
        self.log.warn("Output sandbox file resolution failed:")
        self.log.warn(resolvedSandbox["Message"])
        self.__report("Failed", "Resolving Output Sandbox")

    self.fileList = resolvedSandbox["Value"]["Files"]
    missingFiles = resolvedSandbox["Value"]["Missing"]
    if missingFiles:
        self.jobReport.setJobParameter("OutputSandboxMissingFiles",
                                       ", ".join(missingFiles), sendFlag=False)

    if self.fileList and jobid:
        self.outputSandboxSize = getGlobbedTotalSize(self.fileList)
        self.log.info("Attempting to upload Sandbox with limit:", self.sandboxSizeLimit)
        result = self.sandboxClient.uploadFilesAsSandboxForJob(self.fileList, jobid, "Output",
                                                               self.sandboxSizeLimit)  # 1024*1024*10
        if not result["OK"]:
            self.log.error("Output sandbox upload failed with message", result["Message"])
            if "SandboxFileName" in result:
                outputSandboxData = result["SandboxFileName"]
                self.log.info("Attempting to upload %s as output data" % outputSandboxData)
                outputData.append(outputSandboxData)  # outputData must be defined by the enclosing workflow
                self.jobReport.setJobParameter("OutputSandbox",
                                               "Sandbox uploaded to grid storage", sendFlag=False)
                self.jobReport.setJobParameter("OutputSandboxLFN",
                                               self.__getLFNfromOutputFile(outputSandboxData)[0],
                                               sendFlag=False)
            else:
                self.log.info("Could not get SandboxFileName to attempt upload to Grid storage")
                return S_ERROR("Output sandbox upload failed and no file name supplied "
                               "for failover to Grid storage")
        else:
            # Do not overwrite in case of Error
            if not self.failedFlag:
                self.__report("Completed", "Output Sandbox Uploaded")
            self.log.info("Sandbox uploaded successfully")

    return "OK"
def __updateInteractiveSandBox(self, jobid, software, version, hll, hllversion, cli):
    # Delete the content of the InputSandbox
    jobInfo = BigDataDB.getJobIDInfo(jobid)
    source = self.__tmpSandBoxDir + str(jobid) + "/*_out"
    dest = self.__tmpSandBoxDir + str(jobid)

    result = cli.delHadoopData(self.__tmpSandBoxDir + str(jobid) + "/InputSandbox" + str(jobid))
    self.log.debug("ATTENTION: deleting InputSandbox content:", result)
    result = cli.getdata(dest, source)
    self.log.debug("Step 0: getting data from Hadoop:", result)
    if not result["OK"]:
        self.log.error("Error getting the data from the BigData cluster to DIRAC:", result)

    self.log.debug("Step 1: GetFilePaths:")
    outputSandbox = self.get_filepaths(self.__tmpSandBoxDir + str(jobid))
    self.log.debug("Step 2: OutputSandbox:", self.__tmpSandBoxDir + str(jobid))
    self.log.debug("Step 2: OutputSandbox:", outputSandbox)
    resolvedSandbox = self.__resolveOutputSandboxFiles(outputSandbox)
    self.log.debug("Step 3: ResolvedSandbox:", resolvedSandbox)
    if not resolvedSandbox["OK"]:
        self.log.warn("Output sandbox file resolution failed:")
        self.log.warn(resolvedSandbox["Message"])
        self.__report("Failed", "Resolving Output Sandbox")

    self.fileList = resolvedSandbox["Value"]["Files"]
    missingFiles = resolvedSandbox["Value"]["Missing"]
    if missingFiles:
        self.jobReport.setJobParameter("OutputSandboxMissingFiles",
                                       ", ".join(missingFiles), sendFlag=False)

    if self.fileList and jobid:
        self.outputSandboxSize = getGlobbedTotalSize(self.fileList)
        self.log.info("Attempting to upload Sandbox with limit:", self.sandboxSizeLimit)
        result = self.sandboxClient.uploadFilesAsSandboxForJob(self.fileList, jobid, "Output",
                                                               self.sandboxSizeLimit)  # 1024*1024*10
        if not result["OK"]:
            self.log.error("Output sandbox upload failed with message", result["Message"])
            if "SandboxFileName" in result:
                outputSandboxData = result["SandboxFileName"]
                self.log.info("Attempting to upload %s as output data" % outputSandboxData)
                outputData.append(outputSandboxData)  # outputData must be defined by the enclosing workflow
                self.jobReport.setJobParameter("OutputSandbox",
                                               "Sandbox uploaded to grid storage", sendFlag=False)
                self.jobReport.setJobParameter("OutputSandboxLFN",
                                               self.__getLFNfromOutputFile(outputSandboxData)[0],
                                               sendFlag=False)
            else:
                self.log.info("Could not get SandboxFileName to attempt upload to Grid storage")
                return S_ERROR("Output sandbox upload failed and no file name supplied "
                               "for failover to Grid storage")
        else:
            # Do not overwrite in case of Error
            if not self.failedFlag:
                self.__report("Completed", "Output Sandbox Uploaded")
            self.log.info("Sandbox uploaded successfully")

    return "OK"
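# The __updateSandBox and __updateInteractiveSandBox variants above repeat one
# pattern: resolve the output sandbox, upload it with a size limit, and fall back
# to grid storage when the upload fails but a local tarball name is available.
# A minimal sketch of that shared pattern, with a hypothetical helper name
# (uploadOutputSandboxWithFailover); not part of the original code:
def uploadOutputSandboxWithFailover(fileList, jobid, sandboxClient, sizeLimit):
    result = sandboxClient.uploadFilesAsSandboxForJob(fileList, jobid, 'Output', sizeLimit)
    if result['OK']:
        return S_OK('Output Sandbox Uploaded')
    if 'SandboxFileName' in result:
        # Oversized or failed upload: hand the tarball over for output-data failover
        return S_OK({'FailoverFile': result['SandboxFileName']})
    return S_ERROR('Output sandbox upload failed and no file name supplied '
                   'for failover to Grid storage')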