Exemplo n.º 1
0
class CatalogPlugInTestCase(unittest.TestCase):
    """Base class for the CatalogPlugin test case.

    setUp creates a clean test directory in the file catalog and registers
    a couple of dummy files for subclasses to work with; tearDown removes
    everything again.
    """

    def setUp(self):
        """Create the test directory and register ``self.numberOfFiles`` files."""
        # Metadata keys expected on any catalog entry; directories and files
        # each add one extra key on top of these.
        self.fullMetadata = [
            "Status",
            "ChecksumType",
            "OwnerRole",
            "CreationDate",
            "Checksum",
            "ModificationDate",
            "OwnerDN",
            "Mode",
            "GUID",
            "Size",
        ]
        self.dirMetadata = self.fullMetadata + ["NumberOfSubPaths"]
        self.fileMetadata = self.fullMetadata + ["NumberOfLinks"]

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assertTrue(valid)
        self.destDir = "/lhcb/test/unit-test/TestCatalogPlugin"
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        # parseResult is called for its assertions; the value is not needed here.
        self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        """Register a dummy replica for *lfn* and return the per-path result."""
        pfn = "protocol://host:port/storage/path%s" % lfn
        size = 10000000
        se = "DIRAC-storage"
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {
            lfn: {
                "PFN": pfn,
                "Size": size,
                "SE": se,
                "GUID": guid,
                "Checksum": adler,
            }
        }
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        """Assert that *res* reports success for *path* and return its value."""
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Successful"])
        # assertIn gives a clearer failure message than assertTrue(x in y).
        self.assertIn(path, res["Value"]["Successful"])
        return res["Value"]["Successful"][path]

    def parseError(self, res, path):
        """Assert that *res* reports failure for *path* and return the error."""
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Failed"])
        self.assertIn(path, res["Value"]["Failed"])
        return res["Value"]["Failed"][path]

    def cleanDirectory(self):
        """Remove the test directory and any files it still contains."""
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        toRemove = list(returnValue["Files"])
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        """Remove every replica of each lfn, then the catalog entries themselves."""
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {lfn: {"PFN": pfn, "SE": se}}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()
class FileUploader(object):
    """Upload local files to a storage element with gfal2 and register them
    in the DIRAC file catalogue."""

    def __init__(self):
        # gfal2 context used for stat/checksum/copy operations
        self.__gfalctxt = gfal2.creat_context()

        # DIRAC file catalogue client used to register uploaded files
        self.__fc = FileCatalog()

    def safe_upload(self, source_file, base_pfn):
        """Upload *source_file* to ``base_pfn/source_file`` using gfal2.

        :returns: tuple (filesize, adler32 checksum) of the copied file
        """
        # TODO: This function needs a retry loop, catching any of the gfal exceptions
        full_source = "file://%s" % os.path.abspath(source_file)
        gfalparams = self.__gfalctxt.transfer_parameters()
        # this hopefully enables checksum verification (hard to test ...)
        gfalparams.checksum_check = True
        fileinfo = self.__gfalctxt.stat(full_source)
        filesize = fileinfo.st_size
        filesum = self.__gfalctxt.checksum(full_source, "adler32")
        gfalparams.set_user_defined_checksum("adler32", filesum)
        full_pfn = "%s/%s" % (base_pfn, source_file)
        self.__gfalctxt.filecopy(gfalparams, full_source, full_pfn)

        return (filesize, filesum)

    def upload_files(self, source, base_pfn, base_lfn, target_se):
        """For directories: recursively searches the source directory for files
        and uploads them to SE; otherwise, just uploads the file."""
        if os.path.isfile(source):
            # TODO: this and the same block below should be a function
            filesize, filesum = self.safe_upload(source, base_pfn)
            full_lfn = "%s/%s" % (base_lfn, source)
            full_pfn = "%s/%s" % (base_pfn, source)
            self.registerfile(full_pfn, full_lfn, target_se, filesum, filesize)
            return
        for dirpath, subdirs, files in os.walk(source):
            for filename in files:
                source_file = os.path.join(dirpath, filename)
                # Single-argument print() is valid in both Python 2 and 3;
                # the old 'print x' statement form is a SyntaxError on Python 3.
                print("Trying %s" % filename)
                filesize, filesum = self.safe_upload(source_file, base_pfn)
                print("Uploaded: %s (%s %s)" % (filename, filesize, filesum))
                full_lfn = "%s/%s" % (base_lfn, source_file)
                full_pfn = "%s/%s" % (base_pfn, source_file)
                self.registerfile(full_pfn, full_lfn, target_se, filesum,
                                  filesize)
                # TODO: if the registering fails, the file should be deleted to avoid 'dark data' that is on disk, but not in the catalogue

    def registerfile(self, full_pfn, full_lfn, target_se, checksum, size):
        """Register one uploaded file in the DIRAC file catalogue.

        *size* is coerced to int; the GUID is generated here.
        """
        infoDict = {}
        infoDict['PFN'] = full_pfn
        infoDict['Size'] = int(size)
        infoDict['SE'] = target_se
        infoDict['GUID'] = str(uuid.uuid4())
        infoDict['Checksum'] = checksum
        fileDict = {}
        fileDict[full_lfn] = infoDict
        result = self.__fc.addFile(fileDict)
        # TODO: handle the return code as an exception
        if not result["OK"]:
            print(result)
            return
        if result["Value"]["Failed"]:
            print(result["Value"])
        return
Exemplo n.º 3
0
class CatalogPlugInTestCase(unittest.TestCase):
    """Base class for the CatalogPlugin test case.

    setUp creates a clean test directory in the file catalog and registers
    a couple of dummy files; tearDown removes everything again.
    """

    def setUp(self):
        """Create the test directory and register ``self.numberOfFiles`` files."""
        self.fullMetadata = [
            'Status', 'ChecksumType', 'OwnerRole', 'CreationDate', 'Checksum',
            'ModificationDate', 'OwnerDN', 'Mode', 'GUID', 'Size'
        ]
        self.dirMetadata = self.fullMetadata + ['NumberOfSubPaths']
        self.fileMetadata = self.fullMetadata + ['NumberOfLinks']

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        self.assertTrue(valid)
        self.destDir = '/lhcb/test/unit-test/TestCatalogPlugin'
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        # parseResult is called for its assertions; the value is not needed here.
        self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        # range() replaces the Python-2-only xrange().
        for i in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        """Register a dummy replica for *lfn* and return the per-path result."""
        pfn = 'protocol://host:port/storage/path%s' % lfn
        size = 10000000
        se = 'DIRAC-storage'
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {
            'PFN': pfn,
            'Size': size,
            'SE': se,
            'GUID': guid,
            'Checksum': adler
        }
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        """Assert that *res* reports success for *path* and return its value."""
        self.assertTrue(res['OK'])
        self.assertTrue(res['Value'])
        self.assertTrue(res['Value']['Successful'])
        # assertIn gives a clearer failure message than assertTrue(x in y).
        self.assertIn(path, res['Value']['Successful'])
        return res['Value']['Successful'][path]

    def parseError(self, res, path):
        """Assert that *res* reports failure for *path* and return the error."""
        self.assertTrue(res['OK'])
        self.assertTrue(res['Value'])
        self.assertTrue(res['Value']['Failed'])
        self.assertIn(path, res['Value']['Failed'])
        return res['Value']['Failed'][path]

    def cleanDirectory(self):
        """Remove the test directory and any files it still contains."""
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        # list() makes this safe on Python 3, where keys() is a view.
        toRemove = list(returnValue['Files'])
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        """Remove every replica of each lfn, then the catalog entries themselves."""
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {'PFN': pfn, 'SE': se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()
Exemplo n.º 4
0
class FileUploader(object):
  """Upload local files to a storage element with gfal2 and register them
  in the DIRAC file catalogue."""

  def __init__(self):
    # gfal2 context used for stat/checksum/copy operations
    self.__gfalctxt = gfal2.creat_context()

    # DIRAC file catalogue client used to register uploaded files
    self.__fc = FileCatalog()


  def safe_upload(self, source_file, base_pfn):
    """Upload *source_file* to ``base_pfn/source_file`` using gfal2.

    :returns: tuple (filesize, adler32 checksum) of the copied file
    """
    # TODO: This function needs a retry loop, catching any of the gfal exceptions
    full_source = "file://%s" % os.path.abspath(source_file)
    gfalparams = self.__gfalctxt.transfer_parameters()
    # this hopefully enables checksum verification (hard to test ...)
    gfalparams.checksum_check = True
    fileinfo = self.__gfalctxt.stat(full_source)
    filesize = fileinfo.st_size
    filesum = self.__gfalctxt.checksum(full_source, "adler32")
    gfalparams.set_user_defined_checksum("adler32", filesum)
    full_pfn = "%s/%s" % (base_pfn, source_file)
    self.__gfalctxt.filecopy(gfalparams, full_source, full_pfn)

    return (filesize, filesum)


  def upload_files(self, source, base_pfn, base_lfn, target_se):
    """for directories: recursively searches the source directory for files and uploads them to SE
    otherwise, just uploads file"""
    if os.path.isfile(source):
      # TODO: this and the same block below should be a function
      filesize, filesum = self.safe_upload(source, base_pfn)
      full_lfn = "%s/%s" % (base_lfn, source)
      full_pfn = "%s/%s" % (base_pfn, source)
      self.registerfile(full_pfn, full_lfn, target_se, filesum, filesize)
      return
    for dirpath, subdirs, files in os.walk(source):
      for filename in files:
        source_file = os.path.join(dirpath, filename)
        # Single-argument print() is valid in both Python 2 and 3;
        # the old 'print x' statement form is a SyntaxError on Python 3.
        print("Trying %s" % filename)
        filesize, filesum = self.safe_upload(source_file, base_pfn)
        print("Uploaded: %s (%s %s)" % (filename, filesize, filesum))
        full_lfn = "%s/%s" % (base_lfn, source_file)
        full_pfn = "%s/%s" % (base_pfn, source_file)
        self.registerfile(full_pfn, full_lfn, target_se, filesum, filesize)
        # TODO: if the registering fails, the file should be deleted to avoid 'dark data' that is on disk, but not in the catalogue

  def registerfile(self, full_pfn, full_lfn, target_se, checksum, size):
    """Register one uploaded file in the DIRAC file catalogue.

    *size* is coerced to int; the GUID is generated here.
    """
    infoDict = {}
    infoDict['PFN'] = full_pfn
    infoDict['Size'] = int(size)
    infoDict['SE'] = target_se
    infoDict['GUID'] = str(uuid.uuid4())
    infoDict['Checksum'] = checksum
    fileDict = {}
    fileDict[full_lfn] = infoDict
    result = self.__fc.addFile(fileDict)
    # TODO: handle the return code as an exception
    if not result["OK"]:
      print(result)
      return
    if result["Value"]["Failed"]:
      print(result["Value"])
    return
Exemplo n.º 5
0
# Walk the upload report line by line; each line is "<filename> <size-or-ERROR!>".
# Registration only starts once `firstfile` is encountered (that file included).
for line in lines:
    bits = line.strip().split(' ')
    filename = bits[0]

    # Flip the flag the first time we see the designated starting file.
    if not registering and filename == firstfile:
        registering = True
    if not registering:
        continue

    if bits[1] == "ERROR!":
        print("Skipping file", filename, "due to upload error")
        continue

    size = int(bits[1])
    print("Registering file", filename, "with size", size)
    lfn = LFNBASE + filename

    infoDict = {
        'PFN': PFNBASE + filename,
        'Size': size,
        'SE': SE,
        'Checksum': '',
        'GUID': str(uuid.uuid4()),
    }

    result = fc.addFile({lfn: infoDict})
    print("Result:", result)
Exemplo n.º 6
0
class CatalogPlugInTestCase(unittest.TestCase):
  """Base class for the CatalogPlugin test case.

  setUp creates a clean test directory in the file catalog and registers
  a couple of dummy files; tearDown removes everything again.
  """

  def setUp(self):
    """Create the test directory and register ``self.numberOfFiles`` files."""
    # NOTE(review): 'CheckSumType' differs from the 'ChecksumType' spelling used
    # elsewhere in this project -- confirm which one the catalog actually returns.
    self.fullMetadata = ['Status', 'CheckSumType', 'OwnerRole', 'CreationDate', 'Checksum', 'ModificationDate', 'OwnerDN', 'Mode', 'GUID', 'Size']
    self.dirMetadata = self.fullMetadata + ['NumberOfSubPaths']
    self.fileMetadata = self.fullMetadata + ['NumberOfLinks']

    self.catalog = FileCatalog(catalogs=[catalogClientToTest])
    valid = self.catalog.isOK()
    # assertTrue replaces the assert_ alias (deprecated, removed in Python 3.12).
    self.assertTrue(valid)
    self.destDir = '/lhcb/test/unit-test/TestCatalogPlugin'
    self.link = "%s/link" % self.destDir

    # Clean the existing directory
    self.cleanDirectory()
    res = self.catalog.createDirectory(self.destDir)
    # parseResult is called for its assertions; the value is not needed here.
    self.parseResult(res, self.destDir)

    # Register some files to work with
    self.numberOfFiles = 2
    self.files = []
    for i in range(self.numberOfFiles):
      lfn = "%s/testFile_%d" % (self.destDir, i)
      res = self.registerFile(lfn)
      self.assertTrue(res)
      self.files.append(lfn)

  def registerFile(self, lfn):
    """Register a dummy replica for *lfn* and return the per-path result."""
    pfn = 'protocol://host:port/storage/path%s' % lfn
    size = 10000000
    se = 'DIRAC-storage'
    guid = makeGuid()
    adler = stringAdler(guid)
    fileDict = {}
    fileDict[lfn] = {'PFN': pfn, 'Size': size, 'SE': se, 'GUID': guid, 'Checksum': adler}
    res = self.catalog.addFile(fileDict)
    return self.parseResult(res, lfn)

  def parseResult(self, res, path):
    """Assert that *res* reports success for *path* and return its value."""
    self.assertTrue(res['OK'])
    self.assertTrue(res['Value'])
    self.assertTrue(res['Value']['Successful'])
    # assertIn replaces dict.has_key(), which was removed in Python 3.
    self.assertIn(path, res['Value']['Successful'])
    return res['Value']['Successful'][path]

  def parseError(self, res, path):
    """Assert that *res* reports failure for *path* and return the error."""
    self.assertTrue(res['OK'])
    self.assertTrue(res['Value'])
    self.assertTrue(res['Value']['Failed'])
    self.assertIn(path, res['Value']['Failed'])
    return res['Value']['Failed'][path]

  def cleanDirectory(self):
    """Remove the test directory and any files it still contains."""
    res = self.catalog.exists(self.destDir)
    returnValue = self.parseResult(res, self.destDir)
    if not returnValue:
      return
    res = self.catalog.listDirectory(self.destDir)
    returnValue = self.parseResult(res, self.destDir)
    # list() makes this safe on Python 3, where keys() is a view.
    toRemove = list(returnValue['Files'])
    if toRemove:
      self.purgeFiles(toRemove)
    res = self.catalog.removeDirectory(self.destDir)
    returnValue = self.parseResult(res, self.destDir)
    self.assertTrue(returnValue)

  def purgeFiles(self, lfns):
    """Remove every replica of each lfn, then the catalog entries themselves."""
    for lfn in lfns:
      res = self.catalog.getReplicas(lfn, True)
      replicas = self.parseResult(res, lfn)
      for se, pfn in replicas.items():
        repDict = {}
        repDict[lfn] = {'PFN': pfn, 'SE': se}
        res = self.catalog.removeReplica(repDict)
        self.parseResult(res, lfn)
      res = self.catalog.removeFile(lfn)
      self.parseResult(res, lfn)

  def tearDown(self):
    self.cleanDirectory()
Exemplo n.º 7
0
class RAWIntegrityAgent(AgentModule):
  """
  .. class:: RAWIntegrityAgent

  Drives RAW files through their migration life cycle, as tracked in the
  RAWIntegrityDB: check the copy on the storage element (checksum match),
  register the file in the file catalog, then remove it from the online
  storage.

  :param RAWIntegrityDB rawIntegrityDB: RAWIntegrityDB instance
  :param str gatewayUrl: URL to online RequestClient
  """

  def __init__(self, *args, **kwargs):
    """ c'tor
    """

    AgentModule.__init__(self, *args, **kwargs)

    # All three are created in initialize(); kept as None until then.
    self.rawIntegrityDB = None
    self.fileCatalog = None
    self.onlineRequestMgr = None

  def initialize(self):
    """ agent initialisation """

    self.rawIntegrityDB = RAWIntegrityDB()

    # The file catalog is used to register file once it has been transfered
    # But we want to register it in all the catalogs except the RAWIntegrityDB
    # otherwise it is register twice
    # We also remove the BK catalog because some files are not registered there
    # (detector calibration files for example). The real data are registered in
    # the bookeeping by the DataMover
    self.fileCatalog = FileCatalog(catalogs = 'FileCatalog')

    # Monitoring activities reported by this agent via gMonitor marks.
    gMonitor.registerActivity("Iteration", "Agent Loops/min", "RAWIntegriryAgent", "Loops",
                              gMonitor.OP_SUM)
    gMonitor.registerActivity("WaitingFiles", "Files waiting for migration", "RAWIntegriryAgent",
                              "Files", gMonitor.OP_MEAN)
    gMonitor.registerActivity("WaitSize", "Size of migration buffer", "RAWIntegrityAgent", "GB",
                              gMonitor.OP_MEAN)

    gMonitor.registerActivity("NewlyMigrated", "Newly migrated files", "RAWIntegriryAgent", "Files",
                              gMonitor.OP_SUM)
    gMonitor.registerActivity("TotMigrated", "Total migrated files", "RAWIntegriryAgent", "Files",
                              gMonitor.OP_ACUM)
    gMonitor.registerActivity("TotMigratedSize", "Total migrated file size", "RAWIntegriryAgent",
                              "GB", gMonitor.OP_ACUM)

    gMonitor.registerActivity("BadChecksum", "Checksums mismatch", "RAWIntegriryAgent", "Files",
                              gMonitor.OP_SUM)
    gMonitor.registerActivity("ErrorMetadata", "Error when getting files metadata",
                              "RAWIntegriryAgent", "Files", gMonitor.OP_SUM)
    gMonitor.registerActivity("ErrorRegister", "Error when registering files", "RAWIntegriryAgent",
                              "Files", gMonitor.OP_SUM)
    gMonitor.registerActivity("ErrorRemove", "Error when removing files", "RAWIntegriryAgent",
                              "Files", gMonitor.OP_SUM)

    gMonitor.registerActivity("FailedMigrated",
                              "Number of files encountering error during migration",
                              "RAWIntegriryAgent", "Files", gMonitor.OP_SUM)
    gMonitor.registerActivity("TotFailMigrated",
                              "Total number of files encountering error during migration",
                              "RAWIntegriryAgent", "Files", gMonitor.OP_ACUM)

    gMonitor.registerActivity("MigrationTime", "Average migration time", "RAWIntegriryAgent",
                              "Seconds", gMonitor.OP_MEAN)
    # gMonitor.registerActivity("TimeInQueue", "Average current wait for migration",
    #                           "RAWIntegriryAgent", "Minutes", gMonitor.OP_MEAN)
    gMonitor.registerActivity("MigrationRate", "Observed migration rate", "RAWIntegriryAgent",
                              "MB/s", gMonitor.OP_MEAN)

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption('shifterProxy', 'DataProcessing')

    return S_OK()

  def _checkMigrationStatus(self, filesMetadata, lfnsMetadata):
    """ Check that the lfns in parameters are properly migrated,
        and compares the checksum between castor and the RAWIntegrityDB

        :param filesMetadata: dict {lfn: se metadata}
        :param lfnsMetadata: dict {lfn: metadata in RAWIntegrityDB}

        :returns: True/False in successful, Failed in case of problem
    """
    ############################################################
    #
    # Determine the files that have been newly migrated and their success
    #

    successful = {}
    failed = {}
    # NOTE(review): iteritems() is Python 2 only; use items() when porting to Python 3.
    for lfn, seMetadata in filesMetadata.iteritems():
      isMigrated = seMetadata.get('Migrated', False)
      # If it is not migrated, go to the next one
      if not isMigrated:
        # Not an error: recorded as successful-but-not-yet-copied (False).
        successful[lfn] = False
        continue
      else:
        self.log.info("%s is copied." % lfn)
        castorChecksum = seMetadata['Checksum']
        onlineChecksum = lfnsMetadata[lfn]['Checksum']
        # Compare case-insensitively, ignoring leading zeros and any leading
        # 'x' on the online value (presumably a '0x' prefix -- TODO confirm).
        if castorChecksum.lower().lstrip('0') == onlineChecksum.lower().lstrip('0').lstrip('x'):
          self.log.info("%s migrated checksum match." % lfn)
          successful[lfn] = True
        else:
          # NOTE(review): this builds a (message, details) tuple and logs it as a
          # single argument; self.log.error(message, details) was probably intended.
          errStr = "Migrated checksum mis-match.", "%s %s %s" % (
              lfn, castorChecksum.lstrip('0'), onlineChecksum.lstrip('0').lstrip('x'))
          self.log.error(errStr)
          failed[lfn] = errStr

    return S_OK({'Successful': successful, 'Failed': failed})

  def getNewlyCopiedFiles(self, activeFiles):
    """
        Get the list of files newly copied and those not yet copied.

        :param activeFiles: dict {lfn:RawIntegrityDB metadata} for files in Active status

        :return: tuple filesNewlyCopied, filesNotYetCopied (lfns lists)
    """
    # This is a list of all the lfns that we will have newly copied
    filesNewlyCopied = []
    # This are the lfns that are not yet copied
    filesNotYetCopied = []

    self.log.info("Obtaining physical file metadata.")
    # Group the lfns by SEs
    seLfns = {}
    for lfn, metadataDict in activeFiles.iteritems():
      se = metadataDict['SE']
      seLfns.setdefault(se, []).append(lfn)

    for se in sorted(seLfns):
      lfnList = seLfns[se]
      failedMetadata = {}
      successfulMetadata = {}
      res = StorageElement(se).getFileMetadata(lfnList)
      if not res['OK']:
        # Whole-SE failure: mark every lfn of this SE as failed metadata.
        errStr = "Failed to obtain physical file metadata."
        self.log.error(errStr, res['Message'])
        failedMetadata = dict((lfn, errStr) for lfn in lfnList)
      else:
        successfulMetadata = res['Value']['Successful']
        failedMetadata = res['Value']['Failed']

      if failedMetadata:
        self.log.info("Failed to obtain physical file metadata for %s files." % len(failedMetadata))
        gMonitor.addMark('ErrorMetadata', len(failedMetadata))

      if successfulMetadata:
        self.log.info("Obtained physical file metadata for %s files." % len(successfulMetadata))

        ############################################################
        #
        # Determine the files that have been newly migrated and their success
        #
        res = self._checkMigrationStatus(successfulMetadata, activeFiles)
        if not res['OK']:
          self.log.error("Error when checking migration status", res)
          gMonitor.addMark("BadChecksum", len(successfulMetadata))
        else:
          succCompare = res['Value']['Successful']
          failedCompare = res['Value']['Failed']
          seFilesCopied = []
          seFilesNotCopied = []
          # The copied files are those in True in the successful dictionary
          for lfn, isCopied in succCompare.iteritems():
            if isCopied:
              seFilesCopied.append(lfn)
            else:
              seFilesNotCopied.append(lfn)

          filesNewlyCopied.extend(seFilesCopied)
          filesNotYetCopied.extend(seFilesNotCopied)

          gMonitor.addMark("BadChecksum", len(failedCompare))

          self.log.info("%s files newly copied at %s." % (len(seFilesCopied), se))
          self.log.info("Found %s checksum mis-matches at %s." % (len(failedCompare), se))

    return filesNewlyCopied, filesNotYetCopied

  def registerCopiedFiles(self, filesNewlyCopied, copiedFiles, allUnmigratedFilesMeta):
    """
      Register successfully copied files (newly, or in Copied status in the DB) in the DFC.

      :param filesNewlyCopied: [lfns] of files newly copied
      :param copiedFiles: {lfn:RIDb metadata} of files that were in Copied state.
      :param allUnmigratedFilesMeta: {lfn:RI Db metadata} for all lfns non migrated at
                                    the beginning of the loop.

      :return: {lfn:True} for successfully registered lfns
    """
    if filesNewlyCopied or copiedFiles:
      self.log.info("Attempting to register %s newly copied and %s previously copied files" %
                    (len(filesNewlyCopied), len(copiedFiles)))
    else:
      self.log.info("No files to be registered")

    # Update copiedFiles to also contain the newly copied files
    copiedFiles.update(dict((lfn, allUnmigratedFilesMeta[lfn]) for lfn in filesNewlyCopied))

    successfulRegister = {}
    failedRegister = {}

    # Try to register them by batch
    for lfnChunk in breakListIntoChunks(copiedFiles, 100):
      # Add the metadata
      lfnDictChuck = dict((lfn, copiedFiles[lfn]) for lfn in lfnChunk)
      res = self.fileCatalog.addFile(lfnDictChuck)

      if not res['OK']:
        self.log.error("Completely failed to register some successfully copied file.",
                       res['Message'])
        failedRegister.update(dict((lfn, res['Message']) for lfn in lfnDictChuck))
      else:
        successfulRegister.update(res['Value']['Successful'])
        failedRegister.update(res['Value']['Failed'])

    gMonitor.addMark("ErrorRegister", len(failedRegister))
    for lfn, reason in failedRegister.iteritems():
      # Failed registrations go back to 'Copied' so they are retried next cycle.
      self.log.error("Failed to register lfn. Setting to Copied", "%s: %s" % (lfn, reason))
      res = self.rawIntegrityDB.setFileStatus(lfn, 'Copied')
      if not res['OK']:
        self.log.error("Error setting file status to Copied", "%s: %s" % (lfn, res['Message']))

    for lfn in successfulRegister:
      self.log.info("Successfully registered %s in the File Catalog." % lfn)

    return successfulRegister

  def removeRegisteredFiles(self, filesNewlyRegistered, registeredFiles, allUnmigratedFilesMeta):
    """
      Remove successfully registered files (newly, or in Registered status in the DB)
      from the OnlineStorage

      :param filesNewlyRegistered: [lfns] of files newly registered
      :param registeredFiles: {lfn:RIDb metadata} of files that were in Registered state.
      :param allUnmigratedFilesMeta: {lfn:RI Db metadata} for all lfns non migrated at
                                    the beginning of the loop.

      :return: {lfn:True} for successfully removed lfns
    """
    if filesNewlyRegistered or registeredFiles:
      self.log.info("Attempting to remove %s newly registered and %s previously registered files" %
                    (len(filesNewlyRegistered), len(registeredFiles)))
    else:
      self.log.info("No files to be removed")

    # Update registeredFiles to also contain the newly registered files
    registeredFiles.update(dict((lfn, allUnmigratedFilesMeta[lfn]) for lfn in filesNewlyRegistered))

    onlineSE = StorageElement('OnlineRunDB')

    # Try to remove them all
    res = onlineSE.removeFile(registeredFiles)

    filesNewlyRemoved = {}
    failedRemove = {}
    if not res['OK']:
      self.log.error("Completely failed to remove successfully registered files.", res['Message'])
      failedRemove = dict((lfn, res['Message']) for lfn in registeredFiles)
    else:
      filesNewlyRemoved = res['Value']['Successful']
      failedRemove = res['Value']['Failed']

    gMonitor.addMark("ErrorRemove", len(failedRemove))
    for lfn, reason in failedRemove.iteritems():
      # Failed removals go back to 'Registered' so they are retried next cycle.
      self.log.error("Failed to remove lfn. Setting to Registered", "%s: %s" % (lfn, reason))
      res = self.rawIntegrityDB.setFileStatus(lfn, 'Registered')
      if not res['OK']:
        self.log.error("Error setting file status to Registered", "%s: %s" % (lfn, res['Message']))

    now = datetime.datetime.utcnow()
    for lfn in filesNewlyRemoved:
      self.log.info("Successfully removed %s from the Online storage. Setting it to Done" % lfn)
      res = self.rawIntegrityDB.setFileStatus(lfn, 'Done')
      if not res['OK']:
        self.log.error("Error setting file status to Done", "%s: %s" % (lfn, res['Message']))
      else:
        # SubmitTime is ALREADY a datetime since it is declared as such in the DB.
        submitTime = allUnmigratedFilesMeta[lfn]['SubmitTime']
        migrationTime = (now - submitTime).total_seconds()
        gMonitor.addMark("MigrationTime", migrationTime)
        fileSizeMB = allUnmigratedFilesMeta[lfn]['Size'] / (1024 * 1024.0)
        gMonitor.addMark("MigrationRate", fileSizeMB / migrationTime)

    return filesNewlyRemoved

  def execute(self):
    """ execution in one cycle

    """

    # Don't use the server certificate otherwise the DFC wont let us write
    gConfigurationData.setOptionInCFG('/DIRAC/Security/UseServerCertificate', 'false')

    gMonitor.addMark("Iteration", 1)

    ############################################################
    #
    # Obtain the files which have not yet been migrated
    #
    self.log.info("Obtaining un-migrated files.")
    res = self.rawIntegrityDB.getUnmigratedFiles()
    if not res['OK']:
      # Deliberately returns S_OK so the agent keeps cycling despite the error.
      errStr = "Failed to obtain un-migrated files."
      self.log.error(errStr, res['Message'])
      return S_OK()

    # allUnmigratedFilesMeta contain all the files that are not yet
    # migrated (not copied, not registered, not removed), as well as their metadata
    allUnmigratedFilesMeta = res['Value']
    self.log.info("Obtained %s un-migrated files." % len(allUnmigratedFilesMeta))
    if not allUnmigratedFilesMeta:
      return S_OK()

    # activeFiles contains the files that are not yet copied
    activeFiles = {}
    # copiedFiles contains files that are copied but not yet registered
    copiedFiles = {}
    # registeredFiles contains files that are copied, registered, but not removed from source
    registeredFiles = {}

    # Assign them
    for lfn, lfnMetadata in allUnmigratedFilesMeta.iteritems():
      status = lfnMetadata.pop('Status')
      if status == 'Active':
        activeFiles[lfn] = lfnMetadata
      elif status == 'Copied':
        copiedFiles[lfn] = lfnMetadata
      elif status == 'Registered':
        registeredFiles[lfn] = lfnMetadata

    gMonitor.addMark("WaitingFiles", len(activeFiles))
    totalSize = 0
    for lfn, fileDict in activeFiles.iteritems():
      totalSize += int(fileDict['Size'])
      # gMonitor.addMark("TimeInQueue", (fileDict['WaitTime'] / 60))
    gMonitor.addMark("WaitSize", (totalSize / (1024 * 1024 * 1024.0)))

    ############################################################
    #
    # Checking newly copied files
    #

    # Get the list of lfns properly copied and not copied
    filesNewlyCopied, filesNotYetCopied = self.getNewlyCopiedFiles(activeFiles)

    ####################################################
    #
    # Registering copied files
    #
    ####################################################

    filesNewlyRegistered = self.registerCopiedFiles(filesNewlyCopied, copiedFiles,
                                                    allUnmigratedFilesMeta)

    ####################################################
    #
    # Performing the removal from the online storage
    #
    ####################################################
    filesNewlyRemoved = self.removeRegisteredFiles(filesNewlyRegistered, registeredFiles,
                                                   allUnmigratedFilesMeta)

    # Doing some accounting

    migratedSize = sum(allUnmigratedFilesMeta[lfn]['Size'] for lfn in filesNewlyRemoved)

    # The number of files that we failed at migrating
    # is the number of files at the beginning, minus the one we processed completely
    # minus those that are not yet copied
    failedMigrated = len(allUnmigratedFilesMeta) - len(filesNewlyRemoved) - len(filesNotYetCopied)

    res = self.rawIntegrityDB.setLastMonitorTime()
    migratedSizeGB = migratedSize / (1024 * 1024 * 1024.0)
    gMonitor.addMark("TotMigratedSize", migratedSizeGB)
    gMonitor.addMark("NewlyMigrated", len(filesNewlyRemoved))
    gMonitor.addMark("TotMigrated", len(filesNewlyRemoved))
    gMonitor.addMark("FailedMigrated", failedMigrated)
    gMonitor.addMark("TotFailMigrated", failedMigrated)

    return S_OK()
Exemplo n.º 8
0
class CatalogPlugInTestCase(unittest.TestCase):
    """Base class for the CatalogPlugin test case.

    setUp creates a clean test directory in the file catalog and registers
    a couple of dummy files; tearDown removes everything again.
    """

    def setUp(self):
        """Create the test directory and register ``self.numberOfFiles`` files."""
        self.fullMetadata = [
            "Status",
            "ChecksumType",
            "OwnerRole",
            "CreationDate",
            "Checksum",
            "ModificationDate",
            "OwnerDN",
            "Mode",
            "GUID",
            "Size",
        ]
        self.dirMetadata = self.fullMetadata + ["NumberOfSubPaths"]
        self.fileMetadata = self.fullMetadata + ["NumberOfLinks"]

        self.catalog = FileCatalog(catalogs=[catalogClientToTest])
        valid = self.catalog.isOK()
        # assertTrue replaces the assert_ alias (deprecated, removed in Python 3.12).
        self.assertTrue(valid)
        self.destDir = "/lhcb/test/unit-test/TestCatalogPlugin"
        self.link = "%s/link" % self.destDir

        # Clean the existing directory
        self.cleanDirectory()
        res = self.catalog.createDirectory(self.destDir)
        # parseResult is called for its assertions; the value is not needed here.
        self.parseResult(res, self.destDir)

        # Register some files to work with
        self.numberOfFiles = 2
        self.files = []
        for i in range(self.numberOfFiles):
            lfn = "%s/testFile_%d" % (self.destDir, i)
            res = self.registerFile(lfn)
            self.assertTrue(res)
            self.files.append(lfn)

    def registerFile(self, lfn):
        """Register a dummy replica for *lfn* and return the per-path result."""
        pfn = "protocol://host:port/storage/path%s" % lfn
        size = 10000000
        se = "DIRAC-storage"
        guid = makeGuid()
        adler = stringAdler(guid)
        fileDict = {}
        fileDict[lfn] = {"PFN": pfn, "Size": size, "SE": se, "GUID": guid, "Checksum": adler}
        res = self.catalog.addFile(fileDict)
        return self.parseResult(res, lfn)

    def parseResult(self, res, path):
        """Assert that *res* reports success for *path* and return its value."""
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Successful"])
        # assertIn replaces dict.has_key(), which was removed in Python 3.
        self.assertIn(path, res["Value"]["Successful"])
        return res["Value"]["Successful"][path]

    def parseError(self, res, path):
        """Assert that *res* reports failure for *path* and return the error."""
        self.assertTrue(res["OK"])
        self.assertTrue(res["Value"])
        self.assertTrue(res["Value"]["Failed"])
        self.assertIn(path, res["Value"]["Failed"])
        return res["Value"]["Failed"][path]

    def cleanDirectory(self):
        """Remove the test directory and any files it still contains."""
        res = self.catalog.exists(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        if not returnValue:
            return
        res = self.catalog.listDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        # list() makes this safe on Python 3, where keys() is a view.
        toRemove = list(returnValue["Files"])
        if toRemove:
            self.purgeFiles(toRemove)
        res = self.catalog.removeDirectory(self.destDir)
        returnValue = self.parseResult(res, self.destDir)
        self.assertTrue(returnValue)

    def purgeFiles(self, lfns):
        """Remove every replica of each lfn, then the catalog entries themselves."""
        for lfn in lfns:
            res = self.catalog.getReplicas(lfn, True)
            replicas = self.parseResult(res, lfn)
            for se, pfn in replicas.items():
                repDict = {}
                repDict[lfn] = {"PFN": pfn, "SE": se}
                res = self.catalog.removeReplica(repDict)
                self.parseResult(res, lfn)
            res = self.catalog.removeFile(lfn)
            self.parseResult(res, lfn)

    def tearDown(self):
        self.cleanDirectory()