示例#1
0
def downloadListOfFiles(dm, source_dir, dest_dir, listOfFiles, tID):
    """
    Download one worker's share of files, one file after the other.

    For each entry of ``listOfFiles`` the LFN is built by joining ``source_dir``
    and the entry, and ``dm.getFile`` is asked to fetch it into ``dest_dir``
    extended with the directory part of the entry (if any).

    Failures do not stop the loop: they are logged and recorded as
    ``"<lfn>: <message>"`` in ``listOfFailedFiles``, a name this function
    expects to find in the enclosing (module) scope.

    :param dm: object providing ``getFile(lfn, localDirectory)``
    :param source_dir: base path of the files to download
    :param dest_dir: local base directory for the downloads
    :param listOfFiles: file names relative to ``source_dir``
    :param tID: thread identifier, only used to tag log lines
    """
    threadLine = "[Thread %s]" % tID
    log = gLogger.getLocalSubLogger("[Thread %s] " % tID)
    for filename in listOfFiles:
        sourceLFN = os.path.join(source_dir, filename)
        # Directory part of "/<filename>": empty for a bare file name,
        # "/sub/dir" for "sub/dir/file"
        relativeDir = ("/" + filename).rsplit("/", 1)[0]
        targetDir = dest_dir + relativeDir
        res = returnSingleResult(dm.getFile(sourceLFN, targetDir))
        if res["OK"]:
            log.notice(threadLine + " Downloading " + filename + " -> [DONE]")
            continue
        reason = res["Message"]
        log.fatal(threadLine + " Downloading " + filename + " -X- [FAILED] " + reason)
        listOfFailedFiles.append("%s: %s" % (sourceLFN, reason))
示例#2
0
def uploadListOfFiles(dm, source_dir, dest_dir, storage, listOfFiles, tID):
    """
    Upload and register one worker's share of files, one file after the other.

    For each entry of ``listOfFiles`` the destination LFN is built by joining
    ``dest_dir`` and the entry, and ``dm.putAndRegister`` is called with the
    local path ``source_dir + "/" + entry`` and the given ``storage``.

    Failures do not stop the loop: they are logged and recorded as
    ``"<lfn>: <message>"`` in ``listOfFailedFiles``, a name this function
    expects to find in the enclosing (module) scope.

    :param dm: object providing ``putAndRegister(lfn, localPath, storage, guid)``
    :param source_dir: local directory holding the files to upload
    :param dest_dir: base path of the destination LFNs
    :param storage: destination storage, passed through to ``putAndRegister``
    :param listOfFiles: file names relative to ``source_dir``
    :param tID: thread identifier, only used to tag log lines
    """
    threadLine = "[Thread %s]" % tID
    log = gLogger.getLocalSubLogger("[Thread %s] " % tID)
    for filename in listOfFiles:
        destLFN = os.path.join(dest_dir, filename)
        localPath = source_dir + "/" + filename
        res = returnSingleResult(dm.putAndRegister(destLFN, localPath, storage, None))
        if res["OK"]:
            log.notice(threadLine + " Uploading " + filename + " -> [DONE]")
            continue
        reason = res["Message"]
        log.fatal(threadLine + " Uploading " + filename + " -X- [FAILED] " + reason)
        listOfFailedFiles.append("%s: %s" % (destLFN, reason))
示例#3
0
    def moveFilesToDerivedTransformation(self, transDict, resetUnused=True):
        """Move the pending input files of a parent transformation to the derived one.

        Steps, in order:

        1. Get the parent files that are in status Unused or MaxReset.
        2. Look the same LFNs up in the derived transformation. Files whose
           derived status ends with "-inherited" are scheduled to be moved;
           any other derived status is only counted and reported as unexpected.
        3. Update the parent first: files that were Unused become
           "NotProcessed", the others become "Moved".
        4. Update the derived transformation. If a chunk fails there, the same
           chunk is put back to its previous status in the parent.
        5. Derived files still in Assigned-inherited / Unused-inherited whose
           parent file is Processed are set to Processed-inherited.

        Failures of steps 3 to 5 are logged but do not change the return value.

        :param dict transDict: derived transformation description;
            ``TransformationID`` is required, ``InheritedFrom`` (optional)
            is the parent transformation ID
        :param bool resetUnused: if True, parent files in MaxReset are set to
            Unused in the derived transformation, otherwise to
            "MaxReset-inherited"
        :return: S_OK((parentProd, movedFiles)), where movedFiles maps a status
            label to the number of files selected for moving; or the failed
            result of ``getTransformationFiles`` if step 1 or 2 fails
        """
        prod = transDict["TransformationID"]
        parentProd = int(transDict.get("InheritedFrom", 0))
        movedFiles = {}
        log = gLogger.getLocalSubLogger(
            "[None] [%d] .moveFilesToDerivedTransformation:" % prod)
        if not parentProd:
            log.warn("Transformation was not derived...")
            return S_OK((parentProd, movedFiles))
        # get the lfns in status Unused/MaxReset of the parent production
        res = self.getTransformationFiles(
            condDict={
                "TransformationID":
                parentProd,
                "Status": [
                    TransformationFilesStatus.UNUSED,
                    TransformationFilesStatus.MAX_RESET
                ],
            })
        if not res["OK"]:
            log.error(" Error getting Unused files from transformation",
                      "%d: %s" % (parentProd, res["Message"]))
            return res
        parentFiles = res["Value"]
        lfns = [lfnDict["LFN"] for lfnDict in parentFiles]
        if not lfns:
            log.info(" No files found to be moved from transformation",
                     "%d" % parentProd)
            return S_OK((parentProd, movedFiles))
        # get the lfns of the derived production that were Unused/MaxReset in the parent one
        res = self.getTransformationFiles(condDict={
            "TransformationID": prod,
            "LFN": lfns
        })
        if not res["OK"]:
            log.error(" Error getting files from derived transformation:",
                      res["Message"])
            return res
        derivedFiles = res["Value"]
        derivedStatusDict = {
            derivedDict["LFN"]: derivedDict["Status"]
            for derivedDict in derivedFiles
        }
        # (new derived status, current parent status) -> LFNs to move
        newStatusFiles = {}
        # new parent status -> LFNs
        parentStatusFiles = {}
        # unexpected derived status -> number of files
        badStatusFiles = {}
        for parentDict in parentFiles:
            lfn = parentDict["LFN"]
            derivedStatus = derivedStatusDict.get(lfn)
            if not derivedStatus:
                # File unknown to the derived transformation: nothing to do
                continue
            parentStatus = parentDict["Status"]
            # By default move to the parent status (which is Unused or MaxReset)
            status = parentStatus
            moveStatus = parentStatus
            # For MaxReset, set Unused if requested
            if parentStatus == TransformationFilesStatus.MAX_RESET:
                if resetUnused:
                    status = TransformationFilesStatus.UNUSED
                    moveStatus = "Unused from MaxReset"
                else:
                    status = "MaxReset-inherited"
            if derivedStatus.endswith("-inherited"):
                # This is the general case
                newStatusFiles.setdefault((status, parentStatus),
                                          []).append(lfn)
                movedFiles[moveStatus] = movedFiles.get(moveStatus, 0) + 1
            else:
                badStatusFiles[derivedStatus] = badStatusFiles.get(
                    derivedStatus, 0) + 1
            # The parent status is changed even when the derived status was unexpected
            if parentStatus == TransformationFilesStatus.UNUSED:
                # If the file was Unused, set it NotProcessed in parent
                parentStatusFiles.setdefault("NotProcessed", []).append(lfn)
            else:
                parentStatusFiles.setdefault("Moved", []).append(lfn)

        for status, count in badStatusFiles.items():
            log.warn(
                "Files found in an unexpected status in derived transformation",
                ": %d files in status %s" % (count, status),
            )
        # Set the status in the parent transformation first
        for status, lfnList in parentStatusFiles.items():
            for lfnChunk in breakListIntoChunks(lfnList, 5000):
                res = self.setFileStatusForTransformation(
                    parentProd, status, lfnChunk)
                if not res["OK"]:
                    # Report the size of the chunk that failed, not of the whole list
                    log.error(
                        " Error setting status in transformation",
                        "%d: status %s for %d files - %s" %
                        (parentProd, status, len(lfnChunk), res["Message"]),
                    )

        # Set the status in the new transformation
        for (status, oldStatus), lfnList in newStatusFiles.items():
            for lfnChunk in breakListIntoChunks(lfnList, 5000):
                res = self.setFileStatusForTransformation(
                    prod, status, lfnChunk)
                if not res["OK"]:
                    # The call that failed targeted the derived transformation,
                    # so that is the ID to report
                    log.debug(
                        " Error setting status in transformation",
                        "%d: status %s for %d files; resetting them %s. %s" %
                        (prod, status, len(lfnChunk), oldStatus,
                         res["Message"]),
                    )
                    # Roll the chunk back to its previous status in the parent
                    res = self.setFileStatusForTransformation(
                        parentProd, oldStatus, lfnChunk)
                    if not res["OK"]:
                        log.error(
                            " Error setting status in transformation",
                            " %d: status %s for %d files: %s" %
                            (parentProd, oldStatus, len(lfnChunk),
                             res["Message"]),
                        )
                else:
                    log.info(
                        "Successfully moved files",
                        ": %d files from %s to %s" %
                        (len(lfnChunk), oldStatus, status))

        # If files were Assigned or Unused at the time of derivation, try and update them as jobs may have run since then
        res = self.getTransformationFiles(
            condDict={
                "TransformationID":
                prod,
                "Status": [
                    TransformationFilesStatus.ASSIGNED_INHERITED,
                    TransformationFilesStatus.UNUSED_INHERITED
                ],
            })
        if res["OK"]:
            assignedFiles = res["Value"]
            if assignedFiles:
                lfns = [lfnDict["LFN"] for lfnDict in assignedFiles]
                res = self.getTransformationFiles(condDict={
                    "TransformationID": parentProd,
                    "LFN": lfns
                })
                if res["OK"]:
                    parentFiles = res["Value"]
                    processedLfns = [
                        lfnDict["LFN"] for lfnDict in parentFiles
                        if lfnDict["Status"] ==
                        TransformationFilesStatus.PROCESSED
                    ]
                    if processedLfns:
                        res = self.setFileStatusForTransformation(
                            prod,
                            TransformationFilesStatus.PROCESSED_INHERITED,
                            processedLfns)
                        if res["OK"]:
                            log.info(
                                "Successfully set files status",
                                ": %d files to status %s" %
                                (len(processedLfns), TransformationFilesStatus.
                                 PROCESSED_INHERITED),
                            )
        # ``res`` is the result of the last call made in the section above,
        # so this reports whichever of its steps failed
        if not res["OK"]:
            log.error("Error setting status for Assigned derived files",
                      res["Message"])

        return S_OK((parentProd, movedFiles))
示例#4
0
def getStorageElements(vo):
    """
    Get configuration of storage elements

    For every valid storage element whose ``VO`` option contains ``vo``, the
    read ("AccessProtocols") and write ("WriteProtocols") protocol lists are
    taken from the SE options, falling back to the ``DMSHelpers`` values.
    The 1-based position of a protocol in those lists is stored in the "read"
    entries (read list) and in the "write", "delete" and "third_party_copy"
    entries (write list) of the description; 0 means "not listed".

    :param vo: VO name that an SE supports
    :return: S_OK, Value dictionary with key SE and value list of protocol
             description dictionaries (keys ``hostname``, ``scheme``, ``port``,
             ``prefix``, ``impl``, ``domains`` and, for the "srm" scheme,
             ``extended_attributes``)
    """
    log = gLogger.getLocalSubLogger("RucioSynchronizer/%s" % vo)
    seProtocols = {}
    dms = DMSHelpers(vo=vo)
    for seName in dms.getStorageElements():
        se = StorageElement(seName)
        if not se.valid:
            log.warn("Storage element is not valid.", seName)
            continue
        if vo not in se.options.get("VO", []):
            log.debug("SE is valid, but it doesn't support the VO. Skipped.",
                      "[SE: %s, VO: %s]" % (seName, vo))
            continue
        log.debug(" Processing a valid SE for VO: ",
                  "[SE:%s, VO:%s]" % (seName, vo))
        log.debug("Available SE options ", se.options)
        # Registered before the protocol checks: an SE without usable protocol
        # definitions is still returned, with an empty list
        seProtocols[seName] = []
        # Union of read and write protocols, in order of first appearance
        all_protocols = []

        protocols = se.options.get("AccessProtocols")
        log.debug("Global AccessProtocols:",
                  "[VO: %s, protocols: %s]" % (vo, protocols))
        if not protocols:
            protocols = dms.getAccessProtocols()
            if not protocols:
                log.warn(
                    " No global or SE specific access protocols defined for SE ",
                    seName)
                continue
        log.debug("AccessProtocols:",
                  "[VO: %s, protocols:%s]" % (vo, protocols))
        # protocol -> 1-based rank in the read list
        read_protocols = {}
        for idx, prot in enumerate(protocols, start=1):
            read_protocols[prot] = idx
            if prot not in all_protocols:
                all_protocols.append(prot)

        protocols = se.options.get("WriteProtocols")
        if not protocols:
            protocols = dms.getWriteProtocols()
            if not protocols:
                log.warn(
                    " No global or SE specific write protocols defined for SE ",
                    seName)
                continue
        # protocol -> 1-based rank in the write list
        write_protocols = {}
        for idx, prot in enumerate(protocols, start=1):
            write_protocols[prot] = idx
            if prot not in all_protocols:
                all_protocols.append(prot)

        for protocol in all_protocols:
            res = se.getStorageParameters(protocol=protocol)
            if not res["OK"]:
                # Protocols without parameters are left out of the result
                log.warn(
                    "Could not get storage parameters, protocol skipped.",
                    "[SE: %s, protocol: %s] %s" %
                    (seName, protocol, res["Message"]))
                continue
            values = res["Value"]
            scheme = values.get("Protocol")
            # Ranks are looked up with the scheme reported by the SE
            read_rank = read_protocols.get(scheme, 0)
            write_rank = write_protocols.get(scheme, 0)
            params = {
                "hostname": values.get("Host"),
                "scheme": scheme,
                "port": values.get("Port"),
                "prefix": values.get("Path"),
                "impl": "rucio.rse.protocols.gfal.Default",
                "domains": {
                    "lan": {
                        "read": read_rank,
                        "write": write_rank,
                        "delete": write_rank,
                    },
                    "wan": {
                        "read": read_rank,
                        "write": write_rank,
                        "delete": write_rank,
                        "third_party_copy": write_rank,
                    },
                },
            }
            if scheme == "srm":
                # SRM endpoints additionally carry the web service path and space token
                params["extended_attributes"] = {
                    "web_service_path": "%s" % values.get("WSUrl"),
                    "space_token": values.get("SpaceToken"),
                }
            seProtocols[seName].append(params)
    log.debug("Accepted Dirac SEs: ", seProtocols)
    return S_OK(seProtocols)
示例#5
0
def configHelper(voList):
    """
    A helper function to gather necessary Rucio client options from the CS.

    The active catalogs of type ``RucioFileCatalog`` are first looked up under
    ``/Resources/FileCatalogs``. Then, for every VO, the Operations section
    ``/Services/Catalogs`` is searched for exactly one of them. VOs with no
    such catalog, with more than one, or whose non-empty ``CatalogList`` does
    not contain it are skipped with a log message.

    :param voList: list of VO names, or a VO name (str)
    :return: a dictionary of a form {vo: params, vo: params,} where params has
             the keys ``rucioHost``, ``authHost`` and ``privilegedAccount``.
             NOTE(review): when ``/Resources/FileCatalogs`` cannot be read an
             ``S_ERROR`` structure is returned instead of this dictionary;
             callers have to cope with both shapes.
    :rtype: dict
    """
    log = gLogger.getLocalSubLogger("RucioSynchronizerHelper")

    if isinstance(voList, str):
        voList = [voList]
    clientConfig = {}
    log.debug("VO list to consider for synchronization: ", voList)
    # locate RucioFileCatalog type in resources first
    result = gConfig.getSections("/Resources/FileCatalogs")
    catNames = []
    if result["OK"]:
        catalogs = result["Value"]
        log.debug("File catalogs defined in Resources", catalogs)
        for catalog in catalogs:
            result = gConfig.getOptionsDict(getCatalogPath(catalog))
            if result["OK"]:
                options = result["Value"]
                log.debug("Rucio Catalog candidate options", options)
                if options.get("Status") == "Active" and options.get(
                        "CatalogType") == "RucioFileCatalog":
                    catNames.append(catalog)
    else:
        log.error("No catalogs defined in Resources.")
        return S_ERROR("No catalogs defined in Resources.")

    log.info(
        "Active FileCatalogs candidates of type RucioFileCatalog found in Resources:",
        catNames)
    # we found (possibly more than one) candidate, now we look for it in Operations
    # to find out which one is used by which VO. There can be only one
    # Rucio catalog per VO.

    for vo in voList:
        opHelper = Operations(vo=vo)
        result = opHelper.getSections("/Services/Catalogs")
        if result["OK"]:
            catSections = set(result["Value"])
        else:
            log.warn("No Services/Catalogs section in Operations, for ",
                     "VO=%s (skipped)" % vo)
            continue

        selectedCatalog = list(catSections.intersection(catNames))

        if len(selectedCatalog) > 1:
            # The fixed part of the message carries no placeholder: the VO and
            # the offending catalogs are given in the variable part
            log.error(
                "Services/Catalogs section mis-configured."
                " More that one Rucio file catalog",
                "[VO: %s, Catalogs: %s]" % (vo, selectedCatalog),
            )
            continue

        if not selectedCatalog:
            log.warn("VO is not using RucioFileCatalog  (VO skipped)",
                     "[VO: %s]" % vo)
            continue

        # check if the section name is in the catalog list to use.
        # if the list is not empty it has to contain the selected catalog.
        fileCatalogs = opHelper.getValue("/Services/Catalogs/CatalogList", [])

        if fileCatalogs and selectedCatalog[0] not in fileCatalogs:
            log.warn(
                "VO is not using RucioFileCatalog - it is not in the catalog list",
                "[VO: %s]" % vo)
            continue
        # now collect Rucio specific parameters for the VO
        params = {}
        result = gConfig.getOptionsDict(getCatalogPath(selectedCatalog[0]))
        if result["OK"]:
            optDict = result["Value"]
            params["rucioHost"] = optDict.get("RucioHost", None)
            params["authHost"] = optDict.get("AuthHost", None)
            params["privilegedAccount"] = optDict.get("PrivilegedAccount",
                                                      "root")
            clientConfig[vo] = params
            log.info("RSEs and users will be configured in Rucio for the VO:",
                     vo)
        else:
            log.error(result["Message"])
    return clientConfig
示例#6
0
    def _setJobStatusBulk(cls, jobID, statusDict, force=False):
        """Set various status fields for job specified by its jobId.
        Set only the last status in the JobDB, updating all the status
        logging information in the JobLoggingDB. The statusDict has datetime
        as a key and status information dictionary as values

        The status information dictionaries are read for the keys "Status",
        "MinorStatus", "ApplicationStatus" and "Source". Entries with a false
        value are removed from them in place, and "Status" / "Source" may be
        rewritten in place when the state machine refuses a transition.

        :param jobID: job identifier, converted with ``int()``
        :param dict statusDict: update time -> status information dictionary
        :param bool force: if True, status changes are not checked against
            ``JobStatus.JobsStateMachine``
        :return: S_OK((attrNames, attrValues)) with the job attributes written
            to the JobDB (both lists are empty if the newest update is older
            than the last recorded time stamp); S_ERROR if the job is unknown,
            if there are no WMS time stamps, or the failed result of any DB or
            state machine call

        NOTE(review): an empty ``statusDict`` reaches ``updateTimes[0]`` and
        raises IndexError; callers presumably never pass one - confirm.
        """
        jobID = int(jobID)
        log = gLogger.getLocalSubLogger("JobStatusBulk/Job-%d" % jobID)

        result = cls.jobDB.getJobAttributes(
            jobID, ["Status", "StartExecTime", "EndExecTime"])
        if not result["OK"]:
            return result
        if not result["Value"]:
            # if there is no matching Job it returns an empty dictionary
            return S_ERROR("No Matching Job")

        # If the current status is Stalled and we get an update, it should probably be "Running"
        currentStatus = result["Value"]["Status"]
        if currentStatus == JobStatus.STALLED:
            currentStatus = JobStatus.RUNNING
        startTime = result["Value"].get("StartExecTime")
        endTime = result["Value"].get("EndExecTime")
        # getJobAttributes only returns strings :(
        if startTime == "None":
            startTime = None
        if endTime == "None":
            endTime = None
        # Remove useless items in order to make it simpler later, although there should not be any
        for sDict in statusDict.values():
            # sorted() makes a copy of the keys, so popping while looping is safe
            for item in sorted(sDict):
                if not sDict[item]:
                    sDict.pop(item, None)

        # Get the latest time stamps of major status updates
        result = cls.jobLoggingDB.getWMSTimeStamps(int(jobID))
        if not result["OK"]:
            return result
        if not result["Value"]:
            return S_ERROR("No registered WMS timeStamps")
        # This is more precise than "LastTime". timeStamps is a sorted list of tuples...
        # NOTE(review): if "LastTime" is the only key, timeStamps is empty and
        # timeStamps[-1] below raises IndexError - confirm this cannot happen
        timeStamps = sorted((float(t), s) for s, t in result["Value"].items()
                            if s != "LastTime")
        lastTime = Time.toString(Time.fromEpoch(timeStamps[-1][0]))

        # Get chronological order of new updates
        updateTimes = sorted(statusDict)
        log.debug(
            "*** New call ***", "Last update time %s - Sorted new times %s" %
            (lastTime, updateTimes))
        # Get the status (if any) at the time of the first update
        newStat = ""
        firstUpdate = Time.toEpoch(Time.fromString(updateTimes[0]))
        # timeStamps is sorted, so the last match is the most recent status
        # recorded at or before the first update
        for ts, st in timeStamps:
            if firstUpdate >= ts:
                newStat = st
        # Pick up start and end times from all updates
        for updTime in updateTimes:
            sDict = statusDict[updTime]
            # An update without "Status" keeps the status of the previous one
            newStat = sDict.get("Status", newStat)

            if not startTime and newStat == JobStatus.RUNNING:
                # Pick up the start date when the job starts running if not existing
                startTime = updTime
                log.debug("Set job start time", startTime)
            elif not endTime and newStat in JobStatus.JOB_FINAL_STATES:
                # Pick up the end time when the job is in a final status
                endTime = updTime
                log.debug("Set job end time", endTime)

        # We should only update the status to the last one if its time stamp is more recent than the last update
        attrNames = []
        attrValues = []
        # presumably both sides are time strings in the same format, so that
        # comparing them orders them chronologically - TODO confirm
        if updateTimes[-1] >= lastTime:
            minor = ""
            application = ""
            # Get the last status values looping on the most recent updateTimes in chronological order
            for updTime in [dt for dt in updateTimes if dt >= lastTime]:
                sDict = statusDict[updTime]
                log.debug("\t",
                          "Time %s - Statuses %s" % (updTime, str(sDict)))
                status = sDict.get("Status", currentStatus)
                # evaluate the state machine if the status is changing
                # NOTE(review): with force=True currentStatus is never advanced,
                # so a later update without "Status" falls back to the status
                # read from the JobDB rather than to the last forced one -
                # confirm this is intended
                if not force and status != currentStatus:
                    res = JobStatus.JobsStateMachine(
                        currentStatus).getNextState(status)
                    if not res["OK"]:
                        return res
                    newStat = res["Value"]
                    # If the JobsStateMachine does not accept the candidate, don't update
                    if newStat != status:
                        # keeping the same status
                        log.error(
                            "Job Status Error",
                            "%s can't move from %s to %s: using %s" %
                            (jobID, currentStatus, status, newStat),
                        )
                        status = newStat
                        # The corrected status is also what gets written to the
                        # JobLoggingDB further down
                        sDict["Status"] = newStat
                        # Change the source to indicate this is not what was requested
                        source = sDict.get("Source", "")
                        sDict["Source"] = source + "(SM)"
                    # at this stage status == newStat. Set currentStatus to this new status
                    currentStatus = newStat

                # Keep the most recent non-empty minor and application statuses
                minor = sDict.get("MinorStatus", minor)
                application = sDict.get("ApplicationStatus", application)

            log.debug(
                "Final statuses:",
                "status '%s', minor '%s', application '%s'" %
                (status, minor, application))
            if status:
                attrNames.append("Status")
                attrValues.append(status)
            if minor:
                attrNames.append("MinorStatus")
                attrValues.append(minor)
            if application:
                attrNames.append("ApplicationStatus")
                attrValues.append(application)
            # Here we are forcing the update as it's always updating to the last status
            result = cls.jobDB.setJobAttributes(jobID,
                                                attrNames,
                                                attrValues,
                                                update=True,
                                                force=True)
            if not result["OK"]:
                return result

        # Update start and end time if needed
        # Values read from the JobDB at the top are written back as well, not
        # only the ones picked up from the new updates
        if endTime:
            result = cls.jobDB.setEndExecTime(jobID, endTime)
            if not result["OK"]:
                return result
        if startTime:
            result = cls.jobDB.setStartExecTime(jobID, startTime)
            if not result["OK"]:
                return result

        # Update the JobLoggingDB records
        heartBeatTime = None
        for updTime in updateTimes:
            sDict = statusDict[updTime]
            # "idem" is passed for the fields this update does not provide
            status = sDict.get("Status", "idem")
            minor = sDict.get("MinorStatus", "idem")
            application = sDict.get("ApplicationStatus", "idem")
            source = sDict.get("Source", "Unknown")
            result = cls.jobLoggingDB.addLoggingRecord(
                jobID,
                status=status,
                minorStatus=minor,
                applicationStatus=application,
                date=updTime,
                source=source)
            if not result["OK"]:
                return result
            # If the update comes from a job, update the heart beat time stamp with this item's stamp
            if source.startswith("Job"):
                heartBeatTime = updTime
        # updateTimes is sorted, so this is the latest update whose source starts with "Job"
        if heartBeatTime is not None:
            result = cls.jobDB.setHeartBeatData(
                jobID, {"HeartBeatTime": heartBeatTime})
            if not result["OK"]:
                return result

        return S_OK((attrNames, attrValues))