示例#1
0
    def _monitorJobCallback(returnedValue):
        """Log the outcome of a job monitoring task.

        :param returnedValue: pair returned by the _monitorJob method:
                              (ftsJob, standard dirac return struct)
        """
        monitoredJob, result = returnedValue
        subLogger = gLogger.getLocalSubLogger("_monitorJobCallback/%s" % monitoredJob.jobID)

        # Guard clause: the success case only deserves a debug line
        if result["OK"]:
            subLogger.debug("Successfully updated job status")
            return

        subLogger.error("Error updating job status", result)
示例#2
0
    def _treatOperationCallback(returnedValue):
        """Log the outcome of an operation treatment task.

        :param returnedValue: pair returned by the _treatOperation method:
                              (ftsOperation, standard dirac return struct)
        """
        treatedOperation, result = returnedValue
        subLogger = gLogger.getLocalSubLogger("_treatOperationCallback/%s" % treatedOperation.operationID)

        # Guard clause: the success case only deserves a debug line
        if result["OK"]:
            subLogger.debug("Successfully treated operation")
            return

        subLogger.error("Error treating operation", result)
示例#3
0
    def submit(self, context=None, ftsServer=None, ucert=None, pinTime=36000, protocols=None):
        """submit the job to the FTS server

        Some attributes are expected to be defined for the submission to work:
          * type (set by FTS3Operation)
          * sourceSE (only for Transfer jobs)
          * targetSE
          * activity (optional)
          * priority (optional)
          * username
          * userGroup
          * filesToSubmit
          * operationID (optional, used as metadata for the job)

        We also expect the FTSFiles have an ID defined, as it is given as transfer metadata

        Only the "Transfer" and "Staging" job types can be submitted; any other
        type results in an S_ERROR.

        :param pinTime: Time the file should be pinned on disk (used for transfers and staging)
                        Used only if the source SE is a tape storage
        :param context: fts3 context. If not given, it is created (see ftsServer & ucert param)
        :param ftsServer: the address of the fts server to submit to. Used only if context is
                          not given. if not given either, use the ftsServer object attribute

        :param ucert: path to the user certificate/proxy. Might be inferred by the fts cli (see its doc)
        :param protocols: list of protocols from which we should choose the protocol to use

        :returns: S_OK([FTSFiles ids of files submitted]), or S_ERROR if the space token
                  cannot be fetched, the job type is not supported, the job cannot
                  be constructed, or the FTS client fails at submission
        """

        log = gLogger.getLocalSubLogger("submit/%s/%s_%s" % (self.operationID, self.sourceSE, self.targetSE))

        if not context:
            if not ftsServer:
                ftsServer = self.ftsServer
            context = fts3.Context(endpoint=ftsServer, ucert=ucert, request_class=ftsSSLRequest, verify=False)

        # Construct the target SURL
        res = self.__fetchSpaceToken(self.targetSE, self.vo)
        if not res["OK"]:
            return res
        target_spacetoken = res["Value"]

        allLFNs = [ftsFile.lfn for ftsFile in self.filesToSubmit]

        if self.type == "Transfer":
            res = self._constructTransferJob(pinTime, allLFNs, target_spacetoken, protocols=protocols)
        elif self.type == "Staging":
            res = self._constructStagingJob(pinTime, allLFNs, target_spacetoken)
        else:
            # No job construction exists for other types (e.g. 'Removal').
            # Without this branch, `res` would still hold the space token result
            # and the unpacking below would fail or silently misbehave.
            return S_ERROR("Cannot submit FTS3Job of unsupported type: %s" % self.type)

        if not res["OK"]:
            return res

        job, fileIDsInTheJob = res["Value"]

        try:
            self.ftsGUID = fts3.submit(context, job)
            log.info("Got GUID %s" % self.ftsGUID)

            # The attempt counter is increased for every file handed to this job,
            # and only once the submission call itself succeeded.
            for ftsFile in self.filesToSubmit:
                ftsFile.attempt += 1

                # This should never happen because a file should be "released"
                # first by the previous job.
                # But we just print a warning
                if ftsFile.ftsGUID is not None:
                    log.warn(
                        "FTSFile has a non NULL ftsGUID at job submission time",
                        "FileID: %s existing ftsGUID: %s" % (ftsFile.fileID, ftsFile.ftsGUID),
                    )

                # `assign` the file to this job
                ftsFile.ftsGUID = self.ftsGUID
                # Only the files that actually made it into the job are "Submitted"
                if ftsFile.fileID in fileIDsInTheJob:
                    ftsFile.status = "Submitted"

            # Same timestamp (second precision) for all the time attributes
            now = datetime.datetime.utcnow().replace(microsecond=0)
            self.submitTime = now
            self.lastUpdate = now
            self.lastMonitor = now

        except FTS3ClientException as e:
            log.exception("Error at submission", repr(e))
            return S_ERROR("Error at submission: %s" % e)

        return S_OK(fileIDsInTheJob)
示例#4
0
    def _treatOperation(self, operation):
        """Treat one operation:
          * does the callback if the operation is finished
          * generate new jobs and submits them

        If the operation is linked to an RMS Request which is canceled or
        does not exist anymore, the operation is marked as "Canceled" and no
        new job is generated.

        Unless an early error return happens, the operation is persisted in the
        DB at the end, and the result of that persistence is returned.

        :param operation: the operation to treat

        :return: operation, S_OK()/S_ERROR()
        """
        # Fallback logger: the general except clause below needs `log` to be
        # bound even if the exception is raised before the dedicated sub-logger
        # is created (e.g. while accessing operation.operationID).
        log = gLogger

        # General try catch to avoid that the thread dies
        try:
            threadID = current_process().name
            log = gLogger.getLocalSubLogger("treatOperation/%s" % operation.operationID)

            # If the operation is totally processed
            # we perform the callback
            if operation.isTotallyProcessed():
                log.debug("FTS3Operation %s is totally processed" % operation.operationID)
                res = operation.callback()

                if not res["OK"]:
                    log.error("Error performing the callback", res)
                    log.info("Putting back the operation")
                    dbRes = self.fts3db.persistOperation(operation)

                    if not dbRes["OK"]:
                        log.error("Could not persist operation", dbRes)

                    # The callback error is what we report, not the DB result
                    return operation, res

            else:
                log.debug("FTS3Operation %s is not totally processed yet" % operation.operationID)

                # This flag is set to False if we want to stop the ongoing processing
                # of an operation, typically when the matching RMS Request has been
                # canceled (see below)
                continueOperationProcessing = True

                # Check the status of the associated RMS Request.
                # If it is canceled or does not exist anymore then we will not create new FTS3Jobs, and mark
                # this as FTS3Operation canceled.

                if operation.rmsReqID:
                    res = ReqClient().getRequestStatus(operation.rmsReqID)
                    if not res["OK"]:
                        # If the Request does not exist anymore
                        if cmpError(res, errno.ENOENT):
                            log.info(
                                "The RMS Request does not exist anymore, canceling the FTS3Operation",
                                "rmsReqID: %s, FTS3OperationID: %s" % (operation.rmsReqID, operation.operationID),
                            )
                            operation.status = "Canceled"
                            continueOperationProcessing = False
                        else:
                            # Any other error: give up for now, without persisting
                            log.error("Could not get request status", res)
                            return operation, res

                    else:
                        rmsReqStatus = res["Value"]

                        if rmsReqStatus == "Canceled":
                            log.info(
                                "The RMS Request is canceled, canceling the FTS3Operation",
                                "rmsReqID: %s, FTS3OperationID: %s" % (operation.rmsReqID, operation.operationID),
                            )
                            operation.status = "Canceled"
                            continueOperationProcessing = False

                if continueOperationProcessing:

                    res = operation.prepareNewJobs(
                        maxFilesPerJob=self.maxFilesPerJob, maxAttemptsPerFile=self.maxAttemptsPerFile
                    )

                    if not res["OK"]:
                        log.error("Cannot prepare new Jobs", "FTS3Operation %s : %s" % (operation.operationID, res))
                        return operation, res

                    newJobs = res["Value"]

                    log.debug("FTS3Operation %s: %s new jobs to be submitted" % (operation.operationID, len(newJobs)))

                    # A failure on one job is logged and does not prevent
                    # trying to submit the following ones
                    for ftsJob in newJobs:
                        res = self._serverPolicy.chooseFTS3Server()
                        if not res["OK"]:
                            log.error(res)
                            continue

                        ftsServer = res["Value"]
                        log.debug("Use %s server" % ftsServer)

                        ftsJob.ftsServer = ftsServer

                        res = self.getFTS3Context(ftsJob.username, ftsJob.userGroup, ftsServer, threadID=threadID)

                        if not res["OK"]:
                            log.error("Could not get context", res)
                            continue

                        context = res["Value"]

                        try:
                            tpcProtocols = operation.fts3Plugin.selectTPCProtocols(ftsJob=ftsJob)
                        except ValueError as e:
                            log.error("Could not select TPC list", repr(e))
                            continue

                        res = ftsJob.submit(context=context, protocols=tpcProtocols)

                        if not res["OK"]:
                            log.error(
                                "Could not submit FTS3Job", "FTS3Operation %s : %s" % (operation.operationID, res)
                            )
                            continue

                        # Only successfully submitted jobs are attached to the operation
                        operation.ftsJobs.append(ftsJob)

                        submittedFileIds = res["Value"]
                        log.info(
                            "FTS3Operation %s: Submitted job for %s transfers"
                            % (operation.operationID, len(submittedFileIds))
                        )

                # new jobs are put in the DB at the same time
            res = self.fts3db.persistOperation(operation)

            if not res["OK"]:
                log.error("Could not persist operation", res)

            return operation, res

        except Exception as e:
            log.exception("Exception in the thread", repr(e))
            return operation, S_ERROR("Exception %s" % repr(e))
示例#5
0
    def _monitorJob(self, ftsJob):
        """* query the FTS servers
        * update the FTSFile status
        * update the FTSJob status

        If the job is in one of its FINAL_STATES after the update, the
        accounting is sent.

        :param ftsJob: FTS job

        :return: ftsJob, S_OK()/S_ERROR()
        """
        # Fallback logger: the general except clause below needs `log` to be
        # bound even if the exception is raised before the dedicated sub-logger
        # is created (e.g. while accessing ftsJob.jobID).
        log = gLogger

        # General try catch to avoid that the thread dies
        try:
            threadID = current_process().name
            log = gLogger.getLocalSubLogger("_monitorJob/%s" % ftsJob.jobID)

            res = self.getFTS3Context(ftsJob.username, ftsJob.userGroup, ftsJob.ftsServer, threadID=threadID)

            if not res["OK"]:
                log.error("Error getting context", res)
                return ftsJob, res

            context = res["Value"]

            res = ftsJob.monitor(context=context)

            if not res["OK"]:
                log.error("Error monitoring job", res)

                # If the job was not found on the server, update the DB.
                # In that case, the result of the DB update is what is returned.
                if cmpError(res, errno.ESRCH):
                    res = self.fts3db.cancelNonExistingJob(ftsJob.operationID, ftsJob.ftsGUID)

                return ftsJob, res

            # { fileID : { Status, Error } }
            filesStatus = res["Value"]

            # Specify the job ftsGUID to make sure we do not overwrite
            # status of files already taken by newer jobs
            res = self.fts3db.updateFileStatus(filesStatus, ftsGUID=ftsJob.ftsGUID)

            if not res["OK"]:
                log.error("Error updating file fts status", "%s, %s" % (ftsJob.ftsGUID, res))
                return ftsJob, res

            # { jobID : { attributes to update } }
            upDict = {
                ftsJob.jobID: {
                    "status": ftsJob.status,
                    "error": ftsJob.error,
                    "completeness": ftsJob.completeness,
                    "operationID": ftsJob.operationID,
                    "lastMonitor": True,
                }
            }
            # The result of this update is returned as is to the caller
            res = self.fts3db.updateJobStatus(upDict)

            if ftsJob.status in ftsJob.FINAL_STATES:
                self.__sendAccounting(ftsJob)

            return ftsJob, res

        except Exception as e:
            log.exception("Exception while monitoring job", repr(e))
            return ftsJob, S_ERROR(0, "Exception %s" % repr(e))