示例#1
0
    def _execute(self, resthost, resturi, config, task):
        mw = MasterWorker(config, quiet=False, debug=True, test=False)

        tapeRecallStatus = 'TAPERECALL'
        self.logger.info("Retrieving %s tasks", tapeRecallStatus)
        recallingTasks = mw.getWork(limit=999999, getstatus=tapeRecallStatus)
        if len(recallingTasks) > 0:
            self.logger.info("Retrieved a total of %d %s tasks",
                             len(recallingTasks), tapeRecallStatus)
            self.logger.debug("Retrieved the following %s tasks: \n%s",
                              tapeRecallStatus, str(recallingTasks))
            for recallingTask in recallingTasks:
                if not recallingTask['tm_DDM_reqid']:
                    self.logger.debug(
                        "tm_DDM_reqid' is not defined for task %s, skipping such task",
                        recallingTask['tm_taskname'])
                    continue
                ddmRequest = statusRequest(recallingTask['tm_DDM_reqid'],
                                           config.TaskWorker.DDMServer,
                                           config.TaskWorker.cmscert,
                                           config.TaskWorker.cmskey,
                                           verbose=False)
                self.logger.info("Contacted %s using %s and %s, got:\n%s",
                                 config.TaskWorker.DDMServer,
                                 config.TaskWorker.cmscert,
                                 config.TaskWorker.cmskey, ddmRequest)
                # The query above returns a JSON with a format {"result": "OK", "message": "Request found", "data": [{"request_id": 14, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:25:41", "last_request": "2018-02-26 23:25:41", "request_count": 1}]}
                if ddmRequest["data"][0][
                        "status"] == "completed":  # possible values: new, activated, updated, completed, rejected, cancelled
                    self.logger.info(
                        "Request %d is completed, setting status of task %s to NEW",
                        recallingTask['tm_DDM_reqid'],
                        recallingTask['tm_taskname'])
                    mw.updateWork(recallingTask['tm_taskname'],
                                  recallingTask['tm_task_command'], 'NEW')
                    # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                    server = HTTPRequests(config.TaskWorker.resturl,
                                          config.TaskWorker.cmscert,
                                          config.TaskWorker.cmskey,
                                          retry=20,
                                          logger=self.logger)
                    mpl = MyProxyLogon(config=config,
                                       server=server,
                                       resturi=config.TaskWorker.restURInoAPI,
                                       myproxylen=self.pollingTime)
                    mpl.execute(task=recallingTask
                                )  # this adds 'user_proxy' to recallingTask
                    mpl.deleteWarnings(recallingTask['user_proxy'],
                                       recallingTask['tm_taskname'])
示例#2
0
    def _execute(self, resthost, resturi, config, task):
        mw = MasterWorker(config, logWarning=False, logDebug=False, sequential=True, console=False)

        tapeRecallStatus = 'TAPERECALL'
        self.logger.info("Retrieving %s tasks", tapeRecallStatus)
        recallingTasks = mw.getWork(limit=999999, getstatus=tapeRecallStatus)
        if len(recallingTasks) > 0:
            self.logger.info("Retrieved a total of %d %s tasks", len(recallingTasks), tapeRecallStatus)
            self.logger.debug("Retrieved the following %s tasks: \n%s", tapeRecallStatus, str(recallingTasks))
            for recallingTask in recallingTasks:
                if not recallingTask['tm_DDM_reqid']:
                    self.logger.debug("tm_DDM_reqid' is not defined for task %s, skipping such task", recallingTask['tm_taskname'])
                    continue

                # Make sure the task sandbox in the crabcache is not deleted until the tape recall is completed
                from WMCore.Services.UserFileCache.UserFileCache import UserFileCache
                ufc = UserFileCache({'endpoint': recallingTask['tm_cache_url'], "pycurl": True})
                sandbox = recallingTask['tm_user_sandbox'].replace(".tar.gz","")
                try:
                    ufc.download(sandbox, sandbox, recallingTask['tm_username'])
                    os.remove(sandbox)
                except Exception as ex:
                    self.logger.exception(ex)
                    self.logger.info("The CRAB3 server backend could not download the input sandbox (%s) from the frontend (%s) using the '%s' username."+\
                                     " This could be a temporary glitch, will try again in next occurrence of the recurring action."+\
                                     " Error reason:\n%s", sandbox, recallingTask['tm_cache_url'], recallingTask['tm_username'], str(ex))

                ddmRequest = statusRequest(recallingTask['tm_DDM_reqid'], config.TaskWorker.DDMServer, config.TaskWorker.cmscert, config.TaskWorker.cmskey, verbose=False)
                self.logger.info("Contacted %s using %s and %s, got:\n%s", config.TaskWorker.DDMServer, config.TaskWorker.cmscert, config.TaskWorker.cmskey, ddmRequest)
                # The query above returns a JSON with a format {"result": "OK", "message": "Request found", "data": [{"request_id": 14, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:25:41", "last_request": "2018-02-26 23:25:41", "request_count": 1}]}
                if ddmRequest["data"][0]["status"] == "completed": # possible values: new, activated, updated, completed, rejected, cancelled
                    self.logger.info("Request %d is completed, setting status of task %s to NEW", recallingTask['tm_DDM_reqid'], recallingTask['tm_taskname'])
                    mw.updateWork(recallingTask['tm_taskname'], recallingTask['tm_task_command'], 'NEW')
                    # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                    server = HTTPRequests(config.TaskWorker.resturl, config.TaskWorker.cmscert, config.TaskWorker.cmskey, retry=20, logger=self.logger)
                    mpl = MyProxyLogon(config=config, server=server, resturi=config.TaskWorker.restURInoAPI, myproxylen=self.pollingTime)
                    mpl.execute(task=recallingTask) # this adds 'user_proxy' to recallingTask
                    mpl.deleteWarnings(recallingTask['user_proxy'], recallingTask['tm_taskname'])

        else:
            self.logger.info("No %s task retrieved.", tapeRecallStatus)
示例#3
0
    def _execute(self, resthost, resturi, config, task):
        mw = MasterWorker(config, logWarning=False, logDebug=False, sequential=True, console=False)

        tapeRecallStatus = 'TAPERECALL'
        self.logger.info("Retrieving %s tasks", tapeRecallStatus)
        recallingTasks = mw.getWork(limit=999999, getstatus=tapeRecallStatus, ignoreTWName=True)
        if len(recallingTasks) > 0:
            self.logger.info("Retrieved a total of %d %s tasks", len(recallingTasks), tapeRecallStatus)
            for recallingTask in recallingTasks:
                taskName = recallingTask['tm_taskname']
                self.logger.info("Working on task %s", taskName)

                reqId = recallingTask['tm_DDM_reqid']
                if not reqId:
                    self.logger.debug("tm_DDM_reqid' is not defined for task %s, skipping such task", taskName)
                    continue

                server = HTTPRequests(config.TaskWorker.resturl, config.TaskWorker.cmscert, config.TaskWorker.cmskey, retry=20, logger=self.logger)
                if (time.time() - getTimeFromTaskname(str(taskName)) > MAX_DAYS_FOR_TAPERECALL*24*60*60):
                    self.logger.info("Task %s is older than %d days, setting its status to FAILED", taskName, MAX_DAYS_FOR_TAPERECALL)
                    msg = "The disk replica request (ID: %d) for the input dataset did not complete in %d days." % (reqId, MAX_DAYS_FOR_TAPERECALL)
                    failTask(taskName, server, config.TaskWorker.restURInoAPI+'workflowdb', msg, self.logger, 'FAILED')
                    continue

                mpl = MyProxyLogon(config=config, server=server, resturi=config.TaskWorker.restURInoAPI, myproxylen=self.pollingTime)
                user_proxy = True
                try:
                    mpl.execute(task=recallingTask) # this adds 'user_proxy' to recallingTask
                except TaskWorkerException as twe:
                    user_proxy = False
                    self.logger.exception(twe)

                # Make sure the task sandbox in the crabcache is not deleted until the tape recall is completed
                if user_proxy:
                    from WMCore.Services.UserFileCache.UserFileCache import UserFileCache
                    ufc = UserFileCache({'cert': recallingTask['user_proxy'], 'key': recallingTask['user_proxy'], 'endpoint': recallingTask['tm_cache_url'], "pycurl": True})
                    sandbox = recallingTask['tm_user_sandbox'].replace(".tar.gz","")
                    debugFiles = recallingTask['tm_debug_files'].replace(".tar.gz","")
                    sandboxPath = os.path.join("/tmp", sandbox)
                    debugFilesPath = os.path.join("/tmp", debugFiles)
                    try:
                        ufc.download(sandbox, sandboxPath, recallingTask['tm_username'])
                        ufc.download(debugFiles, debugFilesPath, recallingTask['tm_username'])
                        self.logger.info("Successfully touched input and debug sandboxes (%s and %s) of task %s (frontend: %s) using the '%s' username (request_id = %d).",
                                         sandbox, debugFiles, taskName, recallingTask['tm_cache_url'], recallingTask['tm_username'], reqId)
                    except Exception as ex:
                        self.logger.info("The CRAB3 server backend could not download the input and/or debug sandbox (%s and/or %s) of task %s from the frontend (%s) using the '%s' username (request_id = %d)."+\
                                         " This could be a temporary glitch, will try again in next occurrence of the recurring action."+\
                                         " Error reason:\n%s", sandbox, debugFiles, taskName, recallingTask['tm_cache_url'], recallingTask['tm_username'], reqId, str(ex))
                    finally:
                        if os.path.exists(sandboxPath): os.remove(sandboxPath)
                        if os.path.exists(debugFilesPath): os.remove(debugFilesPath)

                ddmRequest = statusRequest(reqId, config.TaskWorker.DDMServer, config.TaskWorker.cmscert, config.TaskWorker.cmskey, verbose=False)
                # The query above returns a JSON with a format {"result": "OK", "message": "Request found", "data": [{"request_id": 14, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:25:41", "last_request": "2018-02-26 23:25:41", "request_count": 1}]}                
                self.logger.info("Contacted %s using %s and %s for request_id = %d, got:\n%s", config.TaskWorker.DDMServer, config.TaskWorker.cmscert, config.TaskWorker.cmskey, reqId, ddmRequest)

                if ddmRequest["message"] == "Request found":
                    status = ddmRequest["data"][0]["status"]
                    if status == "completed": # possible values: new, activated, updated, completed, rejected, cancelled
                        self.logger.info("Request %d is completed, setting status of task %s to NEW", reqId, taskName)
                        mw.updateWork(taskName, recallingTask['tm_task_command'], 'NEW')
                        # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                        if user_proxy: mpl.deleteWarnings(recallingTask['user_proxy'], taskName)
                    elif status == "rejected":
                        msg = "The DDM request (ID: %d) has been rejected with this reason: %s" % (reqId, ddmRequest["data"][0]["reason"])
                        self.logger.info(msg + "\nSetting status of task %s to FAILED", taskName)
                        failTask(taskName, server, config.TaskWorker.restURInoAPI+'workflowdb', msg, self.logger, 'FAILED')

                else:
                    msg = "DDM request_id %d not found. Please report to experts" % reqId
                    self.logger.info(msg)
                    if user_proxy: mpl.uploadWarning(msg, recallingTask['user_proxy'], taskName)

        else:
            self.logger.info("No %s task retrieved.", tapeRecallStatus)
    def _execute(self, config, task):

        # setup logger
        if not self.logger:
            self.logger = logging.getLogger(__name__)
            handler = logging.StreamHandler(sys.stdout)  # pylint: disable=redefined-outer-name
            formatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(module)s %(message)s")  # pylint: disable=redefined-outer-name
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
            self.logger.setLevel(logging.DEBUG)
        else:
            # do not use BaseRecurringAction logger but create a new logger
            # which writes to config.TaskWorker.logsDir/taks/recurring/TapeRecallStatus_YYMMDD-HHMM.log
            self.logger = logging.getLogger('TapeRecallStatus')
            logDir = config.TaskWorker.logsDir + '/tasks/recurring/'
            if not os.path.exists(logDir):
                os.makedirs(logDir)
            timeStamp = time.strftime('%y%m%d-%H%M', time.localtime())
            logFile = 'TapeRecallStatus_' + timeStamp + '.log'
            handler = logging.FileHandler(logDir + logFile)
            formatter = logging.Formatter(
                '%(asctime)s:%(levelname)s:%(module)s:%(message)s')
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

        mw = MasterWorker(config,
                          logWarning=False,
                          logDebug=False,
                          sequential=True,
                          console=False,
                          name='masterForTapeRecall')

        tapeRecallStatus = 'TAPERECALL'
        self.logger.info("Retrieving %s tasks", tapeRecallStatus)
        recallingTasks = mw.getWork(limit=999999,
                                    getstatus=tapeRecallStatus,
                                    ignoreTWName=True)
        if not recallingTasks:
            self.logger.info("No %s task retrieved.", tapeRecallStatus)
            return

        self.logger.info("Retrieved a total of %d %s tasks",
                         len(recallingTasks), tapeRecallStatus)
        crabserver = mw.crabserver
        for recallingTask in recallingTasks:
            taskName = recallingTask['tm_taskname']
            self.logger.info("Working on task %s", taskName)

            reqId = recallingTask['tm_DDM_reqid']
            if not reqId:
                self.logger.debug(
                    "tm_DDM_reqid' is not defined for task %s, skipping such task",
                    taskName)
                continue
            else:
                msg = "Task points to Rucio RuleId:  %s " % reqId
                self.logger.info(msg)

            if (time.time() - getTimeFromTaskname(
                    str(taskName))) > MAX_DAYS_FOR_TAPERECALL * 24 * 60 * 60:
                self.logger.info(
                    "Task %s is older than %d days, setting its status to FAILED",
                    taskName, MAX_DAYS_FOR_TAPERECALL)
                msg = "The disk replica request (ID: %s) for the input dataset did not complete in %d days." % (
                    reqId, MAX_DAYS_FOR_TAPERECALL)
                failTask(taskName, crabserver, msg, self.logger, 'FAILED')
                continue

            if not 'S3' in recallingTask['tm_cache_url'].upper():
                # when using old crabcache had to worry about sandbox purging after 3 days
                mpl = MyProxyLogon(config=config,
                                   crabserver=crabserver,
                                   myproxylen=self.pollingTime)
                user_proxy = True
                try:
                    mpl.execute(task=recallingTask
                                )  # this adds 'user_proxy' to recallingTask
                except TaskWorkerException as twe:
                    user_proxy = False
                    self.logger.exception(twe)

                # Make sure the task sandbox in the crabcache is not deleted until the tape recall is completed
                if user_proxy:
                    self.refreshSandbox(recallingTask)

            # Retrieve status of recall request
            if not self.rucioClient:
                self.rucioClient = getNativeRucioClient(config=config,
                                                        logger=self.logger)
            try:
                ddmRequest = self.rucioClient.get_replication_rule(reqId)
            except RuleNotFound:
                msg = "Rucio rule id %s not found. Please report to experts" % reqId
                self.logger.error(msg)
                if user_proxy:
                    mpl.uploadWarning(msg, recallingTask['user_proxy'],
                                      taskName)
            if ddmRequest['state'] == 'OK':
                self.logger.info(
                    "Request %s is completed, setting status of task %s to NEW",
                    reqId, taskName)
                mw.updateWork(taskName, recallingTask['tm_task_command'],
                              'NEW')
                # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                if user_proxy:
                    mpl.deleteWarnings(recallingTask['user_proxy'], taskName)
            else:
                expiration = ddmRequest[
                    'expires_at']  # this is a datetime.datetime object
                if expiration < datetime.datetime.now():
                    # give up waiting
                    msg = (
                        "Replication request %s for task %s expired. Setting its status to FAILED"
                        % (reqId, taskName))
                    self.logger.info(msg)
                    failTask(taskName, crabserver, msg, self.logger, 'FAILED')
示例#5
0
    def _execute(self, resthost, resturi, config, task):

        # setup logger
        if not self.logger:
            self.logger = logging.getLogger(__name__)
            handler = logging.StreamHandler(sys.stdout)
            formatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(module)s %(message)s")
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
            self.logger.setLevel(logging.DEBUG)
        else:
            # do not use BaseRecurringAction logger but create a new logger
            # which writes to config.TaskWorker.logsDir/taks/recurring/TapeRecallStatus_YYMMDD-HHMM.log
            self.logger = logging.getLogger('TapeRecallStatus')
            logDir = config.TaskWorker.logsDir + '/tasks/recurring/'
            if not os.path.exists(logDir):
                os.makedirs(logDir)
            timeStamp = time.strftime('%y%m%d-%H%M', time.localtime())
            logFile = 'TapeRecallStatus_' + timeStamp + '.log'
            handler = logging.FileHandler(logDir + logFile)
            formatter = logging.Formatter(
                '%(asctime)s:%(levelname)s:%(module)s:%(message)s')
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

        mw = MasterWorker(config,
                          logWarning=False,
                          logDebug=False,
                          sequential=True,
                          console=False,
                          name='masterForTapeRecall')

        tapeRecallStatus = 'TAPERECALL'
        self.logger.info("Retrieving %s tasks", tapeRecallStatus)
        recallingTasks = mw.getWork(limit=999999,
                                    getstatus=tapeRecallStatus,
                                    ignoreTWName=True)
        if len(recallingTasks) > 0:
            self.logger.info("Retrieved a total of %d %s tasks",
                             len(recallingTasks), tapeRecallStatus)
            for recallingTask in recallingTasks:
                taskName = recallingTask['tm_taskname']
                self.logger.info("Working on task %s", taskName)

                reqId = recallingTask['tm_DDM_reqid']
                if not reqId:
                    self.logger.debug(
                        "tm_DDM_reqid' is not defined for task %s, skipping such task",
                        taskName)
                    continue

                server = HTTPRequests(resthost,
                                      config.TaskWorker.cmscert,
                                      config.TaskWorker.cmskey,
                                      retry=20,
                                      logger=self.logger)
                if (time.time() - getTimeFromTaskname(str(taskName)) >
                        MAX_DAYS_FOR_TAPERECALL * 24 * 60 * 60):
                    self.logger.info(
                        "Task %s is older than %d days, setting its status to FAILED",
                        taskName, MAX_DAYS_FOR_TAPERECALL)
                    msg = "The disk replica request (ID: %d) for the input dataset did not complete in %d days." % (
                        reqId, MAX_DAYS_FOR_TAPERECALL)
                    failTask(taskName, server, resturi, msg, self.logger,
                             'FAILED')
                    continue

                mpl = MyProxyLogon(config=config,
                                   server=server,
                                   resturi=resturi,
                                   myproxylen=self.pollingTime)
                user_proxy = True
                try:
                    mpl.execute(task=recallingTask
                                )  # this adds 'user_proxy' to recallingTask
                except TaskWorkerException as twe:
                    user_proxy = False
                    self.logger.exception(twe)

                # Make sure the task sandbox in the crabcache is not deleted until the tape recall is completed
                if user_proxy:
                    from WMCore.Services.UserFileCache.UserFileCache import UserFileCache
                    ufc = UserFileCache({
                        'cert':
                        recallingTask['user_proxy'],
                        'key':
                        recallingTask['user_proxy'],
                        'endpoint':
                        recallingTask['tm_cache_url'],
                        "pycurl":
                        True
                    })
                    sandbox = recallingTask['tm_user_sandbox'].replace(
                        ".tar.gz", "")
                    debugFiles = recallingTask['tm_debug_files'].replace(
                        ".tar.gz", "")
                    sandboxPath = os.path.join("/tmp", sandbox)
                    debugFilesPath = os.path.join("/tmp", debugFiles)
                    try:
                        ufc.download(sandbox, sandboxPath,
                                     recallingTask['tm_username'])
                        ufc.download(debugFiles, debugFilesPath,
                                     recallingTask['tm_username'])
                        self.logger.info(
                            "Successfully touched input and debug sandboxes (%s and %s) of task %s (frontend: %s) using the '%s' username (request_id = %d).",
                            sandbox, debugFiles, taskName,
                            recallingTask['tm_cache_url'],
                            recallingTask['tm_username'], reqId)
                    except Exception as ex:
                        self.logger.info("The CRAB3 server backend could not download the input and/or debug sandbox (%s and/or %s) of task %s from the frontend (%s) using the '%s' username (request_id = %d)."+\
                                         " This could be a temporary glitch, will try again in next occurrence of the recurring action."+\
                                         " Error reason:\n%s", sandbox, debugFiles, taskName, recallingTask['tm_cache_url'], recallingTask['tm_username'], reqId, str(ex))
                    finally:
                        if os.path.exists(sandboxPath): os.remove(sandboxPath)
                        if os.path.exists(debugFilesPath):
                            os.remove(debugFilesPath)

                ddmRequest = statusRequest(reqId,
                                           config.TaskWorker.DDMServer,
                                           config.TaskWorker.cmscert,
                                           config.TaskWorker.cmskey,
                                           verbose=False)
                # The query above returns a JSON with a format {"result": "OK", "message": "Request found", "data": [{"request_id": 14, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:25:41", "last_request": "2018-02-26 23:25:41", "request_count": 1}]}
                self.logger.info(
                    "Contacted %s using %s and %s for request_id = %d, got:\n%s",
                    config.TaskWorker.DDMServer, config.TaskWorker.cmscert,
                    config.TaskWorker.cmskey, reqId, ddmRequest)

                if ddmRequest["message"] == "Request found":
                    status = ddmRequest["data"][0]["status"]
                    if status == "completed":  # possible values: new, activated, updated, completed, rejected, cancelled
                        self.logger.info(
                            "Request %d is completed, setting status of task %s to NEW",
                            reqId, taskName)
                        mw.updateWork(taskName,
                                      recallingTask['tm_task_command'], 'NEW')
                        # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                        if user_proxy:
                            mpl.deleteWarnings(recallingTask['user_proxy'],
                                               taskName)
                    elif status == "rejected":
                        msg = "The DDM request (ID: %d) has been rejected with this reason: %s" % (
                            reqId, ddmRequest["data"][0]["reason"])
                        self.logger.info(
                            msg + "\nSetting status of task %s to FAILED",
                            taskName)
                        failTask(taskName, server, resturi, msg, self.logger,
                                 'FAILED')

                else:
                    msg = "DDM request_id %d not found. Please report to experts" % reqId
                    self.logger.info(msg)
                    if user_proxy:
                        mpl.uploadWarning(msg, recallingTask['user_proxy'],
                                          taskName)

        else:
            self.logger.info("No %s task retrieved.", tapeRecallStatus)