Example #1
    def executeAction(self, nextinput, work):
        """ Execute an action and deal with the error handling and upload of the tasklogfile to the crabcache
        """
        try:
            output = work.execute(nextinput, task=self._task, tempDir=self.tempDir)
        except TaskWorkerException as twe:
            self.logger.debug(str(traceback.format_exc())) #print the stacktrace only in debug mode
            raise WorkerHandlerException(str(twe), retry=twe.retry) #TaskWorker error, do not add traceback to the error propagated to the REST
        except Exception as exc:
            msg = "Problem handling %s because of %s failure, traceback follows\n" % (self._task['tm_taskname'], str(exc))
            msg += str(traceback.format_exc())
            self.logger.error(msg)
            raise WorkerHandlerException(msg) #Errors not foreseen. Print everything!
        finally:
            #upload logfile of the task to the crabcache
            #TODO: we need to do this also in Worker.py, otherwise some messages might end up only in the TW file and not in the crabcache.
            logpath = 'logs/tasks/%s/%s.log' % (self._task['tm_username'], self._task['tm_taskname'])
            if os.path.isfile(logpath) and 'user_proxy' in self._task: #the user proxy might not be there if myproxy retrieval failed
                cacheurldict = {'endpoint':self._task['tm_cache_url'], 'cert':self._task['user_proxy'], 'key':self._task['user_proxy']}
                try:
                    ufc = UserFileCache(cacheurldict)
                    logfilename = self._task['tm_taskname'] + '_TaskWorker.log'
                    ufc.uploadLog(logpath, logfilename)
                except HTTPException as hte:
                    msg = "Failed to upload the logfile to %s for task %s. More details in the http headers and body:\n%s\n%s" % (self._task['tm_cache_url'], self._task['tm_taskname'], hte.headers, hte.result)
                    self.logger.error(msg)
                except Exception: #pylint: disable=broad-except
                    msg = "Unknown error while uploading the logfile for task %s" % self._task['tm_taskname']
                    self.logger.exception(msg)

        return output
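
The finally-block upload above distills to a few UserFileCache calls. A minimal sketch, assuming a reachable crabcache endpoint and a valid user proxy (both paths below are placeholders):

from WMCore.Services.UserFileCache.UserFileCache import UserFileCache

proxy = '/tmp/x509up_u12345'  # placeholder: the user proxy doubles as cert and key
cacheurldict = {'endpoint': 'https://cmsweb.cern.ch/crabcache',  # placeholder endpoint
                'cert': proxy,
                'key': proxy}
ufc = UserFileCache(cacheurldict)
# uploadLog(localPath, remoteName) ships the task log under a stable name
ufc.uploadLog('logs/tasks/user/taskname.log', 'taskname_TaskWorker.log')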
Example #2
class UserFileCacheTest(unittest.TestCase):
    """
    Unit tests for UserFileCache Service
    """


    def testChecksum(self):
        """
        Tests checksum method
        """
        self.ufc = UserFileCache()
        checksum1 = self.ufc.checksum(fileName=path.join(getTestBase(), 'WMCore_t/Services_t/UserFileCache_t/ewv_crab_EwvAnalysis_31_111229_140959_publish.tgz'))
        checksum2 = self.ufc.checksum(fileName=path.join(getTestBase(), 'WMCore_t/Services_t/UserFileCache_t/ewv_crab_EwvAnalysis_31_resubmit_111229_144319_publish.tgz'))
        self.assertTrue(checksum1)
        self.assertTrue(checksum2)
        self.assertNotEqual(checksum1, checksum2)

        self.assertRaises(IOError, self.ufc.checksum, **{'fileName': 'does_not_exist'})
        return

    def testUploadDownload(self):
        if 'UFCURL' in os.environ:
            currdir = getTestBase()
            upfile = path.join(currdir, 'WMCore_t/Services_t/UserFileCache_t/test_file.tgz') #file to upload
            ufc = UserFileCache({'endpoint':os.environ['UFCURL']})

            #named upload/download
            res = ufc.upload(upfile, 'name_publish.tgz')
            ufc.download(name=res['name'], output='name_publish.tgz')

            #hashkey upload/download
            res = ufc.upload(upfile)
            ufc.download(res['hashkey'], output='pippo_publish_down.tgz')
Example #3
    def executeInternal(self, *args, **kw):
        # FIXME: In PanDA, we provided the executable as a URL.
        # So, the filename becomes http:// -- and doesn't really work.  Hardcoding the analysis wrapper.
        #transform_location = getLocation(kw['task']['tm_transformation'], 'CAFUtilities/src/python/transformation/CMSRunAnalysis/')
        transform_location = getLocation('CMSRunAnalysis.sh', 'CRABServer/scripts/')
        cmscp_location = getLocation('cmscp.py', 'CRABServer/scripts/')
        gwms_location = getLocation('gWMS-CMSRunAnalysis.sh', 'CRABServer/scripts/')
        dag_bootstrap_location = getLocation('dag_bootstrap_startup.sh', 'CRABServer/scripts/')
        bootstrap_location = getLocation("dag_bootstrap.sh", "CRABServer/scripts/")
        adjust_location = getLocation("AdjustSites.py", "CRABServer/scripts/")

        shutil.copy(transform_location, '.')
        shutil.copy(cmscp_location, '.')
        shutil.copy(gwms_location, '.')
        shutil.copy(dag_bootstrap_location, '.')
        shutil.copy(bootstrap_location, '.')
        shutil.copy(adjust_location, '.')

        # Bootstrap the ISB if we are using UFC
        if UserFileCache and '/crabcache' in kw['task']['tm_cache_url']:
            ufc = UserFileCache(dict={'cert': kw['task']['user_proxy'], 'key': kw['task']['user_proxy'], 'endpoint' : kw['task']['tm_cache_url']})
            try:
                ufc.download(hashkey=kw['task']['tm_user_sandbox'].split(".")[0], output="sandbox.tar.gz")
            except Exception as ex:
                self.logger.exception(ex)
                raise TaskWorkerException("The CRAB3 server backend could not download the input sandbox with your code "+\
                                    "from the frontend (crabcache component).\nThis could be a temporary glitch; please try to submit a new task later "+\
                                    "(resubmit will not work) and contact the experts if the error persists.\nError reason: %s" % str(ex)) #TODO url!?
            kw['task']['tm_user_sandbox'] = 'sandbox.tar.gz'
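
The sandbox bootstrap above derives the crabcache hashkey by stripping the extension from tm_user_sandbox, which is stored as '<hashkey>.tar.gz'. A self-contained sketch of just that step (proxy path, endpoint, and sandbox name are placeholders):

from WMCore.Services.UserFileCache.UserFileCache import UserFileCache

proxy = '/tmp/x509up_u12345'  # placeholder proxy path
ufc = UserFileCache(dict={'cert': proxy, 'key': proxy,
                          'endpoint': 'https://cmsweb.cern.ch/crabcache'})  # placeholder endpoint
sandboxName = 'abc123def456.tar.gz'  # placeholder tm_user_sandbox value
# everything before the first dot is the cache hashkey
ufc.download(hashkey=sandboxName.split('.')[0], output='sandbox.tar.gz')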
Example #4
    def executeInternal(self, *args, **kw):
        inputFiles = args[0][2]
        splitterResult = args[0][3][0]

        cwd = os.getcwd()
        try:
            os.chdir(kw['tempDir'])
            splittingSummary = SplittingSummary(kw['task']['tm_split_algo'])
            for jobgroup in splitterResult:
                jobs = jobgroup.getJobs()
                splittingSummary.addJobs(jobs)
            splittingSummary.dump('splitting-summary.json')
            inputFiles.append('splitting-summary.json')

            self.packSandbox(inputFiles)

            self.logger.info('Uploading dry run tarball to the user file cache')
            ufc = UserFileCache(mydict={'cert': kw['task']['user_proxy'], 'key': kw['task']['user_proxy'], 'endpoint': kw['task']['tm_cache_url']})
            result = ufc.uploadLog('dry-run-sandbox.tar.gz')
            os.remove('dry-run-sandbox.tar.gz')
            if 'hashkey' not in result:
                raise TaskWorkerException('Failed to upload dry-run-sandbox.tar.gz to the user file cache: ' + str(result))
            else:
                self.logger.info('Uploaded dry run tarball to the user file cache: ' + str(result))
                update = {'workflow': kw['task']['tm_taskname'], 'subresource': 'state', 'status': 'UPLOADED'}
                self.logger.debug('Updating task status: %s' % str(update))
                self.server.post(self.resturi, data=urllib.urlencode(update))

        finally:
            os.chdir(cwd)

        return Result(task=kw['task'], result=args[0])
Example #5
    def executeInternal(self, *args, **kw):
        # FIXME: In PanDA, we provided the executable as a URL.
        # So, the filename becomes http:// -- and doesn't really work.  Hardcoding the analysis wrapper.
        #transform_location = getLocation(kw['task']['tm_transformation'], 'CAFUtilities/src/python/transformation/CMSRunAnalysis/')
        transform_location = getLocation('CMSRunAnalysis.sh', 'CRABServer/scripts/')
        cmscp_location = getLocation('cmscp.py', 'CRABServer/scripts/')
        gwms_location = getLocation('gWMS-CMSRunAnalysis.sh', 'CRABServer/scripts/')
        dag_bootstrap_location = getLocation('dag_bootstrap_startup.sh', 'CRABServer/scripts/')
        bootstrap_location = getLocation("dag_bootstrap.sh", "CRABServer/scripts/")
        adjust_location = getLocation("AdjustSites.py", "CRABServer/scripts/")

        shutil.copy(transform_location, '.')
        shutil.copy(cmscp_location, '.')
        shutil.copy(gwms_location, '.')
        shutil.copy(dag_bootstrap_location, '.')
        shutil.copy(bootstrap_location, '.')
        shutil.copy(adjust_location, '.')

        # Bootstrap the ISB if we are using UFC
        if UserFileCache and '/crabcache' in kw['task']['tm_cache_url']:
            ufc = UserFileCache(mydict={'cert': kw['task']['user_proxy'], 'key': kw['task']['user_proxy'], 'endpoint' : kw['task']['tm_cache_url']})
            try:
                ufc.download(hashkey=kw['task']['tm_user_sandbox'].split(".")[0], output="sandbox.tar.gz")
            except Exception as ex:
                self.logger.exception(ex)
                raise TaskWorkerException("The CRAB3 server backend could not download the input sandbox with your code "+\
                                    "from the frontend (crabcache component).\nThis could be a temporary glitch; please try to submit a new task later "+\
                                    "(resubmit will not work) and contact the experts if the error persists.\nError reason: %s" % str(ex)) #TODO url!?
            kw['task']['tm_user_sandbox'] = 'sandbox.tar.gz'

        # Bootstrap the runtime if it is available.
        job_runtime = getLocation('CMSRunAnalysis.tar.gz', 'CRABServer/')
        shutil.copy(job_runtime, '.')
        task_runtime = getLocation('TaskManagerRun.tar.gz', 'CRABServer/')
        shutil.copy(task_runtime, '.')

        kw['task']['resthost'] = self.server['host']
        kw['task']['resturinoapi'] = self.restURInoAPI
        self.task = kw['task']

        params = {}
        if kw['task']['tm_dry_run'] == 'F':
            params = self.sendDashboardTask()

        inputFiles = ['gWMS-CMSRunAnalysis.sh', 'CMSRunAnalysis.sh', 'cmscp.py', 'RunJobs.dag', 'Job.submit', 'dag_bootstrap.sh', \
                'AdjustSites.py', 'site.ad', 'site.ad.json', 'run_and_lumis.tar.gz', 'input_files.tar.gz']

        self.extractMonitorFiles(inputFiles, **kw)

        if kw['task'].get('tm_user_sandbox') == 'sandbox.tar.gz':
            inputFiles.append('sandbox.tar.gz')
        if os.path.exists("CMSRunAnalysis.tar.gz"):
            inputFiles.append("CMSRunAnalysis.tar.gz")
        if os.path.exists("TaskManagerRun.tar.gz"):
            inputFiles.append("TaskManagerRun.tar.gz")

        info, splitterResult = self.createSubdag(*args, **kw)

        return info, params, inputFiles, splitterResult
Example #6
    def actionWork(self, *args, **kwargs):
        """Performing the set of actions"""
        nextinput = args

        #set the logger to save the tasklog
        formatter = logging.Formatter("%(asctime)s:%(levelname)s:%(module)s:%(message)s")
        taskdirname = "logs/tasks/%s/" % self._task['tm_username']
        if not os.path.isdir(taskdirname):
            os.mkdir(taskdirname)
        taskhandler = FileHandler(taskdirname + self._task['tm_taskname'] + '.log')
        taskhandler.setLevel(logging.DEBUG)
        self.logger.addHandler(taskhandler)

        for work in self.getWorks():
            self.logger.debug("Starting %s on %s" % (str(work), self._task['tm_taskname']))
            t0 = time.time()
            try:
                output = work.execute(nextinput, task=self._task)
            except StopHandler as sh:
                msg = "Controlled stop of handler for %s on %s " % (self._task, str(sh))
                self.logger.error(msg)
                nextinput = Result(task=self._task, result='StopHandler exception received, controlled stop')
                break #exit normally. Worker will not notice there was an error
            except TaskWorkerException as twe:
                self.logger.debug(str(traceback.format_exc())) #print the stacktrace only in debug mode
                self.removeTaskLogHandler(taskhandler)
                raise WorkerHandlerException(str(twe)) #TaskWorker error, do not add traceback to the error propagated to the REST
            except Exception as exc:
                msg = "Problem handling %s because of %s failure, traceback follows\n" % (self._task['tm_taskname'], str(exc))
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                self.removeTaskLogHandler(taskhandler)
                raise WorkerHandlerException(msg) #Errors not foreseen. Print everything!
            finally:
                #upload logfile of the task to the crabcache
                logpath = 'logs/tasks/%s/%s.log' % (self._task['tm_username'], self._task['tm_taskname'])
                if os.path.isfile(logpath) and 'user_proxy' in self._task: #the user proxy might not be there if myproxy retrieval failed
                    cacheurldict = {'endpoint': self._task['tm_cache_url'], 'cert' : self._task['user_proxy'], 'key' : self._task['user_proxy']}
                    try:
                        ufc = UserFileCache(cacheurldict)
                        logfilename = self._task['tm_taskname'] + '_TaskWorker.log'
                        ufc.uploadLog(logpath, logfilename)
                    except HTTPException as hte:
                        msg = ("Failed to upload the logfile to %s for task %s. More details in the http headers and body:\n%s\n%s" %
                               (self._task['tm_cache_url'], self._task['tm_taskname'], hte.headers, hte.result))
                        self.logger.error(msg)
                    except Exception: #pylint: disable=broad-except
                        msg = "Unknown error while uploading the logfile for task %s" % self._task['tm_taskname']
                        self.logger.exception(msg)
            t1 = time.time()
            self.logger.info("Finished %s on %s in %d seconds" % (str(work), self._task['tm_taskname'], t1-t0))
            try:
                nextinput = output.result
            except AttributeError:
                nextinput = output

        self.removeTaskLogHandler(taskhandler)

        return nextinput
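
The per-task logging at the top of actionWork is the standard logging add/remove-handler pattern; a self-contained sketch of the same idea (directory and task name are placeholders, and os.makedirs is used instead of os.mkdir so nested paths work):

import logging
import os
from logging import FileHandler

logger = logging.getLogger('TaskWorker')
taskdirname = 'logs/tasks/username/'  # placeholder directory
if not os.path.isdir(taskdirname):
    os.makedirs(taskdirname)
taskhandler = FileHandler(taskdirname + 'taskname.log')  # placeholder task name
taskhandler.setLevel(logging.DEBUG)
taskhandler.setFormatter(logging.Formatter("%(asctime)s:%(levelname)s:%(module)s:%(message)s"))
logger.addHandler(taskhandler)
try:
    logger.debug('work happens here')
finally:
    # mirror of removeTaskLogHandler: detach and close so the file is flushed
    logger.removeHandler(taskhandler)
    taskhandler.close()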
Example #7
    def actionWork(self, *args, **kwargs):
        """Performing the set of actions"""
        nextinput = args

        taskhandler = self.addTaskLogHandler()

        # I know it looks like a duplicated printout from the process logs (proc.N.log) perspective.
        # In fact we have a similar printout in the processWorker function of the Worker module, but
        # it does not go to the task logfile, and it is useful IMHO.
        self.logger.debug("Process %s is starting %s on task %s" % (self.procnum, self.workFunction, self._task['tm_taskname']))

        for work in self.getWorks():
            #Loop that iterates over the actions to be performed
            self.logger.debug("Starting %s on %s" % (str(work), self._task['tm_taskname']))
            t0 = time.time()
            try:
                output = work.execute(nextinput, task=self._task)
            except StopHandler as sh:
                msg = "Controlled stop of handler for %s on %s " % (self._task, str(sh))
                self.logger.error(msg)
                nextinput = Result(task=self._task, result='StopHandler exception received, controlled stop')
                break #exit normally. Worker will not notice there was an error
            except TaskWorkerException as twe:
                self.logger.debug(str(traceback.format_exc())) #print the stacktrace only in debug mode
                self.removeTaskLogHandler(taskhandler)
                raise WorkerHandlerException(str(twe)) #TaskWorker error, do not add traceback to the error propagated to the REST
            except Exception as exc:
                msg = "Problem handling %s because of %s failure, traceback follows\n" % (self._task['tm_taskname'], str(exc))
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                self.removeTaskLogHandler(taskhandler)
                raise WorkerHandlerException(msg) #Errors not foreseen. Print everything!
            finally:
                #upload logfile of the task to the crabcache
                logpath = 'logs/tasks/%s/%s.log' % (self._task['tm_username'], self._task['tm_taskname'])
                if os.path.isfile(logpath) and 'user_proxy' in self._task: #the user proxy might not be there if myproxy retrieval failed
                    cacheurldict = {'endpoint': self._task['tm_cache_url'], 'cert' : self._task['user_proxy'], 'key' : self._task['user_proxy']}
                    try:
                        ufc = UserFileCache(cacheurldict)
                        logfilename = self._task['tm_taskname'] + '_TaskWorker.log'
                        ufc.uploadLog(logpath, logfilename)
                    except HTTPException as hte:
                        msg = ("Failed to upload the logfile to %s for task %s. More details in the http headers and body:\n%s\n%s" %
                               (self._task['tm_cache_url'], self._task['tm_taskname'], hte.headers, hte.result))
                        self.logger.error(msg)
                    except Exception:
                        msg = "Unknown error while uploading the logfile for task %s" % self._task['tm_taskname']
                        self.logger.exception(msg)
            t1 = time.time()
            self.logger.info("Finished %s on %s in %d seconds" % (str(work), self._task['tm_taskname'], t1 - t0))
            try:
                nextinput = output.result
            except AttributeError:
                nextinput = output

        self.removeTaskLogHandler(taskhandler)

        return nextinput
Example #8
    def __call__(self):

        self.logger.info('Getting the tarball hash key')

        tarballdir = glob.glob(self.requestarea+'/inputs/*.tgz')
        if len(tarballdir) != 1:
            self.logger.info('%sError%s: Could not find tarball or there is more than one tarball' % (colors.RED, colors.NORMAL))
            raise ConfigurationException
        tarballdir = tarballdir[0]

        #checking task status

        self.logger.info('Checking task status')
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        dictresult, status, reason = server.get(self.uri, data={'workflow': self.cachedinfo['RequestName'], 'verbose': 0})

        dictresult = dictresult['result'][0] #take just the significant part

        if status != 200:
            msg = "Problem retrieving task status:\ninput: %s\noutput: %s\nreason: %s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        self.logger.info('Task status: %s' % dictresult['status'])
        accepstate = ['KILLED','FINISHED','FAILED','KILLFAILED', 'COMPLETED']
        if dictresult['status'] not in accepstate:
            msg = ('%sError%s: Only tasks with one of these statuses can be purged: {0}'.format(accepstate) % (colors.RED, colors.NORMAL))
            raise ConfigurationException(msg)

        #getting the cache url
        cacheresult = {}
        scheddresult = {}
        gsisshdict = {}
        if not self.options.scheddonly:
            baseurl = getUrl(self.instance, resource='info')
            cacheurl = server_info('backendurls', self.serverurl, self.proxyfilename, baseurl)
            cacheurl = cacheurl['cacheSSL']
            cacheurldict = {'endpoint': cacheurl, 'pycurl': True}

            ufc = UserFileCache(cacheurldict)
            hashkey = ufc.checksum(tarballdir)
            self.logger.info('Tarball hashkey: %s' % hashkey)
            self.logger.info('Attempting to remove task file from crab server cache')

            try:
                ufcresult = ufc.removeFile(hashkey)
            except HTTPException as re:
                if 'X-Error-Info' in re.headers and 'Not such file' in re.headers['X-Error-Info']:
                    self.logger.info('%sError%s: Failed to find task file in crab server cache; the file might have been already purged' % (colors.RED, colors.NORMAL))
                    raise

            if ufcresult == '':
                self.logger.info('%sSuccess%s: Successfully removed task files from crab server cache' % (colors.GREEN, colors.NORMAL))
                cacheresult = 'SUCCESS'
            else:
                self.logger.info('%sError%s: Failed to remove task files from crab server cache' % (colors.RED, colors.NORMAL))
                cacheresult = 'FAILED'
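
Stripped of the status checks, the cache purge above is two calls: checksum(), whose result doubles as the file's hashkey, and removeFile(). A minimal sketch (endpoint and tarball path are placeholders):

from WMCore.Services.UserFileCache.UserFileCache import UserFileCache

ufc = UserFileCache({'endpoint': 'https://cmsweb.cern.ch/crabcache', 'pycurl': True})  # placeholder endpoint
hashkey = ufc.checksum('inputs/sandbox.tgz')  # placeholder tarball path
result = ufc.removeFile(hashkey)  # an empty string signals success in the examples above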
Example #9
def uploadPublishWorkflow(config, workflow, ufcEndpoint, workDir):
    """
    Write out and upload to the UFC a JSON file
    with all the info needed to publish this dataset later
    """
    retok, proxyfile = getProxy(config, workflow.dn, workflow.vogroup, workflow.vorole)
    if not retok:
        logging.info("Cannot get the user's proxy")
        return False

    ufc = UserFileCache({'endpoint': ufcEndpoint, 'cert': proxyfile, 'key': proxyfile})

    # Skip tasks ending in LogCollect, they have nothing interesting.
    taskNameParts = workflow.task.split('/')
    if taskNameParts.pop() in ['LogCollect']:
        logging.info('Skipping LogCollect task')
        return False
    logging.info('Generating JSON for publication of %s of type %s' % (workflow.name, workflow.wfType))

    myThread = threading.currentThread()

    dbsDaoFactory = DAOFactory(package = "WMComponent.DBS3Buffer",
                                logger = myThread.logger, dbinterface = myThread.dbi)
    findFiles = dbsDaoFactory(classname = "LoadFilesByWorkflow")

    # Fetch and filter the files to the ones we actually need
    uploadDatasets = {}
    uploadFiles = findFiles.execute(workflowName = workflow.name)
    for file in uploadFiles:
        datasetName = file['datasetPath']
        if datasetName not in uploadDatasets:
            uploadDatasets[datasetName] = []
        uploadDatasets[datasetName].append(file)

    if not uploadDatasets:
        logging.info('No datasets found to upload.')
        return False

    # Write JSON file and then create tarball with it
    baseName = '%s_publish.tgz' % workflow.name
    jsonName = os.path.join(workDir, '%s_publish.json' % workflow.name)
    tgzName = os.path.join(workDir, baseName)
    with open(jsonName, 'w') as jsonFile:
        json.dump(uploadDatasets, fp=jsonFile, cls=FileEncoder, indent=2)

    # Only in 2.7 does tarfile become usable as context manager
    tgzFile = tarfile.open(name=tgzName, mode='w:gz')
    tgzFile.add(jsonName)
    tgzFile.close()

    result = ufc.upload(fileName=tgzName, name=baseName)
    logging.debug('Upload result %s' % result)
    # If this doesn't work, exception will propagate up and block archiving the task
    logging.info('Uploaded with name %s and hashkey %s' % (result['name'], result['hashkey']))
    return
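
The comment above notes that tarfile only became usable as a context manager in Python 2.7; on 2.7 and later the explicit open/close pair can be written as follows (file names are placeholders for the ones the example builds under workDir):

import tarfile

jsonName = 'workflow_publish.json'  # placeholder
tgzName = 'workflow_publish.tgz'    # placeholder
with tarfile.open(name=tgzName, mode='w:gz') as tgzFile:  # the file is closed even if add() raises
    tgzFile.add(jsonName)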
Example #10
    def executeInternal(self, *args, **kw):

        cwd = None
        if hasattr(self.config, 'TaskWorker') and hasattr(self.config.TaskWorker, 'scratchDir'):
            temp_dir = tempfile.mkdtemp(prefix='_' + kw['task']['tm_taskname'], dir=self.config.TaskWorker.scratchDir)

            # FIXME: In PanDA, we provided the executable as a URL.
            # So, the filename becomes http:// -- and doesn't really work.  Hardcoding the analysis wrapper.
            #transform_location = getLocation(kw['task']['tm_transformation'], 'CAFUtilities/src/python/transformation/CMSRunAnalysis/')
            transform_location = getLocation('CMSRunAnalysis.sh', 'CRABServer/scripts/')
            cmscp_location = getLocation('cmscp.py', 'CRABServer/scripts/')
            gwms_location = getLocation('gWMS-CMSRunAnalysis.sh', 'CRABServer/scripts/')
            dag_bootstrap_location = getLocation('dag_bootstrap_startup.sh', 'CRABServer/scripts/')
            bootstrap_location = getLocation("dag_bootstrap.sh", "CRABServer/scripts/")
            adjust_location = getLocation("AdjustSites.py", "CRABServer/scripts/")

            cwd = os.getcwd()
            os.chdir(temp_dir)
            shutil.copy(transform_location, '.')
            shutil.copy(cmscp_location, '.')
            shutil.copy(gwms_location, '.')
            shutil.copy(dag_bootstrap_location, '.')
            shutil.copy(bootstrap_location, '.')
            shutil.copy(adjust_location, '.')

            # Bootstrap the ISB if we are using UFC
            if UserFileCache and '/crabcache' in kw['task']['tm_cache_url']:
                ufc = UserFileCache(dict={'cert': kw['task']['user_proxy'], 'key': kw['task']['user_proxy'], 'endpoint' : kw['task']['tm_cache_url']})
                ufc.download(hashkey=kw['task']['tm_user_sandbox'].split(".")[0], output="sandbox.tar.gz")
                kw['task']['tm_user_sandbox'] = 'sandbox.tar.gz'

            # Bootstrap the runtime if it is available.
            job_runtime = getLocation('CMSRunAnalysis.tar.gz', 'CRABServer/')
            shutil.copy(job_runtime, '.')
            task_runtime = getLocation('TaskManagerRun.tar.gz', 'CRABServer/')
            shutil.copy(task_runtime, '.')

            kw['task']['scratch'] = temp_dir

        kw['task']['restinstance'] = self.server['host']
        kw['task']['resturl'] = self.resturl.replace("/workflowdb", "/filemetadata")
        self.task = kw['task']
        params = self.sendDashboardTask()

        try:
            info = self.createSubdag(*args, **kw)
        finally:
            if cwd:
                os.chdir(cwd)

        return TaskWorker.DataObjects.Result.Result(task=kw['task'], result=(temp_dir, info, params))
Example #11
    def upload(self):
        """
        Upload the tarball to the File Cache
        """
        self.close()
        archiveName = self.tarfile.name
        self.logger.debug(" uploading archive to cache %s " % archiveName)
        ufc = UserFileCache({'endpoint': self.config.JobType.filecacheurl})
        result = ufc.upload(archiveName)
        if 'hashkey' not in result:
            self.logger.error("Failed to upload source files: %s" % str(result))
            raise CachefileNotFoundException
        return self.config.JobType.filecacheurl, str(result['hashkey']) + '.tar.gz', self.checksum
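
The hashkey check above is the core contract of upload(): on success the returned dict carries 'hashkey', from which the sandbox's cache name is derived. A minimal sketch (endpoint and archive path are placeholders):

from WMCore.Services.UserFileCache.UserFileCache import UserFileCache

ufc = UserFileCache({'endpoint': 'https://cmsweb.cern.ch/crabcache'})  # placeholder endpoint
result = ufc.upload('archive.tar.gz')  # placeholder archive path
if 'hashkey' not in result:
    raise RuntimeError('Failed to upload source files: %s' % str(result))
sandboxName = str(result['hashkey']) + '.tar.gz'  # the name the cache will know the sandbox by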
Example #12
    def testUploadDownload(self):
        if "UFCURL" in os.environ:
            currdir = getTestBase()
            upfile = path.join(currdir, "WMCore_t/Services_t/UserFileCache_t/test_file.tgz")  # file to upload
            upfileLog = path.join(currdir, "WMCore_t/Services_t/UserFileCache_t/uplog.txt")  # file to upload
            ufc = UserFileCache({"endpoint": os.environ["UFCURL"], "pycurl": True})

            # hashkey upload/download
            res = ufc.upload(upfile)
            ufc.download(res["hashkey"], output="pippo_publish_down.tgz")

            # hashkey deletion
            ufc.removeFile(res["hashkey"])

            # log upload/download
            res = ufc.uploadLog(upfileLog)
            ufc.downloadLog(upfileLog, upfileLog + ".downloaded")
            self.assertTrue(filecmp.cmp(upfileLog, upfileLog + ".downloaded"))
Example #13
    def testUploadDownload(self):
        if 'UFCURL' in os.environ:
            currdir = getTestBase()
            upfile = path.join(currdir, 'WMCore_t/Services_t/UserFileCache_t/test_file.tgz') #file to upload
            upfileLog = path.join(currdir, 'WMCore_t/Services_t/UserFileCache_t/uplog.txt') #file to upload
            ufc = UserFileCache({'endpoint':os.environ['UFCURL'], 'pycurl': True})

            #hashkey upload/download
            res = ufc.upload(upfile)
            ufc.download(res['hashkey'], output='pippo_publish_down.tgz')

            #hashkey deletion
            ufc.removeFile(res['hashkey'])

            #log upload/download
            res = ufc.uploadLog(upfileLog)
            ufc.downloadLog(upfileLog, upfileLog+'.downloaded')
            self.assertTrue(filecmp.cmp(upfileLog, upfileLog+'.downloaded'))
Example #14
    def testChecksum(self):
        """
        Tests checksum method
        """
        self.ufc = UserFileCache()
        checksum1 = self.ufc.checksum(fileName=path.join(getTestBase(), 'WMCore_t/Services_t/UserFileCache_t/ewv_crab_EwvAnalysis_31_111229_140959_publish.tgz'))
        checksum2 = self.ufc.checksum(fileName=path.join(getTestBase(), 'WMCore_t/Services_t/UserFileCache_t/ewv_crab_EwvAnalysis_31_resubmit_111229_144319_publish.tgz'))
        self.assertTrue(checksum1)
        self.assertTrue(checksum2)
        self.assertNotEqual(checksum1, checksum2)

        self.assertRaises(IOError, self.ufc.checksum, **{'fileName': 'does_not_exist'})
        return
Example #15
class UserFileCacheTest(unittest.TestCase):
    """
    Unit tests for UserFileCache Service
    """

    def setUp(self):
        """
        Setup for unit tests
        """
        self.ufc = UserFileCache()

    def testChecksum(self):
        """
        Tests checksum method
        """
        checksum1 = self.ufc.checksum(fileName=path.join(getTestBase(), '../data/ewv_crab_EwvAnalysis_31_111229_140959_publish.tgz'))
        checksum2 = self.ufc.checksum(fileName=path.join(getTestBase(), '../data/ewv_crab_EwvAnalysis_31_resubmit_111229_144319_publish.tgz'))
        self.assertTrue(checksum1)
        self.assertTrue(checksum2)
        self.assertNotEqual(checksum1, checksum2)

        self.assertRaises(IOError, self.ufc.checksum, **{'fileName': 'does_not_exist'})
        return
Example #16
    def testUploadDownload(self):
        if "UFCURL" in os.environ:
            currdir = getTestBase()
            upfile = path.join(currdir, "WMCore_t/Services_t/UserFileCache_t/test_file.tgz")  # file to upload
            ufc = UserFileCache({"endpoint": os.environ["UFCURL"]})

            # named upload/download
            res = ufc.upload(upfile, "name_publish.tgz")
            ufc.download(name=res["name"], output="name_publish.tgz")

            # hashkey upload/download
            res = ufc.upload(upfile)
            ufc.download(res["hashkey"], output="pippo_publish_down.tgz")
Example #17
    def testUploadDownload(self):
        if 'UFCURL' in os.environ:
            currdir = getTestBase()
            upfile = path.join(currdir, 'WMCore_t/Services_t/UserFileCache_t/test_file.tgz') #file to upload
            ufc = UserFileCache({'endpoint':os.environ['UFCURL']})

            #named upload/download
            res = ufc.upload(upfile, 'name_publish.tgz')
            ufc.download(name=res['name'], output='name_publish.tgz')

            #hashkey upload/download
            res = ufc.upload(upfile)
            ufc.download(res['hashkey'], output='pippo_publish_down.tgz')
Example #18
class UserFileCacheTest(unittest.TestCase):
    """
    Unit tests for UserFileCache Service
    """

    def testChecksum(self):
        """
        Tests checksum method
        """
        self.ufc = UserFileCache()
        checksum1 = self.ufc.checksum(
            fileName=path.join(getTestBase(), "../data/ewv_crab_EwvAnalysis_31_111229_140959_publish.tgz")
        )
        checksum2 = self.ufc.checksum(
            fileName=path.join(getTestBase(), "../data/ewv_crab_EwvAnalysis_31_resubmit_111229_144319_publish.tgz")
        )
        self.assertTrue(checksum1)
        self.assertTrue(checksum2)
        self.assertNotEqual(checksum1, checksum2)

        self.assertRaises(IOError, self.ufc.checksum, **{"fileName": "does_not_exist"})
        return

    def testUploadDownload(self):
        if "UFCURL" in os.environ:
            currdir = getTestBase()
            upfile = path.join(currdir, "WMCore_t/Services_t/UserFileCache_t/test_file.tgz")  # file to upload
            ufc = UserFileCache({"endpoint": os.environ["UFCURL"]})

            # named upload/download
            res = ufc.upload(upfile, "name_publish.tgz")
            ufc.download(name=res["name"], output="name_publish.tgz")

            # hashkey upload/download
            res = ufc.upload(upfile)
            ufc.download(res["hashkey"], output="pippo_publish_down.tgz")
Example #19
    def testChecksum(self):
        """
        Tests checksum method
        """
        self.ufc = UserFileCache()
        checksum1 = self.ufc.checksum(
            fileName=path.join(getTestBase(), "../data/ewv_crab_EwvAnalysis_31_111229_140959_publish.tgz")
        )
        checksum2 = self.ufc.checksum(
            fileName=path.join(getTestBase(), "../data/ewv_crab_EwvAnalysis_31_resubmit_111229_144319_publish.tgz")
        )
        self.assertTrue(checksum1)
        self.assertTrue(checksum2)
        self.assertNotEqual(checksum1, checksum2)

        self.assertRaises(IOError, self.ufc.checksum, **{"fileName": "does_not_exist"})
        return
Example #20
def uploadlogfile(logger,
                  proxyfilename,
                  taskname=None,
                  logfilename=None,
                  logpath=None,
                  instance=None,
                  serverurl=None,
                  username=None):
    ## WMCore dependencies. Moved here to minimize dependencies in the bootstrap script
    from WMCore.Services.UserFileCache.UserFileCache import UserFileCache

    doupload = True

    if logfilename is None:
        logfilename = str(time.strftime("%Y-%m-%d_%H%M%S")) + '_crab.log'

    logger.info('Fetching user environment to log file')

    try:
        logger.debug('Running env command')
        stdout, _, _ = execute_command(command='env')
        logger.debug('\n\n\nUSER ENVIRONMENT\n%s' % stdout)
    except Exception as se:
        logger.debug('Failed to get the user env\nException message: %s' %
                     (se))

    if logpath is not None:
        if not os.path.exists(logpath):
            doupload = False
            logger.debug('%sError%s: %s does not exist' %
                         (colors.RED, colors.NORMAL, logpath))
    else:
        if os.path.exists(str(os.getcwd()) + '/crab.log'):
            logpath = str(os.getcwd()) + '/crab.log'
        else:
            logger.debug(
                '%sError%s: Failed to find crab.log in current directory %s' %
                (colors.RED, colors.NORMAL, str(os.getcwd())))

    if proxyfilename is None:
        logger.debug('No proxy was given')
        doupload = False

    if doupload:
        # uploadLog is executed directly from crab main script, does not inherit from SubCommand
        # so it needs its own REST server instantiation
        restClass = CRABClient.Emulator.getEmulator('rest')
        crabserver = restClass(hostname=serverurl,
                               localcert=proxyfilename,
                               localkey=proxyfilename,
                               retry=2,
                               logger=logger,
                               verbose=False,
                               version=__version__,
                               userAgent='CRABClient')
        crabserver.setDbInstance(instance)
        cacheurl = server_info(crabserver=crabserver,
                               subresource='backendurls')['cacheSSL']

        logger.info("Uploading log file...")
        if 'S3' in cacheurl.upper():
            objecttype = 'clientlog'
            uploadToS3(crabserver=crabserver,
                       filepath=logpath,
                       objecttype=objecttype,
                       taskname=taskname,
                       logger=logger)
            logfileurl = getDownloadUrlFromS3(crabserver=crabserver,
                                              objecttype=objecttype,
                                              taskname=taskname,
                                              logger=logger)
        else:
            cacheurldict = {'endpoint': cacheurl, "pycurl": True}
            ufc = UserFileCache(cacheurldict)
            logger.debug("cacheURL: %s\nLog file name: %s" %
                         (cacheurl, logfilename))
            ufc.uploadLog(logpath, logfilename)
            logfileurl = cacheurl + '/logfile?name=' + str(logfilename)
            if not username:
                from CRABClient.UserUtilities import getUsername
                username = getUsername(proxyFile=proxyfilename, logger=logger)
            logfileurl += '&username=' + str(username)
    else:
        logger.info('Failed to upload the log file')
        logfileurl = False

    return logfileurl
Example #21
    def uploadSandboxToUFC(self, url, sandbox=None, name="YAATFile"):
        if not sandbox:
            sandbox = self.sandbox
        req = UserFileCache({'endpoint': url})
        return req.upload(sandbox, name)
Example #22
    def executeInternal(self, *args, **kw):
        inputFiles = args[0][2]
        splitterResult = args[0][3][0]

        cwd = os.getcwd()
        try:
            os.chdir(kw['tempDir'])
            splittingSummary = SplittingSummary(kw['task']['tm_split_algo'])
            for jobgroup in splitterResult:
                jobs = jobgroup.getJobs()
                splittingSummary.addJobs(jobs)
            splittingSummary.dump('splitting-summary.json')
            inputFiles.append('splitting-summary.json')

            self.packSandbox(inputFiles)

            self.logger.info(
                'Uploading dry run tarball to the user file cache')
            t0 = time.time()
            if 'S3' in kw['task']['tm_cache_url'].upper():
                uploadToS3(crabserver=self.crabserver,
                           filepath='dry-run-sandbox.tar.gz',
                           objecttype='runtimefiles',
                           taskname=kw['task']['tm_taskname'],
                           logger=self.logger)
                result = {
                    'hashkey': 'ok'
                }  # a dummy one to keep same semantics as when using UserFileCache
                os.remove('dry-run-sandbox.tar.gz')
            else:
                ufc = UserFileCache(
                    mydict={
                        'cert': kw['task']['user_proxy'],
                        'key': kw['task']['user_proxy'],
                        'endpoint': kw['task']['tm_cache_url']
                    })
                result = ufc.uploadLog('dry-run-sandbox.tar.gz')
                os.remove('dry-run-sandbox.tar.gz')
            if 'hashkey' not in result:
                raise TaskWorkerException(
                    'Failed to upload dry-run-sandbox.tar.gz to the user file cache: '
                    + str(result))
            self.logger.info(
                'Uploaded dry run tarball to the user file cache: %s',
                str(result))
            # wait until tarball is available, S3 may take a few seconds for this (ref. issue #6706 )
            t1 = time.time()
            lt1 = time.strftime("%H:%M:%S", time.localtime(t1))
            uploadTime = t1 - t0
            self.logger.debug(
                'runtimefiles upload took %s secs and completed at %s',
                uploadTime, lt1)
            self.logger.debug('check if tarball is available')
            tarballOK = False
            while not tarballOK:
                try:
                    self.logger.debug('download tarball to /dev/null')
                    downloadFromS3(crabserver=self.crabserver,
                                   filepath='/dev/null',
                                   objecttype='runtimefiles',
                                   taskname=kw['task']['tm_taskname'],
                                   logger=self.logger)
                    self.logger.debug('OK, it worked')
                    tarballOK = True
                except Exception as e:
                    self.logger.debug('runtimefiles tarball not ready yet')
                    self.logger.debug('Exception was raised: %s', e)
                    self.logger.debug('Sleep 5 sec')
                    time.sleep(5)
            update = {
                'workflow': kw['task']['tm_taskname'],
                'subresource': 'state',
                'status': 'UPLOADED'
            }
            self.logger.debug('Updating task status: %s', str(update))
            self.crabserver.post(api='workflowdb',
                                 data=urllib.urlencode(update))

        finally:
            os.chdir(cwd)

        return Result(task=kw['task'], result=args[0])
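
The availability poll above loops forever if the tarball never appears. A bounded variant is a drop-in replacement for that while loop (a sketch; maxRetries is an assumption, the other names come from the example):

import time

maxRetries = 20  # assumption: give S3 up to ~100 seconds
for attempt in range(maxRetries):
    try:
        downloadFromS3(crabserver=self.crabserver, filepath='/dev/null',
                       objecttype='runtimefiles',
                       taskname=kw['task']['tm_taskname'], logger=self.logger)
        break  # tarball is available
    except Exception:
        time.sleep(5)
else:
    raise TaskWorkerException('runtimefiles tarball still not available after %d checks' % maxRetries)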
Example #23
        logger.debug('%sError%s: serverurl is None' %
                     (colors.RED, colors.NORMAL))
        doupload = False

    if proxyfilename is None:
        logger.debug('No proxy was given')
        doupload = False

    baseurl = getUrl(instance=instance, resource='info')
    if doupload:
        cacheurl = server_info('backendurls', serverurl, proxyfilename,
                               baseurl)
        cacheurl = cacheurl['cacheSSL']
        cacheurldict = {'endpoint': cacheurl}

        ufc = UserFileCache(cacheurldict)
        logger.debug("cacheURL: %s\nLog file name: %s" %
                     (cacheurl, logfilename))
        logger.info("Uploading log file...")
        ufc.uploadLog(logpath, logfilename)
        logger.info("%sSuccess%s: Log file uploaded successfully." %
                    (colors.GREEN, colors.NORMAL))
        logfileurl = cacheurl + '/logfile?name=' + str(logfilename)
        if not username:
            username = getUsernameFromSiteDB_wrapped(logger, quiet=True)
        if username:
            logfileurl += '&username=' + str(username)
    else:
        logger.info('Failed to upload the log file')
        logfileurl = False
Example #24
        instance = 'private'
    elif instance not in SERVICE_INSTANCES.keys() and serverurl is None:
        logger.debug('%sError%s: serverurl is None' % (colors.RED, colors.NORMAL))
        doupload = False

    if proxyfilename is None:
        logger.debug('No proxy was given')
        doupload = False

    baseurl = getUrl(instance=instance, resource='info')
    if doupload:
        cacheurl = server_info('backendurls', serverurl, proxyfilename, baseurl)
        cacheurl = cacheurl['cacheSSL']
        cacheurldict = {'endpoint': cacheurl}

        ufc = UserFileCache(cacheurldict)
        logger.debug("cacheURL: %s\nLog file name: %s" % (cacheurl, logfilename))
        logger.info("Uploading log file...")
        ufc.uploadLog(logpath, logfilename)
        logger.info("%sSuccess%s: Log file uploaded successfully." % (colors.GREEN, colors.NORMAL))
        logfileurl = cacheurl + '/logfile?name='+str(logfilename)
        if not username:
            username = getUsernameFromSiteDB_wrapped(logger, quiet = True)
        if username:
            logfileurl += '&username=' + str(username)
    else:
        logger.info('Failed to upload the log file')
        logfileurl = False
Example #25
    def __call__(self):

        self.logger.info('Getting the tarball hash key')

        tarballdir = glob.glob(self.requestarea + '/inputs/*.tgz')
        if len(tarballdir) != 1:
            self.logger.info('%sError%s: Could not find tarball or there is more than one tarball' % (colors.RED, colors.NORMAL))
            raise ConfigurationException
        tarballdir = tarballdir[0]

        #checking task status

        self.logger.info('Checking task status')
        server = HTTPRequests(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        dictresult, status, reason = server.get(self.uri, data={'workflow': self.cachedinfo['RequestName'], 'verbose': 0})

        dictresult = dictresult['result'][0] #take just the significant part

        if status != 200:
            msg = "Problem retrieving status:\ninput:%s\noutput:%s\nreason:%s" % (str(self.cachedinfo['RequestName']), str(dictresult), str(reason))
            raise RESTCommunicationException(msg)

        self.logger.info('Task status: %s' % dictresult['status'])
        accepstate = ['KILLED','FINISHED','FAILED','KILLFAILED', 'COMPLETED']
        if dictresult['status'] not in accepstate:
            msg = ('%sError%s: Only tasks with one of these statuses can be purged: {0}'.format(accepstate) % (colors.RED, colors.NORMAL))
            raise ConfigurationException(msg)

        #getting the cache url

        if not self.options.scheddonly:
            baseurl = self.getUrl(self.instance, resource='info')
            cacheurl = server_info('backendurls', self.serverurl, self.proxyfilename, baseurl)
            cacheurl = cacheurl['cacheSSL']
            cacheurldict = {'endpoint': cacheurl, 'pycurl': True}

            ufc = UserFileCache(cacheurldict)
            hashkey = ufc.checksum(tarballdir)
            self.logger.info('Tarball hashkey: %s' % hashkey)
            self.logger.info('Attempting to clean user file cache')
            ufcresult = ufc.removeFile(hashkey)
            if ufcresult == '':
                self.logger.info('%sSuccess%s: Successfully removed file from cache' % (colors.GREEN, colors.NORMAL))
            else:
                self.logger.info('%sError%s: Failed to remove the file from cache' % (colors.RED, colors.NORMAL))

        if not self.options.cacheonly:
            self.logger.info('Getting the schedd address')
            baseurl = self.getUrl(self.instance, resource='info')
            try:
                scheddaddress = server_info('scheddaddress', self.serverurl, self.proxyfilename, baseurl, workflow=self.cachedinfo['RequestName'])
            except HTTPException as he:
                self.logger.info('%sError%s: Failed to get the schedd address' % (colors.RED, colors.NORMAL))
                raise
            self.logger.debug('%sSuccess%s: Successfully got the schedd address' % (colors.GREEN, colors.NORMAL))
            self.logger.debug('Schedd address: %s' % scheddaddress)
            self.logger.info('Attempting to remove task files from the schedd')

            gssishrm = 'gsissh -o ConnectTimeout=60 -o PasswordAuthentication=no ' + scheddaddress + ' rm -rf ' + self.cachedinfo['RequestName']
            self.logger.debug('gsissh command: %s' % gssishrm)

            delprocess = subprocess.Popen(gssishrm, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
            stdout, stderr = delprocess.communicate()
            exitcode = delprocess.returncode

            if exitcode == 0:
                self.logger.info('%sSuccess%s: Successfully removed task from schedd' % (colors.GREEN, colors.NORMAL))
            else:
                self.logger.info('%sError%s: Failed to remove task from schedd' % (colors.RED, colors.NORMAL))
                self.logger.debug('gsissh stdout: %s\ngsissh stderr: %s\ngsissh exitcode: %s' % (stdout, stderr, exitcode))
Example #26
    def __call__(self):

        self.logger.info('Getting the tarball hash key')

        tarballdir = glob.glob(self.requestarea + '/inputs/*.tgz')
        if len(tarballdir) != 1:
            self.logger.info(
                '%sError%s: Could not find tarball or there is more than one tarball'
                % (colors.RED, colors.NORMAL))
            raise ConfigurationException
        tarballdir = tarballdir[0]

        #checking task status

        self.logger.info('Checking task status')
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl,
                               self.proxyfilename,
                               self.proxyfilename,
                               version=__version__)
        dictresult, status, reason = server.get(self.uri,
                                                data={'workflow': self.cachedinfo['RequestName'],
                                                      'verbose': 0})

        dictresult = dictresult['result'][0]  #take just the significant part

        if status != 200:
            msg = "Problem retrieving task status:\ninput: %s\noutput: %s\nreason: %s" % (
                str(self.cachedinfo['RequestName']), str(dictresult),
                str(reason))
            raise RESTCommunicationException(msg)

        self.logger.info('Task status: %s' % dictresult['status'])
        accepstate = [
            'KILLED', 'FINISHED', 'FAILED', 'KILLFAILED', 'COMPLETED'
        ]
        if dictresult['status'] not in accepstate:
            msg = ('%sError%s: Only tasks with one of these statuses can be purged: {0}'
                   .format(accepstate) % (colors.RED, colors.NORMAL))
            raise ConfigurationException(msg)

        #getting the cache url
        cacheresult = {}
        scheddresult = {}
        gsisshdict = {}
        if not self.options.scheddonly:
            baseurl = getUrl(self.instance, resource='info')
            cacheurl = server_info('backendurls', self.serverurl,
                                   self.proxyfilename, baseurl)
            cacheurl = cacheurl['cacheSSL']
            cacheurldict = {'endpoint': cacheurl, 'pycurl': True}

            ufc = UserFileCache(cacheurldict)
            hashkey = ufc.checksum(tarballdir)
            self.logger.info('Tarball hashkey: %s' % hashkey)
            self.logger.info(
                'Attempting to remove task file from crab server cache')

            try:
                ufcresult = ufc.removeFile(hashkey)
            except HTTPException as re:
                if 'X-Error-Info' in re.headers and 'Not such file' in re.headers['X-Error-Info']:
                    self.logger.info(
                        '%sError%s: Failed to find task file in crab server cache; the file might have been already purged'
                        % (colors.RED, colors.NORMAL))
                    raise

            if ufcresult == '':
                self.logger.info(
                    '%sSuccess%s: Successfully removed task files from crab server cache'
                    % (colors.GREEN, colors.NORMAL))
                cacheresult = 'SUCCESS'
            else:
                self.logger.info(
                    '%sError%s: Failed to remove task files from crab server cache'
                    % (colors.RED, colors.NORMAL))
                cacheresult = 'FAILED'
Example #27
def uploadPublishWorkflow(config, workflow, ufcEndpoint, workDir):
    """
    Write out and upload to the UFC a JSON file
    with all the info needed to publish this dataset later
    """
    retok, proxyfile = getProxy(config, workflow.dn, workflow.vogroup,
                                workflow.vorole)
    if not retok:
        logging.info("Cannot get the user's proxy")
        return False

    ufc = UserFileCache({
        'endpoint': ufcEndpoint,
        'cert': proxyfile,
        'key': proxyfile
    })

    # Skip tasks ending in LogCollect, they have nothing interesting.
    taskNameParts = workflow.task.split('/')
    if taskNameParts.pop() in ['LogCollect']:
        logging.info('Skipping LogCollect task')
        return False
    logging.info('Generating JSON for publication of %s of type %s' %
                 (workflow.name, workflow.wfType))

    myThread = threading.currentThread()

    dbsDaoFactory = DAOFactory(package="WMComponent.DBS3Buffer",
                               logger=myThread.logger,
                               dbinterface=myThread.dbi)
    findFiles = dbsDaoFactory(classname="LoadFilesByWorkflow")

    # Fetch and filter the files to the ones we actually need
    uploadDatasets = {}
    uploadFiles = findFiles.execute(workflowName=workflow.name)
    for file in uploadFiles:
        datasetName = file['datasetPath']
        if datasetName not in uploadDatasets:
            uploadDatasets[datasetName] = []
        uploadDatasets[datasetName].append(file)

    if not uploadDatasets:
        logging.info('No datasets found to upload.')
        return False

    # Write JSON file and then create tarball with it
    baseName = '%s_publish.tgz' % workflow.name
    jsonName = os.path.join(workDir, '%s_publish.json' % workflow.name)
    tgzName = os.path.join(workDir, baseName)
    with open(jsonName, 'w') as jsonFile:
        json.dump(uploadDatasets, fp=jsonFile, cls=FileEncoder, indent=2)

    # Only in 2.7 does tarfile become usable as context manager
    tgzFile = tarfile.open(name=tgzName, mode='w:gz')
    tgzFile.add(jsonName)
    tgzFile.close()

    result = ufc.upload(fileName=tgzName, name=baseName)
    logging.debug('Upload result %s' % result)
    # If this doesn't work, exception will propagate up and block archiving the task
    logging.info('Uploaded with name %s and hashkey %s' %
                 (result['name'], result['hashkey']))
    return
Example #28
    def __call__(self):

        self.logger.info('Getting the tarball hash key')
        inputlist = {'subresource': 'search', 'workflow': self.cachedinfo['RequestName']}
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        uri = self.getUrl(self.instance, resource = 'task')
        dictresult, _, _ =  server.get(uri, data = inputlist)

        tm_user_sandbox = getColumn(dictresult, 'tm_user_sandbox')
        hashkey = tm_user_sandbox.replace(".tar.gz","")

        # Get the schedd address from the DB info and strip off the 'crab3@' prefix if it exists
        scheddaddress = getColumn(dictresult, 'tm_schedd')
        scheddaddress = scheddaddress.split('@')[1] if '@' in scheddaddress else scheddaddress

        self.logger.info('Checking task status')
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        dictresult, _, _ = server.get(self.uri, data = {'workflow': self.cachedinfo['RequestName'], 'verbose': 0})

        dictresult = dictresult['result'][0] #take just the significant part

        self.logger.info('Task status: %s' % dictresult['status'])
        accepstate = ['SUBMITFAILED','KILLED','FINISHED','FAILED','KILLFAILED', 'COMPLETED']
        if dictresult['status'] not in accepstate:
            msg = ('%sError%s: Only tasks with one of these statuses can be purged: {0}'.format(accepstate) % (colors.RED, colors.NORMAL))
            raise ConfigurationException(msg)

        #getting the cache url
        cacheresult = {}
        scheddresult = {}
        gsisshdict = {}
        if not self.options.scheddonly:
            baseurl = getUrl(self.instance, resource='info')
            cacheurl = server_info('backendurls', self.serverurl, self.proxyfilename, baseurl)
            cacheurl = cacheurl['cacheSSL']
            cacheurldict = {'endpoint': cacheurl, 'pycurl': True}

            ufc = UserFileCache(cacheurldict)
            self.logger.info('Tarball hashkey: %s' % hashkey)
            self.logger.info('Attempting to remove task file from crab server cache')

            try:
                ufcresult = ufc.removeFile(hashkey)
            except HTTPException as re:
                if 'X-Error-Info' in re.headers and 'Not such file' in re.headers['X-Error-Info']:
                    self.logger.info('%sError%s: Failed to find task file in crab server cache; the file might have been already purged' % (colors.RED,colors.NORMAL))
                raise

            if ufcresult == '':
                self.logger.info('%sSuccess%s: Successfully removed task files from crab server cache' % (colors.GREEN, colors.NORMAL))
                cacheresult = 'SUCCESS'
            else:
                self.logger.info('%sError%s: Failed to remove task files from crab server cache' % (colors.RED, colors.NORMAL))
                cacheresult = 'FAILED'

        if not self.options.cacheonly:
            self.logger.debug('%sSuccess%s: Successfully got schedd address' % (colors.GREEN, colors.NORMAL))
            self.logger.debug('Schedd address: %s' % scheddaddress)
            self.logger.info('Attempting to remove task from schedd')

            gssishrm = 'gsissh -o ConnectTimeout=60 -o PasswordAuthentication=no ' + scheddaddress + ' rm -rf ' + self.cachedinfo['RequestName']
            self.logger.debug('gsissh command: %s' % gssishrm)

            delprocess = subprocess.Popen(gssishrm, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
            stdout, stderr = delprocess.communicate()
            exitcode = delprocess.returncode

            if exitcode == 0:
                self.logger.info('%sSuccess%s: Successfully removed task from schedd' % (colors.GREEN, colors.NORMAL))
                scheddresult = 'SUCCESS'
                gsisshdict = {}
            else:
                self.logger.info('%sError%s: Failed to remove task from schedd' % (colors.RED, colors.NORMAL))
                scheddresult = 'FAILED'
                self.logger.debug('gsissh stdout: %s\ngsissh stderr: %s\ngsissh exitcode: %s' % (stdout, stderr, exitcode))
                gsisshdict = {'stdout': stdout, 'stderr': stderr, 'exitcode': exitcode}

        # Return outside the schedd branch so a --cacheonly purge also gets a result back.
        return {'cacheresult': cacheresult, 'scheddresult': scheddresult, 'gsiresult': gsisshdict}
Example No. 29
0
def uploadlogfile(logger,
                  proxyfilename,
                  logfilename=None,
                  logpath=None,
                  instance='prod',
                  serverurl=None,
                  username=None):
    ## WMCore dependencies. Moved here to minimize dependencies in the bootstrap script
    from WMCore.Services.UserFileCache.UserFileCache import UserFileCache

    doupload = True

    if logfilename is None:
        logfilename = str(time.strftime("%Y-%m-%d_%H%M%S")) + '_crab.log'

    logger.info('Fetching user environment to log file')

    try:
        cmd = 'env'
        logger.debug('Running env command')
        pipe = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True)
        stdout, dummyStderr = pipe.communicate()
        logger.debug('\n\n\nUSER ENVIRONMENT\n%s' % stdout)
    except Exception as se:
        logger.debug('Failed to get the user env\nException message: %s' %
                     (se))

    if logpath is not None:
        if not os.path.exists(logpath):
            doupload = False
            logger.debug('%sError%s: %s does not exist' %
                         (colors.RED, colors.NORMAL, logpath))
    else:
        if os.path.exists(str(os.getcwd()) + '/crab.log'):
            logpath = str(os.getcwd()) + '/crab.log'
        else:
            logger.debug(
                '%sError%s: Failed to find crab.log in current directory %s' %
                (colors.RED, colors.NORMAL, str(os.getcwd())))

    if serverurl is None and instance in SERVICE_INSTANCES.keys():
        serverurl = SERVICE_INSTANCES[instance]
    elif instance not in SERVICE_INSTANCES.keys() and serverurl is not None:
        instance = 'private'
    elif instance not in SERVICE_INSTANCES.keys() and serverurl is None:
        logger.debug('%sError%s: serverurl is None' %
                     (colors.RED, colors.NORMAL))
        doupload = False

    if proxyfilename is None:
        logger.debug('No proxy was given')
        doupload = False

    baseurl = getUrl(instance=instance, resource='info')
    if doupload:
        cacheurl = server_info('backendurls', serverurl, proxyfilename,
                               baseurl)
        # Encode in ascii because old pycurl present in old CMSSW versions
        # doesn't support unicode.
        cacheurl = cacheurl['cacheSSL'].encode('ascii')
        cacheurldict = {'endpoint': cacheurl, "pycurl": True}

        ufc = UserFileCache(cacheurldict)
        logger.debug("cacheURL: %s\nLog file name: %s" %
                     (cacheurl, logfilename))
        logger.info("Uploading log file...")
        ufc.uploadLog(logpath, logfilename)
        logger.info("%sSuccess%s: Log file uploaded successfully." %
                    (colors.GREEN, colors.NORMAL))
        logfileurl = cacheurl + '/logfile?name=' + str(logfilename)
        if not username:
            username = getUserDNandUsername(logger).get('username')
        if username:
            logfileurl += '&username=' + str(username)
    else:
        logger.info('Failed to upload the log file')
        logfileurl = False

    return logfileurl
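
A minimal usage sketch for the function above. The logger wiring and the proxy path are illustrative assumptions, not part of the original example:

import logging

logger = logging.getLogger('crab')
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.DEBUG)

# Placeholder proxy path: point it at a valid VOMS proxy file.
logfileurl = uploadlogfile(logger,
                           proxyfilename='/tmp/x509up_u1000',
                           logpath='crab.log',
                           instance='prod')
if logfileurl:
    logger.info('Uploaded log available at %s', logfileurl)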
Example No. 30
0
    def __call__(self):

        self.logger.info('Getting the tarball hash key')
        inputlist = {'subresource': 'search', 'workflow': self.cachedinfo['RequestName']}
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        uri = getUrl(self.instance, resource = 'task')
        dictresult, _, _ =  server.get(uri, data = inputlist)

        tm_user_sandbox = getColumn(dictresult, 'tm_user_sandbox')
        hashkey = tm_user_sandbox.replace(".tar.gz","")

        # Get the schedd address from the DB info and strip off the 'crab3@' prefix if it exists
        scheddaddress = getColumn(dictresult, 'tm_schedd')
        scheddaddress = scheddaddress.split('@')[1] if '@' in scheddaddress else scheddaddress

        self.logger.info('Checking task status')
        serverFactory = CRABClient.Emulator.getEmulator('rest')
        server = serverFactory(self.serverurl, self.proxyfilename, self.proxyfilename, version=__version__)
        dictresult, _, _ = server.get(self.uri, data = {'workflow': self.cachedinfo['RequestName'], 'verbose': 0})

        dictresult = dictresult['result'][0] #take just the significant part

        self.logger.info('Task status: %s' % dictresult['status'])
        accepstate = ['SUBMITFAILED', 'KILLED', 'FINISHED', 'FAILED', 'KILLFAILED', 'COMPLETED']
        if dictresult['status'] not in accepstate:
            msg = '%sError%s: Only tasks in one of these statuses can be purged: %s' % (colors.RED, colors.NORMAL, accepstate)
            raise ConfigurationException(msg)

        #getting the cache url
        cacheresult = {}
        scheddresult = {}
        gsisshdict = {}
        if not self.options.scheddonly:
            baseurl = getUrl(self.instance, resource='info')
            cacheurl = server_info('backendurls', self.serverurl, self.proxyfilename, baseurl)
            cacheurl = cacheurl['cacheSSL']
            cacheurldict = {'endpoint': cacheurl, 'pycurl': True}

            ufc = UserFileCache(cacheurldict)
            self.logger.info('Tarball hashkey: %s' % hashkey)
            self.logger.info('Attempting to remove task file from crab server cache')

            try:
                ufcresult = ufc.removeFile(hashkey)
            except HTTPException as re:
                if 'X-Error-Info' in re.headers and 'Not such file' in re.headers['X-Error-Info']:
                    self.logger.info('%sError%s: Failed to find task file in crab server cache; the file might have been already purged' % (colors.RED, colors.NORMAL))
                raise

            if ufcresult == '':
                self.logger.info('%sSuccess%s: Successfully removed task files from crab server cache' % (colors.GREEN, colors.NORMAL))
                cacheresult = 'SUCCESS'
            else:
                self.logger.info('%sError%s: Failed to remove task files from crab server cache' % (colors.RED, colors.NORMAL))
                cacheresult = 'FAILED'

        if not self.options.cacheonly:
            self.logger.debug('%sSuccess%s: Successfully got schedd address' % (colors.GREEN, colors.NORMAL))
            self.logger.debug('Schedd address: %s' % scheddaddress)
            self.logger.info('Attempting to remove task from schedd')

            gssishrm = 'gsissh -o ConnectTimeout=60 -o PasswordAuthentication=no ' + scheddaddress + ' rm -rf ' + self.cachedinfo['RequestName']
            self.logger.debug('gsissh command: %s' % gssishrm)

            delprocess = subprocess.Popen(gssishrm, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
            stdout, stderr = delprocess.communicate()
            exitcode = delprocess.returncode

            if exitcode == 0:
                self.logger.info('%sSuccess%s: Successfully removed task from schedd' % (colors.GREEN, colors.NORMAL))
                scheddresult = 'SUCCESS'
                gsisshdict = {}
            else:
                self.logger.info('%sError%s: Failed to remove task from schedd' % (colors.RED, colors.NORMAL))
                scheddresult = 'FAILED'
                self.logger.debug('gsissh stdout: %s\ngsissh stderr: %s\ngsissh exitcode: %s' % (stdout, stderr, exitcode))
                gsisshdict = {'stdout': stdout, 'stderr': stderr, 'exitcode': exitcode}

        # Return outside the schedd branch so a --cacheonly purge also gets a result back.
        return {'cacheresult': cacheresult, 'scheddresult': scheddresult, 'gsiresult': gsisshdict}
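
A short sketch of how a caller might consume the dictionary returned above. The command instance is hypothetical; the keys are exactly the ones built in this example:

result = purgeCommand()  # hypothetical instance of the purge command class above
if result['cacheresult'] == 'SUCCESS' and result['scheddresult'] == 'SUCCESS':
    print('Task fully purged from the crab server cache and the schedd')
else:
    # 'gsiresult' carries stdout/stderr/exitcode only when the gsissh removal failed.
    print('Purge incomplete, gsissh details: %s' % result['gsiresult'])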
Example No. 31
0
    def _execute(self, resthost, resturi, config, task):
        mw = MasterWorker(config, logWarning=False, logDebug=False, sequential=True, console=False)

        tapeRecallStatus = 'TAPERECALL'
        self.logger.info("Retrieving %s tasks", tapeRecallStatus)
        recallingTasks = mw.getWork(limit=999999, getstatus=tapeRecallStatus, ignoreTWName=True)
        if len(recallingTasks) > 0:
            self.logger.info("Retrieved a total of %d %s tasks", len(recallingTasks), tapeRecallStatus)
            for recallingTask in recallingTasks:
                taskName = recallingTask['tm_taskname']
                self.logger.info("Working on task %s", taskName)

                reqId = recallingTask['tm_DDM_reqid']
                if not reqId:
                    self.logger.debug("tm_DDM_reqid' is not defined for task %s, skipping such task", taskName)
                    continue

                server = HTTPRequests(config.TaskWorker.resturl, config.TaskWorker.cmscert, config.TaskWorker.cmskey, retry=20, logger=self.logger)
                if (time.time() - getTimeFromTaskname(str(taskName)) > MAX_DAYS_FOR_TAPERECALL*24*60*60):
                    self.logger.info("Task %s is older than %d days, setting its status to FAILED", taskName, MAX_DAYS_FOR_TAPERECALL)
                    msg = "The disk replica request (ID: %d) for the input dataset did not complete in %d days." % (reqId, MAX_DAYS_FOR_TAPERECALL)
                    failTask(taskName, server, config.TaskWorker.restURInoAPI+'workflowdb', msg, self.logger, 'FAILED')
                    continue

                mpl = MyProxyLogon(config=config, server=server, resturi=config.TaskWorker.restURInoAPI, myproxylen=self.pollingTime)
                user_proxy = True
                try:
                    mpl.execute(task=recallingTask) # this adds 'user_proxy' to recallingTask
                except TaskWorkerException as twe:
                    user_proxy = False
                    self.logger.exception(twe)

                # Make sure the task sandbox in the crabcache is not deleted until the tape recall is completed
                if user_proxy:
                    from WMCore.Services.UserFileCache.UserFileCache import UserFileCache
                    ufc = UserFileCache({'cert': recallingTask['user_proxy'], 'key': recallingTask['user_proxy'], 'endpoint': recallingTask['tm_cache_url'], "pycurl": True})
                    sandbox = recallingTask['tm_user_sandbox'].replace(".tar.gz","")
                    debugFiles = recallingTask['tm_debug_files'].replace(".tar.gz","")
                    sandboxPath = os.path.join("/tmp", sandbox)
                    debugFilesPath = os.path.join("/tmp", debugFiles)
                    try:
                        ufc.download(sandbox, sandboxPath, recallingTask['tm_username'])
                        ufc.download(debugFiles, debugFilesPath, recallingTask['tm_username'])
                        self.logger.info("Successfully touched input and debug sandboxes (%s and %s) of task %s (frontend: %s) using the '%s' username (request_id = %d).",
                                         sandbox, debugFiles, taskName, recallingTask['tm_cache_url'], recallingTask['tm_username'], reqId)
                    except Exception as ex:
                        self.logger.info("The CRAB3 server backend could not download the input and/or debug sandbox (%s and/or %s) of task %s from the frontend (%s) using the '%s' username (request_id = %d)."+\
                                         " This could be a temporary glitch, will try again in next occurrence of the recurring action."+\
                                         " Error reason:\n%s", sandbox, debugFiles, taskName, recallingTask['tm_cache_url'], recallingTask['tm_username'], reqId, str(ex))
                    finally:
                        if os.path.exists(sandboxPath): os.remove(sandboxPath)
                        if os.path.exists(debugFilesPath): os.remove(debugFilesPath)

                ddmRequest = statusRequest(reqId, config.TaskWorker.DDMServer, config.TaskWorker.cmscert, config.TaskWorker.cmskey, verbose=False)
                # The query above returns a JSON with a format {"result": "OK", "message": "Request found", "data": [{"request_id": 14, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:25:41", "last_request": "2018-02-26 23:25:41", "request_count": 1}]}                
                self.logger.info("Contacted %s using %s and %s for request_id = %d, got:\n%s", config.TaskWorker.DDMServer, config.TaskWorker.cmscert, config.TaskWorker.cmskey, reqId, ddmRequest)

                if ddmRequest["message"] == "Request found":
                    status = ddmRequest["data"][0]["status"]
                    if status == "completed": # possible values: new, activated, updated, completed, rejected, cancelled
                        self.logger.info("Request %d is completed, setting status of task %s to NEW", reqId, taskName)
                        mw.updateWork(taskName, recallingTask['tm_task_command'], 'NEW')
                        # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                        if user_proxy: mpl.deleteWarnings(recallingTask['user_proxy'], taskName)
                    elif status == "rejected":
                        msg = "The DDM request (ID: %d) has been rejected with this reason: %s" % (reqId, ddmRequest["data"][0]["reason"])
                        self.logger.info(msg + "\nSetting status of task %s to FAILED", taskName)
                        failTask(taskName, server, config.TaskWorker.restURInoAPI+'workflowdb', msg, self.logger, 'FAILED')

                else:
                    msg = "DDM request_id %d not found. Please report to experts" % reqId
                    self.logger.info(msg)
                    if user_proxy: mpl.uploadWarning(msg, recallingTask['user_proxy'], taskName)

        else:
            self.logger.info("No %s task retrieved.", tapeRecallStatus)
Example No. 32
0
    def actionWork(self, *args, **kwargs):
        """Performing the set of actions"""
        nextinput = args

        taskhandler = self.addTaskLogHandler()

        # I know it looks like a duplicated printout from the process logs (proc.N.log) perspective.
        # In fact we have a similar printout in the processWorker function of the Worker module, but
        # it does not go to the task logfile and it is useful imho.
        self.logger.debug(
            "Process %s is starting %s on task %s" %
            (self.procnum, self.workFunction, self._task['tm_taskname']))

        for work in self.getWorks():
            #Loop that iterates over the actions to be performed
            self.logger.debug("Starting %s on %s" %
                              (str(work), self._task['tm_taskname']))
            t0 = time.time()
            try:
                output = work.execute(nextinput, task=self._task)
            except StopHandler as sh:
                msg = "Controlled stop of handler for %s on %s " % (self._task,
                                                                    str(sh))
                self.logger.error(msg)
                nextinput = Result(
                    task=self._task,
                    result='StopHandler exception received, controlled stop')
                break  #exit normally. Worker will not notice there was an error
            except TaskWorkerException as twe:
                self.logger.debug(str(traceback.format_exc())
                                  )  #print the stacktrace only in debug mode
                self.removeTaskLogHandler(taskhandler)
                raise WorkerHandlerException(
                    str(twe)
                )  #TaskWorker error, do not add traceback to the error propagated to the REST
            except Exception as exc:
                msg = "Problem handling %s because of %s failure, traceback follows\n" % (
                    self._task['tm_taskname'], str(exc))
                msg += str(traceback.format_exc())
                self.logger.error(msg)
                self.removeTaskLogHandler(taskhandler)
                raise WorkerHandlerException(
                    msg)  #Errors not foreseen. Print everything!
            finally:
                #upload logfile of the task to the crabcache
                logpath = 'logs/tasks/%s/%s.log' % (self._task['tm_username'],
                                                    self._task['tm_taskname'])
                if os.path.isfile(
                        logpath
                ) and 'user_proxy' in self._task:  #the user proxy might not be there if myproxy retrieval failed
                    cacheurldict = {
                        'endpoint': self._task['tm_cache_url'],
                        'cert': self._task['user_proxy'],
                        'key': self._task['user_proxy']
                    }
                    try:
                        ufc = UserFileCache(cacheurldict)
                        logfilename = self._task[
                            'tm_taskname'] + '_TaskWorker.log'
                        ufc.uploadLog(logpath, logfilename)
                    except HTTPException as hte:
                        msg = (
                            "Failed to upload the logfile to %s for task %s. More details in the http headers and body:\n%s\n%s"
                            % (self._task['tm_cache_url'],
                               self._task['tm_taskname'], hte.headers,
                               hte.result))
                        self.logger.error(msg)
                    except Exception:
                        msg = "Unknown error while uploading the logfile for task %s" % self._task[
                            'tm_taskname']
                        self.logger.exception(msg)
            t1 = time.time()
            self.logger.info("Finished %s on %s in %d seconds" %
                             (str(work), self._task['tm_taskname'], t1 - t0))
            try:
                nextinput = output.result
            except AttributeError:
                nextinput = output

        self.removeTaskLogHandler(taskhandler)

        return nextinput
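
The try/except at the end of the loop unwraps a Result object when one is returned and passes any other output through unchanged. An equivalent one-liner, shown only as a sketch:

# Take output.result when the attribute exists, otherwise fall back to output itself.
nextinput = getattr(output, 'result', output)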
Example No. 33
0
    def setUp(self):
        """
        Setup for unit tests
        """
        self.ufc = UserFileCache()
Example No. 34
0
    def executeAction(self, nextinput, work):
        """ Execute an action and deal with the error handling and upload of the tasklogfile to the crabcache
        """
        try:
            output = work.execute(nextinput,
                                  task=self._task,
                                  tempDir=self.tempDir)
        except TapeDatasetException as tde:
            raise TapeDatasetException(str(tde))
        except TaskWorkerException as twe:
            self.logger.debug(str(traceback.format_exc())
                              )  #print the stacktrace only in debug mode
            raise WorkerHandlerException(
                str(twe), retry=twe.retry
            )  #TaskWorker error, do not add traceback to the error propagated to the REST
        except Exception as exc:
            msg = "Problem handling %s because of %s failure, traceback follows\n" % (
                self.taskname, str(exc))
            msg += str(traceback.format_exc())
            self.logger.error(msg)
            raise WorkerHandlerException(
                msg)  #Errors not foreseen. Print everything!
        finally:
            #TODO: we need to do that also in Worker.py otherwise some messages might only be in the TW file but not in the crabcache.
            logpath = self.config.TaskWorker.logsDir + '/tasks/%s/%s.log' % (
                self._task['tm_username'], self.taskname)
            if os.path.isfile(
                    logpath
            ) and 'user_proxy' in self._task:  #the user proxy might not be there if myproxy retrieval failed
                cacheurldict = {
                    'endpoint': self._task['tm_cache_url'],
                    'cert': self._task['user_proxy'],
                    'key': self._task['user_proxy']
                }
                if 'S3' in self._task['tm_cache_url'].upper():
                    # use S3
                    try:
                        uploadToS3(crabserver=self.crabserver,
                                   objecttype='twlog',
                                   filepath=logpath,
                                   taskname=self.taskname,
                                   logger=self.logger)
                    except Exception as e:
                        msg = 'Failed to upload logfile to S3 for task %s. ' % self.taskname
                        msg += 'Details:\n%s' % str(e)
                        self.logger.error(msg)
                else:
                    # use old crabcache
                    try:
                        ufc = UserFileCache(cacheurldict)
                        logfilename = self.taskname + '_TaskWorker.log'
                        ufc.uploadLog(logpath, logfilename)
                    except HTTPException as hte:
                        msg = "Failed to upload the logfile to %s for task %s. More details in the http headers and body:\n%s\n%s" % (
                            self._task['tm_cache_url'], self.taskname,
                            hte.headers, hte.result)
                        self.logger.error(msg)
                    except Exception:  #pylint: disable=broad-except
                        msg = "Unknown error while uploading the logfile for task %s" % self.taskname
                        self.logger.exception(
                            msg)  #upload logfile of the task to the crabcache

        return output
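
The finally block above routes the log upload on a substring test of the cache URL. A condensed sketch of that routing, under the same assumptions as the code above (the wrapper name is hypothetical):

def uploadTaskLog(task, taskname, logpath, crabserver, logger):
    """Hypothetical helper condensing the S3-vs-crabcache routing above."""
    if 'S3' in task['tm_cache_url'].upper():
        # Any cache URL mentioning 'S3' (case-insensitively) goes to S3.
        uploadToS3(crabserver=crabserver, objecttype='twlog',
                   filepath=logpath, taskname=taskname, logger=logger)
    else:
        # Otherwise fall back to the old crabcache service.
        ufc = UserFileCache({'endpoint': task['tm_cache_url'],
                             'cert': task['user_proxy'], 'key': task['user_proxy']})
        ufc.uploadLog(logpath, taskname + '_TaskWorker.log')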
Example No. 35
0
    def _execute(self, resthost, resturi, config, task):

        # setup logger
        if not self.logger:
            self.logger = logging.getLogger(__name__)
            handler = logging.StreamHandler(sys.stdout)
            formatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(module)s %(message)s")
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)
            self.logger.setLevel(logging.DEBUG)
        else:
            # do not use BaseRecurringAction logger but create a new logger
            # which writes to config.TaskWorker.logsDir/tasks/recurring/TapeRecallStatus_YYMMDD-HHMM.log
            self.logger = logging.getLogger('TapeRecallStatus')
            logDir = config.TaskWorker.logsDir + '/tasks/recurring/'
            if not os.path.exists(logDir):
                os.makedirs(logDir)
            timeStamp = time.strftime('%y%m%d-%H%M', time.localtime())
            logFile = 'TapeRecallStatus_' + timeStamp + '.log'
            handler = logging.FileHandler(logDir + logFile)
            formatter = logging.Formatter(
                '%(asctime)s:%(levelname)s:%(module)s:%(message)s')
            handler.setFormatter(formatter)
            self.logger.addHandler(handler)

        mw = MasterWorker(config,
                          logWarning=False,
                          logDebug=False,
                          sequential=True,
                          console=False,
                          name='masterForTapeRecall')

        tapeRecallStatus = 'TAPERECALL'
        self.logger.info("Retrieving %s tasks", tapeRecallStatus)
        recallingTasks = mw.getWork(limit=999999,
                                    getstatus=tapeRecallStatus,
                                    ignoreTWName=True)
        if len(recallingTasks) > 0:
            self.logger.info("Retrieved a total of %d %s tasks",
                             len(recallingTasks), tapeRecallStatus)
            for recallingTask in recallingTasks:
                taskName = recallingTask['tm_taskname']
                self.logger.info("Working on task %s", taskName)

                reqId = recallingTask['tm_DDM_reqid']
                if not reqId:
                    self.logger.debug(
                        "'tm_DDM_reqid' is not defined for task %s, skipping it",
                        taskName)
                    continue

                server = HTTPRequests(resthost,
                                      config.TaskWorker.cmscert,
                                      config.TaskWorker.cmskey,
                                      retry=20,
                                      logger=self.logger)
                if (time.time() - getTimeFromTaskname(str(taskName)) >
                        MAX_DAYS_FOR_TAPERECALL * 24 * 60 * 60):
                    self.logger.info(
                        "Task %s is older than %d days, setting its status to FAILED",
                        taskName, MAX_DAYS_FOR_TAPERECALL)
                    msg = "The disk replica request (ID: %d) for the input dataset did not complete in %d days." % (
                        reqId, MAX_DAYS_FOR_TAPERECALL)
                    failTask(taskName, server, resturi, msg, self.logger,
                             'FAILED')
                    continue

                mpl = MyProxyLogon(config=config,
                                   server=server,
                                   resturi=resturi,
                                   myproxylen=self.pollingTime)
                user_proxy = True
                try:
                    mpl.execute(task=recallingTask
                                )  # this adds 'user_proxy' to recallingTask
                except TaskWorkerException as twe:
                    user_proxy = False
                    self.logger.exception(twe)

                # Make sure the task sandbox in the crabcache is not deleted until the tape recall is completed
                if user_proxy:
                    from WMCore.Services.UserFileCache.UserFileCache import UserFileCache
                    ufc = UserFileCache({
                        'cert':
                        recallingTask['user_proxy'],
                        'key':
                        recallingTask['user_proxy'],
                        'endpoint':
                        recallingTask['tm_cache_url'],
                        "pycurl":
                        True
                    })
                    sandbox = recallingTask['tm_user_sandbox'].replace(
                        ".tar.gz", "")
                    debugFiles = recallingTask['tm_debug_files'].replace(
                        ".tar.gz", "")
                    sandboxPath = os.path.join("/tmp", sandbox)
                    debugFilesPath = os.path.join("/tmp", debugFiles)
                    try:
                        ufc.download(sandbox, sandboxPath,
                                     recallingTask['tm_username'])
                        ufc.download(debugFiles, debugFilesPath,
                                     recallingTask['tm_username'])
                        self.logger.info(
                            "Successfully touched input and debug sandboxes (%s and %s) of task %s (frontend: %s) using the '%s' username (request_id = %d).",
                            sandbox, debugFiles, taskName,
                            recallingTask['tm_cache_url'],
                            recallingTask['tm_username'], reqId)
                    except Exception as ex:
                        self.logger.info("The CRAB3 server backend could not download the input and/or debug sandbox (%s and/or %s) of task %s from the frontend (%s) using the '%s' username (request_id = %d)."+\
                                         " This could be a temporary glitch, will try again in next occurrence of the recurring action."+\
                                         " Error reason:\n%s", sandbox, debugFiles, taskName, recallingTask['tm_cache_url'], recallingTask['tm_username'], reqId, str(ex))
                    finally:
                        if os.path.exists(sandboxPath): os.remove(sandboxPath)
                        if os.path.exists(debugFilesPath):
                            os.remove(debugFilesPath)

                ddmRequest = statusRequest(reqId,
                                           config.TaskWorker.DDMServer,
                                           config.TaskWorker.cmscert,
                                           config.TaskWorker.cmskey,
                                           verbose=False)
                # The query above returns a JSON with a format {"result": "OK", "message": "Request found", "data": [{"request_id": 14, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:25:41", "last_request": "2018-02-26 23:25:41", "request_count": 1}]}
                self.logger.info(
                    "Contacted %s using %s and %s for request_id = %d, got:\n%s",
                    config.TaskWorker.DDMServer, config.TaskWorker.cmscert,
                    config.TaskWorker.cmskey, reqId, ddmRequest)

                if ddmRequest["message"] == "Request found":
                    status = ddmRequest["data"][0]["status"]
                    if status == "completed":  # possible values: new, activated, updated, completed, rejected, cancelled
                        self.logger.info(
                            "Request %d is completed, setting status of task %s to NEW",
                            reqId, taskName)
                        mw.updateWork(taskName,
                                      recallingTask['tm_task_command'], 'NEW')
                        # Delete all task warnings (the tapeRecallStatus added a dataset warning which is no longer valid now)
                        if user_proxy:
                            mpl.deleteWarnings(recallingTask['user_proxy'],
                                               taskName)
                    elif status == "rejected":
                        msg = "The DDM request (ID: %d) has been rejected with this reason: %s" % (
                            reqId, ddmRequest["data"][0]["reason"])
                        self.logger.info(
                            msg + "\nSetting status of task %s to FAILED",
                            taskName)
                        failTask(taskName, server, resturi, msg, self.logger,
                                 'FAILED')

                else:
                    msg = "DDM request_id %d not found. Please report to experts" % reqId
                    self.logger.info(msg)
                    if user_proxy:
                        mpl.uploadWarning(msg, recallingTask['user_proxy'],
                                          taskName)

        else:
            self.logger.info("No %s task retrieved.", tapeRecallStatus)
Example No. 36
0
def uploadlogfile(logger, proxyfilename, logfilename = None, logpath = None, instance = 'prod', serverurl = None, username = None):
    ## WMCore dependencies. Moved here to minimize dependencies in the bootstrap script
    from WMCore.Services.UserFileCache.UserFileCache import UserFileCache

    doupload = True

    if logfilename is None:
        logfilename = str(time.strftime("%Y-%m-%d_%H%M%S"))+'_crab.log'

    logger.info('Fetching user environment to log file')

    try:
        cmd = 'env'
        logger.debug('Running env command')
        pipe = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        stdout, stderr = pipe.communicate()
        logger.debug('\n\n\nUSER ENVIRONMENT\n%s' % stdout)
    except Exception as se:
        logger.debug('Failed to get the user env\nException message: %s' % (se))

    if logpath is not None:
        if not os.path.exists(logpath):
            doupload = False
            logger.debug('%sError%s: %s does not exist' % (colors.RED, colors.NORMAL, logpath))
    else:
        if os.path.exists(str(os.getcwd()) + '/crab.log'):
            logpath = str(os.getcwd())+'/crab.log'
        else:
            logger.debug('%sError%s: Failed to find crab.log in current directory %s' % (colors.RED, colors.NORMAL, str(os.getcwd())))

    if serverurl is None and instance in SERVICE_INSTANCES.keys():
        serverurl = SERVICE_INSTANCES[instance]
    elif instance not in SERVICE_INSTANCES.keys() and serverurl is not None:
        instance = 'private'
    elif instance not in SERVICE_INSTANCES.keys() and serverurl is None:
        logger.debug('%sError%s: serverurl is None' % (colors.RED, colors.NORMAL))
        doupload = False

    if proxyfilename is None:
        logger.debug('No proxy was given')
        doupload = False

    baseurl = getUrl(instance=instance, resource='info')
    if doupload:
        cacheurl = server_info('backendurls', serverurl, proxyfilename, baseurl)
        cacheurl = cacheurl['cacheSSL']
        cacheurldict = {'endpoint': cacheurl}

        ufc = UserFileCache(cacheurldict)
        logger.debug("cacheURL: %s\nLog file name: %s" % (cacheurl, logfilename))
        logger.info("Uploading log file...")
        ufc.uploadLog(logpath, logfilename)
        logger.info("%sSuccess%s: Log file uploaded successfully." % (colors.GREEN, colors.NORMAL))
        logfileurl = cacheurl + '/logfile?name='+str(logfilename)
        if not username:
            username = getUsernameFromSiteDB_wrapped(logger, quiet=True)
        if username:
            logfileurl += '&username=' + str(username)
    else:
        logger.info('Failed to upload the log file')
        logfileurl = False

    return logfileurl