def main(opts):
    """Run the selected analyses over the job output files of each task.

    Reads these attributes of ``opts``: ``time``, ``size``, ``sizeFile``,
    ``memory``, ``host`` and ``watchdog`` (which analyses to run),
    ``include``/``exclude`` (passed to ``excludeInclude``), ``byStatus``,
    ``printCrab`` and ``allowFails``.

    Returns 1 when no analysis was selected, 0 otherwise.  An exception
    from ``multicrab.crabStatusToJobs`` propagates unless
    ``opts.allowFails`` is set, in which case the task is skipped.
    """
    taskDirs = multicrab.getTaskDirectories(opts)

    analyses = []
    watchdogAnalyses = []
    if opts.time:
        analyses.append(TimeAnalysis())
    if opts.size:
        analyses.append(SizeAnalysis(opts.sizeFile))
    if opts.memory:
        watchdogAnalyses.append(MemoryAnalysis())
    if opts.host:
        analyses.append(HostAnalysis())
    if opts.watchdog:
        analyses.append(WatchdogAnalysis())

    # Nothing to do if no analysis was requested
    if not analyses and not watchdogAnalyses:
        return 1

    for task in taskDirs:
        resDir = os.path.join(task, "res")
        files = glob.glob(os.path.join(resDir, "CMSSW_*.stdout"))
        files = excludeInclude(files, opts.include, opts.exclude)
        wfiles = []
        # Watchdog logs are only looked up when an analysis consumes them
        if watchdogAnalyses:
            wfiles = glob.glob(os.path.join(resDir, "Watchdog_*.log.gz"))
            wfiles = excludeInclude(wfiles, opts.include, opts.exclude)

        if not opts.byStatus:
            print("Task %s, %d jobs" % (task, len(files)))
            analyseTask(files, wfiles, analyses, watchdogAnalyses)
            continue

        try:
            jobs = multicrab.crabStatusToJobs(task, opts.printCrab)
        except Exception:
            if not opts.allowFails:
                raise
            print("%s: crab -status failed" % task)
            continue
        print("Task %s" % task)
        # Ignore running jobs
        for status in multicrabStatus.order_run:
            if status in jobs:
                del jobs[status]
        for status in sorted(jobs.keys()):
            ids = ",".join(["%d" % j.id for j in jobs[status]])
            f = excludeInclude(files, ids)
            wf = excludeInclude(wfiles, ids)
            # Report the files selected for this status, not the count of
            # all files of the task.
            print(" %s, %d jobs" % (status, len(f)))
            analyseTask(f, wf, analyses, watchdogAnalyses, prefix=" ")

    return 0
Exemplo n.º 2
0
def main(opts):
    """Run the selected analyses over the job output files of each task.

    Reads these attributes of ``opts``: ``time``, ``size``, ``sizeFile``,
    ``memory``, ``host`` and ``watchdog`` (which analyses to run),
    ``include``/``exclude`` (passed to ``excludeInclude``), ``byStatus``,
    ``printCrab`` and ``allowFails``.

    Returns 1 when no analysis was selected, 0 otherwise.  An exception
    from ``multicrab.crabStatusToJobs`` propagates unless
    ``opts.allowFails`` is set, in which case the task is skipped.
    """
    taskDirs = multicrab.getTaskDirectories(opts)

    analyses = []
    watchdogAnalyses = []
    if opts.time:
        analyses.append(TimeAnalysis())
    if opts.size:
        analyses.append(SizeAnalysis(opts.sizeFile))
    if opts.memory:
        watchdogAnalyses.append(MemoryAnalysis())
    if opts.host:
        analyses.append(HostAnalysis())
    if opts.watchdog:
        analyses.append(WatchdogAnalysis())

    # Nothing to do if no analysis was requested
    if not analyses and not watchdogAnalyses:
        return 1

    for task in taskDirs:
        resDir = os.path.join(task, "res")
        files = glob.glob(os.path.join(resDir, "CMSSW_*.stdout"))
        files = excludeInclude(files, opts.include, opts.exclude)
        wfiles = []
        # Watchdog logs are only looked up when an analysis consumes them
        if watchdogAnalyses:
            wfiles = glob.glob(os.path.join(resDir, "Watchdog_*.log.gz"))
            wfiles = excludeInclude(wfiles, opts.include, opts.exclude)

        if not opts.byStatus:
            print("Task %s, %d jobs" % (task, len(files)))
            analyseTask(files, wfiles, analyses, watchdogAnalyses)
            continue

        try:
            jobs = multicrab.crabStatusToJobs(task, opts.printCrab)
        except Exception:
            if not opts.allowFails:
                raise
            print("%s: crab -status failed" % task)
            continue
        print("Task %s" % task)
        # Ignore running jobs
        for status in multicrabStatus.order_run:
            if status in jobs:
                del jobs[status]
        for status in sorted(jobs.keys()):
            ids = ",".join(["%d" % j.id for j in jobs[status]])
            f = excludeInclude(files, ids)
            wf = excludeInclude(wfiles, ids)
            # Report the files selected for this status, not the count of
            # all files of the task.
            print(" %s, %d jobs" % (status, len(f)))
            analyseTask(f, wf, analyses, watchdogAnalyses, prefix=" ")

    return 0
Exemplo n.º 3
0
def main(opts, args):
    """Submit the 'Created' jobs of all tasks to CRAB in batches.

    ``opts`` supplies ``maxJobs`` (upper limit on submitted jobs, ignored
    when negative), ``jobs`` (batch size), ``sleep`` (seconds to wait
    between batches), ``test`` (only print the commands) and
    ``allowFails``.  ``args`` is a list of extra arguments appended to
    every ``crab`` command line.

    Returns 0.  Raises ``Exception`` carrying the failed command and its
    exit code when a submission fails and ``opts.allowFails`` is not set.
    """
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    # Collect the submittable jobs of every existing task
    allJobs = []
    for task in taskDirs:
        if not os.path.exists(task):
            print("%s: Task directory missing" % task)
            continue

        jobs = multicrab.crabStatusToJobs(task)
        if "Created" not in jobs:
            print("%s: no 'Created' jobs to submit" % task)
            continue
        allJobs.extend([j for j in jobs["Created"] if isInRange(opts, j)])

    maxJobs = len(allJobs)
    if opts.maxJobs >= 0 and int(opts.maxJobs) < maxJobs:
        # Store the coerced value so the arithmetic below works on ints
        maxJobs = int(opts.maxJobs)

    # A non-positive batch size would never make progress in the loop
    # below; treat it as "submit everything in a single batch".
    batchSize = opts.jobs
    if batchSize <= 0:
        batchSize = maxJobs

    njobsSubmitted = 0
    while njobsSubmitted < maxJobs:
        njobsToSubmit = min(batchSize, maxJobs - njobsSubmitted, len(allJobs))
        njobsSubmitted += njobsToSubmit

        # Take the next batch off the front of the queue and group the
        # job ids by task
        batch = allJobs[:njobsToSubmit]
        del allJobs[:njobsToSubmit]
        jobsToSubmit = {}
        for job in batch:
            multicrab._addToDictList(jobsToSubmit, job.task, job.id)

        for task, jobs in jobsToSubmit.items():
            pretty = multicrab.prettyJobnums(jobs)
            command = ["crab", "-c", task, "-submit", pretty] + args
            print("Submitting %d jobs from task %s" % (len(jobs), task))
            print("Command " + " ".join(command))
            if opts.test:
                continue
            ret = subprocess.call(command)
            if ret != 0:
                message = "Command '%s' failed with exit code %d" % (" ".join(command), ret)
                if not opts.allowFails:
                    # Carry the diagnostic in the exception instead of
                    # raising an empty one
                    raise Exception(message)
                print(message)

        if njobsSubmitted < maxJobs:
            print("Submitted, sleeping %f seconds" % opts.sleep)
            time.sleep(opts.sleep)
        else:
            print("Submitted")

    return 0
Exemplo n.º 4
0
def main(opts, args):
    """Submit the 'Created' jobs of all tasks to CRAB in batches.

    ``opts`` supplies ``maxJobs`` (upper limit on submitted jobs, ignored
    when negative), ``jobs`` (batch size), ``sleep`` (seconds to wait
    between batches), ``test`` (only print the commands) and
    ``allowFails``.  ``args`` is a list of extra arguments appended to
    every ``crab`` command line.

    Returns 0.  Raises ``Exception`` carrying the failed command and its
    exit code when a submission fails and ``opts.allowFails`` is not set.
    """
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    # Collect the submittable jobs of every existing task
    allJobs = []
    for task in taskDirs:
        if not os.path.exists(task):
            print("%s: Task directory missing" % task)
            continue

        jobs = multicrab.crabStatusToJobs(task)
        if "Created" not in jobs:
            print("%s: no 'Created' jobs to submit" % task)
            continue
        allJobs.extend([j for j in jobs["Created"] if isInRange(opts, j)])

    maxJobs = len(allJobs)
    if opts.maxJobs >= 0 and int(opts.maxJobs) < maxJobs:
        # Store the coerced value so the arithmetic below works on ints
        maxJobs = int(opts.maxJobs)

    # A non-positive batch size would never make progress in the loop
    # below; treat it as "submit everything in a single batch".
    batchSize = opts.jobs
    if batchSize <= 0:
        batchSize = maxJobs

    njobsSubmitted = 0
    while njobsSubmitted < maxJobs:
        njobsToSubmit = min(batchSize, maxJobs - njobsSubmitted, len(allJobs))
        njobsSubmitted += njobsToSubmit

        # Take the next batch off the front of the queue and group the
        # job ids by task
        batch = allJobs[:njobsToSubmit]
        del allJobs[:njobsToSubmit]
        jobsToSubmit = {}
        for job in batch:
            multicrab._addToDictList(jobsToSubmit, job.task, job.id)

        for task, jobs in jobsToSubmit.items():
            pretty = multicrab.prettyJobnums(jobs)
            command = ["crab", "-c", task, "-submit", pretty] + args
            print("Submitting %d jobs from task %s" % (len(jobs), task))
            print("Command " + " ".join(command))
            if opts.test:
                continue
            ret = subprocess.call(command)
            if ret != 0:
                message = "Command '%s' failed with exit code %d" % (" ".join(command), ret)
                if not opts.allowFails:
                    # Carry the diagnostic in the exception instead of
                    # raising an empty one
                    raise Exception(message)
                print(message)

        if njobsSubmitted < maxJobs:
            print("Submitted, sleeping %f seconds" % opts.sleep)
            time.sleep(opts.sleep)
        else:
            print("Submitted")

    return 0
Exemplo n.º 5
0
def main(opts):
    """Retrieve job output from CRAB for every task, in chunks.

    When ``opts.getoutput`` is None, the jobs of every status whose name
    contains "Done" are retrieved; otherwise the job list parsed from
    ``opts.getoutput`` is used.  A positive ``opts.jobs`` limits the
    number of jobs per ``crab -getoutput`` call.

    Returns 1 as soon as a command fails and ``opts.allowFails`` is not
    set, 0 otherwise.
    """
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    for task in taskDirs:
        if not os.path.exists(task):
            print("%s: Task directory missing" % task)
            continue

        # Decide which job ids to fetch for this task
        jobsList = []
        if opts.getoutput is not None:
            jobsList.extend(multicrab.prettyToJobList(opts.getoutput))
        else:
            jobs = multicrab.crabStatusToJobs(task, printCrab=False)
            for statusName in jobs.keys():
                if "Done" in statusName:
                    jobsList.extend([j.id for j in jobs[statusName]])
        if not jobsList:
            print("%s: no jobs to retrieve" % task)
            continue

        # Getoutput loop: walk the list one chunk at a time
        chunkSize = len(jobsList)
        if opts.jobs > 0:
            chunkSize = opts.jobs

        for start in range(0, len(jobsList), chunkSize):
            jobsToGet = jobsList[start:start + chunkSize]
            jobsStr = ",".join([str(j) for j in jobsToGet])
            command = ["crab", "-c", task, "-getoutput", jobsStr]
            print("Getting %d jobs from task %s" % (len(jobsToGet), task))
            print("Command " + " ".join(command))
            ret = subprocess.call(command)
            if ret == 0:
                continue
            print("Command '%s' failed with exit code %s" % (" ".join(command), ret))
            if not opts.allowFails:
                return 1

    return 0
def main(opts):
    """Submit or resubmit CRAB jobs of the selected tasks in batches.

    Modes, selected by ``opts.resubmit``:
      * empty string: submit the 'Created' jobs accepted by ``isInRange``;
      * "failed" / "aborted": resubmit the jobs for which
        ``job.failed("all")`` / ``job.failed("aborted")`` is true;
      * anything else: parsed as an explicit job id list (only allowed
        with exactly one task).

    Other options read: ``maxJobs``, ``jobs`` (batch size), ``sleep``,
    ``toSites`` (comma separated, used round-robin per batch),
    ``crabArgs``, ``addSeBlackList``, ``test``, ``allowFails``,
    ``tryAgainTimes`` and ``tryAgainSeconds``.

    Returns 1 if an explicit job id list is combined with several tasks,
    0 otherwise.  Raises ``Exception`` with the failed command when a
    submission fails, retries are disabled and ``opts.allowFails`` is
    not set.
    """
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    resubmitMode = len(opts.resubmit) > 0
    resubmitIdListMode = opts.resubmit not in ["failed", "aborted"]

    if resubmitMode and resubmitIdListMode:
        if len(taskDirs) != 1:
            print("Option '--resubmit job_id_list' can be used with only one task, trying to use with %d tasks" % len(taskDirs))
            return 1
        resubmitJobList = multicrab.prettyToJobList(opts.resubmit)

    # Obtain all jobs to be (re)submitted
    allJobs = []
    seBlackLists = {}
    for task in taskDirs:
        if not os.path.exists(task):
            print("%s: Task directory missing" % task)
            continue

        # Remember the SE black list configured for the task (if any) so
        # that --addSeBlackList can extend it instead of replacing it
        cfgparser = ConfigParser.ConfigParser()
        cfgparser.read(os.path.join(task, "share", "crab.cfg"))
        if cfgparser.has_section("GRID"):
            blacklist = None
            for ao in cfgparser.options("GRID"):
                if ao.lower() == "se_black_list":
                    blacklist = cfgparser.get("GRID", ao)
                    break
            seBlackLists[task] = blacklist

        jobs = multicrab.crabStatusToJobs(task, printCrab=False)
        if not resubmitMode:  # normal submission
            if "Created" not in jobs:
                print("%s: no 'Created' jobs to submit" % task)
                continue
            allJobs.extend([j for j in jobs["Created"] if isInRange(opts, j)])
        elif not resubmitIdListMode:  # resubmit all failed jobs
            status = "aborted" if opts.resubmit == "aborted" else "all"
            for joblist in jobs.values():
                for job in joblist:
                    if job.failed(status):
                        allJobs.append(job)
        else:  # resubmit explicit list of jobs
            for joblist in jobs.values():
                for job in joblist:
                    if job.id in resubmitJobList:
                        allJobs.append(job)
                        # Each requested id is consumed at most once
                        resubmitJobList.remove(job.id)

    # Set the number of maximum jobs to submit
    maxJobs = len(allJobs)
    if opts.maxJobs >= 0 and int(opts.maxJobs) < maxJobs:
        # Store the coerced value so the arithmetic below works on ints
        maxJobs = int(opts.maxJobs)

    # A non-positive batch size would never make progress in the loop
    # below; treat it as "submit everything in a single batch".
    batchSize = opts.jobs
    if batchSize <= 0:
        batchSize = maxJobs

    submitCommand = "-resubmit" if resubmitMode else "-submit"

    sites = []
    siteSubmitIndex = 0
    if len(opts.toSites) > 0:
        sites = opts.toSites.split(",")

    retrying = opts.tryAgainTimes > 0

    # Submission loop
    njobsSubmitted = 0
    while njobsSubmitted < maxJobs:
        # Construct list of jobs per task to submit
        njobsToSubmit = min(batchSize, maxJobs - njobsSubmitted, len(allJobs))
        njobsSubmitted += njobsToSubmit
        batch = allJobs[:njobsToSubmit]
        del allJobs[:njobsToSubmit]
        jobsToSubmit = {}
        for job in batch:
            aux.addToDictList(jobsToSubmit, job.task, job.id)

        # If explicit list of sites to submit was given, get the site to submit this time
        crabOptions = []
        if sites:
            site = sites[siteSubmitIndex]
            siteSubmitIndex = (siteSubmitIndex + 1) % len(sites)
            # NOTE(review): both options are passed to crab as ONE argv
            # element containing a space; confirm crab parses this as
            # intended rather than expecting two separate arguments.
            crabOptions.append("-GRID.se_black_list= -GRID.se_white_list=" + site)

        # Actual submission
        for task, jobs in jobsToSubmit.items():
            pretty = multicrab.prettyJobnums(jobs)
            if len(jobs) == 1:
                pretty += ","  # CRAB thinks one number is number of jobs, the comma translates it to job ID
            command = ["crab", "-c", task, submitCommand, pretty]
            if opts.crabArgs != "":
                # split() without argument drops the empty strings that
                # repeated spaces would otherwise add to argv
                command.extend(opts.crabArgs.split())
            command.extend(crabOptions)
            if opts.addSeBlackList != "":
                # Tasks whose crab.cfg has no GRID section have no entry
                configured = seBlackLists.get(task)
                if configured:
                    lst = configured + "," + opts.addSeBlackList
                else:
                    lst = opts.addSeBlackList
                command.append("-GRID.se_black_list=" + lst)

            print("Submitting %d jobs from task %s" % (len(jobs), task))
            print("Command " + " ".join(command))
            if opts.test:
                continue

            timesLeft = opts.tryAgainTimes if retrying else 1
            while timesLeft > 0:
                ret = subprocess.call(command)
                if ret == 0:
                    break
                timesLeft -= 1
                message = "Command '%s' failed with exit code %d" % (" ".join(command), ret)
                if retrying:
                    print(message)
                    if timesLeft > 0:
                        print("Trying again after %d seconds (%d trials left)" % (opts.tryAgainSeconds, timesLeft))
                        time.sleep(opts.tryAgainSeconds)
                    else:
                        print("No trials left, continuing with next job block")
                elif opts.allowFails:
                    # Failure tolerated: report it and go on
                    print(message)
                else:
                    raise Exception(message)

        # Sleep between submissions
        if njobsSubmitted < maxJobs:
            print("Submitted, sleeping %f seconds" % opts.sleep)
            time.sleep(opts.sleep)
        else:
            print("Submitted")

    return 0
Exemplo n.º 7
0
def main(opts):
    """Print the CRAB job status of every task plus an overall summary.

    For each existing task directory one line is printed with the job
    summaries per status: first the statuses in ``order_done``, then all
    remaining ones sorted by name, and last the statuses in
    ``order_run``.  After the tasks come the summed counts per status,
    ready-to-paste ``crab -resubmit`` commands for the jobs for which
    ``job.failed(opts.resubmit)`` is true and, with ``opts.failedLogs``,
    the stdout paths of those jobs grouped by exit code.

    Returns 0.
    """
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    resubmitJobs = {}
    failedJobs = {}
    stats = {}
    allJobs = 0

    for task in taskDirs:
        if not os.path.exists(task):
            if opts.showMissing:
                print("%s: Task directory missing" % task)
            continue

        jobs = multicrab.crabStatusToJobs(task)

        jobSummaries = {}
        njobs = 0
        for key, item in jobs.items():
            # Distinct hosts of this status; the dict is used as a set
            hosts = {}
            for job in item:
                if job.host is not None:
                    hosts[job.host] = 1
            count = len(item)
            jobSummaries[key] = JobSummary(item, hosts)
            njobs += count
            allJobs += count
            stats[key] = stats.get(key, 0) + count

        # First the succesfully done
        line = "%s (%d jobs):" % (task, njobs)
        for s in order_done:
            if s in jobSummaries:
                line = formatSummaries(opts, line, s, jobSummaries[s])
                del jobSummaries[s]

        # Then the aborted-submitted to the end of the line
        line_end = ""
        for s in order_run:
            if s in jobSummaries:
                line_end = formatSummaries(opts, line_end, s, jobSummaries[s])
                del jobSummaries[s]

        # Then the failed ones to the middle
        for key in sorted(jobSummaries.keys()):
            line = formatSummaries(opts, line, key, jobSummaries[key])
        line += line_end
        if line.endswith(","):
            line = line[:-1]

        print(line)

        # Infer the jobs to be resubmitted
        failed = []
        for joblist in jobs.values():
            for job in joblist:
                if job.failed(opts.resubmit):
                    failed.append((job.id, job.jobExitCode))
        if failed:
            failed.sort()
            resubmitJobs[task] = multicrab.prettyJobnums([x[0] for x in failed])
            for jobId, jobCode in failed:
                multicrab._addToDictList(failedJobs, jobCode, "%s/res/CMSSW_%d.stdout" % (task, jobId))

    print("----------------------------------------")
    print("Summary for %d task(s), total %d job(s):" % (len(taskDirs), allJobs))
    for s in order_done:
        if s in stats:
            print(status_format % (s + ":", stats[s]))
            del stats[s]
    # Lines of the running statuses are held back and printed last
    runningLines = []
    for s in order_run:
        if s in stats:
            runningLines.append(status_format % (s + ":", stats[s]))
            del stats[s]
    for key in sorted(stats.keys()):
        print(status_format % (key + ":", stats[key]))
    for line in runningLines:
        print(line)

    print("----------------------------------------")
    if not resubmitJobs:
        print("No failed/aborted jobs to resubmit")
    else:
        print("Following jobs failed/aborted, and can be resubmitted")
        print("")
        for task in taskDirs:
            if task in resubmitJobs:
                print("crab -c %s -resubmit %s" % (task, resubmitJobs[task]))
        print("")

    if opts.failedLogs:
        print("----------------------------------------")
        print("Log files of failed jobs")
        # NOTE(review): presumably a failed job can lack an exit code
        # (None) -- confirm in multicrab.  The sort key keeps None first
        # and "%s" formats it without raising; integer codes print
        # exactly as with "%d".
        codes = sorted(failedJobs.keys(), key=lambda c: (c is not None, c))
        for code in codes:
            print("")
            print("Job exit code %s:" % code)
            print("\n".join(failedJobs[code]))

    return 0
Exemplo n.º 8
0
def main(opts):
    """Submit or resubmit CRAB jobs of the selected tasks in batches.

    Modes, selected by ``opts.resubmit``:
      * empty string: submit the 'Created' jobs accepted by ``isInRange``;
      * "failed" / "aborted": resubmit the jobs for which
        ``job.failed("all")`` / ``job.failed("aborted")`` is true;
      * anything else: parsed as an explicit job id list (only allowed
        with exactly one task).

    Other options read: ``maxJobs``, ``jobs`` (batch size), ``sleep``,
    ``toSites`` (comma separated, used round-robin per batch),
    ``crabArgs``, ``addSeBlackList``, ``test``, ``allowFails``,
    ``tryAgainTimes`` and ``tryAgainSeconds``.

    Returns 1 if an explicit job id list is combined with several tasks,
    0 otherwise.  Raises ``Exception`` with the failed command when a
    submission fails, retries are disabled and ``opts.allowFails`` is
    not set.
    """
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    resubmitMode = len(opts.resubmit) > 0
    resubmitIdListMode = opts.resubmit not in ["failed", "aborted"]

    if resubmitMode and resubmitIdListMode:
        if len(taskDirs) != 1:
            print("Option '--resubmit job_id_list' can be used with only one task, trying to use with %d tasks" % len(taskDirs))
            return 1
        resubmitJobList = multicrab.prettyToJobList(opts.resubmit)

    # Obtain all jobs to be (re)submitted
    allJobs = []
    seBlackLists = {}
    for task in taskDirs:
        if not os.path.exists(task):
            print("%s: Task directory missing" % task)
            continue

        # Remember the SE black list configured for the task (if any) so
        # that --addSeBlackList can extend it instead of replacing it
        cfgparser = ConfigParser.ConfigParser()
        cfgparser.read(os.path.join(task, "share", "crab.cfg"))
        if cfgparser.has_section("GRID"):
            blacklist = None
            for ao in cfgparser.options("GRID"):
                if ao.lower() == "se_black_list":
                    blacklist = cfgparser.get("GRID", ao)
                    break
            seBlackLists[task] = blacklist

        jobs = multicrab.crabStatusToJobs(task, printCrab=False)
        if not resubmitMode:  # normal submission
            if "Created" not in jobs:
                print("%s: no 'Created' jobs to submit" % task)
                continue
            allJobs.extend([j for j in jobs["Created"] if isInRange(opts, j)])
        elif not resubmitIdListMode:  # resubmit all failed jobs
            status = "aborted" if opts.resubmit == "aborted" else "all"
            for joblist in jobs.values():
                for job in joblist:
                    if job.failed(status):
                        allJobs.append(job)
        else:  # resubmit explicit list of jobs
            for joblist in jobs.values():
                for job in joblist:
                    if job.id in resubmitJobList:
                        allJobs.append(job)
                        # Each requested id is consumed at most once
                        resubmitJobList.remove(job.id)

    # Set the number of maximum jobs to submit
    maxJobs = len(allJobs)
    if opts.maxJobs >= 0 and int(opts.maxJobs) < maxJobs:
        # Store the coerced value so the arithmetic below works on ints
        maxJobs = int(opts.maxJobs)

    # A non-positive batch size would never make progress in the loop
    # below; treat it as "submit everything in a single batch".
    batchSize = opts.jobs
    if batchSize <= 0:
        batchSize = maxJobs

    submitCommand = "-resubmit" if resubmitMode else "-submit"

    sites = []
    siteSubmitIndex = 0
    if len(opts.toSites) > 0:
        sites = opts.toSites.split(",")

    retrying = opts.tryAgainTimes > 0

    # Submission loop
    njobsSubmitted = 0
    while njobsSubmitted < maxJobs:
        # Construct list of jobs per task to submit
        njobsToSubmit = min(batchSize, maxJobs - njobsSubmitted, len(allJobs))
        njobsSubmitted += njobsToSubmit
        batch = allJobs[:njobsToSubmit]
        del allJobs[:njobsToSubmit]
        jobsToSubmit = {}
        for job in batch:
            aux.addToDictList(jobsToSubmit, job.task, job.id)

        # If explicit list of sites to submit was given, get the site to submit this time
        crabOptions = []
        if sites:
            site = sites[siteSubmitIndex]
            siteSubmitIndex = (siteSubmitIndex + 1) % len(sites)
            # NOTE(review): both options are passed to crab as ONE argv
            # element containing a space; confirm crab parses this as
            # intended rather than expecting two separate arguments.
            crabOptions.append("-GRID.se_black_list= -GRID.se_white_list=" + site)

        # Actual submission
        for task, jobs in jobsToSubmit.items():
            pretty = multicrab.prettyJobnums(jobs)
            if len(jobs) == 1:
                pretty += ","  # CRAB thinks one number is number of jobs, the comma translates it to job ID
            command = ["crab", "-c", task, submitCommand, pretty]
            if opts.crabArgs != "":
                # split() without argument drops the empty strings that
                # repeated spaces would otherwise add to argv
                command.extend(opts.crabArgs.split())
            command.extend(crabOptions)
            if opts.addSeBlackList != "":
                # Tasks whose crab.cfg has no GRID section have no entry
                configured = seBlackLists.get(task)
                if configured:
                    lst = configured + "," + opts.addSeBlackList
                else:
                    lst = opts.addSeBlackList
                command.append("-GRID.se_black_list=" + lst)

            print("Submitting %d jobs from task %s" % (len(jobs), task))
            print("Command " + " ".join(command))
            if opts.test:
                continue

            timesLeft = opts.tryAgainTimes if retrying else 1
            while timesLeft > 0:
                ret = subprocess.call(command)
                if ret == 0:
                    break
                timesLeft -= 1
                message = "Command '%s' failed with exit code %d" % (" ".join(command), ret)
                if retrying:
                    print(message)
                    if timesLeft > 0:
                        print("Trying again after %d seconds (%d trials left)" % (opts.tryAgainSeconds, timesLeft))
                        time.sleep(opts.tryAgainSeconds)
                    else:
                        print("No trials left, continuing with next job block")
                elif opts.allowFails:
                    # Failure tolerated: report it and go on
                    print(message)
                else:
                    raise Exception(message)

        # Sleep between submissions
        if njobsSubmitted < maxJobs:
            print("Submitted, sleeping %f seconds" % opts.sleep)
            time.sleep(opts.sleep)
        else:
            print("Submitted")

    return 0
Exemplo n.º 9
0
def main(opts):
    """Print the CRAB job status of every task plus an overall summary.

    For each existing task directory one line is printed with the job
    summaries per status: first the statuses in ``order_done``, then all
    remaining ones sorted by name, and last the statuses in
    ``order_run``.  After the tasks come the summed counts per status,
    ready-to-paste ``crab -resubmit`` commands for the jobs for which
    ``job.failed(opts.resubmit)`` is true and, with ``opts.failedLogs``,
    the stdout paths of those jobs grouped by exit code.

    Returns 0.
    """
    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    resubmitJobs = {}
    failedJobs = {}
    stats = {}
    allJobs = 0

    for task in taskDirs:
        if not os.path.exists(task):
            if opts.showMissing:
                print("%s: Task directory missing" % task)
            continue

        jobs = multicrab.crabStatusToJobs(task)

        jobSummaries = {}
        njobs = 0
        for key, item in jobs.items():
            # Distinct hosts of this status; the dict is used as a set
            hosts = {}
            for job in item:
                if job.host is not None:
                    hosts[job.host] = 1
            count = len(item)
            jobSummaries[key] = JobSummary(item, hosts)
            njobs += count
            allJobs += count
            stats[key] = stats.get(key, 0) + count

        # First the succesfully done
        line = "%s (%d jobs):" % (task, njobs)
        for s in order_done:
            if s in jobSummaries:
                line = formatSummaries(opts, line, s, jobSummaries[s])
                del jobSummaries[s]

        # Then the aborted-submitted to the end of the line
        line_end = ""
        for s in order_run:
            if s in jobSummaries:
                line_end = formatSummaries(opts, line_end, s, jobSummaries[s])
                del jobSummaries[s]

        # Then the failed ones to the middle
        for key in sorted(jobSummaries.keys()):
            line = formatSummaries(opts, line, key, jobSummaries[key])
        line += line_end
        if line.endswith(","):
            line = line[:-1]

        print(line)

        # Infer the jobs to be resubmitted
        failed = []
        for joblist in jobs.values():
            for job in joblist:
                if job.failed(opts.resubmit):
                    failed.append((job.id, job.jobExitCode))
        if failed:
            failed.sort()
            resubmitJobs[task] = multicrab.prettyJobnums([x[0] for x in failed])
            for jobId, jobCode in failed:
                multicrab._addToDictList(failedJobs, jobCode, "%s/res/CMSSW_%d.stdout" % (task, jobId))

    print("----------------------------------------")
    print("Summary for %d task(s), total %d job(s):" % (len(taskDirs), allJobs))
    for s in order_done:
        if s in stats:
            print(status_format % (s + ":", stats[s]))
            del stats[s]
    # Lines of the running statuses are held back and printed last
    runningLines = []
    for s in order_run:
        if s in stats:
            runningLines.append(status_format % (s + ":", stats[s]))
            del stats[s]
    for key in sorted(stats.keys()):
        print(status_format % (key + ":", stats[key]))
    for line in runningLines:
        print(line)

    print("----------------------------------------")
    if not resubmitJobs:
        print("No failed/aborted jobs to resubmit")
    else:
        print("Following jobs failed/aborted, and can be resubmitted")
        print("")
        for task in taskDirs:
            if task in resubmitJobs:
                print("crab -c %s -resubmit %s" % (task, resubmitJobs[task]))
        print("")

    if opts.failedLogs:
        print("----------------------------------------")
        print("Log files of failed jobs")
        # NOTE(review): presumably a failed job can lack an exit code
        # (None) -- confirm in multicrab.  The sort key keeps None first
        # and "%s" formats it without raising; integer codes print
        # exactly as with "%d".
        codes = sorted(failedJobs.keys(), key=lambda c: (c is not None, c))
        for code in codes:
            print("")
            print("Job exit code %s:" % code)
            print("\n".join(failedJobs[code]))

    return 0
Exemplo n.º 10
0
def main(opts):
    """Print (and optionally save) the CRAB job status of every task.

    For each existing task directory one line with the job summaries per
    status is printed; with ``opts.byHost`` the summaries are split per
    execution host and the module-level ``status_format`` is widened.
    After the tasks a summary block follows: summed counts per status,
    ready-to-paste ``crab -resubmit`` commands for the jobs for which
    ``job.failed(opts.resubmit)`` is true and, with ``opts.failedLogs``,
    the stdout paths of those jobs grouped by exit code.  With
    ``opts.save`` everything printed to stdout is also written to
    ``opts.saveFile``.

    Returns 0.  An exception from ``multicrab.crabStatusToJobs``
    propagates unless ``opts.allowFails`` is set, in which case the task
    is skipped.
    """
    global status_format

    taskDirs = multicrab.getTaskDirectories(opts)
    multicrab.checkCrabInPath()

    resubmitJobs = {}
    failedJobs = {}
    stats = {}
    allJobs = 0

    if opts.byHost:
        # Status labels get the host name appended, so widen the column
        status_format = status_format.replace("18s", "40s")

    out = None
    if opts.save:
        out = open(opts.saveFile, "w")

    # try/finally so the save file is closed even if a task fails
    try:
        for task in taskDirs:
            if not os.path.exists(task):
                if opts.showMissing:
                    sys.stderr.write("%s: Task directory missing\n" % task)
                continue

            try:
                jobs = multicrab.crabStatusToJobs(task, opts.printCrab)
            except Exception:
                if not opts.allowFails:
                    raise
                print("%s: crab -status failed" % task)
                continue

            jobSummaries = {}
            njobs = 0
            for key, item in jobs.items():
                # Group the jobs of this status by execution host
                hosts = {}
                for job in item:
                    if job.host is not None:
                        aux.addToDictList(hosts, job.host, job)
                if opts.byHost:
                    for host, joblist in hosts.items():
                        jobSummaries[key + " " + host] = JobSummary(joblist, [host])
                else:
                    jobSummaries[key] = JobSummary(item, hosts)
                count = len(item)
                njobs += count
                allJobs += count
                stats[key] = stats.get(key, 0) + count

            # First the succesfully done
            line = "%s (%d jobs):" % (task, njobs)
            for s in order_done:
                if s in jobSummaries:
                    line = formatSummaries(opts, line, s, jobSummaries[s])
                    del jobSummaries[s]

            # Then the aborted-submitted to the end of the line
            line_end = ""
            for s in order_run:
                if s in jobSummaries:
                    line_end = formatSummaries(opts, line_end, s, jobSummaries[s])
                    del jobSummaries[s]

            # Then the failed ones to the middle
            for key in sorted(jobSummaries.keys()):
                line = formatSummaries(opts, line, key, jobSummaries[key])
            line += line_end
            if line.endswith(","):
                line = line[:-1]

            if out is not None:
                out.write(line)
                out.write("\n")
            print(line)

            # Infer the jobs to be resubmitted
            failed = []
            for joblist in jobs.values():
                for job in joblist:
                    if job.failed(opts.resubmit):
                        failed.append((job.id, job.jobExitCode))
            if failed:
                failed.sort()
                resubmitJobs[task] = multicrab.prettyJobnums([x[0] for x in failed])
                for jobId, jobCode in failed:
                    aux.addToDictList(failedJobs, jobCode, "%s/res/CMSSW_%d.stdout" % (task, jobId))

        # The summary is collected as text chunks so that exactly the
        # same text goes to stdout and to the save file
        summary = []

        summary.append("----------------------------------------\n")
        # The header belongs to the summary block: buffering it keeps it
        # below the separator and includes it in the saved file
        summary.append("Summary for %d task(s), total %d job(s):\n" % (len(taskDirs), allJobs))
        for s in order_done:
            if s in stats:
                summary.append(status_format % (s + ":", stats[s]))
                summary.append("\n")
                del stats[s]
        # Lines of the running statuses are held back and emitted last
        runningLines = []
        for s in order_run:
            if s in stats:
                runningLines.append(status_format % (s + ":", stats[s]))
                del stats[s]
        for key in sorted(stats.keys()):
            summary.append(status_format % (key + ":", stats[key]))
            summary.append("\n")
        for line in runningLines:
            summary.append(line)
            summary.append("\n")

        summary.append("----------------------------------------\n")
        if not resubmitJobs:
            summary.append("No failed/aborted jobs to resubmit\n")
        else:
            summary.append("Following jobs failed/aborted, and can be resubmitted\n\n")
            for task in taskDirs:
                if task in resubmitJobs:
                    summary.append("crab -c %s -resubmit %s\n" % (task, resubmitJobs[task]))
            summary.append("\n")

        if opts.failedLogs:
            summary.append("----------------------------------------\n")
            summary.append("Log files of failed jobs\n")
            # NOTE(review): presumably a failed job can lack an exit
            # code (None) -- confirm in multicrab.  The sort key keeps
            # None first and "%s" formats it without raising; integer
            # codes print exactly as with "%d".
            codes = sorted(failedJobs.keys(), key=lambda c: (c is not None, c))
            for code in codes:
                summary.append("\nJob exit code %s:\n" % code)
                summary.append("\n".join(failedJobs[code]))
                summary.append("\n")

        summaryText = "".join(summary)
        if out is not None:
            out.write(summaryText)
        print(summaryText)
    finally:
        if out is not None:
            out.close()

    return 0