Пример #1
0
def main():
    """
    Main routine: Report how much 'sw' queue usage comes from small jobs

    Every accounting file given on the command line is processed on its own.
    For each job whose queue name contains 'sw', cores*walltime is added to
    one of two totals, depending on whether the job used fewer than 12 cores.
    When both totals are positive the file name and the small-job fraction
    are printed.  Finally the mean of the fractions and the standard error
    of that mean are printed.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0]  \
                 + ' [Accounting files]')
    joblist = sys.argv[1:]

    swfracs = list()
    for date in joblist:
        # Each file is loaded separately so that one fraction per file results
        jobs = jobstats.alljobs([date])
        corehrslt = 0.0
        corehrsgt = 0.0
        for job in jobs:
            if 'sw' in job.queue:
                if job.cores < 12:
                    corehrslt += job.cores*job.walltime
                else:
                    corehrsgt += job.cores*job.walltime
        # Files lacking either kind of job are left out of the statistics
        if corehrslt > 0 and corehrsgt > 0:
            frac = corehrslt/(corehrslt + corehrsgt)
            print(date + " " + str(frac))
            swfracs.append(frac)

    print("===Mean===")
    if not swfracs:
        # Nothing to average; avoid nan output and a division by zero
        print("No accounting file contained both small and large 'sw' jobs")
        return
    # Standard error of the mean is std/sqrt(N), not std/N
    sem = np.std(swfracs)/np.sqrt(len(swfracs))
    print(str(np.mean(swfracs)) + "+/-" + str(sem))

    return
Пример #2
0
def main():
    """
    Main routine: Print stats for busy nodes with poor job outcomes

    Jobs from the accounting files are grouped by node, ignoring jobs whose
    queue name contains 'debug', 'lmgpu', 'scalemp' or 'class'.  A node is
    reported when it has more than 20 jobs and either avgEfficiency() is
    below 0.1 or fractionBad() is above 0.9.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] +  ' [Accounting files]')
    accounting_files = sys.argv[1:]

    skipped_queues = ('debug', 'lmgpu', 'scalemp', 'class')
    per_node = {}
    for job in jobstats.alljobs(accounting_files):
        if any(name in job.queue for name in skipped_queues):
            continue
        for node in job.nodes:
            if node in per_node:
                per_node[node].joblist.append(job)
            else:
                # First sighting of this node: the record is built from the job
                per_node[node] = jobstats.nodeClass(job, node)

    for node in per_node:
        record = per_node[node]
        if len(record.joblist) <= 20:
            continue
        if record.avgEfficiency() < 0.1 or record.fractionBad() > 0.9:
            record.printStats()
Пример #3
0
def main():
    """
    Main routine: Print ppn usage for users who requested partial nodes

    Jobs are grouped per user.  Users are visited in sorted order, and those
    with at least one job whose ppn lies strictly between 1 and 12 have their
    name printed followed by printTopProp('ppn').
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] \
                 + ' [Accounting files]')
    accounting_files = sys.argv[1:]

    by_user = {}
    for job in jobstats.alljobs(accounting_files):
        if job.user not in by_user:
            # The user record is created from the user's first job
            by_user[job.user] = jobstats.userClass(job)
        else:
            by_user[job.user].addJob(job)

    for name in sorted(by_user):
        record = by_user[name]
        if not any(1 < job.ppn < 12 for job in record.joblist):
            continue
        print(name + ':')
        record.printTopProp('ppn')

    return
Пример #4
0
def main():
    """
    Main routine: Print all jobs and queue-time statistics for one user

    The first argument is matched as a substring of each job's user name.
    All matching jobs are printed, followed by the mean queue time with its
    standard error, the maximum queue time (both divided by 3600 and labelled
    as hours) and the number of jobs.  Exits with a message when no job
    matches.
    """
    if len(sys.argv) < 3:
        sys.exit('Usage: ' + sys.argv[0] + ' username ' \
                 + ' [Accounting files]')
    username = sys.argv[1]
    joblist = sys.argv[2:]

    user = None
    jobs = jobstats.alljobs(joblist)
    qtimes = list()
    for job in jobs:
        if username in job.user:
            qtimes.append(job.tiq)
            if user is None:
                # The user record is created from the first matching job
                user = jobstats.userClass(job)
            else:
                user.addJob(job)

    if user is None:
        # Without this guard the calls below fail on None / an empty list
        sys.exit('No jobs found for user ' + username)

    user.printAllJobs()
    print("mean qtime: (" + str(np.mean(qtimes)/3600.0) + " +/- " \
          + str(np.std(qtimes)/(np.sqrt(len(qtimes))*3600.0)) + ") hours")
    print("max qtime: " + str(max(qtimes)/3600.0) + " hours")
    print("number of jobs: " + str(len(user.joblist)))
    return
Пример #5
0
def main():
    """
    Main routine: Show information about jobs with negative exit codes

    Jobs whose efficiency() exceeds 10000.0 are listed as they are read.
    Jobs with a negative exit code are collected into a job group whose
    stats and top users, nodes, queues and exit codes are printed.  If no
    such job exists a short notice is printed instead.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] \
                 + ' [Accounting files]')
    joblist = sys.argv[1:]

    probjobs = None
    jobs = jobstats.alljobs(joblist)
    for job in jobs:
        efficiency = job.efficiency()
        if efficiency > 10000.0:
            print(str(job.id) + " " + str(efficiency))
        if job.exitcode < 0:
            if probjobs is not None:
                probjobs.addJob(job)
            else:
                # The group is created from the first problem job
                probjobs = jobstats.jobGroup(job)

    if probjobs is None:
        # Previously this case crashed with an AttributeError on None
        print("No jobs with negative exit codes found")
    else:
        probjobs.printStats()
        print("Top Users:")
        probjobs.printTopProp('user')
        print("Top Nodes:")
        probjobs.printTopProp('node')
        print("Top Queues:")
        probjobs.printTopProp('queue')
        print("Top Exit Codes:")
        probjobs.printTopProp('exitcode')
    print("Super efficient Jobs:")
    if probjobs is not None:
        probjobs.printSuperEff()
    # NOTE(review): assumes the object returned by alljobs() provides
    # printSuperEff(); confirm against jobstats
    jobs.printSuperEff()

    return
Пример #6
0
def main():
    """
    Main routine: Plot queue time against requested resources

    Single-core jobs give log10 queue time versus log10 requested walltime
    (QueueTimes.png); multi-core jobs give log10 queue time versus log10
    requested cores*walltime (CHvsQT.png).  Both inputs are divided by 3600
    before the logarithm.  Each plot is a hexbin with a dashed reference line
    and a straight-line fit to the points where both coordinates exceed -1.0.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] +  ' [Accounting files]')
    accounting_files = sys.argv[1:]

    def fit_above_cutoff(xs, ys):
        """Return the kept x values and a linear fit to pairs above -1.0."""
        kept = [(x, y) for x, y in zip(xs, ys) if y > -1.0 and x > -1.0]
        kept_x = [pair[0] for pair in kept]
        kept_y = [pair[1] for pair in kept]
        return kept_x, np.poly1d(np.polyfit(kept_x, kept_y, 1))

    def save_plot(xs, ys, fit_x, fit, xlabel, filename):
        """Draw the hexbin, reference line and fit, then write the image."""
        plt.cla()
        plt.hexbin(xs, ys, bins='log')
        # Dashed line spans the range of the queue-time values on both axes
        plt.plot([min(ys), max(ys)], [min(ys), max(ys)], 'k--')
        plt.plot(fit_x, fit(fit_x), 'k')
        plt.xlabel(xlabel)
        plt.ylabel('log(Queue Time (hours))')
        plt.savefig(filename)

    serial_waits = []
    serial_requests = []
    parallel_requests = []
    parallel_waits = []
    for job in jobstats.alljobs(accounting_files):
        if job.cores > 1 and job.tiq > 0 and not np.isnan(job.tiq):
            parallel_waits.append(np.log10(job.tiq/3600.0))
            parallel_requests.append(np.log10(job.cores*job.walltimereq/3600.0))
        elif job.cores == 1 and job.tiq > 0 and not np.isnan(job.tiq):
            serial_waits.append(np.log10(job.tiq/3600.0))
            serial_requests.append(np.log10(job.walltimereq/3600.0))

    print('max qt:' + str(max(serial_waits)))
    spread = np.std(serial_waits)/np.sqrt(len(serial_waits))
    print('avg qt:' + str(np.mean(serial_waits)) + '+/-' + str(spread))

    fit_x, fit = fit_above_cutoff(serial_requests, serial_waits)
    save_plot(serial_requests, serial_waits, fit_x, fit,
              'log(Walltime (hours))', 'QueueTimes.png')

    fit_x, fit = fit_above_cutoff(parallel_requests, parallel_waits)
    save_plot(parallel_requests, parallel_waits, fit_x, fit,
              'log(Core Hours)', 'CHvsQT.png')
Пример #7
0
def main():
    """
    Main routine: Plot job event-time histograms with crash markers

    Jobs created after the hard-coded reset time that have a positive start
    time contribute their creation, eligible, queued, start and end times.
    Each series is drawn as a histogram with 900-wide bins on its own
    subplot, with vertical lines at the hard-coded crash times, and the
    figure is written to ctimes.png.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: " + sys.argv[0] + " [Accounting files]")
    joblist = sys.argv[1:]

    # Plain decimal literals: a leading zero (05) is a syntax error on Python 3
    reset_time = dt.datetime(2013, 5, 10, 19, 30)
    reset_time_unix = time.mktime(reset_time.timetuple())
    crash_times = [
        dt.datetime(2013, 5, 11, 0, 50),
        dt.datetime(2013, 5, 11, 7, 0),
        dt.datetime(2013, 5, 11, 7, 25),
        dt.datetime(2013, 5, 11, 8, 18),
        dt.datetime(2013, 5, 11, 8, 50),
        dt.datetime(2013, 5, 11, 10, 55),
        dt.datetime(2013, 5, 13, 2, 43),
        dt.datetime(2013, 5, 13, 6, 46),
        dt.datetime(2013, 5, 13, 7, 14),
        dt.datetime(2013, 5, 13, 11, 55),
    ]
    crash_times_unix = [time.mktime(ct.timetuple()) for ct in crash_times]

    jobs = jobstats.alljobs(joblist)
    ctimes = list()
    etimes = list()
    qtimes = list()
    starts = list()
    ends = list()
    for job in jobs:
        if job.ctime > reset_time_unix and job.start > 0:
            ctimes.append(job.ctime)
            etimes.append(job.etime)
            qtimes.append(job.qtime)
            starts.append(job.start)
            ends.append(job.end)

    if not ctimes:
        # max()/min() below would fail on empty lists
        sys.exit("No started jobs created after " + str(reset_time))

    plt.cla()
    f, axes = plt.subplots(5, sharex=True, sharey=False, figsize=(30, 15))
    series = [
        (ctimes, "Created"),
        (etimes, "Eligible"),
        (qtimes, "Queued"),
        (starts, "Started"),
        (ends, "Finished"),
    ]
    for ax, (times, label) in zip(axes, series):
        # The bin count must be a positive integer; true division gives a float
        nbins = max(1, int((max(times) - min(times)) // 900))
        ax.hist(times, bins=nbins, color="k", label=label)
        ax.vlines(crash_times_unix, 0, ax.get_ylim()[1], "b")
        ax.legend(loc=2, borderaxespad=0.0)
    plt.xlabel("Unix Epoch Time")
    f.subplots_adjust(hspace=0)
    plt.savefig("ctimes.png")
Пример #8
0
def main():
    """
    Main routine: Print the mean queue time of the given accounting files

    Prints one line "<files> <mean tiq> <standard error>" when there is at
    least one job and the mean is positive; otherwise prints nothing.  A
    leading '/tmp/' is removed from each file name in the label.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] +  ' [Accounting files]')
    joblist = sys.argv[1:]

    jobs = jobstats.alljobs(joblist)
    qts = [job.tiq for job in jobs]
    if not qts:
        return

    meanqt = np.mean(qts)
    if meanqt > 0.0:
        errqt = np.std(qts)/np.sqrt(len(qts))
        # str.strip("[]'/tmp") removes a *set of characters* from both ends,
        # which also eats leading/trailing t, m and p of the file name itself;
        # remove the '/tmp/' prefix explicitly instead.
        prefix = '/tmp/'
        labels = [name[len(prefix):] if name.startswith(prefix) else name
                  for name in joblist]
        print(','.join(labels) + ' ' + str(meanqt) + ' ' + str(errqt))
Пример #9
0
def main():
    """
    Main routine: Count the jobs submitted to a given queue

    The first argument is matched as a substring of each job's queue name;
    the number of matching jobs and the total number of jobs are printed.
    """
    if len(sys.argv) < 3:
        # The first argument is a queue name, not a user name
        sys.exit("Usage: " + sys.argv[0] + " queue [Accounting files]")
    queue = sys.argv[1]
    joblist = sys.argv[2:]

    jobs = jobstats.alljobs(joblist)

    print("searching queue " + queue)
    qnum = sum(1 for job in jobs if queue in job.queue)
    print("Queue " + queue + " used in " + str(qnum) + " of " + str(len(jobs)) + " jobs")
    return
Пример #10
0
def main():
    """
    Main routine: Scans through jobs and identifies jobs that were running at the specified time

    The first positional argument is the time, converted by gettime()
    together with the optional --date; the remaining arguments are
    accounting files.  With --node only jobs touching one of the given nodes
    are reported.  Each reported job is printed as its id, or as full stats
    with --summary.
    """
    #Parse command-line options
    # NOTE(review): "HH:MM:DD" in the usage text may be meant as HH:MM:SS;
    # confirm against gettime()
    usage = "usage: %prog [options] [time (HH:MM:DD or Epoch time)] [Accounting files]"
    parser = OptionParser(usage=usage)
    parser.add_option("-n", "--node", dest="node", action="append",\
                          type="string", help="Search only for specified node(s)")
    parser.add_option("-d", "--date", dest="date", action="store",\
                          type="string", help="Specify a date in YYYY-MM-DD format (default: today)")
    parser.add_option("-s", "--summary", dest="summarize", action="store_true",\
                          default=False, help="Print a summary of each job (default: jobID only)")

    (options, args) = parser.parse_args()

    if len(args) < 2:
        sys.exit(usage + '\n --help for list of options')
    # Named 'when' so the local does not shadow the 'time' module name
    when = gettime(args[0], options.date)
    joblist = args[1:]

    #Filters prevent storing undesired jobs in the jobs list
    #(options.node is None when no --node was given)
    jobs = jobstats.alljobs(joblist, options.node)

    for job in jobs:
        if not job.wasRunningAt(when):
            continue
        #Skip jobs on nodes that weren't specified
        if options.node is not None and \
           not any(n in job.nodes for n in options.node):
            continue
        if options.summarize:
            job.printStats()
        else:
            # print() call form works on both Python 2 and Python 3
            print(job.id)

    return
Пример #11
0
def main():
    """
    Main routine: Cross reference a list of nodes to find common jobs

    Prints the stats of every job whose node list contains all of the
    hard-coded nodes below.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0]  \
                 + ' [Accounting files]')
    # Accounting files start at argv[1]; slicing from 2 dropped the first file
    joblist = sys.argv[1:]

    jobs = jobstats.alljobs(joblist)
    nodelist = ['sw-2r13-n50', 'sw-2r13-n51', 'sw-2r14-n20', 'sw-2r15-n17', 'sw-2r15-n44', 'sw-2r15-n66']

    print('cross referencing nodes:')
    print(nodelist)
    for job in jobs:
        if all(node in job.nodes for node in nodelist):
            job.printStats()

    return
Пример #12
0
def main():
    """
    Main routine: Process total core days used by all jobs

    Sums cores*walltime over every job, divides the total by 24*3600 and
    prints the result with one decimal place.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0]  \
                 + ' [Accounting files]')
    accounting_files = sys.argv[1:]

    divisor = 24.0*3600.0
    total = sum((job.cores*job.walltime
                 for job in jobstats.alljobs(accounting_files)), 0.0)
    print('%.1f' % (total / divisor))

    return
Пример #13
0
def main():
    """
    Main routine: Plot core hours used as a function of job core count

    Accumulates cores*walltime/3600 per core count for jobs with fewer than
    1000 cores and saves a bar chart to corehoursvprocs.png.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] +  ' [Accounting files]')
    joblist = sys.argv[1:]

    max_cores = 1000
    corehours = np.zeros(max_cores)
    jobs = jobstats.alljobs(joblist)
    for job in jobs:
        # Lower bound keeps a negative count from indexing from the end
        if 0 <= job.cores < max_cores:
            corehours[job.cores] += job.cores*job.walltime/3600.0

    plt.cla()
    # Bin i holds the jobs with i cores, so the x positions must start at 0;
    # range(1, 1001) drew every bar one core too far to the right.
    plt.bar(range(max_cores), list(corehours))
    plt.xlabel('Cores')
    plt.ylabel('Core hours')
    plt.savefig('corehoursvprocs.png')
    return
Пример #14
0
def main():
    """
    Main routine: Print User stats

    The first argument is matched as a substring of each job's user name;
    all matching jobs are gathered into one user record whose stats are
    printed.  Exits with a message when no job matches.
    """
    if len(sys.argv) < 3:
        sys.exit('Usage: ' + sys.argv[0] + ' username ' \
                 + ' [Accounting files]')
    username = sys.argv[1]
    joblist = sys.argv[2:]

    user = None
    jobs = jobstats.alljobs(joblist)
    for job in jobs:
        if username in job.user:
            if user is None:
                # NOTE(review): other scripts call jobstats.userClass(job);
                # confirm that jobstats.user is the intended constructor
                user = jobstats.user(job)
            else:
                user.addJob(job)

    if user is None:
        # Previously this case crashed with an AttributeError on None
        sys.exit('No jobs found for user ' + username)

    user.printStats()
    return
Пример #15
0
def main():
    """
    Main routine: Print the percentage of jobs ending with each exit code

    Exit codes are printed in ascending order of frequency, one
    "<exit code> <percentage of all jobs>" pair per line.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0]  \
                 + ' [Accounting files]')
    joblist = sys.argv[1:]

    exitCodes = {}
    jobs = jobstats.alljobs(joblist)
    for job in jobs:
        exitCodes[job.exitcode] = exitCodes.get(job.exitcode, 0) + 1

    # items() exists on both Python 2 and 3; iteritems() is Python 2 only
    sortedecs = sorted(exitCodes.items(), key=operator.itemgetter(1))

    total = len(jobs)
    for ec, quant in sortedecs:
        # Single-argument print() gives the same output on Python 2 and 3
        print(str(ec) + ' ' + str(100.0*quant/total))
    return
Пример #16
0
def main():
    """
    Main routine: Compare queue times of jobs with ppn == 0 and ppn != 0

    Multi-core jobs with a positive queue time are split by whether ppn is
    zero.  For each group, log10 queue time is related to log10 requested
    cores*walltime (both divided by 3600) by a straight-line fit to the
    points above -1.0, and the fit coefficients are printed.  Two hexbin
    plots (procs.png, ppn.png) are saved, each showing both fitted lines.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] +  ' [Accounting files]')
    accounting_files = sys.argv[1:]

    def fit_above_cutoff(xs, ys):
        """Return kept x values, fit coefficients and the fitted polynomial."""
        kept_x = []
        kept_y = []
        for x, y in zip(xs, ys):
            if y > -1.0 and x > -1.0:
                kept_x.append(x)
                kept_y.append(y)
        coeffs = np.polyfit(kept_x, kept_y, 1)
        return kept_x, coeffs, np.poly1d(coeffs)

    waits_ppn = []
    waits_procs = []
    requests_ppn = []
    requests_procs = []
    for job in jobstats.alljobs(accounting_files):
        if not (job.cores > 1 and job.tiq > 0 and not np.isnan(job.tiq)):
            continue
        if job.ppn == 0:
            waits, requests = waits_procs, requests_procs
        else:
            waits, requests = waits_ppn, requests_ppn
        waits.append(np.log10(job.tiq/3600.0))
        requests.append(np.log10(job.cores*job.walltimereq/3600.0))

    fit_x_procs, coeffs_procs, line_procs = fit_above_cutoff(requests_procs,
                                                             waits_procs)
    print(coeffs_procs)

    fit_x_ppn, coeffs_ppn, line_ppn = fit_above_cutoff(requests_ppn, waits_ppn)
    print(coeffs_ppn)

    # ppn == 0 jobs, with both fits overlaid (ppn fit yellow, procs fit white)
    plt.cla()
    plt.hexbin(requests_procs, waits_procs, bins='log')
    plt.plot(fit_x_ppn, line_ppn(fit_x_ppn), 'y', linewidth=4)
    plt.plot(fit_x_procs, line_procs(fit_x_procs), 'w', linewidth=4)
    plt.xlabel('log(Core hours requested)')
    plt.ylabel('log(Queue Time (hours))')
    plt.savefig('procs.png')

    # ppn != 0 jobs, with the same two fits overlaid
    plt.cla()
    plt.hexbin(requests_ppn, waits_ppn, bins='log')
    plt.plot(fit_x_procs, line_procs(fit_x_procs), 'w', linewidth=4)
    plt.plot(fit_x_ppn, line_ppn(fit_x_ppn), 'y', linewidth=4)
    plt.xlabel('log(Core hours requested)')
    plt.ylabel('log(Queue Time (hours))')
    plt.savefig('ppn.png')
Пример #17
0
def main():
    """
    Main routine: Plot job event-time histograms with crash markers

    Jobs created after the hard-coded reset time that have a positive start
    time contribute their creation, eligible, queued, start and end times;
    each such job's start time is printed, as is the id of any job with a
    negative timestamp.  Each series is drawn as a histogram with 900-wide
    bins on its own subplot, with vertical lines at the hard-coded crash
    times, and the figure is written to ctimes.temp.png.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: ' + sys.argv[0] +  ' [Accounting files]')
    joblist = sys.argv[1:]

    # Plain decimal literals: a leading zero (05) is a syntax error on Python 3
    reset_time = dt.datetime(2013, 5, 10, 19, 30)
    reset_time_unix = time.mktime(reset_time.timetuple())
    crash_times = [
        dt.datetime(2013, 5, 11, 0, 50),
        dt.datetime(2013, 5, 11, 7, 0),
        dt.datetime(2013, 5, 11, 7, 25),
        dt.datetime(2013, 5, 11, 8, 18),
        dt.datetime(2013, 5, 11, 8, 50),
        dt.datetime(2013, 5, 11, 10, 55),
        dt.datetime(2013, 5, 13, 2, 43),
        dt.datetime(2013, 5, 13, 6, 46),
        dt.datetime(2013, 5, 13, 7, 14),
        dt.datetime(2013, 5, 13, 11, 31),
        dt.datetime(2013, 5, 13, 11, 41),
        dt.datetime(2013, 5, 13, 11, 55),
    ]
    crash_times_unix = [time.mktime(ct.timetuple()) for ct in crash_times]

    jobs = jobstats.alljobs(joblist)
    ctimes = list()
    etimes = list()
    qtimes = list()
    starts = list()
    ends = list()
    for job in jobs:
        if job.ctime > reset_time_unix and job.start > 0:
            # Diagnostic output kept from the original script
            print(job.start)
            if job.ctime < 0 or job.etime < 0 or job.qtime < 0 or job.start < 0 or job.end < 0:
                print(job.id)

            ctimes.append(job.ctime)
            etimes.append(job.etime)
            qtimes.append(job.qtime)
            starts.append(job.start)
            ends.append(job.end)

    if not ctimes:
        # max()/min() below would fail on empty lists
        sys.exit('No started jobs created after ' + str(reset_time))

    plt.cla()
    f, axes = plt.subplots(5, sharex=True, sharey=False, figsize=(20, 10))
    series = [
        (ctimes, 'Created'),
        (etimes, 'Eligible'),
        (qtimes, 'Queued'),
        (starts, 'Started'),
        (ends, 'Finished'),
    ]
    for ax, (times, label) in zip(axes, series):
        # The bin count must be a positive integer; true division gives a float
        nbins = max(1, int((max(times) - min(times)) // 900))
        ax.hist(times, bins=nbins, color='k', label=label)
        ax.vlines(crash_times_unix, 0, ax.get_ylim()[1], 'b')
        ax.legend(bbox_to_anchor=(1, 1), loc=2, borderaxespad=0.)
    plt.xlabel('Unix Epoch Time')
    f.subplots_adjust(hspace=0, right=0.8)
    plt.savefig('ctimes.temp.png')