def main(): parser = argparse.ArgumentParser() parser.add_argument('-p', help='Set number of processes', nargs=1, type=int, default=[1]) parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?',default='jobs') n=parser.parse_args() filelist=tspl_utils.getfilelist(n.filearg) pool = multiprocessing.Pool(processes=n.p[0]) m = multiprocessing.Manager() jobs = m.dict() check_partial=functools.partial(do_check,jobs=jobs) pool.map(check_partial,filelist) pool.close() pool.join() total=0. hasexec=0. for i in jobs.keys(): total += 1. if jobs[i]!='unknown': hasexec += 1. print str(total) + ' ' + str(hasexec) + ' ' + str(hasexec/total)
def main():
  """Plot per-host MemUsed-AnonPages (baselined to the first sample) for jobs."""
  parser = argparse.ArgumentParser(description='Plot MemUsed-AnonPages for jobs')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)

  for file in filelist:
    try:
      # Two 'mem' series per host: MemUsed and AnonPages.
      ts=tspl.TSPLSum(file,['mem','mem'],['MemUsed','AnonPages'])
    except tspl.TSPLException as e:
      continue
    # Keep only jobs of at least 1 hour on 16-way hosts.
    if not tspl_utils.checkjob(ts,3600,16):
      continue
    else:
      print ts.j.id
    fig=plt.figure()
    ax=fig.gca()
    ax.hold=True
    for k in ts.j.hosts.keys():
      # MemUsed - AnonPages, shifted so each host starts at zero.
      m=ts.data[0][k][0]-ts.data[1][k][0]
      m-=ts.data[0][k][0][0]
      ax.plot(ts.t/3600.,m)
    ax.set_ylabel('MemUsed - AnonPages ' +
                  ts.j.get_schema(ts.k1[0])[ts.k2[0]].unit)
    ax.set_xlabel('Time (hr)')
    plt.suptitle(ts.title)
    fname='graph_'+ts.j.id+'_'+ts.k1[0]+'_'+ts.k2[0]+'.png'
    fig.savefig(fname)
    plt.close()
def main(): parser = argparse.ArgumentParser() parser.add_argument('-p', help='Set number of processes', nargs=1, type=int, default=[1]) parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?', default='jobs') n = parser.parse_args() filelist = tspl_utils.getfilelist(n.filearg) pool = multiprocessing.Pool(processes=n.p[0]) m = multiprocessing.Manager() jobs = m.dict() check_partial = functools.partial(do_check, jobs=jobs) pool.map(check_partial, filelist) pool.close() pool.join() total = 0. hasexec = 0. for i in jobs.keys(): total += 1. if jobs[i] != 'unknown': hasexec += 1. print str(total) + ' ' + str(hasexec) + ' ' + str(hasexec / total)
def main():
  """Report jobs flagged by do_isidle as having a single highly idle host."""
  parser = argparse.ArgumentParser(description='Find jobs with a single highly'
                                   ' idle host')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('-t', metavar='threshold', help='Treshold idleness',
                      nargs=1, default=[0.001])
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  n=parser.parse_args()

  # A host busier than (1 - t) of the mean is considered active.
  thresh=1.-n.t[0]
  filelist=tspl_utils.getfilelist(n.filearg)

  pool = multiprocessing.Pool(processes=n.p[0])
  m = multiprocessing.Manager()
  idleness = m.dict()  # filename -> True when the job has an idle host
  partial_isidle=functools.partial(do_isidle,thresh=thresh,idleness=idleness)
  if len(filelist) != 0:
    pool.map(partial_isidle,filelist)
    pool.close()
    pool.join()

  print '----------- Idle Jobs -----------'
  for i in idleness.keys():
    if idleness[i]:
      print i.split('/')[-1]
def main():
  """Print CSV rows (job id, host, time, rates...) for selected key pairs."""
  parser = argparse.ArgumentParser(description="Dump CSV for a key pair for some jobs")
  parser.add_argument("-k1", help="Set first key", nargs="+",
                      type=str, default=["amd64_sock"])
  parser.add_argument("-k2", help="Set second key", nargs="+",
                      type=str, default=["DRAM"])
  parser.add_argument("-f", help="File, directory, or quoted"
                      " glob pattern", nargs=1, type=str, default=["jobs"])
  n = parser.parse_args()
  filelist = tspl_utils.getfilelist(n.f[0])

  for file in filelist:
    try:
      ts = tspl.TSPLSum(file, n.k1, n.k2)
    except tspl.TSPLException as e:
      continue
    if not tspl_utils.checkjob(ts, 0, 16):
      continue
    elif ts.numhosts < 2:
      print ts.j.id + ": 1 host"
      continue
    # Interval midpoints; rates below are finite differences per interval.
    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
    for k in ts.j.hosts.keys():
      rates = [numpy.divide(numpy.diff(ts.data[x][k][0]), numpy.diff(ts.t))
               for x in range(len(ts.data))]
      for i in range(len(tmid)):
        v = [rates[x][i] for x in range(len(ts.data))]
        print ",".join([ts.j.id, k, str(tmid[i])] + [str(x) for x in v])
def main():
  """Print CSV rows (job id, host, time, rates...) for selected key pairs."""
  parser = argparse.ArgumentParser(description='Dump CSV for a key pair for some jobs')
  parser.add_argument('-k1', help='Set first key', nargs='+',
                      type=str, default=['amd64_sock'])
  parser.add_argument('-k2', help='Set second key', nargs='+',
                      type=str, default=['DRAM'])
  parser.add_argument('-f', help='File, directory, or quoted'
                      ' glob pattern', nargs=1, type=str, default=['jobs'])
  n=parser.parse_args()
  filelist=tspl_utils.getfilelist(n.f[0])

  for file in filelist:
    try:
      ts=tspl.TSPLSum(file,n.k1,n.k2)
    except tspl.TSPLException as e:
      continue
    if not tspl_utils.checkjob(ts,0,16):
      continue
    elif ts.numhosts < 2:
      print ts.j.id + ': 1 host'
      continue
    # Interval midpoints; rates below are finite differences per interval.
    tmid=(ts.t[:-1]+ts.t[1:])/2.0
    for k in ts.j.hosts.keys():
      rates=[numpy.divide(numpy.diff(ts.data[x][k][0]),numpy.diff(ts.t))
             for x in range(len(ts.data))]
      for i in range(len(tmid)):
        v=[rates[x][i] for x in range(len(ts.data))]
        print ','.join([ts.j.id,k,str(tmid[i])]+[str(x) for x in v])
def main():
  """Histogram an estimate of per-core memory usage across jobs."""
  parser = argparse.ArgumentParser(description='Guesstimate per core usage')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  n=parser.parse_args()
  filelist=tspl_utils.getfilelist(n.filearg)

  procs = min(len(filelist),n.p[0])
  pool = multiprocessing.Pool(processes=procs)

  # mem_usage returns a list per job; flatten into one sample list.
  mpc=pool.map(mem_usage,filelist)
  mpc=list(itertools.chain.from_iterable(mpc))

  print mpc
  hist,bins=numpy.histogram(mpc,30)

  fig,ax=plt.subplots(1,1,figsize=(8,8),dpi=80)
  # plt.subplots_adjust(hspace=0.35)
  ax.bar(bins[:-1], hist, width = min(numpy.diff(bins)))
  ax.set_xlim(min(bins), max(bins))
  fname='mempercore'
  fig.savefig(fname)
  plt.close()
def main():
  """Flag jobs whose DRAM rate fits a step function and master-plot them."""
  parser = argparse.ArgumentParser(description='')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('-o', help='Output directory',
                      nargs=1, type=str, default=['.'], metavar='output_dir')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  n=parser.parse_args()
  filelist=tspl_utils.getfilelist(n.filearg)

  k1=['amd64_sock']
  k2=['DRAM']

  pool = multiprocessing.Pool(processes=n.p[0])
  m = multiprocessing.Manager()
  res = m.dict()  # filename -> list of (index, ratio) step-fit results
  fit_partial=functools.partial(fit_step,k1=k1,k2=k2,genplot=False,res=res)
  if len(filelist) != 0:
    pool.map(fit_partial,filelist)
    pool.close()
    pool.join()

  for fn in res.keys():
    for (ind,ratio) in res[fn]:
      # A near-zero post/pre ratio indicates a hard step in the rate.
      if ratio < 1e-3:
        print fn + ': ' + str(res[fn])
        masterplot.master_plot(fn,'lines',False,n.o[0],'step',
                               1,[x+1 for x in range(16)],
                               header='Step Function Performance')
        break
def main():
  """Compute per-job summary metrics in parallel and write them to dump.csv.

  Each do_compute result is one pre-formatted CSV line (or falsy to skip).
  """
  parser = argparse.ArgumentParser(description='Dump CSV keys for Weijia.')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?', default='jobs')
  n = parser.parse_args()

  filelist = tspl_utils.getfilelist(n.filearg)

  pool = multiprocessing.Pool(processes=n.p[0])
  # Bug fix: res was unbound when filelist was empty, raising NameError
  # in the write loop below; default to an empty result list.
  res = []
  if len(filelist) != 0:
    res = pool.map(do_compute, filelist)
  pool.close()
  pool.join()

  with open('dump.csv', 'w') as file:
    file.write('# Job Id, Username, Executable, Mean DRAM BW, ' +
               'Mean Stall Fraction, Mean DRAM/L1, Mean Flops\n')
    for line in res:
      if line:
        file.write(line + '\n')
def main():
  """Rank jobs by host imbalance for one key pair and report top users."""
  parser = argparse.ArgumentParser(description='Look for imbalance between'
                                   'hosts for a pair of keys')
  parser.add_argument('threshold', help='Treshold ratio for std dev:mean',
                      nargs='?', default=0.25)
  parser.add_argument('key1', help='First key', nargs='?',
                      default='amd64_core')
  parser.add_argument('key2', help='Second key', nargs='?',
                      default='SSE_FLOPS')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?', default='jobs')
  parser.add_argument('-f', help='Set full mode', action='store_true')
  parser.add_argument('-n', help='Disable plots', action='store_true')
  n = parser.parse_args()

  filelist = tspl_utils.getfilelist(n.filearg)

  ratios = {}  # Place to store per job ranking metric
  for file in filelist:
    # plot_flag is the inverse of -n; full_flag comes from -f.
    compute_imbalance(file, [n.key1], [n.key2],
                      float(n.threshold), not n.n, n.f, ratios)
  # Find the top bad users and their jobs
  find_top_users(ratios)
def main(): parser = argparse.ArgumentParser(description='Dump CSV for a key pair for some jobs') parser.add_argument('key1', help='First key', nargs='?', default='amd64_core') parser.add_argument('key2', help='Second key', nargs='?', default='SSE_FLOPS') parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?',default='jobs') n=parser.parse_args() filelist=tspl_utils.getfilelist(n.filearg) print sys.argv[3] for file in filelist: try: ts=tspl.TSPLSum(file,[n.key1],[n.key2]) except tspl.TSPLException as e: continue if not tspl_utils.checkjob(ts,3600,16): continue elif ts.numhosts < 2: print ts.j.id + ': 1 host' continue tmid=(ts.t[:-1]+ts.t[1:])/2.0 rate={} for k in ts.j.hosts.keys(): rate[k]=numpy.divide(numpy.diff(ts.data[0][k][0]),numpy.diff(ts.t)) for i in range(len(tmid)): print ','.join([ts.j.id,k,str(tmid[i]),str(rate[k][i])])
def main():
  """Find jobs with low flops but otherwise reasonable activity; plot them."""
  parser = argparse.ArgumentParser(description='Find jobs with low flops but'
                                   'reasonable levels of other activity.')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('-t', metavar='threshold', help='Treshold flopiness',
                      nargs=1, type=float, default=[0.001])
  parser.add_argument('-o', help='Output directory',
                      nargs=1, type=str, default=['.'], metavar='output_dir')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?', default='jobs')
  n = parser.parse_args()
  filelist = tspl_utils.getfilelist(n.filearg)

  pool = multiprocessing.Pool(processes=n.p[0])
  m = multiprocessing.Manager()
  floppy = m.dict()  # filename -> True when the job is "floppy" (low flops)

  thresh = n.t[0]
  outdir = n.o[0]

  partial_floppy = functools.partial(do_floppy, thresh=thresh, floppy=floppy)
  if len(filelist) != 0:
    pool.map(partial_floppy, filelist)
    pool.close()
    pool.join()

  badjobs = []
  for i in floppy.keys():
    if floppy[i]:
      badjobs.append(i)

  # Second pool pass: render plots for the flagged jobs only.
  pool = multiprocessing.Pool(processes=n.p[0])
  if len(badjobs) != 0:
    pool.map(
        do_mp,
        zip(badjobs,
            [thresh for x in range(len(badjobs))],
            [outdir for x in range(len(badjobs))]))
    pool.close()
    pool.join()

  print '----------- Low Flops -----------'
  for i in badjobs:
    print i.split('/')[-1]
def main():
  """Plot raw and mean-adjusted per-host rates for a key pair per job."""
  parser = argparse.ArgumentParser()
  parser.add_argument("-f", help="Set full mode", action="store_true")
  parser.add_argument("key1", help="First key", nargs="?",
                      default="amd64_core")
  parser.add_argument("key2", help="Second key", nargs="?",
                      default="SSE_FLOPS")
  parser.add_argument("filearg", help="File, directory, or quoted"
                      " glob pattern", nargs="?", default="jobs")
  n = parser.parse_args()

  filelist = tspl_utils.getfilelist(n.filearg)

  for file in filelist:
    try:
      if n.f:
        full = "_full"
        ts = tspl.TSPLBase(file, [n.key1], [n.key2])
      else:
        full = ""
        ts = tspl.TSPLSum(file, [n.key1], [n.key2])
    except tspl.TSPLException as e:
      continue

    if not tspl_utils.checkjob(ts, 3600, 16):  # 1 hour, 16way only
      continue
    elif ts.numhosts < 2:  # At least 2 hosts
      print ts.j.id + ": 1 host"
      continue

    print ts.j.id
    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0

    # Grab the first series only (iteration is over hosts/keys).
    s = []
    for v in ts:
      s = v
      break

    fig, ax = plt.subplots(2, 1, figsize=(8, 6), dpi=80)
    ax[0].hold = True
    ax[1].hold = True
    xmin, xmax = [0.0, 0.0]
    xmin1, xmax1 = [0.0, 0.0]
    dt = numpy.diff(ts.t)
    for v in ts:
      rate = numpy.array(numpy.divide(numpy.diff(v), dt), dtype=numpy.int64)
      # d is the L1-norm mean; subtracting it centers each trace.
      d = numpy.linalg.norm(rate, ord=1) / float(len(rate))
      xmin, xmax = [min(xmin, min(rate)), max(xmax, max(rate))]
      xmin1, xmax1 = [min(xmin1, min(rate - d)), max(xmax1, max(rate - d))]
      ax[0].plot(tmid, rate)
      ax[1].plot(tmid, rate - d)

    xmin, xmax = tspl_utils.expand_range(xmin, xmax, 0.1)
    xmin1, xmax1 = tspl_utils.expand_range(xmin1, xmax1, 0.1)

    ax[0].set_ylim(bottom=xmin, top=xmax)
    ax[1].set_ylim(bottom=xmin1, top=xmax1)

    fname = "_".join(["graph", ts.j.id, ts.k1[0], ts.k2[0], "adjust" + full])
    fig.savefig(fname)
    plt.close()
def main():
  """Plot a key pair for jobs whose reduced rate exceeds a threshold.

  Reduction is max (--max) or trimmed mean over per-series rates; plotting
  and file saving are delegated to heatmap()/lineplot().
  """
  parser = argparse.ArgumentParser(
      description='Plot a key pair for some jobs')
  parser.add_argument('-t', help='Threshold', metavar='thresh')
  parser.add_argument('key1', help='First key', nargs='?',
                      default='amd64_core')
  parser.add_argument('key2', help='Second key', nargs='?',
                      default='SSE_FLOPS')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?', default='jobs')
  parser.add_argument('-f', help='Set full mode', action='store_true')
  parser.add_argument('-m', help='Set heatmap mode', action='store_true')
  parser.add_argument('--max', help='Use max instead of mean',
                      action='store_true')
  n = parser.parse_args()

  filelist = tspl_utils.getfilelist(n.filearg)

  if n.max:
    func = max
  else:
    func = scipy.stats.tmean

  for file in filelist:
    try:
      if n.f:
        full = '_full'
        ts = tspl.TSPLBase(file, [n.key1], [n.key2])
      else:
        full = ''
        ts = tspl.TSPLSum(file, [n.key1], [n.key2])
    except tspl.TSPLException as e:
      continue

    if not tspl_utils.checkjob(ts, 3600, 16):
      continue

    reduction = []  # place to store reductions via func
    for v in ts:
      rate = numpy.divide(numpy.diff(v), numpy.diff(ts.t))
      reduction.append(func(rate))
    # Reduce across series the same way as within each series.
    m = func(reduction)
    if not n.t or m > float(n.t):
      print ts.j.id + ': ' + str(m)
      if n.m:
        heatmap(ts, n, m, full)
      else:
        lineplot(ts, n, m, full)
    else:
      print ts.j.id + ': under threshold, ' + str(m) + ' < ' + n.t
def main():
  """Aggregate per-job histories onto a common time base and plot the rate.

  Pass 1 collects all sample times; pass 2 accumulates each job's series
  (via get_lnet_data_file) onto the merged sample grid.
  """
  parser = argparse.ArgumentParser(description="")
  parser.add_argument("-p", help="Set number of processes",
                      nargs=1, type=int, default=[1])
  parser.add_argument("-k1", help="Set first key", nargs="+",
                      type=str, default=["amd64_sock"])
  parser.add_argument("-k2", help="Set second key", nargs="+",
                      type=str, default=["DRAM"])
  parser.add_argument("-f", help="File, directory, or quoted"
                      " glob pattern", nargs=1, type=str, default=["jobs"])
  n = parser.parse_args()
  filelist = tspl_utils.getfilelist(n.f[0])

  procs = min(len(filelist), n.p[0])

  m = multiprocessing.Manager()
  histories = m.dict()  # filename -> accumulated history on the grid
  times = m.list()      # one iterable of sample times per job

  print "Getting samples"
  partial_get_samples = functools.partial(get_samples, times=times)
  pool = multiprocessing.Pool(processes=procs)
  pool.map(partial_get_samples, filelist)
  pool.close()
  pool.join()

  # Union of all jobs' sample times, sorted, as the common grid.
  samples = set([])
  for t in times:
    samples = samples.union(t)
  samples = numpy.array(sorted(samples))
  # samples=numpy.array(range(1349067600,1352440800+1,3600))
  print len(samples)

  partial_glndf = functools.partial(get_lnet_data_file,
                                    k1=n.k1, k2=n.k2,
                                    samples=samples, histories=histories)

  print "Getting data"
  pool = multiprocessing.Pool(processes=procs)
  pool.map(partial_glndf, filelist)
  pool.close()
  pool.join()

  accum = numpy.zeros(len(samples))
  for h in histories.values():
    accum += h

  print "Plotting"
  fig, ax = plt.subplots(1, 1, dpi=80)
  t = numpy.array([float(x) for x in samples])
  t -= t[0]
  # Plot the aggregate rate (daccum/dt) against hours since start.
  ax.plot(t[:-1] / 3600.0, numpy.diff(accum) / numpy.diff(t))
  fig.savefig("bar")
  plt.close()
def main():
  """Summarize SU usage by executable and flag high-CPI jobs.

  do_work returns (f_stall, mem_rate, cpi, ename, jid, user, su) per job;
  SUs are totaled per executable and the 50 biggest consumers reported.
  """
  parser = argparse.ArgumentParser(description='Look for imbalance between'
                                   'hosts for a pair of keys')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)
  procs = min(len(filelist),n.p[0])

  # Lariat metadata is loaded once from the first job's epoch and shared.
  job=pickle.load(open(filelist[0]))
  jid=job.id
  epoch=job.end_time
  ld=lariat_utils.LariatData(jid,end_epoch=epoch,daysback=3,directory=analyze_conf.lariat_path)

  if procs < 1:
    print 'Must have at least one file'
    exit(1)

  pool = multiprocessing.Pool(processes=procs)

  partial_work=functools.partial(do_work,mintime=3600.,wayness=16,
                                 lariat_dict=ld.ld)

  results=pool.map(partial_work,filelist)

  print len(results)

  sus={}  # executable name -> total SUs
  for (f_stall, mem_rate, cpi, ename, jid, user, su) in results:
    if f_stall is None:
      continue
    if ename in sus:
      sus[ename]+=su
    else:
      sus[ename]=su

  d=collections.Counter(sus)

  enames=zip(*d.most_common(50))[0]

  for k,v in d.most_common(50):
    print k,v

  for (f_stall, mem_rate, cpi, ename, jid, user, su) in results:
    if (f_stall is None) or (not ename in enames):
      continue
    # Cycles per effective cycle: inflation factor due to stalls.
    cpec = 1./(1. - f_stall)
    if cpi > 1.0: # and cpec > 2.0:
      print jid, ename, cpi, cpec, user, sus[ename]
def main():
  """Plot HT0-HT2 rates per host and flag jobs exceeding 2e9 events/s."""
  parser = argparse.ArgumentParser()
  parser.add_argument('-f', help='Set full mode', action='store_true')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?', default='jobs')
  n = parser.parse_args()

  filelist = tspl_utils.getfilelist(n.filearg)

  for file in filelist:
    try:
      full = ''
      # Three HyperTransport link counters per socket.
      ts = tspl.TSPLBase(file, ['amd64_sock', 'amd64_sock', 'amd64_sock'],
                         ['HT0', 'HT1', 'HT2'])
    except tspl.TSPLException as e:
      continue

    if not tspl_utils.checkjob(ts, 3600, 16):  # 1 hour, 16way only
      continue
    elif ts.numhosts < 2:  # At least 2 hosts
      print ts.j.id + ': 1 host'
      continue

    print ts.j.id
    tmid = (ts.t[:-1] + ts.t[1:]) / 2.0
    dt = numpy.diff(ts.t)

    fig, ax = plt.subplots(1, 1, figsize=(8, 6), dpi=80)
    ax.hold = True
    xmin, xmax = [0., 0.]
    c = Colors()  # one color per host
    for k in ts.j.hosts.keys():
      h = ts.j.hosts[k]
      col = c.next()
      # 3 HT links x 4 sockets per host.
      for i in range(3):
        for j in range(4):
          rate = numpy.divide(numpy.diff(ts.data[i][k][j]), dt)
          xmin, xmax = [min(xmin, min(rate)), max(xmax, max(rate))]
          ax.plot(tmid / 3600, rate, '-' + col)
    if xmax > 2.0e9:
      print ts.j.id + ' over limit: %(v)8.3f' % {'v': xmax}
    else:
      # Under the limit: discard the figure and skip this job.
      plt.close()
      continue

    plt.suptitle(ts.title)
    xmin, xmax = tspl_utils.expand_range(xmin, xmax, .1)
    ax.set_ylim(bottom=xmin, top=xmax)

    fname = '_'.join(['graph', ts.j.id, 'HT_rates'])
    fig.savefig(fname)
    plt.close()
def main():
  """Generate the standard master stats plot for every matched job file."""
  parser = argparse.ArgumentParser(description='Plot important stats for jobs')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?', default='jobs')
  args = parser.parse_args()

  for job_file in tspl_utils.getfilelist(args.filearg):
    master_plot(job_file)
def main():
  """Nightly pass: compute imbalance ratios, plot offenders, report users."""
  parser = argparse.ArgumentParser(
      description='Deal with a directory of pickle'
      ' files nightly')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('threshold', help='Treshold ratio for std dev:mean',
                      nargs='?', default=0.25)
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?', default='jobs')
  n = parser.parse_args()

  filelist = tspl_utils.getfilelist(n.filearg)

  pool = multiprocessing.Pool(processes=n.p[0])
  m = multiprocessing.Manager()
  ratios = m.dict()  # job id -> (imbalance ratio, ...)

  partial_imbal = functools.partial(imbalance.compute_imbalance,
                                    k1=['amd64_core'],
                                    k2=['SSE_FLOPS'],
                                    threshold=float(n.threshold),
                                    plot_flag=False, full_flag=False,
                                    ratios=ratios)
  pool.map(partial_imbal, filelist)

  # Map each over-threshold job id back to its file path.
  badfiles = []
  th = []
  for i in ratios.keys():
    v = ratios[i][0]
    if v > float(n.threshold):
      for f in filelist:
        if re.search(i, f):
          badfiles.append(f)
          th.append(v)

  pool.map(do_mp, zip(badfiles, th))  # Pool.starmap should exist....

  bad_users = imbalance.find_top_users(ratios)

  for file in badfiles:
    try:
      ts = tspl.TSPLSum(file, ['amd64_core', 'cpu'], ['SSE_FLOPS', 'user'])
    except tspl.TSPLException as e:
      continue
    uncorrelated.plot_correlation(ts, uncorrelated.pearson(ts), '')
def main():
  """Phase-plot rate[t] vs rate[t-1] for a key pair across jobs."""
  parser = argparse.ArgumentParser()
  parser.add_argument('-f', help='Set full mode', action='store_true')
  parser.add_argument('key1', help='First key', nargs='?',
                      default='amd64_core')
  parser.add_argument('key2', help='Second key', nargs='?',
                      default='SSE_FLOPS')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?', default='jobs')
  n = parser.parse_args()

  filelist = tspl_utils.getfilelist(n.filearg)

  for file in filelist:
    try:
      if n.f:
        full = '_full'
        ts = tspl.TSPLBase(file, [n.key1], [n.key2])
      else:
        full = ''
        ts = tspl.TSPLSum(file, [n.key1], [n.key2])
    except tspl.TSPLException as e:
      continue

    if not tspl_utils.checkjob(ts, 3600, 16):  # 1 hour, 16way only
      continue
    elif ts.numhosts < 2:  # At least 2 hosts
      print ts.j.id + ': 1 host'
      continue

    print ts.j.id

    fig, ax = plt.subplots(1, 1, figsize=(8, 6), dpi=80)
    xmin, xmax = [0., 0.]
    for v in ts:
      rate = numpy.divide(numpy.diff(v), numpy.diff(ts.t))
      xmin, xmax = [min(xmin, min(rate)), max(xmax, max(rate))]
      ax.hold = True
      # Successive-value scatter: rate at step i+1 vs step i.
      ax.plot(rate[1:], rate[:-1], '.')

    ax.set_ylim(bottom=xmin, top=xmax)
    ax.set_xlim(left=xmin, right=xmax)

    fname = '_'.join(
        ['graph', ts.j.id, ts.k1[0], ts.k2[0], 'phase' + full])
    fig.savefig(fname)
    plt.close()
def main():
  """Nightly pass over pickles: imbalance ratios per arch, plot offenders."""
  parser=argparse.ArgumentParser(description='Deal with a directory of pickle'
                                 ' files nightly')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('-o', help='Output directory',
                      nargs=1, type=str, default=['.'], metavar='output_dir')
  parser.add_argument('threshold', help='Treshold ratio for std dev:mean',
                      nargs='?', default=0.25)
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)

  pool = multiprocessing.Pool(processes=n.p[0])
  m = multiprocessing.Manager()
  ratios = m.dict()  # job id -> (imbalance ratio, ...)

  # Per-architecture key maps: AMD uses DRAM, Sandy Bridge uses L1D loads.
  k1={'amd64' : ['amd64_core'],
      'intel_snb' : [ 'intel_snb'],}
  k2={'amd64' : ['DRAM'],
      'intel_snb' : ['LOAD_L1D_ALL'],}

  partial_imbal=functools.partial(imbalance.compute_imbalance,
                                  k1=k1, k2=k2,
                                  threshold=float(n.threshold),
                                  plot_flag=False,full_flag=False,
                                  ratios=ratios)
  if len(filelist) != 0:
    pool.map(partial_imbal,filelist)
    pool.close()
    pool.join()

  # Map each over-threshold job id back to its file path and output dir.
  badfiles=[]
  th=[]
  dirs=[]
  for i in ratios.keys():
    v=ratios[i][0]
    if v > float(n.threshold):
      for f in filelist:
        if re.search(i,f):
          badfiles.append(f)
          th.append(v)
          dirs.append(n.o[0])

  if len(badfiles) != 0 or len(th) != 0 or len(dirs) != 0:
    pool = multiprocessing.Pool(processes=n.p[0])
    pool.map(do_mp,zip(badfiles,th,dirs)) # Pool.starmap should exist....
    pool.close()
    pool.join()

  bad_users=imbalance.find_top_users(ratios)
def main():
  """Plot HT0-HT2 rates per host and flag jobs exceeding 2e9 events/s."""
  parser = argparse.ArgumentParser()
  parser.add_argument('-f', help='Set full mode', action='store_true')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)

  for file in filelist:
    try:
      full=''
      # Three HyperTransport link counters per socket.
      ts=tspl.TSPLBase(file,['amd64_sock', 'amd64_sock', 'amd64_sock'],
                       ['HT0', 'HT1', 'HT2'])
    except tspl.TSPLException as e:
      continue

    if not tspl_utils.checkjob(ts,3600,16): # 1 hour, 16way only
      continue
    elif ts.numhosts < 2: # At least 2 hosts
      print ts.j.id + ': 1 host'
      continue

    print ts.j.id
    tmid=(ts.t[:-1]+ts.t[1:])/2.0
    dt=numpy.diff(ts.t)

    fig,ax=plt.subplots(1,1,figsize=(8,6),dpi=80)
    ax.hold=True
    xmin,xmax=[0.,0.]
    c=Colors()  # one color per host
    for k in ts.j.hosts.keys():
      h=ts.j.hosts[k]
      col=c.next()
      # 3 HT links x 4 sockets per host.
      for i in range(3):
        for j in range(4):
          rate=numpy.divide(numpy.diff(ts.data[i][k][j]),dt)
          xmin,xmax=[min(xmin,min(rate)),max(xmax,max(rate))]
          ax.plot(tmid/3600,rate,'-'+col)
    if xmax > 2.0e9:
      print ts.j.id + ' over limit: %(v)8.3f' % {'v' : xmax}
    else:
      # Under the limit: discard the figure and skip this job.
      plt.close()
      continue

    plt.suptitle(ts.title)
    xmin,xmax=tspl_utils.expand_range(xmin,xmax,.1)
    ax.set_ylim(bottom=xmin,top=xmax)

    fname='_'.join(['graph',ts.j.id,'HT_rates'])
    fig.savefig(fname)
    plt.close()
def main():
  """Compute imbalance (with lariat metadata) in parallel; plot flagged jobs."""
  parser = argparse.ArgumentParser(description='Look for imbalance between'
                                   'hosts for a pair of keys')
  parser.add_argument('threshold', help='Treshold ratio for std dev:mean',
                      nargs='?', default=0.25)
  parser.add_argument('key1', help='First key', nargs='?',
                      default='amd64_core')
  parser.add_argument('key2', help='Second key', nargs='?',
                      default='SSE_FLOPS')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('-o', help='Output directory',
                      nargs=1, type=str, default=['.'], metavar='output_dir')
  # parser.add_argument('-f', help='Set full mode', action='store_true')
  # parser.add_argument('-n', help='Disable plots', action='store_true')
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)
  procs = min(len(filelist),n.p[0])

  # Lariat metadata is loaded once from the first job's epoch and shared.
  job=pickle.load(open(filelist[0]))
  jid=job.id
  epoch=job.end_time
  ld=lariat_utils.LariatData(jid,end_epoch=epoch,daysback=3,directory=analyze_conf.lariat_path)

  if procs < 1:
    print 'Must have at least one file'
    exit(1)

  pool=multiprocessing.Pool(processes=procs)

  partial_imbal=functools.partial(compute_imbalance,k1=[n.key1],
                                  k2=[n.key2],thresh=float(n.threshold),
                                  lariat_dict=ld.ld)
  res=pool.map(partial_imbal,filelist)
  pool.close()
  pool.join()

  # compute_imbalance returns a falsy value for unflagged jobs.
  flagged_jobs=[r for r in res if r]
  print flagged_jobs
  print len(flagged_jobs)

  if len(flagged_jobs) != 0:
    pool = multiprocessing.Pool(processes=min(n.p[0],len(flagged_jobs)))
    pool.map(do_mp,zip(flagged_jobs,
                       [n.o[0] for x in flagged_jobs])) # Pool.starmap should exist....
    pool.close()
    pool.join()
def main(): parser = argparse.ArgumentParser(description='Plot a key pair for some jobs') parser.add_argument('-t', help='Threshold', metavar='thresh') parser.add_argument('key1', help='First key', nargs='?', default='amd64_core') parser.add_argument('key2', help='Second key', nargs='?', default='SSE_FLOPS') parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?',default='jobs') parser.add_argument('-f', help='Set full mode', action='store_true') parser.add_argument('-m', help='Set heatmap mode', action='store_true') parser.add_argument('--max', help='Use max instead of mean', action='store_true') n=parser.parse_args() filelist=tspl_utils.getfilelist(n.filearg) if n.max: func=max else: func=scipy.stats.tmean for file in filelist: try: if n.f: full='_full' ts=tspl.TSPLBase(file,[n.key1],[n.key2]) else: full='' ts=tspl.TSPLSum(file,[n.key1],[n.key2]) except tspl.TSPLException as e: continue if not tspl_utils.checkjob(ts,3600,16): continue reduction=[] # place to store reductions via func for v in ts: rate=numpy.divide(numpy.diff(v),numpy.diff(ts.t)) reduction.append(func(rate)) m=func(reduction) if not n.t or m > float(n.t): print ts.j.id + ': ' + str(m) if n.m: fig, fname = heatmap(ts,n,m,full) else: fig, fname = lineplot(ts,n,m,full) else: print ts.j.id + ': under threshold, ' + str(m) + ' < ' + n.t fig.savefig(fname)
def main():
  """Render the master stats plot for each job file matched by filearg."""
  parser = argparse.ArgumentParser(
      description='Plot important stats for jobs')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?', default='jobs')
  opts = parser.parse_args()

  files = tspl_utils.getfilelist(opts.filearg)
  for f in files:
    master_plot(f)
def main():
  """Find jobs where two key pairs are weakly correlated (|r| < threshold)."""
  parser = argparse.ArgumentParser(description='Look for lack of correlation'
                                   ' between two key pairs/')
  parser.add_argument('threshold', help='Treshold Pearson R',
                      nargs='?', default=0.8)
  parser.add_argument('keya1', help='Key A1', nargs='?',
                      default='amd64_core')
  parser.add_argument('keya2', help='Key A2', nargs='?',
                      default='DCSF')
  parser.add_argument('keyb1', help='Key B1', nargs='?',
                      default='amd64_core')
  parser.add_argument('keyb2', help='Key B2', nargs='?',
                      default='SSE_FLOPS')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?', default='jobs')
  parser.add_argument('-f', help='Set full mode', action='store_true')
  n = parser.parse_args()

  filelist = tspl_utils.getfilelist(n.filearg)

  threshold = n.threshold
  # Pair A and pair B are loaded together as two parallel series.
  k1 = [n.keya1, n.keyb1]
  k2 = [n.keya2, n.keyb2]

  for file in filelist:
    try:
      if n.f:
        full = '_full'
        ts = tspl.TSPLBase(file, k1, k2)
      else:
        full = ''
        ts = tspl.TSPLSum(file, k1, k2)
    except tspl.TSPLException as e:
      continue

    if not tspl_utils.checkjob(ts, 3600, 16):
      continue

    r = pearson(ts)
    print ts.j.id + ': ' + str(r)
    # Low |r| means the two pairs are uncorrelated; plot those jobs.
    if abs(r) < float(threshold):
      plot_correlation(ts, r, full)
def main():
  """Find jobs with high memory bandwidth; plot and report them."""
  parser = argparse.ArgumentParser(description='Find jobs with high mem'
                                   ' bandwidth')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('-t', metavar='threshold', help='Treshold Bandwidth',
                      nargs=1, type=float, default=[0.5])
  parser.add_argument('-o', help='Output directory',
                      nargs=1, type=str, default=['.'], metavar='output_dir')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  n=parser.parse_args()
  filelist=tspl_utils.getfilelist(n.filearg)

  pool = multiprocessing.Pool(processes=n.p[0])
  m = multiprocessing.Manager()
  bw = m.dict()  # filename -> True when the job's bandwidth is high

  thresh = n.t[0]
  outdir = n.o[0]

  partial_highbw=functools.partial(do_bw,thresh=thresh,bw=bw)
  if len(filelist) != 0:
    pool.map(partial_highbw,filelist)
    pool.close()
    pool.join()

  jobs=[]
  for i in bw.keys():
    if bw[i]:
      jobs.append(i)

  # Second pool pass: render plots for the flagged jobs only.
  pool = multiprocessing.Pool(processes=n.p[0])
  if len(jobs) != 0:
    pool.map(do_mp,zip(jobs,
                       [thresh for x in range(len(jobs))],
                       [outdir for x in range(len(jobs))]))
    pool.close()
    pool.join()

  print '----------- High BW -----------'
  for i in jobs:
    print i.split('/')[-1]
def main(): parser = argparse.ArgumentParser( description='Dump CSV for a key pair for some jobs') parser.add_argument('-k1', help='Set first key', nargs='+', type=str, default=['amd64_sock']) parser.add_argument('-k2', help='Set second key', nargs='+', type=str, default=['DRAM']) parser.add_argument('-f', help='File, directory, or quoted' ' glob pattern', nargs=1, type=str, default=['jobs']) n = parser.parse_args() filelist = tspl_utils.getfilelist(n.f[0]) for file in filelist: try: ts = tspl.TSPLSum(file, n.k1, n.k2) except tspl.TSPLException as e: continue if not tspl_utils.checkjob(ts, 0, 16): continue elif ts.numhosts < 2: print ts.j.id + ': 1 host' continue tmid = (ts.t[:-1] + ts.t[1:]) / 2.0 for k in ts.j.hosts.keys(): rates = [ numpy.divide(numpy.diff(ts.data[x][k][0]), numpy.diff(ts.t)) for x in range(len(ts.data)) ] for i in range(len(tmid)): v = [rates[x][i] for x in range(len(ts.data))] print ','.join([ts.j.id, k, str(tmid[i])] + [str(x) for x in v])
def main():
  """Find jobs with low flops but otherwise reasonable activity; plot them."""
  parser = argparse.ArgumentParser(description='Find jobs with low flops but'
                                   'reasonable levels of other activity.')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('-t', metavar='threshold', help='Treshold flopiness',
                      nargs=1, type=float, default=[0.001])
  parser.add_argument('-o', help='Output directory',
                      nargs=1, type=str, default=['.'], metavar='output_dir')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  n=parser.parse_args()
  filelist=tspl_utils.getfilelist(n.filearg)

  pool = multiprocessing.Pool(processes=n.p[0])
  m = multiprocessing.Manager()
  floppy = m.dict()  # filename -> True when the job is "floppy" (low flops)

  thresh = n.t[0]
  outdir = n.o[0]

  partial_floppy=functools.partial(do_floppy,thresh=thresh,floppy=floppy)
  if len(filelist) != 0:
    pool.map(partial_floppy,filelist)
    pool.close()
    pool.join()

  badjobs=[]
  for i in floppy.keys():
    if floppy[i]:
      badjobs.append(i)

  # Second pool pass: render plots for the flagged jobs only.
  pool = multiprocessing.Pool(processes=n.p[0])
  if len(badjobs) != 0:
    pool.map(do_mp,zip(badjobs,
                       [thresh for x in range(len(badjobs))],
                       [outdir for x in range(len(badjobs))]))
    pool.close()
    pool.join()

  print '----------- Low Flops -----------'
  for i in badjobs:
    print i.split('/')[-1]
def main():
  """Phase-plot rate[t] vs rate[t-1] for a key pair across jobs."""
  parser = argparse.ArgumentParser()
  parser.add_argument('-f', help='Set full mode', action='store_true')
  parser.add_argument('key1', help='First key', nargs='?',
                      default='amd64_core')
  parser.add_argument('key2', help='Second key', nargs='?',
                      default='SSE_FLOPS')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)

  for file in filelist:
    try:
      if n.f:
        full='_full'
        ts=tspl.TSPLBase(file,[n.key1],[n.key2])
      else:
        full=''
        ts=tspl.TSPLSum(file,[n.key1],[n.key2])
    except tspl.TSPLException as e:
      continue

    if not tspl_utils.checkjob(ts,3600,16): # 1 hour, 16way only
      continue
    elif ts.numhosts < 2: # At least 2 hosts
      print ts.j.id + ': 1 host'
      continue

    print ts.j.id

    fig,ax=plt.subplots(1,1,figsize=(8,6),dpi=80)
    xmin,xmax=[0.,0.]
    for v in ts:
      rate=numpy.divide(numpy.diff(v),numpy.diff(ts.t))
      xmin,xmax=[min(xmin,min(rate)),max(xmax,max(rate))]
      ax.hold=True
      # Successive-value scatter: rate at step i+1 vs step i.
      ax.plot(rate[1:],rate[:-1],'.')

    ax.set_ylim(bottom=xmin,top=xmax)
    ax.set_xlim(left=xmin,right=xmax)

    fname='_'.join(['graph',ts.j.id,ts.k1[0],ts.k2[0],'phase'+full])
    fig.savefig(fname)
    plt.close()
def main(): parser = argparse.ArgumentParser(description='Plot important stats for jobs') parser.add_argument('-m', help='Plot mode: lines, hist, percentile', nargs=1, type=str, default=['lines'], metavar='mode') parser.add_argument('-o', help='Output directory', nargs=1, type=str, default=['.'], metavar='output_dir') parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?',default='jobs') parser.add_argument('-p', help='Set number of processes', nargs=1, type=int, default=[1]) parser.add_argument('-s', help='Set minimum time in seconds', nargs=1, type=int, default=[3600]) parser.add_argument('-w', help='Set wide plot format', action='store_true') n=parser.parse_args() filelist=tspl_utils.getfilelist(n.filearg) procs = min(len(filelist),n.p[0]) job=pickle.load(open(filelist[0])) jid=job.id epoch=job.end_time ld=lariat_utils.LariatData(jid,end_epoch=epoch,daysback=3,directory=analyze_conf.lariat_path) if procs < 1: print 'Must have at least one file' exit(1) pool = multiprocessing.Pool(processes=procs) partial_master=functools.partial(mp_wrapper,mode=n.m[0], threshold=False, output_dir=n.o[0], prefix='graph', mintime=n.s[0], wayness=[x+1 for x in range(16)], lariat_dict=ld.ld, wide=n.w) pool.map(partial_master,filelist) pool.close() pool.join()
def main(): parser = argparse.ArgumentParser( description='Dump CSV for a key pair for some jobs') parser.add_argument('key1', help='First key', nargs='?', default='amd64_core') parser.add_argument('key2', help='Second key', nargs='?', default='SSE_FLOPS') parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?', default='jobs') n = parser.parse_args() filelist = tspl_utils.getfilelist(n.filearg) print sys.argv[3] for file in filelist: try: ts = tspl.TSPLSum(file, [n.key1], [n.key2]) except tspl.TSPLException as e: continue if not tspl_utils.checkjob(ts, 3600, 16): continue elif ts.numhosts < 2: print ts.j.id + ': 1 host' continue tmid = (ts.t[:-1] + ts.t[1:]) / 2.0 rate = {} for k in ts.j.hosts.keys(): rate[k] = numpy.divide(numpy.diff(ts.data[0][k][0]), numpy.diff(ts.t)) for i in range(len(tmid)): print ','.join([ts.j.id, k, str(tmid[i]), str(rate[k][i])])
def main():
  """Nightly driver: compute host imbalance ratios for amd64 SSE_FLOPS in
  parallel, plot jobs above the threshold, rank the worst users, then make
  a flops/cpu-user correlation plot for each bad job."""
  parser = argparse.ArgumentParser(description="Deal with a directory of pickle"
                                   " files nightly")
  parser.add_argument("-p", help="Set number of processes",
                      nargs=1, type=int, default=[1])
  parser.add_argument("threshold", help="Treshold ratio for std dev:mean",
                      nargs="?", default=0.25)
  parser.add_argument("filearg", help="File, directory, or quoted"
                      " glob pattern", nargs="?", default="jobs")
  n = parser.parse_args()

  filelist = tspl_utils.getfilelist(n.filearg)

  pool = multiprocessing.Pool(processes=n.p[0])
  m = multiprocessing.Manager()
  ratios = m.dict()  # shared across workers: job id -> (ratio, ...)

  partial_imbal = functools.partial(imbalance.compute_imbalance,
                                    k1=["amd64_core"], k2=["SSE_FLOPS"],
                                    threshold=float(n.threshold),
                                    plot_flag=False, full_flag=False,
                                    ratios=ratios)
  pool.map(partial_imbal, filelist)

  # Map flagged job ids back to their file paths (ratios is keyed by id,
  # filelist holds paths).
  badfiles = []
  th = []
  for i in ratios.keys():
    v = ratios[i][0]
    if v > float(n.threshold):
      for f in filelist:
        if re.search(i, f):
          badfiles.append(f)
          th.append(v)

  pool.map(do_mp, zip(badfiles, th))  # Pool.starmap should exist....

  bad_users = imbalance.find_top_users(ratios)

  for file in badfiles:
    try:
      ts = tspl.TSPLSum(file, ["amd64_core", "cpu"], ["SSE_FLOPS", "user"])
    except tspl.TSPLException as e:
      continue
    uncorrelated.plot_correlation(ts, uncorrelated.pearson(ts), "")
def main():
  """Report jobs where two key pairs correlate poorly: print the Pearson R
  for every qualifying job and plot those with |R| below the threshold."""
  parser = argparse.ArgumentParser(description='Look for lack of correlation'
                                   ' between two key pairs/')
  parser.add_argument('threshold', help='Treshold Pearson R',
                      nargs='?', default=0.8)
  parser.add_argument('keya1', help='Key A1', nargs='?', default='amd64_core')
  parser.add_argument('keya2', help='Key A2', nargs='?', default='DCSF')
  parser.add_argument('keyb1', help='Key B1', nargs='?', default='amd64_core')
  parser.add_argument('keyb2', help='Key B2', nargs='?', default='SSE_FLOPS')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  parser.add_argument('-f', help='Set full mode', action='store_true')
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)

  threshold=n.threshold
  k1=[n.keya1, n.keyb1]
  k2=[n.keya2, n.keyb2]

  for file in filelist:
    try:
      # Full mode keeps per-device data; otherwise sum across devices.
      if n.f:
        full='_full'
        ts=tspl.TSPLBase(file,k1,k2)
      else:
        full=''
        ts=tspl.TSPLSum(file,k1,k2)
    except tspl.TSPLException as e:
      continue

    if not tspl_utils.checkjob(ts,3600,16):
      continue

    r=pearson(ts)
    print ts.j.id + ': ' + str(r)
    # Low |R| means the two key pairs are uncorrelated -> worth a plot.
    if abs(r) < float(threshold) :
      plot_correlation(ts,r,full)
def main(): parser = argparse.ArgumentParser( description='Find jobs with a single highly' ' idle host') parser.add_argument('-p', help='Set number of processes', nargs=1, type=int, default=[1]) parser.add_argument('-t', metavar='threshold', help='Treshold idleness', nargs=1, default=[0.001]) parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?', default='jobs') n = parser.parse_args() thresh = 1. - n.t[0] filelist = tspl_utils.getfilelist(n.filearg) pool = multiprocessing.Pool(processes=n.p[0]) m = multiprocessing.Manager() idleness = m.dict() partial_isidle = functools.partial(do_isidle, thresh=thresh, idleness=idleness) if len(filelist) != 0: pool.map(partial_isidle, filelist) pool.close() pool.join() print '----------- Idle Jobs -----------' for i in idleness.keys(): if idleness[i]: print i.split('/')[-1]
def main():
  """Nightly driver: run getuser over every job pickle file in parallel,
  extracting the jobs that belong to one user into the output directory."""
  cli = argparse.ArgumentParser(description='Deal with a directory of pickle'
                                ' files nightly')
  cli.add_argument('-p', help='Set number of processes',
                   nargs=1, type=int, default=[1])
  cli.add_argument('-o', help='Output directory',
                   nargs=1, type=str, default=['.'], metavar='output_dir')
  cli.add_argument('-u', help='User',
                   nargs=1, type=str, default=['bbarth'], metavar='username')
  cli.add_argument('filearg', help='File, directory, or quoted'
                   ' glob pattern', nargs='?', default='jobs')
  opts = cli.parse_args()

  targets = tspl_utils.getfilelist(opts.filearg)

  workers = multiprocessing.Pool(processes=opts.p[0])
  manager = multiprocessing.Manager()  # retained from original; handle unused

  # Bind the fixed keyword arguments so the pool only hands out file names.
  task = functools.partial(getuser, user=opts.u[0], output_dir=opts.o[0])
  workers.map(task, targets)
  workers.close()
  workers.join()
def main():
  """Scan job pickle files in parallel and hand each to getcode, which
  selects jobs matching the requested executable name."""
  cli = argparse.ArgumentParser(description='Find a particular executable '
                                'name')
  cli.add_argument('-p', help='Set number of processes',
                   nargs=1, type=int, default=[1])
  cli.add_argument('-o', help='Output directory',
                   nargs=1, type=str, default=['.'], metavar='output_dir')
  cli.add_argument('-e', help='Executable',
                   nargs=1, type=str, default=['a.out'], metavar='exec')
  cli.add_argument('filearg', help='File, directory, or quoted'
                   ' glob pattern', nargs='?', default='jobs')
  opts = cli.parse_args()

  targets = tspl_utils.getfilelist(opts.filearg)

  workers = multiprocessing.Pool(processes=opts.p[0])
  manager = multiprocessing.Manager()  # retained from original; handle unused

  # Fix the executable name and output directory; map over file names only.
  task = functools.partial(getcode, code=opts.e[0], output_dir=opts.o[0])
  workers.map(task, targets)
  workers.close()
  workers.join()
def main():
  """Compute per-job summary metrics in parallel and write them to dump.csv.

  Defect fixed: ``res`` was only bound inside the non-empty-filelist
  branch, so an empty file list raised NameError at the write loop.  It
  now defaults to an empty list, and a header-only CSV is still produced.
  """
  parser = argparse.ArgumentParser(description="Dump CSV keys for Weijia.")
  parser.add_argument("-p", help="Set number of processes",
                      nargs=1, type=int, default=[1])
  parser.add_argument("filearg", help="File, directory, or quoted"
                      " glob pattern", nargs="?", default="jobs")
  n = parser.parse_args()

  filelist = tspl_utils.getfilelist(n.filearg)

  pool = multiprocessing.Pool(processes=n.p[0])
  res = []
  if len(filelist) != 0:
    res = pool.map(do_compute, filelist)
  pool.close()
  pool.join()

  with open("dump.csv", "w") as out:
    out.write("# Job Id, Username, Executable, Mean DRAM BW, "
              + "Mean Stall Fraction, Mean DRAM/L1, Mean Flops\n")
    for line in res:
      if line:  # do_compute yields a falsy result for skipped jobs
        out.write(line + "\n")
def main():
  """Nightly driver: fan getuser out over the job pickle files, pulling
  one user's jobs into the chosen output directory."""
  argp = argparse.ArgumentParser(description='Deal with a directory of pickle'
                                 ' files nightly')
  argp.add_argument('-p', help='Set number of processes',
                    nargs=1, type=int, default=[1])
  argp.add_argument('-o', help='Output directory',
                    nargs=1, type=str, default=['.'], metavar='output_dir')
  argp.add_argument('-u', help='User',
                    nargs=1, type=str, default=['bbarth'], metavar='username')
  argp.add_argument('filearg', help='File, directory, or quoted'
                    ' glob pattern', nargs='?', default='jobs')
  args = argp.parse_args()

  files = tspl_utils.getfilelist(args.filearg)

  pool = multiprocessing.Pool(processes=args.p[0])
  mgr = multiprocessing.Manager()  # retained from original; handle unused

  # Freeze user and output directory; the pool supplies each file path.
  job = functools.partial(getuser, user=args.u[0], output_dir=args.o[0])
  pool.map(job, files)
  pool.close()
  pool.join()
def main():
  """Fan getcode out over the job pickle files to collect jobs that ran a
  particular executable."""
  argp = argparse.ArgumentParser(description='Find a particular executable '
                                 'name')
  argp.add_argument('-p', help='Set number of processes',
                    nargs=1, type=int, default=[1])
  argp.add_argument('-o', help='Output directory',
                    nargs=1, type=str, default=['.'], metavar='output_dir')
  argp.add_argument('-e', help='Executable',
                    nargs=1, type=str, default=['a.out'], metavar='exec')
  argp.add_argument('filearg', help='File, directory, or quoted'
                    ' glob pattern', nargs='?', default='jobs')
  args = argp.parse_args()

  files = tspl_utils.getfilelist(args.filearg)

  pool = multiprocessing.Pool(processes=args.p[0])
  mgr = multiprocessing.Manager()  # retained from original; handle unused

  # Freeze the executable name and output directory for every worker call.
  job = functools.partial(getcode, code=args.e[0], output_dir=args.o[0])
  pool.map(job, files)
  pool.close()
  pool.join()
def main(): parser = argparse.ArgumentParser(description='Guesstimate per core usage') parser.add_argument('-p', help='Set number of processes', nargs=1, type=int, default=[1]) parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?', default='jobs') n = parser.parse_args() filelist = tspl_utils.getfilelist(n.filearg) procs = min(len(filelist), n.p[0]) pool = multiprocessing.Pool(processes=procs) mpc = pool.map(mem_usage, filelist) mpc = list(itertools.chain.from_iterable(mpc)) print mpc hist, bins = numpy.histogram(mpc, 30) fig, ax = plt.subplots(1, 1, figsize=(8, 8), dpi=80) # plt.subplots_adjust(hspace=0.35) ax.bar(bins[:-1], hist, width=min(numpy.diff(bins))) ax.set_xlim(min(bins), max(bins)) fname = 'mempercore' fig.savefig(fname) plt.close()
def main():
  """Fit a step function to each job's data-movement signal in parallel and
  plot any job whose fit ratio is tiny (a sharp sustained drop)."""
  parser = argparse.ArgumentParser(description='')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('-o', help='Output directory',
                      nargs=1, type=str, default=['.'], metavar='output_dir')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)

  # Hash value must be a list
  # Per-architecture key pairs: amd64 uses socket DRAM traffic, Sandy
  # Bridge uses L1D loads.
  k1={'amd64' : ['amd64_sock'],
      'intel_snb': ['intel_snb']}
  k2={'amd64' : ['DRAM'],
      'intel_snb': ['LOAD_L1D_ALL']}

  pool = multiprocessing.Pool(processes=n.p[0])
  m = multiprocessing.Manager()
  res = m.dict()  # shared across workers: file -> [(index, ratio), ...]

  fit_partial=functools.partial(fit_step,k1=k1,k2=k2,genplot=False,res=res)

  if len(filelist) != 0:
    pool.map(fit_partial,filelist)
    pool.close()
    pool.join()

  for fn in res.keys():
    for (ind,ratio) in res[fn]:
      if ratio < 1e-3:
        print fn + ': ' + str(res[fn])
        masterplot.mp_wrapper(fn,'lines',False,n.o[0],'step',
                              1,[x+1 for x in range(16)],
                              header='Step Function Performance',wide=True)
        break  # one plot per file is enough
def main():
  """Run master_plot over the given job files in parallel, producing the
  standard per-job stats graphs in the output directory."""
  parser = argparse.ArgumentParser(description='Plot important stats for jobs')
  parser.add_argument('-m', help='Plot mode: lines, hist, percentile',
                      nargs=1, type=str, default=['lines'], metavar='mode')
  parser.add_argument('-o', help='Output directory',
                      nargs=1, type=str, default=['.'], metavar='output_dir')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('-s', help='Set minimum time in seconds',
                      nargs=1, type=int, default=[3600])
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)
  # Never spawn more workers than there are files.
  procs = min(len(filelist),n.p[0])

  if procs < 1:
    print 'Must have at least one file'
    exit(1)

  pool = multiprocessing.Pool(processes=procs)

  # Fix all plotting options; workers receive only the file name.
  partial_master=functools.partial(master_plot,mode=n.m[0],
                                   threshold=False,
                                   output_dir=n.o[0],
                                   prefix='graph',
                                   mintime=n.s[0],
                                   wayness=[x+1 for x in range(16)])
  pool.map(partial_master,filelist)
  pool.close()
  pool.join()
def main():
  """Serial imbalance scan: score every job file for host imbalance on one
  key pair, then rank and report the worst users."""
  argp = argparse.ArgumentParser(description='Look for imbalance between'
                                 'hosts for a pair of keys')
  argp.add_argument('threshold', help='Treshold ratio for std dev:mean',
                    nargs='?', default=0.25)
  argp.add_argument('key1', help='First key', nargs='?', default='amd64_core')
  argp.add_argument('key2', help='Second key', nargs='?', default='SSE_FLOPS')
  argp.add_argument('filearg', help='File, directory, or quoted'
                    ' glob pattern', nargs='?', default='jobs')
  argp.add_argument('-f', help='Set full mode', action='store_true')
  argp.add_argument('-n', help='Disable plots', action='store_true')
  args = argp.parse_args()

  cutoff = float(args.threshold)
  want_plots = not args.n

  # compute_imbalance fills this with a per-job ranking metric.
  ratios = {}
  for fname in tspl_utils.getfilelist(args.filearg):
    compute_imbalance(fname, [args.key1], [args.key2],
                      cutoff, want_plots, args.f, ratios)

  # Rank and report the users with the most imbalanced jobs.
  find_top_users(ratios)
def main():
  """Aggregate per-executable memory/stall metrics across jobs, report
  high-stall low-bandwidth (jobid, executable) pairs, and produce three
  summary figures (box_mdr, msr_v_mdr, msr_v_mem) for the top codes."""
  # Thresholds for the "memory bound?" report: half of 75 GB/s split over
  # 16 cores, and a 50% stall fraction.
  mem_rate_thresh = 0.5*75*1000000000/16
  stall_thresh = 0.5

  parser = argparse.ArgumentParser(description='Correlations')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('-n', help='Set number of executables to catalog',
                      nargs=1, type=int, default=[15])
  parser.add_argument('-s', help='Use SUs instead of job counts',
                      action='store_true')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)

  # Lariat data seeded once from the first job and shared with workers.
  job=pickle.load(open(filelist[0]))
  jid=job.id
  epoch=job.end_time
  ld=lariat_utils.LariatData(jid,end_epoch=epoch,daysback=3,
                             directory=analyze_conf.lariat_path)

  if n.p[0] < 1:
    print 'Must have at least one file'
    exit(1)

  partial_compute=functools.partial(compute_ratio,lariat_dict=ld.ld)

  pool = multiprocessing.Pool(processes=n.p[0])
  res=pool.map(partial_compute,filelist)
  pool.close()
  pool.join()

  # Accumulate per-executable arrays: data ratio, stall ratio, mem rate,
  # and total SUs.  compute_ratio returns None for skipped jobs, which
  # makes the tuple unpack raise TypeError -> skip.
  mdr={}
  msr={}
  mmr={}
  sus={}
  for tup in res:
    try:
      (jobid,su,ename,mean_data_ratio,mean_stall_ratio,mean_mem_rate) = tup
    except TypeError as e:
      continue
    if ename in mdr:
      mdr[ename]=numpy.append(mdr[ename],numpy.array([mean_data_ratio]))
      msr[ename]=numpy.append(msr[ename],numpy.array([mean_stall_ratio]))
      mmr[ename]=numpy.append(mmr[ename],numpy.array([mean_mem_rate]))
      sus[ename]+=su
    else:
      mdr[ename]=numpy.array([mean_data_ratio])
      msr[ename]=numpy.array([mean_stall_ratio])
      mmr[ename]=numpy.array([mean_mem_rate])
      sus[ename]=su
    # Report jobs that stall heavily without using much bandwidth.
    if (mean_mem_rate <= mem_rate_thresh) and \
       (mean_stall_ratio > stall_thresh) :
      print ename, jobid, mean_mem_rate/1000000000, mean_stall_ratio

  # Find top codes by SUs
  top_count={}
  for k in mdr.keys():
    if n.s:
      top_count[k]=sus[k] # by sus
    else:
      top_count[k]=len(mdr[k]) # by count

  d = collections.Counter(top_count)

  # Keep only the n.n[0] most common codes; log-scale the ratios/rates.
  mdr2={}
  msr2={}
  mmr2={}
  for k,v in d.most_common(n.n[0]):
    print k,v
    mdr2[k]=numpy.log10(mdr[k])
    msr2[k]=msr[k]
    mmr2[k]=numpy.log10(mmr[k])

#  for k in mdr.keys():
#    if len(mdr[k]) < 5:
#      continue
#    mdr2[k]=mdr[k]

  # Box widths scale with how common each code is.
  x=[top_count[k] for k in mdr2.keys()]
  l=len(mdr2.keys())
  y=numpy.linspace(0.10,0.95,l)
  widths=numpy.interp(x,numpy.linspace(5.0,float(max(x)),l),y)

  fig,ax=plt.subplots(1,1,figsize=(8,8),dpi=80)
  plt.subplots_adjust(hspace=0.35,bottom=0.25)
  ax.boxplot(mdr2.values(),widths=widths)
  xtickNames = plt.setp(ax,xticklabels=mdr2.keys())
  plt.setp(xtickNames, rotation=45, fontsize=8)
  ax.set_ylabel(r'log(DRAM BW/L1 Fill Rate)')
  fname='box_mdr'
  fig.savefig(fname)
  plt.close()

  # Scatter: stall fraction vs log data ratio, one marker style per code.
  markers = itertools.cycle(('o','x','+','^','s','8','p',
                             'h','*','D','<','>','v','d','.'))
  colors  = itertools.cycle(('b','g','r','c','m','k','y'))

  fig,ax=plt.subplots(1,1,figsize=(10,8),dpi=80)
  for k in mdr2.keys():
    ax.plot(mdr2[k],msr2[k],marker=markers.next(),
            markeredgecolor=colors.next(),
            linestyle='', markerfacecolor='None')
    ax.hold=True
  box = ax.get_position()
  ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
  ax.legend(mdr2.keys(),bbox_to_anchor=(1.05, 1),
            loc=2, borderaxespad=0., numpoints=1)
  ax.set_xlabel('log(DRAM BW/L1 Fill Rate)')
  ax.set_ylabel('Stall Fraction')
  fname='msr_v_mdr'
  fig.savefig(fname)
  plt.close()

  # Scatter: stall fraction vs log memory rate, with threshold guide lines.
  markers = itertools.cycle(('o','x','+','^','s','8','p',
                             'h','*','D','<','>','v','d','.'))
  colors  = itertools.cycle(('b','g','r','c','m','k','y'))

  fig,ax=plt.subplots(1,1,figsize=(10,8),dpi=80)
  for k in mdr2.keys():
    ax.plot(mmr2[k],msr2[k],marker=markers.next(),
            markeredgecolor=colors.next(),
            linestyle='', markerfacecolor='None')
    ax.hold=True
  ax.plot(numpy.log10([mem_rate_thresh, mem_rate_thresh]),
          [0.95*min(numpy.concatenate(msr2.values())),
           1.05*max(numpy.concatenate(msr2.values()))],
          'r--')
  print [min(numpy.concatenate(mmr2.values())),
         max(numpy.concatenate(mmr2.values()))], \
        [stall_thresh, stall_thresh], 'r--'
  ax.plot([min(numpy.concatenate(mmr2.values())),
           max(numpy.concatenate(mmr2.values()))],
          [stall_thresh, stall_thresh],
          'r--')
  box = ax.get_position()
  ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
  ax.legend(mdr2.keys(),bbox_to_anchor=(1.05, 1),
            loc=2, borderaxespad=0., numpoints=1)
  ax.set_xlabel('log(DRAM BW)')
  ax.set_ylabel('Stall Fraction')
  fname='msr_v_mem'
  fig.savefig(fname)
  plt.close()
def main():
  """Collect per-job summary rates for one user's jobs (in parallel via
  getuser), then plot six time-series panels (flops, DRAM BW, memory use,
  lnet, IB, CPU user fraction) across the user's jobs."""
  parser=argparse.ArgumentParser(description='Deal with a directory of pickle'
                                 ' files nightly')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('-o', help='Output directory',
                      nargs=1, type=str, default=['.'], metavar='output_dir')
  parser.add_argument('-u', help='User',
                      nargs=1, type=str, default=['bbarth'], metavar='username')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)
  target_user=n.u[0]

  pool = multiprocessing.Pool(processes=n.p[0])
  m = multiprocessing.Manager()
  files = m.list()  # NOTE(review): unused; getuser returns tuples instead

  partial_getuser=functools.partial(getuser,user=target_user)
  res = pool.map(partial_getuser,filelist)
  pool.close()
  pool.join()

  # getuser returns None for non-matching jobs; drop those.
  res = filter(lambda x: x != None, res)
  if len(res) == 0:
    print 'no jobs found'
    return

  # Sort by end time (element 0), then transpose the tuples into
  # parallel per-metric sequences.
  res_sorted = sorted(res, key=lambda x:x[0])
  res2 = zip(*res_sorted)

  t=res2[0]
  min_dram_rate=res2[1]
  max_dram_rate=res2[2]
  mean_dram_rate=res2[3]
  min_l1_rate=res2[4]
  max_l1_rate=res2[5]
  mean_l1_rate=res2[6]
  min_lnet_rate=res2[7]
  max_lnet_rate=res2[8]
  mean_lnet_rate=res2[9]
  min_ib_rate=res2[10]
  max_ib_rate=res2[11]
  mean_ib_rate=res2[12]
  min_user_rate=res2[13]
  max_user_rate=res2[14]
  mean_user_rate=res2[15]
  min_flops_rate=res2[16]
  max_flops_rate=res2[17]
  mean_flops_rate=res2[18]
  min_mem_usage=res2[19]
  max_mem_usage=res2[20]
  mean_mem_usage=res2[21]
  ids=res2[22]

  start_date = datetime.datetime.fromtimestamp(t[0]).strftime('%Y-%m-%d %H:%M:%S')
  end_date   = datetime.datetime.fromtimestamp(t[-1]).strftime('%Y-%m-%d %H:%M:%S')

  # Six stacked panels; each shows min (x), mean (+), max (*) per job.
  fig,ax=plt.subplots(6,1,figsize=(8,12),dpi=80)
  plt.subplots_adjust(hspace=0.35)

  ax[0].plot(t,min_flops_rate,'x',t,mean_flops_rate,'+',t,max_flops_rate,'*')
  ax[0].set_ylabel('GFlops/s')
  ax[0].set_xticklabels(labels=[])
  ax[1].plot(t,min_dram_rate,'x',t,mean_dram_rate,'+',t,max_dram_rate,'*')
  ax[1].set_ylabel('DRAM BW MB/s')
  ax[1].set_xticklabels(labels=[])
  ax[2].plot(t,min_mem_usage,'x',t,mean_mem_usage,'+',t,max_mem_usage,'*')
  ax[2].set_ylabel('DRAM Usage GB')
  ax[2].set_xticklabels(labels=[])
  ax[3].plot(t,min_lnet_rate,'x',t,mean_lnet_rate,'+',t,max_lnet_rate,'*')
  ax[3].set_ylabel('Lnet Rate MB/s')
  ax[3].set_xticklabels(labels=[])
  ax[4].plot(t,min_ib_rate,'x',t,mean_ib_rate,'+',t,max_ib_rate,'*')
  ax[4].set_ylabel('IB - Lnet Rate MB/s')
  ax[4].set_xticklabels(labels=[])
  ax[5].plot(t,min_user_rate,'x',t,mean_user_rate,'+',t,max_user_rate,'*')
  ax[5].set_ylabel('CPU User Fraction')
  ax[5].set_xticklabels(labels=[])

  for i in range(6):
    tspl_utils.adjust_yaxis_range(ax[i],0.1)

  ax[5].set_xlabel('t')
  plt.suptitle(target_user+' '+start_date+' -- '+end_date)
  fname=target_user
  fig.savefig(fname)
  plt.close()

  print 'Found', len(res_sorted), 'jobs for', target_user, ids
def main():
  """Nightly driver: compute host imbalance for data-movement keys (amd64
  DRAM / Sandy Bridge L1D loads) in parallel, plot flagged jobs into the
  output directory, and rank the worst users."""
  parser = argparse.ArgumentParser(description='Deal with a directory of pickle'
                                   ' files nightly')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('-o', help='Output directory',
                      nargs=1, type=str, default=['.'], metavar='output_dir')
  parser.add_argument('threshold', help='Treshold ratio for std dev:mean',
                      nargs='?', default=0.25)
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?', default='jobs')
  n = parser.parse_args()

  filelist = tspl_utils.getfilelist(n.filearg)

  pool = multiprocessing.Pool(processes=n.p[0])
  m = multiprocessing.Manager()
  ratios = m.dict()  # shared across workers: job id -> (ratio, ...)

  # Per-architecture key pairs; hash value must be a list.
  k1 = {'amd64': ['amd64_core'],
        'intel_snb': ['intel_snb']}
  k2 = {'amd64': ['DRAM'],
        'intel_snb': ['LOAD_L1D_ALL']}

  partial_imbal = functools.partial(imbalance.compute_imbalance,
                                    k1=k1, k2=k2,
                                    threshold=float(n.threshold),
                                    plot_flag=False, full_flag=False,
                                    ratios=ratios)
  if len(filelist) != 0:
    pool.map(partial_imbal, filelist)
    pool.close()
    pool.join()

  # Map flagged job ids back to file paths and pair each with its ratio
  # and the output directory for do_mp.
  badfiles = []
  th = []
  dirs = []
  for i in ratios.keys():
    v = ratios[i][0]
    if v > float(n.threshold):
      for f in filelist:
        if re.search(i, f):
          badfiles.append(f)
          th.append(v)
          dirs.append(n.o[0])

  if len(badfiles) != 0 or len(th) != 0 or len(dirs) != 0:
    pool = multiprocessing.Pool(processes=n.p[0])
    pool.map(do_mp, zip(badfiles, th, dirs))  # Pool.starmap should exist....
    pool.close()
    pool.join()

  bad_users = imbalance.find_top_users(ratios)
def main():
  """Collect per-job summary rates for one queue's jobs (in parallel via
  getqueue), then plot six time-series panels (flops, DRAM BW, memory use,
  lnet, IB, CPU user fraction) across those jobs."""
  parser = argparse.ArgumentParser(description='Deal with a directory of pickle'
                                   ' files nightly')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  parser.add_argument('-o', help='Output directory',
                      nargs=1, type=str, default=['.'], metavar='output_dir')
  parser.add_argument('-q', help='Queue',
                      nargs=1, type=str, default=['normal'], metavar='queue')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?', default='jobs')
  n = parser.parse_args()

  filelist = tspl_utils.getfilelist(n.filearg)
  target_queue = n.q[0]

  pool = multiprocessing.Pool(processes=n.p[0])
  m = multiprocessing.Manager()
  files = m.list()  # NOTE(review): unused; getqueue returns tuples instead

  partial_getqueue = functools.partial(getqueue, queue=target_queue)
  res = pool.map(partial_getqueue, filelist)
  pool.close()
  pool.join()

  # getqueue returns None for non-matching jobs; drop those.
  res = filter(lambda x: x != None, res)
  if len(res) == 0:
    print 'no jobs found'
    return

  # Sort by end time (element 0), then transpose the tuples into
  # parallel per-metric sequences.
  res_sorted = sorted(res, key=lambda x: x[0])
  res2 = zip(*res_sorted)

  t = res2[0]
  min_dram_rate = res2[1]
  max_dram_rate = res2[2]
  mean_dram_rate = res2[3]
  min_l1_rate = res2[4]
  max_l1_rate = res2[5]
  mean_l1_rate = res2[6]
  min_lnet_rate = res2[7]
  max_lnet_rate = res2[8]
  mean_lnet_rate = res2[9]
  min_ib_rate = res2[10]
  max_ib_rate = res2[11]
  mean_ib_rate = res2[12]
  min_user_rate = res2[13]
  max_user_rate = res2[14]
  mean_user_rate = res2[15]
  min_flops_rate = res2[16]
  max_flops_rate = res2[17]
  mean_flops_rate = res2[18]
  min_mem_usage = res2[19]
  max_mem_usage = res2[20]
  mean_mem_usage = res2[21]
  ids = res2[22]

  start_date = datetime.datetime.fromtimestamp(
      t[0]).strftime('%Y-%m-%d %H:%M:%S')
  end_date = datetime.datetime.fromtimestamp(
      t[-1]).strftime('%Y-%m-%d %H:%M:%S')

  # Six stacked panels; each shows min (x), mean (+), max (*) per job.
  fig, ax = plt.subplots(6, 1, figsize=(8, 12), dpi=80)
  plt.subplots_adjust(hspace=0.35)

  ax[0].plot(t, min_flops_rate, 'x', t, mean_flops_rate, '+',
             t, max_flops_rate, '*')
  ax[0].set_ylabel('GFlops/s')
  ax[0].set_xticklabels(labels=[])
  ax[1].plot(t, min_dram_rate, 'x', t, mean_dram_rate, '+',
             t, max_dram_rate, '*')
  ax[1].set_ylabel('DRAM BW MB/s')
  ax[1].set_xticklabels(labels=[])
  ax[2].plot(t, min_mem_usage, 'x', t, mean_mem_usage, '+',
             t, max_mem_usage, '*')
  ax[2].set_ylabel('DRAM Usage GB')
  ax[2].set_xticklabels(labels=[])
  ax[3].plot(t, min_lnet_rate, 'x', t, mean_lnet_rate, '+',
             t, max_lnet_rate, '*')
  ax[3].set_ylabel('Lnet Rate MB/s')
  ax[3].set_xticklabels(labels=[])
  ax[4].plot(t, min_ib_rate, 'x', t, mean_ib_rate, '+', t, max_ib_rate, '*')
  ax[4].set_ylabel('IB - Lnet Rate MB/s')
  ax[4].set_xticklabels(labels=[])
  ax[5].plot(t, min_user_rate, 'x', t, mean_user_rate, '+',
             t, max_user_rate, '*')
  ax[5].set_ylabel('CPU User Fraction')
  ax[5].set_xticklabels(labels=[])

  for i in range(6):
    tspl_utils.adjust_yaxis_range(ax[i], 0.1)

  ax[5].set_xlabel('t')
  plt.suptitle(target_queue + ' ' + start_date + ' -- ' + end_date)
  fname = target_queue
  fig.savefig(fname)
  plt.close()

  print 'Found', len(res_sorted), 'jobs for', target_queue, ids
def main():
  """Compute per-job (stall, miss, CPI, executable, flag) tuples in
  parallel via do_work, then scatter CPI against cycles-per-execution-cycle
  on log (fig1) and linear (fig2) axes, one marker/color per executable."""
  parser = argparse.ArgumentParser(description='Look for imbalance between'
                                   'hosts for a pair of keys')
  parser.add_argument('filearg', help='File, directory, or quoted'
                      ' glob pattern', nargs='?',default='jobs')
  parser.add_argument('-p', help='Set number of processes',
                      nargs=1, type=int, default=[1])
  n=parser.parse_args()

  filelist=tspl_utils.getfilelist(n.filearg)
  procs = min(len(filelist),n.p[0])

  # Lariat data seeded once from the first job and shared with workers.
  job=pickle.load(open(filelist[0]))
  jid=job.id
  epoch=job.end_time
  ld=lariat_utils.LariatData(jid,end_epoch=epoch,daysback=3,
                             directory=analyze_conf.lariat_path)

  if procs < 1:
    print 'Must have at least one file'
    exit(1)

  pool = multiprocessing.Pool(processes=procs)

  partial_work=functools.partial(do_work,mintime=3600.,wayness=16,
                                 lariat_dict=ld.ld)
  results=pool.map(partial_work,filelist)

  fig1,ax1=plt.subplots(1,1,figsize=(20,8),dpi=80)
  fig2,ax2=plt.subplots(1,1,figsize=(20,8),dpi=80)

  maxx=0.
  # Two passes, one per flag state, each with its own marker assignment.
  for state in [ True, False ]:
    stalls=[]
    misses=[]
    cpis=[]
    enames=[]
    for (s,m,cpi,ename,flag) in results:
      if (s != None and m > 0. and m < 1.0 and flag==state):
        stalls.extend([s])
        misses.extend([m])
        cpis.extend([cpi])
        enames.extend([ename])

    markers = itertools.cycle(('o','x','+','^','s','8','p',
                               'h','*','D','<','>','v','d','.'))
    colors  = itertools.cycle(('b','g','r','c','m','k','y'))

    # One marker+color combination per executable name.
    fmt={}
    for e in enames:
      if not e in fmt:
        fmt[e]=markers.next()+colors.next()

    for (s,c,e) in zip(stalls,cpis,enames):
#      ax1.plot(numpy.log10(1.-(1.-s)),numpy.log10(c),
      # 1/(1-s) is cycles per *execution* cycle; track the max for xlim.
      maxx=max(maxx,1./(1.-s))
      ax1.plot((1./(1.-s)),(c),
               marker=fmt[e][0],
               markeredgecolor=fmt[e][1],
               linestyle='', markerfacecolor='None',
               label=e)
      ax1.hold=True
      ax2.plot((1./(1.-s)),(c),
               marker=fmt[e][0],
               markeredgecolor=fmt[e][1],
               linestyle='', markerfacecolor='None',
               label=e)
      ax2.hold=True

#    ax.plot(numpy.log10(stalls),numpy.log10(cpis),fmt)
#    ax.plot(numpy.log10(1.0/(1.0-numpy.array(stalls))),numpy.log10(cpis),fmt)

  ax1.set_xscale('log')
  ax1.set_xlim(left=0.95,right=1.05*maxx)
  ax1.set_yscale('log')

  # Shrink both axes to leave room for the legend on the right.
  box = ax1.get_position()
  ax1.set_position([box.x0, box.y0, box.width * 0.45, box.height])
  box = ax2.get_position()
  ax2.set_position([box.x0, box.y0, box.width * 0.45, box.height])

  # De-duplicate legend entries (one per executable, not per point).
  handles=[]
  labels=[]
  for h,l in zip(*ax1.get_legend_handles_labels()):
    if l in labels:
      continue
    else:
      handles.extend([h])
      labels.extend([l])
  ax1.legend(handles,labels,bbox_to_anchor=(1.05, 1),
             loc=2, borderaxespad=0., numpoints=1,ncol=4)
  ax1.set_xlabel('log(Cycles per Execution Cycle)')
  ax1.set_ylabel('log(CPI)')

  handles=[]
  labels=[]
  for h,l in zip(*ax2.get_legend_handles_labels()):
    if l in labels:
      continue
    else:
      handles.extend([h])
      labels.extend([l])
  ax2.legend(handles,labels,bbox_to_anchor=(1.05, 1),
             loc=2, borderaxespad=0., numpoints=1,ncol=4)
  ax2.set_xlabel('Cycles per Execution Cycle')
  ax2.set_ylabel('CPI')

  fname='miss_v_stall_log'
  fig1.savefig(fname)
  fname='miss_v_stall'
  fig2.savefig(fname)
  plt.close()
def main(): parser = argparse.ArgumentParser(description='Look for imbalance between' 'hosts for a pair of keys') parser.add_argument('threshold', help='Treshold ratio for std dev:mean', nargs='?', default=0.25) parser.add_argument('key1', help='First key', nargs='?', default='amd64_core') parser.add_argument('key2', help='Second key', nargs='?', default='SSE_FLOPS') parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?', default='jobs') parser.add_argument('-p', help='Set number of processes', nargs=1, type=int, default=[1]) parser.add_argument('-o', help='Output directory', nargs=1, type=str, default=['.'], metavar='output_dir') # parser.add_argument('-f', help='Set full mode', action='store_true') # parser.add_argument('-n', help='Disable plots', action='store_true') n = parser.parse_args() filelist = tspl_utils.getfilelist(n.filearg) procs = min(len(filelist), n.p[0]) job = pickle.load(open(filelist[0])) jid = job.id epoch = job.end_time ld = lariat_utils.LariatData(jid, end_epoch=epoch, daysback=3, directory=analyze_conf.lariat_path) if procs < 1: print 'Must have at least one file' exit(1) pool = multiprocessing.Pool(processes=procs) partial_imbal = functools.partial(compute_imbalance, k1=[n.key1], k2=[n.key2], thresh=float(n.threshold), lariat_dict=ld.ld) res = pool.map(partial_imbal, filelist) pool.close() pool.join() flagged_jobs = [r for r in res if r] print flagged_jobs print len(flagged_jobs) if len(flagged_jobs) != 0: pool = multiprocessing.Pool(processes=min(n.p[0], len(flagged_jobs))) pool.map(do_mp, zip(flagged_jobs, [n.o[0] for x in flagged_jobs])) # Pool.starmap should exist.... pool.close() pool.join()
def main():
  """For each job file, extract derived rate series via get_data and plot a
  five-panel summary (read fraction, AVX/SSE rates, clock rate, metadata
  rate, cache-miss proxy) saved as plot_<jobid>_<owner>."""
  parser = argparse.ArgumentParser(description="Look for imbalance between"
                                   "hosts for a pair of keys")
  parser.add_argument("filearg", help="File, directory, or quoted"
                      " glob pattern", nargs="?", default="jobs")
  n = parser.parse_args()

  filelist = tspl_utils.getfilelist(n.filearg)

  for file in filelist:
    res = get_data(file)
    if res is None:
      continue

    (ts, ld, tmid,
     read_rate, write_rate, stall_rate, clock_rate,
     avx_rate, sse_rate, meta_rate,
     l1_rate, l2_rate, l3_rate, load_rate,
     read_frac, stall_frac) = res

    title = ts.title
    if ld.exc != "unknown":
      title += ", E: " + ld.exc.split("/")[-1]

    fig, ax = plt.subplots(5, 1, figsize=(8, 8), dpi=80)
    plt.subplots_adjust(hspace=0.35)
    plt.suptitle(title)

    # Panel 0: fraction of DRAM traffic that is reads.
    ax[0].plot(tmid / 3600.0, read_frac)
    ax[0].set_ylabel("DRAM Read Fraction")
#    ax[0].set_ylim(getlimits(read_frac))
    tspl_utils.adjust_yaxis_range(ax[0], 0.1)

#    ax[1].plot(tmid/3600., stall_frac)
#    ax[1].set_ylabel('Stall Fraction')
#    tspl_utils.adjust_yaxis_range(ax[1],0.1)

    # Panel 1: AVX (default color) and SSE (red) rates in G ops/s.
    ax[1].plot(tmid / 3600.0, avx_rate / 1e9)
    ax[1].hold = True
    ax[1].plot(tmid / 3600.0, sse_rate / 1e9, "r")
    ax[1].set_ylabel("AVX Rate")
    tspl_utils.adjust_yaxis_range(ax[1], 0.1)

    ax[2].plot(tmid / 3600.0, clock_rate)
    ax[2].set_ylabel("Observed Clock Rate")
    tspl_utils.adjust_yaxis_range(ax[2], 0.1)

    ax[3].plot(tmid / 3600.0, meta_rate)
    ax[3].set_ylabel("Meta Data Rate")
    tspl_utils.adjust_yaxis_range(ax[3], 0.1)

    # Panel 4: loads not satisfied by L1/L2/L3 as a cache-miss proxy.
    ax[4].plot(tmid / 3600.0, load_rate - (l1_rate + l2_rate + l3_rate))
    ax[4].set_ylabel("Cache Miss Rate?")
    # NOTE(review): ax[3] is adjusted a second time here; this probably
    # meant ax[4] -- confirm before changing.
    tspl_utils.adjust_yaxis_range(ax[3], 0.1)

    fname = "_".join(["plot", ts.j.id, ts.owner])
    fig.savefig(fname)
    plt.close()
def main(): mem_rate_thresh = 0.5 * 75 * 1000000000 / 16 stall_thresh = 0.5 parser = argparse.ArgumentParser(description='Correlations') parser.add_argument('-p', help='Set number of processes', nargs=1, type=int, default=[1]) parser.add_argument('-n', help='Set number of executables to catalog', nargs=1, type=int, default=[15]) parser.add_argument('-s', help='Use SUs instead of job counts', action='store_true') parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?', default='jobs') n = parser.parse_args() filelist = tspl_utils.getfilelist(n.filearg) job = pickle.load(open(filelist[0])) jid = job.id epoch = job.end_time ld = lariat_utils.LariatData(jid, end_epoch=epoch, daysback=3, directory=analyze_conf.lariat_path) if n.p[0] < 1: print 'Must have at least one file' exit(1) partial_compute = functools.partial(compute_ratio, lariat_dict=ld.ld) pool = multiprocessing.Pool(processes=n.p[0]) res = pool.map(partial_compute, filelist) pool.close() pool.join() mdr = {} msr = {} mmr = {} sus = {} for tup in res: try: (jobid, su, ename, mean_data_ratio, mean_stall_ratio, mean_mem_rate) = tup except TypeError as e: continue if ename in mdr: mdr[ename] = numpy.append(mdr[ename], numpy.array([mean_data_ratio])) msr[ename] = numpy.append(msr[ename], numpy.array([mean_stall_ratio])) mmr[ename] = numpy.append(mmr[ename], numpy.array([mean_mem_rate])) sus[ename] += su else: mdr[ename] = numpy.array([mean_data_ratio]) msr[ename] = numpy.array([mean_stall_ratio]) mmr[ename] = numpy.array([mean_mem_rate]) sus[ename] = su if (mean_mem_rate <= mem_rate_thresh) and \ (mean_stall_ratio > stall_thresh) : print ename, jobid, mean_mem_rate / 1000000000, mean_stall_ratio # Find top codes by SUs top_count = {} for k in mdr.keys(): if n.s: top_count[k] = sus[k] # by sus else: top_count[k] = len(mdr[k]) # by count d = collections.Counter(top_count) mdr2 = {} msr2 = {} mmr2 = {} for k, v in d.most_common(n.n[0]): print k, v mdr2[k] = numpy.log10(mdr[k]) 
msr2[k] = msr[k] mmr2[k] = numpy.log10(mmr[k]) # for k in mdr.keys(): # if len(mdr[k]) < 5: # continue # mdr2[k]=mdr[k] x = [top_count[k] for k in mdr2.keys()] l = len(mdr2.keys()) y = numpy.linspace(0.10, 0.95, l) widths = numpy.interp(x, numpy.linspace(5.0, float(max(x)), l), y) fig, ax = plt.subplots(1, 1, figsize=(8, 8), dpi=80) plt.subplots_adjust(hspace=0.35, bottom=0.25) ax.boxplot(mdr2.values(), widths=widths) xtickNames = plt.setp(ax, xticklabels=mdr2.keys()) plt.setp(xtickNames, rotation=45, fontsize=8) ax.set_ylabel(r'log(DRAM BW/L1 Fill Rate)') fname = 'box_mdr' fig.savefig(fname) plt.close() markers = itertools.cycle(('o', 'x', '+', '^', 's', '8', 'p', 'h', '*', 'D', '<', '>', 'v', 'd', '.')) colors = itertools.cycle(('b', 'g', 'r', 'c', 'm', 'k', 'y')) fig, ax = plt.subplots(1, 1, figsize=(10, 8), dpi=80) for k in mdr2.keys(): ax.plot(mdr2[k], msr2[k], marker=markers.next(), markeredgecolor=colors.next(), linestyle='', markerfacecolor='None') ax.hold = True box = ax.get_position() ax.set_position([box.x0, box.y0, box.width * 0.75, box.height]) ax.legend(mdr2.keys(), bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., numpoints=1) ax.set_xlabel('log(DRAM BW/L1 Fill Rate)') ax.set_ylabel('Stall Fraction') fname = 'msr_v_mdr' fig.savefig(fname) plt.close() markers = itertools.cycle(('o', 'x', '+', '^', 's', '8', 'p', 'h', '*', 'D', '<', '>', 'v', 'd', '.')) colors = itertools.cycle(('b', 'g', 'r', 'c', 'm', 'k', 'y')) fig, ax = plt.subplots(1, 1, figsize=(10, 8), dpi=80) for k in mdr2.keys(): ax.plot(mmr2[k], msr2[k], marker=markers.next(), markeredgecolor=colors.next(), linestyle='', markerfacecolor='None') ax.hold = True ax.plot(numpy.log10([mem_rate_thresh, mem_rate_thresh]), [ 0.95 * min(numpy.concatenate(msr2.values())), 1.05 * max(numpy.concatenate(msr2.values())) ], 'r--') print[ min(numpy.concatenate(mmr2.values())), max(numpy.concatenate(mmr2.values())) ], [stall_thresh, stall_thresh], 'r--' ax.plot([ min(numpy.concatenate(mmr2.values())), 
max(numpy.concatenate(mmr2.values())) ], [stall_thresh, stall_thresh], 'r--') box = ax.get_position() ax.set_position([box.x0, box.y0, box.width * 0.75, box.height]) ax.legend(mdr2.keys(), bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., numpoints=1) ax.set_xlabel('log(DRAM BW)') ax.set_ylabel('Stall Fraction') fname = 'msr_v_mem' fig.savefig(fname) plt.close()
def main(): parser = argparse.ArgumentParser() parser.add_argument('-f', help='Set full mode', action='store_true') parser.add_argument('key1', help='First key', nargs='?', default='amd64_core') parser.add_argument('key2', help='Second key', nargs='?', default='SSE_FLOPS') parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?',default='jobs') n=parser.parse_args() filelist=tspl_utils.getfilelist(n.filearg) for file in filelist: try: if n.f: full='_full' ts=tspl.TSPLBase(file,[n.key1],[n.key2]) else: full='' ts=tspl.TSPLSum(file,[n.key1],[n.key2]) except tspl.TSPLException as e: continue if not tspl_utils.checkjob(ts,3600,16): # 1 hour, 16way only continue elif ts.numhosts < 2: # At least 2 hosts print ts.j.id + ': 1 host' continue print ts.j.id tmid=(ts.t[:-1]+ts.t[1:])/2.0 s=[] for v in ts: s=v break fig,ax=plt.subplots(2,1,figsize=(8,6),dpi=80) ax[0].hold=True ax[1].hold=True xmin,xmax=[0.,0.] xmin1,xmax1=[0.,0.] dt=numpy.diff(ts.t) for v in ts: rate=numpy.array(numpy.divide(numpy.diff(v),dt),dtype=numpy.int64) d=numpy.linalg.norm(rate,ord=1)/float(len(rate)) xmin,xmax=[min(xmin,min(rate)),max(xmax,max(rate))] xmin1,xmax1=[min(xmin1,min(rate-d)),max(xmax1,max(rate-d))] ax[0].plot(tmid,rate) ax[1].plot(tmid,rate-d) xmin,xmax=tspl_utils.expand_range(xmin,xmax,.1) xmin1,xmax1=tspl_utils.expand_range(xmin1,xmax1,.1) ax[0].set_ylim(bottom=xmin,top=xmax) ax[1].set_ylim(bottom=xmin1,top=xmax1) fname='_'.join(['graph',ts.j.id,ts.k1[0],ts.k2[0],'adjust'+full]) fig.savefig(fname) plt.close()
def main(): parser = argparse.ArgumentParser( description='Plot important stats for jobs') parser.add_argument('-m', help='Plot mode: lines, hist, percentile', nargs=1, type=str, default=['lines'], metavar='mode') parser.add_argument('-o', help='Output directory', nargs=1, type=str, default=['.'], metavar='output_dir') parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?', default='jobs') parser.add_argument('-p', help='Set number of processes', nargs=1, type=int, default=[1]) parser.add_argument('-s', help='Set minimum time in seconds', nargs=1, type=int, default=[3600]) parser.add_argument('-w', help='Set wide plot format', action='store_true') n = parser.parse_args() filelist = tspl_utils.getfilelist(n.filearg) procs = min(len(filelist), n.p[0]) job = pickle.load(open(filelist[0])) jid = job.id epoch = job.end_time ld = lariat_utils.LariatData(jid, end_epoch=epoch, daysback=3, directory=analyze_conf.lariat_path) if procs < 1: print 'Must have at least one file' exit(1) pool = multiprocessing.Pool(processes=procs) partial_master = functools.partial(mp_wrapper, mode=n.m[0], threshold=False, output_dir=n.o[0], prefix='graph', mintime=n.s[0], wayness=[x + 1 for x in range(16)], lariat_dict=ld.ld, wide=n.w) pool.map(partial_master, filelist) pool.close() pool.join()
def main(): parser = argparse.ArgumentParser(description='Look for high meta data rate'\ ' to Lustre') parser.add_argument('-t', metavar='thresh', help='Treshold metadata rate', nargs=1, default=[100000.]) parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?', default='jobs') n = parser.parse_args() thresh = float(n.t[0]) print thresh filelist = tspl_utils.getfilelist(n.filearg) # k1=['llite', 'llite', 'llite', 'llite', 'llite', # 'llite', 'llite', 'llite', 'llite', 'llite', # 'llite', 'llite', 'llite', 'llite', 'llite', # 'llite', 'llite', 'llite', 'llite', 'llite', # 'llite', 'llite', 'llite', 'llite', 'llite', # 'llite'] # k2=['open','close','mmap','seek','fsync','setattr', # 'truncate','flock','getattr','statfs','alloc_inode', # 'setxattr','getxattr',' listxattr', # 'removexattr', 'inode_permission', 'readdir', # 'create','lookup','link','unlink','symlink','mkdir', # 'rmdir','mknod','rename',] k1 = [ 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', 'llite', ] k2 = [ 'open', 'close', 'mmap', 'fsync', 'setattr', 'truncate', 'flock', 'getattr', 'statfs', 'alloc_inode', 'setxattr', ' listxattr', 'removexattr', 'readdir', 'create', 'lookup', 'link', 'unlink', 'symlink', 'mkdir', 'rmdir', 'mknod', 'rename', ] for file in filelist: try: ts = tspl.TSPLSum(file, k1, k2) except tspl.TSPLException as e: continue if not tspl_utils.checkjob(ts, 3600., range(1, 33)): continue tmid = (ts.t[:-1] + ts.t[1:]) / 2.0 ld = lariat_utils.LariatData(ts.j.id, ts.j.end_time, 'lariatData') meta_rate = numpy.zeros_like(tmid) for k in ts.j.hosts.keys(): meta_rate += numpy.diff(ts.assemble(range(0, len(k1)), k, 0)) / numpy.diff(ts.t) meta_rate /= float(ts.numhosts) if numpy.max(meta_rate) > thresh: title = ts.title if ld.exc != 'unknown': title += ', E: ' + ld.exc.split('/')[-1] fig, ax = plt.subplots(1, 1, 
figsize=(10, 8), dpi=80) plt.subplots_adjust(hspace=0.35) plt.suptitle(title) markers = ('o', 'x', '+', '^', 's', '8', 'p', 'h', '*', 'D', '<', '>', 'v', 'd', '.') colors = ('b', 'g', 'r', 'c', 'm', 'k', 'y') cnt = 0 for v in ts.data: for host in v: for vals in v[host]: rate = numpy.diff(vals) / numpy.diff(ts.t) c = colors[cnt % len(colors)] m = markers[cnt % len(markers)] # print cnt,(cnt % len(colors)), (cnt % len(markers)), k2[cnt], c, m ax.plot(tmid / 3600., rate, marker=m, markeredgecolor=c, linestyle='-', color=c, markerfacecolor='None', label=k2[cnt]) ax.hold = True cnt = cnt + 1 ax.set_ylabel('Meta Data Rate (op/s)') tspl_utils.adjust_yaxis_range(ax, 0.1) handles, labels = ax.get_legend_handles_labels() new_handles = {} for h, l in zip(handles, labels): new_handles[l] = h box = ax.get_position() ax.set_position([box.x0, box.y0, box.width * 0.9, box.height]) ax.legend(new_handles.values(), new_handles.keys(), prop={'size': 8}, bbox_to_anchor=(1.05, 1), borderaxespad=0., loc=2) fname = '_'.join(['metadata', ts.j.id, ts.owner]) fig.savefig(fname) plt.close()
def main(): parser = argparse.ArgumentParser(description='') parser.add_argument('-p', help='Set number of processes', nargs=1, type=int, default=[1]) parser.add_argument('-k1', help='Set first key', nargs='+', type=str, default=['amd64_sock']) parser.add_argument('-k2', help='Set second key', nargs='+', type=str, default=['DRAM']) parser.add_argument('-f', help='File, directory, or quoted' ' glob pattern', nargs=1, type=str, default=['jobs']) n = parser.parse_args() filelist = tspl_utils.getfilelist(n.f[0]) procs = min(len(filelist), n.p[0]) m = multiprocessing.Manager() histories = m.dict() times = m.list() print 'Getting samples' partial_get_samples = functools.partial(get_samples, times=times) pool = multiprocessing.Pool(processes=procs) pool.map(partial_get_samples, filelist) pool.close() pool.join() samples = set([]) for t in times: samples = samples.union(t) samples = numpy.array(sorted(samples)) # samples=numpy.array(range(1349067600,1352440800+1,3600)) print len(samples) partial_glndf = functools.partial(get_lnet_data_file, k1=n.k1, k2=n.k2, samples=samples, histories=histories) print 'Getting data' pool = multiprocessing.Pool(processes=procs) pool.map(partial_glndf, filelist) pool.close() pool.join() accum = numpy.zeros(len(samples)) for h in histories.values(): accum += h print 'Plotting' fig, ax = plt.subplots(1, 1, dpi=80) t = numpy.array([float(x) for x in samples]) t -= t[0] ax.plot(t[:-1] / 3600., numpy.diff(accum) / numpy.diff(t)) fig.savefig('bar') plt.close()
def main(): parser = argparse.ArgumentParser(description='Look for imbalance between' 'hosts for a pair of keys') parser.add_argument('filearg', help='File, directory, or quoted' ' glob pattern', nargs='?', default='jobs') parser.add_argument('-p', help='Set number of processes', nargs=1, type=int, default=[1]) n = parser.parse_args() filelist = tspl_utils.getfilelist(n.filearg) procs = min(len(filelist), n.p[0]) job = pickle.load(open(filelist[0])) jid = job.id epoch = job.end_time ld = lariat_utils.LariatData(jid, end_epoch=epoch, daysback=3, directory=analyze_conf.lariat_path) if procs < 1: print 'Must have at least one file' exit(1) pool = multiprocessing.Pool(processes=procs) partial_work = functools.partial(do_work, mintime=3600., wayness=16, lariat_dict=ld.ld) results = pool.map(partial_work, filelist) print len(results) sus = {} for (f_stall, mem_rate, cpi, ename, jid, user, su) in results: if f_stall is None: continue if ename in sus: sus[ename] += su else: sus[ename] = su d = collections.Counter(sus) enames = zip(*d.most_common(50))[0] for k, v in d.most_common(50): print k, v for (f_stall, mem_rate, cpi, ename, jid, user, su) in results: if (f_stall is None) or (not ename in enames): continue cpec = 1. / (1. - f_stall) if cpi > 1.0: # and cpec > 2.0: print jid, ename, cpi, cpec, user, sus[ename]