def parse_bsize(t): bsize, lis = t x = [] y = [] for csv in humansorted(lis): e = Experiment(csv) size = None total_dups = None if "pf" in csv: size = basename(dirname(csv)).split("_")[2] total_dups = sum(e.get_duplicate_faults_64k()) else: size = basename(dirname(csv)).split("_")[1] total_dups = sum(e.get_duplicate_faults_4k()) size = size x.append(size) y.append(total_dups) return (x, y, bsize)
def main(): parser = argparse.ArgumentParser() parser.add_argument('csv', type=str, help='Full path to CSV file') parser.add_argument('-o', type=str, default="", help='output filename') parser.add_argument('-d', type=str, default="", help='out dir') args = parser.parse_args() c = args.csv m = "*" if not ".txt" in args.csv: print("Suspicious input file with no/wrong extension:", args.csv) exit(1) e = Experiment(c) ranges, faults, batches, num_batches = e.get_raw_data() e.print_info() matplotlib.rcParams['agg.path.chunksize'] = 10000 #fig = plt.gcf() #ax = fig.add_subplot(1,1,1) #ax2 = ax.twiny() read = [] rx = [] write = [] wx = [] for i, f in enumerate(faults): if f.access_type == "r": #read.append(f.fault_address) #rx.append(i) pass elif f.access_type == "w": #write.append(f.fault_address) #wx.append(i) pass elif f.access_type == "p": pass else: print("unaccounted for access type:", f.access_type) true_bl = [len(batch) for batch in batches] bl = [0] + true_bl for i, b in enumerate(bl): if i == 0: continue bl[i] += bl[i - 1] del bl[0] #print(batches) sm_ids = sorted(e.count_utlb_client_pairs().keys()) print("sm_ids:", sm_ids) cmap = plt.get_cmap('jet') colors = cmap(np.linspace(0, 1.0, len(sm_ids))) id_f_map = {sm_id: [] for sm_id in sm_ids} for fault in faults: mapping = (fault.utlb_id, fault.client_id) id_f_map[mapping].append(fault) times_between_batches = e.batch_times avg = e.avg_time_batches() print("avg time between batches:", avg) print("min time between batches", min(times_between_batches)) print("max time between batches", max(times_between_batches)) print("total # deltas:", len(times_between_batches)) print("Num batches < 1e1:", len([t for t in times_between_batches if t < 1e1])) print("Num batches < 1e2:", len([t for t in times_between_batches if t < 1e2])) print("Num batches < 1e3:", len([t for t in times_between_batches if t < 1e3])) print("Num batches < 1e4:", len([t for t in times_between_batches if t < 1e4])) print("Num batches < 1e5:", len([t for t in times_between_batches if t < 1e5])) print("Num batches < 1e6:", len([t for t in times_between_batches if t < 1e6])) print("Num batches < 1e7:", len([t for t in times_between_batches if t < 1e7])) print("Num batches < 1e8:", len([t for t in times_between_batches if t < 1e8])) Q1 = np.quantile(times_between_batches, 0.25) Q3 = np.quantile(times_between_batches, 0.75) med = statistics.median(times_between_batches) avg = np.mean(times_between_batches) print("Q1, median, Q3:", Q1, ",", med, ",", Q3) #plt.plot(times, counts, marker="*") hist, bins, _ = plt.hist(times_between_batches) #hist, bins, _ = plt.hist(times_between_batches, bins=16) binlen = len(bins) print("bins:", bins) logbins = np.logspace(0.0, np.log10(bins[-1]), len(bins)) print("logbins:", logbins) plt.clf() hist, bins, _ = plt.hist(times_between_batches, bins=logbins) ax = plt.gca() plt.vlines([Q1, med, Q3], 0, 1, transform=ax.get_xaxis_transform(), label="Q1/Med/Q3") plt.vlines([avg], 0, 1, transform=ax.get_xaxis_transform(), label="Avg", color="r") plt.xlim(xmin=1e0) plt.ylim(ymin=0.0) plt.xscale("log") plt.xlabel("Time Between Batch Fault Arrival in Buffer (NS)") plt.ylabel("Frequency") plt.legend() psize = basename(dirname(args.csv)) #.split("_")[-1] print("psize:", psize) figname = None if args.o == "": figname = (args.d + "/" + splitext(basename(args.csv))[0] + "-" + psize + "-batch-time-dist.png").replace("_", "-") else: figname = args.o if ".png" not in figname: figname += ".png" #figname = splitext(basename(args.csv))[0].split("_")[0] + "-" + psize + "-sm-time-dist.png" plt.tight_layout() print('saving figure:', figname) plt.savefig(figname, dpi=500) plt.close() if ("pf") in args.csv: xs = [ len(batch) - dup + len(pfbatch) for batch, dup, pfbatch in zip( e.batches, e.get_duplicate_faults_64k(), e.pfbatches) ] else: xs = [ len(batch) - dup for batch, dup in zip(e.batches, e.get_duplicate_faults_4k()) ] #xs = [len(batch) - e.get_duplicate_faults_4k() for batch in e.batches] hist, bins, _ = plt.hist(xs, bins=len(logbins)) logbins = np.logspace(0.0, np.log10(bins[-1]), len(bins)) plt.clf() hist, bins, _ = plt.hist(xs, bins=logbins) #plt.hist([len(batch) for batch in e.batches]) plt.xlabel("Batch Sizes") plt.ylabel("Frequency") plt.xlim(xmin=1e0) plt.ylim(ymin=0.0) plt.xscale("log") if args.o == "": figname = (args.d + "/" + splitext(basename(args.csv))[0] + "-" + psize + "-batch-size-dist.png").replace("_", "-") else: figname = args.o if ".png" not in figname: figname += ".png" plt.tight_layout() print('saving figure:', figname) plt.savefig(figname, dpi=500) plt.close() ys = times_between_batches size_time_plot(xs, ys, psize, args, "pf-dups") if ("pf") in args.csv: xs = [ len(batch) - dup for batch, dup in zip(e.batches, e.get_duplicate_faults_64k()) ] else: xs = [ len(batch) - dup for batch, dup in zip(e.batches, e.get_duplicate_faults_4k()) ] #xs = [len(batch) - e.get_duplicate_faults_4k() for batch in e.batches] size_time_plot(xs, ys, psize, args, "dups") if ("pf") in args.csv: xs = [len(batch) for batch in e.batches] else: xs = [len(batch) for batch in e.batches] size_time_plot(xs, ys, psize, args, "") div = 65536 if "pf" in args.csv else 4096 xs = [] for batch in batches: vals = sorted(set([f.fault_address // div for f in batch])) transfers = 0 prev = vals[0] count = 0 for val in vals: if val > 1 + prev or val % (2097152 // div) == 0: transfers += 1 count = 0 else: count += 1 prev = val if count > 0: transfers += 1 xs.append(transfers) size_time_plot(xs, ys, psize, args, "transfers")