def plot_diff(ztype, title, *, file_name=None):
    """Plot segment-to-segment change for each of the ten test5 record counts.

    For record counts 500000, 1000000, ..., 5000000 the entries of
    ``res/test5/rec{count}/zeros/{ztype}`` are loaded and every adjacent
    pair is reduced to ``sqsum(diff_norm(next_entry, entry))``.  Each record
    count contributes one line to a single shared figure.

    Args:
        ztype: File name inside the ``zeros`` directory to read.
        title: Heading drawn at the top of the figure.
        file_name: ``None`` shows the figure; otherwise the figure is saved
            as ``res2/ctest5/`` + ``file_name``.
    """
    plt.figure(figsize=(10, 8))
    for step in range(10):
        # RGB triple that gets brighter as the record count grows.
        shade = [step / 10, 0, step / 20]
        source = "res/test5/rec{}/zeros/{}".format((step + 1) * 500000, ztype)
        entries = misc.extract_file(source)
        deltas = [
            sqsum(diff_norm(entries[k + 1], entries[k]))
            for k in range(len(entries) - 1)
        ]
        plt.plot(range(len(deltas)), deltas, '-o', color=shade)

    plt.figtext(0.5, 0.01, "Segment #", ha="center", fontsize=12)
    plt.figtext(0.5, 0.9, "{}".format(title), ha="center", va="bottom",
                fontsize=16)

    if file_name is not None:
        misc.savefig("res2/ctest5/" + file_name, plt)
    else:
        plt.show()
    # Reset the current figure so the next plot starts from a blank canvas.
    plt.clf()
def plot6a4():
    """Save the zero-percentage histogram over all test6 runs (rep6a fig4).

    For every prefix ``a``..``e`` and every ``fc1``..``fc5`` directory the
    per-entry sums from ``zeros/zt`` are divided by the last value of the
    matching ``lz4`` entry.  All resulting ratios are pooled into one
    relative-frequency histogram; their standard deviation and mean
    (rounded to three decimals) are printed.

    The figure is written to ``rep/rep6a/fig4`` and then closed so that
    repeated calls do not accumulate open figures.
    """
    colors = misc.palette(4)
    _, ax = plt.subplots(1, figsize=(9, 4), sharey=True)
    plt.subplots_adjust(top=0.9, bottom=0.18, left=0.1, right=0.93)

    allpercs = []
    for prefix in ['a', 'b', 'c', 'd', 'e']:
        for fc in range(1, 6):
            ztots = [
                sum(x) for x in misc.extract_file(
                    "res/test6/{}/fc{}/zeros/zt".format(prefix, fc))
            ]
            sizes = [
                x[-1] for x in misc.extract_file(
                    "res/test6/{}/fc{}/lz4".format(prefix, fc))
            ]
            allpercs.extend(np.true_divide(ztots, sizes))

    # Equal weights of 1/N turn the bar heights into relative frequencies.
    ax.hist(allpercs,
            weights=np.zeros_like(allpercs) + 1. / len(allpercs),
            color=colors[1],
            bins=[0.05*x for x in range(3, 16)])

    print("6a - 4")
    print("std: {}".format(round(np.std(allpercs), 3)))
    print("av: {}".format(round(np.mean(allpercs), 3)))

    ax.set_ylabel("Relative Frequency")
    ax.set_xlabel("Zero Percentage")
    # The title has no placeholders, so it is passed as a plain literal.
    plt.suptitle("Figure 4: Zero Percentage Frequency Distribution (Report 5)")
    misc.savefig("rep/rep6a/fig4", plt)
    plt.close()
def plot6a3():
    """Save zero percentage curves for every test6 run (rep6a fig3).

    Five stacked axes are created, one per prefix ``a``..``e`` (titled
    "Part 1".."Part 5").  On each axis one curve is drawn per ``fc1``..``fc5``
    directory: the per-entry sums of ``zeros/zt`` divided by the last value
    of the matching ``lz4`` entry.  The result is written to
    ``rep/rep6a/fig3`` and the figure is closed.
    """
    colors = misc.palette(7)
    _, axs = plt.subplots(5, figsize=(9, 8), sharey=True, sharex=True)
    plt.subplots_adjust(top=0.9, bottom=0.08, left=0.1, right=0.93,
                        hspace=0.4)

    for j, prefix in enumerate(['a', 'b', 'c', 'd', 'e']):
        panel = axs[j]
        for i in range(5):
            ztots = [
                sum(x) for x in misc.extract_file(
                    "res/test6/{}/fc{}/zeros/zt".format(prefix, i + 1))
            ]
            sizes = [
                x[-1] for x in misc.extract_file(
                    "res/test6/{}/fc{}/lz4".format(prefix, i + 1))
            ]
            zpercs = np.true_divide(ztots, sizes)
            panel.plot(range(len(zpercs)), zpercs, '-', color=colors[i])
        panel.set_title("Part {}".format(j + 1))

    # The title has no placeholders, so it is passed as a plain literal.
    plt.suptitle("Figure 3: Zero Percentage vs Field Size (Report 5)")
    axs[2].set_ylabel("Zero Percentage")
    axs[-1].set_xlabel("Field Size")

    leg = axs[0].legend(
        ["fields per rec: {}".format(x + 1) for x in range(5)],
        loc='upper right')
    # Thicken the legend swatches so the colours are easy to tell apart.
    for line in leg.get_lines():
        line.set_linewidth(3.0)

    misc.savefig("rep/rep6a/fig3", plt)
    plt.close()
def plot_one_dist(ztype, title, *, file_name=None):
    """Draw one stored distribution from the 1500000-record run as bars.

    Entry 10 of ``res/test5/rec1500000/zeros/{ztype}`` is read and its first
    110 values are plotted at x positions 1 through 110.

    Args:
        ztype: File name inside the ``zeros`` directory to read.
        title: Heading drawn at the top of the figure.
        file_name: ``None`` shows the figure; otherwise the figure is saved
            as ``res2/ctest5/`` + ``file_name``.
    """
    plt.figure(figsize=(10, 8))

    source = "res/test5/rec{}/zeros/{}".format(1500000, ztype)
    dist = misc.extract_file(source)[10]

    plt.bar(range(1, 111), dist[:110])
    plt.xticks(list(range(0, 101, 10)))

    plt.figtext(0.5, 0.04, "Sequence Length", ha="center", fontsize=12)
    plt.figtext(0.08, 0.5, "Frequency", va="center", fontsize=12,
                rotation="vertical")
    plt.figtext(0.5, 0.9, "{}".format(title), ha="center", va="bottom",
                fontsize=16)

    if file_name is not None:
        misc.savefig("res2/ctest5/" + file_name, plt)
    else:
        plt.show()
    # Reset the current figure so the next plot starts from a blank canvas.
    plt.clf()
def plot_cdiff(ctype, title, *, file_name=None):
    """Plot, per record count, how each entry differs from the final entry.

    For record counts 500000, 1000000, ..., 5000000 the entries of
    ``res/test5/rec{count}/{ctype}`` are loaded.  Every entry except the
    last is compared with the last one: both are stripped of their final
    element, divided by the *current* entry's final element, and the result
    of ``diff`` on the two is reduced with ``av``.  One line per record
    count is drawn on the current figure (no new figure is created here).

    Args:
        ctype: File name inside the ``rec{count}`` directory to read.
        title: Heading drawn at the top of the figure.
        file_name: ``None`` shows the figure; otherwise the figure is saved
            as ``res2/ctest5/`` + ``file_name``.
    """
    for step in range(10):
        shade = [step / 10, 0, step / 20]
        entries = misc.extract_file(
            "res/test5/rec{}/{}".format((step + 1) * 500000, ctype))

        averages = []
        for k in range(len(entries) - 1):
            current = entries[k]
            scale = current[-1]
            own = div(current[:-1], scale)
            # NOTE(review): the final entry is scaled by the current entry's
            # last element rather than its own -- confirm this is intended.
            reference = div(entries[-1][:-1], scale)
            averages.append(av(diff(own, reference)))

        plt.plot(range(len(averages)), averages, '-o', color=shade)

    plt.figtext(0.5, 0.03, "Segment #", ha="center", fontsize=12)
    plt.figtext(0.5, 0.9, "{}".format(title), ha="center", va="bottom",
                fontsize=16)

    if file_name is not None:
        misc.savefig("res2/ctest5/" + file_name, plt)
    else:
        plt.show()
    # Reset the current figure so the next plot starts from a blank canvas.
    plt.clf()
def plot6b7():
    """Save the low-memory zero-percentage histogram (rep6b fig7).

    The last 500 values returned by ``extract_zpercs(low=10)`` are handed to
    ``hist6b67``; the current figure is then titled, written to
    ``rep/rep6b/fig7`` and closed.
    """
    print("6b - 7")
    every_perc = extract_zpercs(low=10)
    tail = every_perc[-500:]
    hist6b67(tail)
    plt.suptitle(
        "Figure 7: Zero Percentage Frequency Distribution (Low Memory)")
    misc.savefig("rep/rep6b/fig7", plt)
    plt.close()
def plot2():
    """Save the memory differential sum for experiment ``a`` (ctest6 res2).

    Plots the sequence returned by ``extract_size_diff_sum('a')`` against
    its index on a single small axis, writes it to ``res2/ctest6/res2`` and
    closes the figure.
    """
    _, axis = plt.subplots(1, figsize=(4, 1.8))
    plt.subplots_adjust(top=0.8, bottom=0.12, left=0.1, right=0.93)

    series = extract_size_diff_sum('a')
    positions = range(len(series))
    axis.plot(positions, series, '-')

    plt.suptitle("Figure 1c: Memory Differential Sum")
    misc.savefig("res2/ctest6/res2", plt)
    plt.close()
def plot5():
    """Save the aggregate lz4 compression-ratio curve (ctest6 res5).

    Plots the sequence returned by ``extract_comps('lz4')`` against its
    index, pins the lower y-limit to 1, writes the figure to
    ``res2/ctest6/res5`` and closes it.
    """
    _, axis = plt.subplots(1, figsize=(4, 1.8))
    plt.subplots_adjust(top=0.8, bottom=0.12, left=0.1, right=0.93)

    ratios = extract_comps('lz4')
    positions = range(len(ratios))
    axis.plot(positions, ratios, '-')
    # Start the y axis at 1 instead of letting Matplotlib pick the minimum.
    axis.set_ylim(ymin=1)

    plt.suptitle("Figure 3a: Aggregate lz4 Compression Ratio\nvs Field Size")
    misc.savefig("res2/ctest6/res5", plt)
    plt.close()
def plot6():
    """Save the processed zstd compression-ratio curve (ctest6 res6).

    The sequence from ``extract_comps('zstd')`` is passed through
    ``convolve`` with the kernel ``[1, -1]`` and its absolute value is
    plotted against its index.  The figure is written to
    ``res2/ctest6/res6`` and closed.

    NOTE(review): with that kernel ``convolve`` presumably yields successive
    differences, so the plotted values are changes in ratio rather than the
    ratio named in the title -- confirm against the helper's definition.
    """
    _, axis = plt.subplots(1, figsize=(4, 1.8))
    plt.subplots_adjust(top=0.8, bottom=0.12, left=0.1, right=0.93)

    ratios = extract_comps('zstd')
    filtered = convolve(ratios, [1, -1])
    magnitude = np.abs(filtered)
    # Unlike the lz4 figure, the y-limit is left to Matplotlib here.
    axis.plot(range(len(magnitude)), magnitude, '-')

    plt.suptitle("Figure 3b: Aggregate zstd Compression Ratio\nvs Field Size")
    misc.savefig("res2/ctest6/res6", plt)
    plt.close()
def plot6b1():
    """Save the memory-consumption curve (rep6b fig1).

    Plots the sequence returned by ``extract_sizes()`` against its index in
    the first palette colour, labels both axes, writes the figure to
    ``rep/rep6b/fig1`` and closes it.
    """
    _, axis = plt.subplots(1, figsize=(9, 2.8), sharey=True)
    plt.subplots_adjust(top=0.87, bottom=0.16, left=0.1, right=0.93)

    totals = extract_sizes()
    line_colour = misc.palette(3)[0]
    axis.plot(range(len(totals)), totals, '-', color=line_colour)

    axis.set_xlabel("Record Count (x1000)")
    axis.set_ylabel("Total Memory Consumption (Bytes)")
    axis.set_title("Figure 1: Memory Consumption vs Record Count")

    misc.savefig("rep/rep6b/fig1", plt)
    plt.close()
def plot6b5():
    """Save the zero-byte frequency chart for the tail of test7 (rep6b fig5).

    The last 500 entries of ``res/test7/zeros/zt`` are summed element-wise.
    The sum is passed through ``sum_norm`` and ``merge_bins``, and both
    results are handed to ``bar1234``, whose returned axes receives the
    title.  The figure is written to ``rep/rep6b/fig5`` and closed.

    If the file holds fewer than 500 entries, all of them are used.
    """
    data = misc.extract_file("res/test7/zeros/zt")
    data_sum = np.zeros(len(data[0]))
    # Slice from the end: indexing with ``data[-i]`` for i in range(500)
    # would pick up ``data[0]`` when i == 0 and miss the 500th-from-last row.
    for row in data[-500:]:
        data_sum = np.add(data_sum, row)

    data_av = sum_norm(data_sum)
    bdata_av = merge_bins(data_av)
    ax = bar1234(data_av, bdata_av)
    ax.set_title("Figure 5: Relative Frequency of Zero Bytes (High Memory)")
    misc.savefig("rep/rep6b/fig5", plt)
    plt.close()
def plot4():
    """Save the combined memory differential sum (ctest6 res4).

    ``extract_size_diff_sum`` is evaluated for prefixes ``a``..``e`` and the
    five results are merged with ``normalized_sum``.  The merged sequence is
    plotted against its index, written to ``res2/ctest6/res4`` and the
    figure is closed.  Afterwards the index of every value greater than 0.2
    is printed, one per line.
    """
    _, axis = plt.subplots(1, figsize=(4, 1.8))
    plt.subplots_adjust(top=0.8, bottom=0.12, left=0.1, right=0.93)

    per_experiment = [
        extract_size_diff_sum(prefix) for prefix in ['a', 'b', 'c', 'd', 'e']
    ]
    combined = normalized_sum(per_experiment)
    axis.plot(range(len(combined)), combined, '-')

    plt.suptitle("Figure 2b: Memory Differential Sum\nAll Experiments Sum")
    misc.savefig("res2/ctest6/res4", plt)
    plt.close()

    # Report where the combined differential exceeds the 0.2 threshold.
    position = 0
    for value in combined:
        if value > .2:
            print(position)
        position += 1
def plot6a1():
    """Save the global zero-distribution chart for test5 (rep6a fig1).

    Every entry of ``res/test5/rec{count}/zeros/zt`` for record counts
    500000, 1000000, ..., 5000000 is added element-wise into a 249-element
    accumulator, which is then divided by its own total.  The result and its
    ``merge_bins`` output are passed to ``bar1234``; the returned axes gets
    the title.  The figure is written to ``rep/rep6a/fig1`` and closed.
    """
    totals = np.zeros(249, dtype=np.float64)
    for recct in range(500000, 5000001, 500000):
        entries = misc.extract_file(
            "res/test5/rec{}/zeros/zt".format(recct))
        for entry in entries:
            totals = np.add(totals, entry)

    # Scale so the accumulated values sum to one.
    grand_total = sum(totals)
    fractions = np.true_divide(totals, grand_total)

    merged = merge_bins(fractions)
    ax = bar1234(fractions, merged)
    ax.set_title("Figure 1: Zero Distribution (Report 4 Global Average)")
    misc.savefig("rep/rep6a/fig1", plt)
    plt.close()
def plot7():
    """Save a 50x10 grid of small plots from one test6 file (ctest6 res7).

    The first 500 entries of ``res/test6/a/fc5/zeros/zt`` are drawn, each
    without its final element, one per cell.  Entries fill the grid column
    by column (entry ``j * 50 + i`` lands in row ``i``, column ``j``) and
    both axes of every cell are hidden.  The figure is written to
    ``res2/ctest6/res7`` and closed.
    """
    entries = misc.extract_file("res/test6/a/fc5/zeros/zt")
    _, grid = plt.subplots(50, 10, figsize=(15, 30))
    plt.subplots_adjust(top=0.99, bottom=0.01, left=0.01, right=0.99)

    for index in range(500):
        column, row = divmod(index, 50)
        values = entries[index][:-1]
        cell = grid[row][column]
        cell.plot(range(len(values)), values)
        cell.get_xaxis().set_visible(False)
        cell.get_yaxis().set_visible(False)

    misc.savefig("res2/ctest6/res7", plt)
    plt.close()
def plot_dists(ztype, ysize, title, *, file_name=None):
    """Plot a grid of distributions: segments (rows) by record count (columns).

    Column ``i`` shows the file ``res/test5/rec{count}/zeros/{ztype}`` for
    record count ``(i + 1) * 500000``.  Entry ``j`` of that file is passed
    through ``norm`` and drawn in row ``ysize - 1 - j``, so entry 0 sits in
    the bottom row.  Only the first column keeps its y axis (labelled with
    the 1-based segment number) and only the bottom row keeps its x axis
    (labelled with the record count); tick labels are blanked everywhere.

    Args:
        ztype: File name inside the ``zeros`` directory to read.
        ysize: Number of entries per file to draw, i.e. the number of rows.
        title: Heading drawn at the top of the figure.
        file_name: ``None`` shows the figure; otherwise the figure is saved
            as ``res2/ctest5/`` + ``file_name``.
    """
    # squeeze=False keeps ``axs`` two-dimensional even when ysize == 1,
    # where plt.subplots would otherwise return a 1-D array of axes.
    fig, axs = plt.subplots(ysize, 10, sharey=True,
                            figsize=(15, ysize * 3 / 4), squeeze=False)
    for col in range(10):
        recct = (col + 1) * 500000
        data = misc.extract_file(
            "res/test5/rec{}/zeros/{}".format(recct, ztype))
        for seg in range(ysize):
            row = ysize - 1 - seg
            ax = axs[row][col]
            ax.plot(norm(data[seg]))

            # Grid position is taken from the indices directly rather than
            # from Axes.is_first_col()/is_last_row(), which newer Matplotlib
            # releases no longer provide.
            if col == 0:
                ax.set_ylabel(seg + 1)
                ax.set_yticklabels([])
            else:
                ax.get_yaxis().set_visible(False)

            if row == ysize - 1:
                ax.set_xlabel(str(recct))
                ax.set_xticklabels([])
            else:
                ax.get_xaxis().set_visible(False)

    plt.figtext(0.5, -0.007 + 0.0024 * ysize, "# of Records", ha="center",
                fontsize=18)
    plt.figtext(0.08, 0.5, "Segment #", va="center", fontsize=18,
                rotation="vertical")
    plt.figtext(0.5, 0.9, "{}".format(title), ha="center", va="bottom",
                fontsize=24)

    if file_name is None:
        plt.show()
    else:
        misc.savefig("res2/ctest5/" + file_name, plt)
    # Reset the current figure so the next plot starts from a blank canvas.
    plt.clf()
def plot0():
    """Save memory-consumption curves for experiment ``a`` (ctest6 res0).

    Each sequence returned by ``extract_sizes('a')`` is drawn on its own
    stacked axis (five axes are created) and titled with its 1-based
    position as the fields-per-record count.  X tick labels are blanked on
    every axis except the one at index 4.  The figure is written to
    ``res2/ctest6/res0`` and closed.
    """
    _, panels = plt.subplots(5, figsize=(4, 5.4))
    plt.subplots_adjust(top=0.87, bottom=0.05, left=0.1, right=0.93,
                        hspace=0.4)

    position = 0
    for series in extract_sizes('a'):
        panel = panels[position]
        panel.plot(range(len(series)), series, '-')
        panel.set_title("Fields per Record: {}".format(position + 1))
        if position != 4:
            # Only the bottom panel keeps its x tick labels.
            panel.set_xticklabels([])
        position += 1

    plt.suptitle("Figure 1a: Memory Consumption vs Field Size")
    misc.savefig("res2/ctest6/res0", plt)
    plt.close()
def plot6c1():
    """Save the zero-percentage histogram for subtest 8 (rep6c fig1).

    The first 500 values of ``extract_zpercs(low=10, subtest=8)`` are drawn
    as a relative-frequency histogram with bin edges 0.12, 0.13, ..., 0.30.
    Their standard deviation and mean (rounded to three decimals) are
    printed.  The figure is written to ``rep/rep6c/fig1`` and closed.
    """
    print("6c - 1")
    head = extract_zpercs(low=10, subtest=8)[:500]

    _, axis = plt.subplots(1, figsize=(9, 4))
    plt.subplots_adjust(top=0.87, bottom=0.12, left=0.1, right=0.93)
    colors = misc.palette(4)

    # Equal weights of 1/N turn the bar heights into relative frequencies.
    equal_weights = np.zeros_like(head) + 1. / len(head)
    edges = [0.01*x for x in range(12, 31)]
    axis.hist(head, weights=equal_weights, color=colors[1], bins=edges)

    print("std: {}".format(round(np.std(head), 3)))
    print("av: {}".format(round(np.mean(head), 3)))

    axis.set_ylabel("Relative Frequency")
    axis.set_xlabel("Zero Percentage")
    plt.suptitle(
        "Figure 1: Zero Percentage Frequency Distribution")
    misc.savefig("rep/rep6c/fig1", plt)
    plt.close()
def plot3():
    """Save per-experiment memory differential sums (ctest6 res3).

    For each prefix ``a``..``e`` the sequence from ``extract_size_diff_sum``
    is drawn on its own stacked axis.  Y tick labels are blanked on every
    axis and x tick labels on all but the last.  The figure is written to
    ``res2/ctest6/res3`` and closed.
    """
    _, panels = plt.subplots(5, figsize=(4, 5.4))
    plt.subplots_adjust(top=0.87, bottom=0.05, left=0.1, right=0.93,
                        hspace=0.4)

    prefixes = ['a', 'b', 'c', 'd', 'e']
    for idx in range(len(prefixes)):
        series = extract_size_diff_sum(prefixes[idx])
        panel = panels[idx]
        panel.plot(range(len(series)), series, '-')
        # NOTE(review): titles are zero-based ("Experiment 0".."Experiment 4");
        # confirm that 1-based numbering was not intended.
        panel.set_title("Experiment {}".format(idx))
        panel.set_yticklabels([])
        if idx != 4:
            panel.set_xticklabels([])

    plt.suptitle("Figure 2a: Memory Differential Sum\nAll Experiments")
    misc.savefig("res2/ctest6/res3", plt)
    plt.close()
def plot6b3():
    """Save zero-percentage curves for three length ranges (rep6b fig3).

    Three sequences are obtained from ``extract_zpercs`` with
    ``low=10``, ``low=10, high=-1`` and ``low=-1``; each is plotted against
    its index in the first three palette colours, in that order.  The
    figure is written to ``rep/rep6b/fig3`` and closed.
    """
    _, axis = plt.subplots(1, figsize=(9, 4))
    plt.subplots_adjust(top=0.87, bottom=0.12, left=0.1, right=0.93)
    colors = misc.palette(4)

    # Load all three selections first, then draw them in the same order.
    selections = [{"low": 10}, {"low": 10, "high": -1}, {"low": -1}]
    curves = [extract_zpercs(**selection) for selection in selections]
    for idx, curve in enumerate(curves):
        axis.plot(range(len(curve)), curve, '-', color=colors[idx])

    axis.legend(["sequence length: 10+",
                 "sequence length: 10 - 249",
                 "sequence length: 250+"])
    axis.set_xlabel("Record Count (x1000)")
    axis.set_ylabel("Fraction of Total Bytes")
    axis.set_title("Figure 3: Zero Percentage vs Record Count")

    misc.savefig("rep/rep6b/fig3", plt)
    plt.close()
def plot6c3():
    """Save the zero-byte frequency chart for large test8 entries (rep6c fig3).

    Entries of ``res/test8/zeros/zt`` are paired positionally with the last
    value of each ``res/test8/lz4`` entry.  Only entries whose paired value
    exceeds 13000000000 are summed element-wise.  The sum goes through
    ``sum_norm`` and ``merge_bins`` and both results are handed to
    ``bar1234``, whose returned axes receives the title.  The figure is
    written to ``rep/rep6c/fig3`` and closed.
    """
    # Presumably a byte count selecting the largest workloads -- TODO confirm.
    size_floor = 13 * 10 ** 9

    zero_entries = misc.extract_file("res/test8/zeros/zt")
    last_values = [entry[-1] for entry in misc.extract_file("res/test8/lz4")]

    accumulated = np.zeros(len(zero_entries[0]))
    for entry, size in zip(zero_entries, last_values):
        if size > size_floor:
            accumulated = np.add(accumulated, entry)

    normalised = sum_norm(accumulated)
    merged = merge_bins(normalised)
    ax = bar1234(normalised, merged)
    ax.set_title(
        "Figure 3: Relative Frequency of Zero Bytes (14.9GB Workloads)")
    misc.savefig("rep/rep6c/fig3", plt)
    plt.close()
def plot6b2():
    """Save zero-byte total curves for three length ranges (rep6b fig2).

    Three sequences are obtained from ``extract_ztotals`` with
    ``low=10``, ``low=10, high=-1`` and ``low=-1``; each is plotted against
    its index in the first three palette colours, in that order.  Legend
    swatches are thickened, then the figure is written to
    ``rep/rep6b/fig2`` and closed.
    """
    _, axis = plt.subplots(1, figsize=(9, 4))
    plt.subplots_adjust(top=0.87, bottom=0.12, left=0.1, right=0.93)
    colors = misc.palette(4)

    # Load all three selections first, then draw them in the same order.
    selections = [{"low": 10}, {"low": 10, "high": -1}, {"low": -1}]
    curves = [extract_ztotals(**selection) for selection in selections]
    for idx, curve in enumerate(curves):
        axis.plot(range(len(curve)), curve, '-', color=colors[idx])

    axis.set_xlabel("Record Count (x1000)")
    axis.set_ylabel("# of Zero Bytes")
    axis.set_title("Figure 2: Zero Bytes vs Record Count")

    legend = axis.legend(["sequence length: 10+",
                          "sequence length: 10 - 249",
                          "sequence length: 250+"])
    for swatch in legend.get_lines():
        swatch.set_linewidth(3.0)

    misc.savefig("rep/rep6b/fig2", plt)
    plt.close()
def plot6a2():
    """Save the global zero-distribution chart for test6 (rep6a fig2).

    Every entry of ``res/test6/{prefix}/fc{n}/zeros/zt`` (prefixes
    ``a``..``e``, ``n`` from 1 to 5) is added element-wise into a
    119-element accumulator.  The accumulator is divided by the number of
    entries and then by its own total.  Before charting, 130 zeros are
    inserted ahead of the final element, giving a 249-element list.  That
    list and its ``merge_bins`` output are passed to ``bar1234``; the
    returned axes gets the title.  The figure is written to
    ``rep/rep6a/fig2`` and closed.
    """
    totals = np.zeros(119, dtype=np.float64)
    entry_count = 0
    for prefix in ['a', 'b', 'c', 'd', 'e']:
        for fc in range(1, 6):
            entries = misc.extract_file(
                'res/test6/{}/fc{}/zeros/zt'.format(prefix, fc))
            for entry in entries:
                totals = np.add(totals, entry)
                entry_count += 1

    mean_entry = np.true_divide(totals, entry_count)
    fractions = np.true_divide(mean_entry, sum(mean_entry)).tolist()

    # Keep the final element last and pad the gap before it with zeros.
    padded = fractions[:-1] + [0] * 130 + [fractions[-1]]

    merged = merge_bins(padded)
    ax = bar1234(padded, merged)
    ax.set_title("Figure 2: Zero Distribution (Report 5 Global Average)")
    misc.savefig("rep/rep6a/fig2", plt)
    plt.close()