def plot_runtime(algebras, data, output_name):
    """Draw a bar chart of query runtime and save it as a PDF.

    Only rows of ``data`` whose first column equals the module-level
    ``query`` are plotted.  Column 2 supplies the bar height (runtime) and
    column 3 the error bar (standard deviation); both are converted with
    ``to_float``.

    Besides ``query``, this function reads ``bar_width``, ``colors``,
    ``xstick_offset``, ``dpi``, ``to_float`` and ``to_label`` from outside
    the function.

    Args:
        algebras: labels applied to the x ticks (ticks are placed one per
            bar).
        data: iterable of indexable rows (presumably parsed CSV rows --
            confirm against the caller).
        output_name: destination path handed to ``savefig``.
    """
    # plot wall clock time
    selected = [row for row in data if row[0] == query]
    runtime = [to_float(row[2]) for row in selected]    # query runtime
    std = [to_float(row[3]) for row in selected]        # standard deviation

    ind = np.arange(len(runtime))  # the x locations for the groups
    fig, ax = plt.subplots()
    rects = ax.bar(
        ind + 0.1, runtime, width=bar_width, color=colors, yerr=std)
    ax.set_ylabel('Time (sec)')
    ax.set_xticks(ind + xstick_offset)
    ax.set_xticklabels(algebras)
    if query == "fb_q5":
        # this query gets a fixed y range instead of the autoscaled one
        ax.set_ylim((0, 14000))

    # label the bars
    for rect in rects:
        height = rect.get_height()
        # int() truncates, so any height strictly between -1 and 1 is
        # reported as FAIL, not only an exact zero.
        failed = int(height) == 0
        text = "FAIL" if failed else to_label(height)
        # only failed bars override the default text color
        style = {'color': 'red'} if failed else {}
        ax.text(
            rect.get_x() + rect.get_width() / 2.,
            1.05 * height, text,
            ha='center', va='bottom', **style)

    # The parenthesised single-argument form prints the same text under
    # the Python 2 print statement and the Python 3 print function.
    print("outputing {}".format(output_name))
    # Save through the figure object and always close it: pyplot keeps
    # every figure it creates alive until it is closed explicitly.
    try:
        fig.savefig(output_name, format='pdf', dpi=dpi)
    finally:
        plt.close(fig)
# Example #2
# 0
def plot_runtime(algebras, data, output_name):
    """Draw a bar chart of query runtime and save it as a PDF.

    Only rows of ``data`` whose first column equals the module-level
    ``query`` are plotted.  Column 2 supplies the bar height (runtime) and
    column 3 the error bar (standard deviation); both are converted with
    ``to_float``.

    Besides ``query``, this function reads ``bar_width``, ``colors``,
    ``xstick_offset``, ``dpi``, ``to_float`` and ``to_label`` from outside
    the function.

    Args:
        algebras: labels applied to the x ticks (ticks are placed one per
            bar).
        data: iterable of indexable rows (presumably parsed CSV rows --
            confirm against the caller).
        output_name: destination path handed to ``savefig``.
    """
    # plot wall clock time
    selected = [row for row in data if row[0] == query]
    runtime = [to_float(row[2]) for row in selected]  # query runtime
    std = [to_float(row[3]) for row in selected]  # standard deviation

    ind = np.arange(len(runtime))  # the x locations for the groups
    fig, ax = plt.subplots()
    rects = ax.bar(ind + 0.1,
                   runtime,
                   width=bar_width,
                   color=colors,
                   yerr=std)
    ax.set_ylabel('Time (sec)')
    ax.set_xticks(ind + xstick_offset)
    ax.set_xticklabels(algebras)
    if query == "fb_q5":
        # this query gets a fixed y range instead of the autoscaled one
        ax.set_ylim((0, 14000))

    # label the bars
    for rect in rects:
        height = rect.get_height()
        # int() truncates, so any height strictly between -1 and 1 is
        # reported as FAIL, not only an exact zero.
        failed = int(height) == 0
        text = "FAIL" if failed else to_label(height)
        # only failed bars override the default text color
        style = {'color': 'red'} if failed else {}
        ax.text(rect.get_x() + rect.get_width() / 2.,
                1.05 * height,
                text,
                ha='center',
                va='bottom',
                **style)

    # The parenthesised single-argument form prints the same text under
    # the Python 2 print statement and the Python 3 print function.
    print("outputing {}".format(output_name))
    # Save through the figure object and always close it: pyplot keeps
    # every figure it creates alive until it is closed explicitly.
    try:
        fig.savefig(output_name, format='pdf', dpi=dpi)
    finally:
        plt.close(fig)
def max_workload_plot(fname, ofname):
    """
        Plot the maximum workload per server using different HC configs

        Reads ``csvs/<fname>``, skips its first row and, for every
        remaining row, divides columns 1, 2 and 4 by column 3 (all
        converted with ``to_float``).  The three resulting ratio series
        are drawn as grouped bars and written as a PDF to
        ``<path>/<ofname>``.  ``path``, ``colors``, ``dpi`` and
        ``to_float`` are read from outside the function.
    """
    # read data from csv
    # NOTE(review): the "U" mode flag is rejected by open() on
    # Python 3.11+; it is left untouched because the rest of this code
    # is written for Python 2 -- confirm the target interpreter.
    with open("csvs/{}".format(fname), "rU") as f:
        data = [list(row) for row in csv.reader(f)]

    # NOTE(review): the x positions are fixed at four groups, so the CSV
    # is assumed to contain exactly four data rows -- confirm.
    N = 4
    ind = np.arange(N)          # x locations
    bar_width = 0.27            # the width of the bars (local value)

    # prepare data: every series is normalised by the row's optimum
    raco_hc = []
    random_hc = []
    lp_round = []
    for row in data[1:]:
        opt = to_float(row[3])
        raco_hc.append(to_float(row[1]) / opt)
        random_hc.append(to_float(row[2]) / opt)
        lp_round.append(to_float(row[4]) / opt)

    # set bars: one bar container per series, shifted by one bar width
    fig, ax = plt.subplots()
    data_groups = [raco_hc, lp_round, random_hc]
    bars = [
        ax.bar(ind + 0.1 + i * bar_width, group,
               width=bar_width, color=colors[i])
        for i, group in enumerate(data_groups)
    ]

    # set labels
    ax.set_ylabel("Workload to optimal ratio")
    ax.set_xticks(ind + bar_width * 2)
    ax.set_xticklabels(("Q1", "Q2", "Q3", "Q4"))
    ax.set_ylim((0, 7))

    # set bar labels: print each ratio just above its bar
    for bar in bars:
        for rect in bar:
            height = rect.get_height()
            ax.text(
                rect.get_x() + rect.get_width() / 2.,
                1.05 * height, '%.2f' % height,
                ha='center', va='bottom', size=14)

    # legend order follows data_groups: raco_hc, lp_round, random_hc
    ax.legend(
        (bars[0][0], bars[1][0], bars[2][0]),
        ('Our Alg.', 'Round Down', 'Random(4096 cells)'),
        prop={'size': 15})

    output_path = "{}/{}".format(path, ofname)
    # The parenthesised single-argument form prints the same text under
    # the Python 2 print statement and the Python 3 print function.
    print("outputing {}".format(output_path))
    # Save through the figure object and always close it: pyplot keeps
    # every figure it creates alive until it is closed explicitly.
    try:
        fig.savefig(output_path, format='pdf', dpi=dpi)
    finally:
        plt.close(fig)
def attrorder_plot():
    """Scatter-plot estimated cost against running time for four queries.

    Reads ``csvs/SIGMOD Experiment - attr_order_sum.csv``, skips its first
    row, and groups the remaining rows by the label in column 3 ("Q3",
    "Q4", "Q7", "Q8").  For each group, column 1 (cost) is plotted against
    column 2 (time), both converted with ``to_float``, on log-log axes.
    The figure is written to ``<path>/attr_order_scatter.pdf``.  ``path``,
    ``colors`` and ``to_float`` are read from outside the function.
    """
    datafile = "csvs/SIGMOD Experiment - attr_order_sum.csv"
    # NOTE(review): the "U" mode flag is rejected by open() on
    # Python 3.11+; it is left untouched because the rest of this code
    # is written for Python 2 -- confirm the target interpreter.
    with open(datafile, "rU") as f:
        data = [list(row) for row in csv.reader(f)]
    rows = data[1:]

    # A single figure is created here; no separate plt.figure() call is
    # needed because plt.subplots() already makes its own figure.
    fig, ax = plt.subplots()
    for i, qname in enumerate(("Q3", "Q4", "Q7", "Q8")):
        points = [(to_float(row[1]), to_float(row[2]))
                  for row in rows if row[3] == qname]
        # zip(*[]) yields nothing to unpack, so a query without rows is
        # drawn as an empty series (its legend entry is still created).
        costs, times = zip(*points) if points else ((), ())
        ax.scatter(
            costs, times, color=colors[i], s=120, label=qname, marker="x")

    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlabel('Estimated cost')
    ax.set_ylabel('Actual running time (Sec)')
    ax.legend(prop={'size': 15})
    # NOTE(review): the lower bounds (0 and -30) are not positive although
    # both axes are log-scaled; how matplotlib treats such limits may
    # depend on its version -- verify the rendered range.
    ax.axis([0, 10e22, -30, 1200])

    oname = "{}/attr_order_scatter.pdf".format(path)
    # The parenthesised single-argument form prints the same text under
    # the Python 2 print statement and the Python 3 print function.
    print("output to {}".format(oname))
    # Save through the figure object and always close it: pyplot keeps
    # every figure it creates alive until it is closed explicitly.
    try:
        fig.savefig(oname, format='pdf')
    finally:
        plt.close(fig)
def plot_shuffle(algebras, data, output_name):
    """Draw a bar chart of shuffled tuple counts and save it as a PDF.

    Only rows of ``data`` whose first column equals the module-level
    ``query`` are plotted; column 10, converted with ``to_float``,
    supplies the bar height.

    Besides ``query``, this function reads ``bar_width``, ``colors``,
    ``xstick_offset``, ``dpi``, ``to_float`` and ``to_label`` from outside
    the function.

    Args:
        algebras: labels applied to the x ticks (ticks are placed one per
            bar).
        data: iterable of indexable rows (presumably parsed CSV rows --
            confirm against the caller).
        output_name: destination path handed to ``savefig``.
    """
    shuffle_size = [to_float(row[10]) for row in data if row[0] == query]

    ind = np.arange(len(shuffle_size))  # the x locations for the groups
    fig, ax = plt.subplots()
    rects = ax.bar(ind + 0.1, shuffle_size, width=bar_width, color=colors)
    ax.set_ylabel('Tuples shuffled (million)')
    ax.set_xticks(ind + xstick_offset)
    ax.set_xticklabels(algebras)
    if query == "fb_q5":
        # this query gets a fixed y range instead of the autoscaled one
        ax.set_ylim((0, 17000))

    # label the bars
    for rect in rects:
        height = rect.get_height()
        # int() truncates, so any height strictly between -1 and 1 is
        # reported as FAIL, not only an exact zero.
        failed = int(height) == 0
        text = "FAIL" if failed else to_label(height)
        # only failed bars override the default text color
        style = {'color': 'red'} if failed else {}
        ax.text(
            rect.get_x() + rect.get_width() / 2.,
            1.05 * height, text,
            ha='center', va='bottom', **style)

    # The parenthesised single-argument form prints the same text under
    # the Python 2 print statement and the Python 3 print function.
    print("outputing {}".format(output_name))
    # Save through the figure object and always close it: pyplot keeps
    # every figure it creates alive until it is closed explicitly.
    try:
        fig.savefig(output_name, format='pdf', dpi=dpi)
    finally:
        plt.close(fig)
# Example #6
# 0
def plot_shuffle(algebras, data, output_name):
    """Draw a bar chart of shuffled tuple counts and save it as a PDF.

    Only rows of ``data`` whose first column equals the module-level
    ``query`` are plotted; column 10, converted with ``to_float``,
    supplies the bar height.

    Besides ``query``, this function reads ``bar_width``, ``colors``,
    ``xstick_offset``, ``dpi``, ``to_float`` and ``to_label`` from outside
    the function.

    Args:
        algebras: labels applied to the x ticks (ticks are placed one per
            bar).
        data: iterable of indexable rows (presumably parsed CSV rows --
            confirm against the caller).
        output_name: destination path handed to ``savefig``.
    """
    shuffle_size = [to_float(row[10]) for row in data if row[0] == query]

    ind = np.arange(len(shuffle_size))  # the x locations for the groups
    fig, ax = plt.subplots()
    rects = ax.bar(ind + 0.1,
                   shuffle_size,
                   width=bar_width,
                   color=colors)
    ax.set_ylabel('Tuples shuffled (million)')
    ax.set_xticks(ind + xstick_offset)
    ax.set_xticklabels(algebras)
    if query == "fb_q5":
        # this query gets a fixed y range instead of the autoscaled one
        ax.set_ylim((0, 17000))

    # label the bars
    for rect in rects:
        height = rect.get_height()
        # int() truncates, so any height strictly between -1 and 1 is
        # reported as FAIL, not only an exact zero.
        failed = int(height) == 0
        text = "FAIL" if failed else to_label(height)
        # only failed bars override the default text color
        style = {'color': 'red'} if failed else {}
        ax.text(rect.get_x() + rect.get_width() / 2.,
                1.05 * height,
                text,
                ha='center',
                va='bottom',
                **style)

    # The parenthesised single-argument form prints the same text under
    # the Python 2 print statement and the Python 3 print function.
    print("outputing {}".format(output_name))
    # Save through the figure object and always close it: pyplot keeps
    # every figure it creates alive until it is closed explicitly.
    try:
        fig.savefig(output_name, format='pdf', dpi=dpi)
    finally:
        plt.close(fig)