def accuracyPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Plot accuracy against the p-value threshold, one pass per trace type.

    Every entry of ``trace_types`` is evaluated with ``topo_types[1]`` and
    ``task_types[0]``.  The second file returned by ``init_files`` is handed
    to ``retieveData`` together with the shared ``accuracys`` / ``errors``
    lists (presumably filled in place by that helper, which is defined
    elsewhere), and the values of the returned accuracy dict are kept for
    the box plot.  Afterwards a line plot and a box plot (first collected
    data set only) are drawn through ``pl``.

    NOTE(review): the block layout was rebuilt from a flattened source; the
    loop is assumed to end right after the box-plot data is appended.

    Args:
        topo_types: indexable collection of topology identifiers.
        task_types: indexable collection of task identifiers.
        trace_types: iterable of trace identifiers.
        pair_seq: kept for interface compatibility; not used here.
    """
    accuracys, errors = [], []
    chern_x = None
    acc_boxplot_data = []

    for trace_type in trace_types:
        topo_type = topo_types[1]
        task_type = task_types[0]

        # The pair count follows the position of the topology in topo_types.
        for size, candidate in zip((4, 8, 14), topo_types[:3]):
            if topo_type == candidate:
                pair_num = size
                break

        infile_name1, infile_name2 = init_files(
            trace_type, task_type, topo_type, x_var='PVALUE')

        group_fields = ['#flows', 'is_correlated', 'key']
        agg_fields = ['count_chern', 'byte_chern']
        chern_x, _case_num, acc_dict = retieveData(
            infile_name2, group_fields, agg_fields, accuracys, errors,
            div=False, pair_num=pair_num)
        acc_boxplot_data.append(acc_dict.values())

    # Options shared by the line plot and the box plot.
    shared = dict(
        k=2,
        errors=errors,
        xlabel='The threshold for p-value',
        ylabel='Average accuracy',
        title=infile_name1,
        xlog=False,
        ylog=False,
        acc_legend=['Database', 'Web', 'Hadoop'],
        legend_y=0.85,
    )

    print('%s %s' % (accuracys, chern_x))
    pl.plot(accuracys, x=chern_x, **shared)

    print(acc_boxplot_data)
    pl.box_plot([acc_boxplot_data[0]], x=chern_x,
                xticks=[str(tick) for tick in chern_x], **shared)
def accuracyPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Plot accuracy against epsilon for the Chernoff and t-test result files.

    For each of the first three topologies (with ``trace_types[0]`` and
    ``task_types[0]``) two result-file pairs are requested from
    ``init_files`` (``x_var='CHERNPVALUEBYTE'`` and ``'TTESTPVALUEBYTE'``).
    The second file of each pair is passed to ``retieveData`` with the shared
    accumulator lists (presumably filled in place by that helper, which is
    defined elsewhere).  The accuracy dict of the last call per topology is
    kept for the box plot.  A line plot over ``xss`` and a box plot of the
    first collected data set follow.

    NOTE(review): the block layout was rebuilt from a flattened source; the
    loop is assumed to end right after the box-plot data is appended.

    Args:
        topo_types: sliceable collection of topology identifiers.
        task_types: indexable collection of task identifiers.
        trace_types: indexable collection of trace identifiers.
        pair_seq: kept for interface compatibility; not used here.
    """
    accuracys, errors = [], []
    chern_x = None
    acc_boxplot_data = []
    q_75ss, xss = [], []

    for topo_type in topo_types[0:3]:
        trace_type = trace_types[0]
        task_type = task_types[0]

        # The pair count follows the position of the topology in topo_types.
        for size, candidate in zip((4, 8, 14), topo_types[:3]):
            if topo_type == candidate:
                pair_num = size
                break

        infile_name1, infile_name2 = init_files(
            trace_type, task_type, topo_type,
            x_var='CHERNPVALUEBYTE')  # CHERNPVALUE
        _infile_name3, infile_name4 = init_files(
            trace_type, task_type, topo_type, x_var='TTESTPVALUEBYTE')

        key_field = 'epsilon'
        group_fields = [key_field, 'is_correlated', 'key']
        agg_fields = ['count_chern', 'byte_chern']

        # Chernoff file first, then the t-test file; both feed the same lists.
        for data_file in (infile_name2, infile_name4):
            chern_x, _case_num, acc_dict = retieveData(
                data_file, group_fields, agg_fields, accuracys, errors,
                div=False, pair_num=pair_num, q_75ss=q_75ss,
                key_field=key_field, topo_type=topo_type, xss=xss)
        acc_boxplot_data.append(acc_dict.values())

    acc_legend = [
        'B4-Chernoff', 'B4-t-test',
        'Tree-Chernoff', 'Tree-t-test',
        'Jupiter-Chernoff', 'Jupiter-t-test',
    ]
    xlabel = '$P_b$'
    ylabel = 'Average accuracy'  # 'Relative elephant flow size'

    print('%s %s %s' % ('accuracys, chern_x', len(accuracys[0]), len(chern_x)))
    print('%s %s' % (accuracys, xss))

    # The figure title is derived from the path of the first Chernoff file.
    title = infile_name1.split('/outputs')[1].split('.csv')[0].replace('.', '_')

    pl.plot(accuracys, x=xss, k=2, errors=[],
            xlabel=xlabel, ylabel=ylabel, title=title,
            xlog=True, ylog=False,
            acc_legend=acc_legend, legend_x=0.0, legend_y=0.3)

    print(acc_boxplot_data)
    pl.box_plot([acc_boxplot_data[0]], x=chern_x, k=2, errors=errors,
                xlabel=xlabel, ylabel=ylabel, title=title,
                xlog=False, ylog=False,
                acc_legend=acc_legend, legend_y=0.85,
                xticks=[str(tick) for tick in chern_x])
# accuracyPlot (flow-count variant): for every trace type, using topo_types[0]
# and task_types[1], asks init_files for the 'TTESTFLOWNUM' files (p=0.01),
# passes the first file to retieveData with accuracys/errors/times (div=True)
# and the second file with cherns/chern_errors/time_chern (div=False), then
# calls pl.plot on accuracys with the first three legend entries.
# The first retieveData result is unpacked into two values (x, case_num); the
# second result is stored whole in chern_x.
# pair_seq is not used in the visible part of the function.
# NOTE(review): this snapshot is flattened onto one line, so the extent of the
# for-loop body is not recoverable here, and it ends inside an unterminated
# triple-quoted string -- the rest of the function is missing from this view.
def accuracyPlot(topo_types, task_types, trace_types, pair_seq=0): accuracys, errors, times, x = [], [], [], None cherns, chern_errors, time_chern, chern_x = [], [], [], None for trace_type in trace_types: topo_type = topo_types[0] task_type = task_types[1] infile_name1, infile_name2 = init_files(trace_type, task_type, topo_type, x_var='TTESTFLOWNUM', p=0.01) group_fields = ['#flows', 'is_correlated'] agg_fields = ['#correlated_detect', '#non-uniform_detect'] x, case_num = retieveData(infile_name1, group_fields, agg_fields, accuracys, errors, times, div=True) print x, case_num group_fields = ['#flows', 'is_correlated', 'key'] agg_fields = ['count_chern', 'byte_chern', 'time'] chern_x = retieveData(infile_name2, group_fields, agg_fields, cherns, chern_errors, time_chern, div=False) acc_legend = [ 'Database', 'Web', 'Hadoop', 'e1', 'e5', 'e10', 'e50', 'e100' ] xlabel, ylabel = 'The number of flows', 'Average accuracy' pl.plot(accuracys, x=x, k=2, errors=errors, xlabel=xlabel, ylabel=ylabel, title=infile_name1.split('.')[0], xlog=False, ylog=False, acc_legend=acc_legend[0:3]) """xlabel, ylabel = 'The number of flows', 'Average p-value'
# rankingPlot (per-trace variant): for every trace type, using topo_types[1]
# and task_types[1], asks init_files for the default file pair and passes the
# first file to retieveData with task_type='RANKING' (both aggregate fields
# are '#correlated_detect'); accuracys is then plotted with pl.plot using
# legend entries 3..6 ('e1', 'e5', 'e10', 'e50').
# cut starts at 5; when the topology equals topo_types[2] the module-level
# ACAP is set to 256 and cut to 10 (presumably both assignments belong to the
# if-branch -- the flattened layout does not show it).
# The rankingPair call sits inside a triple-quoted string and is therefore
# disabled, so cut, cherns and pair_seq are not used by live code here.
# NOTE(review): the snapshot ends inside an unterminated triple-quoted
# string -- the rest of the function is missing from this view.
def rankingPlot(topo_types, task_types, trace_types, pair_seq=0): global ACAP accuracys, errors, x = [], [], None cherns, chern_errors, chern_x = [], [], None for trace_type in trace_types: topo_type = topo_types[1] task_type = task_types[1] cut = 5 if topo_type == topo_types[2]: ACAP = 256 cut = 10 infile_name1, infile_name2 = init_files(trace_type, task_type, topo_type) group_fields = ['#flows', 'is_correlated'] agg_fields = ['#correlated_detect', '#correlated_detect'] x, case_num = retieveData(infile_name1, group_fields, agg_fields, accuracys, errors, div=True, task_type='RANKING') """print(len(x)) group_fields = ['#flows','is_correlated', 'key'] agg_fields = ['a'+str(i) for i in range(ACAP)] #print(agg_fields) chern_x = rankingPair( infile_name2, cherns, 6, pair_seq, file_type=1, topo_type=topo_type, cut=cut )""" acc_legend = [ 'Database', 'Web', 'Hadoop', 'e1', 'e5', 'e10', 'e50', 'e100' ] xlabel, ylabel = 'The number of flows', 'Average accuracy' pl.plot(accuracys, x=x, k=2, errors=errors, xlabel=xlabel, ylabel=ylabel, title=infile_name1.split('.')[0], xlog=False, ylog=False, acc_legend=acc_legend[3:7]) """xlabel, ylabel = 'Previous hop', 'Average p-value'
# rankingPlot (accuracy + execution-time variant): for each of the first three
# topologies, using trace_types[0] and task_types[1], asks init_files for the
# 'CHERNFLOWNUM' and 'TTESTFLOWNUM' file pairs (p=0.01).
#   * The first file of each pair goes to retieveData with task_type='RANKING';
#     the Chernoff call is given `times`, the t-test call `time_chern`, and
#     both are given accuracys/errors/q_75s/q_25s/xss.
#   * The second file of each pair goes to getTime together with `times`,
#     case_num=pair_num (4, 8 or 14 by topology position) and xss=xss1.
# accuracys is plotted over xss with the legend cut to len(accuracys); times
# is plotted over xss1 with the full legend and a '_time' title suffix.
# `x` and `xss` are reassigned before the time plot, but that call passes
# xss1, so the new values are not used in the visible code.
# The rankingPair call is inside a triple-quoted string (disabled); chern_x
# therefore keeps its initial None unless code outside this view changes it.
# NOTE(review): `chern_x != None` would be `is not None` by convention.
# NOTE(review): the snapshot is flattened (loop/branch extents are not
# recoverable here) and ends inside an unterminated triple-quoted string --
# the rest of the function is missing from this view.
def rankingPlot(topo_types, task_types, trace_types, pair_seq=0): global ACAP accuracys, errors, x, q_75s, q_25s, times = [], [], None, [], [], [] cherns, chern_errors, chern_x, time_chern = [], [], None, [] xss = [] xss1 = [] for topo_type in topo_types[0:3]: trace_type = trace_types[0] task_type = task_types[1] cut = 5 if topo_type == topo_types[2]: ACAP = 256 cut = 10 if topo_type == topo_types[0]: pair_num = 4 elif topo_type == topo_types[1]: pair_num = 8 elif topo_type == topo_types[2]: pair_num = 14 infile_name1, infile_name2 = init_files(trace_type, task_type, topo_type, x_var='CHERNFLOWNUM', p=0.01) infile_name3, infile_name4 = init_files(trace_type, task_type, topo_type, x_var='TTESTFLOWNUM', p=0.01) key_field = '#flows' group_fields = ['#flows', 'is_correlated'] agg_fields = ['#correlated_detect', '#correlated_detect'] x, case_num = retieveData(infile_name1, group_fields, agg_fields, accuracys, errors, times, div=True, task_type='RANKING', q_75s=q_75s, q_25s=q_25s, xss=xss) x, case_num = retieveData(infile_name3, group_fields, agg_fields, accuracys, errors, time_chern, div=True, task_type='RANKING', q_75s=q_75s, q_25s=q_25s, xss=xss) getTime(infile_name2, times, group_fields=group_fields, key_field=key_field, file_type=1, topo_type=topo_type, case_num=pair_num, xss=xss1) getTime(infile_name4, times, group_fields=group_fields, key_field=key_field, file_type=1, topo_type=topo_type, case_num=pair_num, xss=xss1) print(len(x)) group_fields = ['#flows', 'is_correlated', 'key'] agg_fields = ['a' + str(i) for i in range(ACAP)] #print(agg_fields) """chern_x = rankingPair( infile_name2, cherns, 2, pair_seq, file_type=1, topo_type=topo_type, cut=cut )""" acc_legend = [ 'B4-Chernoff', 'B4-t-test', 'Tree-Chernoff', 'Tree-t-test', 'Jupiter-Chernoff', 'Jupiter-t-test', 'e1', 'e5', 'e10', 'e50', 'e100' ] xlabel, ylabel = 'The number of flows', 'Average accuracy' pl.plot(accuracys, x=xss, k=2, errors=[], xlabel=xlabel, ylabel=ylabel, 
title=infile_name1.split('/outputs')[1].split('.csv')[0].replace( '.', '_'), xlog=False, ylog=False, acc_legend=acc_legend[:len(accuracys)], legend_x=0.45) x = [20, 30, 40, 50, 60, 70] xss = [x] * len(times) print times, len(times[0]) pl.plot(times, x=xss1, k=2, errors=[], xlabel=xlabel, ylabel='Execution time (seconds)', title=infile_name1.split('/outputs')[1].split('.csv')[0].replace( '.', '_') + '_time', xlog=False, ylog=False, acc_legend=acc_legend, legend_y=0.85, legend_x=0.0) xlabel, ylabel = 'Previous hop', 'Average p-value' print chern_x xticks = [] if chern_x != None: xticks = chern_x """pl.plot(cherns, x=[], k=len(cherns), errors=[],
# rankingPlot (per-trace variant with pair plot): for every trace type, using
# topo_types[1] and task_types[1], asks init_files for the default file pair
# and passes the first file to retieveData with task_type='RANKING' and the
# q_75s/q_25s lists.  rankingPair is called on the second file with `cherns`,
# 0, pair_seq, file_type=1, the topology and `cut`; its result is chern_x.
# cut starts at 5; when the topology equals topo_types[2] the module-level
# ACAP is set to 256 and cut to 10 (presumably both assignments belong to the
# if-branch).
# Two figures are drawn with pl.plot:
#   * accuracys over x with errors=q_75s and legend entries 3..7;
#   * cherns with k=len(cherns), a log y-axis, xticks=chern_x and a title
#     suffixed with 'pair-key' + str(pair_seq).
# The second group_fields/agg_fields pair (built from ACAP) is not passed to
# any call in this function.
# NOTE(review): the local `xticks` is computed but the plot call passes
# chern_x directly, so the `[]` fallback never takes effect; `!= None` would
# be `is not None` by convention.
# NOTE(review): the snapshot is flattened onto one line; whether
# print(len(x)) and the rankingPair call run once per trace or once after the
# loop cannot be recovered from this view.
def rankingPlot(topo_types, task_types, trace_types, pair_seq=0): global ACAP accuracys, errors, x, q_75s, q_25s = [], [], None, [], [] cherns, chern_errors, chern_x = [], [], None for trace_type in trace_types: topo_type = topo_types[1] task_type = task_types[1] cut = 5 if topo_type == topo_types[2]: ACAP = 256 cut = 10 infile_name1, infile_name2 = init_files(trace_type, task_type, topo_type) group_fields = ['#flows', 'is_correlated'] agg_fields = ['#correlated_detect', '#correlated_detect'] x, case_num = retieveData(infile_name1, group_fields, agg_fields, accuracys, errors, div=True, task_type='RANKING', q_75s=q_75s, q_25s=q_25s) print(len(x)) group_fields = ['#flows', 'is_correlated', 'key'] agg_fields = ['a' + str(i) for i in range(ACAP)] #print(agg_fields) chern_x = rankingPair(infile_name2, cherns, 0, pair_seq, file_type=1, topo_type=topo_type, cut=cut) acc_legend = [ 'Database', 'Web', 'Hadoop', 'e1', 'e5', 'e10', 'e50', 'e100' ] xlabel, ylabel = 'The number of flows', 'Average accuracy' pl.plot(accuracys, x=x, k=2, errors=q_75s, xlabel=xlabel, ylabel=ylabel, title=infile_name1.split('.')[0], xlog=False, ylog=False, acc_legend=acc_legend[3:8]) xlabel, ylabel = 'Previous hop', 'Average p-value' print chern_x xticks = [] if chern_x != None: xticks = chern_x pl.plot(cherns, x=None, k=len(cherns), errors=[], xlabel=xlabel, ylabel=ylabel, title=infile_name1.split('.')[0] + 'pair-key' + str(pair_seq), xlog=False, ylog=True, acc_legend=acc_legend, xticks=chern_x, figure_width=4.3307 * 1.25)
def rankingPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Plot the ranked p-value series collected by ``rankingPair``.

    Only ``topo_types[1]`` is processed (the loop runs over
    ``topo_types[1:2]``), with ``trace_types[0]`` and ``task_types[1]``.
    ``init_files`` is asked for the ``'CHERNRANKINGPVALUEFLOWNUM'`` and
    ``'TTESTRANKINGPVALUEFLOWNUM'`` file pairs.  The second file of each pair
    is passed to ``rankingPair`` together with ``cherns`` and the quartile
    lists (presumably filled in place by that helper, which is defined
    elsewhere).  ``cherns`` is then drawn on a log y-axis with ticks
    ``1..cut``.

    Changes from the previous revision: the ``xticks`` fallback guarded by
    ``chern_x != None`` was removed because the plot never received it, and
    the unused ``group_fields`` / ``agg_fields`` lists were dropped, so the
    function no longer depends on ``ACAP`` being defined beforehand.

    NOTE(review): the block layout was rebuilt from a flattened source; the
    plotting code is assumed to run after the loop.

    Args:
        topo_types: sliceable collection of topology identifiers.
        task_types: indexable collection of task identifiers.
        trace_types: indexable collection of trace identifiers.
        pair_seq: only used as a suffix of the figure title.

    Side effects:
        Sets the module-level ``ACAP`` to 256 when the processed topology
        equals ``topo_types[2]``.
    """
    global ACAP
    q_75s, q_25s = [], []
    cherns, chern_x = [], None

    for topo_type in topo_types[1:2]:
        trace_type = trace_types[0]
        task_type = task_types[1]

        cut = 4
        if topo_type == topo_types[2]:
            ACAP = 256
            cut = 10

        infile_name1, infile_name2 = init_files(
            trace_type, task_type, topo_type,
            x_var='CHERNRANKINGPVALUEFLOWNUM')
        _infile_name3, infile_name4 = init_files(
            trace_type, task_type, topo_type,
            x_var='TTESTRANKINGPVALUEFLOWNUM')

        # Chernoff file first, then the t-test file; both feed `cherns`.
        for data_file in (infile_name2, infile_name4):
            chern_x = rankingPair(
                data_file, cherns, 0, [0, 1],
                file_type=1, topo_type=topo_type, cut=cut,
                q_25s=q_25s, q_75s=q_75s)

    xlabel, ylabel = 'Rank', 'Average p-value'

    # One output line, matching the former pair of print statements.
    print('%s %s %s %s %s' % (chern_x, cherns, 'q****', q_25s, q_75s))

    pl.plot(
        cherns, x=[], k=2, errors=[],
        xlabel=xlabel, ylabel=ylabel,
        title=(infile_name1.split('/outputs')[1].split('.csv')[0]
               + 'pair-key' + str(pair_seq)),
        xlog=False, ylog=True,
        acc_legend=[
            '0-hop-Chernoff', '1-hop-Chernoff',
            '0-hop-t-test', '1-hop-t-test',
        ],
        xticks=np.arange(1, cut + 1),
        figure_width=4.3307, figure_height=3.346,
        x_shift=0.06, y_shift=0.05, legend_x=0.3)  # [q_25s, q_75s]
def accuracyPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Plot accuracy against the top-k setting for three input variants.

    Only ``topo_types[2]`` is processed (the loop runs over
    ``topo_types[2:3]``), with the fixed trace label ``'ABC'`` and
    ``task_types[2]``.  ``init_files`` is asked for the ``'TOPKBIASONEFIVE'``,
    ``'TOPKBIASSMALL'`` and ``'TOPK'`` file pairs.  The second file of each
    pair is passed to ``retieveData`` with the shared accumulator lists
    (presumably filled in place by that helper, which is defined elsewhere),
    and every returned accuracy dict contributes one box-plot data set.  A
    line plot over ``xss`` and a box plot whose x values are twice the last
    returned x values follow.

    NOTE(review): the block layout was rebuilt from a flattened source; the
    loop is assumed to end after the third box-plot append.

    Args:
        topo_types: sliceable collection of topology identifiers.
        task_types: indexable collection of task identifiers.
        trace_types: not used by this variant.
        pair_seq: kept for interface compatibility; not used here.
    """
    accuracys, errors = [], []
    chern_x = None
    acc_boxplot_data = []
    q_75ss, xss = [], []

    for topo_type in topo_types[2:3]:
        trace_type = 'ABC'
        task_type = task_types[2]

        # The pair count follows the position of the topology in topo_types.
        for size, candidate in zip((4, 8, 16), topo_types[:3]):
            if topo_type == candidate:
                pair_num = size
                break

        infile_name1, infile_name2 = init_files(
            trace_type, task_type, topo_type,
            x_var='TOPKBIASONEFIVE')  # CHERNPVALUE
        _infile_name3, infile_name4 = init_files(
            trace_type, task_type, topo_type,
            x_var='TOPKBIASSMALL')  # CHERNPVALUE
        _infile_name5, infile_name6 = init_files(
            trace_type, task_type, topo_type,
            x_var='TOPK')  # CHERNPVALUE

        key_field = 'topk'
        group_fields = [key_field, 'is_correlated', 'key']

        for data_file in (infile_name2, infile_name4, infile_name6):
            # A fresh aggregate-field list is handed to every call.
            agg_fields = ['count_chern', 'byte_chern']
            chern_x, _case_num, acc_dict = retieveData(
                data_file, group_fields, agg_fields, accuracys, errors,
                div=False, pair_num=pair_num, q_75ss=q_75ss,
                key_field=key_field, topo_type=topo_type, xss=xss)
            acc_boxplot_data.append(acc_dict.values())

    acc_legend = [
        'dropping rate = 0.1',
        'dropping rate = 0.15',
        'dropping rate = 0.2',
    ]
    xlabel = 'The number of links'
    ylabel = 'Average accuracy'  # 'Relative elephant flow size'

    print('%s %s %s' % ('accuracys, chern_x', len(accuracys[0]), len(chern_x)))
    print('%s %s' % (accuracys, xss))

    # The figure title is derived from the path of the first input file.
    title = infile_name1.split('/outputs')[1].split('.csv')[0].replace('.', '_')

    pl.plot(accuracys, x=xss, k=2, errors=q_75ss,
            xlabel=xlabel, ylabel=ylabel, title=title,
            xlog=False, ylog=False,
            acc_legend=acc_legend, legend_x=0.3, legend_y=0.2)

    print(len(acc_boxplot_data))

    # The box plot uses doubled x values.
    doubled_x = [2 * value for value in chern_x]
    pl.box_plot(acc_boxplot_data, x=doubled_x, k=2, errors=errors,
                xlabel=xlabel, ylabel=ylabel, title=title,
                xlog=False, ylog=False,
                acc_legend=[], legend_y=0.85,
                xticks=[str(tick) for tick in doubled_x])
def accuracyPlot(topo_types, task_types, c, pair_seq=0):
    """Plot accuracy and execution time against the number of flows.

    For each of the first three topologies (with the first trace type and
    ``task_types[0]``) ``init_files`` is asked for the ``'XFLOWNUM'``,
    ``'TTESTFLOWNUM'`` and ``'CHERNTTFLOWNUM'`` file pairs.  The second file
    of the first two pairs is passed to ``retieveData`` with the shared
    accumulator lists (presumably filled in place by that helper, which is
    defined elsewhere).  Every series is cut to its first 20 points before
    plotting.  Three figures are drawn: accuracy, execution time, and a box
    plot of the first collected data set.

    Fix: the trace type is now taken from the third argument ``c``.  The
    previous revision read a module-level ``trace_types`` instead, which
    ignored the argument and raised ``NameError`` when no such global
    existed.  The parameter keeps its name so existing callers still work.

    NOTE(review): the block layout was rebuilt from a flattened source; the
    loop is assumed to end right after the box-plot data is appended.

    Args:
        topo_types: sliceable collection of topology identifiers.
        task_types: indexable collection of task identifiers.
        c: indexable collection of trace identifiers; only ``c[0]`` is used.
        pair_seq: kept for interface compatibility; not used here.
    """
    accuracys, errors, times = [], [], []
    chern_x = None
    acc_boxplot_data = []
    q_75ss, xss = [], []

    for topo_type in topo_types[0:3]:
        trace_type = c[0]
        task_type = task_types[0]

        # The pair count follows the position of the topology in topo_types.
        for size, candidate in zip((4, 8, 14), topo_types[:3]):
            if topo_type == candidate:
                pair_num = size
                break

        infile_name1, infile_name2 = init_files(
            trace_type, task_type, topo_type, x_var='XFLOWNUM')
        _infile_name3, infile_name4 = init_files(
            trace_type, task_type, topo_type, x_var='TTESTFLOWNUM')
        # Result unused (its retieveData call was already disabled); the call
        # is kept in case init_files has side effects.
        init_files(trace_type, task_type, topo_type, x_var='CHERNTTFLOWNUM')

        group_fields = ['#flows', 'is_correlated', 'key']
        agg_fields = ['count_chern', 'byte_chern']

        for data_file in (infile_name2, infile_name4):
            chern_x, _case_num, acc_dict = retieveData(
                data_file, group_fields, agg_fields, accuracys, errors,
                times, div=False, pair_num=pair_num, q_75ss=q_75ss, xss=xss)
        acc_boxplot_data.append(acc_dict.values())

    acc_legend = [
        'B4-Chernoff', 'B4-t-test',
        'Tree-Chernoff', 'Tree-t-test',
        'Jupiter-Chernoff', 'Jupiter-t-test',
    ]
    xlabel, ylabel = 'The number of flows', 'Average accuracy'

    # Only the first `plot_cut` points of every series are shown.
    plot_cut = 20
    accuracys = [series[:plot_cut] for series in accuracys]
    q_75ss = [[bounds[0][:plot_cut], bounds[1][:plot_cut]]
              for bounds in q_75ss]

    print('%s %s %s' % (len(chern_x), len(accuracys[0]), q_75ss[0]))
    print(xss)

    # Figure titles are derived from the path of the first input file.
    title = infile_name1.split('/outputs')[1].split('.csv')[0].replace('.', '_')

    pl.plot(accuracys, x=xss, k=2, errors=[],
            xlabel=xlabel, ylabel=ylabel, title=title,
            xlog=False, ylog=False,
            acc_legend=acc_legend, legend_y=0.3, legend_x=0.45)

    times = [series[:plot_cut] for series in times]
    pl.plot(times, x=xss, k=2, errors=[],
            xlabel=xlabel, ylabel='Execution time (seconds)',
            title=title + '_time',
            xlog=False, ylog=False,
            acc_legend=acc_legend, legend_y=0.8, legend_x=0.0)

    pl.box_plot([acc_boxplot_data[0]], x=chern_x, k=2, errors=errors,
                xlabel=xlabel, ylabel=ylabel, title=title,
                xlog=False, ylog=False,
                acc_legend=acc_legend, legend_y=0.85,
                xticks=[str(tick) for tick in chern_x])
def accuracyPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Plot accuracy against the r-threshold for the first two topologies.

    For each topology in ``topo_types[0:2]`` (with ``trace_types[0]`` and
    ``task_types[0]``) ``init_files`` is asked for the ``'TTESTRTHRESHOLD'``
    file pair.  The second file is passed to ``retieveData`` with the shared
    ``accuracys`` / ``errors`` / ``q_75ss`` lists (presumably filled in place
    by that helper, which is defined elsewhere), and the values of the
    returned accuracy dict are kept for the box plot.  A line plot with
    ``q_75ss`` as error data and a box plot of the first collected data set
    follow.

    NOTE(review): the block layout was rebuilt from a flattened source; the
    loop is assumed to end right after the box-plot data is appended.

    Args:
        topo_types: sliceable collection of topology identifiers.
        task_types: indexable collection of task identifiers.
        trace_types: indexable collection of trace identifiers.
        pair_seq: kept for interface compatibility; not used here.
    """
    accuracys, errors = [], []
    chern_x = None
    acc_boxplot_data = []
    q_75ss = []

    for topo_type in topo_types[0:2]:
        trace_type = trace_types[0]
        task_type = task_types[0]

        # The pair count follows the position of the topology in topo_types.
        for size, candidate in zip((4, 8, 14), topo_types[:3]):
            if topo_type == candidate:
                pair_num = size
                break

        infile_name1, infile_name2 = init_files(
            trace_type, task_type, topo_type, x_var='TTESTRTHRESHOLD')

        group_fields = ['r_threshold', 'is_correlated', 'key']
        agg_fields = ['count_chern', 'byte_chern']
        chern_x, _case_num, acc_dict = retieveData(
            infile_name2, group_fields, agg_fields, accuracys, errors,
            div=False, pair_num=pair_num, q_75ss=q_75ss)
        acc_boxplot_data.append(acc_dict.values())

    acc_legend = ['Database', 'Web', 'Hadoop']
    xlabel, ylabel = '$P_b$', 'Average accuracy'

    print('%s %s %s' % ('accuracys, chern_x', len(accuracys[0]), len(chern_x)))

    # The figure title is derived from the path of the first input file.
    title = infile_name1.split('/outputs')[1].split('.csv')[0].replace('.', '_')

    pl.plot(accuracys, x=chern_x, k=2, errors=q_75ss,
            xlabel=xlabel, ylabel=ylabel, title=title,
            xlog=False, ylog=False,
            acc_legend=acc_legend, legend_x=0.2, legend_y=0.2)

    print(acc_boxplot_data)
    pl.box_plot([acc_boxplot_data[0]], x=chern_x, k=2, errors=errors,
                xlabel=xlabel, ylabel=ylabel, title=title,
                xlog=False, ylog=False,
                acc_legend=acc_legend, legend_y=0.85,
                xticks=[str(tick) for tick in chern_x])