def accuracyPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Draw accuracy-vs-p-value-threshold plots, one series per trace type.

    For every entry of ``trace_types`` the function asks ``init_files`` for
    the ``x_var='PVALUE'`` input files of ``topo_types[1]`` /
    ``task_types[0]`` and hands the second file to ``retieveData`` together
    with the shared ``accuracys`` and ``errors`` lists.  It then issues one
    ``pl.plot`` call over all series and one ``pl.box_plot`` call for the
    first trace only.

    Args:
        topo_types: Sequence of topology identifiers; index 1 is used.
        task_types: Sequence of task identifiers; index 0 is used.
        trace_types: Iterable of trace identifiers.
        pair_seq: Not used by this function.

    Note:
        ``init_files``, ``retieveData`` and ``pl`` come from outside this
        block.  ``retieveData`` is presumably what fills ``accuracys`` and
        ``errors`` in place -- confirm against its definition.
    """
    accuracys, errors = [], []
    chern_x = None
    acc_boxplot_data = []

    for trace_type in trace_types:
        topo_type = topo_types[1]
        task_type = task_types[0]

        # The first entry of topo_types equal to topo_type selects pair_num
        # (4, 8 or 14), exactly like the former if/elif chain.
        for known_topo, known_pairs in zip(topo_types, (4, 8, 14)):
            if topo_type == known_topo:
                pair_num = known_pairs
                break

        infile_name1, infile_name2 = init_files(trace_type,
                                                task_type,
                                                topo_type,
                                                x_var='PVALUE')

        chern_x, _case_num, acc_dict = retieveData(
            infile_name2,
            ['#flows', 'is_correlated', 'key'],
            ['count_chern', 'byte_chern'],
            accuracys,
            errors,
            div=False,
            pair_num=pair_num)
        acc_boxplot_data.append(acc_dict.values())

    # Both figures share every styling argument; only the data (and the
    # box plot's tick labels) differ.
    shared_style = dict(x=chern_x,
                        k=2,
                        errors=errors,
                        xlabel='The threshold for p-value',
                        ylabel='Average accuracy',
                        title=infile_name1,
                        xlog=False,
                        ylog=False,
                        acc_legend=['Database', 'Web', 'Hadoop'],
                        legend_y=0.85)

    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('%s %s' % (accuracys, chern_x))
    pl.plot(accuracys, **shared_style)

    print(acc_boxplot_data)
    pl.box_plot([acc_boxplot_data[0]],
                xticks=[str(tick) for tick in chern_x],
                **shared_style)
Пример #2
0
def accuracyPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Plot Chernoff and t-test accuracy series for the first three topologies.

    For each of ``topo_types[0:3]`` (with ``trace_types[0]`` and
    ``task_types[0]``) two file pairs are requested from ``init_files``
    (``x_var='CHERNPVALUEBYTE'`` and ``x_var='TTESTPVALUEBYTE'``).  The
    second file of each pair is passed to ``retieveData``, grouped on
    ``'epsilon'``, with the shared ``accuracys``, ``errors``, ``q_75ss`` and
    ``xss`` lists.  One ``pl.plot`` (``xlog=True``) and one ``pl.box_plot``
    (first topology only) are then drawn.

    Args:
        topo_types: Sequence of topology identifiers; the first three are used.
        task_types: Sequence of task identifiers; index 0 is used.
        trace_types: Sequence of trace identifiers; index 0 is used.
        pair_seq: Not used by this function.

    Note:
        ``init_files``, ``retieveData`` and ``pl`` are defined outside this
        block; how ``retieveData`` fills the lists it receives should be
        confirmed against its definition.
    """
    accuracys, errors = [], []
    chern_x = None
    acc_boxplot_data = []
    q_75ss, xss = [], []

    for topo_type in topo_types[0:3]:
        trace_type = trace_types[0]
        task_type = task_types[0]

        # The first entry of topo_types equal to topo_type selects pair_num.
        for known_topo, known_pairs in zip(topo_types, (4, 8, 14)):
            if topo_type == known_topo:
                pair_num = known_pairs
                break

        infile_name1, chern_file = init_files(trace_type,
                                              task_type,
                                              topo_type,
                                              x_var='CHERNPVALUEBYTE')
        _infile_name3, ttest_file = init_files(trace_type,
                                               task_type,
                                               topo_type,
                                               x_var='TTESTPVALUEBYTE')

        key_field = 'epsilon'
        group_fields = [key_field, 'is_correlated', 'key']
        agg_fields = ['count_chern', 'byte_chern']
        # Chernoff file first, then t-test file; the same field lists are
        # passed to both calls.
        for data_file in (chern_file, ttest_file):
            chern_x, _case_num, acc_dict = retieveData(data_file,
                                                       group_fields,
                                                       agg_fields,
                                                       accuracys,
                                                       errors,
                                                       div=False,
                                                       pair_num=pair_num,
                                                       q_75ss=q_75ss,
                                                       key_field=key_field,
                                                       topo_type=topo_type,
                                                       xss=xss)
        # Only the result of the last (t-test) call reaches the box-plot data.
        acc_boxplot_data.append(acc_dict.values())

    acc_legend = [
        'B4-Chernoff', 'B4-t-test', 'Tree-Chernoff', 'Tree-t-test',
        'Jupiter-Chernoff', 'Jupiter-t-test'
    ]
    xlabel, ylabel = '$P_b$', 'Average accuracy'

    # Single pre-formatted arguments keep the output identical under the
    # Python 2 print statement and the Python 3 print function.
    print('accuracys, chern_x %s %s' % (len(accuracys[0]), len(chern_x)))
    print('%s %s' % (accuracys, xss))

    title = infile_name1.split('/outputs')[1].split('.csv')[0].replace(
        '.', '_')
    pl.plot(accuracys,
            x=xss,
            k=2,
            errors=[],
            xlabel=xlabel,
            ylabel=ylabel,
            title=title,
            xlog=True,
            ylog=False,
            acc_legend=acc_legend,
            legend_x=0.0,
            legend_y=0.3)
    print(acc_boxplot_data)

    pl.box_plot([acc_boxplot_data[0]],
                x=chern_x,
                k=2,
                errors=errors,
                xlabel=xlabel,
                ylabel=ylabel,
                title=title,
                xlog=False,
                ylog=False,
                acc_legend=acc_legend,
                legend_y=0.85,
                xticks=[str(tick) for tick in chern_x])
def accuracyPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Plot accuracy against the number of flows, one series per trace type.

    For every trace in ``trace_types`` the ``x_var='TTESTFLOWNUM'``,
    ``p=0.01`` files of ``topo_types[0]`` / ``task_types[1]`` are requested
    from ``init_files``.  The first file is read with ``div=True`` into
    ``accuracys`` / ``errors`` / ``times``; the second with ``div=False``
    into ``cherns`` / ``chern_errors`` / ``time_chern``.  Only the first
    group is plotted by the ``pl.plot`` call below.

    Args:
        topo_types: Sequence of topology identifiers; index 0 is used.
        task_types: Sequence of task identifiers; index 1 is used.
        trace_types: Iterable of trace identifiers.
        pair_seq: Not used in this part of the function.

    Note:
        ``init_files``, ``retieveData`` and ``pl`` are defined outside this
        block.  The two ``retieveData`` calls unpack differently (a pair
        versus a single value); that is reproduced from the original and
        should be checked against the helper's return shape.
    """
    accuracys, errors, times = [], [], []
    x = None
    cherns, chern_errors, time_chern = [], [], []
    chern_x = None

    for trace_type in trace_types:
        topo_type = topo_types[0]
        task_type = task_types[1]

        infile_name1, infile_name2 = init_files(trace_type,
                                                task_type,
                                                topo_type,
                                                x_var='TTESTFLOWNUM',
                                                p=0.01)

        x, case_num = retieveData(
            infile_name1,
            ['#flows', 'is_correlated'],
            ['#correlated_detect', '#non-uniform_detect'],
            accuracys,
            errors,
            times,
            div=True)
        # One pre-formatted argument: same text under the Python 2 print
        # statement and the Python 3 print function.
        print('%s %s' % (x, case_num))

        chern_x = retieveData(
            infile_name2,
            ['#flows', 'is_correlated', 'key'],
            ['count_chern', 'byte_chern', 'time'],
            cherns,
            chern_errors,
            time_chern,
            div=False)

    acc_legend = [
        'Database', 'Web', 'Hadoop', 'e1', 'e5', 'e10', 'e50', 'e100'
    ]
    xlabel, ylabel = 'The number of flows', 'Average accuracy'

    pl.plot(accuracys,
            x=x,
            k=2,
            errors=errors,
            xlabel=xlabel,
            ylabel=ylabel,
            title=infile_name1.split('.')[0],
            xlog=False,
            ylog=False,
            acc_legend=acc_legend[0:3])
    """xlabel, ylabel = 'The number of flows', 'Average p-value'
Пример #4
0
def rankingPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Plot ranking accuracy against the number of flows per trace type.

    Each trace in ``trace_types`` is paired with ``topo_types[1]`` and
    ``task_types[1]``.  The first file returned by ``init_files`` is read
    through ``retieveData`` (``div=True``, ``task_type='RANKING'``) into the
    shared ``accuracys`` / ``errors`` lists, which are then passed to
    ``pl.plot``.

    Side effect: the module-level ``ACAP`` is set to 256 whenever the chosen
    topology equals ``topo_types[2]``.

    Args:
        topo_types: Sequence of topology identifiers; indices 1 and 2 are read.
        task_types: Sequence of task identifiers; index 1 is used.
        trace_types: Iterable of trace identifiers.
        pair_seq: Not used in this part of the function.

    Note:
        ``init_files``, ``retieveData`` and ``pl`` are defined outside this
        block.
    """
    global ACAP

    accuracys = []
    errors = []
    x = None
    cherns = []
    chern_errors = []
    chern_x = None

    for trace_type in trace_types:
        topo_type, task_type = topo_types[1], task_types[1]

        matches_third_topo = topo_type == topo_types[2]
        cut = 10 if matches_third_topo else 5
        if matches_third_topo:
            ACAP = 256

        infile_name1, infile_name2 = init_files(trace_type, task_type,
                                                topo_type)
        x, case_num = retieveData(
            infile_name1,
            ['#flows', 'is_correlated'],
            ['#correlated_detect', '#correlated_detect'],
            accuracys,
            errors,
            div=True,
            task_type='RANKING')
        # The rankingPair() pass over infile_name2 is disabled in this
        # variant, so cherns / chern_x stay at their initial values.

    legend_names = [
        'Database', 'Web', 'Hadoop', 'e1', 'e5', 'e10', 'e50', 'e100'
    ]
    pl.plot(accuracys,
            x=x,
            k=2,
            errors=errors,
            xlabel='The number of flows',
            ylabel='Average accuracy',
            title=infile_name1.split('.')[0],
            xlog=False,
            ylog=False,
            acc_legend=legend_names[3:7])
    xlabel, ylabel = 'The number of flows', 'Average accuracy'
    """xlabel, ylabel = 'Previous hop', 'Average p-value'
def rankingPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Plot ranking accuracy and execution time for the first three topologies.

    For each of ``topo_types[0:3]`` (with ``trace_types[0]`` and
    ``task_types[1]``) the ``CHERNFLOWNUM`` and ``TTESTFLOWNUM`` file pairs
    (``p=0.01``) are requested from ``init_files``.  The first file of each
    pair goes through ``retieveData`` into ``accuracys`` / ``errors``; the
    second file of each pair goes through ``getTime`` into ``times``.  Two
    ``pl.plot`` calls follow: accuracy over ``xss`` and time over ``xss1``.

    Side effect: the module-level ``ACAP`` is set to 256 when a topology
    equals ``topo_types[2]``.

    Args:
        topo_types: Sequence of topology identifiers; the first three are
            used and index 2 is read on every iteration.
        task_types: Sequence of task identifiers; index 1 is used.
        trace_types: Sequence of trace identifiers; index 0 is used.
        pair_seq: Not used in this part of the function.

    Note:
        ``init_files``, ``retieveData``, ``getTime`` and ``pl`` are defined
        outside this block.  NOTE(review): ``times`` is handed to both the
        first ``retieveData`` call and both ``getTime`` calls -- confirm
        that sharing the list is intended.
    """
    global ACAP

    accuracys, errors, q_75s, q_25s, times = [], [], [], [], []
    x = None
    cherns, chern_errors, time_chern = [], [], []
    chern_x = None
    xss = []
    xss1 = []

    for topo_type in topo_types[0:3]:
        trace_type = trace_types[0]
        task_type = task_types[1]
        cut = 5
        if topo_type == topo_types[2]:
            ACAP = 256
            cut = 10

        # The first entry of topo_types equal to topo_type selects pair_num.
        for known_topo, known_pairs in zip(topo_types, (4, 8, 14)):
            if topo_type == known_topo:
                pair_num = known_pairs
                break

        infile_name1, infile_name2 = init_files(trace_type,
                                                task_type,
                                                topo_type,
                                                x_var='CHERNFLOWNUM',
                                                p=0.01)
        infile_name3, infile_name4 = init_files(trace_type,
                                                task_type,
                                                topo_type,
                                                x_var='TTESTFLOWNUM',
                                                p=0.01)

        key_field = '#flows'
        group_fields = ['#flows', 'is_correlated']
        agg_fields = ['#correlated_detect', '#correlated_detect']

        # Accuracy: Chernoff file first, then t-test file.  The two calls
        # differ only in the input file and the list given as 6th argument.
        for data_file, time_sink in ((infile_name1, times),
                                     (infile_name3, time_chern)):
            x, case_num = retieveData(data_file,
                                      group_fields,
                                      agg_fields,
                                      accuracys,
                                      errors,
                                      time_sink,
                                      div=True,
                                      task_type='RANKING',
                                      q_75s=q_75s,
                                      q_25s=q_25s,
                                      xss=xss)

        # Timing: second file of each pair, both appended through `times`.
        for time_file in (infile_name2, infile_name4):
            getTime(time_file,
                    times,
                    group_fields=group_fields,
                    key_field=key_field,
                    file_type=1,
                    topo_type=topo_type,
                    case_num=pair_num,
                    xss=xss1)

        print(len(x))
        # Only the disabled rankingPair() pass consumed these two lists;
        # they are kept because the end of this function is not visible.
        group_fields = ['#flows', 'is_correlated', 'key']
        agg_fields = ['a' + str(i) for i in range(ACAP)]

    acc_legend = [
        'B4-Chernoff', 'B4-t-test', 'Tree-Chernoff', 'Tree-t-test',
        'Jupiter-Chernoff', 'Jupiter-t-test', 'e1', 'e5', 'e10', 'e50', 'e100'
    ]
    xlabel, ylabel = 'The number of flows', 'Average accuracy'
    title = infile_name1.split('/outputs')[1].split('.csv')[0].replace(
        '.', '_')
    pl.plot(accuracys,
            x=xss,
            k=2,
            errors=[],
            xlabel=xlabel,
            ylabel=ylabel,
            title=title,
            xlog=False,
            ylog=False,
            acc_legend=acc_legend[:len(accuracys)],
            legend_x=0.45)

    # Not read by the visible code (the time plot uses xss1); kept because
    # the end of this function is not visible.
    x = [20, 30, 40, 50, 60, 70]
    xss = [x] * len(times)

    # One pre-formatted argument: same text under the Python 2 print
    # statement and the Python 3 print function.
    print('%s %s' % (times, len(times[0])))
    pl.plot(times,
            x=xss1,
            k=2,
            errors=[],
            xlabel=xlabel,
            ylabel='Execution time (seconds)',
            title=title + '_time',
            xlog=False,
            ylog=False,
            acc_legend=acc_legend,
            legend_y=0.85,
            legend_x=0.0)

    xlabel, ylabel = 'Previous hop', 'Average p-value'
    print(chern_x)
    # Identity test: `!= None` would compare element-wise if chern_x were
    # ever an array-like object.
    xticks = chern_x if chern_x is not None else []
    """pl.plot(cherns, x=[], k=len(cherns), errors=[], 
def rankingPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Plot ranking accuracy and per-pair p-values for every trace type.

    Each trace in ``trace_types`` is paired with ``topo_types[1]`` and
    ``task_types[1]``.  The first file from ``init_files`` is read through
    ``retieveData`` into ``accuracys`` / ``errors`` / ``q_75s`` / ``q_25s``;
    the second is passed to ``rankingPair`` together with ``cherns`` and
    ``pair_seq``.  Two ``pl.plot`` calls follow: accuracy (with ``q_75s``
    passed as ``errors``) and the ``cherns`` series (``ylog=True``).

    Side effect: the module-level ``ACAP`` is set to 256 when the chosen
    topology equals ``topo_types[2]``.

    Args:
        topo_types: Sequence of topology identifiers; indices 1 and 2 are read.
        task_types: Sequence of task identifiers; index 1 is used.
        trace_types: Iterable of trace identifiers.
        pair_seq: Forwarded to ``rankingPair`` and appended to the title of
            the second figure.

    Note:
        ``init_files``, ``retieveData``, ``rankingPair`` and ``pl`` are
        defined outside this block.
    """
    global ACAP

    accuracys, errors, q_75s, q_25s = [], [], [], []
    x = None
    cherns, chern_x = [], None

    for trace_type in trace_types:
        topo_type = topo_types[1]
        task_type = task_types[1]
        cut = 5
        if topo_type == topo_types[2]:
            ACAP = 256
            cut = 10

        infile_name1, infile_name2 = init_files(trace_type, task_type,
                                                topo_type)
        x, _case_num = retieveData(
            infile_name1,
            ['#flows', 'is_correlated'],
            ['#correlated_detect', '#correlated_detect'],
            accuracys,
            errors,
            div=True,
            task_type='RANKING',
            q_75s=q_75s,
            q_25s=q_25s)
        print(len(x))

        chern_x = rankingPair(infile_name2,
                              cherns,
                              0,
                              pair_seq,
                              file_type=1,
                              topo_type=topo_type,
                              cut=cut)

    acc_legend = [
        'Database', 'Web', 'Hadoop', 'e1', 'e5', 'e10', 'e50', 'e100'
    ]
    title_base = infile_name1.split('.')[0]
    pl.plot(accuracys,
            x=x,
            k=2,
            errors=q_75s,
            xlabel='The number of flows',
            ylabel='Average accuracy',
            title=title_base,
            xlog=False,
            ylog=False,
            acc_legend=acc_legend[3:8])

    print(chern_x)
    # chern_x is passed as the tick labels unchanged, as before.  The former
    # `xticks` local (built with `chern_x != None`) was never passed on and
    # has been removed.
    pl.plot(cherns,
            x=None,
            k=len(cherns),
            errors=[],
            xlabel='Previous hop',
            ylabel='Average p-value',
            title=title_base + 'pair-key' + str(pair_seq),
            xlog=False,
            ylog=True,
            acc_legend=acc_legend,
            xticks=chern_x,
            figure_width=4.3307 * 1.25)
Пример #7
0
def rankingPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Plot per-rank p-values from the Chernoff and t-test ranking files.

    Iterates over ``topo_types[1:2]`` with ``trace_types[0]`` and
    ``task_types[1]``.  The second file of the
    ``CHERNRANKINGPVALUEFLOWNUM`` pair and of the
    ``TTESTRANKINGPVALUEFLOWNUM`` pair is passed to ``rankingPair`` with the
    shared ``cherns`` / ``q_25s`` / ``q_75s`` lists and the pair list
    ``[0, 1]``.  One ``pl.plot`` call (``ylog=True``) draws ``cherns`` with
    tick labels ``1..cut``.

    Side effect: the module-level ``ACAP`` is set to 256 when the topology
    equals ``topo_types[2]``.

    Args:
        topo_types: Sequence of topology identifiers; indices 1 and 2 are read.
        task_types: Sequence of task identifiers; index 1 is used.
        trace_types: Sequence of trace identifiers; index 0 is used.
        pair_seq: Only appended to the figure title.

    Note:
        ``init_files``, ``rankingPair``, ``pl`` and ``np`` are expected to
        be available at module level.
    """
    global ACAP

    q_75s, q_25s = [], []
    cherns, chern_x = [], None

    for topo_type in topo_types[1:2]:
        trace_type = trace_types[0]
        task_type = task_types[1]
        cut = 4
        if topo_type == topo_types[2]:
            ACAP = 256
            cut = 10

        infile_name1, chern_file = init_files(
            trace_type,
            task_type,
            topo_type,
            x_var='CHERNRANKINGPVALUEFLOWNUM')
        _infile_name3, ttest_file = init_files(
            trace_type,
            task_type,
            topo_type,
            x_var='TTESTRANKINGPVALUEFLOWNUM')

        # Chernoff file first, then t-test file; each call gets its own
        # fresh [0, 1] list.
        for ranking_file in (chern_file, ttest_file):
            chern_x = rankingPair(ranking_file,
                                  cherns,
                                  0, [0, 1],
                                  file_type=1,
                                  topo_type=topo_type,
                                  cut=cut,
                                  q_25s=q_25s,
                                  q_75s=q_75s)

    # The two original print statements were joined by a trailing comma and
    # therefore produced one line; the single pre-formatted argument prints
    # the same text under Python 2 and Python 3.
    print('%s %s q**** %s %s' % (chern_x, cherns, q_25s, q_75s))

    pl.plot(cherns,
            x=[],
            k=2,
            errors=[],
            xlabel='Rank',
            ylabel='Average p-value',
            title=infile_name1.split('/outputs')[1].split('.csv')[0] +
            'pair-key' + str(pair_seq),
            xlog=False,
            ylog=True,
            acc_legend=[
                '0-hop-Chernoff', '1-hop-Chernoff', '0-hop-t-test',
                '1-hop-t-test'
            ],
            xticks=np.arange(1, cut + 1),
            figure_width=4.3307,
            figure_height=3.346,
            x_shift=0.06,
            y_shift=0.05,
            legend_x=0.3)
Пример #8
0
def accuracyPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Plot top-k accuracy for three input-file variants of one topology.

    Iterates over ``topo_types[2:3]`` with the fixed trace type ``'ABC'``
    and ``task_types[2]``.  Three file pairs are requested from
    ``init_files`` (``x_var`` = ``'TOPKBIASONEFIVE'``, ``'TOPKBIASSMALL'``,
    ``'TOPK'``); the second file of each pair is read through
    ``retieveData`` grouped on ``'topk'``, and each resulting ``acc_dict``
    contributes one entry to the box-plot data.  One ``pl.plot`` and one
    ``pl.box_plot`` call follow; the box plot uses ``chern_x`` with every
    value doubled.

    Args:
        topo_types: Sequence of topology identifiers; index 2 is used.
        task_types: Sequence of task identifiers; index 2 is used.
        trace_types: Not used by this function (the trace type is fixed).
        pair_seq: Not used by this function.

    Note:
        ``init_files``, ``retieveData`` and ``pl`` are defined outside this
        block.
    """
    accuracys, errors = [], []
    chern_x = None
    acc_boxplot_data = []
    q_75ss, xss = [], []

    for topo_type in topo_types[2:3]:
        trace_type = 'ABC'
        task_type = task_types[2]

        # The first entry of topo_types equal to topo_type selects pair_num
        # (note 16, not 14, for the third topology in this variant).
        for known_topo, known_pairs in zip(topo_types, (4, 8, 16)):
            if topo_type == known_topo:
                pair_num = known_pairs
                break

        infile_name1, infile_name2 = init_files(
            trace_type,
            task_type,
            topo_type,
            x_var='TOPKBIASONEFIVE')
        infile_name3, infile_name4 = init_files(
            trace_type,
            task_type,
            topo_type,
            x_var='TOPKBIASSMALL')
        infile_name5, infile_name6 = init_files(
            trace_type,
            task_type,
            topo_type,
            x_var='TOPK')

        key_field = 'topk'
        group_fields = [key_field, 'is_correlated', 'key']
        for data_file in (infile_name2, infile_name4, infile_name6):
            # A fresh aggregate-field list per call, as in the original.
            agg_fields = ['count_chern', 'byte_chern']
            chern_x, _case_num, acc_dict = retieveData(data_file,
                                                       group_fields,
                                                       agg_fields,
                                                       accuracys,
                                                       errors,
                                                       div=False,
                                                       pair_num=pair_num,
                                                       q_75ss=q_75ss,
                                                       key_field=key_field,
                                                       topo_type=topo_type,
                                                       xss=xss)
            acc_boxplot_data.append(acc_dict.values())

    acc_legend = [
        'dropping rate = 0.1', 'dropping rate = 0.15', 'dropping rate = 0.2'
    ]
    xlabel, ylabel = 'The number of links', 'Average accuracy'

    # Single pre-formatted arguments keep the output identical under the
    # Python 2 print statement and the Python 3 print function.
    print('accuracys, chern_x %s %s' % (len(accuracys[0]), len(chern_x)))
    print('%s %s' % (accuracys, xss))

    title = infile_name1.split('/outputs')[1].split('.csv')[0].replace(
        '.', '_')
    pl.plot(accuracys,
            x=xss,
            k=2,
            errors=q_75ss,
            xlabel=xlabel,
            ylabel=ylabel,
            title=title,
            xlog=False,
            ylog=False,
            acc_legend=acc_legend,
            legend_x=0.3,
            legend_y=0.2)
    print(len(acc_boxplot_data))

    # Distinct loop name: on Python 2 a comprehension variable leaks into
    # the enclosing scope and would otherwise rebind a local.
    chern_x = [2 * value for value in chern_x]
    pl.box_plot(
        acc_boxplot_data,
        x=chern_x,
        k=2,
        errors=errors,
        xlabel=xlabel,
        ylabel=ylabel,
        title=title,
        xlog=False,
        ylog=False,
        acc_legend=[],
        legend_y=0.85,
        xticks=[str(tick) for tick in chern_x])
def accuracyPlot(topo_types, task_types, c, pair_seq=0):
    """Plot accuracy and execution time against flow count per topology.

    For each of ``topo_types[0:3]`` three file pairs are requested from
    ``init_files`` (``x_var`` = ``'XFLOWNUM'``, ``'TTESTFLOWNUM'``,
    ``'CHERNTTFLOWNUM'``).  The second file of the first two pairs is read
    through ``retieveData`` into ``accuracys`` / ``errors`` / ``times``.
    Every series is cut to its first 20 points before two ``pl.plot`` calls
    (accuracy, then time) and one ``pl.box_plot`` call (first topology only).

    Args:
        topo_types: Sequence of topology identifiers; the first three are used.
        task_types: Sequence of task identifiers; index 0 is used.
        c: Not read by the body.
        pair_seq: Not used by this function.

    Note:
        NOTE(review): the body reads ``trace_types[0]`` although the third
        parameter is named ``c``, so ``trace_types`` must resolve at module
        level or the loop raises ``NameError``.  Sibling definitions name
        this parameter ``trace_types``; confirm which binding is intended.
        ``init_files``, ``retieveData`` and ``pl`` are defined outside this
        block.
    """
    accuracys, errors, times = [], [], []
    chern_x = None
    acc_boxplot_data = []
    q_75ss, xss = [], []

    for topo_type in topo_types[0:3]:
        trace_type = trace_types[0]  # see NOTE(review) in the docstring
        task_type = task_types[0]

        # The first entry of topo_types equal to topo_type selects pair_num.
        for known_topo, known_pairs in zip(topo_types, (4, 8, 14)):
            if topo_type == known_topo:
                pair_num = known_pairs
                break

        infile_name1, infile_name2 = init_files(trace_type,
                                                task_type,
                                                topo_type,
                                                x_var='XFLOWNUM')
        infile_name3, infile_name4 = init_files(trace_type,
                                                task_type,
                                                topo_type,
                                                x_var='TTESTFLOWNUM')
        # Still requested although its retieveData() pass is disabled.
        infile_name5, infile_name6 = init_files(trace_type,
                                                task_type,
                                                topo_type,
                                                x_var='CHERNTTFLOWNUM')

        group_fields = ['#flows', 'is_correlated', 'key']
        agg_fields = ['count_chern', 'byte_chern']
        for data_file in (infile_name2, infile_name4):
            chern_x, _case_num, acc_dict = retieveData(data_file,
                                                       group_fields,
                                                       agg_fields,
                                                       accuracys,
                                                       errors,
                                                       times,
                                                       div=False,
                                                       pair_num=pair_num,
                                                       q_75ss=q_75ss,
                                                       xss=xss)
        # Only the result of the last (TTESTFLOWNUM) call is kept.
        acc_boxplot_data.append(acc_dict.values())

    acc_legend = [
        'B4-Chernoff', 'B4-t-test', 'Tree-Chernoff', 'Tree-t-test',
        'Jupiter-Chernoff', 'Jupiter-t-test'
    ]
    xlabel, ylabel = 'The number of flows', 'Average accuracy'

    plot_cut = 20
    accuracys = [series[:plot_cut] for series in accuracys]
    q_75ss = [[bounds[0][:plot_cut], bounds[1][:plot_cut]]
              for bounds in q_75ss]

    # Single pre-formatted arguments keep the output identical under the
    # Python 2 print statement and the Python 3 print function.
    print('%s %s %s' % (len(chern_x), len(accuracys[0]), q_75ss[0]))
    print(xss)

    title = infile_name1.split('/outputs')[1].split('.csv')[0].replace(
        '.', '_')
    pl.plot(accuracys,
            x=xss,
            k=2,
            errors=[],
            xlabel=xlabel,
            ylabel=ylabel,
            title=title,
            xlog=False,
            ylog=False,
            acc_legend=acc_legend,
            legend_y=0.3,
            legend_x=0.45)

    times = [series[:plot_cut] for series in times]
    pl.plot(times,
            x=xss,
            k=2,
            errors=[],
            xlabel=xlabel,
            ylabel='Execution time (seconds)',
            title=title + '_time',
            xlog=False,
            ylog=False,
            acc_legend=acc_legend,
            legend_y=0.8,
            legend_x=0.0)

    pl.box_plot(
        [acc_boxplot_data[0]],
        x=chern_x,
        k=2,
        errors=errors,
        xlabel=xlabel,
        ylabel=ylabel,
        title=title,
        xlog=False,
        ylog=False,
        acc_legend=acc_legend,
        legend_y=0.85,
        xticks=[str(tick) for tick in chern_x])
def accuracyPlot(topo_types, task_types, trace_types, pair_seq=0):
    """Plot accuracy over ``r_threshold`` for the first two topologies.

    For each of ``topo_types[0:2]`` (with ``trace_types[0]`` and
    ``task_types[0]``) the ``x_var='TTESTRTHRESHOLD'`` files are requested
    from ``init_files`` and the second one is read through ``retieveData``
    into ``accuracys`` / ``errors`` / ``q_75ss``.  One ``pl.plot`` call
    (``q_75ss`` passed as ``errors``) and one ``pl.box_plot`` call (first
    topology only) follow.

    Args:
        topo_types: Sequence of topology identifiers; the first two are used.
        task_types: Sequence of task identifiers; index 0 is used.
        trace_types: Sequence of trace identifiers; index 0 is used.
        pair_seq: Not used by this function.

    Note:
        ``init_files``, ``retieveData`` and ``pl`` are defined outside this
        block.
    """
    accuracys, errors = [], []
    chern_x = None
    acc_boxplot_data = []
    q_75ss = []

    for topo_type in topo_types[0:2]:
        trace_type = trace_types[0]
        task_type = task_types[0]

        # The first entry of topo_types equal to topo_type selects pair_num.
        for known_topo, known_pairs in zip(topo_types, (4, 8, 14)):
            if topo_type == known_topo:
                pair_num = known_pairs
                break

        infile_name1, infile_name2 = init_files(trace_type,
                                                task_type,
                                                topo_type,
                                                x_var='TTESTRTHRESHOLD')

        chern_x, _case_num, acc_dict = retieveData(
            infile_name2,
            ['r_threshold', 'is_correlated', 'key'],
            ['count_chern', 'byte_chern'],
            accuracys,
            errors,
            div=False,
            pair_num=pair_num,
            q_75ss=q_75ss)
        acc_boxplot_data.append(acc_dict.values())

    acc_legend = ['Database', 'Web', 'Hadoop']
    xlabel, ylabel = '$P_b$', 'Average accuracy'

    # One pre-formatted argument: same text under the Python 2 print
    # statement and the Python 3 print function.
    print('accuracys, chern_x %s %s' % (len(accuracys[0]), len(chern_x)))

    title = infile_name1.split('/outputs')[1].split('.csv')[0].replace(
        '.', '_')
    pl.plot(accuracys,
            x=chern_x,
            k=2,
            errors=q_75ss,
            xlabel=xlabel,
            ylabel=ylabel,
            title=title,
            xlog=False,
            ylog=False,
            acc_legend=acc_legend,
            legend_x=0.2,
            legend_y=0.2)
    print(acc_boxplot_data)

    pl.box_plot(
        [acc_boxplot_data[0]],
        x=chern_x,
        k=2,
        errors=errors,
        xlabel=xlabel,
        ylabel=ylabel,
        title=title,
        xlog=False,
        ylog=False,
        acc_legend=acc_legend,
        legend_y=0.85,
        xticks=[str(tick) for tick in chern_x])