def _show_cutoff_plot(title):
    """Apply the shared axis labels and y-range, then display the figure."""
    plt.title(title)
    plt.xlabel("Distance cutoff")
    plt.ylabel("ratio of program not looked at")
    plt.ylim((0.0, 1.0))
    plt.show()


def main():
    """Print the score matrix and show three plots against its index.

    The matrix comes from ``get_score_mat()``; its index is used as the
    x axis. The figures shown, in order, are:

    1. one line per column of the matrix,
    2. the element-wise mean across columns (also printed),
    3. the row-wise median as computed by ``score_mat.median(axis=1)``.
    """
    score_mat = get_score_mat()
    pd_helper.print_df(score_mat)

    cutoffs = list(score_mat.index)
    column_names = score_mat.columns

    # Draw every column while accumulating a running sum, so the mean
    # curve can be derived without a second pass over the matrix.
    running_mean = np.zeros(len(cutoffs))
    for name in column_names:
        column_values = score_mat[name].values
        running_mean += column_values
        plt.plot(cutoffs, column_values, '-')
    running_mean /= len(column_names)
    _show_cutoff_plot("Distance cutoff v performance")

    # Mean across columns.
    print("Avg values {0}".format(running_mean))
    plt.plot(cutoffs, running_mean, '-')
    _show_cutoff_plot("Distance cutoff v avg performance")

    # Median across columns.
    row_median = score_mat.median(axis=1)
    plt.plot(cutoffs, row_median, '-')
    _show_cutoff_plot("Distance cutoff v median performance")
Exemplo n.º 2
0
def compare_filter(project_name, ranker_type, filter_type, provider_type):
    """Print a filtered-vs-unfiltered comparison of total scores.

    Two score sets are fetched through ``get_total_scores``: one with the
    requested ``ranker_type`` / ``filter_type`` and one baseline using the
    ``"tarantula"`` ranker with the ``"none"`` filter. Two tables are then
    printed via ``pd_helper.print_df``:

    * the raw scores with their difference (filter minus baseline),
      sorted ascending by that difference;
    * the ``get_bin_to_count`` result for each score set with their
      difference.

    Presumably ``get_total_scores`` returns a pandas Series keyed by
    version -- TODO confirm against its definition.
    """
    print("Computing with filter")
    with_filter = get_total_scores(project_name, ranker_type, filter_type,
                                   provider_type)

    print("Computing without filter")
    # NOTE(review): the baseline always uses the "tarantula" ranker, not
    # ``ranker_type`` -- confirm this is the intended reference point.
    baseline = get_total_scores(project_name, "tarantula", "none",
                                provider_type)

    score_delta = with_filter - baseline
    score_table = pd.DataFrame({'nofilter': baseline,
                                'filter': with_filter,
                                'diff': score_delta}).sort_values('diff')
    pd_helper.print_df(score_table)

    with_filter_bins = get_bin_to_count(with_filter)
    baseline_bins = get_bin_to_count(baseline)
    bin_delta = with_filter_bins - baseline_bins
    bin_table = pd.DataFrame({'filter': with_filter_bins,
                              'nofilter': baseline_bins,
                              'diff': bin_delta})
    pd_helper.print_df(bin_table)
Exemplo n.º 3
0
def print_total_scores(project_name, ranker_type, filter_type, provider_type,
                       versions=None):
    """Print the mean total score, then the full score listing.

    All arguments are forwarded unchanged to ``get_total_scores``
    (``versions`` as a keyword argument). The result's ``mean()`` is
    printed first, followed by the result itself through
    ``pd_helper.print_df``.
    """
    scores = get_total_scores(project_name, ranker_type, filter_type,
                              provider_type, versions=versions)
    mean_score = scores.mean()
    print("Average score is {0}".format(mean_score))
    pd_helper.print_df(scores)