示例#1
0
def precision(res_path,classifiers,secondary_identifier):
    """
    Compute a per-class precision table from stored classification results.

    Every class listed in the ``actual`` field of a right result counts as one
    correct hit and one overall occurrence. Every wrong result adds one overall
    occurrence for the class obtained by passing its ``predicted`` field through
    the wrong-result iterator's ``pred_transformer``.

    :param res_path: result location, forwarded as ``result_path`` to both result iterators
    :param classifiers: forwarded as ``classifier`` to both result iterators
    :param secondary_identifier: forwarded unchanged to both result iterators
    :return: list of ``Precision(class, correct, total, correct/total)`` entries,
        sorted ascending by the ratio (the fourth field); the list is also printed
    """
    correct_counts=coll.Counter()
    overall_counts=coll.Counter()

    # Right results: each actual class is both a correct hit and an occurrence.
    right_results=RightResultsIter(result_path=res_path,classifier=classifiers, secondary_identifier=secondary_identifier)
    for right_obj in right_results:
        actual_classes=right_obj.actual
        correct_counts.update(actual_classes)
        overall_counts.update(actual_classes)

    # Wrong results: only the (transformed) predicted class gains an occurrence.
    wrong_results=WrongResultsIter(result_path=res_path,classifier=classifiers, secondary_identifier=secondary_identifier)
    for wrong_obj in wrong_results:
        predicted_class=wrong_results.pred_transformer(wrong_obj.predicted)
        overall_counts[predicted_class]+=1

    table=[]
    for label,hits in correct_counts.items():
        occurrences=overall_counts[label]
        table.append(Precision(label,hits,occurrences,hits/occurrences))

    # Stable ascending sort on the ratio column.
    table.sort(key=op.itemgetter(3))
    print(table)

    return table
示例#2
0
def multi_class_misprediction_freq(res_folder):
    """
    Report multi-class instances that are frequently mispredicted.

    Loads the wrong and right result iterators from ``res_folder``. For every
    result whose ``actual`` field holds more than one distinct class, the
    instance counts as mispredicted when the actual classes are not all found
    among the first ``len(distinct actual)`` entries of ``predicted``. Such
    instances are tallied per ``(actual classes, predicted classes)`` pair.

    The actual classes are stored as a *sorted* tuple so that the same
    combination of classes always maps to the same key; iterating a set
    directly gives no ordering guarantee and could split one combination over
    several keys.

    Prints a progress counter every 500 results, then the tallies sorted by
    descending frequency, then the total number of mispredicted instances.

    :param res_folder: location handed to ``load_iter_from_file`` of both result iterators
    :return: None, the results are only printed
    """
    print("Loading Iter")

    wrong_res_iter=WrongResultsIter.load_iter_from_file(res_folder)
    right_res_iter=RightResultsIter.load_iter_from_file(res_folder)

    genre_to_wrong_genre_count=coll.Counter()
    mispredicted_count=0
    for c,res_obj in enumerate(it.chain(wrong_res_iter,right_res_iter)):
        if c%500==0:
            print(c)

        actual=set(res_obj.actual)

        # Only instances with more than one distinct class are of interest.
        if len(actual)<=1:
            continue

        predicted=res_obj.predicted
        if not actual <= set(predicted[:len(actual)]):
            # Canonical (sorted) key; assumes class labels are mutually
            # orderable, e.g. strings -- TODO confirm.
            key=(tuple(sorted(actual)),tuple(predicted))
            genre_to_wrong_genre_count.update([key])

            mispredicted_count+=1

    # Most frequent mispredictions first.
    sorted_genre_to_wrong=sorted(genre_to_wrong_genre_count.items(),key=op.itemgetter(1),reverse=True)

    print(sorted_genre_to_wrong)
    print(mispredicted_count)
示例#3
0
def frequently_predicted_class(res_path,top_x=2):
    """
    Count which classes are frequently predicted together and which classes
    frequently occur together, over multi-class instances.

    Loads the wrong and right result iterators from ``res_path`` and walks them
    once. For every result with more than one distinct class in ``actual``:

    * the first ``top_x`` entries of ``predicted`` are tallied as a sorted tuple;
    * the distinct actual classes are tallied as a sorted tuple.

    Tuples are sorted so the same combination is never counted under two
    different orderings. Both counters are printed.

    Both tallies are built in a single pass, so the function also works when
    the result iterators can only be consumed once, and both use the same
    "more than one distinct actual class" test.

    :param res_path: location handed to ``load_iter_from_file`` of both result iterators
    :param top_x: how many leading predictions of each result to consider
    :return: None, the counters are only printed
    """
    print("Loading Iter")

    wrong_res_iter=WrongResultsIter.load_iter_from_file(res_path)
    right_res_iter=RightResultsIter.load_iter_from_file(res_path)

    predicted_counter=coll.Counter()
    actual_counter=coll.Counter()

    for res_obj in it.chain(wrong_res_iter,right_res_iter):
        distinct_actual=set(res_obj.actual)

        # Skip single-class instances (duplicates in ``actual`` do not count
        # as additional classes).
        if len(distinct_actual)<=1:
            continue

        predicted_counter[tuple(sorted(res_obj.predicted[:top_x]))]+=1
        actual_counter[tuple(sorted(distinct_actual))]+=1

    print("Predicted")
    print(predicted_counter)
    print("Actual")
    print(actual_counter)
示例#4
0
def single_class_mispredition_freq(res_path,save_path=None):
    """
    Plot how often single-genre instances are mispredicted as another genre.

    Loads the wrong and right result iterators from ``res_path``. For every
    result whose ``actual`` field has exactly one entry that differs from the
    first entry of ``predicted``, the pair ``(actual genre, predicted genre)``
    is tallied. The tallies are passed to ``plot_word_frequency`` and the
    resulting figure is written with ``save_fig``.

    A progress counter is printed every 500 results.

    :param res_path: location handed to ``load_iter_from_file`` of both result iterators
    :param save_path: where to save the figure; when omitted, the historical
        hard-coded location is used so existing callers behave as before
    :return: None
    """
    if save_path is None:
        # Original hard-coded output location, kept as the default.
        save_path="C:\\\\Users\\\\Kevin\\\\Desktop\\\\GitHub\\\\Research\\\\Webscraper\\\\classification_res\\\\genre_analysis\\\\single_miss.pdf"

    print("Loading Iter")

    wrong_res_iter=WrongResultsIter.load_iter_from_file(res_path)
    right_res_iter=RightResultsIter.load_iter_from_file(res_path)

    genre_to_wrong_genre_count=coll.Counter()
    for c,res_obj in enumerate(it.chain(wrong_res_iter,right_res_iter)):
        if c%500==0:
            print(c)

        actual=res_obj.actual

        # Single genre whose top prediction is a different genre.
        if len(actual)==1 and actual[0] != res_obj.predicted[0]:
            genre_to_wrong_genre_count.update([(actual[0],res_obj.predicted[0])])

    # Plot the tallies and store the figure.
    plt=plot_word_frequency("Single Genre Mispredition",genre_to_wrong_genre_count)
    plt.tight_layout()
    save_fig(save_path,plt)
示例#5
0
def top_level_cdf(res_folder):
    """
    Print the distribution of the number of distinct actual classes per result.

    Loads the wrong and right result iterators from ``res_folder`` and builds a
    dict mapping "number of distinct entries in ``actual``" to "how many
    results have that number", then prints the dict.

    :param res_folder: location handed to ``load_iter_from_file`` of both result iterators
    :return: None, the distribution is only printed
    """
    print("Loading Iter")

    wrong_results=WrongResultsIter.load_iter_from_file(res_folder)
    right_results=RightResultsIter.load_iter_from_file(res_folder)

    dist_count={}
    for result in it.chain(wrong_results,right_results):
        class_total=len(set(result.actual))
        if class_total in dist_count:
            dist_count[class_total]+=1
        else:
            dist_count[class_total]=1

    print(dist_count)