def precision(res_path, classifiers, secondary_identifier):
    """
    Compute a per-label precision table from the right and wrong result iterators.

    Every label found in the ``actual`` of a right result counts once as a hit
    and once towards that label's total. Every wrong result adds one to the
    total of the label obtained by passing its ``predicted`` value through the
    wrong iterator's ``pred_transformer``. Only labels with at least one hit
    end up in the returned table.

    :param res_path: forwarded as ``result_path`` to both result iterators.
    :param classifiers: forwarded as ``classifier`` to both result iterators.
    :param secondary_identifier: forwarded unchanged to both result iterators.
    :return: list of ``Precision`` records built positionally from
        (label, hits, total, hits / total), sorted ascending on element 3
        (the ratio, assuming ``Precision`` is an indexable, namedtuple-style
        record). The same list is printed before being returned.
    """
    hits = coll.Counter()
    total_counts = coll.Counter()

    right_results = RightResultsIter(
        result_path=res_path,
        classifier=classifiers,
        secondary_identifier=secondary_identifier,
    )
    for right in right_results:
        hits.update(right.actual)
        total_counts.update(right.actual)

    wrong_results = WrongResultsIter(
        result_path=res_path,
        classifier=classifiers,
        secondary_identifier=secondary_identifier,
    )
    for wrong in wrong_results:
        # A wrong prediction only inflates the denominator of the predicted label.
        total_counts[wrong_results.pred_transformer(wrong.predicted)] += 1

    ranked = [
        Precision(label, n_hits, total_counts[label], n_hits / total_counts[label])
        for label, n_hits in hits.items()
    ]
    ranked.sort(key=op.itemgetter(3))

    print(ranked)
    return ranked
def multi_class_misprediction_freq(res_folder):
    """
    Tally which multi-genre instances are mispredicted, and as what.

    An instance is examined when it has more than one distinct actual genre.
    It counts as mispredicted when its distinct actual genres are not all
    contained in the first ``len(distinct actual)`` entries of ``predicted``.
    Each misprediction is tallied under the key
    ``(sorted actual genres, full predicted tuple)``. The actual genres are
    sorted so the same combination always maps to the same key regardless of
    set iteration order.

    Prints a progress counter every 500 results, then the tally sorted by
    descending frequency, then ``right_count``.

    :param res_folder: passed to ``load_iter_from_file`` of both result iterators.
    :return: None; results are printed.
    """
    print("Loading Iter")
    wrong_res_iter = WrongResultsIter.load_iter_from_file(res_folder)
    right_res_iter = RightResultsIter.load_iter_from_file(res_folder)

    genre_to_wrong_genre_count = coll.Counter()
    right_count = 0

    for c, res_obj in enumerate(it.chain(wrong_res_iter, right_res_iter)):
        if c % 500 == 0:
            print(c)

        actual = set(res_obj.actual)

        # check multiple genres
        if len(actual) > 1:
            top_predicted = set(res_obj.predicted[:len(actual)])
            if not actual <= top_predicted:
                # Sort the actual genres for a canonical key; keep the
                # predicted sequence in its original order.
                key = (tuple(sorted(actual)), tuple(res_obj.predicted))
                genre_to_wrong_genre_count.update([key])
            # NOTE(review): the original indentation of this increment was lost;
            # it is placed here so it counts every multi-genre instance
            # examined. Confirm against the intended meaning of right_count.
            right_count += 1

    # sort the whole thing
    sorted_genre_to_wrong = sorted(
        genre_to_wrong_genre_count.items(),
        key=op.itemgetter(1),
        reverse=True,
    )
    print(sorted_genre_to_wrong)
    print(right_count)
def frequently_predicted_class(res_path, top_x=2):
    """
    Count which genre combinations are most often predicted and actually present.

    Both counters are filled in a single pass over the wrong results followed
    by the right results:

    * ``predicted_counter`` is keyed by the sorted first ``top_x`` predictions
      of every result whose ``actual`` has more than one entry.
    * ``actual_counter`` is keyed by the sorted distinct actual genres of every
      result that has more than one distinct actual genre.

    Keys are sorted tuples so the same combination is never counted under two
    different orderings. Both counters are printed.

    :param res_path: passed to ``load_iter_from_file`` of both result iterators.
    :param top_x: how many leading predictions form a predicted combination.
    :return: None; results are printed.
    """
    print("Loading Iter")
    wrong_res_iter = WrongResultsIter.load_iter_from_file(res_path)
    right_res_iter = RightResultsIter.load_iter_from_file(res_path)

    predicted_counter = coll.Counter()
    actual_counter = coll.Counter()

    # One traversal feeds both counters, so the results are walked only once
    # and the tallies do not depend on the iterators being re-iterable.
    for res_obj in it.chain(wrong_res_iter, right_res_iter):
        actual = res_obj.actual

        # The predicted tally filters on the raw length of ``actual``.
        if len(actual) > 1:
            predicted_counter[tuple(sorted(res_obj.predicted[:top_x]))] += 1

        # The actual tally filters on the number of distinct genres.
        distinct_actual = set(actual)
        if len(distinct_actual) > 1:
            actual_counter[tuple(sorted(distinct_actual))] += 1

    print("Predicted")
    print(predicted_counter)
    print("Actual")
    print(actual_counter)
def single_class_mispredition_freq(
        res_path,
        save_path="C:\\\\Users\\\\Kevin\\\\Desktop\\\\GitHub\\\\Research\\\\Webscraper\\\\classification_res\\\\genre_analysis\\\\single_miss.pdf"):
    """
    Plot how often each single-genre instance is mispredicted as another genre.

    A result is tallied when its ``actual`` has exactly one entry and that
    entry differs from the first entry of ``predicted``. The tally is keyed by
    ``(actual genre, top predicted genre)``. A progress counter is printed
    every 500 results. The tally is handed to ``plot_word_frequency`` and the
    resulting plot is saved with ``save_fig``.

    :param res_path: passed to ``load_iter_from_file`` of both result iterators.
    :param save_path: where the plot is written; defaults to the location
        that was previously hard-coded in this function.
    :return: None; the plot is written to ``save_path``.
    """
    print("Loading Iter")
    wrong_res_iter = WrongResultsIter.load_iter_from_file(res_path)
    right_res_iter = RightResultsIter.load_iter_from_file(res_path)

    genre_to_wrong_genre_count = coll.Counter()

    for c, res_obj in enumerate(it.chain(wrong_res_iter, right_res_iter)):
        if c % 500 == 0:
            print(c)

        actual = res_obj.actual

        # single genre
        if len(actual) == 1 and actual[0] != res_obj.predicted[0]:
            genre_to_wrong_genre_count.update([(actual[0], res_obj.predicted[0])])

    # plot
    plot = plot_word_frequency("Single Genre Mispredition", genre_to_wrong_genre_count)
    plot.tight_layout()
    save_fig(save_path, plot)
def top_level_cdf(res_folder):
    """
    Print how many results have each number of distinct actual genres.

    Walks the wrong results followed by the right results and builds a dict
    mapping ``len(set(res_obj.actual))`` to the number of results with that
    value. The printed dict holds raw counts; nothing is normalised or
    accumulated.

    :param res_folder: passed to ``load_iter_from_file`` of both result iterators.
    :return: None; the distribution is printed.
    """
    print("Loading Iter")
    wrong_results = WrongResultsIter.load_iter_from_file(res_folder)
    right_results = RightResultsIter.load_iter_from_file(res_folder)

    dist_count = {}
    for res_obj in it.chain(wrong_results, right_results):
        n_distinct = len(set(res_obj.actual))
        if n_distinct in dist_count:
            dist_count[n_distinct] += 1
        else:
            dist_count[n_distinct] = 1

    print(dist_count)