def calculate_adjusted_miss_rate(res_folder):
    """
    Print adjusted right/wrong tallies for the classifier result files in a folder.

    Every regular file directly inside ``res_folder`` is inspected by name:

    * a name containing ``"right"`` (e.g. ``ClassifierName_right.txt``) contributes one
      "right" per non-blank line of the file;
    * otherwise, a name containing ``"wrong"`` (e.g. ``ClassifierName_wrong.txt``) is
      parsed with ``get_classification_res``. A wrong sample whose predicted genre equals
      one of the level-1 normalized short genres stored for the same ``ref_id`` is a
      "swing" sample and is counted as right; every other sample stays wrong.

    The right/wrong tallies are reset for each file and printed once per file. The swing
    counter accumulates over all files and is printed once at the end.

    :param res_folder: path to the folder holding the classifier result files
    :return: None; the results are only printed
    """
    # ref_id -> number of times the sample was logged as wrong although its predicted
    # genre is one of the sample's own genres
    swing_counter = collections.Counter()

    for a_result in os.listdir(res_folder):
        abs_result = os.path.join(res_folder, a_result)
        # Sub-directories are skipped; only regular files are result files.
        if not os.path.isfile(abs_result):
            continue
        print(a_result)

        right = 0
        wrong = 0
        if "right" in a_result:
            # Each non-blank line of a *_right file is one correctly classified sample.
            with open(abs_result) as res_file:
                right += sum(1 for line in res_file if line.strip())
        elif "wrong" in a_result:
            for res_obj in get_classification_res(abs_result):
                # Only the short genres are needed, so restrict the query to that field.
                # NOTE(review): indexing [0] presumably raises if no URLBow entry matches
                # ref_id -- confirm against the URLBow model.
                url_bow_obj = URLBow.objects(index=res_obj.ref_id).only("short_genres")[0]
                is_swing = res_obj.predicted in (
                    normalize_genre_string(g, 1) for g in url_bow_obj.short_genres
                )
                if is_swing:
                    swing_counter[res_obj.ref_id] += 1
                    right += 1
                else:
                    wrong += 1

        print("Total right: {}, total wrong: {}".format(right, wrong))

    print("Swing counter {}".format(str(swing_counter)))
    print("Swing counter size : {}".format(len(swing_counter)))
def is_swing_sample(self, top_x_predicted=1):
    """
    Test if the ClassificationResultInstance object is a swing instance, i.e. its
    predicted class is within one of its multiple classes.

    Right predictions are therefore automatically swing instances as well, while a
    wrongly predicted sample may or may not be one.

    The sample's genres are the ``short_genres`` of the URLBow entry whose ``index``
    equals ``self.ref_id``, each normalized to level ``self.genre_lv``.

    :param top_x_predicted: how many leading entries of ``self.predicted`` to check.
        All of them must be among the sample's genres for the sample to count as a
        swing instance. If the slice is empty the result is True.
    :return: True if the sample is a swing instance, False otherwise
    """
    # Only the short genres are needed, so restrict the query to that field.
    # NOTE(review): indexing [0] presumably raises if no URLBow entry matches ref_id --
    # confirm against the URLBow model.
    url_bow_obj = URLBow.objects(index=self.ref_id).only("short_genres")[0]

    # Normalize the genres once rather than once per predicted label. A list (not a
    # set) keeps plain ``==`` membership, so predictions need not be hashable.
    actual_genres = [
        normalize_genre_string(g, self.genre_lv) for g in url_bow_obj.short_genres
    ]

    return all(pred_g in actual_genres for pred_g in self.predicted[:top_x_predicted])