def evaluate(self):
    """Run fixcache, then calculate TP/TN/FP/FN over the horizon commits.

    After ``run_fixcache`` has run, the hit and miss counters are printed
    and ``self.horizon_commit_list`` is walked in order. Only commits with
    exactly one parent are inspected:

    * for a non-fix commit, its existing files are added to
      ``self.horizon_normal_file_set``;
    * for a fix commit (``parsing.is_fix_commit``), its existing files are
      added to ``self.horizon_faulty_file_set`` and one result row is
      recorded.

    Returns:
        A list of tuples ``(counter, true_positive, false_positive,
        true_negative, false_negative, file_count, hexsha)``, one per
        single-parent fix commit. ``counter`` is the 1-based position of
        the commit in ``self.horizon_commit_list``.
    """
    self.run_fixcache()
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(self.hit_count)
    print(self.miss_count)

    cache_set = self.cache.file_set
    output = []

    for counter, commit in enumerate(self.horizon_commit_list, 1):
        if len(commit.parents) != 1:
            continue

        files = self.file_set.get_existing_multiple(commit.stats.files)

        if not parsing.is_fix_commit(commit.message):
            # Explicit loop: a bare map() is lazy on Python 3 and would
            # silently add nothing.
            for file_ in files:
                self.horizon_normal_file_set.add(file_)
            continue

        for file_ in files:
            self.horizon_faulty_file_set.add(file_)

        faulty_set = self.horizon_faulty_file_set
        # A file seen in any fix commit so far no longer counts as normal.
        normal_set = self.horizon_normal_file_set - faulty_set

        true_positive = len(cache_set & faulty_set)
        false_positive = len(cache_set & normal_set)
        true_negative = len(normal_set - cache_set)
        false_negative = len(faulty_set - cache_set)
        file_count = len(normal_set | faulty_set)

        output.append((counter, true_positive, false_positive,
                       true_negative, false_negative, file_count,
                       commit.hexsha))

    return output
def run_fixcache(self):
    """Run fixcache for RandomRepository.

    Walks ``self.commit_list`` in order:

    * Single-parent commits: file metadata is refreshed through
      ``self.file_set.get_and_update_multiple`` and deleted files are
      removed from ``self.file_set``. If the commit message is a fix
      (``parsing.is_fix_commit``), ``self.cache_size`` is passed to
      ``self.file_set.get_random`` and each changed or created file counts
      as a hit when its path is in the result, otherwise as a miss.
    * Parentless (initial) commits: every file in the commit tree is
      registered in ``self.file_set`` with its line count.
    * Commits with more than one parent are skipped.
    """
    commit_num = float(len(self.commit_order))

    for commit in self.commit_list:
        order = self.commit_order[commit.hexsha]
        percentage = 100 * order / commit_num
        # '%%' is a literal percent sign; a bare '%]' raises ValueError.
        logger.debug('[%s%%]Currently at %s', percentage, commit)
        parents = commit.parents

        if len(parents) == 1:
            # List of (status, file) tuples describing this commit.
            f_info = self.file_set.get_and_update_multiple(
                git_stat=commit.stats.files, commit_num=order)
            files = [info[1] for info in f_info
                     if info[0] in ('changed', 'created')]
            deleted_files = [info[1] for info in f_info
                             if info[0] == 'deleted']
            self.file_set.remove_files(deleted_files)

            if parsing.is_fix_commit(commit.message):
                random_file_set = self.file_set.get_random(self.cache_size)
                for file_ in files:
                    if file_.path in random_file_set:
                        self.hit_count += 1
                    else:
                        self.miss_count += 1
        elif len(parents) == 0:
            # Initial commit: register every file of the tree.
            for path in self._get_commit_tree_files(commit):
                line_count = self._get_line_count(path, commit)
                self.file_set.get_or_create_file(
                    file_path=path, line_count=line_count)
def run_fixcache(self):
    """Run fixcache with the given variables.

    Walks ``self.commit_list`` in order and updates the cache and the
    hit/miss counters:

    * Single-parent commits: file metadata is refreshed through
      ``self.file_set.get_and_update_multiple``; deleted files go to
      ``self._cleanup_files`` and created plus changed files go to
      ``self._update_distance_set``. If the commit message is a fix
      (``parsing.is_fix_commit``), every changed file is marked faulty.
      A changed file already in the cache is a hit. Otherwise it is a
      miss: the file is cached together with the files closest to it at
      the commits that introduced the lines deleted by the fix. Finally,
      the per-revision pre-fetch for created and changed files is added
      to the cache.
    * Parentless (initial) commits: every file in the commit tree is
      registered with its line count and added to the cache.
    * Commits with more than one parent are skipped.
    """
    commit_num = float(len(self.commit_order))

    for commit in self.commit_list:
        order = self.commit_order[commit.hexsha]
        percentage = 100 * order / commit_num
        logger.debug('[%s]Currently at %s', int(percentage), commit)
        parents = commit.parents

        if len(parents) == 1:
            parent = parents[0]
            # List of (status, file) tuples describing this commit.
            f_info = self.file_set.get_and_update_multiple(
                git_stat=commit.stats.files, commit_num=order)
            changed_files = [i[1] for i in f_info if i[0] == 'changed']
            deleted_files = [i[1] for i in f_info if i[0] == 'deleted']
            created_files = [i[1] for i in f_info if i[0] == 'created']

            self._cleanup_files(deleted_files)
            self._update_distance_set(
                created_files + changed_files, commit)

            if parsing.is_fix_commit(commit.message):
                # The diff depends only on (commit, parent), so compute it
                # lazily on the first miss and reuse it for later misses.
                deleted_line_dict = None

                for file_ in changed_files:
                    file_.fault(order)

                    if self.cache.file_in(file_):
                        self.hit_count += 1
                        continue

                    if deleted_line_dict is None:
                        deleted_line_dict = self._get_diff_deleted_lines(
                            commit, parent)
                    del_lines = deleted_line_dict[file_.path]

                    self.miss_count += 1
                    self.cache.add(file_)

                    line_intr_c = self._get_line_introducing_commits(
                        del_lines, file_.path, parent)

                    closest_files = []
                    for intr_commit in line_intr_c:
                        closest_files.extend(
                            self.file_distances.get_closest_files(
                                file_, self.distance_to_fetch,
                                self.commit_order[intr_commit.hexsha]))

                    # Only de-duplicate; no pre-sorting is needed because
                    # the closest files were fetched already.
                    self.cache.add_multiple(list(set(closest_files)))

            # Per-revision pre-fetch runs for every single-parent commit,
            # whether or not it is a fix.
            new_entity_pre_fetch = self._get_per_rev_pre_fetch(
                created_files, commit)
            changed_entity_pre_fetch = self._get_per_rev_pre_fetch(
                changed_files, commit)
            self.cache.add_multiple(new_entity_pre_fetch)
            self.cache.add_multiple(changed_entity_pre_fetch)

        elif len(parents) == 0:
            # Initial commit: register and cache every file of the tree.
            files_to_add = []
            for path in self._get_commit_tree_files(commit):
                line_count = self._get_line_count(path, commit)
                created, file_ = self.file_set.get_or_create_file(
                    file_path=path, line_count=line_count)
                if not created:
                    # File already known: refresh its line count.
                    file_.line_count = line_count
                files_to_add.append(file_)
            self.cache.add_multiple(files_to_add)