def metric_pk(forest1, forest2):
    # Flatten the per-tree segment masses of each forest into one mass sequence,
    # then score the hypothesis against the reference with Pk (as a percentage).
    masses1 = [get_untyped_masses(tree) for tree in forest1]
    masses2 = [get_untyped_masses(tree) for tree in forest2]
    segments1 = list(chain.from_iterable(masses1))
    segments2 = list(chain.from_iterable(masses2))
    score = segeval.pk(segments1, segments2) * 100
    return score
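# Standalone sketch (not part of the snippet above; the toy masses are made up):
# segeval.pk compares two segmentations given as "masses", i.e. sequences of
# segment sizes, and returns a Decimal error rate in [0, 1]; lower is better.
import segeval

hypothesis = (5, 3, 2)   # predicted segment sizes over a 10-unit document
reference = (4, 4, 2)    # gold segment sizes over the same document

error = segeval.pk(hypothesis, reference)
print(float(error))      # multiply by 100 for a percentage, as in metric_pk above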
def evaluateSegments(reference, hypothesis):
    ref, hyp = __initialization(reference, hypothesis)
    # Return F_1, pk, windiff, boundary_sim and segmentation_sim scores.
    score = np.array([__getscores(reference, hypothesis)[2],
                      float(segeval.pk(ref, hyp)),
                      float(segeval.window_diff(ref, hyp)),
                      float(segeval.boundary_similarity(ref, hyp)),
                      float(segeval.segmentation_similarity(ref, hyp))])
    return score
def test_pk(self):
    '''
    Test pk.
    '''
    mean, std, var, stderr, count = \
        summarize(pk(KAZANTSEVA2012_G5))
    self.assertAlmostEqual(Decimal('0.35530058'), mean)
    self.assertAlmostEqual(Decimal('0.11001760'), std)
    self.assertAlmostEqual(Decimal('0.01210387'), var)
    self.assertAlmostEqual(Decimal('0.01587967'), stderr)
    self.assertEqual(48, count)
def pk(self, h, gold, window_size=-1):
    """
    :param gold: gold segmentation (each item in the list contains the number of words in the segment)
    :param h: hypothesis segmentation (each item in the list contains the number of words in the segment)
    :param window_size: optional
    :return: tuple of (Pk error probability, total window count)
    """
    if window_size != -1:
        false_seg_count, total_count = seg.pk(h, gold, window_size=window_size, return_parts=True)
    else:
        false_seg_count, total_count = seg.pk(h, gold, return_parts=True)

    if total_count == 0:
        # TODO: Check when this happens
        false_prob = -1
    else:
        false_prob = float(false_seg_count) / float(total_count)

    return false_prob, total_count
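# Hedged usage sketch for the wrapper above (seg is assumed to be the segeval
# package and the masses are made up): with return_parts=True, segeval.pk
# returns the raw (erroneous windows, total windows) pair rather than the
# averaged error probability, and window_size overrides the default k.
import segeval as seg

hyp, gold = (2, 3, 5), (2, 4, 4)

errors, total = seg.pk(hyp, gold, return_parts=True)
print(errors, total, float(errors) / float(total))

errors_w, total_w = seg.pk(hyp, gold, window_size=2, return_parts=True)
print(errors_w, total_w)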
def eval_tile_text(self, sample):
    '''
    Returns a tuple of metric scores (Pk, WinDiff, B).
    '''
    ### Record paragraph break points
    sent_bounds, normed_text = self.get_sb_nt(sample)

    ### Break up text into pseudosentences
    # this list maps pseudosentence index to beginning token index
    ps_bounds = list(range(0, len(normed_text), self.w))
    pseudosents = [normed_text[i:i + self.w] for i in ps_bounds]
    # discard pseudosents of length < self.w
    if len(pseudosents[-1]) < self.w:
        del pseudosents[-1]

    ### Group into blocks and calculate sim scores
    # List[Tuple(sim score, pseudosent index)]
    # here, the index is of the first PS in block_b
    sims = self.calculate_sims(pseudosents)

    ### Find boundaries (valleys in the similarity curve)
    pred = []
    for j in range(1, len(sims) - 1):
        if sims[j] < sims[j - 1] and sims[j] < sims[j + 1]:
            pred.append(j)
    pred = [j + self.k for j in pred]

    ### Evaluate
    # map pseudosentence indices to beginning token index
    pred_btokis = [ps_bounds[i] for i in pred]
    # map beginning token index to closest sentence index
    # (this token is closest to the beginning of which sentence?)
    pred_sentis = [
        self.btoki_to_senti(t, sent_bounds) for t in pred_btokis
    ]
    # add last boundary (which we know is always there)
    pred_sentis += [len(sent_bounds)]
    gold_sentis = sample.get_sent_bound_idxs()

    pred = self.array_derivative(pred_sentis)
    gold = self.array_derivative(gold_sentis)

    pk = float(segeval.pk(pred, gold))
    wd = float(segeval.window_diff(pred, gold))
    bs = float(segeval.boundary_similarity(pred, gold, one_minus=True))

    return (pk, wd, bs)
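# Hedged illustration of the final conversion step above (array_derivative is
# assumed to take successive differences): cumulative sentence-boundary indices
# such as [3, 5, 9] become segment masses [3, 2, 4], which is the format that
# segeval.pk and segeval.window_diff expect.
import numpy as np
import segeval

bound_idxs = [3, 5, 9]                       # boundaries after sentences 3, 5, 9
masses = np.diff([0] + bound_idxs).tolist()  # -> [3, 2, 4]
print(float(segeval.pk(masses, masses)))     # identical segmentations -> 0.0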
[anno_pred, anno_seg, anno_end] = getTextTilingBoundaries(os.path.join(text_dir, f))
anno_idx2range = convertFromIndex2Range(anno_idx, anno_end)

print("-----")
print(anno_end)
print(anno_idx2range)
print(anno_seg)
print("----")
print(anno_pred)
print(anno_idx)

anno_pred = set(anno_pred)
anno_idx = set(anno_idx)
union = len(anno_pred.union(anno_idx))
correct = len(anno_pred.intersection(anno_idx))
precision = 1.0 * correct / union
recall = 1.0 * correct / len(anno_idx)
avg_prec += precision
avg_recall += recall
print("%s %f %f" % (f, precision, recall))

wd = segeval.window_diff(anno_seg, anno_idx2range)
pk = segeval.pk(anno_seg, anno_idx2range)
avg_wd += wd
avg_pk += pk
print("WD: %f P-k: %f" % (wd, pk))

print("Average: %f %f WD: %f Pk: %f (%d)" % (avg_prec / sel_files,
                                             avg_recall / sel_files,
                                             avg_wd / decimal.Decimal(sel_files),
                                             avg_pk / decimal.Decimal(sel_files),
                                             sel_files))
import sys
import codecs

from nltk.metrics.segmentation import pk, windowdiff
import segeval as se
import horae as ho

if __name__ == '__main__':
    test = sys.argv[1]
    classifier = sys.argv[2]
    type_ = sys.argv[3]
    level = sys.argv[4]

    path_pred = "../data/test/seg/" + test + "_" + level + ".pred_" + \
        classifier
    path_ref = "../data/test/choiformat/" + type_ + "/" + test + "_" + \
        level + ".ref"

    ref, nbref1, refs = ho.load_text(path_ref)
    pred, nbpred1, preds = ho.load_text(path_pred)

    d = {"stargazer": {"1": refs, "2": preds}}
    seg1 = d['stargazer']['1']
    seg2 = d['stargazer']['2']
    segs1 = se.convert_positions_to_masses(seg1)
    segs2 = se.convert_positions_to_masses(seg2)

    print("pk\tWindowdiff: \n")
    print(str(round(se.pk(segs2, segs1), 4)) + "\t" +
          str(round(se.window_diff(segs2, segs1), 4)))
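# Hedged side note on the conversion used above, with a made-up label sequence:
# segeval.convert_positions_to_masses turns a per-unit sequence of segment
# labels into segment masses (sizes), which pk / window_diff then consume.
import segeval as se

positions = [1, 1, 1, 2, 2, 3]           # unit i belongs to segment positions[i]
masses = se.convert_positions_to_masses(positions)
print(masses)                            # expected: (3, 2, 1)
print(float(se.pk(masses, (2, 3, 1))))   # compare against another 6-unit split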
# Compute MAP (mean average precision)
ap_vector = [
    average_precision_score(rstr_best_real_group_vec == group_id,
                            rstr_algo_group_vec == group_id)
    for group_id in range(1, max(rstr_real_group_vec) + 1)
]
map = np.mean(ap_vector)

# Segmentation evaluation
real_segm_vec = convert_positions_to_masses(rstr_real_group_vec)
algo_segm_vec = convert_positions_to_masses(rstr_algo_group_vec)
rdm_group_vec = rstr_real_group_vec.copy()
rdm.shuffle(rdm_group_vec)
rdm_segm_vec = convert_positions_to_masses(rdm_group_vec)
pk_res = pk(algo_segm_vec, real_segm_vec)
win_diff = window_diff(algo_segm_vec, real_segm_vec)
pk_rdm = pk(rdm_segm_vec, real_segm_vec)
win_diff_rdm = window_diff(rdm_segm_vec, real_segm_vec)

# Compute the aggregate labels
df_results = pd.DataFrame(result_matrix)
df_results["Token"] = token_list
type_results = df_results.groupby("Token").mean()
type_list = list(type_results.index)
type_values = type_results.to_numpy()

# -------------------------------------
# --- Writing
# -------------------------------------
def evaluate(self, batch, preds, sent=True, word=True):
    """
    For a given batch and its corresponding preds, get metrics

    batch: Batch instance
    preds: list

    Usage:
        >> from loader import *
        >> from modules import *
        >>
        >> model = TextSeg(lstm_dim=200, score_dim=200, bidir=True, num_layers=2)
        >> trainer = Trainer(model=model,
                             train_dir='../data/wiki_727/train',
                             val_dir='../data/wiki_50/test',
                             batch_size=10,
                             lr=1e-3)
        >> evalu = Metrics()
        >>
        >> batch = sample_and_batch(trainer.train_dir, trainer.batch_size, TRAIN=True)
        >> preds = trainer.predict_batch(batch)
        >> evalu(batch, preds)
    """
    metric_dict = {}
    assert (sent or word), 'Missing: choose sent- and / or word-level evaluation.'

    # Word level
    if word:
        w_true, w_pred = self._word(batch, preds)
        metric_dict['w_pk'] = seg.pk(w_pred, w_true)
        metric_dict['w_wd'] = seg.window_diff(w_pred, w_true, lamprier_et_al_2007_fix=True)
        metric_dict['w_ss'] = seg.segmentation_similarity(w_pred, w_true)
        metric_dict['w_bs'] = seg.boundary_similarity(w_pred, w_true)

        w_confusion = seg.boundary_confusion_matrix(w_pred, w_true)
        metric_dict['w_precision'] = seg.precision(w_confusion)
        metric_dict['w_recall'] = seg.recall(w_confusion)
        metric_dict['w_f1'] = seg.fmeasure(w_confusion)

    # Sentence level
    if sent:
        s_true, s_pred = self._sent(batch, preds)
        metric_dict['s_pk'] = seg.pk(s_pred, s_true)
        metric_dict['s_wd'] = seg.window_diff(s_pred, s_true, lamprier_et_al_2007_fix=True)
        metric_dict['s_ss'] = seg.segmentation_similarity(s_pred, s_true)
        metric_dict['s_bs'] = seg.boundary_similarity(s_pred, s_true)

        s_confusion = seg.boundary_confusion_matrix(s_pred, s_true)
        metric_dict['s_precision'] = seg.precision(s_confusion)
        metric_dict['s_recall'] = seg.recall(s_confusion)
        metric_dict['s_f1'] = seg.fmeasure(s_confusion)

    return metric_dict
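# Hedged sketch of the segeval calls used in evaluate() above, on toy masses
# (seg is assumed to be segeval): the window metrics and the boundary-edit
# metrics all take (hypothesis, reference) mass sequences, and the
# precision / recall / F1 values are derived from a boundary confusion matrix.
import segeval as seg

pred = (2, 3, 3)
true = (2, 2, 4)

print(float(seg.pk(pred, true)))
print(float(seg.window_diff(pred, true, lamprier_et_al_2007_fix=True)))
print(float(seg.boundary_similarity(pred, true)))

cm = seg.boundary_confusion_matrix(pred, true)
print(float(seg.precision(cm)), float(seg.recall(cm)), float(seg.fmeasure(cm)))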
def get_Pk_score(reference, hypothesis):
    ref, hyp = __initialization(reference, hypothesis)
    # Evaluate the algorithm using the Pk metric
    return segeval.pk(ref, hyp)
    return output_files


def retrieve_gold_set_vector(parsed_arguments):
    output_vector = []
    gold_set_files = []
    for directory in parsed_arguments.directories:
        gold_set_files.extend(match(directory, parsed_arguments.gold_sets))
    for file in gold_set_files:
        segment_sizes = read_csv(file).segment_size
        output_vector.extend(segment_sizes)
    return output_vector


def retrieve_result_set_vector(parsed_arguments):
    output_vector = []
    result_set_files = []
    for directory in parsed_arguments.directories:
        result_set_files.extend(match(directory, parsed_arguments.results))
    for file in result_set_files:
        segment_sizes = read_csv(file).segment_size
        output_vector.extend(segment_sizes)
    return output_vector


if __name__ == "__main__":
    parsed_arguments = setup_argument_parser()
    gold_set_values = retrieve_gold_set_vector(parsed_arguments)
    result_set_values = retrieve_result_set_vector(parsed_arguments)
    print("Printing Comparison Statistics:")
    print("P_k value: {}".format(pk(gold_set_values, result_set_values)))
    print("WindowDiff value: {}".format(
        window_diff(gold_set_values, result_set_values)))