Example #1
def metric_pk(forest1, forest2):
    # Flatten each forest's per-tree segment masses and return Pk as a percentage.
    masses1 = [get_untyped_masses(tree) for tree in forest1]
    masses2 = [get_untyped_masses(tree) for tree in forest2]
    segments1 = list(chain.from_iterable(masses1))
    segments2 = list(chain.from_iterable(masses2))
    score = segeval.pk(segments1, segments2) * 100
    return score
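segeval.pk compares two segmentations given as segment masses (segment lengths), so the two flattened lists above need to cover the same total number of units. A minimal standalone sketch with literal masses, independent of the project-specific get_untyped_masses helper:

import segeval

# Two segmentations of the same 11 units, expressed as segment masses.
hyp_masses = (2, 3, 6)   # hypothesis segments of 2, 3 and 6 units
ref_masses = (5, 6)      # reference segments of 5 and 6 units

# segeval.pk returns a Decimal in [0, 1]; lower is better.
print(float(segeval.pk(hyp_masses, ref_masses)) * 100)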
Example #2
def evaluateSegments(reference, hypothesis):
    ref, hyp = __initialization(reference, hypothesis)
    score = np.array([__getscores(reference, hypothesis)[2],
                      float(segeval.pk(ref, hyp)),
                      float(segeval.window_diff(ref, hyp)),
                      float(segeval.boundary_similarity(ref, hyp)),
                      float(segeval.segmentation_similarity(ref, hyp))])
    # Returns F_1 score, pk, windiff, boundary_sim and segmentation_sim, in that order.
    return score
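segeval's metrics return decimal.Decimal values, hence the float() casts before packing everything into a NumPy array. A small sketch of that detail (the __initialization and __getscores helpers above are project-specific and omitted here):

import numpy as np
import segeval

hyp = (2, 3, 6)
ref = (5, 6)

pk_val = segeval.pk(hyp, ref)                      # decimal.Decimal
scores = np.array([float(pk_val),
                   float(segeval.window_diff(hyp, ref))])
print(scores.dtype, scores)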
Example #3
    def test_pk(self):
        '''
        Test pk.
        '''

        mean, std, var, stderr, count = \
            summarize(pk(KAZANTSEVA2012_G5))
        self.assertAlmostEqual(Decimal('0.35530058'), mean)
        self.assertAlmostEqual(Decimal('0.11001760'), std)
        self.assertAlmostEqual(Decimal('0.01210387'), var)
        self.assertAlmostEqual(Decimal('0.01587967'), stderr)
        self.assertEqual(48, count)
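The summarize helper used by this test aggregates a per-comparison statistic over a whole dataset into (mean, std, var, stderr, count). If that helper is not available, a rough stand-in, assuming you simply hold a list of (hypothesis, reference) mass pairs, is:

import statistics

import segeval

# Illustrative (hypothesis, reference) segment-mass pairs, one per document.
pairs = [((2, 3, 6), (5, 6)),
         ((4, 4, 4), (6, 6))]

values = [float(segeval.pk(hyp, ref)) for hyp, ref in pairs]
print(statistics.mean(values), statistics.pstdev(values), len(values))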
Example #4
    def pk(self, h, gold, window_size=-1):
        """
        :param gold: gold segmentation (item in the list contains the number of words in segment) 
        :param h: hypothesis segmentation  (each item in the list contains the number of words in segment)
        :param window_size: optional 
        :return: accuracy
        """
        if window_size != -1:
            false_seg_count, total_count = seg.pk(h,
                                                  gold,
                                                  window_size=window_size,
                                                  return_parts=True)
        else:
            false_seg_count, total_count = seg.pk(h, gold, return_parts=True)

        if total_count == 0:
            # TODO: Check when this happens
            false_prob = -1
        else:
            false_prob = float(false_seg_count) / float(total_count)

        return false_prob, total_count
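As this wrapper suggests, return_parts=True makes seg.pk return its raw parts (the count of windows judged incorrectly and the total number of windows) instead of the final ratio, which is what makes the -1 sentinel possible when no windows were compared. A standalone sketch of the underlying call:

import segeval as seg

hyp = (2, 3, 6)    # hypothesis segment sizes in words
gold = (5, 6)      # gold segment sizes in words

false_seg_count, total_count = seg.pk(hyp, gold, return_parts=True)
print(false_seg_count, total_count)
print(float(false_seg_count) / float(total_count))   # equals seg.pk(hyp, gold) up to rounding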
Example #5
    def eval_tile_text(self, sample):
        '''
        Returns a tuple of metric scores (Pk, WinDiff, B).
        '''
        ### Record paragraph break points
        sent_bounds, normed_text = self.get_sb_nt(sample)

        ### Break up text into Pseudosentences
        # this list maps pseudosentence index to beginning token index
        ps_bounds = list(range(0, len(normed_text), self.w))
        pseudosents = [normed_text[i:i + self.w] for i in ps_bounds]

        # discard pseudosents of length < self.w
        if len(pseudosents[-1]) < self.w:
            del pseudosents[-1]

        ### Group into blocks and calculate sim scores
        # List[Tuple(sim score, pseudosent index)]
        # here, the index is of the first PS in block_b
        sims = self.calculate_sims(pseudosents)

        ### Find boundaries (valleys)
        pred = []
        for j in range(1, len(sims) - 1):
            if sims[j] < sims[j - 1] and sims[j] < sims[j + 1]:
                pred.append(j)
        pred = [j + self.k for j in pred]

        ### Evaluate
        # map pseudosentence indices to beginning token index
        pred_btokis = [ps_bounds[i] for i in pred]
        # map beginning token index to closest sentence index
        # (this token is closest to the beginning of which sentence?)
        pred_sentis = [
            self.btoki_to_senti(t, sent_bounds) for t in pred_btokis
        ]
        # add last boundary (which we know is always there)
        pred_sentis += [len(sent_bounds)]
        gold_sentis = sample.get_sent_bound_idxs()

        pred = self.array_derivative(pred_sentis)
        gold = self.array_derivative(gold_sentis)

        pk = float(segeval.pk(pred, gold))
        wd = float(segeval.window_diff(pred, gold))
        bs = float(segeval.boundary_similarity(pred, gold, one_minus=True))

        return (pk, wd, bs)
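array_derivative above converts sorted boundary indices into the segment masses segeval expects; a rough, illustrative equivalent (positions_to_masses is a hypothetical helper, not the class's own method), assuming the indices are cumulative sentence positions whose last entry equals the document length:

import segeval

def positions_to_masses(bound_idxs):
    # Successive differences turn cumulative boundary positions into segment masses.
    return [b - a for a, b in zip([0] + bound_idxs[:-1], bound_idxs)]

pred_sentis = [3, 7, 10]   # predicted boundaries; last one = number of sentences
gold_sentis = [4, 10]      # gold boundaries

pred_masses = positions_to_masses(pred_sentis)   # -> [3, 4, 3]
gold_masses = positions_to_masses(gold_sentis)   # -> [4, 6]

print(float(segeval.pk(pred_masses, gold_masses)))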
Example #6
    [anno_pred, anno_seg,
     anno_end] = getTextTilingBoundaries(os.path.join(text_dir, f))
    anno_idx2range = convertFromIndex2Range(anno_idx, anno_end)
    print("-----")
    print(anno_end)
    print(anno_idx2range)
    print(anno_seg)
    print("----")
    print(anno_pred)
    print(anno_idx)

    anno_pred = set(anno_pred)
    anno_idx = set(anno_idx)
    union = len(anno_pred.union(anno_idx))
    correct = len(anno_pred.intersection(anno_idx))
    precision = 1.0 * correct / union
    recall = 1.0 * correct / len(anno_idx)
    avg_prec += precision
    avg_recall += recall

    print("%s	%f	%f" % (f, precision, recall))
    wd = segeval.window_diff(anno_seg, anno_idx2range)
    pk = segeval.pk(anno_seg, anno_idx2range)
    avg_wd += wd
    avg_pk += pk
    print("WD:	%f	P-k:	%f" % (wd, pk))
print("Average:	%f	%f	WD:	%f	Pk:	%f	(%d)" %
      (avg_prec / (sel_files), avg_recall / (sel_files), avg_wd /
       decimal.Decimal(sel_files), avg_pk / decimal.Decimal(sel_files),
       (sel_files)))
Example #7
import sys

from nltk.metrics.segmentation import pk, windowdiff
import segeval as se
import horae as ho
import codecs


if __name__ == '__main__':

    test = sys.argv[1]
    classifier = sys.argv[2]
    type_ = sys.argv[3]
    level = sys.argv[4]

    path_pred = "../data/test/seg/" + test + "_" + level + ".pred_" +\
                classifier
    path_ref = "../data/test/choiformat/" + type_ + "/" + test + "_" +\
               level + ".ref"

    ref, nbref1, refs = ho.load_text(path_ref)
    pred, nbpred1, preds = ho.load_text(path_pred)

    d = {"stargazer": {"1": refs, "2": preds}}

    seg1 = d['stargazer']['1']
    seg2 = d['stargazer']['2']
    segs1 = se.convert_positions_to_masses(seg1)
    segs2 = se.convert_positions_to_masses(seg2)
    print("pk\tWindowdiff: \n")
    print(str(round(se.pk(segs2, segs1), 4)) + "\t" +
          str(round(se.window_diff(segs2, segs1), 4)))
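convert_positions_to_masses turns a position-encoded segmentation (one segment label per unit) into the segment masses that segeval's window metrics expect. A minimal sketch:

import segeval as se

# One segment label per sentence: three sentences in segment 1, two in 2, one in 3.
positions = [1, 1, 1, 2, 2, 3]

masses = se.convert_positions_to_masses(positions)
print(masses)                          # -> (3, 2, 1)

# Masses feed straight into the window metrics.
print(float(se.pk(masses, masses)))    # identical segmentations -> 0.0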
Example #8
# Compute MAP (mean average precision) over the reference groups
ap_vector = [
    average_precision_score(rstr_best_real_group_vec == group_id,
                            rstr_algo_group_vec == group_id)
    for group_id in range(1, max(rstr_real_group_vec) + 1)
]
map = np.mean(ap_vector)

# Segmentation evaluation
real_segm_vec = convert_positions_to_masses(rstr_real_group_vec)
algo_segm_vec = convert_positions_to_masses(rstr_algo_group_vec)
# Random-permutation baseline for comparison
rdm_group_vec = rstr_real_group_vec.copy()
rdm.shuffle(rdm_group_vec)
rdm_segm_vec = convert_positions_to_masses(rdm_group_vec)
pk_res = pk(algo_segm_vec, real_segm_vec)
win_diff = window_diff(algo_segm_vec, real_segm_vec)
pk_rdm = pk(rdm_segm_vec, real_segm_vec)
win_diff_rdm = window_diff(rdm_segm_vec, real_segm_vec)

# Compute the aggregate labels
df_results = pd.DataFrame(result_matrix)
df_results["Token"] = token_list
type_results = df_results.groupby("Token").mean()
type_list = list(type_results.index)
type_values = type_results.to_numpy()

# -------------------------------------
# --- Writing
# -------------------------------------
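The pk_rdm and win_diff_rdm values above act as a random baseline for the algorithm's scores. A self-contained sketch of the same idea, here drawing random boundary positions directly instead of shuffling a group-label vector:

import random

import segeval

real_masses = (5, 3, 4)   # reference segment masses: 12 units, 3 segments
algo_masses = (4, 4, 4)   # hypothesis segment masses

# Random baseline: same number of segments, boundaries placed at random positions.
n_units = sum(real_masses)
cuts = sorted(random.sample(range(1, n_units), len(real_masses) - 1))
rdm_masses = [b - a for a, b in zip([0] + cuts, cuts + [n_units])]

print(float(segeval.pk(algo_masses, real_masses)))
print(float(segeval.pk(rdm_masses, real_masses)))   # baseline the algorithm should beat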
Example #9
    def evaluate(self, batch, preds, sent=True, word=True):
        """ For a given batch and its corresponding preds, get metrics 
        
        batch: Batch instance
        preds: list
        
        Usage:
            >> from loader import *
            >> from modules import *
            >>
            >> model = TextSeg(lstm_dim=200, score_dim=200, bidir=True, num_layers=2)
            >> trainer = Trainer(model=model,
                                  train_dir='../data/wiki_727/train', 
                                  val_dir='../data/wiki_50/test',
                                  batch_size=10,
                                  lr=1e-3)  
            >> evalu = Metrics()
            >>
            >> batch = sample_and_batch(trainer.train_dir, trainer.batch_size, TRAIN=True)
            >> preds = trainer.predict_batch(batch)
            >> evalu(batch, preds)
        """
        metric_dict = {}

        assert sent or word, 'Choose sentence- and/or word-level evaluation.'

        # Word level
        if word:
            w_true, w_pred = self._word(batch, preds)

            metric_dict['w_pk'] = seg.pk(w_pred, w_true)
            metric_dict['w_wd'] = seg.window_diff(w_pred,
                                                  w_true,
                                                  lamprier_et_al_2007_fix=True)
            metric_dict['w_ss'] = seg.segmentation_similarity(w_pred, w_true)
            metric_dict['w_bs'] = seg.boundary_similarity(w_pred, w_true)

            w_confusion = seg.boundary_confusion_matrix(w_pred, w_true)

            metric_dict['w_precision'] = seg.precision(w_confusion)
            metric_dict['w_recall'] = seg.recall(w_confusion)
            metric_dict['w_f1'] = seg.fmeasure(w_confusion)

        # Sentence level
        if sent:
            s_true, s_pred = self._sent(batch, preds)

            metric_dict['s_pk'] = seg.pk(s_pred, s_true)
            metric_dict['s_wd'] = seg.window_diff(s_pred,
                                                  s_true,
                                                  lamprier_et_al_2007_fix=True)
            metric_dict['s_ss'] = seg.segmentation_similarity(s_pred, s_true)
            metric_dict['s_bs'] = seg.boundary_similarity(s_pred, s_true)

            s_confusion = seg.boundary_confusion_matrix(s_pred, s_true)

            metric_dict['s_precision'] = seg.precision(s_confusion)
            metric_dict['s_recall'] = seg.recall(s_confusion)
            metric_dict['s_f1'] = seg.fmeasure(s_confusion)

        return metric_dict
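Besides the window metrics, the method above derives boundary-level precision, recall and F1 from segeval's boundary confusion matrix; a minimal standalone version of that last step:

import segeval as seg

s_pred = (2, 3, 6)   # predicted segment masses
s_true = (5, 6)      # reference segment masses

confusion = seg.boundary_confusion_matrix(s_pred, s_true)
print(float(seg.precision(confusion)),
      float(seg.recall(confusion)),
      float(seg.fmeasure(confusion)))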
Example #10

def retrieve_gold_set_vector(parsed_arguments):
    output_vector = []
    gold_set_files = []
    for directory in parsed_arguments.directories:
        gold_set_files.extend(match(directory, parsed_arguments.gold_sets))
    for file in gold_set_files:
        segment_sizes = read_csv(file).segment_size
        output_vector.extend(segment_sizes)
    return output_vector


def retrieve_result_set_vector(parsed_arguments):
    output_vector = []
    result_set_files = []
    for directory in parsed_arguments.directories:
        result_set_files.extend(match(directory, parsed_arguments.results))
    for file in result_set_files:
        segment_sizes = read_csv(file).segment_size
        output_vector.extend(segment_sizes)
    return output_vector


if __name__ == "__main__":
    parsed_arguments = setup_argument_parser()
    gold_set_values = retrieve_gold_set_vector(parsed_arguments)
    result_set_values = retrieve_result_set_vector(parsed_arguments)

    print "Printing Comparison Statistics:"
    print "P_k value: {}".format(pk(gold_set_values, result_set_values))
    print "WindowDiff value: {}".format(
        window_diff(gold_set_values, result_set_values))
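The read_csv(file).segment_size access implies each input CSV has a segment_size column listing one segment mass per row. An illustrative round-trip with made-up file contents (the column name is the only detail taken from the code above):

import io

import pandas as pd

# Illustrative CSV contents: one segment mass per row under a segment_size column.
csv_text = "segment_size\n5\n3\n4\n"

segment_sizes = pd.read_csv(io.StringIO(csv_text)).segment_size
print(list(segment_sizes))   # -> [5, 3, 4]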
Example #11
def get_Pk_score(reference, hypothesis):
    ref, hyp = __initialization(reference, hypothesis)
    # Evaluate algorithm using pk metric
    return segeval.pk(ref, hyp)