Example #1
def cross_evaluate(corpus_fns, annotators=None, pickle_fn=None,
                   words_only=False):
    """
    Performs a cross evaluation of each annotator against each of the other
    ones. Returns an AlignEval object. Optionally pickles to a file.
    """
    if annotators:
        assert len(annotators) == len(corpus_fns)
    else:
        annotators = ["A%d" % (i+1) 
                      for i in range(len(corpus_fns))]
        
    corpora = read_corpora(corpus_fns, words_only)
    align_eval = AlignEval()
    
    for true_corp, true_annot in zip(corpora, annotators):
        for pred_corp, pred_annot in zip(corpora, annotators):
            if true_annot != pred_annot:
                name = (true_annot, pred_annot)
                align_eval.add(true_corp, pred_corp, name)

    align_eval.run_eval()
        
    if pickle_fn:
        with open(pickle_fn, "wb") as pickle_file:
            pickle.dump(align_eval, pickle_file)
        
    return align_eval
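
A brief usage sketch for cross_evaluate; the corpus file names and annotator labels below are hypothetical, and each file is assumed to hold the same corpus as aligned by a different annotator:

# Hypothetical annotator corpora over the same texts
corpus_fns = ["annotator_a.pgc", "annotator_b.pgc", "annotator_c.pgc"]

align_eval = cross_evaluate(corpus_fns,
                            annotators=["A", "B", "C"],
                            pickle_fn="cross_eval.pkl",  # optional pickled result
                            words_only=True)
align_eval.write()  # write the cross-evaluation report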
Example #2
    def test_word_baseline_3(self):
        print "Baseline: greedy_align_phrases"
        pred_pgc = copy.deepcopy(self.true_pgc)
        greedy_align_phrases(pred_pgc)
        align_eval = AlignEval()
        align_eval.add(self.true_pgc, pred_pgc)
        align_eval.run_eval()
        align_eval.write_alignment_overall()
Example #3
    def test_word_baseline_2(self):
        # evaluation is incorrect, because the true corpus also contains
        # phrase alignments
        print "Baseline: greedy_align_words"
        pred_pgc = copy.deepcopy(self.true_pgc)
        greedy_align_words(pred_pgc)
        align_eval = AlignEval()
        align_eval.add(self.true_pgc, pred_pgc)
        align_eval.run_eval()
        align_eval.write_alignment_overall()
Example #4
    def test_pickle(self):
        true_corpus = pred_corpus = ParallelGraphCorpus(inf="data/corpus-1.pgc")
        align_eval = AlignEval()
        align_eval.add(true_corpus, pred_corpus, "corpus-1")
        align_eval.run_eval()

        pickle_file = tempfile.TemporaryFile()
        pickle.dump(align_eval, pickle_file, 2)
        pickle_file.seek(0)
        align_eval_2 = pickle.load(pickle_file)
        align_eval_2.write()
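
A similar round-trip with a named file instead of a temporary one; the path is hypothetical, pickle is the standard library module used above, and align_eval is an evaluated AlignEval built as in the test:

import pickle

# Hypothetical location for the persisted evaluation
with open("eval/align_eval.pkl", "wb") as f:
    pickle.dump(align_eval, f, 2)

with open("eval/align_eval.pkl", "rb") as f:
    restored = pickle.load(f)
restored.write()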
Example #5
def eval_corpora(true_corpora, pred_corpora, names, eval_fname,
                 align_eval=None, n=None):
    """
    Evaluate predicted against true parallel graph corpora.
    
    @param true_corpora: iterable of true parallel graph corpora
    
    @param pred_corpora: iterable of predicted parallel graph corpora
    
    @param names: iterable of labels for true/predicted pairs
    
    @param eval_fname: name of file to which evaluation output is written 
    
    @keyword align_eval: AlignEval instance
    
    @keyword n: limit evaluation to the first n corpus pairs
    """
    if align_eval:
        assert isinstance(align_eval, AlignEval)
        # reset evaluator to prevent accidents
        align_eval.__init__()
    else:
        align_eval = AlignEval()
    
    count = 0

    for true_corpus, pred_corpus, name in itertools.izip(true_corpora, 
                                                         pred_corpora,
                                                         names):
        align_eval.add(true_corpus, pred_corpus, name)   
        count += 1
        if count == n:
            break
        
    align_eval.run_eval()
    log.info("saving evaluation report {0}".format(eval_fname))
    makedirs(os.path.dirname(eval_fname))
    align_eval.write(eval_fname)
    return align_eval
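
A sketch of a possible call to eval_corpora; the corpus file names and the report path are hypothetical, and the corpora are assumed to be ParallelGraphCorpus instances as in the other examples:

import os
from daeso.pgc.corpus import ParallelGraphCorpus

# Hypothetical true/predicted corpus files, paired by position
true_fns = ["data/part1_true.pgc", "data/part2_true.pgc"]
pred_fns = ["data/part1_pred.pgc", "data/part2_pred.pgc"]

true_corpora = (ParallelGraphCorpus(inf=fn) for fn in true_fns)
pred_corpora = (ParallelGraphCorpus(inf=fn) for fn in pred_fns)
names = [os.path.basename(fn) for fn in true_fns]

align_eval = eval_corpora(true_corpora, pred_corpora, names,
                          "eval/parts.txt")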
Example #6
"""
calculate simple baselines based on greedy alignment of equal words/roots

run from the exp dir which contains a data subdir with the true pgc
files and an eval subdir for evaluation results
"""

import copy
import glob
import os

from daeso.pgc.corpus import ParallelGraphCorpus
from daeso.pgc.evaluate import AlignEval
from daeso_nl.ga.kb.baseline import greedy_align_equal_words, greedy_align_equal_words_roots

eval1 = AlignEval()
eval2 = AlignEval()

for pgc_fn in glob.glob("data/part*true.pgc"):
    true_corpus = ParallelGraphCorpus(inf=pgc_fn)
    pred_corpus = copy.deepcopy(true_corpus)

    greedy_align_equal_words(pred_corpus)
    eval1.add(true_corpus, pred_corpus, os.path.basename(pgc_fn))

    greedy_align_equal_words_roots(pred_corpus)
    eval2.add(true_corpus, pred_corpus, os.path.basename(pgc_fn))

eval1.run_eval()
eval1.write("eval/greedy_align_equals_words.txt")

# run and write the second baseline as well (the output file name is assumed)
eval2.run_eval()
eval2.write("eval/greedy_align_equal_words_roots.txt")
Example #7
    "-r", "--relations",
    metavar="REL",
    nargs="*",
    help="limit output to given relations")

args = parser.parse_args()


corpus1 = ParallelGraphCorpus(inf=args.corpus1)
corpus2 = ParallelGraphCorpus(inf=args.corpus2)

pgc_diff(corpus1, corpus2, 
         corpus_name1=args.corpus1,
         corpus_name2=args.corpus2,
         annot1=args.first_annotator, 
         annot2=args.second_annotator,
         show_comments=args.with_comments,
         show_ident=args.with_ident,
         relations=args.relations)

if args.evaluate:
    from daeso.pgc.evaluate import AlignEval
    align_eval = AlignEval()
    align_eval.add(corpus1, corpus2)
    align_eval.run_eval()
    align_eval.write()
Example #8
    def init_procs(self):
        self.develop = True
        self.validate = False

        # parting
        self.part = False
        self.dev_parts = {}
        self.val_parts = {}
        self.part_max_size = None

        # feature description
        self.features = ()

        # feature extraction
        self.extract = True
        self.graph_selector = select_aligned_graph_pair
        self.node_selector = select_visible_node
        self.binary = False

        # sampling
        self.sample = False
        self.class_fracts = {}
        #self.sampler =  None

        # classification
        self.classify = True
        # Normally self.classifier will be None, and a TimblFile instance with
        # appropriate settings will be created on the fly. If you want to set
        # a classifier explicitly, notice that it must at least set the
        # verbosity options +vo, +vdb, +vdi, the -f option to specify the
        # instances file, and the -m option to specify that the
        # administrative features must be ignored.
        self.classifier = None
        self.timbl_opts = ""
        self.timbl_log = True
        self.feat_weight_graphs = False
        self.train_sample = False

        # weighting
        self.weight = True
        self.weight_func = entropy_weight

        # matching
        self.match = True
        self.matcher = Matcher()

        # merging
        self.merge = True
        self.merger = Merger()

        # evaluation
        self.evaluate = True
        self.evaluator = AlignEval()
        # AlignEval instances for the develop and validation experiments,
        # which are saved as part of the pickled settings
        self.dev_eval = None
        self.val_eval = None

        # iteration limit during cross-validation
        self.n = None

        # store settings including evaluation as a pickle
        self.pickle = False