Example #1
def cross_evaluate(corpus_fns, annotators=None, pickle_fn=None,
                   words_only=False):
    """
    Performs a cross evaluation of each annotator against each of the other
    ones. Returns an AlignEval object. Optionally pickles to a file.
    """
    if annotators:
        assert len(annotators) == len(corpus_fns)
    else:
        annotators = ["A%d" % (i+1) 
                      for i in range(len(corpus_fns))]
        
    corpora = read_corpora(corpus_fns, words_only)
    align_eval = AlignEval()
    
    for true_corp, true_annot in zip(corpora, annotators):
        for pred_corp, pred_annot in zip(corpora, annotators):
            if true_annot != pred_annot:
                name = (true_annot, pred_annot)
                align_eval.add(true_corp, pred_corp, name)

    align_eval.run_eval()
        
    if pickle_fn:
        with open(pickle_fn, "wb") as pickle_file:
            pickle.dump(align_eval, pickle_file)
        
    return align_eval
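
A brief usage sketch for cross_evaluate; the corpus file names and annotator labels below are hypothetical, and each file is assumed to hold the same corpus as aligned by a different annotator:

# Hypothetical annotator corpora over the same texts
corpus_fns = ["annotator_a.pgc", "annotator_b.pgc", "annotator_c.pgc"]

align_eval = cross_evaluate(corpus_fns,
                            annotators=["A", "B", "C"],
                            pickle_fn="cross_eval.pkl",  # optional pickled result
                            words_only=True)
align_eval.write()  # write the cross-evaluation report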
Example #2
    def test_word_baseline_3(self):
        print "Baseline: greedy_align_phrases"
        pred_pgc = copy.deepcopy(self.true_pgc)
        greedy_align_phrases(pred_pgc)
        align_eval = AlignEval()
        align_eval.add(self.true_pgc, pred_pgc)
        align_eval.run_eval()
        align_eval.write_alignment_overall()
Example #3
    def test_word_baseline_2(self):
        # evaluation is incorrect, because the true corpus also contains
        # phrase alignments
        print "Baseline: greedy_align_words"
        pred_pgc = copy.deepcopy(self.true_pgc)
        greedy_align_words(pred_pgc)
        align_eval = AlignEval()
        align_eval.add(self.true_pgc, pred_pgc)
        align_eval.run_eval()
        align_eval.write_alignment_overall()
Example #4
    def test_pickle(self):
        true_corpus = pred_corpus = ParallelGraphCorpus(inf="data/corpus-1.pgc")
        align_eval = AlignEval()
        align_eval.add(true_corpus, pred_corpus, "corpus-1")
        align_eval.run_eval()

        pickle_file = tempfile.TemporaryFile()
        pickle.dump(align_eval, pickle_file, 2)
        pickle_file.seek(0)
        align_eval_2 = pickle.load(pickle_file)
        align_eval_2.write()
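
A similar round-trip with a named file instead of a temporary one; the path is hypothetical, pickle is the standard library module used above, and align_eval is an evaluated AlignEval built as in the test:

import pickle

# Hypothetical location for the persisted evaluation
with open("eval/align_eval.pkl", "wb") as f:
    pickle.dump(align_eval, f, 2)

with open("eval/align_eval.pkl", "rb") as f:
    restored = pickle.load(f)
restored.write()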
Example #5
def eval_corpora(true_corpora, pred_corpora, names, eval_fname,
                 align_eval=None, n=None):
    """
    Evaluate predicted against true parallel graph corpora.
    
    @param true_corpora: iterable of true parallel graph corpora
    
    @param pred_corpora: iterable of predicted parallel graph corpora
    
    @param names: iterable of labels for true/predicted pairs
    
    @param eval_fname: name of file to which evaluation output is written 
    
    @keyword align_eval: AlignEval instance
    
    @keyword n: limit evaluation to the first n corpus pairs
    """
    if align_eval:
        assert isinstance(align_eval, AlignEval)
        # reset evaluator to prevent accidents
        align_eval.__init__()
    else:
        align_eval = AlignEval()
    
    count = 0

    for true_corpus, pred_corpus, name in itertools.izip(true_corpora, 
                                                         pred_corpora,
                                                         names):
        align_eval.add(true_corpus, pred_corpus, name)   
        count += 1
        if count == n:
            break
        
    align_eval.run_eval()
    log.info("saving evaluation report {0}".format(eval_fname))
    makedirs(os.path.dirname(eval_fname))
    align_eval.write(eval_fname)
    return align_eval
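
A sketch of a possible call to eval_corpora; the corpus file names and the report path are hypothetical, and the corpora are assumed to be ParallelGraphCorpus instances as in the other examples:

import os
from daeso.pgc.corpus import ParallelGraphCorpus

# Hypothetical true/predicted corpus files, paired by position
true_fns = ["data/part1_true.pgc", "data/part2_true.pgc"]
pred_fns = ["data/part1_pred.pgc", "data/part2_pred.pgc"]

true_corpora = (ParallelGraphCorpus(inf=fn) for fn in true_fns)
pred_corpora = (ParallelGraphCorpus(inf=fn) for fn in pred_fns)
names = [os.path.basename(fn) for fn in true_fns]

align_eval = eval_corpora(true_corpora, pred_corpora, names,
                          "eval/parts.txt")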
Example #6
"""
calculate simple baselines based on greedy alignment of equal words/roots

run from the exp dir which contains a data subdir with the true pgc
files and an eval subdir for evaluation results
"""

import copy
import glob
import os

from daeso.pgc.corpus import ParallelGraphCorpus
from daeso.pgc.evaluate import AlignEval
from daeso_nl.ga.kb.baseline import greedy_align_equal_words, greedy_align_equal_words_roots

eval1 = AlignEval()
eval2 = AlignEval()

for pgc_fn in glob.glob("data/part*true.pgc"):
    true_corpus = ParallelGraphCorpus(inf=pgc_fn)
    pred_corpus = copy.deepcopy(true_corpus)

    greedy_align_equal_words(pred_corpus)
    eval1.add(true_corpus, pred_corpus, os.path.basename(pgc_fn))

    greedy_align_equal_words_roots(pred_corpus)
    eval2.add(true_corpus, pred_corpus, os.path.basename(pgc_fn))

eval1.run_eval()
eval1.write("eval/greedy_align_equals_words.txt")

# run and write the second baseline as well (the output file name is assumed)
eval2.run_eval()
eval2.write("eval/greedy_align_equal_words_roots.txt")
Example #7
    "-r", "--relations",
    metavar="REL",
    nargs="*",
    help="limit output to given relations")

args = parser.parse_args()


corpus1 = ParallelGraphCorpus(inf=args.corpus1)
corpus2 = ParallelGraphCorpus(inf=args.corpus2)

pgc_diff(corpus1, corpus2, 
         corpus_name1=args.corpus1,
         corpus_name2=args.corpus2,
         annot1=args.first_annotator, 
         annot2=args.second_annotator,
         show_comments=args.with_comments,
         show_ident=args.with_ident,
         relations=args.relations)

if args.evaluate:
    from daeso.pgc.evaluate import AlignEval
    align_eval = AlignEval()
    align_eval.add(corpus1, corpus2)
    align_eval.run_eval()
    align_eval.write()
Example #8
    def init_procs(self):
        self.develop = True
        self.validate = False

        # parting
        self.part = False
        self.dev_parts = {}
        self.val_parts = {}
        self.part_max_size = None

        # feature description
        self.features = ()

        # feature extraction
        self.extract = True
        self.graph_selector = select_aligned_graph_pair
        self.node_selector = select_visible_node
        self.binary = False

        # sampling
        self.sample = False
        self.class_fracts = {}
        #self.sampler =  None

        # classification
        self.classify = True
        # Normally self.classifier will be None, and a TimblFile instance with
        # appropriate settings will be created on the fly. If you want to set
        # a classifier explicitly, notice that it must at least set the
        # verbosity options +vo, +vdb, +vdi, the -f option to specify the
        # instances file, and the -m option to specify that the
        # administrative features must be ignored.
        self.classifier = None
        self.timbl_opts = ""
        self.timbl_log = True
        self.feat_weight_graphs = False
        self.train_sample = False

        # weighting
        self.weight = True
        self.weight_func = entropy_weight

        # matching
        self.match = True
        self.matcher = Matcher()

        # merging
        self.merge = True
        self.merger = Merger()

        # evaluation
        self.evaluate = True
        self.evaluator = AlignEval()
        # AlignEval instances for the develop and validation experiments,
        # which are saved as part of the pickled settings
        self.dev_eval = None
        self.val_eval = None

        # iteration limit during cross-validation
        self.n = None

        # store settings including evaluation as a pickle
        self.pickle = False