def get_trees(self):
     """
     Read every tree from every file listed in self.files.

     Each path is opened in binary mode and handed to
     Trees.PennTreeReader; all trees the reader yields are collected,
     in file order, into a single list.  A "Tree N" progress line is
     printed each time the loop reaches a file whose 1-based position
     is a multiple of 100.

     Returns the list of trees (empty when self.files is empty).
     """
     trees = []
     for i, tree_file in enumerate(self.files):
         if (i + 1) % 100 == 0:
             # A single parenthesised argument prints the same text under
             # both the print statement and the print function.
             print("Tree %d" % (i + 1))
         # The with-block closes the file even if the reader raises
         # part-way through, which the manual close() did not guarantee.
         with open(tree_file, 'rb') as ff:
             trees.extend(Trees.PennTreeReader(ff))
     return trees
 def unannotate_tree(cls, annotated_tree):
     """
     Undo the annotation applied to a tree, in two passes.

     Pass 1 splices out intermediate nodes, i.e. those whose labels
     begin with "@" (for example @NP->DT_JJ).
     Pass 2 trims every remaining label back to its base symbol by
     cutting at the leftmost -, ^, or : character (for example NP^S
     becomes NP).

     Returns the tree produced by the second pass.
     """
     # Pass 1: drop the "@" intermediate nodes.
     without_at_nodes = Trees.splice_nodes(
         annotated_tree, TreeAnnotations.at_filter)
     # Pass 2: strip everything after the base symbol on each label.
     return Trees.FunctionNodeStripper.transform_tree(without_at_nodes)
Пример #3
0
def output(partId, ch_aux):
    """Uses the student code to compute the output for test cases.

    A PCFGParser is trained on the MASC training trees and then scored
    with test_parser on the data selected by partId.

    Args:
        partId: 1 scores the parser on the development trees loaded with
            read_masc_trees; 2 scores it on the Penn-format trees
            contained in ch_aux.
        ch_aux: text of the test trees received from the server.  It is
            only parsed when partId is 2.

    Returns:
        The string "[partId, version, f1]" for parts 1 and 2, or None
        when partId is anything else.
    """
    print("== getting output for part: %d" % partId)

    if ch_aux is None:
        print("== Error receiving data from server. Please try again.")

    version = 1

    # Progress messages are written to the real stdout through `out`,
    # while anything printed by the parser itself is sent to devnull.
    out = sys.stdout
    devnull = open(os.devnull, 'w')
    sys.stdout = devnull
    try:
        parser = PCFGParser()
        base_path = "../data/parser/masc/"

        out.write("== Training parser...\n")
        train_trees = read_masc_trees("%strain" % base_path, 0, 38)
        parser.train(train_trees)
        out.write("== done training.\n")

        if partId == 1:
            out.write("== Reading in development set...\n")
            test_trees = read_masc_trees("%sdevtest" % base_path, 0, 11)
            out.write("== Testing on development set...\n")
            f1 = test_parser(parser, test_trees, out)
        elif partId == 2:
            out.write("== Reading in test set...\n")

            import StringIO
            # Wrap the received text in a file-like object for the reader.
            test_string = StringIO.StringIO()
            test_string.write(ch_aux)
            test_string.seek(0)

            raw_test_trees = list(Trees.PennTreeReader(test_string))
            test_trees = [Trees.StandardTreeNormalizer.transform_tree(tree)
                          for tree in raw_test_trees]

            out.write("== Testing on test set...\n")
            f1 = test_parser(parser, test_trees, out)
        else:
            out.write("!!! Invalid part choice: %d\n" % partId)
            return None

        return "[%d, %d, %f]" % (partId, version, f1)
    finally:
        # Always restore stdout and release the devnull handle, including
        # when training or testing raises.
        sys.stdout = out
        devnull.close()
 def unannotate_tree(cls, annotated_tree):
     """
     Strip binarization and label annotations from annotated_tree.

     * Intermediate nodes (labels beginning with "@", such as
       @NP->DT_JJ) are spliced out of the tree.
     * Every other label loses whatever follows its base symbol; the
       cut is made at the leftmost -, ^, or : character, so NP^S is
       reduced to NP.
     """
     spliced = Trees.splice_nodes(annotated_tree, TreeAnnotations.at_filter)
     stripped = Trees.FunctionNodeStripper.transform_tree(spliced)
     return stripped
Пример #5
0
 def unbinarize_tree(cls, tree):
     """
     Splice out the intermediate nodes of a binarized tree.

     Intermediate nodes are those whose labels begin with "@"; a node
     labelled @NP->DT_JJ, for instance, is removed by the splice.
     Returns whatever Trees.splice_nodes produces for the tree.
     """
     unbinarized = Trees.splice_nodes(tree, TreeBinarization.at_filter)
     return unbinarized
Пример #6
0
 def unannotate_tree(cls, annotated_tree):
     """
     Return annotated_tree with its annotations removed.

     The tree is first passed through Trees.splice_nodes with
     TreeAnnotations.at_filter, and the result is then handed to
     Trees.FunctionNodeStripper.transform_tree.
     """
     # Step 1: splice out the nodes selected by at_filter.
     tree_without_filtered_nodes = Trees.splice_nodes(
         annotated_tree, TreeAnnotations.at_filter)
     # Step 2: run the function-node stripper over what is left.
     return Trees.FunctionNodeStripper.transform_tree(
         tree_without_filtered_nodes)
            else:
                return 1
        end = start
        for child in tree.children:
            child_span = self.add_constituents(child, aset, end)
            end += child_span
        label = tree.label
        if label not in self.labels_to_ignore:
            aset.add(LabeledConstituent(label, start, end))
        return end - start


if __name__ == '__main__':
    # Demo: evaluate a guessed parse against a gold parse of the same
    # sentence and display the evaluator's report.
    import StringIO

    def _read_first_tree(bracketed):
        """Return the first tree PennTreeReader yields for the given text."""
        stream = StringIO.StringIO()
        stream.write(bracketed)
        stream.seek(0)
        return Trees.PennTreeReader(stream).next()

    gold_tree = _read_first_tree(
        "(ROOT (S (NP (DT the) (NN can)) (VP (VBD fell))))")
    guess_tree = _read_first_tree(
        "(ROOT (S (NP (DT the)) (VP (MB can) (VP (VBD fell)))))")

    evaluator = LabeledConstituentEval(["ROOT"], set())
    evaluator.evaluate(guess_tree, gold_tree)
    evaluator.display(True)