Example #1
    def ids_to_tree(self, emb, postprocess=True):
        """Create a fake (flat) t-tree from token embeddings (IDs).

        @param emb: source embeddings (token IDs)
        @param postprocess: postprocess the sentence (capitalize sentence start, merge plural \
            markers)? True by default.
        @return: the corresponding tree
        """

        tree = TreeData()
        tokens = self.ids_to_strings(emb)

        for token in tokens:
            if token in ['<GO>', '<STOP>', '<VOID>']:
                continue
            if postprocess:
                # casing (only if set to lowercase)
                if self.lowercase and (len(tree) == 1 or tree.nodes[-1].t_lemma in ['.', '?', '!']):
                    token = token[0].upper() + token[1:]
                # plural merging (if plural tokens come up)
                if token == '<-s>' and tree.nodes[-1].t_lemma is not None:
                    token = self._singular_to_plural(tree.nodes[-1].t_lemma)
                    tree.remove_node(len(tree) - 1)
                elif token == '<-s>':
                    continue

            tree.create_child(0, len(tree), NodeData(token, 'x'))

        return tree
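A minimal round-trip sketch (assuming `tree_embs` is an already initialized instance of this extractor, with its vocabulary built via `init_dict` on the training trees):

from tgen.tree import TreeData, NodeData

# build a flat "sentence" tree: every token hangs off the technical root
sent = TreeData()
for tok in ['hello', 'world', '.']:
    sent.create_child(0, len(sent), NodeData(tok, 'x'))

ids = tree_embs.get_embeddings(sent)    # flat tree -> padded token IDs
roundtrip = tree_embs.ids_to_tree(ids)  # token IDs -> flat t-tree again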
Example #2
    def ids_to_tree(self, emb):
        """Rebuild a tree from the embeddings (token IDs).

        @param emb: source embeddings (token IDs)
        @return: the corresponding tree
        """

        tree = TreeData()
        tree.nodes = []  # override the technical root -- the tree will be created including the technical root
        tree.parents = []

        # build the tree recursively (start at position 2 to skip the <GO> symbol and 1st opening bracket)
        self._create_subtree(tree, -1, emb, 2)
        return tree
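The bracketed linearization itself comes from the matching `get_embeddings`; here is a self-contained toy of the recursive rebuild idea (an illustration only, not tgen's actual `_create_subtree` token layout):

def build(tokens, pos, parent, nodes, parents):
    """Toy recursive rebuild of (node, parent) lists from a bracketed token list."""
    while pos < len(tokens) and tokens[pos] != ')':
        node_id = len(nodes)
        nodes.append(tokens[pos])
        parents.append(parent)
        pos += 1
        if pos < len(tokens) and tokens[pos] == '(':
            pos = build(tokens, pos + 1, node_id, nodes, parents) + 1
    return pos

nodes, parents = [], []
build(['a', '(', 'b', 'c', ')', 'd'], 0, -1, nodes, parents)
# nodes == ['a', 'b', 'c', 'd'], parents == [-1, 0, 0, -1]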
Example #3
    def _beam_search(self, enc_inputs, da):
        """Run beam search decoding."""

        # true "batches" not implemented
        assert len(enc_inputs[0]) == 1

        # run greedy decoder for comparison (debugging purposes)
        log_debug("GREEDY DEC WOULD RETURN:\n" +
                  " ".join(self.tree_embs.ids_to_strings(
                      [out_tok[0] for out_tok in self._greedy_decoding(enc_inputs, None)[0]])))

        # initialize
        self._init_beam_search(enc_inputs)
        empty_tree_emb = self.tree_embs.get_embeddings(TreeData())
        dec_inputs = cut_batch_into_steps([empty_tree_emb])

        paths = [self.DecodingPath(stop_token_id=self.tree_embs.STOP, dec_inputs=[dec_inputs[0]])]

        # beam search steps
        for step in xrange(len(dec_inputs)):

            new_paths = []

            for path in paths:
                out_probs, st = self._beam_search_step(path.dec_inputs, path.dec_states)
                new_paths.extend(path.expand(self.beam_size, out_probs, st))

            def cmp_func(p, q):
                """Length-weighted comparison of two paths' logprobs."""
                return cmp(p.logprob / (len(p) ** self.length_norm_weight),
                           q.logprob / (len(q) ** self.length_norm_weight))

            paths = sorted(new_paths, cmp=cmp_func, reverse=True)[:self.beam_size]

            if all([p.dec_inputs[-1] == self.tree_embs.VOID for p in paths]):
                break  # stop decoding if we have reached the end in all paths

            log_debug(("\nBEAM SEARCH STEP %d\n" % step) +
                      "\n".join([("%f\t" % p.logprob) +
                                 " ".join(self.tree_embs.ids_to_strings([inp[0] for inp in p.dec_inputs]))
                                 for p in paths]) + "\n")

        # rerank paths by their distance to the input DA
        if self.classif_filter or self.context_bleu_weight:
            paths = self._rerank_paths(paths, da)

        # measure slot error on the top k paths
        if self.slot_err_stats:
            for path in paths[:self.sample_top_k]:
                self.slot_err_stats.append(
                        da, self.tree_embs.ids_to_strings([inp[0] for inp in path.dec_inputs]))

        # select the "best" path -- either the best, or one in top k
        if self.sample_top_k > 1:
            best_path = self._sample_path(paths[:self.sample_top_k])
        else:
            best_path = paths[0]

        # return just the best path (as token IDs)
        return np.array(best_path.dec_inputs)
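Note that the built-in `cmp` and `sorted(cmp=...)` exist only in Python 2; under Python 3 the same length-normalized ranking would use a key function, e.g.:

# Python 3 drop-in for the cmp-based ranking above (same DecodingPath
# objects, using .logprob and len() exactly as cmp_func does)
paths = sorted(new_paths,
               key=lambda p: p.logprob / (len(p) ** self.length_norm_weight),
               reverse=True)[:self.beam_size]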
Example #4
    def _init_training(self, das_file, ttree_file, data_portion):
        """Initialize training.

        Store input data, initialize 1-hot feature representations for input and output,
        transform the training data accordingly, and initialize the classification neural network.
        """
        # read input
        log_info('Reading DAs from ' + das_file + '...')
        das = read_das(das_file)
        log_info('Reading t-trees from ' + ttree_file + '...')
        ttree_doc = read_ttrees(ttree_file)
        trees = trees_from_doc(ttree_doc, self.language, self.selector)

        # make training data smaller if necessary
        train_size = int(round(data_portion * len(trees)))
        self.train_trees = trees[:train_size]
        self.train_das = das[:train_size]

        # add empty tree + empty DA to training data
        # (i.e. forbid the network to keep any of its outputs "always-on")
        train_size += 1
        self.train_trees.append(TreeData())
        empty_da = DA.parse('inform()')
        self.train_das.append(empty_da)

        self.train_order = range(len(self.train_trees))
        log_info('Using %d training instances.' % train_size)

        # initialize input features/embeddings
        if self.tree_embs:
            self.dict_size = self.tree_embs.init_dict(self.train_trees)
            self.X = np.array([
                self.tree_embs.get_embeddings(tree)
                for tree in self.train_trees
            ])
        else:
            self.tree_feats = Features(['node: presence t_lemma formeme'])
            self.tree_vect = DictVectorizer(sparse=False,
                                            binarize_numeric=True)
            self.X = [
                self.tree_feats.get_features(tree, {})
                for tree in self.train_trees
            ]
            self.X = self.tree_vect.fit_transform(self.X)

        # initialize output features
        self.da_feats = Features(['dat: dat_presence', 'svp: svp_presence'])
        self.da_vect = DictVectorizer(sparse=False, binarize_numeric=True)
        self.y = [
            self.da_feats.get_features(None, {'da': da})
            for da in self.train_das
        ]
        self.y = self.da_vect.fit_transform(self.y)

        # initialize I/O shapes
        self.input_shape = [list(self.X[0].shape)]
        self.num_outputs = len(self.da_vect.get_feature_names())

        # initialize NN classifier
        self._init_neural_network()
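The `Features`/`DictVectorizer` pair used here mirrors scikit-learn's dict vectorization; the same idea in a self-contained sketch using `sklearn.feature_extraction.DictVectorizer` (an analogy, not tgen's own vectorizer, and the feature names are hypothetical):

from sklearn.feature_extraction import DictVectorizer

feat_dicts = [{'dat_inform': 1.0, 'svp_food': 1.0},   # one dict per instance
              {'dat_request': 1.0}]
vect = DictVectorizer(sparse=False)
y = vect.fit_transform(feat_dicts)  # dense matrix, one column per feature name
num_outputs = len(vect.get_feature_names())  # get_feature_names_out in newer sklearn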
Example #5
    def can_generate_greedy(self, tree, da):
        """Check if the candidate generator can generate a given tree greedily, always
        pursuing the first viable path.

        This is for debugging purposes only.
        Uses `get_all_successors` and always goes on with the first one that increases coverage
        of the current tree.
        """
        self.init_run(da)
        cur_subtree = TreeData()
        found = True

        while found and cur_subtree != tree:
            found = False
            for succ in self.get_all_successors(cur_subtree):
                # use the first successor that is still a subtree of the target tree
                if tree.common_subtree_size(succ) == len(succ):
                    cur_subtree = succ
                    found = True
                    break

        # we have hit a dead end
        if cur_subtree != tree:
            log_info('Did not find tree: ' + str(tree) + ' for DA: ' + str(da))
            return False

        # everything alright
        log_info('Found tree: %s for DA: %s' % (str(tree), str(da)))
        return True
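The greedy coverage loop, reduced to a self-contained toy -- strings instead of trees, with a prefix test standing in for `common_subtree_size`:

def can_generate_greedy_toy(target, succs):
    """Follow the first successor that stays 'inside' the target string."""
    cur, found = '', True
    while found and cur != target:
        found = False
        for succ in succs(cur):
            if target.startswith(succ):   # analogue of the subtree test
                cur, found = succ, True
                break
    return cur == target

# successors append a single letter
print can_generate_greedy_toy('abc', lambda s: [s + c for c in 'xabc'])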
Example #6
    def _get_greedy_decoder_output(self, enc_inputs, dec_inputs, compute_cost=False):
        """Run greedy decoding with the given inputs; return decoder outputs and the cost
        (if required). For ensemble decoding, the greedy search is implemented as a beam
        search with a beam size of 1.

        @param enc_inputs: encoder inputs (list of token IDs)
        @param dec_inputs: decoder inputs (list of token IDs)
        @param compute_cost: if True, decoding cost is computed (the dec_inputs must be valid trees)
        @return: a tuple of the list of decoder outputs and the decoding cost (None if not required)
        """
        # TODO batches and cost computation not implemented
        assert len(enc_inputs[0]) == 1 and not compute_cost

        self._init_beam_search(enc_inputs)

        # for simplicity, this is implemented exactly like a beam search, but with a single path
        empty_tree_emb = self.tree_embs.get_embeddings(TreeData())
        dec_inputs = cut_batch_into_steps([empty_tree_emb])
        path = self.DecodingPath(stop_token_id=self.tree_embs.STOP, dec_inputs=[dec_inputs[0]])

        for step in xrange(len(dec_inputs)):
            out_probs, st = self._beam_search_step(path.dec_inputs, path.dec_states)
            path = path.expand(1, out_probs, st)[0]

            if path.dec_inputs[-1] == self.tree_embs.VOID:
                break  # stop decoding if we have reached the end of path

        # return just token IDs, ignore cost computation here
        return np.array(path.dec_inputs), None
Example #7
    def can_generate(self, tree, da):
        """Check if the candidate generator can generate a given tree at all.

        This is for debugging purposes only.
        Tests whether get_all_successors can always return a successor that leads to the given
        tree (only successors that are subtrees of the given tree are pushed onto the open list).
        """
        self.init_run(da)
        open_list = CandidateList({TreeData(): 1})
        found = False
        tree_no = 0

        while open_list and not found:
            cur_st, _ = open_list.pop()
            if cur_st == tree:
                found = True
                break
            for succ in self.get_all_successors(cur_st):
                tree_no += 1
                # only push on the open list if the successor is still a subtree of the target tree
                if tree.common_subtree_size(succ) == len(succ):
                    open_list.push(succ, len(succ))

        if not found:
            log_info('Did not find tree: ' + str(tree) + ' for DA: ' +
                     str(da) + (' (total %d trees)' % tree_no))
            return False
        log_info('Found tree: %s for DA: %s (as %d-th tree)' %
                 (str(tree), str(da), tree_no))
        return tree_no
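`CandidateList` behaves as a priority queue keyed by the pushed score; the exhaustive variant of the same toy can be sketched with plain `heapq` (again an illustration, not tgen's `CandidateList`):

import heapq

def can_generate_toy(target, succs):
    """Best-first search that only enqueues candidates still 'inside' the target."""
    open_list = [(0, '')]
    while open_list:
        _, cur = heapq.heappop(open_list)
        if cur == target:
            return True
        for succ in succs(cur):
            if target.startswith(succ):
                heapq.heappush(open_list, (len(succ), succ))
    return False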
Example #8
    def ids_to_tree(self, emb, postprocess=True):
        """Create a fake (flat) t-tree from token embeddings (IDs).

        @param emb: source embeddings (token IDs)
        @param postprocess: postprocess the sentence (capitalize sentence start, merge plural \
            markers)? True by default.
        @return: the corresponding tree
        """

        tree = TreeData()
        tokens = self.ids_to_strings(emb)

        for token in tokens:
            if token in ['<GO>', '<STOP>', '<VOID>']:
                continue
            tree.create_child(0, len(tree), NodeData(token, 'x'))

        return tree
Example #9
    def _greedy_decoding(self, enc_inputs, gold_trees):
        """Run greedy decoding with the given encoder inputs; optionally use given gold trees
        as decoder inputs for cost computation."""

        # prepare decoder inputs (either fake, or true but used just for cost computation)
        if gold_trees is None:
            empty_tree_emb = self.tree_embs.get_embeddings(TreeData())
            dec_inputs = cut_batch_into_steps([empty_tree_emb for _ in enc_inputs[0]])
        else:
            dec_inputs = cut_batch_into_steps([self.tree_embs.get_embeddings(tree)
                                               for tree in gold_trees])

        # run the decoding per se
        dec_output_ids, dec_cost = self._get_greedy_decoder_output(
                enc_inputs, dec_inputs, compute_cost=gold_trees is not None)

        return dec_output_ids, dec_cost
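`cut_batch_into_steps` (from `tgen.tf_ml`) turns batch-major sequences into step-major arrays -- essentially a transpose, as this numpy sketch of the assumed behavior shows:

import numpy as np

batch = np.array([[1, 2, 3],    # token IDs for instance 0
                  [4, 5, 6]])   # token IDs for instance 1
steps = batch.transpose()       # steps[t] = all instances' tokens at step t
# steps[0] == array([1, 4]): e.g. every instance's <GO> position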
Example #10
def asearch_gen(args):
    """A*search generation"""
    from pytreex.core.document import Document

    opts, files = getopt(args, 'e:d:w:c:s:')
    eval_file = None
    fname_ttrees_out = None
    cfg_file = None
    eval_selector = ''

    for opt, arg in opts:
        if opt == '-e':
            eval_file = arg
        elif opt == '-s':
            eval_selector = arg
        elif opt == '-d':
            set_debug_stream(file_stream(arg, mode='w'))
        elif opt == '-w':
            fname_ttrees_out = arg
        elif opt == '-c':
            cfg_file = arg

    if len(files) != 3:
        sys.exit('Invalid arguments.\n' + __doc__)
    fname_cand_model, fname_rank_model, fname_da_test = files

    log_info('Initializing...')
    candgen = RandomCandidateGenerator.load_from_file(fname_cand_model)
    ranker = PerceptronRanker.load_from_file(fname_rank_model)
    cfg = Config(cfg_file) if cfg_file else {}
    cfg.update({'candgen': candgen, 'ranker': ranker})
    tgen = ASearchPlanner(cfg)

    log_info('Generating...')
    das = read_das(fname_da_test)

    if eval_file is None:
        gen_doc = Document()
    else:
        eval_doc = read_ttrees(eval_file)
        if eval_selector == tgen.selector:
            gen_doc = Document()
        else:
            gen_doc = eval_doc

    # generate and evaluate
    if eval_file is not None:
        # generate + analyze open&close lists
        lists_analyzer = ASearchListsAnalyzer()
        for num, (da, gold_tree) in enumerate(zip(
                das, trees_from_doc(eval_doc, tgen.language, eval_selector)),
                                              start=1):
            log_debug("\n\nTREE No. %03d" % num)
            gen_tree = tgen.generate_tree(da, gen_doc)
            lists_analyzer.append(gold_tree, tgen.open_list, tgen.close_list)
            if gen_tree != gold_tree:
                log_debug("\nDIFFING TREES:\n" +
                          tgen.ranker.diffing_trees_with_scores(
                              da, gold_tree, gen_tree) + "\n")

        log_info('Gold tree BEST: %.4f, on CLOSE: %.4f, on ANY list: %.4f' %
                 lists_analyzer.stats())

        # evaluate the generated trees against the gold trees
        eval_ttrees = ttrees_from_doc(eval_doc, tgen.language, eval_selector)
        gen_ttrees = ttrees_from_doc(gen_doc, tgen.language, tgen.selector)

        log_info('Evaluating...')
        evaler = Evaluator()
        for eval_bundle, eval_ttree, gen_ttree, da in zip(
                eval_doc.bundles, eval_ttrees, gen_ttrees, das):
            # add some stats about the tree directly into the output file
            add_bundle_text(
                eval_bundle, tgen.language, tgen.selector + 'Xscore',
                "P: %.4f R: %.4f F1: %.4f" %
                p_r_f1_from_counts(*corr_pred_gold(eval_ttree, gen_ttree)))

            # collect overall stats
            evaler.append(eval_ttree, gen_ttree,
                          ranker.score(TreeData.from_ttree(eval_ttree), da),
                          ranker.score(TreeData.from_ttree(gen_ttree), da))
        # print overall stats
        log_info("NODE precision: %.4f, Recall: %.4f, F1: %.4f" %
                 evaler.p_r_f1())
        log_info("DEP  precision: %.4f, Recall: %.4f, F1: %.4f" %
                 evaler.p_r_f1(EvalTypes.DEP))
        log_info("Tree size stats:\n * GOLD %s\n * PRED %s\n * DIFF %s" %
                 evaler.size_stats())
        log_info("Score stats:\n * GOLD %s\n * PRED %s\n * DIFF %s" %
                 evaler.score_stats())
        log_info(
            "Common subtree stats:\n -- SIZE: %s\n -- ΔGLD: %s\n -- ΔPRD: %s" %
            evaler.common_substruct_stats())
    # just generate
    else:
        for da in das:
            tgen.generate_tree(da, gen_doc)

    # write output
    if fname_ttrees_out is not None:
        log_info('Writing output...')
        write_ttrees(gen_doc, fname_ttrees_out)
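Given the `getopt` spec above, a direct call takes two pickled models and a test DA file, plus the optional flags (all file names here are hypothetical):

asearch_gen(['-e', 'test.ttrees.gz',   # gold t-trees to evaluate against
             '-w', 'out.ttrees.gz',    # where to write the generated trees
             'candgen.pickle', 'percrank.pickle', 'test.das'])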
Example #11
    def _init_training(self, das, trees, data_portion):
        """Initialize training.

        Store input data, initialize 1-hot feature representations for input and output,
        transform the training data accordingly, and initialize the classification neural network.

        @param das: name of source file with training DAs, or list of DAs
        @param trees: name of source file with corresponding trees/sentences, or list of trees
        @param data_portion: portion of the training data to be used (0.0-1.0)
        """
        # read input from files or take it directly from parameters
        if not isinstance(das, list):
            log_info('Reading DAs from ' + das + '...')
            das = read_das(das)
        if not isinstance(trees, list):
            log_info('Reading t-trees from ' + trees + '...')
            ttree_doc = read_ttrees(trees)
            if self.mode == 'tokens':
                tokens = tokens_from_doc(ttree_doc, self.language,
                                         self.selector)
                trees = self._tokens_to_flat_trees(tokens)
            elif self.mode == 'tagged_lemmas':
                tls = tagged_lemmas_from_doc(ttree_doc, self.language,
                                             self.selector)
                trees = self._tokens_to_flat_trees(tls, use_tags=True)
            else:
                trees = trees_from_doc(ttree_doc, self.language, self.selector)
        elif self.mode in ['tokens', 'tagged_lemmas']:
            trees = self._tokens_to_flat_trees(
                trees, use_tags=self.mode == 'tagged_lemmas')

        # make training data smaller if necessary
        train_size = int(round(data_portion * len(trees)))
        self.train_trees = trees[:train_size]
        self.train_das = das[:train_size]

        # ignore contexts, if they are contained in the DAs
        if isinstance(self.train_das[0], tuple):
            self.train_das = [da for (context, da) in self.train_das]
        # delexicalize if DAs are lexicalized and we don't want that
        if self.delex_slots:
            self.train_das = [
                da.get_delexicalized(self.delex_slots) for da in self.train_das
            ]

        # add empty tree + empty DA to training data
        # (i.e. forbid the network to keep any of its outputs "always-on")
        train_size += 1
        self.train_trees.append(TreeData())
        empty_da = DA.parse('inform()')
        self.train_das.append(empty_da)

        self.train_order = range(len(self.train_trees))
        log_info('Using %d training instances.' % train_size)

        # initialize input features/embeddings
        if self.tree_embs:
            self.dict_size = self.tree_embs.init_dict(self.train_trees)
            self.X = np.array([
                self.tree_embs.get_embeddings(tree)
                for tree in self.train_trees
            ])
        else:
            self.tree_feats = Features(['node: presence t_lemma formeme'])
            self.tree_vect = DictVectorizer(sparse=False,
                                            binarize_numeric=True)
            self.X = [
                self.tree_feats.get_features(tree, {})
                for tree in self.train_trees
            ]
            self.X = self.tree_vect.fit_transform(self.X)

        # initialize output features
        self.da_feats = Features(['dat: dat_presence', 'svp: svp_presence'])
        self.da_vect = DictVectorizer(sparse=False, binarize_numeric=True)
        self.y = [
            self.da_feats.get_features(None, {'da': da})
            for da in self.train_das
        ]
        self.y = self.da_vect.fit_transform(self.y)
        log_info('Number of binary classes: %d.' %
                 len(self.da_vect.get_feature_names()))

        # initialize I/O shapes
        if not self.tree_embs:
            self.input_shape = list(self.X[0].shape)
        else:
            self.input_shape = self.tree_embs.get_embeddings_shape()
        self.num_outputs = len(self.da_vect.get_feature_names())

        # initialize NN classifier
        self._init_neural_network()
        # initialize the NN variables
        self.session.run(tf.global_variables_initializer())
Example #12
def to_treedata(t):
    if isinstance(t, TreeNode):
        return t.tree
    elif isinstance(t, T):
        return TreeData.from_ttree(t)
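Usage sketch (assuming `tree_node` is a tgen `TreeNode` and `tnode` a pytreex `T` node):

td1 = to_treedata(tree_node)  # TreeNode -> its underlying TreeData
td2 = to_treedata(tnode)      # pytreex t-node -> converted via TreeData.from_ttree
# any other input type falls through and returns None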
Example #13
from tgen.planner import CandidateList
from tgen.tree import TreeData, NodeData
import random
import zlib

random.seed(1206)

l = CandidateList()
for i in xrange(10000):
    #    l[str(i)] = random.randint(0, 100)
    #    l[str(random.randint(0,1000))] = random.randint(0, 100)
    #    l[(str(random.randint(0,1000)), str(random.randint(0,1000)))] = random.randint(0, 100)
    #    tree = TreeData()
    #    tree.create_child(0, 1, NodeData(str(random.randint(0, 1000)), str(random.randint(0, 1000))))
    #    l[tree] = random.randint(0, 100)
    tree = TreeData()
    for j in xrange(random.randint(1, 10)):
        tree.create_child(
            random.randint(0, len(tree) - 1),
            random.randint(0, 1) == 1,
            NodeData(str(random.randint(0, 1000)), str(random.randint(0, 1000))),
        )
    l[tree] = random.randint(0, 100)
x = []
while l:
    x.append(l.pop())
print zlib.crc32(str(x))
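This stress test is Python 2 (`xrange`, `print` statement); a sketch of the final check under Python 3, where `crc32` needs bytes (and `range` replaces `xrange`):

print(zlib.crc32(str(x).encode('utf-8')))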