示例#1
0
 def get_tree(n_inst):
     """Lazily yield one normalized parse tree per classification instance.

     The parsed CoNLL file is loaded without lemmas and is expected to hold
     exactly ``n_inst`` trees, i.e. one parse for every instance.

     :param n_inst: number of classification instances to pair with parses
     :return: generator over the results of ``self.normalize_tree``, in corpus order
     """
     corpus = ConllCorpus(
         file_parsed,
         howbig=1000000,
         lemmas=False,
         eval_spec_rels=self.eval_spec_rel,
         dirname=self.dirname,
         lr=self.lr,
     )
     corpus.prepare_trees()
     # every instance must come with a parse, so the two counts have to agree
     assert len(corpus.train) == n_inst, "Number of parses not equal to number of classification instances."
     # the tree position doubles as the index handed to normalize_tree
     for position in range(n_inst):
         yield self.normalize_tree(corpus.train[position], corpus.x_dict, position)
示例#2
0
 def get_tree(n_inst):
     """Lazily yield a normalized parse tree, or ``None``, for each instance.

     Not every instance has a corresponding tree due to errors in parsing.
     The 0-based instance ``i`` counts as parsed when ``i + 1`` is contained
     in the file-id set built by ``ConllFilesIndex``; parsed instances consume
     the corpus trees in order.

     :param n_inst: number of classification instances
     :return: generator yielding ``self.normalize_tree(...)`` for parsed
         instances and ``None`` for the others
     """
     corpus = ConllCorpus(
         file_parsed,
         howbig=1000000,
         lemmas=True,
         eval_spec_rels=self.eval_spec_rel,
         dirname=self.dirname,
         lr=self.lr,
     )
     corpus.prepare_trees()

     parsed_index = ConllFilesIndex(files_parsed_path)
     parsed_index.create_ids_set()

     # position of the next unused tree in corpus.train
     next_tree = 0
     for file_id in range(1, n_inst + 1):
         if file_id not in parsed_index.fileids:
             # no parse for this instance
             yield None
             continue
         normalized = self.normalize_tree(corpus.train[next_tree], corpus.x_dict, next_tree)
         next_tree += 1
         yield normalized
示例#3
0
 def get_tree(n_inst):
     """Generate the normalized parse tree of every classification instance.

     Loads the parsed CoNLL corpus (no lemmas), checks that it contains as
     many trees as there are instances, and then normalizes the trees one by
     one as the generator is consumed.

     :param n_inst: expected number of trees / classification instances
     :return: generator of ``self.normalize_tree`` results in corpus order
     """
     parsed = ConllCorpus(file_parsed, howbig=1000000, lemmas=False,
                          eval_spec_rels=self.eval_spec_rel,
                          dirname=self.dirname, lr=self.lr)
     parsed.prepare_trees()

     # parses and instances are matched purely by position, so counts must agree
     assert len(parsed.train) == n_inst, "Number of parses not equal to number of classification instances."

     for tree_no in range(n_inst):
         normalized = self.normalize_tree(parsed.train[tree_no], parsed.x_dict, tree_no)
         yield normalized
示例#4
0
 def get_tree(n_inst):
     """Lazily yield the raw parse tree, or ``None``, for each instance.

     Not every instance has a corresponding tree due to errors in parsing.
     The 0-based instance ``i`` counts as parsed when ``i + 1`` is contained
     in the file-id set built by ``ConllFilesIndex``. Trees are yielded as
     stored in the corpus (no normalization), and the corpus vocabulary is
     stored on ``self.tree_vocab`` once the generator starts running.

     :param n_inst: number of classification instances
     :return: generator yielding a tree for parsed instances, else ``None``
     """
     corpus = ConllCorpus(file_parsed, howbig=1000000, lemmas=True, eval_spec_rels=self.eval_spec_rel,
                          dirname=self.dirname, lr=self.lr)
     corpus.prepare_trees()
     # keep the corpus vocabulary available on the enclosing object
     self.tree_vocab = corpus.x_dict

     parsed_index = ConllFilesIndex(files_parsed_path)
     parsed_index.create_ids_set()

     # number of trees handed out so far == position of the next tree
     consumed = 0
     for file_id in range(1, n_inst + 1):
         if file_id not in parsed_index.fileids:
             # no parse for this instance
             yield None
             continue
         tree = corpus.train[consumed]
         consumed += 1
         yield tree
示例#5
0
    # NOTE(review): this is a function-body fragment; `args`, `path` and `infile`
    # are defined by the enclosing code, which is not visible here.

    # obtain model parameters
    # (read_params_from_path yields seven values; the third to fifth are ignored)
    n_states, n_obs, _, _, _, omit_class_cond, omit_emis_cond = read_params_from_path(path)
    lemmas = args.use_lemmas
    eval_spec_rel = args.synfunc
    # fixed to False and only forwarded to ConllCorpus as `lr`
    lr = False

    # load model: four arrays read from "<path>ip.npy", "<path>tp.npy",
    # "<path>fp.npy" and "<path>ep.npy". `path` is used as a plain string prefix,
    # so it presumably has to end with a path separator -- TODO confirm.
    params_fixed = (np.load("{}ip.npy".format(path)),
                    np.load("{}tp.npy".format(path)),
                    np.load("{}fp.npy".format(path)),
                    np.load("{}ep.npy".format(path)))


    # prepare sents for decoding
    sents = ConllCorpus(infile, howbig=1000000, lemmas=lemmas, eval_spec_rels=eval_spec_rel, dirname=path, lr=lr)
    sents.prepare_trees()

    # pick the model class: HMRTM when eval_spec_rel is truthy (it additionally
    # receives R = len(sents.r_dict) and the two omit_* flags), otherwise HMTM;
    # both are built from the loaded parameters with writeout disabled
    h = HMRTM(n_states, n_obs, R=len(sents.r_dict), params=params_fixed, writeout=False, dirname=path,
              omit_class_cond=omit_class_cond, omit_emis_cond=omit_emis_cond) if eval_spec_rel else \
        HMTM(n_states, n_obs, params=params_fixed, writeout=False, dirname=path)

    # write one "<label> <representation>" line per non-root node and an empty
    # line after each tree
    with open(args.outfile, "w") as out:
        for tree in sents.train:
            # obtain posteriors for all nodes (result is indexed by node.index below)
            node_to_rep = h.posterior_decode(tree, cont=True)
            # get words: map each node name back to its label via the corpus dictionary
            for node in tree.get_nonroots():
                out.write(
                    "{} {}\n".format(sents.x_dict.get_label_name(node.name), nparr_to_str(node_to_rep[node.index])))
            out.write("\n")