Example #1
 def __init__(self, model_dir, beam_size, iter='FINAL'):
     self.feats_extractor = FeaturesExtractor()
     weight_name = 'weight.' + iter
     weight_path = os.path.join(model_dir, weight_name)
     # load already trained model
     self.perceptron = MulticlassModel(weight_path)
     self.beam_size = beam_size
Example #2
 def __init__(self, model_path):
     with open(model_path, 'r') as f:
         self.beam_size = int(f.readline().strip())
         label_hash_file = f.readline().strip()
         self.label_hash = load_hash_from_file(label_hash_file)
         weights_file_path = f.readline().strip() + ".FINAL"
         self.perceptron = MulticlassModel(weights_file_path)
Example #3
 def load(cls, fname):
     model = MulticlassModel(fname)
     labelmap = []
     for i, line in enumerate(file(fname + ".lmap")):
         label = line.strip()
         labelmap.append(label)
     return cls(model, labelmap)
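The load() classmethod above expects two files side by side: the weights file itself, readable by MulticlassModel, and a companion fname + ".lmap" file with one label per line, where the line number gives the label id. A minimal sketch of writing such a label map; the file name and the label set are placeholders, not part of the original code:

# Hypothetical: produce a label map in the format load() above reads;
# line i holds the label whose id is i.
labels = ["SHIFT", "LEFT", "RIGHT"]          # placeholder label set
with open("parser.weights.lmap", "w") as out:
    for label in labels:
        out.write(label + "\n")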
Example #4
def test(sents,
         model,
         iter="FINAL",
         quiet=False,
         ignore_punc=False,
         labeled=True):
    fext = model.featureExtractor()
    import time
    good = 0.0
    bad = 0.0
    complete = 0.0
    if labeled:
        from ml.sml import SparseMulticlassModel
        m = SparseMulticlassModel(file(model.weightsFile(iter)))
    else:
        m = MulticlassModel(model.weightsFile(iter))
    start = time.time()
    parser = Parser(m, fext, Oracle())
    if labeled:
        parser.id_to_action_mapper = pickle.load(
            file(model.weightsFile("amap")))
    scores = []
    for sent in sents:
        sent_good = 0.0
        sent_bad = 0.0
        no_mistakes = True
        if not quiet:
            print "@@@", good / (good + bad + 1)
        if labeled:
            deps = parser.parse_labeled(sent)
        else:
            deps = parser.parse(sent)
        sent = deps.annotate(sent)
        for tok in sent:
            if not quiet:
                if labeled:
                    print tok['id'], tok['form'], "_", tok['tag'], tok['tag'], "_", tok['pparent'], tok['pprel'], "_ _"
                else:
                    print tok['id'], tok['form'], "_", tok['tag'], tok['tag'], "_", tok['pparent'], "_ _ _"
            if ignore_punc and tok['form'][0] in "'`,.-;:!?{}": continue
            if tok['parent'] == tok['pparent']:
                good += 1
                sent_good += 1
            else:
                bad += 1
                sent_bad += 1
                no_mistakes = False
        if not quiet: print
        if no_mistakes: complete += 1
        scores.append((sent_good / (sent_good + sent_bad)))

    if not quiet:
        print "time(seconds):", time.time() - start
        print "num sents:", len(sents)
        print "complete:", complete / len(sents)
        print "macro:", sum(scores) / len(scores)
        print "micro:", good / (good + bad)
    return good / (good + bad), complete / len(sents)
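A minimal driver for test() might look like the sketch below; io.conll_to_sents and Model.load are used the same way in Examples #5 and #7 on this page, while the file names are placeholders.

# Hypothetical driver; file names are placeholders and the project modules
# providing io and Model are assumed to be importable.
sents = list(io.conll_to_sents(file("dev.conll")))   # CoNLL reader as in Example #7
model = Model.load("parser.model", "FINAL")          # as in Example #5
uas, complete = test(sents, model, iter="FINAL", quiet=True, labeled=False)
print "micro accuracy:", uas, "complete sentences:", complete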
Example #5
def make_parser(modelfile, iter):
    weightsFile = "%s.weights" % (modelfile)
    modelfile = "%s.model" % (modelfile)
    model = Model.load(modelfile, iter)
    fext = model.featureExtractor()
    m = MulticlassModel(model.weightsFile(iter))
    parser = Parser(m, fext, Oracle())
    return parser
Example #6
 def load(cls, model_path):
     model_dir, model_file = split_path(model_path)
     with open(model_path, 'r') as f:
         beam_size = int(f.readline().strip())
         label_hash_file = f.readline().strip()
         label_hash = load_hash_from_file(label_hash_file)
         weights_file_path = f.readline().strip() + ".FINAL"
         perceptron = MulticlassModel(weights_file_path)
         return cls(model_path, beam_size, perceptron, label_hash)
Example #7
def parse_corpus(corpus_fname, weights_fname, features_fname):
    # build the feature extractor from the given module and load the trained weights
    fext = moduleloader.load_module(features_fname).FeaturesExtractor()
    m = MulticlassModel(weights_fname)
    parser = Parser(m, fext, None)
    parsed = []
    for sent in io.conll_to_sents(file(corpus_fname)):
        deps = parser.parse(sent)
        sent = deps.annotate(sent)
        parsed.append(sent)
    return parsed
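Unlike the other examples, parse_corpus returns the annotated sentences instead of printing them. A hedged usage sketch; the three file names are placeholders, and the output loop mirrors the printing done in Examples #9 and #11:

# Hypothetical call; input.conll, parser.weights and features.py are placeholders.
parsed = parse_corpus("input.conll", "parser.weights", "features.py")
for sent in parsed:
    for tok in sent:
        print tok['id'], tok['form'], "_", tok['tag'], tok['tag'], "_", tok['pparent'], "_ _ _"
    print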
Example #8
class BEParsingModel:
    def __init__(self, model_path):
        with open(model_path, 'r') as f:
            self.beam_size = int(f.readline().strip())
            label_hash_file = f.readline().strip()
            self.label_hash = load_hash_from_file(label_hash_file)
            weights_file_path = f.readline().strip() + ".FINAL"
            self.perceptron = MulticlassModel(weights_file_path)

    def get_scores(self, features):
        return self.perceptron.get_scores(features)
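Examples #2, #6 and #8 all read the same three-line model description file: the beam size, the path of the label-hash file, and the prefix of the weight files (".FINAL" is appended to it). A sketch of creating such a file, assuming the referenced label-hash and weight files already exist; all names here are placeholders:

# Hypothetical: write a model description in the format the constructors above expect.
with open("be_model.txt", "w") as out:
    out.write("8\n")                # beam size
    out.write("labels.hash\n")      # read by load_hash_from_file
    out.write("parser.weights\n")   # prefix; weights are loaded from parser.weights.FINAL

model = BEParsingModel("be_model.txt")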
Example #9
def parse(sents, model, iter="FINAL", beam_width=1):
    fext = model.featureExtractor()
    m = MulticlassModel(model.weightsFile(iter))
    # m = MultiClass(model.weightsFile(iter))
    parser = Parser(m, fext, Oracle(), beam_width)
    for sent in sents:
        deps = parser.parse(sent)
        sent = deps.annotate(sent)
        for tok in sent:
            print tok['id'], tok['form'], "_", tok['tag'], tok['tag'], "_", tok['pparent'], "_ _ _"
        print
Example #10
class TestModel(object):
    """
    Model use to test
    """
    def __init__(self, model_dir, beam_size, iter='FINAL'):
        self.feats_extractor = FeaturesExtractor()
        weight_name = 'weight.' + iter
        weight_path = os.path.join(model_dir, weight_name)
        # load already trained model
        self.perceptron = MulticlassModel(weight_path)
        self.beam_size = beam_size

    def featex(self, pending, deps, i):
        return self.feats_extractor.extract(pending, deps, i)

    def get_score(self, features):
        return self.perceptron.get_scores(features)
Example #11
def test(sents, model, iter="FINAL", quiet=False, ignore_punc=False, beam_width=1):
    fext = model.featureExtractor()
    import time
    good = 0.0
    bad = 0.0
    complete = 0.0
    m = MulticlassModel(model.weightsFile(iter))
    # m = MultiClass(model.weightsFile(iter))
    start = time.time()
    parser = Parser(m, fext, Oracle(), beam_width)
    scores = []
    for sent in sents:
        sent_good = 0.0
        sent_bad = 0.0
        no_mistakes = True
        if not quiet:
            print "@@@", good / (good + bad + 1)
        deps = parser.parse(sent)
        sent = deps.annotate(sent)
        for tok in sent:
            if not quiet: print tok['id'], tok['form'], "_", tok['tag'], tok['tag'], "_", tok['pparent'], "_ _ _"
            if ignore_punc and tok['form'][0] in "'`,.-;:!?{}": continue
            if tok['parent'] == tok['pparent']:
                good += 1
                sent_good += 1
            else:
                bad += 1
                sent_bad += 1
                no_mistakes = False
        if not quiet: print
        if no_mistakes: complete += 1
        scores.append((sent_good / (sent_good + sent_bad)))

    if not quiet:
        print "time(seconds):", time.time() - start
        print "num sents:", len(sents)
        print "complete:", complete / len(sents)
        print "macro:", sum(scores) / len(scores)
        print "micro:", good / (good + bad)
    return good / (good + bad), complete / len(sents)