def __init__(self, model_dir, beam_size, iter='FINAL'):
    self.feats_extractor = FeaturesExtractor()
    weight_name = 'weight.' + iter
    weight_path = os.path.join(model_dir, weight_name)
    # load already trained model
    self.perceptron = MulticlassModel(weight_path)
    self.beam_size = beam_size
def __init__(self, model_path):
    with open(model_path, 'r') as f:
        self.beam_size = int(f.readline().strip())
        label_hash_file = f.readline().strip()
        self.label_hash = load_hash_from_file(label_hash_file)
        weights_file_path = f.readline().strip() + ".FINAL"
        self.perceptron = MulticlassModel(weights_file_path)
@classmethod
def load(cls, fname):
    model = MulticlassModel(fname)
    labelmap = []
    for line in file(fname + ".lmap"):
        labelmap.append(line.strip())
    return cls(model, labelmap)
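# A sketch of the on-disk layout load() implies (label names here are
# illustrative, not taken from the source): next to the weights file sits a
# ".lmap" sidecar with one dependency label per line, so line i gives the
# label string for class index i predicted by the MulticlassModel. For
# example, "parser.weights.lmap" might contain:
#
#   nsubj
#   dobj
#   prep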
def test(sents, model, iter="FINAL", quiet=False, ignore_punc=False, labeled=True):
    import time
    import pickle  # needed below for the action mapper; missing from the original snippet
    fext = model.featureExtractor()
    good = 0.0
    bad = 0.0
    complete = 0.0
    if labeled:
        from ml.sml import SparseMulticlassModel
        m = SparseMulticlassModel(file(model.weightsFile(iter)))
    else:
        m = MulticlassModel(model.weightsFile(iter))
    start = time.time()
    parser = Parser(m, fext, Oracle())
    if labeled:
        parser.id_to_action_mapper = pickle.load(file(model.weightsFile("amap")))
    scores = []
    for sent in sents:
        sent_good = 0.0
        sent_bad = 0.0
        no_mistakes = True
        if not quiet:
            # running micro accuracy so far; +1 avoids division by zero on the first sentence
            print "@@@", good / (good + bad + 1)
        if labeled:
            deps = parser.parse_labeled(sent)
        else:
            deps = parser.parse(sent)
        sent = deps.annotate(sent)
        for tok in sent:
            if not quiet:
                # CoNLL-X style columns; the tag fills both the coarse and fine tag slots
                if labeled:
                    print tok['id'], tok['form'], "_", tok['tag'], tok['tag'], "_", tok['pparent'], tok['pprel'], "_ _"
                else:
                    print tok['id'], tok['form'], "_", tok['tag'], tok['tag'], "_", tok['pparent'], "_ _ _"
            if ignore_punc and tok['form'][0] in "'`,.-;:!?{}":
                continue
            if tok['parent'] == tok['pparent']:
                good += 1
                sent_good += 1
            else:
                bad += 1
                sent_bad += 1
                no_mistakes = False
        if not quiet:
            print
        if no_mistakes:
            complete += 1
        scores.append(sent_good / (sent_good + sent_bad))
    if not quiet:
        print "time(seconds):", time.time() - start
        print "num sents:", len(sents)
        print "complete:", complete / len(sents)
        print "macro:", sum(scores) / len(scores)
        print "micro:", good / (good + bad)
    return good / (good + bad), complete / len(sents)
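# A minimal usage sketch for test(), assuming sentences come from
# io.conll_to_sents as in parse_corpus below and that the model object is
# produced by Model.load as in make_parser below; "dev.conll", "eng.model"
# and the function name are placeholders, not from the source.
def evaluate_on_dev():
    sents = list(io.conll_to_sents(file("dev.conll")))
    model = Model.load("eng.model", "FINAL")
    uas, complete = test(sents, model, iter="FINAL", quiet=True, labeled=False)
    print "UAS:", uas, "complete:", complete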
def make_parser(modelfile, iter):
    weightsFile = "%s.weights" % modelfile  # note: unused; model.weightsFile(iter) is used instead
    modelfile = "%s.model" % modelfile
    model = Model.load(modelfile, iter)
    fext = model.featureExtractor()
    m = MulticlassModel(model.weightsFile(iter))
    parser = Parser(m, fext, Oracle())
    return parser
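# Usage sketch for make_parser(): "eng" and "dev.conll" are placeholder names,
# so the model description read would be "eng.model" and the weights come from
# whatever path the loaded Model reports for the "FINAL" iteration.
parser = make_parser("eng", "FINAL")
for sent in io.conll_to_sents(file("dev.conll")):
    deps = parser.parse(sent)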
@classmethod
def load(cls, model_path):
    model_dir, model_file = split_path(model_path)  # note: unused in this method
    with open(model_path, 'r') as f:
        beam_size = int(f.readline().strip())
        label_hash_file = f.readline().strip()
        label_hash = load_hash_from_file(label_hash_file)
        weights_file_path = f.readline().strip() + ".FINAL"
        perceptron = MulticlassModel(weights_file_path)
    return cls(model_path, beam_size, perceptron, label_hash)
def parse_corpus(corpus_fname, weights_fname, features_fname):
    fext = moduleloader.load_module(features_fname).FeaturesExtractor()
    m = MulticlassModel(weights_fname)
    parser = Parser(m, fext, None)
    parsed = []
    for sent in io.conll_to_sents(file(corpus_fname)):
        deps = parser.parse(sent)
        sent = deps.annotate(sent)
        parsed.append(sent)
    return parsed
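# Usage sketch (all paths are placeholders): parse a corpus and print it in
# the same ten-column format used by parse() below, with a blank line between
# sentences.
for sent in parse_corpus("test.conll", "weights.FINAL", "features.py"):
    for tok in sent:
        print tok['id'], tok['form'], "_", tok['tag'], tok['tag'], "_", tok['pparent'], "_ _ _"
    print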
class BEParsingModel:
    def __init__(self, model_path):
        with open(model_path, 'r') as f:
            self.beam_size = int(f.readline().strip())
            label_hash_file = f.readline().strip()
            self.label_hash = load_hash_from_file(label_hash_file)
            weights_file_path = f.readline().strip() + ".FINAL"
            self.perceptron = MulticlassModel(weights_file_path)

    def get_scores(self, features):
        return self.perceptron.get_scores(features)
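# The constructor above implies a three-line model description file. A hedged
# example ("be.model" and the referenced paths are assumptions, not from the
# source):
#
#   8                   <- beam size
#   model/label.hash    <- argument passed to load_hash_from_file
#   model/weights       <- prefix to which ".FINAL" is appended
#
# model = BEParsingModel("be.model")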
def parse(sents, model, iter="FINAL", beam_width=1):
    fext = model.featureExtractor()
    m = MulticlassModel(model.weightsFile(iter))
    parser = Parser(m, fext, Oracle(), beam_width)
    for sent in sents:
        deps = parser.parse(sent)
        sent = deps.annotate(sent)
        for tok in sent:
            print tok['id'], tok['form'], "_", tok['tag'], tok['tag'], "_", tok['pparent'], "_ _ _"
        print  # blank line separates sentences in CoNLL output
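# Illustrative output of parse() for one sentence (token values made up):
# CoNLL-X style columns id, form, lemma, cpostag, postag, feats, predicted
# head, and placeholder deprel/phead/pdeprel columns, terminated by a blank
# line.
#
#   1 The _ DT DT _ 2 _ _ _
#   2 dog _ NN NN _ 3 _ _ _
#   3 barks _ VBZ VBZ _ 0 _ _ _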
class TestModel(object):
    """Model used at test time."""

    def __init__(self, model_dir, beam_size, iter='FINAL'):
        self.feats_extractor = FeaturesExtractor()
        weight_name = 'weight.' + iter
        weight_path = os.path.join(model_dir, weight_name)
        # load already trained model
        self.perceptron = MulticlassModel(weight_path)
        self.beam_size = beam_size

    def featex(self, pending, deps, i):
        return self.feats_extractor.extract(pending, deps, i)

    def get_score(self, features):
        return self.perceptron.get_scores(features)
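# Usage sketch for TestModel: 'model_dir' is a placeholder directory that is
# expected to contain a 'weight.FINAL' file. featex/get_score are called per
# parse state; pending, deps and i would come from the beam-search loop,
# which is outside this snippet, so those calls are shown commented out.
model = TestModel('model_dir', beam_size=8)
# feats = model.featex(pending, deps, i)
# scores = model.get_score(feats)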
def test(sents, model, iter="FINAL", quiet=False, ignore_punc=False, beam_width=1):
    import time
    fext = model.featureExtractor()
    good = 0.0
    bad = 0.0
    complete = 0.0
    m = MulticlassModel(model.weightsFile(iter))
    start = time.time()
    parser = Parser(m, fext, Oracle(), beam_width)
    scores = []
    for sent in sents:
        sent_good = 0.0
        sent_bad = 0.0
        no_mistakes = True
        if not quiet:
            print "@@@", good / (good + bad + 1)
        deps = parser.parse(sent)
        sent = deps.annotate(sent)
        for tok in sent:
            if not quiet:
                print tok['id'], tok['form'], "_", tok['tag'], tok['tag'], "_", tok['pparent'], "_ _ _"
            if ignore_punc and tok['form'][0] in "'`,.-;:!?{}":
                continue
            if tok['parent'] == tok['pparent']:
                good += 1
                sent_good += 1
            else:
                bad += 1
                sent_bad += 1
                no_mistakes = False
        if not quiet:
            print
        if no_mistakes:
            complete += 1
        scores.append(sent_good / (sent_good + sent_bad))
    if not quiet:
        print "time(seconds):", time.time() - start
        print "num sents:", len(sents)
        print "complete:", complete / len(sents)
        print "macro:", sum(scores) / len(scores)
        print "micro:", good / (good + bad)
    return good / (good + bad), complete / len(sents)