# Exemplo n.º 1
# 0
 def setup_model(self, corpus_file):
     """Index dependency labels from *corpus_file* and create the perceptron.

     Collects every label produced by get_label_conll(), sizes the
     multiclass perceptron at two classes per label (presumably one per
     arc direction -- TODO confirm), and persists the model via save().
     """
     # index dependency label
     for label in get_label_conll(corpus_file):
         self.label_hash.add(label)
     # two classes per indexed label
     n_class = len(self.label_hash) * 2
     self.perceptron = MultitronParameters(n_class)
     # persist the model configuration to disk
     self.save()
# Exemplo n.º 2
# 0
 def __init__(self, model_dir, beam_size):
     """Set up a training model that writes its parameters under *model_dir*.

     :param model_dir: directory where model weights are saved
     :param beam_size: beam width used during training
     """
     # directory used to save the model
     self.model_dir = model_dir
     # object used to extract features from the pending list
     self.feats_extractor = FeaturesExtractor()
     self.beam_size = beam_size
     # create model_dir if it does not exist yet
     if not os.path.isdir(self.model_dir):
         os.makedirs(self.model_dir)
     # A linear classifier with 2 classes:
     # 0: is left
     # 1: is right
     self.perceptron = MultitronParameters(2)
# Exemplo n.º 3
# 0
def train(sents,
          model,
          dev=None,
          ITERS=20,
          save_every=None,
          explore_policy=None,
          shuffle_sents=True):
    fext = model.featureExtractor()
    oracle = CostOracle()
    scorer = MultitronParameters(2)
    parser = Parser(scorer, fext, oracle)
    for ITER in xrange(1, ITERS + 1):
        parser.cumcost = 0
        print "Iteration", ITER, "[",
        if shuffle_sents: random.shuffle(sents)
        for i, sent in enumerate(sents):
            if i % 100 == 0:
                print i, "(%s)" % parser.cumcost,
                sys.stdout.flush()
            parser.train(sent, ITER, explore_policy)
        print "]"
        if save_every and (ITER % save_every == 0):
            print "saving weights at iter", ITER
            parser.scorer.dump_fin(file(model.weightsFile(ITER), "w"))
            if dev:
                print "testing dev"
                print "\nscore: %s" % (test(
                    dev, model, ITER, quiet=True, labeled=False), )
    parser.scorer.dump_fin(file(model.weightsFile("FINAL"), "w"))
class LabelerTrainer:  #{{{
    """Trains a multiclass perceptron mapping feature sets to labels."""

    def __init__(self, labels):
        """:param labels: sequence of label strings; position = class number."""
        # class number -> label, and the inverse mapping
        self._num2label = labels
        self._label2num = dict([(l, n) for n, l in enumerate(labels)])
        self._params = MultitronParameters(len(labels))

    def get_label(self, features, cls, real_label):
        '''
      cls: reserved for future / compatibility
      '''
        real_num = self._label2num[real_label]
        #features = ["%s_%s" % (cls,f) for f in features]
        # perform up to 10 updates toward the real label;
        # usually far fewer are needed before the prediction is correct
        for x in xrange(10):
            pred = self._params.update(real_num, features)
            if pred == real_num: break
        return self._num2label[pred]

    def save(self, fname):
        """Dump weights to *fname* and the label map to *fname*.lmap."""
        # context managers close both files even on error
        # (the original never closed the weights file handle)
        with open(fname, "w") as fout:
            self._params.dump_fin(fout)
        with open(fname + ".lmap", "w") as fout:
            for label in self._num2label:
                fout.write("%s\n" % label)
# Exemplo n.º 5
# 0
class BETrainingModel:
    """Training-time model: label index, beam size, and perceptron weights.

    All files are written under ``model_path`` using the model name plus
    an extension (see :meth:`create_file_with_extension`).
    """

    def __init__(self, model_path='default_model', beam_size=1,
                 perceptron=None, label_hash=None):
        """Create a model rooted at *model_path*.

        :param model_path: path split into directory and model name
        :param beam_size: beam width used during decoding
        :param perceptron: optional pre-built classifier (used by load())
        :param label_hash: optional pre-built label index (used by load())
        """
        self.model_path, self.model_name = split_path(model_path)
        self.beam_size = beam_size
        # perceptron is built later by setup_model() unless supplied here
        self.perceptron = perceptron
        self.label_hash = label_hash if label_hash is not None else HashTable()

    def save(self):
        """Write the label index and a manifest describing the model."""
        labels_file = self.create_file_with_extension('label')
        self.label_hash.save_file(labels_file)
        with open(self.create_file_with_extension(), 'w') as model_file:
            model_file.write(str(self.beam_size) + '\n')
            model_file.write(str(labels_file) + '\n')
            model_file.write(
                str(self.create_file_with_extension('weights')) + '\n')

    def setup_model(self, corpus_file):
        """Index all dependency labels in *corpus_file*, init the perceptron."""
        # index dependency label
        for label in get_label_conll(corpus_file):
            self.label_hash.add(label)
        # two classes per label -- presumably one per arc direction
        n_class = len(self.label_hash) * 2
        self.perceptron = MultitronParameters(n_class)
        self.save()

    def save_weight_file(self, iter):
        """Dump current perceptron weights tagged with iteration *iter*."""
        # str(iter) also accepts integer iteration numbers (the original
        # crashed on ints); the context manager closes the handle.
        weights_file_path = self.create_file_with_extension(
            'weights.' + str(iter))
        with open(weights_file_path, 'w') as weights_file:
            self.perceptron.dump_fin(weights_file)

    def update_perceptron_counter(self):
        """Advance the perceptron's internal count of updates."""
        self.perceptron.tick()

    def create_file_with_extension(self, ext=''):
        """Return model_path/model_name.<ext> (trailing '.' when ext='')."""
        return os.path.join(self.model_path, self.model_name + '.' + ext)

    def get_scores(self, features):
        """Return per-class scores for *features*."""
        return self.perceptron.get_scores(features)

    def update_paramaters(self, features, cls, value):
        # name kept despite the 'parameters' typo -- callers may rely on it
        self.perceptron.add(features, cls, value)

    @classmethod
    def load(cls, model_path):
        """Rebuild a model from the manifest written by save().

        BUG FIX: the original __init__ could not accept the perceptron
        and label_hash arguments passed here, so load() always raised
        TypeError; __init__ now takes them as optional parameters.
        """
        model_dir, model_file = split_path(model_path)
        with open(model_path, 'r') as f:
            beam_size = int(f.readline().strip())
            label_hash_file = f.readline().strip()
            label_hash = load_hash_from_file(label_hash_file)
            weights_file_path = f.readline().strip() + ".FINAL"
            perceptron = MulticlassModel(weights_file_path)
            return cls(model_path, beam_size, perceptron, label_hash)
# Exemplo n.º 6
# 0
def train(sents, model, dev=None, ITERS=20, save_every=None, beam_width=1):
    fext = model.featureExtractor()
    oracle = Oracle()
    scorer = MultitronParameters(2)
    # scorer = Perceptron(2,5000)
    parser = Parser(scorer, fext, oracle, beam_width)
    for ITER in xrange(1, ITERS + 1):
        print "Iteration ",ITER,"[",
        for i, sent in enumerate(sents):
            if i % 100 == 0:
                print i
                sys.stdout.flush()
            parser.train(sent)
        print "]"
        if save_every and (ITER % save_every == 0):
            print "saving weights at iter", ITER
            parser.scorer.dump_fin(file(model.weightsFile(ITER), "w"))
            # if dev:
            #     print "testing dev"
            #     print "\nscore: %s" % (test(dev, model, ITER, quiet=True),)
        parser.scorer.dump_fin(file(model.weightsFile("FINAL"), "w"))
# Exemplo n.º 7
# 0
class TrainModel(object):
    """
    A model used in the training phase.
    """
    def __init__(self, model_dir, beam_size):
        """:param model_dir: directory where weights are saved
        :param beam_size: beam width used during training
        """
        # directory used to save the model
        self.model_dir = model_dir
        # object used to extract features from the pending list
        self.feats_extractor = FeaturesExtractor()
        self.beam_size = beam_size
        # create model_dir if it does not exist yet
        if not os.path.isdir(self.model_dir):
            os.makedirs(self.model_dir)
        # A linear classifier with 2 classes:
        # 0: is left
        # 1: is right
        self.perceptron = MultitronParameters(2)

    def update(self, neg_state, pos_state):
        """
        Reward the features that led to the correct action and
        penalize the features that led to the wrong action.
        :param neg_state: dict with 'features' and 'cls' of the wrong action
        :param pos_state: dict with 'features' and 'cls' of the correct action
        :return: None
        """
        # tick() increments the perceptron's count of updates
        self.perceptron.tick()
        # features that gave the correct action and its class
        pos_feats = pos_state['features']
        pos_cls = pos_state['cls']
        # reward: update parameters by plus one
        self.perceptron.add(pos_feats, pos_cls, 1)
        # features that gave the wrong action and its class
        neg_feats = neg_state['features']
        neg_cls = neg_state['cls']
        # penalize: update parameters by minus one
        self.perceptron.add(neg_feats, neg_cls, -1)

    def save(self, iter):
        """Dump the perceptron parameters for iteration *iter*."""
        weight_file = 'weight.%s' % iter
        weight_file_path = os.path.join(self.model_dir, weight_file)
        # context manager closes the file (the original leaked the handle)
        with open(weight_file_path, 'w') as weight_file_obj:
            self.perceptron.dump_fin(weight_file_obj)

    def tick(self):
        """Advance the perceptron's update counter."""
        self.perceptron.tick()

    def featex(self, pending, deps, i):
        # called by the parser object
        return self.feats_extractor.extract(pending, deps, i)

    def get_score(self, features):
        # return a dict of scores keyed by class
        return self.perceptron.get_scores(features)
 def __init__(self, labels):
     """Initialize the label <-> class-number mappings and the perceptron.

     :param labels: sequence of label strings; list position = class number
     """
     # class number -> label string
     self._num2label = labels
     # label string -> class number (inverse of the list above)
     self._label2num = dict([(l, n) for n, l in enumerate(labels)])
     self._params = MultitronParameters(len(labels))