Example #1
 def __init__(self,
              e0: numbers.Real = 0.01,
              mom: numbers.Real = 0.9,
              skip_noisy: bool = False) -> None:
     super().__init__(optimizer=dy.MomentumSGDTrainer(
         ParamManager.global_collection(), e0, mom),
                      skip_noisy=skip_noisy)
Example #2
    def _init_optimizer(self, model, **kwargs):
        mom = float(kwargs.get('mom', 0.0))
        optim = kwargs.get('optim', 'sgd')
        clip = kwargs.get('clip')

        self.current_lr = kwargs.get('eta', kwargs.get('lr', 0.01))
        if optim == 'adadelta':
            self.optimizer = dy.AdadeltaTrainer(model.pc)
        elif optim == 'adam':
            self.optimizer = dy.AdamTrainer(model.pc,
                                            alpha=self.current_lr,
                                            beta_1=kwargs.get('beta1', 0.9),
                                            beta_2=kwargs.get('beta2', 0.999),
                                            eps=kwargs.get('epsilon', 1e-8))
        elif optim == 'rmsprop':
            self.optimizer = dy.RMSPropTrainer(model.pc,
                                               learning_rate=self.current_lr)
        else:
            if mom == 0 or mom is None:
                self.optimizer = dy.SimpleSGDTrainer(
                    model.pc, learning_rate=self.current_lr)
            else:
                logging.info('Using mom %f', mom)
                self.optimizer = dy.MomentumSGDTrainer(
                    model.pc, learning_rate=self.current_lr, mom=mom)
        if clip is not None:
            self.optimizer.set_clip_threshold(clip)
        self.optimizer.set_sparse_updates(False)
Example #3
def get_trainer(opt, s2s):
    if opt.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(s2s.pc,
                                      e0=opt.learning_rate,
                                      edecay=opt.learning_rate_decay)
    elif opt.trainer == 'clr':
        trainer = dy.CyclicalSGDTrainer(s2s.pc,
                                        e0_min=opt.learning_rate / 10.0,
                                        e0_max=opt.learning_rate,
                                        edecay=opt.learning_rate_decay)
    elif opt.trainer == 'momentum':
        trainer = dy.MomentumSGDTrainer(s2s.pc,
                                        e0=opt.learning_rate,
                                        edecay=opt.learning_rate_decay)
    elif opt.trainer == 'rmsprop':
        trainer = dy.RMSPropTrainer(s2s.pc,
                                    e0=opt.learning_rate,
                                    edecay=opt.learning_rate_decay)
    elif opt.trainer == 'adam':
        trainer = dy.AdamTrainer(s2s.pc,
                                 opt.learning_rate,
                                 edecay=opt.learning_rate_decay)
    else:
        print('Trainer name invalid or not provided, using SGD',
              file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(s2s.pc,
                                      e0=opt.learning_rate,
                                      edecay=opt.learning_rate_decay)

    trainer.set_clip_threshold(opt.gradient_clip)

    return trainer
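
Note: Example #3 is written against the pre-2.0 DyNet trainer API, where constructors took e0 and edecay keywords; the comment in Example #7 points out that edecay is no longer available after DyNet 1.0. A rough sketch of the same selector using the DyNet 2.x keyword names (learning_rate, mom, learning_rate_min/learning_rate_max, as used in Examples #2, #9 and #10) might look as follows; opt is assumed to carry the same fields and sys to be imported as above, and any decay would now be applied by hand per epoch (see Example #13):

def get_trainer_v2(opt, s2s):
    # Sketch only: DyNet 2.x argument names; the edecay argument no longer
    # exists, so learning-rate decay has to be applied manually each epoch.
    if opt.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(s2s.pc, learning_rate=opt.learning_rate)
    elif opt.trainer == 'clr':
        trainer = dy.CyclicalSGDTrainer(s2s.pc,
                                        learning_rate_min=opt.learning_rate / 10.0,
                                        learning_rate_max=opt.learning_rate)
    elif opt.trainer == 'momentum':
        trainer = dy.MomentumSGDTrainer(s2s.pc,
                                        learning_rate=opt.learning_rate,
                                        mom=0.9)
    elif opt.trainer == 'rmsprop':
        trainer = dy.RMSPropTrainer(s2s.pc, learning_rate=opt.learning_rate)
    elif opt.trainer == 'adam':
        trainer = dy.AdamTrainer(s2s.pc, alpha=opt.learning_rate)
    else:
        print('Trainer name invalid or not provided, using SGD',
              file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(s2s.pc, learning_rate=opt.learning_rate)
    trainer.set_clip_threshold(opt.gradient_clip)
    return trainer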
Example #4
    def __init__(self, input_dim, hidden_dim, output_dim, learning_rate=0.001):

        self.mdl = dy.Model()  # Create model
        self.sgd = dy.MomentumSGDTrainer(self.mdl, learning_rate=learning_rate)

        # Constructing weights and biases
        self.W1 = self.mdl.add_parameters((hidden_dim, input_dim))
        self.hbias = self.mdl.add_parameters((hidden_dim, ))
        self.W2 = self.mdl.add_parameters((output_dim, hidden_dim))
Example #5
 def __init__(self,Cemb,character_idx_map,options):
     model = dy.Model()
     #self.trainer = dy.MomentumSGDTrainer(model,options['lr'],options['momentum'],options['edecay']) # we use Momentum SGD
     self.trainer = dy.MomentumSGDTrainer(model,options['lr'],options['momentum']) # we use Momentum SGD
     self.params = self.initParams(model,Cemb,options)
     self.options = options
     self.model = model
     self.character_idx_map = character_idx_map
     self.known_words = None
Example #6
    def __init__(self, data, opt):
        self.opt = opt
        self.model = dy.ParameterCollection()
        self.trainer = dy.MomentumSGDTrainer(self.model)
        self.w2i = data.w2i
        self.wdims = opt.embedding_size
        self.ldims = opt.hidden_size
        self.attsize = opt.attention_size

        self.ext_embeddings = data.ext_embeddings
        # Model Parameters
        self.wlookup = self.model.add_lookup_parameters(
            (len(self.w2i), self.wdims))

        self.__load_external_embeddings()

        if self.opt.encoder_dir == "single":
            if self.opt.encoder_type == "lstm":
                self.sentence_rnn = [
                    dy.VanillaLSTMBuilder(1, self.wdims, self.ldims,
                                          self.model)
                ]
            elif self.opt.encoder_type == "gru":
                self.sentence_rnn = [
                    dy.GRUBuilder(1, self.wdims, self.ldims, self.model)
                ]
            self.attention_w = self.model.add_parameters(
                (self.attsize, self.ldims))
            self.attention_b = self.model.add_parameters(self.attsize)
            self.att_context = self.model.add_parameters(self.attsize)
            self.mlp_w = self.model.add_parameters(
                (1, self.ldims + 2 * self.ldims))
            self.mlp_b = self.model.add_parameters(1)
        elif self.opt.encoder_dir == "bidirectional":
            if self.opt.encoder_type == "lstm":
                self.sentence_rnn = [
                    dy.VanillaLSTMBuilder(1, self.wdims, self.ldims,
                                          self.model),
                    dy.VanillaLSTMBuilder(1, self.wdims, self.ldims,
                                          self.model),
                ]
            elif self.opt.encoder_type == "gru":
                self.sentence_rnn = [
                    dy.GRUBuilder(1, self.wdims, self.ldims, self.model),
                    dy.GRUBuilder(1, self.wdims, self.ldims, self.model),
                ]

            self.attention_w = self.model.add_parameters(
                (self.attsize, 2 * self.ldims))
            self.attention_b = self.model.add_parameters(self.attsize)
            self.att_context = self.model.add_parameters(self.attsize)
            self.mlp_w = self.model.add_parameters(
                (1, 2 * self.ldims + 4 * self.ldims))
            self.mlp_b = self.model.add_parameters(1)
Example #7
 def __init__(self, Cemb, character_idx_map, options):
     model = dy.Model()  # Initialize ParameterCollection
     # pre_gt = lr / (1 + edecay ** t)
     # gt = pre_gt + momentum * gt-1
     # edecay is not available after DyNet 1.0
     self.trainer = dy.MomentumSGDTrainer(
         model, options['lr'], options['momentum'])  # we use Momentum SGD
     self.params = self.initParams(model, Cemb, options)  # Init parameters
     self.options = options
     self.model = model
     self.character_idx_map = character_idx_map
     self.known_words = None
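
The comments in Example #7 describe the schedule that the removed edecay argument used to drive. A minimal sketch of applying a decay schedule by hand, assuming options also carries an 'edecay' value and n_epochs/run_one_epoch are placeholder names, is to reassign trainer.learning_rate at the top of every epoch, the same way Example #13 applies its fixed/cosine schedules (an inverse-time decay is shown purely as an example):

trainer = dy.MomentumSGDTrainer(model, options['lr'], options['momentum'])
for epoch in range(n_epochs):
    # hand-rolled replacement for the old edecay behaviour (one possible schedule)
    trainer.learning_rate = options['lr'] / (1.0 + options['edecay'] * epoch)
    run_one_epoch(trainer)  # hypothetical helper: forward, backward, trainer.update()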
Example #8
    def __init__(self, params, vocab, label2tag, pretrained_embeddings=None):
        """

        :param params:
        :param vocab:
        :param label2tag:
        :param pretrained_embeddings:
        """
        self.dim_w = params.dim_w
        self.win = params.win
        self.vocab = vocab
        self.n_words = len(self.vocab)
        self.dim_asp = params.dim_asp
        self.dim_y_asp = params.n_asp_tags
        self.n_steps = params.n_steps
        self.asp_label2tag = label2tag
        self.dropout_asp = params.dropout_asp
        self.dropout = params.dropout
        self.ds_name = params.ds_name
        self.model_name = params.model_name
        self.attention_type = params.attention_type

        self.pc = dy.ParameterCollection()
        self.Emb = WDEmb(pc=self.pc, n_words=self.n_words, dim_w=self.dim_w,
                         pretrained_embeddings=pretrained_embeddings)
        
        self.DEP_RecNN = DTreeBuilder(pc=self.pc, n_in=self.win * self.dim_w, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        
        self.ASP_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)

        self.BiAttention_F=BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        self.BiAttention_B=BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        self.BiAttention_T=BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)

        self.MultiWeightLayer=MultiWeightLayer(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)

        self.ASP_FC = Linear(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_y_asp)
        
        self.layers = [self.ASP_FC,self.DEP_RecNN,self.BiAttention_F,self.BiAttention_B,self.BiAttention_T,self.MultiWeightLayer]

        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adagrad':
            self.optimizer = dy.AdagradTrainer(self.pc)
        elif params.optimizer == 'adadelta':
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        else:
            raise Exception("Invalid optimizer!!")
Example #9
 def set_trainer(self, optimization):
     if optimization == 'MomentumSGD':
         self.trainer = dy.MomentumSGDTrainer(
             self.model, learning_rate=self.hp.learning_rate)
     elif optimization == 'CyclicalSGD':
         self.trainer = dy.CyclicalSGDTrainer(
             self.model,
             learning_rate_max=self.hp.learning_rate_max,
             learning_rate_min=self.hp.learning_rate_min)
     elif optimization == 'Adam':
         self.trainer = dy.AdamTrainer(self.model)
     elif optimization == 'RMSProp':
         self.trainer = dy.RMSPropTrainer(self.model)
     else:  # 'SimpleSGD'
         self.trainer = dy.SimpleSGDTrainer(
             self.model, learning_rate=self.hp.learning_rate)
Example #10
 def __init__(self, model, optim='sgd', clip=5, mom=0.9, **kwargs):
     super(ClassifyTrainerDynet, self).__init__()
     self.model = model
     eta = kwargs.get('eta', kwargs.get('lr', 0.01))
     print("Using eta [{:.4f}]".format(eta))
     print("Using optim [{}]".format(optim))
     self.labels = model.labels
     if optim == 'adadelta':
         self.optimizer = dy.AdadeltaTrainer(model.pc)
     elif optim == 'adam':
         self.optimizer = dy.AdamTrainer(model.pc)
     elif optim == 'rmsprop':
         self.optimizer = dy.RMSPropTrainer(model.pc, learning_rate=eta)
     else:
         print("using mom {:.3f}".format(mom))
         self.optimizer = dy.MomentumSGDTrainer(model.pc,
                                                learning_rate=eta,
                                                mom=mom)
     self.optimizer.set_clip_threshold(clip)
Example #11
    def __init__(self, input_dim, hidden_dim, output_dim, learning_rate=0.001):
        self._model = dy.ParameterCollection()

        self._input_dim = input_dim
        self._hidden_dim = hidden_dim
        self._output_dim = output_dim

        self._rnn = dy.SimpleRNNBuilder(self.LAYERS, self._input_dim,
                                        self._hidden_dim, self._model)
        # self._rnn.disable_dropout()
        self._W = self._model.add_parameters(
            (self._output_dim, self._hidden_dim), init=dy.NormalInitializer())

        self._learning_rate = learning_rate
        self._trainer = dy.MomentumSGDTrainer(
            self._model, learning_rate=self._learning_rate)

        self._l2_param = 0.0006
        # self._l2_param = 0.0

        self._init_layers()
Example #12
def optimizer(model, optim='sgd', eta=0.01, clip=None, mom=0.9, **kwargs):
    if 'lr' in kwargs:
        eta = kwargs['lr']
    print('Using eta [{:.4f}]'.format(eta))
    print('Using optim [{}]'.format(optim))
    if optim == 'adadelta':
        opt = dy.AdadeltaTrainer(model.pc)
    elif optim == 'adam':
        opt = dy.AdamTrainer(model.pc)
    elif optim == 'rmsprop':
        opt = dy.RMSPropTrainer(model.pc, learning_rate=eta)
    else:
        if mom == 0 or mom is None:
            opt = dy.SimpleSGDTrainer(model.pc, learning_rate=eta)
        else:
            print('Using mom {:.3f}'.format(mom))
            opt = dy.MomentumSGDTrainer(model.pc, learning_rate=eta, mom=mom)
    if clip is not None:
        opt.set_clip_threshold(clip)
    opt.set_sparse_updates(False)
    return opt
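
A minimal usage sketch for the helper above (model, make_batches, compute_loss and train_data are placeholder names; the scalar_value()/backward()/update() pattern mirrors Examples #13, #16 and #24):

# With the default non-zero mom, the plain 'sgd' choice falls through to
# MomentumSGDTrainer (see the else branch above).
opt = optimizer(model, optim='sgd', eta=0.01, clip=5.0, mom=0.9)
for batch in make_batches(train_data):
    dy.renew_cg()                      # fresh computation graph per batch
    loss = compute_loss(model, batch)  # hypothetical loss expression builder
    loss_value = loss.scalar_value()   # forward pass
    loss.backward()                    # backward pass
    opt.update()                       # apply the chosen trainer's update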
Example #13
def train_model_with_config():
    import research_toolbox.tb_logging as tb_lg

    if cfg["optimizer_type"] == "sgd":
        trainer = dy.SimpleSGDTrainer(m, cfg["step_size_start"])
    elif cfg["optimizer_type"] == "adam":
        trainer = dy.AdamTrainer(m, cfg["step_size_start"])
    elif cfg["optimizer_type"] == "sgd_mom":
        trainer = dy.MomentumSGDTrainer(m, cfg["step_size_start"])
    else:
        raise ValueError
    trainer.set_sparse_updates(0)

    # restarting from a checkpoint if it exists.
    # optimizer state is not kept.
    ckpt_filepath = cfg["out_folder"] + "/checkpoint.json"
    if tb_fs.file_exists(ckpt_filepath):
        log_d = tb_io.read_jsonfile(ckpt_filepath)
        current_epoch = len(log_d["dev_acc"])
        best_dev_acc = np.max(log_d["dev_acc"])
        m.populate(cfg["out_folder"] + '/model.ckpt')
    else:
        current_epoch = 0
        best_dev_acc = 0.0

        log_d = {
            'dev_acc': [],
            'avg_loss': [],
            'train_tks/sec': [],
            'eval_tks/sec': [],
            'secs_per_epoch': [],
            "lr": []
        }
        if cfg["debug"] or cfg["compute_train_acc"]:
            log_d["train_acc"] = []

    if cfg["loss_type"] == "log_neighbors":
        loss_fn = loss_log_neighbors
    elif cfg["loss_type"] == "log_beam":
        loss_fn = loss_log_beam
    elif cfg["loss_type"] == "cost_sensitive_margin_last":
        loss_fn = loss_cost_sensitive_margin_last
    elif cfg["loss_type"] == "margin_last":
        loss_fn = loss_margin_last
    elif cfg["loss_type"] == "perceptron_first":
        loss_fn = loss_perceptron_first
    elif cfg["loss_type"] == "perceptron_last":
        loss_fn = loss_perceptron_last
    elif cfg["loss_type"] == "upper_bound":
        loss_fn = loss_upper_bound
    else:
        raise ValueError

    cfg_accuracy = lambda data: beam_accuracy(data, cfg["beam_size"])
    cfg_train_graph = lambda e: train_beam_graph(e, cfg["beam_size"], cfg[
        "traj_type"], loss_fn)

    for epoch in range(current_epoch, cfg["num_epochs"]):
        if cfg["step_size_schedule_type"] == 'fixed':
            lr = cfg["step_size_start"]
        elif cfg["step_size_schedule_type"] == 'cosine':
            lr = cosine_get_lr(cfg["step_size_start"], cfg["step_size_end"],
                               cfg["num_epochs"], epoch)
        else:
            raise ValueError
        log_d['lr'].append(lr)

        trainer.learning_rate = lr

        acc_loss = 0.0
        random.shuffle(train_data)
        epoch_timer = tb_lg.TimeTracker()
        train_timer = tb_lg.TimeTracker()
        for i, e in enumerate(train_data):
            if i % cfg["print_every_num_examples"] == 0 and i > 0:
                print "Epoch %d - Example %d/%d" % (epoch, i, len(train_data))
            loss = cfg_train_graph(e)
            acc_loss += loss.value()
            loss.backward()
            trainer.update()

        log_d["avg_loss"].append(acc_loss / len(train_data))
        log_d["train_tks/sec"].append(num_train_tokens /
                                      train_timer.time_since_start())
        eval_timer = tb_lg.TimeTracker()
        # log_d['train_acc'].append(accuracy(train_data))
        log_d['dev_acc'].append(cfg_accuracy(dev_data))
        # log_d['test_acc'].append(accuracy(test_data))
        log_d['eval_tks/sec'].append((  #len(train_data) +
            num_dev_tokens
            # + num_test_tokens
        ) / eval_timer.time_since_start())
        log_d["secs_per_epoch"].append(epoch_timer.time_since_start())
        if cfg["debug"] or cfg["compute_train_acc"]:
            train_acc = cfg_accuracy(train_data)
            print "train_acc: ", train_acc
            log_d["train_acc"].append(train_acc)
        pprint({k: vs[-1] for k, vs in log_d.iteritems()})

        if best_dev_acc < log_d["dev_acc"][-1]:
            best_dev_acc = log_d["dev_acc"][-1]
            m.save(cfg["out_folder"] + '/best_model.ckpt')
        tb_io.write_jsonfile(log_d, cfg["out_folder"] + "/checkpoint.json")
        m.save(cfg["out_folder"] + '/model.ckpt')

    results_filepath = cfg["out_folder"] + "/results.json"
    if not tb_fs.file_exists(results_filepath):
        m.populate(cfg["out_folder"] + '/best_model.ckpt')
        log_d['test_acc'] = cfg_accuracy(test_data)
        tb_io.write_jsonfile(log_d, cfg["out_folder"] + "/results.json")
Example #14
    model = PyTorchModel()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=LEARNING_RATE,
                                weight_decay=WEIGHT_DECAY)
    rescale_lr = lambda epoch: 1 / (1 + LEARNING_DECAY_RATE * epoch)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                  lr_lambda=rescale_lr)
else:
    model = DyNetModel()
    optimizer = None
    if args.opt == 'sgd':
        optimizer = dy.SimpleSGDTrainer(model.model,
                                        learning_rate=LEARNING_RATE)
    elif args.opt == 'mom':
        optimizer = dy.MomentumSGDTrainer(model.model,
                                          learning_rate=LEARNING_RATE,
                                          mom=MOMENTUM)
    elif args.opt == 'csgd':
        pass
###        optimizer = dy.CyclicalSGDTrainer(model.model, learning_rate=LEARNING_RATE, mom=MOMENTUM) lrmin, lrmax, step, gamma (decay)
    optimizer.set_clip_threshold(args.clip)

prev_best = None
if args.train:
    step = 0
    for epoch in range(EPOCHS):
        random.shuffle(train)

        # Update learning rate
        if PYTORCH:
            scheduler.step()
Example #15
    # the BiLSTM for all the chars, take input of embed dim, and output of the hidden_dim minus the embed_dim because we will concatenate
    # with output from a separate bilstm of just the word
    bilstm = BILSTMTransducer(BILSTM_LAYERS, EMBED_DIM, HIDDEN_DIM, model)

    # a prev-pos lstm. The mlp's will take this as input as well
    prev_pos_lstm = dy.LSTMBuilder(BILSTM_LAYERS, EMBED_DIM, EMBED_DIM, model)

    # now the class mlp, it will take input of 2*HIDDEN_DIM (A concatenate of the before and after the word) + EMBED_DIM from the prev-pos
    # output of 2, unknown\talmud
    class_mlp = MLP(model, "classmlp", 2 * HIDDEN_DIM + EMBED_DIM, HIDDEN_DIM, 2)
    # pos mlp, same input but output the size of pos_vocab
    pos_mlp = MLP(model, 'posmlp', 2 * HIDDEN_DIM + EMBED_DIM, HIDDEN_DIM, pos_vocab.size())

    # the trainer
    trainer = dy.MomentumSGDTrainer(model)

    print "LOADING"
    # if we are loading in a model
    if filename_to_load:
        model.load(filename_to_load)

    print "DONE"

    if train_test:
        run_network_on_validation(START_EPOCH - 1)
        pos_conf_matrix.clear()
        # train!
        for epoch in range(START_EPOCH, 20):
            last_loss, last_pos_prec, last_class_prec, last_rough_pos_prec = 0.0, 0.0, 0.0, 0.0
            total_loss, total_pos_prec, total_class_prec, total_rough_pos_prec = 0.0, 0.0, 0.0, 0.0
Example #16
def train_model(model, encoder, decoder, params, train_inputs, train_outputs,
                dev_inputs, dev_outputs, y2int, int2y, epochs, optimization,
                results_file_path, plot, batch_size, eval_after):
    print 'training...'

    np.random.seed(17)
    random.seed(17)

    # sort training sentences by length in descending order
    train_data = zip(train_inputs, train_outputs)
    train_data.sort(key=lambda t: -len(t[0]))
    train_order = [
        x * batch_size for x in range(len(train_data) / batch_size + 1)
    ]

    # sort dev sentences by length in descending order
    dev_batch_size = 1
    dev_data = zip(dev_inputs, dev_outputs)
    dev_data.sort(key=lambda t: -len(t[0]))
    dev_order = [
        x * dev_batch_size for x in range(len(dev_data) / dev_batch_size + 1)
    ]

    if optimization == 'ADAM':
        trainer = dn.AdamTrainer(
            model
        )  # lam=REGULARIZATION, alpha=LEARNING_RATE, beta_1=0.9, beta_2=0.999, eps=1e-8)
    elif optimization == 'MOMENTUM':
        trainer = dn.MomentumSGDTrainer(model)
    elif optimization == 'SGD':
        trainer = dn.SimpleSGDTrainer(model)
    elif optimization == 'ADAGRAD':
        trainer = dn.AdagradTrainer(model)
    elif optimization == 'ADADELTA':
        trainer = dn.AdadeltaTrainer(model)
    else:
        trainer = dn.SimpleSGDTrainer(model)

    trainer.set_clip_threshold(float(arguments['--grad-clip']))
    seen_examples_count = 0
    total_loss = 0
    best_dev_epoch = 0
    best_train_epoch = 0
    patience = 0
    train_len = len(train_outputs)
    dev_len = len(dev_inputs)
    avg_train_loss = -1
    train_loss_patience = 0
    train_loss_patience_threshold = 99999999
    max_patience = int(arguments['--max-patience'])
    log_path = results_file_path + '_log.txt'
    start_epoch, checkpoints_x, train_loss_y, dev_loss_y, dev_accuracy_y = read_from_log(
        log_path)

    if len(train_loss_y) > 0:
        total_batches = checkpoints_x[-1]
        best_avg_train_loss = max(train_loss_y)
        best_dev_accuracy = max(dev_accuracy_y)
        best_dev_loss = max(dev_loss_y)
    else:
        total_batches = 0
        best_avg_train_loss = 999999
        best_dev_loss = 999999
        best_dev_accuracy = 0

    # progress bar init
    # noinspection PyArgumentList
    widgets = [progressbar.Bar('>'), ' ', progressbar.ETA()]
    train_progress_bar = progressbar.ProgressBar(widgets=widgets,
                                                 maxval=epochs).start()

    for e in xrange(start_epoch, epochs):

        # shuffle the batch start indices in each epoch
        random.shuffle(train_order)
        batches_per_epoch = len(train_order)
        start = time.time()

        # go through batches
        for i, batch_start_index in enumerate(train_order, start=1):
            total_batches += 1

            # get batch examples
            batch_inputs = [
                x[0] for x in train_data[batch_start_index:batch_start_index +
                                         batch_size]
            ]
            batch_outputs = [
                x[1] for x in train_data[batch_start_index:batch_start_index +
                                         batch_size]
            ]
            actual_batch_size = len(batch_inputs)

            # skip empty batches
            if actual_batch_size == 0 or len(batch_inputs[0]) == 0:
                continue

            # compute batch loss
            loss = compute_batch_loss(encoder, decoder, batch_inputs,
                                      batch_outputs, y2int)

            # forward pass
            total_loss += loss.scalar_value()
            loss.backward()

            # update parameters
            trainer.update()

            seen_examples_count += actual_batch_size

            # avg loss per sample
            avg_train_loss = total_loss / float(i * batch_size + e * train_len)

            # start patience counts only after 20 batches
            if avg_train_loss < best_avg_train_loss and total_batches > 20:
                best_avg_train_loss = avg_train_loss
                train_loss_patience = 0
            else:
                train_loss_patience += 1
                if train_loss_patience > train_loss_patience_threshold:
                    print 'train loss patience exceeded: {}'.format(
                        train_loss_patience)
                    return model, params, e, best_train_epoch

            if total_batches % 100 == 0 and total_batches > 0:
                print 'epoch {}: {} batches out of {} ({} examples out of {}) total: {} batches, {} examples. avg \
loss per example: {}'.format(e, i, batches_per_epoch, i * batch_size,
                             train_len, total_batches,
                             total_batches * batch_size, avg_train_loss)

                # print sentences per second
                end = time.time()
                elapsed_seconds = end - start
                print '{} sentences per second'.format(seen_examples_count /
                                                       elapsed_seconds)
                seen_examples_count = 0
                start = time.time()

            # checkpoint
            if total_batches % eval_after == 0:

                print 'starting checkpoint evaluation'
                dev_bleu, dev_loss = checkpoint_eval(
                    encoder,
                    decoder,
                    params,
                    dev_batch_size,
                    dev_data,
                    dev_inputs,
                    dev_len,
                    dev_order,
                    dev_outputs,
                    int2y,
                    y2int,
                    results_file_path=results_file_path)

                log_to_file(log_path, e, total_batches, avg_train_loss,
                            dev_loss, dev_bleu)
                save_model(model,
                           results_file_path,
                           total_batches,
                           models_to_save=int(arguments['--models-to-save']))
                if dev_bleu >= best_dev_accuracy:
                    best_dev_accuracy = dev_bleu
                    best_dev_epoch = e

                    # save best model to disk
                    save_best_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                if dev_loss < best_dev_loss:
                    best_dev_loss = dev_loss

                print 'epoch: {0} train loss: {1:.4f} dev loss: {2:.4f} dev bleu: {3:.4f} \
best dev bleu {4:.4f} (epoch {5}) patience = {6}'.format(
                    e, avg_train_loss, dev_loss, dev_bleu, best_dev_accuracy,
                    best_dev_epoch, patience)

                if patience == max_patience:
                    print 'out of patience after {0} checkpoints'.format(
                        str(e))
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    print 'checkpoint patience exceeded'
                    return model, params, e, best_train_epoch

                # plotting results from checkpoint evaluation
                if plot:
                    train_loss_y.append(avg_train_loss)
                    checkpoints_x.append(total_batches)
                    dev_accuracy_y.append(dev_bleu)
                    dev_loss_y.append(dev_loss)

                    y_vals = [('train_loss', train_loss_y),
                              ('dev loss', dev_loss_y),
                              ('dev_bleu', dev_accuracy_y)]
                    common.plot_to_file(y_vals,
                                        x_name='total batches',
                                        x_vals=checkpoints_x,
                                        file_path=results_file_path +
                                        '_learning_curve.png')

        # update progress bar after completing epoch
        train_progress_bar.update(e)

    # update progress bar after completing training
    train_progress_bar.finish()

    if plot:
        # clear plot when done
        plt.cla()

    print 'finished training. average loss: {} best epoch on dev: {} best epoch on train: {}'.format(
        str(avg_train_loss), best_dev_epoch, best_train_epoch)

    return model, params, e, best_train_epoch
Example #17
    def train(self, training_instances, dev_instances=None, num_epochs=60, batch_size=20, learning_rate=0.01,
              learning_rate_decay=0.9, dropout=0.2, clip_norm=None, save_path=None, logger=None, debug=False):
        if dev_instances is None:
            dev_instances = training_instances[:int(len(training_instances) * 0.1)]
        if type(training_instances) is TSVCorpus:
            training_instances = training_instances.sentences
        if type(dev_instances) is TSVCorpus:
            dev_instances = dev_instances.sentences
        if debug:
            training_instances = training_instances[:200]
            dev_instances = dev_instances[:100]
            num_epochs = 2
        trainer = dy.MomentumSGDTrainer(self.model, learning_rate, 0.9)
        if clip_norm is not None and clip_norm > 0:
            trainer.set_clip_threshold(clip_norm)
        if logger:
            # logger.info("Training Algorithm: {}".format(type(trainer)))
            logger.info("# training instances: {}".format(len(training_instances)))
            logger.info("# dev instances: {}".format(len(dev_instances)))
        training_total_tokens = 0
        best_f1 = 0.
        for epoch in range(num_epochs):
            if logger:
                logger.info("Epoch {} out of {}".format(epoch + 1, num_epochs))
            random.shuffle(training_instances)
            train_loss = 0.0
            train_total_instance = 0  # size of trained instances

            if dropout > 0:
                self.set_dropout(dropout)

            nbatches = (len(training_instances) + batch_size - 1) // batch_size
            bar = utils.Progbar(target=nbatches)
            for batch_id, batch in enumerate(utils.minibatches(training_instances, batch_size)):
                for instance in batch:
                    train_total_instance += 1
                    loss_expr = self.neg_log_loss(instance.words, instance.tags)
                    # Forward pass
                    loss = loss_expr.scalar_value()
                    # Backward pass
                    loss_expr.backward()

                    # Bail if loss is NaN
                    if math.isnan(loss):
                        assert False, "NaN occurred"

                    train_loss += loss
                    training_total_tokens += len(instance.words)

                trainer.update()
                if batch_size == 1 and batch_id % 10 != 0 and batch_id + 1 != train_total_instance:
                    # online learning, don't print too often
                    continue
                bar.update(batch_id + 1, exact=[("train loss", train_loss / train_total_instance)])

            trainer.learning_rate *= learning_rate_decay
            f1 = self.evaluate(dev_instances)[-1]
            if f1 > best_f1:
                best_f1 = f1
                if logger:
                    logger.info('%.2f%% - new best dev score' % f1)
                if save_path:
                    self.save(save_path)
            else:
                if logger:
                    logger.info('%.2f%%' % f1)
Example #18
    parser.add_argument('--evec', type=int)
    group.add_argument('--save-model', dest='save_model')
    group.add_argument('--load-model', dest='load_model')
    args = parser.parse_args()
    np.random.seed(args.seed)
    random.seed(args.seed)

    meta = Meta()
    if args.dev:
        dev = read(args.dev)
    if not args.load_model:
        train = read(args.train)
        wvm = Word2Vec.load_word2vec_format(args.embd, binary=args.evec)
        meta.w_dim = wvm.syn0.shape[1]
        meta.n_words = wvm.syn0.shape[0] + meta.add_words

        get_cc(train)
        meta.w2i = {}
        for w in wvm.vocab:
            meta.w2i[w] = wvm.vocab[w].index + meta.add_words

    if args.save_model:
        pickle.dump(meta, open('%s.meta' % args.save_model, 'wb'))
    if args.load_model:
        tagger = Tagger(model=args.load_model)
        eval(dev)
    else:
        tagger = Tagger(meta=meta)
        trainer = dy.MomentumSGDTrainer(tagger.model)
        train_tagger(train)
Example #19
 def __init__(self, exp_global=Ref(Path("exp_global")), e0=0.01, mom=0.9):
     self.optimizer = dy.MomentumSGDTrainer(
         exp_global.dynet_param_collection.param_col, e0, mom)
Example #20
    def __init__(self,
                 word_count,
                 tag_count,
                 word_dims,
                 tag_dims,
                 lstm_units,
                 hidden_units,
                 struct_out,
                 label_out,
                 droprate=0,
                 struct_spans=4,
                 label_spans=3,
                 optimizer=1):

        self.word_count = word_count
        self.tag_count = tag_count
        self.word_dims = word_dims
        self.tag_dims = tag_dims
        self.lstm_units = lstm_units
        self.hidden_units = hidden_units
        self.struct_out = struct_out
        self.label_out = label_out

        self.droprate = droprate

        self.model = dynet.Model()

        if optimizer == 1:
            self.trainer = dynet.SimpleSGDTrainer(self.model)
        elif optimizer == 2:
            self.trainer = dynet.MomentumSGDTrainer(self.model)
        elif optimizer == 3:
            self.trainer = dynet.AdagradTrainer(self.model,
                                                learning_rate=0.01,
                                                eps=0.001)
        elif optimizer == 4:
            self.trainer = dynet.RMSPropTrainer(self.model)
        elif optimizer == 5:
            self.trainer = dynet.AdamTrainer(self.model)
        random.seed(1)

        self.activation = dynet.rectify

        self.word_embed = self.model.add_lookup_parameters(
            (word_count, word_dims), )
        self.tag_embed = self.model.add_lookup_parameters(
            (tag_count, tag_dims), )

        self.fwd_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)
        self.back_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)

        self.fwd_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)
        self.back_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)

        self.struct_hidden_W = self.model.add_parameters(
            (hidden_units, 4 * struct_spans * lstm_units),
            dynet.UniformInitializer(0.01),
        )
        self.struct_hidden_b = self.model.add_parameters(
            (hidden_units, ),
            dynet.ConstInitializer(0),
        )
        self.struct_output_W = self.model.add_parameters(
            (struct_out, hidden_units),
            dynet.ConstInitializer(0),
        )
        self.struct_output_b = self.model.add_parameters(
            (struct_out, ),
            dynet.ConstInitializer(0),
        )

        self.label_hidden_W = self.model.add_parameters(
            (hidden_units, 4 * label_spans * lstm_units),
            dynet.UniformInitializer(0.01),
        )
        self.label_hidden_b = self.model.add_parameters(
            (hidden_units, ),
            dynet.ConstInitializer(0),
        )
        self.label_output_W = self.model.add_parameters(
            (label_out, hidden_units),
            dynet.ConstInitializer(0),
        )
        self.label_output_b = self.model.add_parameters(
            (label_out, ),
            dynet.ConstInitializer(0),
        )
Example #21
    out_size = 2

    with open(input_vocab, 'r') as f:
        vocab = f.readlines()
        vocab = map(lambda s: s.strip(), vocab)
    vocab_size = len(vocab)
    adv_net = AdvNN(hid_size,
                    hid_size,
                    out_size,
                    hid_size,
                    adv_hid_size,
                    out_size,
                    num_adv,
                    vocab_size,
                    dropout,
                    lstm_size,
                    adv_depth,
                    rnn_dropout=rnn_dropout,
                    rnn_type=rnn_type)

    trainer = dy.MomentumSGDTrainer(adv_net._model)
    batch = 32
    if ro == str(-1):
        logger.debug('1 task')
        train_task(adv_net, x_train, x_test, trainer, num_epoch, batch,
                   task_type, logger)
    else:
        logger.debug('2 tasks')
        train(adv_net, x_train, x_test, trainer, num_epoch, batch, vec_dropout,
              logger)
Example #22
def train_model(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, train_lemmas, train_feat_dicts, train_words, dev_lemmas,
                dev_feat_dicts, dev_words, alphabet_index, inverse_alphabet_index, epochs, optimization,
                results_file_path, train_aligned_pairs, dev_aligned_pairs, feat_index, feature_types,
                plot):
    print 'training...'

    np.random.seed(17)
    random.seed(17)

    if optimization == 'ADAM':
        trainer = pc.AdamTrainer(model, lam=REGULARIZATION, alpha=LEARNING_RATE, beta_1=0.9, beta_2=0.999, eps=1e-8)
    elif optimization == 'MOMENTUM':
        trainer = pc.MomentumSGDTrainer(model)
    elif optimization == 'SGD':
        trainer = pc.SimpleSGDTrainer(model)
    elif optimization == 'ADAGRAD':
        trainer = pc.AdagradTrainer(model)
    elif optimization == 'ADADELTA':
        trainer = pc.AdadeltaTrainer(model)
    else:
        trainer = pc.SimpleSGDTrainer(model)

    total_loss = 0
    best_avg_dev_loss = 999
    best_dev_accuracy = -1
    best_train_accuracy = -1
    patience = 0
    train_len = len(train_words)
    sanity_set_size = 100
    epochs_x = []
    train_loss_y = []
    dev_loss_y = []
    train_accuracy_y = []
    dev_accuracy_y = []
    e = -1

    # progress bar init
    widgets = [progressbar.Bar('>'), ' ', progressbar.ETA()]
    train_progress_bar = progressbar.ProgressBar(widgets=widgets, maxval=epochs).start()
    avg_loss = -1

    for e in xrange(epochs):

        # randomize the training set
        indices = range(train_len)
        random.shuffle(indices)
        train_set = zip(train_lemmas, train_feat_dicts, train_words, train_aligned_pairs)
        train_set = [train_set[i] for i in indices]

        # compute loss for each example and update
        for i, example in enumerate(train_set):
            lemma, feats, word, alignment = example
            loss = one_word_loss(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word,
                                 alphabet_index, alignment, feat_index, feature_types)
            loss_value = loss.value()
            total_loss += loss_value
            loss.backward()
            trainer.update()
            if i > 0:
                avg_loss = total_loss / float(i + e * train_len)
            else:
                avg_loss = total_loss

        if EARLY_STOPPING:

            # get train accuracy
            print 'evaluating on train...'
            train_predictions = predict_sequences(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, alphabet_index,
                                                  inverse_alphabet_index, train_lemmas[:sanity_set_size],
                                                  train_feat_dicts[:sanity_set_size],
                                                  feat_index,
                                                  feature_types)

            train_accuracy = evaluate_model(train_predictions, train_lemmas[:sanity_set_size],
                                            train_feat_dicts[:sanity_set_size],
                                            train_words[:sanity_set_size],
                                            feature_types, print_results=False)[1]

            if train_accuracy > best_train_accuracy:
                best_train_accuracy = train_accuracy

            dev_accuracy = 0
            avg_dev_loss = 0

            if len(dev_lemmas) > 0:

                # get dev accuracy
                dev_predictions = predict_sequences(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, alphabet_index,
                                                    inverse_alphabet_index, dev_lemmas, dev_feat_dicts, feat_index,
                                                    feature_types)
                print 'evaluating on dev...'
                # get dev accuracy
                dev_accuracy = evaluate_model(dev_predictions, dev_lemmas, dev_feat_dicts, dev_words, feature_types,
                                              print_results=True)[1]

                if dev_accuracy > best_dev_accuracy:
                    best_dev_accuracy = dev_accuracy

                    # save best model to disk
                    save_pycnn_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                # found "perfect" model
                if dev_accuracy == 1:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e

                # get dev loss
                total_dev_loss = 0
                for i in xrange(len(dev_lemmas)):
                    total_dev_loss += one_word_loss(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, dev_lemmas[i],
                                                    dev_feat_dicts[i], dev_words[i], alphabet_index,
                                                    dev_aligned_pairs[i], feat_index, feature_types).value()

                avg_dev_loss = total_dev_loss / float(len(dev_lemmas))
                if avg_dev_loss < best_avg_dev_loss:
                    best_avg_dev_loss = avg_dev_loss

                print 'epoch: {0} train loss: {1:.4f} dev loss: {2:.4f} dev accuracy: {3:.4f} train accuracy = {4:.4f} \
 best dev accuracy {5:.4f} best train accuracy: {6:.4f} patience = {7}'.format(e, avg_loss, avg_dev_loss, dev_accuracy,
                                                                               train_accuracy, best_dev_accuracy,
                                                                               best_train_accuracy, patience)

                log_to_file(results_file_path + '_log.txt', e, avg_loss, train_accuracy, dev_accuracy)

                if patience == MAX_PATIENCE:
                    print 'out of patience after {0} epochs'.format(str(e))
                    # TODO: would like to return best model but pycnn has a bug with save and load. Maybe copy via code?
                    # return best_model[0]
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e
            else:

                # if no dev set is present, optimize on train set
                print 'no dev set for early stopping, running all epochs until perfectly fitting or patience was \
                reached on the train set'

                if train_accuracy > best_train_accuracy:
                    best_train_accuracy = train_accuracy

                    # save best model to disk
                    save_pycnn_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                print 'epoch: {0} train loss: {1:.4f} train accuracy = {2:.4f} best train accuracy: {3:.4f} \
                patience = {4}'.format(e, avg_loss, train_accuracy, best_train_accuracy, patience)

                # found "perfect" model on train set or patience has reached
                if train_accuracy == 1 or patience == MAX_PATIENCE:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e

            # update lists for plotting
            train_accuracy_y.append(train_accuracy)
            epochs_x.append(e)
            train_loss_y.append(avg_loss)
            dev_loss_y.append(avg_dev_loss)
            dev_accuracy_y.append(dev_accuracy)

        # finished epoch
        train_progress_bar.update(e)
        if plot:
            with plt.style.context('fivethirtyeight'):
                p1, = plt.plot(epochs_x, dev_loss_y, label='dev loss')
                p2, = plt.plot(epochs_x, train_loss_y, label='train loss')
                p3, = plt.plot(epochs_x, dev_accuracy_y, label='dev acc.')
                p4, = plt.plot(epochs_x, train_accuracy_y, label='train acc.')
                plt.legend(loc='upper left', handles=[p1, p2, p3, p4])
            plt.savefig(results_file_path + '.png')
    train_progress_bar.finish()
    if plot:
        plt.cla()
    print 'finished training. average loss: ' + str(avg_loss)
    return model, e
Example #23
    def __init__(self, params, vocab, embeddings, char_embeddings):
        """

        :param params:
        :param vocab:
        :param embeddings:
        :param char_embeddings:
        """
        self.params = params
        self.name = 'lstm_cascade'
        self.dim_char = params.dim_char
        self.dim_w = params.dim_w
        self.dim_char_h = params.dim_char_h
        self.dim_ote_h = params.dim_ote_h
        self.dim_ts_h = params.dim_ts_h
        self.input_win = params.input_win
        self.ds_name = params.ds_name
        # tag vocabulary of opinion target extraction and targeted sentiment
        self.ote_tag_vocab = params.ote_tag_vocab
        self.ts_tag_vocab = params.ts_tag_vocab
        self.dim_ote_y = len(self.ote_tag_vocab)
        self.dim_ts_y = len(self.ts_tag_vocab)
        self.n_epoch = params.n_epoch
        self.dropout_rate = params.dropout
        self.tagging_schema = params.tagging_schema
        self.clip_grad = params.clip_grad
        self.use_char = params.use_char
        # name of word embeddings
        self.emb_name = params.emb_name
        self.embeddings = embeddings
        self.vocab = vocab
        # character vocabulary
        self.char_vocab = params.char_vocab
        #self.td_proportions = params.td_proportions
        self.epsilon = params.epsilon
        #self.tc_proportions = params.tc_proportions
        self.pc = dy.ParameterCollection()

        if self.use_char:
            self.char_emb = CharEmb(pc=self.pc,
                                    n_chars=len(self.char_vocab),
                                    dim_char=self.dim_char,
                                    pretrained_embeddings=char_embeddings)
            self.lstm_char = dy.LSTMBuilder(1, self.dim_char, self.dim_char_h,
                                            self.pc)
            dim_input = self.input_win * self.dim_w + 2 * self.dim_char_h
        else:
            dim_input = self.input_win * self.dim_w
        # word embedding layer
        self.emb = WDEmb(pc=self.pc,
                         n_words=len(vocab),
                         dim_w=self.dim_w,
                         pretrained_embeddings=embeddings)

        # lstm layers
        self.lstm_ote = dy.LSTMBuilder(1, dim_input, self.dim_ote_h, self.pc)
        self.lstm_ts = dy.LSTMBuilder(1, 2 * self.dim_ote_h, self.dim_ts_h,
                                      self.pc)

        # fully connected layer
        self.fc_ote = Linear(pc=self.pc,
                             n_in=2 * self.dim_ote_h,
                             n_out=self.dim_ote_y)
        self.fc_ts = Linear(pc=self.pc,
                            n_in=2 * self.dim_ts_h,
                            n_out=self.dim_ts_y)

        assert self.tagging_schema == 'BIEOS'
        transition_path = {
            'B': ['B-POS', 'B-NEG', 'B-NEU'],
            'I': ['I-POS', 'I-NEG', 'I-NEU'],
            'E': ['E-POS', 'E-NEG', 'E-NEU'],
            'S': ['S-POS', 'S-NEG', 'S-NEU'],
            'O': ['O']
        }
        self.transition_scores = np.zeros((self.dim_ote_y, self.dim_ts_y))
        for t in transition_path:
            next_tags = transition_path[t]
            n_next_tag = len(next_tags)
            ote_id = self.ote_tag_vocab[t]
            for nt in next_tags:
                ts_id = self.ts_tag_vocab[nt]
                self.transition_scores[ote_id][ts_id] = 1.0 / n_next_tag
        print(self.transition_scores)
        self.transition_scores = np.array(self.transition_scores,
                                          dtype='float32').transpose()

        # opinion target-opinion words co-occurrence modeling
        self.stm_lm = Linear(pc=self.pc,
                             n_in=2 * self.dim_ote_h,
                             n_out=2 * self.dim_ote_h,
                             nonlinear='tanh')
        # fully connected layer for opinion-enhanced indicator prediction task
        self.fc_stm = Linear(pc=self.pc, n_in=2 * self.dim_ote_h, n_out=2)

        # gate for maintaining sentiment consistency
        self.W_gate = self.pc.add_parameters(
            (2 * self.dim_ote_h, 2 * self.dim_ote_h),
            init=dy.UniformInitializer(0.2))

        # determine the optimizer
        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adadelta':
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        else:
            raise Exception("Unsupported optimizer type: %s" %
                            params.optimizer)
Example #24
def train(opt):
    # Load data =========================================================
    if opt.verbose:
        print('Reading corpora')
    # Read vocabs
    if opt.dic_src:
        widss, ids2ws = data.load_dic(opt.dic_src)
    else:
        widss, ids2ws = data.read_dic(opt.train_src, max_size=opt.src_vocab_size)
        data.save_dic(opt.exp_name + '_src_dic.txt', widss)

    if opt.dic_dst:
        widst, ids2wt = data.load_dic(opt.dic_dst)
    else:
        widst, ids2wt = data.read_dic(opt.train_dst, max_size=opt.trg_vocab_size)
        data.save_dic(opt.exp_name + '_trg_dic.txt', widst)

    # Read training
    trainings_data = data.read_corpus(opt.train_src, widss)
    trainingt_data = data.read_corpus(opt.train_dst, widst)
    # Read validation
    valids_data = data.read_corpus(opt.valid_src, widss)
    validt_data = data.read_corpus(opt.valid_dst, widst)

    # Create model ======================================================
    if opt.verbose:
        print('Creating model')
        sys.stdout.flush()
    s2s = seq2seq.Seq2SeqModel(opt.emb_dim,
                               opt.hidden_dim,
                               opt.att_dim,
                               widss,
                               widst,
                               model_file=opt.model,
                               bidir=opt.bidir,
                               word_emb=opt.word_emb,
                               dropout=opt.dropout_rate,
                               max_len=opt.max_len)

    if s2s.model_file is not None:
        s2s.load()
    s2s.model_file = opt.exp_name+'_model.txt'
    # Trainer ==========================================================
    if opt.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    if opt.trainer == 'clr':
        trainer = dy.CyclicalSGDTrainer(s2s.model, e0_min=opt.learning_rate / 10,
                                        e0_max=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'momentum':
        trainer = dy.MomentumSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    elif opt.trainer == 'rmsprop':
        trainer = dy.RMSPropTrainer(s2s.model, e0=opt.learning_rate,
                                    edecay=opt.learning_rate_decay)
    elif opt.trainer == 'adam':
        trainer = dy.AdamTrainer(s2s.model, opt.learning_rate, edecay=opt.learning_rate_decay)
    else:
        print('Trainer name invalid or not provided, using SGD', file=sys.stderr)
        trainer = dy.SimpleSGDTrainer(
            s2s.model, e0=opt.learning_rate, edecay=opt.learning_rate_decay)
    if opt.verbose:
        print('Using '+opt.trainer+' optimizer')
    trainer.set_clip_threshold(opt.gradient_clip)
    # Print configuration ===============================================
    if opt.verbose:
        options.print_config(opt, src_dict_size=len(widss), trg_dict_size=len(widst))
        sys.stdout.flush()
    # Creat batch loaders ===============================================
    if opt.verbose:
        print('Creating batch loaders')
        sys.stdout.flush()
    trainbatchloader = data.BatchLoader(trainings_data, trainingt_data, opt.batch_size)
    devbatchloader = data.BatchLoader(valids_data, validt_data, opt.dev_batch_size)
    # Start training ====================================================
    if opt.verbose:
        print('starting training')
        sys.stdout.flush()
    start = time.time()
    train_loss = 0
    processed = 0
    best_bleu = 0
    i = 0
    for epoch in range(opt.num_epochs):
        for x, y in trainbatchloader:
            processed += sum(map(len, y))
            bsize = len(y)
            # Compute loss
            loss = s2s.calculate_loss(x, y)
            # Backward pass and parameter update
            loss.backward()
            trainer.update()
            train_loss += loss.scalar_value() * bsize
            if (i+1) % opt.check_train_error_every == 0:
                # Check average training error from time to time
                logloss = train_loss / processed
                ppl = np.exp(logloss)
                elapsed = time.time()-start
                trainer.status()
                print(" Training_loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                      (logloss, ppl, elapsed, processed))
                start = time.time()
                train_loss = 0
                processed = 0
                sys.stdout.flush()
            if (i+1) % opt.check_valid_error_every == 0:
                # Check generalization error on the validation set from time to time
                dev_loss = 0
                dev_processed = 0
                dev_start = time.time()
                for x, y in devbatchloader:
                    dev_processed += sum(map(len, y))
                    bsize = len(y)
                    loss = s2s.calculate_loss(x, y, test=True)
                    dev_loss += loss.scalar_value() * bsize
                dev_logloss = dev_loss/dev_processed
                dev_ppl = np.exp(dev_logloss)
                dev_elapsed = time.time()-dev_start
                print("[epoch %d] Dev loss=%f, ppl=%f, time=%f s, tokens processed=%d" %
                      (epoch, dev_logloss, dev_ppl, dev_elapsed, dev_processed))
                sys.stdout.flush()
                start = time.time()

            if (i+1) % opt.valid_bleu_every == 0:
                # Check BLEU score on the validation set from time to time
                print('Start translating validation set, buckle up!')
                sys.stdout.flush()
                bleu_start = time.time()
                with open(opt.valid_out, 'w+') as f:
                    for x in valids_data:
                        y_hat = s2s.translate(x, beam_size=opt.beam_size)
                        translation = [ids2wt[w] for w in y_hat[1:-1]]
                        print(' '.join(translation), file=f)
                bleu, details = evaluation.bleu_score(opt.valid_dst, opt.valid_out)
                bleu_elapsed = time.time()-bleu_start
                print('Finished translating validation set', bleu_elapsed, 'elapsed.')
                print(details)
                # Early stopping : save the latest best model
                if bleu > best_bleu:
                    best_bleu = bleu
                    print('Best BLEU score up to date, saving model to', s2s.model_file)
                    s2s.save()
                sys.stdout.flush()
                start = time.time()
            i = i+1
        trainer.update_epoch()
Example #25
    def __init__(self, params, vocab, embeddings):
        """

        :param params: parameters
        :param vocab: vocabulary
        :param embeddings: pretrained word embeddings
        """
        self.params = params
        self.name = 'lstm_crf'
        self.dim_char = params.dim_char
        self.dim_w = params.dim_w
        self.dim_char_h = params.dim_char_h
        self.dim_ote_h = params.dim_ote_h
        self.dim_ts_h = params.dim_ts_h
        self.input_win = params.input_win
        self.ds_name = params.ds_name
        # tag vocabulary of opinion target extraction and targeted sentiment
        self.ote_tag_vocab = params.ote_tag_vocab
        self.ts_tag_vocab = params.ts_tag_vocab
        self.dim_ote_y = len(self.ote_tag_vocab)
        self.dim_ts_y = len(self.ts_tag_vocab)
        self.n_epoch = params.n_epoch
        self.dropout_rate = params.dropout
        self.tagging_schema = params.tagging_schema
        self.clip_grad = params.clip_grad
        self.use_char = params.use_char
        # name of word embeddings
        self.emb_name = params.emb_name
        self.embeddings = embeddings
        self.vocab = vocab
        # character vocabulary
        self.char_vocab = params.char_vocab
        self.pc = dy.ParameterCollection()

        # word embedding layer
        self.emb = WDEmb(pc=self.pc,
                         n_words=len(vocab),
                         dim_w=self.dim_w,
                         pretrained_embeddings=embeddings)

        # input dimension
        dim_input = self.input_win * self.dim_w

        self.lstm_ts = dy.LSTMBuilder(1, dim_input, self.dim_ts_h, self.pc)

        # hidden layer between LSTM and CRF decoding layer
        self.hidden = Linear(pc=self.pc,
                             n_in=2 * self.dim_ts_h,
                             n_out=self.dim_ts_h,
                             use_bias=True,
                             nonlinear='tanh')
        # map the word representation to the ts label space
        # in the label space, both the BEG and END tags are considered
        self.fc_ts = Linear(pc=self.pc,
                            n_in=self.dim_ts_h,
                            n_out=self.dim_ts_y)

        # transition matrix, [i, j] is the transition score from tag i to tag j
        self.transitions = self.pc.add_lookup_parameters(
            (self.dim_ts_y + 2, self.dim_ts_y + 2))

        # determine the optimizer
        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adadelta':
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        else:
            raise Exception("Unsupported optimizer type: %s" %
                            params.optimizer)
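The constructor stores params.clip_grad in self.clip_grad but never passes it to the trainer it builds. A minimal sketch of wiring it up, as an assumption about how the rest of the class might use it rather than code from the original:

# Illustrative only: apply the stored clipping threshold to whichever
# trainer was selected above; set_clip_threshold is part of the DyNet
# Trainer API, and a non-positive value disables clipping.
if self.clip_grad is not None and self.clip_grad > 0:
    self.optimizer.set_clip_threshold(self.clip_grad)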
Пример #26
0
 def __init__(self, yaml_context, e0=0.01, mom=0.9):
     self.optimizer = dy.MomentumSGDTrainer(
         yaml_context.dynet_param_collection.param_col, e0, mom)
Пример #27
0
    def finetune(self, best_scores):
        # freeze all encoders
        self.encoders['feat'].set_freeze(True)

        for task in self.args.tasks:
            # self.decoders[task].tree_encoder.set_freeze(True)

            # restart the trainer to clear the momentum from the previous training
            self.trainer = dy.AdamTrainer(self.model)
            # load the best model from the previous finetuning
            self.load_model()
            self.log(f'Start finetuning {task}')
            switch_trainer = (
                len(self.args.tasks) == 1
            )  # directly change to SGD if there is only one task
            switched = False
            waited = 0
            step = 0

            for batch in self.iterate_batch:
                loss = total = correct = 0

                if switch_trainer:
                    self.trainer = dy.MomentumSGDTrainer(self.model)
                    switch_trainer = False
                    switched = True

                # train on a batch of sentences
                t0 = time()
                for sent in tqdm(batch):
                    step += 1
                    self.encode_sent(sent, True)
                    res = self.decoders[task].train_one_step(sent)
                    sent.clear_pred()
                    loss += res['loss']
                    total += res['total']
                    correct += res['correct']
                    if res['loss_expr']:
                        try:
                            res['loss_expr'].backward()
                            self.trainer.update()
                        except Exception:
                            self.log('bad gradient, load previous model')
                            self.load_model()
                train_time = time() - t0

                # evaluate on dev set
                res = self.predict(self.dev_sents[:1000], task)
                score = res['score']

                self.log(
                    f"[step={step}]\ttrain_time={train_time:.1f}s\ttest_time={res[f'time']:.1f}s"
                )
                self.log(f"[step={step}]\t{task}_loss={loss/self.args.eval_every:.2f}, "\
                        f"train_{task}_score={correct}/{total}={100*correct/total:.2f}")
                self.log(f"[step={step}]\tdev_{task}_score={100*score:.2f}")

                if score > best_scores[task]:
                    best_scores[task] = score
                    self.save_model()
                    waited = 0
                else:
                    waited += 1
                    if waited > self.args.patience:
                        if switched:
                            self.log('out of patience')
                            break
                        else:
                            self.log('switch trainer')
                            switch_trainer = True
                            waited = 0

                if step >= self.args.max_step:
                    break

            self.log(f'Finish finetuning {task}')
            self.decoders[task].set_freeze(True)
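When the trainer is switched above, dy.MomentumSGDTrainer(self.model) falls back to DyNet's defaults (learning_rate=0.01, mom=0.9). A sketch of the same switch with the hyper-parameters made explicit, where self.args.finetune_lr is a hypothetical argument not present in the original code:

# Sketch only: momentum SGD with explicit settings instead of the defaults;
# self.args.finetune_lr is hypothetical.
self.trainer = dy.MomentumSGDTrainer(self.model,
                                     learning_rate=self.args.finetune_lr,
                                     mom=0.9)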
Пример #28
0
    def __init__(self, params, vocab, label2tag, pretrained_embeddings=None):
        """

        :param params:
        :param vocab:
        :param label2tag:
        :param pretrained_embeddings:
        """
        self.dim_w = params.dim_w
        self.win = params.win
        self.vocab = vocab
        self.n_words = len(self.vocab)
        self.dim_asp = params.dim_asp
        self.dim_opi = params.dim_opi
        self.dim_y_asp = params.n_asp_tags
        self.dim_y_opi = params.n_opi_tags
        self.n_steps = params.n_steps
        self.asp_label2tag = label2tag
        self.opi_label2tag = {0: 'O', 1: 'T'}
        self.dropout_asp = params.dropout_asp
        self.dropout_opi = params.dropout_opi
        self.dropout = params.dropout
        self.rnn_type = params.rnn_type
        self.ds_name = params.ds_name
        self.model_name = params.model_name
        self.attention_type = params.attention_type

        self.pc = dy.ParameterCollection()
        self.Emb = WDEmb(pc=self.pc, n_words=self.n_words, dim_w=self.dim_w,
                         pretrained_embeddings=pretrained_embeddings)
        #self.ASP_RNN = LSTM(pc=self.pc, n_in=self.win*self.dim_w, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        #self.OPI_RNN = LSTM(pc=self.pc, n_in=self.win*self.dim_w, n_out=self.dim_opi, dropout_rate=self.dropout_opi)
        # use dynet RNNBuilder rather than the self-defined RNN classes
        if self.rnn_type == 'LSTM':
            self.ASP_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)
            self.OPI_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_opi, self.pc)
        elif self.rnn_type == 'GRU':
            # NOT TRIED!
            self.ASP_RNN = dy.GRUBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)
            self.OPI_RNN = dy.GRUBuilder(1, self.win * self.dim_w, self.dim_opi, self.pc)
        else:
            raise Exception("Invalid RNN type!!!")
        self.THA = THA(pc=self.pc, n_steps=self.n_steps, n_in=2*self.dim_asp)
        if self.attention_type == 'bilinear':
            self.STN = ST_bilinear(pc=self.pc, dim_asp=self.dim_asp, dim_opi=self.dim_opi)
        # here dot attention is not applicable since the aspect representation and opinion representation
        # have different dimensions
        # elif self.attention_type == 'dot':
        #    self.STN = ST_dot(pc=self.pc, dim_asp=self.dim_asp, dim_opi=self.dim_opi)
        elif self.attention_type == 'concat':
            self.STN = ST_concat(pc=self.pc, dim_asp=self.dim_asp, dim_opi=self.dim_opi)
        else:
            raise Exception("Invalid attention type!!!")

        self.ASP_FC = Linear(pc=self.pc, n_in=2*self.dim_asp+2*self.dim_opi, n_out=self.dim_y_asp)
        self.OPI_FC = Linear(pc=self.pc, n_in=2*self.dim_opi, n_out=self.dim_y_opi)

        self.layers = [self.ASP_FC, self.OPI_FC, self.THA, self.STN]

        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adagrad':
            self.optimizer = dy.AdagradTrainer(self.pc)
        elif params.optimizer == 'adadelta':
            # use the default values for Adadelta
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        else:
            raise Exception("Invalid optimizer!!")
Пример #29
0
    training_instances, training_vocab, \
    dev_instances, dev_vocab, test_instances, tag_set_sizes = processed_dataset.get_all_params()

    # ===-----------------------------------------------------------------------===
    # Build model and trainer
    # ===-----------------------------------------------------------------------===


    model = LSTMTagger(tagset_sizes=tag_set_sizes,
                       num_lstm_layers=options.lstm_layers,
                       hidden_dim=options.hidden_dim,
                       word_level_dim=options.word_level_dim,
                       charset_size=len(c2i),
                       char_embedding_dim=DEFAULT_CHAR_EMBEDDING_SIZE)

    trainer = dy.MomentumSGDTrainer(model.model, options.learning_rate, 0.9)
    logging.info("Training Algorithm: {}".format(type(trainer)))

    logging.info("Number training instances: {}".format(len(training_instances)))
    logging.info("Number dev instances: {}".format(len(dev_instances)))

    best_dev_pos = 0.0
    old_best_name = None

    for epoch in range(options.num_epochs):
        bar = progressbar.ProgressBar()

        # set up epoch
        random.shuffle(training_instances)
        train_loss = 0.0
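The snippet is cut off before the inner training loop. A generic sketch of one training step with the MomentumSGDTrainer built above, where loss_for_instance is a hypothetical helper rather than the LSTMTagger API from this example:

# Sketch only: a generic per-instance training step.
for instance in bar(training_instances):
    dy.renew_cg()                                # fresh computation graph per instance
    loss = loss_for_instance(model, instance)    # hypothetical helper
    train_loss += loss.scalar_value()
    loss.backward()
    trainer.update()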
Пример #30
0
 def __init__(self, e0=0.01, mom=0.9):
     self.optimizer = dy.MomentumSGDTrainer(
         ParamManager.global_collection(), e0, mom)