Example #1
    def add_parameters(self,
                       dropout,
                       lstm_size,
                       optimizer,
                       model_type,
                       gru=True):

        if model_type == "gru":
            self.encoder_rnn = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                             lstm_size, self.model)
            self.encoder_rnn.set_dropout(dropout)
            self.encoder_rnn2 = dy.GRUBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                              lstm_size, self.model)
            self.encoder_rnn2.set_dropout(dropout)
            self.decoder_rnn = dy.GRUBuilder(NUM_LAYERS,
                                             EMBEDDING_SIZE + lstm_size,
                                             lstm_size, self.model)
            self.decoder_rnn.set_dropout(dropout)
        else:
            self.encoder_rnn = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                              lstm_size, self.model)
            self.encoder_rnn.set_dropout(dropout)
            self.encoder_rnn2 = dy.LSTMBuilder(NUM_LAYERS, EMBEDDING_SIZE,
                                               lstm_size, self.model)
            self.encoder_rnn2.set_dropout(dropout)
            self.decoder_rnn = dy.LSTMBuilder(NUM_LAYERS,
                                              EMBEDDING_SIZE + lstm_size,
                                              lstm_size, self.model)
            self.decoder_rnn.set_dropout(dropout)

        global DROPOUT
        DROPOUT = dropout

        self.W1 = self.model.add_parameters((200, lstm_size))
        self.b1 = self.model.add_parameters((200, 1))
        self.W2 = self.model.add_parameters((100, 200))
        self.b2 = self.model.add_parameters((100, 1))
        self.W3 = self.model.add_parameters((len(self.C2I), 100))
        self.b3 = self.model.add_parameters((len(self.C2I), 1))
        self.W_query = self.model.add_parameters((lstm_size, lstm_size))
        self.W_key = self.model.add_parameters((lstm_size, lstm_size))
        self.W_val = self.model.add_parameters((lstm_size, lstm_size))
        self.W_att = self.model.add_parameters((1, EMBEDDING_SIZE))
        self.W_c_s = self.model.add_parameters((lstm_size, EMBEDDING_SIZE))
        self.W_direct = self.model.add_parameters((len(self.C2I), lstm_size))
        self.b_att = self.model.add_parameters((lstm_size, 1))
        self.b_direct = self.model.add_parameters((len(self.C2I), 1))
        self.E_lang = self.model.add_lookup_parameters((7, EMBEDDING_SIZE))

        if optimizer == "sgd":
            self.trainer = dy.SimpleSGDTrainer(self.model)
        elif optimizer == "rms":
            self.trainer = dy.RMSPropTrainer(self.model)
        if optimizer == "cyclic":
            self.trainer = dy.CyclicalSGDTrainer(self.model)
        elif optimizer == "adam":
            self.trainer = dy.AdamTrainer(self.model)
        else:
            self.trainer = dy.AdagradTrainer(self.model)
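The method above only builds parameters and picks a trainer. As context, here is a minimal, self-contained sketch (not taken from the repository above; names like pc, W and b are illustrative) of how a DyNet AdagradTrainer is normally driven once parameters exist: build a fresh computation graph per example, compute a loss, backpropagate, and call update().

import dynet as dy

# Hypothetical toy setup: a single logistic layer trained with Adagrad (DyNet 2.x).
pc = dy.ParameterCollection()
W = pc.add_parameters((1, 3))
b = pc.add_parameters((1,))
trainer = dy.AdagradTrainer(pc, learning_rate=0.1)

data = [([1.0, 2.0, 3.0], 1.0), ([0.5, -1.0, 2.0], 0.0)]
for x_vals, y_val in data:
    dy.renew_cg()                                  # new computation graph per example
    x = dy.inputVector(x_vals)
    y_pred = dy.logistic(W * x + b)                # parameters act directly as expressions
    loss = dy.binary_log_loss(y_pred, dy.scalarInput(y_val))
    loss.forward()
    loss.backward()                                # accumulate gradients
    trainer.update()                               # Adagrad step over the whole collection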
Example #2
 def __init__(self,
              e0: numbers.Real = 0.1,
              eps: numbers.Real = 1e-20,
              skip_noisy: bool = False) -> None:
     super().__init__(optimizer=dy.AdagradTrainer(
         ParamManager.global_collection(), e0, eps=eps),
                      skip_noisy=skip_noisy)
Example #3
 def __init__(self, Cemb, character_idx_map, options):
     model = dy.Model()
     self.trainer = dy.AdagradTrainer(model,
                                      options['lr'])  # we use Adagrad
     self.params = self.initParams(model, Cemb, options)
     self.options = options
     self.model = model
     self.character_idx_map = character_idx_map
Example #4
 def __init__(self, model, num_input, num_hidden, num_out=2):
     self.model = model
     HIDDEN_DIM = 100
     MLP_DIM = 100
     self.trainer = dy.AdagradTrainer(model, 0.01)
     self.W1 = model.add_parameters((num_out, HIDDEN_DIM))
     self.W2 = model.add_parameters((MLP_DIM, num_hidden * 2))
     self.pT = model.add_lookup_parameters((num_out, MLP_DIM))
     self.activation_func = dy.tanh
     self.spec = (num_input, num_hidden, num_out, self.activation_func)
Example #5
    def __init__(self, params, vocab, label2tag, pretrained_embeddings=None):
        """

        :param params:
        :param vocab:
        :param label2tag:
        :param pretrained_embeddings:
        """
        self.dim_w = params.dim_w
        self.win = params.win
        self.vocab = vocab
        self.n_words = len(self.vocab)
        self.dim_asp = params.dim_asp
        self.dim_y_asp = params.n_asp_tags
        self.n_steps = params.n_steps
        self.asp_label2tag = label2tag
        self.dropout_asp = params.dropout_asp
        self.dropout = params.dropout
        self.ds_name = params.ds_name
        self.model_name = params.model_name
        self.attention_type = params.attention_type

        self.pc = dy.ParameterCollection()
        self.Emb = WDEmb(pc=self.pc, n_words=self.n_words, dim_w=self.dim_w,
                         pretrained_embeddings=pretrained_embeddings)
        
        self.DEP_RecNN = DTreeBuilder(pc=self.pc, n_in=self.win * self.dim_w, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        
        self.ASP_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)

        self.BiAttention_F = BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        self.BiAttention_B = BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        self.BiAttention_T = BiAttention(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)

        self.MultiWeightLayer = MultiWeightLayer(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_asp, dropout_rate=self.dropout_asp)

        self.ASP_FC = Linear(pc=self.pc, n_in=self.dim_asp, n_out=self.dim_y_asp)
        
        self.layers = [self.ASP_FC, self.DEP_RecNN, self.BiAttention_F, self.BiAttention_B, self.BiAttention_T, self.MultiWeightLayer]

        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adagrad':
            self.optimizer = dy.AdagradTrainer(self.pc)
        elif params.optimizer == 'adadelta':
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        else:
            raise Exception("Invalid optimizer!!")
Example #6
 def __init__(self, model, num_layers, num_input, num_hidden, num_out):
     self.model = model
     HIDDEN_DIM = 100
     MLP_DIM = 100
     self.trainer = dy.AdagradTrainer(model, 0.01)
     self.pH = model.add_parameters((num_out, HIDDEN_DIM))
     self.pO = model.add_parameters((MLP_DIM, num_hidden * 2))
     self.pT = model.add_lookup_parameters((num_out, MLP_DIM))
     self.builders = [
         dy.LSTMBuilder(num_layers, num_input, num_hidden, model),
         dy.LSTMBuilder(num_layers, num_input, num_hidden, model)
     ]
     self.activation_func = dy.tanh
     self.spec = (num_input, num_hidden, num_out, self.activation_func)
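For readers less familiar with the builder API used above, here is a short illustrative sketch (assumed usage, not code from this class) of how the two LSTMBuilder objects in self.builders are typically run as a BiLSTM: transduce the sentence left-to-right and right-to-left, then concatenate the per-token outputs to get vectors of size num_hidden * 2, matching the (MLP_DIM, num_hidden * 2) projection above.

import dynet as dy

pc = dy.ParameterCollection()
fwd = dy.LSTMBuilder(1, 64, 128, pc)   # num_layers, num_input, num_hidden
bwd = dy.LSTMBuilder(1, 64, 128, pc)

dy.renew_cg()
sent = [dy.inputVector([0.0] * 64) for _ in range(5)]        # dummy word embeddings
f_out = fwd.initial_state().transduce(sent)                  # left-to-right outputs
b_out = list(reversed(bwd.initial_state().transduce(list(reversed(sent)))))
bi = [dy.concatenate([f, b]) for f, b in zip(f_out, b_out)]  # num_hidden * 2 per token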
Example #7
    def __init__(self, embedding_size, hidden_size, labels_size, embedding):
        self.embedding = embedding
        self.model = dy.Model()
        self.trainer = dy.AdagradTrainer(self.model, 0.05)

        self.linear = self.model.add_parameters((embedding_size, hidden_size))
        self.feed_F = FeedForward(self.model, (hidden_size, hidden_size),
                                  (hidden_size, hidden_size), 0.2)
        self.feed_G = FeedForward(self.model, (hidden_size, 2 * hidden_size),
                                  (hidden_size, hidden_size), 0.2)

        self.h_step_1 = self.model.add_parameters(
            (2 * hidden_size, hidden_size))
        self.h_step_2 = self.model.add_parameters((hidden_size, hidden_size))

        self.linear2 = self.model.add_parameters((hidden_size, labels_size))
Example #8
 def __init__(self, exp_global=Ref(Path("exp_global")), e0=0.1, eps=1e-20):
     self.optimizer = dy.AdagradTrainer(
         exp_global.dynet_param_collection.param_col, e0, eps=eps)
Example #9
                                ds.dev.matrices])
    te_graphs = ds.dev.matrices if opts.eval_dev \
                                else ds.test.matrices

    if opts.model is not None:
        # load and skip training (eval mode)
        timeprint('loading association model from file: {}'.format(opts.model))
        assoc_model = AssociationModel(tr_graphs,
                                       embs,
                                       opts.assoc_mode,
                                       model_path=opts.model)
    else:
        # training phase
        assoc_model = AssociationModel(tr_graphs, embs, opts.assoc_mode,
                                       opts.dropout)
        trainer = dy.AdagradTrainer(assoc_model.model, opts.learning_rate)
        with open(
                'assoc-pred-train-log-{}_{}.txt'.format(
                    start_time.date(), start_time.time()), 'a') as log_file:
            if opts.no_log:
                log_file = None
            else:
                log_file.write('====\n')
            iteration_losses = []  # will hold loss averages
            dev_mrrs = []
            saved_name = None

            N = assoc_model.vocab_size
            for ep in range(opts.epochs):
                # report
                if opts.v > 0:
Example #10
def train_model(model, encoder, decoder, params, train_inputs, train_outputs,
                dev_inputs, dev_outputs, y2int, int2y, epochs, optimization,
                results_file_path, plot, batch_size, eval_after):
    print 'training...'

    np.random.seed(17)
    random.seed(17)

    # sort training sentences by length in descending order
    train_data = zip(train_inputs, train_outputs)
    train_data.sort(key=lambda t: -len(t[0]))
    train_order = [
        x * batch_size for x in range(len(train_data) / batch_size + 1)
    ]

    # sort dev sentences by length in descending order
    dev_batch_size = 1
    dev_data = zip(dev_inputs, dev_outputs)
    dev_data.sort(key=lambda t: -len(t[0]))
    dev_order = [
        x * dev_batch_size for x in range(len(dev_data) / dev_batch_size + 1)
    ]

    if optimization == 'ADAM':
        trainer = dn.AdamTrainer(
            model
        )  # lam=REGULARIZATION, alpha=LEARNING_RATE, beta_1=0.9, beta_2=0.999, eps=1e-8)
    elif optimization == 'MOMENTUM':
        trainer = dn.MomentumSGDTrainer(model)
    elif optimization == 'SGD':
        trainer = dn.SimpleSGDTrainer(model)
    elif optimization == 'ADAGRAD':
        trainer = dn.AdagradTrainer(model)
    elif optimization == 'ADADELTA':
        trainer = dn.AdadeltaTrainer(model)
    else:
        trainer = dn.SimpleSGDTrainer(model)

    trainer.set_clip_threshold(float(arguments['--grad-clip']))
    seen_examples_count = 0
    total_loss = 0
    best_dev_epoch = 0
    best_train_epoch = 0
    patience = 0
    train_len = len(train_outputs)
    dev_len = len(dev_inputs)
    avg_train_loss = -1
    train_loss_patience = 0
    train_loss_patience_threshold = 99999999
    max_patience = int(arguments['--max-patience'])
    log_path = results_file_path + '_log.txt'
    start_epoch, checkpoints_x, train_loss_y, dev_loss_y, dev_accuracy_y = read_from_log(
        log_path)

    if len(train_loss_y) > 0:
        total_batches = checkpoints_x[-1]
        best_avg_train_loss = min(train_loss_y)
        best_dev_accuracy = max(dev_accuracy_y)
        best_dev_loss = min(dev_loss_y)
    else:
        total_batches = 0
        best_avg_train_loss = 999999
        best_dev_loss = 999999
        best_dev_accuracy = 0

    # progress bar init
    # noinspection PyArgumentList
    widgets = [progressbar.Bar('>'), ' ', progressbar.ETA()]
    train_progress_bar = progressbar.ProgressBar(widgets=widgets,
                                                 maxval=epochs).start()

    for e in xrange(start_epoch, epochs):

        # shuffle the batch start indices in each epoch
        random.shuffle(train_order)
        batches_per_epoch = len(train_order)
        start = time.time()

        # go through batches
        for i, batch_start_index in enumerate(train_order, start=1):
            total_batches += 1

            # get batch examples
            batch_inputs = [
                x[0] for x in train_data[batch_start_index:batch_start_index +
                                         batch_size]
            ]
            batch_outputs = [
                x[1] for x in train_data[batch_start_index:batch_start_index +
                                         batch_size]
            ]
            actual_batch_size = len(batch_inputs)

            # skip empty batches
            if actual_batch_size == 0 or len(batch_inputs[0]) == 0:
                continue

            # compute batch loss
            loss = compute_batch_loss(encoder, decoder, batch_inputs,
                                      batch_outputs, y2int)

            # forward pass
            total_loss += loss.scalar_value()
            loss.backward()

            # update parameters
            trainer.update()

            seen_examples_count += actual_batch_size

            # avg loss per sample
            avg_train_loss = total_loss / float(i * batch_size + e * train_len)

            # start patience counts only after 20 batches
            if avg_train_loss < best_avg_train_loss and total_batches > 20:
                best_avg_train_loss = avg_train_loss
                train_loss_patience = 0
            else:
                train_loss_patience += 1
                if train_loss_patience > train_loss_patience_threshold:
                    print 'train loss patience exceeded: {}'.format(
                        train_loss_patience)
                    return model, params, e, best_train_epoch

            if total_batches % 100 == 0 and total_batches > 0:
                print 'epoch {}: {} batches out of {} ({} examples out of {}) total: {} batches, {} examples. avg \
loss per example: {}'.format(e, i, batches_per_epoch, i * batch_size,
                             train_len, total_batches,
                             total_batches * batch_size, avg_train_loss)

                # print sentences per second
                end = time.time()
                elapsed_seconds = end - start
                print '{} sentences per second'.format(seen_examples_count /
                                                       elapsed_seconds)
                seen_examples_count = 0
                start = time.time()

            # checkpoint
            if total_batches % eval_after == 0:

                print 'starting checkpoint evaluation'
                dev_bleu, dev_loss = checkpoint_eval(
                    encoder,
                    decoder,
                    params,
                    dev_batch_size,
                    dev_data,
                    dev_inputs,
                    dev_len,
                    dev_order,
                    dev_outputs,
                    int2y,
                    y2int,
                    results_file_path=results_file_path)

                log_to_file(log_path, e, total_batches, avg_train_loss,
                            dev_loss, dev_bleu)
                save_model(model,
                           results_file_path,
                           total_batches,
                           models_to_save=int(arguments['--models-to-save']))
                if dev_bleu >= best_dev_accuracy:
                    best_dev_accuracy = dev_bleu
                    best_dev_epoch = e

                    # save best model to disk
                    save_best_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                if dev_loss < best_dev_loss:
                    best_dev_loss = dev_loss

                print 'epoch: {0} train loss: {1:.4f} dev loss: {2:.4f} dev bleu: {3:.4f} \
best dev bleu {4:.4f} (epoch {5}) patience = {6}'.format(
                    e, avg_train_loss, dev_loss, dev_bleu, best_dev_accuracy,
                    best_dev_epoch, patience)

                if patience == max_patience:
                    print 'out of patience after {0} checkpoints'.format(
                        str(e))
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    print 'checkpoint patience exceeded'
                    return model, params, e, best_train_epoch

                # plotting results from checkpoint evaluation
                if plot:
                    train_loss_y.append(avg_train_loss)
                    checkpoints_x.append(total_batches)
                    dev_accuracy_y.append(dev_bleu)
                    dev_loss_y.append(dev_loss)

                    y_vals = [('train_loss', train_loss_y),
                              ('dev loss', dev_loss_y),
                              ('dev_bleu', dev_accuracy_y)]
                    common.plot_to_file(y_vals,
                                        x_name='total batches',
                                        x_vals=checkpoints_x,
                                        file_path=results_file_path +
                                        '_learning_curve.png')

        # update progress bar after completing epoch
        train_progress_bar.update(e)

    # update progress bar after completing training
    train_progress_bar.finish()

    if plot:
        # clear plot when done
        plt.cla()

    print 'finished training. average loss: {} best epoch on dev: {} best epoch on train: {}'.format(
        str(avg_train_loss), best_dev_epoch, best_train_epoch)

    return model, params, e, best_train_epoch
Example #11
BEGIN_TOKEN = '<s>'
END_TOKEN = '<e>'

# define model and obtain vocabulary
# (reload vocab files is saved model or create new vocab files if new model)

model = dynet.Model()

if not args.trainer or args.trainer == "simple_sgd":
    trainer = dynet.SimpleSGDTrainer(model)
elif args.trainer == "momentum_sgd":
    trainer = dynet.MomentumSGDTrainer(model)
elif args.trainer == "adadelta":
    trainer = dynet.AdadeltaTrainer(model)
elif args.trainer == "adagrad":
    trainer = dynet.AdagradTrainer(model)
elif args.trainer == "adam":
    trainer = dynet.AdamTrainer(model)
else:
    raise Exception("Trainer not recognized! Please use one of {simple_sgd, momentum_sgd, adadelta, adagrad, adam}")

trainer.set_clip_threshold(-1.0)
trainer.set_sparse_updates(True)

# load corpus

print "Loading corpus..."
train_data = list(util.get_reader(args.reader_mode)(args.train, mode=args.reader_mode, begin=BEGIN_TOKEN, end=END_TOKEN))
if args.valid:
    valid_data = list(util.get_reader(args.reader_mode)(args.valid, mode=args.reader_mode, begin=BEGIN_TOKEN, end=END_TOKEN))
else:
Example #12
def main():
    parser = argparse.ArgumentParser(description='Train attention model')
    parser.add_argument('--model_path', default=None, type=str)
    parser.add_argument('--checkpoint_dir', default='./checkpoints', type=str)
    parser.add_argument('--train_set', default='./train_set', type=str)
    parser.add_argument('--train_set_dmp', default='./train_set.dmp', type=str)
    parser.add_argument('--valid_set', default='./valid_set', type=str)
    parser.add_argument('--valid_set_dmp', default='./valid_set_dmp', type=str)
    parser.add_argument('--vocab_path', default='./vocab.dmp', type=str)
    parser.add_argument('--unk_threshold', default=20, type=int)
    parser.add_argument('--batch_size', default=8, type=int)
    parser.add_argument('--trainer',
                        default='adam',
                        choices={'sgd', 'adam', 'adagrad'},
                        type=str)
    parser.add_argument('--type_embed_dim', default=128, type=int)
    parser.add_argument('--literal_embed_dim', default=128, type=int)
    parser.add_argument('--byte_embed_dim', default=64, type=int)
    parser.add_argument('--hash_dim', default=64, type=int)
    parser.add_argument('--att_dim', default=64, type=int)
    parser.add_argument('--num_layers', default=2, type=int)
    parser.add_argument('--hidden_dim', default=256, type=int)
    parser.add_argument('--dropout', default=None, type=float)
    parser.add_argument('--seed', default=11927, type=int)

    args, _ = parser.parse_known_args()

    if not os.path.exists(args.train_set_dmp):
        train_set = []
        for path in glob.glob('%s/*.py' % args.train_set):
            with codecs.open(path, 'r', 'utf-8') as f:
                train_set.append(tokenize_without_empty_tail(f.read()))
        with open(args.train_set_dmp, 'wb') as f:
            pickle.dump(train_set, f)
    else:
        with open(args.train_set_dmp, 'rb') as f:
            train_set = pickle.load(f)
    train_set = [tokens for tokens in train_set if len(tokens) < 4000]

    print('size of train_set:', len(train_set))

    token_literal_counters = defaultdict(lambda: defaultdict(int))
    for token_type, token_literal in chain(*map(set, train_set)):
        token_literal_counters[token_type][token_literal] += 1

    if not os.path.exists(args.vocab_path):
        type_vocabs = {
            token_type: {
                literal
                for literal, count in literal_counters.items()
                if count > args.unk_threshold
            }
            for token_type, literal_counters in token_literal_counters.items()
        }
        for token_type in tok_name:
            if token_type not in type_vocabs:
                type_vocabs[token_type] = set()
        with open(args.vocab_path, 'wb') as f:
            pickle.dump(type_vocabs, f)
    else:
        with open(args.vocab_path, 'rb') as f:
            type_vocabs = pickle.load(f)
    print(
        'vocab_types:', {
            tok_name[token_type]: len(type_vocab)
            for token_type, type_vocab in type_vocabs.items()
            if len(type_vocab) > 2
        })

    copyable_types = {STRING, NAME, NUMBER}
    print('copyable_types:',
          {tok_name[token_type]
           for token_type in copyable_types})

    if not os.path.exists(args.valid_set_dmp):
        valid_set = []
        for path in glob.glob('%s/*.py' % args.valid_set):
            with codecs.open(path, 'r', 'utf-8') as f:
                valid_set.append(tokenize_without_empty_tail(f.read()))
        with open(args.valid_set_dmp, 'wb') as f:
            pickle.dump(valid_set, f)
    else:
        with open(args.valid_set_dmp, 'rb') as f:
            valid_set = pickle.load(f)

    print('size of valid_set:', len(valid_set))

    random.seed(args.seed)

    model = dy.ParameterCollection()

    if args.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(model)
    elif args.trainer == 'adam':
        trainer = dy.AdamTrainer(model)
    elif args.trainer == 'adagrad':
        trainer = dy.AdagradTrainer(model)

    decoder = Decoder(model, type_vocabs, copyable_types, args.type_embed_dim,
                      args.literal_embed_dim, args.byte_embed_dim,
                      args.hash_dim, args.att_dim, args.num_layers,
                      args.hidden_dim)

    if not os.path.exists(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)

    if args.model_path is None:
        model.save('%s/init.dmp' % args.checkpoint_dir)
    else:
        model.populate(args.model_path)

    if args.dropout is not None:
        decoder.set_dropout(args.dropout)

    num_samples = len(train_set)
    for num_epoch in itertools.count(1):
        random.shuffle(train_set)
        epoch_loss = 0.0
        epoch_seq_length = 0
        batch_losses = []
        hash_cache = {}
        batch_seq_length = 0
        num_batch = 0
        dy.renew_cg()
        for i, (tokens) in enumerate(train_set, 1):
            print('batch', i, len(tokens))
            loss = cal_loss(decoder, hash_cache, tokens)
            batch_losses.append(loss)
            batch_seq_length += len(tokens)
            epoch_seq_length += len(tokens)
            if i % args.batch_size == 0 or i == num_samples:
                batch_loss = dy.esum(batch_losses) / len(batch_losses)
                batch_loss.backward()
                trainer.update()
                batch_loss_value = batch_loss.value()
                epoch_loss += batch_loss_value
                dy.renew_cg()
                num_batch += 1
                batch_losses = []
                hash_cache = {}
                if num_batch % 20 == 0:
                    batch_per_item_loss = batch_loss_value / batch_seq_length
                    epoch_perplexity = math.exp(epoch_loss / epoch_seq_length)
                    print('epoch %d, batch %d, batch_per_item_loss %f, epoch_perplexity %f' % \
                          (num_epoch, num_batch, batch_per_item_loss, epoch_perplexity))
                batch_seq_length = 0
        model.save('%s/epoch_%d.dmp' % (args.checkpoint_dir, num_epoch))
Example #13
def main():
    parser = argparse.ArgumentParser(description='Train attention model')
    parser.add_argument('--model_path', default=None, type=str)
    parser.add_argument('--checkpoint_dir', default='./checkpoints', type=str)
    parser.add_argument('--vocab_file', default='./vocab.dmp', type=str)
    parser.add_argument('--train_set', default='./train_set.dmp', type=str)
    parser.add_argument('--valid_set', default='./valid_set.dmp', type=str)
    parser.add_argument('--batch_size', default=64, type=int)
    parser.add_argument('--trainer',
                        default='adam',
                        choices={'sgd', 'adam', 'adagrad'},
                        type=str)
    parser.add_argument('--word_embed_dim', default=256, type=int)
    parser.add_argument('--encoder_num_layers', default=2, type=int)
    parser.add_argument('--encoder_state_dim', default=256, type=int)
    parser.add_argument('--op_embed_dim', default=32, type=int)
    parser.add_argument('--num_embed_dim', default=256, type=int)
    parser.add_argument('--sign_embed_dim', default=64, type=int)
    parser.add_argument('--att_dim', default=128, type=int)
    parser.add_argument('--decoder_num_layers', default=2, type=int)
    parser.add_argument('--decoder_state_dim', default=256, type=int)
    parser.add_argument('--dropout', default=None, type=float)
    parser.add_argument('--seed', default=11747, type=int)
    parser.add_argument('--max_op_count', default=50, type=int)

    args, _ = parser.parse_known_args()

    with open(args.vocab_file, 'rb') as f:
        op_names, word2wid, wid2word, num2nid, nid2num = pickle.load(f)
        op_names = sorted(op_names)

    with open(args.train_set, 'rb') as f:
        train_set = pickle.load(f)

    if len(train_set) > 0 and len(train_set[0][2][0]) == 8:
        print('add expr values...')
        train_set = add_expr_val(train_set)
        with open(args.train_set, 'wb') as f:
            pickle.dump(train_set, f)

    if len(train_set) > 0 and type(train_set[0][0][0]) == str:
        print('add num values...')
        train_set = add_num_val(train_set)
        with open(args.train_set, 'wb') as f:
            pickle.dump(train_set, f)

    print('size of train_set:', len(train_set))

    random.seed(args.seed)

    model = dy.ParameterCollection()

    if args.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(model)
    elif args.trainer == 'adam':
        trainer = dy.AdamTrainer(model)
    elif args.trainer == 'adagrad':
        trainer = dy.AdagradTrainer(model)

    encoder = Encoder(model, word2wid, args.word_embed_dim,
                      args.encoder_num_layers, args.encoder_state_dim)
    decoder = Decoder(model, op_names, args.op_embed_dim, num2nid,
                      args.num_embed_dim, args.sign_embed_dim,
                      args.encoder_state_dim, args.att_dim,
                      args.decoder_num_layers, args.decoder_state_dim)

    if not os.path.exists(args.checkpoint_dir):
        os.makedirs(args.checkpoint_dir)

    if args.model_path is None:
        model.save('%s/init.dmp' % args.checkpoint_dir)
    else:
        model.populate(args.model_path)

    if args.dropout is not None:
        encoder.set_dropout(args.dropout)
        decoder.set_dropout(args.dropout)

    num_problems = len(train_set)
    for num_epoch in itertools.count(1):
        random.shuffle(train_set)
        epoch_loss = 0.0
        epoch_seq_length = 0
        batch_losses = []
        batch_seq_length = 0
        num_batch = 0
        dy.renew_cg()
        for i, (question, options, trace, input_num_indexes,
                answer) in enumerate(train_set, 1):
            problem_loss = cal_loss(encoder, decoder, question, options,
                                    input_num_indexes, trace, answer)
            batch_losses.append(problem_loss)
            batch_seq_length += len(trace)
            epoch_seq_length += len(trace)
            if i % args.batch_size == 0 or i == num_problems:
                batch_loss = dy.esum(batch_losses) / len(batch_losses)
                batch_loss.backward()
                trainer.update()
                batch_loss_value = batch_loss.value()
                batch_per_item_loss = batch_loss_value / batch_seq_length
                epoch_loss += batch_loss_value
                epoch_perplexity = math.exp(epoch_loss / epoch_seq_length)
                dy.renew_cg()
                num_batch += 1
                batch_losses = []
                batch_seq_length = 0
                if num_batch % 20 == 0:
                    print('epoch %d, batch %d, batch_per_item_loss %f, epoch_perplexity %f' % \
                          (num_epoch, num_batch, batch_per_item_loss, epoch_perplexity))
        model.save('%s/epoch_%d.dmp' % (args.checkpoint_dir, num_epoch))
Example #14
    def __init__(self, params, vocab, label2tag, pretrained_embeddings=None):
        """

        :param params:
        :param vocab:
        :param label2tag:
        :param pretrained_embeddings:
        """
        self.dim_w = params.dim_w
        self.win = params.win
        self.vocab = vocab
        self.n_words = len(self.vocab)
        self.dim_asp = params.dim_asp
        self.dim_opi = params.dim_opi
        self.dim_y_asp = params.n_asp_tags
        self.dim_y_opi = params.n_opi_tags
        self.n_steps = params.n_steps
        self.asp_label2tag = label2tag
        self.opi_label2tag = {0: 'O', 1: 'T'}
        self.dropout_asp = params.dropout_asp
        self.dropout_opi = params.dropout_opi
        self.dropout = params.dropout
        self.rnn_type = params.rnn_type
        self.ds_name = params.ds_name
        self.model_name = params.model_name
        self.attention_type = params.attention_type

        self.pc = dy.ParameterCollection()
        self.Emb = WDEmb(pc=self.pc, n_words=self.n_words, dim_w=self.dim_w,
                         pretrained_embeddings=pretrained_embeddings)
        #self.ASP_RNN = LSTM(pc=self.pc, n_in=self.win*self.dim_w, n_out=self.dim_asp, dropout_rate=self.dropout_asp)
        #self.OPI_RNN = LSTM(pc=self.pc, n_in=self.win*self.dim_w, n_out=self.dim_opi, dropout_rate=self.dropout_opi)
        # use dynet RNNBuilder rather than the self-defined RNN classes
        if self.rnn_type == 'LSTM':
            self.ASP_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)
            self.OPI_RNN = dy.LSTMBuilder(1, self.win * self.dim_w, self.dim_opi, self.pc)
        elif self.rnn_type == 'GRU':
            # NOT TRIED!
            self.ASP_RNN = dy.GRUBuilder(1, self.win * self.dim_w, self.dim_asp, self.pc)
            self.OPI_RNN = dy.GRUBuilder(1, self.win * self.dim_w, self.dim_opi, self.pc)
        else:
            raise Exception("Invalid RNN type!!!")
        self.THA = THA(pc=self.pc, n_steps=self.n_steps, n_in=2*self.dim_asp)
        if self.attention_type == 'bilinear':
            self.STN = ST_bilinear(pc=self.pc, dim_asp=self.dim_asp, dim_opi=self.dim_opi)
        # here dot attention is not applicable since the aspect representation and opinion representation
        # have different dimensions
        # elif self.attention_type == 'dot':
        #    self.STN = ST_dot(pc=self.pc, dim_asp=self.dim_asp, dim_opi=self.dim_opi)
        elif self.attention_type == 'concat':
            self.STN = ST_concat(pc=self.pc, dim_asp=self.dim_asp, dim_opi=self.dim_opi)
        else:
            raise Exception("Invalid attention type!!!")

        self.ASP_FC = Linear(pc=self.pc, n_in=2*self.dim_asp+2*self.dim_opi, n_out=self.dim_y_asp)
        self.OPI_FC = Linear(pc=self.pc, n_in=2*self.dim_opi, n_out=self.dim_y_opi)

        self.layers = [self.ASP_FC, self.OPI_FC, self.THA, self.STN]

        if params.optimizer == 'sgd':
            self.optimizer = dy.SimpleSGDTrainer(self.pc, params.sgd_lr)
        elif params.optimizer == 'momentum':
            self.optimizer = dy.MomentumSGDTrainer(self.pc, 0.01, 0.9)
        elif params.optimizer == 'adam':
            self.optimizer = dy.AdamTrainer(self.pc, 0.001, 0.9, 0.9)
        elif params.optimizer == 'adagrad':
            self.optimizer = dy.AdagradTrainer(self.pc)
        elif params.optimizer == 'adadelta':
            # use default value of adadelta
            self.optimizer = dy.AdadeltaTrainer(self.pc)
        else:
            raise Exception("Invalid optimizer!!")
Example #15
def main():
    parser = argparse.ArgumentParser(description='Train attention model')
    parser.add_argument('--nl_embed_dim', default=256, type=int)
    parser.add_argument('--nl_rnn_layers', default=1, type=int)
    parser.add_argument('--nl_rnn_state_dim', default=256, type=int)
    parser.add_argument('--code_embed_dim', default=256, type=int)
    parser.add_argument('--code_rnn_layers', default=1, type=int)
    parser.add_argument('--code_rnn_state_dim', default=256, type=int)
    # parser.add_argument('--rnn_token_mlp_dim', default=128, type=int)
    # parser.add_argument('--rnn_type_mlp_dim', default=32, type=int)
    # parser.add_argument('--rnn_word_mlp_dim', default=128, type=int)
    parser.add_argument('--attention_dim', default=256, type=int)
    parser.add_argument('--dropout', default=0.5, type=float)
    parser.add_argument('--rnn_dropout', default=0.2, type=float)
    parser.add_argument('--nl_to_code', default=True, action='store_true')
    parser.add_argument('--code_to_nl', dest='nl_to_code', action='store_false')
    parser.add_argument('--vocab_file', default='./vocab.dmp', type=str)
    parser.add_argument('--batch_size', default=32, type=int)
    parser.add_argument('--train_set', default='./train.txt', type=str)
    parser.add_argument('--valid_set', default='./valid.txt', type=str)
    parser.add_argument('--trainer', default='adam', choices={'sgd', 'adam', 'adagrad'}, type=str)
    # TODO: Commented out for now, could implement the learning rate if necessary
    # parser.add_argument('--learning_rate', type=float)

    args, unknown = parser.parse_known_args()
    
    is_nl2code = args.nl_to_code
    nl_voc2wid, nl_wid2voc, code_voc2wid, code_wid2voc = load_vocabs(args.vocab_file)
    args.nl_vocab_size = len(nl_wid2voc)
    args.code_vocab_size = len(code_wid2voc)
    args.num_token_type = len(tok_type2id) + 1  # count the undefined token for <S> and </S>
    
    if is_nl2code:
        model, translator = new_nl2code_model(args)
        config_name = 'nl2code'
    else:
        model, translator = new_code2nl_model(args)
        config_name = 'code2nl'
    
    config_name = datetime.now().strftime(config_name + '_%m%d%H%M%S')

    logging.basicConfig(filename=config_name + '.log', level=logging.DEBUG, format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
    
    if args.trainer == 'sgd':
        trainer = dy.SimpleSGDTrainer(model)
    elif args.trainer == 'adam':
        trainer = dy.AdamTrainer(model)
    elif args.trainer == 'adagrad':
        trainer = dy.AdagradTrainer(model)
    
    # if args.learning_rate is None:
    #     args.learning_rate = learning_rate
    
    def lookup_nl(seqs):
        return [[START] + map(lambda w:nl_voc2wid[w], seq) + [END] for seq in seqs]
    
    def lookup_code(seqs):
        return [[START] + map(lambda w:code_voc2wid[w[1]], seq) + [END] for seq in seqs]
    
    tokenized_nl_train, tokenized_code_train = read_data(args.train_set)
    tokenized_nl_valid, tokenized_code_valid = read_data(args.valid_set)
    
    nl_train = lookup_nl(tokenized_nl_train)
    nl_valid = lookup_nl(tokenized_nl_valid)
    code_train = lookup_code(tokenized_code_train)
    code_valid = lookup_code(tokenized_code_valid)
    
    if is_nl2code:
        train_pairs = partition(zip(nl_train, code_train))
        valid_pairs = partition(zip(nl_valid, code_valid))
    else:
        train_pairs = partition(zip(code_train, nl_train))
        valid_pairs = partition(zip(code_valid, nl_valid))
    
    def validate_loss():
        cum_loss = 0.0
        cum_trg_item_count = 0
        for batch_pairs in batch_iter(valid_pairs, args.batch_size):
            src_seqs, trg_seqs = map(list, zip(*batch_pairs))
            dy.renew_cg()
            batch_loss = translator.calc_loss(src_seqs, trg_seqs, training=False)
            cum_loss += batch_loss.value()
            cum_trg_item_count += sum(map(len, trg_seqs))
        return cum_loss, cum_trg_item_count

    logging.info('config: %s', args)
    logging.info('nl vocab size: %d, code vocab size: %d' % (len(nl_voc2wid), len(code_voc2wid)))

    min_v_cum_loss = 1e20
    
    for epoch in count(1):
        epoch_cum_loss = 0.0
        epoch_cum_trg_item_count = 0
        for batch_id, batch_pairs in enumerate(batch_iter(train_pairs, args.batch_size), 1):
            src_seqs, trg_seqs = map(list, zip(*batch_pairs))
            dy.renew_cg()
            batch_loss = translator.calc_loss(src_seqs, trg_seqs, training=True)
            batch_loss.backward()
            trainer.update()
            batch_loss_value = batch_loss.value()
            batch_trg_item_count = sum(map(len, trg_seqs))
            batch_per_item_loss = batch_loss_value / batch_trg_item_count
            epoch_cum_loss += batch_loss_value
            epoch_cum_trg_item_count += batch_trg_item_count
            epoch_cum_perplexity = math.exp(epoch_cum_loss / epoch_cum_trg_item_count)
            if batch_id % 100 == 0:
                logging.info('epoch %d, batch %d, batch_per_item_loss %f, epoch_cum_perplexity %f' %
                             (epoch, batch_id, batch_per_item_loss, epoch_cum_perplexity))
    
        epoch_cum_perplexity = math.exp(epoch_cum_loss / epoch_cum_trg_item_count)
        logging.info('epoch %d, #training item count#\t%d' % (epoch, epoch_cum_trg_item_count))
        logging.info('epoch %d, #training total loss#\t%f' % (epoch, epoch_cum_loss))
        logging.info('epoch %d, #training per item loss#\t%f' % (epoch, epoch_cum_loss / epoch_cum_trg_item_count))
        logging.info('epoch %d, #training perplexity#\t%f' % (epoch, epoch_cum_perplexity))
        
        v_cum_loss, v_cum_trg_item_count = validate_loss()
        v_cum_perplexity = math.exp(v_cum_loss / v_cum_trg_item_count)
        logging.info('epoch %d, #validation item count#\t%d' % (epoch, v_cum_trg_item_count))
        logging.info('epoch %d, #validation total loss#\t%f' % (epoch, v_cum_loss))
        logging.info('epoch %d, #validation per item loss#\t%f' % (epoch, v_cum_loss / v_cum_trg_item_count))
        logging.info('epoch %d, #validation perplexity#\t%f' % (epoch, v_cum_perplexity))
        
        if v_cum_loss < min_v_cum_loss:
            min_v_cum_loss = v_cum_loss
            min_v_cum_perplexity = v_cum_perplexity
            dmp_name = config_name + '_model_dmp'
            model.save(dmp_name+'.data')
            with open(dmp_name+'.meta', 'wb') as f:
              for k, v in vars(args).items():
                f.write('--{}\t{}\n'.format(k, v))
            logging.info('epoch %d, model saved to %s' % (epoch, dmp_name))
Example #16
 def __init__(self, e0=0.1, eps=1e-20):
     self.optimizer = dy.AdagradTrainer(ParamManager.global_collection(),
                                        e0,
                                        eps=eps)
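Several of the wrappers above (Examples #2, #8 and #16) expose only the initial learning rate e0 and eps. As a rough reference, the per-parameter Adagrad update they configure behaves approximately like the numpy sketch below; this is an illustration of the algorithm, not DyNet's internal implementation, and the exact placement of eps can differ between implementations.

import numpy as np

def adagrad_step(param, grad, accum, e0=0.1, eps=1e-20):
    """One Adagrad update: shrink the step for coordinates with large accumulated gradients."""
    accum = accum + grad ** 2                          # running sum of squared gradients
    param = param - e0 * grad / (np.sqrt(accum) + eps)
    return param, accum

# Toy usage on a 3-dimensional parameter vector.
w = np.zeros(3)
g_sq = np.zeros(3)
for grad in (np.array([0.5, -1.0, 0.1]), np.array([0.2, 0.3, -0.4])):
    w, g_sq = adagrad_step(w, grad, g_sq)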
Example #17
    def __init__(self,
                 word_count,
                 tag_count,
                 word_dims,
                 tag_dims,
                 lstm_units,
                 hidden_units,
                 struct_out,
                 label_out,
                 droprate=0,
                 struct_spans=4,
                 label_spans=3,
                 optimizer=1):

        self.word_count = word_count
        self.tag_count = tag_count
        self.word_dims = word_dims
        self.tag_dims = tag_dims
        self.lstm_units = lstm_units
        self.hidden_units = hidden_units
        self.struct_out = struct_out
        self.label_out = label_out

        self.droprate = droprate

        self.model = dynet.Model()

        if optimizer == 1:
            self.trainer = dynet.SimpleSGDTrainer(self.model)
        elif optimizer == 2:
            self.trainer = dynet.MomentumSGDTrainer(self.model)
        elif optimizer == 3:
            self.trainer = dynet.AdagradTrainer(self.model,
                                                learning_rate=0.01,
                                                eps=0.001)
        elif optimizer == 4:
            self.trainer = dynet.RMSPropTrainer(self.model)
        elif optimizer == 5:
            self.trainer = dynet.AdamTrainer(self.model)
        random.seed(1)

        self.activation = dynet.rectify

        self.word_embed = self.model.add_lookup_parameters(
            (word_count, word_dims), )
        self.tag_embed = self.model.add_lookup_parameters(
            (tag_count, tag_dims), )

        self.fwd_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)
        self.back_lstm1 = LSTM(word_dims + tag_dims, lstm_units, self.model)

        self.fwd_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)
        self.back_lstm2 = LSTM(2 * lstm_units, lstm_units, self.model)

        self.struct_hidden_W = self.model.add_parameters(
            (hidden_units, 4 * struct_spans * lstm_units),
            dynet.UniformInitializer(0.01),
        )
        self.struct_hidden_b = self.model.add_parameters(
            (hidden_units, ),
            dynet.ConstInitializer(0),
        )
        self.struct_output_W = self.model.add_parameters(
            (struct_out, hidden_units),
            dynet.ConstInitializer(0),
        )
        self.struct_output_b = self.model.add_parameters(
            (struct_out, ),
            dynet.ConstInitializer(0),
        )

        self.label_hidden_W = self.model.add_parameters(
            (hidden_units, 4 * label_spans * lstm_units),
            dynet.UniformInitializer(0.01),
        )
        self.label_hidden_b = self.model.add_parameters(
            (hidden_units, ),
            dynet.ConstInitializer(0),
        )
        self.label_output_W = self.model.add_parameters(
            (label_out, hidden_units),
            dynet.ConstInitializer(0),
        )
        self.label_output_b = self.model.add_parameters(
            (label_out, ),
            dynet.ConstInitializer(0),
        )
Example #18
words.append("_UNK_")
chars.add("<*>")

vw = Vocab.from_corpus([words]) 
vt = Vocab.from_corpus([tags])
vc = Vocab.from_corpus([chars])
UNK = vw.w2i["_UNK_"]

nwords = vw.size()
ntags  = vt.size()
nchars  = vc.size()

# DyNet Starts

model = dy.Model()
trainer = dy.AdagradTrainer(model)

NUM_LAYERS = 1

embeddings, emb_dim = load_embeddings_file(embedding)
# init model parameters and initialize them
WORDS_LOOKUP = model.add_lookup_parameters((nwords, emb_dim))
CHARS_LOOKUP = model.add_lookup_parameters((nchars, 20))
init = 0
UNK_vec = np.random.rand(emb_dim)

notfound = found = 0.0
for word in vw.w2i.keys():
    # for those words we have already in w2i, update vector, otherwise add to w2i (since we keep data as integers)
    if word in embeddings.keys():
        found+=1
Example #19
def run(filename):

    startTime = time.time()

    # read word embedding
    word_embedding_size = 300
    word_embedding_file = "small_glove.txt"
    word_embedding = []
    with open(word_embedding_file, 'r') as f:
        for (counter, line) in enumerate(f):
            if counter == 0:
                word_embedding_length = int(line)
            else:
                word_embedding.append(
                    np.asarray([float(i)
                                for i in line.split()]).reshape(1, -1))

    word_embedding = np.concatenate(word_embedding, axis=0)
    print(word_embedding.shape)
    print(word_embedding_length)

    # read tree_data
    tree_data_file = "array_tree.txt"
    scores = []
    words = []
    lchs = []
    rchs = []
    with open(tree_data_file, 'r') as f:
        for (counter, line) in enumerate(f):
            if counter == 0:
                tree_data_size = int(line)
            else:
                temp = np.asarray([int(i) for i in line.split()])
                if (counter - 1) % 5 == 1: scores.append(temp)
                elif (counter - 1) % 5 == 2: words.append(temp)
                elif (counter - 1) % 5 == 3: lchs.append(temp)
                elif (counter - 1) % 5 == 4: rchs.append(temp)
    print(len(scores))
    print(len(words))
    print(len(lchs))
    print(len(rchs))
    print(tree_data_size)

    # hyperparameters
    hidden_size = 150
    output_size = 5
    learning_rate = 0.05
    batch = 1  # using larger batch size actually hurt the performance

    # parameters
    # for leaf
    m = dy.ParameterCollection()
    # Wi = m.add_parameters((hidden_size, word_embedding_size), init='normal', std=0.01)
    # bi = m.add_parameters(hidden_size, init = 0)
    # Wo = m.add_parameters((hidden_size, word_embedding_size), init='normal', std=0.01)
    # bo = m.add_parameters(hidden_size, init = 0)
    Wu = m.add_parameters((hidden_size, word_embedding_size),
                          init='normal',
                          std=0.01)
    bu = m.add_parameters(hidden_size, init=0)
    # for non leaf
    # U0i = m.add_parameters((hidden_size, hidden_size), init='normal', std=0.01)
    # U1i = m.add_parameters((hidden_size, hidden_size), init='normal', std=0.01)
    # bbi = m.add_parameters(hidden_size, init = 0)
    # U00f = m.add_parameters((hidden_size, hidden_size), init='normal', std=0.01)
    # U01f = m.add_parameters((hidden_size, hidden_size), init='normal', std=0.01)
    # U10f = m.add_parameters((hidden_size, hidden_size), init='normal', std=0.01)
    # U11f = m.add_parameters((hidden_size, hidden_size), init='normal', std=0.01)
    # bbf = m.add_parameters(hidden_size, init = 0)
    # U0o = m.add_parameters((hidden_size, hidden_size), init='normal', std=0.01)
    # U1o = m.add_parameters((hidden_size, hidden_size), init='normal', std=0.01)
    # bbo = m.add_parameters(hidden_size, init = 0)
    U0u = m.add_parameters((hidden_size, hidden_size), init='normal', std=0.01)
    U1u = m.add_parameters((hidden_size, hidden_size), init='normal', std=0.01)
    bbu = m.add_parameters(hidden_size, init=0)
    # for softmax
    Why = m.add_parameters((output_size, hidden_size), init='normal', std=0.01)
    by = m.add_parameters(output_size, init=0)

    trainer = dy.AdagradTrainer(m, learning_rate=learning_rate, eps=1e-8)

    # create a network for the xor problem given input and output
    def tree_lstm_network(scores, words, lchs, rchs):
        def rec(index):
            if (words[index] == -1):
                # branch node
                (l_loss, l_hidden) = rec(lchs[index])
                (r_loss, r_hidden) = rec(rchs[index])
                # i_gate = dy.logistic(U0i * l_hidden + U1i * r_hidden + bbi)
                # fl_gate = dy.logistic(U00f * l_hidden + U01f * r_hidden + bbf)
                # fr_gate = dy.logistic(U10f * l_hidden + U11f * r_hidden + bbf)
                # o_gate = dy.logistic(U0o * l_hidden + U1o * r_hidden + bbo)
                hidden = dy.tanh(U0u * l_hidden + U1u * r_hidden + bbu)
                # cell = dy.cmult(i_gate, u_value) + dy.cmult(fl_gate, l_cell) + dy.cmult(fr_gate, r_cell)
                # hidden = dy.cmult(o_gate, dy.tanh(cell))
                pred1 = dy.log_softmax(Why * hidden + by)
                loss = l_loss + r_loss - pred1[int(scores[index])]
                return (loss, hidden)
            else:
                embedding_tensor = dy.inputTensor(word_embedding[words[index]])
                # i_gate = dy.logistic(Wi * embedding_tensor + bi)
                # o_gate = dy.logistic(Wo * embedding_tensor + bo)
                hidden = dy.tanh(Wu * embedding_tensor + bu)
                # cell = dy.cmult(i_gate, u_value)
                # hidden = dy.cmult(o_gate, dy.tanh(cell))
                pred1 = dy.log_softmax(Why * hidden + by)
                loss = -pred1[int(scores[index])]
                return (loss, hidden)

        return rec(0)[0]

    epocNum = 6
    loopStart = time.time()
    loss_save = []
    for epoc in range(epocNum):
        total_loss = 0
        for batch_n in range(int(tree_data_size // batch)):
            dy.renew_cg()  # new computation graph
            losses = []
            for n in range(batch):
                index = batch_n * batch + n
                losses.append(
                    tree_lstm_network(scores[index], words[index], lchs[index],
                                      rchs[index]))
            batch_loss = dy.esum(losses)
            total_loss += batch_loss.value()
            batch_loss.backward()
            trainer.update()
        loss_save.append(total_loss / tree_data_size)
        print("epoc {}, average_loss {}".format(epoc,
                                                total_loss / tree_data_size))

    loopEnd = time.time()
    print('looptime is %s ' % (loopEnd - loopStart))

    prepareTime = loopStart - startTime
    loopTime = loopEnd - loopStart
    timePerEpoch = loopTime / epocNum

    with open(filename, "w") as f:
        f.write("unit: " + "1 epoch\n")
        for loss in loss_save:
            f.write(str(loss) + "\n")
        f.write("run time: " + str(prepareTime) + " " + str(timePerEpoch) +
                "\n")
Example #20
        ergm = MultiGraphErgm(tr_graphs, embs, opts.assoc_mode, reg=opts.regularize, dropout=drop,
                              model_path=opts.model,
                              path_only_init=True,
                              ergm_path=opts.ergm_model)
    else:
        dev_results = []
        # training phase
        if opts.model is not None:  # there's a pretrained association model
            ergm = MultiGraphErgm(tr_graphs, embs, opts.assoc_mode, reg=opts.regularize,
                                  dropout=drop, model_path=opts.model,
                                  path_only_init=True)
        else:
            ergm = MultiGraphErgm(tr_graphs, embs, opts.assoc_mode, reg=opts.regularize,
                                  dropout=drop)
        initial_weights = ergm.ergm_weights.as_array()
        trainer = dy.AdagradTrainer(ergm.model, opts.learning_rate)
        iteration_scores = []
        log_file_name = 'pred-train-log-{}_{}.txt'.format(start_time.date(), start_time.time())
        timeprint('starting training phase, writing to {}'.format(log_file_name))
        with open(log_file_name, 'a') as log_file:
            log_file.write('====\n')
            for ep in range(opts.epochs):
                iteration_scores.extend(macro_loops(opts, ep + 1, ergm, trainer, log_file, synsets))
                if opts.eval_dev and ep < opts.epochs - 1:
                    dev_results.append(eval(tr_graphs, te_graphs, ergm, opts, N, log_file=None, rerank_file=None))
        if opts.model_out is not None:
            # save model
            timeprint('saving trained model to {}'.format(opts.model_out))
            ergm.save(opts.model_out, initial_weights)
        print('scores:', '\t'.join([str(sc) for sc in iteration_scores[::100]]))
Example #21
 def __init__(self, yaml_context, e0=0.1, eps=1e-20):
     self.optimizer = dy.AdagradTrainer(
         yaml_context.dynet_param_collection.param_col, e0, eps=eps)
Example #22
def train_model(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, train_lemmas, train_feat_dicts, train_words, dev_lemmas,
                dev_feat_dicts, dev_words, alphabet_index, inverse_alphabet_index, epochs, optimization,
                results_file_path, train_aligned_pairs, dev_aligned_pairs, feat_index, feature_types,
                plot):
    print 'training...'

    np.random.seed(17)
    random.seed(17)

    if optimization == 'ADAM':
        trainer = pc.AdamTrainer(model, lam=REGULARIZATION, alpha=LEARNING_RATE, beta_1=0.9, beta_2=0.999, eps=1e-8)
    elif optimization == 'MOMENTUM':
        trainer = pc.MomentumSGDTrainer(model)
    elif optimization == 'SGD':
        trainer = pc.SimpleSGDTrainer(model)
    elif optimization == 'ADAGRAD':
        trainer = pc.AdagradTrainer(model)
    elif optimization == 'ADADELTA':
        trainer = pc.AdadeltaTrainer(model)
    else:
        trainer = pc.SimpleSGDTrainer(model)

    total_loss = 0
    best_avg_dev_loss = 999
    best_dev_accuracy = -1
    best_train_accuracy = -1
    patience = 0
    train_len = len(train_words)
    sanity_set_size = 100
    epochs_x = []
    train_loss_y = []
    dev_loss_y = []
    train_accuracy_y = []
    dev_accuracy_y = []
    e = -1

    # progress bar init
    widgets = [progressbar.Bar('>'), ' ', progressbar.ETA()]
    train_progress_bar = progressbar.ProgressBar(widgets=widgets, maxval=epochs).start()
    avg_loss = -1

    for e in xrange(epochs):

        # randomize the training set
        indices = range(train_len)
        random.shuffle(indices)
        train_set = zip(train_lemmas, train_feat_dicts, train_words, train_aligned_pairs)
        train_set = [train_set[i] for i in indices]

        # compute loss for each example and update
        for i, example in enumerate(train_set):
            lemma, feats, word, alignment = example
            loss = one_word_loss(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feats, word,
                                 alphabet_index, alignment, feat_index, feature_types)
            loss_value = loss.value()
            total_loss += loss_value
            loss.backward()
            trainer.update()
            if i > 0:
                avg_loss = total_loss / float(i + e * train_len)
            else:
                avg_loss = total_loss

        if EARLY_STOPPING:

            # get train accuracy
            print 'evaluating on train...'
            train_predictions = predict_sequences(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, alphabet_index,
                                                  inverse_alphabet_index, train_lemmas[:sanity_set_size],
                                                  train_feat_dicts[:sanity_set_size],
                                                  feat_index,
                                                  feature_types)

            train_accuracy = evaluate_model(train_predictions, train_lemmas[:sanity_set_size],
                                            train_feat_dicts[:sanity_set_size],
                                            train_words[:sanity_set_size],
                                            feature_types, print_results=False)[1]

            if train_accuracy > best_train_accuracy:
                best_train_accuracy = train_accuracy

            dev_accuracy = 0
            avg_dev_loss = 0

            if len(dev_lemmas) > 0:

                # get dev accuracy
                dev_predictions = predict_sequences(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, alphabet_index,
                                                    inverse_alphabet_index, dev_lemmas, dev_feat_dicts, feat_index,
                                                    feature_types)
                print 'evaluating on dev...'
                # get dev accuracy
                dev_accuracy = evaluate_model(dev_predictions, dev_lemmas, dev_feat_dicts, dev_words, feature_types,
                                              print_results=True)[1]

                if dev_accuracy > best_dev_accuracy:
                    best_dev_accuracy = dev_accuracy

                    # save best model to disk
                    save_pycnn_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                # found "perfect" model
                if dev_accuracy == 1:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e

                # get dev loss
                total_dev_loss = 0
                for i in xrange(len(dev_lemmas)):
                    total_dev_loss += one_word_loss(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, dev_lemmas[i],
                                                    dev_feat_dicts[i], dev_words[i], alphabet_index,
                                                    dev_aligned_pairs[i], feat_index, feature_types).value()

                avg_dev_loss = total_dev_loss / float(len(dev_lemmas))
                if avg_dev_loss < best_avg_dev_loss:
                    best_avg_dev_loss = avg_dev_loss

                print 'epoch: {0} train loss: {1:.4f} dev loss: {2:.4f} dev accuracy: {3:.4f} train accuracy = {4:.4f} \
 best dev accuracy {5:.4f} best train accuracy: {6:.4f} patience = {7}'.format(e, avg_loss, avg_dev_loss, dev_accuracy,
                                                                               train_accuracy, best_dev_accuracy,
                                                                               best_train_accuracy, patience)

                log_to_file(results_file_path + '_log.txt', e, avg_loss, train_accuracy, dev_accuracy)

                if patience == MAX_PATIENCE:
                    print 'out of patience after {0} epochs'.format(str(e))
                    # TODO: would like to return best model but pycnn has a bug with save and load. Maybe copy via code?
                    # return best_model[0]
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e
            else:

                # if no dev set is present, optimize on train set
                print 'no dev set for early stopping, running all epochs until perfectly fitting or patience was \
                reached on the train set'

                if train_accuracy > best_train_accuracy:
                    best_train_accuracy = train_accuracy

                    # save best model to disk
                    save_pycnn_model(model, results_file_path)
                    print 'saved new best model'
                    patience = 0
                else:
                    patience += 1

                print 'epoch: {0} train loss: {1:.4f} train accuracy = {2:.4f} best train accuracy: {3:.4f} \
                patience = {4}'.format(e, avg_loss, train_accuracy, best_train_accuracy, patience)

                # found "perfect" model on train set or patience has reached
                if train_accuracy == 1 or patience == MAX_PATIENCE:
                    train_progress_bar.finish()
                    if plot:
                        plt.cla()
                    return model, e

            # update lists for plotting
            train_accuracy_y.append(train_accuracy)
            epochs_x.append(e)
            train_loss_y.append(avg_loss)
            dev_loss_y.append(avg_dev_loss)
            dev_accuracy_y.append(dev_accuracy)

        # finished epoch
        train_progress_bar.update(e)
        if plot:
            with plt.style.context('fivethirtyeight'):
                p1, = plt.plot(epochs_x, dev_loss_y, label='dev loss')
                p2, = plt.plot(epochs_x, train_loss_y, label='train loss')
                p3, = plt.plot(epochs_x, dev_accuracy_y, label='dev acc.')
                p4, = plt.plot(epochs_x, train_accuracy_y, label='train acc.')
                plt.legend(loc='upper left', handles=[p1, p2, p3, p4])
            plt.savefig(results_file_path + '.png')
    train_progress_bar.finish()
    if plot:
        plt.cla()
    print 'finished training. average loss: ' + str(avg_loss)
    return model, e
Example #23
        meta.w2i = {}
        for w in wvm.vocab:
            meta.w2i[w] = wvm.vocab[w].index

    if args.save_model:
        pickle.dump(meta, open('%s.meta' % args.save_model, 'wb'))
    if args.load_model:
        ontoparser = SubsumptionLearning(model=args.load_model)
    else:
        ontoparser = SubsumptionLearning(meta=meta)
        trainers = {
            'momsgd': dy.MomentumSGDTrainer(ontoparser.model, edecay=0.25),
            'adam': dy.AdamTrainer(ontoparser.model, edecay=0.25),
            'simsgd': dy.SimpleSGDTrainer(ontoparser.model, edecay=0.25),
            'adagrad': dy.AdagradTrainer(ontoparser.model, edecay=0.25),
            'adadelta': dy.AdadeltaTrainer(ontoparser.model, edecay=0.25)
        }
        trainer = trainers[args.trainer]
        nntraining(train_sents)

    if args.dev:
        accuracy = Test(inputGenDev)
        sys.stdout.write("Accuracy: {}%\n".format(accuracy))

    if args.isDaemon and args.daemonPort:
        sys.stderr.write('Listening at port %d\n' % args.daemonPort)
        host = "0.0.0.0"  #Listen on all interfaces
        port = args.daemonPort  #Port number

        tcpsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)