Example #1
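# Train either a plain Seq2seq or a PeekySeq2seq for max_epoch epochs and
# record per-epoch accuracy on the held-out questions in x_test.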
def train_eval(x_train, x_test, is_peeky):
    if is_peeky:
        model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
    else:
        model = Seq2seq(vocab_size, wordvec_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    acc_list = []
    for epoch in range(max_epoch):
        trainer.fit(x_train,
                    t_train,
                    max_epoch=1,
                    batch_size=batch_size,
                    max_grad=max_grad)
        correct_num = 0
        for i in range(len(x_test)):
            question, correct = x_test[[i]], t_test[[i]]
            verbose = i < 10
            correct_num += eval_seq2seq(model, question, correct, id_to_char,
                                        verbose)
        acc = float(correct_num) / len(x_test)
        acc_list.append(acc)
        print('val acc %.3f%%' % (acc * 100))
    return acc_list
Example #2
def experiment_fn(run_config, params):
    data = Data(FLAGS)
    data.initialize_word_vectors()

    model = Seq2seq(data.vocab_size, FLAGS, data.embeddings_mat)
    estimator = tf.estimator.Estimator(
        model_fn=model.make_graph,
        # model_dir=FLAGS.model_dir,
        config=run_config,
        params=FLAGS)

    train_input_fn, train_feed_fn = data.make_input_fn('train')
    eval_input_fn, eval_feed_fn = data.make_input_fn('test')

    print_vars = [
        'source', 'predict'
        # 'decoder_output',
        # 'actual'
    ]
    print_inputs = tf.train.LoggingTensorHook(print_vars,
                                              every_n_iter=FLAGS.print_every,
                                              formatter=data.get_formatter(
                                                  ['source', 'predict']))

    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=FLAGS.iterations,
        min_eval_frequency=FLAGS.print_every,
        train_monitors=[tf.train.FeedFnHook(train_feed_fn), print_inputs],
        eval_hooks=[tf.train.FeedFnHook(eval_feed_fn)],
        eval_steps=10)
    return experiment
Example #3
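# Train a Seq2seq model on the toy addition dataset and report per-epoch
# validation accuracy.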
def main() -> None:
    (x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
    char_to_id, id_to_char = sequence.get_vocab()

    vocab_size = len(char_to_id)
    wordvec_size = 16
    hidden_size = 128
    batch_size = 128
    max_epoch = 25
    max_grad = 5.0

    model = Seq2seq(vocab_size, wordvec_size, hidden_size)
    optimizer = Adam()
    trainer = Trainer(model, optimizer)

    acc_list = []
    for epoch in range(1, max_epoch + 1):
        trainer.fit(x_train,
                    t_train,
                    max_epoch=1,
                    batch_size=batch_size,
                    max_grad=max_grad)

        correct_num = 0
        for i in range(len(x_test)):
            question, correct = x_test[[i]], t_test[[i]]
            verbose = i < 10
            correct_num += eval_seq2seq(model, question, correct, id_to_char,
                                        verbose)

        acc = float(correct_num) / len(x_test)
        acc_list.append(acc)
        print(f'val acc {acc*100}%')
    print('DONE')
Example #4
    def __init__(self, trainable=True):
        self.trainable = trainable
        self.seq2seq = Seq2seq(trainable=False)
        self.seq2seq.build()
        init_op = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init_op)
        self.seq2seq.init(self.sess)
Example #5
    def __init__(self):
        self.seq2seq = Seq2seq()
        self.seq2seq.build()
        init_op = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init_op)
        self.reverse_vocab = preprocessor.load_reverse_vocab(
            Config.vocab_file_path)
        self.restore_variables(self.sess)
Example #6
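    # Wrap the trained encoder and decoder in a TopKDecoder to run beam search
    # of width k, then decode every example in `data` with a Predictor.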
    def evaluate(model, data, k=1):
        beam_search = Seq2seq(model.encoder, TopKDecoder(model.decoder, k))
        input_vocab = data.fields[GlobalNames.src_field_name].vocab
        output_vocab = data.fields[GlobalNames.tgt_field_name].vocab
        pred_machine = Predictor(beam_search, input_vocab, output_vocab)

        result = [
            " ".join(pred_machine.predict(item.src)) for item in data.examples
        ]
        return result
Example #7
def main(args, load_exclude_set, restoreCallback):
    logging.basicConfig(filename=0,
                        level=logging.DEBUG,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
                        datefmt='%H:%M:%S')

    if args.debug:
        debug()
    logging.info(json.dumps(args, indent=2))

    cuda_init(0, args.cuda)

    volatile = Storage()
    volatile.load_exclude_set = load_exclude_set
    volatile.restoreCallback = restoreCallback

    data_class = SingleTurnDialog.load_class(args.dataset)
    data_arg = Storage()
    data_arg.file_id = args.datapath
    wordvec_class = WordVector.load_class(args.wvclass)
    if wordvec_class is None:
        wordvec_class = Glove

    def load_dataset(data_arg, wvpath, embedding_size):
        wv = wordvec_class(wvpath)
        dm = data_class(**data_arg)
        return dm, wv.load(embedding_size, dm.vocab_list)

    if args.cache:
        dm, volatile.wordvec = try_cache(
            load_dataset, (data_arg, args.wvpath, args.embedding_size),
            args.cache_dir, data_class.__name__ + "_" + wordvec_class.__name__)
    else:
        dm, volatile.wordvec = load_dataset(data_arg, args.wvpath,
                                            args.embedding_size)

    volatile.dm = dm

    param = Storage()
    param.args = args
    param.volatile = volatile

    model = Seq2seq(param)
    if args.mode == "train":
        model.train_process()
    elif args.mode == "test":
        test_res = model.test_process()

        for key, val in test_res.items():
            if isinstance(val, bytes):
                test_res[key] = str(val)
        json.dump(test_res, open("./result.json", "w"))
    else:
        raise ValueError("Unknown mode")
Example #8
def main(args, load_exclude_set, restoreCallback):
    logging.basicConfig(filename=0,
                        level=logging.DEBUG,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
                        datefmt='%H:%M:%S')

    if args.debug:
        debug()
    logging.info(json.dumps(args, indent=2))

    cuda_init(0, args.cuda)

    volatile = Storage()
    volatile.load_exclude_set = load_exclude_set
    volatile.restoreCallback = restoreCallback

    data_class = SingleTurnDialog.load_class(args.dataset)
    data_arg = Storage()
    data_arg.file_id = args.datapath + "#OpenSubtitles"
    data_arg.tokenizer = PretrainedTokenizer(
        BertTokenizer.from_pretrained(args.bert_vocab))
    data_arg.pretrained = "bert"
    wordvec_class = WordVector.load_class(args.wvclass)
    if wordvec_class is None:
        wordvec_class = Glove

    def load_dataset(data_arg, wvpath, embedding_size):
        wv = wordvec_class(wvpath)
        dm = data_class(**data_arg)
        return dm, wv.load_matrix(embedding_size, dm.frequent_vocab_list)

    if args.cache:
        dm, volatile.wordvec = try_cache(
            load_dataset, (data_arg, args.wvpath, args.embedding_size),
            args.cache_dir, data_class.__name__ + "_" + wordvec_class.__name__)
    else:
        dm, volatile.wordvec = load_dataset(data_arg, args.wvpath,
                                            args.embedding_size)

    volatile.dm = dm

    param = Storage()
    param.args = args
    param.volatile = volatile

    model = Seq2seq(param)
    if args.mode == "train":
        model.train_process()
    elif args.mode == "test":
        model.test_process()
    else:
        raise ValueError("Unknown mode")
Example #9
def train(train_loader, model: seq2seq.Seq2seq, criterion, optimizer, epoch, teacher_forcing_ratio):
    """Run one train epoch"""
    losses = AverageMeter()

    # Switch to train mode
    model.train()

    for i, batch in enumerate(train_loader):
        # data: seq_len, N
        # data_mask: seq_len, N
        # target: seq_len, N
        data, data_mask, target = batch
        target = target.cuda(non_blocking=True)
        data_mask = data_mask.cuda(non_blocking=True)
        data = data.cuda()

        batch_size = data.size(1)
        target_len = target.size(0)
        
        # Forward
        # Encoder
        source_hs, hidden = model.encoder(data)
        # Decoder
        ctx = None
        hidden = model.transformHidden(hidden)
        
        outputs = []
        use_teacher_forcing = random.random() < teacher_forcing_ratio
        x = target[0]
        for j in range(1, target_len):
            output, hidden, ctx = model.decoder(x, hidden, ctx, source_hs, data_mask)
            outputs.append(output)

            with torch.no_grad():
                if use_teacher_forcing:
                    x = target[j]
                else:
                    topi = torch.topk(output, 1, dim=1)[1] # N, 1
                    x = topi.squeeze() # N

        outputs = torch.stack(outputs) # seq_len, N, n_tokens
        loss = criterion(outputs, target[1:], batch_size)

        # Backward
        optimizer.zero_grad()
        loss.backward()

        # Update
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        # Measure loss
        losses.update(loss.item(), batch_size)

        # Print Training Information
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), loss=losses))
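# The call criterion(outputs, target[1:], batch_size) above implies a custom
# sequence criterion. A minimal sketch of one possibility (an assumption, not
# the repository's actual loss; it presumes the decoder emits raw logits) is a
# padding-aware cross entropy summed over tokens and averaged per batch item:
import torch.nn as nn


class SequenceLoss(nn.Module):
    def __init__(self, pad_idx=0):
        super().__init__()
        # Sum token-level losses, skipping padded positions
        self.ce = nn.CrossEntropyLoss(ignore_index=pad_idx, reduction='sum')

    def forward(self, outputs, targets, batch_size):
        # outputs: (seq_len, N, n_tokens); targets: (seq_len, N)
        n_tokens = outputs.size(-1)
        loss = self.ce(outputs.reshape(-1, n_tokens), targets.reshape(-1))
        return loss / batch_size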
Example #10
def load_model(model_path, device):

    state = torch.load(model_path, map_location=device)

    params = state['parameter']
    if params['name'] == 'Transformer':
        params.pop('name')
        model = Transformer(**params)
    else:
        model = Seq2seq(**params)
    model.to(device)
    model.load_state_dict(state['state_dict'])

    return model, state['src_lang'], state['tgt_lang'], state[
        'src_vocab'], state['tgt_vocab']
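# A possible way to call load_model (the checkpoint path below is a
# placeholder, not a file from the original project):
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, src_lang, tgt_lang, src_vocab, tgt_vocab = load_model('model.pt', device)
model.eval()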
Example #11
def main():
    seq2seq = Seq2seq(lr=0.3, init_range=0.3)

    for i in range(1000):
        cost = seq2seq.train([2, 1], [2])
        cost += seq2seq.train([1], [1])
        cost += seq2seq.train([3, 1], [3])

        if i % 100 == 0:
            print('Epoch:', i)
            print('training cost:', cost / 3)

            print([2, 1], '->', seq2seq.predict([2, 1]))
            print([1], '->', seq2seq.predict([1]))
            print([3, 1], '->', seq2seq.predict([3, 1]))
            print()
Example #12
def main(args):
    # tf.logging._logger.setLevel(logging.INFO)
    tf.logging.set_verbosity(logging.INFO)

    data = Data(FLAGS)
    model = Seq2seq(data.vocab_size, FLAGS)

    input_fn, feed_fn = data.make_input_fn()
    print_inputs = tf.train.LoggingTensorHook(
        ['source', 'target', 'predict'],
        every_n_iter=FLAGS.print_every,
        formatter=data.get_formatter(['source', 'target', 'predict']))

    estimator = tf.estimator.Estimator(
        model_fn=model.make_graph, model_dir=FLAGS.model_dir)  #, params=FLAGS)
    estimator.train(input_fn=input_fn,
                    hooks=[tf.train.FeedFnHook(feed_fn), print_inputs],
                    steps=FLAGS.iterations)
Example #13
    def __init__(self):
        self.data = Data(FLAGS)
        model = Seq2seq(self.data.vocab_size, FLAGS)
        estimator = tf.estimator.Estimator(model_fn=model.make_graph,
                                           model_dir=FLAGS.model_dir)

        def input_fn():
            inp = tf.placeholder(tf.int64, shape=[None, None], name='input')
            output = tf.placeholder(tf.int64,
                                    shape=[None, None],
                                    name='output')
            tf.identity(inp[0], 'source')
            tf.identity(output[0], 'target')
            dict = {'input': inp, 'output': output}
            return tf.estimator.export.ServingInputReceiver(dict, dict)

        self.predictor = tf.contrib.predictor.from_estimator(
            estimator, input_fn)
Example #14
def main():
    seq2seq = Seq2seq(lr=0.3, init_range=0.3)

    for i in range(1000):
        import random

        cost = 0
        for t in range(10):
            #a=random.randrange(9)

            #b=random.randrange(9)
            cost = seq2seq.train([1, 1], [1])
            cost += seq2seq.train([8, 1], [1])
            cost += seq2seq.train([7, 1], [1])
            cost += seq2seq.train([9, 1], [1])
            cost += seq2seq.train([4, 1], [1])
            cost += seq2seq.train([3, 1], [1])
            cost += seq2seq.train([1, 1], [1])
            cost += seq2seq.train([4, 1], [1])
            cost += seq2seq.train([0, 2], [2])
            cost += seq2seq.train([3, 2], [2])
            cost += seq2seq.train([5, 2], [2])
            cost += seq2seq.train([6, 2], [2])
            cost += seq2seq.train([1, 2], [2])
            cost += seq2seq.train([9, 2], [2])
            cost += seq2seq.train([8, 2], [2])
            cost += seq2seq.train([7, 2], [2])
            cost += seq2seq.train([6, 2], [2])
            cost += seq2seq.train([5, 2], [2])
            cost += seq2seq.train([4, 2], [2])
            cost += seq2seq.train([3, 2], [2])
            cost += seq2seq.train([2, 2], [2])
            cost += seq2seq.train([1, 2], [2])

        print('training cost:', cost / 22)

        if i % 100 == 0:
            print('Epoch:', i)
            print('training cost:', cost / 22)
            a = random.randrange(9)
            b = random.randrange(9)

            print([5, 2], '->', seq2seq.predict([5, 2]))
Example #15
def main(args):
    logging.basicConfig(filename=0,
                        level=logging.DEBUG,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
                        datefmt='%H:%M:%S')

    if args.debug:
        debug()
    logging.info(json.dumps(args, indent=2))

    cuda_init(0, args.cuda)

    volatile = Storage()
    data_class = SingleTurnDialog.load_class(args.dataset)
    wordvec_class = WordVector.load_class(args.wvclass)
    if wordvec_class is None:
        wordvec_class = Glove
    if args.cache:
        dm = try_cache(data_class, (args.datapath, ), args.cache_dir)
        volatile.wordvec = try_cache(
            lambda wv, ez, vl: wordvec_class(wv).load(ez, vl),
            (args.wvpath, args.embedding_size, dm.vocab_list),
            args.cache_dir, wordvec_class.__name__)
    else:
        dm = data_class(args.datapath)
        wv = wordvec_class(args.wvpath)
        volatile.wordvec = wv.load(args.embedding_size, dm.vocab_list)

    volatile.dm = dm

    param = Storage()
    param.args = args
    param.volatile = volatile

    model = Seq2seq(param)
    if args.mode == "train":
        model.train_process()
    elif args.mode == "test":
        model.test_process()
    else:
        raise ValueError("Unknown mode")
Example #16
    def __init__(self, checkpoint='checkpoint', directory='coco'):
        self.data = Data(directory + '/train_source.txt',
                         directory + '/train_target.txt',
                         directory + '/train_vocab.txt')
        model = Seq2seq(self.data.vocab_size)
        estimator = tf.estimator.Estimator(model_fn=model.make_graph,
                                           model_dir=checkpoint)

        def input_fn():
            inp = tf.placeholder(tf.int64, shape=[None, None], name='input')
            output = tf.placeholder(tf.int64,
                                    shape=[None, None],
                                    name='output')
            tf.identity(inp[0], 'source')
            tf.identity(output[0], 'target')
            dict = {'input': inp, 'output': output}
            return tf.estimator.export.ServingInputReceiver(dict, dict)

        self.predictor = tf.contrib.predictor.from_estimator(
            estimator, input_fn)
Example #17
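# Assemble an attention-based encoder-decoder Seq2seq from the command-line
# options in `opt`, print its layer summary, and move it to the target device.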
def build_model(encoder_vocab, decoder_vocab):
    model = Seq2seq(encoder_vocab_size=encoder_vocab.get_vocab_size(),
                    encoder_embedding_size=opt.encoder_embedding_size,
                    encoder_hidden_size=opt.encoder_hidden_size,
                    encoder_num_layers=opt.encoder_num_layers,
                    encoder_bidirectional=opt.encoder_bidirectional,
                    decoder_vocab_size=decoder_vocab.get_vocab_size(),
                    decoder_embedding_size=opt.decoder_embedding_size,
                    decoder_hidden_size=opt.decoder_hidden_size,
                    decoder_num_layers=opt.decoder_num_layers,
                    decoder_attn_type=opt.decoder_attn_type,
                    dropout_ratio=opt.dropout_ratio,
                    padding_idx=PAD_id,
                    tied=opt.tied,
                    device=device)

    print(model)

    model.to(device=device)
    return model
Example #18
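# Replace each word in every description with its embedding vector, train a
# Seq2seq autoencoder on the resulting sequences, save the encoder, and return
# the autoencoder's outputs for each description.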
def vectorize_nn(word_index,
                 embedding_matrix,
                 sentences,
                 max_num_vectors=200,
                 num_features=200,
                 batch_size=32,
                 latent_dim=200,
                 timesteps=200,
                 epochs=5):
    print("Substituting words in descriptions by their vector representetion")
    vec_sentences = []
    sec_count = 0
    for i, sentence in enumerate(sentences):
        # Print run information
        sec_count += 1
        if sec_count % 5000 == 0:
            print('{} descriptions have been preprocessed.'.format(sec_count))

        vector = seqWords2seqVec(sentence, word_index, embedding_matrix,
                                 max_num_vectors, num_features)

        # Store vector
        vector = np.asarray(vector)
        vec_sentences.append(vector)

    print("All words have been subtituted by their vector representation")

    vec_sentences = np.asarray(vec_sentences)
    vec_sentences = np.reshape(vec_sentences,
                               (len(sentences), max_num_vectors, num_features))
    # Create and train Neural net
    s2s = Seq2seq(max_num_vectors, latent_dim, timesteps, batch_size,
                  word_index, embedding_matrix)
    print("Training autoencoder...")
    s2s.fit(vec_sentences, epochs)
    print("Getting vector representation of each description...")
    predictions = s2s.predict(vec_sentences)
    print("Saving neural network...")
    s2s.encoder.save('model/encoder.h5')
    return predictions
Example #19
def main():
    seq2seq = Seq2seq()

    last_seq = None
    cost = 0

    for i in range(100000):

        X = [randint(1, 2) for _ in range(randint(1, 10))]
        Y = [x for x in X if x == 1]
        cost += seq2seq.train(X, Y)

        if i % 1000 == 0:
            print(i, '\t', cost / 1000)
            cost = 0

            X = [randint(1, 2) for _ in range(randint(1, 10))]
            Y = seq2seq.predict(X)

            print(X, '->', Y)

            seq2seq.lr /= 2
Example #20
        hidden_size = 128
        bidirectional = True
        encoder = EncoderRNN(len(src.vocab),
                             max_len,
                             hidden_size,
                             bidirectional=bidirectional,
                             variable_lengths=True)
        decoder = DecoderRNN(len(tgt.vocab),
                             max_len,
                             hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=0.2,
                             use_attention=True,
                             bidirectional=bidirectional,
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                              max_grad_norm=5)
        scheduler = StepLR(optimizer.optimizer, 1)
        optimizer.set_scheduler(scheduler)

    # train
Example #21
    def __init__(self, config):
        super(Model, self).__init__()
        self.config = config
        self.embedding = Embedding(config)
        self.seq2seq = Seq2seq(config)
Example #22
# Reverse input? =================================================
is_reverse = False  # True
if is_reverse:
    x_train, x_test = x_train[:, ::-1], x_test[:, ::-1]
# ================================================================

# Hyperparameter settings
vocab_size = len(char_to_id)
wordvec_size = 16
hidden_size = 128
batch_size = 128
max_epoch = 25
max_grad = 5.0

# Normal or Peeky? ==============================================
model = Seq2seq(vocab_size, wordvec_size, hidden_size)
# model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
# ================================================================
optimizer = Adam()
trainer = Trainer(model, optimizer)

acc_list = []
for epoch in range(max_epoch):
    trainer.fit(x_train,
                t_train,
                max_epoch=1,
                batch_size=batch_size,
                max_grad=max_grad)

    correct_num = 0
    for i in range(len(x_test)):
Example #23
def main(args):
    logging.basicConfig(
        filename=0,
        level=logging.DEBUG,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(message)s',
        datefmt='%H:%M:%S')

    if args.debug:
        debug()
    logging.info(json.dumps(args, indent=2))

    cuda_init(args.cuda_num, args.cuda)

    volatile = Storage()
    volatile.load_exclude_set = args.load_exclude_set
    volatile.restoreCallback = args.restoreCallback

    if args.dataset == 'WizardOfWiki':
        data_class = WizardOfWiki
    elif args.dataset == 'HollE':
        data_class = HollE
    else:
        raise ValueError
    wordvec_class = WordVector.load_class(args.wvclass)
    if wordvec_class is None:
        wordvec_class = Glove

    if not os.path.exists(args.cache_dir):
        os.mkdir(args.cache_dir)
    args.cache_dir = os.path.join(args.cache_dir, args.dataset)

    if not os.path.exists(args.out_dir):
        os.mkdir(args.out_dir)
    args.out_dir = os.path.join(args.out_dir, args.dataset)

    if not os.path.exists(args.model_dir):
        os.mkdir(args.model_dir)
    if args.dataset not in args.model_dir:
        args.model_dir = os.path.join(args.model_dir, args.dataset)

    if args.cache:
        dm = try_cache(data_class, (args.datapath, ), args.cache_dir)
        volatile.wordvec = try_cache(
            lambda wv, ez, vl: wordvec_class(wv).load_matrix(ez, vl),
            (args.wvpath, args.embedding_size, dm.vocab_list), args.cache_dir,
            wordvec_class.__name__)
    else:
        dm = data_class(args.datapath)
        wv = wordvec_class(args.wvpath)
        volatile.wordvec = wv.load_matrix(args.embedding_size, dm.vocab_list)

    volatile.dm = dm

    param = Storage()
    param.args = args
    param.volatile = volatile

    model = Seq2seq(param)
    if args.mode == "train":
        model.train_process()
    elif args.mode == "test":
        model.test_process()
    elif args.mode == 'dev':
        model.test_dev()
    else:
        raise ValueError("Unknown mode")
Example #24
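# Fragment: append start/end tokens to the vocabulary, build a 3-layer GRU
# Seq2seq with a TensorLayer embedding, load previously saved weights, and
# switch the model to training mode.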
    word2idx.update({'start_id': start_id})
    word2idx.update({'end_id': end_id})
    idx2word = idx2word + ['start_id', 'end_id']

    src_vocab_size = tgt_vocab_size = src_vocab_size + 2

    num_epochs = 10
    vocabulary_size = src_vocab_size

    decoder_seq_length = 25
    model_ = Seq2seq(
        decoder_seq_length=decoder_seq_length,
        cell_enc=tf.keras.layers.GRUCell,
        cell_dec=tf.keras.layers.GRUCell,
        n_layer=3,
        n_units=1024,
        embedding_layer=tl.layers.Embedding(vocabulary_size=vocabulary_size,
                                            embedding_size=emb_dim),
    )

    # Uncomment below statements if you have already saved the model

    load_weights = tl.files.load_npz(name='WinterMute_rms1024_cornell.npz')
    tl.files.assign_weights(load_weights, model_)

    optimizer = tf.optimizers.Adam(learning_rate=0.001)
    #optimizer = tf.optimizers.RMSprop(learning_rate=0.001)
    model_.train()

    for epoch in range(num_epochs):
Example #25
        encoder = EncoderRNN(len(src.vocab),
                             max_len,
                             hidden_size,
                             bidirectional=bidirectional,
                             rnn_cell='lstm',
                             variable_lengths=True)
        decoder = DecoderRNN(len(tgt.vocab),
                             max_len,
                             hidden_size * 2,
                             dropout_p=0.2,
                             use_attention=True,
                             bidirectional=bidirectional,
                             rnn_cell='lstm',
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

    # train
    t = SupervisedTrainer(loss=loss,
                          batch_size=32,
                          checkpoint_every=50,
                          print_every=10,
                          expt_dir=opt.expt_dir)

    seq2seq = t.train(seq2seq,
                      train,
Example #26
def main():

    args = args_set('big')

    # Create save dir
    create_dirs(args.save_dir)

    # Check CUDA
    if torch.cuda.is_available():
        args.cuda = True
    args.device = torch.device("cuda" if args.cuda else "cpu")
    print("Using CUDA: {}".format(args.cuda))

    # Set seeds
    set_seeds(seed=1234, cuda=args.cuda)

    # load state
    model_spatial = SpatialModel(num_input_channels=5,
                                 out_num=1053,
                                 dropout_p=args.dropout_p)

    model_time = Seq2seq(num_features=1053,
                         hidden_size=512,
                         input_seq_len=args.input_seq_len,
                         pred_seq_len=args.pred_seq_len,
                         batch_size=1)
    # model_time = Seq2seq_attn(num_features=1053,
    #                           input_seq_len=args.input_seq_len,
    #                           pred_seq_len=args.pred_seq_len,
    #                           batch_size=1,
    #                           dropout=args.dropout_p)
    # model_time = Seq2seq_mlp(num_features=1053,
    #                          input_seq_len=args.input_seq_len,
    #                          pred_seq_len=args.pred_seq_len,
    #                          batch_size=1, device=args.device)

    resume = os.path.join(args.save_dir, 'check_point_{}'.format(40))
    print('Resuming model check point from {}\n'.format(40))
    check_point = torch.load(resume)
    model_spatial.load_state_dict(check_point['model_spatial'])
    model_spatial.to(args.device)
    model_time.load_state_dict(check_point['model_time'])
    model_time.to(args.device)

    # data = DataPrepare(save_dir=args.save_dir, data_folder=args.data_folder,
    #                    train_size=args.train_size,
    #                    val_size=args.val_size,
    #                    test_size=args.test_size,
    #                    input_seq_len=args.input_seq_len,
    #                    pred_seq_len=args.pred_seq_len, shuffle=True)
    # data.create_data()

    test_exps = np.load('exp_list.npy')
    scales = np.load('scales.npy')

    tester = Tester(test_exps=test_exps,
                    data_folder=args.data_folder,
                    scales=scales,
                    input_seq_len=args.input_seq_len,
                    pred_seq_len=args.pred_seq_len,
                    model_spatial=model_spatial,
                    model_time=model_time,
                    extract_num=4,
                    save_dir=args.save_dir,
                    save_sample_path=args.save_sample_path,
                    device='cuda')
    tester.run_test_loop()
Example #27
                           embedding_size=512,
                           num_layers=1,
                           dropout=0,
                           is_training=True)
conv_decoder = ConvDecoder(len(word_to_index),
                           max_target_length + 2,
                           hidden_size=128,
                           embedding_size=512,
                           num_layers=1,
                           dropout=0,
                           is_training=True)

examples = np.array(examples)
examples_target = np.array(examples_target)

seq2seq = Seq2seq(conv_encoder, conv_decoder, len(word_to_index))

seq_output = seq2seq(examples, examples_target)
seq_output = seq_output.data.numpy()
sentences = [index_to_word_sentence(seq) for seq in seq_output]
print(sentences)

while True:
    new_text = input('type in text to predict:')
    new_text_token = np.array(
        [[word_to_index[token] for token in new_text.lower().split()]])
    new_text_token = np.concatenate([new_text_token, [[1]]], axis=1)

    outputs = seq2seq(new_text_token, is_training=False)
    outputs = outputs.data.numpy()
    sentences = [index_to_word_sentence(seq) for seq in outputs]
Example #28
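# Build a training-mode Seq2seq graph from the processed dictionary and define
# a per-epoch sampling probability schedule before starting the TF session.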
mode = 'train'
rnn_size = 1024
num_layers = 1
max_encoder_steps = 30
max_decoder_steps = 30
embedding_size = 256

data_processor = DataProcessor(mode)
idx2word_dict = data_processor.get_dictionary()
vocab_size = len(idx2word_dict)

model = Seq2seq(rnn_size=rnn_size,
                num_layers=num_layers,
                batch_size=batch_size,
                vocab_size=vocab_size,
                mode=mode,
                max_encoder_steps=max_encoder_steps,
                max_decoder_steps=max_decoder_steps,
                embedding_size=embedding_size)


# TODO: sampling probability for each epoch
def func(x):
    return 2 - 2 / (1 + np.exp(0.3 * (x - 2 * epochs)))


sampling_prob = func(np.arange(epochs))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
Example #29
def main():
    # Arguments
    args = args_set('big')

    # Create save dir
    create_dirs(args.save_dir)

    # Check CUDA
    if torch.cuda.is_available():
        args.cuda = True
    args.device = torch.device("cuda" if args.cuda else "cpu")
    print("Using CUDA: {}".format(args.cuda))

    # Set seeds
    set_seeds(seed=1234, cuda=args.cuda)

    dataset = SpatialTimeDataset(args.save_sample_path)

    # create model
    model_spatial = SpatialModel(num_input_channels=dataset[0][0].shape[1],
                                 out_num=1053,
                                 dropout_p=args.dropout_p)

    # model_time = Seq2seq_mlp(num_features=1053,
    #                          input_seq_len=args.input_seq_len,
    #                          pred_seq_len=args.pred_seq_len,
    #                          batch_size=args.batch_size, device=args.device)
    model_time = Seq2seq(num_features=1053,
                         hidden_size=512,
                         input_seq_len=args.input_seq_len,
                         pred_seq_len=args.pred_seq_len,
                         batch_size=args.batch_size)
    # model_time = Seq2seq_attn(num_features=data.targets_time['train'].shape[2],
    #                           input_seq_len=args.input_seq_len,
    #                           pred_seq_len=args.pred_seq_len,
    #                           batch_size=args.batch_size,
    #                           dropout=args.dropout_p)

    optimizer = optim.Adam([{
        'params': model_spatial.parameters()
    }, {
        'params': model_time.parameters()
    }],
                           lr=args.learning_rate,
                           weight_decay=1e-4)
    # scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
    #                                            milestones=[12, 25, 37],
    #                                            gamma=0.1,
    #                                            last_epoch = start_epoch-1)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                     mode='min',
                                                     factor=0.5,
                                                     patience=1)

    start_epoch = args.resume
    train_state = args_train_state(
        early_stopping_criteria=args.early_stopping_criteria,
        learning_rate=args.learning_rate)

    if args.resume:
        resume = os.path.join(args.save_dir,
                              'check_point_{}'.format(args.resume))
        print('Resuming model check point from {}\n'.format(resume))
        check_point = torch.load(resume)
        start_epoch = check_point['epoch']

        model_spatial.load_state_dict(check_point['model_spatial'])
        model_spatial.to(args.device)

        model_time.load_state_dict(check_point['model_time'])
        model_time.to(args.device)

        optimizer.load_state_dict(check_point['optimizer'])

        train_state = check_point['train_state']

        scheduler.optimizer = optimizer
        scheduler.last_epoch = start_epoch - 1
        scheduler.cooldown_counter = check_point['lr']['cooldown_counter']
        scheduler.best = check_point['lr']['best']
        scheduler.num_bad_epochs = check_point['lr']['num_bad_epochs']
        scheduler.mode_worse = check_point['lr']['mode_worse']
        scheduler.is_better = check_point['lr']['is_better']

    # define train class
    trainer = Trainer(dataset=dataset,
                      model_spatial=model_spatial,
                      model_time=model_time,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      device=args.device,
                      teacher_forcing_ratio=args.teacher_forcing_ratio,
                      train_state=train_state)

    # train & validation
    print('start train29 training...')
    for epoch_index in range(start_epoch, args.num_epochs):
        epoch_start = time.time()

        trainer.train_state['epoch_index'] = epoch_index + 1

        dataset.set_split('train')
        batch_generator_train = dataset.generate_batches(
            batch_size=args.batch_size,
            collate_fn=collate_fn,
            shuffle=args.shuffle,
            device=args.device)
        trainer.run_train_loop(batch_generator_train,
                               args.alpha,
                               device=args.device)

        epoch_end = time.time()

        print('\nEntire epoch train time cost: {:.2f} min'.format(
            (epoch_end - epoch_start) / 60))

        dataset.set_split('val')
        batch_generator_val = dataset.generate_batches(
            batch_size=args.batch_size,
            collate_fn=collate_fn,
            shuffle=False,
            device=args.device)
        trainer.run_val_loop(batch_generator_val, device=args.device)

        # check point
        save_name = os.path.join(
            args.save_dir,
            'check_point_{}'.format(trainer.train_state['epoch_index']))
        check_point = {
            'epoch': trainer.train_state['epoch_index'],
            'model_spatial': trainer.model_spatial.state_dict(),
            'model_time': trainer.model_time.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
            'train_state': trainer.train_state,
            'lr': {
                'cooldown_counter': trainer.scheduler.cooldown_counter,
                'best': trainer.scheduler.best,
                'num_bad_epochs': trainer.scheduler.num_bad_epochs,
                'mode_worse': trainer.scheduler.mode_worse,
                'is_better': trainer.scheduler.is_better
            }
        }

        torch.save(check_point, save_name)

        if trainer.train_state['stop_early']:
            break

    #plot loss
    plot_performance(trainer.train_state['train_loss'],
                     trainer.train_state['val_loss'], args.save_dir)

    print('start testing...')

    test_exps = np.load('exp_list.npy', allow_pickle=True)
    scales = np.load('scales.npy', allow_pickle=True)
    # test
    tester = Tester(test_exps=test_exps,
                    data_folder=args.data_folder,
                    scales=scales,
                    input_seq_len=args.input_seq_len,
                    pred_seq_len=args.pred_seq_len,
                    model_spatial=model_spatial,
                    model_time=model_time,
                    extract_num=4,
                    save_dir=args.save_dir,
                    save_sample_path=args.save_sample_path,
                    device='cuda')
    tester.run_test_loop()
Example #30
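# Minimal driver: derive sequence lengths from the non-zero entries of
# char_inputs, then call the Seq2seq helpers to train, predict, and retrain.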
from seq2seq import Seq2seq as seq
# Training
import tensorflow as tf
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
char_inputs = [[2,1],[1,2],[2,3],[3,4],[4,0]]

used = tf.sign(tf.abs(char_inputs))
length = tf.reduce_sum(used, reduction_indices=0)
lengths = tf.cast(length, tf.int32)


sess = tf.Session()
print(sess.run(lengths))

# Train
seq.train()
# Predict a reply for the input "天气" ("weather")
seq.predict("天气")
# Retrain
seq.retrain()