        # Inference pass (no teacher-forcing inputs); transpose the decoded
        # output from (seq_len, batch) to (batch, seq_len).
        outputs, attention = model(inputs, None, None)
        outputs = torch.transpose(outputs, 0, 1)

        # Restore the original sample order: pair each output row with its
        # original index from order_list and sort by that index.
        zip_list = zip(outputs.tolist(), order_list)
        sorted_list = sorted(zip_list, key=lambda x: x[1])

        output = torch.tensor([x[0] for x in sorted_list])

        for i in range(output.size(0)):
            # Trim each prediction at the first '?' (used here as the
            # end-of-sequence marker); otherwise keep the full sequence.
            seq = output[i].tolist()
            end_pos = seq.index(dl.char2idx['?']) if dl.char2idx['?'] in seq else len(seq)
            pred = "".join(dl.idx2char[o] for o in seq[:end_pos])
            pred_final.append(pred)

    # Write the predictions, now back in dataset order, to a Kaggle-style
    # submission file.
    result = pd.DataFrame(np.array(pred_final))
    result.columns = ['Predicted']
    result.to_csv('submission.csv', index_label='Id')


if __name__ == "__main__":
    train_loader, val_loader, test_loader = dl.get_loaders()
    model = models.Seq2Seq(40, 128, dl.get_char_length())
    model = model.to(device)
    if train_mode:
        train(model, train_loader, val_loader)
        with torch.no_grad():
            run_test(model, test_loader)
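The order-restoring step in the snippet above (zipping outputs with order_list, then sorting) is easy to get wrong, so here is a self-contained sketch of the same pattern with made-up tensors; it assumes the batch was length-sorted before the forward pass, and all values below are illustrative:

import torch

# Rows were produced on a length-sorted batch; order_list holds each
# row's original position in the dataset.
outputs = torch.tensor([[3, 1], [9, 9], [5, 2]])
order_list = [2, 0, 1]

# Pair each row with its original index, sort by that index, drop it.
restored = torch.tensor(
    [row for row, _ in sorted(zip(outputs.tolist(), order_list),
                              key=lambda x: x[1])])
print(restored)  # tensor([[9, 9], [5, 2], [3, 1]]) -- dataset order restored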



Example #2
import math
import random
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.data import BucketIterator  # torchtext.legacy.data on torchtext >= 0.9
from tqdm import tqdm

# Project-local modules assumed by this example: config, dataset, engine,
# metric_utils, model, and an argparse `args` namespace.


def run():
    Seed = 1234
    random.seed(Seed)
    np.random.seed(Seed)
    torch.manual_seed(Seed)
    torch.cuda.manual_seed(Seed)
    torch.backends.cudnn.deterministic = True
    train, valid, test, SRC, TRG = dataset.create_dataset()
    train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
        (train, valid, test),
        sort_key=lambda x: len(x.source),
        batch_size=config.BATCH_SIZE,
        device=config.device)

    INPUT_DIM = len(SRC.vocab)
    OUTPUT_DIM = len(TRG.vocab)
    HID_DIM = 256
    ENC_LAYERS = 3
    DEC_LAYERS = 3
    ENC_HEADS = 8
    DEC_HEADS = 8
    ENC_PF_DIM = 512
    DEC_PF_DIM = 512
    ENC_DROPOUT = 0.1
    DEC_DROPOUT = 0.1

    enc = model.Encoder(INPUT_DIM, HID_DIM, ENC_LAYERS, ENC_HEADS, ENC_PF_DIM,
                        ENC_DROPOUT, config.device)

    dec = model.Decoder(OUTPUT_DIM, HID_DIM, DEC_LAYERS, DEC_HEADS, DEC_PF_DIM,
                        DEC_DROPOUT, config.device)

    # Index of the <pad> token in each vocabulary, used for masking inside
    # the model.
    SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
    TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

    model_tr = model.Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX,
                             config.device).to(config.device)

    def initialize_weights(m):
        if hasattr(m, 'weight') and m.weight.dim() > 1:
            nn.init.xavier_uniform_(m.weight.data)

    model_tr.apply(initialize_weights)

    optimizer = optim.Adam(model_tr.parameters(), lr=config.LEARNING_RATE)

    # Padding positions are excluded from the loss so the model is never
    # rewarded for predicting <pad>.
    criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

    def epoch_time(start_time, end_time):
        elapsed_time = end_time - start_time
        elapsed_mins = int(elapsed_time / 60)
        elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
        return elapsed_mins, elapsed_secs

    if args.action == 'train':
        best_valid_loss = float('inf')

        for epoch in tqdm(range(config.N_EPOCHS)):

            start_time = time.time()

            train_loss = config.train(model_tr, train_iterator, optimizer,
                                      criterion, config.CLIP)
            valid_loss = config.evaluate(model_tr, valid_iterator, criterion)

            end_time = time.time()

            epoch_mins, epoch_secs = epoch_time(start_time, end_time)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model_tr.state_dict(), 'model.bin')

            with open(config.RESULTS_SAVE_FILE, 'a') as f:
                print(
                    f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s',
                    file=f)
                print(
                    f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}',
                    file=f)
                print(
                    f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}',
                    file=f)

    elif args.action == 'test':
        model_tr.load_state_dict(torch.load('model.bin'))

        # engine.test returns the mean test loss plus parallel lists of
        # ground-truth commands (t) and model outputs (o).
        test_loss, t, o = engine.test(model_tr, test_iterator, criterion, TRG)

        metric_val = 0

        # Accumulate the per-example metric; the 1.0 is the prediction
        # confidence expected by compute_metric.
        for i in range(len(t)):
            metric_val = metric_val + metric_utils.compute_metric(
                o[i], 1.0, t[i])

        print('Nl2Cmd Metric  | ', metric_val / len(t))

        print(
            f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |'
        )

    elif args.action == 'save_vocab':
        print('Source Vocab Length', len(SRC.vocab))
        print('Target vocab length', len(TRG.vocab))
        s1 = '\n'.join(SRC.vocab.itos)
        s2 = '\n'.join(TRG.vocab.itos)
        with open('NL_vocabulary.txt', 'w') as f:
            f.write(s1)
        with open('Bash_vocabulary.txt', 'w') as f:
            f.write(s2)
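The example calls config.train and config.evaluate without showing them. Below is a minimal sketch of what such a training step typically looks like for this encoder/decoder setup, assuming batch-first tensors, a model that returns (output, attention), and batch fields named source/target to match the sort_key above; every name here is illustrative, not the project's actual code:

import torch
import torch.nn as nn


def train_epoch(model, iterator, optimizer, criterion, clip):
    # One epoch of teacher-forced training: the decoder sees trg[:, :-1]
    # and is scored against the shifted target trg[:, 1:].
    model.train()
    epoch_loss = 0
    for batch in iterator:
        src, trg = batch.source, batch.target
        optimizer.zero_grad()
        output, _ = model(src, trg[:, :-1])  # [batch, trg_len - 1, vocab]
        output_dim = output.shape[-1]
        loss = criterion(output.contiguous().view(-1, output_dim),
                         trg[:, 1:].contiguous().view(-1))
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)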
Example #3
File: train.py Project: WBY1993/Seq2Seq
import tensorflow as tf  # TensorFlow 1.x API
import os
import input_data  # project-local data pipeline
import model  # project-local Seq2Seq definition

EPOCHS = 200
BATCH_SIZE = 20
SHUFFLE_SIZE = 50
VOCAB_SIZE = 284
EMBEDDING_SIZE = 256
LEARNING_RATE = 0.001
SAVE_DIR = "./log/"


if __name__ == "__main__":
    QA_model = model.Seq2Seq(BATCH_SIZE, VOCAB_SIZE, EMBEDDING_SIZE, LEARNING_RATE, True)
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()
    
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    tra_summary_writer = tf.summary.FileWriter(os.path.join(SAVE_DIR, "tra"), sess.graph)
    
    step = 0
    for e in range(EPOCHS):
        for encoder_input_data, decoder_input_data, decoder_input_label in input_data.get_batch("data/text_list", BATCH_SIZE, SHUFFLE_SIZE):
            step += 1
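            # One optimization step: run the training op and fetch the
            # scalar loss and merged summaries in a single sess.run call.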
            _, tra_loss, summary_str = sess.run([QA_model.train_op, QA_model.loss, summary_op],
                                                feed_dict={
                                                        QA_model.encoder_input_data: encoder_input_data,
                                                        QA_model.decoder_input_data: decoder_input_data,