    def setUpClass(self):
        self.test_wd = os.getcwd()
        self.dataset = Dataset(path=os.path.join(self.test_wd,'tests/data/eng-fra.txt'),
                               src_max_len=50, tgt_max_len=50, src_max_vocab=50000, tgt_max_vocab=50000)
        self.encoder = EncoderRNN(self.dataset.input_vocab,max_len=10, hidden_size=10, rnn_cell='lstm')
        self.decoder = DecoderRNN(self.dataset.output_vocab, max_len=10, hidden_size=10, rnn_cell='lstm')
        self.seq2seq = Seq2seq(self.encoder,self.decoder)
        if torch.cuda.is_available():
            self.seq2seq.cuda()
        self.mock_seq2seq = Seq2seq(self.encoder, self.decoder)

        for param in self.seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
Example #2
def predict(expt_dir, seq_str, date, epoch, step, n=3):
    seq = seq_str.strip().split()
    checkpoint_path = os.path.join(expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, date, epoch, step)
    seq2seq, input_vocab, output_vocab = get_model(checkpoint_path)
    beam_search = Seq2seq(seq2seq.encoder, TopKDecoder(seq2seq.decoder, 4))
    predictor = Predictor(beam_search, input_vocab, output_vocab)
    return predictor.predict_n(seq, n=n)
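
# Hypothetical usage sketch (values below are placeholders, not from the original
# project): predict() resolves the checkpoint at
# <expt_dir>/<Checkpoint.CHECKPOINT_DIR_NAME>/<date>/<epoch>/<step> and returns
# the top-n beam-search candidates for the tokenized input.
candidates = predict(expt_dir='./experiment',
                     seq_str='how are you ?',
                     date='2021_01_01_00_00_00',
                     epoch='5',
                     step='100',
                     n=3)
for i, candidate in enumerate(candidates, start=1):
    print('option %d: %s' % (i, candidate))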
Example #3
def build_model(tgt_field, max_len=50, hidden_size=100, bidirectional=False):
    print("building model...")
    vocab: torchtext.vocab.Vocab = tgt_field.vocab
    print("vocab: ", vocab.stoi)

    encoder = EncoderCNN2D()
    decoder = DecoderRNN(vocab_size=len(vocab),
                         max_len=max_len,
                         hidden_size=hidden_size *
                         2 if bidirectional else hidden_size,
                         dropout_p=0.2,
                         use_attention=True,
                         bidirectional=bidirectional,
                         eos_id=tgt_field.eos_id,
                         sos_id=tgt_field.sos_id,
                         rnn_cell='lstm')
    model_obj = Seq2seq(encoder, decoder)
    # if torch.cuda.is_available():
    #   model_obj.cuda()
    # for param in model_obj.parameters():
    #   init.xavier_uniform(param.data)
    for param in model_obj.parameters():
        param.data.uniform_(-0.08, 0.08)

    return model_obj
Example #4
def initialize_model(opt, src, tgt, train):
    # build vocabulary
    src.build_vocab(train, max_size=opt.src_vocab)
    tgt.build_vocab(train, max_size=opt.tgt_vocab)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # Initialize model
    hidden_size = opt.hidden_size
    decoder_hidden_size = hidden_size * 2 if opt.bidirectional else hidden_size
    encoder = EncoderRNN(len(src.vocab), opt.max_len, hidden_size, opt.embedding_size,
                         dropout_p=opt.dropout_p_encoder,
                         n_layers=opt.n_layers,
                         bidirectional=opt.bidirectional,
                         rnn_cell=opt.rnn_cell,
                         variable_lengths=True)
    decoder = DecoderRNN(len(tgt.vocab), opt.max_len, decoder_hidden_size,
                         dropout_p=opt.dropout_p_decoder,
                         n_layers=opt.n_layers,
                         attention_method=opt.attention_method,
                         full_focus=opt.full_focus,
                         bidirectional=opt.bidirectional,
                         rnn_cell=opt.rnn_cell,
                         eos_id=tgt.eos_id, sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    seq2seq.to(device)

    return seq2seq, input_vocab, output_vocab
Example #5
    def setUpClass(self):
        self.test_wd = os.getcwd()
        self.dataset = Dataset(path=os.path.join(self.test_wd,'tests/data/eng-fra.txt'),
                               src_max_len=50, tgt_max_len=50, src_max_vocab=50000, tgt_max_vocab=50000)
        self.encoder = EncoderRNN(self.dataset.input_vocab,max_len=10, hidden_size=10, rnn_cell='lstm')
        self.decoder = DecoderRNN(self.dataset.output_vocab, max_len=10, hidden_size=10, rnn_cell='lstm')
        self.seq2seq = Seq2seq(self.encoder,self.decoder)
        self.mock_seq2seq = Seq2seq(self.encoder, self.decoder)

        for param in self.seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        if not os.path.exists(os.path.join(self.test_wd,'checkpoints')):
            os.mkdir(os.path.join(self.test_wd,'checkpoints'))

        self.seq2seq.save(os.path.join(self.test_wd,'checkpoints'))
        self.mock_seq2seq.load(os.path.join(self.test_wd, 'checkpoints'))
Example #6
    def __init__(self,
                 data_path,
                 model_save_path,
                 model_load_path,
                 hidden_size=32,
                 max_vocab=4000,
                 device='cuda'):
        self.src = SourceField()
        self.tgt = TargetField()
        self.max_length = 90
        self.data_path = data_path
        self.model_save_path = model_save_path
        self.model_load_path = model_load_path

        def len_filter(example):
            return len(example.src) <= self.max_length and len(
                example.tgt) <= self.max_length

        self.trainset = torchtext.data.TabularDataset(
            path=os.path.join(self.data_path, 'train'),
            format='tsv',
            fields=[('src', self.src), ('tgt', self.tgt)],
            filter_pred=len_filter)
        self.devset = torchtext.data.TabularDataset(path=os.path.join(
            self.data_path, 'eval'),
                                                    format='tsv',
                                                    fields=[('src', self.src),
                                                            ('tgt', self.tgt)],
                                                    filter_pred=len_filter)
        self.src.build_vocab(self.trainset, max_size=max_vocab)
        self.tgt.build_vocab(self.trainset, max_size=max_vocab)
        weight = torch.ones(len(self.tgt.vocab))
        pad = self.tgt.vocab.stoi[self.tgt.pad_token]
        self.loss = Perplexity(weight, pad)
        self.loss.cuda()
        self.optimizer = None
        self.hidden_size = hidden_size
        self.bidirectional = True
        encoder = EncoderRNN(len(self.src.vocab),
                             self.max_length,
                             self.hidden_size,
                             bidirectional=self.bidirectional,
                             variable_lengths=True)
        decoder = DecoderRNN(len(self.tgt.vocab),
                             self.max_length,
                             self.hidden_size *
                             2 if self.bidirectional else self.hidden_size,
                             dropout_p=0.2,
                             use_attention=True,
                             bidirectional=self.bidirectional,
                             eos_id=self.tgt.eos_id,
                             sos_id=self.tgt.sos_id)
        self.device = device
        self.seq2seq = Seq2seq(encoder, decoder).cuda()
        for param in self.seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
Example #7
    def build_model(self):
        self._logger.info("Building model...")
        self.model = Seq2seq(
            batch_input_shape=(TRAIN_BATCH_SIZE,
                               (INPUT_SEQ_LEN + 1) * MSG_HISTORY_LEN, 29),
            hidden_dim=HIDDEN_LAYER_DIM,
            output_length=MAX_OUTPUT_TOKEN_LENGTH,
            output_dim=29,
            depth=1)
        self._logger.info("Compiling...")
        self.model.compile(loss='mse', optimizer='rmsprop')
Example #8
    def setUpClass(self):
        test_path = os.path.dirname(os.path.realpath(__file__))
        src = SourceField()
        trg = TargetField()
        dataset = torchtext.data.TabularDataset(
            path=os.path.join(test_path, 'data/eng-fra.txt'), format='tsv',
            fields=[('src', src), ('trg', trg)],
        )
        src.build_vocab(dataset)
        trg.build_vocab(dataset)

        encoder = EncoderRNN(len(src.vocab), 10, 10, rnn_cell='lstm')
        decoder = DecoderRNN(len(trg.vocab), 10, 10, trg.sos_id, trg.eos_id, rnn_cell='lstm')
        seq2seq = Seq2seq(encoder, decoder)
        self.predictor = Predictor(seq2seq, src.vocab, trg.vocab)
Example #9
    def setUp(self):
        test_path = os.path.dirname(os.path.realpath(__file__))
        src = SourceField()
        tgt = TargetField()
        self.dataset = torchtext.data.TabularDataset(
            path=os.path.join(test_path, 'data/eng-fra.txt'), format='tsv',
            fields=[('src', src), ('tgt', tgt)],
        )
        src.build_vocab(self.dataset)
        tgt.build_vocab(self.dataset)

        encoder = EncoderRNN(len(src.vocab), 10, 10, rnn_cell='lstm')
        decoder = DecoderRNN(len(tgt.vocab), 10, 10, tgt.sos_id, tgt.eos_id, rnn_cell='lstm')
        self.seq2seq = Seq2seq(encoder, decoder)

        for param in self.seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
Example #10
def initialize_model(
    train,
    input_vocab,
    output_vocab,
    max_len=10,
    hidden_size=256,
    dropout_p=0.5,
    bidirectional=True,
    n_beam=5,
):
    # Initialize model
    encoder = EncoderRNN(
        len(input_vocab),
        max_len,
        hidden_size,
        bidirectional=bidirectional,
        variable_lengths=True,
    )

    decoder = DecoderRNN(
        len(output_vocab),
        max_len,
        hidden_size * (2 if bidirectional else 1),
        dropout_p=dropout_p,
        use_attention=True,
        bidirectional=bidirectional,
        eos_id=train.tgt_field.eos_id,
        sos_id=train.tgt_field.sos_id,
    )
    #     decoder = TopKDecoder(decoder ,n_beam)
    seq2seq = Seq2seq(encoder, decoder)
    if torch.cuda.is_available():
        seq2seq = seq2seq.cuda()

    for param in seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)

    # Optimizer and learning rate scheduler can be customized by
    # explicitly constructing the objects and passing them to the trainer
    optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                          max_grad_norm=5)
    scheduler = StepLR(optimizer.optimizer, 1)
    optimizer.set_scheduler(scheduler)

    return seq2seq, optimizer, scheduler
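
# Hypothetical usage sketch, mirroring the SupervisedTrainer calls in the other
# examples on this page; `train_data` (assumed to expose `tgt_field`), `dev_data`,
# `input_vocab`, `output_vocab`, and `loss` are placeholders prepared elsewhere,
# and the experiment directory is made up.
seq2seq, optimizer, scheduler = initialize_model(train_data, input_vocab, output_vocab)
trainer = SupervisedTrainer(loss=loss, batch_size=32,
                            checkpoint_every=50, print_every=10,
                            expt_dir='./experiment')
seq2seq = trainer.train(seq2seq, train_data, num_epochs=6, dev_data=dev_data,
                        optimizer=optimizer, teacher_forcing_ratio=0.5)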
Example #11
def build_model(src, tgt, hidden_size, mini_batch_size, bidirectional, dropout,
                attention, init_value):
    EXPERIMENT.param("Hidden", hidden_size)
    EXPERIMENT.param("Bidirectional", bidirectional)
    EXPERIMENT.param("Dropout", dropout)
    EXPERIMENT.param("Attention", attention)
    EXPERIMENT.param("Mini-batch", mini_batch_size)
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    encoder = EncoderRNN(len(src.vocab),
                         MAX_LEN,
                         hidden_size,
                         rnn_cell="lstm",
                         bidirectional=bidirectional,
                         dropout_p=dropout,
                         variable_lengths=False)
    decoder = DecoderRNN(
        len(tgt.vocab),
        MAX_LEN,
        hidden_size,  # * 2 if bidirectional else hidden_size,
        rnn_cell="lstm",
        use_attention=attention,
        eos_id=tgt.eos_id,
        sos_id=tgt.sos_id)
    seq2seq = Seq2seq(encoder, decoder)
    using_cuda = False
    if torch.cuda.is_available():
        using_cuda = True
        encoder.cuda()
        decoder.cuda()
        seq2seq.cuda()
        loss.cuda()
    EXPERIMENT.param("CUDA", using_cuda)
    for param in seq2seq.parameters():
        param.data.uniform_(-init_value, init_value)

    trainer = SupervisedTrainer(loss=loss,
                                batch_size=mini_batch_size,
                                checkpoint_every=5000,
                                random_seed=42,
                                print_every=1000)
    return seq2seq, trainer
Example #12
import numpy as np

import seq2seq.layers.decoders
from seq2seq.models import Seq2seq
import cPickle

batch_size = 2
input_dim = 3
output_dim = 4
timesteps_i = 5
timesteps_o = 6
X_batch = np.arange(
    batch_size * timesteps_i * input_dim).reshape(
    batch_size, timesteps_i, input_dim)
Y_batch = np.arange(
    batch_size * timesteps_o * output_dim).reshape(
    batch_size, timesteps_o, output_dim)

model = Seq2seq(batch_input_shape=(batch_size, timesteps_i, input_dim),
                hidden_dim=7,
                output_length=timesteps_o,
                output_dim=output_dim,
                depth=2,
                peek=True)
# model.add(SimpleRNN(output_dim,
#                     input_shape=(timesteps, input_dim),
#                     return_sequences=True,
#                     unroll=True))
model.compile(loss='categorical_crossentropy',
              optimizer='sgd')
model.train_on_batch(X_batch, Y_batch)
# loss_and_metrics = model.evaluate(X_test, Y_test, batch_size=32)
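
# Hypothetical follow-up (assumes this Seq2seq model exposes the standard Keras
# Model.predict API): push the same batch through the trained network and inspect
# the predicted output sequences.
predictions = model.predict(X_batch)
print(predictions.shape)  # expected: (batch_size, timesteps_o, output_dim)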
Example #13
loss1, accuracy1 = evaluator.evaluate(seq2seq_m, test)
print(" testing ")
print("loss: ", loss1)
print("accuracy: ", accuracy1)


loss1, accuracy1 = evaluator.evaluate(seq2seq_m, dev)
print(" evaluation set ")
print("loss: ", loss1)
print("accuracy: ", accuracy1)


#############


beam_search = Seq2seq(seq2seq_m.encoder, TopKDecoder(seq2seq_m.decoder, 3))
if torch.cuda.is_available():
    beam_search.cuda()
else:
    print(" error no cuda")

predictor = Predictor(beam_search, input_vocab, output_vocab)

####
from rouge import Rouge

print("**training rouge")
references = []
hypothesis = []
test_set = train
for i in range(len(test_set)):
Example #14
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        hidden_size=128
        bidirectional = True
        encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                             bidirectional=bidirectional, variable_lengths=True)
        decoder = DecoderRNN(len(tgt.vocab), max_len, hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=0.2, use_attention=True, bidirectional=bidirectional,
                             eos_id=tgt.eos_id, sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
        #
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        # scheduler = StepLR(optimizer.optimizer, 1)
        # optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(loss=loss, batch_size=32,
Example #15
def main():
    '''Main Function'''

    parser = argparse.ArgumentParser(description='sum_file.py')

    parser.add_argument('-model', required=True,
                        help='Path to model .pt file')
    parser.add_argument('-src', required=True,
                        help='Source sequence to decode (one line per sequence)')
    parser.add_argument('-vocab', required=True,
                        help='Preprocessed data file containing the source/target vocabularies')
    parser.add_argument('-output', default='pred.txt',
                        help="""Path to output the predictions (each line will
                        be the decoded sequence)""")
    parser.add_argument('-beam_size', type=int, default=5,
                        help='Beam size')
    parser.add_argument('-batch_size', type=int, default=30,
                        help='Batch size')
    parser.add_argument('-n_best', type=int, default=1,
                        help="""If verbose is set, will output the n_best
                        decoded sentences""")
    parser.add_argument('-no_cuda', action='store_true')

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda

    # Prepare DataLoader
    preprocess_data = torch.load(opt.vocab)
    preprocess_settings = preprocess_data['settings']
    test_src_word_insts = read_instances_from_file(
        opt.src,
        preprocess_settings.max_word_seq_len,
        preprocess_settings.keep_case,
        preprocess_settings.mode)
    test_src_insts = convert_instance_to_idx_seq(
        test_src_word_insts, preprocess_data['dict']['src'])

    # prepare model
    device = torch.device('cuda' if opt.cuda else 'cpu')
    checkpoint = torch.load(opt.model)
    model_opt = checkpoint['settings']
    
    model_opt.bidirectional = True
    encoder = EncoderRNN(model_opt.src_vocab_size, model_opt.max_token_seq_len, model_opt.d_model,
                            bidirectional=model_opt.bidirectional, variable_lengths=True)
    decoder = DecoderRNN(model_opt.tgt_vocab_size, model_opt.max_token_seq_len, model_opt.d_model * 2 if model_opt.bidirectional else model_opt.d_model,
                            n_layers=model_opt.n_layer, dropout_p=model_opt.dropout, use_attention=True, bidirectional=model_opt.bidirectional,
                            eos_id=Constants.EOS, sos_id=Constants.BOS)
    model = Seq2seq(encoder, decoder).to(device)
    model = nn.DataParallel(model)  # wrap in DataParallel because training used DataParallel

    model.load_state_dict(checkpoint['model'])
    print('[Info] Trained model state loaded.')

    predictor = Predictor(model, preprocess_data['dict']['tgt'])

    with open(opt.output, 'w') as f:
        for src_seq in tqdm(test_src_insts, mininterval=2, desc='  - (Test)', leave=False):
            pred_line = ' '.join(predictor.predict(src_seq))
            f.write(pred_line + '\n')
    print('[Info] Finished.')
Example #16
def train(args):
    ###############################################################################
    # Load data
    ###############################################################################
    cuda = int(torch.cuda.is_available()) - 1

    TEXT = data.Field(lower=True, init_token="<start>", eos_token="<end>")
    LABELS = data.Field(sequential=True)

    train, val, test = data.TabularDataset.splits(
        # ms_draw data
        path='../ms_draw/',
        train='draw-train.tsv',
        validation='draw-dev.tsv',
        test='draw-test.tsv',
        format='tsv',
        fields=[('text', TEXT), ('label', LABELS)])

    print('train.examples.data:', train.examples[0].label)

    prevecs = None
    if (args.pretr_emb == True):
        #print('Making vocab w/ glove.6B.' + str(args.emb_dim) + ' dim vectors')
        TEXT.build_vocab(train,
                         vectors=GloVe(name='6B', dim=args.emb_dim),
                         min_freq=args.mf)  #wv_type="glove.6B")
        prevecs = TEXT.vocab.vectors
    else:
        TEXT.build_vocab(train)

    LABELS.build_vocab(train)
    vecs = Vecs(args.emb_dim)
    #print('Making interator for splits...')
    train_iter, val_iter, test_iter = data.BucketIterator.splits(
        (train, val, test),
        batch_sizes=(args.batch_size, args.batch_size, args.batch_size),
        sort_key=lambda x: len(x.text))  #, device=cuda)

    num_classes = len(LABELS.vocab)
    vocab_size = len(TEXT.vocab)
    ###############################################################################
    # Build the model
    ###############################################################################

    encoder_model = EncoderRNN(vocab_size=vocab_size,
                               max_len=200,
                               hidden_size=args.hidden_sz,
                               input_dropout_p=0,
                               dropout_p=args.dropout,
                               n_layers=args.num_layers,
                               bidirectional=args.num_dir == 2,
                               rnn_cell=args.net_type,
                               variable_lengths=False)

    decoder_model = DecoderRNN(
        vocab_size=vocab_size,
        max_len=200,
        hidden_size=args.hidden_sz,
        sos_id=2,  # Add to params
        eos_id=3,  # Add to params
        n_layers=args.num_layers,
        rnn_cell=args.net_type,
        bidirectional=args.num_dir == 2,
        input_dropout_p=0,
        dropout_p=args.dropout,
        use_attention=False)

    model = Seq2seq(encoder_model, decoder_model)

    criterion = NLLLoss()
    #criterion = nn.CrossEntropyLoss()
    # Select optimizer
    if (args.opt == 'adamax'):
        optimizer = torch.optim.Adamax(model.parameters())  #, lr=args.lr)
    elif (args.opt == 'adam'):
        optimizer = torch.optim.Adam(model.parameters())  #, lr=args.lr)
    elif (args.opt == 'sgd'):
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
                                    momentum=0.5)  #,lr=args.lr,momentum=0.5)
    else:
        #print('Optimizer unknown, defaulting to adamax')
        optimizer = torch.optim.Adamax(model.parameters())

    ###############################################################################
    # Training the Model
    ###############################################################################
    if cuda == 0:
        model = model.cuda()  #args.device)

    highest_t1_acc = 0
    highest_t1_acc_metrics = ''
    highest_t1_acc_params = ''
    results = ''
    for epoch in range(args.epochs):
        losses = []
        tot_loss = 0
        train_iter.repeat = False
        for batch_count, batch in enumerate(train_iter):
            print('Batch:', batch_count)
            model.zero_grad()
            inp = batch.text.t()
            print('type(inp)', type(inp))
            inp3d = torch.autograd.Variable(
                torch.cuda.FloatTensor(inp.size(0), inp.size(1), args.emb_dim))
            print('type(inp3d)', type(inp3d))
            for i in range(inp.size(0)):
                for j in range(inp.size(1)):
                    inp3d[i, j, :] = vecs[TEXT.vocab.itos[inp[i, j].data[0]]]
            #print("INP: ",inp.size())
            #print(inp3d)

            #print(inp)

            preds = model(inp3d)
            #print("PREDS: ",np.shape(preds))
            #print("LABELS: ",batch.label.size())

            loss = criterion(preds, batch.label)
            loss.backward()
            optimizer.step()
            losses.append(loss)
            tot_loss += loss.data[0]

            #if (batch_count % 20 == 0):
            #    print('Batch: ', batch_count, '\tLoss: ', str(losses[-1].data[0]))
            batch_count += 1
        #print('Average loss over epoch ' + str(epoch) + ': ' + str(tot_loss/len(losses)))
        (avg_loss, accuracy, corrects, size, t5_acc, t5_corrects,
         mrr) = eval(val_iter, model, vecs, TEXT,
                     args.emb_dim)  #, args.device)
        if accuracy > args.acc_thresh:
            save_path = '{}/acc{:.2f}_e{}.pt'.format(args.save_path_full,
                                                     accuracy, epoch)
            if not os.path.isdir(args.save_path_full):
                os.makedirs(args.save_path_full)
            torch.save(model, save_path)

        if highest_t1_acc < accuracy:
            highest_t1_acc = accuracy
            highest_t1_acc_metrics = ('acc: {:6.4f}%({:3d}/{}) EPOCH{:2d} - loss: {:.4f} t5_acc: {:6.4f}%({:3d}' \
                    '/{}) MRR: {:.6f}'.format(accuracy, corrects, size,epoch, avg_loss, t5_acc, t5_corrects, size, mrr))

            highest_t1_acc_params = (('PARAMETERS:' \
                    'net-%s' \
                    '_e%i' \
                    '_bs%i' \
                    '_opt-%s' \
                    '_ly%i' \
                    '_hs%i' \
                    '_dr%i'
                    '_ed%i' \
                    '_femb%s' \
                    '_ptemb%s' \
                    '_drp%.1f' \
                    '_mf%d\n'
                    % (args.net_type, args.epochs, args.batch_size, args.opt, args.num_layers,
                    args.hidden_sz, args.num_dir, args.emb_dim, args.embfix, args.pretr_emb, args.dropout, args.mf)))
        results += ('\nEPOCH{:2d} - loss: {:.4f}  acc: {:6.4f}%({:3d}/{}) t5_acc: {:6.4f}%({:3d}' \
                '/{}) MRR: {:.6f}'.format(epoch, avg_loss, accuracy,
                                        corrects, size, t5_acc, t5_corrects, size,
                                        mrr))

    print(highest_t1_acc_metrics + '\n')
    writeResults(args, results, highest_t1_acc, highest_t1_acc_metrics,
                 highest_t1_acc_params)
Example #17
                     rnn_cell='lstm',
                     bidirectional=bidirectional,
                     n_layers=2,
                     variable_lengths=True)
decoder = DecoderRNN(len(tgt.vocab),
                     max_len,
                     hidden_size * 2 if bidirectional else hidden_size,
                     rnn_cell='lstm',
                     dropout_p=0.25,
                     use_attention=True,
                     bidirectional=bidirectional,
                     n_layers=2,
                     eos_id=tgt.eos_id,
                     sos_id=tgt.sos_id)

seq2seq_model = Seq2seq(encoder, decoder)
if torch.cuda.is_available():
    seq2seq_model.cuda()

for param in seq2seq_model.parameters():
    param.data.uniform_(-0.1, 0.1)

optimizer = Optimizer(torch.optim.Adam(seq2seq_model.parameters()),
                      max_grad_norm=5)

# In[20]:

seq2seq_model = torch.nn.DataParallel(seq2seq_model)

# In[21]:
Example #18
                             use_attention=True,
                             bidirectional=True,
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id,
                             embedding=hidden_size,
                             use_concept=opt.concept)
        dialog_encoder = torch.nn.LSTM(input_size=hidden_size *
                                       2 if bidirectional else hidden_size,
                                       hidden_size=dialog_hidden_size,
                                       batch_first=True,
                                       dropout=dropout)
        if opt.concept:
            seq2seq = Seq2seq(encoder,
                              decoder,
                              dialog_encoder=dialog_encoder,
                              cpt_vocab=cpt.vocab,
                              hidden_size=dialog_hidden_size,
                              concept_level=opt.concept_level,
                              conceptnet_file=opt.conceptnet_file)
        else:
            seq2seq = Seq2seq(encoder,
                              decoder,
                              dialog_encoder=dialog_encoder,
                              hidden_size=dialog_hidden_size)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
Example #19
def run_training(opt, default_data_dir, num_epochs=100):
    if opt.load_checkpoint is not None:
        logging.info("loading checkpoint from {}".format(
            os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, opt.load_checkpoint)))
        checkpoint_path = os.path.join(opt.expt_dir, Checkpoint.CHECKPOINT_DIR_NAME, opt.load_checkpoint)
        checkpoint = Checkpoint.load(checkpoint_path)
        seq2seq = checkpoint.model
        input_vocab = checkpoint.input_vocab
        output_vocab = checkpoint.output_vocab
    else:

        # Prepare dataset
        src = SourceField()
        tgt = TargetField()
        max_len = 50

        data_file = os.path.join(default_data_dir, opt.train_path, 'data.txt')

        logging.info("Starting new Training session on %s", data_file)

        def len_filter(example):
            return (len(example.src) <= max_len) and (len(example.tgt) <= max_len) \
                   and (len(example.src) > 0) and (len(example.tgt) > 0)

        train = torchtext.data.TabularDataset(
            path=data_file, format='json',
            fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
            filter_pred=len_filter
        )

        dev = None
        if opt.no_dev is False:
            dev_data_file = os.path.join(default_data_dir, opt.train_path, 'dev-data.txt')
            dev = torchtext.data.TabularDataset(
                path=dev_data_file, format='json',
                fields={'src': ('src', src), 'tgt': ('tgt', tgt)},
                filter_pred=len_filter
            )

        src.build_vocab(train, max_size=50000)
        tgt.build_vocab(train, max_size=50000)
        input_vocab = src.vocab
        output_vocab = tgt.vocab

        # NOTE: If the source field name and the target field name
        # are different from 'src' and 'tgt' respectively, they have
        # to be set explicitly before any training or inference
        # seq2seq.src_field_name = 'src'
        # seq2seq.tgt_field_name = 'tgt'

        # Prepare loss
        weight = torch.ones(len(tgt.vocab))
        pad = tgt.vocab.stoi[tgt.pad_token]
        loss = Perplexity(weight, pad)
        if torch.cuda.is_available():
            logging.info("Yayyy We got CUDA!!!")
            loss.cuda()
        else:
            logging.info("No cuda available device found running on cpu")

        seq2seq = None
        optimizer = None
        if not opt.resume:
            hidden_size = 128
            decoder_hidden_size = hidden_size * 2
            logging.info("EncoderRNN Hidden Size: %s", hidden_size)
            logging.info("DecoderRNN Hidden Size: %s", decoder_hidden_size)
            bidirectional = True
            encoder = EncoderRNN(len(src.vocab), max_len, hidden_size,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 variable_lengths=True)
            decoder = DecoderRNN(len(tgt.vocab), max_len, decoder_hidden_size,
                                 dropout_p=0, use_attention=True,
                                 bidirectional=bidirectional,
                                 rnn_cell='lstm',
                                 eos_id=tgt.eos_id, sos_id=tgt.sos_id)

            seq2seq = Seq2seq(encoder, decoder)
            if torch.cuda.is_available():
                seq2seq.cuda()

            for param in seq2seq.parameters():
                param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.

        optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        scheduler = StepLR(optimizer.optimizer, 1)
        optimizer.set_scheduler(scheduler)

        # train

        batch_size = 32
        checkpoint_every = num_epochs / 10
        print_every = num_epochs / 100

        properties = dict(batch_size=batch_size,
                          checkpoint_every=checkpoint_every,
                          print_every=print_every, expt_dir=opt.expt_dir,
                          num_epochs=num_epochs,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        logging.info("Starting training with the following Properties %s", json.dumps(properties, indent=2))
        t = SupervisedTrainer(loss=loss, batch_size=batch_size,
                              checkpoint_every=checkpoint_every,
                              print_every=print_every, expt_dir=opt.expt_dir)

        seq2seq = t.train(seq2seq, train,
                          num_epochs=num_epochs, dev_data=dev,
                          optimizer=optimizer,
                          teacher_forcing_ratio=0.5,
                          resume=opt.resume)

        evaluator = Evaluator(loss=loss, batch_size=batch_size)

        if opt.no_dev is False:
            dev_loss, accuracy = evaluator.evaluate(seq2seq, dev)
            logging.info("Dev Loss: %s", dev_loss)
            logging.info("Accuracy: %s", dev_loss)

    beam_search = Seq2seq(seq2seq.encoder, TopKDecoder(seq2seq.decoder, 4))

    predictor = Predictor(beam_search, input_vocab, output_vocab)
    while True:
        try:
            seq_str = input("Type in a source sequence:")
            seq = seq_str.strip().split()
            results = predictor.predict_n(seq, n=3)
            for i, res in enumerate(results):
                print('option %s: %s\n' % (i + 1, res))
        except KeyboardInterrupt:
            logging.info("Bye Bye")
            exit(0)
Example #20
                                 hidden_size,
                                 n_layers=n_layers,
                                 bidirectional=bidirectional,
                                 variable_lengths=True)
            decoder = DecoderRNN(len(output_vocab),
                                 opt.max_len,
                                 hidden_size *
                                 2 if bidirectional else hidden_size,
                                 dropout_p=decoder_dropout,
                                 use_attention=True,
                                 bidirectional=bidirectional,
                                 n_layers=n_layers,
                                 eos_id=first_field.eos_id,
                                 sos_id=first_field.sos_id)

            seq2seq = Seq2seq(encoder, decoder, batch_size, num_sequences)
            if torch.cuda.is_available():
                seq2seq.cuda()

            for param in seq2seq.parameters():
                param.data.uniform_(-0.08, 0.08)

            # Optimizer and learning rate scheduler can be customized by
            # explicitly constructing the objects and passing them to the trainer.
            #
            # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
            # scheduler = StepLR(optimizer.optimizer, 1)
            # optimizer.set_scheduler(scheduler)

        # train
        t = SupervisedTrainer(loss=loss,
Example #21
                      hidden_size * 2 if bidirectional else hidden_size,
                      dropout_p=0.2,
                      use_attention=args.use_att,
                      bidirectional=bidirectional,
                      eos_id=sos_id,
                      sos_id=sos_id + 1,
                      batch_size=args.batch_size,
                      att_method=args.att_method,
                      att_mlp=args.att_mlp,
                      att_type=args.att_type)
# decoder3 = DecoderRNN(args.decoder3_n_layer, args.vocab_size, max_len, hidden_size * 2 if bidirectional else hidden_size,
#                      dropout_p=0.2, use_attention=True, bidirectional=bidirectional,
#                      eos_id=eos_id, sos_id=sos_id)

# seq2seq = Seq2seq(args, decoder1, decoder2, decoder3)
seq2seq = Seq2seq(args, decoder1, decoder2)
seq2seq.cuda()
seq2seq = torch.nn.DataParallel(seq2seq)
cudnn.benchmark = True

print('Initialize model parameter ...')
if args.init == 'uniform':
    print('uniform init !')
    for param in seq2seq.parameters():
        param.data.uniform_(-args.init_weight, args.init_weight)
elif args.init == 'mos':
    print('mos init !')
    for m in seq2seq.modules():
        if type(m) in [nn.GRU, nn.LSTM, nn.RNN]:
            for name, param in m.named_parameters():
                if 'weight_ih' in name:
Example #22
def offline_training(opt, traget_file_path):

    # Prepare dataset with torchtext
    src = SourceField(tokenize=treebank_tokenizer)
    tgt = TargetField(tokenize=treebank_tokenizer)

    def sample_filter(sample):
        """ sample example for future purpose"""
        return True

    train = torchtext.data.TabularDataset(path=opt.train_path,
                                          format='tsv',
                                          fields=[('src', src), ('tgt', tgt)],
                                          filter_pred=sample_filter)
    dev = torchtext.data.TabularDataset(path=opt.dev_path,
                                        format='tsv',
                                        fields=[('src', src), ('tgt', tgt)],
                                        filter_pred=sample_filter)
    test = torchtext.data.TabularDataset(path=opt.dev_path,
                                         format='tsv',
                                         fields=[('src', src), ('tgt', tgt)],
                                         filter_pred=sample_filter)
    src.build_vocab(train, max_size=opt.src_vocab_size)
    tgt.build_vocab(train, max_size=opt.tgt_vocab_size)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # NOTE: If the source field name and the target field name
    # are different from 'src' and 'tgt' respectively, they have
    # to be set explicitly before any training or inference
    # seq2seq.src_field_name = 'src'
    # seq2seq.tgt_field_name = 'tgt'

    # Prepare loss
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    if opt.loss == 'perplexity':
        loss = Perplexity(weight, pad)
    else:
        raise TypeError

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        encoder = EncoderRNN(vocab_size=len(src.vocab),
                             max_len=opt.max_length,
                             hidden_size=opt.hidden_size,
                             input_dropout_p=opt.input_dropout_p,
                             dropout_p=opt.dropout_p,
                             n_layers=opt.n_layers,
                             bidirectional=opt.bidirectional,
                             rnn_cell=opt.rnn_cell,
                             variable_lengths=True,
                             embedding=input_vocab.vectors
                             if opt.use_pre_trained_embedding else None,
                             update_embedding=opt.update_embedding)
        decoder = DecoderRNN(vocab_size=len(tgt.vocab),
                             max_len=opt.max_length,
                             hidden_size=opt.hidden_size *
                             2 if opt.bidirectional else opt.hidden_size,
                             sos_id=tgt.sos_id,
                             eos_id=tgt.eos_id,
                             n_layers=opt.n_layers,
                             rnn_cell=opt.rnn_cell,
                             bidirectional=opt.bidirectional,
                             input_dropout_p=opt.input_dropout_p,
                             dropout_p=opt.dropout_p,
                             use_attention=opt.use_attention)
        seq2seq = Seq2seq(encoder=encoder, decoder=decoder)
        if opt.gpu >= 0 and torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
    # train
    trainer = SupervisedTrainer(loss=loss,
                                batch_size=opt.batch_size,
                                checkpoint_every=opt.checkpoint_every,
                                print_every=opt.print_every,
                                expt_dir=opt.expt_dir)
    seq2seq = trainer.train(model=seq2seq,
                            data=train,
                            num_epochs=opt.epochs,
                            resume=opt.resume,
                            dev_data=dev,
                            optimizer=optimizer,
                            teacher_forcing_ratio=opt.teacher_forcing_rate)
Example #23
def train():
    src = SourceField(sequential=True,
                      tokenize=lambda x: [i for i in jieba.lcut(x)])
    tgt = TargetField(sequential=True,
                      tokenize=lambda x: [i for i in jieba.lcut(x)])
    max_len = 50

    def len_filter(example):
        return len(example.src) <= max_len and len(example.tgt) <= max_len

    train = torchtext.data.TabularDataset(path=opt.train_path,
                                          format='csv',
                                          fields=[('src', src), ('tgt', tgt)],
                                          filter_pred=len_filter)
    dev = torchtext.data.TabularDataset(path=opt.dev_path,
                                        format='csv',
                                        fields=[('src', src), ('tgt', tgt)],
                                        filter_pred=len_filter)

    src.build_vocab(train, max_size=50000)
    tgt.build_vocab(train, max_size=50000)
    input_vocab = src.vocab
    output_vocab = tgt.vocab

    # NOTE: If the source field name and the target field name
    # are different from 'src' and 'tgt' respectively, they have
    # to be set explicitly before any training or inference
    # seq2seq.src_field_name = 'src'
    # seq2seq.tgt_field_name = 'tgt'

    # Prepare loss
    weight = torch.ones(len(tgt.vocab))
    pad = tgt.vocab.stoi[tgt.pad_token]
    loss = Perplexity(weight, pad)
    if torch.cuda.is_available():
        loss.cuda()

    seq2seq = None
    optimizer = None
    if not opt.resume:
        # Initialize model
        hidden_size = 128
        bidirectional = True
        encoder = EncoderRNN(len(src.vocab),
                             max_len,
                             hidden_size,
                             bidirectional=bidirectional,
                             variable_lengths=True)
        decoder = DecoderRNN(len(tgt.vocab),
                             max_len,
                             hidden_size * 2 if bidirectional else hidden_size,
                             dropout_p=0.2,
                             use_attention=True,
                             bidirectional=bidirectional,
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
        #
        # optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()), max_grad_norm=5)
        # scheduler = StepLR(optimizer.optimizer, 1)
        # optimizer.set_scheduler(scheduler)

    # train
    t = SupervisedTrainer(loss=loss,
                          batch_size=32,
                          checkpoint_every=50,
                          print_every=10,
                          expt_dir=opt.expt_dir)

    seq2seq = t.train(seq2seq,
                      train,
                      num_epochs=6,
                      dev_data=dev,
                      optimizer=optimizer,
                      teacher_forcing_ratio=0.5,
                      resume=opt.resume)
    predictor = Predictor(seq2seq, input_vocab, output_vocab)
Example #24
encoder_embedding_size = 100
encoder_hidden_size = 50
encoder = Encoder(encoder_vocab_size, encoder_embedding_size,
                  encoder_hidden_size)

# decoder
decoder_vocab_size = len(dest_word2index)
decoder_embedding_size = 100
decoder_hidden_size = 50
decoder_output_size = 100
decoder = Decoder(decoder_vocab_size, decoder_embedding_size,
                  decoder_hidden_size, decoder_output_size)

# Sequence-to-sequence learning model
model = Seq2seq(encoder,
                decoder,
                RMSprop(clip=5.0, lr=0.001, gamma=0.9, eps=1e-8),
                logger=logger)


# training
def epoch_end_callback():
    def sampling(x, mask_x, y, mask_y, sample_size=5):
        sample_indices = rng.randint(0,
                                     x.get_value(borrow=True).shape[0],
                                     sample_size)
        predict = model.predict(x[sample_indices], mask_x[:, sample_indices],
                                y[sample_indices], mask_y[:, sample_indices])
        sample_x = x.get_value(borrow=True)[sample_indices]
        sample_y = y.get_value(borrow=True)[sample_indices]
        predict_y = predict.eval()
        return (sample_x, sample_y, predict_y)
Example #25
            bidirectional=bidirectional,
            rnn_cell="lstm",
            variable_lengths=True,
        )
        decoder = DecoderRNN(
            len(tgt.vocab),
            max_len,
            hidden_size * 2,
            dropout_p=0.2,
            use_attention=True,
            bidirectional=bidirectional,
            rnn_cell="lstm",
            eos_id=tgt.eos_id,
            sos_id=tgt.sos_id,
        )
        seq2seq = Seq2seq(encoder, decoder)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

    # train
    t = SupervisedTrainer(
        loss=loss,
        batch_size=32,
        checkpoint_every=50,
        print_every=10,
        expt_dir=opt.expt_dir,
    )
Example #26
                             dropout_p=0.2,
                             use_attention=True,
                             bidirectional=bidirectional,
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id,
                             embedding=hidden_size,
                             use_concept=opt.concept)
        dialog_encoder = torch.nn.LSTM(input_size=hidden_size *
                                       2 if bidirectional else hidden_size,
                                       hidden_size=dialog_hidden_size,
                                       batch_first=True,
                                       dropout=dropout)
        if opt.concept:
            seq2seq = Seq2seq(encoder,
                              decoder,
                              dialog_encoder=dialog_encoder,
                              cpt_vocab=cpt.vocab,
                              hidden_size=dialog_hidden_size)
        else:
            seq2seq = Seq2seq(encoder,
                              decoder,
                              dialog_encoder=dialog_encoder,
                              hidden_size=dialog_hidden_size)
        if torch.cuda.is_available():
            seq2seq.cuda()

        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)

        # Optimizer and learning rate scheduler can be customized by
        # explicitly constructing the objects and passing them to the trainer.
Example #27
                             bidirectional=bidirectional,
                             n_layers=1,
                             rnn_cell='gru',
                             variable_lengths=True)
        decoder = DecoderRNN(vocab_size=len(tgt.vocab),
                             max_len=max_len,
                             hidden_size=hidden_size *
                             2 if bidirectional else hidden_size,
                             dropout_p=opt.dropout,
                             use_attention=True,
                             bidirectional=bidirectional,
                             n_layers=1,
                             rnn_cell='gru',
                             eos_id=tgt.eos_id,
                             sos_id=tgt.sos_id)
        seq2seq = Seq2seq(encoder, decoder)
        for param in seq2seq.parameters():
            param.data.uniform_(-0.08, 0.08)
            print(param.data[0:3])
        _, _, norm_val = encoder.vectors_stats()
        encoder.init_vectors(src.vocab.vectors)
        # encoder.scale_vectors(0.08)
        encoder.normalize_vectors(norm_val)
        encoder.vectors_stats()
        for param in seq2seq.parameters():
            print(param.data[0:3])

        if torch.cuda.is_available():
            seq2seq.cuda()

        # Optimizer and learning rate scheduler can be customized by
Example #28
def main():
    ''' Main function '''
    parser = argparse.ArgumentParser()

    parser.add_argument('-data', required=True)

    parser.add_argument('-epoch', type=int, default=3)
    parser.add_argument('-batch_size', type=int, default=64)

    parser.add_argument('-d_model', type=int, default=1024)
    parser.add_argument('-n_layer', type=int, default=1)

    parser.add_argument('-dropout', type=float, default=0)

    parser.add_argument('-log', default=None)
    parser.add_argument('-save_model', default=None)
    parser.add_argument('-save_mode',
                        type=str,
                        choices=['all', 'best'],
                        default='best')

    parser.add_argument('-seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")

    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-teacher_forcing_ratio', type=float, default=0.5)

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda
    opt.d_word_vec = opt.d_model
    opt.log = opt.save_model

    random.seed(opt.seed)
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    if opt.cuda:
        torch.cuda.manual_seed_all(opt.seed)

    #========= Loading Dataset =========#
    data = torch.load(opt.data)
    opt.max_token_seq_len = data['settings'].max_token_seq_len

    training_data, validation_data = prepare_dataloaders(data, opt)

    opt.src_vocab_size = training_data.dataset.src_vocab_size
    opt.tgt_vocab_size = training_data.dataset.tgt_vocab_size

    #========= Preparing Model =========#
    print(opt)
    device = torch.device('cuda' if opt.cuda else 'cpu')

    # model
    opt.bidirectional = True
    encoder = EncoderRNN(opt.src_vocab_size,
                         opt.max_token_seq_len,
                         opt.d_model,
                         bidirectional=opt.bidirectional,
                         variable_lengths=True)
    decoder = DecoderRNN(opt.tgt_vocab_size,
                         opt.max_token_seq_len,
                         opt.d_model * 2 if opt.bidirectional else opt.d_model,
                         n_layers=opt.n_layer,
                         dropout_p=opt.dropout,
                         use_attention=True,
                         bidirectional=opt.bidirectional,
                         eos_id=Constants.EOS,
                         sos_id=Constants.BOS)
    seq2seq = Seq2seq(encoder, decoder).to(device)
    for param in seq2seq.parameters():
        param.data.uniform_(-0.08, 0.08)

    seq2seq = nn.DataParallel(seq2seq)

    # loss
    weight = torch.ones(opt.tgt_vocab_size)
    pad = Constants.PAD
    loss = Perplexity(weight, pad)
    if opt.cuda:
        loss.cuda()

    # optimizer
    optimizer = Optimizer(torch.optim.Adam(seq2seq.parameters()),
                          max_grad_norm=5)

    train(seq2seq, training_data, validation_data, loss, optimizer, device,
          opt)