Example #1
import torch
from torchtext import data

# `eval` below is assumed to be a project-local evaluation helper defined
# elsewhere in this module (it shadows the Python builtin of the same name).


def main():
    # Old torchtext device convention: index 0 selects GPU 0, -1 means CPU.
    cuda = int(torch.cuda.is_available()) - 1

    TEXT = data.Field(lower=True, init_token="<start>", eos_token="<end>")
    LABELS = data.Field(sequential=False)  # one class label per example

    train, val, test = data.TabularDataset.splits(
        path='../tencent/data/',
        train='train_0.8.tsv',
        validation='train_dev_0.8_common.tsv',
        test='train_dev_0.8_uncommon.tsv',
        format='tsv',
        fields=[('text', TEXT), ('label', LABELS)])
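    # Each .tsv row is assumed to hold two tab-separated columns
    # (text, label), matching the field list above.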

    TEXT.build_vocab(train)
    LABELS.build_vocab(train)

    train_iter, val_iter, test_iter = data.BucketIterator.splits(
        (train, val, test),
        batch_sizes=(8, 8, 8),
        sort_key=lambda x: len(x.text),
        repeat=False)
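    # BucketIterator batches examples of similar length together,
    # minimizing the padding needed within each batch.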

    model = torch.load(
        '../tencent/models/common_0.8/net-lstm_e25_bs8_opt-adam_ly1_hs300_dr2_ed200_fembFalse_ptembFalse_drp0.3/acc47.60_e6.pt'
    )
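    # torch.load on a whole pickled model requires the defining classes to be
    # importable; pass map_location='cpu' when loading on a CPU-only machine.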

    # Evaluate on the common and uncommon dev splits.
    (avg_loss, accuracy, corrects, size, t5_acc, t5_corrects,
     mrr) = eval(val_iter, model, TEXT, 300)
    print('COMMON ACCURACY:', accuracy)
    (avg_loss, accuracy, corrects, size, t5_acc, t5_corrects,
     mrr) = eval(test_iter, model, TEXT, 300)
    print('UNCOMMON ACCURACY:', accuracy)
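

# A minimal entry point (assumed; not part of the original snippet):
if __name__ == '__main__':
    main()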
Example #2
import os

import torch
from torch.nn import NLLLoss
from torchtext import data
from torchtext.vocab import GloVe

# EncoderRNN / DecoderRNN / Seq2seq match the IBM pytorch-seq2seq package
# (an assumption based on their signatures); `Vecs`, `eval`, and
# `writeResults` are assumed to be project-local helpers defined elsewhere.
from seq2seq.models import EncoderRNN, DecoderRNN, Seq2seq


def train(args):
    ###############################################################################
    # Load data
    ###############################################################################
    # Old torchtext device convention: index 0 selects GPU 0, -1 means CPU.
    cuda = int(torch.cuda.is_available()) - 1

    TEXT = data.Field(lower=True, init_token="<start>", eos_token="<end>")
    LABELS = data.Field(sequential=True)  # labels are token sequences here

    train, val, test = data.TabularDataset.splits(
        # ms_draw data
        path='../ms_draw/',
        train='draw-train.tsv',
        validation='draw-dev.tsv',
        test='draw-test.tsv',
        format='tsv',
        fields=[('text', TEXT), ('label', LABELS)])

    # Sanity check: print the label of the first training example.
    print('train.examples.data:', train.examples[0].label)

    prevecs = None
    if args.pretr_emb:
        # Build the vocab with pretrained glove.6B vectors of emb_dim dims.
        TEXT.build_vocab(train,
                         vectors=GloVe(name='6B', dim=args.emb_dim),
                         min_freq=args.mf)
        prevecs = TEXT.vocab.vectors
    else:
        TEXT.build_vocab(train)

    LABELS.build_vocab(train)
    vecs = Vecs(args.emb_dim)
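    # `Vecs` is assumed to be a project-local token -> vector lookup keyed by
    # the token string; it embeds the batches in the training loop below.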
    train_iter, val_iter, test_iter = data.BucketIterator.splits(
        (train, val, test),
        batch_sizes=(args.batch_size, args.batch_size, args.batch_size),
        sort_key=lambda x: len(x.text))

    num_classes = len(LABELS.vocab)
    vocab_size = len(TEXT.vocab)
    ###############################################################################
    # Build the model
    ###############################################################################

    encoder_model = EncoderRNN(vocab_size=vocab_size,
                               max_len=200,
                               hidden_size=args.hidden_sz,
                               input_dropout_p=0,
                               dropout_p=args.dropout,
                               n_layers=args.num_layers,
                               bidirectional=args.num_dir == 2,
                               rnn_cell=args.net_type,
                               variable_lengths=False)

    decoder_model = DecoderRNN(
        vocab_size=vocab_size,
        max_len=200,
        hidden_size=args.hidden_sz,
        sos_id=2,  # likely TEXT.vocab index of <start> (TODO: derive from vocab)
        eos_id=3,  # likely TEXT.vocab index of <end> (TODO: derive from vocab)
        n_layers=args.num_layers,
        rnn_cell=args.net_type,
        bidirectional=args.num_dir == 2,
        input_dropout_p=0,
        dropout_p=args.dropout,
        use_attention=False)

    model = Seq2seq(encoder_model, decoder_model)
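    # Seq2seq chains the encoder and decoder: the decoder is initialized from
    # the encoder's final hidden state (the pytorch-seq2seq design, assuming
    # that package is in use).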

    criterion = NLLLoss()
    # Select the optimizer; library-default learning rates are used except
    # for SGD.
    if args.opt == 'adamax':
        optimizer = torch.optim.Adamax(model.parameters())
    elif args.opt == 'adam':
        optimizer = torch.optim.Adam(model.parameters())
    elif args.opt == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.5)
    else:
        # Unknown optimizer name: fall back to Adamax.
        optimizer = torch.optim.Adamax(model.parameters())

    ###############################################################################
    # Training the Model
    ###############################################################################
    if cuda == 0:
        model = model.cuda()

    highest_t1_acc = 0
    highest_t1_acc_metrics = ''
    highest_t1_acc_params = ''
    results = ''
    for epoch in range(args.epochs):
        losses = []
        tot_loss = 0
        train_iter.repeat = False
        for batch_count, batch in enumerate(train_iter):
            model.zero_grad()
            inp = batch.text.t()
            # Manually embed the batch: fill a (batch, seq_len, emb_dim)
            # tensor token by token from the `vecs` lookup. Use a CUDA tensor
            # only when a GPU is actually available.
            tensor_type = (torch.cuda.FloatTensor if cuda == 0
                           else torch.FloatTensor)
            inp3d = torch.autograd.Variable(
                tensor_type(inp.size(0), inp.size(1), args.emb_dim))
            for i in range(inp.size(0)):
                for j in range(inp.size(1)):
                    inp3d[i, j, :] = vecs[TEXT.vocab.itos[inp[i, j].data[0]]]
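            # NOTE: this per-token Python loop is slow; a vectorized
            # alternative would be an nn.Embedding layer initialized from the
            # pretrained vectors and applied to `inp` in a single call
            # (assuming all tokens are covered by those vectors).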
            #print("INP: ",inp.size())
            #print(inp3d)

            #print(inp)

            preds = model(inp3d)
            #print("PREDS: ",np.shape(preds))
            #print("LABELS: ",batch.label.size())

            loss = criterion(preds, batch.label)
            loss.backward()
            optimizer.step()
            losses.append(loss)
            tot_loss += loss.data[0]

            #if (batch_count % 20 == 0):
            #    print('Batch: ', batch_count, '\tLoss: ', str(losses[-1].data[0]))
            batch_count += 1
        #print('Average loss over epoch ' + str(epoch) + ': ' + str(tot_loss/len(losses)))
        (avg_loss, accuracy, corrects, size, t5_acc, t5_corrects,
         mrr) = eval(val_iter, model, vecs, TEXT, args.emb_dim)
        if accuracy > args.acc_thresh:
            save_path = '{}/acc{:.2f}_e{}.pt'.format(args.save_path_full,
                                                     accuracy, epoch)
            if not os.path.isdir(args.save_path_full):
                os.makedirs(args.save_path_full)
            torch.save(model, save_path)

        if highest_t1_acc < accuracy:
            highest_t1_acc = accuracy
            highest_t1_acc_metrics = (
                'acc: {:6.4f}%({:3d}/{}) EPOCH{:2d} - loss: {:.4f} '
                't5_acc: {:6.4f}%({:3d}/{}) MRR: {:.6f}'.format(
                    accuracy, corrects, size, epoch, avg_loss,
                    t5_acc, t5_corrects, size, mrr))

            highest_t1_acc_params = (
                'PARAMETERS:'
                'net-%s_e%i_bs%i_opt-%s_ly%i_hs%i_dr%i_ed%i'
                '_femb%s_ptemb%s_drp%.1f_mf%d\n'
                % (args.net_type, args.epochs, args.batch_size, args.opt,
                   args.num_layers, args.hidden_sz, args.num_dir, args.emb_dim,
                   args.embfix, args.pretr_emb, args.dropout, args.mf))
        results += ('\nEPOCH{:2d} - loss: {:.4f}  acc: {:6.4f}%({:3d}/{}) '
                    't5_acc: {:6.4f}%({:3d}/{}) MRR: {:.6f}'.format(
                        epoch, avg_loss, accuracy, corrects, size,
                        t5_acc, t5_corrects, size, mrr))

    print(highest_t1_acc_metrics + '\n')
    writeResults(args, results, highest_t1_acc, highest_t1_acc_metrics,
                 highest_t1_acc_params)
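

# ---------------------------------------------------------------------------
# Usage sketch (not in the original): a hypothetical argparse harness that
# supplies every `args` attribute train() reads. All flag names and defaults
# below are illustrative assumptions.
# ---------------------------------------------------------------------------
import argparse


def parse_args():
    p = argparse.ArgumentParser()
    p.add_argument('--net_type', default='lstm')        # 'lstm' or 'gru'
    p.add_argument('--epochs', type=int, default=25)
    p.add_argument('--batch_size', type=int, default=8)
    p.add_argument('--opt', default='adam')             # adamax | adam | sgd
    p.add_argument('--num_layers', type=int, default=1)
    p.add_argument('--hidden_sz', type=int, default=300)
    p.add_argument('--num_dir', type=int, default=1)    # 2 => bidirectional
    p.add_argument('--emb_dim', type=int, default=200)  # must match a GloVe size
    p.add_argument('--embfix', action='store_true')
    p.add_argument('--pretr_emb', action='store_true')
    p.add_argument('--dropout', type=float, default=0.3)
    p.add_argument('--mf', type=int, default=1)         # vocab min_freq
    p.add_argument('--acc_thresh', type=float, default=40.0)
    p.add_argument('--save_path_full', default='../ms_draw/models')
    return p.parse_args()


if __name__ == '__main__':
    train(parse_args())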