Example No. 1
def train(**kwargs):
    for k_, v_ in kwargs.items():
        setattr(options, k_, v_)

    training_set = TextDataset(path='data/train/train.csv',
                               model='wordvec/skipgram.bin',
                               max_length=options.max_length,
                               word_dim=options.word_dim)
    training_loader = Data.DataLoader(dataset=training_set, batch_size=options.batch_size, shuffle=True, drop_last=True)
    model = TextCNN(options.word_dim, options.max_length, training_set.encoder.classes_.shape[0])

    if torch.cuda.is_available():
        model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=options.learning_rate)
    criterion = nn.CrossEntropyLoss()

    for epoch in tqdm(range(options.epochs)):
        loss_sum = 0
        
        for data, label in tqdm(training_loader):
            if torch.cuda.is_available():
                data = data.cuda()
                label = label.cuda()

            out = model(data)
            
            loss = criterion(out, label.squeeze().long())
            loss_sum += loss.item() / options.batch_size
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        tqdm.write(f'epoch {epoch + 1}: loss = {loss_sum/len(training_set.data)}')
        model.save(f'checkpoints/loss-{loss_sum/len(training_set.data)}.pt')
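
A minimal usage sketch, assuming `options` is a module-level config object whose attributes (`max_length`, `word_dim`, `batch_size`, `learning_rate`, `epochs`) hold the defaults that the keyword arguments override:

# Any keyword passed here is written onto the global `options` object via setattr
# before the dataset, model and optimizer are built.
train(batch_size=64, learning_rate=1e-3, epochs=10)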
Example No. 2
def build_textcnn_model(vocab, config, train=True):
    model = TextCNN(vocab.vocab_size, config)
    if train:
        model.train()
    else:
        model.eval()

    if torch.cuda.is_available():
        model.cuda()
    else:
        model.cpu()
    return model
Example No. 3
def build_textcnn_model(vocab, config, train=True):
    model = TextCNN(vocab.vocab_size, config)
    if train:
        model.train()
        # call train() before training the model
    else:
        model.eval()
        # call eval() before testing: it freezes BatchNorm and Dropout so they use the learned values instead of batch statistics
    if torch.cuda.is_available():
        model.cuda()
    else:
        model.cpu()
    return model
Example No. 4
def evaluate():
    # test
    model = TextCNN(config)
    model.cuda()
    saved_model = torch.load(config.save_model)
    model.load_state_dict(saved_model["state_dict"])
    print(
        "epoch:%s steps:%s best_valid_acc:%s" %
        (saved_model["epoch"], saved_model["steps"], saved_model["valid_acc"]))

    test_loss, test_acc, cm = test(config.test)
    print(
        f"\tLoss: {test_loss:.4f}(test)\t|\tAcc: {test_acc * 100:.1f}%(test)")

    print_confusion_matrix(cm, list(id2label.values()))
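
`test`, `print_confusion_matrix` and `id2label` are defined elsewhere in this project; purely as an illustration of the call above, a minimal confusion-matrix printer could look like this, assuming `cm` is an n_classes x n_classes array of counts:

import numpy as np

def print_confusion_matrix(cm, class_names):
    # One header row of predicted-class names, then one row of counts per true class.
    print(" " * 12 + "".join(f"{name:>12}" for name in class_names))
    for name, row in zip(class_names, np.asarray(cm)):
        print(f"{name:>12}" + "".join(f"{int(v):>12}" for v in row))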
Example No. 5
def train(x, y):
    model = TextCNN()
    model = model.cuda()
    # use a list, not filter(): a filter object is exhausted after its first use
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss(reduction='sum')

    for epoch in range(100):
        total = 0
        for i in range(len(x) // 64):
            batch_x = x[i * 64:(i + 1) * 64]
            batch_y = y[i * 64:(i + 1) * 64]
            batch_x = torch.FloatTensor(batch_x).cuda()
            batch_y = torch.LongTensor(batch_y).cuda()
            optimizer.zero_grad()
            model.train()
            pred = model(batch_x, 64)
            loss = criterion(pred, batch_y)
            #print(loss)
            loss.backward()
            nn.utils.clip_grad_norm_(parameters, max_norm=3)
            total += np.sum(
                pred.data.max(1)[1].cpu().numpy() ==
                batch_y.data.cpu().numpy())
            optimizer.step()
        print("epoch ", epoch + 1, " acc: ", float(total) / len(x))
    return model
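
A hedged usage sketch for the function above, with made-up shapes: `x` as fixed-length sequences of word vectors and `y` as integer labels, sized so the hard-coded batch size of 64 divides them evenly:

import numpy as np

# Hypothetical data: 640 samples, each a 50-token sequence of 300-d embeddings.
x = np.random.randn(640, 50, 300).astype(np.float32)
y = np.random.randint(0, 2, size=640)
model = train(x, y)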
Example No. 6
def train(args):
    train_iter, dev_iter = data_processor.load_data(args)  # split the data into training and validation sets
    print('Finished loading the data')
    model = TextCNN(args)
    if args.cuda: model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    steps = 0
    best_acc = 0
    last_step = 0
    model.train()
    for epoch in range(1, args.epoch + 1):
        for batch in train_iter:
            feature, target = batch.text, batch.label
            # t_() transposes (max_len, batch_size) in place to (batch_size, max_len)
            #            feature.data.t_(), target.data.sub_(1)  # subtract 1 from target
            feature = feature.data.t()  # x.t() leaves x unchanged, so reassign the result
            #           target.data.sub_(1)
            if args.cuda:
                feature, target = feature.cuda(), target.cuda()
            optimizer.zero_grad()
            logits = model(feature)
            loss = F.cross_entropy(logits, target)
            loss.backward()
            optimizer.step()
            steps += 1
            if steps % args.log_interval == 0:
                # torch.max(logits, 1) returns, for each row, the largest element together with its column index
                corrects = (torch.max(logits, 1)[1] == target).sum()
                train_acc = 100.0 * corrects / batch.batch_size
                sys.stdout.write(
                    '\rBatch[{}] - loss: {:.6f}  acc: {:.4f}%({}/{})'.format(
                        steps, loss.item(), train_acc, corrects,
                        batch.batch_size))
            if steps % args.test_interval == 0:
                dev_acc = eval(dev_iter, model, args)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    last_step = steps
                    if args.save_best:
                        print('Saving best model, acc: {:.4f}%\n'.format(
                            best_acc))
                        save(model, args.save_dir, 'best', steps)
                else:
                    if steps - last_step >= args.early_stopping:
                        print('\nearly stop by {} steps, acc: {:.4f}%'.format(
                            args.early_stopping, best_acc))
                        raise KeyboardInterrupt
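
Because this loop signals early stopping by raising KeyboardInterrupt, the caller presumably wraps train() in a try/except; a minimal sketch of that pattern:

try:
    train(args)
except KeyboardInterrupt:
    # Triggered by the early-stopping branch above as well as by a real Ctrl-C;
    # either way, stop training and continue with whatever follows (e.g. testing).
    print('Training stopped, leaving the training loop.')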
Example No. 7
def build_textcnn_model(vocab, config, train=True):
    model = TextCNN(vocab.vocab_size, config)
    if train:
        model.train()
        # call train() before training the model
    else:
        model.eval()
        # call eval() before testing: it freezes BatchNorm and Dropout so they use the learned values instead of batch statistics

    # train() and eval() exist because some layers behave differently during training
    # and evaluation, for example Batch Normalization and Dropout.
    # BatchNorm normalizes the activations of intermediate layers and applies a learned
    # transform so the extracted feature distribution is preserved; its parameters are
    # fixed after training, so its behaviour differs between training and testing.
    # Dropout combats overfitting: in each training batch it ignores part of the feature
    # detectors, which noticeably reduces overfitting.
    if torch.cuda.is_available():
        model.cuda()
    else:
        model.cpu()
    return model
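
A standalone sketch (not part of the snippet above) illustrating the behaviour these comments describe: the same Dropout layer is stochastic in train() mode and a no-op in eval() mode.

import torch
import torch.nn as nn

layer = nn.Dropout(p=0.5)
x = torch.ones(1, 8)

layer.train()
print(layer(x))  # roughly half the entries zeroed, the survivors scaled by 1/(1-p)

layer.eval()
print(layer(x))  # identical to the input: dropout is disabled during evaluation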
Example No. 8
dropout_rate = opts.dropout
kwargs = {
    'nb_classes': nb_classes,
    'vocab_size': vocab_size,
    'input_size': word_dim,
    'filter_shape': filter_shape,
    'pretrained_embed': pretrained_embed,
    'dropout_rate': dropout_rate
}

# Initialize the model
use_cuda = opts.cuda
text_cnn = TextCNN(kwargs)
print(text_cnn)
if use_cuda:
    text_cnn = text_cnn.cuda()
optimizer = torch.optim.Adam(text_cnn.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

# Training
t0 = time()
nb_epoch = opts.nb_epoch
max_patience = opts.max_patience
current_patience = 0
root_model = opts.root_model
if not os.path.exists(root_model):
    os.makedirs(root_model)
path_model = os.path.join(root_model, 'textcnn.model')
best_dev_loss = 1000.
for epoch in range(nb_epoch):
    sys.stdout.write('epoch {0} / {1}: \r'.format(epoch, nb_epoch))
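
The example is cut off at the top of the epoch loop; a hedged sketch of how the patience bookkeeping prepared above (`best_dev_loss`, `current_patience`, `max_patience`, `path_model`) is typically used inside it. `train_one_epoch` and `evaluate_dev` are hypothetical placeholders, not functions from the original code:

    # Hypothetical epoch body; the real training and evaluation calls are not shown above.
    train_one_epoch(text_cnn, optimizer, criterion)
    dev_loss = evaluate_dev(text_cnn, criterion)
    if dev_loss < best_dev_loss:
        best_dev_loss = dev_loss
        current_patience = 0
        torch.save(text_cnn.state_dict(), path_model)  # keep the best model so far
    else:
        current_patience += 1
        if current_patience >= max_patience:
            print('dev loss did not improve for {0} epochs, stopping'.format(max_patience))
            break
print('training finished in {0:.1f}s'.format(time() - t0))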
Example No. 9
    deving_set = TextCharDataset(config,
                                 path='data/dev.tsv',
                                 vocab_file='./data/char2num.pkl')

    training_iter = data.DataLoader(dataset=training_set,
                                    batch_size=config.batch_size,
                                    num_workers=2)
    deving_iter = data.DataLoader(dataset=deving_set,
                                  batch_size=config.batch_size,
                                  num_workers=2)

    config.word_num = len(training_set.tok2num)
    model = TextCNN(config)

    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.lr)

    training_losses = []
    # Train the model
    for epoch in range(config.epoch):
        model.train()
        for data, label in training_iter:

            if config.cuda and torch.cuda.is_available():
                data = data.cuda()
                label = label.cuda()

            out = model(data)
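
The snippet stops right after the forward pass; a hedged sketch of the usual continuation, reusing the `criterion`, `optimizer` and `training_losses` defined above:

            # Hypothetical continuation of the truncated batch loop.
            loss = criterion(out, label)
            training_losses.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()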
Example No. 10
def train(args, states=None):

    config_obj = Config(args.config_file)
    config = config_obj.elements

    # make training runs deterministic
    set_seed(seed_value=config['random_seed'])

    logging.info("Loading datasets...")
    dataset, labels = load_embeddings(data_path=config['data'],
                                      label_path=config['labels'])

    train_loader, val_loader, test_loader = create_dataloaders(
        dataset,
        labels,
        batch_size=config['batch_size'],
        random_seed=config['random_seed'],
        balance=config['correct_imbalance'],
    )

    model = TextCNN(
        num_classes=config['num_classes'],
        embedding_size=config['embedding_size'],
        num_filters=config['num_filters'],
        dropout_rate=config['dropout'],
    )
    if torch.cuda.is_available():
        model.cuda()

    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])

    best_metric = 0

    # loop over the dataset multiple times
    for epoch in range(1, config['num_epochs'] + 1):
        logging.info(
            f"==================== Epoch: {epoch} ====================")
        running_losses = []
        for i, data in enumerate(train_loader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            if torch.cuda.is_available():
                inputs, labels = inputs.cuda(), labels.cuda()

            # zero the parameter gradients before each pass
            optimizer.zero_grad()

            # forward
            probs, classes = model(inputs)
            # backprop
            loss = loss_function(probs, labels)
            loss.backward()
            # update/optimize
            optimizer.step()

            # Log summary
            running_losses.append(loss.item())
            if i % args.log_interval == 0:
                interval_loss = sum(running_losses) / len(running_losses)
                logging.info(f"step = {i}, loss = {interval_loss}")
                running_losses = []

            if i % args.test_interval == 0:
                dev_metric = eval(
                    val_loader,
                    model,
                    loss_function,
                    args.eval_metric,
                )
                if dev_metric > best_metric:
                    best_metric = dev_metric
                    states = {
                        "epoch": epoch,
                        "step": i,
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict()
                    }
                    save_model_state(save_dir=args.model_dir,
                                     step=i,
                                     states=states)

    print(f"Finished Training, best {args.eval_metric}: {best_metric}")
Example No. 11
    parser.add_argument('--without_unlabel', '-wu', action='store_true')
    parser.add_argument('--learning_rate', '-lr', type=float, default=0.0001)
    parser.add_argument('--save_dir', type=str, default='')
    parser.add_argument('--gpu', action='store_true')
    parser.add_argument('--verbose', action='store_true')
    args = parser.parse_args()

    # device = torch.device("cuda") if args.gpu else torch.device("cpu")
    dataset = TextDataset(args.dataset, args.percent, wo_unlabel=args.without_unlabel)
    print("Num of labeled data: ", len(dataset))
    emb = dataset.get_emb()
    num_class = dataset.num_class
    net = TextCNN(emb, num_class)
    best_model = TextCNN(emb, num_class)
    if args.gpu:
        net.cuda()
        best_model.cuda()
    indices = np.arange(len(dataset))
    np.random.shuffle(indices)
    split = round(0.1 * len(dataset))
    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    dataLoader = DataLoader(dataset, batch_size=32,
                            num_workers=4, sampler=train_sampler)
    validLoader = DataLoader(dataset, batch_size=32,
                             num_workers=4, sampler=valid_sampler)
    loss_fn = nn.CrossEntropyLoss()
    softmax = nn.Softmax(dim=1)
    # Print per iteration
    len_print = 20
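
The snippet ends with the setup; a hedged sketch of a training loop consistent with the objects built above. The epoch count, the optimizer choice, and the assumption that each batch is a `(text, label)` pair are placeholders, not taken from the original code:

    optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)
    best_acc = 0.0
    for epoch in range(10):  # placeholder epoch count
        net.train()
        for it, (text, label) in enumerate(dataLoader):
            if args.gpu:
                text, label = text.cuda(), label.cuda()
            optimizer.zero_grad()
            loss = loss_fn(net(text), label)
            loss.backward()
            optimizer.step()
            if it % len_print == 0:
                print('epoch {} iter {} loss {:.4f}'.format(epoch, it, loss.item()))
        # Keep a copy of the best model as measured on the held-out 10% split.
        net.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for text, label in validLoader:
                if args.gpu:
                    text, label = text.cuda(), label.cuda()
                pred = softmax(net(text)).argmax(dim=1)
                correct += (pred == label).sum().item()
                total += label.size(0)
        if total and correct / total > best_acc:
            best_acc = correct / total
            best_model.load_state_dict(net.state_dict())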