Example #1
def train(config, device):
    dataset = News20Dataset(config.cache_data_dir,
                            config.vocab_path,
                            is_train=True)
    dataloader = MyDataLoader(dataset, config.batch_size)

    model = HierarchialAttentionNetwork(
        num_classes=dataset.num_classes,
        vocab_size=dataset.vocab_size,
        embed_dim=config.embed_dim,
        word_gru_hidden_dim=config.word_gru_hidden_dim,
        sent_gru_hidden_dim=config.sent_gru_hidden_dim,
        word_gru_num_layers=config.word_gru_num_layers,
        sent_gru_num_layers=config.sent_gru_num_layers,
        word_att_dim=config.word_att_dim,
        sent_att_dim=config.sent_att_dim).to(device)

    optimizer = optim.Adam(params=filter(lambda p: p.requires_grad,
                                         model.parameters()),
                           lr=config.lr)

    criterion = nn.NLLLoss(reduction='sum').to(device)

    trainer = Trainer(config, model, optimizer, criterion, dataloader)
    trainer.train()
Example #2
def train(config, device):
    dataset = News20Dataset(config.cache_data_dir, config.vocab_path, is_train=True)

    dataloader = MyDataLoader(dataset, config.batch_size)

    model = HierarchicalAttentionNetwork(
        num_classes=dataset.num_classes,
        vocab_size=dataset.vocab_size,
        embed_dim=config.embed_dim,
        word_gru_hidden_dim=config.word_gru_hidden_dim,
        sent_gru_hidden_dim=config.sent_gru_hidden_dim,
        word_gru_num_layers=config.word_gru_num_layers,
        sent_gru_num_layers=config.sent_gru_num_layers,
        word_att_dim=config.word_att_dim,
        sent_att_dim=config.sent_att_dim,
        use_layer_norm=config.use_layer_norm,
        dropout=config.dropout).to(device)

    optimizer = optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()), lr=config.lr)

    # NOTE MODIFICATION (BUG)
    # criterion = nn.NLLLoss(reduction='sum').to(device) # option 1
    criterion = nn.CrossEntropyLoss(reduction='sum').to(device)  # option 2

    # NOTE MODIFICATION (EMBEDDING)
    if config.pretrain:
        weights = get_pretrained_weights("data/glove", dataset.vocab, config.embed_dim, device)
        model.sent_attention.word_attention.init_embeddings(weights)
    model.sent_attention.word_attention.freeze_embeddings(config.freeze)

    trainer = Trainer(config, model, optimizer, criterion, dataloader)
    trainer.train()
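
The BUG note above comes down to interfaces: nn.NLLLoss expects log-probabilities, so option 1 is only correct if the model ends in log_softmax, while nn.CrossEntropyLoss applies log_softmax itself and takes raw logits. A minimal self-contained check of that equivalence (the tensors here are illustrative, not from the repo):

import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(4, 20)    # batch of 4, 20 classes (as in 20 Newsgroups)
targets = torch.randint(0, 20, (4,))

# option 1: NLLLoss on explicit log-probabilities
nll = nn.NLLLoss(reduction='sum')(F.log_softmax(logits, dim=1), targets)
# option 2: CrossEntropyLoss directly on raw logits
ce = nn.CrossEntropyLoss(reduction='sum')(logits, targets)
assert torch.allclose(nll, ce)  # the two options agree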
Example #3
def main():
    parser = ArgumentParser(description='train a MLP model')
    parser.add_argument('INPUT', type=str, help='path to input')
    parser.add_argument('EMBED', type=str, help='path to embedding')
    parser.add_argument('--gpu', '-g', default=-1, type=int, help='gpu number')
    args = parser.parse_args()

    word_to_id = word2id(args.INPUT)
    embedding = id2embedding(args.EMBED, word_to_id)

    train_loader = MyDataLoader(args.INPUT,
                                word_to_id,
                                batch_size=5000,
                                shuffle=True,
                                num_workers=1)
    # create the model instance
    net = MLP(word_to_id, embedding)
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

    gpu_id = args.gpu
    device = torch.device("cuda:{}".format(gpu_id) if gpu_id >= 0 else "cpu")
    net = net.to(device)

    epochs = 5
    log_interval = 10
    for epoch in range(1, epochs + 1):
        net.train()  # switch to training mode (has an effect when using Dropout etc.)
        for batch_idx, (ids, mask, labels) in enumerate(train_loader):
            # ids: token ID tensor, mask: padding mask, labels: gold labels

            ids, mask, labels = ids.to(device), mask.to(device), labels.to(device)
            # zero the gradients first; without this call, gradients from past
            # steps keep accumulating
            optimizer.zero_grad()
            output = net(ids, mask)
            output2 = F.softmax(output, dim=1)
            loss = F.binary_cross_entropy(output2[:, 1], labels.float())  # compute the loss
            loss.backward()
            optimizer.step()  # update the parameters

            # print interim progress
            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(ids), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))
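
Taking the softmax probability of class 1 and passing it to binary cross-entropy, as the loop above does, is mathematically the same as F.cross_entropy on the raw two-class logits; the latter works in log-space and is the numerically safer form. A small illustrative check (the tensors are made up):

import torch
import torch.nn.functional as F

logits = torch.randn(8, 2)     # two-class output, batch of 8
labels = torch.randint(0, 2, (8,))

bce = F.binary_cross_entropy(F.softmax(logits, dim=1)[:, 1], labels.float())
ce = F.cross_entropy(logits, labels)  # same value, computed stably
assert torch.allclose(bce, ce, atol=1e-6)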
Example #4
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)
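
# mixup_criterion pairs with the (truncated) mixup_data above, whose visible
# tail returns mixed inputs, both label sets, and the mixing weight lam. A
# hedged sketch of one training step wiring the two together; net, criterion,
# optimizer, and train_loader are placeholders, not names from this project:
#
#   mixed_x, y_a, y_b, lam = mixup_data(inputs, targets)
#   output = net(mixed_x)
#   loss = mixup_criterion(criterion, output, y_a, y_b, lam)
#   optimizer.zero_grad()
#   loss.backward()
#   optimizer.step()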


if __name__ == '__main__':
    from config import *

    init_data_source = H5DataSource([soft_labeld_data_file],
                                    5000,
                                    shuffle=False,
                                    split=False)
    init_loader = MyDataLoader(init_data_source.h5fids,
                               init_data_source.indices)
    mean, std, n = 0, 0, 0
    for data, label, _ in tqdm(init_loader):
        data = torch.from_numpy(data).float().cuda()
        data = preprocess_batch(data)
        data = data.view(-1, data.size(3))
        mean += data.sum(0)
        n += data.shape[0]

    mean /= n

    for data, label, _ in tqdm(init_loader):
        data = torch.from_numpy(data).float().cuda()
        data = preprocess_batch(data)
        data = data.view(-1, data.size(3))
        std += ((data - mean)**2).sum(0)
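
    # The snippet is cut off here; a two-pass mean/std computation like this
    # one presumably finishes by normalizing the accumulated squared
    # deviations and taking the square root (hedged completion, not shown in
    # the source):
    #   std = torch.sqrt(std / n)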
Example #5
# model_dir = './checkpoints/model_54017'
model_dir = './checkpoints/model_70701'
cur_model_path = os.path.join(model_dir, 'state_curr.ckpt')

if not os.path.isdir('./submit/'):
    os.mkdir('./submit/')
if __name__ == '__main__':

    mean_std_h5 = h5py.File(mean_std_file, 'r')
    mean = np.array(mean_std_h5['mean'])
    std = np.array(mean_std_h5['std'])
    mean_std_h5.close()

    data_source = H5DataSource([test_file], BATCH_SIZE, shuffle=False)
    test_loader = MyDataLoader(data_source.h5fids, data_source.indices)

    model = LCZNet(channel=18, n_class=17, base=64, dropout=0.3)
    model = model.cuda()

    best_score = 0

    if os.path.isfile(cur_model_path):
        print('load training param, ', cur_model_path)
        state = torch.load(cur_model_path)
        model.load_state_dict(state['best_model_state'])
        best_score = state['best_score']
    print('best_score:', best_score)

    print('-' * 80)
    print('Testing...')
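
    # The checkpoint consumed above stores 'best_model_state' and 'best_score';
    # the writer side (in the training script) presumably mirrors those keys:
    #   torch.save({'best_model_state': model.state_dict(),
    #               'best_score': best_score}, cur_model_path)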
Example #6
File: train.py Project: eguilg/LCZ
model_dir = './checkpoints/model_70701'
if not os.path.isdir('./checkpoints/'):
	os.mkdir('./checkpoints/')
if not os.path.isdir(model_dir):
	os.mkdir(model_dir)
cur_model_path = os.path.join(model_dir, 'state_curr.ckpt')

if __name__ == '__main__':

	mean_std_h5 = h5py.File(mean_std_file, 'r')
	mean = np.array(mean_std_h5['mean'])
	std = np.array(mean_std_h5['std'])
	mean_std_h5.close()

	data_source = H5DataSource([train_file, val_file], BATCH_SIZE, split=0.1, seed=SEED)
	train_loader = MyDataLoader(data_source.h5fids, data_source.train_indices)
	val_loader = MyDataLoader(data_source.h5fids, data_source.val_indices)

	# train_source = H5DataSource([train_file], BATCH_SIZE, split=None, seed=SEED)
	# val_source = H5DataSource([val_file], BATCH_SIZE, shuffle=False, split=None)
	# train_loader = MyDataLoader(train_source.h5fids, train_source.indices)
	# val_loader = MyDataLoader(val_source.h5fids, val_source.indices)

	model = LCZNet(channel=18, n_class=17, base=64, dropout=0.3)
	model = model.cuda()

	model_param_num = 0
	for param in list(model.parameters()):
		model_param_num += param.nelement()
	print('num_params: %d' % (model_param_num))
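	# Equivalent one-liner for the parameter count:
	#   model_param_num = sum(p.nelement() for p in model.parameters())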
Example #7
                        if j == 99:
                            test_loss_list.append(loss_test.item() / 100)
                            print("test loss : {0}".format(loss_test.item() /
                                                           100))
                            break
                break

    print('Finished Training')
    return net, train_loss_list, test_loss_list


if __name__ == '__main__':
    df_train = pd.read_csv("/daintlab/data/sr/traindf.csv", index_col=0)

    train_dataset = MyDataLoader(df_train)
    train_loader = DataLoader(train_dataset,
                              shuffle=True,
                              batch_size=128,
                              pin_memory=True)

    df_test = pd.read_csv("/daintlab/data/sr/testdf.csv", index_col=0)

    test_dataset = MyDataLoader(df_test)
    test_loader = DataLoader(test_dataset,
                             shuffle=True,
                             batch_size=128,
                             pin_memory=True)

    lstm = Model(input_size=48 * 7, hidden_size=48, num_layers=3).cuda()
    trained_model, train_loss_list, test_loss_list = train(
Example #8
def train(args):
    with open(args.vocab, 'r') as f:
        vocab = json.load(f)
    vocab_size = len(vocab['q'])
    with MyDataLoader(args.train_features_h5,
                      args.train_questions_h5) as train_loader, \
         MyDataLoader(args.val_features_h5,
                      args.val_questions_h5) as val_loader:

        film_generator = FiLMGenerator(vocab_size)
        filmed_net = FiLMedNet()

        if args.start_from_checkpoint:
            print('Loading states from {}'.format(args.start_from_checkpoint))
            checkpoint = torch.load(args.start_from_checkpoint)
            fg_state = checkpoint['fg_best_state']
            fn_state = checkpoint['fn_best_state']
            film_generator.load_state_dict(fg_state)
            filmed_net.load_state_dict(fn_state)

        print('\nFiLMGenerator: \n  {}\n\n'.format(film_generator))
        print('FiLMed Network: \n  {}\n'.format(filmed_net))

        criterion = CrossEntropyLoss().cuda()

        fg_optimizer = Adam(film_generator.parameters(),
                            lr=args.lr,
                            weight_decay=1e-5)
        fn_optimizer = Adam(filmed_net.parameters(),
                            lr=args.lr,
                            weight_decay=1e-5)
        t = 0
        best_accuracy = 0
        running_loss = 0
        for epoch in range(args.epochs):
            print('Starting Epoch {}'.format(epoch))

            film_generator.cuda()
            filmed_net.cuda()
            film_generator.train()
            filmed_net.train()

            for batch in train_loader:
                t += 1

                questions, feats, answers = batch
                questions = Variable(questions.cuda())
                feats = Variable(feats.cuda())
                answers = Variable(answers.cuda())

                fg_optimizer.zero_grad()
                fn_optimizer.zero_grad()

                film = film_generator(questions)
                output = filmed_net(feats, film)

                loss = criterion(output, answers)

                loss.backward()

                fg_optimizer.step()
                fn_optimizer.step()

                running_loss += loss.item()  # loss.data[0] is pre-0.4 PyTorch and no longer works
                if t % 100 == 0:
                    print(t, running_loss / 100)
                    running_loss = 0

            film_generator.eval()
            filmed_net.eval()

            tr_accuracy = check_accuracy(film_generator, filmed_net,
                                         train_loader)
            print('Epoch {}. Training accuracy:   {}'.format(
                epoch, tr_accuracy))

            val_accuracy = check_accuracy(film_generator, filmed_net,
                                          val_loader)
            print('Epoch {}. Validation accuracy: {}'.format(
                epoch, val_accuracy))

            if val_accuracy >= best_accuracy:
                best_accuracy = val_accuracy
                fg_best_state = get_state(film_generator)
                fn_best_state = get_state(filmed_net)

            checkpoint = {
                'fg_best_state': fg_best_state,
                'fn_best_state': fn_best_state,
                'epoch': epoch,
                'val_accuracy': val_accuracy,
                'vocab': vocab
            }

            print('Saving checkpoint to {}'.format(args.save_checkpoint_to))
            torch.save(checkpoint, args.save_checkpoint_to)
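
check_accuracy is not part of the snippet; a minimal sketch of what it presumably does, reusing the (questions, feats, answers) batch layout from the training loop above:

def check_accuracy(film_generator, filmed_net, loader):
    correct, total = 0, 0
    with torch.no_grad():  # no gradients needed for evaluation
        for questions, feats, answers in loader:
            film = film_generator(questions.cuda())
            preds = filmed_net(feats.cuda(), film).argmax(dim=1)
            correct += (preds == answers.cuda()).sum().item()
            total += answers.size(0)
    return correct / total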