def train(config, device):
    dataset = News20Dataset(config.cache_data_dir, config.vocab_path, is_train=True)
    dataloader = MyDataLoader(dataset, config.batch_size)

    model = HierarchialAttentionNetwork(
        num_classes=dataset.num_classes,
        vocab_size=dataset.vocab_size,
        embed_dim=config.embed_dim,
        word_gru_hidden_dim=config.word_gru_hidden_dim,
        sent_gru_hidden_dim=config.sent_gru_hidden_dim,
        word_gru_num_layers=config.word_gru_num_layers,
        sent_gru_num_layers=config.sent_gru_num_layers,
        word_att_dim=config.word_att_dim,
        sent_att_dim=config.sent_att_dim).to(device)

    optimizer = optim.Adam(
        params=filter(lambda p: p.requires_grad, model.parameters()), lr=config.lr)
    # NLLLoss expects log-probabilities, so the model's final layer must apply
    # log_softmax (the next snippet swaps in CrossEntropyLoss for raw logits)
    criterion = nn.NLLLoss(reduction='sum').to(device)

    trainer = Trainer(config, model, optimizer, criterion, dataloader)
    trainer.train()
def train(config, device):
    dataset = News20Dataset(config.cache_data_dir, config.vocab_path, is_train=True)
    dataloader = MyDataLoader(dataset, config.batch_size)

    model = HierarchicalAttentionNetwork(
        num_classes=dataset.num_classes,
        vocab_size=dataset.vocab_size,
        embed_dim=config.embed_dim,
        word_gru_hidden_dim=config.word_gru_hidden_dim,
        sent_gru_hidden_dim=config.sent_gru_hidden_dim,
        word_gru_num_layers=config.word_gru_num_layers,
        sent_gru_num_layers=config.sent_gru_num_layers,
        word_att_dim=config.word_att_dim,
        sent_att_dim=config.sent_att_dim,
        use_layer_norm=config.use_layer_norm,
        dropout=config.dropout).to(device)

    optimizer = optim.Adam(
        params=filter(lambda p: p.requires_grad, model.parameters()), lr=config.lr)

    # NOTE MODIFICATION (BUG)
    # criterion = nn.NLLLoss(reduction='sum').to(device)  # option 1
    criterion = nn.CrossEntropyLoss(reduction='sum').to(device)  # option 2

    # NOTE MODIFICATION (EMBEDDING)
    if config.pretrain:
        weights = get_pretrained_weights(
            "data/glove", dataset.vocab, config.embed_dim, device)
        model.sent_attention.word_attention.init_embeddings(weights)
        model.sent_attention.word_attention.freeze_embeddings(config.freeze)

    trainer = Trainer(config, model, optimizer, criterion, dataloader)
    trainer.train()
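# The snippet above assumes a get_pretrained_weights helper that returns an
# embedding matrix aligned with the dataset vocabulary. A minimal sketch of
# such a loader, assuming vocab maps word -> index and a standard GloVe text
# file; the file layout under "data/glove" is a guess, not from the source:
import torch

def get_pretrained_weights(glove_dir, vocab, embed_dim, device):
    weights = torch.randn(len(vocab), embed_dim) * 0.1  # random init for OOV words
    with open("{}/glove.6B.{}d.txt".format(glove_dir, embed_dim), encoding="utf-8") as f:
        for line in f:
            word, *vec = line.rstrip().split(" ")
            if word in vocab and len(vec) == embed_dim:
                weights[vocab[word]] = torch.tensor([float(x) for x in vec])
    return weights.to(device)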
def main():
    parser = ArgumentParser(description='train an MLP model')
    parser.add_argument('INPUT', type=str, help='path to input')
    parser.add_argument('EMBED', type=str, help='path to embedding')
    parser.add_argument('--gpu', '-g', default=-1, type=int, help='gpu number')
    args = parser.parse_args()

    word_to_id = word2id(args.INPUT)
    embedding = id2embedding(args.EMBED, word_to_id)
    train_loader = MyDataLoader(args.INPUT, word_to_id, batch_size=5000,
                                shuffle=True, num_workers=1)

    # create the model instance
    net = MLP(word_to_id, embedding)
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

    gpu_id = args.gpu
    device = torch.device("cuda:{}".format(gpu_id) if gpu_id >= 0 else "cpu")
    net = net.to(device)

    epochs = 5
    log_interval = 10
    for epoch in range(1, epochs + 1):
        net.train()  # switch to training mode (matters when using Dropout etc.)
        for batch_idx, (ids, mask, labels) in enumerate(train_loader):
            # ids: token-id tensor, mask: padding mask, labels: binary targets
            ids, mask, labels = ids.to(device), mask.to(device), labels.to(device)
            # zero the gradients first; otherwise gradients accumulate across steps
            optimizer.zero_grad()
            output = net(ids, mask)
            output2 = F.softmax(output, dim=1)
            # compute the loss on the probability of the positive class
            loss = F.binary_cross_entropy(output2[:, 1], labels.float())
            loss.backward()
            optimizer.step()  # update the parameters

            # print progress
            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(ids), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))
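# net(ids, mask) above implies a model that embeds token ids, pools them under
# the padding mask, and emits two-class scores (output[:, 1] is the positive
# class). A minimal sketch of such an MLP, assuming id2embedding returns a
# FloatTensor of shape (vocab_size, embed_dim); pooling and sizes are guesses:
import torch
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, word_to_id, embedding, hidden_dim=128):
        super().__init__()
        self.word_to_id = word_to_id  # kept for signature parity with main()
        self.embed = nn.Embedding.from_pretrained(embedding, freeze=False)
        self.fc = nn.Sequential(
            nn.Linear(embedding.size(1), hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden_dim, 2),
        )

    def forward(self, ids, mask):
        x = self.embed(ids)                           # (batch, seq, embed)
        m = mask.unsqueeze(-1).float()
        x = (x * m).sum(1) / m.sum(1).clamp(min=1.0)  # masked mean pooling
        return self.fc(x)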
# Head of mixup_data reconstructed following the standard mixup recipe;
# the original excerpt begins at the `y_a, y_b` line.
def mixup_data(x, y, alpha=1.0):
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)


if __name__ == '__main__':
    from config import *

    init_data_source = H5DataSource([soft_labeld_data_file], 5000,
                                    shuffle=False, split=False)
    init_loader = MyDataLoader(init_data_source.h5fids, init_data_source.indices)

    # two-pass computation of the per-channel mean and std over all pixels
    mean, std, n = 0, 0, 0
    for data, label, _ in tqdm(init_loader):
        data = torch.from_numpy(data).float().cuda()
        data = preprocess_batch(data)
        data = data.view(-1, data.size(3))
        mean += data.sum(0)
        n += data.shape[0]
    mean /= n

    for data, label, _ in tqdm(init_loader):
        data = torch.from_numpy(data).float().cuda()
        data = preprocess_batch(data)
        data = data.view(-1, data.size(3))
        std += ((data - mean) ** 2).sum(0)
    std = (std / n).sqrt()  # final step of the second pass (reconstructed; the excerpt ends at the sum)
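# Hypothetical usage of the two mixup helpers inside a training step; the
# model, optimizer, and cross-entropy criterion here are assumptions, not
# taken from the source:
import torch.nn.functional as F

for data, label, _ in train_loader:
    data = preprocess_batch(torch.from_numpy(data).float().cuda())
    label = torch.from_numpy(label).cuda()
    mixed_x, y_a, y_b, lam = mixup_data(data, label, alpha=1.0)
    loss = mixup_criterion(F.cross_entropy, model(mixed_x), y_a, y_b, lam)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()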
# model_dir = './checkpoints/model_54017'
model_dir = './checkpoints/model_70701'
cur_model_path = os.path.join(model_dir, 'state_curr.ckpt')

if not os.path.isdir('./submit/'):
    os.mkdir('./submit/')

if __name__ == '__main__':
    mean_std_h5 = h5py.File(mean_std_file, 'r')
    mean = np.array(mean_std_h5['mean'])
    std = np.array(mean_std_h5['std'])
    mean_std_h5.close()

    data_source = H5DataSource([test_file], BATCH_SIZE, shuffle=False)
    test_loader = MyDataLoader(data_source.h5fids, data_source.indices)

    model = LCZNet(channel=18, n_class=17, base=64, dropout=0.3)
    model = model.cuda()

    best_score = 0
    if os.path.isfile(cur_model_path):
        print('load training param, ', cur_model_path)
        state = torch.load(cur_model_path)
        model.load_state_dict(state['best_model_state'])
        best_score = state['best_score']
        print('best_score:', best_score)

    print('-' * 80)
    print('Testing...')
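# The inference loop itself is cut off above. A minimal sketch of what such a
# test script typically does next (normalize with the stored mean/std, run the
# model, collect argmax predictions); the (data, label, index) batch layout is
# assumed from the companion training script, not confirmed by the source:
model.eval()
mean_t = torch.from_numpy(mean).float().cuda()
std_t = torch.from_numpy(std).float().cuda()
predictions = []
with torch.no_grad():
    for data, _, _ in test_loader:
        data = torch.from_numpy(data).float().cuda()
        data = (data - mean_t) / std_t
        predictions.append(model(data).argmax(dim=1).cpu().numpy())
predictions = np.concatenate(predictions)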
model_dir = './checkpoints/model_70701'
if not os.path.isdir('./checkpoints/'):
    os.mkdir('./checkpoints/')
if not os.path.isdir(model_dir):
    os.mkdir(model_dir)
cur_model_path = os.path.join(model_dir, 'state_curr.ckpt')

if __name__ == '__main__':
    mean_std_h5 = h5py.File(mean_std_file, 'r')
    mean = np.array(mean_std_h5['mean'])
    std = np.array(mean_std_h5['std'])
    mean_std_h5.close()

    data_source = H5DataSource([train_file, val_file], BATCH_SIZE, split=0.1, seed=SEED)
    train_loader = MyDataLoader(data_source.h5fids, data_source.train_indices)
    val_loader = MyDataLoader(data_source.h5fids, data_source.val_indices)

    # train_source = H5DataSource([train_file], BATCH_SIZE, split=None, seed=SEED)
    # val_source = H5DataSource([val_file], BATCH_SIZE, shuffle=False, split=None)
    # train_loader = MyDataLoader(train_source.h5fids, train_source.indices)
    # val_loader = MyDataLoader(val_source.h5fids, val_source.indices)

    model = LCZNet(channel=18, n_class=17, base=64, dropout=0.3)
    model = model.cuda()

    model_param_num = 0
    for param in list(model.parameters()):
        model_param_num += param.nelement()
    print('num_params: %d' % model_param_num)
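# The parameter count above can also be written as a one-liner; an equivalent
# form, plus a variant that counts only trainable parameters:
model_param_num = sum(p.nelement() for p in model.parameters())
trainable_num = sum(p.nelement() for p in model.parameters() if p.requires_grad)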
            # excerpt picks up inside train(), at the end of its evaluation loop
            if j == 99:
                test_loss_list.append(loss_test.item() / 100)
                print("test loss : {0}".format(loss_test.item() / 100))
                break
        break

    print('Finished Training')
    return net, train_loss_list, test_loss_list


if __name__ == '__main__':
    df_train = pd.read_csv("/daintlab/data/sr/traindf.csv", index_col=0)
    train_dataset = MyDataLoader(df_train)
    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=128,
                              pin_memory=True)

    df_test = pd.read_csv("/daintlab/data/sr/testdf.csv", index_col=0)
    test_dataset = MyDataLoader(df_test)
    test_loader = DataLoader(test_dataset, shuffle=True, batch_size=128,
                             pin_memory=True)

    lstm = Model(input_size=48 * 7, hidden_size=48, num_layers=3).cuda()
    trained_model, train_loss_list, test_loss_list = train(
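# Despite its name, MyDataLoader is used here as a torch Dataset wrapped by a
# regular DataLoader. A minimal sketch of such a DataFrame-backed dataset,
# assuming each row holds 48 * 7 input readings followed by the targets
# (the column split is an assumption, not from the source):
import torch
from torch.utils.data import Dataset

class MyDataLoader(Dataset):
    def __init__(self, df):
        values = df.values.astype('float32')
        self.x = torch.from_numpy(values[:, :48 * 7])  # one week of inputs per row
        self.y = torch.from_numpy(values[:, 48 * 7:])  # prediction targets

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]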
def train(args):
    with open(args.vocab, 'r') as f:
        vocab = json.load(f)
    vocab_size = len(vocab['q'])

    with MyDataLoader(args.train_features_h5, args.train_questions_h5) as train_loader, \
            MyDataLoader(args.val_features_h5, args.val_questions_h5) as val_loader:
        film_generator = FiLMGenerator(vocab_size)
        filmed_net = FiLMedNet()

        if args.start_from_checkpoint:
            print('Loading states from {}'.format(args.start_from_checkpoint))
            checkpoint = torch.load(args.start_from_checkpoint)
            film_generator.load_state_dict(checkpoint['fg_best_state'])
            filmed_net.load_state_dict(checkpoint['fn_best_state'])

        print('\nFiLMGenerator: \n {}\n\n'.format(film_generator))
        print('FiLMed Network: \n {}\n'.format(filmed_net))

        criterion = CrossEntropyLoss().cuda()
        fg_optimizer = Adam(film_generator.parameters(), lr=args.lr, weight_decay=1e-5)
        fn_optimizer = Adam(filmed_net.parameters(), lr=args.lr, weight_decay=1e-5)

        t = 0
        best_accuracy = 0
        running_loss = 0
        for epoch in range(args.epochs):
            print('Starting Epoch {}'.format(epoch))
            film_generator.cuda()
            filmed_net.cuda()
            film_generator.train()
            filmed_net.train()

            for batch in train_loader:
                t += 1
                questions, feats, answers = batch
                questions = questions.cuda()
                feats = feats.cuda()
                answers = answers.cuda()

                fg_optimizer.zero_grad()
                fn_optimizer.zero_grad()

                film = film_generator(questions)
                output = filmed_net(feats, film)
                loss = criterion(output, answers)
                loss.backward()
                fg_optimizer.step()
                fn_optimizer.step()

                running_loss += loss.item()
                if t % 100 == 0:
                    print(t, running_loss / 100)
                    running_loss = 0

            film_generator.eval()
            filmed_net.eval()
            tr_accuracy = check_accuracy(film_generator, filmed_net, train_loader)
            print('Epoch {}. Training accuracy: {}'.format(epoch, tr_accuracy))
            val_accuracy = check_accuracy(film_generator, filmed_net, val_loader)
            print('Epoch {}. Validation accuracy: {}'.format(epoch, val_accuracy))

            if val_accuracy >= best_accuracy:
                best_accuracy = val_accuracy  # track the best score so the comparison stays meaningful
                fg_best_state = get_state(film_generator)
                fn_best_state = get_state(filmed_net)
                checkpoint = {
                    'fg_best_state': fg_best_state,
                    'fn_best_state': fn_best_state,
                    'epoch': epoch,
                    'val_accuracy': val_accuracy,
                    'vocab': vocab,
                }
                print('Saving checkpoint to {}'.format(args.save_checkpoint_to))
                torch.save(checkpoint, args.save_checkpoint_to)
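# check_accuracy and get_state are assumed above; hypothetical minimal
# versions consistent with how they are called:
import torch

def get_state(module):
    # detach a CPU copy of the weights so the checkpoint is device-independent
    return {k: v.cpu().clone() for k, v in module.state_dict().items()}

def check_accuracy(film_generator, filmed_net, loader):
    film_generator.eval()
    filmed_net.eval()
    num_correct, num_samples = 0, 0
    with torch.no_grad():
        for questions, feats, answers in loader:
            film = film_generator(questions.cuda())
            preds = filmed_net(feats.cuda(), film).argmax(dim=1).cpu()
            num_correct += (preds == answers).sum().item()
            num_samples += answers.size(0)
    return num_correct / num_samples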