def main():
    """Train the classifier indefinitely (or up to args.max_epoch), saving a
    checkpoint every epoch and dumping the accumulated stats to JSON at the end.
    """
    train_set = SinaDataset(path.join(args.source, 'train.json'), input_dim)
    test_set = SinaDataset(path.join(args.source, 'test.json'), input_dim)
    train_loader = DataLoader(train_set, batch_size=args.bs, shuffle=True, drop_last=True)
    test_loader = DataLoader(test_set, batch_size=args.bs, shuffle=True, drop_last=True)

    model = TextCNN(input_dim, 200)
    # model = MyLSTM(input_dim, hidden_dim=8)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), args.lr, weight_decay=args.wd)

    # Flat per-iteration histories, extended each epoch.
    train_loss, train_accu = [], []
    valid_loss, valid_accu = [], []

    epoch = 0
    while True:
        epoch += 1
        epoch_loss, epoch_accu = train_one_epoch(epoch, model, optimizer, train_loader, device, args.bs)
        val_loss, val_accu = validate(model, test_loader, device, args.bs)
        train_loss.extend(epoch_loss)
        train_accu.extend(epoch_accu)
        valid_loss.extend(val_loss)
        valid_accu.extend(val_accu)

        print('saving...')
        torch.save(model.state_dict(), './saved_models/epoch' + str(epoch) + '.pkl')
        print()

        # A falsy max_epoch means "run until externally stopped".
        if not (args.max_epoch and epoch >= args.max_epoch):
            continue

        train_result = {
            'batch-size': args.bs,
            'train-loss': train_loss,
            'train-accu': train_accu,
            'valid-loss': valid_loss,
            'valid-accu': valid_accu
        }
        with open('train-result.json', 'w', encoding='utf-8') as f:
            json.dump(train_result, f)
        break
def train(name, dataset, epochs, batch_size, learning_rate, regularization,
          embedding_dims, embedding_type):
    """Train a TextCNN on `dataset`, logging per-epoch stats and checkpointing
    whenever validation accuracy improves.

    Args:
        name: run name; used for the stats log and the checkpoint filename.
        dataset: dataset prefix understood by `load()` ('<dataset>_train', ...).
        epochs: number of full passes over the training data.
        batch_size: minibatch size.
        learning_rate: Adam learning rate.
        regularization: Adam weight decay (L2 penalty).
        embedding_dims: embedding dimensionality passed to TextCNN.
        embedding_type: embedding initialization passed to TextCNN.
    """
    dirname, _ = os.path.split(os.path.abspath(__file__))
    run_uid = datetime.datetime.today().strftime('%Y-%m-%dT%H:%M:%S')
    logger = StatsLogger(dirname, 'stats', name, run_uid)

    print('Loading data')
    X_train, y_train = load('{}_train'.format(dataset))
    X_valid, y_valid = load('{}_valid'.format(dataset))
    vocab = load('{}_vocab'.format(dataset)).vocab

    X_train = torch.as_tensor(X_train, dtype=torch.long)
    y_train = torch.as_tensor(y_train, dtype=torch.float)
    X_valid = torch.as_tensor(X_valid, dtype=torch.long)
    y_valid = torch.as_tensor(y_valid, dtype=torch.float)

    prev_acc = 0  # best validation accuracy seen so far
    model = TextCNN(dataset=dataset,
                    input_size=X_train.size()[1],
                    vocab_size=len(vocab) + 1,  # +1 presumably for padding/OOV — confirm against vocab builder
                    embedding_dims=embedding_dims,
                    embedding_type=embedding_type)
    print(model)
    print('Parameters: {}'.format(
        sum(p.numel() for p in model.parameters() if p.requires_grad)))
    print('Training samples: {}'.format(len(X_train)))

    if torch.cuda.is_available():
        # Move everything to the GPU once, up front. Batches sliced from these
        # tensors are already on-device, so no per-batch transfer is needed
        # (the original re-called .cuda() on every batch, a redundant no-op).
        X_train = X_train.cuda()
        y_train = y_train.cuda()
        X_valid = X_valid.cuda()
        y_valid = y_valid.cuda()
        model = model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                           weight_decay=regularization)
    criterion = nn.BCEWithLogitsLoss()

    print('Starting training')
    try:
        for epoch in range(epochs):
            epoch_loss = []
            epoch_acc = []
            model.train()  # set training mode once per epoch, not per batch
            for batch in minibatch_iter(len(X_train), batch_size):
                X_train_batch = X_train[batch]
                y_train_batch = y_train[batch]
                optimizer.zero_grad()
                output = model(X_train_batch)
                train_loss = criterion(output, y_train_batch)
                train_acc = accuracy(output, y_train_batch)
                epoch_loss.append(train_loss.item())
                epoch_acc.append(train_acc.item())
                train_loss.backward()
                optimizer.step()
            model.eval()

            train_loss, train_acc = np.mean(epoch_loss), np.mean(epoch_acc)
            valid_loss, valid_acc, _ = compute_dataset_stats(
                X_valid, y_valid, model, nn.BCEWithLogitsLoss(), 256)
            stats = [epoch + 1, train_loss, train_acc, valid_loss, valid_acc]
            epoch_string = '* Epoch {}: t_loss={:.3f}, t_acc={:.3f}, ' + \
                'v_loss={:.3f}, v_acc={:.3f}'
            print(epoch_string.format(*stats))
            logger.write(stats)

            # checkpoint model only when validation accuracy improves
            if prev_acc < valid_acc:
                prev_acc = valid_acc
                model_path = os.path.join(dirname, 'checkpoints', name)
                torch.save(model.state_dict(), model_path)
    finally:
        # Always release the stats log, even if training raises.
        logger.close()
def cv_score(dataset, embedding_type, epochs, batch_size=32, learning_rate=1e-4,
             regularization=0):
    """Run 10-fold cross-validation of a TextCNN on `dataset` and print
    per-fold and aggregate accuracy statistics.

    Args:
        dataset: dataset prefix understood by `load()` ('<dataset>_train', ...).
        embedding_type: embedding initialization passed to TextCNN.
        epochs: training epochs per fold.
        batch_size: minibatch size.
        learning_rate: Adam learning rate.
        regularization: Adam weight decay (L2 penalty).
    """
    kf = KFold(10)
    X, y = load('{}_train'.format(dataset))
    vocab = load('{}_vocab'.format(dataset)).vocab
    cv_acc = []
    cv_std = []
    for ci, (train_index, test_index) in enumerate(kf.split(X)):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]
        # Move the whole fold to the GPU once; slices below are already
        # on-device, so no per-batch transfer is needed.
        X_train = torch.as_tensor(X_train, dtype=torch.long).cuda()
        y_train = torch.as_tensor(y_train, dtype=torch.float).cuda()
        X_test = torch.as_tensor(X_test, dtype=torch.long).cuda()
        y_test = torch.as_tensor(y_test, dtype=torch.float).cuda()

        # A fresh model per fold so folds don't contaminate each other.
        model = TextCNN(dataset=dataset,
                        input_size=X_train.shape[1],
                        vocab_size=len(vocab) + 1,
                        embedding_dims=300,
                        embedding_type=embedding_type).cuda()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate,
                               weight_decay=regularization)
        criterion = nn.BCEWithLogitsLoss()

        model.train()
        for epoch in range(epochs):
            for batch in minibatch_iter(len(X_train), batch_size):
                X_train_batch = X_train[batch]
                y_train_batch = y_train[batch]
                optimizer.zero_grad()
                output = model(X_train_batch)
                train_loss = criterion(output, y_train_batch)
                train_loss.backward()
                optimizer.step()
        model.eval()

        _, test_acc, test_std = compute_dataset_stats(X_test, y_test, model,
                                                      nn.BCEWithLogitsLoss(), 256)
        cv_acc.append(test_acc)
        cv_std.append(test_std)
        print(' [{}] acc={}, std={}'.format(ci + 1, test_acc, test_std))

    print('{} - {}'.format(dataset, embedding_type))
    print('Mean acc - {}'.format(np.mean(cv_acc)))
    print('Min acc - {}'.format(np.min(cv_acc)))
    print('Max acc - {}'.format(np.max(cv_acc)))
    print('Mean std - {}'.format(np.mean(cv_std)))
label_model.load_state_dict(label_model_pretrained)

if args.flat or args.cascaded_step2:
    # Freeze the pretrained label model so only the doc model trains.
    # BUGFIX: the attribute is `requires_grad`; the original wrote
    # `require_grad`, which merely created a new attribute on each Parameter
    # and left the label model fully trainable.
    for param in label_model.parameters():
        param.requires_grad = False

doc_model = nn.DataParallel(doc_model)
label_model = nn.DataParallel(label_model)
doc_model = doc_model.cuda()
label_model = label_model.cuda()

# Loss and optimizer
criterion = Loss(use_geodesic=args.joint,
                 _lambda=args.geodesic_lambda,
                 only_label=args.cascaded_step1)
optimizer = torch.optim.Adam([
    {'params': doc_model.parameters(), 'lr': doc_lr},
    {'params': label_model.parameters(), 'lr': 0.001},
])

logging.info('Starting Training')

# Train and evaluate
Y = torch.arange(trainvalset.n_labels).cuda()
train(doc_model, label_model, trainloader, valloader, testloader, criterion,
      optimizer, Y, args.num_epochs, args.exp_name)