def train_model(model, data_loader, optimizer, loss_fn):
    for epoch in range(NUM_EPOCHS):
        # Put stateful layers (dropout, batch norm, etc.) into training mode.
        model.train()
        # Each batch from the loader is an (images, expected-labels) pair.
        for _, (images, expected) in enumerate(data_loader):
            # Run the forward pass of the model to produce, for each input
            # image, a vector of per-class scores.
            #
            # PyTorch records the operations on all of this model's parameters
            # so that gradients can be computed later via loss.backward().
            predicted = model(images)

            # Zero out the gradients accumulated from the previous iteration.
            optimizer.zero_grad()

            # Compute the scalar loss (MSE in this setup). Gradients w.r.t.
            # all parameters marked as requiring a gradient
            # (requires_grad=True) are computed later by loss.backward(),
            # not here.
            loss = loss_fn(predicted, expected)

            # Compute gradients via backpropagation. This uses the
            # intermediate activations stored by PyTorch during the forward
            # pass.
            loss.backward()

            # Apply the computed gradients to the weight/bias update.
            # NB: The loss and optimizer are connected to the model's
            # parameters through PyTorch's autograd graph.
            optimizer.step()

        # TODO: Checkpoint model
        test_model(model, dataloader.get_test_loader(), epoch)
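# A minimal sketch of how train_model above might be driven end to end.
# Everything below (the toy data, the tiny network, the hyperparameters, and
# the MSELoss choice implied by the comments above) is an assumption for
# illustration, not part of the original snippet; test_model and
# dataloader.get_test_loader would still need to exist for the final call
# inside train_model to run.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

NUM_EPOCHS = 3

# Toy regression data: 256 samples of 16 features with scalar targets.
features = torch.randn(256, 16)
targets = torch.randn(256, 1)
train_loader = DataLoader(TensorDataset(features, targets), batch_size=32, shuffle=True)

net = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 1))
opt = torch.optim.SGD(net.parameters(), lr=1e-2)

# train_model(net, train_loader, opt, nn.MSELoss())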
crop_size = [32, 32, 32]

# Build the two-member DenseNet ensemble from the saved weights.
models = []
models.append(
    densenetf.get_model(down_structure=[2, 2, 4], k=18, weights=model_path[0]))
models.append(
    densenetf.get_model(down_structure=[2, 2, 4], k=16, weights=model_path[1]))

from dataloader import ClfAttentionDataset, get_test_loader

lines = pd.read_csv('test.csv')
pred = []
for m in models:
    test_dataset = ClfAttentionDataset(crop_size=crop_size, subset=['test'], move=None,
                                       lines=lines, data_path=data_path)
    test_loader = get_test_loader(test_dataset, batch_size=1)
    pred.append(m.predict(test_loader, steps=len(test_dataset)))

#%%
# Average the positive-class probability over the ensemble (117 test samples).
total = np.zeros(117)
for i in range(len(model_path)):
    total += pred[i][:, 1].squeeze()
predicted = total / len(model_path)

candidate = lines['name'].tolist()
result = pd.DataFrame({'name': candidate, 'predicted': predicted})
result.to_csv("submission.csv", index=False, sep=',')
                                 help='data repeat num')
        self.parser = parser

    def parse(self):
        arg = self.parser.parse_args(args=[])
        arg.cuda = not arg.no_cuda and torch.cuda.is_available()
        arg.device = torch.device('cuda' if arg.cuda else 'cpu')
        return arg


if __name__ == '__main__':
    args = Options().parse()
    args.model = DenseNet_CNN(args.device)
    # args.model = DenseNet(args.device)
    # args.model = LeNet5(args.device)
    # args.model = ShallowConvNet(args.device)
    args.train_loader, args.valid_loader = get_train_valid_loader(
        './', batch_size=args.batch_size, random_seed=123, valid_ratio=0.1, shuffle=True)

    trainer = Trainer(args)
    trainer.train()

    args.test_loader = get_test_loader('./', batch_size=8, shuffle=False)
    tester = Tester(args)
    tester.test()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=int, default=0,
                        help='0: original dataset, 1: re-split dataset')
    parser.add_argument('--train_emb', action='store_true',
                        help='Train word embedding for SQLNet (requires pretrained model).')
    parser.add_argument('--resume', default=None,
                        help='resume from pretrained model.')
    parser.add_argument('--epoch', type=int, default=20, help='number of epochs')
    parser.add_argument('--batch_size', type=int, default=2, help='batch size')
    parser.add_argument('--logging_step', type=int, default=50, help='logging step')
    parser.add_argument('--lr_update', type=int, default=10, help='lr update')
    parser.add_argument('--learning_rate', type=float, default=1e-3, help='learning rate')
    parser.add_argument('--prefix', type=str, default='bS2_', help='prefix of saved model')
    parser.add_argument('--withtab', type=int, default=1, help='sample from content vector')
    parser.add_argument('--teacher_forcing_fraction', type=float, default=1.0,
                        help='fraction of batches that will use teacher forcing during training')
    parser.add_argument('--scheduled_teacher_forcing', action='store_true',
                        help='Linearly decrease the teacher forcing fraction '
                             'from 1.0 to 0.0 over the specified number of epochs')
    args = parser.parse_args()

    if args.scheduled_teacher_forcing:
        schedule = np.arange(1.0, 0.0, -1.0 / args.epoch)
    else:
        schedule = np.ones(args.epoch) * args.teacher_forcing_fraction

    train_loader, val_loader = data.get_loaders(args.batch_size, 8)
    test_loader = data.get_test_loader('test', args.batch_size, 8)

    if args.withtab:
        model = QG()
    else:
        model = QG()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['model'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, start_epoch))
            model.teach_forcing = 0.0
            model.mask_tf = True
            print('dev set')
            validate(args, val_loader, model)
            print('test set')
            validate(args, test_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # permuted mnist
    if args.pmnist:
        perm = torch.randperm(784)
    else:
        perm = torch.arange(0, 784).long()

    train_loader, valid_loader = get_train_valid_loader(args.data, args.batchsize, perm, shuffle=True)
    test_loader = get_test_loader(args.data, args.batchsize, perm)

    model = model.mnistModel(args.model, args.ninp, args.nhid, args.nlayers, args, quantize=args.quantize)
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    criterion.to(device)

    params_fp = []
    params_invariant = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            if 'full_precision' in name:
        dataset = ToxicDataset(csv_file="aggregate_tox.csv", root_dir=Config().data_dir)
        num_train = len(dataset)
        indices = list(range(num_train))
        values = []
        for fold in range(cv_folds):
            kwargs['fold'] = fold
            data_loader, indices = get_train_valid_loader(
                dataset, indices, config.data_dir, config.batch_size,
                config.random_seed, config.valid_size, config.shuffle,
                config.show_sample, config.cv, **kwargs)
            trainer = Trainer(config, data_loader)
            valid_acc = trainer.train()
            values.append(valid_acc)

            cross_file = open("cross_val.txt", "a+")
            cross_file.write(str(valid_acc))
            cross_file.write("\n")
            cross_file.close()

        cross_file = open("cross_val.txt", "a+")
        cross_file.write(str(np.mean(np.array(values))))
        cross_file.write("\n")
        cross_file.close()
    else:
        save_config(config)
        trainer.train()
# or load a pretrained model and test
else:
    data_loader = get_test_loader(config.data_dir, config.batch_size, **kwargs)
    trainer.test()
import dataloader
import SpinalNet

if __name__ == '__main__':
    print("Batch doesn't work now.")
    _attack = "deepfool"
    print(f"attack: {_attack}")

    use_cuda = True
    print("CUDA Available: ", torch.cuda.is_available())
    device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")

    spinal = torch.load("./weight/pretrained_spinalnet.pt").to(device)
    print(spinal)
    spinal.eval()

    dl = dataloader.get_test_loader(path="/home/dhk1349/바탕화면/dataset", batch_size_test=1)

    epsilons = [0, .05, .1, .15, .2, .25, .3]
    accuracies = []
    examples = []

    if _attack == "fgsm":
        print("fgsm attack")
        for eps in epsilons:
            acc, ex = attack.attack_test(spinal, device, dl)
            accuracies.append(acc)
            examples.append(ex)

        plt.figure(figsize=(5, 5))
        plt.plot(epsilons, accuracies, "*-")
        plt.yticks(np.arange(0.7, 1.1, step=0.1))
def main():
    seed = 42
    seed_everything(seed)
    num_epochs = 3
    batch_size = 32

    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)

    train_df = pd.read_csv('data/train.csv')
    train_df['text'] = train_df['text'].astype(str)
    train_df['selected_text'] = train_df['selected_text'].astype(str)

    # Train one model per stratified fold.
    for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, train_df.sentiment), start=1):
        print(f'Fold: {fold}')
        model = TweetModel()
        optimizer = optim.AdamW(model.parameters(), lr=3e-5, betas=(0.9, 0.999))
        criterion = loss_fn
        dataloaders_dict = get_train_val_loaders(train_df, train_idx, val_idx, batch_size)
        train_model(model, dataloaders_dict, criterion, optimizer, num_epochs,
                    f'roberta_fold{fold}.pth')

    # inference
    test_df = pd.read_csv('data/test.csv')
    test_df['text'] = test_df['text'].astype(str)
    test_loader = get_test_loader(test_df)

    predictions = []
    models = []
    for fold in range(skf.n_splits):
        model = TweetModel()
        model.cuda()
        model.load_state_dict(torch.load(f'roberta_fold{fold + 1}.pth'))
        model.eval()
        models.append(model)

    for data in test_loader:
        ids = data['ids'].cuda()
        masks = data['masks'].cuda()
        tweet = data['tweet']
        offsets = data['offsets'].numpy()

        # Average the start/end span probabilities across the fold models.
        start_logits = []
        end_logits = []
        for model in models:
            with torch.no_grad():
                output = model(ids, masks)
                start_logits.append(torch.softmax(output[0], dim=1).cpu().detach().numpy())
                end_logits.append(torch.softmax(output[1], dim=1).cpu().detach().numpy())
        start_logits = np.mean(start_logits, axis=0)
        end_logits = np.mean(end_logits, axis=0)

        # Fall back to the full tweet when the predicted span is invalid.
        for i in range(len(ids)):
            start_pred = np.argmax(start_logits[i])
            end_pred = np.argmax(end_logits[i])
            if start_pred > end_pred:
                pred = tweet[i]
            else:
                pred = get_selected_text(tweet[i], start_pred, end_pred, offsets[i])
            predictions.append(pred)

    # submission
    sub_df = pd.read_csv('data/sample_submission.csv')
    sub_df['selected_text'] = predictions
    sub_df['selected_text'] = sub_df['selected_text'].apply(
        lambda x: x.replace('!!!!', '!') if len(x.split()) == 1 else x)
    sub_df['selected_text'] = sub_df['selected_text'].apply(
        lambda x: x.replace('..', '.') if len(x.split()) == 1 else x)
    sub_df['selected_text'] = sub_df['selected_text'].apply(
        lambda x: x.replace('...', '.') if len(x.split()) == 1 else x)
    sub_df.to_csv('submission.csv', index=False)
    sub_df.head()
def main(args):
    test_loader = get_test_loader(args.dataset, args.test_id, args.batch_size,
                                  args.num_workers, True)
    # Run the processing step for each training id against the shared test loader.
    for train_id in range(1, 15):
        process(args, train_id, test_loader)