def main():
    train_engine = DataEngine(config, args.data_dir, args.img_dir, args.year,
                              args.test_set, 'train')
    train_dataloader = DataLoader(train_engine, batch_size=batch_size,
                                  shuffle=True, num_workers=4, pin_memory=True)
    val_engine = DataEngine(config, args.data_dir, args.img_dir, args.year,
                            args.test_set, 'val')
    val_dataloader = DataLoader(val_engine, batch_size=batch_size,
                                shuffle=False, num_workers=4, pin_memory=True)

    model = Net(config=config,
                no_words=train_engine.tokenizer.no_words,
                no_answers=train_engine.tokenizer.no_answers,
                resnet_model=resnet_model,
                lstm_size=lstm_size,
                emb_size=emb_size,
                use_pretrained=args.use_pretrained).cuda()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    train(train_dataloader, val_dataloader, model, optimizer)
def main():
    device = (torch.device('cuda') if torch.cuda.is_available()
              else torch.device('cpu'))
    model = Net(1).to(device=device)

    data_path = "../Mnist/"
    mnist = instantiate_training_data(data_path)
    mnist_val = instantiate_val_data(data_path)
    train_loader = torch.utils.data.DataLoader(mnist, batch_size=64)
    val_loader = torch.utils.data.DataLoader(mnist_val, batch_size=64)

    optimizer = optim.SGD(model.parameters(), lr=1e-2)
    loss_fn = nn.CrossEntropyLoss()
    training_string = "Training"
    val_string = "Val"

    print(f"Training on device {device}.")
    training_loop(
        n_epochs=100,
        optimizer=optimizer,
        model=model,
        loss_fn=loss_fn,
        train_loader=train_loader,
        device=device,
    )
    evaluate_training(model, train_loader, training_string)
    evaluate_validation(model, val_loader, val_string)
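
# `training_loop` above is called but not defined in this snippet. A minimal
# sketch, assuming a standard epoch loop over (imgs, labels) batches; the body
# below is illustrative, not the original implementation.
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader, device):
    for epoch in range(1, n_epochs + 1):
        loss_total = 0.0
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device=device), labels.to(device=device)
            loss = loss_fn(model(imgs), labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_total += loss.item()
        if epoch == 1 or epoch % 10 == 0:
            print(f"Epoch {epoch}, training loss "
                  f"{loss_total / len(train_loader):.4f}")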
def train(net: Net, data_path: str, batch_size: int, num_epochs: int,
          learning_rate: float):
    trans = transforms.Compose([
        transforms.ToTensor(),
    ])
    train_dataset = torchvision.datasets.ImageFolder(root=data_path,
                                                     transform=trans)
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size,
                              shuffle=True)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)

    # move the model to the GPU once, before the loop, so it matches the inputs
    if cuda.is_available():
        net = net.to('cuda')

    for epoch in range(num_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        show_loss = lambda _: '[{}, {:.3f}]'.format(epoch + 1, running_loss)
        with click.progressbar(train_loader, item_show_func=show_loss) as bar:
            for inputs, labels in bar:
                if cuda.is_available():
                    inputs, labels = inputs.to('cuda'), labels.to('cuda')

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # accumulate statistics for the progress bar
                running_loss += loss.item()
def main():
    if args.use_cuda:
        torch.cuda.set_device(args.gpu)

    dataloader = DataLoader(dict_path=args.dict_path,
                            glove_path=args.glove_path,
                            data_path=args.data_path,
                            batch_size=args.batch_size,
                            use_glove=args.use_glove)
    model = Net(no_words=dataloader.tokenizer.no_words,
                lstm_size=args.lstm_size,
                emb_size=args.emb_size,
                depth=args.depth)
    if args.use_cuda:
        model = model.cuda()
    if args.start_iter != 0:
        # load the model state from a pre-specified iteration (saved model available)
        model.load_state_dict(
            torch.load(os.path.join(args.save_dir,
                                    'iter_%d.pth' % args.start_iter)),
            strict=False)

    tokenizer = Tokenizer(args.dict_path)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    train(dataloader, model, optimizer, tokenizer)
def main():
    dataloader = DataLoader(config, args.data_dir, args.img_dir, args.year,
                            args.test_set, batch_size)
    model = Net(config=config,
                no_words=dataloader.tokenizer.no_words,
                no_answers=dataloader.tokenizer.no_answers,
                resnet_model=resnet_model,
                lstm_size=lstm_size,
                emb_size=emb_size,
                use_pretrained=False).cuda()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    train(dataloader, model, optimizer)
def main(args):
    #### basic torch setup
    use_cuda = not args['no_cuda'] and torch.cuda.is_available()  # use cuda
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    torch.manual_seed(args['seed'])  # seed

    #### data pipeline
    data_dir = os.path.join(args['data_dir'], nni.get_trial_id())
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(data_dir, train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args['batch_size'], shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(data_dir, train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=1000, shuffle=True, **kwargs)

    #### define model
    hidden_size = args['hidden_size']
    model = Net(hidden_size=hidden_size).to(device)
    optimizer = optim.SGD(model.parameters(), lr=args['lr'],
                          momentum=args['momentum'])

    #### train
    for epoch in range(1, args['epochs'] + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test_acc = test(args, model, device, test_loader)

        if epoch < args['epochs']:
            # report intermediate result
            nni.report_intermediate_result(test_acc)
            logger.debug('test accuracy %g', test_acc)
            logger.debug('Pipe send intermediate result done.')
        else:
            # report final result
            nni.report_final_result(test_acc)
            logger.debug('Final result is %g', test_acc)
            logger.debug('Send final result done.')
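
# The `train` and `test` helpers used above are not shown. A minimal sketch of
# both, assuming `import torch.nn.functional as F` and log-probability outputs
# from Net (hence F.nll_loss); illustrative only.
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = F.nll_loss(model(data), target)
        loss.backward()
        optimizer.step()


def test(args, model, device, test_loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            pred = model(data).argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    return 100. * correct / len(test_loader.dataset)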
def main(): """Main function """ # Load the parameters from json file args = args_parser() json_path = os.path.join(args.model_dir, 'params.json') assert os.path.isfile( json_path), "No json configuration file found at {}".format(json_path) params = utils.Params(json_path) # Create summary writer for use with tensorboard writer = SummaryWriter(os.path.join(args.model_dir, 'runs', 'train')) # use GPU if available params.cuda = torch.cuda.is_available() # Set the random seed for reproducible experiments torch.manual_seed(230) if params.cuda: torch.cuda.manual_seed(230) params.device = "cuda:0" else: params.device = "cpu" # Set the logger utils.set_logger(os.path.join(args.model_dir, 'train.log')) # Create the input data pipeline logging.info("Loading the datasets...") # fetch dataloaders dataloaders = d_l.get_dataloader(['train', 'val'], args.data_dir, params) train_dl = dataloaders['train'] val_dl = dataloaders['val'] logging.info("- done.") # Define the model and optimizer model = Net(params) if params.cuda: model = model.to(params.device) writer.add_graph(model, next(iter(train_dl))[0]) optimizer = torch.optim.Adam(model.parameters(), lr=params.learning_rate) # fetch loss function and metrics criterion = loss_fn metrics = get_metrics() # Train the model logging.info("Starting training for %d epoch(s)", params.num_epochs) train_and_evaluate(model, train_dl, val_dl, optimizer, criterion, metrics, params, args.model_dir, writer, args.restore_file) writer.close()
def main():
    test_path = Path.cwd() / 'data_in' / 'test.txt'
    vocab_path = Path.cwd() / 'data_in' / 'vocab.pkl'
    with open(vocab_path, mode='rb') as io:
        vocab = pickle.load(io)

    tokenizer = MeCab()
    padder = PadSequence(length=70, pad_val=vocab.token_to_idx['<pad>'])
    test_ds = Corpus(test_path, vocab, tokenizer, padder)
    test_dl = DataLoader(test_ds, batch_size=1024)

    model = Net(vocab_len=len(vocab))
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    for epoch in range(1):
        model.train()
        index = 0
        acc = 0
        for label, sen1, sen2 in tqdm(test_dl, disable=True):
            optimizer.zero_grad()
            pre_label = model(sen1, sen2)
            loss = loss_fn(pre_label, label)
            loss.backward()
            optimizer.step()

            pred_cls = pre_label.data.max(1)[1]
            acc += pred_cls.eq(label.data).cpu().sum()
            print("epoch: {}, index: {}, loss: {}".format(epoch + 1, index,
                                                          loss.item()))
            index += len(label)
        print('Accuracy : %d %%' % (100 * acc / index))
def run(load_last_checkpoint=False):
    save_dir = f'{OUTPUT_PATH}/models/'
    os.makedirs(save_dir, exist_ok=True)

    neural_net = Net()
    loss_fn = Loss()
    optim = torch.optim.SGD(neural_net.parameters(), DEFAULT_LR,
                            momentum=0.9, weight_decay=1e-4)
    starting_epoch = 0
    initial_loss = None

    if load_last_checkpoint:
        model_paths = glob(f'{save_dir}*.ckpt')
        model_names = [int(i.split('/')[-1][:-5]) for i in model_paths]
        latest_model_path = f'{save_dir}{max(model_names)}.ckpt'
        print('loading latest model from:', latest_model_path)
        checkpoint = torch.load(latest_model_path)
        neural_net.load_state_dict(checkpoint['model_state_dict'])
        optim.load_state_dict(checkpoint['optimizer_state_dict'])
        starting_epoch = checkpoint['epoch']
        initial_loss = checkpoint['loss']

    if torch.cuda.is_available():
        neural_net = neural_net.cuda()
        loss_fn = loss_fn.cuda()

    print(f'Training from epoch: {starting_epoch} towards: {TOTAL_EPOCHS}, '
          f'with learning rate starting from: {get_lr(starting_epoch)}, '
          f'and loss: {initial_loss}')

    # shuffle the metadata, then split train/val by series so that no
    # seriesuid leaks across the split
    meta = pd.read_csv(f'{OUTPUT_PATH}/augmented_meta.csv',
                       index_col=0).sample(frac=1).reset_index(drop=True)
    meta_group_by_series = meta.groupby(['seriesuid']).indices
    list_of_groups = [{i: list(meta_group_by_series[i])}
                      for i in meta_group_by_series.keys()]
    random.Random(0).shuffle(list_of_groups)

    val_split = int(VAL_PCT * len(list_of_groups))
    val_indices = list(itertools.chain(
        *[list(i.values())[0] for i in list_of_groups[:val_split]]))
    train_indices = list(itertools.chain(
        *[list(i.values())[0] for i in list_of_groups[val_split:]]))

    ltd = LunaDataSet(train_indices, meta)
    lvd = LunaDataSet(val_indices, meta)
    train_loader = DataLoader(ltd, batch_size=1, shuffle=False)
    val_loader = DataLoader(lvd, batch_size=1, shuffle=False)

    for ep in range(starting_epoch, TOTAL_EPOCHS):
        train(train_loader, neural_net, loss_fn, ep, optim, get_lr,
              save_dir=save_dir)
        validate(val_loader, neural_net, loss_fn)
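
# `get_lr(epoch)` above is referenced but not defined in this snippet. A
# minimal sketch, assuming a step decay from DEFAULT_LR over TOTAL_EPOCHS;
# the exact schedule is an assumption.
def get_lr(epoch):
    if epoch <= TOTAL_EPOCHS * 0.5:
        return DEFAULT_LR
    elif epoch <= TOTAL_EPOCHS * 0.8:
        return DEFAULT_LR * 0.1
    return DEFAULT_LR * 0.01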
def train(train_data, val_data, fold_idx=None):
    train_data = MyDataset(train_data, train_transform)
    train_loader = DataLoader(train_data, batch_size=config.batch_size,
                              shuffle=True)
    val_data = MyDataset(val_data, val_transform)
    val_loader = DataLoader(val_data, batch_size=config.batch_size,
                            shuffle=False)

    model = Net(model_name).to(device)
    # criterion = nn.CrossEntropyLoss()
    criterion = FocalLoss(0.5)
    # optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    # scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)
    optimizer = Ranger(model.parameters(), lr=1e-3, weight_decay=0.0005)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=4)

    if fold_idx is None:
        print('start')
        model_save_path = os.path.join(config.model_path,
                                       '{}.bin'.format(model_name))
    else:
        print('start fold: {}'.format(fold_idx + 1))
        model_save_path = os.path.join(
            config.model_path, '{}_fold{}.bin'.format(model_name, fold_idx))
    # if os.path.isfile(model_save_path):
    #     print('loading the previously trained model')
    #     model.load_state_dict(torch.load(model_save_path))

    best_val_score = 0
    best_val_score_cnt = 0
    last_improved_epoch = 0
    adjust_lr_num = 0
    for cur_epoch in range(config.epochs_num):
        start_time = int(time.time())
        model.train()
        print('epoch:{}, step:{}'.format(cur_epoch + 1, len(train_loader)))
        cur_step = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            probs = model(batch_x)
            train_loss = criterion(probs, batch_y)
            train_loss.backward()
            optimizer.step()

            cur_step += 1
            if cur_step % config.train_print_step == 0:
                train_acc = accuracy(probs, batch_y)
                msg = 'the current step: {0}/{1}, train loss: {2:>5.2}, train acc: {3:>6.2%}'
                print(msg.format(cur_step, len(train_loader),
                                 train_loss.item(), train_acc[0].item()))

        val_loss, val_score = evaluate(model, val_loader, criterion)
        if val_score >= best_val_score:
            if val_score == best_val_score:
                best_val_score_cnt += 1
            best_val_score = val_score
            torch.save(model.state_dict(), model_save_path)
            improved_str = '*'
            last_improved_epoch = cur_epoch
        else:
            improved_str = ''
        msg = 'the current epoch: {0}/{1}, val loss: {2:>5.2}, val acc: {3:>6.2%}, cost: {4}s {5}'
        end_time = int(time.time())
        print(msg.format(cur_epoch + 1, config.epochs_num, val_loss, val_score,
                         end_time - start_time, improved_str))

        if cur_epoch - last_improved_epoch >= config.patience_epoch \
                or best_val_score_cnt >= 3:
            if adjust_lr_num >= config.adjust_lr_num:
                print("No optimization for a long time, auto stopping...")
                break
            print("No optimization for a long time, adjust lr...")
            # scheduler.step()
            last_improved_epoch = cur_epoch  # reset, otherwise this branch fires every epoch
            adjust_lr_num += 1
            best_val_score_cnt = 0
        scheduler.step()

    del model
    gc.collect()

    if fold_idx is not None:
        model_score[fold_idx] = best_val_score
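
# `accuracy(probs, batch_y)` above returns an indexable result (train_acc[0]
# is formatted with '%'), which matches the common top-k helper returning
# fractional accuracies. A minimal sketch under that assumption:
def accuracy(output, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(1.0 / batch_size))
    return res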
def train(train_data, val_data, fold_idx=None):
    train_data = MyDataset(train_data, train_transform)
    train_loader = DataLoader(train_data, batch_size=config.batch_size,
                              shuffle=True)
    val_data = MyDataset(val_data, val_transform)
    val_loader = DataLoader(val_data, batch_size=config.batch_size,
                            shuffle=False)

    model = Net(model_name).to(device)
    criterion = nn.CrossEntropyLoss()
    # criterion = FocalLoss(0.5)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    # optimizer = torch.optim.Adagrad(model.parameters(), lr=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    # config.model_save_path = os.path.join(config.model_path, '{}.bin'.format(model_name))

    best_val_acc = 0
    last_improved_epoch = 0
    if fold_idx is None:
        print('start')
        model_save_path = os.path.join(config.model_path,
                                       '{}.bin'.format(model_name))
    else:
        print('start fold: {}'.format(fold_idx + 1))
        model_save_path = os.path.join(
            config.model_path, '{}_fold{}.bin'.format(model_name, fold_idx))

    for cur_epoch in range(config.epochs_num):
        start_time = int(time.time())
        model.train()
        print('epoch: ', cur_epoch + 1)
        cur_step = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer.zero_grad()
            probs = model(batch_x)
            train_loss = criterion(probs, batch_y)
            train_loss.backward()
            optimizer.step()

            cur_step += 1
            if cur_step % config.train_print_step == 0:
                train_acc = accuracy(probs, batch_y)
                msg = 'the current step: {0}/{1}, train loss: {2:>5.2}, train acc: {3:>6.2%}'
                print(msg.format(cur_step, len(train_loader),
                                 train_loss.item(), train_acc[0].item()))

        val_loss, val_acc = evaluate(model, val_loader, criterion)
        if val_acc >= best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), model_save_path)
            improved_str = '*'
            last_improved_epoch = cur_epoch
        else:
            improved_str = ''
        # msg = 'the current epoch: {0}/{1}, train loss: {2:>5.2}, train acc: {3:>6.2%}, ' \
        #       'val loss: {4:>5.2}, val acc: {5:>6.2%}, {6}'
        msg = 'the current epoch: {0}/{1}, val loss: {2:>5.2}, val acc: {3:>6.2%}, cost: {4}s {5}'
        end_time = int(time.time())
        print(msg.format(cur_epoch + 1, config.epochs_num, val_loss, val_acc,
                         end_time - start_time, improved_str))

        scheduler.step()
        if cur_epoch - last_improved_epoch > config.patience_epoch:
            print("No optimization for a long time, auto-stopping...")
            break

    del model
    gc.collect()
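
# `evaluate(model, val_loader, criterion)` used by the two training functions
# above returns (val_loss, val_acc). A minimal sketch, assuming top-1 accuracy
# and a module-level `device`; illustrative only.
def evaluate(model, val_loader, criterion):
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            probs = model(batch_x)
            total_loss += criterion(probs, batch_y).item()
            correct += (probs.argmax(dim=1) == batch_y).sum().item()
            total += batch_y.size(0)
    return total_loss / len(val_loader), correct / total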
    # keep only pretrained weights whose shapes match the current model
    # (the opening `pretrained_dict = {` of this comprehension was truncated
    # in the fragment and is restored from context)
    pretrained_dict = {
        k: v for k, v in pretrained_dict.items()
        if np.shape(model_dict[k]) == np.shape(v)
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print('-- Loading weights finished.')

    # 2. Multi-GPU parallelism
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model)
        cudnn.benchmark = True
        model = model.cuda()

    # 3. Create the loss criterion
    criterion = MultiBoxLoss()

    # 4. Create the optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    model.train()

    # 5. Load the data and train for args.Epoch rounds
    for epoch in range(args.Epoch):
        # 5.1 Use a different learning rate every 10 epochs
        if epoch % 10 == 0:
            adjust_learning_rate(optimizer, args.lr, 0.95, epoch)

        # 5.2 Create the data loader
        train_data = MyDataSet(args.annotation_path, Config['input_size'],
                               transform=my_transform, loader=default_loader)
        # Because the number of targets per image varies, batch_size must be 1.
        # DataLoader converts np.array to tensor automatically.
        data_loader = torch.utils.data.DataLoader(dataset=train_data,
                                                  batch_size=1, shuffle=False)
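
# `adjust_learning_rate(optimizer, args.lr, 0.95, epoch)` above is not defined
# in this fragment. A minimal sketch, assuming exponential decay of the base
# learning rate; the decay rule is an assumption.
def adjust_learning_rate(optimizer, base_lr, gamma, epoch):
    lr = base_lr * (gamma ** epoch)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr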
    # tail of the train(...) step: leave-one-out class probabilities per sample
    for i in torch.arange(x.shape[0]):
        include = np.delete(np.arange(x.shape[0]), i)
        # p = get_class_probs(out_z[i, :], c[include, :], l[include], out_w[include], params)
        p = get_class_probs(out_z[i, :], c[include, :], l[include], None, params)
        loss += loss_fn(p, l[i], params)
    print(", loss: {}".format(loss.item()))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


if __name__ == "__main__":
    # Load the parameters from json file
    args = parser.parse_args()
    params = utils.Params("params.json")

    model = Net(params)
    optimizer = optim.Adam(model.parameters(), params.lr)

    x, t = simulate_data(params)
    data = {"x": x, "target": t}
    storage = {}
    for epoch in range(params.epochs):
        train(data, model, optimizer, storage, args, params, epoch + 1)
    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # fetch dataloaders
    dataloaders = data_loader.fetch_data_loader(['train', 'val'],
                                                args.data_dir, params)
    train_dl = dataloaders['train']
    val_dl = dataloaders['val']
    logging.info("dataset loading - done.")

    # Define the model and optimizer
    model = Net().cuda() if params.cuda else Net()
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

    # fetch loss function and metrics
    loss_fn = loss_fn
    metrics = metrics

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(model, train_dl, val_dl, optimizer, loss_fn, metrics,
                       params, args.model_dir, args.restore_file)
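
# `train_and_evaluate` in the two template-style snippets above is not shown.
# A minimal sketch, assuming per-epoch `train`/`evaluate` helpers (hypothetical
# here) and checkpointing on the best validation accuracy:
def train_and_evaluate(model, train_dl, val_dl, optimizer, loss_fn, metrics,
                       params, model_dir, restore_file=None):
    best_val_acc = 0.0
    for epoch in range(params.num_epochs):
        logging.info("Epoch %d/%d", epoch + 1, params.num_epochs)
        train(model, optimizer, loss_fn, train_dl, metrics, params)
        val_metrics = evaluate(model, loss_fn, val_dl, metrics, params)
        val_acc = val_metrics['accuracy']
        if val_acc >= best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(),
                       os.path.join(model_dir, 'best.pth'))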