def train_and_predict(cfg_dict, preprocess_list):
    for p, preprocess_fun in preprocess_list:
        cfg = cfg_dict.copy()
        cfg['preprocess_fun'] = preprocess_fun
        cfg['CODER'] += '_%s' % p
        cfg['bagging_num'] = BAGGING_NUM
        print("training ", cfg['CODER'])
        train_model(**cfg)

def train_and_predict(cfg_dict, preprocess_list):
    for p, preprocess_fun in preprocess_list:
        cfg = cfg_dict.copy()
        cfg['preprocess_fun'] = preprocess_fun
        cfg['CODER'] += '_%s' % p
        cfg['bagging_num'] = BAGGING_NUM
        cfg['semi_train_path'] = "sub/base_average.csv"
        print("training ", cfg['CODER'])
        train_model(**cfg)

def train_and_predict(cfg_dict, preprocess_list):
    # Train a separate model for each preprocessing method
    for p, preprocess_fun in preprocess_list:
        # Define the configuration (config) values for model training
        cfg = cfg_dict.copy()
        cfg['preprocess_fun'] = preprocess_fun
        cfg['CODER'] += '_%s' % p
        cfg['bagging_num'] = BAGGING_NUM
        print("training ", cfg['CODER'])
        # Train the model!
        train_model(**cfg)

def main(unused_argv):
    del unused_argv
    experiment_data = create_training_and_eval_data_for_experiment(
        **EXPERIMENT_DATA_CONFIG)

    print('### Training with cross entropy loss:')
    tpr_1, fpr_1, w_1, b_1, threshold = train_model(
        data=experiment_data,
        use_global_objectives=False,
        metric_func=true_positive_at_false_positive,
        at_target_rate=TARGET_FPR,
        obj_type='TPR',
        at_target_type='FPR',
        train_iteration=TRAIN_ITERATIONS,
        lr=LEARNING_RATE,
        num_checkpoints=NUM_CHECKPOINTS)
    print('cross_entropy_loss tpr at requested fpr '
          'is {:.2f}@{:.2f}\n'.format(tpr_1, fpr_1))

    criterion = TPRFPRLoss(target_fpr=TARGET_FPR, dual_factor=10.0, num_labels=1)
    print('\n\n### training tpr@fpr{}:'.format(TARGET_FPR))
    tpr_2, fpr_2, w_2, b_2, _ = train_model(
        data=experiment_data,
        use_global_objectives=True,
        criterion=criterion,
        metric_func=true_positive_at_false_positive,
        at_target_rate=TARGET_FPR,
        obj_type='TPR',
        at_target_type='FPR',
        train_iteration=TRAIN_ITERATIONS,
        lr=LEARNING_RATE,
        num_checkpoints=NUM_CHECKPOINTS)
    print('true_positives_at_false_positives_loss tpr '
          'at requested fpr is {:.2f}@{:.2f}'.format(tpr_2, fpr_2))

    plot_results(
        data=experiment_data,
        w_1=w_1, b_1=b_1, threshold=threshold,
        w_2=w_2, b_2=b_2,
        obj_type="TPR",
        at_target_type="FPR",
        at_target_rate=TARGET_FPR)

def main(unused_argv):
    del unused_argv
    experiment_data = create_training_and_eval_data_for_experiment(
        **EXPERIMENT_DATA_CONFIG)

    print('### Training with cross_entropy loss:')
    p_1, r_1, w_1, b_1, threshold = train_model(
        data=experiment_data,
        use_global_objectives=False,
        metric_func=precision_at_recall,
        at_target_rate=TARGET_RECALL,
        obj_type='P',
        at_target_type='R',
        train_iteration=TRAIN_ITERATIONS,
        lr=LEARNING_RATE,
        num_checkpoints=NUM_CHECKPOINTS)
    print('cross_entropy_loss precision at requested recall '
          'is {:.2f}@{:.2f}\n'.format(p_1, r_1))

    criterion = PRLoss(target_recall=TARGET_RECALL, num_labels=1, dual_factor=1.0)
    print('\n\n### training precision@recall loss:')
    p_2, r_2, w_2, b_2, _ = train_model(
        data=experiment_data,
        use_global_objectives=True,
        criterion=criterion,
        metric_func=precision_at_recall,
        at_target_rate=TARGET_RECALL,
        obj_type='P',
        at_target_type='R',
        train_iteration=TRAIN_ITERATIONS,
        lr=LEARNING_RATE,
        num_checkpoints=NUM_CHECKPOINTS)
    print('precision_at_recall_loss precision at requested recall '
          'is {:.2f}@{:.2f}'.format(p_2, r_2))

    plot_results(
        data=experiment_data,
        w_1=w_1, b_1=b_1, threshold=threshold,
        w_2=w_2, b_2=b_2,
        obj_type="P",
        at_target_type="R",
        at_target_rate=TARGET_RECALL)

def main(data_directory, exp_directory, epochs, batch_size):
    # Create the deeplabv3 resnet101 model, which is pretrained on a subset of
    # COCO train2017 covering the 20 categories present in the Pascal VOC dataset.
    model = createDeepLabv3()
    model.train()
    data_directory = Path(data_directory)

    # Create the experiment directory if not present
    exp_directory = Path(exp_directory)
    if not exp_directory.exists():
        exp_directory.mkdir()

    # Specify the loss function
    criterion = torch.nn.MSELoss(reduction='mean')
    # Specify the optimizer with a lower learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    # Specify the evaluation metrics
    metrics = {'f1_score': f1_score, 'auroc': roc_auc_score}

    # Create the dataloader
    dataloaders = datahandler.get_dataloader_single_folder(
        data_directory, batch_size=batch_size)
    _ = train_model(model,
                    criterion,
                    dataloaders,
                    optimizer,
                    bpath=exp_directory,
                    metrics=metrics,
                    num_epochs=epochs)

    # Save the trained model
    torch.save(model, exp_directory / 'weights.pt')

def main():
    train_config = EasyDict(dict(
        # device
        device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
        # training configuration
        num_epochs=5,
        num_classes=10,
        batch_size=100,
        learning_rate=1e-3,
    ))

    datasets = get_datasets()
    model = ConvNet(num_classes=train_config.num_classes).to(train_config.device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=train_config.learning_rate)

    train_model(train_config, model, optimizer, criterion, datasets)
    torch.save({'model': model.state_dict()}, 'runs/convnet_exp_1.ckpt')

def main():
    """Run the pipeline."""
    test, train = data_loader(data_path)
    X_train, tfidf = preprocess(train, True)
    X_test, _ = preprocess(test, tfidf)
    y_train = train.author
    model = train_model(X_train, y_train)
    sub = pd.Series(model.predict(X_test))
    sub.to_csv(os.path.join(output_path, "simple_submission.csv"), index=False)

def main():
    args = parse_args_finetuning_pruning()

    print('------ Parameters for finetuning ------')
    for parameter, value in args.__dict__.items():
        print(f'{parameter}: {value}')
    print('---------------------------------------')

    if args.model_path is None:
        if args.verbose:
            print(f"No model was given, training {args.model} on {args.dataset} "
                  f"with {args.n_epochs} epochs.")
        model = train_model(args)
    else:
        model = torch.load(args.model_path)

    try:
        os.mkdir("temp")
    except FileExistsError:
        pass
    torch.save(model, "temp/model_finetuning_parameters.pt")

    if not args.download and args.data_dir == '../data':
        raise ValueError("ERROR: please provide the data directory from which to take the data.")

    kwargs = {'num_workers': 1, 'pin_memory': True} if (torch.cuda.is_available() and args.use_cuda) else {}
    device = torch.device("cuda:0" if (torch.cuda.is_available() and args.use_cuda) else "cpu")

    loader_class = get_loader(args.dataset)
    loader_object = loader_class(args.data_dir, args.batch_size, args.test_batch_size,
                                 args.custom_transforms, args.crop_size)
    loader_train = loader_object.get_loader(train=True, download=args.download, kwargs=kwargs)
    loader_eval = loader_object.get_loader(train=False, download=args.download, kwargs=kwargs)

    baseline_accuracy = eval(model, loader_eval, device, args.verbose)
    accuracy_list = [baseline_accuracy]

    n_epochs_retrain = args.n_epochs_retrain
    for n_pruning_epochs in range(1, n_epochs_retrain + 1):
        model_ = torch.load("temp/model_finetuning_parameters.pt")
        accuracy_list.append(
            gradual_linear_pruning(model_, args.final_sparsity, loader_train, loader_eval,
                                   n_epochs_retrain, n_pruning_epochs, 1, device,
                                   args.optimizer, args.loss, args.lr, args.verbose,
                                   baseline_accuracy, args.save_to, False,
                                   args.pruning_method))

    if args.show_plot:
        plt.plot(np.arange(n_epochs_retrain + 1), accuracy_list, label='Accuracy')
        plt.xlabel('Pruning rate')
        plt.ylabel('Accuracy')
        plt.legend(loc="lower left")
        plt.show()

def run_train(datasets=[
        'E_1', 'E_7', 'E_21', 'F_1', 'F_1a', 'F_2', 'F_2k', 'F_7', 'F_21'
], models=['nn', 'hu', 'lee', 'li', 'hamida', 'mou'], runs=10):
    """Run a sequence of training for DeepHyperBlood experiments.

    Parameters:
        datasets (list of strings): images to run experiments on.
        models (list of strings): models to be evaluated.
        runs (int): number of runs.

    Returns:
        (Nothing, the trained models are saved as a file)
    """
    for dataset in datasets:
        for model in models:
            options = get_default_run_options(model, dataset, runs, sampling_mode='fixed')
            img, gt = load_and_update(options)
            train_model(img, gt, options)

def main():
    '''Main training code.'''
    # training setting
    train_config = EasyDict(
        dict(
            # device
            device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
            # RNN parameters
            sequence_length=28,
            input_size=28,
            hidden_size=128,
            num_layers=2,
            # training configuration
            num_epochs=10,
            num_classes=10,
            batch_size=100,
            num_workers=6,
            learning_rate=1e-3,
            power=0.9,
            model_path='runs/rnn_exp_1',
        ))

    # additional objects (model, datasets, criterion, optimizer, scheduler) for training
    train_config.additional_preprocess = additional_preprocess(train_config)
    train_config.datasets = get_datasets()
    train_config.model = get_model(train_config)
    train_config.criterion = get_loss()
    max_iter = get_max_iter(train_config, len(train_config.datasets.train))
    train_config.optimizer, train_config.scheduler = get_quick_optimizer(
        train_config.model, max_iter,
        base_lr=train_config.learning_rate,
        power=train_config.power)

    train_model(train_config)

def start_train():
    # extractor, classifier = extract_vgg16(models.vgg16(pretrained=True))
    # head = VGG16RoIHead(classifier, 3, (7, 7))
    extractor, classifier = extract_resnet101(models.resnet101(pretrained=True))
    head = ResNet101RoIHead(classifier, 3)
    model = FastRCNN(extractor, head)

    dataloader = build_dataloader(table_only=False)
    testloader = build_testloader(table_only=False)

    device = torch.device("cuda:0")
    # device = torch.device("cpu")
    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # next time adjust the target creator threshold
    model = train_model(model, device, dataloader, testloader)
    torch.save(model.state_dict(), "resnet101_params_0.5_0.0_target.pkl")

def evaluate_model_finetuned(model: keras.Model, train_data, test_data, epochs,
                             repetitions=100, batch_size=32):
    X_train, y_train = train_data
    X_test, y_test = test_data
    original_weights = model.get_weights()

    model_MSE_all = np.zeros((1, repetitions))
    for N in epochs:
        # Storage arrays for i iterations of each epoch number N
        model_MSE_epoch = np.array([])
        for i in range(repetitions):
            # Reset model weights every repetition
            model.set_weights(original_weights)
            history = trainer.train_model(
                model,
                x_train=X_train,
                y_train=y_train,
                optimizer=keras.optimizers.Adam(learning_rate=0.001),
                validation_split=None,
                epochs=N,
                batch_size=batch_size,
                summary=False,
                verbose=0)
            model_MSE = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)
            model_MSE_epoch = np.append(model_MSE_epoch, model_MSE)
        model_MSE_all = np.append(model_MSE_all, [model_MSE_epoch], axis=0)

    model.set_weights(original_weights)  # Reset model
    model_MSE_all = np.delete(model_MSE_all, 0, axis=0)
    return model_MSE_all

def create_model():
    """
    -------------------------------------------------------------------------
    Model architecture and weight training
    -------------------------------------------------------------------------
    """
    global nn, test_set, test_str

    ar.TRAIN = args['TRAIN']
    nn = ar.create_model()

    if args['LOAD'] is not None:
        if not ar.load_model(nn, args['LOAD']):
            ms.print_err("Failed to load weights from {}".format(args['LOAD']))

    if args['TRAIN']:
        lg = os.path.join(dir_current, log_time)
        hs = tr.train_model(nn, lg)
        tr.plot_history(hs, os.path.join(dir_current, 'loss'))
        if SAVE:
            ar.save_model(nn)

def train_model(model, epochs, ecg_noisy, ecg_clean, train_pct=0.8):
    # Train a new model
    # move model to be run by gpu
    train_model = model().cuda()
    train_model.double()

    # start training the model
    losses = tr.train_model(model=train_model,
                            epochs=epochs,
                            ecg_noisy=ecg_noisy,
                            ecg_clean=ecg_clean,
                            train_pct=train_pct)

    # saved model will have model_YYYY-MM-DD_hhmm.pt format
    save_file_name = 'model_' + str(get_local_time()) + '.pt'
    torch.save(train_model.state_dict(), save_file_name)
    print(f'Saved {save_file_name}')

    return train_model

def main(checkpoint_folder='checkpoints/'):
    model_conv, iter = load_available_model(checkpoint_folder)
    if not model_conv:
        logging.info("No saved model found; starting train with a pretrained resnet.")
        model_conv = models.resnet152(pretrained=True)
        for param in model_conv.parameters():
            param.requires_grad = False
        num_ftrs = model_conv.fc.in_features
        model_conv.fc = nn.Linear(num_ftrs, 2)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model_conv = model_conv.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

    train_loader, val_loader, _ = get_data_loaders(input_dir="images",
                                                   batch_size=10,
                                                   num_workers=6,
                                                   val_size=200,
                                                   test_size=200)
    logging.info("starting train")
    model_conv = train_model(model_conv, train_loader, val_loader, criterion,
                             optimizer_conv, exp_lr_scheduler, checkpoint_folder,
                             device, num_epochs=25)

# Specify the loss function
# criterion = torch.nn.MSELoss(reduction='mean')
# Dice/F1 score - https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
criterion = smp.utils.losses.DiceLoss()
# Specify the optimizer with a lower learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Specify the evaluation metrics
# IoU/Jaccard score - https://en.wikipedia.org/wiki/Jaccard_index
metrics = {'f1_score': f1_score, 'auroc': roc_auc_score}
# metrics = [
#     smp.utils.metrics.IoU(threshold=0.5),
# ]

# Create the dataloader
dataloaders = datahandler.get_dataloader_single_folder(data_dir, batch_size=batchsize)
trained_model = train_model(model,
                            criterion,
                            dataloaders,
                            optimizer,
                            bpath=bpath,
                            metrics=metrics,
                            num_epochs=epochs)

# Save the trained model
torch.save({'model_state_dict': trained_model.state_dict()},
           os.path.join(bpath, 'state_dict.pt'))
torch.save(model, os.path.join(bpath, 'weights.pt'))

def main():
    '''Run training and model saving... see args for options.'''
    parser = argparse.ArgumentParser()
    parser.add_argument('--bsize',
                        help='mini batch size, lower if have memory issues',
                        type=int, default=32)
    parser.add_argument('--learning_rate', help='learning rate',
                        type=float, default=0.001)
    parser.add_argument('--lrs',
                        help='learning rate step decay, ie how many epochs to wait before decaying rate',
                        type=int, default=4)
    parser.add_argument('--lrsg',
                        help='learning rate step decay factor, gamma decay rate',
                        type=float, default=0.1)
    parser.add_argument('--L2', help='L2 weight decay', type=float, default=0.01)
    parser.add_argument('--num_epochs', help='number of epochs', type=int, default=12)
    parser.add_argument('--random_seed',
                        help='use random seed, use 0 for false, 1 for generate, and more than 2 to seed',
                        type=int, default=1)
    parser.add_argument('--model_type', help='retrain or finetune', type=str, default='retrain')
    parser.add_argument('--train_dir', help='train directory in data root', type=str, default='train5')
    parser.add_argument('--model_dir', help='model directory', type=str, default='../data/models/')
    parser.add_argument('--val_dir', help='validation directory in data root', type=str, default='val5')
    parser.add_argument('--data_dir', help='data directory', type=str, default='../data')
    parser.add_argument('--print_class_results', dest='print_class_results', action='store_true')
    parser.add_argument('--no_print_class_results', dest='print_class_results', action='store_false')
    parser.add_argument('--print_batches', dest='print_batches', action='store_true')
    parser.add_argument('--no_print_batches', dest='print_batches', action='store_false')
    parser.set_defaults(print_class_results=True)
    parser.set_defaults(print_batches=True)

    # parse the args
    args = parser.parse_args()

    print('Settings for training:', 'batch size:', args.bsize,
          'epochs:', args.num_epochs, 'learning rate:', args.learning_rate,
          'lr decay', args.lrs, 'gamma', args.lrsg)

    if args.random_seed == 1:
        random_seed = random.randint(1, 1000)
        print('Random seed:', random_seed)
        # CPU seed
        torch.manual_seed(random_seed)
        # GPU seed
        torch.cuda.manual_seed_all(random_seed)
    else:
        random_seed = args.random_seed

    use_gpu = torch.cuda.is_available()

    data_transforms = {
        'train':
        transforms.Compose([
            transforms.Scale(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.4914, 0.4822, 0.4465],
                                 [0.2023, 0.1994, 0.2010])
        ]),
        'val':
        transforms.Compose([
            transforms.Scale(224),
            transforms.ToTensor(),
            transforms.Normalize([0.4914, 0.4822, 0.4465],
                                 [0.2023, 0.1994, 0.2010])
        ]),
    }

    image_datasets = {
        'train':
        datasets.ImageFolder(os.path.join(args.data_dir, args.train_dir),
                             data_transforms['train']),
        'val':
        datasets.ImageFolder(os.path.join(args.data_dir, args.val_dir),
                             data_transforms['val']),
        'test':
        datasets.ImageFolder(os.path.join(args.data_dir, 'test'),
                             data_transforms['val']),
    }

    if use_gpu:
        dataloaders = {
            x: torch.utils.data.DataLoader(image_datasets[x],
                                           batch_size=args.bsize,
                                           shuffle=True,
                                           num_workers=8,
                                           pin_memory=True)
            for x in ['train', 'val', 'test']
        }
    else:
        dataloaders = {
            x: torch.utils.data.DataLoader(image_datasets[x],
                                           batch_size=args.bsize,
                                           shuffle=True,
                                           num_workers=8)
            for x in ['train', 'val', 'test']
        }

    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}

    batch_frequency = 100  # assume batch sizes are the same
    print_sizes = {
        x: len(image_datasets[x]) // (args.bsize * batch_frequency)
        for x in ['train', 'val', 'test']
    }

    class_names = image_datasets['train'].classes
    nb_classes = len(class_names)

    print('Data set sizes:', dataset_sizes)
    print('Class names:', class_names)
    print('Total classes:', nb_classes)

    if args.model_type == 'retrain':
        model_conv = RtResnet18ly2(nb_classes)
        model_name = 'rt_resnet18ly2'
        print('Model name:', model_name)
        # optimize all parameters when we retrain
        optimizer_conv = optim.Adam(model_conv.parameters(),
                                    lr=args.learning_rate,
                                    weight_decay=args.L2)
    elif args.model_type == 'finetune':
        model_conv = FtResnet18(nb_classes)
        model_name = 'ft_resnet18'
        print('Model name:', model_name)
        # optimize only the last layers when we fine tune
        optimizer_conv = optim.Adam(
            list(model_conv.preclassifier.parameters()) +
            list(model_conv.classifier.parameters()),
            lr=args.learning_rate)
    else:
        sys.exit('Error check model type')

    if use_gpu:
        model_conv = model_conv.cuda()
        criterion = nn.CrossEntropyLoss().cuda()
    else:
        criterion = nn.CrossEntropyLoss()

    # Decay LR by a factor of lrsg (eg 0.1) every lrs epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv,
                                           step_size=args.lrs,
                                           gamma=args.lrsg)

    model_conv, val_acc = train_model(
        model_conv, criterion, optimizer_conv, exp_lr_scheduler, class_names,
        args.bsize, args.model_dir, model_name, print_sizes, data_transforms,
        image_datasets, dataloaders, dataset_sizes, use_gpu, args.num_epochs,
        args.print_class_results, args.print_batches)

    # evaluate test set
    test_model(model_conv, criterion, class_names, args.bsize, args.model_dir,
               model_name, print_sizes, dataloaders, dataset_sizes, use_gpu, True)

    # write out best model to disk
    val_acc = round(100 * val_acc, 1)
    torch.save(model_conv.state_dict(),
               args.model_dir + model_name + '_va_' + str(val_acc) + '_model_wts.pth')
    return

        2: hyper_params[2]['name'],
        3: hyper_params[3]['name'],
        4: hyper_params[4]['name']})
print(df)
print()

# Loop through features and train with given hyperparameters
for params in hyper_params:
    column_name = params['name']
    if (column_name == 'Cancer' or column_name == 'Heart Disease'
            or column_name == 'Alsheimers' or column_name == 'Total'):
        continue

    weight, bias, error, epoch_data = trainer.train_model(
        feature=df[column_name],
        label=df[label_name],
        learning_rate=params['learning_rate'],
        number_epochs=params['epochs'],
        batch_size=params['batch_size'])
    print(f'bias={bias}, weight={weight}')
    print()

    plotter.plot_model(title='Causes of Death',
                       feature_title=column_name,
                       label_title=label_name,
                       weight=weight,
                       bias=bias,
                       feature_data=df[column_name],
                       label_data=df[label_name])
    plotter.plot_loss(epoch_data=epoch_data, root_mean_squared_error=error)

optimizer_ft = optim.SGD([{
    'params': model_ft.parameters(),
    'weight_decay': 0.000001
}, {
    'params': margin.parameters(),
    'weight_decay': 0.000001
}], lr=0.001, momentum=0.9, nesterov=True)

# Decay LR by a factor of 0.1 every 24 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=24, gamma=0.1)

model_ft, margin = train_model(dataloaders,
                               dataset_sizes,
                               model_ft,
                               class_names,
                               criterion,
                               args.save_train_results_dir,
                               margin,
                               optimizer_ft,
                               exp_lr_scheduler,
                               num_epochs=args.num_epoch)

torch.save(model_ft.state_dict(), save_model + '/' + 'finetune_model.pkl')
torch.save(margin.state_dict(), save_model + '/' + 'finetune_model_margin.pkl')

# After training, visualize some prediction results on the validation data
visualize_model(model_ft, dataloaders, class_names, margin)

def train_REPTILE_simple(model: keras.Model, dataset, training_keys, epochs=1,
                         lr_inner=0.01, lr_meta=0.01, batch_size=32,
                         validation_split=0.2, logging=1,
                         stopping_threshold=None, stopping_number=None,
                         lr_scheduler=None, show_plot=True):
    print("Beginning REPTILE training.")
    stop_counter = 0
    model_copy = keras.models.clone_model(model)
    # Runs faster with optimizer initialised here
    meta_optimizer = keras.optimizers.Adam(learning_rate=lr_meta)
    X_, y_ = dataset

    epoch_train_losses = []
    epoch_val_losses = []
    for epoch in range(epochs):
        epoch_start = time.time()
        epoch_train_loss = []
        epoch_val_loss = []
        if lr_scheduler:
            lr_inner, lr_meta = lr_scheduler(epoch + 1)

        for i, key in enumerate(training_keys):
            # Inner loop for task i, SGD/Adam on the learner model
            _x, _y = X_[key], y_[key]
            model_copy.set_weights(model.get_weights())
            # model_copy = mlu.copy_model(model, _x)
            history = trainer.train_model(
                model_copy,
                x_train=_x,
                y_train=_y,
                optimizer=keras.optimizers.Adam(learning_rate=lr_inner),
                loss='mse',
                metrics=None,
                validation_split=validation_split,
                epochs=1,
                batch_size=batch_size,
                summary=False,
                verbose=0)

            # Log losses of each task
            task_train_loss = history.history['loss'][0]
            epoch_train_loss.append(task_train_loss)
            try:
                task_val_loss = history.history['val_loss'][0]
                epoch_val_loss.append(task_val_loss)
            except:
                pass

            # Meta-update step per task: phi <- phi + lr_meta*(phi~ - phi)
            updated_weights = []
            phi_tilde = model_copy.get_weights()
            phi = model.get_weights()
            directions = []
            for j in range(len(phi)):
                direction = phi[j] - phi_tilde[j]
                delta = lr_meta * (phi[j] - phi_tilde[j])
                new_weight = phi[j] + delta
                updated_weights.append(new_weight)
                directions.append(direction)
            # model.set_weights(updated_weights)
            # return directions
            meta_optimizer.apply_gradients(zip(directions, model.trainable_variables))
            # del model_copy  # Cleanup to save memory?

        # Logging overall epoch losses
        _train_loss = np.mean(epoch_train_loss)
        epoch_train_losses.append(_train_loss)
        try:
            _val_loss = np.mean(epoch_val_loss)
            epoch_val_losses.append(_val_loss)
        except:
            pass

        # Logging every logging steps
        if logging:
            if (epoch + 1) % logging == 0:
                print(f"Epoch {epoch + 1} / {epochs} completed in {time.time() - epoch_start:.2f}s")
                try:
                    print(f"Epoch train loss: {_train_loss}, val loss: {_val_loss}")
                except:
                    print(f"Epoch train loss: {_train_loss}")

        if stopping_threshold is not None and len(epoch_train_losses) >= 2:
            if abs(epoch_train_losses[-1] - epoch_train_losses[-2]) < stopping_threshold:
                stop_counter += 1
            else:
                stop_counter = 0  # Reset stop counter
            if stop_counter >= stopping_number:
                print(f"No significant change in training loss for {stopping_number} epochs.")
                break  # Exit training early

    if show_plot:
        plt.plot(epoch_train_losses)
        try:
            plt.plot(epoch_val_losses)
        except:
            pass
        plt.show()

    try:
        output = {'loss': epoch_train_losses, 'val_loss': epoch_val_losses}
    except:
        output = {'loss': epoch_train_losses}
    return output

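# A minimal, self-contained sketch (toy numbers, assumed values only) of the
# Reptile outer update performed above via meta_optimizer.apply_gradients:
# treating (phi - phi_tilde) as a pseudo-gradient and taking a descent step
# moves the meta-weights phi toward the task-adapted weights phi_tilde, i.e.
# phi <- phi + lr_meta * (phi_tilde - phi). The snippet above feeds these
# directions to Adam, so its actual step sizes are adaptive rather than fixed.
import numpy as np

phi = np.array([1.0, 2.0])        # meta-weights before adapting to a task
phi_tilde = np.array([0.0, 3.0])  # weights after the inner-loop adaptation
lr_meta = 0.1
pseudo_grad = phi - phi_tilde     # direction handed to the meta optimizer
phi_new = phi - lr_meta * pseudo_grad
print(phi_new)                    # [0.9 2.1] -- one step toward phi_tilde
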
import os
import sys

from network import R2Plus1DTSNClassifier
from dataset_activity import VideoDatasetTSN as VideoDataset
from torch.utils.data import DataLoader
from trainer import train_model

train_list = '../activitynet1.3/training.txt'
val_list = '../activitynet1.3/validation.txt'
save_path = 'r2p1d_tsn_model_activity.pth'

# build model
num_classes = 200
model = R2Plus1DTSNClassifier(num_classes=num_classes)

# build dataset
train_dataloader = DataLoader(VideoDataset(train_list),
                              batch_size=8, shuffle=True, num_workers=4)
val_dataloader = DataLoader(VideoDataset(val_list, mode='val'),
                            batch_size=2, num_workers=2)

# train model
train_model(model, train_dataloader, val_dataloader, num_epochs=70, path=save_path)

# Enable dual-GPU (DataParallel) training
if torch.cuda.device_count() > 1 and DOUBLE_CUDA:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)
    DEVICE = 0

# Move the model to the GPU
if torch.cuda.is_available():
    model.cuda(DEVICE)

# Loss function
criterion = torch.nn.CrossEntropyLoss().cuda(DEVICE)

# Write the CSV log header
logs_toCSV(header=True)

# Training
optimizer = torch.optim.ASGD(model.parameters(), lr=1e-4, lambd=1e-4,
                             alpha=0.75, t0=1e6, weight_decay=1e-4)
scheduler = GradualWarmupScheduler(optimizer, multiplier=1000, total_epoch=10)
train_model(model, model_name, dataloaders, criterion, optimizer, DEVICE,
            scheduler=scheduler, test_size=test_size, num_epochs=[0, 20])

# Load the best model so far
model = load_parameter(model, model_name, type='acc_model')
optimizer = torch.optim.ASGD(model.parameters(), lr=1e-2, lambd=1e-4,
                             alpha=0.75, t0=1e6, weight_decay=1e-4)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.8, patience=3,
                              verbose=True, threshold=1e-4, threshold_mode='rel',
                              cooldown=0, min_lr=0, eps=1e-86)
train_model(model, model_name, dataloaders, criterion, optimizer, DEVICE,
            scheduler, test_size=test_size, num_epochs=[20, 200])

# python -u p_main.py 2>&1 | tee logs/csv/20190323_train.log

def train_model():
    # Init device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize a new wandb run
    wandb.init()

    # Config is a variable that holds and saves hyperparameters and inputs
    config = wandb.config

    # Load the meta data file
    df = pd.read_csv('data/train.csv')
    df = df.drop(['timestamp'], axis=1)
    df, _ = utility.encode_labels(df)
    num_classes = len(df['label'].value_counts())

    # Build the dataset
    train_loader, valid_loader = dl.get_train_valid_loader(
        df,
        data_dir='data/train_images',
        batch_size=config.batch_size,
        image_size=IMAGE_SIZE,
        augment=True,
        random_seed=0)

    # Make resnet
    model = utility.initialize_net(num_classes, config.resnet_type,
                                   config.use_feature_extract)
    model = model.to(device)

    # Gather the parameters to be optimized/updated in this run.
    params_to_update = utility.get_model_params_to_train(
        model, config.use_feature_extract)

    # Define criterion + optimizer
    criterion = nn.CrossEntropyLoss()
    if config.optimizer == 'sgd':
        optimizer = optim.SGD(params_to_update, lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(params_to_update, lr=config.learning_rate)
    elif config.optimizer == 'adam':
        optimizer = optim.Adam(params_to_update, lr=config.learning_rate)

    # Define scheduler
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer=optimizer,
        max_lr=10,
        epochs=config.epochs,
        anneal_strategy=config.scheduler,
        steps_per_epoch=len(train_loader))

    trainer.train_model(device=device,
                        model=model,
                        optimizer=optimizer,
                        criterion=criterion,
                        train_loader=train_loader,
                        valid_loader=valid_loader,
                        scheduler=scheduler,
                        epochs=config.epochs,
                        send_to_wandb=True)

resize_height = 256
crop_size = 224
clip_len = 8

# build model
num_classes = 101
model = TSNClassifier(num_classes=num_classes, clip_len=clip_len,
                      base_model='resnet101', pretrained=False)

# build dataset
train_dataloader = DataLoader(VideoDataset(data_path, im_root,
                                           resize_width=resize_width,
                                           resize_height=resize_height,
                                           crop_size=crop_size,
                                           clip_len=clip_len),
                              batch_size=16, shuffle=True, num_workers=4)
val_dataloader = DataLoader(VideoDataset(data_path, im_root,
                                         resize_width=resize_width,
                                         resize_height=resize_height,
                                         crop_size=crop_size,
                                         clip_len=clip_len,
                                         mode='val'),
                            batch_size=2, num_workers=2)

# train model
train_model(model, train_dataloader, val_dataloader, path=save_path)

DATA_ROOT_DIR = os.path.join("./", "Data/")
TRAINING_DIR = os.path.join(DATA_ROOT_DIR, "Training/")
TESTING_DIRS = [
    os.path.join(DATA_ROOT_DIR, "Testing/", f"Testing{i}") for i in range(3)
]
MODEL_ROOT_DIR = os.path.join("./", "Model/")
NEGATIVE_DIR = os.path.join(DATA_ROOT_DIR, "Negatives/")
GLUED_IMAGE_PATH = os.path.join(DATA_ROOT_DIR, "glued_surr.jpg")

training, training_labels = load_glued_image(GLUED_IMAGE_PATH, IMAGE_SIZE)

triplet_model = torch.load(os.path.join(MODEL_ROOT_DIR, TRIPLET_MODEL_NAME))
classf_model = ClassificationNet(triplet_model.embedding_net, 6).cuda()

criterion = nn.BCELoss()
optimizer = optim.SGD(triplet_model.embedding_net.parameters(), lr=LR, momentum=0.9)

# Train the model
classf_model = train_model(classf_model, criterion, optimizer, training,
                           training_labels, n_epoch=N_EPOCH)

# Save the trained model
torch.save(classf_model, os.path.join(MODEL_ROOT_DIR, CLASSF_MODEL_NAME))

def main():
    '''Run CV training and model saving... see args for options.'''
    parser = argparse.ArgumentParser()
    parser.add_argument('--bsize',
                        help='mini batch size, lower if have memory issues',
                        type=int, default=32)
    parser.add_argument('--learning_rate', help='learning rate',
                        type=float, default=0.001)
    parser.add_argument('--lrs',
                        help='learning rate step decay, ie how many epochs to wait before decaying rate',
                        type=int, default=4)
    parser.add_argument('--lrsg',
                        help='learning rate step decay factor, gamma decay rate',
                        type=float, default=0.1)
    parser.add_argument('--L2', help='L2 weight decay', type=float, default=0.01)
    parser.add_argument('--num_epochs', help='number of epochs', type=int, default=12)
    parser.add_argument('--num_folds', help='number of CV folds', type=int, default=5)
    parser.add_argument('--label_file', help='csv file for labels', type=str,
                        default='corrected_labels_training.csv')
    parser.add_argument('--random_seed',
                        help='use random seed, use 0 for false, 1 for generate, and more than 2 to seed',
                        type=int, default=1)
    parser.add_argument('--model_type', help='retrain or finetune', type=str, default='retrain')
    parser.add_argument('--model_dir', help='model directory', type=str, default='../data/models/')
    parser.add_argument('--data_dir', help='data directory', type=str, default='../data')
    parser.add_argument('--print_class_results', dest='print_class_results', action='store_true')
    parser.add_argument('--no_print_class_results', dest='print_class_results', action='store_false')
    parser.add_argument('--print_batches', dest='print_batches', action='store_true')
    parser.add_argument('--no_print_batches', dest='print_batches', action='store_false')
    parser.set_defaults(print_class_results=True)
    parser.set_defaults(print_batches=True)

    # parse the args
    args = parser.parse_args()

    print('Settings for CV training:', 'batch size:', args.bsize,
          'epochs:', args.num_epochs, 'learning rate:', args.learning_rate,
          'lr decay', args.lrs, 'gamma', args.lrsg)

    if args.random_seed == 1:
        random_seed = random.randint(1, 1000)
        print('Random seed:', random_seed)
        # CPU seed
        torch.manual_seed(random_seed)
        # GPU seed
        torch.cuda.manual_seed_all(random_seed)
    else:
        random_seed = args.random_seed

    use_gpu = torch.cuda.is_available()

    data_transforms = {
        'train':
        transforms.Compose([
            transforms.Scale(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.4914, 0.4822, 0.4465],
                                 [0.2023, 0.1994, 0.2010])
        ]),
        'val':
        transforms.Compose([
            transforms.Scale(224),
            transforms.ToTensor(),
            transforms.Normalize([0.4914, 0.4822, 0.4465],
                                 [0.2023, 0.1994, 0.2010])
        ]),
    }

    # to perform CV first need a list of files
    df = pd.read_csv(args.data_dir + '/' + args.label_file,
                     header=None, names=['label', 'filename'])
    # to categorical
    df['class'] = df['label'].apply(lambda x: 0 if x == 'other' else 1)
    # fix filenames
    df['filename'] = df['filename'].apply(lambda x: x.strip())
    print(df.head())
    print(df.tail())

    Xmap = dict(zip(list(range(len(df))), df['filename'].values))
    ymap = dict(zip(list(range(len(df))), df['label'].values))
    X = np.array(list(range(len(df))))
    # splits are based on y, so need to represent correct categorical class
    y = np.array(df['class'].values)
    class_names = ['retail', 'other']

    skf = StratifiedKFold(n_splits=args.num_folds)
    for train_index, test_index in skf.split(X, y):
        print("%s %s" % (train_index, test_index))
        X_train_files, X_test_files = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        base = os.path.split(os.getcwd())[0]

        train_dir = args.data_dir + '/train_CV'
        # if CV exists remove dirs
        if os.path.isdir(train_dir):
            shutil.rmtree(train_dir)
        os.makedirs(train_dir)
        for c in class_names:
            os.makedirs(train_dir + '/' + c)
        for i in X_train_files:
            os.symlink(os.path.join(base, 'data', ymap[i], Xmap[i]),
                       os.path.join(base, 'data/train_CV', ymap[i], Xmap[i]))

        val_dir = args.data_dir + '/val_CV'
        if os.path.isdir(val_dir):
            shutil.rmtree(val_dir)
        os.makedirs(val_dir)
        for c in class_names:
            os.makedirs(val_dir + '/' + c)
        for i in X_test_files:
            os.symlink(os.path.join(base, 'data', ymap[i], Xmap[i]),
                       os.path.join(base, 'data/val_CV', ymap[i], Xmap[i]))

        image_datasets = {
            'train':
            datasets.ImageFolder(os.path.join(args.data_dir, 'train_CV'),
                                 data_transforms['train']),
            'val':
            datasets.ImageFolder(os.path.join(args.data_dir, 'val_CV'),
                                 data_transforms['val']),
            'test':
            datasets.ImageFolder(os.path.join(args.data_dir, 'test'),
                                 data_transforms['val'])
        }

        if use_gpu:
            dataloaders = {
                x: torch.utils.data.DataLoader(image_datasets[x],
                                               batch_size=args.bsize,
                                               shuffle=True,
                                               num_workers=8,
                                               pin_memory=True)
                for x in ['train', 'val', 'test']
            }
        else:
            dataloaders = {
                x: torch.utils.data.DataLoader(image_datasets[x],
                                               batch_size=args.bsize,
                                               shuffle=True,
                                               num_workers=8)
                for x in ['train', 'val', 'test']
            }

        dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}

        batch_frequency = 100  # assume batch sizes are the same
        print_sizes = {
            x: len(image_datasets[x]) // (args.bsize * batch_frequency)
            for x in ['train', 'val', 'test']
        }

        class_names = image_datasets['train'].classes
        nb_classes = len(class_names)

        print('Data set sizes:', dataset_sizes)
        print('Class names:', class_names)
        print('Total classes:', nb_classes)

        if args.model_type == 'retrain':
            model_conv = RtResnet18ly2(nb_classes)
            model_name = 'rt_resnet18ly2'
            print('Model name:', model_name)
            # optimize all parameters when we retrain
            optimizer_conv = optim.Adam(model_conv.parameters(),
                                        lr=args.learning_rate,
                                        weight_decay=args.L2)
        elif args.model_type == 'finetune':
            model_conv = FtResnet18(nb_classes)
            model_name = 'ft_resnet18'
            print('Model name:', model_name)
            # optimize only the last layers when we fine tune
            optimizer_conv = optim.Adam(
                list(model_conv.preclassifier.parameters()) +
                list(model_conv.classifier.parameters()),
                lr=args.learning_rate)
        else:
            sys.exit('Error check model type')

        if use_gpu:
            model_conv = model_conv.cuda()
            criterion = nn.CrossEntropyLoss().cuda()
        else:
            criterion = nn.CrossEntropyLoss()

        # Decay LR by a factor of lrsg (eg 0.1) every lrs epochs
        exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv,
                                               step_size=args.lrs,
                                               gamma=args.lrsg)

        model_conv, val_acc = train_model(
            model_conv, criterion, optimizer_conv, exp_lr_scheduler, class_names,
            args.bsize, args.model_dir, model_name, print_sizes, data_transforms,
            image_datasets, dataloaders, dataset_sizes, use_gpu, args.num_epochs,
            args.print_class_results, args.print_batches)

        # evaluate test set
        test_model(model_conv, criterion, class_names, args.bsize, args.model_dir,
                   model_name, print_sizes, dataloaders, dataset_sizes, use_gpu, True)

        # write out best model to disk
        val_acc = round(100 * val_acc, 1)
        torch.save(model_conv.state_dict(),
                   args.model_dir + model_name + '_va_' + str(val_acc) + '_model_wts.pth')
    return

def main():
    t = Timer()
    seed_everything(cfg.common.seed)

    logger_path.mkdir(exist_ok=True)
    logging.basicConfig(filename=logger_path / 'train.log', level=logging.DEBUG)

    dh.save(logger_path / 'config.yml', cfg)

    with t.timer('load data'):
        train_x = dh.load('../data/input/train_concated.csv')
        train_org_x = dh.load('../data/input/train.csv')
        train_2019_x = dh.load('../data/input/train_2019.csv')
        test_x = dh.load('../data/input/test.csv')

    with t.timer('make folds'):
        fold_org_df = factory.get_fold(cfg.validation.val1, train_org_x,
                                       train_org_x[[cfg.common.target]])
        fold2019_df = factory.get_fold(cfg.validation.val2, train_2019_x,
                                       train_2019_x[[cfg.common.target]])
        fold_df = pd.concat([fold_org_df, fold2019_df],
                            axis=0, sort=False, ignore_index=True)
        if cfg.validation.val1.single:
            fold_df = fold_df[['fold_0']]
            fold_df /= fold_df['fold_0'].max()

    with t.timer('load features'):
        features = dh.load('../configs/feature/all.yml')['features']
        for f in features:
            train_x[f] = dh.load(f'../features/{f}_train.feather')[f].fillna(-1)
            test_x[f] = dh.load(f'../features/{f}_test.feather')[f].fillna(-1)

    with t.timer('drop several rows'):
        if cfg.common.drop is not None:
            drop_idx = factory.get_drop_idx(cfg.common.drop)
            train_x = train_x.drop(drop_idx, axis=0).reset_index(drop=True)
            fold_df = fold_df.drop(drop_idx, axis=0).reset_index(drop=True)

    with t.timer('train model'):
        result = train_model(run_name, train_x, fold_df, cfg)

    logging.disable(logging.FATAL)
    run_name_cv = f'{run_name}_{result["cv"]:.3f}'
    logger_path.rename(f'../logs/{run_name_cv}')

    with t.timer('predict'):
        preds = predict_test(run_name_cv, test_x, fold_df, cfg)

    with t.timer('post process'):
        duplicates = {
            'ISIC_5224960': 1,
            'ISIC_9207777': 1,
            'ISIC_6457527': 1,
            'ISIC_8347588': 0,
            'ISIC_8372206': 1,
            'ISIC_9353360': 1,
            'ISIC_3689290': 0,
            'ISIC_3584949': 0,
        }
        for image_name, target in duplicates.items():
            idx = test_x[test_x['image_name'] == image_name].index[0]
            preds[idx] = target

    with t.timer('make submission'):
        sample_path = '../data/input/sample_submission.csv'
        output_path = f'../data/output/{run_name_cv}.csv'
        make_submission(y_pred=preds,
                        target_name=cfg.common.target,
                        sample_path=sample_path,
                        output_path=output_path,
                        comp=False)

    with t.timer('kaggle api'):
        kaggle = Kaggle(cfg.compe.compe_name, run_name_cv)
        if cfg.common.kaggle.submit:
            kaggle.submit(comment)

    with t.timer('notify'):
        process_minutes = t.get_processing_time()
        message = f'''{model_name}\ncv: {result["cv"]:.3f}\ntime: {process_minutes:.2f}[h]'''
        send_line(notify_params.line.token, message)

        notion = Notion(token=notify_params.notion.token_v2)
        notion.set_url(url=notify_params.notion.url)
        notion.insert_rows({
            'name': run_name_cv,
            'created': now,
            'model': cfg.model.name,
            'local_cv': round(result['cv'], 4),
            'time': process_minutes,
            'comment': comment
        })

with t.timer('make folds'):
    folds = pd.DataFrame(index=train_df.index)
    folds['fold_id'] = 0
    kf = KFold(n_splits=20, shuffle=True, random_state=compe_params.seed)
    for fold_, (trn_idx, val_idx) in enumerate(kf.split(train_df)):
        folds.loc[val_idx, 'fold_id'] = fold_
    folds['fold_id'] = folds['fold_id'].astype(int)

with t.timer('train model'):
    fold_num = data_params.fold_num
    x_trn = train_df[folds['fold_id'] != fold_num]
    x_val = train_df[folds['fold_id'] == fold_num]

    # pretrained: False
    num_classes = train_params.model_params.n_classes
    model_wight, oof_list, best_score, train_loss_list, val_loss_list, val_score_list = train_model(
        x_trn, x_val, train_params, num_classes, weights, device)

    np.save(f'../logs/{run_name}/oof_gr.npy', oof_list[0])
    np.save(f'../logs/{run_name}/oof_vo.npy', oof_list[1])
    np.save(f'../logs/{run_name}/oof_co.npy', oof_list[2])
    torch.save(model_wight, f'../logs/{run_name}/weight_best.pt')

    save_png(run_name, train_params, train_loss_list, val_loss_list, val_score_list)

logging.disable(logging.FATAL)
logger_path.rename(f'../logs/{run_name}_{best_score:.3f}')
process_minutes = t.get_processing_time(type='hour')

with t.timer('notify'):
    message = f'''{model_name}\ncv: {best_score:.3f}\ntime: {process_minutes:.2f}[h]'''

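# A small self-contained sketch (toy DataFrame, assumed seed) of the
# fold-assignment pattern used in the 'make folds' block above: each row is
# labelled with the id of the fold in which it serves as validation data.
import pandas as pd
from sklearn.model_selection import KFold

toy_df = pd.DataFrame({'x': range(10)})
toy_folds = pd.DataFrame(index=toy_df.index)
toy_folds['fold_id'] = 0
kf = KFold(n_splits=5, shuffle=True, random_state=0)
for fold_, (trn_idx, val_idx) in enumerate(kf.split(toy_df)):
    toy_folds.loc[val_idx, 'fold_id'] = fold_
print(toy_folds['fold_id'].value_counts())  # two rows assigned to each of the 5 folds
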
#################################################
model_ft = model_ft.to(device)
margin = margin.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(),
                         lr=0.001,
                         momentum=0.9,
                         weight_decay=0.000001,
                         nesterov=True)

# Decay LR by a factor of 0.1 every 49 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=49, gamma=0.1)

model_ft = train_model(dataloaders,
                       dataset_sizes,
                       model_ft,
                       criterion,
                       args.save_loss_dir,
                       margin,
                       optimizer_ft,
                       exp_lr_scheduler,
                       num_epochs=args.num_epoch)

torch.save(model_ft.state_dict(), save_model + '/' + 'finetune.pkl')

visualize_model(model_ft, dataloaders, class_names, margin)