def main():
    """Command-line entry point: configure logging and data, then train.

    Reads ``--config`` (required), ``--checkpoint_path`` (optional) and
    ``--name`` (required) from the command line, loads the hyper-parameters
    through ``HParam``, creates the checkpoint and log directories named
    after ``--name``, logs both configurations, builds the train/test data
    loaders and hands everything to ``train``.

    Raises:
        Exception: if ``hp.data.train`` or ``hp.data.test`` is an empty
            string.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-c', '--config', type=str, required=True,
                     help="yaml file for config.")
    cli.add_argument('-p', '--checkpoint_path', type=str, default=None,
                     help="path of checkpoint pt file for resuming")
    cli.add_argument('-n', '--name', type=str, required=True,
                     help="Name of the model. Used for both logging and saving chkpt.")
    args = cli.parse_args()

    hp = HParam(args.config)
    # Serialise both configurations up front so they can be logged and
    # forwarded to the training routine.
    hp_str = yaml.dump(hp)
    args_str = yaml.dump(vars(args))

    # One sub-directory per run name for checkpoints and for logs.
    pt_dir = os.path.join(hp.log.chkpt_dir, args.name)
    log_dir = os.path.join(hp.log.log_dir, args.name)
    for directory in (pt_dir, log_dir):
        os.makedirs(directory, exist_ok=True)

    # The log file name carries the start time so runs do not overwrite
    # each other's logs.
    log_file = os.path.join(log_dir, '%s-%d.log' % (args.name, time.time()))
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    )
    logger = logging.getLogger()
    for message in ('Config by yaml file', hp_str,
                    'Command Line Config', args_str):
        logger.info(message)

    # Both data locations must be configured before any loader is built.
    if hp.data.train == '' or hp.data.test == '':
        logger.error("train or test data directory cannot be empty.")
        raise Exception("Please specify directories of data in %s" % args.config)

    writer = Writer(hp, log_dir)
    train_loader = create_dataloader(hp, args, DataloaderMode.train)
    test_loader = create_dataloader(hp, args, DataloaderMode.test)

    train(args, pt_dir, train_loader, test_loader, writer, logger, hp, hp_str)
def main():
    """Train ``MainNet`` with the module-level configuration values.

    Builds the data loaders and the model, resumes through
    ``auto_load_resume`` when ``model_path/model_name`` already exists
    (otherwise creates that directory and starts at epoch 0 with
    ``init_lr``), stores a time-stamped copy of ``./config.py`` in the save
    directory and calls ``train``.

    Raises:
        AssertionError: if the resumed ``start_epoch`` is not below
            ``end_epoch``.
    """
    # Load the data.
    trainloader, testloader = read_dataset(input_size, batch_size, root, set)

    # Define the model.
    model = MainNet(proposalN=proposalN, num_classes=num_classes, channels=channels)

    # Training criterion.
    criterion = nn.CrossEntropyLoss()

    # Load the checkpoint, or start a fresh run.
    save_path = os.path.join(model_path, model_name)
    if os.path.exists(save_path):
        start_epoch, lr = auto_load_resume(model, save_path, status='train')
        assert start_epoch < end_epoch
    else:
        os.makedirs(save_path)
        start_epoch = 0
        lr = init_lr

    # Move the model to the GPU *before* building the optimizer: torch.optim
    # documents that the optimizer should be constructed from the
    # parameters of the already-moved model.
    model = model.cuda()

    # Define the optimizer and the learning-rate schedule.
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                                weight_decay=weight_decay)
    scheduler = MultiStepLR(optimizer, milestones=lr_milestones, gamma=lr_decay_rate)

    # Save a snapshot of the config parameters next to the checkpoints.
    time_str = time.strftime("%Y%m%d-%H%M%S")
    shutil.copy('./config.py', os.path.join(save_path, "{}config.py".format(time_str)))

    # Start training.
    train(model=model,
          trainloader=trainloader,
          testloader=testloader,
          criterion=criterion,
          optimizer=optimizer,
          scheduler=scheduler,
          save_path=save_path,
          start_epoch=start_epoch,
          end_epoch=end_epoch,
          save_interval=save_interval)
def main(argv=sys.argv):
    """Dispatch the ``train`` and ``predict`` actions of the command line.

    Args:
        argv: argument vector forwarded to ``parse_args``.

    ``train`` calls ``train()``. ``predict`` requires ``args.path`` to be
    an existing folder or an existing file whose extension is listed in
    ``image_extensions``; the path is then handed to ``predictor`` with an
    input type of ``'folder'`` or ``'file'``. Any other action prints a
    usage hint. Problems are reported with ``print`` and the function
    returns ``None`` in every case.
    """
    input_type = None
    args = parse_args(argv)
    folder_or_image = args.path
    action = args.app_action

    if action == 'train':
        train()
    elif action == 'predict' and folder_or_image is None:
        print(
            '\n A path to a folder or image is required e.g /hotels or newhotel.jpg \n for help: run python3 app.py -h'
        )
        return
    elif action == 'predict':
        # if it's not a folder that was supplied, check if it's a file
        if not os.path.isdir(folder_or_image):
            if os.path.isfile(folder_or_image):
                # Use splitext rather than split('.')[1]: the latter picks
                # the wrong token for paths such as './a.jpg' or 'a.b.jpg'
                # and raises IndexError when the name has no dot at all.
                extension = os.path.splitext(folder_or_image)[1].lstrip('.').lower()
                if not extension or extension not in image_extensions:
                    print("\nError: An image file is required. Try again\n")
                    return
                input_type = 'file'
                # add logic before here to pass in the model we want to use in the predictor
                predictor(input_type, folder_or_image)
                return
            print(
                '\nError: Invalid path. Kindly supply a valid folder or image path\n'
            )
            return

        input_type = 'folder'
        # add logic before here to pass in the model we want to use in the predictor
        predictor(input_type, folder_or_image)
        # NOTE(review): `file_name` is not defined in this function; it is
        # presumably a module-level name -- confirm, otherwise this raises
        # NameError after a successful folder prediction.
        print(
            f"\nDone! The '{file_name}' file has been written to respective folders in {folder_or_image}"
        )
    else:
        print(
            '\nAction command is not supported\n for help: run python3 app.py -h'
        )
# Optimizer setup: SGD over per-module parameter groups, followed by a
# step-wise learning-rate decay and the call into the training loop.
base_lr = args.base_lr
momentum = 0.9

if Config.use_backbone:
    # Backbone-only configuration: the classifier trains at the base rate.
    param_groups = [
        {'params': base_params},
        {'params': model.module.classifier.parameters(), 'lr': base_lr},
    ]
else:
    # Full configuration: the three extra heads train at lr_ratio * base_lr.
    param_groups = [
        {'params': base_params},
        {'params': model.module.classifier.parameters(), 'lr': lr_ratio*base_lr},
        {'params': model.module.classifier_swap.parameters(), 'lr': lr_ratio*base_lr},
        {'params': model.module.Convmask.parameters(), 'lr': lr_ratio*base_lr},
    ]
optimizer = optim.SGD(param_groups, lr=base_lr, momentum=momentum)

# Multiply the learning rate by 0.1 every `args.decay_step` scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer,
                                       step_size=args.decay_step,
                                       gamma=0.1)

print('Yantao: v0.0.1')

# train entry
train(Config,
      model,
      epoch_num=args.epoch,
      start_epoch=args.start_epoch,
      optimizer=optimizer,
      exp_lr_scheduler=exp_lr_scheduler,
      data_loader=dataloader,
      save_dir=save_dir,
      data_size=args.crop_resolution,
      savepoint=args.save_point,
      checkpoint=args.check_point)
def main():
    """Run stratified k-fold training driven by module-level configuration.

    The training set returned by ``read_dataset`` is split into
    ``num_folds`` stratified folds. For every fold from ``start_from_fold``
    onwards a model is trained on the fold's training part and evaluated on
    its validation part, with checkpoints going to
    ``model_path/model_name/fold_<k>``. When ``multitask`` is true
    ``MainNetMultitask``/``train_multitask`` are used and stratification is
    done on the first element of each label; otherwise ``MainNet``/``train``
    are used.

    Raises:
        AssertionError: when resuming and the checkpoint's epoch, the
            requested fold or the patience counter is out of range.
    """
    trainset, _, testset, _ = read_dataset(input_size, batch_size, root, set)
    # image will be resize to the input_size
    # batch size means the number of images the nn process before updating the weight and biases
    # root is the root to the dataset
    # set is the dataset name (change in config)

    # The two modes differ only in the model class, the training routine
    # and the labels used for stratification, so select them once here.
    if multitask:
        model_cls, train_fn = MainNetMultitask, train_multitask
    else:
        model_cls, train_fn = MainNet, train

    # Load checkpoint from a fold number
    save_path = os.path.join(model_path, model_name)
    resumed_model = None
    if os.path.exists(save_path):
        load_model_from_path = os.path.join(save_path, f'fold_{start_from_fold}')
        if not os.path.exists(load_model_from_path):
            os.makedirs(load_model_from_path)

        # NOTE(review): auto_load_resume is assumed to load the checkpoint
        # into the model it is given -- confirm. The model is kept so the
        # resumed fold continues from it instead of a fresh model.
        resumed_model = model_cls(proposalN=proposalN,
                                  num_classes=num_classes,
                                  channels=channels)
        start_epoch, lr, patience_counter = auto_load_resume(
            resumed_model, load_model_from_path, status='train')
        print(f'Patience counter starting from: {patience_counter}')
        assert start_epoch < end_epoch, 'end of fold reached, please increment start_from_fold'
        # NOTE(review): folds are numbered 1..num_folds below, so this
        # rejects resuming the last fold -- confirm that is intended.
        assert start_from_fold < num_folds
        assert patience_counter <= patience
    else:
        os.makedirs(save_path)
        start_epoch = 0
        lr = init_lr
        patience_counter = 0

    # save config
    time_str = time.strftime("%Y%m%d-%H%M%S")
    shutil.copy('./config.py',
                os.path.join(save_path, "{}config.py".format(time_str)))

    print(
        '\nSplitting trainset into train and val sets (80:20)\nNote testset is loaded but unused, and will not be used unless test.py is run.'
    )

    # NOTE: the train set is split into train/val; the validation data is
    # stored under the 'test' attribute names for consistency with the
    # rest of the code.
    samples = trainset.train_img_label
    X_train = np.array([image for image, _ in samples])
    y_train = np.array([label for _, label in samples])

    skf = StratifiedKFold(n_splits=num_folds, random_state=seed, shuffle=True)
    if multitask:
        # Stratify on the first element of each label only; the full
        # labels are still what gets attached to the datasets below.
        strat_labels = np.array([label[0] for label in y_train])
    else:
        strat_labels = y_train

    for fold, (train_index, val_index) in enumerate(skf.split(X_train, strat_labels)):
        fold_number = fold + 1
        print(f'Multitask: {multitask}')
        print(f'Random Augmentation: {rand_aug}')
        if rand_aug:
            print(f'N:{N} M: {M}')

        print(f'\n=============== Fold {fold_number} ==================')
        if fold_number < start_from_fold:
            print('Skipping this fold...\n')
            continue

        # Prepare save_path for the fold
        save_path_fold = os.path.join(save_path, f'fold_{fold_number}')

        # Hijack the original dataset objects with this fold's samples.
        # X_train / y_train were captured before the loop, so overwriting
        # the attributes does not affect later folds.
        trainset.train_img_label = list(
            zip(X_train[train_index], y_train[train_index]))
        # attribute name kept as test_img_label for code consistency
        testset.test_img_label = list(
            zip(X_train[val_index], y_train[val_index]))
        print(f'Size of trainset: {len(trainset)}')
        print(f'Size of valset: {len(testset)}')

        # Recreate DataLoaders with train and val sets
        trainloader_fold = torch.utils.data.DataLoader(
            trainset, batch_size=batch_size, shuffle=True,
            num_workers=8, drop_last=False)
        valloader_fold = torch.utils.data.DataLoader(
            testset, batch_size=batch_size, shuffle=False,
            num_workers=8, drop_last=False)

        # Continue from the checkpointed model for the resumed fold only;
        # every other fold starts from a fresh model.
        if resumed_model is not None and fold_number == start_from_fold:
            model = resumed_model
        else:
            model = model_cls(proposalN=proposalN,
                              num_classes=num_classes,
                              channels=channels)
        resumed_model = None

        # Move to the GPU before building the optimizer, as torch.optim
        # recommends.
        model = model.cuda()
        criterion = nn.CrossEntropyLoss()

        # Define optimizers
        # NOTE(review): lr and patience_counter keep their resumed values
        # for later folds; only start_epoch is reset -- confirm intended.
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                                    weight_decay=weight_decay)
        scheduler = MultiStepLR(optimizer, milestones=lr_milestones,
                                gamma=lr_decay_rate)

        train_fn(model=model,
                 trainloader=trainloader_fold,
                 testloader=valloader_fold,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 save_path=save_path_fold,
                 start_epoch=start_epoch,
                 end_epoch=end_epoch,
                 patience_counter=patience_counter,
                 save_interval=save_interval)

        start_epoch = 0  # refresh start_epoch for next fold

        # Clear model and release GPU memory
        del model
        torch.cuda.empty_cache()

        print(f'\n=============== End of fold {fold_number} ==================\n')
def main():
    """Train a single model (no cross-validation) from module-level config.

    Uses ``MainNetMultitask``/``train_multitask`` when ``multitask`` is true
    and ``MainNet``/``train`` otherwise. If ``model_path/model_name`` exists
    the run is resumed through ``auto_load_resume``; otherwise the directory
    is created and training starts at epoch 0 with ``init_lr`` and a
    patience counter of 0.

    Raises:
        AssertionError: when resuming and the checkpoint's epoch or
            patience counter is out of range.
    """
    _, trainloader, _, testloader = read_dataset(input_size, batch_size, root, set)
    # image will be resize to the input_size
    # batch size means the number of images the nn process before updating the weight and biases
    # root is the root to the dataset
    # set is the dataset name (change in config)

    # The two modes differ only in the model class and training routine.
    if multitask:
        model_cls, train_fn = MainNetMultitask, train_multitask
    else:
        model_cls, train_fn = MainNet, train

    # Create the model once, so a resumed checkpoint is loaded into the
    # same object that is trained below.
    model = model_cls(proposalN=proposalN,
                      num_classes=num_classes,
                      channels=channels)

    # Load checkpoint
    save_path = os.path.join(model_path, model_name)
    if os.path.exists(save_path):
        # NOTE(review): auto_load_resume is assumed to load the checkpoint
        # into `model` in place -- confirm.
        start_epoch, lr, patience_counter = auto_load_resume(
            model, save_path, status='train')
        print(f'Patience counter starting from: {patience_counter}')
        assert start_epoch < end_epoch, 'maximum number of epochs reached'
        assert patience_counter <= patience
    else:
        os.makedirs(save_path)
        start_epoch = 0
        lr = init_lr
        patience_counter = 0

    print(f'Multitask: {multitask}')
    print(f'Random Augmentation: {rand_aug}')
    if rand_aug:
        print(f'N:{N} M: {M}')

    # Move to the GPU before building the optimizer, as torch.optim
    # recommends.
    model = model.cuda()
    criterion = nn.CrossEntropyLoss()

    # Define optimizers
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                                weight_decay=weight_decay)
    scheduler = MultiStepLR(optimizer, milestones=lr_milestones,
                            gamma=lr_decay_rate)

    train_fn(model=model,
             trainloader=trainloader,
             testloader=testloader,
             criterion=criterion,
             optimizer=optimizer,
             scheduler=scheduler,
             save_path=save_path,
             start_epoch=start_epoch,
             end_epoch=end_epoch,
             patience_counter=patience_counter,
             save_interval=save_interval)
# Build the vocabulary from the two text columns of the training frame and
# persist it at `dictionary_path`.
dictionary = dataloader.build_vocab(
    dataframes=train_df,
    text_columns=["text1", "text2"],
    save_path=dictionary_path,
)

print("Building iterator!")
train_iter, valid_iter = dataloader.load_batched_data_from(
    train_df,
    valid_df,
    dictionary,
    fixlen=fixlen,
    batch_size=batch_size,
)

# Embeddings: either start from a pretrained matrix or let the model learn
# them. The vocabulary size passed on is len(dictionary) + 2 (presumably
# two reserved ids such as padding/unknown -- TODO confirm).
if not pretrain_path:
    print("Learn embeddings during training!")
    word_matrix = None
else:
    print("Loading pretrain embedding!")
    word_matrix = dataloader.load_embedding_matrix(
        len(dictionary) + 2,
        pretrain_path,
        dictionary,
        dim=300,
        norm=True,
    )

print("Building model!")
MODEL = LSTM_CONCAT(len(dictionary) + 2, device, word_matrix)

print("Training!")
train_model.train(
    MODEL,
    train_iter,
    valid_iter,
    checkpoint_path,
    device=device,
    epochs=epochs,
    print_every=5,
    early_stop_num=15,
)
def main(argv=sys.argv):
    """Dispatch the ``train``, ``predict``, ``delete`` and ``models`` actions.

    Args:
        argv: argument vector forwarded to ``parse_args``.

    * ``train`` -- ``args.model`` names the new model. It must be given and
      must not already be listed by ``all_models()``. Training uses the
      supplied train/test folders when both are existing directories, the
      default folders when neither is, and is refused when only one is.
    * ``predict`` -- ``args.model`` names the model to use
      (``default_model`` when omitted); ``args.path`` must be an existing
      folder or an image file.
    * ``delete`` -- removes the named model through ``model_delete``.
    * ``models`` -- prints ``all_models()``.

    Problems are reported with ``print``; the function always returns
    ``None``.
    """
    args = parse_args(argv)
    action = args.app_action

    # If the action is train, the model is the name of the new model
    # that is going to be trained; if it's predict, the model is the
    # name of the model to use for prediction
    model = args.model

    if action == 'train':
        # Instead of the folder_paths being None if they were not supplied
        # make them empty strings so the os.path functions below won't
        # throw errors
        train_folder_path = "" if args.train_folder_path is None else args.train_folder_path
        test_folder_path = "" if args.test_folder_path is None else args.test_folder_path

        new_model = model
        if not new_model:
            print("Kindly give a name to save your model with")
            return
        if new_model + model_extension in all_models():
            print("There's already a model with that name. Choose another name")
            return

        has_train_folder = os.path.isdir(train_folder_path)
        has_test_folder = os.path.isdir(test_folder_path)

        if has_train_folder and has_test_folder:
            train(new_model,
                  train_folder=train_folder_path,
                  test_folder=test_folder_path)
            # Return here so a successful run is not followed by the
            # "only one folder" error message below.
            return

        if has_train_folder or has_test_folder:
            print('\n You cannot provide only one folder. Provide both training and testing folder')
            return

        # Means no folder was provided, run with default folders
        train(new_model)

    elif action == 'predict':
        folder_or_image = "" if args.path is None else args.path

        # If no model was given, use the default one
        if not model:
            model = default_model
        else:
            # If one was supplied, check that it actually exists
            if model + model_extension not in all_models():
                print("No such model has been trained")
                return

        # if it's not a folder that was supplied, check if it's a file
        if not os.path.isdir(folder_or_image):
            if os.path.isfile(folder_or_image):
                if not folder_or_image.endswith(image_extensions):
                    print("\nError: An image file is required. Try again\n")
                    return
                input_type = 'file'
                # add logic before here to pass in the model we want to use in the predictor
                predictor(input_type, folder_or_image, model)
                return
            print('\nError: Invalid path. Kindly supply a valid folder or image path\n')
            return

        input_type = 'folder'
        # add logic before here to pass in the model we want to use in the predictor
        predictor(input_type, folder_or_image, model)
        print(f"\nDone! The results are in {folder_or_image}")

    elif action == 'delete':
        # Check that model name is provided.
        if not model:
            print("\n You must supply a model to delete")
            return
        if model + model_extension not in all_models():
            print("That model does not exist")
            return
        model_delete(model)
        return

    elif action == 'models':
        # List all models
        print(all_models())
        return

    else:
        print('\nAction command is not supported\n for help: run python3 app.py -h')
def main(argv=sys.argv):
    """Command-line dispatcher for train / predict / delete / retrieve_models.

    Args:
        argv: argument vector forwarded to ``parse_args``.

    Returns:
        Whatever ``train`` or ``train_model`` returns on the training paths
        that return their result; ``None`` on every other path.
    """
    args = parse_args(argv)
    action = args.app_action
    train_dir = args.trp
    test_dir = args.tep
    target_path = args.path if args.path is not None else ""
    # Any value supplied for this flag is treated as True, none as False.
    wants_generated_name = args.gen_name
    # For 'train' this names the model to create; for 'predict' and
    # 'delete' it names an existing model.
    model = args.model

    if action == 'train':
        if model:
            # An explicit model name was supplied.
            new_model = model + model_extension
            if new_model in all_models():
                print(
                    "There's already a model with that name. Please choose another name"
                    " or find a model with name {}. Delete it and try again".
                    format(new_model))
                return
            # Both dataset folders are implicitly required from here on.
            if train_dir and test_dir:
                return train_model(new_model, train_dir, test_dir)
            print("\n Both training folder and test folder arguments are required")
            return

        if wants_generated_name in truth_values:
            # The caller asked for a generated name; both dataset folders
            # are implicitly required in that case.
            if not (train_dir and test_dir):
                print(
                    "\n Both training folder and test folder arguments are required"
                )
                return
            new_model = generate_name(train_dir)
            train_model(new_model, train_dir, test_dir)
            return

        # No name and no request to generate one: this is the console path
        # that trains the default model, which must never be retrained.
        if default_model in all_models():
            print(
                "Retraining the default model is forbidden. Supply model name or Delete the default model manually and proceed"
            )
            return
        print("Training the default model now...")
        return train(default_model)

    if action == 'predict':
        if model:
            # A named model must already exist.
            model = model + model_extension
            if model not in all_models():
                print("No such model has been trained")
                return
        else:
            model = default_model

        if os.path.isdir(target_path):
            predictor('folder', target_path, model)
            print(f"\nDone! The results are in {target_path}")
            return

        # Not a folder, so the only other acceptable input is an image file.
        if not os.path.isfile(target_path):
            print(
                '\nError: Invalid path. Kindly supply a valid folder or image path\n'
            )
            return
        if not target_path.endswith(image_extensions):
            print("\nError: An image file is required. Try again\n")
            return
        predictor('file', target_path, model)
        return

    if action == 'delete':
        # A model name is mandatory for deletion.
        if not model:
            print("\n You must supply a model to delete")
            return
        model = model + model_extension
        if model not in all_models():
            print("That model does not exist")
            return
        model_delete(model)
        return

    if action == 'retrieve_models':
        # List all models
        print(all_models())
        return

    print(
        '\nAction command is not supported\n for help: run python3 app.py -h'
    )