def main(_):
    """Script entry point.

    Pins CUDA to the GPU chosen via the global ``FLAGS``, builds a Solver,
    and dispatches to training or testing depending on ``FLAGS.is_train``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_index
    solver = Solver(FLAGS)
    # Select the action first, then invoke it — same effect as if/else.
    action = solver.train if FLAGS.is_train else solver.test
    action()
def main(config):
    """Run training or testing as selected by ``config["mode"]``.

    Side effects: pins CUDA to ``config["gpu_no"]`` and creates the
    configured save directory (including parents) if missing. Unknown
    modes are a silent no-op, exactly as before.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(config["gpu_no"])

    # Ensure the output directory exists before the Solver writes to it.
    Path(config["training"]["save_path"]).mkdir(parents=True, exist_ok=True)

    mode = config["mode"]
    solver = Solver(config)

    # Dispatch table instead of an if-chain; same observable behavior.
    actions = {"train": solver.train, "test": solver.test}
    if mode in actions:
        actions[mode]()
def main(config):
    """Entry point: build the data pipeline and run the requested mode.

    Side effects: pins CUDA to ``config["gpu_no"]``, creates the
    version-specific save directory, and stores its Path back into
    ``config["save_path"]`` for downstream consumers. Unknown modes are a
    silent no-op, exactly as before.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(config["gpu_no"])
    mode = config["mode"]

    # Outputs live under <training.save_path>/<version>; create it up front.
    out_dir = Path(config["training"]["save_path"]) / config["version"]
    out_dir.mkdir(parents=True, exist_ok=True)
    config["save_path"] = out_dir

    solver = Solver(config, DataLoader(mode, **config["dataset"]))

    # Dispatch table instead of an if-chain; same observable behavior.
    dispatch = {"train": solver.train, "test": solver.test}
    if mode in dispatch:
        dispatch[mode]()
def main():
    """Entry point: set up record/checkpoint paths, build a Solver, and run.

    Uses the module-level ``args`` namespace. In eval mode a single test
    pass is run; otherwise the model is trained for up to ``args.max_epoch``
    epochs (evaluating and optionally checkpointing after every epoch),
    stopping early once a fixed sample budget has been consumed.
    """
    # Build a string that uniquely describes the current run setup,
    # e.g. "svhn2mnist_k_4_kq_4_lamb_10.0_run_5".
    run_setup_str = (f"{args.source}2{args.target}_k_{args.num_k}"
                     f"_kq_{args.num_kq}_lamb_{args.lamb_marg_loss}")
    num = 0
    while os.path.exists(f"record/{run_setup_str}_run_{num}.txt"):
        num += 1  # bump the run index until an unused one is found
    run_setup_str = f"{run_setup_str}_run_{num}"

    # File names for records (storing training stats).
    record_train = f"record/{run_setup_str}.txt"
    record_test = f"record/{run_setup_str}_test.txt"
    # makedirs with exist_ok replaces the racy exists+mkdir pairs.
    os.makedirs('record', exist_ok=True)

    # Checkpoint dir (storing model params); one call creates both
    # 'checkpoint' and the run-specific subdirectory.
    checkpoint_dir = f'checkpoint/{run_setup_str}'
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Create a solver: loads data, creates models (or loads existing
    # models), and creates optimizers.
    solver = Solver(args, source=args.source, target=args.target,
                    nsamps_q=args.nsamps_q,
                    lamb_marg_loss=args.lamb_marg_loss,
                    learning_rate=args.lr, batch_size=args.batch_size,
                    optimizer=args.optimizer, num_k=args.num_k,
                    num_kq=args.num_kq, all_use=args.all_use,
                    checkpoint_dir=checkpoint_dir,
                    save_epoch=args.save_epoch)

    if args.eval_only:
        solver.test(0)
        return

    # Training loop. ``count`` accumulates whatever solver.train reports
    # (presumably samples processed — TODO confirm); stop at the budget.
    max_count = 20000 * 10
    count = 0
    for t in range(args.max_epoch):
        count += solver.train(t, record_file=record_train)
        # Run on test data every epoch (and optionally save the model).
        # (The original guarded this with the always-true ``t % 1 == 0``.)
        solver.test(t, record_file=record_test, save_model=args.save_model)
        if count >= max_count:
            break
# NOTE(review): fragment of a training script — this chunk begins midway
# through a FullyConnectedNet(...) call (its opening is not visible here)
# and is cut off midway through solver2's optim_config dict.
                      use_batchnorm=False, reg=0.6)
# Train net1: SGD for 50 epochs; print_every equals the training-set size,
# so progress is effectively reported about once per epoch.
solver1 = Solver(
    model1, data1,
    print_every=data1['X_train'].shape[0],
    num_epochs=50, batch_size=100,
    update_rule='sgd',
    optim_config={
        'learning_rate': 0.03,
    },
    verbose=False,
    lr_decay=0.9,
)
solver1.train()
pass  # no-op kept from the original (likely a notebook-cell leftover)
# train net2: same architecture and hyperparameters as net1.
model2 = FullyConnectedNet([100, 100], weight_scale=0.003,
                           use_batchnorm=False, reg=0.6)
solver2 = Solver(
    model2, data1,
    print_every=data1['X_train'].shape[0],
    num_epochs=50, batch_size=100,
    update_rule='sgd',
    optim_config={
        'learning_rate': 0.03,
def main():
    """Train a small CNN on the binary shiny-Pokemon dataset and visualize
    the first-layer convolution filters.

    Builds and splits the dataset, wraps it in DataLoaders, trains for 10
    epochs via the project Solver, then plots the learned conv filters.
    (The original header comment mentioned CIFAR-10, but the code loads the
    Pokemon dataset — the stale comment has been removed.)
    """
    # Prefer the first CUDA device when available.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Load and split the data: 60% train / 20% val / 20% test.
    datasetGen = DatasetGen()
    datasetGen.BinaryShinyPokemonDataset()
    data_dict = datasetGen.Subsample([0.6, 0.2, 0.2])
    print("Train size: %i" % len(data_dict["X_train"]))
    print("Val size: %i" % len(data_dict["X_val"]))
    print("Test size: %i" % len(data_dict["X_test"]))
    train_data, val_data, test_data = ConvertDatasetDictToTorch(data_dict)

    from src.solver import Solver  # local import kept from the original

    # (Removed: an unused SequentialSampler and ``num_train`` local — the
    # loaders below never referenced them.)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=64,
                                               shuffle=True, num_workers=4)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=64,
                                             shuffle=False, num_workers=4)

    ############################################################################
    # Hyper parameter Grid search : Set grids below                            #
    ############################################################################
    print(train_data)
    lr = 1e-3
    kernelsize = 3
    hidden_dims = [200]
    convArray = [64, 128]

    # assumes 3x96x96 input images with 2 output classes — TODO confirm
    model = ClassificationCNN(input_dim=[3, 96, 96], num_classes=2,
                              convolutionalDims=convArray,
                              kernel_size=kernelsize, stride_conv=1,
                              weight_scale=0.02, pool=2, stride_pool=2,
                              hiddenDims=hidden_dims, dropout=0.0)
    model.to(device)

    solver = Solver(optim_args={"lr": lr, "weight_decay": 1e-3})
    print(
        "training now with values: lr=%s, hidden_dim=%s, filtersize=%s, convArray=%s"
        % (lr, str(hidden_dims), kernelsize, str(convArray)))
    solver.train(model, train_loader, val_loader, log_nth=6, num_epochs=10,
                 L1=False, reg=0.1)

    from src.vis_utils import visualize_grid

    # first (next) parameter should be convolutional: (out_ch, in_ch, H, W);
    # move channels last for plotting as an image grid.
    conv_params = next(model.parameters()).cpu().data.numpy()
    grid = visualize_grid(conv_params.transpose(0, 2, 3, 1))
    plt.imshow(grid.astype('uint8'))
    plt.axis('off')
    plt.gcf().set_size_inches(6, 6)
    plt.show()
# solver ################################################################################ # TODO: Train the best FullyConnectedNet that you can on CIFAR-10. You might # # batch normalization and dropout useful. Store your best model in the # # best_model variable. # ################################################################################ # here I use the same parameters I got before, seems like they perform moderately fine model = FullyConnectedNet([100, 100], weight_scale=0.003, use_batchnorm = False, reg=0.6) solver = Solver(model, data1, print_every=data1['X_train'].shape[0], num_epochs=10, batch_size=100, update_rule='sgd', optim_config={ 'learning_rate': 0.03, },verbose=True, lr_decay = 0.9, ) solver.train() pass # check for normalizer model1, m1_w1_norms = weight_nomrer(model) ''' # saving all variables import shelve filename='shelve.out' my_shelf = shelve.open(filename,'n') # 'n' for new for key in dir(): try: my_shelf[key] = globals()[key] except TypeError:
def train(data_dir, epochs, batch_size, model_path, max_hours=None, continue_from=""):
    """Train a Conv-TasNet speech-separation model.

    Args:
        data_dir: Dataset root. NOTE(review): "tr"/"cv" are appended by
            plain string concatenation, so this must end with a path
            separator — TODO confirm with callers.
        epochs: Number of training epochs.
        batch_size: Training batch size (CV always uses batch_size=1).
        model_path: File name under ``save_folder`` for the final model.
        max_hours: Optional cap on hours of audio loaded per dataset.
        continue_from: Optional checkpoint path to resume training from.

    Raises:
        ValueError: If ``optimizer_type`` is not 'sgd' or 'adam' (replaces
            the original silent print-and-return).
    """
    # --- Task / data config --------------------------------------------------
    train_dir = data_dir + "tr"
    valid_dir = data_dir + "cv"
    sample_rate = 8000
    segment_len = 4  # seconds per training segment
    cv_maxlen = 6    # max seconds per CV utterance

    # --- Network architecture (Conv-TasNet hyperparameters) ------------------
    N = 256  # Number of filters in autoencoder
    L = 20   # Length of filters in conv autoencoder
    B = 256  # number of channels in conv blocks - after bottleneck 1x1 conv
    H = 512  # number of channels in inner conv1d block
    P = 3    # length of filter in inner conv1d blocks
    X = 8    # number of conv1d blocks (also number of dilations) in each repeat
    R = 4    # number of repeats
    C = 2    # Number of speakers
    norm_type = 'gLN'  # choices=['gLN', 'cLN', 'BN']
    causal = 0
    mask_nonlinear = 'relu'

    # --- Training config -----------------------------------------------------
    use_cuda = 1
    half_lr = 1        # Half the learning rate when there's a small improvement
    early_stop = 1     # Stop learning if no improvement after 10 epochs
    max_grad_norm = 5  # gradient clipping
    shuffle = 1        # Shuffle every epoch
    num_workers = 4

    # --- Optimizer config ----------------------------------------------------
    optimizer_type = "adam"
    lr = 1e-3
    momentum = 0
    l2 = 0  # Weight decay - l2 norm

    # --- Save and visualize --------------------------------------------------
    save_folder = "../egs/models"
    enable_checkpoint = 0  # enables saving checkpoints
    print_freq = 20000
    visdom_enabled = 1
    visdom_epoch = 1
    visdom_id = "Conv-TasNet Training"  # TODO: Check what this does

    # Positional bundle consumed by the project Solver.
    arg_solver = (use_cuda, epochs, half_lr, early_stop, max_grad_norm,
                  save_folder, enable_checkpoint, continue_from, model_path,
                  print_freq, visdom_enabled, visdom_epoch, visdom_id)

    # --- Datasets and Dataloaders --------------------------------------------
    tr_dataset = AudioDataset(train_dir, batch_size,
                              sample_rate=sample_rate,
                              segment=segment_len, max_hours=max_hours)
    # batch_size=1 -> use less GPU memory to do cv; segment=-1 -> full audio.
    cv_dataset = AudioDataset(valid_dir, batch_size=1,
                              sample_rate=sample_rate,
                              segment=-1, cv_maxlen=cv_maxlen,
                              max_hours=max_hours)
    tr_loader = AudioDataLoader(tr_dataset, batch_size=1,
                                shuffle=shuffle, num_workers=num_workers)
    cv_loader = AudioDataLoader(cv_dataset, batch_size=1, num_workers=0)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    # --- Model ---------------------------------------------------------------
    model = ConvTasNet(N, L, B, H, P, X, R, C,
                       norm_type=norm_type, causal=causal,
                       mask_nonlinear=mask_nonlinear)
    if use_cuda:
        model = torch.nn.DataParallel(model)
        model.cuda()

    # --- Optimizer -----------------------------------------------------------
    if optimizer_type == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=lr,
                                    momentum=momentum, weight_decay=l2)
    elif optimizer_type == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr,
                                     weight_decay=l2)
    else:
        # Fail loudly instead of silently returning None.
        raise ValueError(f"Unsupported optimizer: {optimizer_type!r}")

    # --- Solver --------------------------------------------------------------
    solver = Solver(data, model, optimizer, arg_solver)  # TODO: Fix solver thing
    solver.train()
def main(args):
    """Build data loaders and a Transformer ASR model, then run the Solver.

    The character dictionary from ``args.dict`` sizes the vocabulary and
    supplies the start/end sentinel ids used by the decoder and loaders.
    """
    # load dictionary and generate char_list, sos_id, eos_id
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)

    tr_dataset = AudioDataset('train', args.batch_size)
    cv_dataset = AudioDataset('dev', args.batch_size)
    # Train and dev loaders differ only in shuffling and in which dataset's
    # path/label lists they read.
    tr_loader = AudioDataLoader(tr_dataset, batch_size=1,
                                num_workers=args.num_workers,
                                shuffle=args.shuffle,
                                feature_dim=args.feature_dim,
                                char_list=char_list,
                                path_list=tr_dataset.path_lst,
                                label_list=tr_dataset.han_lst,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    cv_loader = AudioDataLoader(cv_dataset, batch_size=1,
                                num_workers=args.num_workers,
                                feature_dim=args.feature_dim,
                                char_list=char_list,
                                path_list=cv_dataset.path_lst,
                                label_list=cv_dataset.han_lst,
                                LFR_m=args.LFR_m, LFR_n=args.LFR_n)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    # Encoder and decoder share the model-dimension hyperparameters.
    encoder = Encoder(args.d_input * args.LFR_m, args.d_low_dim,
                      args.n_layers_enc, args.n_head,
                      args.d_k, args.d_v,
                      args.d_model, args.d_inner,
                      dropout=args.dropout, pe_maxlen=args.pe_maxlen)
    decoder = Decoder(sos_id, eos_id, vocab_size,
                      args.d_word_vec, args.n_layers_dec, args.n_head,
                      args.d_k, args.d_v, args.d_model, args.d_inner,
                      dropout=args.dropout,
                      tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
                      pe_maxlen=args.pe_maxlen)
    model = Transformer(encoder, decoder)
    print(model)
    model.cuda()

    # Adam wrapped in the project learning-rate scheduler (renamed from the
    # original's misspelled local ``optimizier``).
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.init_lr, args.d_model, args.warmup_steps)

    Solver(data, model, optimizer, args).train()
# NOTE(review): fragment — this chunk begins inside the trailing arguments
# of a DataLoader(...) call for the training loader (opening not visible).
                                num_workers=config['num_workers'],
                                sampler=train_data_sampler)
val_data_loader = torch.utils.data.DataLoader(
    dataset=data_set,
    batch_size=config['batch_size'],
    num_workers=config['num_workers'],
    sampler=val_data_sampler)

if config['continue_training']:
    # Resume: reload the serialized model and solver state.
    # NOTE(review): pickle.load on a configurable path is unsafe for
    # untrusted input, and the file handle is never closed — a ``with``
    # block would be safer. Flagged only; behavior unchanged.
    model = torch.load(config['model_path'])
    solver = pickle.load(open(config['solver_path'], 'rb'))
    start_epoch = config['start_epoch']
else:
    # Fresh run: new model and a Solver configured from the config dict.
    model = EncoderDecoder()
    solver = Solver(optim_args={
        "lr": config['learning_rate'],
        "betas": config['betas']
    })
    start_epoch = 0

# NOTE(review): ``val_loader`` is passed the *training* loader
# (train_data_loader); the val_data_loader built above goes unused —
# looks like a bug, verify intent before changing.
solver.train(lr_decay=config['lr_decay'],
             start_epoch=start_epoch,
             model=model,
             train_loader=train_data_loader,
             val_loader=train_data_loader,
             num_epochs=config['num_epochs'],
             log_after_iters=config['log_interval'],
             save_after_epochs=config['save_interval'],
             lr_decay_interval=config['lr_decay_interval'],
             save_path=config['save_path'],
             num_subtasks=config['num_subtasks'])
def train():
    """Train the CNN emotion classifier, save loss/accuracy plots, then run
    the model on example images and save an annotated example figure.

    Outputs go to ``output/`` next to this file; the trained model is saved
    under ``models/`` with a timestamped name.
    """
    # path of this file (absolute, with trailing slash)
    ABS_PATH = os.path.dirname(os.path.abspath(__file__)) + '/'
    # Year-month-day_Hour-Minute-Second — tags all saved artifacts.
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    print("Loading data...")
    # currently only using AN Dataset
    train_data, val_data = get_Dataset()
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=25,
                                               shuffle=True, num_workers=0)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size=25,
                                             shuffle=False, num_workers=0)
    #train_loader = torch.utils.data.DataLoader(train_data, batch_size=25, shuffle=False, num_workers=4, sampler=OverfitSampler(3000))
    #val_loader = torch.utils.data.DataLoader(val_data, batch_size=25, shuffle=False,num_workers=2, sampler=OverfitSampler(100))

    log_n = 50   # Train acc every x iterations
    epochs = 20  # x epochs, model gets saved after each completed epoch
    val_n = 0    # Run validation every x iterations (default: off/0)
    print("Training for %d epochs." % epochs)

    model = CNNEmoClassifier(weight_scale=0.0005)
    solver = Solver(optim_args={'lr': 5e-5})

    tic = time.time()
    solver.train(model, train_loader, val_loader, num_epochs=epochs,
                 log_nth=log_n, val_nth=val_n)
    # Report wall-clock training time as h:mm:ss.
    temp_time = time.time() - tic
    m, s = divmod(temp_time, 60)
    h, m = divmod(m, 60)
    print('Done after %dh%02dmin%02ds' % (h, m, s))

    # Save model
    model.save("models/model_{}.model".format(timestamp))

    # --- Loss curves (top subplot) ---
    plt.subplot(2, 1, 1)
    plt.plot(solver.train_loss_history, '-', label='train_loss')
    # Stretch the sparser validation-loss series across the same x-range as
    # the per-iteration training losses so the curves are comparable.
    x = np.linspace(0, len(solver.train_loss_history),
                    len(solver.val_loss_history))
    plt.plot(x, solver.val_loss_history, '-o', label='val_loss')
    plt.legend(loc='upper right')
    plt.title('Training vs Validation loss | %d Epochs' % epochs)
    plt.xlabel('iteration')
    plt.ylabel('loss')

    # --- Accuracy curves (bottom subplot) ---
    plt.subplot(2, 1, 2)
    plt.plot(solver.train_acc_history, '-o',
             label='train_acc=%.4f' % (solver.train_acc_history[-1]))
    plt.plot(solver.val_acc_history, '-o',
             label='val_acc=%.4f' % (solver.val_acc_history[-1]))
    plt.legend(loc='upper left')
    plt.title('Training vs Validation accuracy')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.gca().yaxis.grid(True)
    plt.gcf().set_size_inches(15, 15)
    plt.tight_layout()
    plt.savefig(ABS_PATH + 'output/performance_{}.png'.format(timestamp))
    plt.gcf().clear()

    # plot examples:
    model.eval()
    # get_pics might not work! If it doesn't, uncomment the old code.
    test_pics, example_labels, filenames, amount_example_pics = get_pics(
        train_data, val_data)
    # NOTE(review): .cuda() assumes a GPU is available — fails on CPU-only.
    output = model.forward(Variable(torch.Tensor(test_pics).float()).cuda())
    emotions = {
        0: 'neutral',
        1: 'happy',
        2: 'sad',
        3: 'surprise',
        4: 'fear',
        5: 'disgust',
        6: 'anger',
        7: 'contempt'
    }
    print(
        '0=neutral, 1=happy, 2=sad, 3=surprise, 4=fear, 5=disgust, 6=anger, 7=contempt'
    )
    # Predicted class per example: argmax over the 8 emotion scores.
    print(
        np.argmax(output.data.cpu().numpy(),
                  axis=1,
                  out=np.empty(amount_example_pics, dtype='int64')))
    print(example_labels)
    # NOTE(review): softmax without an explicit ``dim`` relies on deprecated
    # implicit-dimension behavior — confirm the intended axis.
    output = torch.nn.functional.softmax(output).cpu().data.numpy()
    # plot images and write output under them, very unsure!! Better check on this one!
    for i in range(amount_example_pics):
        plt.subplot(amount_example_pics, 1, i + 1)
        #plt.legend(loc='upper left')
        plt.title(
            '%s: Truth=%s, N=%.2e, H=%.2e, Sad=%.2e, Sur=%.2e, F=%.2e, D=%.2e, A=%.2e, C=%.2e'
            % (filenames[i], emotions[example_labels[i]], list(output[i])[0],
               list(output[i])[1], list(output[i])[2], list(
                   output[i])[3], list(output[i])[4], list(
                       output[i])[5], list(output[i])[6], list(output[i])[7]))
        plt.imshow(test_pics[i][0])
    plt.tight_layout()
    plt.savefig(ABS_PATH + 'output/examples_{}.png'.format(timestamp))
    plt.gcf().clear()