import time

import torch
import tensorboardX

# Project-local modules assumed by the snippets below.
import loss
import train
import utils


def start_training(model_class, model_args, model_kwargs, chkpt_num, lr,
                   train_sets, val_sets, data_dir, **params):

    # PyTorch model
    net = utils.create_network(model_class, model_args, model_kwargs)
    monitor = utils.LearningMonitor()

    # Load a model checkpoint (if applicable)
    if chkpt_num != 0:
        utils.load_chkpt(net, monitor, chkpt_num,
                         params["model_dir"], params["log_dir"])

    # DataProvider samplers
    Sampler = params["sampler_class"]
    train_sampler = utils.AsyncSampler(
        Sampler(data_dir, dsets=train_sets, mode="train",
                resize=params["resize"]))
    val_sampler = utils.AsyncSampler(
        Sampler(data_dir, dsets=val_sets, mode="val",
                resize=params["resize"]))

    loss_fn = loss.BinomialCrossEntropyWithLogits()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    train.train(net, loss_fn, optimizer, train_sampler, val_sampler,
                last_iter=chkpt_num, monitor=monitor, **params)
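
# --- Illustration (not part of the repo) ------------------------------------
# A rough sketch of what utils.AsyncSampler is assumed to do above: wrap a
# DataProvider sampler and prefetch samples on a background thread so the
# training loop never blocks on sample generation. The queue size and the
# interface are assumptions; the two train() variants below call sampler()
# and sampler.get() respectively, so both entry points are provided here.
import queue
import threading

class AsyncSamplerSketch:

    def __init__(self, sampler, max_prefetch=4):
        self._sampler = sampler  # assumed callable, returning a sample dict
        self._queue = queue.Queue(maxsize=max_prefetch)
        self._thread = threading.Thread(target=self._fill, daemon=True)
        self._thread.start()

    def _fill(self):
        # Producer loop: generate samples ahead of time, blocking when full.
        while True:
            self._queue.put(self._sampler())

    def __call__(self):
        return self._queue.get()

    def get(self):
        return self._queue.get()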

def train(model, loss_fn, optimizer, sampler, val_sampler=None, last_iter=0,
          train_writer=None, val_writer=None, monitor=None, **params):
    """ Generalized training function """

    assert params_defined(params), "Params under-specified"

    if monitor is None:
        monitor = utils.LearningMonitor()

    # Determine the names of the inputs, labels, and masks
    sample_spec = utils.SampleSpec(sampler().keys())
    mask_names = sample_spec.get_masks()

    print("======= BEGIN TRAINING LOOP ========")
    for i in range(last_iter, params["max_iter"]):
        start = time.time()

        # Make sure no mask is empty (i.e. there is data for every task)
        sample = fetch_nonempty_sample(sampler, mask_names,
                                       params["batch_size"])

        inputs, labels, masks = group_sample(sample, sample_spec, "train")

        # Forward pass
        preds = model(*inputs)
        losses, nmsks = eval_error(preds, labels, masks, loss_fn, sample_spec)

        update_model(optimizer, losses)
        log_errors(monitor, losses, nmsks, i)

        # Elapsed time
        elapsed = time.time() - start
        log_elapsed_time(monitor, elapsed, i, "train")

        if val_sampler is not None and i % params["test_intv"] == 0:
            run_validation(model, val_sampler, params["test_iter"],
                           loss_fn, sample_spec, monitor, val_writer, i)

        if i % params["avgs_intv"] == 0 or i < last_iter + params["warm_up"] - 1:
            monitor.compute_avgs(i, "train")

            # Display stats (both on the console and in TensorBoard)
            avg_losses = {k: round(monitor.get_last_value(k, "train"), 5)
                          for k in losses.keys()}
            avg_time = round(monitor.get_last_value("iter_time", "train"), 5)

            write_averages(train_writer, avg_losses, avg_time, i)
            print("iter: {}; avg losses = {} (iter_time = {} s on avg)".format(
                i, avg_losses, avg_time))

        if i % params["chkpt_intv"] == 0 and i != last_iter:
            print("SAVE CHECKPOINT: {} iters.".format(i))
            utils.save_chkpt(model, monitor, i,
                             params["model_dir"], params["log_dir"])
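
# --- Illustration (not part of the repo) ------------------------------------
# A plausible sketch of fetch_nonempty_sample as used in the loop above: keep
# drawing from the sampler until every task mask contains at least one active
# voxel, so every task contributes gradient signal. The sampler call style and
# the numpy-style .any() on the mask arrays are assumptions; forwarding of
# batch_size to the sampler is elided here.
def fetch_nonempty_sample_sketch(sampler, mask_names, batch_size):
    while True:
        sample = sampler()
        if all(sample[name].any() for name in mask_names):
            return sample

# Likewise, write_averages presumably pushes the averaged stats to TensorBoard
# via the tensorboardX.SummaryWriter created in start_training; a minimal
# version could look like this (the scalar tag names are made up).
def write_averages_sketch(writer, avg_losses, avg_time, i):
    if writer is None:
        return
    for name, value in avg_losses.items():
        writer.add_scalar("train/{}".format(name), value, i)
    writer.add_scalar("train/iter_time", avg_time, i)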

def start_training(model_class, model_args, model_kwargs, sampler_class,
                   sampler_spec, augmentor_constr, chkpt_num, lr,
                   train_sets, val_sets, data_dir, model_dir, log_dir,
                   tb_train, tb_val, **params):

    # PyTorch model
    net = utils.create_network(model_class, model_args, model_kwargs)
    train_writer = tensorboardX.SummaryWriter(tb_train)
    val_writer = tensorboardX.SummaryWriter(tb_val)
    monitor = utils.LearningMonitor()

    # Load a model checkpoint (if applicable)
    if chkpt_num != 0:
        utils.load_chkpt(net, monitor, chkpt_num, model_dir, log_dir)

    # DataProvider samplers (augmentation only for training)
    train_aug = augmentor_constr(True)
    train_sampler = utils.AsyncSampler(
        sampler_class(data_dir, sampler_spec, vols=train_sets,
                      mode="train", aug=train_aug))

    val_aug = augmentor_constr(False)
    val_sampler = utils.AsyncSampler(
        sampler_class(data_dir, sampler_spec, vols=val_sets,
                      mode="val", aug=val_aug))

    loss_fn = loss.BinomialCrossEntropyWithLogits()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    train.train(net, loss_fn, optimizer, train_sampler, val_sampler,
                train_writer=train_writer, val_writer=val_writer,
                last_iter=chkpt_num, model_dir=model_dir, log_dir=log_dir,
                monitor=monitor, **params)
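
# --- Illustration (not part of the repo) ------------------------------------
# augmentor_constr is assumed to be a factory taking a single boolean: it
# should return an augmentation pipeline for training (True) and little or no
# augmentation for validation (False). A stand-in obeying that contract, which
# could be passed as augmentor_constr=make_augmentor_sketch:
def make_augmentor_sketch(is_train):
    if not is_train:
        return None  # validation: deterministic, unaugmented samples

    def augment(sample):
        # A real pipeline would apply flips/rotations/warps and the like here.
        return sample

    return augment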

def train(model, loss_fn, optimizer, sampler, val_sampler=None,
          last_iter=0, monitor=None, **params):
    """ Generalized training function """

    assert params_defined(params), "Params under-specified"

    if monitor is None:
        monitor = utils.LearningMonitor()

    # Determine the names of the inputs, labels, and masks
    sample_spec = utils.SampleSpec(sampler.get().keys())
    mask_names = sample_spec.get_masks()

    start = time.time()
    print("======= BEGIN TRAINING LOOP ========")
    for i in range(last_iter, params["max_iter"]):

        # Make sure no mask is empty (i.e. there is data for every task)
        sample = fetch_nonempty_sample(sampler, mask_names,
                                       params["batch_size"])

        inputs, labels, masks = make_variables(sample, sample_spec, "train")

        # Forward pass
        preds = model(*inputs)

        if params["resize"] != 1:
            # TODO: resizing preds back to the full resolution is unfinished;
            # the original scipy.misc.imresize call was left commented out.
            pass

        losses, nmsks = eval_error(preds, labels, masks, loss_fn, sample_spec)

        update_model(optimizer, losses)
        log_errors(monitor, losses, nmsks)

        # Elapsed time
        elapsed = time.time() - start
        log_elapsed_time(monitor, elapsed, "train")
        start = time.time()

        if val_sampler is not None and i % params["test_intv"] == 0:
            run_validation(model, val_sampler, params["test_iter"],
                           loss_fn, sample_spec, monitor, i)
            start = time.time()  # ignore validation time

        if i % params["avgs_intv"] == 0 or i < last_iter + params["warm_up"] - 1:
            monitor.compute_avgs(i, "train")

            # Display stats
            avg_losses = {k: round(monitor.get_last_value(k, "train"), 5)
                          for k in losses.keys()}
            avg_time = round(monitor.get_last_value("iter_time", "train"), 5)
            print("iter: {}; avg losses = {} (iter_time = {} s on avg)".format(
                i, avg_losses, avg_time))

        if i % params["chkpt_intv"] == 0 and i != last_iter:
            print("SAVE CHECKPOINT: {} iters.".format(i))
            utils.save_chkpt(model, monitor, i,
                             params["model_dir"], params["log_dir"])
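
# --- Illustration (not part of the repo) ------------------------------------
# params_defined, asserted at the top of both train() variants, presumably
# checks that every key the loop reads is present; the key list below is
# inferred from the accesses above and may be incomplete.
REQUIRED_PARAMS = ("max_iter", "batch_size", "test_intv", "test_iter",
                   "avgs_intv", "warm_up", "chkpt_intv",
                   "model_dir", "log_dir")

def params_defined_sketch(params):
    return all(k in params for k in REQUIRED_PARAMS)

# update_model likely reduces the per-task loss dict to one scalar and takes a
# single optimizer step; the unweighted sum over tasks is an assumption.
def update_model_sketch(optimizer, losses):
    optimizer.zero_grad()
    total = sum(losses.values())  # losses: dict of scalar loss tensors
    total.backward()
    optimizer.step()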