def __init__(
    self,
    model=None,
    data_loader=None,
    train_times=1000,
    lr=1e-3,
    alpha=0.5,
    use_gpu=True,
    opt_method="sgd",
    save_steps=None,
    checkpoint_dir=None,
):
    self.work_threads = 8
    self.train_times = train_times
    self.opt_method = opt_method
    self.optimizer = None
    self.lr_decay = 0
    self.weight_decay = 0
    self.alpha = alpha
    self.lr = lr
    self.model = model
    self.data_loader = data_loader
    self.use_gpu = use_gpu
    self.save_steps = save_steps
    self.checkpoint_dir = checkpoint_dir
    self.liveplot = PlotLosses()
class LivelossCallback(AvgStatsCallback):
    def __init__(self, metrics):
        super().__init__(metrics)
        self.liveloss = PlotLosses(skip_first=0)
        self.metricnames = [m.__name__ for m in metrics]
        self.logs = {}

    def begin_epoch(self):
        super().begin_epoch()
        self.logs = {}
        self.iteration = 0

    def after_loss(self):
        super().after_loss()
        if self.in_train:
            self.iteration += 1
            print(
                "\r[%d, %5d] Train_loss: %.3f"
                % (self.epoch + 1, self.iteration, self.loss),
                end="",
            )

    def after_epoch(self):
        super().after_epoch()
        self.logs["loss"] = self.train_stats.avg_stats[0]
        self.logs["val_loss"] = self.valid_stats.avg_stats[0]
        for i, metric in enumerate(self.metricnames):
            self.logs[metric] = self.train_stats.avg_stats[i + 1].item()
            self.logs["val_" + metric] = self.valid_stats.avg_stats[i + 1].item()
        self.liveloss.update(self.logs)
        self.liveloss.draw()
def __init__(self, n_epochs, batches_epoch, out_dir, start_epoch=1):
    # self.viz = Visdom()
    self.n_epochs = n_epochs
    self.batches_epoch = batches_epoch
    self.epoch = start_epoch
    self.batch = 1
    self.prev_time = time.time()
    self.mean_period = 0
    self.losses = {}
    self.loss_windows = {}
    self.image_windows = {}
    self.out_dir = out_dir
    self.to_image = transforms.ToPILImage()
    self.liveloss = PlotLosses()
def train(D, G, D_optimizer, G_optimizer, D_loss, G_loss, data_loader, options):
    """
    Inputs:
    - `options`: A dictionary of options to configure the GAN. Required values:
        `batch_size` - (int) The size of each batch.
        `epoch_count` - (int) The number of epochs to run.
        `data_type` -
        `glyph_size` - (tuple or triple, [int, int, (int)]) The size of the image (H, W, C)
        `glyphs_per_image` - (int) The number of glyphs found on each image
        `visualize` - (bool) Whether to plot losses and generator progress during training.

    Returns:
    Dictionary of losses.
    """
    epoch_count = options['epoch_count']
    visualize = options['visualize']

    losses = collections.defaultdict(list)
    loss_plot = PlotLosses()

    if visualize:
        real_test, static_test = prepare_static_test(data_loader, options)
        visualize_progress(G, real_test, static_test)

    for _ in range(epoch_count):
        train_epoch(D, G, D_optimizer, G_optimizer, D_loss, G_loss,
                    data_loader, losses, options)
        if visualize:
            record_losses(loss_plot, losses)
            visualize_progress(G, real_test, static_test)

    return losses
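# For reference, a hypothetical `options` dict covering the keys that train()
# above reads; every value here is illustrative, not taken from the source:
options = {
    'batch_size': 64,
    'epoch_count': 30,
    'data_type': 'glyphs',      # assumed; the docstring leaves this key undescribed
    'glyph_size': (64, 64, 1),  # (H, W, C)
    'glyphs_per_image': 16,
    'visualize': True,
}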
def fit_model(cfg, net, loader, verbose=False):
    optimizer = torch.optim.Adam(net.parameters(), lr=cfg["learning_rate"],
                                 weight_decay=cfg["weight_decay"])
    loss_func = torch.nn.MSELoss()  # mean squared error loss, for regression

    # Put the network in training mode
    net.train()

    # start training
    loss_hist = {}
    liveloss = PlotLosses()
    logs = {}
    lowest = 999999
    best_params = None
    for epoch in range(cfg["num_epochs"]):
        epoch_loss = 0
        for step, (batch_x, batch_y) in enumerate(loader):  # for each training step
            prediction = net(batch_x).reshape(-1)  # input x and predict based on x
            if verbose: print("batch_x.size:{}".format(batch_x.size()))
            if verbose: print("batch_y.size:{}".format(batch_y.size()))
            if verbose: print("prediction.size:{}".format(prediction.size()))
            loss = loss_func(prediction, batch_y)  # must be (1. nn output, 2. target)
            epoch_loss += loss.detach().cpu().numpy()
            optimizer.zero_grad()  # clear gradients for next train
            loss.backward()        # backpropagation, compute gradients
            optimizer.step()       # apply gradients
        epoch_loss = epoch_loss / 900  # average over the (hard-coded) number of training samples
        if epoch_loss < lowest:  # keep the lowest epoch loss so the printout below is meaningful
            lowest = epoch_loss
        # Draw loss curves, gradients, and current inference results...
        visualize_results(cfg, epoch, liveloss, epoch_loss, loss_hist, net)
        print("epoch_loss {}".format(epoch_loss))
        pstr = '\repoch: {}, lr: {}, lowest_loss: {:7.5e}, latest_loss: {:7.5e}\n'.format(
            epoch, cfg["learning_rate"], lowest, epoch_loss)
        print(pstr, end="")
    return epoch_loss
class LiveLossPlotListener(DojoListener):
    """
    DojoListener implementation which renders a livelossplot after finishing a dan.
    """

    def __init__(self):
        self.liveloss = None

    def training_started(self, aikidoka: Aikidoka, kata: Kata, kun: DojoKun):
        self.liveloss = PlotLosses()

    def dan_finished(self, aikidoka: Aikidoka, run: (int, int), metrics: (float, float)):
        (loss, acc) = metrics
        self.liveloss.update({"loss": loss, "train_acc": acc})
        self.liveloss.draw()
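# The listener above boils down to the core livelossplot loop: construct a
# PlotLosses, call update() with a dict of named metrics once per step, then
# send() (draw() in older releases) to refresh the plot. A minimal,
# self-contained sketch of that cycle, with made-up metric values:
from livelossplot import PlotLosses

liveloss = PlotLosses()
for step in range(1, 4):
    liveloss.update({"loss": 1.0 / step, "train_acc": 1 - 1.0 / (step + 1)})
    liveloss.send()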
def train_vae(self, epochs=10, hidden_size=2, lr=0.0005, recon_loss_method='mse'):
    """
    Handles the training of the vae model.

    Parameters
    ----------
    epochs : int
        Number of complete passes over the whole training set.
    hidden_size : int
        Size of the latent space of the vae.
    lr : float
        Learning rate for the vae model training.
    recon_loss_method : str
        Method for reconstruction loss calculation.

    Returns
    -------
    None
    """
    set_seed(42)  # Set the random seed
    self.model = VAE(hidden_size, self.input.shape)  # Initialise model
    # Create optimizer
    optimizer = optim.Adam(self.model.parameters(), lr=lr, betas=(0.9, 0.999))

    if self.plot_loss:
        liveloss = PlotLosses()
        liveloss.skip_first = 0
        liveloss.figsize = (16, 10)

    # Start training loop
    for epoch in range(1, epochs + 1):
        # Train model on train dataset
        tl = train(epoch, self.model, optimizer, self.train_loader,
                   recon_loss_method=recon_loss_method)
        testl = test(epoch, self.model, self.test_loader,
                     recon_loss_method=recon_loss_method)

        if self.plot_loss:
            # log train and test losses for dynamic plot
            logs = {}
            logs['ELBO'] = tl
            logs['val_' + 'ELBO'] = testl
            liveloss.update(logs)
            liveloss.draw()
def fit(self, optimizer, patience, num_epochs=200):
    liveloss = PlotLosses()
    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=patience, verbose=True, metric='auc')
    for epoch in tqdm(range(num_epochs)):
        logs = {}
        self.train(optimizer)
        val_auc, val_ap = self.evaluate(validation=True, test=False)
        logs['val_auc'] = val_auc
        logs['val_ap'] = val_ap
        liveloss.update(logs)
        liveloss.send()
        self.writer.add_scalar('val_auc', val_auc, epoch)
        self.writer.add_scalar('val_ap', val_ap, epoch)

        # Early stopping: early_stopping checks whether the validation metric
        # has improved, and if it has, makes a checkpoint of the current model
        early_stopping(val_auc, self.model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    # load the last checkpoint with the best model
    self.model.load_state_dict(torch.load('checkpoint.pt'))
    return self.model
def test_neptune():
    neptune_logger = NeptuneLogger(
        api_token="ANONYMOUS",
        project_qualified_name="shared/colab-test-run",
        tags=['livelossplot', 'github-actions']
    )
    plotlosses = PlotLosses(outputs=[neptune_logger])
    assert neptune_logger.experiment.state == 'running'
    for i in range(3):
        plotlosses.update({
            'acc': 1 - np.random.rand() / (i + 2.),
            'val_acc': 1 - np.random.rand() / (i + 0.5),
            'loss': 1. / (i + 2.),
            'val_loss': 1. / (i + 0.5)
        })
        plotlosses.send()
    assert neptune_logger.experiment.state == 'running'
    neptune_logger.close()
    assert neptune_logger.experiment.state == 'succeeded'
    url = neptune.project._get_experiment_link(neptune_logger.experiment)
    assert len(url) > 0
def __init__(self, model, optimizer, train_loader, validate_loader,
             criterion=nn.CrossEntropyLoss(), device="cpu", keep_best=0):
    "Stores the parameters on the class instance for later methods"
    self.model = model
    self.optimizer = optimizer
    self.train_loader = train_loader
    self.validate_loader = validate_loader
    self.criterion = criterion
    self.device = device
    self.keep_best = keep_best
    try:
        self.transform = validate_loader.dataset.transform
    except AttributeError:
        print("No transform found, test data must be normalised manually")
    # store the liveloss as it holds all our logs, useful for later
    self.liveloss = PlotLosses()
    # store the best model params
    self.best_params_dict = {}
    # store the current epoch between training batches
    self.epoch = 0
    # for keeping the best model params
    self.max_acc = 0.
def train(model, patch_train_loader, patch_val_loader, EPOCHS, learning_rate):
    loss_func = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # , weight_decay=0.99
    liveloss = PlotLosses()
    lr2_tr_loss = []
    lr2_val_loss = []
    model_losses, valid_losses = [], []

    for epoch in range(EPOCHS):
        print("epoch{}".format(epoch))
        model_losses, valid_losses = [], []
        logs = {}
        prefix = ''

        # with train data
        model.train()
        for idx, (data, target) in enumerate(patch_train_loader):
            data = torch.autograd.Variable(data).to(device=device, dtype=torch.float)
            print(data.shape)
            optimizer.zero_grad()
            pred = model(data)
            print(pred.shape)
            loss = loss_func(pred, data)
            # Backpropagation
            loss.backward()
            # update
            optimizer.step()
            # loss save
            model_losses.append(loss.cpu().data.item())
            logs[prefix + 'MSE loss'] = loss.item()
            print(idx, "complete")

        # with validation data (defect-free samples only)
        model.eval()
        for idx, (data, target) in enumerate(patch_val_loader):
            data = torch.autograd.Variable(data).to(device=device, dtype=torch.float)
            pred = model(data)
            loss = loss_func(pred, data)
            valid_losses.append(loss.item())
            prefix = 'val_'
            logs[prefix + 'MSE loss'] = loss.item()

        lr2_tr_loss.append(np.mean(model_losses))
        lr2_val_loss.append(np.mean(valid_losses))

        liveloss.update(logs)
        liveloss.draw()
        print("Epoch:", epoch + 1, " Training Loss: ", np.mean(model_losses),
              " Valid Loss: ", np.mean(valid_losses))

        # Save the model at every epoch, so that if overfitting occurs we can
        # fall back to an earlier epoch's weights and use them as the AE model.
        path = os.path.join(
            "/content/drive/Shared drives/data/nocrop/model/hs/model{}".format(str(model)[11:12]),
            str(model)[:12] + '_epoch{}.pth'.format(epoch))
        torch.save(model.state_dict(), path)

        # Save the model from the last epoch (e.g. epoch 19) as the AE model.
        if epoch == EPOCHS - 1:
            path = os.path.join("/content/drive/Shared drives/data/nocrop/model/hs",
                                str(model)[:12] + '.pth')
            torch.save(model.state_dict(), path)

    return lr2_tr_loss, lr2_val_loss
def test_default_from_step():
    """Test without from_step"""
    out = CheckOutput(target_log_history_length=10)
    loss_plotter = PlotLosses(outputs=[out])
    for idx in range(10):
        loss_plotter.update({
            'acc': 0.1 * idx,
            'loss': 0.69 / (idx + 1),
        })
        loss_plotter.send()
def test_minus_from_step():
    """Test from_step < 0"""
    out = CheckOutput(target_log_history_length=6)
    loss_plotter = PlotLosses(outputs=[out], from_step=-5)
    for idx in range(10):
        loss_plotter.update({
            'acc': 0.1 * idx,
            'loss': 0.69 / (idx + 1),
        })
        loss_plotter.send()
def execute(model, n_epochs, trn_ldr, val_ldr, opti, crit, plot):
    '''
    This routine is responsible for the entire training process, and handles
    in-training plotting

    Arguments:
        model    : the model to be trained                                   // nn.Module
        n_epochs : the number of epochs the model should be trained for      // integer
        trn_ldr  : the training dataloader                                   // dataloader
        val_ldr  : the validation dataloader                                 // dataloader
        opti     : the optimiser object                                      // optim
        crit     : the criterion (loss) function                             // nn loss function
        plot     : a flag denoting whether in-training plotting should occur // boolean

    Parameters:
        liveloss : responsible for in-training plotting, activated by plot   // PlotLosses() object
        epoch    : the current epoch number                                  // integer
        logs     : holds the log data for the current epoch                  // dict
        trn_los  : the training loss for the current epoch                   // float
        trn_acc  : the training accuracy for the current epoch               // float
        val_los  : the validation loss for the current epoch                 // float
        val_acc  : the validation accuracy for the current epoch             // float

    Returns:
        model    : the final, trained model                                  // nn.Module
    '''
    if plot:
        liveloss = PlotLosses()  # initialise liveloss if plotting flag true

    for epoch in range(n_epochs):
        logs = {}
        trn_los, trn_acc = trn(model, opti, crit, trn_ldr)  # run the training cycle
        logs['log loss'] = trn_los.item()
        logs['accuracy'] = trn_acc.item()  # update the logs
        val_los, val_acc = val(model, crit, val_ldr)  # run the validation cycle
        logs['val_' + 'log loss'] = val_los.item()
        logs['val_' + 'accuracy'] = val_acc.item()  # update the logs
        if plot:
            liveloss.update(logs)
            liveloss.draw()  # print the plots if flag is true
        if not plot:
            print("Epoch: " + str(epoch))  # if not plotting, print epoch number for tracking

    return model  # return finished trained model
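# A hedged usage sketch for execute(); Net, train_loader, and val_loader are
# assumed to exist elsewhere (as are the trn/val helpers it calls), and the
# hyperparameters are illustrative only:
import torch
import torch.nn as nn

model = Net()  # assumed model class
opti = torch.optim.Adam(model.parameters(), lr=1e-3)
crit = nn.CrossEntropyLoss()
model = execute(model, n_epochs=20, trn_ldr=train_loader, val_ldr=val_loader,
                opti=opti, crit=crit, plot=True)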
def main(args):
    liveloss = PlotLosses(groups=kfold_groups, group_patterns=group_patterns)
    global device
    device = args.device
    epochs = args.epochs
    bs = args.batch_size
    lr = args.lr
    dataset = args.dataset
    savef = args.savef
    loadf = args.loadf
    args.takeout = [1, 3]  # Take out node representation layers for infomax.

    path = osp.join(osp.abspath(''), 'data', dataset)
    dataset = TUDataset(path, name=dataset).shuffle()
    dataloader = DataLoader(dataset, batch_size=bs)
    args.num_classes = dataset.num_classes
    args.num_features = max(dataset.num_features, 1)

    model = HGI(args).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    if loadf:
        model.load_state_dict(torch.load(loadf))
        kfoldacc = test(model, dataloader, args)
        print('Kfold accuracy: {:.4f}'.format(kfoldacc))
        return

    for epoch in range(1, epochs + 1):
        loss = train(model, optimizer, dataloader)
        kfoldacc = test(model, dataloader, args)
        log(liveloss, loss, None, kfoldacc, None)
        best_val_i = max(liveloss.logger.log_history['kfold_acc'], key=lambda i: i.value)
        step, best_val = best_val_i.step, best_val_i.value
        if savef and kfoldacc >= best_val:
            torch.save(model.state_dict(), savef)

    best_val = final_log(liveloss)
    return best_val
def main():
    api_token = os.environ.get('NEPTUNE_API_TOKEN')
    project_qualified_name = os.environ.get('NEPTUNE_PROJECT_NAME')
    logger = NeptuneLogger(api_token=api_token,
                           project_qualified_name=project_qualified_name)
    liveplot = PlotLosses(outputs=[logger])
    for i in range(20):
        liveplot.update({
            'accuracy': 1 - np.random.rand() / (i + 2.),
            'val_accuracy': 1 - np.random.rand() / (i + 0.5),
            'mse': 1. / (i + 2.),
            'val_mse': 1. / (i + 0.5)
        })
        liveplot.send()
        sleep(.5)
def test_bokeh_plot():
    logger = BokehPlot()
    liveplot = PlotLosses(outputs=[logger], mode='script')
    for i in range(3):
        liveplot.update({
            'acc': 1 - np.random.rand() / (i + 2.),
            'val_acc': 1 - np.random.rand() / (i + 0.5),
            'loss': 1. / (i + 2.),
            'val_loss': 1. / (i + 0.5)
        })
        liveplot.send()
    assert os.path.isfile(logger.output_file)
def train_model_gener(model, criterion, optimizer, dataloaders, num_epochs=10):
    liveloss = PlotLosses()
    model = model.to(device)
    for epoch in range(num_epochs):
        logs = {}
        for phase in ['train', 'validation']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs_full, labels_class in dataloaders[phase]:
                # here are changes!
                inputs = inputs_full[:, :-1].to(device)
                labels = inputs_full[:, 1:].to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                _, preds = torch.max(outputs, 1)
                running_loss += loss.detach() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.float() / len(dataloaders[phase].dataset)

            prefix = ''
            if phase == 'validation':
                prefix = 'val_'

            logs[prefix + 'log loss'] = epoch_loss.item()
            logs[prefix + 'accuracy'] = epoch_acc.item()

        liveloss.update(logs)
        liveloss.draw()
def test_tensorboard():
    groups = {
        'accuracy': ['acc', 'val_acc'],
        'log-loss': ['loss', 'val_loss']
    }
    logger = TensorboardTFLogger()
    liveplot = PlotLosses(groups=groups, outputs=(logger, ))
    for i in range(3):
        liveplot.update({
            'acc': 1 - np.random.rand() / (i + 2.),
            'val_acc': 1 - np.random.rand() / (i + 0.5),
            'loss': 1. / (i + 2.),
            'val_loss': 1. / (i + 0.5)
        })
        liveplot.send()
    assert all([
        f.startswith('events.out.tfevents.') for f in os.listdir(logger._path)
    ])
def fit(self, train_loader):
    liveloss = PlotLosses()
    logs = {}
    for epoch in range(self.epoch_num):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = (Variable(data.float()).to(self.device),
                            Variable(target.float()).to(self.device))
            data = data.view(-1, self.input_layer_size)
            target = target.view(-1, self.input_layer_size)
            self.optimizer.zero_grad()
            net_out = self.model(data)
            loss = self.criterion(net_out, target)
            loss.backward()
            self.optimizer.step()
        epoch_loss = loss.detach()
        logs['MSE loss'] = epoch_loss.item()
        liveloss.update(logs)
        liveloss.send()
    print("Number of weight coefficients:", self.model.number_of_weight_coefficients)
def init_live_plot(self, file):
    self.liveloss = PlotLosses(fig_path=file)
def train(self, train_ds, valid_ds, plot_loss=True, verbose=True,
          save_path=None, need_y: str = 'no'):
    """Method for training, takes train and validation Datasets, as well as
    parameters specifying training monitoring, and trains a network for a
    given set of hyperparameters.

    :param train_ds: training Dataset
    :param valid_ds: validation Dataset
    :param plot_loss: whether to plot loss during training
    :param verbose: whether to print loss after each epoch
    :param save_path: if given, serialises the model and saves there
    :param need_y: command to extract y's in order to train Attention based
        models with 'state' or 'switch cells' layer
    """
    # Create DataLoaders
    assert need_y in ['no', 'yes'], 'Should be no/yes'
    train_dl = DataLoader(train_ds, batch_size=self.batch_size, shuffle=True)
    test_dl = DataLoader(valid_ds, batch_size=self.batch_size)

    # Dictionary for losses
    losses = {'train_loss': [], 'valid_loss': []}

    # Plot losses if the user chooses so
    if plot_loss:
        liveloss = PlotLosses()

    # Iterate over epochs
    for epoch in range(self.max_epochs):
        # Switch to training mode
        self.model.train()
        if verbose:
            print('Starting epoch {}'.format(epoch + 1))

        # A list for batch-wise training losses in a given epoch
        epoch_loss = []

        # Iterate over batches
        for idx_batch, batch in enumerate(train_dl):
            self.optimizer.zero_grad()
            if need_y == 'yes':
                out = self.model(batch[0]['train_obs'].permute(1, 0, 2),
                                 y=batch[1].permute(1, 0))
                tr_loss = self.loss(out, batch[0]['train_y'].to(DEVICE))
            elif need_y == 'no':
                out = self.model(batch['train_obs'].permute(1, 0, 2))
                tr_loss = self.loss(out, batch['train_y'].to(DEVICE))
            epoch_loss.append(tr_loss.item())
            tr_loss.backward()
            self.optimizer.step()

        # Switch to evaluation mode
        self.model.eval()

        # Compute training loss for the epoch
        losses['train_loss'].append(sum(epoch_loss) / len(train_dl))

        # Compute validation loss by iterating through valid dl batches
        with torch.no_grad():
            # A list for batch-wise validation losses
            val_loss = []
            # Iterate over batches in the validation DataLoader
            for idx_v_batch, v_batch in enumerate(test_dl):
                if need_y == 'yes':
                    val_loss.append(self.loss(
                        self.model(v_batch[0]['test_obs'].permute(1, 0, 2),
                                   y=v_batch[1].permute(1, 0)),
                        v_batch[0]['test_y']).item())
                elif need_y == 'no':
                    val_loss.append(self.loss(
                        self.model(v_batch['test_obs'].permute(1, 0, 2)),
                        v_batch['test_y']).item())
            losses['valid_loss'].append(sum(val_loss) / len(test_dl))

        # Printing loss for a given epoch
        if verbose:
            print('Loss: {}'.format(losses['valid_loss'][epoch]))

        # Plot loss after each epoch if the user chose to
        if plot_loss:
            logs = {
                'log_loss': losses['train_loss'][epoch],
                'val_log_loss': losses['valid_loss'][epoch]
            }
            liveloss.update(logs)
            liveloss.draw()

        # Early stopping
        if self.early_stopping_patience:
            lag_1 = losses['valid_loss'][(epoch - self.early_stopping_patience):epoch]
            lag_2 = losses['valid_loss'][(epoch - self.early_stopping_patience - 1):(epoch - 1)]
            no_drops = sum(l1 < l2 for l1, l2 in zip(lag_1, lag_2))
            if epoch > self.early_stopping_patience and no_drops == 0:
                break

    # Save last loss
    self.final_loss = np.mean(losses['valid_loss'][-1])
    self.last_epoch = epoch

    # Save model
    if save_path:
        torch.save(self.model.state_dict(), save_path)
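# A hypothetical call to the train() method above, assuming a trainer instance
# whose batch_size, max_epochs, optimizer, and loss are already configured:
trainer.train(train_ds, valid_ds, plot_loss=True, verbose=False,
              save_path='model.pt', need_y='yes')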
def fit(self, X, eval_X, y=None, model_saved_path='bprh_model.pkl', iter_to_save=5000,
        coselection_saved_path='data/item-set-coselection.pkl', iter_to_log=100,
        correlation=True, coselection=False, plot_metric=False, log_metric=False):
    # Here we do not load model -> train a new model
    if self.existed_model_path is None:
        # To make sure train and test work with inconsistent user and item lists,
        # we transform user and item string IDs to int IDs so that each ID is
        # its index in U and V
        print("Registering Model Parameters")
        # rename user and item
        self.user_original_id_list = sorted(set(X.UserID).union(set(eval_X.UserID)))
        self.item_original_id_list = sorted(set(X.ItemID).union(set(eval_X.ItemID)))
        self.train_data = X.copy()
        self.test_data = eval_X.copy()
        self.train_data.UserID = self.train_data.UserID.apply(
            lambda x: self.user_original_id_list.index(x))
        self.train_data.ItemID = self.train_data.ItemID.apply(
            lambda x: self.item_original_id_list.index(x))
        self.test_data.UserID = self.test_data.UserID.apply(
            lambda x: self.user_original_id_list.index(x))
        self.test_data.ItemID = self.test_data.ItemID.apply(
            lambda x: self.item_original_id_list.index(x))
        self.item_list = [idx[0] for idx in enumerate(self.item_original_id_list)]
        self.user_list = [idx[0] for idx in enumerate(self.user_original_id_list)]
        self.num_u = len(self.user_list)
        self.num_i = len(self.item_list)

        # build I_u_t, I_u_a (pre-computing for acceleration)
        self.build_itemset_for_user()

        # Calculate auxiliary-target correlation C for every user and each type
        # of auxiliary action
        if correlation:
            self.alpha_u = self.auxiliary_target_correlation(X=self.train_data)
        else:
            print("No auxiliary-target correlation - all alpha_u equal to one")
            alpha_u_all_ones = dict()
            user_set_bar = tqdm(self.user_list)
            for u in user_set_bar:
                alpha_u_all_ones[u] = dict()
                alpha_u_all_ones[u]['alpha'] = 1.0
            self.alpha_u = alpha_u_all_ones.copy()

        # Generate item-set based on co-selection
        if coselection:
            self.S, self.U_item = self.itemset_coselection(X=self.train_data)

        # Initialization of User and Item Matrices
        if self.random_state is not None:
            np.random.seed(self.random_state)
        else:
            np.random.seed(0)
        print("Initializing User and Item Matrices")
        # NOTE: Initialization is influenced by mean and std
        self.U = np.random.normal(size=(self.num_u, self.dim + 1), loc=0.0, scale=0.1)
        self.V = np.random.normal(size=(self.dim + 1, self.num_i), loc=0.0, scale=0.1)
        # self.U = np.zeros(shape=(self.num_u, self.dim + 1))
        # self.V = np.zeros(shape=(self.dim + 1, self.num_i))
        self.U[:, -1] = 1.0

        # estimation is U dot V
        self.estimation = np.dot(self.U, self.V)

    # Configure loss plots layout
    if plot_metric:
        groups = {
            'Precision@K': ['Precision@5', 'Precision@10'],
            'Recall@K': ['Recall@5', 'Recall@10'],
            'AUC': ['AUC']
        }
        plot_losses = PlotLosses(groups=groups)

    # Start Iteration
    all_item = set(self.item_list)
    user_in_train = sorted(set(self.train_data.UserID))
    print("Start Training")
    with trange(self.num_iter) as t:
        for index in t:
            # Description will be displayed on the left
            # t.set_description('ITER %i' % index)

            # Build u, I, J, K
            # uniformly sample a user from U
            u = choice(user_in_train)

            # build I
            # uniformly sample an item i from I_u_t
            I_u_t = self.I_u_t_train[u]
            if len(I_u_t) != 0:
                i = choice(sorted(I_u_t))
                # build I = I_u_t cap S_i
                if coselection:
                    I = I_u_t.intersection(self.S[i])
                else:
                    # if no coselection, we set I as the set of purchased items
                    # by user u; no uniform sampling, like COFISET
                    I = I_u_t
            else:
                # if no item in I_u_t, then set I to empty set
                i = None
                I = set()

            # build J; since we only have one auxiliary action, we follow the
            # uniform sampling
            I_u_oa = self.I_u_a_train[u] - I_u_t
            if len(I_u_oa) != 0:
                j = choice(sorted(I_u_oa))
                if coselection:
                    # NOTE: typo in paper?
                    J = I_u_oa.intersection(self.S[j])
                else:
                    # if no coselection, we set J as the set of only-auxiliary
                    # items by user u; no uniform sampling, like COFISET
                    J = I_u_oa
            else:
                # if no item in I_u_oa, then set J to empty set
                j = None
                J = set()

            # build K
            I_u_n = all_item - I_u_t - I_u_oa
            if len(I_u_n) != 0:
                k = choice(sorted(I_u_n))
                # build K
                if coselection:
                    # NOTE: typo in paper?
                    K = I_u_n.intersection(self.S[k])
                else:
                    # if no coselection, we set K as the set of no-action items
                    # by user u; no uniform sampling, like COFISET
                    K = I_u_n
            else:
                # if no item in I_u_n, then set K to empty set
                k = None
                K = set()

            # calculate intermediate variables
            # get specific alpha_u
            spec_alpha_u = self.alpha_u[u]['alpha']
            U_u = self.U[u, :-1].copy()
            sorted_I = sorted(I)
            sorted_J = sorted(J)
            sorted_K = sorted(K)

            # get r_hat_uIJ, r_hat_uJK, r_hat_uIK
            r_hat_uI = np.average(self.estimation[u, sorted_I]) if len(I) != 0 else np.array([0])
            r_hat_uJ = np.average(self.estimation[u, sorted_J]) if len(J) != 0 else np.array([0])
            r_hat_uK = np.average(self.estimation[u, sorted_K]) if len(K) != 0 else np.array([0])
            r_hat_uIJ = r_hat_uI - r_hat_uJ
            r_hat_uJK = r_hat_uJ - r_hat_uK
            r_hat_uIK = r_hat_uI - r_hat_uK

            # get V_bar_I, V_bar_J, V_bar_K
            V_bar_I = np.average(self.V[:-1, sorted_I], axis=1) if len(I) != 0 else np.zeros(shape=(self.dim,))
            V_bar_J = np.average(self.V[:-1, sorted_J], axis=1) if len(J) != 0 else np.zeros(shape=(self.dim,))
            V_bar_K = np.average(self.V[:-1, sorted_K], axis=1) if len(K) != 0 else np.zeros(shape=(self.dim,))

            # get b_I, b_J, b_K
            b_I = np.average(self.V[-1, sorted_I]) if len(I) != 0 else np.array([0])
            b_J = np.average(self.V[-1, sorted_J]) if len(J) != 0 else np.array([0])
            b_K = np.average(self.V[-1, sorted_K]) if len(K) != 0 else np.array([0])

            # here we want to examine the condition of empty sets
            indicator_I = indicator(len(I) == 0)
            indicator_J = indicator(len(J) == 0)
            indicator_K = indicator(len(K) == 0)
            indicator_sum = indicator_I + indicator_J + indicator_K

            if 0 <= indicator_sum <= 1:
                # these are the cases when only one set is empty or no set is empty:
                # when all three are not empty, or I is empty, or K is empty, it is
                # easy to rewrite the obj by multiplying the indicator;
                # when J is empty, we have to rewrite the obj
                if indicator_J == 1:
                    # when J is empty
                    # NABLA U_u
                    df_dUu = sigmoid(-r_hat_uIK) * (V_bar_I - V_bar_K)
                    dR_dUu = 2 * self.lambda_u * U_u
                    # update U_u = U_u + gamma * (df_dUu - dR_dUu)
                    self.U[u, :-1] += self.gamma * (df_dUu - dR_dUu)

                    # NABLA V_i
                    df_dbi = (1 - indicator_I) * sigmoid(-r_hat_uIK) / indicator_len(I)
                    dR_dbi = (1 - indicator_I) * 2 * self.lambda_b * b_I / indicator_len(I)
                    df_dVi = df_dbi * U_u
                    dR_dVi = 2 * self.lambda_v * V_bar_I / indicator_len(I)
                    # update V_i = V_i + gamma * (df_dVi - dR_dVi)
                    self.V[:-1, sorted_I] += self.gamma * (df_dVi - dR_dVi)[:, None]  # trick: transpose here
                    # update b_i = b_i + gamma * (df_dbi - dR_dbi)
                    self.V[-1, sorted_I] += self.gamma * (df_dbi - dR_dbi)

                    # No change on J

                    # NABLA V_k
                    df_dbk = (1 - indicator_K) * -sigmoid(-r_hat_uIK) / indicator_len(K)
                    dR_dbk = (1 - indicator_K) * 2 * self.lambda_b * b_K / indicator_len(K)
                    df_dVk = df_dbk * U_u
                    dR_dVk = 2 * self.lambda_v * V_bar_K / indicator_len(K)
                    # update V_k = V_k + gamma * (df_dVk - dR_dVk)
                    self.V[:-1, sorted_K] += self.gamma * (df_dVk - dR_dVk)[:, None]  # trick: transpose here
                    # update b_k = b_k + gamma * (df_dbk - dR_dbk)
                    self.V[-1, sorted_K] += self.gamma * (df_dbk - dR_dbk)
                else:
                    # when J is not empty
                    # NABLA U_u
                    df_dUu = (1 - indicator_I) * sigmoid(-r_hat_uIJ / spec_alpha_u) / spec_alpha_u * (V_bar_I - V_bar_J) + \
                             (1 - indicator_K) * sigmoid(-r_hat_uJK) * (V_bar_J - V_bar_K)
                    dR_dUu = 2 * self.lambda_u * U_u
                    # update U_u = U_u + gamma * (df_dUu - dR_dUu)
                    self.U[u, :-1] += self.gamma * (df_dUu - dR_dUu)

                    # NABLA V_i
                    df_dbi = (1 - indicator_I) * sigmoid(-r_hat_uIJ / spec_alpha_u) / (indicator_len(I) * spec_alpha_u)
                    dR_dbi = (1 - indicator_I) * 2 * self.lambda_b * b_I / indicator_len(I)
                    df_dVi = df_dbi * U_u
                    dR_dVi = 2 * self.lambda_v * V_bar_I / indicator_len(I)
                    # update V_i = V_i + gamma * (df_dVi - dR_dVi)
                    self.V[:-1, sorted_I] += self.gamma * (df_dVi - dR_dVi)[:, None]  # trick: transpose here
                    # update b_i = b_i + gamma * (df_dbi - dR_dbi)
                    self.V[-1, sorted_I] += self.gamma * (df_dbi - dR_dbi)

                    # NABLA V_j
                    df_dbj = (1 - indicator_I) * (-sigmoid(-r_hat_uIJ / spec_alpha_u) / spec_alpha_u
                                                  + (1 - indicator_K) * sigmoid(-r_hat_uJK)) / indicator_len(J)
                    dR_dbj = 2 * self.lambda_b * b_J / indicator_len(J)
                    df_dVj = df_dbj * U_u
                    dR_dVj = 2 * self.lambda_v * V_bar_J / indicator_len(J)
                    # update V_j = V_j + gamma * (df_dVj - dR_dVj)
                    self.V[:-1, sorted_J] += self.gamma * (df_dVj - dR_dVj)[:, None]  # trick: transpose here
                    # update b_j = b_j + gamma * (df_dbj - dR_dbj)
                    self.V[-1, sorted_J] += self.gamma * (df_dbj - dR_dbj)

                    # NABLA V_k
                    df_dbk = (1 - indicator_K) * -sigmoid(-r_hat_uJK) / indicator_len(K)
                    dR_dbk = (1 - indicator_K) * 2 * self.lambda_b * b_K / indicator_len(K)
                    df_dVk = df_dbk * U_u
                    dR_dVk = 2 * self.lambda_v * V_bar_K / indicator_len(K)
                    # update V_k = V_k + gamma * (df_dVk - dR_dVk)
                    self.V[:-1, sorted_K] += self.gamma * (df_dVk - dR_dVk)[:, None]  # trick: transpose here
                    # update b_k = b_k + gamma * (df_dbk - dR_dbk)
                    self.V[-1, sorted_K] += self.gamma * (df_dbk - dR_dbk)
            else:
                # these are the cases when at least two sets are empty;
                # in these cases, we ignore this user and continue the loop
                continue

            # calculate loss
            # f_Theta = np.log(sigmoid(r_hat_uIJ / spec_alpha_u)) + np.log(sigmoid(r_hat_uJK))
            # regula = self.lambda_u * np.linalg.norm(U_u, ord=2) + self.lambda_v * (
            #     (np.linalg.norm(V_bar_I, ord=2) if len(I) != 0 else 0) + (
            #     np.linalg.norm(V_bar_J, ord=2) if len(J) != 0 else 0) + (
            #     np.linalg.norm(V_bar_K, ord=2)) if len(K) != 0 else 0) + self.lambda_b * (
            #     (b_I if len(I) != 0 else 0) ** 2 + (b_J if len(J) != 0 else 0) ** 2 + (
            #     b_K if len(K) != 0 else 0) ** 2)
            # bprh_loss = f_Theta - regula

            # update estimation
            old_estimation = self.estimation.copy()
            # self.estimation = np.dot(self.U, self.V)
            all_sampled_item = sorted(set.union(I, J, K))
            # for sampled_item in all_sampled_item:
            #     self.estimation[:, sampled_item] = np.dot(self.U, self.V[:, sampled_item])
            self.estimation[:, all_sampled_item] = np.dot(self.U, self.V[:, all_sampled_item])

            # estimation changed
            est_changed = np.linalg.norm(self.estimation - old_estimation)

            # we only save the model to file when the num of iter % iter_to_save == 0
            if (index + 1) % iter_to_save == 0:
                self.save(model_path=model_saved_path + "_" + str(index))

            # we only calculate metrics when the num of iter % iter_to_log == 0
            if (index + 1) % iter_to_log == 0:
                if log_metric or plot_metric:
                    # calculate metrics on test data
                    user_to_eval = sorted(set(self.test_data.UserID))
                    scoring_list_5, precision_5, recall_5, avg_auc = self.scoring(
                        user_to_eval=user_to_eval, ground_truth=self.test_data,
                        K=5, train_data_as_reference_flag=True)
                    scoring_list_10, precision_10, recall_10, _ = self.scoring(
                        user_to_eval=user_to_eval, ground_truth=self.test_data,
                        K=10, train_data_as_reference_flag=True)
                if log_metric:
                    self.eval_hist.append([index, precision_5, precision_10,
                                           recall_5, recall_10, avg_auc])
                if plot_metric:
                    plot_losses.update({
                        'Precision@5': precision_5,
                        'Precision@10': precision_10,
                        'Recall@5': recall_5,
                        'Recall@10': recall_10,
                        'AUC': avg_auc
                    })
                    plot_losses.send()

            # Postfix will be displayed on the right,
            # formatted automatically based on argument's datatype
            t.set_postfix(est_changed=est_changed, len_I=len(I), len_J=len(J), len_K=len(K))
def train_cross_validation(model_cls, dataset, dropout=0.0, lr=1e-3, weight_decay=1e-2,
                           num_epochs=200, n_splits=10, use_gpu=True, dp=False, ddp=False,
                           comment='', tb_service_loc='192.168.192.57:6007', batch_size=1,
                           num_workers=0, pin_memory=False, cuda_device=None, tb_dir='runs',
                           model_save_dir='saved_models', res_save_dir='res', fold_no=None,
                           saved_model_path=None, device_ids=None, patience=20, seed=None,
                           fold_seed=None, save_model=False, is_reg=True, live_loss=True,
                           domain_cls=True, final_cls=True):
    """
    :type fold_seed: int
    :param live_loss: bool
    :param is_reg: bool
    :param save_model: bool
    :param seed:
    :param patience: for early stopping
    :param device_ids: for ddp
    :param saved_model_path:
    :param fold_no: int
    :param ddp: DDP
    :param cuda_device: list of int
    :param pin_memory: bool, DataLoader args
    :param num_workers: int, DataLoader args
    :param model_cls: pytorch Module cls
    :param dataset: instance
    :param dropout: float
    :param lr: float
    :param weight_decay:
    :param num_epochs:
    :param n_splits: number of kFolds
    :param use_gpu: bool
    :param dp: bool
    :param comment: comment in the logs, to filter runs in tensorboard
    :param tb_service_loc: tensorboard service location
    :param batch_size: Dataset args not DataLoader
    :return:
    """
    saved_args = locals()
    seed = int(time.time() % 1e4 * 1e5) if seed is None else seed
    saved_args['random_seed'] = seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    if use_gpu:
        torch.cuda.manual_seed_all(seed)
        # torch.backends.cudnn.deterministic = True
        # torch.backends.cudnn.benchmark = False

    model_name = model_cls.__name__

    if not cuda_device:
        if device_ids and dp:
            device = device_ids[0]
        else:
            device = torch.device('cuda' if torch.cuda.is_available() and use_gpu else 'cpu')
    else:
        device = cuda_device

    device_count = torch.cuda.device_count() if dp else 1
    device_count = len(device_ids) if (device_ids is not None and dp) else device_count
    batch_size = batch_size * device_count

    # TensorBoard
    log_dir_base = get_model_log_dir(comment, model_name)
    if tb_service_loc is not None:
        print("TensorBoard available at http://{1}/#scalars&regexInput={0}".format(
            log_dir_base, tb_service_loc))
    else:
        print("Please set up TensorBoard")

    # model
    criterion = nn.NLLLoss()

    print("Training {0} {1} models for cross validation...".format(n_splits, model_name))
    # 1
    # folds, fold = KFold(n_splits=n_splits, shuffle=False, random_state=seed), 0
    # 2
    # folds = GroupKFold(n_splits=n_splits)
    # iter = folds.split(np.zeros(len(dataset)), groups=dataset.data.site_id)
    # 4
    # folds = StratifiedKFold(n_splits=n_splits, random_state=fold_seed, shuffle=True if fold_seed else False)
    # iter = folds.split(np.zeros(len(dataset)), dataset.data.y.numpy(), groups=dataset.data.subject_id)
    # 5
    fold = 0
    iter = multi_site_cv_split(dataset.data.y, dataset.data.site_id, dataset.data.subject_id,
                               n_splits, random_state=fold_seed,
                               shuffle=True if fold_seed else False)

    for train_idx, val_idx in tqdm_notebook(iter, desc='CV', leave=False):
        fold += 1
        liveloss = PlotLosses() if live_loss else None

        # for a specific fold
        if fold_no is not None:
            if fold != fold_no:
                continue

        writer = SummaryWriter(log_dir=osp.join('runs', log_dir_base + str(fold)))
        model_save_dir = osp.join('saved_models', log_dir_base + str(fold))

        print("creating dataloader for fold {}".format(fold))
        train_dataset, val_dataset = norm_train_val(dataset, train_idx, val_idx)

        model = model_cls(writer)

        train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size,
                                      collate_fn=lambda data_list: data_list,
                                      num_workers=num_workers, pin_memory=pin_memory)
        val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size,
                                    collate_fn=lambda data_list: data_list,
                                    num_workers=num_workers, pin_memory=pin_memory)

        if fold == 1 or fold_no is not None:
            print(model)
            writer.add_text('model_summary', model.__repr__())
            writer.add_text('training_args', str(saved_args))

        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, betas=(0.9, 0.999),
                                      eps=1e-08, weight_decay=weight_decay, amsgrad=False)
        # scheduler_reduce = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)
        scheduler = GradualWarmupScheduler(optimizer, multiplier=10, total_epoch=5)
        # scheduler = scheduler_reduce
        # optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)

        if dp and use_gpu:
            model = model.cuda() if device_ids is None else model.to(device_ids[0])
            model = DataParallel(model, device_ids=device_ids)
        elif use_gpu:
            model = model.to(device)

        if saved_model_path is not None:
            model.load_state_dict(torch.load(saved_model_path))

        best_map, patience_counter, best_score = 0.0, 0, np.inf

        for epoch in tqdm_notebook(range(1, num_epochs + 1), desc='Epoch', leave=False):
            logs = {}
            # scheduler.step(epoch=epoch, metrics=best_score)

            for phase in ['train', 'validation']:
                if phase == 'train':
                    model.train()
                    dataloader = train_dataloader
                else:
                    model.eval()
                    dataloader = val_dataloader

                # Logging
                running_total_loss = 0.0
                running_corrects = 0
                running_reg_loss = 0.0
                running_nll_loss = 0.0
                epoch_yhat_0, epoch_yhat_1 = torch.tensor([]), torch.tensor([])
                epoch_label, epoch_predicted = torch.tensor([]), torch.tensor([])
                logging_hist = True if phase == 'train' else False  # once per epoch

                for data_list in tqdm_notebook(dataloader, desc=phase, leave=False):
                    # TODO: check devices
                    if dp:
                        data_list = to_cuda(data_list,
                                            (device_ids[0] if device_ids is not None else 'cuda'))

                    y_hat, domain_yhat, reg = model(data_list)

                    y = torch.tensor([], dtype=dataset.data.y.dtype, device=device)
                    domain_y = torch.tensor([], dtype=dataset.data.site_id.dtype, device=device)
                    for data in data_list:
                        y = torch.cat([y, data.y.view(-1).to(device)])
                        domain_y = torch.cat([domain_y, data.site_id.view(-1).to(device)])

                    loss = criterion(y_hat, y)
                    domain_loss = criterion(domain_yhat, domain_y)
                    # domain_loss = -1e-7 * domain_loss
                    # print(domain_loss.item())
                    if domain_cls:
                        total_loss = domain_loss
                        _, predicted = torch.max(domain_yhat, 1)
                        label = domain_y
                    if final_cls:
                        total_loss = loss
                        _, predicted = torch.max(y_hat, 1)
                        label = y
                    if domain_cls and final_cls:
                        total_loss = (loss + domain_loss).sum()
                        _, predicted = torch.max(y_hat, 1)
                        label = y
                    if is_reg:
                        total_loss += reg.sum()

                    if phase == 'train':
                        # print(torch.autograd.grad(y_hat.sum(), model.saved_x, retain_graph=True))
                        optimizer.zero_grad()
                        total_loss.backward()
                        nn.utils.clip_grad_norm_(model.parameters(), 2.0)
                        optimizer.step()

                    running_nll_loss += loss.item()
                    running_total_loss += total_loss.item()
                    running_reg_loss += reg.sum().item()
                    running_corrects += (predicted == label).sum().item()

                    epoch_yhat_0 = torch.cat([epoch_yhat_0, y_hat[:, 0].detach().view(-1).cpu()])
                    epoch_yhat_1 = torch.cat([epoch_yhat_1, y_hat[:, 1].detach().view(-1).cpu()])
                    epoch_label = torch.cat([epoch_label, label.detach().float().view(-1).cpu()])
                    epoch_predicted = torch.cat([epoch_predicted,
                                                 predicted.detach().float().view(-1).cpu()])

                # precision = sklearn.metrics.precision_score(epoch_label, epoch_predicted, average='micro')
                # recall = sklearn.metrics.recall_score(epoch_label, epoch_predicted, average='micro')
                # f1_score = sklearn.metrics.f1_score(epoch_label, epoch_predicted, average='micro')
                accuracy = sklearn.metrics.accuracy_score(epoch_label, epoch_predicted)
                epoch_total_loss = running_total_loss / dataloader.__len__()
                epoch_nll_loss = running_nll_loss / dataloader.__len__()
                epoch_reg_loss = running_reg_loss / dataloader.__len__()

                # print('epoch {} {}_nll_loss: {}'.format(epoch, phase, epoch_nll_loss))
                writer.add_scalars('nll_loss',
                                   {'{}_nll_loss'.format(phase): epoch_nll_loss}, epoch)
                writer.add_scalars('accuracy',
                                   {'{}_accuracy'.format(phase): accuracy}, epoch)
                # writer.add_scalars('{}_APRF'.format(phase),
                #                    {'accuracy': accuracy, 'precision': precision,
                #                     'recall': recall, 'f1_score': f1_score},
                #                    epoch)
                if epoch_reg_loss != 0:
                    writer.add_scalars('reg_loss'.format(phase),
                                       {'{}_reg_loss'.format(phase): epoch_reg_loss}, epoch)
                    # print(epoch_reg_loss)
                # writer.add_histogram('hist/{}_yhat_0'.format(phase), epoch_yhat_0, epoch)
                # writer.add_histogram('hist/{}_yhat_1'.format(phase), epoch_yhat_1, epoch)

                # Save Model & Early Stopping
                if phase == 'validation':
                    model_save_path = model_save_dir + '-{}-{}-{:.3f}-{:.3f}'.format(
                        model_name, epoch, accuracy, epoch_nll_loss)
                    # best score
                    if accuracy > best_map:
                        best_map = accuracy
                        model_save_path = model_save_path + '-best'

                    score = epoch_nll_loss
                    if score < best_score:
                        patience_counter = 0
                        best_score = score
                    else:
                        patience_counter += 1

                    # skip first 10 epoch
                    # best_score = best_score if epoch > 10 else -np.inf

                    if save_model:
                        for th, pfix in zip([0.8, 0.75, 0.7, 0.5, 0.0],
                                            ['-perfect', '-great', '-good', '-bad', '-miss']):
                            if accuracy >= th:
                                model_save_path += pfix
                                break
                        torch.save(model.state_dict(), model_save_path)

                    writer.add_scalars('best_val_accuracy',
                                       {'{}_accuracy'.format(phase): best_map}, epoch)
                    writer.add_scalars('best_nll_loss',
                                       {'{}_nll_loss'.format(phase): best_score}, epoch)
                    writer.add_scalars('learning_rate',
                                       {'learning_rate': scheduler.optimizer.param_groups[0]['lr']},
                                       epoch)

                    if patience_counter >= patience:
                        print("Stopped at epoch {}".format(epoch))
                        return

                if live_loss:
                    prefix = ''
                    if phase == 'validation':
                        prefix = 'val_'
                    logs[prefix + 'log loss'] = epoch_nll_loss
                    logs[prefix + 'accuracy'] = accuracy

            if live_loss:
                liveloss.update(logs)
                liveloss.draw()

    print("Done !")
state = {
    'net_state_dict': net.state_dict(),
    # 'acc': test_correct / test_total,
    # 'optimizer_state_dict': optimizer.state_dict()
}
if not os.path.isdir('./checkpoint/Sqnet_1x_v1.0'):
    os.makedirs('./checkpoint/Sqnet_1x_v1.0')
torch.save(state, './checkpoint/Sqnet_1x_v1.0/Sqnet_1x_v1.0_Cifar10.ckpt')
best_acc = test_correct / test_total

# checkpoint = torch.load('./checkpoint/Sqnet_1x_v1.0/Sqnet_1x_v1.0_Cifar10.ckpt')
# net.load_state_dict(checkpoint['net_state_dict'])
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

liveloss = PlotLosses()
best_cost = float('inf')  # initialise before the loop so the comparison below can ever succeed
for _epoch in range(start_epoch, start_epoch + num_epochs):
    start_time = time.time()
    train(_epoch)
    print()
    test(_epoch)
    print()
    print()
    end_time = time.time()
    print('Epoch #%d Cost %ds' % (_epoch, end_time - start_time))
    if end_time - start_time < best_cost:
        best_cost = end_time - start_time
    liveloss.update({
        'log loss': train_loss,
def main():
    global best_test_bpd

    last_checkpoints = []
    lipschitz_constants = []
    ords = []

    # if args.resume:
    #     validate(args.begin_epoch - 1, model, ema)

    liveloss = PlotLosses()
    for epoch in range(args.begin_epoch, args.nepochs):
        logs = {}
        logger.info('Current LR {}'.format(optimizer.param_groups[0]['lr']))

        running_loss = train(epoch, model)
        lipschitz_constants.append(get_lipschitz_constants(model))
        ords.append(get_ords(model))
        logger.info('Lipsh: {}'.format(pretty_repr(lipschitz_constants[-1])))
        logger.info('Order: {}'.format(pretty_repr(ords[-1])))

        # epoch_loss = running_loss / len(dataloaders[phase].dataset)
        epoch_loss = running_loss / len(
            datasets.CIFAR10(args.dataroot, train=True, transform=transform_train))
        logs['log loss'] = epoch_loss.item()
        liveloss.update(logs)
        liveloss.draw()

        if args.ema_val:
            test_bpd = validate(epoch, model, ema)
        else:
            test_bpd = validate(epoch, model)

        if args.scheduler and scheduler is not None:
            scheduler.step()

        if test_bpd < best_test_bpd:
            best_test_bpd = test_bpd
            utils.save_checkpoint({
                'state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'args': args,
                'ema': ema,
                'test_bpd': test_bpd,
            }, os.path.join(args.save, 'moMoModels'), epoch, last_checkpoints,
                num_checkpoints=5)

        torch.save({
            'state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'args': args,
            'ema': ema,
            'test_bpd': test_bpd,
        }, os.path.join(args.save, 'models', '010mmoosttMoosttRecentt.pth'))
def train_model(output_path, model, dataloaders, dataset_sizes, criterion,
                optimizer, num_epochs=5, scheduler=None):
    if not os.path.exists('models/' + str(output_path)):
        os.makedirs('models/' + str(output_path))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    since = time.time()
    liveloss = PlotLosses()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best = 0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                if scheduler is not None:
                    scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            pbar = tqdm(dataloaders[phase])
            for i, (inputs, labels) in enumerate(pbar):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                # print("\rIteration: {}/{}, Loss: {}.".format(i+1, len(dataloaders[phase]), loss.item() * inputs.size(0)), end="")
                # print( (i+1)*100. / len(dataloaders[phase]), "% Complete" )
                pbar.set_description(desc=f'Loss={loss.item()} Batch_id={i} ')

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            if phase == 'train':
                avg_loss = epoch_loss
                t_acc = epoch_acc
            else:
                val_loss = epoch_loss
                val_acc = epoch_acc
            # print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best = epoch + 1
                best_model_wts = copy.deepcopy(model.state_dict())

        liveloss.update({
            'log loss': avg_loss,
            'val_log loss': val_loss,
            'accuracy': t_acc,
            'val_accuracy': val_acc
        })
        # liveloss.draw()
        print('Train Loss: {:.4f} Acc: {:.4f}'.format(avg_loss, t_acc))
        print('  Val Loss: {:.4f} Acc: {:.4f}'.format(val_loss, val_acc))
        print()
        torch.save(model.state_dict(),
                   './models/' + str(output_path) + '/model_{}_epoch.pt'.format(epoch + 1))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best Validation Accuracy: {}, Epoch: {}'.format(best_acc, best))
class LiveLossPlot(Callback):
    """
    Callback to write metrics to `LiveLossPlot <https://github.com/stared/livelossplot>`_,
    a library for visualisation in notebooks

    Example: ::

        >>> import torch.nn
        >>> from torchbearer import Trial
        >>> from torchbearer.callbacks import LiveLossPlot

        # Example Trial which uses LiveLossPlot to plot metrics during training.
        >>> model = torch.nn.Linear(1,1)
        >>> live_loss_plot = LiveLossPlot()
        >>> trial = Trial(model, callbacks=[live_loss_plot], metrics=['acc'])

    Args:
        on_batch (bool): If True, batch metrics will be logged. Else batch metrics will not be logged
        batch_step_size (int): The number of batches between logging metrics
        on_epoch (bool): If True, epoch metrics will be logged every epoch. Else epoch metrics will not be logged
        draw_once (bool): If True, draw the plot only at the end of training. Else draw every time metrics are logged
        kwargs: Keyword arguments for livelossplot.PlotLosses

    State Requirements:
        - :attr:`torchbearer.state.METRICS`: Metrics should be a dict containing the metrics to be plotted
        - :attr:`torchbearer.state.BATCH`: Batch should be the current batch or iteration number in the epoch
    """

    def __init__(self, on_batch=False, batch_step_size=10, on_epoch=True, draw_once=False, **kwargs):
        super(LiveLossPlot, self).__init__()
        self._kwargs = kwargs

        self.on_batch = on_batch
        self.on_epoch = on_epoch
        self.draw_once = draw_once
        self.batch_step_size = batch_step_size

        if on_batch:
            self.on_step_training = self._on_step_training
        if on_epoch:
            self.on_end_epoch = self._on_end_epoch

    def on_start(self, state):
        from livelossplot import PlotLosses
        self.plt = PlotLosses(**self._kwargs)
        self.batch_plt = PlotLosses(**self._kwargs)

    def _on_step_training(self, state):
        self.batch_plt.update({k: get_metric('LiveLossPlot', state, k)
                               for k in state[torchbearer.METRICS]})
        if state[torchbearer.BATCH] % self.batch_step_size == 0 and not self.draw_once:
            with no_print():
                self.batch_plt.draw()

    def _on_end_epoch(self, state):
        self.plt.update({k: get_metric('LiveLossPlot', state, k)
                         for k in state[torchbearer.METRICS]})
        if not self.draw_once:
            with no_print():
                self.plt.draw()

    def on_end(self, state):
        if self.draw_once:
            with no_print():
                self.batch_plt.draw()
                self.plt.draw()
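# A hedged usage sketch based on the Args documented above: plot batch-level
# metrics every 50 training batches in addition to the per-epoch plots
# (the step size and model are illustrative, not from the source):
import torch.nn
from torchbearer import Trial
from torchbearer.callbacks import LiveLossPlot

model = torch.nn.Linear(1, 1)
live_loss_plot = LiveLossPlot(on_batch=True, batch_step_size=50, on_epoch=True)
trial = Trial(model, callbacks=[live_loss_plot], metrics=['acc'])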
# In[5]:

names = ['airplane', 'onion', 'apple', 'pineapple', 'ant', 'banana',
         'ambulance', 'angel', 'cat', 'cow', 'broccoli', 'bus']
amount = 1000
device = torch.device("cpu")
model = Net(len(names))
X_train, Y_train, X_test, Y_test = data_load(names, amount)
train_loader = DataLoader(TensorDataset(X_train, Y_train), batch_size=32, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test, Y_test), batch_size=32, shuffle=False)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
liveloss = PlotLosses()


# In[6]:

def conv_train_model(epochs):
    # applies the functions defined earlier; arguments:
    # epochs - number of passes over the training data,
    # names  - array of class names,
    # amount - amount of data
    for epoch in range(epochs):
        avg_loss, avg_accuracy = conv_train_step(model, device, train_loader, optimizer, epoch)
        avg_loss_val, avg_accuracy_val = conv_test_step(model, device, test_loader)
        liveloss.update({
            'loss': avg_loss,
            'acc': avg_accuracy,
            'val_loss': avg_loss_val,
            'val_acc': avg_accuracy_val,
        })