def cb(bar, iter, loss_dict, task=1):
    '''Callback-function, to perform on every iteration to keep track of training progress.

    [bar]        None or <tqdm>-progress-bar to update
    [iter]       <int> current iteration within the current class
    [loss_dict]  <dict> with keys 'd_cost' and 'g_cost' (discriminator / generator loss)
    [task]       <int> identifier of the class currently being trained on
    '''
    iteration = iter

    # progress-bar
    if progress_bar and bar is not None:
        task_stm = " Class: {} |".format(task)
        bar.set_description(
            ' <GAN> |{t_stm} d cost: {loss:.3} | g cost: {prec:.3} |'
            .format(t_stm=task_stm, loss=loss_dict['d_cost'],
                    prec=loss_dict['g_cost']))
        bar.update(1)

    # no visdom-server to log to, so nothing more to do
    if visdom is None:
        return

    # log the discriminator- and generator-cost (to visdom) every [log] iterations
    # NOTE: the original re-checked "(visdom is not None)" here, which is
    # always True after the early return above; the redundant check is removed.
    if iteration % log == 0:
        plot_data = [loss_dict['d_cost'], loss_dict['g_cost']]
        names = ['Discriminator cost', 'Generator cost']
        visual_visdom.visualize_scalars(
            scalars=plot_data, names=names, iteration=iteration,
            title="GENERATOR: loss class{t}".format(t=task),
            env=visdom["env"], ylabel="training loss")
def cb(bar, iteration, loss_dict, task=1):
    '''Callback-function, to call on every iteration to keep track of training progress.'''
    task = 0 if task is None else task

    # Update the progress-bar (if one is being used).
    if progress_bar and bar is not None:
        if tasks is None:
            task_label = ""
        else:
            task_label = " Task: {}/{} |".format(task, tasks)
        description = (
            ' <SOLVER> |{t_stm} training loss: {loss:.3} | accuracy: {prec:.3} |'
        ).format(t_stm=task_label, loss=loss_dict['loss_total'],
                 prec=loss_dict['accuracy'])
        bar.set_description(description)
        bar.update(1)

    # Every [log] iterations, send the solver's total loss to the visdom-server.
    if visdom is not None and iteration % log == 0:
        # Convert the within-task iteration-count to a global one.
        global_iter = (task - 1) * iters_per_task + iteration
        visual_visdom.visualize_scalars(scalars=[loss_dict['loss_total']],
                                        names=["Total loss"],
                                        iteration=global_iter,
                                        title="Solver loss",
                                        env=visdom["env"],
                                        ylabel="training loss")
def cb(iter, task=1):
    '''Callback-function, to call on every iteration to keep track of training progress.

    Every [log] iterations the model is evaluated on all [test_datasets]; the
    per-task accuracies are stored in [visdom["values"]] and plotted.

    [iter]  <int> current iteration within the current task
    [task]  None or <int> identifier of the task currently being trained on
    '''
    if task is None:
        task = 0
    # Convert the within-task iteration-count to a global one.
    iteration = (task - 1) * iters_per_task + iter

    if (iteration % log == 0) and (visdom is not None):
        # Evaluate the model on all test-sets.
        loss_dict = model.test(task, test_datasets, verbose=False)
        # Report accuracy "0" for tasks not yet trained on.
        while len(loss_dict["Accuracy"]) < len(test_datasets):
            loss_dict["Accuracy"].append(0)
        loss_dict["Task"] = range(len(test_datasets))
        plot_data = loss_dict["Accuracy"]
        names = ["task" + str(s + 1) for s in loss_dict["Task"]]
        # NOTE: an unreachable "if visdom is None: return" was removed here;
        # the enclosing condition already guarantees [visdom] is not None.
        visdom["values"].append({"iter": iteration, "acc": plot_data})
        visual_visdom.visualize_scalars(scalars=plot_data, names=names,
                                        iteration=iteration,
                                        title="Task accuracy" + vis_name,
                                        env=visdom["env"],
                                        ylabel="accuracy per task")
def cb(bar, iter, loss_dict, task=1):
    '''Callback-function, to perform on every iteration to keep track of training progress.

    [bar]        None or <tqdm>-progress-bar to update
    [iter]       <int> current iteration within the current task
    [loss_dict]  <dict> with the VAE's loss-terms ('loss_total', 'precision',
                 'recon', 'variat', and -- depending on settings -- 'pred',
                 'recon_r', 'variat_r', 'pred_r', 'distil_r')
    [task]       <int> identifier of the task currently being trained on
    '''
    # Convert the within-task iteration-count to a global one.
    iteration = iter if task == 1 else (task - 1) * iters_per_task + iter

    # progress-bar
    if progress_bar and bar is not None:
        task_stm = "" if (tasks is None) else " Task: {}/{} |".format(
            task, tasks)
        bar.set_description(
            ' <VAE> |{t_stm} training loss: {loss:.3} | training precision: {prec:.3} |'
            .format(t_stm=task_stm, loss=loss_dict['loss_total'],
                    prec=loss_dict['precision']))
        bar.update(1)

    # log the loss of the solver (to visdom)
    if (iteration % log == 0) and (visdom is not None):
        if tasks is None or tasks == 1:
            # Single-task setting: plot the raw loss-terms.
            plot_data = [loss_dict['recon'], loss_dict['variat']]
            names = ['Recon', 'Variat']
            if model.lamda_pl > 0:
                plot_data += [loss_dict['pred']]
                names += ['Prediction']
        else:
            # Multi-task setting: losses on current data are weighted by
            # [weight_new_task], replayed losses by its complement.
            weight_new_task = 1. / task if replay else 1.
            plot_data = [
                weight_new_task * loss_dict['recon'],
                weight_new_task * loss_dict['variat']
            ]
            names = ['Recon', 'Variat']
            if model.lamda_pl > 0:
                plot_data += [weight_new_task * loss_dict['pred']]
                names += ['Prediction']
            if replay:
                # Loss-terms computed on the replayed data.
                plot_data += [(1 - weight_new_task) * loss_dict['recon_r'],
                              (1 - weight_new_task) * loss_dict['variat_r']
                              ]
                names += ['Recon - r', 'Variat - r']
                if model.lamda_pl > 0:
                    # Replayed prediction loss: either against hard targets
                    # (class-labels) or soft targets (distillation).
                    if model.replay_targets == "hard":
                        plot_data += [
                            (1 - weight_new_task) * loss_dict['pred_r']
                        ]
                        names += ['pred - r']
                    elif model.replay_targets == "soft":
                        plot_data += [
                            (1 - weight_new_task) * loss_dict['distil_r']
                        ]
                        names += ['distill - r']
        visual_visdom.visualize_scalars(scalars=plot_data,
                                        names=names,
                                        iteration=iteration,
                                        title="VAE: loss ({})".format(
                                            visdom["graph"]),
                                        env=visdom["env"],
                                        ylabel="training loss")
def precision(model, datasets, current_task, iteration, classes_per_task=None,
              scenario="class", precision_dict=None, test_size=None,
              visdom=None, verbose=False, summary_graph=True,
              with_exemplars=False, no_task_mask=False):
    '''Evaluate precision of a classifier (=[model]) on all tasks so far (= up to [current_task]) using [datasets].

    [precision_dict]    None or <dict> of all measures to keep track of, to which results will be appended to
    [classes_per_task]  <int> number of active classes per task
    [scenario]          <str> how to decide which classes to include during evaluating precision
                        ('domain', 'task' or 'class')
    [visdom]            None or <dict> with name of "graph" and "env" (if None, no visdom-plots are made)

    Returns [precision_dict] (with this evaluation's results appended, if it was provided).'''

    # Evaluate accuracy of model predictions for all tasks so far (reporting "0" for future tasks)
    n_tasks = len(datasets)
    precs = []
    for i in range(n_tasks):
        if i + 1 <= current_task:
            # Which classes are "active" depends on the continual-learning scenario.
            if scenario == 'domain':
                allowed_classes = None
            elif scenario == 'task':
                allowed_classes = list(range(classes_per_task * i,
                                             classes_per_task * (i + 1)))
            elif scenario == 'class':
                allowed_classes = list(range(classes_per_task * current_task))
            precs.append(validate(model, datasets[i], test_size=test_size,
                                  verbose=verbose,
                                  allowed_classes=allowed_classes,
                                  with_exemplars=with_exemplars,
                                  no_task_mask=no_task_mask, task=i + 1))
        else:
            # Future task: report precision "0".
            precs.append(0)
    # Average precision over the tasks trained on so far.
    average_precs = sum([precs[task_id] for task_id in range(current_task)]) / current_task

    # Print results on screen
    if verbose:
        print(' => ave precision: {:.3f}'.format(average_precs))

    # Send results to visdom server
    names = ['task {}'.format(i + 1) for i in range(n_tasks)]
    if visdom is not None:
        visual_visdom.visualize_scalars(
            precs, names=names, title="precision ({})".format(visdom["graph"]),
            iteration=iteration, env=visdom["env"], ylabel="test precision"
        )
        # Optionally also plot the average precision in a separate graph.
        if n_tasks > 1 and summary_graph:
            visual_visdom.visualize_scalars(
                [average_precs], names=["ave"],
                title="ave precision ({})".format(visdom["graph"]),
                iteration=iteration, env=visdom["env"], ylabel="test precision"
            )

    # Append results to [progress]-dictionary and return
    if precision_dict is not None:
        for task_id, _ in enumerate(names):
            precision_dict["all_tasks"][task_id].append(precs[task_id])
        precision_dict["average"].append(average_precs)
        precision_dict["x_iteration"].append(iteration)
        precision_dict["x_task"].append(current_task)
    return precision_dict
def train(model, train_datasets, test_datasets, epochs_per_task=10,
          batch_size=64, test_size=1024, consolidate=True,
          fisher_estimation_sample_size=1024, lr=1e-3, weight_decay=1e-5,
          lamda=3, loss_log_interval=30, eval_log_interval=50, cuda=False,
          plot="pdf", pdf_file_name=None, epsilon=1e-3, c=1,
          intelligent_synapses=False):
    '''Train [model] sequentially on all [train_datasets].

    [model]                 classifier providing .ewc_loss(), .surrogate_loss(),
                            .estimate_fisher() and .update_omega()
    [train_datasets]        <list> of datasets, one per task, trained in order
    [test_datasets]         <list> of datasets used for evaluating precision
    [consolidate]           <bool> if True, estimate the Fisher Information
                            matrix after each task (EWC-style consolidation)
    [intelligent_synapses]  <bool> if True, keep running importance estimates
                            (synaptic-intelligence path integral)
    [lamda]                 <float> weight of the EWC penalty
    [c]                     <float> weight of the surrogate (SI) penalty
    [epsilon]               <float> damping term used when updating omega
    [plot]                  <str> "visdom", "pdf" or "none"
    [pdf_file_name]         <str> output path (only used when [plot]=="pdf")
    '''
    # number of tasks
    n_tasks = len(train_datasets)

    # prepare the loss criterion and the optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr,
                           weight_decay=weight_decay)

    # register starting param-values (needed for "intelligent synapses").
    if intelligent_synapses:
        for n, p in model.named_parameters():
            n = n.replace('.', '__')
            model.register_buffer('{}_prev_task'.format(n), p.data.clone())

    # if plotting, prepare task names and plot-titles
    if not plot == "none":
        names = ['task {}'.format(i + 1) for i in range(n_tasks)]
        title_precision = 'precision (consolidated)' if consolidate else 'precision'
        title_loss = 'loss (consolidated)' if consolidate else 'loss'

    # if plotting in pdf, initiate lists for storing data
    if plot == "pdf":
        all_task_lists = [[] for _ in range(n_tasks)]
        x_list = []
        average_list = []
        # FIX: four loss-curves are stored below (total / ce / ewc /
        # surrogate), so four lists are needed. The original allocated only
        # three ("range(3)"), causing an IndexError on all_loss_lists[3]
        # the first time losses were logged with plot=="pdf".
        all_loss_lists = [[] for _ in range(4)]
        x_loss_list = []

    # training, ..looping over all tasks
    for task, train_dataset in enumerate(train_datasets, 1):

        # if requested, prepare dictionaries to store running importance
        # estimates and parameter-values before update
        if intelligent_synapses:
            W = {}
            p_old = {}
            for n, p in model.named_parameters():
                n = n.replace('.', '__')
                W[n] = p.data.clone().zero_()
                p_old[n] = p.data.clone()

        # ..looping over all epochs
        for epoch in range(1, epochs_per_task + 1):
            # prepare data-loader, and wrap in "tqdm"-object.
            data_loader = utils.get_data_loader(
                train_dataset, batch_size=batch_size, cuda=cuda
            )
            data_stream = tqdm(enumerate(data_loader, 1))

            # ..looping over all batches
            for batch_index, (x, y) in data_stream:
                # where are we? (compute a global iteration-count over tasks)
                data_size = len(x)
                dataset_size = len(data_loader.dataset)
                dataset_batches = len(data_loader)
                previous_task_iteration = sum([
                    epochs_per_task * len(d) // batch_size
                    for d in train_datasets[:task - 1]
                ])
                current_task_iteration = (
                    (epoch - 1) * dataset_batches + batch_index
                )
                iteration = previous_task_iteration + current_task_iteration

                # prepare the data.
                x = x.view(data_size, -1)
                x = Variable(x).cuda() if cuda else Variable(x)
                y = Variable(y).cuda() if cuda else Variable(y)

                # run model, backpropagate errors, update parameters.
                model.train()
                optimizer.zero_grad()
                scores = model(x)
                ce_loss = criterion(scores, y)
                ewc_loss = model.ewc_loss(lamda, cuda=cuda)
                surrogate_loss = model.surrogate_loss(c, cuda=cuda)
                loss = ce_loss + ewc_loss + surrogate_loss
                loss.backward()
                optimizer.step()

                # if requested, update importance estimates
                if intelligent_synapses:
                    for n, p in model.named_parameters():
                        n = n.replace('.', '__')
                        # running path-integral: -gradient * parameter-change
                        W[n].add_(-p.grad.data * (p.data - p_old[n]))
                        p_old[n] = p.data.clone()

                # calculate the training precision.
                _, predicted = scores.max(1)
                precision = (predicted == y).sum().data[0] / len(x)

                # print progress to the screen using "tqdm"
                data_stream.set_description((
                    'task: {task}/{tasks} | '
                    'epoch: {epoch}/{epochs} | '
                    'progress: [{trained}/{total}] ({progress:.0f}%) | '
                    'prec: {prec:.4} | '
                    'loss => '
                    'ce: {ce_loss:.4} / '
                    'ewc: {ewc_loss:.4} / '
                    'total: {loss:.4}'
                ).format(
                    task=task,
                    tasks=n_tasks,
                    epoch=epoch,
                    epochs=epochs_per_task,
                    trained=batch_index * batch_size,
                    total=dataset_size,
                    progress=(100. * batch_index / dataset_batches),
                    prec=precision,
                    ce_loss=ce_loss.data[0],
                    ewc_loss=ewc_loss.data[0],
                    loss=loss.data[0],
                ))

                # Send test precision to the visdom server,
                # or store for later plotting to pdf.
                if not plot == "none":
                    if iteration % eval_log_interval == 0:
                        # precision on each test-set (0 for future tasks)
                        precs = [
                            utils.validate(
                                model, test_datasets[i],
                                test_size=test_size,
                                cuda=cuda, verbose=False,
                            ) if i + 1 <= task else 0
                            for i in range(n_tasks)
                        ]
                        if plot == "visdom":
                            visual_visdom.visualize_scalars(
                                precs, names, title_precision,
                                iteration, env=model.name,
                            )
                            visual_visdom.visualize_scalars(
                                [sum([precs[task_id] for task_id
                                      in range(task)]) / task],
                                ["average precision"],
                                title_precision + " (ave)",
                                iteration, env=model.name,
                            )
                        elif plot == "pdf":
                            for task_id, _ in enumerate(names):
                                all_task_lists[task_id].append(precs[task_id])
                            average_list.append(
                                sum([precs[task_id] for task_id
                                     in range(task)]) / task)
                            x_list.append(iteration)

                # Send losses to the visdom server,
                # or store for later plotting to pdf.
                if not plot == "none":
                    if iteration % loss_log_interval == 0:
                        if plot == "visdom":
                            visual_visdom.visualize_scalars(
                                [loss.data, ce_loss.data, ewc_loss.data,
                                 surrogate_loss.data],
                                ['total', 'cross entropy', 'ewc',
                                 'surrogate loss'],
                                title_loss, iteration, env=model.name
                            )
                        elif plot == "pdf":
                            all_loss_lists[0].append(loss.data.cpu().numpy()[0])
                            all_loss_lists[1].append(ce_loss.data.cpu().numpy()[0])
                            all_loss_lists[2].append(ewc_loss.data.cpu().numpy()[0])
                            all_loss_lists[3].append(surrogate_loss.data.cpu().numpy()[0])
                            x_loss_list.append(iteration)

        if consolidate:
            # take [fisher_estimation_sample_size] random samples from the last task learned
            sample_ids = random.sample(range(len(train_dataset)),
                                       fisher_estimation_sample_size)
            # NOTE: renamed loop variable (was "id", shadowing the builtin)
            selected_samples = [train_dataset[sample_id]
                                for sample_id in sample_ids]
            # estimate the Fisher Information matrix and consolidate it in the network
            model.estimate_fisher(selected_samples)

        if intelligent_synapses:
            # update & consolidate normalized path integral in the network
            model.update_omega(W, epsilon)

    # if requested, generate pdf.
    if plot == "pdf":
        # create list to store all figures to be plotted.
        figure_list = []
        # Fig1: precision
        figure = visual_plt.plot_lines(
            all_task_lists, x_axes=x_list, line_names=names
        )
        figure_list.append(figure)
        # Fig2: loss
        figure = visual_plt.plot_lines(
            all_loss_lists, x_axes=x_loss_list,
            line_names=['total', 'cross entropy', 'ewc', 'surrogate loss']
        )
        figure_list.append(figure)
        # create pdf containing all figures.
        pdf = PdfPages(pdf_file_name)
        for figure in figure_list:
            pdf.savefig(figure)
        pdf.close()
def cb(bar, iter, loss_dict, task=1):
    '''Callback-function, to perform on every iteration to keep track of training progress.

    Produces two visdom-plots: one with all (unweighted) VAE loss-terms, and
    one with the loss-terms as they are actually weighted in the total loss.

    [bar]        <tqdm>-progress-bar to update (assumed not None here)
    [iter]       <int> current iteration within the current task
    [loss_dict]  <dict> with the VAE's loss-terms
    [task]       <int> identifier of the task currently being trained on
    '''
    # Convert the within-task iteration-count to a global one.
    iteration = iter if task == 1 else (task - 1) * iters_per_task + iter

    ##--------------------------------PROGRESS BAR---------------------------------##
    task_stm = "" if (tasks is None) else " Task: {}/{} |".format(
        task, tasks)
    bar.set_description(
        ' <VAE> |{t_stm} training loss: {loss:.3} | training precision: {prec:.3} |'
        .format(t_stm=task_stm, loss=loss_dict['loss_total'],
                prec=loss_dict['precision']))
    bar.update()
    ##-----------------------------------------------------------------------------##

    # plot training loss every [log]
    if (iteration % log == 0) and (visdom is not None):
        ##--------------------------------PROGRESS PLOTS--------------------------------##
        # First plot: all (unweighted) loss-terms.
        plot_data = [loss_dict['recon'], loss_dict['variat']]
        names = ['Recon', 'Variat']
        if model.lamda_pl > 0:
            plot_data += [loss_dict['pred']]
            names += ['Prediction']
        if tasks is not None and replay:
            if tasks > 1:
                # Loss-terms on the replayed data.
                plot_data += [loss_dict['recon_r'], loss_dict['variat_r']]
                names += ['Recon - r', 'Variat - r']
                if model.lamda_pl > 0:
                    plot_data += [
                        loss_dict['pred_r'], loss_dict['distil_r']
                    ]
                    names += ['Pred - r', 'Distill - r']
        visual_visdom.visualize_scalars(
            plot_data, names,
            title="VAE: all losses ({})".format(visdom["graph"]),
            iteration=iteration, env=visdom["env"], ylabel="training loss")

        # Second plot: loss-terms as weighted in the total loss. Losses on
        # current data are weighted by [weight_new_task], replayed losses by
        # its complement; each term is also scaled by its lamda-coefficient.
        plot_data = list()
        names = list()
        weight_new_task = 1. / task if replay else 1.
        if model.lamda_rcl > 0:
            plot_data += [
                weight_new_task * model.lamda_rcl * loss_dict['recon']
            ]
            names += ['Recon (x{})'.format(model.lamda_rcl)]
        if model.lamda_vl > 0:
            plot_data += [
                weight_new_task * model.lamda_vl * loss_dict['variat']
            ]
            names += ['Variat (x{})'.format(model.lamda_vl)]
        if model.lamda_pl > 0:
            plot_data += [
                weight_new_task * model.lamda_pl * loss_dict['pred']
            ]
            names += ['Prediction (x{})'.format(model.lamda_pl)]
        if tasks is not None and replay:
            if tasks > 1:
                if model.lamda_rcl > 0:
                    plot_data += [(1 - weight_new_task) * model.lamda_rcl *
                                  loss_dict['recon_r']]
                    names += ['Recon - r (x{})'.format(model.lamda_rcl)]
                if model.lamda_vl > 0:
                    plot_data += [(1 - weight_new_task) * model.lamda_vl *
                                  loss_dict['variat_r']]
                    names += ['Variat - r (x{})'.format(model.lamda_vl)]
                if model.lamda_pl > 0:
                    # Replayed prediction loss: hard targets (class-labels)
                    # vs soft targets (distillation).
                    if model.replay_targets == "hard":
                        plot_data += [(1 - weight_new_task) * model.lamda_pl *
                                      loss_dict['pred_r']]
                        names += [
                            'Prediction - r (x{})'.format(model.lamda_pl)
                        ]
                    elif model.replay_targets == "soft":
                        plot_data += [
                            (1 - weight_new_task) * model.lamda_pl *
                            loss_dict['distil_r']
                        ]
                        names += [
                            'Distill - r (x{})'.format(model.lamda_pl)
                        ]
        visual_visdom.visualize_scalars(
            plot_data, names,
            title="VAE: weighted loss ({})".format(visdom["graph"]),
            iteration=iteration, env=visdom["env"], ylabel="training loss")
def cb(bar, iter, loss_dict, task=1):
    '''Callback-function, to call on every iteration to keep track of training progress.'''
    # Convert the within-task iteration-count to a global one.
    iteration = iter if task == 1 else (task - 1) * iters_per_task + iter

    ##--------------------------------PROGRESS BAR---------------------------------##
    if tasks is None:
        task_stm = ""
    else:
        task_stm = " Task: {}/{} |".format(task, tasks)
    bar.set_description(
        ' <SOLVER> |{t_stm} training loss: {loss:.3} | training precision: {prec:.3} |'
        .format(t_stm=task_stm, loss=loss_dict['loss_total'],
                prec=loss_dict['precision']))
    bar.update()
    ##-----------------------------------------------------------------------------##

    # Only log to visdom every [log] iterations (and only if a server is set up).
    if visdom is None or iteration % log != 0:
        return

    multi_task = (tasks is not None) and (tasks > 1)

    # First plot: all (unweighted) loss-terms of the solver.
    scalars = [loss_dict['pred']]
    labels = ['prediction']
    if multi_task:
        scalars += [loss_dict['ewc'], loss_dict['si_loss']]
        labels += ['EWC', 'SI']
    if multi_task and replay:
        scalars += [loss_dict['pred_r'], loss_dict['distil_r']]
        labels += ['pred - r', 'KD - r']
    visual_visdom.visualize_scalars(
        scalars, labels, "solver: all losses ({})".format(visdom["graph"]),
        iteration, env=visdom["env"], ylabel='training loss')

    # Second plot: loss-terms as actually weighted in the total loss.
    if multi_task:
        weight_new_task = 1. / task if replay else 1.
        scalars = [weight_new_task * loss_dict['pred']]
        labels = ['pred']
        if replay and model.replay_targets == "hard":
            scalars.append((1 - weight_new_task) * loss_dict['pred_r'])
            labels.append('pred - r')
        elif replay and model.replay_targets == "soft":
            scalars.append((1 - weight_new_task) * loss_dict['distil_r'])
            labels.append('KD - r')
        if model.ewc_lambda > 0:
            scalars.append(model.ewc_lambda * loss_dict['ewc'])
            labels.append('EWC (lambda={})'.format(model.ewc_lambda))
        if model.si_c > 0:
            scalars.append(model.si_c * loss_dict['si_loss'])
            labels.append('SI (c={})'.format(model.si_c))
        visual_visdom.visualize_scalars(
            scalars, labels,
            "solver: weighted loss ({})".format(visdom["graph"]),
            iteration, env=visdom["env"], ylabel='training loss')