def __compute_loss_2d(self, test_loader, params_grid):
    """
    Evaluates the model at every point of a 2D grid of flat parameter vectors

    Each grid entry is converted to a float tensor, moved to ``self.device``
    and loaded into the model via ``load_from_flat_params`` before testing.

    :param test_loader: test data set loader
    :param params_grid: grid indexable as ``params_grid[i][j]``; the number of
                        columns is taken from the first row
    :return: transposed 2D array of losses, grid indices ``(i, j)`` of the
             smallest loss (empty tuple if no loss was below the float
             maximum), value of the smallest loss
    """
    row_count = len(params_grid)
    col_count = len(params_grid[0])

    best_loss = sys.float_info.max
    best_pos = ()
    grid_losses = []

    logger.info("Calculating loss values for PCA directions")

    for i in tqdm(range(row_count), desc="Optimizer path visualization"):
        row_losses = []

        for j in range(col_count):
            logger.debug(f"Calculating loss for coordinates: {i}, {j}")

            flat_params = torch.Tensor(params_grid[i][j].float()).to(self.device)
            self.model.load_from_flat_params(flat_params)

            # accuracy is returned by net.test as well, only the loss is used
            loss, acc = net.test(self.model, test_loader, self.device)
            logger.debug(f"Loss for {i}, {j} = {loss}")

            if loss < best_loss:
                best_loss = loss
                logger.debug(f"New min loss {best_loss}")
                best_pos = (i, j)

            row_losses.append(loss)

        grid_losses.append(row_losses)

    # rows were collected per i, the result is returned transposed
    return np.array(grid_losses).T, best_pos, best_loss
def pre_test_subset(model, device, subset_list):
    """
    Measures how the size of the test subset affects the stability of the
    measured loss and accuracy

    For every subset size the model (with the final state loaded) is tested
    ten times, each time with a newly requested test loader. Nothing is done
    when both result files already exist.

    :param model: NN model
    :param device: device to be used
    :param subset_list: list of test subset sizes to be examined
    """
    already_done = paths.test_subs_loss.exists() and paths.test_subs_acc.exists()
    if already_done:
        return

    final_state = copy.deepcopy(torch.load(paths.final_state))
    model.load_state_dict(final_state)

    losses_per_size = []
    accs_per_size = []

    for n_samples in subset_list:
        run_losses = []
        run_accs = []

        for _run in range(10):
            # the loader is requested again on every repetition
            # (original note: choose random data each time)
            _, test_loader = data_loader.data_load(test_samples=n_samples)

            loss, acc = net.test(model, test_loader, device)
            run_losses.append(loss)
            run_accs.append(acc)

            logger.info(f"Subset size: {n_samples}\n"
                        f"Validation loss: {loss}\n"
                        f"Accuracy: {acc}\n")

        losses_per_size.append(run_losses)
        accs_per_size.append(run_accs)

    # one row per subset size, one column per repetition
    np.savetxt(paths.test_subs_loss, losses_per_size)
    np.savetxt(paths.test_subs_acc, accs_per_size)
def interpolate_all_linear(self, test_loader):
    """
    Interpolates every named parameter of the model for each alpha and
    evaluates the model after each interpolation step

    The losses and accuracies are written to ``paths.loss_path`` and
    ``paths.acc_path``. The interpolation is skipped when both files exist.

    :param test_loader: test data set loader
    """
    if paths.loss_path.exists() and paths.acc_path.exists():
        return

    losses = []
    accuracies = []
    param_names = [name for name, _ in self.model.named_parameters()]

    self.model.load_state_dict(self.theta_f)

    for alpha_act in tqdm(self.alpha, desc="Model Level Linear",
                          dynamic_ncols=True):
        # presumably each call updates the matching entry of self.theta
        for param_name in param_names:
            self.__calc_theta_vec(param_name, alpha_act)

        self.model.load_state_dict(self.theta)
        loss, acc = net.test(self.model, test_loader, self.device)

        losses.append(loss)
        accuracies.append(acc)

    np.savetxt(paths.loss_path, losses)
    np.savetxt(paths.acc_path, accuracies)

    # leave the model with its final parameters
    self.model.load_state_dict(self.theta_f)
def pre_train_subset(model, device, subset_list, epochs, test_loader):
    """
    Examines the impact of the training subset size on the trained model

    For every subset size the model is reset to the initial state, trained
    with SGD and a StepLR scheduler and then tested; the resulting losses
    and accuracies are saved. Nothing is computed when both result files
    already exist. The final state is loaded back into the model at the end.

    :param model: NN model
    :param device: device to be used
    :param subset_list: list of training subset sizes to be examined
    :param epochs: upper bound of the epoch counter; the counter runs over
                   ``range(1, epochs)``
    :param test_loader: test dataset loader; it is rebound to the loader
                        returned by ``data_loader.data_load`` in every epoch
    """
    logger.info("Subset preliminary experiment started")

    results_exist = paths.train_subs_loss.exists() and paths.train_subs_acc.exists()
    if results_exist:
        return

    initial_state = copy.deepcopy(torch.load(paths.init_state))
    final_state = copy.deepcopy(torch.load(paths.final_state))

    subset_losses = []
    subset_accs = []

    for n_samples in subset_list:
        # every subset size starts from the same initial parameters
        model.load_state_dict(initial_state)

        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
        scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

        for epoch in range(1, epochs):
            # both loaders are requested again for every epoch
            train_loader, test_loader = data_loader.data_load(
                train_samples=n_samples)

            net.train(model, train_loader, optimizer, device, epoch)
            net.test(model, test_loader, device)

            scheduler.step()
            logger.debug(
                f"Finished epoch for tranining subset {epoch}, {n_samples}")

        loss, acc = net.test(model, test_loader, device)
        subset_losses.append(loss)
        subset_accs.append(acc)

    np.savetxt(paths.train_subs_loss, subset_losses)
    np.savetxt(paths.train_subs_acc, subset_accs)

    model.load_state_dict(final_state)
def pre_epochs(model, device, epochs_list):
    """
    Function examines performance of the model after certain number of epochs

    The model is reset to the initial state and trained with SGD and a StepLR
    scheduler for ``max(epochs_list) + 1`` epochs (epoch counter starts at 0).
    After each epoch contained in ``epochs_list`` the validation loss and
    accuracy are recorded. Losses are saved to ``paths.epochs_loss`` and
    accuracies to ``paths.epochs_acc``. Nothing is computed when both result
    files already exist.

    :param model: NN model
    :param device: device to be used
    :param epochs_list: list of epochs numbers after which will be the model
                        evaluated
    """
    logger.info("Epochs performance experiment started.")
    if paths.epochs_loss.exists() and paths.epochs_acc.exists():
        return

    loss_list = []
    acc_list = []

    theta_i = copy.deepcopy(torch.load(paths.init_state))
    model.load_state_dict(theta_i)

    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)  # set optimizer
    scheduler = StepLR(optimizer, step_size=1, gamma=0.7)  # set scheduler

    train_loader, test_loader = data_loader.data_load()

    for epoch in range(max(epochs_list) + 1):
        net.train(model, train_loader, optimizer, device, epoch)
        # NOTE(review): the result of this evaluation is discarded and the
        # model is tested again below for the selected epochs - confirm
        # whether this call is needed for its side effects
        net.test(model, test_loader, device)

        scheduler.step()
        logger.debug(f"Finished epoch {epoch}")

        if epoch in epochs_list:
            loss, acc = net.test(model, test_loader, device)

            loss_list.append(loss)
            acc_list.append(acc)

            # separate the fields, otherwise they run together in the log
            logger.info(f"Performance of the model for epoch {epoch}\n"
                        f"Validation loss: {loss}\n"
                        f"Accuracy: {acc}")

    np.savetxt(paths.epochs_loss, loss_list)
    # the accuracy file must contain accuracies, not a copy of the losses
    np.savetxt(paths.epochs_acc, acc_list)
def interpolate_all_quadratic(self, test_loader):
    """
    Method interpolates all parameters of the model using the quadratic
    interpolation and after each interpolation step evaluates the performance
    of the model.

    The interpolation goes through three points: the initial parameters
    (alpha 0), the parameters stored in the checkpoint selected by
    ``__get_mid_point`` (alpha 0.5) and the final parameters (alpha 1).
    Losses and accuracies are saved to ``paths.q_loss_path`` and
    ``paths.q_acc_path``; the interpolation is skipped when both files exist.
    Afterwards the comparison plot is drawn and the final parameters are
    loaded back into the model.

    :param test_loader: test data set loader
    """
    if not paths.q_loss_path.exists() or not paths.q_acc_path.exists():
        v_loss_list = []
        acc_list = []
        layers = [name for name, _ in self.model.named_parameters()]

        start_a = 0
        mid_a = 0.5
        end_a = 1
        logger.debug(f"Start: {start_a}\n"
                     f"Mid: {mid_a}\n"
                     f"End: {end_a}")

        self.model.load_state_dict(self.theta_f)
        mid_check = self.__get_mid_point(paths.checkpoints)

        # The checkpoint does not depend on alpha or on the layer, so it is
        # read from disk once instead of once per layer per alpha.
        mid_state = torch.load(os.path.join(paths.checkpoints, mid_check))

        for alpha_act in tqdm(self.alpha, desc="Model Level Quadratic",
                              dynamic_ncols=True):
            for layer in layers:
                start_p = self.theta_i[layer].cpu()
                # a fresh copy per call, as before, so the loaded checkpoint
                # itself is never handed to the interpolation helper
                mid_p = copy.deepcopy(mid_state[layer]).cpu()
                end_p = self.theta_f[layer].cpu()

                start = [start_a, start_p]
                mid = [mid_a, mid_p]
                end = [end_a, end_p]

                self.__calc_theta_vec_q(layer, alpha_act, start, mid, end)

            self.model.load_state_dict(self.theta)

            loss, acc = net.test(self.model, test_loader, self.device)
            v_loss_list.append(loss)
            acc_list.append(acc)

        np.savetxt(paths.q_loss_path, v_loss_list)
        np.savetxt(paths.q_acc_path, acc_list)

    plot.plot_lin_quad_real(self.alpha)
    self.model.load_state_dict(self.theta_f)
def calc_loss(model, test_loader, directions, device):
    """
    Walks over the coordinates stored in the surface file, evaluates the
    model at each of them and writes the losses back into the file

    The surface file is ``surf.h5`` in ``paths.random_dirs``. The points to
    evaluate are those returned by ``get_indices``. The loss data set in the
    file is rewritten and flushed after every evaluated point.

    :param model: model to be evaluated
    :param test_loader: test dataset loader
    :param directions: random projection directions
    :param device: device
    """
    logger.info("Calculating loss function surface")

    surf_path = Path(os.path.join(paths.random_dirs, "surf.h5"))
    logger.debug(f"Surface file: {surf_path.resolve()}")
    set_surf_file(surf_path)

    # data of the parameters as they are before any weights are overwritten
    base_weights = [param.data for param in model.parameters()]

    with h5py.File(surf_path, "r+") as surf:
        x_axis = surf["xcoordinates"][:]
        y_axis = surf["ycoordinates"][:]
        surface = surf["loss"][:]

        point_ids, point_coords = get_indices(surface, x_axis, y_axis)
        progress = tqdm(point_ids, desc="Loss Landscape Visualization",
                        dynamic_ncols=True)

        for position, flat_idx in enumerate(progress):
            point = point_coords[position]
            logger.debug(f"Index: {flat_idx}")

            overwrite_weights(model, base_weights, directions, point, device)

            loss, _ = net.test(model, test_loader, device)
            logger.debug(f"Loss: {loss}")

            # flat_idx addresses the flattened loss array
            surface.ravel()[flat_idx] = loss

            surf["loss"][:] = surface
            surf.flush()
def layers_quadratic(self, test_loader, layer):
    """
    Method examines the parameters on the level of layers using the quadratic
    interpolation.

    Weight and bias of the selected layer are interpolated through three
    points: the initial parameters (alpha 0), the parameters stored in the
    checkpoint selected by ``__get_mid_point`` (alpha 0.5) and the final
    parameters (alpha 1). The model is evaluated for each alpha and the
    results are saved; the interpolation is skipped when both result files
    exist. The figures are always drawn from the saved result files and the
    final parameters are loaded back into the model at the end.

    :param test_loader: test data set loader
    :param layer: layer to be examined
    """
    loss_res = Path("{}_{}_q".format(paths.vvloss_path, layer))
    loss_img = Path("{}_{}_q".format(paths.vvloss_img_path, layer))

    acc_res = Path("{}_{}_q".format(paths.vacc_path, layer))
    acc_img = Path("{}_{}_q".format(paths.vacc_img_path, layer))

    logger.debug(f"Result files:\n"
                 f"{loss_res}\n"
                 f"{acc_res}")
    logger.debug(f"Img files:\n"
                 f"{loss_img}\n"
                 f"{acc_img}")

    if not loss_res.exists() or not acc_res.exists():
        logger.debug("Result files not found - beginning interpolation.")

        v_loss_list = []
        acc_list = []

        start_a = 0
        mid_a = 0.5
        end_a = 1
        logger.debug(f"Start: {start_a}\n"
                     f"Mid: {mid_a}\n"
                     f"End: {end_a}")

        weight_key = layer + ".weight"
        bias_key = layer + ".bias"

        mid_check = self.__get_mid_point(paths.checkpoints)
        # Weight and bias come from the same checkpoint file, so it is read
        # from disk once instead of once per tensor.
        mid_state = torch.load(os.path.join(paths.checkpoints, mid_check))  # TODO AUTO MID

        start_p = self.theta_i[weight_key].cpu()
        mid_p = copy.deepcopy(mid_state[weight_key]).cpu()
        end_p = self.theta_f[weight_key].cpu()

        start_w = [start_a, start_p]
        mid_w = [mid_a, mid_p]
        end_w = [end_a, end_p]

        start_pb = self.theta_i[bias_key].cpu()
        mid_pb = copy.deepcopy(mid_state[bias_key]).cpu()
        end_pb = self.theta_f[bias_key].cpu()

        start_b = [start_a, start_pb]
        mid_b = [mid_a, mid_pb]
        end_b = [end_a, end_pb]

        for alpha_act in tqdm(self.alpha,
                              desc=f"Layer {layer} Level Quadratic Path",
                              dynamic_ncols=True):
            self.__calc_theta_vec_q(weight_key, alpha_act,
                                    start_w, mid_w, end_w)
            self.__calc_theta_vec_q(bias_key, alpha_act,
                                    start_b, mid_b, end_b)

            self.model.load_state_dict(self.theta)

            logger.debug(
                f"Getting validation loss and accuracy for alpha = {alpha_act}"
            )
            vloss, acc = net.test(self.model, test_loader, self.device)
            v_loss_list.append(vloss)
            acc_list.append(acc)

        logger.debug(f"Saving results to files ({loss_res}, {acc_res})")
        np.savetxt(loss_res, v_loss_list)
        np.savetxt(acc_res, acc_list)

    # results are read back from the files so cached runs are plotted too
    logger.debug(f"Saving results to figures {loss_img}, {acc_img} ...")
    plot.plot_metric(self.alpha, np.loadtxt(loss_res), loss_img, "loss")
    plot.plot_metric(self.alpha, np.loadtxt(acc_res), acc_img, "acc")

    self.model.load_state_dict(self.theta_f)

    return
def individual_param_quadratic(self, test_loader, layer, idxs):
    """
    Interpolates a single weight of the model with the quadratic
    interpolation and evaluates the model with the interpolated value put in
    place of the original one

    The three interpolation points are the value of the weight in the
    initial parameters (alpha 0), in the checkpoint selected by
    ``__get_mid_point`` (alpha 0.5) and in the final parameters (alpha 1).
    The interpolation is skipped when both result files exist; the figures
    are always drawn from the saved result files.

    :param test_loader: test dataset loader
    :param layer: layer of parameter
    :param idxs: position of parameter inside the weight of the layer
    """
    idx_tag = convert_list2str(idxs)
    weight_key = layer + ".weight"

    loss_res = Path(f"{paths.svloss_path}_{layer}_{idx_tag}_q")
    loss_img = Path(f"{paths.svloss_img_path}_{layer}_{idx_tag}_q")

    acc_res = Path(f"{paths.sacc_path}_{layer}_{idx_tag}_q")
    acc_img = Path(f"{paths.sacc_img_path}_{layer}_{idx_tag}_q")

    logger.debug(f"Result files:\n"
                 f"{loss_res}\n"
                 f"{acc_res}\n")
    logger.debug(f"Img files:\n"
                 f"{loss_img}\n"
                 f"{acc_img}\n")

    if not (loss_res.exists() and acc_res.exists()):
        logger.debug(
            "Files with results not found - beginning interpolation.")

        losses = []
        accuracies = []

        start_a = 0
        mid_a = 0.5
        end_a = 1
        logger.debug(f"Start: {start_a}\n"
                     f"Mid: {mid_a}\n"
                     f"End: {end_a}")

        mid_check = self.__get_mid_point(paths.checkpoints)

        start_p = self.theta_i[weight_key][idxs].cpu()

        mid_file = Path(os.path.join(paths.checkpoints, mid_check))
        mid_state = copy.deepcopy(torch.load(mid_file))
        mid_p = mid_state[weight_key][idxs].cpu()

        end_p = self.theta_f[weight_key][idxs].cpu()

        # the logged values are the parameter values at the three points
        logger.debug(f"Start loss: {start_p}\n"
                     f"Mid loss: {mid_p}\n"
                     f"End loss: {end_p}")

        start = [start_a, start_p]
        mid = [mid_a, mid_p]
        end = [end_a, end_p]
        logger.debug(f"Start: {start}\n"
                     f"Mid: {mid}\n"
                     f"End: {end}")

        self.model.load_state_dict(self.theta_f)

        progress = tqdm(self.alpha,
                        desc=f"Parameter {layer}/{idxs} Level Quadratic",
                        dynamic_ncols=True)
        for alpha_act in progress:
            self.__calc_theta_single_q(weight_key, idxs, alpha_act,
                                       start, mid, end)
            self.model.load_state_dict(self.theta)

            logger.debug(
                f"Getting validation loss and accuracy for alpha = {alpha_act}"
            )
            val_loss, acc = net.test(self.model, test_loader, self.device)
            accuracies.append(acc)
            losses.append(val_loss)

        logger.debug(f"Saving results to files ({loss_res}, {acc_res})")
        np.savetxt(loss_res, losses)
        np.savetxt(acc_res, accuracies)

        self.model.load_state_dict(self.theta_f)

    # results are read back from the files so cached runs are plotted too
    logger.debug(f"Saving results to figures {loss_img}, {acc_img} ...")
    plot.plot_metric(self.alpha, np.loadtxt(loss_res), loss_img, "loss")
    plot.plot_metric(self.alpha, np.loadtxt(acc_res), acc_img, "acc")

    self.model.load_state_dict(self.theta_f)
def layers_linear(self, test_loader, layer):
    """
    Linearly interpolates weight and bias of the selected layer and
    evaluates the model after every interpolation step

    The interpolation is skipped when both result files exist. The value
    returned by ``calc_distance`` for the weight of the layer is written to
    a separate file when that file is missing. The figures are always drawn
    from the saved result files and the final parameters are loaded back
    into the model at the end.

    :param test_loader: test loader
    :param layer: layer to be interpolated
    """
    weight_key = layer + ".weight"
    bias_key = layer + ".bias"

    loss_res = Path(f"{paths.vvloss_path}_{layer}")
    loss_img = Path(f"{paths.vvloss_img_path}_{layer}")

    acc_res = Path(f"{paths.vacc_path}_{layer}")
    acc_img = Path(f"{paths.vacc_img_path}_{layer}")

    dist = Path(f"{paths.vvloss_path}_{layer}_distance")

    logger.debug(f"Result files:\n"
                 f"{loss_res}\n"
                 f"{acc_res}")
    logger.debug(f"Img files:\n"
                 f"{loss_img}\n"
                 f"{acc_img}")
    logger.debug(f"Dist file:\n"
                 f"{dist}")

    if not (loss_res.exists() and acc_res.exists()):
        logger.debug("Result files not found - beginning interpolation.")

        losses = []
        accuracies = []

        self.model.load_state_dict(self.theta_f)

        progress = tqdm(self.alpha, desc=f"Layer {layer} Level Linear",
                        dynamic_ncols=True)
        for alpha_act in progress:
            self.__calc_theta_vec(weight_key, alpha_act)
            self.__calc_theta_vec(bias_key, alpha_act)

            self.model.load_state_dict(self.theta)

            logger.debug(
                f"Getting validation loss and accuracy for alpha = {alpha_act}"
            )
            vloss, acc = net.test(self.model, test_loader, self.device)
            losses.append(vloss)
            accuracies.append(acc)

        logger.debug(f"Saving results to files ({loss_res}, {acc_res})")
        np.savetxt(loss_res, losses)
        np.savetxt(acc_res, accuracies)

    if not dist.exists():
        logger.info(f"Calculating distance for: {layer}")

        distance = self.calc_distance(weight_key)
        logger.info(f"Distance: {distance}")

        with open(dist, 'w') as dist_file:
            dist_file.write(f"{distance}")

    # results are read back from the files so cached runs are plotted too
    logger.debug(f"Saving results to figures {loss_img}, {acc_img} ...")
    plot.plot_metric(self.alpha, np.loadtxt(loss_res), loss_img, "loss")
    plot.plot_metric(self.alpha, np.loadtxt(acc_res), acc_img, "acc")

    self.model.load_state_dict(self.theta_f)