Example #1
def test_model_without_set_get_weights(
        model: nn.Module,
        testset: Dataset,
        metric: TrainingMetrics,
        config: TrainingConfig,
        rept: int = 1) -> Tuple[np.ndarray, np.ndarray]:
    """
        Test if the model is coequal before and after using model.teleport
    """
    loss_diff_avg = []
    acc_diff_avg = []
    for _ in range(rept):
        m = NeuralTeleportationModel(model,
                                     input_shape=(config.batch_size, 3, 32,
                                                  32)).to(device)

        res = test(m, testset, metric, config)
        loss1, acc1 = res['loss'], res['accuracy']

        m.random_teleport()

        res = test(m, testset, metric, config)
        loss2, acc2 = res['loss'], res['accuracy']

        loss_diff_avg.append(np.abs(loss1 - loss2))
        acc_diff_avg.append(np.abs(acc1 - acc2))

        print("==========================================")
        print("Loss and accuracy diff without set/get was")
        print("Loss diff was: {:.6e}".format(np.abs(loss1 - loss2)))
        print("Acc diff was: {:.6e}".format(np.abs(acc1 - acc2)))
        print("==========================================")

    return np.mean(loss_diff_avg), np.mean(acc_diff_avg)
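As a self-contained illustration of the property this check exercises (not the library's actual teleportation, which handles convolution and batch-norm layers through its change-of-basis machinery), a positive per-neuron rescaling of a small ReLU MLP leaves the network function unchanged, so loss and accuracy should match up to floating-point error:

import torch
import torch.nn as nn

torch.manual_seed(0)
mlp = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 4))
x = torch.randn(32, 8)

with torch.no_grad():
    out_before = mlp(x)
    cob = torch.rand(16) + 0.5           # positive per-neuron change of basis
    mlp[0].weight.mul_(cob[:, None])     # scale the hidden units ...
    mlp[0].bias.mul_(cob)
    mlp[2].weight.div_(cob[None, :])     # ... and undo the scaling downstream
    out_after = mlp(x)

print("max |out_before - out_after| =", (out_before - out_after).abs().max().item())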
Example #2
def test_model_with_set_get_weights(
        model: nn.Module,
        testset: Dataset,
        metric: TrainingMetrics,
        config: TrainingConfig,
        rept: int = 1) -> Tuple[np.ndarray, np.ndarray]:
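    """
        Test that the model gives identical loss and accuracy when its original
        and teleported parameters are restored through get_params/set_params.
    """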
    loss_diff_avg = []
    acc_diff_avg = []
    for _ in range(rept):
        m = NeuralTeleportationModel(model,
                                     input_shape=(config.batch_size, 3, 32,
                                                  32)).to(device)
        w_o, cob_o = m.get_params()
        m.random_teleport()
        w_t, cob_t = m.get_params()

        m.set_params(weights=w_o, cob=cob_o)
        res = test(m, testset, metric, config)
        loss1, acc1 = res['loss'], res['accuracy']

        m.set_params(weights=w_t, cob=cob_t)
        res = test(m, testset, metric, config)
        loss2, acc2 = res['loss'], res['accuracy']

        loss_diff_avg.append(np.abs(loss1 - loss2))
        acc_diff_avg.append(np.abs(acc1 - acc2))

        print("==========================================")
        print("Loss and accuracy diff with set/get was")
        print("Loss diff was: {:.6e}".format(np.abs(loss1 - loss2)))
        print("Acc diff was: {:.6e}".format(np.abs(acc1 - acc2)))
        print("==========================================")

    return np.mean(loss_diff_avg), np.mean(acc_diff_avg)
Example #3
def generate_1D_linear_interp(
        model: NeuralTeleportationModel,
        param_o: Tuple[torch.Tensor, torch.Tensor],
        param_t: Tuple[torch.Tensor, torch.Tensor],
        a: torch.Tensor,
        trainset: Dataset,
        valset: Dataset,
        metric: TrainingMetrics,
        config: TrainingConfig,
        checkpoint: dict = None) -> Tuple[list, list, list, list]:
    """
        This is 1-Dimensional Linear Interpolation
        θ(α) = (1−α)θ + αθ′
    """
    loss = []
    loss_v = []
    acc_t = []
    acc_v = []
    w_o, cob_o = param_o
    w_t, cob_t = param_t
    start_at = checkpoint["step"] if checkpoint else 0
    try:
        for step, coord in enumerate(a, start_at):
            # Interpolate the weight from W to T(W),
            # then interpolate the cob for the activation
            # and batchNorm layers only.
            print("step {} of {} - alpha={}".format(step + 1, len(a), coord))
            w = (1 - coord) * w_o + coord * w_t
            cob = (1 - coord) * cob_o + coord * cob_t
            model.set_params(w, cob)
            res = test(model, trainset, metric, config)
            loss.append(res['loss'])
            acc_t.append(res['accuracy'])
            res = test(model, valset, metric, config)
            acc_v.append(res['accuracy'])
            loss_v.append(res['loss'])
    except:
        if not checkpoint:
            checkpoint = {
                'step': step,
                'alpha': a,
                'original_model': param_o,
                'teleported_model': param_t,
                'losses': loss,
                'acc_t': acc_t,
                'acc_v': acc_v,
            }
        else:
            checkpoint['step'] = step
            checkpoint['losses'].extend(loss)
            checkpoint['acc_t'].extend(acc_t)
            checkpoint['acc_v'].extend(acc_v)
        torch.save(checkpoint, linterp_checkpoint_file)
        print("A checkpoint was made on step {} of {}".format(step, len(a)))
        # Re-raise to notify the caller's try/except, since there is no way to
        # know here whether the interruption happened before or after teleportation.
        raise

    return loss, acc_t, loss_v, acc_v
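A minimal, self-contained sketch of the interpolation itself on toy tensors (in the function above, the same formula is applied to the flattened weights and the change of basis returned by get_params):

import torch

w_o, w_t = torch.zeros(5), torch.ones(5)           # stand-ins for the flattened weights
cob_o, cob_t = torch.ones(5), 2 * torch.ones(5)    # stand-ins for the change of basis
alphas = torch.linspace(0, 1, steps=5)

for coord in alphas:
    w = (1 - coord) * w_o + coord * w_t
    cob = (1 - coord) * cob_o + coord * cob_t
    print("alpha={:.2f}  w[0]={:.2f}  cob[0]={:.2f}".format(
        coord.item(), w[0].item(), cob[0].item()))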
Example #4
def generate_contour_loss_values(
        model: NeuralTeleportationModel,
        directions: Tuple[torch.Tensor, torch.Tensor],
        weights: torch.Tensor,
        surface: torch.Tensor,
        trainset: Dataset,
        metric: TrainingMetrics,
        config: TrainingConfig,
        checkpoint: dict = None) -> Tuple[np.ndarray, np.ndarray]:
    """
        Generate a tensor containing the loss values from a given model.
    """
    loss = []
    acc = []
    delta, eta = directions
    start_at = 0
    if checkpoint:
        start_at = checkpoint['step']
    try:
        for step, (x, y) in enumerate(surface, start_at):
            print("Evaluating step {}: [{:.3f}, {:.3f}]".format(step, x, y))
            x, y = x.to(config.device), y.to(config.device)

            # L (w + alpha*delta + beta*eta)
            changes = (delta * x + eta * y).to(config.device)
            w = torch.add(weights, changes)
            model.set_weights(w)
            results = test(model, trainset, metric, config)

            loss.append(results['loss'])
            acc.append(results['accuracy'])
    except:
        # Whatever the cause of the interruption, save a checkpoint of the current surface generation.
        if not checkpoint:
            checkpoint = {'step': step, 'surface': surface, 'loss': loss}
        else:
            checkpoint['step'] = step
            checkpoint['loss'].extend(loss)
        torch.save(checkpoint, contour_checkpoint_file)
        print("A checkpoint was made at coord ({}, {})".format(x, y))

        # Re-raise to notify the caller's try/except, since there is no way to
        # know here whether the surface was being generated before or after teleportation.
        raise

    return np.array(loss), np.array(acc)
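A minimal sketch of how such a coordinate grid and the perturbed weights can be formed, using toy tensors; the `surface` iterable passed to the function above is assumed to yield (x, y) pairs like these, and `delta`/`eta` are direction tensors with the shape of the flattened weights:

import torch

weights = torch.randn(10)                        # stand-in for the flattened model weights
delta, eta = torch.randn(10), torch.randn(10)    # two direction vectors of the same shape

xs = torch.linspace(-1, 1, steps=3)
ys = torch.linspace(-1, 1, steps=3)
surface = [(x, y) for x in xs for y in ys]       # grid of (alpha, beta) coordinates

for x, y in surface:
    w = weights + x * delta + y * eta            # the loss L(w) would be evaluated here
    print("({:+.2f}, {:+.2f}) -> |w| = {:.3f}".format(x.item(), y.item(), w.norm().item()))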
def train(model: Union[NeuralTeleportationModel, Tuple[str, NeuralTeleportationModel]], train_dataset: Dataset,
          metrics: TrainingMetrics, config: BreadthTeleportationTrainingConfig, val_dataset: Dataset = None,
          optimizer: Optimizer = None) -> Dict[str, NeuralTeleportationModel]:
    # If the model is not named (at the first iteration), initialize its name based on its class
    if type(model) is tuple:
        model_name, model = model
    else:
        model_name = model.__class__.__name__

    # Initialize an optimizer if there isn't already one
    if optimizer is None:
        optimizer = get_optimizer_from_model_and_config(model, config)

    train_loader = DataLoader(train_dataset, batch_size=config.batch_size)

    # Always move model to GPU before training
    model.cuda()

    stopping_epoch = min(config.starting_epoch + config.every_n_epochs, config.epochs + 1)
    for epoch in range(config.starting_epoch, stopping_epoch):
        print(f'Training epoch {epoch} for {model_name} ...')
        train_epoch(model, metrics, optimizer, train_loader, epoch, device=config.device)
        if val_dataset:
            val_res = test(model, val_dataset, metrics, config)
            print("Validation: {}".format(val_res))

    # Always move model off-GPU after training
    model.cpu()

    # Update new starting epoch for the next iteration of model training
    config.starting_epoch += config.every_n_epochs

    # Determine if training has reached its end
    # TODO Add test for convergence
    is_train_end = config.starting_epoch >= config.epochs + 1

    if is_train_end:
        trained_models = {f'{model_name}_0': model}
    else:
        # Teleport the model and train each teleportation recursively
        trained_models = _teleport_and_train((model_name, model), train_dataset, metrics, config, optimizer,
                                             val_dataset=val_dataset)

    return trained_models
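For clarity, a small self-contained sketch of the epoch windowing driven by starting_epoch, every_n_epochs and epochs (the numeric values are only for illustration):

starting_epoch, every_n_epochs, epochs = 1, 5, 12    # assumed values for illustration

while starting_epoch < epochs + 1:
    stopping_epoch = min(starting_epoch + every_n_epochs, epochs + 1)
    print("train epochs {}..{}".format(starting_epoch, stopping_epoch - 1))
    starting_epoch += every_n_epochs
    if starting_epoch >= epochs + 1:
        print("  -> training is over")
    else:
        print("  -> teleport and recurse on each teleported copy")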
Example #6
def run_model(model: nn.Module,
              config: TrainingConfig,
              metrics: TrainingMetrics,
              train_set: VisionDataset,
              test_set: VisionDataset,
              val_set: VisionDataset = None,
              optimizer: Optimizer = None,
              lr_scheduler=None) -> None:
    if isinstance(model, NeuralTeleportationModel):
        model_cls = model.network.__class__
    else:
        model_cls = model.__class__
    print(f"Training {model_cls.__name__}")

    # Always log parameters (to enable useful filtering options in the web interface)
    assert config.logger is not None
    hparams = config_to_dict(config)
    hparams.update({
        "model_name": model_cls.__name__.lower(),
        "dataset_name": train_set.__class__.__name__.lower()
    })
    config.logger.log_parameters(hparams)
    with config.logger.train():
        trained_model = train(model,
                              train_set,
                              metrics,
                              config,
                              val_dataset=val_set,
                              optimizer=optimizer,
                              lr_scheduler=lr_scheduler)

    # Ensure the model is on the correct device before testing
    # This avoids problems in case models are shuffled between CPU and GPU during training
    trained_model.to(config.device)

    with config.logger.test():
        print("Testing {}: {} \n".format(
            model.__class__.__name__,
            test(trained_model, test_set, metrics, config)))
        print()

    config.logger.flush()
Example #7
def run_multi_output_training(train_fct: Callable,
                              models: Sequence[nn.Module],
                              config: TrainingConfig,
                              metrics: TrainingMetrics,
                              train_set: VisionDataset,
                              test_set: VisionDataset,
                              val_set: VisionDataset = None) -> None:
    for model in models:
        print(f"Training {model.__class__.__name__}")
        trained_models = train_fct(model,
                                   train_dataset=train_set,
                                   metrics=metrics,
                                   config=deepcopy(config),
                                   val_dataset=val_set)
        for model_id, trained_model in trained_models.items():
            # Ensure the model is on the correct device before testing
            # This avoids problems in case models are shuffled between CPU and GPU during training
            trained_model.to(config.device)

            print("Testing {}: {} \n".format(
                model_id, test(trained_model, test_set, metrics, config)))
        print()
def start_training(model: NeuralTeleportationModel,
                   trainloader: DataLoader,
                   valset: VisionDataset,
                   metric: TrainingMetrics,
                   config: CompareTrainingConfig,
                   teleport_chance: float) -> np.ndarray:
    """
        This function starts a model training with a specific Scenario configuration.

        Scenario 1: train the model without using teleportation (teleportation_chance = 0.0)
        Scenario 2: train the model using a probability of teleporting every Xth epochs
        (0 < teleportation_chance < 1.0)
        Scenario 3: train the model using teleportation every Xth epochs (teleportation_chance = 1.0)

        returns:
            np.array containing the validation accuracy results of every epochs.
    """
    model.to(config.device)
    optimizer = get_optimizer_from_model_and_config(model, config)

    results = []
    for e in np.arange(1, args.epochs + 1):
        train_epoch(model=model, metrics=metric, optimizer=optimizer, train_loader=trainloader, epoch=e,
                    device=config.device)
        results.append(test(model=model, dataset=valset, metrics=metric, config=config)['accuracy'])
        model.train()

        if e % config.every_n_epochs == 0 and random.random() <= teleport_chance:
            print("teleported model")
            if config.targeted_teleportation:
                # TODO: use teleportation function here when they are available.
                raise NotImplementedError
            else:
                model.random_teleport(cob_range=config.cob_range, sampling_type=config.cob_sampling)
                optimizer = get_optimizer_from_model_and_config(model, config)

    model.cpu()  # Force the network to go out of the cuda mem.

    return np.array(results)
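A self-contained sketch of the teleportation scheduling described in the docstring; teleport_schedule is a hypothetical helper used only to show which epochs would trigger a teleportation under each scenario:

import random

def teleport_schedule(epochs, every_n_epochs, teleport_chance, seed=0):
    # Every `every_n_epochs` epochs, teleport with probability `teleport_chance`.
    random.seed(seed)
    return [e for e in range(1, epochs + 1)
            if e % every_n_epochs == 0 and random.random() <= teleport_chance]

for chance in (0.0, 0.5, 1.0):
    print("teleport_chance={}: teleport at epochs {}".format(
        chance, teleport_schedule(epochs=20, every_n_epochs=4, teleport_chance=chance)))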
Example #9
        net2 = MLPCOB(input_shape=(1, 28, 28),
                      num_classes=10,
                      hidden_layers=hidden_layers).to(device)

    model1 = NeuralTeleportationModel(network=net1,
                                      input_shape=sample_input_shape)
    if args.weights1 is not None:
        model1.load_state_dict(torch.load(args.weights1))
    config.batch_size = 8  # Change batch size to train to different minima
    train(model1,
          train_dataset=mnist_train,
          metrics=metrics,
          config=config,
          val_dataset=mnist_test)
    torch.save(model1.state_dict(), pjoin(save_path, 'model1.pt'))
    print("Model 1 test results: ", test(model1, mnist_test, metrics, config))

    model2 = NeuralTeleportationModel(network=net2,
                                      input_shape=sample_input_shape)
    if args.weights2 is not None:
        model2.load_state_dict(torch.load(args.weights2))
    config.batch_size = 512  # Change batch size to train to different minima
    train(model2,
          train_dataset=mnist_train,
          metrics=metrics,
          config=config,
          val_dataset=mnist_test)
    torch.save(model2.state_dict(), pjoin(save_path, 'model2.pt'))
    print("Model 2 test results: ", test(model2, mnist_test, metrics, config))

    # Compare the output of the two models for a given input.
Example #10
                            device=device,
                            batch_size=args.batch_size)

    net = get_model(args.dataset, args.model, device=device)
    if args.load_model_path:
        load_dict = torch.load(args.load_model_path)
        if net.state_dict().keys() != load_dict.keys():
            raise Exception(
                "Model that was loaded does not match the model type used in the experiment."
            )
        net.load_state_dict(load_dict)

    else:
        if args.train:
            train(net, train_dataset=trainset, metrics=metric, config=config)
            test(net, dataset=trainset, metrics=metric, config=config)
        if args.save_model:
            torch.save(net.state_dict(), get_nonexistent_path(args.save_path))

    checkpoint = None
    if checkpoint_exist:
        print(
            "A checkpoint exists and was requested, overriding the experiment configuration!"
        )
        checkpoint = torch.load(checkpoint_file)
        step = checkpoint['step']
        surface = checkpoint['surface'][step:]
        section = checkpoint['section']

    plot_before = args.plot_before if not checkpoint else section == "before"
    if plot_before:
                                   batch_size=args.batch_size,
                                   device=device,
                                   cob_range=args.cob_range,
                                   cob_sampling=args.cob_sampling,
                                   targeted_teleportation=args.targeted_teleportation,
                                   every_n_epochs=args.teleport_every
                                   )

    res_vanilla = np.empty((args.run, args.epochs + 1))
    res_5050 = np.empty((args.run, args.epochs + 1))
    res_teleport = np.empty((args.run, args.epochs + 1))

    results = [res_vanilla, res_5050, res_teleport]

    # No need to run test for each since they all have the same weights at start.
    init_val_res = test(model=nets[0], dataset=valset, metrics=metric, config=config)['accuracy']
    teleport_probs = [0.0, args.teleport_chance, 1.0]
    for scenario_num, net in enumerate(nets):
        print("Starting scenario {}".format(scenario_num + 1))
        for n in range(args.run):
            print("run no {}".format(n + 1))
            net.set_weights(init_weights)
            results[scenario_num][n] = np.concatenate(([init_val_res],
                                                       start_training(net, trainloader, valset, metric, config,
                                                                      teleport_chance=teleport_probs[scenario_num])))

    mean_vanilla = res_vanilla.mean(axis=0)
    std_vanilla = res_vanilla.std(axis=0) if args.run > 1 else 0

    mean_5050 = res_5050.mean(axis=0)
Example #12
        modelA.load_state_dict(torch.load(args.weightsA))
        # modelA = torch.load(args.weightsA)
    if args.weightsB is not None:
        modelB.load_state_dict(torch.load(args.weightsB))
        # modelB = torch.load(args.weightsB)

    if args.train:
        print("Train model A")
        train(modelA, trainset, metric, configA, val_dataset=valset)
        print("Train model B")
        train(modelB, trainset, metric, configB, val_dataset=valset)

        torch.save(modelA.state_dict(), pjoin(save_path, 'modelA.pt'))
        torch.save(modelB.state_dict(), pjoin(save_path, 'modelB.pt'))

    res = test(modelA, valset, metric, configA)
    print("Model A Scored {} acc on valset".format(res['accuracy']))

    res = test(modelB, valset, metric, configB)
    print("Model B Scored {} acc on valset".format(res['accuracy']))

    a = torch.linspace(args.x[0], args.x[1], int(args.x[2]))

    teleportation_model = get_model(args.dataset, args.model, device=device)
    interpolation_config = LandscapeConfig(batch_size=1000, device=device)

    # interpolate between two original models
    print("Interpolating between original models...")
    param_o = modelA.get_params()
    param_t = modelB.get_params()