Example #1
def generate_data_from_teacher(num_train=1000,
                               num_test=100,
                               n_in=5,
                               n_out=5,
                               n_hidden=[10, 10, 10],
                               linear=False):
    """Generate data for a regression task through a teacher model.

    This function generates random input patterns and creates a random MLP
    (fully-connected neural network) that is used as a teacher model, i.e.,
    the generated input data is fed through the teacher model to produce
    target outputs. The resulting dataset can be used to train and assess a
    student model. Hence, a learning procedure can be verified by checking
    whether it can train a student network to mimic a given teacher network.

    Input samples will be uniformly drawn from a unit cube.

    .. warning::
        Since this is a synthetic dataset that uses random number generators,
        the generated dataset depends on externally configured random seeds
        (and in case of GPU computation, it also depends on whether CUDA
        operations are performed in a deterministic mode).

    Args:
        num_train (int): Number of training samples.
        num_test (int): Number of test samples.
        n_in (int): Passed as argument ``n_in`` to class :class:`lib.mlp.MLP`
            when building the teacher model.
        n_out (int): Passed as argument ``n_out`` to class :class:`lib.mlp.MLP`
            when building the teacher model.
        n_hidden (list): Passed as argument ``n_hidden`` to class
            :class:`lib.mlp.MLP` when building the teacher model.
        linear (bool): Passed as argument ``linear`` to
            class :class:`lib.mlp.MLP` when building the teacher model.

    Returns:
        See return values of function :func:`regression_cubic_poly`.
    """
    # FIXME Disentangle the random seeds set in a simulation from the one used
    # to generate synthetic datasets.
    rand = np.random

    train_x = rand.uniform(low=0, high=1, size=(num_train, n_in))
    test_x = rand.uniform(low=0, high=1, size=(num_test, n_in))

    # Note: make sure that the gain is high, such that the neurons are pushed
    # into the nonlinear regime. Otherwise, we obtain a linear dataset.
    teacher = MLP(n_in=n_in,
                  n_out=n_out,
                  n_hidden=n_hidden,
                  linear=linear,
                  gain=3.)

    train_y = teacher.forward(
        torch.from_numpy(train_x).float()).detach().numpy()
    test_y = teacher.forward(
        torch.from_numpy(test_x).float()).detach().numpy()

    return train_x, test_x, train_y, test_y
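
A short usage sketch for the generator above (hypothetical snippet; it assumes the `utils.RegressionDataset` wrapper and `torch.utils.data.DataLoader` that the other examples below rely on):

from torch.utils.data import DataLoader

# Hypothetical usage: generate teacher data and wrap it in data loaders.
train_x, test_x, train_y, test_y = generate_data_from_teacher(
    num_train=1000, num_test=100, n_in=5, n_out=5, n_hidden=[25, 50, 25])

train_loader = DataLoader(utils.RegressionDataset(train_x, train_y),
                          batch_size=32, shuffle=True)
test_loader = DataLoader(utils.RegressionDataset(test_x, test_y),
                         batch_size=32, shuffle=False)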
Example #2
    def test_test_function(self):
        """Testing function :func:`main.test`."""
        # Ensure reproducibility.
        torch.manual_seed(42)
        torch.cuda.manual_seed_all(42)
        rand = np.random.RandomState(42)

        n_in = 5
        n_out = 5
        n_samples = 2

        x = rand.uniform(low=0, high=1, size=(n_samples, n_in))
        y = rand.uniform(low=0, high=1, size=(n_samples, n_out))

        data = utils.RegressionDataset(x, y)
        data_loader1 = DataLoader(data, batch_size=n_samples)
        assert (len(data_loader1) == 1)
        data_loader2 = DataLoader(data, batch_size=n_samples // 2)
        assert (len(data_loader2) > 1)

        device = torch.device("cpu")

        net = MLP(n_in=n_in, n_out=n_out)
        net.eval()

        with torch.no_grad():
            predictions = net.forward(data.inputs)

        # Avoid any console prints.
        with open(os.devnull, 'w') as devnull:
            with contextlib.redirect_stdout(devnull):
                mse1_test = main.test(device, data_loader1, net)

        # See docstring of method `_pytorch_mse` in
        # `tests.public.lib.test_backprop_functions`.
        mse1 = F.mse_loss(predictions, data.outputs, reduction='none')
        mse1 = 0.5 * mse1.sum(dim=1).mean()

        self.assertAlmostEqual(mse1_test, float(mse1), 5,
                               'Method "main.test" does not work correctly.')

        ### Check if `test` handles multiple batches correctly.

        # Avoid any console prints.
        with open(os.devnull, 'w') as devnull:
            with contextlib.redirect_stdout(devnull):
                mse2_test = main.test(device, data_loader2, net)

        self.assertAlmostEqual(
            mse2_test, float(mse1), 5,
            'Method "main.test" does not work correctly when iterating ' +
            'over multiple mini batches.')
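
The two assertions above fully determine what `main.test` must return: the per-sample half sum-of-squared errors, averaged over the entire dataset and independent of how the data is batched. A minimal sketch of a function satisfying that contract (hypothetical, not the repository's actual `main.test`):

import torch

def test_sketch(device, test_loader, net):
    """Hypothetical test routine: mean over samples of 0.5 * squared error."""
    net.eval()
    total_loss, num_samples = 0.0, 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            predictions = net.forward(inputs)
            # 0.5 * sum of squared errors per sample, accumulated over batches.
            total_loss += float(0.5 * ((predictions - targets) ** 2).sum())
            num_samples += inputs.shape[0]
    return total_loss / num_samples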
Example #3
def run():
    """Run the script.

    - Parsing command-line arguments
    - Creating synthetic regression data
    - Initiating training process
    - Testing final network
    """
    ### Parse CLI arguments.
    parser = argparse.ArgumentParser(description='Nonlinear regression with ' +
                                     'neural networks.')

    rgroup = parser.add_argument_group('Regression options')
    rgroup.add_argument('--polynomial_regression',
                        action='store_true',
                        help='Perform a 1D polynomial regression instead of ' +
                        'using a dataset that has been obtained from a ' +
                        'teacher model.')

    tgroup = parser.add_argument_group('Training options')
    tgroup.add_argument('--epochs',
                        type=int,
                        metavar='N',
                        default=100,
                        help='Number of training epochs. ' +
                        'Default: %(default)s.')
    tgroup.add_argument('--batch_size',
                        type=int,
                        metavar='N',
                        default=32,
                        help='Training batch size. Default: %(default)s.')
    tgroup.add_argument('--lr',
                        type=float,
                        default=1e-4,
                        help='Learning rate of optimizer. Default: ' +
                        '%(default)s.')
    tgroup.add_argument('--momentum',
                        type=float,
                        default=0.0,
                        help='Momentum of the optimizer. ' +
                        'Default: %(default)s.')
    tgroup.add_argument('--feedback_alignment',
                        action='store_true',
                        help='Use feedback alignment to train the network ' +
                        'instead of backpropagation.')

    sgroup = parser.add_argument_group('Network options')
    sgroup.add_argument('--num_hidden',
                        type=int,
                        metavar='N',
                        default=2,
                        help='Number of hidden layers in the (student) ' +
                        'network. Default: %(default)s.')
    sgroup.add_argument('--size_hidden',
                        type=int,
                        metavar='N',
                        default=10,
                        help='Number of units in each hidden layer of the ' +
                        '(student) network. Default: %(default)s.')
    sgroup.add_argument('--size_input',
                        type=int,
                        metavar='N',
                        default=5,
                        help='Number of units of the input. ' +
                        'Default: %(default)s.')
    sgroup.add_argument('--size_output',
                        type=int,
                        metavar='N',
                        default=5,
                        help='Number of units of the output. ' +
                        'Default: %(default)s.')
    sgroup.add_argument('--linear',
                        action='store_true',
                        help='Train a linear network on a linear dataset.')

    pgroup = parser.add_argument_group('Plotting options')
    pgroup.add_argument('--plot_matrix_angles',
                        action='store_true',
                        help='Show a plot of the angles between B, W^T and '
                        'pseudoinverse(W).')
    pgroup.add_argument('--show_plot',
                        action='store_true',
                        help='Show the final regression results as plot. ' +
                        'Note, only applies to 1D regression tasks.')

    mgroup = parser.add_argument_group('Miscellaneous options')
    mgroup.add_argument('--use_cuda',
                        action='store_true',
                        help='Flag to enable GPU usage.')
    mgroup.add_argument('--random_seed',
                        type=int,
                        metavar='N',
                        default=42,
                        help='Random seed. Default: %(default)s.')

    args = parser.parse_args()
    if not args.feedback_alignment:
        args.plot_matrix_angles = False

    ### Ensure deterministic computation.
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)
    np.random.seed(args.random_seed)
    random.seed(args.random_seed)

    # Ensure that runs are reproducible even on GPU. Note, this slows down
    # training!
    # https://pytorch.org/docs/stable/notes/randomness.html
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    use_cuda = args.use_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('Using cuda: ' + str(use_cuda))

    ### Generate datasets and data handlers.
    if args.polynomial_regression:
        print('### Learning to regress a 1D cubic polynomial ###')
        n_in = n_out = 1

        train_x, test_x, train_y, test_y = utils.regression_cubic_poly()

    else:
        print('### Training a student model to mimic a teacher ###')
        n_in = args.size_input
        n_out = args.size_output

        train_x, test_x, train_y, test_y = utils.generate_data_from_teacher(
            n_in=n_in, n_out=n_out, n_hidden=[25, 50, 25], linear=False,
            num_train=1000)

    train_loader = DataLoader(utils.RegressionDataset(train_x, train_y),
                              batch_size=args.batch_size,
                              shuffle=True)
    test_loader = DataLoader(utils.RegressionDataset(test_x, test_y),
                             batch_size=args.batch_size,
                             shuffle=False)

    ### Generate network.
    n_hidden = [args.size_hidden] * args.num_hidden
    net = MLP(n_in=n_in,
              n_out=n_out,
              n_hidden=n_hidden,
              fa=args.feedback_alignment,
              linear=args.linear).to(device)

    ### Train network.
    train(args, device, train_loader, net)

    ### Test network.
    test(device, test_loader, net)

    if args.show_plot and n_in == 1 and n_out == 1:
        utils.plot_predictions(device, test_loader, net)
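
For intuition on the `--plot_matrix_angles` option: the alignment between a feedback matrix B and the forward weights is commonly measured as the angle between the two matrices viewed as flattened vectors. A small self-contained sketch (hypothetical helper, not part of this script):

import numpy as np

def matrix_angle_deg(A, B):
    """Hypothetical helper: angle in degrees between two equally-shaped
    matrices, measured between their flattened vectors."""
    a, b = A.flatten(), B.flatten()
    cos = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
    return np.degrees(np.arccos(np.clip(cos, -1.0, 1.0)))

# The two comparisons named in the help text: B vs. W^T and B vs. pinv(W).
W = np.random.randn(10, 5)
B = np.random.randn(5, 10)
print(matrix_angle_deg(B, W.T))
print(matrix_angle_deg(B, np.linalg.pinv(W)))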
Example #4
File: main.py  Project: anianruoss/LDABNN
def run():
    """Run the script.

    - Parsing command-line arguments
    - Creating synthetic regression data
    - Initiating training process
    - Testing final network
    """
    # Parse CLI arguments.
    parser = argparse.ArgumentParser(description='Nonlinear regression with ' +
                                     'neural networks.')

    tgroup = parser.add_argument_group('Training options')
    tgroup.add_argument('--epochs',
                        type=int,
                        metavar='N',
                        default=10000,
                        help='Number of training epochs. ' +
                        'Default: %(default)s.')
    tgroup.add_argument('--batch_size',
                        type=int,
                        metavar='N',
                        default=128,
                        help='Training batch size. Default: %(default)s.')
    tgroup.add_argument('--lr',
                        type=float,
                        default=1e-4,
                        help='Learning rate of optimizer. Default: ' +
                        '%(default)s.')
    tgroup.add_argument('--momentum',
                        type=float,
                        default=0.9,
                        help='Momentum of the optimizer. ' +
                        'Default: %(default)s.')
    sgroup = parser.add_argument_group('Network options')
    sgroup.add_argument('--num_hidden',
                        type=int,
                        metavar='N',
                        default=2,
                        help='Number of hidden layers in the (student) ' +
                        'network. Default: %(default)s.')
    sgroup.add_argument('--size_hidden',
                        type=int,
                        metavar='N',
                        default=10,
                        help='Number of units in each hidden layer of the ' +
                        '(student) network. Default: %(default)s.')
    sgroup.add_argument('--num_train_samples',
                        type=int,
                        default=20,
                        help='Number of training data points.')

    mgroup = parser.add_argument_group('Miscellaneous options')
    mgroup.add_argument('--use_cuda',
                        action='store_true',
                        help='Flag to enable GPU usage.')
    mgroup.add_argument('--random_seed',
                        type=int,
                        metavar='N',
                        default=42,
                        help='Random seed. Default: %(default)s.')
    mgroup.add_argument('--data_random_seed',
                        type=int,
                        metavar='N',
                        default=42,
                        help='Data random seed. Default: %(default)s.')
    mgroup.add_argument('--dont_show_plot',
                        action='store_false',
                        help='Do not show the final regression results plot. ' +
                        'Note, only applies to 1D regression tasks.')

    bgroup = parser.add_argument_group('Bayes by Backprop options')
    bgroup.add_argument('--bbb',
                        action='store_true',
                        help='Start training of BbB.')
    bgroup.add_argument('--weight_samples',
                        type=int,
                        default=100,
                        help='Number of weight samples used.')

    args = parser.parse_args()

    # Ensure deterministic computation.
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)
    np.random.seed(args.random_seed)
    random.seed(args.random_seed)

    # Ensure that runs are reproducible even on GPU. Note, this slows down
    # training!
    # https://pytorch.org/docs/stable/notes/randomness.html
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    use_cuda = args.use_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('Using cuda: ' + str(use_cuda))

    # Generate datasets and data handlers.
    print('### Learning to regress a 1D cubic polynomial ###')
    n_in = n_out = 1

    train_x, test_x, train_y, test_y = utils.regression_cubic_poly(
        rseed=args.data_random_seed, num_train=args.num_train_samples)

    train_loader = DataLoader(utils.RegressionDataset(train_x, train_y),
                              batch_size=args.batch_size,
                              shuffle=True)
    test_loader = DataLoader(utils.RegressionDataset(test_x, test_y),
                             batch_size=args.batch_size,
                             shuffle=False)

    # Generate network.
    n_hidden = [args.size_hidden] * args.num_hidden
    net = MLP(n_in=n_in, n_out=n_out, n_hidden=n_hidden).to(device)

    # Train network.
    train(args, device, train_loader, net)

    # Test network.
    test(device, test_loader, net)

    if args.dont_show_plot and n_in == 1 and n_out == 1:
        utils.plot_predictions(device, test_loader, train_loader, net, args)
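
Both scripts build their 1D dataset via `utils.regression_cubic_poly(rseed=..., num_train=...)`, which is not shown here. A minimal sketch of what such a generator could look like, inferred only from the call sites above (hypothetical; the actual value range, test-set size, and any noise model may differ):

import numpy as np

def regression_cubic_poly_sketch(num_train=20, num_test=50, rseed=42):
    """Hypothetical 1D cubic-polynomial data generator returning
    train_x, test_x, train_y, test_y as 2D arrays (samples x 1)."""
    rand = np.random.RandomState(rseed)
    poly = lambda x: x ** 3  # the cubic target function

    train_x = rand.uniform(low=-4, high=4, size=(num_train, 1))
    test_x = np.linspace(-4, 4, num_test).reshape(-1, 1)
    return train_x, test_x, poly(train_x), poly(test_x)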