Example #1
def do_test(test_subset_generators, f_predict, loss_dict, n_test_iters, collapse_loss='mean', in_test_callback=None):

    if callable(loss_dict):
        loss_dict = dict(loss=loss_dict)

    these_test_results = Duck()
    pi = ProgressIndicator(n_test_iters, "Testing")
    for subset_name, subset_generator in test_subset_generators.items():
        start_time = time.time()
        n_tests = 0
        for inputs, targets in subset_generator:
            n_tests += 1
            outputs = f_predict(inputs)
            for loss_name, f_loss in loss_dict.items():
                these_test_results[subset_name, Keys.LOSSES, loss_name, next] = f_loss(outputs, targets)
            pi.print_update()
            if in_test_callback is not None:
                in_test_callback(inputs=inputs, targets=targets, outputs=outputs)

        assert n_tests > 0, "It appears that subset '{}' had no tests!".format(subset_name)
        these_test_results[subset_name, Keys.N_TESTS] = n_tests
        if collapse_loss is not None:
            collapse_func = {'mean': np.mean}[collapse_loss]
            for loss_name, f_loss in loss_dict.items():
                these_test_results[subset_name, Keys.LOSSES, loss_name] = collapse_func(these_test_results[subset_name, Keys.LOSSES, loss_name])
        these_test_results[subset_name, Keys.TIME] = time.time() - start_time
    return these_test_results
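A minimal usage sketch of do_test (the predictor, loss, and data below are illustrative stand-ins, not from the source). Each test subset can be any iterable of (inputs, targets) pairs, and loss_dict may be a single callable or a dict of named losses:

import numpy as np

f_predict = lambda x: x.mean(axis=1, keepdims=True)                      # dummy "model"
mse = lambda outputs, targets: float(np.mean((outputs - targets) ** 2))  # loss(outputs, targets) -> scalar

test_subset_generators = {
    'train': [(np.random.randn(8, 4), np.random.randn(8, 1)) for _ in range(5)],
    'test': [(np.random.randn(8, 4), np.random.randn(8, 1)) for _ in range(5)],
}
results = do_test(test_subset_generators, f_predict, loss_dict={'mse': mse}, n_test_iters=10)
# results[subset_name, Keys.LOSSES, 'mse'] holds each subset's mean loss;
# results[subset_name, Keys.N_TESTS] and results[subset_name, Keys.TIME] hold counts and timing.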
Example #2
def demo_kd_too_large(
        n_steps=20000,
        kp=.01,
        kd=1.,
        kp_scan_range=(.001, .1),
        kd_scan_range=(.1, 10),
        n_k_points=32,
        x_cutoff=0.01,
        w_cutoff=0.002,
        w_fixed=False,
        k_spacing='log',
        seed=1238
        ):
    """
    We have time-varying signals x and w.  See how different choices of kp, kd, and quantization affect our
    ability to approximate the time-varying product x*w.
    """

    rng = np.random.RandomState(seed)

    x = lowpass_random(n_samples=n_steps, cutoff=x_cutoff, normalize=True, rng=rng)
    w = lowpass_random(n_samples=n_steps, cutoff=w_cutoff, normalize=True, rng=rng) if not w_fixed else np.ones(n_steps)
    x_w = x*w

    distance_mat_nonquantized = np.zeros((n_k_points, n_k_points))
    distance_mat_quantized = np.zeros((n_k_points, n_k_points))
    distance_mat_recon = np.zeros((n_k_points, n_k_points))
    n_spikes = np.zeros((n_k_points, n_k_points))

    pi = ProgressIndicator(n_k_points**2)

    kp_values = point_space(kp_scan_range[0], kp_scan_range[1], n_points=n_k_points, spacing=k_spacing)
    kd_values = point_space(kd_scan_range[0], kd_scan_range[1], n_points=n_k_points, spacing=k_spacing)

    for i, kpi in enumerate(kp_values):
        for j, kdj in enumerate(kd_values):
            pi.print_update(i*n_k_points+j)
            x_enc = pid_encode(x, kp=kpi, kd=kdj, quantization=None)
            x_enc_quantized = pid_encode(x, kp=kpi, kd=kdj, quantization='herd')
            x_enc_w = pid_decode(x_enc*w, kp=kpi, kd=kdj)
            x_enc_quantized_w_dec = pid_decode(x_enc_quantized*w, kp=kpi, kd=kdj)
            x_enc_quantized_dec_w = pid_decode(x_enc_quantized, kp=kpi, kd=kdj)*w
            distance_mat_nonquantized[i, j] = cosine_distance(x_w, x_enc_w)
            distance_mat_quantized[i, j] = cosine_distance(x_w, x_enc_quantized_w_dec)
            distance_mat_recon[i, j] = cosine_distance(x_w, x_enc_quantized_dec_w)
            n_spikes[i,j] = np.abs(x_enc_quantized).sum()

    x_enc_quantized = pid_encode(x, kp=kp, kd=kd, quantization='herd')
    x_enc = pid_encode(x, kp=kp, kd=kd, quantization=None)
    xwq = pid_decode(x_enc_quantized*w, kp=kp, kd=kd)
    xwn = pid_decode(x_enc*w, kp=kp, kd=kd)

    return (x, w, x_w, x_enc_quantized, x_enc, xwq, xwn), (distance_mat_nonquantized, distance_mat_quantized, distance_mat_recon, n_spikes), (kp, kd, kd_values, kp_values)
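The three returned tuples group the raw signals, the kp/kd scan results, and the scan coordinates. A sketch of unpacking them (the parameter values here are illustrative):

signals, scan_results, k_info = demo_kd_too_large(n_steps=2000, n_k_points=8)
x, w, x_w, x_enc_quantized, x_enc, xwq, xwn = signals
distance_mat_nonquantized, distance_mat_quantized, distance_mat_recon, n_spikes = scan_results
kp, kd, kd_values, kp_values = k_info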
Example #3
def test_progress_indicator():

    n_iter = 100

    pi = ProgressIndicator(n_iter, update_every='1s')

    start = time.time()
    for i in range(n_iter):
        time.sleep(0.001)
        if i % 10 == 0:
            with pi.pause_measurement():
                time.sleep(0.02)

    assert pi.get_elapsed() < (time.time() - start) / 2.
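The assertion above relies on time spent inside pause_measurement() being excluded from get_elapsed(). The same pattern keeps slow, untimed work (such as checkpointing) out of the measured rate; in this sketch, do_step and save_checkpoint are hypothetical placeholders:

pi = ProgressIndicator(1000, update_every='1s')
for i in range(1000):
    do_step()                          # the work being timed
    if i % 100 == 0:
        with pi.pause_measurement():   # excluded from the indicator's clock
            save_checkpoint()          # slow I/O that should not skew the rate
    pi()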
Example #4
def temporalize(x, smoothing_steps, distance='L1'):
    """
    :param x: An (n_samples, n_dims) dataset
    :return: A (n_samples, ) array of indexes that can be used to shuffle the input for temporal smoothness.
    """
    x_flat = x.reshape(x.shape[0], -1)
    index_buffer = np.arange(1, smoothing_steps + 1)
    next_sample_buffer = x_flat[1:smoothing_steps + 1].copy()
    # Technically, we could do this without a next_sample_buffer (and only an index_buffer), but it would require
    # repeatedly accessing a bunch of really scattered memory, so we do it this way.
    shuffling_indices = np.zeros(len(x), dtype=int)
    rectifier = np.abs if distance == 'L1' else np.square if distance == 'L2' else bad_value(distance)
    p = ProgressIndicator(len(x))
    current_index = 0
    for i in range(len(x)):
        shuffling_indices[i] = current_index
        closest = np.argmin(
            rectifier(x_flat[current_index] - next_sample_buffer).sum(axis=1))
        current_index = index_buffer[closest]
        next_index = i + smoothing_steps + 1
        more_samples_remain = next_index < len(x)
        next_sample_buffer[closest] = x_flat[next_index] if more_samples_remain else float('inf')
        index_buffer[closest] = next_index if more_samples_remain else -1
        p(i)
    return shuffling_indices
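A usage sketch of temporalize on toy data (illustrative, not from the source):

import numpy as np

x = np.random.rand(1000, 16)                             # (n_samples, n_dims)
ixs = temporalize(x, smoothing_steps=10, distance='L1')
x_smooth = x[ixs]                                        # reordered so consecutive samples are similar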
Example #5
def demo_optimize_conv_scales(n_epochs=5,
                              comp_weight=1e-11,
                              learning_rate=0.1,
                              error_loss='KL',
                              use_softmax=True,
                              optimizer='sgd',
                              shuffle_training=False):
    """
    Run the scale optimization routine on a convnet.  
    :param n_epochs:
    :param comp_weight:
    :param learning_rate:
    :param error_loss:
    :param use_softmax:
    :param optimizer:
    :param shuffle_training:
    :return:
    """
    if error_loss == 'KL' and not use_softmax:
        raise Exception(
            "It's very strange that you want to use a KL divergence on something other than a softmax error.  I assume you've made a mistake."
        )

    training_videos, training_vgg_inputs = get_vgg_video_splice(
        ['ILSVRC2015_train_00033010', 'ILSVRC2015_train_00336001'],
        shuffle=shuffle_training,
        shuffling_rng=1234)
    test_videos, test_vgg_inputs = get_vgg_video_splice(
        ['ILSVRC2015_train_00033009', 'ILSVRC2015_train_00033007'])

    set_dbplot_figure_size(12, 6)

    n_frames_to_show = 10
    display_frames = np.arange(
        len(test_videos) // n_frames_to_show // 2, len(test_videos),
        len(test_videos) // n_frames_to_show)
    ax1 = dbplot(np.concatenate(test_videos[display_frames], axis=1),
                 "Test Videos",
                 title='',
                 plot_type='pic')
    plt.subplots_adjust(wspace=0, hspace=.05)
    ax1.set_xticks(224 * np.arange(len(display_frames) // 2) * 2 + 224 // 2)
    ax1.tick_params(labelbottom=True)

    layers = get_vgg_layer_specifiers(
        up_to_layer='prob' if use_softmax else 'fc8')

    # Setup the true VGGnet and get the outputs
    f_true = ConvNet.from_init(layers, input_shape=(3, 224, 224)).compile()
    true_test_out = flatten2(
        np.concatenate([
            f_true(frame_positions[None])
            for frame_positions in test_vgg_inputs
        ]))
    top5_true_guesses = argtopk(true_test_out, 5)
    true_guesses = np.argmax(true_test_out, axis=1)
    true_labels = [
        get_vgg_label_at(g, short=True)
        for g in true_guesses[display_frames[::2]]
    ]
    full_convnet_cost = np.array([
        get_full_convnet_computational_cost(layer_specs=layers,
                                            input_shape=(3, 224, 224))
    ] * len(test_videos))

    # Setup the approximate networks
    slrc_net = ScaleLearningRoundingConvnet.from_convnet_specs(
        layers,
        optimizer=get_named_optimizer(optimizer, learning_rate=learning_rate),
        corruption_type='rand',
        rng=1234)
    f_train_slrc = slrc_net.train_scales.partial(
        comp_weight=comp_weight, error_loss=error_loss).compile()
    f_get_scales = slrc_net.get_scales.compile()
    round_fp = RoundConvNetForwardPass(layers)
    sigmadelta_fp = SigmaDeltaConvNetForwardPass(layers,
                                                 input_shape=(3, 224, 224))

    p = ProgressIndicator(n_epochs * len(training_videos))

    output_dir = make_dir(get_local_path('output/%T-convnet-spikes'))

    for input_minibatch, minibatch_info in minibatch_iterate_info(
            training_vgg_inputs,
            n_epochs=n_epochs,
            minibatch_size=1,
            test_epochs=np.arange(0, n_epochs, 0.1)):

        if minibatch_info.test_now:
            with EZProfiler('test'):
                current_scales = f_get_scales()
                round_cost, round_out = round_fp.get_cost_and_output(
                    test_vgg_inputs, scales=current_scales)
                sd_cost, sd_out = sigmadelta_fp.get_cost_and_output(
                    test_vgg_inputs, scales=current_scales)
                round_guesses, round_top1_correct, round_top5_correct = get_and_report_scores(
                    round_cost,
                    round_out,
                    name='Round',
                    true_top_1=true_guesses,
                    true_top_k=top5_true_guesses)
                sd_guesses, sd_top1_correct, sd_top5_correct = get_and_report_scores(
                    sd_cost,
                    sd_out,
                    name='SigmaDelta',
                    true_top_1=true_guesses,
                    true_top_k=top5_true_guesses)

                round_labels = [
                    get_vgg_label_at(g, short=True)
                    for g in round_guesses[display_frames[::2]]
                ]

                ax1.set_xticklabels([
                    '{}\n{}'.format(tg, rg)
                    for tg, rg in izip_equal(true_labels, round_labels)
                ])

                ax = dbplot(
                    np.array([
                        round_cost / 1e9, sd_cost / 1e9,
                        full_convnet_cost / 1e9
                    ]).T,
                    'Computation',
                    plot_type='thick-line',
                    ylabel='GOps',
                    title='',
                    legend=['Round', r'$\Sigma\Delta$', 'Original'],
                )
                ax.set_xticklabels([])
                plt.grid()
                dbplot(
                    100 * np.array(
                        [cummean(sd_top1_correct),
                         cummean(sd_top5_correct)]).T,
                    "Score",
                    plot_type=lambda: LinePlot(
                        y_bounds=(0, 100),
                        plot_kwargs=[
                            dict(linewidth=3, color='k'),
                            dict(linewidth=3, color='k', linestyle=':')
                        ]),
                    title='',
                    legend=[
                        r'Round/$\Sigma\Delta$ Top-1',
                        r'Round/$\Sigma\Delta$ Top-5'
                    ],
                    ylabel='Cumulative\nPercent Accuracy',
                    xlabel='Frame #',
                    layout='v',
                )
                plt.grid()
            plt.savefig(
                os.path.join(output_dir,
                             'epoch-%.3g.pdf' % (minibatch_info.epoch, )))
        f_train_slrc(input_minibatch)
        p()
        print "Epoch {:3.2f}: Scales: {}".format(
            minibatch_info.epoch, ['%.3g' % float(s) for s in f_get_scales()])

    results = dict(current_scales=current_scales,
                   round_cost=round_cost,
                   round_out=round_out,
                   sd_cost=sd_cost,
                   sd_out=sd_out,
                   round_guesses=round_guesses,
                   round_top1_correct=round_top1_correct,
                   round_top5_correct=round_top5_correct,
                   sd_guesses=sd_guesses,
                   sd_top1_correct=sd_top1_correct,
                   sd_top5_correct=sd_top5_correct)

    dbplot_hang()
    return results
Example #6
def train_and_test_predictor(
        f_train,
        f_predict,
        losses,
        training_data_gen,
        test_data_gen_constructors,
        n_training_iters=None,
        n_test_iters=None,
        test_checkpoints=('lin', 1000),
        collapse_loss='mean',
        progress_update_period='5s',
        in_test_callback=None,
        post_test_callback=None,
        post_train_callback=None,
        save_train_return=False,
        measures=None,
        iterations_to_end=False
        ):
    """
    :param f_train: A function (inputs, targets) -> train_return which updates the predictor
    :param f_predict: A function inputs -> outputs
    :param losses: A loss function (outputs, targets) -> loss, or a dict of loss_name -> loss function
    :param training_data_gen: A generator yielding (inputs, targets) training minibatches
    :param test_data_gen_constructors: A dict of subset_name -> constructor, where constructor() returns a generator of (inputs, targets) test pairs
    :param n_training_iters: Number of training iterations (used for progress reporting)
    :param n_test_iters: Number of test iterations (used for progress reporting)
    :param test_checkpoints: A Checkpoints object, or the arguments to construct one, deciding when to run tests
    :param collapse_loss: How to collapse a subset's per-iteration losses into one number (currently only 'mean')
    :param progress_update_period: How often to print a progress update
    :param in_test_callback: Optional callback called with (inputs, targets, outputs) on each test iteration
    :param post_test_callback: Optional callback called with the test measures after each test
    :param post_train_callback: Optional callback called with (inputs, targets, iter) after each training iteration
    :param save_train_return: If True, save the return value of f_train in the measures
    :param measures: Optionally, an existing Duck of measures to append to
    :param iterations_to_end: See the return description
    :return: A generator which yields, at every test checkpoint, the accumulated measures.
        If iterations_to_end is False, this is a Duck with fields:
        'training'                          Results recorded during training callbacks
            training_iter                   The iteration of the training callback
        'testing'                           Results recorded during tests
            test_iter                       The index of the test
                'iter'                      The number of training iterations finished at the time that the test is run
                'time'                      The time at which the test is run
                'samples'                   The number of samples seen so far
                'results'                   A structure containing results
                    subset_name             The name of the testing subset e.g. 'train', 'test' (you give subset names in test_data_gen_constructors)
                        'losses'
                            loss_name       The name of the loss function (you provide this in losses)
                            n_tests         The number of tests that were run for this subset
                        'time'              The time, in seconds, that it took to test on this subset.

        Otherwise, if True, the same structure but with training_iter and test_iter pushed to the leaf position.
    """

    if measures is None:
        measures = Duck()
    if 'training' not in measures:
        measures[Keys.TRAINING] = Duck()
    if 'testing' not in measures:
        measures[Keys.TESTING] = Duck()

    is_test_time = Checkpoints(test_checkpoints) if not isinstance(test_checkpoints, Checkpoints) else test_checkpoints
    pi = ProgressIndicator(n_training_iters, "Training", update_every=progress_update_period)

    for inputs, targets in training_data_gen:
        if is_test_time():

            this_test_measures = measures[Keys.TESTING].open(next)
            this_test_measures[Keys.ITER] = pi.get_iterations()
            this_test_measures[Keys.TIME] = pi.get_elapsed()
            this_test_measures[Keys.RESULTS] = do_test(
                test_subset_generators={subset_name: constructor() for subset_name, constructor in test_data_gen_constructors.items()},
                f_predict=f_predict,
                loss_dict=losses,
                n_test_iters=n_test_iters,
                collapse_loss=collapse_loss,
                in_test_callback=in_test_callback,
                )
            if post_test_callback is not None:
                return_val = post_test_callback(this_test_measures)
                if return_val is not None:
                    this_test_measures[Keys.CALLBACK, ...] = return_val
            if iterations_to_end:
                measures_to_yield = measures.arrayify_axis(axis=1, subkeys=Keys.TRAINING)
                measures_to_yield = measures_to_yield.arrayify_axis(axis=1, subkeys=Keys.TESTING, inplace=True)
                yield measures_to_yield.to_struct()
            else:
                yield measures

        train_return = f_train(inputs, targets)
        pi.print_update()
        if save_train_return:
            measures[Keys.TRAINING, Keys.RETURNS] = train_return
        if post_train_callback:
            return_val = post_train_callback(inputs=inputs, targets=targets, iter=pi.get_iterations())
            if return_val is not None:
                measures[Keys.TRAINING, Keys.CALLBACK, next, ...] = return_val
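Since train_and_test_predictor is a generator yielding the accumulated measures at every test checkpoint, a driver loop might look like the sketch below (f_train, f_predict, mse, and the data generators are stand-ins, not from the source):

for measures in train_and_test_predictor(
        f_train=f_train,                        # (inputs, targets) -> training return
        f_predict=f_predict,                    # inputs -> outputs
        losses={'mse': mse},
        training_data_gen=make_training_gen(),  # yields (inputs, targets) minibatches
        test_data_gen_constructors={'test': make_test_gen},
        test_checkpoints=('lin', 100),          # test every 100 training iterations
        ):
    print(measures[Keys.TESTING, -1, Keys.RESULTS])  # results of the latest test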
Example #7
def get_mnist_results_with_parameters(weights,
                                      biases,
                                      scales=None,
                                      hidden_activations='relu',
                                      output_activation='softmax',
                                      n_samples=None,
                                      smoothing_steps=1000):
    """
    Return a data structure showing the error and computation for required by the orignal, rounding, and sigma-delta
    implementation of a network with the given parameters.

    :param weights:
    :param biases:
    :param scales:
    :param hidden_activations:
    :param output_activation:
    :param n_samples:
    :param smoothing_steps:
    :return: results: An OrderedDict
        Where the key is a 3-tuple are:
            (dataset_name, subset, net_version), Where:
                dataset_name: is 'mnist' or 'temp_mnist'
                subset: is 'train' or 'test'
                net_version: is 'td' or 'round' or 'truth'
        And values are another OrderedDict, with keys:
            'MFlops', 'l1_errorm', 'class_error'  ... for discrete nets and
            'Dense MFlops', 'Sparse MFlops', 'class_error' for "true" nets.
    """
    mnist = get_mnist_dataset(flat=True,
                              n_training_samples=n_samples,
                              n_test_samples=n_samples)
    temp_mnist = get_temporal_mnist_dataset(flat=True,
                                            smoothing_steps=smoothing_steps,
                                            n_training_samples=n_samples,
                                            n_test_samples=n_samples)
    results = OrderedDict()
    p = ProgressIndicator(2 * 3 * 2)
    for dataset_name, (tr_x, tr_y, ts_x, ts_y) in [('mnist', mnist.xyxy),
                                                   ('temp_mnist',
                                                    temp_mnist.xyxy)]:
        for subset, x, y in [('train', tr_x, tr_y), ('test', ts_x, ts_y)]:
            traditional_net_output, dense_flops, sparse_flops = forward_pass_and_cost(
                input_data=x,
                weights=weights,
                biases=biases,
                hidden_activations=hidden_activations,
                output_activations=output_activation)
            assert round(dense_flops) == dense_flops and round(
                sparse_flops) == sparse_flops, 'Flop counts must be int!'

            class_error = percent_argmax_incorrect(traditional_net_output, y)
            results[dataset_name, subset, 'truth'] = OrderedDict([
                ('Dense MFlops', dense_flops / (1e6 * len(x))),
                ('Sparse MFlops', sparse_flops / (1e6 * len(x))),
                ('class_error', class_error)
            ])
            for net_version in 'td', 'round':
                (comp_cost_adds, comp_cost_multiplyadds
                 ), output = tdnet_forward_pass_cost_and_output(
                     inputs=x,
                     weights=weights,
                     biases=biases,
                     scales=scales,
                     version=net_version,
                     hidden_activations=hidden_activations,
                     output_activations=output_activation,
                     quantization_method='herd',
                     computation_calc=('adds', 'multiplyadds'))
                l1_error = np.abs(output - traditional_net_output).sum(
                    axis=1).mean(axis=0)
                class_error = percent_argmax_incorrect(output, y)
                results[dataset_name, subset, net_version] = OrderedDict([
                    ('MFlops', comp_cost_adds / (1e6 * len(x))),
                    ('MFlops-multadd',
                     comp_cost_multiplyadds / (1e6 * len(x))),
                    ('l1_error', l1_error), ('class_error', class_error)
                ])
                p.print_update()
    return results
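A sketch of consuming the returned structure, given trained parameters (weights and biases here stand in for real trained values):

results = get_mnist_results_with_parameters(weights, biases, n_samples=1000)
for (dataset_name, subset, net_version), scores in results.items():
    print(dataset_name, subset, net_version, scores['class_error'])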
Example #8
def train_online_network_checkpoints(model, dataset, checkpoint_generator=None,
                                     test_online=True, return_output=True, n_tests=0, offline_test_mode=None,
                                     is_cuda=False, online_test_reporter='recent', error_func='mse',
                                     batchify=False, print_every=5):
    """

    :param model: A TrainableStatefulModule
    :param dataset: A 4-tuple of (x_train, y_train, x_test, y_test) where the first axis of each is the sample #
    :param n_tests: Number of "splits", in the training set... (where we run a full test)
    :return: train_test_errors: A tuple of (time_step, train_error, test_error)
    """
    data = numpy_struct_to_torch_struct(dataset, cast_floats='float32')

    if batchify:
        data = [x[:, None] for x in data]

    if len(data) == 4:
        x_train, y_train, x_test, y_test = data
    elif len(data) == 2:
        x_train, y_train = data
        x_test, y_test = [], []
    else:
        raise Exception(
            'Expected data to be (x_train, y_train, x_test, y_test) or (x_test, y_test)'
        )
    assert len(y_train) == len(x_train)
    assert len(x_test) == len(y_test)
    if is_cuda:
        x_train = x_train.cuda()
        y_train = y_train.cuda()
        if len(x_test) > 0:  # x_test/y_test may be empty lists when only (x_train, y_train) is given
            x_test = x_test.cuda()
            y_test = y_test.cuda()

    if isinstance(checkpoint_generator, tuple):
        distribution = checkpoint_generator[0]
        if distribution == 'even':
            interval, = checkpoint_generator[1:]
            checkpoint_generator = (interval * i for i in itertools.count(1))
        elif distribution == 'exp':
            first, growth = checkpoint_generator[1:]
            checkpoint_generator = (first * i * (1 + growth)**(i - 1)
                                    for i in itertools.count(1))
        else:
            raise Exception("Can't make a checkpoint generator {}".format(
                checkpoint_generator))

    if isinstance(error_func, str):
        error_func = create_loss_function(error_func)

    n_training_samples = len(x_train)
    test_iterations = [
        int(n_training_samples * i / float(n_tests - 1))
        for i in range(0, n_tests)
    ]

    initial_state = model.get_state()

    results = Duck()
    if test_online:
        if online_test_reporter == 'cum':
            loss_accumulator = RunningAverage()
        elif online_test_reporter == 'recent':
            loss_accumulator = RecentRunningAverage()
        elif online_test_reporter is None:
            loss_accumulator = lambda x: x
        else:
            loss_accumulator = bad_value(online_test_reporter)

    t_start = time.time()
    next_checkpoint = float('inf') if checkpoint_generator is None else next(
        checkpoint_generator)
    err = np.nan
    t = 0  # Initialized here because the progress callback below may fire before the loop starts
    pi = ProgressIndicator(
        n_training_samples + 1,
        update_every=(print_every, 'seconds'),
        show_total=True,
        post_info_callback=lambda: 'Iteration {} of {}. Online {} Error: {}'.format(
            t, len(x_train), online_test_reporter, err))
    for t in range(n_training_samples + 1):

        if offline_test_mode is not None and t in test_iterations:
            training_state = model.get_state()
            model.set_state(initial_state)

            if offline_test_mode == 'full_pass':
                y_train_guess = torch_loop(model, is_cuda, x_train[:t]) if t > 0 else None
                if t < len(x_train) - 1:
                    # Run the remaining training inputs so the model's state stays continuous
                    y_middle_guess = torch_loop(model, is_cuda, x_train[t:])
                y_test_guess = torch_loop(model, is_cuda, x_test)
                if is_cuda:
                    if y_train_guess is not None:
                        y_train_guess = y_train_guess.cuda()
                    y_test_guess = y_test_guess.cuda()

                to_numpy = (lambda v: v.data.cpu().numpy()) if is_cuda else (lambda v: v.data.numpy())
                train_err = to_numpy(error_func(
                    _flatten_first_2(y_train_guess),
                    _flatten_first_2(y_train[:t]))) if y_train_guess is not None else np.nan
                test_err = to_numpy(error_func(
                    _flatten_first_2(y_test_guess),
                    _flatten_first_2(y_test)))

                print('Iteration {} of {}: Training: {:.3g}, Test: {:.3g}'.
                      format(t, len(x_train), train_err, test_err))
                results['offline_errors', next, :] = dict(t=t,
                                                          train=train_err,
                                                          test=test_err)

            elif offline_test_mode == 'cold_test':
                y_test_guess = torch_loop(model, is_cuda, x_test)
                y_test_guess = _flatten_first_2(y_test_guess)
                if is_cuda:
                    y_test_guess = y_test_guess.cuda()
                    test_err = error_func(
                        y_test_guess,
                        _flatten_first_2(y_test)).data.cpu().numpy()
                else:
                    test_err = error_func(
                        y_test_guess, _flatten_first_2(y_test)).data.numpy()

                print('Iteration {} of {}: Test: {:.3g}'.format(
                    t, len(x_train), test_err))
                results['offline_errors', next, :] = dict(t=t, test=test_err)
            else:
                raise Exception('No test_mode: {}'.format(offline_test_mode))
            model.set_state(training_state)

        if t < n_training_samples:
            out = model.train_it(x_train[t], y_train[t], is_cuda)
            if return_output:
                if is_cuda:
                    results['output', next] = out.data.cpu().numpy()[0]
                else:
                    results['output', next] = out.data.numpy()[0]
            if test_online:
                this_loss = error_func(out, y_train[t]).item()
                err = loss_accumulator(this_loss)
                results['online_errors', next] = this_loss
                if online_test_reporter is not None:
                    results['smooth_online_errors', online_test_reporter,
                            next] = err

        pi()
        if t >= next_checkpoint or t == n_training_samples:
            results['checkpoints', next, :] = {
                'iter': t,
                'runtime': time.time() - t_start
            }
            yield results
            next_checkpoint = next(checkpoint_generator, float('inf')) if checkpoint_generator is not None else float('inf')

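The checkpoint_generator tuple forms expand into generators internally: ('even', 100) yields checkpoints at 100, 200, 300, ..., and ('exp', first, growth) spaces them out exponentially. A minimal driver sketch (model and dataset are stand-ins, not from the source):

gen = train_online_network_checkpoints(model, dataset, checkpoint_generator=('even', 100),
                                       test_online=True, online_test_reporter='recent')
for results in gen:  # one accumulated results Duck per checkpoint
    print(results['checkpoints', -1, 'iter'], results['checkpoints', -1, 'runtime'])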
Example #9
def train_and_test_predictor(f_train,
                             f_predict,
                             losses,
                             training_data_gen,
                             test_data_gen_constructors,
                             n_training_iters=None,
                             n_test_iters=None,
                             test_checkpoints=('lin', 1000),
                             collapse_loss='mean',
                             progress_update_period='5s',
                             in_test_callback=None,
                             post_test_callback=None,
                             post_train_callback=None,
                             save_train_return=False,
                             measures=None,
                             iterations_to_end=False):
    """
    :param f_train:
    :param f_predict:
    :param losses:
    :param training_data_gen:
    :param test_data_gen_constructors:
    :param samples_count:
    :param n_training_iters:
    :param n_test_iters:
    :param test_checkpoints:
    :param collapse_loss:
    :param progress_update_period:
    :param in_test_callback:
    :param post_test_callback:
    :param post_train_callback:
    :param save_train_return:
    :param measures:
    :return: If yield_array_data is False, an ArrayStruct with fields:
        'training'                          Results recorded during training callbacks
            training_iter                   The iteration of the training callback
        'testing'                           Results recorded during tests
            test_iter                       The index of the test
                'iter'                      The number of training iterations finished at the time that the test is run
                'time'                      The time at which the test is run
                'samples'                   The number of samples seen so far
                'results'                   A structure containing results
                    subset_name             The name of the testing subset e.g. 'train', 'test' (you give subset names in test_data_gen_constructors)
                        'losses'
                            loss_name       The name of the loss function (you provide this in losses)
                            n_tests         The numner of tests that were run for this subset
                        'time'              The time, in seconds, that it took to test on this subset.


        Otherwise, if true, a structure the same object but training_iter and test_iter pushed to the leaf-position
    """

    if measures is None:
        measures = Duck()
    if 'training' not in measures:
        measures['training'] = Duck()
    if 'testing' not in measures:
        measures['testing'] = Duck()

    is_test_time = Checkpoints(test_checkpoints) if not isinstance(
        test_checkpoints, Checkpoints) else test_checkpoints
    pi = ProgressIndicator(n_training_iters,
                           "Training",
                           update_every=progress_update_period)

    for inputs, targets in training_data_gen:
        if is_test_time():

            this_test_measures = measures['testing'].open(next)
            this_test_measures['iter'] = pi.get_iterations()
            this_test_measures['time'] = pi.get_elapsed()
            this_test_measures['results'] = do_test(
                test_subset_generators={
                    subset_name: constructor()
                    for subset_name, constructor in
                    test_data_gen_constructors.items()
                },
                f_predict=f_predict,
                loss_dict=losses,
                n_test_iters=n_test_iters,
                collapse_loss=collapse_loss,
                in_test_callback=in_test_callback,
            )
            if post_test_callback is not None:
                post_test_callback(this_test_measures)
            if iterations_to_end:
                measures_to_yield = measures.arrayify_axis(axis=1,
                                                           subkeys='training')
                measures_to_yield = measures_to_yield.arrayify_axis(
                    axis=1, subkeys='testing', inplace=True)
                yield measures_to_yield.to_struct()
            else:
                yield measures

        train_return = f_train(inputs, targets)
        pi.print_update()
        if save_train_return:
            measures['training', 'returns'] = train_return
        if post_train_callback:
            return_val = post_train_callback(inputs=inputs,
                                             targets=targets,
                                             iter=pi.get_iterations())
            if return_val is not None:
                measures['training', 'callback', next, ...] = return_val
Example #10
def experiment_mnist_eqprop_torch(
    layer_constructor: Callable[[int, LayerParams], IDynamicLayer],
    n_epochs=10,
    hidden_sizes=(500, ),
    minibatch_size=10,  # update mini-batch size
    batch_size=500,  # total batch size
    beta=.5,
    random_flip_beta=True,
    learning_rate=.05,
    n_negative_steps=120,
    n_positive_steps=80,
    initial_weight_scale=1.,
    online_checkpoints_period=None,
    epoch_checkpoint_period=1.0,
    skip_zero_epoch_test=False,
    n_test_samples=10000,
    prop_direction: Union[str, Tuple] = 'neutral',
    bidirectional=True,
    renew_activations=True,
    do_fast_forward_pass=False,
    rebuild_coders=True,
    l2_loss=None,
    splitstream=False,
    seed=1234,
    prediction_inp_size=17,  ## prediction input size
    delay=18,  ## delay size for the clamped phase
    pred=True,  ## if you want to use the prediction
    check_flg=False,
):
    """
    Replicate the results of Scellier & Bengio:
        Equilibrium Propagation: Bridging the Gap between Energy-Based Models and Backpropagation
        https://www.frontiersin.org/articles/10.3389/fncom.2017.00024/full

    Specifically, the train_model demo here:
        https://github.com/bscellier/Towards-a-Biologically-Plausible-Backprop

    Differences between our code and theirs:
    - We do not keep persistent layer activations tied to data points over epochs.  So our results should only really match for the first epoch.
    - We evaluate training score periodically, rather than online average (however you can see online score by setting online_checkpoints_period to something that is not None)
    """
    torch.manual_seed(seed)
    device = 'cuda' if torch.cuda.is_available() and USE_CUDA_WHEN_AVAILABLE else 'cpu'
    if device == 'cuda':
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    print(f'Using Device: {device}')

    print('Params:\n' +
          '\n'.join(list(f'  {k} = {v}' for k, v in locals().items())))

    rng = get_rng(seed)
    n_in = 784
    n_out = 10

    dataset = input_data.read_data_sets('MNIST_data', one_hot=True)

    x_train = torch.tensor(dataset.train.images, dtype=torch.float32).to(device)
    y_train = torch.tensor(dataset.train.labels, dtype=torch.float32).to(device)
    x_test = torch.tensor(dataset.test.images, dtype=torch.float32).to(device)  # Their 'validation set' is our 'test set'
    y_test = torch.tensor(dataset.test.labels, dtype=torch.float32).to(device)
    x_val = torch.tensor(dataset.validation.images, dtype=torch.float32).to(device)
    y_val = torch.tensor(dataset.validation.labels, dtype=torch.float32).to(device)

    if is_test_mode():
        x_train, y_train, x_test, y_test, x_val, y_val = (
            a[:100] for a in (x_train, y_train, x_test, y_test, x_val, y_val))
        n_epochs = 1
        n_negative_steps = 3
        n_positive_steps = 3

    layer_sizes = [n_in] + list(hidden_sizes) + [n_out]

    ra = RunningAverage()
    sp = Speedometer(mode='last')
    is_online_checkpoint = (Checkpoints(online_checkpoints_period, skip_first=skip_zero_epoch_test)
                            if online_checkpoints_period is not None else (lambda: False))
    is_epoch_checkpoint = Checkpoints(epoch_checkpoint_period,
                                      skip_first=skip_zero_epoch_test)

    training_states = initialize_states(
        layer_constructor=layer_constructor,
        n_samples=batch_size,
        params=initialize_params(layer_sizes=layer_sizes,
                                 initial_weight_scale=initial_weight_scale,
                                 rng=rng))


    if isinstance(prop_direction, str):
        fwd_prop_direction, backward_prop_direction = prop_direction, prop_direction
    else:
        fwd_prop_direction, backward_prop_direction = prop_direction

    def do_test():
        test_error, train_error, val_error = [
            percent_argmax_incorrect(
                run_inference(
                    x_data=x[:n_test_samples],
                    states=initialize_states(
                        layer_constructor=layer_constructor,
                        params=[s.params for s in training_states],
                        n_samples=n_samples),
                    n_steps=n_negative_steps,
                    prop_direction=fwd_prop_direction,
                ), y[:n_samples]).item()
            for x, y in [(x_test, y_test), (x_train, y_train), (x_val, y_val)]
            for n_samples in [
                min(len(x), n_test_samples
                    ) if n_test_samples is not None else len(x)
            ]
        ]  # Not an actual loop... just a hack for assignment in comprehensions
        print(
            f'Epoch: {epoch:.3g}, Iter: {i}, Test Error: {test_error:.3g}%, Train Error: {train_error:.3g}%, Validation Error: {val_error:.3g}%, Mean Rate: {sp(i):.3g}iter/s'
        )

        return dict(iter=i,
                    epoch=epoch,
                    train_error=train_error,
                    test_error=test_error,
                    val_error=val_error), train_error, test_error, val_error

    results = Duck()
    pi = ProgressIndicator(expected_iterations=n_epochs *
                           dataset.train.num_examples / minibatch_size,
                           update_every='10s')

    dy_squared = [None, None]
    for i, (ixs, info) in enumerate(
            minibatch_index_info_generator(n_samples=x_train.size()[0],
                                           minibatch_size=batch_size,
                                           n_epochs=n_epochs)):
        epoch = i * batch_size / x_train.shape[0]

        if is_epoch_checkpoint(epoch):
            check_flg = False
            x_train, y_train = shuffle_data(x_train, y_train)
            with pi.pause_measurement():
                results[next, :], train_err, test_err, val_err = do_test()

                ## prepare for saving the parameters
                ws, bs = zip(*((s.params.w_aft, s.params.b)
                               for s in training_states[1:]))

                # `directory` is assumed to be defined at module level in the source.
                os.makedirs(directory, exist_ok=True)
                f = open(directory + '/log.txt', 'a')

                f.write("Epoch: " + str(epoch) + '\n')
                f.write("accuracy for training: " + str(train_err) + '\n')
                f.write("accuracy for testing: " + str(test_err) + '\n')
                f.write("accuracy for validation: " + str(val_err) + '\n')

                f.close()

                np.save(directory + '/w_epoch_' + str(epoch) + '.npy', ws)
                np.save(directory + '/b_epoch_' + str(epoch) + '.npy', bs)
                np.save(directory + '/dy_squared_epoch_' + str(epoch) + '.npy',
                        dy_squared)

                yield results
                if epoch > 100 and results[-1, 'train_error'] > 50:
                    return

        # The Original training loop, just taken out here:
        ixs = ixs.astype(np.int32)  # this is for python version 3.7

        x_data_sample, y_data_sample = x_train[ixs], y_train[ixs]

        training_states, dy_squared = run_eqprop_training_update(
            x_data=x_data_sample,
            y_data=y_data_sample,
            layer_states=training_states,
            beta=beta,
            random_flip_beta=random_flip_beta,
            learning_rate=learning_rate,
            layer_constructor=layer_constructor,
            bidirectional=bidirectional,
            l2_loss=l2_loss,
            renew_activations=renew_activations,
            n_negative_steps=n_negative_steps,
            n_positive_steps=n_positive_steps,
            prop_direction=prop_direction,
            splitstream=splitstream,
            rng=rng,
            prediction_inp_size=prediction_inp_size,
            delay=delay,
            device=device,
            epoch_check=check_flg,
            epoch=epoch,
            pred=pred,
            batch_size=batch_size,
            minibatch_size=minibatch_size,
            dy_squared=dy_squared)
        check_flg = False

        this_train_score = ra(
            percent_argmax_incorrect(output_from_state(training_states),
                                     y_train[ixs]))
        if is_online_checkpoint():
            print(
                f'Epoch {epoch:.3g}: Iter {i}: Score {this_train_score:.3g}%: Mean Rate: {sp(i):.2g}'
            )

        pi.print_update(info=f'Epoch: {epoch}')

    results[next, :], train_err, test_err, val_err = do_test()
    yield results
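The experiment is likewise a generator, yielding the accumulated results Duck at each epoch checkpoint. A hypothetical driver (my_layer_constructor stands in for a real layer factory of type Callable[[int, LayerParams], IDynamicLayer]):

for results in experiment_mnist_eqprop_torch(layer_constructor=my_layer_constructor, n_epochs=2):
    print('Test error so far:', results[-1, 'test_error'])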
Example #11
def demo_quantized_convergence(
        quantized_layer_constructor,
        smooth_epsilon=0.5,
        layer_sizes=(500, 500, 10),
        initialize_acts_randomly=False,
        minibatch_size=1,
        n_steps=10000,
        initial_weight_scale=1.,
        prop_direction='neutral',
        data_seed=1241,
        param_seed=1237,
        hang=True,
        plot=False):
    """
    """

    smooth_layer_constructor = SimpleLayerController.get_partial_constructor(
        epsilon=smooth_epsilon)

    print('Params:\n' +
          '\n'.join(list(f'  {k} = {v}' for k, v in locals().items())))

    data_rng = get_rng(data_seed)
    param_rng = get_rng(param_seed)

    HISTORY_LEN = n_steps
    N_NEURONS_TO_PLOT = 10

    if is_test_mode():
        n_steps = 10

    pi = ProgressIndicator(update_every='2s', expected_iterations=2 * n_steps)
    n_in, n_out = layer_sizes[0], layer_sizes[-1]

    x_data = data_rng.rand(minibatch_size, n_in)

    params = initialize_params(layer_sizes=layer_sizes,
                               initial_weight_scale=initial_weight_scale,
                               rng=param_rng)

    def run_update(layer_constructor, mode):

        plt.gca().set_prop_cycle(None)

        states = initialize_states(layer_constructor=layer_constructor,
                                   n_samples=minibatch_size,
                                   params=params)

        for t in range(n_steps):

            states = eqprop_step(layer_states=states,
                                 x_data=x_data,
                                 beta=0,
                                 y_data=None,
                                 direction=prop_direction)
            acts = [s.potential for s in states]
            yield acts
            if plot:
                dbplot_collection(
                    [a[0, :N_NEURONS_TO_PLOT] for a in acts],
                    f'{mode} acts',
                    axis='acts',
                    draw_every='5s',
                    cornertext=f'Negative Phase: {t}',
                    plot_type=lambda: MovingPointPlot(
                        buffer_len=HISTORY_LEN,
                        plot_kwargs=dict(linestyle='-.'
                                         if mode == 'Smooth' else '-'),
                        reset_color_cycle=True))
            pi()

    smooth_record = list(
        run_update(layer_constructor=smooth_layer_constructor, mode='Smooth'))
    smooth_acts = smooth_record[-1]

    rough_record = list(
        run_update(layer_constructor=quantized_layer_constructor,
                   mode='Rough'))
    rough_acts = rough_record[-1]

    rs_online_errors = np.array(
        [[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)]
         for hs_rough, hs_smooth in zip(rough_record, smooth_record)])
    rs_end_errors = np.array(
        [[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)]
         for hs_smooth in [smooth_record[-1]] for hs_rough in rough_record])
    rr_end_errors = np.array(
        [[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)]
         for hs_smooth in [rough_record[-1]] for hs_rough in rough_record])
    ss_end_errors = np.array(
        [[np.mean(np.abs(hr - hs)) for hr, hs in zip(hs_rough, hs_smooth)]
         for hs_smooth in [smooth_record[-1]] for hs_rough in smooth_record])

    mean_abs_error = np.mean(rs_online_errors, axis=0)
    final_abs_error = rs_online_errors[-1]
    print(
        f'Mean Abs Layerwise Errors: {np.array_str(mean_abs_error, precision=5)}\t Final Layerwise Errors: {np.array_str(final_abs_error,  precision=5)}'
    )

    return rs_online_errors, rs_end_errors, rr_end_errors, ss_end_errors
def demo_quantized_convergence_perturbed(
    quantized_layer_constructor,
    smooth_epsilon=0.5,
    layer_sizes=(500, 500, 10),
    initialize_acts_randomly=False,
    minibatch_size=20,
    smooth_longer_factor=10,
    n_steps=500,
    change_frac=0.5,
    beta=.5,  # TODO: revert
    initial_weight_scale=1.,
    prop_direction='neutral',
    data_seed=None,
    param_seed=None,
    hang=True,
):
    """
    """

    perturbation_step = int(n_steps * change_frac)
    smooth_layer_constructor = SimpleLayerController.get_partial_constructor(
        epsilon=smooth_epsilon)

    print('Params:\n' +
          '\n'.join(list(f'  {k} = {v}' for k, v in locals().items())))

    data_rng = get_rng(data_seed)
    param_rng = get_rng(param_seed)

    HISTORY_LEN = n_steps
    N_NEURONS_TO_PLOT = 10

    if is_test_mode():
        n_steps = 10
        perturbation_step = 5

    pi = ProgressIndicator(update_every='2s', expected_iterations=2 * n_steps)
    n_in, n_out = layer_sizes[0], layer_sizes[-1]

    x_data = to_default_tensor(data_rng.rand(minibatch_size, n_in))

    y_data = torch.zeros((minibatch_size, n_out))
    y_data[np.arange(len(y_data)),
           np.random.choice(n_out, size=minibatch_size)] = 1

    params = initialize_params(layer_sizes=layer_sizes,
                               initial_weight_scale=initial_weight_scale,
                               rng=param_rng)

    def run_update(layer_constructor, mode):

        if PLOT:
            plt.gca().set_prop_cycle(None)

        states = initialize_states(layer_constructor=layer_constructor,
                                   n_samples=minibatch_size,
                                   params=params)

        for t in range(n_steps):

            for _ in (range(smooth_longer_factor) if mode == 'Smooth' else range(1)):
                if t < perturbation_step:
                    states = eqprop_step(layer_states=states,
                                         x_data=x_data,
                                         beta=0,
                                         y_data=None,
                                         direction=prop_direction)
                else:
                    states = eqprop_step(layer_states=states,
                                         x_data=x_data,
                                         beta=beta,
                                         y_data=y_data,
                                         direction=prop_direction)

            acts = [s.potential for s in states]
            yield acts[1:]
            if PLOT:
                if do_every('2s'):
                    dbplot([put_vector_in_grid(a[0]) for a in acts],
                           f'acts-{mode}',
                           title=f'{mode} Iter-{t}')
                if mode == 'Rough':
                    dbplot([
                        states[1].stepper.step_size.mean(),
                        states[2].stepper.step_size.mean()
                    ],
                           'step size',
                           draw_every='2s')
            pi()

        if PLOT:
            dbplot_redraw_all()

    rough_record = list(
        run_update(layer_constructor=quantized_layer_constructor,
                   mode='Rough'))

    smooth_record = list(
        run_update(layer_constructor=smooth_layer_constructor, mode='Smooth'))

    smooth_neg_endpoint = smooth_record[perturbation_step - 1]
    smooth_pos_endpoint = smooth_record[-1]

    smooth_endpoint_delta = np.concatenate(
        [sp - sn for sp, sn in izip_equal(smooth_pos_endpoint, smooth_neg_endpoint)], axis=1)

    rough_endpoint_delta = np.concatenate(
        [sp - sn for sp, sn in izip_equal(rough_record[-1], rough_record[perturbation_step - 1])], axis=1)

    distance_to_converged = np.array(
        [[
            torch.mean(abs(hr - hs)).item()
            for hr, hs in zip(hs_rough, smooth_neg_endpoint)
        ] for hs_rough in rough_record[:perturbation_step]] + [[
            torch.mean(abs(hr - hs)).item()
            for hr, hs in zip(hs_rough, smooth_pos_endpoint)
        ] for hs_rough in rough_record[perturbation_step:]]
    )  # (n_steps, n_layers) array indicating convergence to the fixed point for each non-input layer.


    if PLOT:
        dbplot(
            distance_to_converged,
            'errors',
            plot_type=lambda: LinePlot(x_axis_type='log', y_axis_type='log'),
            legend=[f'Layer {i+1}' for i in range(len(layer_sizes))])

    mean_abs_error = np.mean(distance_to_converged, axis=0)
    final_abs_error = distance_to_converged[-1]
    print(
        f'Mean Abs Layerwise Errors: {np.array_str(mean_abs_error, precision=5)}\t Final Layerwise Errors: {np.array_str(final_abs_error,  precision=5)}'
    )

    return distance_to_converged, rough_endpoint_delta, smooth_endpoint_delta