Example No. 1
def run_trial(f_dfs):
    X = genX(n)
    Y = genY(X)
    batch_fn = make_flat_batch_fn(X, Y, n_per_batch)

    Xvalid = genX(1000)
    Yvalid = genY(Xvalid)

    # Yrms = rms(Y, axis=1).mean()
    # Yvalidrms = rms(Yvalid, axis=1).mean()

    # --- initial weights
    weights = initial_weights(sizes, kind='gaussian', scale=1e-1, rng=rng)
    # print(", ".join("||W%d|| = %0.3f" % (i, norm(w)) for i, w in enumerate(weights)))

    # genB = lambda shape: initial_w(shape, rng=rng, kind='ortho', normkind='rightmean', scale=0.2)
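    # Fixed feedback matrices for the direct-feedback (DFA) learner,
    # one per hidden layer, mapping the output error straight to that layer.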
    genB = lambda shape: initial_w(
        shape, rng=rng, kind='ortho', normkind='rightmean', scale=0.3)
    directBs = [genB((dout, dhid)) for dhid in dhids]

    def test_derivative(f, df):

        get_network = lambda **kwargs: Network(
            weights, f=f, df=df, biases=None, noise=0, **kwargs)

        bp_learner = BPLearner(get_network(),
                               squared_cost,
                               rms_error,
                               eta=eta,
                               alpha=alpha,
                               name='BP')
        bp_learner.weight_norms = []

        fas_learner = FASkipLearner(get_network(),
                                    squared_cost,
                                    rms_error,
                                    eta=eta,
                                    alpha=alpha,
                                    name='DFA')
        fas_learner.Bs = directBs

        learners = [bp_learner, fas_learner]
        for learner in learners:
            learner.train(1, batch_fn)

        for learner in learners:
            print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
                            for i, w in enumerate(learner.network.weights)))

        return learners

    results = []
    for f_df in f_dfs:
        results.append(test_derivative(*f_df))

    return results
Example No. 2
def run_trial(f_dfs):
    trial_seed = rng.randint(2**30)

    # --- initial weights
    weights = initial_weights(sizes, kind='gaussian', scale=1e-1, rng=rng)
    # print(", ".join("||W%d|| = %0.3f" % (i, norm(w)) for i, w in enumerate(weights)))

    genB = lambda shape: initial_w(
        shape, kind='ortho', normkind='rightmean', scale=0.2, rng=rng)
    directBs = [genB((dout, dhid)) for dhid in dhids]

    def test_derivative(f, df):
        # batch_fn = make_flat_batch_fn(X, Y, n_per_batch)

        get_network = lambda **kwargs: Network(
            weights, f=f, df=df, biases=None, noise=0, **kwargs)

        bp_learner = BPLearner(get_network(),
                               cost,
                               error,
                               eta=eta,
                               alpha=alpha,
                               name='BP')
        bp_learner.weight_norms = []

        fas_learner = FASkipLearner(get_network(),
                                    cost,
                                    error,
                                    eta=eta,
                                    alpha=alpha,
                                    name='FA')
        fas_learner.Bs = directBs

        learners = [bp_learner, fas_learner]
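        # Rebuild the batch generator from the same trial seed for each
        # learner, so BP and FA train on identical batch sequences.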
        for learner in learners:
            batch_fn = make_random_batch_fn(
                trainX,
                trainY,
                n_per_batch,
                rng=np.random.RandomState(trial_seed))
            learner.train(epochs, batch_fn, test_set=test_set)

        # for learner in learners:
        #     print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
        #                     for i, w in enumerate(learner.network.weights)))

        return learners

    results = []
    for f_df in f_dfs:
        results.append(test_derivative(*f_df))

    return results
Example No. 3
def test_initial_w_ortho_angle():
    ws = [initial_w((2, 2), kind='ortho') for _ in range(100000)]

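    # For a rotation-invariant orthogonal init, the angle of each row should
    # be roughly uniform over [-pi, pi]; the histograms check this visually.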
    angle0 = [np.arctan2(*w[0]) for w in ws]
    angle1 = [np.arctan2(*w[1]) for w in ws]

    plt.subplot(211)
    plt.hist(angle0, bins=51)

    plt.subplot(212)
    plt.hist(angle1, bins=51)

    plt.show()
Example No. 4
def test_derivative(f, df):
    get_network = lambda **kwargs: Network(
        weights, f=f, df=df, biases=None, noise=0, **kwargs)

    bp_learner = BPLearner(get_network(),
                           cost,
                           error,
                           eta=eta,
                           alpha=alpha,
                           name='BP')
    bp_learner.weight_norms = []

    # fa_learner = FALearner(
    #     get_network(), squared_cost, rms_error, eta=eta, alpha=alpha)
    # fa_learner.Bs = [initial_w((j, i), kind='ortho', scale=2)
    #                  for i, j in zip(dhids, dhids[1:] + [dout])]
    # fa_learner.bp_angles = []
    # # fa_learner.pbp_angles = []

    fas_learner = FASkipLearner(get_network(),
                                cost,
                                error,
                                eta=eta,
                                alpha=alpha,
                                name='FA')
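    # Direct feedback matrices (dout x dhid), one per hidden layer, drawn as
    # scaled orthogonal matrices for the skip feedback-alignment learner.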
    genB = lambda shape: initial_w(
        shape, kind='ortho', normkind='rightmean', scale=0.2)
    fas_learner.Bs = [genB((dout, dhid)) for dhid in dhids]

    # learners = [bp_learner, fa_learner]
    learners = [bp_learner, fas_learner]
    for learner in learners:
        learner.train(epochs, batch_fn, test_set=test_set)

    for learner in learners:
        print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
                        for i, w in enumerate(learner.network.weights)))

    return learners
Example No. 5
def objective(args):
    sargs = arg_string(args)
    w_kind = args['w_kind']
    w_scale = args['w_scale']
    b_kind = args['b_kind']
    # b_scale = args['b_scale']
    eta = args['eta']
    # alpha = args['alpha']
    alpha = 0
    # eta = [args['eta0'], args['eta1'], args['eta2']]

    # max_cost = -np.inf
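    # Run several independent fits and report a trimmed mean, so one lucky or
    # unlucky initialization does not dominate the hyperopt loss.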
    costs = []
    for _ in range(5):
        f, df = static_f_df(tau_rc=0.05, **args['neuron_type'])

        weights = initial_weights(sizes, kind=w_kind, scale=w_scale, rng=rng)

        # --- learners
        network = Network(weights, f=f, df=df, biases=None)
        # network = Network(weights, f=f, df=df, biases=None, noise=1.)
        learner = Learner(network, squared_cost, rms_error, eta=eta, alpha=alpha)
        # learner.Bs = [initial_w((dout, dhid), kind=b_kind, scale=b_scale) for dhid in dhids]
        learner.Bs = [
            initial_w((dout, dhid), kind=b_kind, normkind='rightmean')
            for dhid in dhids]

        learner.train(1, batch_fn, verbose=0)

        y = learner.network.predict(Xvalid)
        cost = rms(y - Yvalid, axis=1).mean() / Yvalidrms
        costs.append(cost)

    costs = sorted(costs)[1:-1]  # drop largest and smallest
    cost = np.mean(costs)
    status = hyperopt.STATUS_OK if np.isfinite(cost) else hyperopt.STATUS_FAIL
    print("%s: %0.3e" % (sargs, cost))

    return dict(loss=cost, status=status)
Example No. 6
    batch_fn = make_random_batch_fn(trainX,
                                    trainY,
                                    n_per_batch,
                                    rng=np.random.RandomState(5))

    sizes = [din] + dhids + [dout]

    # --- initial weights
    weights = initial_weights(sizes, kind='gaussian', scale=1e-1, rng=rng)
    print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
                    for i, w in enumerate(weights)))

    # genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.2)
    # genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.4)
    # genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.1)
    genB = lambda shape: initial_w(
        shape, kind='identity', normkind='rightmean', scale=0.2)
    Bs = [genB((d1, d0)) for d0, d1 in zip(dhids, dhids[1:] + [dout])]
    Bs_direct = [genB((dout, dhid)) for dhid in dhids]

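    # Collapse the per-layer feedback chain into direct layer-to-output
    # matrices: combined[i] = Bs[-1] @ ... @ Bs[i], matching Bs_direct shapes.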
    def combine_Bs(Bs):
        Bs_combined = [Bs[-1]]
        for B in Bs[-2::-1]:
            Bs_combined.insert(0, np.dot(Bs_combined[0], B))
        return Bs_combined

    for B, Bc, Bd in zip(Bs, combine_Bs(Bs), Bs_direct):
        B *= norm(B) / norm(Bc)

    print("B norms: %s" % ", ".join("%0.3f" % norm(B) for B in Bs))
    print("Bc norms: %s" % ", ".join("%0.3f" % norm(B)
                                     for B in combine_Bs(Bs)))
Example No. 7
                                 cost,
                                 error,
                                 eta=eta,
                                 alpha=alpha)
bp_learner = BPLearner(get_network(), cost, error, eta=eta, alpha=alpha)
# bp_learner.weight_norms = []

# fa_learner = FALearner(
#     get_network(), cost, error, eta=eta, alpha=alpha)
# fa_learner.Bs = [initial_w((j, i), kind='ortho', scale=2)
#                  for i, j in zip(dhids, dhids[1:] + [dout])]
# fa_learner.bp_angles = []

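# The skip-FA learner feeds the output error directly back to each hidden
# layer, so its feedback matrices are (dout, dhid) rather than layer-to-layer.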
fas_learner = FASkipLearner(get_network(), cost, error, eta=eta, alpha=alpha)
fas_learner.Bs = [
    initial_w((dout, dhid), kind='ortho', scale=2) for dhid in dhids
]

# epochs = 1
# epochs = 3
epochs = 10
# epochs = 30

# learners = [shallow_learner, bp_learner, fas_learner]
learners = [bp_learner, fas_learner]

for learner in learners:
    learner.train(epochs, batch_fn, test_set=test_subset)

# --- plot
plt.figure(1)
Example No. 8
                             error,
                             eta=0.5 * eta,
                             momentum=momentum,
                             name='BPLocal')

fas_learner = FASkipLearner(
    get_network(),
    cost,
    error,
    # eta=eta, momentum=momentum, name='FA')
    eta=0.5 * eta,
    momentum=momentum,
    name='FA')
# genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.2, rng=rng)
# genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.15, rng=rng)
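# Scaled orthogonal feedback matrices (output -> hidden) for the FA learner;
# the commented-out lines are alternative scales.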
genB = lambda shape: initial_w(
    shape, kind='ortho', normkind='rightmean', scale=0.1, rng=rng)
fas_learner.Bs = [genB((dout, dhid)) for dhid in dhids]

# learners = [bp_learner]
# learners = [bpl_learner]
# learners = [fas_learner]
learners = [bp_learner, bpl_learner, fas_learner]

batch_size = 10
batch_fn = make_flat_batch_fn(Xtrain, Ytrain, batch_size)
for learner in learners:
    print("%s: %s" % (learner.name, weight_norm_s(learner.network.weights)))
    if hasattr(learner, 'Bs'):
        print("%s B: %s" % (learner.name, weight_norm_s(learner.Bs)))

    # learner.train(5, batch_fn, test_set=(Xtest, Ytest))
Example No. 9
# n_per_batch = 20
n_per_batch = 100
batch_fn = make_flat_batch_fn(trainX, trainY, n_per_batch)

learner = FASkipLearner(get_network(),
                        cost,
                        error,
                        eta=eta,
                        alpha=alpha,
                        momentum=momentum)

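# Three options for the feedback matrices: all-zero, scaled orthogonal, or
# data-driven (class-averaged standardized hidden activity, assuming trainT
# holds one-hot targets).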
if 0:
    Bs = [np.zeros((n_out, n_hid)) for n_hid in n_hids]
elif 1:
    Bs = [
        initial_w((n_out, n_hid), kind='ortho', scale=0.1) for n_hid in n_hids
    ]
else:
    scale = 0.1
    Bs = []
    for h in (h0, h1):
        hmean, hstd = h.mean(axis=0), h.std(axis=0)
        hsilent = hstd <= 1e-16
        h = (h - hmean) / np.maximum(hstd, 1e-16)
        # h = (h - hmean) * (scale / np.maximum(hstd, 1e-16))

        B = np.dot(trainT.T, h) / trainT.sum(0)[:, None]
        if hsilent.sum() > 0:
            Bstd = B[:, ~hsilent].std(axis=0).mean()
            B[:, hsilent] = rng.normal(scale=Bstd,
                                       size=(B.shape[0], hsilent.sum()))
Example No. 10
                                 name='Shallow')
bp_learner = BPLearner(get_network(),
                       squared_cost,
                       rms_error,
                       eta=eta,
                       alpha=alpha,
                       name='BP')
bp_learner.weight_norms = []

fa_learner = FALearner(get_network(),
                       squared_cost,
                       rms_error,
                       eta=eta,
                       alpha=alpha)
fa_learner.Bs = [
    initial_w((j, i), kind='ortho', scale=2)
    for i, j in zip(dhids, dhids[1:] + [dout])
]
fa_learner.bp_angles = []
# fa_learner.pbp_angles = []

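# The skip-FA learner ('Our model') gets direct output-to-hidden feedback
# matrices, instead of the layer-to-layer matrices used by FALearner above.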
fas_learner = FASkipLearner(get_network(),
                            squared_cost,
                            rms_error,
                            eta=eta,
                            alpha=alpha,
                            name='Our model')
# fas_learner.Bs = [initial_w((dout, dhid), kind='ortho', scale=2) for dhid in dhids]
fas_learner.Bs = [
    initial_w((dout, dhid), kind='ortho', normkind='rightmean')
    for dhid in dhids