def test_derivative(f, df):
    """Train a BP learner and a DFA learner with the nonlinearity ``f``/``df``.

    ``f`` and ``df`` are passed straight through to ``Network`` as its ``f``
    and ``df`` keyword arguments. Each learner gets its own ``Network`` call
    built from the shared ``weights``. Every learner is trained via
    ``learner.train(1, batch_fn)``, and afterwards the norm of each entry of
    ``learner.network.weights`` is printed on one line per learner.

    Returns:
        The list ``[bp_learner, fas_learner]`` (in that order).
    """

    def build_network(**kwargs):
        # Same construction for every learner; only extra kwargs may differ.
        return Network(
            weights, f=f, df=df, biases=None, noise=0, **kwargs)

    backprop = BPLearner(
        build_network(), squared_cost, rms_error,
        eta=eta, alpha=alpha, name='BP')
    backprop.weight_norms = []

    direct_fa = FASkipLearner(
        build_network(), squared_cost, rms_error,
        eta=eta, alpha=alpha, name='DFA')
    direct_fa.Bs = directBs

    learners = [backprop, direct_fa]

    # Train everything first, then report, so the summary lines stay together.
    for learner in learners:
        learner.train(1, batch_fn)

    for learner in learners:
        norm_strs = [
            "||W%d|| = %0.3f" % (index, norm(w))
            for index, w in enumerate(learner.network.weights)
        ]
        print(", ".join(norm_strs))

    return learners
def test_derivative(f, df):
    """Train a BP learner and an 'FA' skip learner with nonlinearity ``f``/``df``.

    ``f`` and ``df`` are passed straight through to ``Network``. Each learner
    gets its own ``Network`` call built from the shared ``weights``. The skip
    learner's ``Bs`` holds one ``initial_w((dout, dhid), ...)`` result per
    entry of ``dhids``. Every learner is trained via
    ``learner.train(epochs, batch_fn, test_set=test_set)``, and afterwards the
    norm of each entry of ``learner.network.weights`` is printed.

    Returns:
        The list ``[bp_learner, fas_learner]`` (in that order).
    """

    def build_network(**kwargs):
        return Network(
            weights, f=f, df=df, biases=None, noise=0, **kwargs)

    def make_B(shape):
        return initial_w(
            shape, kind='ortho', normkind='rightmean', scale=0.2)

    backprop = BPLearner(
        build_network(), cost, error, eta=eta, alpha=alpha, name='BP')
    backprop.weight_norms = []

    # Disabled alternative: a plain FALearner with layer-to-layer Bs.
    # fa_learner = FALearner(
    #     get_network(), squared_cost, rms_error, eta=eta, alpha=alpha)
    # fa_learner.Bs = [initial_w((j, i), kind='ortho', scale=2)
    #                  for i, j in zip(dhids, dhids[1:] + [dout])]
    # fa_learner.bp_angles = []
    # # fa_learner.pbp_angles = []

    skip_fa = FASkipLearner(
        build_network(), cost, error, eta=eta, alpha=alpha, name='FA')
    skip_fa.Bs = [make_B((dout, dhid)) for dhid in dhids]

    # learners = [bp_learner, fa_learner]
    learners = [backprop, skip_fa]

    for learner in learners:
        learner.train(epochs, batch_fn, test_set=test_set)

    for learner in learners:
        norm_strs = [
            "||W%d|| = %0.3f" % (index, norm(w))
            for index, w in enumerate(learner.network.weights)
        ]
        print(", ".join(norm_strs))

    return learners
def test_derivative(f, df):
    """Train a BP learner and an 'FA' skip learner with nonlinearity ``f``/``df``.

    ``f`` and ``df`` are passed straight through to ``Network``. Each learner
    gets its own ``Network`` call built from the shared ``weights``, and the
    skip learner's ``Bs`` is set to ``directBs``. For every learner a new
    batch function is created from ``trainX``/``trainY`` with a
    ``RandomState`` seeded by ``trial_seed``, then the learner is trained via
    ``learner.train(epochs, batch_fn, test_set=test_set)``.

    Returns:
        The list ``[bp_learner, fas_learner]`` (in that order).
    """
    # batch_fn = make_flat_batch_fn(X, Y, n_per_batch)

    def build_network(**kwargs):
        return Network(
            weights, f=f, df=df, biases=None, noise=0, **kwargs)

    backprop = BPLearner(
        build_network(), cost, error, eta=eta, alpha=alpha, name='BP')
    backprop.weight_norms = []

    skip_fa = FASkipLearner(
        build_network(), cost, error, eta=eta, alpha=alpha, name='FA')
    skip_fa.Bs = directBs

    learners = [backprop, skip_fa]

    for learner in learners:
        # A fresh RandomState(trial_seed) per learner: every learner's batch
        # function starts from the same seed.
        batch_rng = np.random.RandomState(trial_seed)
        batch_fn = make_random_batch_fn(
            trainX, trainY, n_per_batch, rng=batch_rng)
        learner.train(epochs, batch_fn, test_set=test_set)

    # for learner in learners:
    #     print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
    #                     for i, w in enumerate(learner.network.weights)))

    return learners
eta=eta, alpha=alpha, name='global FA') fa_learner.Bs = [np.array(B) for B in Bs] fal_learner = FALocalLearner(get_network(), cost, error, eta=eta, alpha=alpha, name='local FA') fal_learner.Bs = [np.array(B) for B in Bs] fas_learner = FASkipLearner(get_network(), cost, error, eta=eta, alpha=alpha, name='direct FA') fas_learner.Bs = [np.array(B) for B in Bs_direct] # learners = [bp_learner] # learners = [fas_learner] # learners = [bp_learner, fas_learner] # learners = [bp_learner, fa_learner] learners = [bp_learner, fa_learner, fal_learner, fas_learner] for learner in learners: learner.train(epochs, batch_fn, test_set=test_set) for learner in learners: print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
# --- learners shallow_learner = ShallowLearner(get_network(), cost, error, eta=eta, alpha=alpha) bp_learner = BPLearner(get_network(), cost, error, eta=eta, alpha=alpha) # bp_learner.weight_norms = [] # fa_learner = FALearner( # get_network(), cost, error, eta=eta, alpha=alpha) # fa_learner.Bs = [initial_w((j, i), kind='ortho', scale=2) # for i, j in zip(dhids, dhids[1:] + [dout])] # fa_learner.bp_angles = [] fas_learner = FASkipLearner(get_network(), cost, error, eta=eta, alpha=alpha) fas_learner.Bs = [ initial_w((dout, dhid), kind='ortho', scale=2) for dhid in dhids ] # epochs = 1 # epochs = 3 epochs = 10 # epochs = 30 # learners = [shallow_learner, bp_learner, fas_learner] learners = [bp_learner, fas_learner] for learner in learners: learner.train(epochs, batch_fn, test_set=test_subset)
error, eta=eta, momentum=momentum, name='BP') bpl_learner = BPLocalLearner(get_network(), cost, error, eta=0.5 * eta, momentum=momentum, name='BPLocal') fas_learner = FASkipLearner( get_network(), cost, error, # eta=eta, momentum=momentum, name='FA') eta=0.5 * eta, momentum=momentum, name='FA') # genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.2, rng=rng) # genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.15, rng=rng) genB = lambda shape: initial_w( shape, kind='ortho', normkind='rightmean', scale=0.1, rng=rng) fas_learner.Bs = [genB((dout, dhid)) for dhid in dhids] # learners = [bp_learner] # learners = [bpl_learner] # learners = [fas_learner] learners = [bp_learner, bpl_learner, fas_learner] batch_size = 10
# The local BP learner uses a learning rate of 0.1*eta; the commented lines
# are alternative settings that were left disabled.
bpl_learner = BPLocalLearner(get_network(), cost, error,
# eta=eta, momentum=momentum, name='local BP')
                             eta=0.1*eta, momentum=momentum, name='local BP')
# eta=0.05*eta, momentum=momentum, name='local BP')

# bp2_learner = BPLocalLearner2(get_network(), cost, error, name='local BP 2',
#                               # eta=eta, momentum=momentum)
#                               # eta=0.1*eta, momentum=momentum)
#                               eta=0.01*eta, momentum=momentum)

fa_learner = FALearner(get_network(), cost, error,
                       eta=eta, momentum=momentum, name='GFA')
# eta=0.5*eta, momentum=momentum, name='GFA')
fa_learner.Bs = Bs

fas_learner = FASkipLearner(get_network(), cost, error,
                            eta=eta, momentum=momentum, name='DFA')
# eta=0.5*eta, momentum=momentum, name='DFA')
# eta=0.25*eta, momentum=momentum, name='DFA')
# eta=0.1*eta, momentum=momentum, name='DFA')
fas_learner.Bs = Bs_direct

# bp_learner is defined before this fragment (not visible here).
learners = [bp_learner, bpl_learner, fa_learner, fas_learner]

# batch_size = 10
batch_size = 100
batch_fn = make_flat_batch_fn(Xtrain, Ytrain, batch_size)

# Report weight_norm_s(...) of each learner's network weights, plus that of
# its Bs for learners that have a `Bs` attribute.
for learner in learners:
    print("%s: %s" % (learner.name, weight_norm_s(learner.network.weights)))
    if hasattr(learner, 'Bs'):
        print("%s B: %s" % (learner.name, weight_norm_s(learner.Bs)))
# eta = 2e-5 # eta = [2e-4, 2e-4, 2e-5] eta = [2e-3, 2e-3, 2e-5] # momentum = 0 momentum = 0.5 epochs = 10 # n_per_batch = 20 n_per_batch = 100 batch_fn = make_flat_batch_fn(trainX, trainY, n_per_batch) learner = FASkipLearner(get_network(), cost, error, eta=eta, alpha=alpha, momentum=momentum) if 0: Bs = [np.zeros((n_out, n_hid)) for n_hid in n_hids] elif 1: Bs = [ initial_w((n_out, n_hid), kind='ortho', scale=0.1) for n_hid in n_hids ] else: scale = 0.1 Bs = [] for h in (h0, h1): hmean, hstd = h.mean(axis=0), h.std(axis=0) hsilent = hstd <= 1e-16
# --- nonlinearity tau_rc = 0.05 # amp = 0.01 amp = 0.025 f, df = static_f_df('liflinear', tau_rc=tau_rc, amplitude=amp) # --- learners get_network = lambda **kwargs: Network( weights, f=f, df=df, biases=None, noise=0, **kwargs) bp_learner = BPLearner( get_network(), cost, error, eta=eta, alpha=alpha, name='BP') # bp_learner.weight_norms = [] bp_learner.delta_norms = [] fas_learner = FASkipLearner( get_network(), cost, error, eta=eta, alpha=alpha, name='DFA') fas_learner.Bs = [B.copy() for B in Bs_direct] fas_learner.delta_norms = [] fas_learner.bpd_angles = [] fay_learner = FASymmLearner( get_network(), cost, error, eta=eta, alpha=alpha, name='Symmetric ADFA') fay_learner.Bs = [B.copy() for B in Bs_direct] fay_learner.delta_norms = [] fay_learner.bpd_angles = [] fah_learner = FAHebbLearner( get_network(), cost, error, eta=eta, alpha=alpha, name='Hebbian ADFA') fah_learner.Bs = [B.copy() for B in Bs_direct] fah_learner.delta_norms = [] fah_learner.bpd_angles = []
name='FA') fa_learner.Bs = Bs fa_learner.delta_norms = [] fa_learner.bp_angles = [] fa_learner.bpd_angles = [] # fal_learner = FALocalLearner( # Identical to FASkipLearner # get_network(), squared_cost, rms_error, eta=eta, alpha=alpha, name='LFA') # fal_learner.Bs = Bs # fal_learner.delta_norms = [] # fal_learner.bp_angles = [] # fal_learner.bpd_angles = [] fas_learner = FASkipLearner(get_network(), squared_cost, rms_error, eta=eta, alpha=alpha, name='DFA') fas_learner.Bs = Bs_direct fas_learner.delta_norms = [] fas_learner.bpd_angles = [] fas_learner.bpu_angles = [] e = WBeigs(fas_learner) print("Eigs0: %0.3f, %0.3f [%0.3f, %0.3f]" % (e.mean(), e.std(), e.min(), e.max())) # learners = [bp_learner, fa_learner] learners = [fas_learner] # learners = [bp_learner, fa_learner, fas_learner] for learner in learners:
fa_learner = FALearner(get_network(), squared_cost, rms_error, eta=eta, alpha=alpha)
# One initial_w((j, i), ...) result per consecutive pair (i, j) taken from
# dhids followed by dout.
fa_learner.Bs = [
    initial_w((j, i), kind='ortho', scale=2)
    for i, j in zip(dhids, dhids[1:] + [dout])
]
fa_learner.bp_angles = []
# fa_learner.pbp_angles = []

fas_learner = FASkipLearner(get_network(), squared_cost, rms_error, eta=eta, alpha=alpha, name='Our model')
# fas_learner.Bs = [initial_w((dout, dhid), kind='ortho', scale=2) for dhid in dhids]
# Active variant: one initial_w((dout, dhid), ...) result per entry of dhids,
# using normkind='rightmean' and no explicit scale.
fas_learner.Bs = [
    initial_w((dout, dhid), kind='ortho', normkind='rightmean')
    for dhid in dhids
]

# V0init = rng.uniform(-1, 1, size=(dhid, din))
# Vinit = rng.uniform(-1, 1, size=(dout, dhid))
# V0init = orthogonalize(rng.uniform(-1, 1, size=(dhid, din)))
# Vinit = orthogonalize(rng.uniform(-1, 1, size=(dout, dhid)))
# V0init = np.linalg.pinv(W0init)
# Vinit = np.linalg.pinv(Winit)