def train_sequence(self, eta=0.01, num_epochs=1000, term=0.1, verbose=0):
    sequence = mu.random_patterns(self.layer_size(), self.memory_size)
    keys = sequence[:, :-1]
    next_keys = sequence[:, 1:]
    error_history = np.empty((num_epochs,))
    # history_fun = lambda e: np.fabs(e).max()
    history_fun = lambda e: (e**2).sum()
    # train k -> next(k) with gradient descent
    for epoch in range(num_epochs):
        x = mu.forward_pass(keys, self.W_sq, self.f_sq)
        e = x[self.L_sq] - next_keys
        E = 0.5 * (e**2).sum()
        # learning curve (recorded before the termination check so the
        # final epoch is included in the history)
        error_history[epoch] = history_fun(e)
        # early termination:
        if (np.fabs(e) < term).all():
            break
        # progress update:
        if epoch % max(1, num_epochs // 10) == 0:
            if verbose > 0:
                print('%d: %f' % (epoch, E))
            if verbose > 1:
                print(x[self.L_sq].T)
                print(next_keys.T)
        # weight update:
        y = mu.backward_pass(x, e, self.W_sq, self.df_sq)
        G = mu.error_gradient(x, y)
        for k in self.W_sq:
            self.W_sq[k] += -eta * G[k]
    # update history
    self.sequence_error_history = error_history[:epoch + 1]
    # update key tracking
    self.first_key = sequence[:, [0]]
    self.last_key = sequence[:, [0]]
    self.current_key = sequence[:, [0]]
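# Conventions assumed for the mu helpers used above (a sketch of standard
# backprop, not mu's actual definitions):
#   x[0] = keys
#   x[k] = f_sq[k](W_sq[k].dot(x[k-1]))   for k = 1..L_sq    (mu.forward_pass)
#   y    = layer-wise error deltas propagated back from e    (mu.backward_pass)
#   G[k] = dE/dW_sq[k]                                       (mu.error_gradient)
# so the update W_sq[k] += -eta * G[k] is plain gradient descent on
# E = 0.5 * (e**2).sum().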
def __init__(self, N_kv, f_kv, df_kv, af_kv, N_sq, f_sq, df_sq, af_sq, memory_size):
    # N[k]: size of k^th layer (len L+1)
    # f[k]: activation function of k^th layer (len L)
    # df[k]: derivative of f[k] (len L)
    # af[k]: inverse of f[k] (len L)
    self.L_kv = len(N_kv) - 1
    self.N_kv = N_kv
    self.f_kv = f_kv
    self.df_kv = df_kv
    self.af_kv = af_kv
    self.L_sq = len(N_sq) - 1
    self.N_sq = N_sq
    self.f_sq = f_sq
    self.df_sq = df_sq
    self.af_sq = af_sq
    self.memory_size = memory_size
    self.W_kv = mu.init_randn_W(N_kv)
    self.W_sq = mu.init_randn_W(N_sq)
    # key tracking for write and passive mode (reset by sequence training)
    self.first_key = mu.random_patterns(N_kv[0], 1)
    self.last_key = self.first()
    self.current_key = self.first()
    # for batch update in passive mode
    self.dW_kv = {k: np.zeros(self.W_kv[k].shape) for k in self.W_kv}
    # for visualization (not operation!)
    self.sequence_error_history = np.empty((0,))
    self.write_error_history = np.empty((0,))
    self.cheat_error_history = np.empty((0, 0))
    self.kv_cheat = {}
    self.failed_write_count = 0
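# Sketch of how make_tanh_bmn (used in bmnet_test below) plausibly assembles
# these constructor arguments; an illustrative assumption, including the class
# name BackPropMemoryNet, not the real helper:
#
# def make_tanh_bmn(layer_size, memory_size, kv_layers=3, sq_layers=3):
#     N_kv, N_sq = [layer_size] * kv_layers, [layer_size] * sq_layers
#     def tanh_fdfaf(L):
#         return ({k: np.tanh for k in range(1, L + 1)},
#                 {k: mu.tanh_df for k in range(1, L + 1)},
#                 {k: np.arctanh for k in range(1, L + 1)})
#     f_kv, df_kv, af_kv = tanh_fdfaf(len(N_kv) - 1)
#     f_sq, df_sq, af_sq = tanh_fdfaf(len(N_sq) - 1)
#     return BackPropMemoryNet(N_kv, f_kv, df_kv, af_kv,
#                              N_sq, f_sq, df_sq, af_sq, memory_size)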
def kvnet_test():
    N = [32] * 3
    L = len(N) - 1
    # f, df, af = [np.tanh] * L, [mu.tanh_df] * L, [np.arctanh] * L
    f = {k: np.tanh for k in range(1, L + 1)}
    df = {k: mu.tanh_df for k in range(1, L + 1)}
    af = {k: np.arctanh for k in range(1, L + 1)}
    kvn = BackPropKVNet(N=N, f=f, df=df, af=af)
    # num_patterns = 5
    num_patterns = 1
    values = mu.random_patterns(N[0], num_patterns)
    keys = np.empty((N[0], num_patterns))
    # k = kvn.first()
    k = mu.random_patterns(N[0], 1)
    print(k)
    for m in range(values.shape[1]):
        keys[:, [m]] = k
        kvn.write(k, values[:, [m]], verbose=1, term=0.5, eta=0.001, num_epochs=100000)
        # kvn.passive_ticks(10000)
        k = kvn.next(k)
    # memory accuracy
    net_values = kvn.read(keys)
    print('final accuracy:')
    print(np.fabs(values - net_values).max(axis=0))
    # show write histories
    # print('target vs read')
    # print(values[:, -1].T)
    # print(kvn.read(keys[:, -1]).T)
    h = [None, None]
    for c in range(kvn.cheat_error_history.shape[0]):
        h[0] = plt.plot(kvn.cheat_error_history[c, :], 'r.')[0]
    h[1] = plt.plot(kvn.write_error_history, 'b.')[0]
    # plt.plot(kvn.cheat_error_history.sum(axis=0) + kvn.write_error_history, 'g.')
    # print(kvn.write_error_history)
    plt.xlabel('Training iteration')
    plt.ylabel('L_inf error on previous and current memories')
    plt.title('learning curves during write(k,v)')
    plt.legend(h, ['||read(k_prev)-sign(read(k_prev))||', '||read(k)-v||'])
    plt.show()
def train_sequence(self, memory_size, verbose=0):
    sequence = mu.random_patterns(self.layer_size(), memory_size)
    self.first_key = sequence[:, [0]]
    self.last_key = sequence[:, [0]]
    self.current_key = sequence[:, [0]]
    # one-shot associations
    for m in range(memory_size):
        self.gnn.set_pattern(sequence[:, [m]])
        if m > 0:
            self.gnn.associate()
        self.gnn.advance_tick_mark()
def __init__(self, N, f, df, af):
    # N[k]: size of k^th layer (len L+1)
    # f[k]: activation function of k^th layer (len L)
    # df[k]: derivative of f[k] (len L)
    # af[k]: inverse of f[k] (len L)
    self.L = len(N) - 1
    self.N = N
    self.f = f
    self.df = df
    self.af = af
    self.W = mu.init_randn_W(N)
    self.first_key = mu.random_patterns(N[0], 1)
    self.write_error_history = np.empty((0,))
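# mu.init_randn_W(N) is assumed (a sketch, not mu's actual definition) to
# return random weight matrices in a dict keyed by layer index, W[k] of shape
# (N[k], N[k-1]) for k = 1..L, which is what the {k: ...} activation dicts
# built in kvnet_test above line up with.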
def __init__(self, N, k_d=0, k_theta=0, k_w=1. / 3, beta_1=1, beta_2=1. / 2):
    # parameters as in GALIS references
    self.gnn = gn.GALISNN(N, k_d, k_theta, k_w, beta_1, beta_2)
    # key tracking for write and passive mode (reset by sequence training)
    self.first_key = mu.random_patterns(N, 1)
    self.last_key = self.first()
    self.current_key = self.first()
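# Usage sketch for the GALIS-backed net. The class name GALISMemoryNet is an
# illustrative assumption; in pooled_array_write_trials below the equivalent
# object comes from gmn.make_tanh_gmn(...).
#
# net = GALISMemoryNet(N=32)
# net.train_sequence(memory_size=10)   # one-shot writes, defined above
# k = net.first()
# k = net.next(k)                      # step along the stored key sequence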
def bmnet_test():
    num_patterns = 10
    net = make_tanh_bmn(layer_size=32, memory_size=num_patterns, kv_layers=3, sq_layers=3)
    values = mu.random_patterns(net.layer_size(), num_patterns)
    keys = np.empty((net.layer_size(), num_patterns))
    k = net.first()
    for m in range(values.shape[1]):
        print('writing pattern %d' % m)
        keys[:, [m]] = k
        net.write(k, values[:, [m]], verbose=1, term=0.5, eta=0.001, num_epochs=10000)
        # net.passive_ticks(10000)
        k = net.next(k)
    # memory accuracy
    net_values = net.read(keys)
    print('final accuracy:')
    print(np.fabs(values - net_values).max(axis=0))
    # show write histories
    print('target vs read')
    print(values[:, -1].T)
    print(net.read(keys[:, -1]).T)
    print((values == net.read(keys)).all())
    h = [None, None]
    for c in range(net.cheat_error_history.shape[0]):
        h[0] = plt.plot(net.cheat_error_history[c, :], 'r.')[0]
    h[1] = plt.plot(net.write_error_history, 'b.')[0]
    # plt.plot(net.cheat_error_history.sum(axis=0) + net.write_error_history, 'g.')
    # print(net.write_error_history)
    plt.xlabel('Training iteration')
    plt.ylabel('L_inf error on previous and current memories')
    plt.title('learning curves during write(k,v)')
    plt.legend(h, ['||read(k_prev)-sign(read(k_prev))||', '||read(k)-v||'])
    plt.show()
def pooled_array_write_trials():
    # simple kv experiment:
    # learn a random sequence of kv mappings.
    # At each one, assess recall on all learned so far.
    # Between each one, allow some number of random passive ticks.
    # N = 8
    # mmn = MockMemoryNet(N, noise=0.005)
    # mnh = MemoryNetHarness(mmn)
    # trial_data = kv_trial_data(N, num_mappings=10, max_passive_ticks=5)
    # acc = run_kv_trial(mnh, *trial_data)
    # print(acc)
    run_trial_fun = run_array_write_trial
    # run_trial_fun = run_array_trial
    num_procs = 9
    array_length_grid = [4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 65]
    write_epochs_grid = [500, 1000, 5000, 10000, 50000]
    layer_size_grid = [32]
    num_passive_ticks = 0
    num_trials = len(array_length_grid) * len(layer_size_grid) * len(write_epochs_grid)
    trial_funs = [run_trial_fun] * num_trials
    trial_kwargs = []
    seed = None
    params = []
    for array_length in array_length_grid:
        for layer_size in layer_size_grid:
            values = mu.random_patterns(layer_size, num_patterns=array_length)
            for write_epochs in write_epochs_grid:
                params.append((array_length, layer_size, write_epochs))
                # net = bmn.make_tanh_bmn(layer_size, memory_size=array_length, kv_layers=3, sq_layers=3)
                net = gmn.make_tanh_gmn(layer_size, memory_size=array_length, kv_layers=3)
                # passive_tick_fun = mu.constant_tick_fun(num_passive_ticks)
                # mnh = MemoryNetHarness(net, passive_tick_fun=passive_tick_fun, seed=seed)
                mnh = MemoryNetHarness(net, num_passive_ticks=num_passive_ticks, seed=seed)
                trial_kwargs.append({
                    'mnh': mnh,
                    'values': values,
                    'write_epochs': write_epochs,
                    'params': params[-1],
                    'verbose': 1,
                })
    results = pool_trials(num_procs, trial_funs, trial_kwargs, results_file_name=None)
    print('array_length;layer_size;write_epochs')
    print(np.array(params).T)
    print('seq_acc, kv_acc, seq_mem, kv_mem')
    for idx in range(4):
        if idx < 2:
            print([r[idx][-1] for r in results])
        else:
            print([r[idx] for r in results])
    # confirm perfect accuracy iff memory check passes
    for r in results:
        assert((r[0][-1] == 1. and r[1][-1] == 1.) == (r[2] and r[3]))
    # save results
    # save_pkl_file('bmn.pkl', {'params': params, 'results': results})
    save_pkl_file('gmn.pkl', {'params': params, 'results': results})
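# For reference: pool_trials invokes each entry of trial_funs with its matching
# kwargs dict, so run_array_write_trial is assumed to be compatible with a
# signature like the following (illustrative, inferred from the kwargs and the
# result indexing above; not the actual definition):
#
# def run_array_write_trial(mnh, values, write_epochs, params, verbose=0):
#     ...
#     return seq_acc, kv_acc, seq_mem, kv_mem  # per-write accuracy sequences,
#                                              # then boolean memory checks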