class Agent:
    """Reservoir-based controller mapping a status vector to a 2-element action.

    The status is projected onto the reservoir through fixed random input
    weights, the reservoir (``ESN``) is advanced by one step, and its output
    is read out linearly through ``out_weights``.  Only ``out_weights`` is
    replaced by :meth:`setParams`; the input weights stay as drawn in
    ``__init__``.
    """

    def __init__(self, num_params, num_inputs, num_actions):
        """Build the reservoir and draw random input/readout weights.

        Args:
            num_params: Total number of readout parameters; the reservoir
                size is ``num_params // num_actions``.
            num_inputs: Length of the status vector passed to :meth:`step`.
            num_actions: Number of readout columns.  NOTE(review):
                :meth:`step` only uses readout entries 0 and 1.
        """
        self.num_units = num_params // num_actions
        self.num_inputs = num_inputs
        self.num_actions = num_actions
        self.echo = ESN(
            N=self.num_units,
            dt=1.0,
            tau=5.0,
            alpha=0.1,
            beta=0.9,
            epsilon=1.0e-10,
        )
        self.input_weights = np.random.randn(self.num_inputs, self.num_units)
        self.out_weights = np.random.randn(self.num_units, self.num_actions)

    def save(self, filename):
        """Serialize the whole agent object to ``filename`` as YAML."""
        with open(filename, "w") as f:
            yaml.dump(self, f)

    @staticmethod
    def load(filename):
        """Load an agent previously written by :meth:`save`.

        SECURITY NOTE: ``yaml.UnsafeLoader`` can construct arbitrary Python
        objects and therefore execute code embedded in the file.  Only load
        files that come from a trusted source.
        """
        with open(filename, "r") as f:
            return yaml.load(f, Loader=yaml.UnsafeLoader)

    def setParams(self, params):
        """Replace the readout weights with ``params``.

        Args:
            params: Array-like holding ``num_units * num_actions`` values
                (a NumPy array, list or tuple).  It is reshaped to
                ``(num_units, num_actions)`` and copied, so later changes to
                ``params`` do not affect the agent.

        Raises:
            ValueError: If ``params`` cannot be reshaped to the readout shape.
        """
        # np.asarray lets plain sequences through; arrays pass unchanged.
        weights = np.asarray(params).reshape(self.num_units, self.num_actions)
        self.out_weights = weights.copy()

    def reset(self):
        """Reset the reservoir state."""
        self.echo.reset()

    def step(self, status):
        """Advance the reservoir by one step and return the scaled action.

        Args:
            status: Array-like status vector; it is flattened to one row
                before being multiplied by the input weights.

        Returns:
            A NumPy array built from ``tanh`` of readout entry 0 and
            ``sigmoid`` of readout entry 1, scaled by 0.1 and 0.5
            respectively.
        """
        status_row = np.asarray(status).reshape(1, -1)
        units_status = np.matmul(status_row, self.input_weights)
        units = self.echo.step(units_status.ravel())
        readout = np.matmul(units, self.out_weights)
        action = np.hstack([np.tanh(readout[0]), sigmoid(readout[1])])
        # Per-component output gains.
        action *= [0.1, 0.5]
        return action
#train for epoch in range(epochs): for ind in range(N_max - 1): inp = input_train[ind].reshape(-1, 1) desired_target = target_train[ind + 1].reshape(-1, 1) if ind >= esn.washout: out = esn.online_train(inp, desired_target) mse = esn.mse(out, desired_target) print(ind) print("target", desired_target.T) print("output", out.T) print("mse", mse) print("\n") esn.reset() print("hid", esn.W) print("in", esn.W_in) print("out", esn.W_out) #esn.store_net("forza.pickle") out_name = os.path.join(output_dir, filenames[index]) esn.save_genome("%s_2.params" % out_name) #for i in range(len(filenames)): input_test, target_test, N_max_t = read_file( os.path.join(data_dir, filenames[1]), steer_out) #test m = 1800
class ESN_discrete(object):
    """
    Learn trajectories to goals.

    Changing the spatial goal also changes the shape of the trajectory.

    A reservoir (``ESN``) receives a single input pulse at the first timestep
    that encodes the start and goal points; a linear readout, fitted by ridge
    regression, maps the time-weighted reservoir activity to the trajectory
    expressed relative to the goal.
    """

    def __init__(self, y0=0.0, goal=1.0, timesteps=1000, lmbd=1e-4,
                 input_amplitude=50, **kargs):
        """
        y0               float: starting point
        goal             float: final point
        timesteps        int:   number of steps of the desired trajectory
        lmbd             float: ridge regularization parameter
        input_amplitude  float: gain applied to the input pattern before it
                                is fed to the reservoir
        kargs                 : extra keyword arguments forwarded to ``ESN``
        """
        self.goal = goal
        self.y0 = y0
        self.timesteps = timesteps
        self.LMBD = lmbd
        self.input_amplitude = input_amplitude

        # Init the esn
        self.res = ESN(stime=self.timesteps, **kargs)

        # Init the weights to the ESN with sparse random values.
        # The input to the ESN is a 4-elements vector:
        #     [ y0, 1-y0, goal, 1-goal ]
        # The boolean mask keeps roughly 10% of the Gaussian entries.
        self.input2res_w = np.random.randn(self.res.N, 4)
        self.input2res_w *= np.random.rand(self.res.N, 4) < 0.1
        self.readout_w = np.zeros(self.res.N)

    def _drive_reservoir(self, y0, goal):
        """
        Reset the reservoir and run it for ``timesteps`` steps.

        The start/goal input pattern is applied at t == 0 only; every later
        step receives a zero input.  Each step is stored in the reservoir
        via ``res.store(t)``.
        """
        p_init = np.dot(self.input2res_w, [y0, 1 - y0, goal, 1 - goal])

        self.res.reset()
        for t in range(self.timesteps):
            p = p_init * (t == 0)
            self.res.step(self.input_amplitude * p)
            self.res.store(t)

    def imitate_path(self, y_des):
        """
        Learn through ridge regression the weights to the readout unit
        to reproduce the 'y_des' trajectories.

        y_des  iterable of 1-D sequences: each one is a demonstrated
               trajectory; its first and last samples are taken as the
               start point and the goal.

        The fitted weights are ADDED in place to ``self.readout_w`` (which
        starts at zero), so calling this method repeatedly accumulates.
        """
        activities = []
        targets = []

        for path in y_des:
            # learning start-end points
            y0 = path[0]
            goal = path[-1]

            # desired output, resampled to `timesteps` and made goal-relative
            targets.append(self.interpolate(path) - goal)

            # Record the reservoir activity for this start/goal pair
            self._drive_reservoir(y0, goal)
            activities.append(self.activations())

        N = self.res.N
        # Seed with empty arrays so an empty `y_des` still yields
        # well-shaped (N, 0) / (0,) matrices.
        X = np.hstack([np.empty((N, 0))] + activities)
        Y = np.hstack([np.empty(0)] + targets)

        # Ridge regression: solve (X X^T + lambda I) w = X Y directly
        # instead of forming the explicit inverse.
        gram = np.dot(X, X.T) + self.LMBD * np.eye(N)
        self.readout_w += np.linalg.solve(gram, np.dot(X, Y))

    def activations(self):
        """
        Return the stored reservoir output weighted over time.

        Every column t is scaled by exp(-s_t) with s_t going linearly from
        1 (first step) to 0 (last step).
        NOTE(review): assumes ``res.data[res.out_lab]`` has shape
        (N, timesteps) -- confirm against ESN.
        """
        time_weights = np.exp(-np.linspace(1, 0, self.timesteps))
        return self.res.data[self.res.out_lab] * np.outer(
            np.ones(self.res.N), time_weights)

    def rollout(self, y0=0.0, goal=1.0):
        """
        Generate a trajectory from 'y0' to 'goal' with the learned readout.

        Returns the readout applied to the time-weighted reservoir
        activity, shifted back by 'goal' (training targets are stored
        relative to the goal).
        """
        self._drive_reservoir(y0, goal)

        # network activity time-series
        x = self.activations()

        return np.dot(self.readout_w, x) + goal

    def interpolate(self, path):
        """
        Linearly resample 'path' to ``timesteps`` samples.

        The samples of 'path' are placed evenly on [0, timesteps] and the
        interpolant is evaluated at t = 0, 1, ..., timesteps - 1.
        """
        import scipy.interpolate

        x = np.linspace(0, self.timesteps, len(path))
        y_gen = scipy.interpolate.interp1d(x, path)
        # One vectorized evaluation instead of a Python-level loop.
        return np.asarray(y_gen(np.arange(self.timesteps)), dtype=float)