def test_real(epi):
    vae = VAE()
    vae.load_state_dict(torch.load(cfg.vae_save_ckpt)['model'])
    model = RNNModel()
    model.load_state_dict(torch.load(cfg.rnn_save_ckpt)['model'])
    controller = Controller()
    controller.load_state_dict(torch.load(cfg.ctrl_save_ckpt)['model'])

    env = DoomTakeCover(True)
    obs = env.reset()
    model.reset()
    frames = []
    for step in range(cfg.max_steps):
        frames.append(cv2.resize(obs, (256, 256)))
        obs = torch.from_numpy(obs.transpose(2, 0, 1)).unsqueeze(0).float() / 255.0
        mu, logvar, _, z = vae(obs)
        inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
        y = controller(inp)
        y = y.item()
        action = encode_action(y)

        model.step(z.unsqueeze(0), action.unsqueeze(0))
        obs_next, reward, done, _ = env.step(action.item())
        obs = obs_next
        if done:
            break

    print('Episode {}: Real Reward {}'.format(epi, step))
    write_video(frames, 'real_{}.avi'.format(epi), (256, 256))
    os.system('mv real_{}.avi /home/bzhou/Dropbox/share'.format(epi))
def deflatten_controller(param_array):
    controller = Controller()
    for param in controller.parameters():
        size = param.data.view(-1).size(0)
        param.data = torch.FloatTensor(param_array[:size]).view_as(param.data)
        param_array = param_array[size:]
    return controller
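# master() below uses a flatten_controller() helper that does not appear in this
# section. A minimal sketch of what it presumably looks like, assuming it is the
# exact inverse of deflatten_controller (all parameters concatenated into one
# flat numpy vector, in parameters() order) -- an illustration, not the repo's
# actual implementation:
def flatten_controller(controller):
    # Concatenate every parameter tensor of the controller into a 1-D array,
    # suitable as the initial mean passed to cma.CMAEvolutionStrategy.
    return np.concatenate(
        [param.data.view(-1).numpy() for param in controller.parameters()])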
def test_updates(accounts, contract):
    c = Controller(contract)
    listeners = [Listener(a, contract) for a in accounts]
    # Have a bunch of random assignments happen
    # and see if one of them blows an assert when syncing
    for i in range(1, 11):
        for j, l in enumerate(listeners):
            value = i
            c.set(l.acct, value)
            assert c.get(l.acct) == l.status == value
def __init__(self, input_size, read_size, output_size, **args):
    super(Controller, self).__init__(read_size, **args)

    # Initialize the controller parameters
    self.linear = nn.Linear(input_size + self.get_read_size(),
                            2 + self.get_read_size() + output_size)

    # Careful! The way we initialize weights seems to really matter
    # self.linear.weight.data.uniform_(-.1, .1)  # THIS ONE WORKS
    AbstractController.init_normal(self.linear.weight)
    self.linear.bias.data.fill_(0)
def __init__(self, num_embeddings, embedding_size, read_size, output_size, **args):
    super(Controller, self).__init__(read_size, **args)

    # Initialize the embedding parameters
    self.embed = nn.Embedding(num_embeddings, embedding_size)
    AbstractController.init_normal(self.embed.weight)

    # Initialize the linear parameters
    self.linear = nn.Linear(embedding_size + self.get_read_size(),
                            2 + self.get_read_size() + output_size)
    AbstractController.init_normal(self.linear.weight)
    self.linear.bias.data.fill_(0)
def __init__(self, input_size, read_size, output_size, **args):
    super(Controller, self).__init__(read_size, **args)
    self.input_size = input_size
    self.read_size = read_size
    self.output_size = output_size

    # Input dim, output dim
    self.lstm = nn.LSTM(input_size + read_size, 2 + read_size + output_size)

    # Initialize weights
    AbstractController.init_normal(self.lstm.weight_hh_l0)
    AbstractController.init_normal(self.lstm.weight_ih_l0)
    self.lstm.bias_hh_l0.data.fill_(0)
    self.lstm.bias_ih_l0.data.fill_(0)
def test_listener(accounts, contract):
    c = Controller(contract)
    values = []

    # Test sequence
    # Set and reset
    values.append(1)
    # 2 sets in a row
    values.append(2)
    values.append(3)
    # Followed by a reset
    values.append(0)

    # Initialize our actor models
    l = Listener(accounts[0], contract)

    # Run the test sequence!
    for v in values:
        c.set(l.acct, v)
        assert c.get(l.acct) == l.status == v
def __init__(self, input_size, read_size, output_size, **args):
    super(Controller, self).__init__(read_size, **args)
    self.input_size = input_size

    # Initialize the controller parameters
    self.linear = nn.Linear(
        input_size + self.get_read_size(),
        Controller.N_ARGS + self.get_read_size() + output_size)

    # Careful! The way we initialize weights seems to really matter
    # self.linear.weight.data.uniform_(-.1, .1)  # THIS ONE WORKS
    AbstractController.init_normal(self.linear.weight)
    self.linear.bias.data.fill_(0)
    self.linear.bias.data[0] = -1.  # Discourage popping
    self.linear.bias.data[2] = 1.   # Encourage reading
    self.linear.bias.data[3] = 1.   # Encourage writing
def slave(comm):
    vae = VAE()
    vae.load_state_dict(
        torch.load(cfg.vae_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])
    model = RNNModel()
    model.load_state_dict(
        torch.load(cfg.rnn_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])
    controller = Controller()
    controller.load_state_dict(
        torch.load(cfg.ctrl_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])

    env = DoomTakeCover(False)
    rewards = []
    for epi in range(cfg.trials_per_pop * 4):
        obs = env.reset()
        model.reset()
        for step in range(cfg.max_steps):
            obs = torch.from_numpy(
                obs.transpose(2, 0, 1)).unsqueeze(0).float() / 255.0
            mu, logvar, _, z = vae(obs)
            inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
            y = controller(inp)
            y = y.item()
            action = encode_action(y)

            model.step(z.unsqueeze(0), action.unsqueeze(0))
            obs_next, reward, done, _ = env.step(action.item())
            obs = obs_next
            if done:
                break
        rewards.append(step)
        print('Worker {} got reward {} at epi {}'.format(comm.rank, step, epi))

    rewards = np.array(rewards)
    comm.send(rewards, dest=0, tag=1)
    print('Worker {} sent rewards to master'.format(comm.rank))
def main():
    B_matrix = [1, 1]
    controller = Controller("state", B_matrix, control_target, gains=[1, 1])
    model = Model(fitzhugh_nagumo_neuron, ["I"], False, controller)

    fig, ax = plt.subplots()
    solution = model.run_model([0, 25], [-1, -1], I=1, rtol=1e-6)  # t_span, initial_condition, kwargs
    ax.plot(solution.t, solution.y[0])
    plt.show()
def main():
    B_matrix = [1, 0]
    C_matrix = [1, 0]
    controller = Controller("PD", B_matrix, control_target,
                            C_matrix=C_matrix, kp=10, kd=10)
    model = Model(fitzhugh_nagumo_neuron, ["I"], False, controller)

    solution = model.run_model([0, 25], [-1, -1], I=1, rtol=1e-6)  # t_span, initial_condition, kwargs
    fig, ax = plt.subplots()
    ax.plot(solution.t, solution.y[0])
    plt.show()
def master(comm):
    logger = Logger("{}/es_train_{}.log".format(cfg.logger_save_dir, cfg.timestr))
    logger.log(cfg.info)

    controller = Controller()
    es = cma.CMAEvolutionStrategy(flatten_controller(controller), cfg.es_sigma,
                                  {'popsize': cfg.population_size})

    for step in range(cfg.es_steps):
        solutions = es.ask()
        for idx, solution in enumerate(solutions):
            comm.send(solution, dest=idx + 1, tag=1)

        check = np.ones(cfg.num_workers)
        rewards = []
        for idx in range(cfg.num_workers):
            reward = comm.recv(source=idx + 1, tag=2)
            rewards.append(reward)
            check[idx] = 0
        assert check.sum() == 0
        assert len(rewards) == cfg.num_workers

        r_cost = -np.array(rewards)
        reg_cost = l2_reg(solutions)
        cost = reg_cost + r_cost
        es.tell(solutions, cost.tolist())

        sigma = es.result[6]
        rms_var = np.mean(sigma * sigma)
        info = ("Step {:d}\t Max_R {:4f}\t Mean_R {:4f}\t Min_R {:4f}\t "
                "RMS_Var {:4f}\t Reg_Cost {:4f}\t R_Cost {:4f}").format(
                    step, max(rewards), np.mean(rewards), min(rewards),
                    rms_var, r_cost.mean(), reg_cost.mean())
        logger.log(info)

        if step % 25 == 0:
            current_param = es.result[5]
            current_controller = deflatten_controller(current_param)
            save_path = "{}/controller_curr_{}_step_{:05d}.pth".format(
                cfg.model_save_dir, cfg.timestr, step)
            torch.save({'model': current_controller.state_dict()}, save_path)

            best_param = es.result[0]
            best_controller = deflatten_controller(best_param)
            save_path = "{}/controller_best_{}_step_{:05d}.pth".format(
                cfg.model_save_dir, cfg.timestr, step)
            torch.save({'model': best_controller.state_dict()}, save_path)
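# master() and slave() above both expect an mpi4py-style communicator
# (comm.rank, comm.send, comm.recv). A minimal launch sketch under that
# assumption -- rank 0 runs the CMA-ES master, the remaining ranks run workers;
# the repository's real entry point and master/worker pairing may differ:
if __name__ == '__main__':
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    if comm.rank == 0:
        master(comm)
    else:
        slave(comm)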
def get(self):
    self.response.headers['Content-Type'] = 'text/plain'
    controllers = []
    for controller in Controller.all():
        controller_output = {
            'manufacturer_name': controller.manufacturer.name,
            'key': str(controller.key()),
            'name': controller.name,
        }
        if controller.link:
            controller_output['link'] = controller.link
        if controller.image_url:
            controller_output['image_url'] = controller.image_url
        tags = [tag.tag.label for tag in controller.tag_set]
        if tags:
            controller_output['tags'] = tags
        controllers.append(controller_output)
    self.response.out.write(json.dumps({'controllers': controllers}))
def test_rnn(epi):
    mus, logvars = load_init_z()
    vae = VAE()
    vae.load_state_dict(torch.load(cfg.vae_save_ckpt)['model'])
    model = RNNModel()
    model.load_state_dict(torch.load(cfg.rnn_save_ckpt)['model'])
    controller = Controller()
    controller.load_state_dict(torch.load(cfg.ctrl_save_ckpt)['model'])

    model.reset()
    z = sample_init_z(mus, logvars)
    frames = []
    for step in range(cfg.max_steps):
        z = torch.from_numpy(z).float().unsqueeze(0)
        curr_frame = vae.decode(z).detach().numpy()
        frames.append(curr_frame.transpose(0, 2, 3, 1)[0] * 255.0)
        # cv2.imshow('game', frames[-1])
        # k = cv2.waitKey(33)

        inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
        y = controller(inp)
        y = y.item()
        action = encode_action(y)

        logmix, mu, logstd, done_p = model.step(z.unsqueeze(0), action.unsqueeze(0))

        # logmix = logmix - reduce_logsumexp(logmix)
        logmix_max = logmix.max(dim=1, keepdim=True)[0]
        logmix_reduce_logsumexp = (logmix - logmix_max).exp().sum(
            dim=1, keepdim=True).log() + logmix_max
        logmix = logmix - logmix_reduce_logsumexp

        # Adjust temperature
        logmix = logmix / cfg.temperature
        logmix -= logmix.max(dim=1, keepdim=True)[0]
        logmix = F.softmax(logmix, dim=1)

        m = Categorical(logmix)
        idx = m.sample()

        new_mu = torch.FloatTensor([mu[i, j] for i, j in enumerate(idx)])
        new_logstd = torch.FloatTensor(
            [logstd[i, j] for i, j in enumerate(idx)])
        z_next = new_mu + new_logstd.exp() * torch.randn_like(
            new_mu) * np.sqrt(cfg.temperature)

        z = z_next.detach().numpy()
        if done_p.squeeze().item() > 0:
            break

    frames = [cv2.resize(frame, (256, 256)) for frame in frames]
    print('Episode {}: RNN Reward {}'.format(epi, step))
    write_video(frames, 'rnn_{}.avi'.format(epi), (256, 256))
    os.system('mv rnn_{}.avi /home/bzhou/Dropbox/share'.format(epi))
def controller(embedding_size, hidden_size):
    return Controller(embedding_size, hidden_size)