Example #1
def test_real(epi):
    # Load the trained world-model components from their checkpoints.
    vae = VAE()
    vae.load_state_dict(torch.load(cfg.vae_save_ckpt)['model'])

    model = RNNModel()
    model.load_state_dict(torch.load(cfg.rnn_save_ckpt)['model'])

    controller = Controller()
    controller.load_state_dict(torch.load(cfg.ctrl_save_ckpt)['model'])

    env = DoomTakeCover(True)
    obs = env.reset()
    model.reset()
    frames = []
    for step in range(cfg.max_steps):
        frames.append(cv2.resize(obs, (256, 256)))
        # Encode the current frame into the VAE latent vector z.
        obs = torch.from_numpy(obs.transpose(2, 0,
                                             1)).unsqueeze(0).float() / 255.0
        mu, logvar, _, z = vae(obs)

        # The controller acts on the RNN hidden state concatenated with z.
        inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
        y = controller(inp)
        y = y.item()
        action = encode_action(y)

        model.step(z.unsqueeze(0), action.unsqueeze(0))
        obs_next, reward, done, _ = env.step(action.item())
        obs = obs_next
        if done:
            break
    # In DoomTakeCover, steps survived serve as the episode reward.
    print('Episode {}: Real Reward {}'.format(epi, step))
    write_video(frames, 'real_{}.avi'.format(epi), (256, 256))
    os.system('mv real_{}.avi /home/bzhou/Dropbox/share'.format(epi))
Example #2
def deflatten_controller(param_array):
    # Rebuild a Controller by slicing the flat parameter vector back into
    # the shape of each parameter tensor, in iteration order.
    controller = Controller()
    for param in controller.parameters():
        size = param.data.view(-1).size(0)
        param.data = torch.FloatTensor(param_array[:size]).view_as(param.data)
        param_array = param_array[size:]
    return controller
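For reference, master() in Example #12 calls a flatten_controller that is not shown on this page. A minimal sketch of that inverse, assuming it simply flattens and concatenates every parameter tensor in the same iteration order used above:

import numpy as np

def flatten_controller(controller):
    # Assumed inverse of deflatten_controller: flatten each parameter
    # tensor and concatenate, matching the slicing order above.
    return np.concatenate(
        [param.data.view(-1).numpy() for param in controller.parameters()])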
Example #3
def test_updates(accounts, contract):
    c = Controller(contract)
    listeners = [Listener(a, contract) for a in accounts]
    # Have a bunch of assignments happen and see
    # whether any of them blows an assert when syncing.
    for i in range(1, 11):
        for j, l in enumerate(listeners):
            value = i
            c.set(l.acct, value)
            assert c.get(l.acct) == l.status == value
Example #4
    def __init__(self, input_size, read_size, output_size, **args):
        super(Controller, self).__init__(read_size, **args)

        # Initialize the controller parameters. The output packs 2 control
        # signals (presumably push/pop strengths), a read_size write vector,
        # and the task output.
        self.linear = nn.Linear(input_size + self.get_read_size(),
                                2 + self.get_read_size() + output_size)

        # Careful! The way we initialize weights seems to really matter
        # self.linear.weight.data.uniform_(-.1, .1) # THIS ONE WORKS
        AbstractController.init_normal(self.linear.weight)
        self.linear.bias.data.fill_(0)
Example #5
    def __init__(self, num_embeddings, embedding_size, read_size, output_size,
                 **args):

        super(Controller, self).__init__(read_size, **args)

        # Initialize the embedding parameters
        self.embed = nn.Embedding(num_embeddings, embedding_size)
        AbstractController.init_normal(self.embed.weight)

        # Initialize the linear parameters
        self.linear = nn.Linear(embedding_size + self.get_read_size(),
                                2 + self.get_read_size() + output_size)
        AbstractController.init_normal(self.linear.weight)
        self.linear.bias.data.fill_(0)
Example #6
	def __init__(self, input_size, read_size, output_size, **args):
		
		super(Controller, self).__init__(read_size, **args)

		self.input_size = input_size
		self.read_size = read_size
		self.output_size = output_size

		# Input dim, output dim
		self.lstm = nn.LSTM(input_size + read_size, 2 + read_size + output_size)

		# Initialize weights
		AbstractController.init_normal(self.lstm.weight_hh_l0)
		AbstractController.init_normal(self.lstm.weight_ih_l0)
		self.lstm.bias_hh_l0.data.fill_(0)
		self.lstm.bias_ih_l0.data.fill_(0)
Example #7
def test_listener(accounts, contract):
    c = Controller(contract)
    values = []  # Test sequence
    # Set and reset
    values.append(1)
    # 2 sets in a row
    values.append(2)
    values.append(3)
    # Followed by a reset
    values.append(0)
    # Initialize our actor model
    l = Listener(accounts[0], contract)
    # Run the test sequence!
    for v in values:
        c.set(l.acct, v)
        assert c.get(l.acct) == l.status == v
Example #8
    def __init__(self, input_size, read_size, output_size, **args):
        super(Controller, self).__init__(read_size, **args)
        self.input_size = input_size

        # initialize the controller parameters
        self.linear = nn.Linear(
            input_size + self.get_read_size(),
            Controller.N_ARGS + self.get_read_size() + output_size)

        # Careful! The way we initialize weights seems to really matter
        # self.linear.weight.data.uniform_(-.1, .1) # THIS ONE WORKS
        AbstractController.init_normal(self.linear.weight)
        self.linear.bias.data.fill_(0)
        self.linear.bias.data[0] = -1.  # Discourage popping
        self.linear.bias.data[2] = 1.  # Encourage reading
        self.linear.bias.data[3] = 1.  # Encourage writing
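Examples #4, #5, #6, and #8 all pack the stack-control signals, a write vector of size read_size, and the task output into one linear (or LSTM) output. A hypothetical sketch of how that packed tensor could be unpacked; the slice widths follow the constructors above, but the activation choices are assumptions, not taken from this code:

import torch

def split_output(out, n_args, read_size, output_size):
    # Slice widths mirror the n_args + read_size + output_size layout above;
    # the sigmoid/tanh activations here are assumptions for illustration.
    ctrl = torch.sigmoid(out[:, :n_args])                    # control strengths
    write = torch.tanh(out[:, n_args:n_args + read_size])    # vector written to the stack
    output = out[:, n_args + read_size:]                     # task output
    return ctrl, write, output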
Example #9
def slave(comm):

    # Load checkpoints onto the CPU via map_location so workers run without GPUs.
    vae = VAE()
    vae.load_state_dict(
        torch.load(cfg.vae_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])

    model = RNNModel()
    model.load_state_dict(
        torch.load(cfg.rnn_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])

    controller = Controller()
    controller.load_state_dict(
        torch.load(cfg.ctrl_save_ckpt,
                   map_location=lambda storage, loc: storage)['model'])

    env = DoomTakeCover(False)

    rewards = []
    for epi in range(cfg.trials_per_pop * 4):
        obs = env.reset()
        model.reset()
        for step in range(cfg.max_steps):
            obs = torch.from_numpy(obs.transpose(
                2, 0, 1)).unsqueeze(0).float() / 255.0
            mu, logvar, _, z = vae(obs)

            inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
            y = controller(inp)
            y = y.item()
            action = encode_action(y)

            model.step(z.unsqueeze(0), action.unsqueeze(0))
            obs_next, reward, done, _ = env.step(action.item())
            obs = obs_next
            if done:
                break
        rewards.append(step)  # steps survived serve as the episode reward
        print('Worker {} got reward {} at epi {}'.format(
            comm.rank, step, epi))
    rewards = np.array(rewards)
    comm.send(rewards, dest=0, tag=1)
    print('Worker {} sent rewards to master'.format(comm.rank))
Example #10
def main():
    B_matrix = [1, 1]
    controller = Controller("state", B_matrix, control_target, gains=[1, 1])

    model = Model(fitzhugh_nagumo_neuron, ["I"], False, controller)

    fig, ax = plt.subplots()
    solution = model.run_model([0, 25], [-1, -1], I=1,
                               rtol=1e-6)  # t_span, initial_condition, kwargs
    ax.plot(solution.t, solution.y[0])
    plt.show()
Example #11
def main():
    B_matrix = [1, 0]
    C_matrix = [1, 0]
    controller = Controller("PD",
                            B_matrix,
                            control_target,
                            C_matrix=C_matrix,
                            kp=10,
                            kd=10)

    model = Model(fitzhugh_nagumo_neuron, ["I"], False, controller)

    solution = model.run_model([0, 25], [-1, -1], I=1,
                               rtol=1e-6)  # t_span, initial_condition, kwargs
    fig, ax = plt.subplots()
    ax.plot(solution.t, solution.y[0])
    plt.show()
Example #12
def master(comm):
    logger = Logger("{}/es_train_{}.log".format(cfg.logger_save_dir, cfg.timestr))
    logger.log(cfg.info)
    controller = Controller()
    es = cma.CMAEvolutionStrategy(flatten_controller(controller), cfg.es_sigma,
                                  {'popsize': cfg.population_size})

    for step in range(cfg.es_steps):
        solutions = es.ask()
        for idx, solution in enumerate(solutions):
            comm.send(solution, dest=idx+1, tag=1)

        check = np.ones(cfg.num_workers)
        rewards = []
        for idx in range(cfg.num_workers):
            reward = comm.recv(source=idx+1, tag=2)
            rewards.append(reward)
            check[idx] = 0

        assert check.sum() == 0
        assert len(rewards) == cfg.num_workers

        # CMA-ES minimizes, so negate rewards and add an L2 penalty to form the cost.
        r_cost = -np.array(rewards)
        reg_cost = l2_reg(solutions)
        cost = reg_cost + r_cost
        es.tell(solutions, cost.tolist())

        sigma = es.result[6]  # per-coordinate standard deviations
        rms_var = np.mean(sigma * sigma)

        info = "Step {:d}\t Max_R {:4f}\t Mean_R {:4f}\t Min_R {:4f}\t RMS_Var {:4f}\t Reg_Cost {:4f}\t R_Cost {:4f}".format(
                step, max(rewards), np.mean(rewards), min(rewards), rms_var, r_cost.mean(), reg_cost.mean())
        logger.log(info)

        if step % 25 == 0:
            current_param = es.result[5]  # xfavorite: current distribution mean
            current_controller = deflatten_controller(current_param)
            save_path = "{}/controller_curr_{}_step_{:05d}.pth".format(cfg.model_save_dir, cfg.timestr, step)
            torch.save({'model': current_controller.state_dict()}, save_path)

            best_param = es.result[0]  # xbest: best solution evaluated so far
            best_controller = deflatten_controller(best_param)
            save_path = "{}/controller_best_{}_step_{:05d}.pth".format(cfg.model_save_dir, cfg.timestr, step)
            torch.save({'model': best_controller.state_dict()}, save_path)
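The worker loop that pairs with this master is not shown here; a minimal sketch inferred from the tags above (tag=1 carries a flat parameter vector out, tag=2 carries a reward back), where evaluate() is a hypothetical stand-in for a rollout like the one in Example #9:

def worker(comm):
    # Hypothetical counterpart to master(), mirroring its send/recv tags.
    for step in range(cfg.es_steps):
        solution = comm.recv(source=0, tag=1)      # flat parameter vector
        controller = deflatten_controller(solution)
        reward = evaluate(controller)              # hypothetical rollout, cf. Example #9
        comm.send(reward, dest=0, tag=2)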
Example #13
  def get(self):
    self.response.headers['Content-Type'] = 'text/plain'

    controllers = []
    for controller in Controller.all():
      controller_output = {
        'manufacturer_name': controller.manufacturer.name,
        'key': str(controller.key()),
        'name': controller.name,
      }
      if controller.link:
        controller_output['link'] = controller.link
      if controller.image_url:
        controller_output['image_url'] = controller.image_url
      tags = [tag.tag.label for tag in controller.tag_set]
      if tags:
        controller_output['tags'] = tags

      controllers.append(controller_output)
    self.response.out.write(json.dumps({'controllers': controllers}))
Example #14
def test_rnn(epi):
    mus, logvars = load_init_z()

    vae = VAE()
    vae.load_state_dict(torch.load(cfg.vae_save_ckpt)['model'])

    model = RNNModel()
    model.load_state_dict(torch.load(cfg.rnn_save_ckpt)['model'])

    controller = Controller()
    controller.load_state_dict(torch.load(cfg.ctrl_save_ckpt)['model'])

    model.reset()
    z = sample_init_z(mus, logvars)
    frames = []

    for step in range(cfg.max_steps):
        z = torch.from_numpy(z).float().unsqueeze(0)
        curr_frame = vae.decode(z).detach().numpy()

        frames.append(curr_frame.transpose(0, 2, 3, 1)[0] * 255.0)
        # cv2.imshow('game', frames[-1])
        # k = cv2.waitKey(33)

        inp = torch.cat((model.hx.detach(), model.cx.detach(), z), dim=1)
        y = controller(inp)
        y = y.item()
        action = encode_action(y)

        logmix, mu, logstd, done_p = model.step(z.unsqueeze(0),
                                                action.unsqueeze(0))

        # Normalize mixture log-weights with a numerically stable logsumexp:
        # logmix = logmix - reduce_logsumexp(logmix)
        logmix_max = logmix.max(dim=1, keepdim=True)[0]
        logmix_reduce_logsumexp = (logmix - logmix_max).exp().sum(
            dim=1, keepdim=True).log() + logmix_max
        logmix = logmix - logmix_reduce_logsumexp

        # Adjust temperature
        logmix = logmix / cfg.temperature
        logmix -= logmix.max(dim=1, keepdim=True)[0]
        logmix = F.softmax(logmix, dim=1)

        # Sample a mixture component index for each latent dimension.
        m = Categorical(logmix)
        idx = m.sample()

        # Sample the next latent from the selected Gaussian component.
        new_mu = torch.FloatTensor([mu[i, j] for i, j in enumerate(idx)])
        new_logstd = torch.FloatTensor(
            [logstd[i, j] for i, j in enumerate(idx)])
        z_next = new_mu + new_logstd.exp() * torch.randn_like(
            new_mu) * np.sqrt(cfg.temperature)

        z = z_next.detach().numpy()
        if done_p.squeeze().item() > 0:
            break

    frames = [cv2.resize(frame, (256, 256)) for frame in frames]

    # As in test_real, steps survived serve as the episode reward.
    print('Episode {}: RNN Reward {}'.format(epi, step))
    write_video(frames, 'rnn_{}.avi'.format(epi), (256, 256))
    os.system('mv rnn_{}.avi /home/bzhou/Dropbox/share'.format(epi))
Example #15
def controller(embedding_size, hidden_size):
    return Controller(embedding_size, hidden_size)