Example #1
    def __call__(self, inputs, state, scope=None):
        assert len(inputs) == self.n_agents
        assert len(state) == 3
        assert all(len(s) == self.n_agents for s in state)

        # The cell state is a per-agent triple (hidden states, outputs,
        # lexicon messages); only the hidden states feed back into the GRU.
        base_cell = tf.nn.rnn_cell.GRUCell(self.n_hidden)
        states, _, _ = state

        with tf.variable_scope(scope or "desc_cell"):
            next_states = []
            next_outs = []
            next_l_msgs = []
            for i_agent in range(self.n_agents):
                with tf.variable_scope(self.agent_scopes[i_agent],
                                       reuse=(i_agent > 0
                                              and self.reuse_agent_scope)):
                    _, next_state = base_cell(inputs[i_agent], states[i_agent])
                    with tf.variable_scope("out"):
                        next_out, _ = net.mlp(next_state,
                                              (self.n_out[i_agent], ))
                    with tf.variable_scope("gen"):
                        next_l_msg, _ = net.mlp(next_state, (self.n_lex, ))
                    next_states.append(next_state)
                    next_outs.append(next_out)
                    next_l_msgs.append(next_l_msg)

        next_state = (tuple(next_states), tuple(next_outs), tuple(next_l_msgs))
        return next_state, next_state
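A note on helpers, not part of the original example: these snippets lean on a small net module that is not shown. Judging from the call sites, net.mlp(t_input, widths) returns an output tensor plus the list of variables it created and accepts a final_nonlinearity flag. A minimal sketch consistent with that usage follows; the initializers and the choice of ReLU are assumptions, and the real helper evidently also handles higher-rank inputs (see Example 8). The w%d/b%d naming is consistent with the `"b1" in v_bias.name` assertion in Example 7 below.

import tensorflow as tf

def mlp(t_input, widths, final_nonlinearity=False):
    # Stack of fully connected layers: hidden layers get a ReLU, the
    # last layer stays linear unless final_nonlinearity is set.
    # Returns (output tensor, list of variables created).
    t_prev = t_input
    variables = []
    for i, width in enumerate(widths):
        n_in = t_prev.get_shape()[-1].value
        v_w = tf.get_variable("w%d" % i, shape=(n_in, width),
                              initializer=tf.uniform_unit_scaling_initializer())
        v_b = tf.get_variable("b%d" % i, shape=(width,),
                              initializer=tf.constant_initializer(0.0))
        t_prev = tf.matmul(t_prev, v_w) + v_b
        if i < len(widths) - 1 or final_nonlinearity:
            t_prev = tf.nn.relu(t_prev)
        variables += [v_w, v_b]
    return t_prev, variables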
Example #2
    def __call__(self, inputs, state, scope=None):
        assert len(inputs) == self.n_agents
        assert len(state) == 3
        assert all(len(s) == self.n_agents for s in state)
        # State is a per-agent triple: (hidden states, channel messages, outputs).
        states, comms, _ = state

        with tf.variable_scope(scope or "comm_cell"):
            features = []
            if self.communicate:
                # Each agent's features are its own input concatenated with
                # every other agent's most recent message.
                base_cell = tf.nn.rnn_cell.GRUCell(self.n_hidden)
                for i_agent in range(self.n_agents):
                    other_comms = tuple(comms[_ia]
                                        for _ia in range(self.n_agents)
                                        if _ia != i_agent)
                    features.append(
                        tf.concat(1, (inputs[i_agent], ) + other_comms))
            else:
                # for fair comparison, let non-communicating agents use the
                # extra channel capacity for themselves
                #base_cell = tf.nn.rnn_cell.GRUCell(
                #        self.n_hidden + self.n_comm * (self.n_agents - 1))
                base_cell = tf.nn.rnn_cell.GRUCell(self.n_hidden)
                for i_agent in range(self.n_agents):
                    features.append(inputs[i_agent])

            next_states = []
            next_comms = []
            next_outs = []
            for i_agent in range(self.n_agents):
                with tf.variable_scope(self.agent_scopes[i_agent],
                                       reuse=(i_agent > 0
                                              and self.reuse_agent_scope)):
                    hidden, _ = net.mlp(features[i_agent],
                                        (self.n_hidden, ) * self.feature_depth,
                                        final_nonlinearity=True)
                    _, next_state = base_cell(hidden, states[i_agent])
                    with tf.variable_scope("comm"):
                        next_comm, _ = net.mlp(next_state, (self.n_comm, ))
                    with tf.variable_scope("out"):
                        next_out, _ = net.mlp(next_state,
                                              (self.n_out[i_agent], ))
                    next_states.append(next_state)
                    next_comms.append(next_comm)
                    next_outs.append(next_out)

            transmitted = [self.channel.transmit(c) for c in next_comms]

        next_state = (tuple(next_states), tuple(transmitted), tuple(next_outs))
        return next_state, next_state
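Both cells follow the standard RNN-cell calling convention, mapping (inputs, state) to (output, next state), with output and state identical here. Since they create their variables on first call, one hypothetical way to drive one is a plain Python loop that enables variable reuse after the first step. A sketch only, assuming a cell instance of the class above, an init_state triple, per-agent input tensors t_obs[i] of shape (batch, n_steps, n_in), and integers n_agents and n_steps; the original source may unroll differently.

# Hypothetical unrolling loop -- not from the original source.
with tf.variable_scope("rollout") as scope:
    state = init_state
    states_over_time = []
    for t in range(n_steps):
        if t > 0:
            scope.reuse_variables()  # share weights across time steps
        inputs_t = tuple(t_obs[i][:, t, :] for i in range(n_agents))
        state, _ = cell(inputs_t, state)
        states_over_time.append(state)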
Example #3
def embed_all(inputs, size):
    out = []
    with tf.variable_scope("embed_all") as scope:
        for inp in inputs:
            #t_emb, _ = net.embed(inp, count, size)
            #t_pool = tf.reduce_mean(t_emb, axis=-2)
            #out.append(t_pool)
            #scope.reuse_variables()
            t_emb, _ = net.mlp(inp, (size, ))
            out.append(t_emb)
            scope.reuse_variables()
    return out
Example #4
def predictor(scope):
    with tf.variable_scope(scope):
        t_arg = tf.placeholder(tf.int32, shape=[None])
        t_embed, v_embed = net.embed(t_arg,
                                     self.world.cookbook.n_kinds,
                                     N_EMBED)
        t_feats = tf.placeholder(tf.float32,
                                 shape=[None, world.n_features + 1])
        t_comb = tf.concat(1, (t_embed, t_feats))
        t_scores, v_weights = net.mlp(t_comb,
                                      [N_HIDDEN, self.n_actions])
        #t_scores, v_weights = net.mlp(t_comb, [self.n_actions])
    return t_arg, t_feats, t_scores, v_embed + v_weights
Example #5
def predictor(scope):
    with tf.variable_scope(scope):
        t_plan = tf.placeholder(tf.int32, shape=[None, 2])
        t_embed_plan, v_emb = net.embed(t_plan,
                                        self.world.cookbook.n_kinds,
                                        N_EMBED,
                                        multi=True)
        t_features = tf.placeholder(tf.float32,
                                    shape=[None, world.n_features])
        t_comb = tf.concat(1, (t_embed_plan, t_features))
        t_scores, v_weights = net.mlp(t_comb,
                                      [N_HIDDEN, self.n_actions])
    return t_features, t_plan, t_scores, v_weights + v_emb
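Examples 4 and 5 also rely on net.embed, again not shown. From the call sites, it takes an int32 index tensor, a vocabulary size, and an embedding size, and returns (embedded tensor, variables); with multi=True (Example 5) it must pool several symbol embeddings into one rank-2 tensor, since the result is concatenated along axis 1. A sketch under those assumptions; the sum-pooling is a guess (the commented-out lines in Example 3 hint at mean-pooling as one alternative).

import tensorflow as tf

def embed(t_input, n_symbols, size, multi=False):
    # Embedding-table lookup; returns (embedded tensor, variables).
    # With multi=True, the per-symbol embeddings are pooled (summed
    # here) down to a single vector per example.
    v_embeddings = tf.get_variable(
            "embeddings", shape=(n_symbols, size),
            initializer=tf.uniform_unit_scaling_initializer())
    t_embedded = tf.nn.embedding_lookup(v_embeddings, t_input)
    if multi:
        t_embedded = tf.reduce_sum(t_embedded, reduction_indices=(1,))
    return t_embedded, [v_embeddings]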
Example #6
        def build_critic(index, t_input, t_reward, extra_params=[]):
            with tf.variable_scope("critic_%s" % index):
                if self.config.model.baseline in ("task", "common"):
                    t_value = tf.get_variable(
                        "b", shape=(), initializer=tf.constant_initializer(0.0))
                    v_value = [t_value]
                elif self.config.model.baseline == "state":
                    t_value, v_value = net.mlp(t_input, (1,))
                    t_value = tf.squeeze(t_value)
                else:
                    raise NotImplementedError(
                        "Baseline %s is not implemented" % self.config.model.baseline)

            return CriticModule(t_value, v_value + extra_params)
Example #7
        def build_actor(index, t_input, t_action_mask, extra_params=[]):
            with tf.variable_scope("actor_%s" % index):
                t_action_score, v_action = net.mlp(t_input, (N_HIDDEN, self.n_actions))

                # TODO this is pretty gross
                v_bias = v_action[-1]
                assert "b1" in v_bias.name
                t_decrement_op = v_bias[-1].assign(v_bias[-1] - 3)

                t_action_logprobs = tf.nn.log_softmax(t_action_score)
                t_chosen_prob = tf.reduce_sum(
                    t_action_mask * t_action_logprobs,
                    reduction_indices=(1,))

            return ActorModule(
                    t_action_logprobs, t_chosen_prob,
                    v_action + extra_params, t_decrement_op)
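Examples 6 and 7 build the two halves of an actor-critic pair: the critic supplies a baseline value (a learned constant for the "task" and "common" settings, a state-dependent prediction for "state"), and the actor exposes the log-probability of the chosen action. How they are combined is not shown in these snippets; a hypothetical REINFORCE-style surrogate, using t_value from Example 6, t_chosen_prob from Example 7, and the t_reward placeholder passed to build_critic, might look like this.

# Hypothetical surrogate losses -- not from the original source.
t_advantage = t_reward - t_value
# Weight chosen-action log-probs by the advantage, holding the
# baseline fixed so the critic only learns from its own loss.
t_actor_loss = -tf.reduce_mean(t_chosen_prob * tf.stop_gradient(t_advantage))
# Regress the baseline toward the observed reward.
t_critic_loss = tf.reduce_mean(tf.square(t_advantage))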
Example #8
def build_scorer(t_code):
    t_code_rs = tf.reshape(
            t_code,
            (config.trainer.n_batch_episodes, 1,
                config.channel.n_msg))
    t_code_tile = tf.tile(
            t_code_rs, (1, config.trainer.n_distractors + 1, 1))
    t_features = tf.concat(2, (t_xa, t_xb_tile, t_code_tile))
    t_score, v_net = net.mlp(t_features, (config.model.n_hidden, 1))
    t_score_rs = tf.reshape(
            t_score,
            (config.trainer.n_batch_episodes,
                config.trainer.n_distractors + 1, 1))
    t_score_sq = tf.squeeze(t_score_rs)
    t_belief = tf.nn.softmax(t_score_sq)
    t_errs = tf.nn.sparse_softmax_cross_entropy_with_logits(
            t_score_sq, tf.ones(
                (config.trainer.n_batch_episodes,), tf.int32))
    t_loss = tf.reduce_mean(t_errs)
    return t_loss, t_belief, v_net
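For reference, the shape flow through build_scorer, inferred from the reshapes above (with B = n_batch_episodes, D = n_distractors, M = n_msg; t_xa and t_xb_tile come from the enclosing scope):

# Illustrative shape walk-through -- inferred from the code above.
# t_code:      (B, M)            one message per episode
# t_code_rs:   (B, 1, M)
# t_code_tile: (B, D + 1, M)     one copy per candidate
# t_features:  (B, D + 1, .)     candidates concatenated with the code
# t_score_sq:  (B, D + 1)        one logit per candidate
# t_belief:    softmax over the D + 1 candidates
# t_errs:      cross-entropy against target index 1 in every episode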
Example #9
    def build(self, task, reconst_ph, channel, model, config):
        with tf.variable_scope("belief_translator") as scope:
            t_xa_true_rs = tf.reshape(
                    reconst_ph.t_xa_true,
                    (config.trainer.n_batch_episodes, 1, task.n_features))
            t_xa = tf.concat(1, (t_xa_true_rs, reconst_ph.t_xa_noise))
            #t_xa_drop = tf.dropout(t_xa, 0.9)
            t_xa_drop = t_xa
            #t_xa_true_drop = tf.nn.dropout(reconst_ph.t_xa_true, 0.9)
            t_xa_true_drop = reconst_ph.t_xa_true

            with tf.variable_scope("model") as model_scope:
                t_mean, self.v_model = net.mlp(
                        t_xa_true_drop,
                        (config.translator.n_hidden, config.channel.n_msg))
                t_model_logprob = -tf.reduce_sum(
                        tf.square(t_mean - reconst_ph.t_z), axis=1)
                self.t_model_loss = -tf.reduce_mean(t_model_logprob)

                model_scope.reuse_variables()
                t_all_mean, _ = net.mlp(
                        t_xa_drop, (config.translator.n_hidden, config.channel.n_msg))
                t_model_raw_belief = -tf.reduce_sum(
                        tf.square(t_all_mean-tf.expand_dims(reconst_ph.t_z, 1)),
                        axis=2)
                if config.translator.normalization == "global":
                    assert False, "you probably don't want this"
                    t_model_rs_belief = tf.nn.softmax(tf.reshape(
                            t_model_raw_belief, (1, -1)))
                    self.t_model_belief = tf.reshape(
                            t_model_rs_belief,
                            (config.trainer.n_batch_episodes,
                                config.trainer.n_distractors + 1))
                    self.t_model_weights = tf.ones(
                            (config.trainer.n_batch_episodes,))
                elif config.translator.normalization == "local":
                    self.t_model_belief = tf.nn.softmax(t_model_raw_belief)
                    #self.t_model_weights = tf.nn.softmax(t_model_logprob)
                    self.t_model_logweights = t_model_logprob
                else:
                    assert False

            with tf.variable_scope("desc") as desc_scope:
                t_dist, self.v_desc = net.mlp(
                        t_xa_true_drop,
                        (config.translator.n_hidden, len(task.lexicon)))
                        #(len(task.lexicon),))
                self.t_dist = t_dist
                t_desc_logprob = -tf.nn.softmax_cross_entropy_with_logits(
                        t_dist, reconst_ph.t_l_msg)
                self.t_desc_loss = -tf.reduce_mean(t_desc_logprob)

                desc_scope.reuse_variables()

                t_msg_tile = tf.tile(
                        tf.reshape(
                            reconst_ph.t_l_msg,
                            (config.trainer.n_batch_episodes, 1,
                                len(task.lexicon))),
                        (1, config.trainer.n_distractors + 1, 1))
                t_all_dist, _ = net.mlp(
                        t_xa_drop,
                        (config.translator.n_hidden, len(task.lexicon)))
                        #(len(task.lexicon),))
                t_all_scores = -tf.nn.softmax_cross_entropy_with_logits(
                        t_all_dist, t_msg_tile)

                if config.translator.normalization == "global":
                    t_desc_belief_raw = tf.nn.softmax(tf.reshape(
                            t_all_scores, (1, -1)))
                    t_desc_belief = tf.reshape(
                            t_desc_belief_raw,
                            (config.trainer.n_batch_episodes,
                                config.trainer.n_distractors + 1))
                    t_desc_belief_norm = (t_desc_belief /
                            tf.reduce_max(t_desc_belief))
                    self.t_desc_belief = t_desc_belief_norm
                    self.t_desc_weights = tf.ones(
                            (config.trainer.n_batch_episodes,))
                elif config.translator.normalization == "local":
                    self.t_desc_belief = tf.nn.softmax(t_all_scores)
                    #self.t_desc_weights = tf.nn.softmax(t_desc_logprob)
                    self.t_desc_logweights = t_desc_logprob
                else:
                    assert False

            optimizer = tf.train.AdamOptimizer(config.translator.step_size)

            #varz = self.v_model + self.v_desc
            #self.t_loss = self.t_desc_loss + self.t_model_loss
            #self.t_train_op = optimizer.minimize(
            #        self.t_loss, var_list=varz)
            self.t_train_model_op = optimizer.minimize(
                    self.t_model_loss, var_list=self.v_model)
            self.t_train_desc_op = optimizer.minimize(
                    self.t_desc_loss, var_list=self.v_desc)
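The two minimize ops touch disjoint variable sets (v_model and v_desc), so they can safely be run in the same session step. A hypothetical training fragment, assuming a feed dict that populates the reconst_ph placeholders and a translator instance of the class above:

# Hypothetical training step -- not from the original source.
_, _, model_loss, desc_loss = session.run(
        [translator.t_train_model_op, translator.t_train_desc_op,
         translator.t_model_loss, translator.t_desc_loss],
        feed_dict=feed)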