Example #1
    def build_graph(self, assemble_model=False):
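        # Builds actor and critic as separate subnets over a shared input placeholder;
        # continuous control gets a mean head plus ConcatFixedStd, discrete control a
        # softmax head. LSTM state feeds are attached per subnet when configured.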
        input_placeholder = layer.InputPlaceholder(dppo_config.config.input)

        policy_head = Network(input_placeholder)
        if dppo_config.config.output.continuous:
            output = layer.Dense(policy_head, dppo_config.config.output.action_size, init_var=0.01)
            actor = ConcatFixedStd(output)
            actor_layers = [output, actor]
        else:
            actor = layer.Dense(policy_head, dppo_config.config.output.action_size,
                                activation=layer.Activation.Softmax, init_var=0.01)
            actor_layers = [actor]

        value_head = Network(input_placeholder)
        critic = layer.Dense(value_head, 1)

        feeds = dict(head=actor, weights=layer.Weights(policy_head, *actor_layers),
                     ph_state=input_placeholder.ph_state)
        if dppo_config.config.use_lstm:
            feeds.update(policy_head.lstm_items)
        self.actor = Subnet(**feeds)

        feeds = dict(head=critic, weights=layer.Weights(value_head, critic),
                     ph_state=input_placeholder.ph_state)
        if dppo_config.config.use_lstm:
            feeds.update(value_head.lstm_items)
        self.critic = Subnet(**feeds)

        if assemble_model:
            self.policy = PolicyModel(self.actor)
            self.value_func = ValueModel(self.critic)

            if dppo_config.config.use_lstm:
                self.lstm_zero_state = [self.actor.lstm_zero_state, self.critic.lstm_zero_state]
Example #2
    def build_graph(self):
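        # DA3C graph: independent actor and critic heads over one input placeholder,
        # each wrapped as a Subnet with its own weights and optional LSTM state feeds.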
        input_placeholder = layer.InputPlaceholder(da3c_config.config.input)

        actor_head = Head(input_placeholder)
        actor = layer.Actor(actor_head, da3c_config.config.output)

        critic_head = Head(input_placeholder)
        critic = layer.Dense(critic_head, 1)

        self.ph_state = input_placeholder.ph_state

        feeds = dict(head=actor, weights=layer.Weights(actor_head, actor))
        if da3c_config.config.use_lstm:
            feeds.update(actor_head.lstm_items)
        self.actor = Subnet(**feeds)

        feeds = dict(head=graph.Flatten(critic),
                     weights=layer.Weights(critic_head, critic))
        if da3c_config.config.use_lstm:
            feeds.update(critic_head.lstm_items)
        self.critic = Subnet(**feeds)

        self.weights = layer.Weights(actor_head, actor, critic_head, critic)
        if da3c_config.config.use_lstm:
            self.lstm_zero_state = (self.actor.lstm_zero_state,
                                    self.critic.lstm_zero_state)
Example #3
    def build_graph(self, input_placeholder):
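        # Shared DPPO trunk: flatten, then dense layers for all but the last hidden size;
        # the last stage is either an LSTM (state exposed via lstm_items) or a final dense layer.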
        input = layer.ConfiguredInput(dppo_config.config.input, input_placeholder=input_placeholder)
        layers = [input]

        sizes = dppo_config.config.hidden_sizes
        activation = layer.get_activation(dppo_config.config)

        fc_layers = layer.GenericLayers(layer.Flatten(input),
                                        [dict(type=layer.Dense, size=size, activation=activation)
                                         for size in sizes[:-1]])
        layers.append(fc_layers)

        last_size = fc_layers.node.shape.as_list()[-1]
        if len(sizes) > 0:
            last_size = sizes[-1]

        if dppo_config.config.use_lstm:
            lstm = layer.lstm(dppo_config.config.lstm_type,
                              graph.Expand(fc_layers, 0), n_units=last_size,
                              n_cores=dppo_config.config.lstm_num_cores)
            head = graph.Reshape(lstm, [-1, last_size])
            layers.append(lstm)

            self.lstm_items = {"ph_lstm_state": lstm.ph_state,
                               "lstm_zero_state": lstm.zero_state,
                               "lstm_state": lstm.state,
                               "lstm_reset_timestep": lstm.reset_timestep}
        else:
            head = layer.Dense(fc_layers, last_size, activation=activation)
            layers.append(head)

        self.ph_state = input.ph_state
        self.weight = layer.Weights(*layers)
        return head.node
Example #4
    def build_graph(self):
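        # DDPG-style critic: the action enters at the second layer via DoubleDense,
        # optional extra dense layers follow, and a small-init scalar output gives the Q-value.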
        input = layer.Input(cfg.config.input)
        self.ph_action = graph.Placeholder(
            np.float32, (None, cfg.config.output.action_size))

        sizes = cfg.config.hidden_sizes
        assert len(sizes) > 1, 'You need to provide sizes for at least 2 layers'

        dense_1st = layer.Dense(layer.Flatten(input), sizes[0],
                                layer.Activation.Relu)
        dense_2nd = layer.DoubleDense(dense_1st, self.ph_action, sizes[1],
                                      layer.Activation.Relu)
        layers = [input, dense_1st, dense_2nd]

        net = layer.GenericLayers(dense_2nd, [
            dict(type=layer.Dense, size=size, activation=layer.Activation.Relu)
            for size in sizes[2:]
        ])
        if len(sizes[2:]) > 0:
            layers.append(net)

        self.critic = layer.Dense(net, 1, init_var=3e-3)
        self.ph_state = input.ph_state

        layers.append(self.critic)
        self.weights = layer.Weights(*layers)
Example #5
    def build_graph(self):
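        # Perception module: a single ReLU dense layer of cfg.d units over the flattened input.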
        input = layer.Input(cfg.input)

        self.perception =\
            layer.Dense(layer.Flatten(input), cfg.d,  # d=256
                        activation=layer.Activation.Relu)

        self.weights = layer.Weights(input, self.perception)

        self.ph_state = input.ph_state
Example #6
    def build_graph(self):
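        # FeUdal-Networks-style worker: feeds the shared perception through an LSTM, embeds
        # candidate actions (U) and the manager's goal (w), and combines them into a softmax
        # policy pi and a value estimate vi.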
        super(_WorkerNetwork, self).__init__()

        self.lstm = CustomBasicLSTMCell(cfg.d)  # d=256
        # needs wrap as layer to retrieve weights

        self.ph_goal =\
            graph.Placeholder(np.float32, shape=(None, cfg.d), name="ph_goal")
        # self.ph_goal = tf.placeholder(tf.float32, [None, cfg.d], name="ph_goal")

        perception_expanded = graph.Expand(self.perception.node, 0)

        self.ph_step_size = \
            graph.Placeholder(np.float32, shape=(1,), name="ph_w_step_size")
        # tf.placeholder(tf.float32, [1], name="ph_w_step_size")
        self.ph_initial_lstm_state = \
            graph.Placeholder(np.float32, shape=(1, self.lstm.state_size), name="ph_w_lstm_state")
        # tf.placeholder(tf.float32, [1, self.lstm.state_size], name="ph_w_lstm_state")

        lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(
            self.lstm,
            perception_expanded,
            initial_state=self.ph_initial_lstm_state,
            sequence_length=self.ph_step_size,
            time_major=False)
        lstm_outputs = tf.reshape(lstm_outputs, [-1, cfg.d])
        sg_lstm_outputs = graph.TfNode(lstm_outputs)

        U = layer.LinearLayer(sg_lstm_outputs,
                              shape=(cfg.d, cfg.action_size * cfg.k),
                              transformation=tf.matmul)
        U_embedding = tf.transpose(tf.reshape(U.node, [cfg.action_size, cfg.k, -1]))

        w = layer.LinearLayer(self.ph_goal,
                              shape=(cfg.d, cfg.k),
                              transformation=tf.matmul,
                              bias=False)
        w_reshaped = tf.reshape(w.node, [-1, 1, cfg.k])

        self.pi = layer.MatmulLayer(w_reshaped,
                                    U_embedding,
                                    activation=layer.Activation.Softmax)
        self.vi = layer.LinearLayer(sg_lstm_outputs,
                                    shape=(cfg.d, 1),
                                    transformation=tf.matmul)

        self.weights = layer.Weights(
            self.weights, graph.TfNode((self.lstm.matrix, self.lstm.bias)), U,
            w, self.vi)

        self.lstm_state_out =\
            graph.VarAssign(graph.Variable(np.zeros([1, self.lstm.state_size]),
                                           dtype=np.float32, name="lstm_state_out"),
                            np.zeros([1, self.lstm.state_size]))
Example #7
    def build_graph(self):
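        # DDPG actor: ReLU dense trunk into a DDPGActor output layer; self.actor exposes
        # its scaled_out node.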
        input = layer.ConfiguredInput(cfg.config.input)

        dense = layer.GenericLayers(layer.Flatten(input),
                                    [dict(type=layer.Dense, size=size, activation=layer.Activation.Relu)
                                     for size in cfg.config.hidden_sizes])

        actor = layer.DDPGActor(dense, cfg.config.output)

        self.ph_state = input.ph_state
        self.actor = actor.scaled_out
        self.weights = layer.Weights(input, dense, actor)
Example #8
    def build_graph(self):
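        # DA3C network with a configurable trunk: an LSTM head (state tensors exposed on self)
        # or a stack of dense layers, feeding shared actor and critic outputs; actor weights
        # are also exposed separately.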
        input = layer.ConfiguredInput(da3c_config.config.input)

        sizes = da3c_config.config.hidden_sizes
        layers = [input]
        flattened_input = layer.Flatten(input)

        last_size = flattened_input.node.shape.as_list()[-1]
        if len(sizes) > 0:
            last_size = sizes[-1]

        if da3c_config.config.use_lstm:
            lstm = layer.lstm(da3c_config.config.lstm_type,
                              graph.Expand(flattened_input, 0),
                              n_units=last_size,
                              n_cores=da3c_config.config.lstm_num_cores)
            head = graph.Reshape(lstm, [-1, last_size])
            layers.append(lstm)

            self.ph_lstm_state = lstm.ph_state
            self.lstm_zero_state = lstm.zero_state
            self.lstm_state = lstm.state
            self.lstm_reset_timestep = lstm.reset_timestep
        else:
            activation = layer.get_activation(da3c_config.config)
            head = layer.GenericLayers(flattened_input, [
                dict(type=layer.Dense, size=size, activation=activation)
                for size in sizes
            ])
            layers.append(head)

        actor = layer.Actor(head, da3c_config.config.output)
        critic = layer.Dense(head, 1)
        layers.extend((actor, critic))

        self.ph_state = input.ph_state
        self.actor = actor
        self.critic = graph.Flatten(critic)
        self.weights = layer.Weights(*layers)
        self.actor_weights = layer.Weights(actor)
Example #9
    def build_graph(self):
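        # Plain policy-gradient network: ReLU dense trunk with a softmax head over the actions.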
        input = layer.Input(pg_config.config.input)

        dense = layer.GenericLayers(layer.Flatten(input),
                                    [dict(type=layer.Dense, size=size, activation=layer.Activation.Relu)
                                     for size in pg_config.config.hidden_sizes])

        actor = layer.Dense(dense, pg_config.config.output.action_size,
                            activation=layer.Activation.Softmax)

        self.state = input.ph_state
        self.weights = layer.Weights(input, dense, actor)
        return actor.node
Example #10
    def build_graph(self):
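        # TRPO value function: the timestep is fed as an extra feature alongside the state;
        # the loss is MSE to the value target plus a small L2 penalty, and weights/gradients
        # are exposed as flat vectors for an external optimizer.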
        input = layer.ConfiguredInput(trpo_config.config.input)
        # add one extra feature for timestep
        ph_step = graph.Placeholder(np.float32, shape=[None, 1])
        state = (input.ph_state, ph_step)

        concatenated = graph.Concat([layer.Flatten(input), ph_step], axis=1)

        activation = layer.Activation.get_activation(
            trpo_config.config.activation)
        head = layer.GenericLayers(concatenated, [
            dict(type=layer.Dense, size=size, activation=activation)
            for size in trpo_config.config.hidden_sizes
        ])
        value = layer.Dense(head, 1)

        ph_ytarg_ny = graph.Placeholder(np.float32)
        mse = graph.TfNode(
            tf.reduce_mean(tf.square(ph_ytarg_ny.node - value.node)))

        weights = layer.Weights(input, head, value)

        sg_get_weights_flatten = graph.GetVariablesFlatten(weights)
        sg_set_weights_flatten = graph.SetVariablesFlatten(weights)

        l2 = graph.TfNode(1e-3 * tf.add_n([
            tf.reduce_sum(tf.square(v))
            for v in utils.Utils.flatten(weights.node)
        ]))
        loss = graph.TfNode(l2.node + mse.node)

        sg_gradients = optimizer.Gradients(weights, loss=loss)
        sg_gradients_flatten = graph.GetVariablesFlatten(
            sg_gradients.calculate)

        self.op_value = self.Op(value, state=state)

        self.op_get_weights_flatten = self.Op(sg_get_weights_flatten)
        self.op_set_weights_flatten = self.Op(
            sg_set_weights_flatten, value=sg_set_weights_flatten.ph_value)

        self.op_compute_loss_and_gradient = self.Ops(loss,
                                                     sg_gradients_flatten,
                                                     state=state,
                                                     ytarg_ny=ph_ytarg_ny)

        self.op_losses = self.Ops(loss,
                                  mse,
                                  l2,
                                  state=state,
                                  ytarg_ny=ph_ytarg_ny)
Example #11
    def build_graph(self):
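        # DA3C network with an optional Universe-style input stack (four Elu convolutions),
        # then an LSTM or dense trunk feeding shared actor and critic heads.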
        conv_layer = dict(type=layer.Convolution, activation=layer.Activation.Elu,
                          n_filters=32, filter_size=[3, 3], stride=[2, 2],
                          border=layer.Border.Same)
        input_layers = [dict(conv_layer)] * 4 if da3c_config.config.input.universe else None
        input = layer.Input(da3c_config.config.input, descs=input_layers)

        sizes = da3c_config.config.hidden_sizes
        layers = [input]
        flattened_input = layer.Flatten(input)

        last_size = flattened_input.node.shape.as_list()[-1]
        if len(sizes) > 0:
            last_size = sizes[-1]

        if da3c_config.config.use_lstm:
            lstm = layer.LSTM(graph.Expand(flattened_input, 0), n_units=last_size)
            head = graph.Reshape(lstm, [-1, last_size])
            layers.append(lstm)

            self.ph_lstm_state = lstm.ph_state
            self.lstm_zero_state = lstm.zero_state
            self.lstm_state = lstm.state
        else:
            head = layer.GenericLayers(flattened_input,
                                       [dict(type=layer.Dense, size=size,
                                             activation=layer.Activation.Relu) for size in sizes])
            layers.append(head)

        actor = layer.Actor(head, da3c_config.config.output)
        critic = layer.Dense(head, 1)
        layers.extend((actor, critic))

        self.ph_state = input.ph_state
        self.actor = actor
        self.critic = graph.Flatten(critic)
        self.weights = layer.Weights(*layers)
        self.actor_weights = layer.Weights(actor)
Example #12
    def build_graph(self, input_placeholder):
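        # Trunk variant with optional Universe conv stack and Relu6 dense layers; ends in
        # either an LSTM or a final dense head and returns the head node for actor/critic
        # layers built on top.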
        conv_layer = dict(type=layer.Convolution,
                          activation=layer.Activation.Elu,
                          n_filters=32,
                          filter_size=[3, 3],
                          stride=[2, 2],
                          border=layer.Border.Same)
        input_layers = [dict(conv_layer)] * 4 if da3c_config.config.input.universe else None
        input = layer.Input(da3c_config.config.input,
                            descs=input_layers,
                            input_placeholder=input_placeholder)

        sizes = da3c_config.config.hidden_sizes
        layers = [input]
        flattened_input = layer.Flatten(input)

        fc_layers = layer.GenericLayers(flattened_input, [
            dict(type=layer.Dense, size=size, activation=layer.Activation.Relu6)
            for size in sizes[:-1]
        ])
        layers.append(fc_layers)

        last_size = fc_layers.node.shape.as_list()[-1]
        if len(sizes) > 0:
            last_size = sizes[-1]

        if da3c_config.config.use_lstm:
            lstm = layer.LSTM(graph.Expand(fc_layers, 0), n_units=last_size)
            head = graph.Reshape(lstm, [-1, last_size])
            layers.append(lstm)

            self.ph_lstm_state = lstm.ph_state
            self.lstm_zero_state = lstm.zero_state
            self.lstm_state = lstm.state
        else:
            head = layer.Dense(fc_layers,
                               last_size,
                               activation=layer.Activation.Relu6)
            layers.append(head)

        self.ph_state = input.ph_state
        self.weight = layer.Weights(*layers)
        return head.node
Example #13
    def build_graph(self):
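        # TRPO policy: tanh dense trunk; continuous control gets a mean head with ConcatFixedStd
        # (presumably a state-independent std concatenated to the mean), discrete control a
        # softmax head.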
        input = layer.Input(trpo_config.config.input)

        head = layer.GenericLayers(layer.Flatten(input),
                                   [dict(type=layer.Dense, size=size, activation=layer.Activation.Tanh)
                                    for size in trpo_config.config.hidden_sizes])

        if trpo_config.config.output.continuous:
            output = layer.Dense(head, trpo_config.config.output.action_size)
            actor = ConcatFixedStd(output)
            actor_layers = [output, actor]
        else:
            actor = layer.Dense(head, trpo_config.config.output.action_size,
                                activation=layer.Activation.Softmax)
            actor_layers = [actor]

        self.ph_state = input.ph_state
        self.actor = actor
        self.weights = layer.Weights(*([input, head] + actor_layers))
Example #14
    def build_graph(self):
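        # FeUdal-Networks-style manager: projects perception into Mspace, runs a dilated LSTM,
        # and emits an l2-normalized goal vector plus a value estimate.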
        self.ph_perception =\
            graph.Placeholder(np.float32, shape=(None, cfg.d), name="ph_perception")
        # tf.placeholder(tf.float32, shape=[None, cfg.d], name="ph_perception")

        self.Mspace =\
            layer.Dense(self.ph_perception, cfg.d,  # d=256
                        activation=layer.Activation.Relu)
        Mspace_expanded = graph.Expand(self.Mspace, 0)

        self.lstm = DilatedLSTMCell(cfg.d, num_cores=cfg.d)
        # needs wrap as layer to retrieve weights

        self.ph_step_size =\
            graph.Placeholder(np.float32, shape=(1,), name="ph_m_step_size")
        # tf.placeholder(tf.float32, [1], name="ph_m_step_size")
        self.ph_initial_lstm_state =\
            graph.Placeholder(np.float32, shape=(1, self.lstm.state_size), name="ph_m_lstm_state")
        # tf.placeholder(tf.float32, [1, self.lstm.state_size], name="ph_m_lstm_state")

        lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(
            self.lstm,
            Mspace_expanded,
            initial_state=self.ph_initial_lstm_state,
            sequence_length=self.ph_step_size,
            time_major=False)
        lstm_outputs = tf.reshape(lstm_outputs, [-1, cfg.d])
        sg_lstm_outputs = graph.TfNode(lstm_outputs)

        self.goal = tf.nn.l2_normalize(graph.Flatten(sg_lstm_outputs).node, dim=1)

        critic = layer.Dense(sg_lstm_outputs, 1)
        self.value = layer.Flatten(critic)

        self.weights = layer.Weights(
            self.Mspace, graph.TfNode((self.lstm.matrix, self.lstm.bias)),
            critic)

        self.lstm_state_out =\
            graph.VarAssign(graph.Variable(np.zeros([1, self.lstm.state_size]),
                                           dtype=np.float32, name="lstm_state_out"),
                            np.zeros([1, self.lstm.state_size]))
Example #15
    def build_graph(self):
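        # ICM (Intrinsic Curiosity Module): consecutive state pairs are packed along the batch;
        # the inverse model predicts the taken action from (s_t, s_t+1), the forward model
        # predicts the next state's features, and the scaled forward error doubles as the
        # intrinsic reward.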
        conv_layer = dict(type=layer.Convolution, activation=layer.Activation.Elu,
                          n_filters=32, filter_size=[3, 3], stride=[2, 2],
                          border=layer.Border.Same)
        input = layer.Input(cfg.config.input, descs=[dict(conv_layer)] * 4)

        shape = [None] + [cfg.config.output.action_size]
        self.ph_probs = graph.Placeholder(np.float32, shape=shape, name='act_probs')
        self.ph_taken = graph.Placeholder(np.int32, shape=(None,), name='act_taken')

        flattened_input = layer.Flatten(input)
        last_size = flattened_input.node.shape.as_list()[-1]

        inverse_inp = graph.Reshape(input, [-1, last_size*2])

        get_first = graph.TfNode(inverse_inp.node[:, :last_size])
        get_second = graph.TfNode(inverse_inp.node[:, last_size:])

        forward_inp = graph.Concat([get_first, self.ph_probs], axis=1)

        fc_size = cfg.config.hidden_sizes[-1]
        inv_fc1 = layer.Dense(inverse_inp, fc_size, layer.Activation.Relu)
        inv_fc2 = layer.Dense(inv_fc1, shape[-1])   # layer.Activation.Softmax

        fwd_fc1 = layer.Dense(forward_inp, fc_size, layer.Activation.Relu)
        fwd_fc2 = layer.Dense(fwd_fc1, last_size)

        inv_loss = graph.SparseSoftmaxCrossEntropyWithLogits(inv_fc2, self.ph_taken).op
        fwd_loss = graph.L2loss(fwd_fc2.node - get_second.node).op

        self.ph_state = input.ph_state  # batch size must be even for now: states arrive in consecutive pairs
        self.rew_out = graph.TfNode(cfg.config.icm.nu * fwd_loss)

        self.loss = graph.TfNode(cfg.config.icm.beta * fwd_loss + (1 - cfg.config.icm.beta) * inv_loss)

        layers = [input, inv_fc1, inv_fc2, fwd_fc1, fwd_fc2]
        self.weights = layer.Weights(*layers)
Example #16
    def build_graph(self):
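        # TRPO value function over a flat state vector (timestep appended as one extra feature);
        # same MSE-plus-L2 objective and flat weight/gradient ops as the ConfiguredInput
        # variant above.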
        input_size, = trpo_config.config.input.shape

        # add one extra feature for timestep
        ph_state = graph.Placeholder(np.float32, shape=(None, input_size + 1))

        activation = layer.Activation.get_activation(trpo_config.config.activation)
        descs = [dict(type=layer.Dense, size=size, activation=activation) for size
                 in trpo_config.config.hidden_sizes]
        descs.append(dict(type=layer.Dense, size=1))

        value = layer.GenericLayers(ph_state, descs)

        ph_ytarg_ny = graph.Placeholder(np.float32)
        mse = graph.TfNode(tf.reduce_mean(tf.square(ph_ytarg_ny.node - value.node)))

        weights = layer.Weights(value)

        sg_get_weights_flatten = GetVariablesFlatten(weights)
        sg_set_weights_flatten = SetVariablesFlatten(weights)

        l2 = graph.TfNode(1e-3 * tf.add_n([tf.reduce_sum(tf.square(v)) for v in
                                           utils.Utils.flatten(weights.node)]))
        loss = graph.TfNode(l2.node + mse.node)

        sg_gradients = optimizer.Gradients(weights, loss=loss)
        sg_gradients_flatten = GetVariablesFlatten(sg_gradients.calculate)

        self.op_value = self.Op(value, state=ph_state)

        self.op_get_weights_flatten = self.Op(sg_get_weights_flatten)
        self.op_set_weights_flatten = self.Op(sg_set_weights_flatten, value=sg_set_weights_flatten.ph_value)

        self.op_compute_loss_and_gradient = self.Ops(loss, sg_gradients_flatten, state=ph_state,
                                                     ytarg_ny=ph_ytarg_ny)

        self.op_losses = self.Ops(loss, mse, l2, state=ph_state, ytarg_ny=ph_ytarg_ny)
Example #17
    def build_graph(self):
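        # DQN Q-network with optional dueling head: the last hidden layer is split into value
        # and advantage streams (hidden size must be even), and the advantage mean is subtracted
        # so the decomposition is identifiable.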
        input = layer.ConfiguredInput(config.input)

        hidden = layer.GenericLayers(layer.Flatten(input), [
            dict(type=layer.Dense, size=size, activation=layer.Activation.Tanh)
            for size in config.hidden_sizes
        ])

        weights = [input, hidden]

        if config.dueling_dqn:
            if config.hidden_sizes:
                v_input, a_input = tf.split(
                    hidden.node,
                    [config.hidden_sizes[-1] // 2, config.hidden_sizes[-1] // 2],
                    axis=1)

                v_input = graph.TfNode(v_input)
                a_input = graph.TfNode(a_input)
            else:
                v_input, a_input = hidden, hidden

            v_output = layer.Dense(v_input, 1)
            a_output = layer.Dense(a_input, config.output.action_size)

            output = v_output.node + a_output.node - tf.reduce_mean(
                a_output.node, axis=1, keep_dims=True)
            output = graph.TfNode(output)

            weights.extend([v_output, a_output])
        else:
            output = layer.Dense(hidden, config.output.action_size)
            weights.append(output)

        self.ph_state = input.ph_state
        self.output = output
        self.weights = layer.Weights(*weights)