Example #1
    def add_logits_op_train(self):
        # Build the training Q-network: three conv layers feeding a stateful
        # LSTM, followed by a linear layer producing one Q-value per action.
        if self.cnn_format == "NHWC":
            # The state is stored channels-first; move channels last for NHWC.
            x = tf.transpose(self.state, [0, 2, 3, 1])
        else:
            x = self.state
        self.image_summary = []
        w, b, out, summary = conv2d_layer(x,
                                          32, [8, 8], [4, 4],
                                          scope_name="conv1_train",
                                          summary_tag="conv1_out",
                                          activation=tf.nn.relu,
                                          data_format=self.cnn_format)
        self.w["wc1"] = w
        self.w["bc1"] = b
        self.image_summary.append(summary)

        w, b, out, summary = conv2d_layer(out,
                                          64, [4, 4], [2, 2],
                                          scope_name="conv2_train",
                                          summary_tag="conv2_out",
                                          activation=tf.nn.relu,
                                          data_format=self.cnn_format)
        self.w["wc2"] = w
        self.w["bc2"] = b
        self.image_summary.append(summary)

        w, b, out, summary = conv2d_layer(out,
                                          64, [3, 3], [1, 1],
                                          scope_name="conv3_train",
                                          summary_tag="conv3_out",
                                          activation=tf.nn.relu,
                                          data_format=self.cnn_format)
        self.w["wc3"] = w
        self.w["bc3"] = b
        self.image_summary.append(summary)

        # Flatten the conv features and add a time axis of length 1
        # so the LSTM is fed one frame per step.
        shape = out.get_shape().as_list()
        out_flat = tf.reshape(
            out, [tf.shape(out)[0], 1, shape[1] * shape[2] * shape[3]])
        out, state = stateful_lstm(out_flat,
                                   self.num_lstm_layers,
                                   self.lstm_size,
                                   tuple([self.lstm_state_train]),
                                   scope_name="lstm_train")
        # Expose the LSTM cell and hidden state so they can be carried
        # across successive steps of an episode.
        self.state_output_c = state[0][0]
        self.state_output_h = state[0][1]
        # Drop the time axis before the fully connected output layer.
        shape = out.get_shape().as_list()
        out = tf.reshape(out, [tf.shape(out)[0], shape[2]])
        w, b, out = fully_connected_layer(out,
                                          self.n_actions,
                                          scope_name="out_train",
                                          activation=None)

        self.w["wout"] = w
        self.w["bout"] = b

        self.q_out = out
        # Greedy action: index of the highest Q-value per batch element.
        self.q_action = tf.argmax(self.q_out, axis=1)
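The conv2d_layer helper used above is project code rather than a TensorFlow API, and its implementation is not shown here. Below is a minimal sketch of what such a helper could look like under TensorFlow 1.x, assuming it returns the weights, bias, output tensor, and an optional image summary; the real helper in the source project may differ.

import tensorflow as tf

def conv2d_layer(x, n_filters, kernel, stride, scope_name,
                 summary_tag=None, activation=tf.nn.relu,
                 data_format="NCHW"):
    # Hypothetical sketch of the helper used in the examples above.
    with tf.variable_scope(scope_name):
        if data_format == "NHWC":
            in_channels = x.get_shape().as_list()[-1]
            strides = [1, stride[0], stride[1], 1]
        else:
            in_channels = x.get_shape().as_list()[1]
            strides = [1, 1, stride[0], stride[1]]
        w = tf.get_variable("w", kernel + [in_channels, n_filters],
                            initializer=tf.variance_scaling_initializer())
        b = tf.get_variable("b", [n_filters],
                            initializer=tf.zeros_initializer())
        out = tf.nn.conv2d(x, w, strides=strides, padding="VALID",
                           data_format=data_format)
        out = tf.nn.bias_add(out, b, data_format=data_format)
        if activation is not None:
            out = activation(out)
        summary = None
        if summary_tag is not None:
            # Summarize the first feature map of the first batch element.
            img = out if data_format == "NHWC" else tf.transpose(out, [0, 2, 3, 1])
            summary = tf.summary.image(summary_tag, img[:1, :, :, :1])
    return w, b, out, summary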
Example #2
    def add_logits_op_train(self):
        # Training Q-network without a convolutional front end:
        # the state is fed directly to the stateful LSTM.
        x = self.state
        out, state = stateful_lstm(x=x,
                                   num_layers=self.num_lstm_layers,
                                   lstm_size=self.lstm_size,
                                   state_input=tuple([self.lstm_state_train]),
                                   scope_name="lstm_train")
        self.state_output_c = state[0][0]
        self.state_output_h = state[0][1]
        shape = out.get_shape().as_list()  # [None, 1, 512]

        out = tf.reshape(out, [tf.shape(out)[0], shape[2]])
        w, b, out = fully_connected_layer(out,
                                          self.n_actions,
                                          scope_name="out_train",
                                          activation=None)

        self.w["wout"] = w
        self.w["bout"] = b

        self.q_out = out
        self.q_action = tf.argmax(self.q_out, axis=1)
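Similarly, stateful_lstm is a project helper rather than a TensorFlow function. A hedged sketch of one possible implementation in TensorFlow 1.x, assuming x has shape [batch, time, features] and state_input is a tuple with one LSTMStateTuple per layer (which matches the state[0][0] / state[0][1] indexing in the examples):

import tensorflow as tf

def stateful_lstm(x, num_layers, lstm_size, state_input, scope_name):
    # Hypothetical sketch of the helper used in the examples above.
    with tf.variable_scope(scope_name):
        cells = [tf.nn.rnn_cell.LSTMCell(lstm_size) for _ in range(num_layers)]
        cell = tf.nn.rnn_cell.MultiRNNCell(cells)
        # state_input: tuple of LSTMStateTuple(c, h), one entry per layer.
        out, state = tf.nn.dynamic_rnn(cell, x,
                                       initial_state=state_input,
                                       dtype=tf.float32)
    # out: [batch, time, lstm_size]; state: per-layer (c, h) tuples.
    return out, state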
Example #3
    def add_logits_op_target(self):
        # Target Q-network: same architecture as the training network in the
        # previous example, but with its own scopes and weights (self.w_target).
        x = self.state_target  # shape: [None, 1, num_colors]

        out, state = stateful_lstm(x,
                                   self.num_lstm_layers,
                                   self.lstm_size,
                                   tuple([self.lstm_state_target]),
                                   scope_name="lstm_target")
        self.state_output_target_c = state[0][0]
        self.state_output_target_h = state[0][1]
        shape = out.get_shape().as_list()

        out = tf.reshape(out, [tf.shape(out)[0], shape[2]])

        w, b, out = fully_connected_layer(out,
                                          self.n_actions,
                                          scope_name="out_target",
                                          activation=None)

        self.w_target["wout"] = w
        self.w_target["bout"] = b

        self.q_target_out = out
        self.q_target_action = tf.argmax(self.q_target_out, axis=1)
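Examples 2 and 3 build the training and target networks with separate weight dictionaries (self.w and self.w_target). In a DQN-style setup the target weights are periodically copied from the training weights; a minimal sketch, assuming both dictionaries share the same keys (the method names add_copy_op and update_target_network are hypothetical and not part of the examples above):

    def add_copy_op(self):
        # Assign each target weight from the matching training weight.
        copy_ops = [tf.assign(self.w_target[name], self.w[name])
                    for name in self.w]
        self.copy_target_op = tf.group(*copy_ops)

    def update_target_network(self, sess):
        # Run periodically, e.g. every few thousand training steps.
        sess.run(self.copy_target_op)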