Exemplo n.º 1
0
    def add_logits_op_target(self):
        """Build the target network: three conv layers, two FC layers.

        Populates self.w_target with every layer's weights/biases and sets
        self.q_target_out (per-action Q-values) and self.q_target_action
        (greedy action index).
        """
        # Channels-last hardware wants NHWC; the state is stored as NCHW.
        if self.cnn_format == "NHWC":
            net = tf.transpose(self.state_target, [0, 2, 3, 1])
        else:
            net = self.state_target

        # (filters, kernel, stride, scope, weight key, bias key) per conv layer.
        conv_specs = [
            (32, [8, 8], [4, 4], "conv1_target", "wc1", "bc1"),
            (64, [4, 4], [2, 2], "conv2_target", "wc2", "bc2"),
            (64, [3, 3], [1, 1], "conv3_target", "wc3", "bc3"),
        ]
        for n_filters, kernel, stride, scope, w_key, b_key in conv_specs:
            w, b, net, _ = conv2d_layer(net, n_filters, kernel, stride,
                                        scope_name=scope, summary_tag=None,
                                        activation=tf.nn.relu,
                                        data_format=self.cnn_format)
            self.w_target[w_key] = w
            self.w_target[b_key] = b

        # Flatten all non-batch dimensions into one feature axis.
        dims = net.get_shape().as_list()
        flat = tf.reshape(net, [-1, reduce(lambda a, b: a * b, dims[1:])])

        w, b, net = fully_connected_layer(flat, 512, scope_name="fully1_target")
        self.w_target["wf1"] = w
        self.w_target["bf1"] = b

        w, b, net = fully_connected_layer(net, self.n_actions,
                                          scope_name="out_target", activation=None)
        self.w_target["wout"] = w
        self.w_target["bout"] = b

        self.q_target_out = net
        self.q_target_action = tf.argmax(self.q_target_out, axis=1)
Exemplo n.º 2
0
    def add_logits_op_train(self):
        """Build the online network: conv stack -> stateful LSTM -> linear head.

        Populates self.w with layer weights/biases, self.image_summary with
        per-conv-layer summaries, exposes the LSTM state via
        self.state_output_c / self.state_output_h, and sets self.q_out /
        self.q_action.
        """
        # Channels-last hardware wants NHWC; the state is stored as NCHW.
        if self.cnn_format == "NHWC":
            net = tf.transpose(self.state, [0, 2, 3, 1])
        else:
            net = self.state

        self.image_summary = []
        # (filters, kernel, stride, scope, summary tag, weight key, bias key).
        conv_specs = [
            (32, [8, 8], [4, 4], "conv1_train", "conv1_out", "wc1", "bc1"),
            (64, [4, 4], [2, 2], "conv2_train", "conv2_out", "wc2", "bc2"),
            (64, [3, 3], [1, 1], "conv3_train", "conv3_out", "wc3", "bc3"),
        ]
        for n_filters, kernel, stride, scope, tag, w_key, b_key in conv_specs:
            w, b, net, summary = conv2d_layer(net, n_filters, kernel, stride,
                                              scope_name=scope, summary_tag=tag,
                                              activation=tf.nn.relu,
                                              data_format=self.cnn_format)
            self.w[w_key] = w
            self.w[b_key] = b
            self.image_summary.append(summary)

        # Collapse the spatial/channel dims and add a length-1 time axis so
        # the LSTM sees [batch, time=1, features].
        dims = net.get_shape().as_list()
        lstm_in = tf.reshape(
            net, [tf.shape(net)[0], 1, dims[1] * dims[2] * dims[3]])
        net, state = stateful_lstm(lstm_in,
                                   self.num_lstm_layers,
                                   self.lstm_size,
                                   tuple([self.lstm_state_train]),
                                   scope_name="lstm_train")
        # Expose the recurrent state so callers can feed it back next step.
        self.state_output_c = state[0][0]
        self.state_output_h = state[0][1]

        # Drop the time axis: [batch, 1, units] -> [batch, units].
        dims = net.get_shape().as_list()
        net = tf.reshape(net, [tf.shape(net)[0], dims[2]])

        w, b, net = fully_connected_layer(net,
                                          self.n_actions,
                                          scope_name="out_train",
                                          activation=None)
        self.w["wout"] = w
        self.w["bout"] = b

        self.q_out = net
        self.q_action = tf.argmax(self.q_out, axis=1)
Exemplo n.º 3
0
    def add_logits_op_train(self):
        """Build the online network as a two-layer MLP over the flattened state.

        Populates self.w with the FC weights/biases and sets self.q_out
        (per-action Q-values) and self.q_action (greedy action index).
        """
        # NOTE(review): a large commented-out conv-stack variant was removed
        # here; it referenced an undefined `out` and could never be re-enabled
        # as written. Recover it from version control if ever needed.
        x = self.state
        # Flatten every non-batch dimension into a single feature vector.
        shape = x.get_shape().as_list()
        x_flat = tf.reshape(x, [-1, reduce(lambda a, b: a * b, shape[1:])])

        w, b, out = fully_connected_layer(x_flat, 512, scope_name="fully1_train")
        self.w["wf1"] = w
        self.w["bf1"] = b

        w, b, out = fully_connected_layer(out, self.n_actions,
                                          scope_name="out_train", activation=None)
        self.w["wout"] = w
        self.w["bout"] = b

        self.q_out = out
        self.q_action = tf.argmax(self.q_out, axis=1)
Exemplo n.º 4
0
    def add_logits_op_target(self):
        """Build the target network: conv stack -> stateful GRU -> linear head.

        Populates self.w_target with layer weights/biases, exposes the GRU
        state via self.state_output_target, and sets self.q_target_out /
        self.q_target_action.
        """
        # Channels-last hardware wants NHWC; the state is stored as NCHW.
        if self.cnn_format == "NHWC":
            net = tf.transpose(self.state_target, [0, 2, 3, 1])
        else:
            net = self.state_target

        # (filters, kernel, stride, scope, weight key, bias key) per conv layer.
        conv_specs = [
            (32, [8, 8], [4, 4], "conv1_target", "wc1", "bc1"),
            (64, [4, 4], [2, 2], "conv2_target", "wc2", "bc2"),
            (64, [3, 3], [1, 1], "conv3_target", "wc3", "bc3"),
        ]
        for n_filters, kernel, stride, scope, w_key, b_key in conv_specs:
            w, b, net, _ = conv2d_layer(net, n_filters, kernel, stride,
                                        scope_name=scope,
                                        summary_tag=None,
                                        activation=tf.nn.relu,
                                        data_format=self.cnn_format)
            self.w_target[w_key] = w
            self.w_target[b_key] = b

        # Collapse the spatial/channel dims and add a length-1 time axis so
        # the GRU sees [batch, time=1, features].
        dims = net.get_shape().as_list()
        gru_in = tf.reshape(
            net, [tf.shape(net)[0], 1, dims[1] * dims[2] * dims[3]])
        net, state = stateful_gru(gru_in,
                                  self.num_lstm_layers,
                                  self.gru_size,
                                  scope_name="gru_target")
        # GRU has a single state tensor (no separate cell/hidden pair).
        self.state_output_target = state[0]

        # Drop the time axis: [batch, 1, units] -> [batch, units].
        dims = net.get_shape().as_list()
        net = tf.reshape(net, [tf.shape(net)[0], dims[2]])

        w, b, net = fully_connected_layer(net,
                                          self.n_actions,
                                          scope_name="out_target",
                                          activation=None)
        self.w_target["wout"] = w
        self.w_target["bout"] = b

        self.q_target_out = net
        self.q_target_action = tf.argmax(self.q_target_out, axis=1)
Exemplo n.º 5
0
Arquivo: dqn.py Projeto: dbueno96/DQN
    def add_logits_op_train(self):
        """Build the online network: 3 conv layers + 2 fully connected layers.

        Stores each layer's weights/biases in self.w, conv summaries in
        self.image_summary, and sets self.q_out / self.q_action.
        """
        self.image_summary = []
        if self.cnn_format == "NHWC": #Adapt the input x to the hardware's data layout
            x = tf.transpose(self.state, [0, 2, 3, 1])
        else:
            x = self.state
        #First convolution over the input
        w, b, out, summary = conv2d_layer(x, 32, [8, 8], [4, 4], scope_name="conv1_train", summary_tag="conv1_out",
                                          activation=tf.nn.relu, data_format=self.cnn_format)
        self.w["wc1"] = w #Store the weight, bias and summary values of the first conv
        self.w["bc1"] = b
        self.image_summary.append(summary)
        #Second convolution, fed with the output of the first
        w, b, out, summary = conv2d_layer(out, 64, [4, 4], [2, 2], scope_name="conv2_train", summary_tag="conv2_out",
                                          activation=tf.nn.relu, data_format=self.cnn_format)
        self.w["wc2"] = w #Store the weight, bias and summary values of the second conv
        self.w["bc2"] = b
        self.image_summary.append(summary)
        #Third convolution, fed with the output of the second
        w, b, out, summary = conv2d_layer(out, 64, [3, 3], [1, 1], scope_name="conv3_train", summary_tag="conv3_out",
                                          activation=tf.nn.relu, data_format=self.cnn_format)
        self.w["wc3"] = w #Store the weight, bias and summary values of the third conv
        self.w["bc3"] = b
        self.image_summary.append(summary)

        shape = out.get_shape().as_list() #Flatten the conv output (all non-batch dims)
        out_flat = tf.reshape(out, [-1, reduce(lambda x, y: x * y, shape[1:])])
        #Feed the flattened third-conv output into the fourth (fully connected) layer
        w, b, out = fully_connected_layer(out_flat, 512, scope_name="fully1_train")

        self.w["wf1"] = w #Store weight and bias of the first fully connected layer
        self.w["bf1"] = b
        #Feed the values into the final layer
        w, b, out = fully_connected_layer(out, self.n_actions, scope_name="out_train", activation=None)

        self.w["wout"] = w #Store weight and bias of the second fully connected layer
        self.w["bout"] = b

        self.q_out = out #Assign the output to the q-value attribute
        self.q_action = tf.argmax(self.q_out, axis=1) #The action is the index of the largest value in q_out
Exemplo n.º 6
0
Arquivo: dqn.py Projeto: dbueno96/DQN
    def add_logits_op_target(self):
        """Build the target network: 3 conv layers + 2 fully connected layers.

        Mirrors the online network but stores parameters in self.w_target and
        sets self.q_target_out / self.q_target_action. No summaries are taken.
        """
        if self.cnn_format == "NHWC": #Adapt the input to the hardware's data layout
            x = tf.transpose(self.state_target, [0, 2, 3, 1])
        else:
            x = self.state_target
        #First convolution over input x
        w, b, out, _ = conv2d_layer(x, 32, [8, 8], [4, 4], scope_name="conv1_target", summary_tag=None,
                                    activation=tf.nn.relu, data_format=self.cnn_format)
        self.w_target["wc1"] = w #Store the weight and bias values
        self.w_target["bc1"] = b
        #Second convolution over the output of the first conv
        w, b, out, _ = conv2d_layer(out, 64, [4, 4], [2, 2], scope_name="conv2_target", summary_tag=None,
                                    activation=tf.nn.relu, data_format=self.cnn_format)
        self.w_target["wc2"] = w#Store the weight and bias values
        self.w_target["bc2"] = b
        #Third convolution over the output of the second conv
        w, b, out, _ = conv2d_layer(out, 64, [3, 3], [1, 1], scope_name="conv3_target", summary_tag=None,
                                    activation=tf.nn.relu, data_format=self.cnn_format)
        self.w_target["wc3"] = w#Store the weight and bias values
        self.w_target["bc3"] = b

        shape = out.get_shape().as_list()  #Flatten the third conv's output (all non-batch dims)
        out_flat = tf.reshape(out, [-1, reduce(lambda x, y: x * y, shape[1:])])
        #Feed the values into the fourth (fully connected) layer
        w, b, out = fully_connected_layer(out_flat, 512, scope_name="fully1_target")

        self.w_target["wf1"] = w#Store the weight and bias values
        self.w_target["bf1"] = b
        #Feed the values into the final layer
        w, b, out = fully_connected_layer(out, self.n_actions, scope_name="out_target", activation=None)

        self.w_target["wout"] = w#Store the weight and bias values
        self.w_target["bout"] = b

        self.q_target_out = out #Store the output
        self.q_target_action = tf.argmax(self.q_target_out, axis=1) #Store the index of the action with the best return
Exemplo n.º 7
0
    def add_logits_op_train(self):
        """Build the online network: stateful GORU over the raw state + linear head.

        Exposes the recurrent state via self.state_output_c /
        self.state_output_h and sets self.q_out / self.q_action.
        """
        rnn_out, rnn_state = stateful_goru(x=self.state,
                                           num_layers=self.num_lstm_layers,
                                           lstm_size=self.lstm_size,
                                           state_input=tuple([self.lstm_state_train]),
                                           scope_name="lstm_train")
        # Keep the recurrent state reachable so it can be fed back next step.
        self.state_output_c = rnn_state[0][0]
        self.state_output_h = rnn_state[0][1]

        # Drop the length-1 time axis: [batch, 1, units] -> [batch, units].
        dims = rnn_out.get_shape().as_list()
        flat = tf.reshape(rnn_out, [tf.shape(rnn_out)[0], dims[2]])

        w, b, logits = fully_connected_layer(flat,
                                             self.n_actions,
                                             scope_name="out_train",
                                             activation=None)
        self.w["wout"] = w
        self.w["bout"] = b

        self.q_out = logits
        self.q_action = tf.argmax(self.q_out, axis=1)
Exemplo n.º 8
0
    def add_logits_op_target(self):
        """Build the target network: stateful GORU over the raw state + linear head.

        Exposes the recurrent state via self.state_output_target_c /
        self.state_output_target_h and sets self.q_target_out /
        self.q_target_action.
        """
        # NOTE(review): input presumably has shape [None, 1, num_colors]
        # (per the original author's note) — confirm against the caller.
        rnn_out, rnn_state = stateful_goru(self.state_target,
                                           self.num_lstm_layers,
                                           self.lstm_size,
                                           tuple([self.lstm_state_target]),
                                           scope_name="lstm_target")
        # Keep the recurrent state reachable so it can be fed back next step.
        self.state_output_target_c = rnn_state[0][0]
        self.state_output_target_h = rnn_state[0][1]

        # Drop the length-1 time axis: [batch, 1, units] -> [batch, units].
        dims = rnn_out.get_shape().as_list()
        flat = tf.reshape(rnn_out, [tf.shape(rnn_out)[0], dims[2]])

        w, b, logits = fully_connected_layer(flat,
                                             self.n_actions,
                                             scope_name="out_target",
                                             activation=None)
        self.w_target["wout"] = w
        self.w_target["bout"] = b

        self.q_target_out = logits
        self.q_target_action = tf.argmax(self.q_target_out, axis=1)