def add_logits_op_target(self):
    if self.cnn_format == "NHWC":
        x = tf.transpose(self.state_target, [0, 2, 3, 1])
    else:
        x = self.state_target
    w, b, out, _ = conv2d_layer(x, 32, [8, 8], [4, 4],
                                scope_name="conv1_target", summary_tag=None,
                                activation=tf.nn.relu,
                                data_format=self.cnn_format)
    self.w_target["wc1"] = w
    self.w_target["bc1"] = b
    w, b, out, _ = conv2d_layer(out, 64, [4, 4], [2, 2],
                                scope_name="conv2_target", summary_tag=None,
                                activation=tf.nn.relu,
                                data_format=self.cnn_format)
    self.w_target["wc2"] = w
    self.w_target["bc2"] = b
    w, b, out, _ = conv2d_layer(out, 64, [3, 3], [1, 1],
                                scope_name="conv3_target", summary_tag=None,
                                activation=tf.nn.relu,
                                data_format=self.cnn_format)
    self.w_target["wc3"] = w
    self.w_target["bc3"] = b
    # Flatten the conv output for the fully connected layers.
    # NOTE: under Python 3, `reduce` requires `from functools import reduce`.
    shape = out.get_shape().as_list()
    out_flat = tf.reshape(out, [-1, reduce(lambda x, y: x * y, shape[1:])])
    w, b, out = fully_connected_layer(out_flat, 512,
                                      scope_name="fully1_target")
    self.w_target["wf1"] = w
    self.w_target["bf1"] = b
    w, b, out = fully_connected_layer(out, self.n_actions,
                                      scope_name="out_target", activation=None)
    self.w_target["wout"] = w
    self.w_target["bout"] = b
    self.q_target_out = out
    self.q_target_action = tf.argmax(self.q_target_out, axis=1)
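# A minimal sketch of the `conv2d_layer` helper used above, inferred purely
# from its call signature (returns w, b, out, summary). The real helper lives
# elsewhere in the repository; the padding choice and initializers here are
# assumptions.
def conv2d_layer(x, output_dim, kernel_size, stride, scope_name,
                 summary_tag=None, activation=tf.nn.relu, data_format="NHWC"):
    with tf.variable_scope(scope_name):
        if data_format == "NHWC":
            strides = [1, stride[0], stride[1], 1]
            in_channels = x.get_shape().as_list()[-1]
        else:  # NCHW
            strides = [1, 1, stride[0], stride[1]]
            in_channels = x.get_shape().as_list()[1]
        w = tf.get_variable(
            "w", [kernel_size[0], kernel_size[1], in_channels, output_dim],
            initializer=tf.truncated_normal_initializer(0.0, 0.02))
        b = tf.get_variable("b", [output_dim],
                            initializer=tf.constant_initializer(0.0))
        out = tf.nn.conv2d(x, w, strides=strides, padding="VALID",
                           data_format=data_format)
        out = tf.nn.bias_add(out, b, data_format=data_format)
        if activation is not None:
            out = activation(out)
        summary = None
        if summary_tag is not None:
            # Visualize the first feature map of the first example.
            vis = out if data_format == "NHWC" else tf.transpose(out, [0, 2, 3, 1])
            summary = tf.summary.image(summary_tag, vis[:1, :, :, :1])
        return w, b, out, summary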
def add_logits_op_train(self):
    if self.cnn_format == "NHWC":
        x = tf.transpose(self.state, [0, 2, 3, 1])
    else:
        x = self.state
    self.image_summary = []
    w, b, out, summary = conv2d_layer(x, 32, [8, 8], [4, 4],
                                      scope_name="conv1_train",
                                      summary_tag="conv1_out",
                                      activation=tf.nn.relu,
                                      data_format=self.cnn_format)
    self.w["wc1"] = w
    self.w["bc1"] = b
    self.image_summary.append(summary)
    w, b, out, summary = conv2d_layer(out, 64, [4, 4], [2, 2],
                                      scope_name="conv2_train",
                                      summary_tag="conv2_out",
                                      activation=tf.nn.relu,
                                      data_format=self.cnn_format)
    self.w["wc2"] = w
    self.w["bc2"] = b
    self.image_summary.append(summary)
    w, b, out, summary = conv2d_layer(out, 64, [3, 3], [1, 1],
                                      scope_name="conv3_train",
                                      summary_tag="conv3_out",
                                      activation=tf.nn.relu,
                                      data_format=self.cnn_format)
    self.w["wc3"] = w
    self.w["bc3"] = b
    self.image_summary.append(summary)
    # Reshape the conv output to [batch, time=1, features] for the LSTM.
    shape = out.get_shape().as_list()
    out_flat = tf.reshape(
        out, [tf.shape(out)[0], 1, shape[1] * shape[2] * shape[3]])
    out, state = stateful_lstm(out_flat, self.num_lstm_layers, self.lstm_size,
                               tuple([self.lstm_state_train]),
                               scope_name="lstm_train")
    # Expose the recurrent state so it can be fed back on the next step.
    self.state_output_c = state[0][0]
    self.state_output_h = state[0][1]
    # Drop the time dimension before the output layer.
    shape = out.get_shape().as_list()
    out = tf.reshape(out, [tf.shape(out)[0], shape[2]])
    w, b, out = fully_connected_layer(out, self.n_actions,
                                      scope_name="out_train", activation=None)
    self.w["wout"] = w
    self.w["bout"] = b
    self.q_out = out
    self.q_action = tf.argmax(self.q_out, axis=1)
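# A minimal sketch of the `stateful_lstm` helper, assuming it wraps
# tf.nn.dynamic_rnn around a stack of LSTM cells and threads the caller's
# recurrent state through explicitly, so the agent can carry (c, h) between
# steps. The real helper is defined elsewhere; the cell classes used here are
# assumptions. state[0] is then the LSTMStateTuple of the first layer, which
# is why the training graph reads state[0][0] (c) and state[0][1] (h).
def stateful_lstm(x, num_layers, lstm_size, initial_state, scope_name):
    with tf.variable_scope(scope_name):
        # x has shape [batch, time=1, features]; initial_state is a tuple of
        # LSTMStateTuple(c, h), one entry per layer.
        cell = tf.nn.rnn_cell.MultiRNNCell(
            [tf.nn.rnn_cell.LSTMCell(lstm_size) for _ in range(num_layers)])
        out, state = tf.nn.dynamic_rnn(cell, x, initial_state=initial_state,
                                       dtype=tf.float32)
        return out, state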
def add_logits_op_target(self):
    if self.cnn_format == "NHWC":
        x = tf.transpose(self.state_target, [0, 2, 3, 1])
    else:
        x = self.state_target
    w, b, out, _ = conv2d_layer(x, 32, [8, 8], [4, 4],
                                scope_name="conv1_target", summary_tag=None,
                                activation=tf.nn.relu,
                                data_format=self.cnn_format)
    self.w_target["wc1"] = w
    self.w_target["bc1"] = b
    w, b, out, _ = conv2d_layer(out, 64, [4, 4], [2, 2],
                                scope_name="conv2_target", summary_tag=None,
                                activation=tf.nn.relu,
                                data_format=self.cnn_format)
    self.w_target["wc2"] = w
    self.w_target["bc2"] = b
    w, b, out, _ = conv2d_layer(out, 64, [3, 3], [1, 1],
                                scope_name="conv3_target", summary_tag=None,
                                activation=tf.nn.relu,
                                data_format=self.cnn_format)
    self.w_target["wc3"] = w
    self.w_target["bc3"] = b
    # Reshape the conv output to [batch, time=1, features] for the GRU.
    shape = out.get_shape().as_list()
    out_flat = tf.reshape(
        out, [tf.shape(out)[0], 1, shape[1] * shape[2] * shape[3]])
    out, state = stateful_gru(out_flat, self.num_lstm_layers, self.gru_size,
                              scope_name="gru_target")
    # A GRU keeps a single state tensor per layer, so there is no separate h.
    self.state_output_target = state[0]
    # Drop the time dimension before the output layer.
    shape = out.get_shape().as_list()
    out = tf.reshape(out, [tf.shape(out)[0], shape[2]])
    w, b, out = fully_connected_layer(out, self.n_actions,
                                      scope_name="out_target", activation=None)
    self.w_target["wout"] = w
    self.w_target["bout"] = b
    self.q_target_out = out
    self.q_target_action = tf.argmax(self.q_target_out, axis=1)
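# Companion sketch for `stateful_gru`, by analogy with the LSTM helper above.
# No initial state is passed at the call site, so a zero state is assumed
# here; treat the details as assumptions rather than the repository's actual
# implementation.
def stateful_gru(x, num_layers, gru_size, scope_name, initial_state=None):
    with tf.variable_scope(scope_name):
        cell = tf.nn.rnn_cell.MultiRNNCell(
            [tf.nn.rnn_cell.GRUCell(gru_size) for _ in range(num_layers)])
        if initial_state is None:
            initial_state = cell.zero_state(tf.shape(x)[0], tf.float32)
        out, state = tf.nn.dynamic_rnn(cell, x, initial_state=initial_state,
                                       dtype=tf.float32)
        return out, state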
def add_logits_op_train(self):
    self.image_summary = []
    # Arrange the input x according to the hardware data format.
    if self.cnn_format == "NHWC":
        x = tf.transpose(self.state, [0, 2, 3, 1])
    else:
        x = self.state
    # Apply the first convolution to the input.
    w, b, out, summary = conv2d_layer(x, 32, [8, 8], [4, 4],
                                      scope_name="conv1_train",
                                      summary_tag="conv1_out",
                                      activation=tf.nn.relu,
                                      data_format=self.cnn_format)
    # Store the weights, bias, and summary of the first conv.
    self.w["wc1"] = w
    self.w["bc1"] = b
    self.image_summary.append(summary)
    # Apply the second convolution to the output of the first.
    w, b, out, summary = conv2d_layer(out, 64, [4, 4], [2, 2],
                                      scope_name="conv2_train",
                                      summary_tag="conv2_out",
                                      activation=tf.nn.relu,
                                      data_format=self.cnn_format)
    # Store the weights, bias, and summary of the second conv.
    self.w["wc2"] = w
    self.w["bc2"] = b
    self.image_summary.append(summary)
    # Apply the third convolution to the output of the second.
    w, b, out, summary = conv2d_layer(out, 64, [3, 3], [1, 1],
                                      scope_name="conv3_train",
                                      summary_tag="conv3_out",
                                      activation=tf.nn.relu,
                                      data_format=self.cnn_format)
    # Store the weights, bias, and summary of the third conv.
    self.w["wc3"] = w
    self.w["bc3"] = b
    self.image_summary.append(summary)
    # Flatten the conv output (under Python 3, `reduce` requires
    # `from functools import reduce`).
    shape = out.get_shape().as_list()
    out_flat = tf.reshape(out, [-1, reduce(lambda x, y: x * y, shape[1:])])
    # Feed the flattened output of the third conv into the first fully
    # connected layer.
    w, b, out = fully_connected_layer(out_flat, 512,
                                      scope_name="fully1_train")
    # Store the weights and bias of the first fully connected layer.
    self.w["wf1"] = w
    self.w["bf1"] = b
    # Feed the values into the final layer.
    w, b, out = fully_connected_layer(out, self.n_actions,
                                      scope_name="out_train", activation=None)
    # Store the weights and bias of the second fully connected layer.
    self.w["wout"] = w
    self.w["bout"] = b
    self.q_out = out  # Store the network output as the Q-values.
    # The greedy action is the index of the largest value in q_out.
    self.q_action = tf.argmax(self.q_out, axis=1)
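# A minimal sketch of `fully_connected_layer` consistent with how it is called
# above: it returns (w, b, out), and the activation appears to default to
# ReLU, since the hidden layers omit the argument while the output layers pass
# activation=None explicitly. The initializers are assumptions.
def fully_connected_layer(x, output_dim, scope_name, activation=tf.nn.relu):
    with tf.variable_scope(scope_name):
        w = tf.get_variable(
            "w", [x.get_shape().as_list()[-1], output_dim],
            initializer=tf.truncated_normal_initializer(0.0, 0.02))
        b = tf.get_variable("b", [output_dim],
                            initializer=tf.constant_initializer(0.0))
        out = tf.matmul(x, w) + b
        if activation is not None:
            out = activation(out)
        return w, b, out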
def add_logits_op_target(self):
    # Arrange the input according to the hardware data format.
    if self.cnn_format == "NHWC":
        x = tf.transpose(self.state_target, [0, 2, 3, 1])
    else:
        x = self.state_target
    # Apply the first convolution to the input x.
    w, b, out, _ = conv2d_layer(x, 32, [8, 8], [4, 4],
                                scope_name="conv1_target", summary_tag=None,
                                activation=tf.nn.relu,
                                data_format=self.cnn_format)
    self.w_target["wc1"] = w  # Store the weights and bias.
    self.w_target["bc1"] = b
    # Apply the second convolution to the output of the first conv.
    w, b, out, _ = conv2d_layer(out, 64, [4, 4], [2, 2],
                                scope_name="conv2_target", summary_tag=None,
                                activation=tf.nn.relu,
                                data_format=self.cnn_format)
    self.w_target["wc2"] = w  # Store the weights and bias.
    self.w_target["bc2"] = b
    # Apply the third convolution to the output of the second conv.
    w, b, out, _ = conv2d_layer(out, 64, [3, 3], [1, 1],
                                scope_name="conv3_target", summary_tag=None,
                                activation=tf.nn.relu,
                                data_format=self.cnn_format)
    self.w_target["wc3"] = w  # Store the weights and bias.
    self.w_target["bc3"] = b
    # Flatten the output of the third conv (under Python 3, `reduce`
    # requires `from functools import reduce`).
    shape = out.get_shape().as_list()
    out_flat = tf.reshape(out, [-1, reduce(lambda x, y: x * y, shape[1:])])
    # Feed the flattened values into the fourth (fully connected) layer.
    w, b, out = fully_connected_layer(out_flat, 512,
                                      scope_name="fully1_target")
    self.w_target["wf1"] = w  # Store the weights and bias.
    self.w_target["bf1"] = b
    # Feed the values into the final layer.
    w, b, out = fully_connected_layer(out, self.n_actions,
                                      scope_name="out_target", activation=None)
    self.w_target["wout"] = w  # Store the weights and bias.
    self.w_target["bout"] = b
    self.q_target_out = out  # Store the output.
    # Store the index of the action with the highest estimated return.
    self.q_target_action = tf.argmax(self.q_target_out, axis=1)
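# The train (self.w) and target (self.w_target) dictionaries use matching
# keys ("wc1", "bc1", ..., "wout", "bout"), the usual setup for periodically
# copying the online weights into the frozen target network in DQN. A minimal
# sketch of that sync step; the method name `update_target_network` and the
# `self.sess` attribute are hypothetical here, not taken from this file.
def update_target_network(self):
    copy_ops = [self.w_target[name].assign(self.w[name]) for name in self.w]
    self.sess.run(copy_ops)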