def build(self, _input, net, store_output_op=False):
    assert (isinstance(net, BasicModel))
    output = _input
    if not self.ready:
        return output
    with tf.variable_scope(self._id):
        self._scope = tf.get_variable_scope().name
        param_initializer = self.param_initializer
        if self.pre_activation:
            # batch normalization
            if self.use_bn:
                output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon,
                                               net.net_config.bn_decay, param_initializer=param_initializer)
            # activation
            output = BasicModel.activation(output, self.activation)
            # pooling
            if self._type == 'avg':
                output = BasicModel.avg_pool(output, k=self.kernel_size, s=self.strides)
            elif self._type == 'max':
                output = BasicModel.max_pool(output, k=self.kernel_size, s=self.strides)
            else:
                raise ValueError('Do not support the pooling type: %s' % self._type)
        else:
            # pooling
            if self._type == 'avg':
                output = BasicModel.avg_pool(output, k=self.kernel_size, s=self.strides)
            elif self._type == 'max':
                output = BasicModel.max_pool(output, k=self.kernel_size, s=self.strides)
            else:
                raise ValueError('Do not support the pooling type: %s' % self._type)
            # batch normalization
            if self.use_bn:
                output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon,
                                               net.net_config.bn_decay, param_initializer=param_initializer)
            # activation
            output = BasicModel.activation(output, self.activation)
        # dropout
        output = BasicModel.dropout(output, self.keep_prob, net.is_training)
    if store_output_op:
        self.output_op = output
    return output
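
# --- Illustrative sketch (not from this repo): what `pre_activation` toggles. ---
# A rough TensorFlow 1.x stand-in for the pooling layer above, using stock tf.layers /
# tf.nn ops instead of the BasicModel helpers. The function name, the 3x3 / stride-2
# max pool and keep_prob=0.9 are assumptions made for illustration only.
def _pool_block_ordering_sketch():
    import tensorflow as tf
    x = tf.random_normal([8, 32, 32, 16])                     # NHWC feature map
    is_training = tf.placeholder_with_default(True, shape=[])

    # pre_activation=True: BN -> activation -> pool
    pre = tf.layers.batch_normalization(x, training=is_training)
    pre = tf.nn.relu(pre)
    pre = tf.layers.max_pooling2d(pre, pool_size=3, strides=2, padding='same')

    # pre_activation=False: pool -> BN -> activation
    post = tf.layers.max_pooling2d(x, pool_size=3, strides=2, padding='same')
    post = tf.layers.batch_normalization(post, training=is_training)
    post = tf.nn.relu(post)

    # dropout follows either ordering in the layer above
    pre = tf.nn.dropout(pre, keep_prob=0.9)
    post = tf.nn.dropout(post, keep_prob=0.9)
    return pre, post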
def build_forward(self, _input):
    output = _input  # [batch_size, num_steps, rnn_units]
    self.feature_dim = int(output.get_shape()[2])  # rnn_units
    output = tf.reshape(output, [-1, self.feature_dim])  # [batch_size * num_steps, rnn_units]
    final_activation = 'sigmoid' if self.out_dim == 1 else 'softmax'
    if self.net_type == 'simple':
        net_config = [] if self.net_config is None else self.net_config
        with tf.variable_scope('wider_actor'):
            for layer in net_config:
                units, activation = layer.get('units'), layer.get('activation', 'relu')
                output = BasicModel.fc_layer(output, units, use_bias=True)
                output = BasicModel.activation(output, activation)
            logits = BasicModel.fc_layer(output, self.out_dim, use_bias=True)  # [batch_size * num_steps, out_dim]
            probs = BasicModel.activation(logits, final_activation)  # [batch_size * num_steps, out_dim]
            probs_dim = self.out_dim
            if self.out_dim == 1:
                probs = tf.concat([1 - probs, probs], axis=1)
                probs_dim = 2

            self.q_values = tf.reshape(BasicModel.fc_layer(output, probs_dim, use_bias=True),
                                       [-1, self.num_steps, probs_dim])  # [batch_size, num_steps, out_dim]
            self.decision = tf.multinomial(tf.log(probs), 1)  # [batch_size * num_steps, 1]
            self.decision = tf.reshape(self.decision, [-1, self.num_steps])  # [batch_size, num_steps]
            self.probs = tf.reshape(probs, [-1, self.num_steps, probs_dim])  # [batch_size, num_steps, out_dim]
            self.values = tf.reduce_sum(tf.multiply(self.q_values, self.probs), axis=-1)  # [batch_size, num_steps]
            self.selected_prob = tf.reduce_sum(tf.one_hot(self.decision, probs_dim) * self.probs, axis=-1)
            self.selected_q = tf.reduce_sum(tf.one_hot(self.decision, probs_dim) * self.q_values, axis=-1)
    else:
        raise ValueError('Do not support %s' % self.net_type)
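
# --- Illustrative sketch (not from this repo): the binary-decision trick used above. ---
# With out_dim == 1 the sigmoid yields p(widen); concatenating [1 - p, p] turns it into a
# 2-way categorical, so tf.multinomial and the one-hot gathers work uniformly for both the
# binary and the softmax case. The function name and the logits values are assumptions for
# illustration only; requires TensorFlow 1.x.
def _wider_sampling_sketch():
    import tensorflow as tf
    logits = tf.constant([[0.3], [-1.2], [2.0]])              # [batch_size * num_steps, 1]
    p = tf.sigmoid(logits)                                     # probability of widening
    probs = tf.concat([1.0 - p, p], axis=1)                    # [batch_size * num_steps, 2]
    decision = tf.multinomial(tf.log(probs), 1)                # sample one action per row
    decision = tf.reshape(decision, [-1])                      # [batch_size * num_steps]
    selected_prob = tf.reduce_sum(tf.one_hot(decision, 2) * probs, axis=-1)
    with tf.Session() as sess:
        return sess.run([decision, selected_prob])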
def build(self, _input, net, store_output_op=False):
    assert (isinstance(net, BasicModel))
    output = _input
    if not self.ready:
        return output
    with tf.variable_scope(self._id):
        self._scope = tf.get_variable_scope().name
        param_initializer = self.param_initializer
        # flatten the input if it is not already flattened
        output = BasicModel.flatten(output)
        if self.pre_activation:
            # batch normalization
            if self.use_bn:
                output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon,
                                               net.net_config.bn_decay, param_initializer=param_initializer)
            # activation
            output = BasicModel.activation(output, self.activation)
            # FC
            output = BasicModel.fc_layer(output, self.units, self.use_bias, param_initializer=param_initializer)
        else:
            # FC
            output = BasicModel.fc_layer(output, self.units, self.use_bias, param_initializer=param_initializer)
            # batch normalization
            if self.use_bn:
                output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon,
                                               net.net_config.bn_decay, param_initializer=param_initializer)
            # activation
            output = BasicModel.activation(output, self.activation)
        # dropout
        output = BasicModel.dropout(output, self.keep_prob, net.is_training)
    if store_output_op:
        self.output_op = output
    return output
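
# --- Illustrative sketch (not from this repo): why the flatten call is needed. ---
# A convolutional feature map is 4-D; BasicModel.flatten collapses it to
# [batch_size, features] before the fully-connected layer. tf.layers.flatten and
# tf.layers.dense are used here only as stock TF1 stand-ins for the BasicModel helpers;
# shapes and units are illustrative values.
def _flatten_before_fc_sketch():
    import tensorflow as tf
    conv_out = tf.zeros([8, 4, 4, 64])           # NHWC output of a conv stack
    flat = tf.layers.flatten(conv_out)           # -> [8, 4 * 4 * 64] = [8, 1024]
    fc = tf.layers.dense(flat, units=256, use_bias=True)
    return flat, fc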
def build(self, _input, net, store_output_op=False):
    assert (isinstance(net, BasicModel))
    output = _input
    if not self.ready:
        return output
    with tf.variable_scope(self._id):
        self._scope = tf.get_variable_scope().name
        param_initializer = self.param_initializer
        if self.pre_activation:
            # batch normalization
            if self.use_bn:
                output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon,
                                               net.net_config.bn_decay, param_initializer=param_initializer)
            # activation
            output = BasicModel.activation(output, self.activation)
            # convolutional
            output = BasicModel.conv2d(output, self.filter_num, self.kernel_size, self.strides,
                                       param_initializer=param_initializer)
        else:
            # convolutional
            output = BasicModel.conv2d(output, self.filter_num, self.kernel_size, self.strides,
                                       param_initializer=param_initializer)
            # batch normalization
            if self.use_bn:
                output = BasicModel.batch_norm(output, net.is_training, net.net_config.bn_epsilon,
                                               net.net_config.bn_decay, param_initializer=param_initializer)
            # activation
            output = BasicModel.activation(output, self.activation)
        # dropout
        output = BasicModel.dropout(output, self.keep_prob, net.is_training)
    if store_output_op:
        self.output_op = output
    return output
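
# --- Illustrative sketch (not from this repo): a rough stand-in for the post-activation branch. ---
# BasicModel.conv2d is defined elsewhere in this codebase; the else-branch above roughly
# corresponds to the stock TF1 sequence conv -> BN -> ReLU -> dropout. filter_num=32,
# kernel_size=3, strides=1 and keep_prob=0.9 are assumed values for illustration only.
def _conv_block_sketch():
    import tensorflow as tf
    x = tf.random_normal([8, 32, 32, 16])
    is_training = tf.placeholder_with_default(True, shape=[])
    out = tf.layers.conv2d(x, filters=32, kernel_size=3, strides=1, padding='same')
    out = tf.layers.batch_normalization(out, training=is_training)
    out = tf.nn.relu(out)
    out = tf.nn.dropout(out, keep_prob=0.9)
    return out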
def build_forward(self, encoder_output, encoder_state, is_training, decision_trajectory):
    self._define_input()

    self.decision, self.probs, self.selected_prob, self.q_values, self.selected_q, self.values = \
        [], [], [], [], [], []
    batch_size = array_ops.shape(encoder_output)[0]
    if self.attention_config is None:
        cell = self.build_decoder_cell(encoder_state)
        cell_state = encoder_state
        cell_input = tf.zeros(shape=[batch_size], dtype=tf.int32)
        with tf.variable_scope('deeper_actor'):
            for _i in range(self.decision_num):
                cell_input_embed = embedding(cell_input, 1 if _i == 0 else self.out_dims[_i - 1],
                                             self.embedding_dim, name='deeper_actor_embedding_%d' % _i)
                with tf.variable_scope('rnn', reuse=(_i > 0)):
                    cell_output, cell_state = cell(cell_input_embed, cell_state)
                with tf.variable_scope('classifier_%d' % _i):
                    logits_i = BasicModel.fc_layer(cell_output, self.out_dims[_i], use_bias=True)  # [batch_size, out_dim_i]
                with tf.variable_scope('q_value_%d' % _i):
                    qv = BasicModel.fc_layer(cell_output, self.out_dims[_i], use_bias=True)  # [batch_size, out_dim_i]
                act_i = 'softmax'
                probs_i = BasicModel.activation(logits_i, activation=act_i)  # [batch_size, out_dim_i]
                if _i == 1:
                    # determine the layer index for the deeper actor; a mask is required so that
                    # only layer positions existing in the chosen block can be sampled
                    one_hot_block_decision = tf.one_hot(cell_input, depth=self.out_dims[0], dtype=tf.int32)
                    max_layer_num = tf.multiply(self.block_layer_num, one_hot_block_decision)
                    max_layer_num = tf.reduce_max(max_layer_num, axis=1)  # [batch_size]
                    layer_mask = tf.sequence_mask(max_layer_num, self.out_dims[1], dtype=tf.float32)
                    probs_i = tf.multiply(probs_i, layer_mask)
                    # rescale so the masked probabilities sum to 1
                    probs_i = tf.divide(probs_i, tf.reduce_sum(probs_i, axis=1, keep_dims=True))
                decision_i = tf.multinomial(tf.log(probs_i), 1)  # [batch_size, 1]
                decision_i = tf.cast(decision_i, tf.int32)
                decision_i = tf.reshape(decision_i, shape=[-1])  # [batch_size]
                cell_input = tf.cond(
                    is_training,
                    lambda: decision_trajectory[:, _i],
                    lambda: decision_i,
                )
                self.q_values.append(qv)
                self.decision.append(decision_i)
                self.probs.append(probs_i)
                self.values.append(tf.reduce_sum(tf.multiply(qv, probs_i), axis=-1))
                sq = tf.reduce_sum(tf.one_hot(decision_i, self.out_dims[_i]) * qv, axis=-1)
                self.selected_q.append(sq)
                sp = tf.reduce_sum(tf.one_hot(decision_i, self.out_dims[_i]) * probs_i, axis=-1)
                self.selected_prob.append(sp)
        self.decision = tf.stack(self.decision, axis=1)  # [batch_size, decision_num]
        self.values = tf.stack(self.values, axis=1)  # [batch_size, decision_num]
        self.selected_q = tf.stack(self.selected_q, axis=1)
        self.selected_prob = tf.stack(self.selected_prob, axis=1)
    else:
        raise NotImplementedError
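
# --- Illustrative sketch (not from this repo): the layer-index masking step (_i == 1). ---
# Once the block index has been sampled, only layer positions that exist inside that block
# are valid insertion points; tf.sequence_mask zeroes out the rest and the probabilities
# are renormalised before sampling. The two-row batch and the lengths [2, 3] are assumed
# values for illustration only; requires TensorFlow 1.x.
def _layer_mask_sketch():
    import tensorflow as tf
    probs_i = tf.constant([[0.25, 0.25, 0.25, 0.25],
                           [0.10, 0.20, 0.30, 0.40]])          # [batch_size, out_dims[1]]
    max_layer_num = tf.constant([2, 3])                        # layers in each chosen block
    layer_mask = tf.sequence_mask(max_layer_num, 4, dtype=tf.float32)
    masked = probs_i * layer_mask
    masked = masked / tf.reduce_sum(masked, axis=1, keep_dims=True)
    decision_i = tf.multinomial(tf.log(masked), 1)             # masked slots get -inf logits
    with tf.Session() as sess:
        return sess.run([masked, decision_i])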