def conv_net(inputs):
    '''Build a CNN.

    Parameters
    ----------
    inputs : input data

    Returns
    -------
    net : a CNN architecture
    '''
    # Use an arg_scope to avoid repeating the shared layer parameters.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn = leaky_relu(0.005),
                        weights_initializer = tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer = slim.l2_regularizer(0.0005)):
        net = slim.conv2d(inputs, 512, (3, inputs.shape[2]), 1,
                          padding = 'valid', scope = 'conv_1')  # kernel: (3, dimension_count)
        net = slim.max_pool2d(net, (4, 1), 4, padding = 'valid', scope = 'pool_2')
        net = slim.conv2d(net, 512, (5, 1), 1, scope = 'conv_3')
        net = slim.max_pool2d(net, (4, 1), 4, padding = 'valid', scope = 'pool_4')
        net = slim.flatten(net, scope = 'flatten_5')
        net = slim.fully_connected(net, 2, scope = 'fc_6', activation_fn = tf.nn.softmax)
        return net
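# A minimal usage sketch for conv_net (shapes are illustrative only; the input
# is assumed to be a 4-D [batch, time, dimension_count, 1] tensor and
# `leaky_relu` a factory returning an activation function).
dimension_count = 40
inputs = tf.placeholder(tf.float32, (None, 400, dimension_count, 1))
probabilities = conv_net(inputs)  # [batch, 2]; fc_6 already applies softmax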
def predict(self, preprocessed_inputs, true_image_shapes):
    """Prediction tensors from inputs tensor.

    Args:
      preprocessed_inputs: a [batch, 28, 28, channels] float32 tensor.
      true_image_shapes: int32 tensor of shape [batch, 3] where each row is
        of the form [height, width, channels] indicating the shapes of true
        images in the resized images, as resized images can be padded with
        zeros.

    Returns:
      prediction_dict: a dictionary holding prediction tensors to be
        passed to the Loss or Postprocess functions.
    """
    flattened_inputs = slim.flatten(preprocessed_inputs)
    class_prediction = slim.fully_connected(flattened_inputs, self._num_classes)
    box_prediction = slim.fully_connected(flattened_inputs, 4)
    return {
        'class_predictions_with_background': tf.reshape(
            class_prediction, [-1, 1, self._num_classes]),
        'box_encodings': tf.reshape(box_prediction, [-1, 1, 4])
    }
def define_vggish_slim(training=False):
    """Defines the VGGish TensorFlow model.

    All ops are created in the current default graph, under the scope
    'vggish/'.

    The input is a placeholder named 'vggish/input_features' of type float32
    and shape [batch_size, num_frames, num_bands] where batch_size is
    variable and num_frames and num_bands are constants, and
    [num_frames, num_bands] represents a log-mel-scale spectrogram patch
    covering num_bands frequency bands and num_frames time frames (where
    each frame step is usually 10ms). This is produced by computing the
    stabilized log(mel-spectrogram + params.LOG_OFFSET). The output is an op
    named 'vggish/embedding' which produces the activations of a 128-D
    embedding layer, which is usually the penultimate layer when used as
    part of a full model with a final classifier layer.

    Args:
      training: If true, all parameters are marked trainable.

    Returns:
      The op 'vggish/embedding'.
    """
    # Defaults:
    # - All weights are initialized to N(0, INIT_STDDEV).
    # - All biases are initialized to 0.
    # - All activations are ReLU.
    # - All convolutions are 3x3 with stride 1 and SAME padding.
    # - All max-pools are 2x2 with stride 2 and SAME padding.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.truncated_normal_initializer(
                            stddev=params.INIT_STDDEV),
                        biases_initializer=tf.zeros_initializer(),
                        activation_fn=tf.nn.relu,
                        trainable=training), \
         slim.arg_scope([slim.conv2d],
                        kernel_size=[3, 3], stride=1, padding='SAME'), \
         slim.arg_scope([slim.max_pool2d],
                        kernel_size=[2, 2], stride=2, padding='SAME'), \
         tf.variable_scope('vggish'):
        # Input: a batch of 2-D log-mel-spectrogram patches.
        features = tf.placeholder(
            tf.float32, shape=(None, params.NUM_FRAMES, params.NUM_BANDS),
            name='input_features')
        # Reshape to 4-D so that we can convolve a batch with conv2d().
        net = tf.reshape(features, [-1, params.NUM_FRAMES, params.NUM_BANDS, 1])

        # The VGG stack of alternating convolutions and max-pools.
        net = slim.conv2d(net, 64, scope='conv1')
        net = slim.max_pool2d(net, scope='pool1')
        net = slim.conv2d(net, 128, scope='conv2')
        net = slim.max_pool2d(net, scope='pool2')
        net = slim.repeat(net, 2, slim.conv2d, 256, scope='conv3')
        net = slim.max_pool2d(net, scope='pool3')
        net = slim.repeat(net, 2, slim.conv2d, 512, scope='conv4')
        net = slim.max_pool2d(net, scope='pool4')

        # Flatten before entering fully-connected layers.
        net = slim.flatten(net)
        net = slim.repeat(net, 2, slim.fully_connected, 4096, scope='fc1')
        # The embedding layer.
        net = slim.fully_connected(net, params.EMBEDDING_SIZE, scope='fc2')
        return tf.identity(net, name='embedding')
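# A minimal inference sketch for the VGGish graph above (session wiring is
# illustrative; `patches` is an assumed array of shape
# [batch, params.NUM_FRAMES, params.NUM_BANDS]).
with tf.Graph().as_default(), tf.Session() as sess:
    embedding_op = define_vggish_slim(training=False)
    sess.run(tf.global_variables_initializer())
    features_op = sess.graph.get_tensor_by_name('vggish/input_features:0')
    embeddings = sess.run(embedding_op, feed_dict={features_op: patches})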
def model(
    inputs,
    is_training = True,
    dropout_keep_prob = 0.8,
    reuse = None,
    scope = 'InceptionV4',
    bottleneck_dim = 512,
):
    # inputs = tf.image.grayscale_to_rgb(inputs)
    with tf.variable_scope(
        scope, 'InceptionV4', [inputs], reuse = reuse
    ) as scope:
        with slim.arg_scope(
            [slim.batch_norm, slim.dropout], is_training = is_training
        ):
            net, end_points = inception_v4_base(inputs, scope = scope)
            print(net.shape)
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                stride = 1,
                padding = 'SAME',
            ):
                with tf.variable_scope('Logits'):
                    # 8 x 8 x 1536
                    kernel_size = net.get_shape()[1:3]
                    print(kernel_size)
                    if kernel_size.is_fully_defined():
                        net = slim.avg_pool2d(
                            net,
                            kernel_size,
                            padding = 'VALID',
                            scope = 'AvgPool_1a',
                        )
                    else:
                        net = tf.reduce_mean(
                            input_tensor = net,
                            axis = [1, 2],
                            keepdims = True,
                            name = 'global_pool',
                        )
                    end_points['global_pool'] = net
                    # 1 x 1 x 1536
                    net = slim.dropout(
                        net, dropout_keep_prob, scope = 'Dropout_1b'
                    )
                    net = slim.flatten(net, scope = 'PreLogitsFlatten')
                    end_points['PreLogitsFlatten'] = net
                    bottleneck = slim.fully_connected(
                        net, bottleneck_dim, scope = 'bottleneck'
                    )
                    logits = slim.fully_connected(
                        bottleneck,
                        2,
                        activation_fn = None,
                        scope = 'Logits_vad',
                    )
                    return logits
def create_net(
    SPEC_HEIGHT,
    HWW_X,
    LEARN_LOG,
    NUM_FILTERS,
    WIGGLE_ROOM,
    CONV_FILTER_WIDTH,
    NUM_DENSE_UNITS,
    DO_BATCH_NORM,
):
    channels = 4
    net = collections.OrderedDict()
    net["input"] = tf.compat.v1.placeholder(
        tf.float32, (None, SPEC_HEIGHT, HWW_X * 2, channels), name="input"
    )
    net["conv1_1"] = slim.conv2d(
        net["input"],
        NUM_FILTERS,
        (SPEC_HEIGHT - WIGGLE_ROOM, CONV_FILTER_WIDTH),
        padding="valid",
        activation_fn=None,
        biases_initializer=None,
    )
    net["conv1_1"] = tf.nn.leaky_relu(net["conv1_1"], alpha=1 / 3)
    net["conv1_2"] = slim.conv2d(
        net["conv1_1"],
        NUM_FILTERS,
        (1, 3),
        padding="valid",
        activation_fn=None,
        biases_initializer=None,
    )
    net["conv1_2"] = tf.nn.leaky_relu(net["conv1_2"], alpha=1 / 3)
    W = net["conv1_2"].shape[2]
    net["pool2"] = slim.max_pool2d(net["conv1_2"], kernel_size=(1, W), stride=(1, 1))
    net["pool2"] = tf.transpose(net["pool2"], (0, 3, 2, 1))
    net["pool2_flat"] = slim.flatten(net["pool2"])
    net["fc6"] = slim.fully_connected(
        net["pool2_flat"], NUM_DENSE_UNITS, activation_fn=None, biases_initializer=None
    )
    net["fc6"] = tf.nn.leaky_relu(net["fc6"], alpha=1 / 3)
    net["fc7"] = slim.fully_connected(
        net["fc6"], NUM_DENSE_UNITS, activation_fn=None, biases_initializer=None
    )
    net["fc7"] = tf.nn.leaky_relu(net["fc7"], alpha=1 / 3)
    net["fc8"] = slim.fully_connected(net["fc7"], 2, activation_fn=None)
    # net['fc8'] = tf.nn.leaky_relu(net['fc8'], alpha=1/3)
    net["output"] = tf.nn.softmax(net["fc8"])
    return net
def model(
    inputs,
    is_training=True,
    dropout_keep_prob=0.8,
    reuse=None,
    scope='InceptionV4',
    create_aux_logits=True,
    num_classes=2,
):
    with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_v4_base(inputs, scope=scope)
            print(net.shape)
            with slim.arg_scope(
                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                stride=1,
                padding='SAME',
            ):
                # Final pooling and prediction
                # TODO(sguada,arnoegw): Consider adding a parameter global_pool
                # which can be set to False to disable pooling here (as in
                # resnet_*()).
                with tf.variable_scope('Logits'):
                    # 8 x 8 x 1536
                    kernel_size = net.get_shape()[1:3]
                    print(kernel_size)
                    if kernel_size.is_fully_defined():
                        net = slim.avg_pool2d(
                            net,
                            kernel_size,
                            padding='VALID',
                            scope='AvgPool_1a',
                        )
                    else:
                        net = tf.reduce_mean(
                            input_tensor=net,
                            axis=[1, 2],
                            keepdims=True,
                            name='global_pool',
                        )
                    end_points['global_pool'] = net
                    # 1 x 1 x 1536
                    net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b')
                    net = slim.flatten(net, scope='PreLogitsFlatten')
                    end_points['PreLogitsFlatten'] = net
                    # 1536
                    logits = slim.fully_connected(net, num_classes,
                                                  activation_fn=None,
                                                  scope='Logits')
                    return logits
def create_network(self, mH=128):
    # Placeholder: inserts a placeholder for a tensor that will always be fed.
    self.scalarInput = tf.placeholder(shape=[None, 49], dtype=tf.float32)
    self.imageIn = tf.reshape(self.scalarInput, shape=[-1, 7, 7, 1])
    # 'SAME': input and output spatial dimensions match; with 'VALID' the
    # output dimensions are smaller.
    self.conv1 = slim.conv2d(inputs=self.imageIn, num_outputs=4,
                             kernel_size=[3, 3], stride=[1, 1],
                             activation_fn=tf.nn.tanh, padding='SAME',
                             biases_initializer=None)
    self.conv2 = slim.conv2d(inputs=self.conv1, num_outputs=16,
                             kernel_size=[3, 3], stride=[2, 2],
                             activation_fn=tf.nn.tanh, padding='SAME',
                             biases_initializer=None)
    self.conv3 = slim.conv2d(inputs=self.conv2, num_outputs=32,
                             kernel_size=[3, 3], stride=[2, 2],
                             activation_fn=tf.nn.tanh, padding='SAME',
                             biases_initializer=None)
    self.conv4 = slim.conv2d(inputs=self.conv3, num_outputs=mH,
                             kernel_size=[2, 2], stride=[1, 1],
                             activation_fn=tf.nn.tanh, padding='VALID',
                             biases_initializer=None)

    # Dueling DQN: the output is split into advantage and value streams.
    # Flatten converts the 4-D conv output into a batch of vectors.
    self.layer4 = slim.flatten(self.conv4)

    # Xavier initialization for the fully-connected weights.
    xavier_init = tf.contrib.layers.xavier_initializer()
    self.W1 = tf.Variable(xavier_init([mH, mH]))
    self.b1 = tf.Variable(tf.zeros([mH]))
    self.layer5 = tf.nn.relu(tf.matmul(self.layer4, self.W1) + self.b1)
    self.W2 = tf.Variable(xavier_init([mH, mH]))
    self.b2 = tf.Variable(tf.zeros([mH]))
    self.layer6 = tf.nn.relu(tf.matmul(self.layer5, self.W2) + self.b2)

    # tf.split(data, number, axis)
    self.streamA, self.streamV = tf.split(self.layer6, 2, 1)
    # 4 actions per component; AW is [mH//2, 7*4].
    self.AW = tf.Variable(xavier_init([mH // 2, 28]))
    self.Ab = tf.Variable(tf.zeros([28]))
    self.VW = tf.Variable(xavier_init([mH // 2, 7]))  # state value V(s)
    self.Vb = tf.Variable(tf.zeros([7]))
    self.Advantage = tf.matmul(self.streamA, self.AW) + self.Ab
    self.Value = tf.matmul(self.streamV, self.VW) + self.Vb

    self.Advantage = tf.reshape(self.Advantage, [-1, 7, 4])  # actions (non-binary)
    self.Value = tf.reshape(self.Value, [-1, 7, 1])
    # Combine the advantage and value streams: 1*7*4 --> Q(s,a).
    self.Qout = self.Value + tf.subtract(
        self.Advantage, tf.reduce_mean(self.Advantage, axis=2, keep_dims=True))
    # The predicted action for each component: 1*7*1 --> actions.
    self.predict = tf.argmax(self.Qout, 2)
    # self.predict_a = tf.nn.softmax(self.Qout, 2)

    self.targetQ = tf.placeholder(shape=[None, 7], dtype=tf.float32)
    self.actions = tf.placeholder(shape=[None, 7], dtype=tf.int32)
    self.actions_onehot = tf.one_hot(self.actions, 4, dtype=tf.float32)
    self.Q = tf.reduce_mean(tf.multiply(self.Qout, self.actions_onehot), axis=2)
    self.td_error = tf.reduce_mean(tf.square(self.targetQ - self.Q))
    self.loss = tf.reduce_mean(self.td_error)
    self.trainer = tf.train.AdamOptimizer(learning_rate=0.001)  # training rule
    self.updateModel = self.trainer.minimize(self.loss)  # training target
def create_net(SPEC_HEIGHT, HWW_X, LEARN_LOG, NUM_FILTERS, WIGGLE_ROOM,
               CONV_FILTER_WIDTH, NUM_DENSE_UNITS, DO_BATCH_NORM):
    tf.compat.v1.disable_eager_execution()
    channels = 4
    net = collections.OrderedDict()
    net['input'] = tf.compat.v1.placeholder(
        tf.float32, (None, SPEC_HEIGHT, HWW_X * 2, channels), name='input')
    net['conv1_1'] = slim.conv2d(
        net['input'],
        NUM_FILTERS,
        (SPEC_HEIGHT - WIGGLE_ROOM, CONV_FILTER_WIDTH),
        padding='valid',
        activation_fn=None,
        biases_initializer=None)
    net['conv1_1'] = tf.nn.leaky_relu(net['conv1_1'], alpha=1 / 3)
    net['conv1_2'] = slim.conv2d(net['conv1_1'],
                                 NUM_FILTERS,
                                 (1, 3),
                                 padding='valid',
                                 activation_fn=None,
                                 biases_initializer=None)
    net['conv1_2'] = tf.nn.leaky_relu(net['conv1_2'], alpha=1 / 3)
    W = net['conv1_2'].shape[2]
    net['pool2'] = slim.max_pool2d(net['conv1_2'], kernel_size=(1, W),
                                   stride=(1, 1))
    net['pool2'] = tf.transpose(net['pool2'], (0, 3, 2, 1))
    net['pool2_flat'] = slim.flatten(net['pool2'])
    net['fc6'] = slim.fully_connected(net['pool2_flat'], NUM_DENSE_UNITS,
                                      activation_fn=None,
                                      biases_initializer=None)
    net['fc6'] = tf.nn.leaky_relu(net['fc6'], alpha=1 / 3)
    net['fc7'] = slim.fully_connected(net['fc6'], NUM_DENSE_UNITS,
                                      activation_fn=None,
                                      biases_initializer=None)
    net['fc7'] = tf.nn.leaky_relu(net['fc7'], alpha=1 / 3)
    net['fc8'] = slim.fully_connected(net['fc7'], 2, activation_fn=None)
    # net['fc8'] = tf.nn.leaky_relu(net['fc8'], alpha=1/3)
    net['output'] = tf.nn.softmax(net['fc8'])
    return net
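# A minimal build-and-run sketch for create_net (hyperparameter values are
# illustrative, not the original ones; assumes numpy imported as np).
net = create_net(SPEC_HEIGHT=80, HWW_X=48, LEARN_LOG=False, NUM_FILTERS=32,
                 WIGGLE_ROOM=5, CONV_FILTER_WIDTH=4, NUM_DENSE_UNITS=128,
                 DO_BATCH_NORM=False)
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    spec = np.zeros((1, 80, 96, 4), dtype=np.float32)  # [batch, H, HWW_X*2, C]
    probs = sess.run(net['output'], feed_dict={net['input']: spec})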
def discriminator(images, num_classes, bottleneck_size=512, keep_prob=1.0,
                  phase_train=True, weight_decay=0.0, reuse=None,
                  scope='Discriminator'):
    print("discriminator input : ", images.shape)
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_regularizer=tf.keras.regularizers.l2(0.5 * (weight_decay)),
                        activation_fn=leaky_relu,
                        normalizer_fn=None,
                        normalizer_params=batch_norm_params):
        with tf.compat.v1.variable_scope(scope, [images], reuse=reuse):
            with slim.arg_scope([slim.batch_norm, slim.dropout],
                                is_training=phase_train):
                print('{} input shape:'.format(scope),
                      [dim.value for dim in images.shape])

                net = conv(images, 32, kernel_size=4, stride=2, scope='conv1')
                print('module_1 shape:', [dim.value for dim in net.shape])

                net = conv(net, 64, kernel_size=4, stride=2, scope='conv2')
                print('module_2 shape:', [dim.value for dim in net.shape])

                net = conv(net, 128, kernel_size=4, stride=2, scope='conv3')
                print('module_3 shape:', [dim.value for dim in net.shape])

                net = conv(net, 256, kernel_size=4, stride=2, scope='conv4')
                print('module_4 shape:', [dim.value for dim in net.shape])

                net = conv(net, 512, kernel_size=4, stride=2, scope='conv5')
                print('module_5 shape:', [dim.value for dim in net.shape])

                # Patch discriminator
                patch5_logits = slim.conv2d(net, 3, 1, activation_fn=None,
                                            normalizer_fn=None,
                                            scope='patch5_logits')
                patch_logits = tf.reshape(patch5_logits, [-1, 3])

                # Global discriminator
                net = slim.flatten(net)
                prelogits = slim.fully_connected(
                    net, bottleneck_size, scope='Bottleneck',
                    weights_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                        scale=1.0, mode="fan_avg", distribution="uniform"),
                    activation_fn=None, normalizer_fn=None)
                prelogits = tf.nn.l2_normalize(prelogits, axis=1)
                print('latent shape:', [dim.value for dim in prelogits.shape])

                logits = slim.fully_connected(prelogits, num_classes,
                                              scope='Logits',
                                              activation_fn=None,
                                              normalizer_fn=None)
                return patch_logits, logits
def encoder(self, images, is_training):
    activation_fn = leaky_relu  # tf.nn.relu
    weight_decay = 0.0
    with tf.compat.v1.variable_scope('encoder'):
        with slim.arg_scope([slim.batch_norm], is_training=is_training):
            with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected],
                    weights_initializer=tf.compat.v1.truncated_normal_initializer(stddev=0.1),
                    weights_regularizer=tf.keras.regularizers.l2(0.5 * (weight_decay)),
                    normalizer_fn=slim.batch_norm,
                    normalizer_params=self.batch_norm_params):
                net = slim.conv2d(images, 32, [4, 4], 2,
                                  activation_fn=activation_fn, scope='Conv2d_1')
                net = slim.conv2d(net, 64, [4, 4], 2,
                                  activation_fn=activation_fn, scope='Conv2d_2')
                net = slim.conv2d(net, 128, [4, 4], 2,
                                  activation_fn=activation_fn, scope='Conv2d_3')
                net = slim.conv2d(net, 256, [4, 4], 2,
                                  activation_fn=activation_fn, scope='Conv2d_4')
                net = slim.flatten(net)
                # Two parallel fully-connected heads over the shared features.
                fc1 = slim.fully_connected(net, self.latent_variable_dim,
                                           activation_fn=None,
                                           normalizer_fn=None, scope='Fc_1')
                fc2 = slim.fully_connected(net, self.latent_variable_dim,
                                           activation_fn=None,
                                           normalizer_fn=None, scope='Fc_2')
    return fc1, fc2
def predict(self, features, num_predictions_per_location=1):
    """Predicts boxes.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing features for a batch of images.
      num_predictions_per_location: Int containing number of predictions per
        location.

    Returns:
      box_encodings: A float tensor of shape
        [batch_size, 1, num_classes, code_size] representing the location of
        the objects.

    Raises:
      ValueError: If num_predictions_per_location is not 1.
    """
    if num_predictions_per_location != 1:
        raise ValueError('Only num_predictions_per_location=1 is supported')
    spatial_averaged_roi_pooled_features = tf.reduce_mean(
        features, [1, 2], keep_dims=True, name='AvgPool')
    flattened_roi_pooled_features = slim.flatten(
        spatial_averaged_roi_pooled_features)
    if self._use_dropout:
        flattened_roi_pooled_features = slim.dropout(
            flattened_roi_pooled_features,
            keep_prob=self._dropout_keep_prob,
            is_training=self._is_training)
    number_of_boxes = 1
    if not self._share_box_across_classes:
        number_of_boxes = self._num_classes
    with slim.arg_scope(self._fc_hyperparams_fn()):
        box_encodings = slim.fully_connected(
            flattened_roi_pooled_features,
            number_of_boxes * self._box_code_size,
            reuse=tf.AUTO_REUSE,
            activation_fn=None,
            scope='BoxEncodingPredictor')
    box_encodings = tf.reshape(
        box_encodings, [-1, 1, number_of_boxes, self._box_code_size])
    return box_encodings
def _network_template(self, state):
    """Builds the convolutional network used to compute the agent's Q-values.

    Args:
      state: tf.Placeholder, contains the agent's current state.

    Returns:
      net: _network_type object containing the tensors output by the network.
    """
    net = tf.cast(state, tf.float32)
    net = tf.math.truediv(net, 255.)
    net = tf_slim.conv2d(net, 32, [8, 8], stride=4, trainable=False)
    net = tf_slim.conv2d(net, 64, [4, 4], stride=2, trainable=False)
    net = tf_slim.conv2d(net, 64, [3, 3], stride=1, trainable=False)
    net = tf_slim.flatten(net)
    linear_features = tf_slim.fully_connected(net, 512, trainable=True)
    q_values = tf_slim.fully_connected(
        linear_features, self.num_actions, activation_fn=None)
    return self._get_network_type()(q_values), linear_features
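# Sketch of how a template like this is typically instantiated in
# Dopamine-style agents: tf.make_template shares variables across calls.
# The attribute name and `state_placeholder` below are illustrative.
online_convnet = tf.compat.v1.make_template('Online', self._network_template)
net_outputs, linear_features = online_convnet(state_placeholder)
q_argmax = tf.argmax(net_outputs.q_values, axis=1)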
def predict(self, features, num_predictions_per_location=1):
    """Predicts boxes and class scores.

    Args:
      features: A float tensor of shape [batch_size, height, width, channels]
        containing features for a batch of images.
      num_predictions_per_location: Int containing number of predictions per
        location.

    Returns:
      class_predictions_with_background: A float tensor of shape
        [batch_size, 1, num_class_slots] representing the class predictions
        for the proposals.

    Raises:
      ValueError: If num_predictions_per_location is not 1.
    """
    if num_predictions_per_location != 1:
        raise ValueError('Only num_predictions_per_location=1 is supported')
    spatial_averaged_roi_pooled_features = tf.reduce_mean(
        features, [1, 2], keep_dims=True, name='AvgPool')
    flattened_roi_pooled_features = slim.flatten(
        spatial_averaged_roi_pooled_features)
    if self._use_dropout:
        flattened_roi_pooled_features = slim.dropout(
            flattened_roi_pooled_features,
            keep_prob=self._dropout_keep_prob,
            is_training=self._is_training)
    with slim.arg_scope(self._fc_hyperparams_fn()):
        class_predictions_with_background = slim.fully_connected(
            flattened_roi_pooled_features,
            self._num_class_slots,
            reuse=tf.AUTO_REUSE,
            activation_fn=None,
            scope=self._scope)
    class_predictions_with_background = tf.reshape(
        class_predictions_with_background, [-1, 1, self._num_class_slots])
    return class_predictions_with_background
def build_predictions(self, net, rois, is_training, initializer, initializer_bbox):
    # Crop image ROIs
    pool5 = self._crop_pool_layer(net, rois, "pool5")
    pool5_flat = slim.flatten(pool5, scope='flatten')

    # Fully connected layers
    fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
    if is_training:
        fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True, scope='dropout6')
    fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
    if is_training:
        fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True, scope='dropout7')

    # Scores and predictions
    cls_score = slim.fully_connected(fc7, self._num_classes,
                                     weights_initializer=initializer,
                                     trainable=is_training,
                                     activation_fn=None, scope='cls_score')
    cls_prob = self._softmax_layer(cls_score, "cls_prob")
    bbox_prediction = slim.fully_connected(fc7, self._num_classes * 4,
                                           weights_initializer=initializer_bbox,
                                           trainable=is_training,
                                           activation_fn=None, scope='bbox_pred')

    return cls_score, cls_prob, bbox_prediction
def create_network(self, input, trainable):
    if trainable:
        wr = slim.l2_regularizer(self.regularization)
    else:
        wr = None

    # The input is a stack of black-and-white frames; move the stack into
    # the channel position (last in tf).
    input_t = tf.transpose(input, [0, 2, 3, 1])

    net = slim.conv2d(input_t, 8, (7, 7), data_format="NHWC",
                      activation_fn=tf.nn.relu, stride=3,
                      weights_regularizer=wr, trainable=trainable)
    net = slim.max_pool2d(net, 2, 2)
    net = slim.conv2d(net, 16, (3, 3), data_format="NHWC",
                      activation_fn=tf.nn.relu,
                      weights_regularizer=wr, trainable=trainable)
    net = slim.max_pool2d(net, 2, 2)
    net = slim.flatten(net)
    net = slim.fully_connected(net, 256, activation_fn=tf.nn.relu,
                               weights_regularizer=wr, trainable=trainable)
    q_state_action_values = slim.fully_connected(net, self.dim_actions,
                                                 activation_fn=None,
                                                 weights_regularizer=wr,
                                                 trainable=trainable)
    return q_state_action_values
def _build_aux_head(net, end_points, num_classes, hparams, scope):
    """Auxiliary head used for all models across all datasets."""
    with tf.compat.v1.variable_scope(scope):
        aux_logits = tf.identity(net)
        with tf.compat.v1.variable_scope('aux_logits'):
            aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3,
                                         padding='VALID')
            aux_logits = slim.conv2d(aux_logits, 128, [1, 1], scope='proj')
            aux_logits = slim.batch_norm(aux_logits, scope='aux_bn0')
            aux_logits = tf.nn.relu(aux_logits)
            # Shape of feature map before the final layer.
            shape = aux_logits.shape
            if hparams.data_format == 'NHWC':
                shape = shape[1:3]
            else:
                shape = shape[2:4]
            aux_logits = slim.conv2d(aux_logits, 768, shape, padding='VALID')
            aux_logits = slim.batch_norm(aux_logits, scope='aux_bn1')
            aux_logits = tf.nn.relu(aux_logits)
            aux_logits = slim.flatten(aux_logits)
            aux_logits = slim.fully_connected(aux_logits, num_classes)
            end_points['AuxLogits'] = aux_logits
def inception_resnet_v2(inputs, num_classes=1001, is_training=True,
                        dropout_keep_prob=0.8,
                        reuse=None,
                        scope='InceptionResnetV2',
                        create_aux_logits=True,
                        activation_fn=tf.nn.relu):
    """Creates the Inception Resnet V2 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
        Dimension batch_size may be undefined. If create_aux_logits is
        false, also height and width may be undefined.
      num_classes: number of predicted classes. If 0 or None, the logits
        layer is omitted and the input features to the logits layer (before
        dropout) are returned instead.
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope.
      create_aux_logits: Whether to include the auxiliary logits.
      activation_fn: Activation function for conv2d.

    Returns:
      net: the output of the logits layer (if num_classes is a non-zero
        integer), or the non-dropped-out input to the logits layer (if
        num_classes is 0 or None).
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs],
                           reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_resnet_v2_base(
                inputs, scope=scope, activation_fn=activation_fn)

            if create_aux_logits and num_classes:
                with tf.variable_scope('AuxLogits'):
                    aux = end_points['PreAuxLogits']
                    aux = slim.avg_pool2d(aux, 5, stride=3, padding='VALID',
                                          scope='Conv2d_1a_3x3')
                    aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1')
                    aux = slim.conv2d(aux, 768, aux.get_shape()[1:3],
                                      padding='VALID', scope='Conv2d_2a_5x5')
                    aux = slim.flatten(aux)
                    aux = slim.fully_connected(aux, num_classes,
                                               activation_fn=None,
                                               scope='Logits')
                    end_points['AuxLogits'] = aux

            with tf.variable_scope('Logits'):
                # TODO(sguada,arnoegw): Consider adding a parameter global_pool
                # which can be set to False to disable pooling here (as in
                # resnet_*()).
                kernel_size = net.get_shape()[1:3]
                if kernel_size.is_fully_defined():
                    net = slim.avg_pool2d(net, kernel_size, padding='VALID',
                                          scope='AvgPool_1a_8x8')
                else:
                    net = tf.reduce_mean(net, [1, 2], keep_dims=True,
                                         name='global_pool')
                end_points['global_pool'] = net
                if not num_classes:
                    return net, end_points
                net = slim.flatten(net)
                net = slim.dropout(net, dropout_keep_prob,
                                   is_training=is_training, scope='Dropout')
                end_points['PreLogitsFlatten'] = net
                logits = slim.fully_connected(net, num_classes,
                                              activation_fn=None,
                                              scope='Logits')
                end_points['Logits'] = logits
                end_points['Predictions'] = tf.nn.softmax(logits,
                                                          name='Predictions')
    return logits, end_points
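# A minimal classification sketch for the model above (assumes 299x299 RGB
# inputs; the companion arg_scope and checkpoint loading from the slim model
# zoo are omitted here).
images = tf.placeholder(tf.float32, [None, 299, 299, 3])
logits, end_points = inception_resnet_v2(images, num_classes=1001,
                                         is_training=False)
probabilities = end_points['Predictions']  # softmax over the 1001 classes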
def create_net(self):
    opts = self.opts["net"]
    channels = opts["channels"]
    net = collections.OrderedDict()
    regularizer = slim.l2_regularizer(0.0005)
    net["input"] = tf.compat.v1.placeholder(
        tf.float32,
        (None, opts["spec_height"], opts["hww_x"] * 2, channels),
        name="input",
    )
    net["conv1_1"] = slim.conv2d(
        net["input"],
        opts["num_filters"],
        (opts["spec_height"] - opts["wiggle_room"], opts["conv_filter_width"]),
        padding="valid",
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=regularizer,
    )
    net["conv1_1"] = tf.nn.leaky_relu(net["conv1_1"], alpha=1 / 3)
    net["conv1_2"] = slim.conv2d(
        net["conv1_1"],
        opts["num_filters"],
        (1, 3),
        padding="valid",
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=regularizer,
    )
    net["conv1_2"] = tf.nn.leaky_relu(net["conv1_2"], alpha=1 / 3)
    W = net["conv1_2"].shape[2]
    net["pool2"] = slim.max_pool2d(
        net["conv1_2"],
        kernel_size=(1, W),
        stride=(1, 1),
    )
    net["pool2"] = tf.transpose(net["pool2"], (0, 3, 2, 1))
    net["pool2_flat"] = slim.flatten(net["pool2"])
    net["fc6"] = slim.fully_connected(
        net["pool2_flat"],
        opts["num_dense_units"],
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=regularizer,
    )
    net["fc6"] = tf.nn.dropout(net["fc6"], 0.5)
    net["fc6"] = tf.nn.leaky_relu(net["fc6"], alpha=1 / 3)
    net["fc7"] = slim.fully_connected(
        net["fc6"],
        opts["num_dense_units"],
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=regularizer,
    )
    net["fc7"] = tf.nn.dropout(net["fc7"], 0.5)
    net["fc7"] = tf.nn.leaky_relu(net["fc7"], alpha=1 / 3)
    net["fc8"] = slim.fully_connected(net["fc7"], 2, activation_fn=None)
    # net['fc8'] = tf.nn.leaky_relu(net['fc8'], alpha=1/3)
    net["output"] = tf.nn.softmax(net["fc8"])
    return net
def generator(z,
              progress,
              num_filters_fn,
              resolution_schedule,
              num_blocks=None,
              kernel_size=3,
              colors=3,
              to_rgb_activation=None,
              simple_arch=False,
              scope='progressive_gan_generator',
              reuse=None):
    """Generator network for the progressive GAN model.

    Args:
      z: A `Tensor` of latent vector. The first dimension must be batch size.
      progress: A scalar float `Tensor` of training progress.
      num_filters_fn: A function that maps `block_id` to # of filters for the
        block.
      resolution_schedule: An object of `ResolutionSchedule`.
      num_blocks: An integer of number of blocks. None means maximum number of
        blocks, i.e. `resolution.schedule.num_resolutions`. Defaults to None.
      kernel_size: An integer of convolution kernel size.
      colors: Number of output color channels. Defaults to 3.
      to_rgb_activation: Activation function applied when output rgb.
      simple_arch: Architecture variants for lower memory usage and faster
        speed.
      scope: A string or variable scope.
      reuse: Whether to reuse `scope`. Defaults to None which means to inherit
        the reuse option of the parent scope.

    Returns:
      A `Tensor` of model output and a dictionary of model end points.
    """
    if num_blocks is None:
        num_blocks = resolution_schedule.num_resolutions

    start_h, start_w = resolution_schedule.start_resolutions
    final_h, final_w = resolution_schedule.final_resolutions

    def _conv2d(scope, x, kernel_size, filters, padding='SAME'):
        return layers.custom_conv2d(
            x=x,
            filters=filters,
            kernel_size=kernel_size,
            padding=padding,
            activation=lambda x: layers.pixel_norm(tf.nn.leaky_relu(x)),
            he_initializer_slope=0.0,
            scope=scope)

    def _to_rgb(x):
        return layers.custom_conv2d(
            x=x,
            filters=colors,
            kernel_size=1,
            padding='SAME',
            activation=to_rgb_activation,
            scope='to_rgb')

    he_init = tf_slim.variance_scaling_initializer()

    end_points = {}

    with tf.variable_scope(scope, reuse=reuse):
        with tf.name_scope('input'):
            x = tf_slim.flatten(z)
            end_points['latent_vector'] = x

        with tf.variable_scope(block_name(1)):
            if simple_arch:
                x_shape = tf.shape(x)
                x = tf.layers.dense(x, start_h * start_w * num_filters_fn(1),
                                    kernel_initializer=he_init)
                x = tf.nn.relu(x)
                x = tf.reshape(
                    x, [x_shape[0], start_h, start_w, num_filters_fn(1)])
            else:
                x = tf.expand_dims(tf.expand_dims(x, 1), 1)
                x = layers.pixel_norm(x)
                # Pad the 1 x 1 image to 2 * (start_h - 1) x 2 * (start_w - 1)
                # with zeros for the next conv.
                x = tf.pad(x, [[0] * 2, [start_h - 1] * 2,
                               [start_w - 1] * 2, [0] * 2])
                # The output is start_h x start_w x num_filters_fn(1).
                x = _conv2d('conv0', x, (start_h, start_w), num_filters_fn(1),
                            'VALID')
                x = _conv2d('conv1', x, kernel_size, num_filters_fn(1))

        lods = [x]

        if resolution_schedule.scale_mode == 'H':
            strides = (resolution_schedule.scale_base, 1)
        else:
            strides = (resolution_schedule.scale_base,
                       resolution_schedule.scale_base)

        for block_id in range(2, num_blocks + 1):
            with tf.variable_scope(block_name(block_id)):
                if simple_arch:
                    x = tf.layers.conv2d_transpose(
                        x,
                        num_filters_fn(block_id),
                        kernel_size=kernel_size,
                        strides=strides,
                        padding='SAME',
                        kernel_initializer=he_init)
                    x = tf.nn.relu(x)
                else:
                    x = resolution_schedule.upscale(
                        x, resolution_schedule.scale_base)
                    x = _conv2d('conv0', x, kernel_size, num_filters_fn(block_id))
                    x = _conv2d('conv1', x, kernel_size, num_filters_fn(block_id))
                lods.append(x)

        outputs = []
        for block_id in range(1, num_blocks + 1):
            with tf.variable_scope(block_name(block_id)):
                if simple_arch:
                    lod = lods[block_id - 1]
                    lod = tf.layers.conv2d(
                        lod,
                        colors,
                        kernel_size=1,
                        padding='SAME',
                        name='to_rgb',
                        kernel_initializer=he_init)
                    lod = to_rgb_activation(lod)
                else:
                    lod = _to_rgb(lods[block_id - 1])
                scale = resolution_schedule.scale_factor(block_id)
                lod = resolution_schedule.upscale(lod, scale)
                end_points['upscaled_rgb_{}'.format(block_id)] = lod

                # alpha_i is used to replace lod_select. Note sum(alpha_i) is
                # guaranteed to be 1.
                alpha = _generator_alpha(block_id, progress)
                end_points['alpha_{}'.format(block_id)] = alpha

                outputs.append(lod * alpha)

        predictions = tf.add_n(outputs)
        batch_size = int(z.shape[0])
        predictions.set_shape([batch_size, final_h, final_w, colors])
        end_points['predictions'] = predictions

    return predictions, end_points
def lenet(images, num_classes=10, is_training=False,
          dropout_keep_prob=0.5,
          prediction_fn=slim.softmax,
          scope='LeNet'):
    """Creates a variant of the LeNet model.

    Note that since the output is a set of 'logits', the values fall in the
    interval of (-infinity, infinity). Consequently, to convert the outputs
    to a probability distribution over the characters, one will need to
    convert them using the softmax function:

          logits = lenet.lenet(images, is_training=False)
          probabilities = tf.nn.softmax(logits)
          predictions = tf.argmax(logits, 1)

    Args:
      images: A batch of `Tensors` of size
        [batch_size, height, width, channels].
      num_classes: the number of classes in the dataset. If 0 or None, the
        logits layer is omitted and the input features to the logits layer
        are returned instead.
      is_training: specifies whether or not we're currently training the
        model. This variable will determine the behaviour of the dropout
        layer.
      dropout_keep_prob: the percentage of activation values that are
        retained.
      prediction_fn: a function to get predictions out of logits.
      scope: Optional variable_scope.

    Returns:
      net: a 2D Tensor with the logits (pre-softmax activations) if
        num_classes is a non-zero integer, or the non-dropped-out input to
        the logits layer if num_classes is 0 or None.
      end_points: a dictionary from components of the network to the
        corresponding activation.
    """
    end_points = {}

    with tf.variable_scope(scope, 'LeNet', [images]):
        net = end_points['conv1'] = slim.conv2d(images, 32, [5, 5],
                                                scope='conv1')
        net = end_points['pool1'] = slim.max_pool2d(net, [2, 2], 2,
                                                    scope='pool1')
        net = end_points['conv2'] = slim.conv2d(net, 64, [5, 5],
                                                scope='conv2')
        net = end_points['pool2'] = slim.max_pool2d(net, [2, 2], 2,
                                                    scope='pool2')
        net = slim.flatten(net)
        end_points['Flatten'] = net

        net = end_points['fc3'] = slim.fully_connected(net, 1024, scope='fc3')
        if not num_classes:
            return net, end_points
        net = end_points['dropout3'] = slim.dropout(
            net, dropout_keep_prob, is_training=is_training, scope='dropout3')
        logits = end_points['Logits'] = slim.fully_connected(
            net, num_classes, activation_fn=None, scope='fc4')

    end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
    return logits, end_points
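# A minimal end-to-end sketch for the LeNet variant above (28x28 grayscale,
# MNIST-style inputs are assumed for illustration).
images = tf.placeholder(tf.float32, [None, 28, 28, 1])
logits, end_points = lenet(images, num_classes=10, is_training=False)
probabilities = end_points['Predictions']  # softmax over 10 classes
predictions = tf.argmax(logits, 1)         # hard class labels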
def _run():
    """Forward pass through the network."""
    with slim.arg_scope([slim.dropout], is_training=is_training):
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                weights_regularizer=slim.l2_regularizer(self._l2_regularization),
                activation_fn=tf.nn.relu,
                trainable=is_training):
            with slim.arg_scope(
                    [slim.conv2d, slim.max_pool2d], stride=1, padding='SAME'):
                with slim.arg_scope(
                        [slim.conv2d, slim.fully_connected],
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm):
                    _, grasp_image = images
                    net = slim.conv2d(
                        grasp_image,
                        64, [6, 6],
                        stride=2,
                        scope='conv1_1',
                        activation_fn=None,
                        normalizer_fn=None,
                        normalizer_params=None)
                    # Old checkpoints (such as those used for tests) did not
                    # have scaling on the separate batch norm operations
                    # (those not associated with a conv operation), so only
                    # setting the scale parameter in arg_scope would break
                    # the tests. We set scale=False for these separate batch
                    # norm operations temporarily. However, future users are
                    # encouraged to not set scale=False so that batch_norm
                    # parameters are consistent through the whole network.
                    net = tf.nn.relu(slim.batch_norm(net, scale=False))
                    net = slim.max_pool2d(net, [3, 3], stride=3, scope='pool1')
                    self.activation_layers.append(net)

                    for l in range(2, 2 + self.num_convs[0]):
                        net = slim.conv2d(net, 64, [5, 5], scope='conv%d' % l)
                        self.activation_layers.append(net)
                    net = slim.max_pool2d(net, [3, 3], stride=3, scope='pool2')
                    end_points['pool2'] = net
                    self.activation_layers.append(net)
                    logging.debug('pool2')
                    logging.debug(net.get_shape())

                    if grasp_param_names is None:
                        grasp_param_blocks = [grasp_params]
                        grasp_param_block_names = ['fcgrasp']
                    else:
                        grasp_param_blocks = []
                        grasp_param_block_names = []
                        # Note: Creating variables must happen in a
                        # deterministic order, otherwise some workers will
                        # look for variables on the wrong parameter servers,
                        # so we sort the grasp_param_names here.
                        for block_name in sorted(grasp_param_names):
                            offset, size = grasp_param_names[block_name]
                            grasp_param_blocks += [
                                tf.slice(grasp_params, [0, offset], [-1, size])
                            ]
                            grasp_param_block_names += [block_name]

                    grasp_param_tensors = []
                    for block, name in zip(grasp_param_blocks,
                                           grasp_param_block_names):
                        grasp_param_tensors += [
                            slim.fully_connected(
                                block,
                                256,
                                scope=name,
                                activation_fn=None,
                                normalizer_fn=None,
                                normalizer_params=None)
                        ]
                    fcgrasp = tf.add_n(grasp_param_tensors)
                    # See the scale=False note on the batch norm above; the
                    # same caveat applies to this separate batch norm op.
                    fcgrasp = tf.nn.relu(slim.batch_norm(fcgrasp, scale=False))
                    fcgrasp = slim.fully_connected(fcgrasp, 64, scope='fcgrasp2')
                    context = tf.reshape(fcgrasp, [-1, 1, 1, 64])
                    end_points['fcgrasp'] = fcgrasp

                    # Tile the image embedding action_batch_size times to
                    # align with the expanded action dimension of
                    # action_batch_size. The same image is used with all the
                    # actions in an action batch.
                    # net pre expansion should be [batch, *, *, *];
                    # net post expansion should be
                    # [batch x action_batch, *, *, *].
                    if tile_batch:
                        net = contrib_seq2seq.tile_batch(
                            net, self._action_batch_size)
                    net = tf.add(net, context)
                    logging.debug('net post add %s', net)
                    end_points['vsum'] = net
                    self.activation_layers.append(net)
                    logging.debug('vsum')
                    logging.debug(net.get_shape())

                    for l in range(2 + sum(self.num_convs[:1]),
                                   2 + sum(self.num_convs[:2])):
                        net = slim.conv2d(net, 64, [3, 3], scope='conv%d' % l)
                        logging.debug('conv%d', l)
                        self.activation_layers.append(net)
                        logging.debug(net.get_shape())
                    net = slim.max_pool2d(net, [2, 2], stride=2, scope='pool3')
                    logging.debug('pool3')
                    logging.debug(net.get_shape())
                    self.activation_layers.append(net)

                    for l in range(2 + sum(self.num_convs[:2]),
                                   2 + sum(self.num_convs[:3])):
                        net = slim.conv2d(
                            net, 64, [3, 3], scope='conv%d' % l,
                            padding='VALID')
                        self.activation_layers.append(net)
                    logging.debug('final conv')
                    logging.debug(net.get_shape())
                    end_points['final_conv'] = net

                    batch_size = tf.shape(net)[0]
                    if goal_spatial_fn is not None:
                        goal_spatial = goal_spatial_fn()
                        # Tile goal to match net batch size (e.g. CEM).
                        goal_batch_size = tf.shape(goal_spatial)[0]
                        goal_spatial = tf.tile(
                            goal_spatial,
                            [batch_size // goal_batch_size, 1, 1, 1])
                        # Merging features in style of Fang 2017.
                        net = tf.concat([net, goal_spatial], axis=3)

                    net = slim.flatten(net, scope='flatten')
                    if goal_vector_fn is not None:
                        goal_vector = goal_vector_fn()
                        goal_batch_size = tf.shape(goal_vector)[0]
                        goal_vector = tf.tile(
                            goal_vector, [batch_size // goal_batch_size, 1])
                        net = tf.concat([net, goal_vector], axis=1)

                    for l in range(self.hid_layers):
                        net = slim.fully_connected(net, 64, scope='fc%d' % l)

                    name = 'logit'
                    if num_classes > 1:
                        name = 'logit_%d' % num_classes
                    logits = slim.fully_connected(
                        net,
                        num_classes,
                        activation_fn=None,
                        scope=name,
                        normalizer_fn=None,
                        normalizer_params=None)
                    end_points['logits'] = logits
                    if softmax:
                        predictions = tf.nn.softmax(logits)
                    else:
                        predictions = tf.nn.sigmoid(logits)
                    if tile_batch:
                        if num_classes > 1:
                            predictions = tf.reshape(
                                predictions,
                                [-1, self._action_batch_size, num_classes])
                        else:
                            predictions = tf.reshape(
                                predictions, [-1, self._action_batch_size])
                    end_points['predictions'] = predictions
                    return logits, end_points
def inception_resnet_v1(inputs, is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Creates the Inception Resnet V1 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: number of units in the bottleneck (embedding)
        layer.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: the output of the bottleneck layer.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # 149 x 149 x 32
                net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net, 32, 3, padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net, 80, 1, padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net, 192, 3, padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 256
                net = slim.conv2d(net, 256, 3, stride=2, padding='VALID',
                                  scope='Conv2d_4b_3x3')
                end_points['Conv2d_4b_3x3'] = net

                # 5 x Inception-resnet-A
                net = slim.repeat(net, 5, block35, scale=0.17)
                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 192, 192, 256, 384)
                end_points['Mixed_6a'] = net
                # 10 x Inception-Resnet-B
                net = slim.repeat(net, 10, block17, scale=0.10)
                # Reduction-B
                with tf.variable_scope('Mixed_7a'):
                    net = reduction_b(net)
                end_points['Mixed_7a'] = net
                # 5 x Inception-Resnet-C
                net = slim.repeat(net, 5, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # pylint: disable=no-member
                    net = slim.avg_pool2d(net, net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')
                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net, bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck', reuse=False)

    return net, end_points
def forward(self):
    # Move the channel axis ahead of the spatial dims before flattening.
    temp = tf.transpose(self.inp.out, [0, 3, 1, 2])
    self.out = slim.flatten(temp, scope=self.scope)
def decoder(encoded, scales, styles, texture_only=False, style_size=8,
            image_size=(112, 112), keep_prob=1.0, phase_train=True,
            weight_decay=0.0, reuse=None, scope='Decoder'):
    with tf.compat.v1.variable_scope(scope, reuse=reuse):
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose,
                             slim.fully_connected],
                            activation_fn=tf.nn.relu,
                            # weights_initializer=tf.contrib.layers.xavier_initializer(),
                            weights_initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=2.0),
                            weights_regularizer=tf.keras.regularizers.l2(0.5 * (weight_decay))):
            with slim.arg_scope([slim.dropout, slim.batch_norm],
                                is_training=phase_train):
                with slim.arg_scope([slim.fully_connected],
                                    normalizer_fn=layer_norm,
                                    normalizer_params=None):
                    print('{} input shape:'.format(scope),
                          [dim.value for dim in encoded.shape])

                    batch_size = tf.shape(input=encoded)[0]
                    h, w = tuple(image_size)
                    k = 64

                    with tf.compat.v1.variable_scope('StyleController'):
                        if styles is None:
                            styles = tf.random.normal((batch_size, style_size))
                        net = tf.identity(styles, name='input_style')
                        net = slim.fully_connected(net, 128, scope='fc2')
                        print('module fc2 shape:',
                              [dim.value for dim in net.shape])
                        net = slim.fully_connected(net, 128, scope='fc3')
                        print('module fc3 shape:',
                              [dim.value for dim in net.shape])

                        gamma = slim.fully_connected(net, 4 * k,
                                                     activation_fn=None,
                                                     normalizer_fn=None,
                                                     scope='fc4')
                        gamma = tf.reshape(gamma, [-1, 1, 1, 4 * k],
                                           name='gamma')
                        print('gamma shape:',
                              [dim.value for dim in gamma.shape])

                        beta = slim.fully_connected(net, 4 * k,
                                                    activation_fn=None,
                                                    normalizer_fn=None,
                                                    scope='fc5')
                        beta = tf.reshape(beta, [-1, 1, 1, 4 * k], name='beta')
                        print('beta shape:',
                              [dim.value for dim in beta.shape])

                    with tf.compat.v1.variable_scope('Decoder'):
                        print('-- Decoder')
                        net = encoded
                        # Adaptive instance norm driven by the style
                        # controller's gamma and beta.
                        adain = lambda x: gamma * instance_norm(
                            x, center=False, scale=False) + beta
                        with slim.arg_scope([slim.conv2d_transpose, slim.conv2d],
                                            normalizer_fn=adain,
                                            normalizer_params=None):
                            for i in range(3):
                                net_ = conv(net, 4 * k, 3,
                                            scope='res{}_0'.format(i))
                                net += conv(net_, 4 * k, 3, activation_fn=None,
                                            biases_initializer=None,
                                            scope='res{}_1'.format(i))
                                print('module res{} shape:'.format(i),
                                      [dim.value for dim in net.shape])

                        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose,
                                             slim.fully_connected],
                                            normalizer_fn=layer_norm,
                                            normalizer_params=None):
                            net = upscale2d(net, 2)
                            net = conv(net, 2 * k, 5, pad=2, scope='deconv1_1')
                            print('module deconv1 shape:',
                                  [dim.value for dim in net.shape])

                            net = upscale2d(net, 2)
                            net = conv(net, k, 5, pad=2, scope='deconv2_1')

                        net = conv(net, 3, 7, pad=3, activation_fn=None,
                                   normalizer_fn=None,
                                   weights_initializer=tf.compat.v1.constant_initializer(0.0),
                                   scope='conv_image')
                        images_rendered = tf.nn.tanh(net, name='images_rendered')
                        print('images_rendered shape:',
                              [dim.value for dim in images_rendered.shape])

                    if texture_only:
                        return images_rendered

                    with tf.compat.v1.variable_scope('WarpController'):
                        print('-- WarpController')
                        net = encoded
                        warp_input = tf.identity(images_rendered,
                                                 name='warp_input')

                        net = slim.flatten(net)
                        net = slim.fully_connected(net, 128, scope='fc1')
                        print('module fc1 shape:',
                              [dim.value for dim in net.shape])

                        num_ldmark = 16

                        # Predict the control points.
                        ldmark_mean = (np.random.normal(0, 50, (num_ldmark, 2)) +
                                       np.array([[0.5 * h, 0.5 * w]])).flatten()
                        ldmark_mean = tf.Variable(
                            ldmark_mean.astype(np.float32), name='ldmark_mean')
                        print('ldmark_mean shape:',
                              [dim.value for dim in ldmark_mean.shape])

                        ldmark_pred = slim.fully_connected(
                            net, num_ldmark * 2,
                            weights_initializer=tf.compat.v1.truncated_normal_initializer(stddev=1.0),
                            normalizer_fn=None, activation_fn=None,
                            biases_initializer=None, scope='fc_ldmark')
                        ldmark_pred = ldmark_pred + ldmark_mean
                        print('ldmark_pred shape:',
                              [dim.value for dim in ldmark_pred.shape])
                        ldmark_pred = tf.identity(ldmark_pred,
                                                  name='ldmark_pred')

                        # Predict the displacements.
                        ldmark_diff = slim.fully_connected(
                            net, num_ldmark * 2,
                            normalizer_fn=None, activation_fn=None,
                            scope='fc_diff')
                        print('ldmark_diff shape:',
                              [dim.value for dim in ldmark_diff.shape])
                        ldmark_diff = tf.identity(ldmark_diff,
                                                  name='ldmark_diff')
                        ldmark_diff = tf.identity(
                            tf.reshape(scales, [-1, 1]) * ldmark_diff,
                            name='ldmark_diff_scaled')

                        src_pts = tf.reshape(ldmark_pred, [-1, num_ldmark, 2])
                        dst_pts = tf.reshape(ldmark_pred + ldmark_diff,
                                             [-1, num_ldmark, 2])

                        diff_norm = tf.reduce_mean(
                            input_tensor=tf.norm(tensor=src_pts - dst_pts,
                                                 axis=[1, 2]))
                        # tf.summary.scalar('diff_norm', diff_norm)
                        # tf.summary.scalar('mark', ldmark_pred[0, 0])

                        images_transformed, dense_flow = sparse_image_warp(
                            warp_input, src_pts, dst_pts,
                            regularization_weight=1e-6,
                            num_boundary_points=0)
                        dense_flow = tf.identity(dense_flow, name='dense_flow')

    return images_transformed, images_rendered, ldmark_pred, ldmark_diff
def loss(self, net_out):
    """
    Takes net.out and placeholders value
    returned in batch() func above,
    to build train_op and loss
    """
    # meta
    m = self.meta
    sprob = float(m['class_scale'])
    sconf = float(m['object_scale'])
    snoob = float(m['noobject_scale'])
    scoor = float(m['coord_scale'])
    S, B, C = m['side'], m['num'], m['classes']
    SS = S * S  # number of grid cells

    print('{} loss hyper-parameters:'.format(m['model']))
    print('\tside    = {}'.format(m['side']))
    print('\tbox     = {}'.format(m['num']))
    print('\tclasses = {}'.format(m['classes']))
    print('\tscales  = {}'.format([sprob, sconf, snoob, scoor]))

    size1 = [None, SS, C]
    size2 = [None, SS, B]

    # return the below placeholders
    _probs = tf.placeholder(tf.float32, size1)
    _confs = tf.placeholder(tf.float32, size2)
    _coord = tf.placeholder(tf.float32, size2 + [4])
    # weights term for L2 loss
    _proid = tf.placeholder(tf.float32, size1)
    # material for calculating IOU
    _areas = tf.placeholder(tf.float32, size2)
    _upleft = tf.placeholder(tf.float32, size2 + [2])
    _botright = tf.placeholder(tf.float32, size2 + [2])

    self.placeholders = {
        'probs': _probs, 'confs': _confs, 'coord': _coord, 'proid': _proid,
        'areas': _areas, 'upleft': _upleft, 'botright': _botright
    }

    # Extract the coordinate prediction from net.out
    coords = net_out[:, SS * (C + B):]
    coords = tf.reshape(coords, [-1, SS, B, 4])
    wh = tf.pow(coords[:, :, :, 2:4], 2) * S  # unit: grid cell
    area_pred = wh[:, :, :, 0] * wh[:, :, :, 1]  # unit: grid cell^2
    centers = coords[:, :, :, 0:2]  # [batch, SS, B, 2]
    floor = centers - (wh * .5)  # [batch, SS, B, 2]
    ceil = centers + (wh * .5)  # [batch, SS, B, 2]

    # calculate the intersection areas
    intersect_upleft = tf.maximum(floor, _upleft)
    intersect_botright = tf.minimum(ceil, _botright)
    intersect_wh = intersect_botright - intersect_upleft
    intersect_wh = tf.maximum(intersect_wh, 0.0)
    intersect = tf.multiply(intersect_wh[:, :, :, 0], intersect_wh[:, :, :, 1])

    # calculate the best IOU, set 0.0 confidence for worse boxes
    iou = tf.truediv(intersect, _areas + area_pred - intersect)
    best_box = tf.equal(iou, tf.reduce_max(iou, [2], True))
    best_box = tf.to_float(best_box)
    confs = tf.multiply(best_box, _confs)

    # take care of the weight terms
    conid = snoob * (1. - confs) + sconf * confs
    weight_coo = tf.concat(4 * [tf.expand_dims(confs, -1)], 3)
    cooid = scoor * weight_coo
    proid = sprob * _proid

    # flatten 'em all
    probs = slim.flatten(_probs)
    proid = slim.flatten(proid)
    confs = slim.flatten(confs)
    conid = slim.flatten(conid)
    coord = slim.flatten(_coord)
    cooid = slim.flatten(cooid)
    self.fetch += [probs, confs, conid, cooid, proid]

    true = tf.concat([probs, confs, coord], 1)
    wght = tf.concat([proid, conid, cooid], 1)

    print('Building {} loss'.format(m['model']))
    loss = tf.pow(net_out - true, 2)
    loss = tf.multiply(loss, wght)
    loss = tf.reduce_sum(loss, 1)
    self.loss = .5 * tf.reduce_mean(loss)
    tf.summary.scalar('{} loss'.format(m['model']), self.loss)
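# A small worked example of the IOU arithmetic used in the loss above
# (plain numpy, illustrative values): two boxes given by their up-left and
# bottom-right corners.
import numpy as np
floor, upleft = np.array([1.0, 1.0]), np.array([2.0, 2.0])
ceil, botright = np.array([5.0, 5.0]), np.array([6.0, 6.0])
inter_wh = np.maximum(np.minimum(ceil, botright) - np.maximum(floor, upleft), 0.0)
intersect = inter_wh[0] * inter_wh[1]                   # (5-2)*(5-2) = 9
area_pred = (ceil - floor).prod()                       # 16
area_true = (botright - upleft).prod()                  # 16
iou = intersect / (area_true + area_pred - intersect)   # 9 / 23 ~= 0.391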
def inception_v4(inputs, is_training=True,
                 dropout_keep_prob=0.8,
                 reuse=None,
                 scope='InceptionV4'):
    """Creates the Inception V4 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: a Tensor of 512-D embeddings, squashed into [0, 1] by a tanh.
    """
    with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            net, end_points = inception_v4_base(inputs, scope=scope)

            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # Final pooling and prediction
                # TODO(sguada,arnoegw): Consider adding a parameter global_pool
                # which can be set to False to disable pooling here (as in
                # resnet_*()).
                with tf.variable_scope('Embeddings'):
                    # 8 x 8 x 1536
                    kernel_size = net.get_shape()[1:3]
                    if kernel_size.is_fully_defined():
                        net = slim.avg_pool2d(net, kernel_size,
                                              padding='VALID',
                                              scope='AvgPool_1a')
                    else:
                        net = tf.reduce_mean(input_tensor=net, axis=[1, 2],
                                             keepdims=True,
                                             name='global_pool')
                    # 1 x 1 x 1536
                    net = slim.dropout(net, dropout_keep_prob,
                                       scope='Dropout_1b')
                    net = slim.flatten(net, scope='PreEmbeddingsFlatten')
                    # 1536
                    net = slim.fully_connected(net, 512, activation_fn=None,
                                               scope='Embeddings')
                    # Squash the embeddings into [0, 1].
                    net = (tf.math.tanh(net) + 1) / 2
    return net
def inception_resnet_v2(inputs, is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV2'):
    """Creates the Inception Resnet V2 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: number of units in the bottleneck (embedding)
        layer.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: the output of the bottleneck layer.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # 149 x 149 x 32
                net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net, 32, 3, padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net, 80, 1, padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net, 192, 3, padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 192
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_5a_3x3')
                end_points['MaxPool_5a_3x3'] = net

                # 35 x 35 x 320
                with tf.variable_scope('Mixed_5b'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net, 48, 1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
                                                    scope='Conv2d_0b_5x5')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2_0 = slim.conv2d(net, 64, 1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
                                                    scope='Conv2d_0c_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.avg_pool2d(net, 3, stride=1,
                                                     padding='SAME',
                                                     scope='AvgPool_0a_3x3')
                        tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
                                                   scope='Conv2d_0b_1x1')
                    net = tf.concat([tower_conv, tower_conv1_1,
                                     tower_conv2_2, tower_pool_1], 3)
                end_points['Mixed_5b'] = net
                net = slim.repeat(net, 10, block35, scale=0.17)

                # 17 x 17 x 1024
                with tf.variable_scope('Mixed_6a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 384, 3, stride=2,
                                                 padding='VALID',
                                                 scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net, 256, 1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_pool = slim.max_pool2d(net, 3, stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
                end_points['Mixed_6a'] = net
                net = slim.repeat(net, 20, block17, scale=0.10)

                with tf.variable_scope('Mixed_7a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 256, 1,
                                                 scope='Conv2d_0a_1x1')
                        tower_conv_1 = slim.conv2d(tower_conv, 384, 3,
                                                   stride=2, padding='VALID',
                                                   scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1 = slim.conv2d(net, 256, 1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2 = slim.conv2d(net, 256, 1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.max_pool2d(net, 3, stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv_1, tower_conv1_1,
                                     tower_conv2_2, tower_pool], 3)
                end_points['Mixed_7a'] = net

                net = slim.repeat(net, 9, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
                end_points['Conv2d_7b_1x1'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # pylint: disable=no-member
                    net = slim.avg_pool2d(net, net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')
                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net, bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck', reuse=False)

    return net, end_points
def inception_v3(images,
                 trainable=True,
                 is_training=True,
                 weight_decay=0.00004,
                 stddev=0.1,
                 dropout_keep_prob=0.8,
                 use_batch_norm=True,
                 batch_norm_params=None,
                 add_summaries=True,
                 scope="InceptionV3"):
    """Builds an Inception V3 subgraph for image embeddings.

    Args:
      images: A float32 Tensor of shape [batch, height, width, channels].
      trainable: Whether the inception submodel should be trainable or not.
      is_training: Boolean indicating training mode or not.
      weight_decay: Coefficient for weight regularization.
      stddev: The standard deviation of the truncated normal weight
        initializer.
      dropout_keep_prob: Dropout keep probability.
      use_batch_norm: Whether to use batch normalization.
      batch_norm_params: Parameters for batch normalization. See
        tf.contrib.layers.batch_norm for details.
      add_summaries: Whether to add activation summaries.
      scope: Optional Variable scope.

    Returns:
      net: A float32 Tensor of pooled image features from inception_v3 layers.
    """
    # Only consider the inception model to be in training mode if it's
    # trainable.
    is_inception_model_training = trainable and is_training

    if use_batch_norm:
        # Default parameters for batch normalization.
        if not batch_norm_params:
            batch_norm_params = {
                "is_training": is_inception_model_training,
                "trainable": trainable,
                # Decay for the moving averages.
                "decay": 0.9997,
                # Epsilon to prevent 0s in variance.
                "epsilon": 0.001,
                # Collection containing the moving mean and moving variance.
                "variables_collections": {
                    "beta": None,
                    "gamma": None,
                    "moving_mean": ["moving_vars"],
                    "moving_variance": ["moving_vars"],
                }
            }
    else:
        batch_norm_params = None

    if trainable:
        weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
    else:
        weights_regularizer = None

    with tf.compat.v1.variable_scope(scope, "InceptionV3", [images]) as scope:
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            weights_regularizer=weights_regularizer,
                            trainable=trainable):
            with slim.arg_scope(
                    [slim.conv2d],
                    weights_initializer=tf.compat.v1.truncated_normal_initializer(stddev=stddev),
                    activation_fn=tf.nn.relu,
                    normalizer_fn=slim.batch_norm,
                    normalizer_params=batch_norm_params):
                net, end_points = inception_v3_base(images, scope=scope)
                with tf.compat.v1.variable_scope("logits"):
                    shape = net.get_shape()
                    net = slim.avg_pool2d(net, shape[1:3], padding="VALID",
                                          scope="pool")
                    net = slim.dropout(net,
                                       keep_prob=dropout_keep_prob,
                                       is_training=is_inception_model_training,
                                       scope="dropout")
                    net = slim.flatten(net, scope="flatten")

    # Add summaries.
    if add_summaries:
        for v in end_points.values():
            slim.summarize_activation(v)

    return net
def build_mnf_lenet(self, x, sample=True):
    if not self.built:
        self.layers = []
    with tf.variable_scope(self.opts):
        if not self.built:
            layer1 = Conv2DMNF(self.layer_dims[0], 5, 5, N=self.N,
                               input_shape=self.input_shape,
                               border_mode='VALID', flows_q=self.flows_q,
                               flows_r=self.flows_r, logging=self.logging,
                               use_z=self.use_z, learn_p=self.learn_p,
                               prior_var=self.prior_var_w,
                               prior_var_b=self.prior_var_b,
                               thres_var=self.thres_var,
                               flow_dim_h=self.flow_dim_h)
            self.layers.append(layer1)
        else:
            layer1 = self.layers[0]
        h1 = self.activation(
            tf.nn.max_pool(layer1(x, sample=sample), [1, 2, 2, 1],
                           [1, 2, 2, 1], 'SAME'))

        if not self.built:
            shape = [None] + [s.value for s in h1.get_shape()[1:]]
            layer2 = Conv2DMNF(self.layer_dims[1], 5, 5, N=self.N,
                               input_shape=shape, border_mode='VALID',
                               flows_q=self.flows_q, flows_r=self.flows_r,
                               use_z=self.use_z, logging=self.logging,
                               learn_p=self.learn_p,
                               flow_dim_h=self.flow_dim_h,
                               thres_var=self.thres_var,
                               prior_var=self.prior_var_w,
                               prior_var_b=self.prior_var_b)
            self.layers.append(layer2)
        else:
            layer2 = self.layers[1]
        h2 = slim.flatten(
            self.activation(
                tf.nn.max_pool(layer2(h1, sample=sample), [1, 2, 2, 1],
                               [1, 2, 2, 1], 'SAME')))

        if not self.built:
            fcinp_dim = h2.get_shape()[1].value
            layer3 = DenseMNF(self.layer_dims[2], N=self.N,
                              input_dim=fcinp_dim, flows_q=self.flows_q,
                              flows_r=self.flows_r, use_z=self.use_z,
                              logging=self.logging, learn_p=self.learn_p,
                              prior_var=self.prior_var_w,
                              prior_var_b=self.prior_var_b,
                              flow_dim_h=self.flow_dim_h,
                              thres_var=self.thres_var)
            self.layers.append(layer3)
        else:
            layer3 = self.layers[2]
        h3 = self.activation(layer3(h2, sample=sample))

        if not self.built:
            fcinp_dim = h3.get_shape()[1].value
            layerout = DenseMNF(self.nb_classes, N=self.N,
                                input_dim=fcinp_dim, flows_q=self.flows_q,
                                flows_r=self.flows_r, use_z=self.use_z,
                                logging=self.logging, learn_p=self.learn_p,
                                prior_var=self.prior_var_w,
                                prior_var_b=self.prior_var_b,
                                flow_dim_h=self.flow_dim_h,
                                thres_var=self.thres_var)
            self.layers.append(layerout)
        else:
            layerout = self.layers[3]

        if not self.built:
            self.built = True
        return layerout(h3, sample=sample)
def __init__(self, h_size):
    # The network receives a frame from the game, flattened into an array.
    # It then resizes it and processes it through four convolutional layers.
    self.scalarInput = v1.placeholder(shape=[None, 84672], dtype=tf.float32)
    self.imageIn = tf.reshape(self.scalarInput, shape=[-1, 168, 168, 3])  # RESHAPE LINE
    self.conv1 = slim.conv2d(inputs=self.imageIn, num_outputs=32,
                             kernel_size=[12, 12], stride=[6, 6],
                             padding='VALID', biases_initializer=None)
    self.conv2 = slim.conv2d(inputs=self.conv1, num_outputs=64,
                             kernel_size=[5, 5], stride=[2, 2],
                             padding='VALID', biases_initializer=None)
    self.conv3 = slim.conv2d(inputs=self.conv2, num_outputs=64,
                             kernel_size=[6, 6], stride=[1, 1],
                             padding='VALID', biases_initializer=None)
    self.conv4 = slim.conv2d(inputs=self.conv3, num_outputs=h_size,
                             kernel_size=[7, 7], stride=[1, 1],
                             padding='VALID', biases_initializer=None)

    # We take the output from the final convolutional layer and split it
    # into separate advantage and value streams.
    self.streamAC, self.streamVC = tf.split(self.conv4, 2, axis=3)
    self.streamA = slim.flatten(self.streamAC)
    self.streamV = slim.flatten(self.streamVC)
    # xavier_init = tf.contrib.layers.xavier_initializer()
    # -- no contrib lib in tf 2.0
    xavier_init = tf.initializers.GlorotUniform()
    self.AW = tf.Variable(xavier_init([h_size // 2, 2]))  # WHY IS THIS 29 (env.actions)
    self.VW = tf.Variable(xavier_init([h_size // 2, 1]))
    self.Advantage = tf.matmul(self.streamA, self.AW)
    self.Value = tf.matmul(self.streamV, self.VW)

    # Then combine them together to get our final Q-values.
    self.Qout = self.Value + tf.subtract(
        self.Advantage,
        tf.reduce_mean(self.Advantage, axis=1, keepdims=True))
    self.predict = tf.argmax(self.Qout, 1)
    self.extract_value, self.extract_index = tf.nn.top_k(self.Qout, 1,
                                                         sorted=True)

    # Below we obtain the loss by taking the sum-of-squares difference
    # between the target and predicted Q-values.
    self.targetQ = v1.placeholder(shape=[None], dtype=tf.float32)
    self.actions = v1.placeholder(shape=[None], dtype=tf.int32)
    self.actions_onehot = tf.one_hot(self.actions, 2, dtype=tf.float32)  # WHY IS THIS 29
    self.Q = tf.reduce_sum(tf.multiply(self.Qout, self.actions_onehot), axis=1)
    self.td_error = tf.square(self.targetQ - self.Q)
    self.loss = tf.reduce_mean(self.td_error)
    self.trainer = v1.train.AdamOptimizer(learning_rate=0.001)
    self.updateModel = self.trainer.minimize(self.loss)
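# A minimal training-step sketch for the dueling DQN class above (the class
# name `Qnetwork`, batch contents, and numpy-as-np import are assumptions
# for illustration).
qnet = Qnetwork(h_size=512)
with v1.Session() as sess:
    sess.run(v1.global_variables_initializer())
    batch_states = np.zeros((32, 84672), dtype=np.float32)  # flattened frames
    batch_actions = np.zeros(32, dtype=np.int32)
    batch_targets = np.zeros(32, dtype=np.float32)          # r + gamma * max Q'
    sess.run(qnet.updateModel,
             feed_dict={qnet.scalarInput: batch_states,
                        qnet.actions: batch_actions,
                        qnet.targetQ: batch_targets})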