# TF1.x-style code throughout this file; assumes `import tensorflow as tf` and
# `slim = tf.contrib.slim`.
def __init__(self, h_size, rnn_cell, myScope):
    # The network receives a flattened 84x84x3 frame and processes it through
    # four convolutional layers.
    self.scalarInput = tf.placeholder(shape=[None, 21168], dtype=tf.float32)
    self.imageIn = tf.reshape(self.scalarInput, shape=[-1, 84, 84, 3])
    self.conv1 = slim.convolution2d(inputs=self.imageIn, num_outputs=32, kernel_size=[8, 8],
                                    stride=[4, 4], padding='VALID', biases_initializer=None,
                                    scope=myScope + "_conv1")  # [-1,20,20,32]
    self.conv2 = slim.convolution2d(inputs=self.conv1, num_outputs=64, kernel_size=[4, 4],
                                    stride=[2, 2], padding='VALID', biases_initializer=None,
                                    scope=myScope + "_conv2")  # [-1,9,9,64]
    self.conv3 = slim.convolution2d(inputs=self.conv2, num_outputs=64, kernel_size=[3, 3],
                                    stride=[1, 1], padding='VALID', biases_initializer=None,
                                    scope=myScope + "_conv3")  # [-1,7,7,64]
    self.conv4 = slim.convolution2d(inputs=self.conv3, num_outputs=h_size, kernel_size=[7, 7],
                                    stride=[1, 1], padding='VALID', biases_initializer=None,
                                    scope=myScope + "_conv4")  # [-1,1,1,h_size]
    # Reshape the conv output into [batch x trace x units] for the recurrent layer.
    self.trainLength = tf.placeholder(tf.int32)
    self.batch_size = tf.placeholder(tf.int32, [])
    self.convFlat = tf.reshape(slim.flatten(self.conv4),
                               [self.batch_size, self.trainLength, h_size])
    self.state_in = rnn_cell.zero_state(self.batch_size, tf.float32)
    self.rnn, self.rnn_state = tf.nn.dynamic_rnn(inputs=self.convFlat, cell=rnn_cell,
                                                 dtype=tf.float32, initial_state=self.state_in,
                                                 scope=myScope + "_rnn")
    self.rnn = tf.reshape(self.rnn, shape=[-1, h_size])
    # Dueling architecture: split the RNN output into Advantage and Value streams.
    self.streamA, self.streamV = tf.split(self.rnn, 2, 1)
    self.AW = tf.Variable(tf.random_normal([h_size // 2, 4]))
    self.VW = tf.Variable(tf.random_normal([h_size // 2, 1]))
    self.Advantage = tf.matmul(self.streamA, self.AW)
    self.Value = tf.matmul(self.streamV, self.VW)
    self.salience = tf.gradients(self.Advantage, self.imageIn)
    self.Qout = self.Value + tf.subtract(self.Advantage,
                                         tf.reduce_mean(self.Advantage, axis=1, keep_dims=True))
    self.predict = tf.argmax(self.Qout, 1)
    # Loss: squared difference between the target and predicted Q-values.
    self.targetQ = tf.placeholder(shape=[None], dtype=tf.float32)
    self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
    self.actions_onehot = tf.one_hot(self.actions, 4, dtype=tf.float32)
    self.Q = tf.reduce_sum(tf.multiply(self.Qout, self.actions_onehot), axis=1)
    self.td_error = tf.square(self.targetQ - self.Q)
    # Mask the first half of each trace so only the later, better-grounded steps
    # of a sequence propagate gradients.
    self.maskA = tf.zeros([self.batch_size, self.trainLength // 2])
    self.maskB = tf.ones([self.batch_size, self.trainLength // 2])
    self.mask = tf.concat([self.maskA, self.maskB], 1)
    self.mask = tf.reshape(self.mask, [-1])
    self.loss = tf.reduce_mean(self.td_error * self.mask)
    self.trainer = tf.train.AdamOptimizer(learning_rate=0.001)
    self.updateModel = self.trainer.minimize(self.loss)
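# Usage sketch for the constructor above (a hedged example, not from the source):
# assumes TF1.x with `slim = tf.contrib.slim`, and that the __init__ belongs to a
# DRQN wrapper class, here hypothetically named `Qnetwork`.
import tensorflow as tf

h_size = 512  # width of conv4's output; must be even for the dueling split
cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=h_size, state_is_tuple=True)
mainQN = Qnetwork(h_size, cell, myScope='main')  # `Qnetwork` is a hypothetical name
# scalarInput expects [batch_size * trainLength, 21168] flattened 84x84x3 frames;
# trainLength and batch_size are fed as scalar int32 values at session run time.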
def separable_conv(self, input, k_h, k_w, c_o, stride, name, relu=True, set_bias=True):
    with slim.arg_scope([slim.batch_norm], decay=0.999, fused=common.batchnorm_fused,
                        is_training=self.trainable):
        output = slim.separable_convolution2d(input,
                                              num_outputs=None,
                                              stride=stride,
                                              trainable=self.trainable,
                                              depth_multiplier=1.0,
                                              kernel_size=[k_h, k_w],
                                              # activation_fn=common.activation_fn if relu else None,
                                              activation_fn=None,
                                              # normalizer_fn=slim.batch_norm,
                                              weights_initializer=_init_xavier,
                                              # weights_initializer=_init_norm,
                                              weights_regularizer=_l2_regularizer_00004,
                                              biases_initializer=None,
                                              padding=DEFAULT_PADDING,
                                              scope=name + '_depthwise')
        output = slim.convolution2d(output,
                                    c_o,
                                    stride=1,
                                    kernel_size=[1, 1],
                                    activation_fn=common.activation_fn if relu else None,
                                    weights_initializer=_init_xavier,
                                    # weights_initializer=_init_norm,
                                    biases_initializer=_init_zero if set_bias else None,
                                    normalizer_fn=slim.batch_norm,
                                    trainable=self.trainable,
                                    weights_regularizer=None,
                                    scope=name + '_pointwise')
    return output
def _depthwise_separable_conv(inputs, num_pwc_filters, width_multiplier, sc, downsample=False):
    """ Helper function to build the depth-wise separable convolution layer. """
    num_pwc_filters = round(num_pwc_filters * width_multiplier)
    _stride = 2 if downsample else 1
    # skip pointwise by setting num_outputs=None
    depthwise_conv = slim.separable_convolution2d(inputs, num_outputs=None, stride=_stride,
                                                  depth_multiplier=1, kernel_size=[3, 3],
                                                  scope=sc + '/depthwise_conv')
    bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')
    pointwise_conv = slim.convolution2d(bn, num_pwc_filters, kernel_size=[1, 1],
                                        scope=sc + '/pointwise_conv')
    bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')
    return bn
def convb(self, input, k_h, k_w, c_o, stride, name, relu=True, set_bias=True, set_tanh=False):
    with slim.arg_scope([slim.batch_norm], decay=0.999, fused=common.batchnorm_fused,
                        is_training=self.trainable):
        output = slim.convolution2d(input, c_o,
                                    kernel_size=[k_h, k_w],
                                    stride=stride,
                                    normalizer_fn=slim.batch_norm,
                                    weights_regularizer=_l2_regularizer_convb,
                                    weights_initializer=_init_xavier,
                                    # weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                                    biases_initializer=_init_zero if set_bias else None,
                                    trainable=self.trainable,
                                    activation_fn=common.activation_fn if relu else None,
                                    scope=name)
        if set_tanh:
            output = tf.nn.tanh(output, name=name + '_extra_acv')
    return output
def _depthwise_separable_conv(inputs, num_pwc_filters, sc, kernel_size, stride):
    """ Helper function to build the depth-wise separable convolution layer. """
    # skip pointwise by setting num_outputs=None
    depthwise_conv = slim.separable_convolution2d(inputs, num_outputs=None, stride=stride,
                                                  depth_multiplier=1, kernel_size=kernel_size,
                                                  scope=sc + '/depthwise_conv')
    bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')
    pointwise_conv = slim.convolution2d(bn, num_pwc_filters, kernel_size=[1, 1],
                                        scope=sc + '/pointwise_conv')
    bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')
    return bn
def pfld_inference_for_mobileNetV3_small(input, weight_decay, batch_norm_params):
    # Each row: [in_channels, out_channels, kernel_size, stride, activation, se, expand_dims]
    layers = [
        [16, 16, 3, 2, "RE", True, 16],
        [16, 24, 3, 2, "RE", False, 72],
        [24, 24, 3, 1, "RE", False, 88],
        [24, 40, 5, 2, "HS", True, 96],
        [40, 40, 5, 1, "HS", True, 240],
        [40, 40, 5, 1, "HS", True, 240],
        [40, 48, 5, 1, "HS", True, 120],
        [48, 48, 5, 1, "HS", True, 144],
        [48, 96, 5, 2, "HS", True, 288],
        [96, 96, 5, 1, "HS", True, 576],
        [96, 96, 5, 1, "HS", True, 576],
    ]
    reduction_ratio = 4
    multiplier = 1
    with tf.variable_scope('pfld_inference'):
        features = {}
        with slim.arg_scope([slim.convolution2d, slim.separable_conv2d],
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                            biases_initializer=tf.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            padding='SAME'):
            print('PFLD input shape({}): {}'.format(input.name, input.get_shape()))
            # 112*112*3
            out = slim.convolution2d(input, 16 * multiplier, [3, 3], stride=1,
                                     activation_fn=hard_swish, scope='conv_1')
            print(out.name, out.get_shape())
            with tf.variable_scope("MobilenetV3_large"):
                for index in range(3):
                    in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index]
                    out_channels *= multiplier
                    out = mobileNetV3_block(out, "bneck{}".format(index), expand_dims,
                                            out_channels, kernel_size, stride,
                                            ratio=reduction_ratio, activation_fn=activation,
                                            se=se, short_cut=(in_channels == out_channels))
                    print(out.name, out.get_shape())
                # 28*28
                features['auxiliary_input'] = out
                # 14*14
                index = 3
                in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index]
                out_channels *= multiplier
                out1 = mobileNetV3_block(out, "bneck{}".format(index), expand_dims,
                                         out_channels, kernel_size, stride,
                                         ratio=reduction_ratio, activation_fn=activation,
                                         se=se, short_cut=(in_channels == out_channels))
                print(out1.name, out1.get_shape())
                for index in range(4, 8):
                    in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index]
                    out_channels *= multiplier
                    out1 = mobileNetV3_block(out1, "bneck{}".format(index), expand_dims,
                                             out_channels, kernel_size, stride,
                                             ratio=reduction_ratio, activation_fn=activation,
                                             se=se, short_cut=(in_channels == out_channels))
                    print(out1.name, out1.get_shape())
                # 7*7
                index = 8
                in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index]
                out_channels *= multiplier
                out2 = mobileNetV3_block(out1, "bneck{}".format(index), expand_dims,
                                         out_channels, kernel_size, stride,
                                         ratio=reduction_ratio, activation_fn=activation,
                                         se=se, short_cut=(in_channels == out_channels))
                print(out2.name, out2.get_shape())
                for index in range(9, len(layers)):
                    in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index]
                    out_channels *= multiplier
                    out2 = mobileNetV3_block(out2, "bneck{}".format(index), expand_dims,
                                             out_channels, kernel_size, stride,
                                             ratio=reduction_ratio, activation_fn=activation,
                                             se=se, short_cut=(in_channels == out_channels))
                    print(out2.name, out2.get_shape())
            out3 = slim.convolution2d(out2, 576, [1, 1], stride=1,
                                      activation_fn=hard_swish, scope='conv_2')
            print(out3.name, out3.get_shape())
            out3 = slim.avg_pool2d(out3, [out3.get_shape()[1], out3.get_shape()[2]],
                                   stride=1, scope='group_pool')
            print(out3.name, out3.get_shape())
            out3 = slim.convolution2d(out3, 1280, [1, 1], stride=1, normalizer_fn=None,
                                      activation_fn=hard_swish, scope='conv_3')
            print(out3.name, out3.get_shape())
            s1 = slim.flatten(out1)
            s2 = slim.flatten(out2)
            s3 = slim.flatten(out3)
            multi_scale = tf.concat([s1, s2, s3], 1)
            landmarks = slim.fully_connected(multi_scale, num_outputs=136,
                                             activation_fn=None, scope='fc')
            print(landmarks.name, landmarks.get_shape())
    return features, landmarks
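# Hedged usage sketch for pfld_inference_for_mobileNetV3_small above: the 112x112x3
# input size comes from the comment in the builder; the batch_norm_params values
# here are illustrative, following slim.batch_norm's keyword names.
import tensorflow as tf

image_batch = tf.placeholder(tf.float32, [None, 112, 112, 3], name='image_batch')
bn_params = {'decay': 0.995, 'epsilon': 0.001, 'updates_collections': None}
features, landmarks = pfld_inference_for_mobileNetV3_small(image_batch,
                                                           weight_decay=5e-5,
                                                           batch_norm_params=bn_params)
# landmarks is [None, 136], i.e. 68 (x, y) landmark pairs;
# features['auxiliary_input'] is the 28x28 map the PFLD auxiliary branch consumes.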
def mobilenet(inputs, num_classes=1000, is_training=True, width_multiplier=1, scope='MobileNet'):
    """ MobileNet
    For more detail, please refer to Google's paper (https://arxiv.org/abs/1704.04861).

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      scope: Optional scope for the variables.
    Returns:
      logits: the pre-softmax activations, a tensor of size [batch_size, `num_classes`]
      end_points: a dictionary from components of the network to the corresponding activation.
    """
    def _depthwise_separable_conv(inputs, num_pwc_filters, width_multiplier, sc, downsample=False):
        """ Helper function to build the depth-wise separable convolution layer. """
        num_pwc_filters = round(num_pwc_filters * width_multiplier)
        _stride = 2 if downsample else 1
        # skip pointwise by setting num_outputs=None
        depthwise_conv = slim.separable_convolution2d(inputs, num_outputs=None, stride=_stride,
                                                      depth_multiplier=1, kernel_size=[3, 3],
                                                      scope=sc + '/depthwise_conv')
        bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')
        pointwise_conv = slim.convolution2d(bn, num_pwc_filters, kernel_size=[1, 1],
                                            scope=sc + '/pointwise_conv')
        bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')
        return bn

    with tf.variable_scope(scope) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                            activation_fn=None,
                            outputs_collections=[end_points_collection]):
            with slim.arg_scope([slim.batch_norm], is_training=is_training,
                                activation_fn=tf.nn.relu, fused=True):
                net = slim.convolution2d(inputs, round(32 * width_multiplier), [3, 3],
                                         stride=2, padding='SAME', scope='conv_1')
                net = slim.batch_norm(net, scope='conv_1/batch_norm')
                net = _depthwise_separable_conv(net, 64, width_multiplier, sc='conv_ds_2')
                net = _depthwise_separable_conv(net, 128, width_multiplier, downsample=True, sc='conv_ds_3')
                net = _depthwise_separable_conv(net, 128, width_multiplier, sc='conv_ds_4')
                net = _depthwise_separable_conv(net, 256, width_multiplier, downsample=True, sc='conv_ds_5')
                net = _depthwise_separable_conv(net, 256, width_multiplier, sc='conv_ds_6')
                net = _depthwise_separable_conv(net, 512, width_multiplier, downsample=True, sc='conv_ds_7')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_8')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_9')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_10')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_11')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_12')
                net = _depthwise_separable_conv(net, 1024, width_multiplier, downsample=True, sc='conv_ds_13')
                net = _depthwise_separable_conv(net, 1024, width_multiplier, sc='conv_ds_14')
                net = slim.avg_pool2d(net, [7, 7], scope='avg_pool_15')
        end_points = slim.utils.convert_collection_to_dict(end_points_collection)
        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
        end_points['squeeze'] = net
        logits = slim.fully_connected(net, num_classes, activation_fn=None, scope='fc_16')
        predictions = slim.softmax(logits, scope='Predictions')
        end_points['Logits'] = logits
        end_points['Predictions'] = predictions
    return logits, end_points
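# Hedged usage sketch for the MobileNet v1 builder above (assumes TF1.x with
# `slim = tf.contrib.slim`). The fixed 7x7 average pool implies a 224x224 input.
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = mobilenet(images, num_classes=1000, is_training=False,
                               width_multiplier=0.5)
probs = end_points['Predictions']   # softmax probabilities, [None, 1000]
pre_logits = end_points['squeeze']  # pooled feature vector, [None, round(1024 * 0.5)]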
def pfld_inference(input, weight_decay, batch_norm_params):
    coefficient = 1
    with tf.variable_scope('pfld_inference'):
        features = {}
        with slim.arg_scope([slim.convolution2d, slim.separable_conv2d],
                            activation_fn=tf.nn.relu6,
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                            biases_initializer=tf.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            padding='SAME'):
            print('PFLD input shape({}): {}'.format(input.name, input.get_shape()))
            # 112*112*3
            conv1 = slim.convolution2d(input, 64 * coefficient, [3, 3], stride=2, scope='conv_1')
            print(conv1.name, conv1.get_shape())
            # 56*56*64
            conv2 = slim.separable_convolution2d(conv1, num_outputs=None, stride=1,
                                                 depth_multiplier=1, kernel_size=[3, 3],
                                                 scope='conv2/dwise')
            print(conv2.name, conv2.get_shape())
            # 56*56*64
            conv3_1 = slim.convolution2d(conv2, 128, [1, 1], stride=2, scope='conv3_1/expand')
            print(conv3_1.name, conv3_1.get_shape())
            conv3_1 = slim.separable_convolution2d(conv3_1, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv3_1/dwise')
            print(conv3_1.name, conv3_1.get_shape())
            conv3_1 = slim.convolution2d(conv3_1, 64 * coefficient, [1, 1], stride=1,
                                         activation_fn=None, scope='conv3_1/linear')
            print(conv3_1.name, conv3_1.get_shape())

            conv3_2 = slim.convolution2d(conv3_1, 128, [1, 1], stride=1, scope='conv3_2/expand')
            print(conv3_2.name, conv3_2.get_shape())
            conv3_2 = slim.separable_convolution2d(conv3_2, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv3_2/dwise')
            print(conv3_2.name, conv3_2.get_shape())
            conv3_2 = slim.convolution2d(conv3_2, 64 * coefficient, [1, 1], stride=1,
                                         activation_fn=None, scope='conv3_2/linear')
            print(conv3_2.name, conv3_2.get_shape())
            block3_2 = conv3_1 + conv3_2
            print(block3_2.name, block3_2.get_shape())

            conv3_3 = slim.convolution2d(block3_2, 128, [1, 1], stride=1, scope='conv3_3/expand')
            print(conv3_3.name, conv3_3.get_shape())
            conv3_3 = slim.separable_convolution2d(conv3_3, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv3_3/dwise')
            print(conv3_3.name, conv3_3.get_shape())
            conv3_3 = slim.convolution2d(conv3_3, 64 * coefficient, [1, 1], stride=1,
                                         activation_fn=None, scope='conv3_3/linear')
            print(conv3_3.name, conv3_3.get_shape())
            block3_3 = block3_2 + conv3_3
            print(block3_3.name, block3_3.get_shape())

            conv3_4 = slim.convolution2d(block3_3, 128, [1, 1], stride=1, scope='conv3_4/expand')
            print(conv3_4.name, conv3_4.get_shape())
            conv3_4 = slim.separable_convolution2d(conv3_4, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv3_4/dwise')
            print(conv3_4.name, conv3_4.get_shape())
            conv3_4 = slim.convolution2d(conv3_4, 64 * coefficient, [1, 1], stride=1,
                                         activation_fn=None, scope='conv3_4/linear')
            print(conv3_4.name, conv3_4.get_shape())
            block3_4 = block3_3 + conv3_4
            print(block3_4.name, block3_4.get_shape())

            conv3_5 = slim.convolution2d(block3_4, 128, [1, 1], stride=1, scope='conv3_5/expand')
            print(conv3_5.name, conv3_5.get_shape())
            conv3_5 = slim.separable_convolution2d(conv3_5, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv3_5/dwise')
            print(conv3_5.name, conv3_5.get_shape())
            conv3_5 = slim.convolution2d(conv3_5, 64 * coefficient, [1, 1], stride=1,
                                         activation_fn=None, scope='conv3_5/linear')
            print(conv3_5.name, conv3_5.get_shape())
            block3_5 = block3_4 + conv3_5
            print(block3_5.name, block3_5.get_shape())
            features['auxiliary_input'] = block3_5

            # 28*28*64
            conv4_1 = slim.convolution2d(block3_5, 128, [1, 1], stride=2, scope='conv4_1/expand')
            print(conv4_1.name, conv4_1.get_shape())
            conv4_1 = slim.separable_convolution2d(conv4_1, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv4_1/dwise')
            print(conv4_1.name, conv4_1.get_shape())
            conv4_1 = slim.convolution2d(conv4_1, 128 * coefficient, [1, 1], stride=1,
                                         activation_fn=None, scope='conv4_1/linear')
            print(conv4_1.name, conv4_1.get_shape())

            # 14*14*128
            conv5_1 = slim.convolution2d(conv4_1, 512, [1, 1], stride=1, scope='conv5_1/expand')
            print(conv5_1.name, conv5_1.get_shape())
            conv5_1 = slim.separable_convolution2d(conv5_1, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv5_1/dwise')
            print(conv5_1.name, conv5_1.get_shape())
            conv5_1 = slim.convolution2d(conv5_1, 128 * coefficient, [1, 1], stride=1,
                                         activation_fn=None, scope='conv5_1/linear')
            print(conv5_1.name, conv5_1.get_shape())

            conv5_2 = slim.convolution2d(conv5_1, 512, [1, 1], stride=1, scope='conv5_2/expand')
            print(conv5_2.name, conv5_2.get_shape())
            conv5_2 = slim.separable_convolution2d(conv5_2, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv5_2/dwise')
            print(conv5_2.name, conv5_2.get_shape())
            conv5_2 = slim.convolution2d(conv5_2, 128 * coefficient, [1, 1], stride=1,
                                         activation_fn=None, scope='conv5_2/linear')
            print(conv5_2.name, conv5_2.get_shape())
            block5_2 = conv5_1 + conv5_2
            print(block5_2.name, block5_2.get_shape())

            conv5_3 = slim.convolution2d(block5_2, 512, [1, 1], stride=1, scope='conv5_3/expand')
            print(conv5_3.name, conv5_3.get_shape())
            conv5_3 = slim.separable_convolution2d(conv5_3, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv5_3/dwise')
            print(conv5_3.name, conv5_3.get_shape())
            conv5_3 = slim.convolution2d(conv5_3, 128 * coefficient, [1, 1], stride=1,
                                         activation_fn=None, scope='conv5_3/linear')
            print(conv5_3.name, conv5_3.get_shape())
            block5_3 = block5_2 + conv5_3
            print(block5_3.name, block5_3.get_shape())

            conv5_4 = slim.convolution2d(block5_3, 512, [1, 1], stride=1, scope='conv5_4/expand')
            print(conv5_4.name, conv5_4.get_shape())
            conv5_4 = slim.separable_convolution2d(conv5_4, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv5_4/dwise')
            print(conv5_4.name, conv5_4.get_shape())
            conv5_4 = slim.convolution2d(conv5_4, 128 * coefficient, [1, 1], stride=1,
                                         activation_fn=None, scope='conv5_4/linear')
            print(conv5_4.name, conv5_4.get_shape())
            block5_4 = block5_3 + conv5_4
            print(block5_4.name, block5_4.get_shape())

            conv5_5 = slim.convolution2d(block5_4, 512, [1, 1], stride=1, scope='conv5_5/expand')
            print(conv5_5.name, conv5_5.get_shape())
            conv5_5 = slim.separable_convolution2d(conv5_5, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv5_5/dwise')
            print(conv5_5.name, conv5_5.get_shape())
            conv5_5 = slim.convolution2d(conv5_5, 128 * coefficient, [1, 1], stride=1,
                                         activation_fn=None, scope='conv5_5/linear')
            print(conv5_5.name, conv5_5.get_shape())
            block5_5 = block5_4 + conv5_5
            print(block5_5.name, block5_5.get_shape())

            conv5_6 = slim.convolution2d(block5_5, 512, [1, 1], stride=1, scope='conv5_6/expand')
            print(conv5_6.name, conv5_6.get_shape())
            conv5_6 = slim.separable_convolution2d(conv5_6, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv5_6/dwise')
            print(conv5_6.name, conv5_6.get_shape())
            conv5_6 = slim.convolution2d(conv5_6, 128 * coefficient, [1, 1], stride=1,
                                         activation_fn=None, scope='conv5_6/linear')
            print(conv5_6.name, conv5_6.get_shape())
            block5_6 = block5_5 + conv5_6
            print(block5_6.name, block5_6.get_shape())

            # 14*14*128
            conv6_1 = slim.convolution2d(block5_6, 256, [1, 1], stride=1, scope='conv6_1/expand')
            print(conv6_1.name, conv6_1.get_shape())
            conv6_1 = slim.separable_convolution2d(conv6_1, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv6_1/dwise')
            print(conv6_1.name, conv6_1.get_shape())
            conv6_1 = slim.convolution2d(conv6_1, 16 * coefficient, [1, 1], stride=1,
                                         activation_fn=None, scope='conv6_1/linear')
            print(conv6_1.name, conv6_1.get_shape())

            # 14*14*16
            conv7 = slim.convolution2d(conv6_1, 32 * coefficient, [3, 3], stride=2,
                                       activation_fn=None, scope='conv7')
            print(conv7.name, conv7.get_shape())
            # 7*7*32
            conv8 = slim.convolution2d(conv7, 128 * coefficient, [7, 7], stride=1,
                                       padding='VALID', activation_fn=None, scope='conv8')
            print(conv8.name, conv8.get_shape())
            avg_pool1 = slim.avg_pool2d(conv6_1,
                                        [conv6_1.get_shape()[1], conv6_1.get_shape()[2]],
                                        stride=1)
            print(avg_pool1.name, avg_pool1.get_shape())
            avg_pool2 = slim.avg_pool2d(conv7,
                                        [conv7.get_shape()[1], conv7.get_shape()[2]],
                                        stride=1)
            print(avg_pool2.name, avg_pool2.get_shape())
            # s1 = slim.flatten(avg_pool1)
            # s2 = slim.flatten(avg_pool2)
            s1 = slim.flatten(conv6_1)
            s2 = slim.flatten(conv7)
            # 1*1*128
            s3 = slim.flatten(conv8)
            multi_scale = tf.concat([s1, s2, s3], 1)
            landmarks = slim.fully_connected(multi_scale, num_outputs=196,
                                             activation_fn=None, scope='fc')
    return features, landmarks
def mobilenet(name, inputs, num_class=1000, width_mult=1.0, train=False):
    block = depthwise_seperable_block
    with tf.variable_scope(name):
        with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                            activation_fn=None,
                            weights_initializer=slim.initializers.xavier_initializer()):
            with slim.arg_scope([slim.batch_norm], is_training=train,
                                activation_fn=tf.nn.relu, fused=True):
                net = slim.convolution2d(inputs, round(32 * width_mult), [3, 3], stride=2,
                                         padding="SAME", scope="first_conv3x3")
                net = slim.batch_norm(net, scope="first_batch_norm")
                net = block("depthwise_seperable_1", net, round(64 * width_mult))
                net = block("depthwise_seperable_2", net, round(128 * width_mult), downsample=True)
                net = block("depthwise_seperable_3", net, round(128 * width_mult))
                net = block("depthwise_seperable_4", net, round(256 * width_mult), downsample=True)
                net = block("depthwise_seperable_5", net, round(256 * width_mult))
                net = block("depthwise_seperable_6", net, round(512 * width_mult), downsample=True)
                net = block("depthwise_seperable_7", net, round(512 * width_mult))
                net = block("depthwise_seperable_8", net, round(512 * width_mult))
                net = block("depthwise_seperable_9", net, round(512 * width_mult))
                net = block("depthwise_seperable_10", net, round(512 * width_mult))
                net = block("depthwise_seperable_11", net, round(512 * width_mult))
                net = block("depthwise_seperable_12", net, round(1024 * width_mult), downsample=True)
                net = block("depthwise_seperable_13", net, round(1024 * width_mult))
                net = slim.avg_pool2d(net, [7, 7], scope="avg_pool2d")
        net = tf.squeeze(net, [1, 2], name="squeeze")
        logits = slim.fully_connected(net, num_class, activation_fn=None, scope="fc",
                                      weights_initializer=slim.initializers.xavier_initializer())
        predictions = slim.softmax(logits, scope="softmax")
    return predictions
def inference(self, preprocessed_inputs, width_multiplier=1, scope="MobileNetV1"):
    with slim.arg_scope(self.mobilenet_arg_scope()):
        with tf.variable_scope(scope) as sc:
            # No activation function directly after each convolution;
            # ReLU is applied only after the batch-norm layer.
            with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                                activation_fn=None):
                with slim.arg_scope([slim.batch_norm], is_training=self.is_training,
                                    activation_fn=tf.nn.relu, fused=True, decay=0.95):
                    # fused: whether to use the faster, fused batch-norm implementation
                    net = slim.convolution2d(preprocessed_inputs, round(32 * width_multiplier),
                                             [3, 3], stride=2, padding="SAME", scope="conv_1")
                    # Depthwise separable convolution: runs both the depthwise
                    # and pointwise steps.
                    net = self.depthwise_separable_conv(net, 64, width_multiplier, name='conv_ds_2')
                    net = self.depthwise_separable_conv(net, 128, width_multiplier,
                                                        downsampling=True, name='conv_3')
                    net = self.depthwise_separable_conv(net, 128, width_multiplier, name='conv_ds_4')
                    net = self.depthwise_separable_conv(net, 256, width_multiplier,
                                                        downsampling=True, name='conv_5')
                    # net = self.depthwise_separable_conv(net, 256, width_multiplier, name='conv_ds_6')
                    # net = self.depthwise_separable_conv(net, 512, width_multiplier, downsampling=True, name='conv_7')
                    net = self.depthwise_separable_conv(net, 512, width_multiplier, name='conv_8')
                    net = self.depthwise_separable_conv(net, 512, width_multiplier, name='conv_9')
                    net = self.depthwise_separable_conv(net, 512, width_multiplier, name='conv_10')
                    net = self.depthwise_separable_conv(net, 512, width_multiplier, name='conv_11')
                    net = self.depthwise_separable_conv(net, 512, width_multiplier, name='conv_12')
                    # net = self.depthwise_separable_conv(net, 1024, width_multiplier, downsampling=True, name='conv_13')
                    # net = self.depthwise_separable_conv(net, 1024, width_multiplier, name='conv_ds_14')
                    net = slim.avg_pool2d(net, [2, 2], scope='avg_pool_15')
            shape = net.get_shape().as_list()
            flat_height, flat_width, flat_channels = shape[1:]
            flat_size = flat_height * flat_width * flat_channels
            net = tf.reshape(net, shape=[-1, flat_size])
            net = slim.fully_connected(net, self.num_classes, activation_fn=None, scope='fc_16')
    return net
def __init__(self, h_size, action_size, img_size=84, learning_rate=0.00025):
    self.frame_in = tf.placeholder(tf.float32, [None, img_size * img_size * 3],
                                   name="frame_in")
    img_in = tf.reshape(self.frame_in, [-1, img_size, img_size, 3])
    conv1 = slim.convolution2d(scope="conv1", inputs=img_in, num_outputs=32,
                               kernel_size=[8, 8], stride=[4, 4], padding="VALID",
                               biases_initializer=None)
    conv2 = slim.convolution2d(scope="conv2", inputs=conv1, num_outputs=64,
                               kernel_size=[4, 4], stride=[2, 2], padding="VALID",
                               biases_initializer=None)
    conv3 = slim.convolution2d(scope="conv3", inputs=conv2, num_outputs=64,
                               kernel_size=[3, 3], stride=[1, 1], padding="VALID",
                               biases_initializer=None)
    conv4 = slim.convolution2d(scope="conv4", inputs=conv3, num_outputs=h_size,
                               kernel_size=[7, 7], stride=[1, 1], padding="VALID",
                               biases_initializer=None)
    self.train_len = tf.placeholder(tf.int32, [])
    self.batch_size = tf.placeholder(tf.int32, [])
    conv_flat = tf.reshape(slim.flatten(conv4), [self.batch_size, self.train_len, h_size])
    cell = tf.nn.rnn_cell.BasicLSTMCell(h_size, state_is_tuple=True, reuse=False)
    self.state_init = cell.zero_state(self.batch_size, tf.float32)
    rnn, self.rnn_state = tf.nn.dynamic_rnn(cell, conv_flat, dtype=tf.float32,
                                            initial_state=self.state_init)
    # print(rnn)
    # print(self.rnn_state)
    rnn = tf.reshape(rnn, [-1, h_size])
    with tf.variable_scope("va_split"):
        stream_a, stream_v = tf.split(rnn, 2, axis=1)
        w_a = tf.Variable(tf.random_normal([h_size // 2, action_size]))
        w_v = tf.Variable(tf.random_normal([h_size // 2, 1]))
        advantage = tf.matmul(stream_a, w_a)
        value = tf.matmul(stream_v, w_v)
        # salience = tf.gradients(advantage, img_in)
    with tf.variable_scope("predict"):
        self.q_out = value + tf.subtract(advantage,
                                         tf.reduce_mean(advantage, 1, keep_dims=True))
        self.pred = tf.argmax(self.q_out, axis=1)
    self.target_q = tf.placeholder(tf.float32, [None])
    self.actions = tf.placeholder(tf.int32, [None])
    actions_onehot = tf.one_hot(self.actions, action_size, dtype=tf.float32)
    Q = tf.reduce_sum(tf.multiply(self.q_out, actions_onehot), axis=1)
    td_error = tf.square(self.target_q - Q)
    # Masked loss: zero out the first half of each trace so only the second half
    # contributes gradients. Note the optimizer below minimizes the unmasked loss.
    self.maskA = tf.zeros([self.batch_size, self.train_len // 2])
    self.maskB = tf.ones([self.batch_size, self.train_len // 2])
    self.mask = tf.concat([self.maskA, self.maskB], 1)
    self.mask = tf.reshape(self.mask, [-1])
    self.loss = tf.reduce_mean(td_error * self.mask)
    loss = tf.reduce_mean(td_error)
    # self.update = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(loss)
    self.update = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
    with tf.name_scope("summary"):
        tf.summary.scalar("loss", loss)
        tf.summary.scalar("mean_value", tf.reduce_mean(value))
        tf.summary.scalar("max_advantage", tf.reduce_max(advantage))
        tf.summary.scalar("mean_target_q", tf.reduce_mean(self.target_q))
        tf.summary.scalar("mean_pred_q", tf.reduce_mean(self.q_out))
    self.summary_op = tf.summary.merge_all()
def __init__(self, h_size, rnn_cell, myScope):
    # The network receives a frame from the game, flattened into an array.
    # It then resizes it and processes it through four convolutional layers.
    self.scalarInput = tf.placeholder(shape=[None, 21168], dtype=tf.float32)
    self.imageIn = tf.reshape(self.scalarInput, shape=[-1, 84, 84, 3])
    self.conv1 = slim.convolution2d(inputs=self.imageIn, num_outputs=32,
                                    kernel_size=[8, 8], stride=[4, 4], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv1')
    self.conv2 = slim.convolution2d(inputs=self.conv1, num_outputs=64,
                                    kernel_size=[4, 4], stride=[2, 2], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv2')
    self.conv3 = slim.convolution2d(inputs=self.conv2, num_outputs=64,
                                    kernel_size=[3, 3], stride=[1, 1], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv3')
    self.conv4 = slim.convolution2d(inputs=self.conv3, num_outputs=h_size,
                                    kernel_size=[7, 7], stride=[1, 1], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv4')
    self.trainLength = tf.placeholder(dtype=tf.int32)
    # We take the output from the final convolutional layer and send it to a
    # recurrent layer. The input must be reshaped into [batch x trace x units] for
    # rnn processing, and then returned to [batch x units] when sent through the
    # upper levels.
    self.batch_size = tf.placeholder(dtype=tf.int32)
    self.convFlat = tf.reshape(slim.flatten(self.conv4),
                               [self.batch_size, self.trainLength, h_size])
    self.state_in = rnn_cell.zero_state(self.batch_size, tf.float32)
    self.rnn, self.rnn_state = tf.nn.dynamic_rnn(inputs=self.convFlat, cell=rnn_cell,
                                                 dtype=tf.float32, initial_state=self.state_in,
                                                 scope=myScope + "_rnn")
    self.rnn = tf.reshape(self.rnn, shape=[-1, h_size])
    # The output from the recurrent layer is then split into separate Value and
    # Advantage streams.
    self.streamA, self.streamV = tf.split(self.rnn, 2, 1)
    self.AW = tf.Variable(tf.random_normal([h_size // 2, 4]))
    self.VW = tf.Variable(tf.random_normal([h_size // 2, 1]))
    self.Advantage = tf.matmul(self.streamA, self.AW)
    self.Value = tf.matmul(self.streamV, self.VW)
    self.salience = tf.gradients(self.Advantage, self.imageIn)
    # Then combine them together to get our final Q-values.
    self.Qout = self.Value + tf.subtract(self.Advantage,
                                         tf.reduce_mean(self.Advantage, axis=1, keep_dims=True))
    self.predict = tf.argmax(self.Qout, 1)
    # Below we obtain the loss by taking the sum of squared differences between
    # the target and prediction Q-values.
    self.targetQ = tf.placeholder(shape=[None], dtype=tf.float32)
    self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
    self.actions_onehot = tf.one_hot(self.actions, 4, dtype=tf.float32)
    self.Q = tf.reduce_sum(tf.multiply(self.Qout, self.actions_onehot), axis=1)
    self.td_error = tf.square(self.targetQ - self.Q)
    # In order to only propagate accurate gradients through the network, we mask
    # the first half of the losses for each trace, as per Lample & Chaplot (2016).
    self.maskA = tf.zeros([self.batch_size, self.trainLength // 2])
    self.maskB = tf.ones([self.batch_size, self.trainLength // 2])
    self.mask = tf.concat([self.maskA, self.maskB], 1)
    self.mask = tf.reshape(self.mask, [-1])
    self.loss = tf.reduce_mean(self.td_error * self.mask)
    self.trainer = tf.train.AdamOptimizer(learning_rate=0.0001)
    self.updateModel = self.trainer.minimize(self.loss)
def init_mobilenet_v1(self, param):
    resolution_multiplier = param['resolution_multiplier']
    width_multiplier = param['width_multiplier']
    depth_multiplier = param['depth_multiplier']
    if 'input_dim' in param:
        input_dim = param['input_dim']
    else:
        H_W = int(224 * resolution_multiplier)
        input_dim = [1, H_W, H_W, 3]
    if 'output_dim' in param:
        out_dim = param['output_dim']
    else:
        out_dim = 1000
    # Define the resolution based on the resolution multiplier:
    # [1, 0.858, 0.715, 0.572] -> [224, 192, 160, 128]
    input = tf.placeholder(tf.float32, input_dim, name='input_tensor')
    layer_1_conv = slim.convolution2d(input, round(32 * width_multiplier), [3, 3],
                                      stride=2, padding='SAME', scope='conv_1')
    # layer_1_bn = slim.batch_norm(layer_1_conv, scope='conv_1/batch_norm')
    layer_2_dw = self.dw_separable(layer_1_conv, 64, width_multiplier, depth_multiplier,
                                   sc='conv_ds_2')
    layer_3_dw = self.dw_separable(layer_2_dw, 128, width_multiplier, depth_multiplier,
                                   downsample=True, sc='conv_ds_3')
    layer_4_dw = self.dw_separable(layer_3_dw, 128, width_multiplier, depth_multiplier,
                                   sc='conv_ds_4')
    layer_5_dw = self.dw_separable(layer_4_dw, 256, width_multiplier, depth_multiplier,
                                   downsample=True, sc='conv_ds_5')
    layer_6_dw = self.dw_separable(layer_5_dw, 256, width_multiplier, depth_multiplier,
                                   sc='conv_ds_6')
    layer_7_dw = self.dw_separable(layer_6_dw, 512, width_multiplier, depth_multiplier,
                                   downsample=True, sc='conv_ds_7')
    # repeatable layers can be put inside a loop
    layer_8_12_dw = layer_7_dw
    for i in range(8, 13):
        layer_8_12_dw = self.dw_separable(layer_8_12_dw, 512, width_multiplier,
                                          depth_multiplier, sc='conv_ds_' + str(i))
    layer_13_dw = self.dw_separable(layer_8_12_dw, 1024, width_multiplier, depth_multiplier,
                                    downsample=True, sc='conv_ds_13')
    layer_14_dw = self.dw_separable(layer_13_dw, 1024, width_multiplier, depth_multiplier,
                                    sc='conv_ds_14')
    # Pool and reduce to the output dimension
    global_pool = tf.reduce_mean(layer_14_dw, [1, 2], keep_dims=True, name='global_pool')
    spatial_reduction = tf.squeeze(global_pool, [1, 2], name='SpatialSqueeze')
    logits = slim.fully_connected(spatial_reduction, out_dim, activation_fn=None, scope='fc_16')
    output = slim.softmax(logits, scope='Predictions')
    output = tf.identity(output, name="output_tensor")
    return {'input': input, 'output': output, 'logits': logits}
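# Illustrative parameter dict for init_mobilenet_v1 above; the keys are the ones
# the function reads, the values are examples only. resolution_multiplier in
# {1, 0.858, 0.715, 0.572} corresponds to input sizes {224, 192, 160, 128}.
param = {
    'resolution_multiplier': 1,
    'width_multiplier': 1.0,
    'depth_multiplier': 1,
    # 'input_dim': [1, 224, 224, 3],  # optional; derived from resolution_multiplier if absent
    # 'output_dim': 1000,             # optional; defaults to 1000
}
# tensors = builder.init_mobilenet_v1(param)  # `builder` is the enclosing instance (assumed)
# tensors['input'], tensors['logits'], tensors['output'] are the graph endpoints.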
def mobilenet(inputs, num_classes=1000, is_training=True, width_multiplier=1, scope='MobileNet'):
    '''
    Args:
        inputs: a tensor of size [batch_size, height, width, channels]
        num_classes: number of predicted classes
        is_training: whether the model is being trained
        scope: scope for the variables
    Returns:
        logits
        end_points
    '''
    def _depthwise_separable_conv(inputs, num_pwc_filters, width_multiplier, sc, downsample=False):
        num_pwc_filters = round(num_pwc_filters * width_multiplier)
        _stride = 2 if downsample else 1
        # skip pointwise by setting num_outputs=None
        depthwise_conv = slim.separable_convolution2d(inputs, num_outputs=None, stride=_stride,
                                                      depth_multiplier=1, kernel_size=[3, 3],
                                                      scope=sc + '/depthwise_conv')
        bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')
        pointwise_conv = slim.convolution2d(bn, num_pwc_filters, kernel_size=[1, 1],
                                            scope=sc + '/pointwise_conv')
        bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')
        return bn

    with tf.variable_scope(scope) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                            activation_fn=None,
                            outputs_collections=[end_points_collection]):
            with slim.arg_scope([slim.batch_norm], is_training=is_training,
                                activation_fn=tf.nn.relu, fused=True):
                net = slim.convolution2d(inputs, round(32 * width_multiplier), [3, 3],
                                         stride=2, padding='SAME', scope='conv_1')
                net = slim.batch_norm(net, scope='conv_1/batch_norm')
                net = _depthwise_separable_conv(net, 64, width_multiplier, sc='conv_ds_2')
                net = _depthwise_separable_conv(net, 128, width_multiplier, downsample=True, sc='conv_ds_3')
                net = _depthwise_separable_conv(net, 128, width_multiplier, sc='conv_ds_4')
                net = _depthwise_separable_conv(net, 256, width_multiplier, downsample=True, sc='conv_ds_5')
                net = _depthwise_separable_conv(net, 256, width_multiplier, sc='conv_ds_6')
                net = _depthwise_separable_conv(net, 512, width_multiplier, downsample=True, sc='conv_ds_7')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_8')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_9')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_10')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_11')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_12')
                net = _depthwise_separable_conv(net, 1024, width_multiplier, downsample=True, sc='conv_ds_13')
                net = _depthwise_separable_conv(net, 1024, width_multiplier, sc='conv_ds_14')
                net = slim.avg_pool2d(net, [7, 7], scope='avg_pool_15')
        end_points = slim.utils.convert_collection_to_dict(end_points_collection)
        # Collapse the 1x1 spatial dims so logits have shape [batch_size, num_classes].
        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
        logits = slim.fully_connected(net, num_classes, activation_fn=None, scope='fc_16')
        predictions = slim.softmax(logits, scope='predictions')
        end_points['Logits'] = logits
        end_points['Predictions'] = predictions
    return logits, end_points
def conv_bn_relu(input, out_channel, kernel_size, stride=1, dilation=1):
    with tf.variable_scope(None, 'conv_bn_relu'):
        input = slim.convolution2d(input, out_channel, kernel_size, stride,
                                   rate=dilation, activation_fn=None)
        input = slim.batch_norm(input, activation_fn=tf.nn.relu, fused=False)
    return input
def __init__(self, h_size, action_size, img_size=84, learning_rate=0.00025, frame_count=4):
    self.frame_in = tf.placeholder(tf.float32, [None, img_size * img_size * frame_count],
                                   name="frame_in")
    img_in = tf.reshape(self.frame_in, [-1, img_size, img_size, frame_count])
    conv1 = slim.convolution2d(scope="conv1", inputs=img_in, num_outputs=32,
                               kernel_size=[8, 8], stride=[4, 4], padding="VALID",
                               biases_initializer=None)
    conv2 = slim.convolution2d(scope="conv2", inputs=conv1, num_outputs=64,
                               kernel_size=[4, 4], stride=[2, 2], padding="VALID",
                               biases_initializer=None)
    conv3 = slim.convolution2d(scope="conv3", inputs=conv2, num_outputs=64,
                               kernel_size=[3, 3], stride=[1, 1], padding="VALID",
                               biases_initializer=None)
    conv4 = slim.convolution2d(scope="conv4", inputs=conv3, num_outputs=h_size,
                               kernel_size=[7, 7], stride=[1, 1], padding="VALID",
                               biases_initializer=None)
    self.batch_size = tf.placeholder(tf.int32, [])
    conv_flat = tf.reshape(slim.flatten(conv4), [self.batch_size, h_size])
    with tf.variable_scope("va_split"):
        stream_a, stream_v = tf.split(conv_flat, 2, axis=1)
        w_a = tf.Variable(tf.random_normal([h_size // 2, action_size]))
        w_v = tf.Variable(tf.random_normal([h_size // 2, 1]))
        advantage = tf.matmul(stream_a, w_a)
        value = tf.matmul(stream_v, w_v)
        # salience = tf.gradients(advantage, img_in)
    with tf.variable_scope("predict"):
        self.q_out = value + tf.subtract(advantage,
                                         tf.reduce_mean(advantage, axis=1, keep_dims=True))
        self.pred = tf.argmax(self.q_out, axis=1)
    self.target_q = tf.placeholder(tf.float32, [None])
    self.actions = tf.placeholder(tf.int32, [None])
    actions_onehot = tf.one_hot(self.actions, action_size, dtype=tf.float32)
    Q = tf.reduce_sum(tf.multiply(self.q_out, actions_onehot), axis=1)
    td_error = tf.square(self.target_q - Q)
    loss = tf.reduce_mean(td_error)
    self.update = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(loss)
    with tf.name_scope("summary"):
        tf.summary.scalar("loss", loss)
        tf.summary.scalar("mean_value", tf.reduce_mean(value))
        tf.summary.scalar("max_advantage", tf.reduce_max(advantage))
        tf.summary.scalar("min_advantage", tf.reduce_min(advantage))
        tf.summary.scalar("mean_target_q", tf.reduce_mean(self.target_q))
        tf.summary.scalar("mean_pred_q", tf.reduce_mean(self.q_out))
    self.summary_op = tf.summary.merge_all()
def inference(images, keep_probability, phase_train=True, bottleneck_layer_size=128,
              weight_decay=0.0, reuse=None):
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # force in-place updates of mean and variance estimates
        'updates_collections': None,
        # Moving averages end up in the trainable variables collection
        'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
    }
    width_multiplier = 0.25
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=slim.xavier_initializer_conv2d(uniform=True),
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        with tf.variable_scope('mobilenet', [images], reuse=reuse):
            with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=phase_train):
                net = slim.convolution2d(images, round(32 * width_multiplier), [3, 3],
                                         stride=2, padding='SAME', scope='conv_1')
                net = _depthwise_separable_conv(net, 64, width_multiplier, sc='conv_ds_2')
                net = _depthwise_separable_conv(net, 128, width_multiplier, downsample=True, sc='conv_ds_3')
                net = _depthwise_separable_conv(net, 128, width_multiplier, sc='conv_ds_4')
                net = _depthwise_separable_conv(net, 256, width_multiplier, downsample=True, sc='conv_ds_5')
                net = _depthwise_separable_conv(net, 256, width_multiplier, sc='conv_ds_6')
                net = _depthwise_separable_conv(net, 512, width_multiplier, downsample=True, sc='conv_ds_7')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_8')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_9')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_10')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_11')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_12')
                net = _depthwise_separable_conv(net, 1024, width_multiplier, downsample=True, sc='conv_ds_13')
                net = _depthwise_separable_conv(net, 1024, width_multiplier, sc='conv_ds_14')
                # net = slim.avg_pool2d(net, [7, 7], scope='avg_pool_15')
                net = tf.reduce_mean(net, [1, 2], name='avg_pool_15', keep_dims=True)
                net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
                net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None,
                                           scope='fc_16')
    # `K` is assumed to be the Keras backend (from keras import backend as K);
    # `stats_graph` is an external helper that reports FLOPs/parameter counts.
    sess = K.get_session()
    graph = sess.graph
    stats_graph(graph)
    return net, None
def mobilenet(inputs, net_id, emb_size=128, is_training=True, width_multiplier=1, scope='MobileNet'):
    """ MobileNet
    For more detail, please refer to Google's paper (https://arxiv.org/abs/1704.04861).

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      net_id: id used to name the per-network variable scope.
      emb_size: size of the output embedding.
      is_training: whether or not the model is being trained.
    Returns:
      emb: the embedding, a tensor of size [batch_size, emb_size].
      shared_emb: the shared embedding produced by the CLM module.
    """
    def _depthwise_separable_conv(inputs, num_pwc_filters, width_multiplier, sc, downsample=False):
        """ Helper function to build the depth-wise separable convolution layer. """
        num_pwc_filters = round(num_pwc_filters * width_multiplier)
        _stride = 2 if downsample else 1
        # skip pointwise by setting num_outputs=None
        depthwise_conv = slim.separable_convolution2d(inputs, num_outputs=None, stride=_stride,
                                                      depth_multiplier=1, kernel_size=[3, 3],
                                                      scope=sc + '/depthwise_conv')
        bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')
        pointwise_conv = slim.convolution2d(bn, num_pwc_filters, kernel_size=[1, 1],
                                            scope=sc + '/pointwise_conv')
        bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')
        return bn

    inputs = tf.image.resize_images(inputs, [224, 224])
    mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32,
                       shape=[1, 1, 1, 3], name='img_mean')
    inputs = inputs - mean
    print('---------------mobilenet--------------------')
    with tf.variable_scope('net_' + str(net_id), reuse=is_training) as sc:
        with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                            weights_initializer=slim.initializers.xavier_initializer(),
                            biases_initializer=slim.init_ops.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(0.00004)):
            end_points_collection = sc.name + '_end_points'
            with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                                activation_fn=None,
                                outputs_collections=[end_points_collection]):
                with slim.arg_scope([slim.batch_norm], is_training=is_training,
                                    activation_fn=tf.nn.relu, fused=True):
                    net = slim.convolution2d(inputs, round(32 * width_multiplier), [3, 3],
                                             stride=2, padding='SAME', scope='conv_1')
                    print(net)
                    net = slim.batch_norm(net, scope='conv_1/batch_norm')
                    net = _depthwise_separable_conv(net, 64, width_multiplier, sc='conv_ds_2')
                    net = _depthwise_separable_conv(net, 128, width_multiplier, downsample=True, sc='conv_ds_3')
                    print(net)
                    net = _depthwise_separable_conv(net, 128, width_multiplier, sc='conv_ds_4')
                    net = _depthwise_separable_conv(net, 256, width_multiplier, downsample=True, sc='conv_ds_5')
                    print(net)
                    net = _depthwise_separable_conv(net, 256, width_multiplier, sc='conv_ds_6')
                    net = _depthwise_separable_conv(net, 512, width_multiplier, downsample=True, sc='conv_ds_7')
                    print(net)
                    net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_8')
                    net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_9')
                    net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_10')
                    net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_11')
                    net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_12')
                    print(net)
                    net = _depthwise_separable_conv(net, 1024, width_multiplier, downsample=True, sc='conv_ds_13')
                    print(net)

    # ############################ conv clm ############################
    clm_emb = net
    # clm_emb = tf.pad(clm_emb, [[0, 0], [1, 0], [1, 0], [0, 0]])
    clm_emb = clm.clm_enc(clm_emb, net_id, 128, stride=1, padding='SAME',
                          is_training=is_training)
    clm_emb, shared_emb = clm.clm_shared(clm_emb, 128, padding='SAME',
                                         is_training=is_training)
    clm_emb = clm.clm_dec(clm_emb, net_id, 1024, stride=1, padding='SAME',
                          is_training=is_training)
    net = clm_emb + net

    with tf.variable_scope('net_' + str(net_id), reuse=is_training) as sc:
        with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                            weights_initializer=slim.initializers.xavier_initializer(),
                            biases_initializer=slim.init_ops.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(0.00004)):
            end_points_collection = sc.name + '_end_points'
            with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                                activation_fn=None,
                                outputs_collections=[end_points_collection]):
                with slim.arg_scope([slim.batch_norm], is_training=is_training,
                                    activation_fn=tf.nn.relu, fused=True):
                    net = _depthwise_separable_conv(net, 1024, width_multiplier, sc='conv_ds_14')
                    net = slim.avg_pool2d(net, [7, 7], scope='avg_pool_15')
                    print(net)
        end_points = slim.utils.convert_collection_to_dict(end_points_collection)
        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
        end_points['squeeze'] = net
        print(net)
        emb = slim.fully_connected(net, emb_size, activation_fn=None, scope='fc_16')
        print(emb)
        end_points['Logits'] = emb
        # predictions = slim.softmax(logits, scope='Predictions')
        # end_points['Predictions'] = predictions
    return emb, shared_emb, None
def discriminator(bottom, reuse=False):
    with tf.variable_scope('discriminator', reuse=reuse):
        initializer = tf.truncated_normal_initializer(stddev=0.02)
        dis1 = slim.convolution2d(bottom, 64, [5, 5], padding="SAME", stride=2,
                                  activation_fn=lrelu, reuse=reuse, scope='d_conv1',
                                  weights_initializer=initializer)
        # dis1 = tf.space_to_depth(dis1, 2)
        dis2 = slim.convolution2d(dis1, 128, [5, 5], padding="SAME", stride=2,
                                  normalizer_fn=slim.batch_norm, activation_fn=lrelu,
                                  reuse=reuse, scope='d_conv2', weights_initializer=initializer)
        # dis2 = tf.space_to_depth(dis2, 2)
        dis3 = slim.convolution2d(dis2, 256, [5, 5], padding="SAME", stride=2,
                                  normalizer_fn=slim.batch_norm, activation_fn=lrelu,
                                  reuse=reuse, scope='d_conv3', weights_initializer=initializer)
        # dis3 = tf.space_to_depth(dis3, 2)
        dis4 = slim.convolution2d(dis3, 512, [5, 5], padding="SAME", stride=2,
                                  normalizer_fn=slim.batch_norm, activation_fn=lrelu,
                                  reuse=reuse, scope='d_conv4', weights_initializer=initializer)
        '''
        dis4 = slim.fully_connected(slim.flatten(dis4), 1024, activation_fn=lrelu,
                                    reuse=reuse, scope='d_fc1', weights_initializer=initializer)
        '''
        d_out = slim.fully_connected(slim.flatten(dis4), 1, activation_fn=tf.nn.sigmoid,
                                     reuse=reuse, scope='d_out', weights_initializer=initializer)
        q_a = slim.fully_connected(slim.flatten(dis4), 128, normalizer_fn=slim.batch_norm,
                                   reuse=reuse, scope='q_a', weights_initializer=initializer)
        # Here we define the unique layers used for the q-network. The number of outputs
        # depends on the number of latent variables we choose to define.
        q_cont_outs1 = slim.fully_connected(q_a, 2, activation_fn=tf.nn.tanh, reuse=reuse,
                                            scope='q_out_cont1', weights_initializer=initializer)
        # q_cont_outs2 = slim.fully_connected(q_a, 2, activation_fn=tf.nn.tanh, reuse=reuse,
        #                                     scope='q_out_cont2', weights_initializer=initializer)
        return d_out, q_cont_outs1
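# Hedged usage sketch for the InfoGAN-style discriminator above: `lrelu` is the
# leaky-ReLU activation the builder references; `generator` and `z` are
# hypothetical stand-ins for the counterpart generator network and its latent input.
import tensorflow as tf

real_images = tf.placeholder(tf.float32, [None, 64, 64, 3])
z = tf.placeholder(tf.float32, [None, 100])
fake_images = generator(z)  # hypothetical generator
d_real, q_real = discriminator(real_images)              # creates the variables
d_fake, q_fake = discriminator(fake_images, reuse=True)  # shares them for fakes
# d_* are sigmoid real/fake scores; q_* are the 2-dim continuous latent estimates.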
def O_Net_new1(inputs, label=None, bbox_target=None, landmark_target=None, training=True):
    # define common params
    # is_training=training
    # dropout_keep_prob=0.8
    # bottleneck_layer_size=1000
    width_multiplier = 1
    weight_decay = 0.0005
    # reuse = None
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # scale
        # 'scale': True,
        # force in-place updates of mean and variance estimates
        'updates_collections': None,
        # Moving averages end up in the trainable variables collection
        'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
    }

    def _depthwise_separable_conv(inputs, num_pwc_filters, width_multiplier, sc, downsample=False):
        """ Helper function to build the depth-wise separable convolution layer. """
        num_pwc_filters = round(num_pwc_filters * width_multiplier)
        _stride = 2 if downsample else 1
        # skip pointwise by setting num_outputs=None
        depthwise_conv = slim.separable_convolution2d(inputs, num_outputs=None, stride=_stride,
                                                      depth_multiplier=1, kernel_size=[3, 3],
                                                      scope=sc + '/depthwise_conv')
        # padding='VALID')
        # print(depthwise_conv.get_shape())
        # bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')
        pointwise_conv = slim.convolution2d(depthwise_conv, num_pwc_filters, kernel_size=[1, 1],
                                            scope=sc + '/pointwise_conv', padding='SAME')
        # print(pointwise_conv.get_shape())
        # bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')
        return pointwise_conv

    with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                        weights_initializer=slim.xavier_initializer_conv2d(),
                        biases_initializer=slim.init_ops.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(weight_decay),  # l1_regularizer?
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params,
                        # activation_fn=prelu,
                        # outputs_collections=[end_points_collection],
                        ):  # padding='VALID'):
        with slim.arg_scope([slim.batch_norm], is_training=training):
            print(inputs.get_shape())
            net = slim.convolution2d(inputs, round(16 * width_multiplier), [3, 3],
                                     stride=1, padding='SAME', scope='conv_1')
            # net = _depthwise_separable_conv(inputs, 32, width_multiplier, sc='conv_ds_1')
            print(net.get_shape())
            net = _depthwise_separable_conv(net, 32, width_multiplier, downsample=True, sc='conv_ds_2')
            print(net.get_shape())
            net = _depthwise_separable_conv(net, 64, width_multiplier, downsample=True, sc='conv_ds_3')
            print(net.get_shape())
            net = _depthwise_separable_conv(net, 64, width_multiplier, downsample=True, sc='conv_ds_4')
            print(net.get_shape())
            net = _depthwise_separable_conv(net, 128, width_multiplier, downsample=True, sc='conv_ds_5')
            print(net.get_shape())
            # net = _depthwise_separable_conv(net, 64, width_multiplier, sc='conv_ds_4')
            # print(net.get_shape())
            net = slim.avg_pool2d(net, [3, 3], scope='avg_pool_15')
            print(net.get_shape())
            # net = _depthwise_separable_conv(net, 256, width_multiplier, downsample=True, sc='conv_ds_6')
            # print(net.get_shape())
            fc_flatten = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
            print(fc_flatten.get_shape())
            # fc_flatten = slim.flatten(net)
            # print(fc_flatten.get_shape())
            fc1 = slim.fully_connected(fc_flatten, num_outputs=256 * width_multiplier, scope="fc1")
            print(fc1.get_shape())
            # cls_prob = slim.conv2d(net, num_outputs=2, kernel_size=[1, 1], stride=1,
            #                        scope='conv4_1', activation_fn=tf.nn.softmax)
            cls_prob = slim.fully_connected(fc1, 2, activation_fn=tf.nn.softmax, scope='cls_fc')
            print(cls_prob.get_shape())
            # train
            if training:
                cls_loss = cls_ohem(cls_prob, label)
                accuracy = cal_accuracy(cls_prob, label)
                L2_loss = tf.add_n(slim.losses.get_regularization_losses())
                return cls_loss, L2_loss, accuracy
            else:
                return cls_prob  # , bbox_pred, landmark_pred
def __init__(self, h_size, rnn_cell, myScope):
    self.scalarInput = tf.placeholder(shape=[None, 5808], dtype=tf.float32)
    self.goal = tf.placeholder(shape=[None, 5808], dtype=tf.float32)
    self.scalarInputReshape = tf.reshape(self.scalarInput, shape=[-1, 44, 44, 3])
    self.goalReshape = tf.reshape(self.goal, shape=[-1, 44, 44, 3])
    self.input = tf.concat([self.goalReshape, self.scalarInputReshape], 3)
    with tf.variable_scope(myScope):
        self.conv1 = slim.convolution2d(inputs=self.input, num_outputs=32,
                                        kernel_size=[3, 3], stride=[4, 4], padding='VALID',
                                        biases_initializer=None, scope=myScope + '_critic_conv1')
        self.conv2 = slim.convolution2d(inputs=self.conv1, num_outputs=32,
                                        kernel_size=[3, 3], stride=[2, 2], padding='VALID',
                                        biases_initializer=None, scope=myScope + '_critic_conv2')
        self.conv3 = slim.convolution2d(inputs=self.conv2, num_outputs=32,
                                        kernel_size=[3, 3], stride=[1, 1], padding='VALID',
                                        biases_initializer=None, scope=myScope + '_critic_conv3')
        self.conv4 = slim.convolution2d(inputs=self.conv3, num_outputs=h_size,
                                        kernel_size=[3, 3], stride=[1, 1], padding='VALID',
                                        biases_initializer=None, scope=myScope + '_critic_conv4')
        self.trainLength = tf.placeholder(dtype=tf.int32)
        self.batch_size = tf.placeholder(dtype=tf.int32, shape=[])
        self.convFlat = tf.reshape(slim.flatten(self.conv4),
                                   [self.batch_size, self.trainLength, h_size])
        self.state_in = rnn_cell.zero_state(self.batch_size, tf.float32)
        self.rnn, self.rnn_state = tf.nn.dynamic_rnn(inputs=self.convFlat, cell=rnn_cell,
                                                     dtype=tf.float32,
                                                     initial_state=self.state_in,
                                                     scope=myScope + '_rnn')
        self.rnn = tf.reshape(self.rnn, shape=[-1, h_size])
        # The output from the recurrent layer is then split into separate Value
        # and Advantage streams.
        self.streamA, self.streamV = tf.split(self.rnn, 2, 1)
        self.AW = tf.Variable(tf.random_normal([h_size // 2, 4]))
        self.VW = tf.Variable(tf.random_normal([h_size // 2, 1]))
        self.Advantage = tf.matmul(self.streamA, self.AW)
        self.Value = tf.matmul(self.streamV, self.VW)
        # Then combine them together to get our final Q-values.
        self.Qout = self.Value + tf.subtract(self.Advantage,
                                             tf.reduce_mean(self.Advantage, axis=1,
                                                            keep_dims=True))
        self.predict = tf.argmax(self.Qout, 1)
        # Below we obtain the loss by taking the sum of squared differences
        # between the target and prediction Q-values.
        self.targetQ = tf.placeholder(shape=[None], dtype=tf.float32)
        self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
        self.actions_onehot = tf.one_hot(self.actions, 4, dtype=tf.float32)
        self.Q = tf.reduce_sum(tf.multiply(self.Qout, self.actions_onehot), axis=1)
        self.td_error = tf.square(self.targetQ - self.Q)
        self.maskA = tf.zeros([self.batch_size, self.trainLength // 2])
        self.maskB = tf.ones([self.batch_size, self.trainLength // 2])
        self.mask = tf.concat([self.maskA, self.maskB], 1)
        self.mask = tf.reshape(self.mask, [-1])
        self.loss = tf.reduce_mean(self.td_error * self.mask)
        self.trainer = tf.train.AdamOptimizer(learning_rate=0.0001)
        self.updateModel = self.trainer.minimize(self.loss)
    self.param = [v for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                  if myScope in v.name]
def mobilenet_v3(inputs, classes_num, multiplier=1.0, is_training=True, type='small'):
    end_points = {}
    # Each row: [in_channels, out_channels, kernel_size, stride, activation, se, exp_size]
    if type == 'small':
        layers = [
            [16, 16, 3, 2, "RE", True, 16],
            [16, 24, 3, 2, "RE", False, 72],
            [24, 24, 3, 1, "RE", False, 88],
            [24, 40, 5, 2, "RE", True, 96],
            [40, 40, 5, 1, "RE", True, 240],
            [40, 40, 5, 1, "RE", True, 240],
            [40, 48, 5, 1, "HS", True, 120],
            [48, 48, 5, 1, "HS", True, 144],
            [48, 96, 5, 2, "HS", True, 288],
            [96, 96, 5, 1, "HS", True, 576],
            [96, 96, 5, 1, "HS", True, 576],
        ]
    else:
        layers = [
            [16, 16, 3, 1, "RE", False, 16],
            [16, 24, 3, 2, "RE", False, 64],
            [24, 24, 3, 1, "RE", False, 72],
            [24, 40, 5, 2, "RE", True, 72],
            [40, 40, 5, 1, "RE", True, 120],
            [40, 40, 5, 1, "RE", True, 120],
            [40, 80, 3, 2, "HS", False, 240],
            [80, 80, 3, 1, "HS", False, 200],
            [80, 80, 3, 1, "HS", False, 184],
            [80, 80, 3, 1, "HS", False, 184],
            [80, 112, 3, 1, "HS", True, 480],
            [112, 112, 3, 1, "HS", True, 672],
            [112, 160, 5, 1, "HS", True, 672],
            [160, 160, 5, 2, "HS", True, 672],
            [160, 160, 5, 1, "HS", True, 960],
        ]
    batch_norm_params = {
        'decay': 0.999,
        'epsilon': 0.001,
        'updates_collections': None,  # tf.GraphKeys.UPDATE_OPS,
        'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
        'is_training': is_training
    }
    input_size = inputs.get_shape().as_list()[1:-1]
    assert (input_size[0] % 32 == 0) and (input_size[1] % 32 == 0)
    reduction_ratio = 4
    x = slim.convolution2d(inputs, int(16 * multiplier), [3, 3], stride=2,
                           activation_fn=hard_swish, normalizer_fn=slim.batch_norm,
                           normalizer_params=batch_norm_params, biases_initializer=None)
    with tf.variable_scope("MobilenetV3"):
        for idx, (in_channels, out_channels, kernel_size, stride, activatation, se,
                  exp_size) in enumerate(layers):
            in_channels = int(in_channels * multiplier)
            out_channels = int(out_channels * multiplier)
            exp_size = int(exp_size * multiplier)
            x = mobilenet_v3_block(x, [kernel_size, kernel_size], batch_norm_params,
                                   exp_size, out_channels, stride, "bneck{}".format(idx),
                                   is_training=is_training,
                                   shortcut=(in_channels == out_channels),
                                   activatation=activatation,
                                   ratio=reduction_ratio, se=se)
            end_points["bneck{}".format(idx)] = x
    if type == 'small':
        conv1_out = int(576 * multiplier)
    else:
        conv1_out = int(960 * multiplier)
    x = slim.convolution2d(x, conv1_out, [1, 1], stride=1, normalizer_fn=slim.batch_norm,
                           normalizer_params=batch_norm_params, activation_fn=hard_swish)
    if type == 'small':
        x = _squeeze_excitation_layer(x, out_dim=conv1_out, ratio=reduction_ratio,
                                      is_training=is_training, reuse=None)
    end_points["conv1_out_1x1"] = x
    x = slim.avg_pool2d(x, x.get_shape()[1:-1], stride=1)
    # x = hard_swish(x)
    end_points["global_pool"] = x
    with tf.variable_scope('Logits_out'):
        conv2_out = int(1280 * multiplier)
        x = slim.convolution2d(x, conv2_out, [1, 1], stride=1, activation_fn=hard_swish)
        end_points["conv2_out_1x1"] = x
        x = slim.convolution2d(x, classes_num, [1, 1], stride=1, activation_fn=None)
        logits = tf.layers.flatten(x)
        logits = tf.identity(logits, name='output')
        end_points["Logits_out"] = logits
    return logits, end_points
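# Hedged usage sketch for mobilenet_v3 above (assumes TF1.x). The assert in the
# builder requires the input height and width to be multiples of 32.
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = mobilenet_v3(images, classes_num=1000, multiplier=1.0,
                                  is_training=False, type='small')
pooled = end_points['global_pool']  # [None, 1, 1, 576] for the small variant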
def inverted_bottleneck(self, inputs, up_channel_rate, channels, stride, k_s=3, dilation=1, scope=""):
    # Note: the dilation default was 1.0 in the original; `rate` expects an integer.
    with tf.variable_scope("inverted_bottleneck_%s" % scope):
        with slim.arg_scope([slim.batch_norm], decay=0.999, fused=True, is_training=self.is4Train):
            # 1x1 pointwise expansion.
            output = slim.convolution2d(
                inputs, up_channel_rate * inputs.get_shape().as_list()[-1],
                stride=1, kernel_size=[1, 1],
                weights_initializer=self.init_xavier, biases_initializer=self.init_zero,
                activation_fn=tf.nn.relu6, normalizer_fn=slim.batch_norm,
                weights_regularizer=None, scope=scope + '_up_pointwise',
                trainable=self.is4Train)
            # Depthwise convolution (k_s x k_s, optionally strided/dilated).
            output = slim.separable_convolution2d(
                output, num_outputs=None, stride=stride, depth_multiplier=1,
                activation_fn=tf.nn.relu6, kernel_size=k_s,
                weights_initializer=self.init_xavier,
                weights_regularizer=self.l2_regularizer, biases_initializer=None,
                normalizer_fn=slim.batch_norm, rate=dilation, padding="SAME",
                scope=scope + '_depthwise', trainable=self.is4Train)
            # 1x1 linear projection (no activation).
            output = slim.convolution2d(
                output, channels, stride=1, kernel_size=[1, 1], activation_fn=None,
                weights_initializer=self.init_xavier, biases_initializer=self.init_zero,
                normalizer_fn=slim.batch_norm, weights_regularizer=None,
                scope=scope + '_pointwise', trainable=self.is4Train)
            # Residual connection when the spatial size and channel count match.
            if inputs.get_shape().as_list()[1:] == output.get_shape().as_list()[1:]:
                output = tf.add(inputs, output)
    return output
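inverted_bottleneck above is a method and expects several attributes on self. A sketch of a minimal host class; the class name, initializers, and regularizer strength are assumptions, picked to match the slim idioms used elsewhere in these snippets:

class Backbone(object):
    def __init__(self, trainable=True):
        self.is4Train = trainable
        self.init_xavier = slim.initializers.xavier_initializer()
        self.init_zero = slim.init_ops.zeros_initializer()
        self.l2_regularizer = slim.l2_regularizer(0.00004)

    inverted_bottleneck = inverted_bottleneck  # reuse the function above as a method

backbone = Backbone()
x = tf.placeholder(tf.float32, [None, 56, 56, 24])
y = backbone.inverted_bottleneck(x, up_channel_rate=6, channels=32, stride=2, scope='block1')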
def create_ds_cnn_model(fingerprint_input, model_settings, model_size_info, is_training):
    """Builds a model with depthwise separable convolutional neural network

    Model definition is based on https://arxiv.org/abs/1704.04861 and
    Tensorflow implementation: https://github.com/Zehaos/MobileNet

    model_size_info: defines number of layers, followed by the DS-Conv layer
    parameters in the order {number of conv features, conv filter height,
    width and stride in y,x dir.} for each of the layers.
    Note that first layer is always regular convolution, but the remaining
    layers are all depthwise separable convolutions.
    """

    def ds_cnn_arg_scope(weight_decay=0):
        """Defines the default ds_cnn argument scope.

        Args:
          weight_decay: The weight decay to use for regularizing the model.

        Returns:
          An `arg_scope` to use for the DS-CNN model.
        """
        with slim.arg_scope(
                [slim.convolution2d, slim.separable_convolution2d],
                weights_initializer=slim.initializers.xavier_initializer(),
                biases_initializer=slim.init_ops.zeros_initializer(),
                weights_regularizer=slim.l2_regularizer(weight_decay)) as sc:
            return sc

    def _depthwise_separable_conv(inputs, num_pwc_filters, sc, kernel_size, stride):
        """Helper function to build the depth-wise separable convolution layer."""
        # Skip pointwise by setting num_outputs=None.
        depthwise_conv = slim.separable_convolution2d(inputs, num_outputs=None,
                                                      stride=stride,
                                                      depth_multiplier=1,
                                                      kernel_size=kernel_size,
                                                      scope=sc + '/depthwise_conv')
        bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')
        pointwise_conv = slim.convolution2d(bn, num_pwc_filters, kernel_size=[1, 1],
                                            scope=sc + '/pointwise_conv')
        bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')
        return bn

    if is_training:
        dropout_prob = tf.placeholder(tf.float32, name='dropout_prob')
    label_count = model_settings['label_count']
    input_frequency_size = model_settings['dct_coefficient_count']
    input_time_size = model_settings['spectrogram_length']
    fingerprint_4d = tf.reshape(fingerprint_input,
                                [-1, input_time_size, input_frequency_size, 1])
    t_dim = input_time_size
    f_dim = input_frequency_size
    # Extract model dimensions from model_size_info.
    num_layers = model_size_info[0]
    conv_feat = [None] * num_layers
    conv_kt = [None] * num_layers
    conv_kf = [None] * num_layers
    conv_st = [None] * num_layers
    conv_sf = [None] * num_layers
    i = 1
    for layer_no in range(0, num_layers):
        conv_feat[layer_no] = model_size_info[i]
        i += 1
        conv_kt[layer_no] = model_size_info[i]
        i += 1
        conv_kf[layer_no] = model_size_info[i]
        i += 1
        conv_st[layer_no] = model_size_info[i]
        i += 1
        conv_sf[layer_no] = model_size_info[i]
        i += 1
    scope = 'DS-CNN'
    with tf.variable_scope(scope) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope(
                [slim.convolution2d, slim.separable_convolution2d],
                activation_fn=None,
                weights_initializer=slim.initializers.xavier_initializer(),
                biases_initializer=slim.init_ops.zeros_initializer(),
                outputs_collections=[end_points_collection]):
            with slim.arg_scope([slim.batch_norm],
                                is_training=is_training,
                                decay=0.96,
                                updates_collections=None,
                                activation_fn=tf.nn.relu):
                for layer_no in range(0, num_layers):
                    if layer_no == 0:
                        net = slim.convolution2d(
                            fingerprint_4d, conv_feat[layer_no],
                            [conv_kt[layer_no], conv_kf[layer_no]],
                            stride=[conv_st[layer_no], conv_sf[layer_no]],
                            padding='SAME', scope='conv_1')
                        net = slim.batch_norm(net, scope='conv_1/batch_norm')
                    else:
                        net = _depthwise_separable_conv(
                            net, conv_feat[layer_no],
                            kernel_size=[conv_kt[layer_no], conv_kf[layer_no]],
                            stride=[conv_st[layer_no], conv_sf[layer_no]],
                            sc='conv_ds_' + str(layer_no))
                    t_dim = math.ceil(t_dim / float(conv_st[layer_no]))
                    f_dim = math.ceil(f_dim / float(conv_sf[layer_no]))
                net = slim.avg_pool2d(net, [t_dim, f_dim], scope='avg_pool')
    net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
    logits = slim.fully_connected(net, label_count, activation_fn=None, scope='fc1')
    if is_training:
        return logits, dropout_prob
    else:
        return logits
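To make the model_size_info convention above concrete: the first entry is the number of layers, followed by five values per layer {features, kernel height (time), kernel width (freq), stride in time, stride in freq}. The numbers below are an illustrative five-layer configuration, not a tuned one:

model_size_info = [5,
                   64, 10, 4, 2, 2,  # conv_1 (regular convolution)
                   64, 3, 3, 1, 1,   # conv_ds_1
                   64, 3, 3, 1, 1,   # conv_ds_2
                   64, 3, 3, 1, 1,   # conv_ds_3
                   64, 3, 3, 1, 1]   # conv_ds_4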
def inference(self, inputs, scope='MobileNetV2'):
    with slim.arg_scope(self.mobilenet_arg_scope()):
        with tf.variable_scope(scope):
            with slim.arg_scope(
                    [slim.convolution2d, slim.separable_convolution2d],
                    activation_fn=None):
                # batch_norm first, then ReLU6, so the activation function is set in
                # slim.batch_norm's arg_scope rather than on the conv layers.
                with slim.arg_scope([slim.batch_norm],
                                    is_training=self._is_training,
                                    activation_fn=tf.nn.relu6,
                                    fused=True,
                                    decay=0.90):
                    # Test-time accuracy is only high once batch_norm's moving mean
                    # and variance have stabilized. To make them stabilize faster,
                    # lower batch_norm's decay: if 0.95 is not enough, try 0.90 or
                    # even smaller.
                    net = slim.convolution2d(inputs, num_outputs=32, kernel_size=[3, 3],
                                             stride=2, padding='SAME', scope='conv_1')
                    net = slim.batch_norm(net, scope="b1")
                    # `bootleneck` (sic) is the block-builder method defined elsewhere
                    # on this class; the spelling is kept to match that definition.
                    net = self.bootleneck(net, 1, 16, 1, "bottleneck1")
                    net = self.bootleneck(net, 6, 24, 2, "bottleneck2_1")
                    net = self.bootleneck(net, 6, 24, 1, "bottleneck2_2")
                    net = self.bootleneck(net, 6, 32, 2, "bottleneck3_1")
                    net = self.bootleneck(net, 6, 32, 1, "bottleneck3_2")
                    net = self.bootleneck(net, 6, 32, 1, "bottleneck3_3")
                    net = self.bootleneck(net, 6, 64, 2, "bottleneck4_1")
                    net = self.bootleneck(net, 6, 64, 1, "bottleneck4_2")
                    net = self.bootleneck(net, 6, 64, 1, "bottleneck4_3")
                    net = self.bootleneck(net, 6, 64, 1, "bottleneck4_4")
                    net = self.bootleneck(net, 6, 96, 1, "bottleneck5_1")
                    net = self.bootleneck(net, 6, 96, 1, "bottleneck5_2")
                    net = self.bootleneck(net, 6, 96, 1, "bottleneck5_3")
                    net = self.bootleneck(net, 6, 160, 2, "bottleneck6_1")
                    net = self.bootleneck(net, 6, 160, 1, "bottleneck6_2")
                    net = self.bootleneck(net, 6, 160, 1, "bottleneck6_3")
                    net = self.bootleneck(net, 6, 320, 1, "bottleneck7_1")
                    net = slim.convolution2d(net, num_outputs=1280, kernel_size=[3, 3],
                                             stride=1, scope='conv_2')
                    net = slim.batch_norm(net, scope="b2")
                    net = slim.avg_pool2d(net, kernel_size=[7, 7], stride=1)
                    net = slim.convolution2d(net, num_outputs=self.num_classes,
                                             kernel_size=[3, 3], stride=1, scope='conv_3')
                    logits = tf.reshape(net, shape=[-1, self.num_classes])
                    return logits
def __init__(self, h_size, rnn_cell, myScope):
    # Process the input data.
    self.scalarInput = tf.placeholder(shape=[None, 21168], dtype=tf.float32)
    self.imageIn = tf.reshape(self.scalarInput, shape=[-1, 84, 84, 3])
    # First convolutional layer.
    self.conv1 = slim.convolution2d(inputs=self.imageIn, num_outputs=32,
                                    kernel_size=[8, 8], stride=[4, 4], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv1')
    # Second convolutional layer.
    self.conv2 = slim.convolution2d(inputs=self.conv1, num_outputs=64,
                                    kernel_size=[4, 4], stride=[2, 2], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv2')
    # Third convolutional layer.
    self.conv3 = slim.convolution2d(inputs=self.conv2, num_outputs=64,
                                    kernel_size=[3, 3], stride=[1, 1], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv3')
    # Fourth convolutional layer.
    self.conv4 = slim.convolution2d(inputs=self.conv3, num_outputs=h_size,
                                    kernel_size=[7, 7], stride=[1, 1], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv4')
    # Train length, i.e., how many consecutive frames each trace looks back over.
    self.trainLength = tf.placeholder(dtype=tf.int32)
    self.batch_size = tf.placeholder(dtype=tf.int32)
    self.convFlat = tf.reshape(slim.flatten(self.conv4),
                               [self.batch_size, self.trainLength, h_size])
    self.state_in = rnn_cell.zero_state(self.batch_size, tf.float32)
    self.rnn, self.rnn_state = tf.nn.dynamic_rnn(
        inputs=self.convFlat, cell=rnn_cell, dtype=tf.float32,
        initial_state=self.state_in, scope=myScope + '_rnn')
    # Flatten the RNN output back to [batch*trace, h_size]; this reshape was missing
    # in the original, and without it the split/matmul below would operate on the
    # [batch, trace, h_size] tensor and fail.
    self.rnn = tf.reshape(self.rnn, shape=[-1, h_size])
    # Split the RNN output into value and advantage streams.
    self.streamA, self.streamV = tf.split(self.rnn, 2, 1)
    self.AW = tf.Variable(tf.random_normal([h_size // 2, 4]))
    self.VW = tf.Variable(tf.random_normal([h_size // 2, 1]))
    self.Advantage = tf.matmul(self.streamA, self.AW)
    self.Value = tf.matmul(self.streamV, self.VW)
    self.salience = tf.gradients(self.Advantage, self.imageIn)
    self.Qout = self.Value + tf.subtract(
        self.Advantage, tf.reduce_mean(self.Advantage, axis=1, keepdims=True))
    self.predict = tf.argmax(self.Qout, 1)
    # Define the loss.
    self.targetQ = tf.placeholder(shape=[None], dtype=tf.float32)
    self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
    self.actions_onehot = tf.one_hot(self.actions, 4, dtype=tf.float32)
    self.Q = tf.reduce_sum(tf.multiply(self.Qout, self.actions_onehot), axis=1)
    self.td_error = tf.square(self.targetQ - self.Q)
    self.maskA = tf.zeros([self.batch_size, self.trainLength // 2])
    self.maskB = tf.ones([self.batch_size, self.trainLength // 2])
    self.mask = tf.concat([self.maskA, self.maskB], 1)
    self.mask = tf.reshape(self.mask, [-1])
    self.loss = tf.reduce_mean(self.td_error * self.mask)
    self.trainer = tf.train.AdamOptimizer(learning_rate=0.0001)
    self.updateModel = self.trainer.minimize(self.loss)
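A hedged sketch of one training step for the DRQN above. mainQN, trainBatch (columns [state, action, reward, next_state, done]), targetQ_values, h_size, and an open tf.Session sess are assumed to exist; only the placeholder names come from the class itself:

trace_length, batch = 8, 4
state_train = (np.zeros([batch, h_size]), np.zeros([batch, h_size]))
sess.run(mainQN.updateModel, feed_dict={
    mainQN.scalarInput: np.vstack(trainBatch[:, 0]),  # [batch*trace, 21168]
    mainQN.actions: trainBatch[:, 1],
    mainQN.targetQ: targetQ_values,  # computed from a target network
    mainQN.trainLength: trace_length,
    mainQN.batch_size: batch,
    mainQN.state_in: state_train})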
def lookup_conv2d(tensor_in, num_outputs, kernel_size, stride, dict_size, padding=1,
                  param_lambda=0.3, initial_sparsity=None, activation_fn=None,
                  biases_initializer=slim.init_ops.zeros_initializer()):
    if not initial_sparsity:
        initial_sparsity = 0.5
    if isinstance(kernel_size, int):
        kernel_size = [kernel_size, kernel_size]
    if isinstance(stride, int):
        stride = [stride, stride]
    sparse_th = initial_sparsity / math.sqrt(kernel_size[0] * kernel_size[1] * dict_size)
    stddev = 1. / math.sqrt(kernel_size[0] * kernel_size[1] * dict_size)
    padded = tf.pad(tensor_in, [[0, 0], [padding, padding], [padding, padding], [0, 0]],
                    "CONSTANT")
    pool_conv = slim.convolution2d(inputs=padded, num_outputs=dict_size,
                                   kernel_size=[1, 1], stride=1, padding='SAME',
                                   activation_fn=None, biases_initializer=None,
                                   scope='pool_conv')
    scope = tf.get_default_graph().get_name_scope()
    gen_sparse_conv = False
    if len(dense_weights.keys()) > 0:
        kernel_dense = dense_weights['%s/%s' % (scope, 'kernel_dense')]
        density = np.count_nonzero(kernel_dense) / kernel_dense.size
        if density < 0.15:
            gen_sparse_conv = True

    # activation for kernel weight
    if gen_sparse_conv:
        dense_kernel_shp = dense_weights['%s/%s' % (scope, 'kernel_shape')]
        dense_kernel_idx = dense_weights['%s/%s' % (scope, 'kernel')].indices
        dense_kernel_val = dense_weights['%s/%s' % (scope, 'kernel')].values
        dense_bias = tf.constant(dense_weights['%s/%s' % (scope, 'bias')])
        mode = 'custom_op'
        if mode == 'tf_op':
            # Sparse convolution using only TensorFlow's operations -- SLOW!
            # im2col: extract the image patch matrix.
            img2col = tf.extract_image_patches(
                pool_conv, [1, kernel_size[0], kernel_size[1], 1],
                [1, stride[0], stride[1], 1], [1, 1, 1, 1], 'VALID')
            img2col = tf.transpose(img2col, [0, 3, 1, 2])
            img2col_shape = img2col.get_shape().as_list()
            img2col = tf.reshape(img2col,
                                 [img2col_shape[1], img2col_shape[2] * img2col_shape[3]])
            # Sparse kernel & bias.
            sparse_kernel = tf.SparseTensor(dense_kernel_idx, dense_kernel_val,
                                            dense_kernel_shp)
            # Multiplication.
            matmul = tf.sparse_tensor_dense_matmul(sparse_kernel, img2col)
            matmul = tf.transpose(matmul)
            matmul = tf.reshape(matmul, [1, img2col_shape[2], img2col_shape[3],
                                         dense_kernel_shp[0]])
            # Bias & activation.
            output = tf.nn.bias_add(matmul, dense_bias) if dense_bias is not None else matmul
            output = tf.nn.relu(output)
            return output
        elif mode == 'custom_op':
            conv = sparse_conv2d_m.sparse_conv2d(pool_conv, dense_kernel_idx,
                                                 dense_kernel_val,
                                                 dense_shape=dense_kernel_shp,
                                                 strides=stride)
            output = tf.nn.bias_add(conv, dense_bias) if dense_bias is not None else conv
            output = tf.nn.relu(output)
            return output
        else:
            # The original used a bare `raise`, which would itself error here.
            raise ValueError('unknown sparse conv mode: %s' % mode)
    else:
        # Dense convolution.
        align_conv, layer = lookupalign_conv(
            inputs=pool_conv, filters=num_outputs, kernel_size=kernel_size,
            strides=(stride[0], stride[1]), padding='valid',
            param_lambda=param_lambda * sparse_th, sparse_th=sparse_th,
            activation=activation_fn,
            kernel_initializer=tf.random_uniform_initializer(-1 * stddev, stddev),
            bias_initializer=biases_initializer, name='align_conv')
        scope = tf.get_default_graph().get_name_scope()
        dense_layers[scope] = layer
        return align_conv
def __init__(self, h_size, rnn_cell, myScope):
    # The network receives a game frame flattened into an array.
    # It resizes the frame and processes it through 4 conv layers.
    self.scalarInput = tf.placeholder(shape=[None, 21168], dtype=tf.float32)
    self.imageIn = tf.reshape(self.scalarInput, shape=[-1, 84, 84, 3])
    self.conv1 = slim.convolution2d(inputs=self.imageIn, num_outputs=32,
                                    kernel_size=[8, 8], stride=[4, 4], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv1')
    self.conv2 = slim.convolution2d(inputs=self.conv1, num_outputs=64,
                                    kernel_size=[4, 4], stride=[2, 2], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv2')
    self.conv3 = slim.convolution2d(inputs=self.conv2, num_outputs=64,
                                    kernel_size=[3, 3], stride=[1, 1], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv3')
    self.conv4 = slim.convolution2d(inputs=self.conv3, num_outputs=h_size,
                                    kernel_size=[7, 7], stride=[1, 1], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv4')
    # We use consecutive frames to capture temporal dependencies; this is the number
    # of consecutive frames (the trace length). A trace is a sequence of experiences
    # from within an episode.
    self.trainLength = tf.placeholder(dtype=tf.int32)
    # The final conv layer's output is sent to the recurrent layer. The input must be
    # reshaped into [batch x trace x units] because that is what "tf.nn.dynamic_rnn"
    # accepts, then changed back to [batch*trace x units] afterwards so each frame can
    # be evaluated for value and action computation.
    self.batch_size = tf.placeholder(dtype=tf.int32, shape=[])
    # "flatten" converts "conv4" from (?, 1, 1, h_size) to (?, h_size).
    self.convFlat = tf.reshape(slim.flatten(self.conv4),
                               [self.batch_size, self.trainLength, h_size])
    # Zero-filled initial state. "state_in" is actually 2 tensors: the LSTM cell
    # state and the hidden (output) state.
    self.state_in = rnn_cell.zero_state(self.batch_size, tf.float32)
    # Feed "self.convFlat" into "rnn_cell" with initial state "self.state_in".
    # "self.rnn" is the RNN output; "self.rnn_state" is its final state.
    self.rnn, self.rnn_state = tf.nn.dynamic_rnn(inputs=self.convFlat, cell=rnn_cell,
                                                 dtype=tf.float32,
                                                 initial_state=self.state_in,
                                                 scope=myScope + '_rnn')
    # Change back so each frame can be evaluated for value and action computation.
    self.rnn = tf.reshape(self.rnn, shape=[-1, h_size])
    # Split the recurrent layer output into value and advantage streams.
    self.streamA, self.streamV = tf.split(self.rnn, 2, 1)
    # Weight variables initialized from a random normal distribution.
    self.AW = tf.Variable(tf.random_normal([h_size // 2, 4]))
    self.VW = tf.Variable(tf.random_normal([h_size // 2, 1]))
    # "streamA"/"streamV" carry the frame + RNN info relevant to the action/state
    # value; multiplying by "AW"/"VW" gives the predicted advantage/state value.
    self.Advantage = tf.matmul(self.streamA, self.AW)
    self.Value = tf.matmul(self.streamV, self.VW)
    # Derivative of the sum of "Advantage" with respect to each pixel of each image
    # in "imageIn"; shape (?, 84, 84, 3). This is the only mention of "salience" in
    # this script -- see "helper.py" for more. "salience" shows which pixels
    # contribute most to changing "Advantage": a large gradient means changing that
    # pixel would change "Advantage" a lot; a zero gradient means that pixel barely
    # matters. See "https://raghakot.github.io/keras-vis/visualizations/saliency/"
    # for more info.
    self.salience = tf.gradients(self.Advantage, self.imageIn)
    # Final Q-values: add the state value to the relative action advantage. For each
    # action we subtract the mean action value, leaving each action's relative value.
    self.Qout = self.Value + tf.subtract(
        self.Advantage, tf.reduce_mean(self.Advantage, axis=1, keepdims=True))
    self.predict = tf.argmax(self.Qout, 1)
    # Loss is the sum-of-squares difference between target and predicted Q-values.
    self.targetQ = tf.placeholder(shape=[None], dtype=tf.float32)
    self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
    self.actions_onehot = tf.one_hot(self.actions, 4, dtype=tf.float32)
    # Predicted Q-values for the chosen actions: the inner "multiply" zeroes every
    # Q-value except the taken action's (because of "actions_onehot"), and
    # "reduce_sum" then drops the zero entries.
    self.Q = tf.reduce_sum(tf.multiply(self.Qout, self.actions_onehot), axis=1)
    self.td_error = tf.square(self.targetQ - self.Q)
    # To only propagate accurate gradients through the network, mask the first half
    # of the losses for each trace, as per Lample & Chaplot 2016. Research has shown
    # that only sending the last half of the gradients improves performance by
    # passing only the more meaningful info through the network. Reminds me of
    # dropout. See this blog post for more info:
    # https://medium.com/emergent-future/simple-reinforcement-learning-with-tensorflow-part-6-partial-observability-and-deep-recurrent-q-68463e9aeefc
    self.maskA = tf.zeros([self.batch_size, self.trainLength // 2])
    self.maskB = tf.ones([self.batch_size, self.trainLength // 2])
    self.mask = tf.concat([self.maskA, self.maskB], 1)
    self.mask = tf.reshape(self.mask, [-1])  # make the mask 1-D
    self.loss = tf.reduce_mean(self.td_error * self.mask)
    self.trainer = tf.train.AdamOptimizer(learning_rate=0.0001)
    self.updateModel = self.trainer.minimize(self.loss)
def squeeze(self, inputs, output_channels): return slim.convolution2d(inputs, num_outputs=output_channels, kernel_size=1, stride=1, scope="squeeze")
def mobilenet(inputs, width_multiplier=1, scope=None, is_training=True): def _depthwise_separable_conv(inputs, num_pwc_filters, width_multiplier, sc, downsample=False): # helper function to build the depthwise separable convolution layer. num_pwc_filters = round(num_pwc_filters * width_multiplier) _stride = 2 if downsample else 1 # skip pointwise by setting num_outputs = None depthwise_conv = slim.separable_convolution2d(inputs, num_outputs=None, stride=_stride, depth_multiplier=1, kernel_size=[3, 3], scope=sc + '/depthwise_conv') bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm') pointwise_conv = slim.convolution2d(bn, num_pwc_filters, kernel_size=[1, 1], scope=sc + '/pointwise_conv') bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm') return bn with tf.variable_scope(scope) as sc: end_points_collection = sc.name + '_end_points' with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d], activation_fn=None, outputs_collections=[end_points_collection]): with slim.arg_scope([slim.batch_norm], is_training=is_training, activation_fn=tf.nn.relu): net = slim.convolution2d(inputs, round(32 * width_multiplier), [3, 3], stride=2, padding='SAME', scope='conv_1') net = slim.batch_norm(net, scope='conv_1/batch_norm') net = _depthwise_separable_conv(net, 64, width_multiplier, sc='conv_ds_2') net = _depthwise_separable_conv(net, 128, width_multiplier, downsample=True, sc='conv_ds_3') net = _depthwise_separable_conv(net, 128, width_multiplier, sc='conv_ds_4') net = _depthwise_separable_conv(net, 256, width_multiplier, downsample=True, sc='conv_ds_5') net = _depthwise_separable_conv(net, 256, width_multiplier, sc='conv_ds_6') net = _depthwise_separable_conv(net, 512, width_multiplier, downsample=True, sc='conv_ds_7') net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_8') net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_9') net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_10') net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_11') net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_12') net = _depthwise_separable_conv(net, 1024, width_multiplier, downsample=True, sc='conv_ds_13') net = _depthwise_separable_conv(net, 1024, width_multiplier, sc='conv_ds_14') net = slim.avg_pool2d(net, [7, 7], scope='avg_pool_15') end_points = slim.utils.convert_collection_to_dict( end_points_collection) net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') end_points['squeeze'] = net return net, end_points
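Sketch of calling mobilenet above. The scope should be passed explicitly (the default None would break tf.variable_scope), and the spatial size must reach 7x7 before the final average pool, e.g. a 224x224 input:

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
feats, end_points = mobilenet(images, width_multiplier=1, scope='MobileNet', is_training=True)
# feats: [None, 1024] after the spatial squeeze; end_points maps layer scopes to tensors.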
def __init__(self, h_size, a_size, action_h_size, rnn_cell, scopeName, discount=0.99):
    self.h_size, self.a_size, self.discount = h_size, a_size, discount
    self.scalarInput = tf.placeholder(shape=[None, 7056], dtype=tf.uint8, name='frameInput')
    self.batch_size = tf.placeholder(dtype=tf.int32, shape=[], name='batchSize')
    self.trainLength = tf.placeholder(dtype=tf.int32, shape=[], name='trainLength')
    # The previous action is embedded and fed to the RNN alongside the frame features.
    self.actionsInput = tf.placeholder(shape=[None], dtype=tf.int32, name='actionsInput')
    self.actionsInputOnehot = tf.one_hot(self.actionsInput, a_size)
    self.actionsInputWeights = tf.Variable(tf.random_normal([a_size, action_h_size]))
    self.actionsInputProjected = tf.matmul(self.actionsInputOnehot, self.actionsInputWeights)
    self.frameShape = tf.constant((84, 84, 1), dtype=tf.int32)
    self.frames = tf.reshape(self.scalarInput / 255, [-1, 84, 84, 1])
    self.conv1 = slim.convolution2d(inputs=self.frames, num_outputs=32, kernel_size=(8, 8),
                                    stride=(4, 4), padding='VALID', biases_initializer=None,
                                    scope=scopeName + '_conv1')
    self.conv2 = slim.convolution2d(inputs=self.conv1, num_outputs=64, kernel_size=(4, 4),
                                    stride=(2, 2), padding='VALID', biases_initializer=None,
                                    scope=scopeName + '_conv2')
    self.conv3 = slim.convolution2d(inputs=self.conv2, num_outputs=64, kernel_size=(3, 3),
                                    stride=(1, 1), padding='VALID', biases_initializer=None,
                                    scope=scopeName + '_conv3')
    self.conv4 = slim.convolution2d(inputs=self.conv3, num_outputs=h_size, kernel_size=(7, 7),
                                    stride=(1, 1), padding='VALID', biases_initializer=None,
                                    scope=scopeName + '_conv4')
    # Concatenate the conv features with the projected previous action, shaped
    # [batch, trace, h_size + action_h_size], as the RNN input.
    self.rnnInput = tf.concat([
        tf.reshape(slim.flatten(self.conv4),
                   [self.batch_size, self.trainLength, h_size]),
        tf.reshape(self.actionsInputProjected,
                   [self.batch_size, self.trainLength, action_h_size])
    ], 2)
    self.state_init = rnn_cell.zero_state(self.batch_size, tf.float32)
    self.rnn, self.rnn_state = tf.nn.dynamic_rnn(
        inputs=self.rnnInput, cell=rnn_cell, dtype=tf.float32,
        initial_state=self.state_init, scope=scopeName + '_rnn')
    self.rnn = tf.reshape(self.rnn, shape=[-1, h_size])
    self.streamA, self.streamV = tf.split(self.rnn, 2, axis=1)
    self.AW = tf.Variable(tf.random_normal([h_size // 2, a_size]))
    self.VW = tf.Variable(tf.random_normal([h_size // 2, 1]))
    self.A = tf.matmul(self.streamA, self.AW)
    self.V = tf.matmul(self.streamV, self.VW)
    # Gradients are taken with respect to the float frames; the uint8 placeholder
    # itself has no defined gradient.
    self.salience = tf.gradients(self.A, self.frames)
    self.Qout = self.V + (self.A - tf.reduce_mean(self.A, axis=1, keepdims=True))
    self.predict = tf.argmax(self.Qout, 1)
    self.action = self.predict[-1]
    self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
    self.actions_onehot = tf.one_hot(self.actions, a_size, dtype=tf.float32)
    self.Q = tf.reduce_sum(self.Qout * self.actions_onehot, axis=1)
    self.sample_terminals = tf.placeholder(tf.int32, shape=[None], name='sample_terminals')
    end_multiplier = tf.cast(-(self.sample_terminals - 1), tf.float32)
    self.sample_rewards = tf.placeholder(tf.float32, shape=[None], name='sample_rewards')
    self.doubleQ = tf.placeholder(tf.float32, shape=[None], name='doubleQ')
    self.targetQ = self.sample_rewards + self.discount * self.doubleQ * end_multiplier
    # Only train on the second half of every trace, per Lample & Chaplot 2016.
    self.mask = tf.concat((tf.zeros((self.batch_size, self.trainLength // 2)),
                           tf.ones((self.batch_size, self.trainLength // 2))), 1)
    self.mask = tf.reshape(self.mask, [-1])
    self.loss = tf.losses.huber_loss(self.Q * self.mask, self.targetQ * self.mask)
    if scopeName == 'main':
        tf.summary.scalar('loss', self.loss)
        tf.summary.histogram('Q', self.Qout)
        # rnn_state is an LSTMStateTuple; log its hidden component, since
        # tf.summary.histogram expects a single tensor.
        tf.summary.histogram('hidden', self.rnn_state.h)
    self.trainer = tf.train.RMSPropOptimizer(0.00025, momentum=0.95, epsilon=0.01)
    self.updateModel = self.trainer.minimize(self.loss)
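A sketch of how the doubleQ placeholder above is typically filled (Double DQN): action selection from the main network, value estimation from the target network. mainQN, targetQN, next_frames, prev_actions, trace_length, batch, and state_train are assumptions:

feed = {mainQN.scalarInput: next_frames, mainQN.actionsInput: prev_actions,
        mainQN.trainLength: trace_length, mainQN.batch_size: batch,
        mainQN.state_init: state_train}
best_actions = sess.run(mainQN.predict, feed_dict=feed)
feed_t = {targetQN.scalarInput: next_frames, targetQN.actionsInput: prev_actions,
          targetQN.trainLength: trace_length, targetQN.batch_size: batch,
          targetQN.state_init: state_train}
Q2 = sess.run(targetQN.Qout, feed_dict=feed_t)
doubleQ_vals = Q2[np.arange(batch * trace_length), best_actions]  # Q_target(s', argmax_a Q_main)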
def __init__(self, h_size, rnn_cell, myScope):
    # The network receives a game frame, flattened into an array; it resizes the
    # frame and processes it through 4 convolutional layers.
    self.scalarInput = tf.placeholder(shape=[None, 21168], dtype=tf.float32)
    self.imageIn = tf.reshape(self.scalarInput, shape=[-1, 84, 84, 3])
    self.conv1 = slim.convolution2d(inputs=self.imageIn, num_outputs=32,
                                    kernel_size=[8, 8], stride=[4, 4], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv1')
    self.conv2 = slim.convolution2d(inputs=self.conv1, num_outputs=64,
                                    kernel_size=[4, 4], stride=[2, 2], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv2')
    self.conv3 = slim.convolution2d(inputs=self.conv2, num_outputs=64,
                                    kernel_size=[3, 3], stride=[1, 1], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv3')
    self.conv4 = slim.convolution2d(inputs=self.conv3, num_outputs=h_size,
                                    kernel_size=[7, 7], stride=[1, 1], padding='VALID',
                                    biases_initializer=None, scope=myScope + '_conv4')
    self.trainLength = tf.placeholder(dtype=tf.int32)
    # We take the output from the final conv layer and send it to the recurrent
    # layer. The input must be shaped into [batch x trace x units] for RNN
    # processing and returned to [batch x units] when sent through the upper layers.
    self.batch_size = tf.placeholder(dtype=tf.int32, shape=[])
    self.convFlat = tf.reshape(slim.flatten(self.conv4),
                               [self.batch_size, self.trainLength, h_size])
    self.state_in = rnn_cell.zero_state(self.batch_size, tf.float32)
    self.rnn, self.rnn_state = tf.nn.dynamic_rnn(
        inputs=self.convFlat, cell=rnn_cell, dtype=tf.float32,
        initial_state=self.state_in, scope=myScope + '_rnn')
    self.rnn = tf.reshape(self.rnn, shape=[-1, h_size])
    # The output from the recurrent layer is split into Value and Advantage streams.
    self.streamA, self.streamV = tf.split(self.rnn, 2, 1)
    self.AW = tf.Variable(tf.random_normal([h_size // 2, 4]))
    self.VW = tf.Variable(tf.random_normal([h_size // 2, 1]))
    self.Advantage = tf.matmul(self.streamA, self.AW)
    self.Value = tf.matmul(self.streamV, self.VW)
    self.salience = tf.gradients(self.Advantage, self.imageIn)
    # Then combine them together for our final Q-values.
    self.Qout = self.Value + tf.subtract(
        self.Advantage, tf.reduce_mean(self.Advantage, axis=1, keepdims=True))
    self.predict = tf.argmax(self.Qout, 1)
    # Obtain the loss by taking the sum-of-squares difference between the target
    # and predicted Q-values.
    self.targetQ = tf.placeholder(shape=[None], dtype=tf.float32)
    self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
    self.actions_onehot = tf.one_hot(self.actions, 4, dtype=tf.float32)
    self.Q = tf.reduce_sum(tf.multiply(self.Qout, self.actions_onehot), axis=1)
    self.td_error = tf.square(self.targetQ - self.Q)
    # In order to only propagate accurate gradients, we mask the first half of the
    # losses for each trace.
    self.maskA = tf.zeros([self.batch_size, self.trainLength // 2])
    self.maskB = tf.ones([self.batch_size, self.trainLength // 2])
    self.mask = tf.concat([self.maskA, self.maskB], 1)
    self.mask = tf.reshape(self.mask, [-1])
    self.loss = tf.reduce_mean(self.td_error * self.mask)
    self.trainer = tf.train.AdamOptimizer(learning_rate=0.0001)
    self.updateModel = self.trainer.minimize(self.loss)
def __init__(self):
    # Keras branch (appears unused by the tf.slim graph below): value and policy
    # towers on top of a dcn_resnet backbone.
    action_input = Input(shape=[None, 1])
    state_input = Input(shape=[None, 224, 224, 3])
    value_dcn = dcn_resnet()
    policy_dcn = dcn_resnet()  # shared variables, or separate ones?
    value_lstm = CuDNNLSTM(256)(value_dcn)    # state-value: expected return
    policy_lstm = CuDNNLSTM(256)(policy_dcn)  # policy: agent's action selection
    self.value_model = Dense(1, activation='relu')(value_lstm)
    self.policy_model = Dense(1, activation='relu')(policy_lstm)
    self.action_max = 2
    # The input placeholder was missing in the original; an 84x84x3 frame is
    # assumed here to match the conv stack below.
    self.input_image = tf.placeholder(tf.float32, [None, 84, 84, 3])
    self.conv1 = slim.conv2d(self.input_image, activation_fn=tf.nn.relu, num_outputs=32,
                             kernel_size=[8, 8], stride=[4, 4], padding='VALID')
    self.conv2 = slim.conv2d(self.conv1, activation_fn=tf.nn.relu, num_outputs=64,
                             kernel_size=[4, 4], stride=[2, 2], padding='VALID')
    self.conv3 = slim.convolution2d(inputs=self.conv2, num_outputs=64, kernel_size=[3, 3],
                                    stride=[1, 1], padding='VALID', activation_fn=tf.nn.relu)
    self.conv4 = slim.convolution2d(inputs=self.conv3, num_outputs=256, kernel_size=[7, 7],
                                    stride=[1, 1], padding='VALID', activation_fn=tf.nn.relu)
    hidden = slim.fully_connected(slim.flatten(self.conv4), 256, activation_fn=tf.nn.relu)
    # Temporal dependency via an LSTM over the flattened conv features.
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(256, reuse=tf.AUTO_REUSE)
    c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
    h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
    self.state_init = [c_init, h_init]
    c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
    h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
    self.rnn_in = tf.expand_dims(hidden, [0])
    self.state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)  # c: cell state, h: output
    lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(lstm_cell, self.rnn_in,
                                                      initial_state=self.state_in,
                                                      time_major=False, scope="A3C")
    lstm_c, lstm_h = self.lstm_state
    self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
    rnn_out = tf.reshape(lstm_outputs, [-1, 256])
    # Keep the pre-softmax logits separate so the cross-entropy below is applied to
    # logits, not to already-softmaxed probabilities (a bug in the original).
    policy_logits = tf.layers.dense(rnn_out, 9, activation=None)
    self.policy = tf.nn.softmax(policy_logits)
    self.value = slim.fully_connected(rnn_out, 1, activation_fn=None,
                                      weights_initializer=normalized_columns_initializer(1.0),
                                      biases_initializer=None)
    # Labels are float and shaped [None, 9] to match the logits; the original used
    # an int placeholder of shape [9].
    self.true_val = tf.placeholder(tf.float32, shape=[None, 9])
    self.error = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.true_val,
                                                   logits=policy_logits))
    self.train_op = tf.train.AdamOptimizer(0.01).minimize(self.error)
    self.saver = tf.train.Saver()
def mobilenet_v2(input, weight_decay, batch_norm_params):
    features = {}
    with tf.variable_scope('Mobilenet'):
        with slim.arg_scope([slim.convolution2d, slim.separable_conv2d],
                            activation_fn=tf.nn.relu6,
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                            biases_initializer=tf.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            padding='SAME'):
            print('Mobilenet input shape({}): {}'.format(input.name, input.get_shape()))
            # 96*96*3 112*112*3
            conv_1 = slim.convolution2d(input, 32, [3, 3], stride=2, scope='conv_1')
            print(conv_1.name, conv_1.get_shape())
            # 48*48*32 56*56*32
            conv2_1 = slim.separable_convolution2d(conv_1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv2_1/dwise')
            print(conv2_1.name, conv2_1.get_shape())
            conv2_1 = slim.convolution2d(conv2_1, 16, [1, 1], stride=1, activation_fn=None, scope='conv2_1/linear')
            print(conv2_1.name, conv2_1.get_shape())
            features['feature2'] = conv2_1
            # 48*48*16 56*56*16
            conv3_1 = slim.convolution2d(conv2_1, 96, [1, 1], stride=1, scope='conv3_1/expand')
            print(conv3_1.name, conv3_1.get_shape())
            conv3_1 = slim.separable_convolution2d(conv3_1, num_outputs=None, stride=2, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_1/dwise')
            print(conv3_1.name, conv3_1.get_shape())
            conv3_1 = slim.convolution2d(conv3_1, 24, [1, 1], stride=1, activation_fn=None, scope='conv3_1/linear')
            print(conv3_1.name, conv3_1.get_shape())
            conv3_2 = slim.convolution2d(conv3_1, 144, [1, 1], stride=1, scope='conv3_2/expand')
            print(conv3_2.name, conv3_2.get_shape())
            conv3_2 = slim.separable_convolution2d(conv3_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_2/dwise')
            print(conv3_2.name, conv3_2.get_shape())
            conv3_2 = slim.convolution2d(conv3_2, 24, [1, 1], stride=1, activation_fn=None, scope='conv3_2/linear')
            print(conv3_2.name, conv3_2.get_shape())
            block_3_2 = conv3_1 + conv3_2
            print(block_3_2.name, block_3_2.get_shape())
            features['feature3'] = block_3_2
            features['pfld'] = block_3_2
            # 24*24*24 28*28*24
            conv4_1 = slim.convolution2d(block_3_2, 144, [1, 1], stride=1, scope='conv4_1/expand')
            print(conv4_1.name, conv4_1.get_shape())
            conv4_1 = slim.separable_convolution2d(conv4_1, num_outputs=None, stride=2, depth_multiplier=1, kernel_size=[3, 3], scope='conv4_1/dwise')
            print(conv4_1.name, conv4_1.get_shape())
            conv4_1 = slim.convolution2d(conv4_1, 32, [1, 1], stride=1, activation_fn=None, scope='conv4_1/linear')
            print(conv4_1.name, conv4_1.get_shape())
            conv4_2 = slim.convolution2d(conv4_1, 192, [1, 1], stride=1, scope='conv4_2/expand')
            print(conv4_2.name, conv4_2.get_shape())
            conv4_2 = slim.separable_convolution2d(conv4_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv4_2/dwise')
            print(conv4_2.name, conv4_2.get_shape())
            conv4_2 = slim.convolution2d(conv4_2, 32, [1, 1], stride=1, activation_fn=None, scope='conv4_2/linear')
            print(conv4_2.name, conv4_2.get_shape())
            block_4_2 = conv4_1 + conv4_2
            print(block_4_2.name, block_4_2.get_shape())
            conv4_3 = slim.convolution2d(block_4_2, 192, [1, 1], stride=1, scope='conv4_3/expand')
            print(conv4_3.name, conv4_3.get_shape())
            conv4_3 = slim.separable_convolution2d(conv4_3, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv4_3/dwise')
            print(conv4_3.name, conv4_3.get_shape())
            conv4_3 = slim.convolution2d(conv4_3, 32, [1, 1], stride=1, activation_fn=None, scope='conv4_3/linear')
            print(conv4_3.name, conv4_3.get_shape())
            block_4_3 = block_4_2 + conv4_3
            print(block_4_3.name, block_4_3.get_shape())
            # 12*12*32 14*14*32
            features['feature4'] = block_4_3
            conv5_1 = slim.convolution2d(block_4_3, 192, [1, 1], stride=1, scope='conv5_1/expand')
            print(conv5_1.name, conv5_1.get_shape())
            conv5_1 = slim.separable_convolution2d(conv5_1, num_outputs=None, stride=2, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_1/dwise')
            print(conv5_1.name, conv5_1.get_shape())
            conv5_1 = slim.convolution2d(conv5_1, 64, [1, 1], stride=1, activation_fn=None, scope='conv5_1/linear')
            print(conv5_1.name, conv5_1.get_shape())
            conv5_2 = slim.convolution2d(conv5_1, 384, [1, 1], stride=1, scope='conv5_2/expand')
            print(conv5_2.name, conv5_2.get_shape())
            conv5_2 = slim.separable_convolution2d(conv5_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_2/dwise')
            print(conv5_2.name, conv5_2.get_shape())
            conv5_2 = slim.convolution2d(conv5_2, 64, [1, 1], stride=1, activation_fn=None, scope='conv5_2/linear')
            print(conv5_2.name, conv5_2.get_shape())
            block_5_2 = conv5_1 + conv5_2
            print(block_5_2.name, block_5_2.get_shape())
            conv5_3 = slim.convolution2d(block_5_2, 384, [1, 1], stride=1, scope='conv5_3/expand')
            print(conv5_3.name, conv5_3.get_shape())
            conv5_3 = slim.separable_convolution2d(conv5_3, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_3/dwise')
            print(conv5_3.name, conv5_3.get_shape())
            conv5_3 = slim.convolution2d(conv5_3, 64, [1, 1], stride=1, activation_fn=None, scope='conv5_3/linear')
            print(conv5_3.name, conv5_3.get_shape())
            block_5_3 = block_5_2 + conv5_3
            print(block_5_3.name, block_5_3.get_shape())
            conv5_4 = slim.convolution2d(block_5_3, 384, [1, 1], stride=1, scope='conv5_4/expand')
            print(conv5_4.name, conv5_4.get_shape())
            conv5_4 = slim.separable_convolution2d(conv5_4, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_4/dwise')
            print(conv5_4.name, conv5_4.get_shape())
            conv5_4 = slim.convolution2d(conv5_4, 64, [1, 1], stride=1, activation_fn=None, scope='conv5_4/linear')
            print(conv5_4.name, conv5_4.get_shape())
            block_5_4 = block_5_3 + conv5_4
            print(block_5_4.name, block_5_4.get_shape())
            # 6*6*64 7*7*64
            conv6_1 = slim.convolution2d(block_5_4, 384, [1, 1], stride=1, scope='conv6_1/expand')
            print(conv6_1.name, conv6_1.get_shape())
            conv6_1 = slim.separable_convolution2d(conv6_1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv6_1/dwise')
            print(conv6_1.name, conv6_1.get_shape())
            conv6_1 = slim.convolution2d(conv6_1, 96, [1, 1], stride=1, activation_fn=None, scope='conv6_1/linear')
            print(conv6_1.name, conv6_1.get_shape())
            conv6_2 = slim.convolution2d(conv6_1, 576, [1, 1], stride=1, scope='conv6_2/expand')
            print(conv6_2.name, conv6_2.get_shape())
            conv6_2 = slim.separable_convolution2d(conv6_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv6_2/dwise')
            print(conv6_2.name, conv6_2.get_shape())
            conv6_2 = slim.convolution2d(conv6_2, 96, [1, 1], stride=1, activation_fn=None, scope='conv6_2/linear')
            print(conv6_2.name, conv6_2.get_shape())
            block_6_2 = conv6_1 + conv6_2
            print(block_6_2.name, block_6_2.get_shape())
            conv6_3 = slim.convolution2d(block_6_2, 576, [1, 1], stride=1, scope='conv6_3/expand')
            print(conv6_3.name, conv6_3.get_shape())
            conv6_3 = slim.separable_convolution2d(conv6_3, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv6_3/dwise')
            print(conv6_3.name, conv6_3.get_shape())
            conv6_3 = slim.convolution2d(conv6_3, 96, [1, 1], stride=1, activation_fn=None, scope='conv6_3/linear')
            print(conv6_3.name, conv6_3.get_shape())
            block_6_3 = block_6_2 + conv6_3
            print(block_6_3.name, block_6_3.get_shape())
            features['feature5'] = block_6_3
            # 6*6*96 7*7*96
            conv7_1 = slim.convolution2d(block_6_3, 576, [1, 1], stride=1, scope='conv7_1/expand')
            print(conv7_1.name, conv7_1.get_shape())
            conv7_1 = slim.separable_convolution2d(conv7_1, num_outputs=None, stride=2, depth_multiplier=1, kernel_size=[3, 3], scope='conv7_1/dwise')
            print(conv7_1.name, conv7_1.get_shape())
            conv7_1 = slim.convolution2d(conv7_1, 160, [1, 1], stride=1, activation_fn=None, scope='conv7_1/linear')
            print(conv7_1.name, conv7_1.get_shape())
            conv7_2 = slim.convolution2d(conv7_1, 960, [1, 1], stride=1, scope='conv7_2/expand')
            print(conv7_2.name, conv7_2.get_shape())
            conv7_2 = slim.separable_convolution2d(conv7_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv7_2/dwise')
            print(conv7_2.name, conv7_2.get_shape())
            conv7_2 = slim.convolution2d(conv7_2, 160, [1, 1], stride=1, activation_fn=None, scope='conv7_2/linear')
            print(conv7_2.name, conv7_2.get_shape())
            block_7_2 = conv7_1 + conv7_2
            print(block_7_2.name, block_7_2.get_shape())
            conv7_3 = slim.convolution2d(block_7_2, 960, [1, 1], stride=1, scope='conv7_3/expand')
            print(conv7_3.name, conv7_3.get_shape())
            conv7_3 = slim.separable_convolution2d(conv7_3, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv7_3/dwise')
            print(conv7_3.name, conv7_3.get_shape())
            conv7_3 = slim.convolution2d(conv7_3, 160, [1, 1], stride=1, activation_fn=None, scope='conv7_3/linear')
            print(conv7_3.name, conv7_3.get_shape())
            block_7_3 = block_7_2 + conv7_3
            print(block_7_3.name, block_7_3.get_shape())
            conv7_4 = slim.convolution2d(block_7_3, 960, [1, 1], stride=1, scope='conv7_4/expand')
            print(conv7_4.name, conv7_4.get_shape())
            conv7_4 = slim.separable_convolution2d(conv7_4, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv7_4/dwise')
            print(conv7_4.name, conv7_4.get_shape())
            conv7_4 = slim.convolution2d(conv7_4, 320, [1, 1], stride=1, activation_fn=None, scope='conv7_4/linear')
            print(conv7_4.name, conv7_4.get_shape())
            features['feature6'] = conv7_4
    return features
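Sketch of calling mobilenet_v2 above. The weight decay and batch-norm settings are illustrative; the features dict keys are the ones the function itself fills in:

bn_params = {'decay': 0.995, 'epsilon': 0.001, 'updates_collections': None,
             'is_training': True}
image = tf.placeholder(tf.float32, [None, 112, 112, 3])
features = mobilenet_v2(image, weight_decay=5e-5, batch_norm_params=bn_params)
# features['feature2'] ... features['feature6'] expose intermediate activations.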
def create_model(input, landmark, phase_train, args):
    batch_norm_params = {
        'decay': 0.995,
        'epsilon': 0.001,
        'updates_collections': None,  # tf.GraphKeys.UPDATE_OPS
        'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
        'is_training': phase_train
    }
    landmark_dim = int(landmark.get_shape()[-1])
    features, landmarks_pre = pfld_inference(input, args.weight_decay, batch_norm_params)
    # Landmark loss: mean over the batch of the per-sample squared error.
    landmarks_loss = tf.reduce_sum(tf.square(landmarks_pre - landmark), axis=1)
    landmarks_loss = tf.reduce_mean(landmarks_loss)
    # Add the auxiliary net that predicts the Euler angles used to finish the
    # PFLD loss function.
    print('\nauxiliary net')
    with slim.arg_scope([slim.convolution2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(args.weight_decay),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        pfld_input = features['auxiliary_input']
        net_aux = slim.convolution2d(pfld_input, 128, [3, 3], stride=2, scope='pfld_conv1')
        print(net_aux.name, net_aux.get_shape())
        net_aux = slim.convolution2d(net_aux, 128, [3, 3], stride=1, scope='pfld_conv2')
        print(net_aux.name, net_aux.get_shape())
        net_aux = slim.convolution2d(net_aux, 32, [3, 3], stride=2, scope='pfld_conv3')
        print(net_aux.name, net_aux.get_shape())
        net_aux = slim.convolution2d(net_aux, 128, [7, 7], stride=1, scope='pfld_conv4')
        print(net_aux.name, net_aux.get_shape())
        net_aux = slim.max_pool2d(net_aux, kernel_size=[3, 3], stride=1, scope='pool1',
                                  padding='SAME')
        print(net_aux.name, net_aux.get_shape())
        net_aux = slim.flatten(net_aux)
        print(net_aux.name, net_aux.get_shape())
        fc1 = slim.fully_connected(net_aux, num_outputs=32, activation_fn=None,
                                   scope='pfld_fc1')
        print(fc1.name, fc1.get_shape())
        euler_angles_pre = slim.fully_connected(fc1, num_outputs=3, activation_fn=None,
                                                scope='pfld_fc2')
        print(euler_angles_pre.name, euler_angles_pre.get_shape())
    return landmarks_pre, landmarks_loss, euler_angles_pre
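A hedged sketch of wiring create_model above into a training graph. pfld_inference must be importable; the 196-dim landmark tensor (98 (x, y) pairs), the Namespace-based args, and the optimizer settings are assumptions:

import argparse
args = argparse.Namespace(weight_decay=5e-5)
image = tf.placeholder(tf.float32, [None, 112, 112, 3])
landmark_gt = tf.placeholder(tf.float32, [None, 196])
phase_train = tf.placeholder(tf.bool, name='phase_train')
landmarks_pre, landmarks_loss, euler_angles_pre = create_model(image, landmark_gt, phase_train, args)
train_op = tf.train.AdamOptimizer(1e-4).minimize(landmarks_loss)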