def encoder(self, inputs):
    """Encode ``inputs`` into a vector of width ``FLAGS.code_size``.

    Three stages are stacked, each a ``ConvLayer`` followed by a
    ``ConvPoolLayer``; the stage widths are 8, 16 and 32 filters.  Every
    convolution uses kernel size 3, stride 1 and "SAME" padding with a ReLU
    activation; every pooling step uses size 3, stride 2 and "SAME" padding.
    The last activation is flattened and multiplied by a freshly created
    weight matrix (plus bias) to produce the code.

    The shape of every intermediate activation is printed while the graph
    is being built.

    Args:
        inputs: 4-D activation whose axis 3 holds the channel count
            (assumed NHWC with a statically known channel dimension --
            TODO confirm against the caller).

    Returns:
        A pair ``(dense, last_conv_dims)``: the projected code and the
        static shape (batch axis dropped) of the last conv/pool activation.
    """
    # The channel count is a static, build-time value; hand it over as a
    # plain int like the literal filter counts used for all other layers.
    in_filters = int(inputs.shape[3])

    features = inputs
    for out_filters in (8, 16, 32):
        # convolutional layer
        conv = ConvLayer(input_filters=in_filters,
                         output_filters=out_filters,
                         act=tf.nn.relu,
                         kernel_size=3,
                         kernel_stride=1,
                         kernel_padding="SAME")
        features = conv(features)
        print(features.shape)

        # convolutional and pooling layer
        conv_pool = ConvPoolLayer(input_filters=out_filters,
                                  output_filters=out_filters,
                                  act=tf.nn.relu,
                                  kernel_size=3,
                                  kernel_stride=1,
                                  kernel_padding="SAME",
                                  pool_size=3,
                                  pool_stride=2,
                                  pool_padding="SAME")
        features = conv_pool(features)
        print(features.shape)

        in_filters = out_filters

    last_conv_dims = features.shape[1:]

    # make output of pooling flatten
    flat_size = last_conv_dims[0] * last_conv_dims[1] * last_conv_dims[2]
    flatten = tf.reshape(features, [-1, flat_size])
    print(flatten.shape)

    weights_encoder = normal_initializer(
        (int(flatten.shape[1]), FLAGS.code_size))
    # NOTE: the trailing comma matters -- ``(FLAGS.code_size)`` is just an
    # int, whereas the initializer is given a shape everywhere else.
    bias_encoder = zero_initializer((FLAGS.code_size,))

    # apply fully connected layer
    dense = tf.matmul(flatten, weights_encoder) + bias_encoder
    print(dense.shape)

    return dense, last_conv_dims
def encoder(self, inputs):
    """Build the convolutional encoder and its dense projection.

    Layer sequence:
        ConvLayer1 -> ConvPoolLayer1 -> ConvLayer2 -> ConvPoolLayer2
        -> ConvLayer3 -> ConvPoolLayer3 -> flatten -> dense

    Settings:
        * every convolution: filter size 3, stride 1, 'SAME' padding, ReLU
        * every pooling step: pool size 3, stride 2, 'SAME' padding
        * filter counts: FLAGS.num_channel -> 8 -> 8 -> 16 -> 16 -> 32 -> 32
        * dense width: FLAGS.code_size

    The shape of each intermediate activation is printed as the graph is
    constructed.

    Args:
        inputs: activation fed to the first ``ConvLayer``.

    Returns:
        ``(dense, last_conv_dims)`` where ``last_conv_dims`` is an int32
        constant tensor holding ``(4, 4, 32)``.
    """
    features = inputs
    in_filters = FLAGS.num_channel
    for stage_filters in (8, 16, 32):
        # convolutional layer
        conv_layer = ConvLayer(input_filters=in_filters,
                               output_filters=stage_filters,
                               act=tf.nn.relu,
                               kernel_size=3,
                               kernel_stride=1,
                               kernel_padding='SAME')
        features = conv_layer(inputs=features)
        print(features.shape)

        # convolutional and pooling layer
        conv_pool_layer = ConvPoolLayer(input_filters=stage_filters,
                                        output_filters=stage_filters,
                                        act=tf.nn.relu,
                                        kernel_size=3,
                                        kernel_stride=1,
                                        kernel_padding='SAME',
                                        pool_size=3,
                                        pool_stride=2,
                                        pool_padding='SAME')
        features = conv_pool_layer(inputs=features)
        print(features.shape)

        in_filters = stage_filters

    # Flatten the last activation.  The batch size is taken from the
    # dynamic shape; the per-sample dims are the fixed constant below
    # (NOTE(review): this presumes the last activation really is 4x4x32 --
    # confirm against the input resolution in use).
    dynamic_shape = tf.shape(features)
    num_samples = dynamic_shape[0]
    last_conv_dims = tf.constant(value=(4, 4, 32),
                                 dtype=tf.int32,
                                 shape=(3, ))
    flat_size = tf.reduce_prod(last_conv_dims)
    flatten = tf.reshape(features, shape=[num_samples, flat_size])
    print(flatten.shape)

    # apply fully connected layer
    dense_weights = normal_initializer(shape=[flat_size, FLAGS.code_size])
    dense_bias = zero_initializer(shape=[FLAGS.code_size])
    dense = tf.nn.xw_plus_b(flatten, dense_weights, dense_bias)
    print(dense.shape)

    return dense, last_conv_dims
def __init__(self, config):
    """Build the symbolic 'alexnet' graph and its training bookkeeping.

    Keys read from ``config``: 'verbose', 'batch_size', 'use_data_layer',
    'lib_conv', 'n_softmax_out', 'rand_crop' (only when the data layer is
    enabled), 'input_width', 'input_height', 'file_batch_size',
    'learning_rate', 'momentum' and 'weight_decay'.

    The network is five ``ConvPoolLayer`` stages, two ``FCLayer`` +
    ``DropoutLayer`` pairs and a ``SoftmaxLayer``.  Conv, FC and softmax
    layers are recorded in ``self.layers`` and contribute to
    ``self.params`` / ``self.weight_types``; the dropout layers and the
    optional data layer are not recorded.
    """
    ModelBase.__init__(self)

    self.config = config
    self.verbose = self.config['verbose']
    self.name = 'alexnet'
    batch_size = config['batch_size']
    flag_datalayer = config['use_data_layer']
    lib_conv = config['lib_conv']
    n_softmax_out = config['n_softmax_out']

    # ##################### BUILD NETWORK ##########################
    # Symbolic inputs; 'rand' is a random array used for random
    # cropping/mirroring of data.
    x = T.ftensor4('x')
    y = T.lvector('y')
    rand = T.fvector('rand')
    lr = T.scalar('lr')

    if self.verbose:
        print('AlexNet 2/16')

    self.layers = []
    params = []
    weight_types = []

    def register(layer):
        # Record a trainable layer and collect its parameters.
        self.layers.append(layer)
        params.extend(layer.params)
        weight_types.extend(layer.weight_type)
        return layer

    if flag_datalayer:
        data_layer = DataLayer(input=x,
                               image_shape=(3, 256, 256, batch_size),
                               cropsize=227,
                               rand=rand,
                               mirror=True,
                               flag_rand=config['rand_crop'])
        features = data_layer.output
    else:
        features = x

    # One row per conv/pool stage:
    # (input dims, filter_shape, convstride, padsize, group,
    #  poolsize, poolstride, bias_init, lrn)
    conv_specs = [
        ((3, 227, 227), (3, 11, 11, 96), 4, 0, 1, 3, 2, 0.0, True),
        ((96, 27, 27), (96, 5, 5, 256), 1, 2, 2, 3, 2, 0.1, True),
        ((256, 13, 13), (256, 3, 3, 384), 1, 1, 1, 1, 0, 0.0, False),
        ((384, 13, 13), (384, 3, 3, 384), 1, 1, 2, 1, 0, 0.1, False),
        ((384, 13, 13), (384, 3, 3, 256), 1, 1, 2, 3, 2, 0.0, False),
    ]
    for (in_dims, filter_shape, conv_stride, pad, group,
         pool_size, pool_stride, bias_init, use_lrn) in conv_specs:
        conv_stage = ConvPoolLayer(input=features,
                                   image_shape=in_dims + (batch_size, ),
                                   filter_shape=filter_shape,
                                   convstride=conv_stride,
                                   padsize=pad,
                                   group=group,
                                   poolsize=pool_size,
                                   poolstride=pool_stride,
                                   bias_init=bias_init,
                                   lrn=use_lrn,
                                   lib_conv=lib_conv,
                                   verbose=self.verbose)
        features = register(conv_stage).output

    # Move axis 3 to the front, then collapse the rest into one axis so
    # the fully connected layers receive a 2-D input.
    fc_input = T.flatten(features.dimshuffle(3, 0, 1, 2), 2)

    # Two FC + dropout pairs; only the FC layers are registered.
    for fc_n_in in (9216, 4096):
        fc_layer = register(FCLayer(input=fc_input,
                                    n_in=fc_n_in,
                                    n_out=4096,
                                    verbose=self.verbose))
        dropout = DropoutLayer(fc_layer.output,
                               n_in=4096,
                               n_out=4096,
                               verbose=self.verbose)
        fc_input = dropout.output

    softmax = register(SoftmaxLayer(input=fc_input,
                                    n_in=4096,
                                    n_out=n_softmax_out,
                                    verbose=self.verbose))

    # #################### NETWORK BUILT #######################
    self.p_y_given_x = softmax.p_y_given_x
    self.y_pred = softmax.y_pred
    self.output = self.p_y_given_x

    self.cost = softmax.negative_log_likelihood(y)
    self.error = softmax.errors(y)
    # "Top-5" error is capped at the number of softmax outputs.
    self.error_top_5 = softmax.errors_top_x(y, min(n_softmax_out, 5))

    self.params = params

    # inputs
    self.x = x
    self.y = y
    self.rand = rand
    self.lr = lr

    # Shared buffers sized by 'file_batch_size' (for loading a large
    # batch); sub-batches are sliced out of them below.
    file_batch_size = config['file_batch_size']
    self.shared_x = theano.shared(
        np.zeros((3, config['input_width'], config['input_height'],
                  file_batch_size),
                 dtype=theano.config.floatX),
        borrow=True)
    self.shared_y = theano.shared(
        np.zeros((file_batch_size, ), dtype=int),
        borrow=True)
    self.shared_lr = theano.shared(np.float32(config['learning_rate']))

    # training related
    self.base_lr = np.float32(config['learning_rate'])
    self.step_idx = 0
    self.mu = config['momentum']  # momentum
    self.eta = config['weight_decay']  # weight decay
    self.weight_types = weight_types
    self.batch_size = batch_size

    self.grads = T.grad(self.cost, self.params)

    # Symbolic sub-batch index and the matching slices of the buffers.
    subb_ind = T.iscalar('subb')
    self.subb_ind = subb_ind
    start = subb_ind * self.batch_size
    stop = (subb_ind + 1) * self.batch_size
    self.shared_x_slice = self.shared_x[:, :, :, start:stop]
    self.shared_y_slice = self.shared_y[start:stop]
def __init__(self, config):
    """Build the temporal-stream convolutional feature extractor.

    Keys read from ``config``: 'batch_size', 'num_seq', 'lib_conv',
    'img_scale_x', 'img_scale_y', 'reg_scale_x', 'reg_scale_y' and
    'input_dim'.

    The graph is a three-layer stem followed by the blocks the original
    code labels 3a, 3b, 3c, 4a-4e, 5a and 5b.  Branch outputs of a block
    are concatenated along axis 0.  Every ``ConvPoolLayer`` is appended to
    ``self.layers`` in creation order; ``PoolLayer`` instances are not.

    Things worth knowing before touching this code:
      * ``self.fea_tmp`` is derived from the 5a output.  The 5b layers are
        created and registered, but their outputs are never concatenated
        or used here.
      * ``self.params`` and ``self.weight_types`` are left as empty lists;
        nothing in this constructor adds to them.
    """
    self.config = config
    batch_size = config['batch_size']
    num_seq = config['num_seq']
    lib_conv = config['lib_conv']

    # ##################### BUILD NETWORK ##########################
    img_scale_x = config['img_scale_x']
    img_scale_y = config['img_scale_y']
    reg_scale_x = config['reg_scale_x']
    reg_scale_y = config['reg_scale_y']
    use_noise = T.fscalar('use_noise')
    input_dim = config['input_dim']

    print('... building the model')
    self.layers = []
    params = []
    weight_types = []

    def bn_conv(source, in_ch, side, kernel, out_ch, stride=1, pad=0):
        # Create one Bn=True convolution with no pooling (poolsize=1,
        # poolstride=1), register it and return its output.
        layer = ConvPoolLayer(
            input=source,
            image_shape=(in_ch, side, side, batch_size),
            filter_shape=(in_ch, kernel, kernel, out_ch),
            convstride=stride,
            padsize=pad,
            group=1,
            poolsize=1,
            poolstride=1,
            bias_init=0.0,
            lib_conv=lib_conv,
            Bn=True)
        self.layers.append(layer)
        return layer.output

    def four_branch(source, in_ch, side, widths,
                    pool_mode='average_inc_pad'):
        # Build the 1x1 / 3x3 / double-3x3 / pool-projection branches of
        # a stride-1 block and return their outputs in that order.
        # ``widths`` = (1x1, 3x3_reduce, 3x3, double_reduce, double,
        # pool_proj).  With ``pool_mode=None`` no ``poolmode`` argument
        # is passed to PoolLayer.
        (n_1x1, n_3x3_reduce, n_3x3,
         n_dbl_reduce, n_dbl, n_proj) = widths

        out_1x1 = bn_conv(source, in_ch, side, 1, n_1x1)

        reduced = bn_conv(source, in_ch, side, 1, n_3x3_reduce)
        out_3x3 = bn_conv(reduced, n_3x3_reduce, side, 3, n_3x3, pad=1)

        dbl = bn_conv(source, in_ch, side, 1, n_dbl_reduce)
        dbl = bn_conv(dbl, n_dbl_reduce, side, 3, n_dbl, pad=1)
        out_dbl = bn_conv(dbl, n_dbl, side, 3, n_dbl, pad=1)

        pool_kwargs = dict(input=source,
                           poolsize=3,
                           poolstride=1,
                           poolpad=1,
                           lib_conv=lib_conv)
        if pool_mode is not None:
            pool_kwargs['poolmode'] = pool_mode
        pooled = PoolLayer(**pool_kwargs)
        out_proj = bn_conv(pooled.output, in_ch, side, 1, n_proj)

        return [out_1x1, out_3x3, out_dbl, out_proj]

    def downsample(source, in_ch, side, widths):
        # Build a three-branch block whose last convolutions and pooling
        # use stride 2, and return the axis-0 concatenation.
        # ``widths`` = (3x3_reduce, 3x3, double_reduce, double).
        n_3x3_reduce, n_3x3, n_dbl_reduce, n_dbl = widths

        reduced = bn_conv(source, in_ch, side, 1, n_3x3_reduce)
        out_3x3 = bn_conv(reduced, n_3x3_reduce, side, 3, n_3x3,
                          stride=2, pad=1)

        dbl = bn_conv(source, in_ch, side, 1, n_dbl_reduce)
        dbl = bn_conv(dbl, n_dbl_reduce, side, 3, n_dbl, pad=1)
        out_dbl = bn_conv(dbl, n_dbl, side, 3, n_dbl, stride=2, pad=1)

        pooled = PoolLayer(input=source,
                           poolsize=3,
                           poolstride=2,
                           lib_conv=lib_conv,
                           caffe_style=True,
                           poolpad=1)

        return T.concatenate([out_3x3, out_dbl, pooled.output], axis=0)

    x_temporal = T.ftensor4('x')

    # ----- stem (these three layers carry their own pooling settings) ---
    conv1_temporal = ConvPoolLayer(
        input=x_temporal,
        image_shape=(input_dim, img_scale_x, img_scale_y, batch_size),
        filter_shape=(input_dim, 7, 7, 64),
        convstride=2,
        padsize=3,
        group=1,
        poolsize=3,
        poolstride=2,
        bias_init=0.0,
        lrn=False,
        Bn=True,
        lib_conv=lib_conv,
        caffe_style=True,
        poolpadsize=(1, 1))
    self.layers.append(conv1_temporal)

    conv_temporal_2_reduce = ConvPoolLayer(
        input=conv1_temporal.output,
        image_shape=(64, 56, 56, batch_size),
        filter_shape=(64, 1, 1, 64),
        convstride=1,
        padsize=0,
        group=1,
        poolsize=1,
        poolstride=0,
        bias_init=0.0,
        lrn=False,
        Bn=True,
        lib_conv=lib_conv)
    self.layers.append(conv_temporal_2_reduce)

    convpool_temporal_2 = ConvPoolLayer(
        input=conv_temporal_2_reduce.output,
        image_shape=(64, 56, 56, batch_size),
        filter_shape=(64, 3, 3, 192),
        convstride=1,
        padsize=1,
        group=1,
        poolsize=3,
        poolstride=2,
        bias_init=0.0,
        lrn=False,
        Bn=True,
        lib_conv=lib_conv,
        caffe_style=True,
        poolpadsize=(1, 1))
    self.layers.append(convpool_temporal_2)

    # ----- 3a / 3b / 3c -------------------------------------------------
    inception_temporal_3a_output = T.concatenate(
        four_branch(convpool_temporal_2.output, 192, 28,
                    (64, 64, 64, 64, 96, 32)),
        axis=0)
    inception_temporal_3b_output = T.concatenate(
        four_branch(inception_temporal_3a_output, 256, 28,
                    (64, 64, 96, 64, 96, 64)),
        axis=0)
    inception_temporal_3c_output = downsample(
        inception_temporal_3b_output, 320, 28, (128, 160, 64, 96))

    # ----- 4a .. 4e -----------------------------------------------------
    inception_temporal_4a_output = T.concatenate(
        four_branch(inception_temporal_3c_output, 576, 14,
                    (224, 64, 96, 96, 128, 128)),
        axis=0)
    inception_temporal_4b_output = T.concatenate(
        four_branch(inception_temporal_4a_output, 576, 14,
                    (192, 96, 128, 96, 128, 128)),
        axis=0)
    inception_temporal_4c_output = T.concatenate(
        four_branch(inception_temporal_4b_output, 576, 14,
                    (160, 128, 160, 128, 160, 128)),
        axis=0)
    inception_temporal_4d_output = T.concatenate(
        four_branch(inception_temporal_4c_output, 608, 14,
                    (96, 128, 192, 160, 192, 128)),
        axis=0)
    inception_temporal_4e_output = downsample(
        inception_temporal_4d_output, 608, 14, (128, 192, 192, 256))

    # ----- 5a -----------------------------------------------------------
    inception_temporal_5a_output = T.concatenate(
        four_branch(inception_temporal_4e_output, 1056, 7,
                    (352, 192, 320, 160, 224, 128)),
        axis=0)
    inception_temporal_5a_output_1 = inception_temporal_5a_output

    # ----- 5b -----------------------------------------------------------
    # The branches are built and registered, but deliberately left
    # unconcatenated: nothing below consumes them.  The pooling layer of
    # this block is created without a ``poolmode`` argument.
    four_branch(inception_temporal_5a_output, 1024, 7,
                (352, 192, 320, 192, 224, 128),
                pool_mode=None)

    # ----- exported feature --------------------------------------------
    # Reshape the 5a output to (1024, reg_x*reg_y, num_seq, batch/num_seq),
    # append one all-zero slot along axis 1, then permute the axes.
    seqs_per_batch = batch_size / num_seq
    dummy_fea = T.zeros([1024, 1, num_seq, seqs_per_batch])
    pool5_fea_tmp = T.reshape(
        inception_temporal_5a_output_1,
        [1024, reg_scale_x * reg_scale_y, num_seq, seqs_per_batch])
    pool5_fea_tmp = T.concatenate([pool5_fea_tmp, dummy_fea], axis=1)
    pool5_fea_tmp = pool5_fea_tmp.dimshuffle(1, 3, 2, 0)
    self.fea_tmp = pool5_fea_tmp

    self.params = params
    self.x_temporal = x_temporal
    self.weight_types = weight_types
    self.batch_size = batch_size
    self.num_seq = num_seq
    self.use_noise = use_noise
def encoder(self, inputs):
    """Encode ``inputs`` with three conv / conv-pool stages and a linear layer.

    Layer sequence:
        ConvLayer1 ==> ConvPoolLayer1 ==> ConvLayer2 ==> ConvPoolLayer2
        ==> ConvLayer3 ==> ConvPoolLayer3
    Settings:
        Convolutions: filter size 3, filter stride 1, padding type SAME.
        Pooling:      pool size 3, pool stride 2, padding type SAME.
    Number of filters:
        FLAGS.num_channel (input) ==> 8 ==> 8 ==> 16 ==> 16 ==> 32 ==> 32

    The output of the last conv-pool layer is flattened and passed through
    a linear transformation (no activation) with FLAGS.code_size outputs.
    The shape of every intermediate tensor is printed.

    Args:
        inputs: tensor fed to the first convolution.

    Returns:
        Tuple ``(dense, last_conv_dims)``: the output of the linear
        transformation, and the shape of the last conv-pool output without
        its first dimension.
    """
    relu = tf.nn.relu

    # convolutional layer
    conv1 = ConvLayer(FLAGS.num_channel, 8, relu, 3, 1, 'SAME')(inputs)
    print(conv1.shape)

    # convolutional and pooling layer
    conv_pool1 = ConvPoolLayer(8, 8, relu, 3, 1, 'SAME', 3, 2, 'SAME')(conv1)
    print(conv_pool1.shape)

    # convolutional layer
    conv2 = ConvLayer(8, 16, relu, 3, 1, 'SAME')(conv_pool1)
    print(conv2.shape)

    # convolutional and pooling layer
    conv_pool2 = ConvPoolLayer(16, 16, relu, 3, 1, 'SAME', 3, 2, 'SAME')(conv2)
    print(conv_pool2.shape)

    # convolutional layer
    conv3 = ConvLayer(16, 32, relu, 3, 1, 'SAME')(conv_pool2)
    print(conv3.shape)

    # convolutional and pooling layer
    conv_pool3 = ConvPoolLayer(32, 32, relu, 3, 1, 'SAME', 3, 2, 'SAME')(conv3)
    print(conv_pool3.shape)

    # Shape of the last feature map without its first dimension; returned to
    # the caller and also used to size the flattened representation.
    last_conv_dims = conv_pool3.shape[1:]

    # int(...) rather than calling __int__ directly: it yields a plain Python
    # int and raises immediately if the product is not a concrete number.
    num_features = int(np.prod(last_conv_dims))

    # make output of pooling flatten
    flatten = tf.reshape(conv_pool3, [-1, num_features])
    print(flatten.shape)

    # apply fully connected layer (linear, no activation)
    W_fc = normal_initializer(shape=(num_features, FLAGS.code_size))
    B_fc = zero_initializer(shape=FLAGS.code_size)
    dense = tf.matmul(flatten, W_fc) + B_fc
    print(dense.shape)

    return dense, last_conv_dims
def encoder(self, inputs):
    """Run the convolutional encoder and project the result linearly.

    Layers are applied in this order, printing the shape after each one:
        ConvLayer ==> ConvPoolLayer ==> ConvLayer ==> ConvPoolLayer
        ==> ConvLayer ==> ConvPoolLayer
    with filter counts
        FLAGS.num_channel (input) ==> 8 ==> 8 ==> 16 ==> 16 ==> 32 ==> 32.
    Every convolution uses kernel size 3, stride 1 and SAME padding; every
    pooling step uses pool size 3, stride 2 and SAME padding.

    The final feature map is then flattened and multiplied by a
    normal-initialised weight matrix, with a zero-initialised bias added
    (no activation), giving FLAGS.code_size features.

    Args:
        inputs: tensor fed to the first convolution.

    Returns:
        Tuple ``(dense, last_conv_dims)``: the linear projection, and the
        shape of the last conv-pool output without its first dimension.
    """
    conv_settings = dict(act=tf.nn.relu, kernel_size=3, kernel_stride=1,
                         kernel_padding='SAME')
    pool_settings = dict(pool_size=3, pool_stride=2, pool_padding='SAME')

    # (number of output filters, whether the layer also pools), in order.
    plan = (
        (8, False),
        (8, True),
        (16, False),
        (16, True),
        (32, False),
        (32, True),
    )

    features = inputs
    in_filters = FLAGS.num_channel
    for out_filters, with_pool in plan:
        layer_kwargs = dict(conv_settings)
        if with_pool:
            layer_kwargs.update(pool_settings)
            layer_cls = ConvPoolLayer
        else:
            layer_cls = ConvLayer
        layer = layer_cls(input_filters=in_filters,
                          output_filters=out_filters,
                          **layer_kwargs)
        features = layer(features)
        print(features.shape)
        # The next layer consumes what this one produced.
        in_filters = out_filters

    # make output of pooling flatten
    last_conv_dims = features.shape[1:]
    flatten_dim = np.prod(last_conv_dims)
    batch_dim = tf.shape(features)[0]
    flatten = tf.reshape(features, [batch_dim, flatten_dim])
    print(flatten.shape)

    # apply fully connected layer
    fc_weights = normal_initializer([flatten_dim, FLAGS.code_size])
    projected = tf.matmul(flatten, fc_weights)
    dense = projected + zero_initializer([FLAGS.code_size])
    print(dense.shape)

    return dense, last_conv_dims
def __init__(self, config):
    """Build the symbolic network and its cost / error expressions.

    The network is five ConvPoolLayers, two FCLayers (each followed by a
    DropoutLayer) and a SoftmaxLayer with 1000 outputs.  When
    ``config['use_data_layer']`` is set, a DataLayer that crops and mirrors
    the input is placed in front of the first convolution.

    Args:
        config: mapping read for the keys ``'batch_size'``,
            ``'use_data_layer'``, ``'lib_conv'`` and (only when the data
            layer is used) ``'rand_crop'``.

    Attributes set:
        config, layers, cost, errors, errors_top_5, params, x, y, rand,
        weight_types, batch_size.
    """
    self.config = config

    batch_size = config['batch_size']
    flag_datalayer = config['use_data_layer']
    lib_conv = config['lib_conv']

    # ##################### BUILD NETWORK ##########################
    # allocate symbolic variables for the data
    # 'rand' is a random array used for random cropping/mirroring of data
    x = T.ftensor4('x')
    y = T.ivector('y')
    rand = T.fvector('rand')

    # Parenthesised so the line is valid on both Python 2 and Python 3.
    print('... building the model')
    self.layers = []
    params = []
    weight_types = []

    def register(layer):
        """Record a parameterised layer and its params / weight types."""
        self.layers.append(layer)
        params.extend(layer.params)
        weight_types.extend(layer.weight_type)
        return layer

    if flag_datalayer:
        data_layer = DataLayer(input=x,
                               image_shape=(3, 256, 256, batch_size),
                               cropsize=227,
                               rand=rand,
                               mirror=True,
                               flag_rand=config['rand_crop'])
        layer1_input = data_layer.output
    else:
        layer1_input = x

    convpool_layer1 = register(ConvPoolLayer(
        input=layer1_input,
        image_shape=(3, 227, 227, batch_size),
        filter_shape=(3, 11, 11, 96),
        convstride=4, padsize=0, group=1,
        poolsize=3, poolstride=2,
        bias_init=0.0, lrn=True,
        lib_conv=lib_conv,
    ))

    convpool_layer2 = register(ConvPoolLayer(
        input=convpool_layer1.output,
        image_shape=(96, 27, 27, batch_size),
        filter_shape=(96, 5, 5, 256),
        convstride=1, padsize=2, group=2,
        poolsize=3, poolstride=2,
        bias_init=0.1, lrn=True,
        lib_conv=lib_conv,
    ))

    convpool_layer3 = register(ConvPoolLayer(
        input=convpool_layer2.output,
        image_shape=(256, 13, 13, batch_size),
        filter_shape=(256, 3, 3, 384),
        convstride=1, padsize=1, group=1,
        poolsize=1, poolstride=0,
        bias_init=0.0, lrn=False,
        lib_conv=lib_conv,
    ))

    convpool_layer4 = register(ConvPoolLayer(
        input=convpool_layer3.output,
        image_shape=(384, 13, 13, batch_size),
        filter_shape=(384, 3, 3, 384),
        convstride=1, padsize=1, group=2,
        poolsize=1, poolstride=0,
        bias_init=0.1, lrn=False,
        lib_conv=lib_conv,
    ))

    convpool_layer5 = register(ConvPoolLayer(
        input=convpool_layer4.output,
        image_shape=(384, 13, 13, batch_size),
        filter_shape=(384, 3, 3, 256),
        convstride=1, padsize=1, group=2,
        poolsize=3, poolstride=2,
        bias_init=0.0, lrn=False,
        lib_conv=lib_conv,
    ))

    # Move the last axis (where the image_shape tuples put batch_size) to
    # the front, then collapse the remaining axes into one.
    fc_layer6_input = T.flatten(
        convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
    fc_layer6 = register(
        FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096))

    # Dropout layers are not added to self.layers / params.
    dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096)

    fc_layer7 = register(
        FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096))

    dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096, n_out=4096)

    softmax_layer8 = register(SoftmaxLayer(
        input=dropout_layer7.output, n_in=4096, n_out=1000))

    # #################### NETWORK BUILT #######################
    self.cost = softmax_layer8.negative_log_likelihood(y)
    self.errors = softmax_layer8.errors(y)
    self.errors_top_5 = softmax_layer8.errors_top_x(y, 5)
    self.params = params
    self.x = x
    self.y = y
    self.rand = rand
    self.weight_types = weight_types
    self.batch_size = batch_size
import skimage.measure
import pickle
from readlabel import read_image
from network import Network
from layers import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer, ReLU, Sigmoid

# Load the test images together with their annotations.
whole_data = read_image(path1='test_images/',
                        path2='./test_annotation',
                        data_size=1050)
whole_x, whole_y = whole_data[0], whole_data[1]

# Standardise the inputs with the mean / standard deviation taken along
# axis 0 of the loaded data.
mean = whole_x.mean(axis=0)
std = whole_x.std(axis=0)
whole_x = (whole_x - mean) / std

# The whole loaded set is used as the test set.
test_x, test_y = whole_x, whole_y
test_data = [test_x, test_y]

mini_batch_size = 1

# final
net = Network(
    [
        ConvPoolLayer(filter_shape=(5, 5, 3, 9),
                      image_shape=(mini_batch_size, 64, 64, 3),
                      poolsize=2,
                      activation_fn=ReLU),
        ConvPoolLayer(filter_shape=(5, 5, 9, 18),
                      image_shape=(mini_batch_size, 30, 30, 9),
                      poolsize=2,
                      activation_fn=ReLU),
        ConvPoolLayer(filter_shape=(4, 4, 18, 36),
                      image_shape=(mini_batch_size, 13, 13, 18),
                      poolsize=2,
                      activation_fn=ReLU),
        FullyConnectedLayer(n_in=900, n_out=225, activation_fn=ReLU),
        FullyConnectedLayer(n_in=225, n_out=50, activation_fn=ReLU),
        SoftmaxLayer(n_in=50, n_out=20, activation_fn=None),
    ],
    mini_batch_size,
)

print('start')
net.load_test(mini_batch_size, test_data, path='./finalparams_noact.pickle')
def __init__(self, config, testMode):
    """Build the first ``config['useLayers']`` layers and compile a forward pass.

    Up to eight parameterised layers are created in order: five
    ConvPoolLayers, two FCLayers and a SoftmaxLayer.  The network input is
    ``x - mean``.  Outside test mode each FCLayer is followed by a
    DropoutLayer; in test mode the FCLayer output feeds the next layer
    directly.

    Args:
        config: mapping read for the keys ``'batch_size'``, ``'lib_conv'``,
            ``'useLayers'``, ``'initWeights'``, ``'prob_drop'`` and
            ``'mean_file'``; ``'weightsDir'`` and ``'weightFileTag'`` are
            read only when ``'initWeights'`` is truthy.
        testMode: when truthy, dropout layers are skipped.

    Attributes set:
        config, layers, output, outLayer, params, x, mean, weight_types,
        batch_size, useLayers, meanVal, forwardFunction.
    """
    self.config = config

    batch_size = config['batch_size']
    lib_conv = config['lib_conv']
    useLayers = config['useLayers']

    # Optionally initialise the network (AlexNet) from stored weights; this
    # relies on layers.py accepting the initialisation arguments.
    initWeights = config['initWeights']
    # Bind both names even when no weights are loaded: they are passed to
    # every layer below and would otherwise raise NameError.
    weightsDir = None
    weightFileTag = ''
    if initWeights:
        weightsDir = config['weightsDir']
        weightFileTag = config['weightFileTag']

    prob_drop = config['prob_drop']

    # ##################### BUILD NETWORK ##########################
    x = T.ftensor4('x')
    mean = T.ftensor4('mean')

    # Parenthesised so the line is valid on both Python 2 and Python 3.
    print('... building the model')
    self.layers = []
    params = []
    weight_types = []

    def register(layer):
        """Record a parameterised layer and its params / weight types."""
        self.layers.append(layer)
        params.extend(layer.params)
        weight_types.extend(layer.weight_type)
        return layer

    def weight_files(*stems):
        """Return the weight file names for the given stems."""
        return [stem + weightFileTag for stem in stems]

    if useLayers >= 1:
        convpool_layer1 = register(ConvPoolLayer(
            input=x - mean,
            image_shape=(3, None, None, batch_size),
            filter_shape=(3, 11, 11, 96),
            convstride=4, padsize=0, group=1,
            poolsize=3, poolstride=2,
            bias_init=0.0, lrn=True,
            lib_conv=lib_conv,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=weight_files('W_0', 'b_0'),
        ))

    if useLayers >= 2:
        convpool_layer2 = register(ConvPoolLayer(
            input=convpool_layer1.output,
            # Spatial size is left unspecified (None): it depends on
            # conv1's output rather than a fixed 27.
            image_shape=(96, None, None, batch_size),
            filter_shape=(96, 5, 5, 256),
            convstride=1, padsize=2, group=2,
            poolsize=3, poolstride=2,
            bias_init=0.1, lrn=True,
            lib_conv=lib_conv,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=weight_files('W0_1', 'W1_1', 'b0_1', 'b1_1'),
        ))

    if useLayers >= 3:
        convpool_layer3 = register(ConvPoolLayer(
            input=convpool_layer2.output,
            image_shape=(256, None, None, batch_size),
            filter_shape=(256, 3, 3, 384),
            convstride=1, padsize=1, group=1,
            poolsize=1, poolstride=0,
            bias_init=0.0, lrn=False,
            lib_conv=lib_conv,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=weight_files('W_2', 'b_2'),
        ))

    if useLayers >= 4:
        convpool_layer4 = register(ConvPoolLayer(
            input=convpool_layer3.output,
            image_shape=(384, None, None, batch_size),
            filter_shape=(384, 3, 3, 384),
            convstride=1, padsize=1, group=2,
            poolsize=1, poolstride=0,
            bias_init=0.1, lrn=False,
            lib_conv=lib_conv,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=weight_files('W0_3', 'W1_3', 'b0_3', 'b1_3'),
        ))

    if useLayers >= 5:
        convpool_layer5 = register(ConvPoolLayer(
            input=convpool_layer4.output,
            image_shape=(384, None, None, batch_size),
            filter_shape=(384, 3, 3, 256),
            convstride=1, padsize=1, group=2,
            poolsize=3, poolstride=2,
            bias_init=0.0, lrn=False,
            lib_conv=lib_conv,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=weight_files('W0_4', 'W1_4', 'b0_4', 'b1_4'),
        ))

    if useLayers >= 6:
        # Move the last axis (where the image_shape tuples put batch_size)
        # to the front, then collapse the remaining axes into one.
        fc_layer6_input = T.flatten(
            convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
        fc_layer6 = register(FCLayer(
            input=fc_layer6_input, n_in=9216, n_out=4096,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=weight_files('W_5', 'b_5'),
        ))
        if testMode:
            # No dropout at test time: the FC layer feeds layer 7 directly.
            dropout_layer6 = fc_layer6
        else:
            dropout_layer6 = DropoutLayer(fc_layer6.output,
                                          n_in=4096, n_out=4096,
                                          prob_drop=prob_drop)

    if useLayers >= 7:
        fc_layer7 = register(FCLayer(
            input=dropout_layer6.output, n_in=4096, n_out=4096,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=weight_files('W_6', 'b_6'),
        ))
        if testMode:
            # Must bind dropout_layer7 (not dropout_layer6): the softmax
            # layer below reads dropout_layer7.output in both modes.
            dropout_layer7 = fc_layer7
        else:
            dropout_layer7 = DropoutLayer(fc_layer7.output,
                                          n_in=4096, n_out=4096,
                                          prob_drop=prob_drop)

    if useLayers >= 8:
        softmax_layer8 = register(SoftmaxLayer(
            input=dropout_layer7.output, n_in=4096, n_out=1000,
            initWeights=initWeights, weightsDir=weightsDir,
            weightFiles=weight_files('W_7', 'b_7'),
        ))

    # #################### NETWORK BUILT #######################
    self.output = self.layers[useLayers - 1]
    self.params = params
    self.x = x
    self.mean = mean
    self.weight_types = weight_types
    self.batch_size = batch_size
    self.useLayers = useLayers
    self.outLayer = self.layers[useLayers - 1]

    meanVal = np.load(config['mean_file'])
    meanVal = meanVal[:, :, :, np.newaxis].astype('float32')
    # x is 4-D with batch_size images along its last axis, while the loaded
    # mean has size 1 there; tile it so that the subtraction works.
    meanVal = np.tile(meanVal, (1, 1, 1, batch_size))
    self.meanVal = meanVal

    if useLayers >= 8:
        # if the last layer is the softmax, then its output is y_pred
        finalOut = self.outLayer.y_pred
    else:
        finalOut = self.outLayer.output
    self.forwardFunction = theano.function(
        [self.x, In(self.mean, value=meanVal)], [finalOut])
def image_repr(self, x, rand, config):
    """Build the image representation network on top of ``x``.

    The network is five ConvPoolLayers followed by two MaxoutLayers, with a
    DropoutLayer between the two MaxoutLayers.  When
    ``config['use_data_layer']`` is set, a DataLayer that crops and mirrors
    the input (driven by ``rand``) is placed in front of the first
    convolution.

    Args:
        x: symbolic input of the network.
        rand: symbolic input passed to the DataLayer as ``rand``.
        config: mapping read for the keys ``'batch_size'``,
            ``'use_data_layer'``, ``'lib_conv'`` and (only when the data
            layer is used) ``'rand_crop'``.

    Returns:
        Tuple ``(fc_layer7, layers, params, weight_types)`` where
        ``weight_types`` holds the renamed weight types (see below).
    """
    batch_size = config['batch_size']
    flag_datalayer = config['use_data_layer']
    lib_conv = config['lib_conv']

    layers = []
    params = []
    collected_types = []

    def track(layer):
        """Record a parameterised layer and its params / weight types."""
        layers.append(layer)
        params.extend(layer.params)
        collected_types.extend(layer.weight_type)
        return layer

    layer1_input = x
    if flag_datalayer:
        data_layer = DataLayer(input=x,
                               image_shape=(3, 256, 256, batch_size),
                               cropsize=227,
                               rand=rand,
                               mirror=True,
                               flag_rand=config['rand_crop'])
        layer1_input = data_layer.output

    convpool_layer1 = track(ConvPoolLayer(
        input=layer1_input,
        image_shape=(3, 227, 227, batch_size),
        filter_shape=(3, 11, 11, 96),
        convstride=4, padsize=0, group=1,
        poolsize=3, poolstride=2,
        bias_init=0.0, lrn=True,
        lib_conv=lib_conv,
    ))

    convpool_layer2 = track(ConvPoolLayer(
        input=convpool_layer1.output,
        image_shape=(96, 27, 27, batch_size),
        filter_shape=(96, 5, 5, 256),
        convstride=1, padsize=2, group=2,
        poolsize=3, poolstride=2,
        bias_init=0.1, lrn=True,
        lib_conv=lib_conv,
    ))

    convpool_layer3 = track(ConvPoolLayer(
        input=convpool_layer2.output,
        image_shape=(256, 13, 13, batch_size),
        filter_shape=(256, 3, 3, 384),
        convstride=1, padsize=1, group=1,
        poolsize=1, poolstride=0,
        bias_init=0.0, lrn=False,
        lib_conv=lib_conv,
    ))

    convpool_layer4 = track(ConvPoolLayer(
        input=convpool_layer3.output,
        image_shape=(384, 13, 13, batch_size),
        filter_shape=(384, 3, 3, 384),
        convstride=1, padsize=1, group=2,
        poolsize=1, poolstride=0,
        bias_init=0.1, lrn=False,
        lib_conv=lib_conv,
    ))

    convpool_layer5 = track(ConvPoolLayer(
        input=convpool_layer4.output,
        image_shape=(384, 13, 13, batch_size),
        filter_shape=(384, 3, 3, 256),
        convstride=1, padsize=1, group=2,
        poolsize=3, poolstride=2,
        bias_init=0.0, lrn=False,
        lib_conv=lib_conv,
    ))

    # Move the last axis (where the image_shape tuples put batch_size) to
    # the front, then collapse the remaining axes into one.
    shuffled = convpool_layer5.output.dimshuffle(3, 0, 1, 2)
    fc_layer6_input = T.flatten(shuffled, 2)
    fc_layer6 = track(
        MaxoutLayer(input=fc_layer6_input, n_in=9216, n_out=4096))

    # The dropout layer is not added to layers / params.
    dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096)

    fc_layer7 = track(
        MaxoutLayer(input=dropout_layer6.output, n_in=4096, n_out=4096))

    # Rename weight types so that weights can be shared: each 'W' / 'b'
    # gets its own running index ('W0', 'W1', ... and 'b0', 'b1', ...).
    # Entries that are neither 'W' nor 'b' are left out of the result.
    weight_types = []
    next_w = 0
    next_b = 0
    for kind in collected_types:
        if kind == 'W':
            weight_types.append('W' + str(next_w))
            next_w += 1
        elif kind == 'b':
            weight_types.append('b' + str(next_b))
            next_b += 1

    return fc_layer7, layers, params, weight_types
def __init__(self, config):
    """Build the symbolic network and its cost / error expressions.

    The network is five ConvPoolLayers, two FCLayers (each followed by a
    DropoutLayer) and a SoftmaxLayer with 1000 outputs.  Image and filter
    shapes are declared with batch_size last when ``config.lib_conv`` is
    ``'cudaconvnet'`` and with batch_size first otherwise.

    Args:
        config: object read for the attributes ``batch_size``,
            ``lib_conv``, ``grouping`` (truthy -> group 2, else 1, for
            layers 2, 4 and 5) and ``LRN`` (truthy -> lrn enabled for
            layers 1 and 2).

    Attributes set:
        config, layers, cost, errors, errors_top_5, params, x, y,
        weight_types, batch_size.
    """
    self.config = config

    batch_size = config.batch_size
    lib_conv = config.lib_conv
    group = 2 if config.grouping else 1
    LRN = bool(config.LRN)

    # Formatted into a single string so the output is the same on
    # Python 2 and Python 3.
    print('LRN, group %s %s' % (LRN, group))

    # ##################### BUILD NETWORK ##########################
    # allocate symbolic variables for the data
    x = T.ftensor4('x')
    y = T.lvector('y')

    print('... building the model with ConvLib %s, LRN %s, grouping %i '
          % (lib_conv, LRN, group))
    self.layers = []
    params = []
    weight_types = []

    batch_last = lib_conv == 'cudaconvnet'

    def pick(batch_last_value, batch_first_value):
        """Choose the value that matches the active convolution library."""
        return batch_last_value if batch_last else batch_first_value

    def register(layer):
        """Record a parameterised layer and its params / weight types."""
        self.layers.append(layer)
        params.extend(layer.params)
        weight_types.extend(layer.weight_type)
        return layer

    layer1_input = x

    # NOTE(review): the cudaconvnet branch declares 224x224 inputs with no
    # padding while the other branch declares 227x227 with padsize 3 --
    # confirm this difference is intended.
    convpool_layer1 = register(ConvPoolLayer(
        input=layer1_input,
        image_shape=pick((3, 224, 224, batch_size),
                         (batch_size, 3, 227, 227)),
        filter_shape=pick((3, 11, 11, 96), (96, 3, 11, 11)),
        convstride=4, padsize=pick(0, 3), group=1,
        poolsize=3, poolstride=2,
        bias_init=0.0, lrn=LRN,
        lib_conv=lib_conv,
    ))

    convpool_layer2 = register(ConvPoolLayer(
        input=convpool_layer1.output,
        image_shape=pick((96, 27, 27, batch_size),
                         (batch_size, 96, 27, 27)),
        filter_shape=pick((96, 5, 5, 256), (256, 96, 5, 5)),
        convstride=1, padsize=2, group=group,
        poolsize=3, poolstride=2,
        bias_init=0.1, lrn=LRN,
        lib_conv=lib_conv,
    ))

    convpool_layer3 = register(ConvPoolLayer(
        input=convpool_layer2.output,
        image_shape=pick((256, 13, 13, batch_size),
                         (batch_size, 256, 13, 13)),
        filter_shape=pick((256, 3, 3, 384), (384, 256, 3, 3)),
        convstride=1, padsize=1, group=1,
        poolsize=1, poolstride=0,
        bias_init=0.0, lrn=False,
        lib_conv=lib_conv,
    ))

    convpool_layer4 = register(ConvPoolLayer(
        input=convpool_layer3.output,
        image_shape=pick((384, 13, 13, batch_size),
                         (batch_size, 384, 13, 13)),
        filter_shape=pick((384, 3, 3, 384), (384, 384, 3, 3)),
        convstride=1, padsize=1, group=group,
        poolsize=1, poolstride=0,
        bias_init=0.1, lrn=False,
        lib_conv=lib_conv,
    ))

    convpool_layer5 = register(ConvPoolLayer(
        input=convpool_layer4.output,
        image_shape=pick((384, 13, 13, batch_size),
                         (batch_size, 384, 13, 13)),
        filter_shape=pick((384, 3, 3, 256), (256, 384, 3, 3)),
        convstride=1, padsize=1, group=group,
        poolsize=3, poolstride=2,
        bias_init=0.0, lrn=False,
        lib_conv=lib_conv,
    ))

    if batch_last:
        # Bring the last axis (batch_size in the shapes above) to the
        # front before collapsing the remaining axes into one.
        fc_layer6_input = T.flatten(
            convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
    else:
        fc_layer6_input = convpool_layer5.output.flatten(2)

    fc_layer6 = register(
        FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096))

    # Dropout layers are not added to self.layers / params.
    dropout_layer6 = DropoutLayer(fc_layer6.output)

    fc_layer7 = register(
        FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096))

    dropout_layer7 = DropoutLayer(fc_layer7.output)

    softmax_layer8 = register(SoftmaxLayer(
        input=dropout_layer7.output, n_in=4096, n_out=1000))

    # #################### NETWORK BUILT #######################
    self.cost = softmax_layer8.negative_log_likelihood(y)
    self.errors = softmax_layer8.errors(y)
    self.errors_top_5 = softmax_layer8.errors_top_x(y, 5)
    self.params = params
    self.x = x
    self.y = y
    self.weight_types = weight_types
    self.batch_size = batch_size