def _build_model(self):
    """Assemble the network as an un-applied CNTK ``Sequential`` stack.

    Two convolutions ('conv1', 'conv2') are followed by a 512-unit dense
    layer ('dense1') and an output layer ('qvalues') with
    ``self._nb_actions`` units and no activation.

    Returns:
        The ``Sequential`` model (not yet bound to an input).
    """
    # Defaults for every layer below; both Dense layers override `init`.
    with default_options(init=he_uniform(), activation=relu, bias=True):
        layers = [
            Convolution((4, 4), 64, strides=(2, 2), name='conv1'),
            Convolution((3, 3), 64, strides=(1, 1), name='conv2'),
            Dense(512, name='dense1', init=he_normal(0.01)),
            # activation=None overrides the ReLU default for the output.
            Dense(self._nb_actions, activation=None,
                  init=he_normal(0.01), name='qvalues'),
        ]
        network = Sequential(layers)
    return network
def _build_model(self):
    """Assemble the network as an un-applied CNTK ``Sequential`` stack.

    Three convolutions (8x8/stride 4, 4x4/stride 2, 3x3/stride 1) feed a
    512-unit dense layer and an output layer with ``self._nb_actions``
    units and no activation.

    Returns:
        The ``Sequential`` model (not yet bound to an input).
    """
    # Defaults for every layer below; both Dense layers override `init`.
    with default_options(init=he_uniform(), activation=relu, bias=True):
        conv_stack = [
            Convolution((8, 8), 32, strides=(4, 4)),
            Convolution((4, 4), 64, strides=(2, 2)),
            Convolution((3, 3), 64, strides=(1, 1)),
        ]
        head = [
            Dense(512, init=he_normal(0.01)),
            # activation=None overrides the ReLU default for the output.
            Dense(self._nb_actions, activation=None, init=he_normal(0.01)),
        ]
        network = Sequential(conv_stack + head)
    return network
def create_transfer_learning_model(input, num_classes, model_file, freeze=False):
    """Attach 1x1-conv/up-sampling heads to nodes of a pretrained model.

    The model in ``model_file`` is loaded, cut at the node named by the
    module-level ``last_hidden_node_name`` and cloned (frozen when
    ``freeze`` is true). Four intermediate nodes are looked up by name,
    each gets a ``OneByOneConvAndUpSample`` head, three of the heads are
    spliced along axis 0 and passed through a sigmoid 1x1 convolution,
    and the result is up-sampled with ``UpSampling2DPower(..., 2)``.

    Args:
        input: Not referenced by this function (see the review note below).
        num_classes: Number of filters for each head and the final conv.
        model_file: Path handed to ``load_model``.
        freeze: Clone with ``CloneMethod.freeze`` instead of ``clone``.

    Returns:
        The up-sampled output function.
    """
    # Load the pretrained classification net and find nodes.
    pretrained = load_model(model_file)
    pretrained = C.as_composite(pretrained[3].owner)
    features = C.logging.find_by_name(pretrained, feature_node_name)
    last_hidden = C.logging.find_by_name(pretrained, last_hidden_node_name)

    clone_method = C.CloneMethod.freeze if freeze else C.CloneMethod.clone
    backbone = C.combine([last_hidden.owner]).clone(
        clone_method, {features: C.placeholder(name='features')})

    # NOTE(review): the backbone is bound to a brand-new input variable
    # built from module-level num_channels/image_height/image_width; the
    # `input` argument is never used. Confirm this is intended.
    backbone = backbone(
        C.input_variable((num_channels, image_height, image_width)))

    tap_names = (
        "z.x.x.r",
        "z.x.x.x.x.r",
        "z.x.x.x.x.x.x.r",
        "z.x.x.x.x.x.x.x.x.r",
    )
    taps = [C.logging.find_by_name(backbone, node_name)
            for node_name in tap_names]

    # One head per tap, with up-sampling powers 3, 2, 1, 0 respectively.
    heads = [OneByOneConvAndUpSample(tap, power, num_classes)
             for tap, power in zip(taps, (3, 2, 1, 0))]

    # NOTE(review): only the first three heads are merged (order: 1st,
    # 3rd, 2nd); the fourth head is constructed but unused. Confirm
    # whether it was meant to be part of the splice.
    merged = C.splice(heads[0], heads[2], heads[1], axis=0)

    fused = Convolution((1, 1), num_classes, init=he_normal(),
                        activation=sigmoid, pad=True)(merged)
    return UpSampling2DPower(fused, 2)
def bn_inception_cifar_model(input, labelDim, bnTimeConst):
    """Chain conv/BN/ReLU layers and inception blocks into a classifier.

    Args:
        input: Network input node.
        labelDim: Number of units of the final ``Dense`` layer.
        bnTimeConst: Forwarded to every conv/inception helper.

    Returns:
        Output of the final ``Dense`` layer.
    """
    # Shape notes ("in: H x W x C") are carried over from the original
    # author's comments and have not been re-derived here.

    # Stem: four 3x3, stride-1, padded conv+BN+ReLU layers.
    x = input  # in: 32 x 32 x 3
    for width in (32, 32, 64, 128):
        x = conv_bn_relu_layer(x, width, (3, 3), (1, 1), True, bnTimeConst)

    # Inception blocks, applied strictly in this order.
    plan = (
        (inception_block_with_maxpool, (32, 32, 32, 32, 48, 16)),     # in: 32 x 32 x 128
        (inception_block_with_maxpool, (32, 32, 32, 32, 48, 16)),     # in: 32 x 32 x 128
        (inception_block_with_avgpool, (96, 48, 64, 48, 64, 64)),     # in: 16 x 16 x 128
        (inception_block_with_avgpool, (48, 64, 96, 80, 96, 64)),     # in: 16 x 16 x 288
        (inception_block_with_avgpool, (48, 64, 96, 80, 96, 64)),     # in: 16 x 16 x 288
        (inception_block_pass_through, (0, 128, 192, 192, 256, 0)),   # in: 16 x 16 x 288
        (inception_block_with_maxpool, (176, 96, 160, 96, 112, 64)),  # in: 8 x 8 x 512
    )
    for block, widths in plan:
        x = block(x, *(widths + (bnTimeConst,)))

    # Global average over the 8 x 8 map, then the classifier.
    pooled = AveragePooling(filter_shape=(8, 8))(x)  # in: 8 x 8 x 512
    return Dense(labelDim, init=he_normal())(pooled)  # in: 1 x 1 x 512
def conv_bn_relu(input, filter_size, num_filters, strides=(1, 1), init=he_normal()):
    """Apply ``conv_bn`` to ``input`` and pass the result through ReLU.

    Args:
        input: Input node.
        filter_size: Forwarded to ``conv_bn``.
        num_filters: Forwarded to ``conv_bn``.
        strides: Forwarded to ``conv_bn``.
        init: Weight initializer forwarded to ``conv_bn``.

    Returns:
        ``relu(conv_bn(...))``.
    """
    # NOTE(review): the trailing positional ``1`` is passed as conv_bn's
    # sixth argument; what it means depends on the conv_bn in scope.
    normalized = conv_bn(input, filter_size, num_filters, strides, init, 1)
    return relu(normalized)
def bn_inception_cifar_model(input, labelDim, bnTimeConst):
    """Chain conv/BN/ReLU layers and inception blocks into a classifier.

    Args:
        input: Network input node.
        labelDim: Number of units of the final ``Dense`` layer.
        bnTimeConst: Forwarded to every conv/inception helper.

    Returns:
        Output of the final ``Dense`` layer.
    """
    # Shape notes ("in: H x W x C") are carried over from the original
    # author's comments and have not been re-derived here.
    unit = (1, 1)

    # Stem.
    x = conv_bn_relu_layer(input, 32, (3, 3), unit, True, bnTimeConst)  # in: 32 x 32 x 3
    x = conv_bn_relu_layer(x, 32, (3, 3), unit, True, bnTimeConst)      # in: 32 x 32 x 32
    x = conv_bn_relu_layer(x, 32, (3, 3), unit, True, bnTimeConst)      # in: 32 x 32 x 32
    x = conv_bn_relu_layer(x, 32, (1, 1), unit, True, bnTimeConst)      # in: 32 x 32 x 32
    x = conv_bn_relu_layer(x, 64, (3, 3), unit, True, bnTimeConst)      # in: 32 x 32 x 32

    # Inception blocks.
    x = inception_block_with_avgpool(x, 32, 32, 32, 32, 48, 16, bnTimeConst)     # in: 32 x 32 x 64
    x = inception_block_pass_through(x, 0, 64, 80, 32, 48, 0, bnTimeConst)       # in: 32 x 32 x 128
    x = inception_block_with_avgpool(x, 96, 48, 64, 48, 64, 64, bnTimeConst)     # in: 16 x 16 x 256
    x = inception_block_with_avgpool(x, 48, 64, 96, 80, 96, 64, bnTimeConst)     # in: 16 x 16 x 288
    x = inception_block_pass_through(x, 0, 128, 192, 192, 256, 0, bnTimeConst)   # in: 16 x 16 x 288
    x = inception_block_with_maxpool(x, 176, 96, 160, 96, 112, 64, bnTimeConst)  # in: 8 x 8 x 512

    # Global average, then classifier.
    x = AveragePooling(filter_shape=(8, 8))(x)   # in: 8 x 8 x 512
    return Dense(labelDim, init=he_normal())(x)  # in: 1 x 1 x 512
def create_model(input, num_classes):
    """Build a three-stage residual trunk with per-stage up-sampled heads.

    Each stage output gets a ``OneByOneConvAndUpSample`` head (powers 0,
    1, 2); the heads are spliced along axis 0 and fused by a sigmoid 1x1
    convolution with ``num_classes`` filters.

    Args:
        input: Network input node.
        num_classes: Filter count of every head and of the fusing conv.

    Returns:
        Output of the fusing convolution.
    """
    width_1, width_2, width_3 = 16, 32, 64
    depth = 3

    # Residual trunk.
    stem = conv_bn_relu(input, (3, 3), width_1)
    stage_1 = resnet_basic_stack(stem, depth, width_1)
    stage_2 = resnet_basic_stack(
        resnet_basic_inc(stage_1, width_2), depth - 1, width_2)
    stage_3 = resnet_basic_stack(
        resnet_basic_inc(stage_2, width_3), depth - 1, width_3)

    # One head per stage.
    head_1 = OneByOneConvAndUpSample(stage_1, 0, num_classes)
    head_2 = OneByOneConvAndUpSample(stage_2, 1, num_classes)
    head_3 = OneByOneConvAndUpSample(stage_3, 2, num_classes)

    # Splice order (1, 3, 2) is kept exactly as in the original.
    merged = C.splice(head_1, head_3, head_2, axis=0)
    fuse = Convolution((1, 1), num_classes, init=he_normal(),
                       activation=sigmoid, pad=True)
    return fuse(merged)
def conv_bn_layer(input, out_feature_map_count, kernel_shape, strides, bn_time_const, b_value=0, sc_value=1):
    """Convolve ``input`` and apply spatial batch normalization.

    Args:
        input: Input node; ``input.shape[0]`` is used as the channel count.
        out_feature_map_count: Number of output feature maps.
        kernel_shape: Indexable; ``[0]`` is the width, ``[1]`` the height.
        strides: Indexable; ``[0]`` is the vertical, ``[1]`` the horizontal stride.
        bn_time_const: Passed as ``normalization_time_constant``.
        b_value: Initial value of the BN bias parameter.
        sc_value: Initial value of the BN scale parameter.

    Returns:
        The ``batch_normalization`` node.
    """
    in_channels = input.shape[0]
    k_width, k_height = kernel_shape[0], kernel_shape[1]
    stride_v, stride_h = strides[0], strides[1]

    # TODO: use RandomNormal to initialize, needs to be exposed in the python api
    weights = parameter(
        shape=(out_feature_map_count, in_channels, k_height, k_width),
        init=he_normal())
    conv_out = convolution(weights, input, (in_channels, stride_v, stride_h))

    # TODO: initialize using b_value and sc_value, needs to be exposed in the python api
    # The shapes below are plain ints, exactly as the original's
    # parenthesised (non-tuple) expressions were.
    bn_bias = parameter(shape=out_feature_map_count, init=b_value)
    bn_scale = parameter(shape=out_feature_map_count, init=sc_value)
    mean = constant(0., out_feature_map_count)
    inv_std = constant(0., out_feature_map_count)
    count = constant(0., 1)

    return batch_normalization(
        conv_out, bn_scale, bn_bias, mean, inv_std,
        running_count=count,
        spatial=True,
        normalization_time_constant=bn_time_const,
        use_cudnn_engine=True)
def convolution_bn(input, filter_size, num_filters, strides=(1,1), init=he_normal(), activation=relu):
    """Padded, bias-free convolution, then batch norm, then ``activation``.

    Args:
        input: Input node.
        filter_size: Convolution filter shape.
        num_filters: Number of output filters.
        strides: Convolution strides.
        init: Weight initializer.
        activation: Callable applied last; ``None`` means identity.

    Returns:
        The activated (or, for ``None``, just normalized) node.
    """
    conv = Convolution(filter_size, num_filters, strides=strides, init=init,
                       activation=None, pad=True, bias=False)
    convolved = conv(input)
    normalized = BatchNormalization(map_rank=1)(convolved)
    if activation is None:
        return normalized
    return activation(normalized)
def OneByOneConvAndUpSample(x, k_power, num_channels):
    """Project ``x`` with a ReLU 1x1 convolution, then up-sample it.

    Args:
        x: Input node.
        k_power: Second argument handed to ``UpSampling2DPower``.
        num_channels: Filter count of the 1x1 convolution.

    Returns:
        Result of ``UpSampling2DPower``.
    """
    project = Convolution((1, 1), num_channels, init=he_normal(),
                          activation=relu, pad=True)
    return UpSampling2DPower(project(x), k_power)
def conv_bn(input, filter_size, num_filters, strides=(1, 1), init=he_normal()):
    """Padded, bias-free convolution followed by batch normalization.

    Args:
        input: Input node.
        filter_size: Convolution filter shape.
        num_filters: Number of output filters.
        strides: Convolution strides.
        init: Weight initializer.

    Returns:
        The batch-normalized node (no activation applied).
    """
    conv = Convolution(filter_size, num_filters, activation=None, init=init,
                       pad=True, strides=strides, bias=False)
    convolved = conv(input)
    batch_norm = BatchNormalization(map_rank=1,
                                    normalization_time_constant=4096,
                                    use_cntk_engine=False)
    return batch_norm(convolved)
def bn_inception_model(input, labelDim, bnTimeConst):
    """Chain a conv/pool stem and ten inception blocks into a classifier.

    Args:
        input: Network input node.
        labelDim: Number of units of the final ``Dense`` layer.
        bnTimeConst: Forwarded to every conv/inception helper.

    Returns:
        Output of the final ``Dense`` layer.
    """
    # Shape notes ("in: H x W x C") are carried over from the original
    # author's comments and have not been re-derived here.

    # Stem.
    x = conv_bn_relu_layer(input, 64, (7, 7), (2, 2), True, bnTimeConst)  # in: 224 x 224 x 3
    x = MaxPooling(filter_shape=(3, 3), strides=(2, 2), pad=True)(x)      # in: 112 x 112 x 64
    x = conv_bn_relu_layer(x, 64, (1, 1), (1, 1), True, bnTimeConst)      # in: 56 x 56 x 64
    x = conv_bn_relu_layer(x, 192, (3, 3), (1, 1), True, bnTimeConst)     # in: 56 x 56 x 64
    x = MaxPooling(filter_shape=(3, 3), strides=(2, 2), pad=True)(x)      # in: 56 x 56 x 192

    # Inception blocks, applied strictly in this order.
    plan = (
        (inception_block_with_avgpool, (64, 64, 64, 64, 96, 32)),         # in: 28 x 28 x 192
        (inception_block_with_avgpool, (64, 64, 96, 64, 96, 64)),         # in: 28 x 28 x 256
        (inception_block_pass_through, (0, 128, 160, 64, 96, 0)),         # in: 28 x 28 x 320
        (inception_block_with_avgpool, (224, 64, 96, 96, 128, 128)),      # in: 14 x 14 x 576
        (inception_block_with_avgpool, (192, 96, 128, 96, 128, 128)),     # in: 14 x 14 x 576
        (inception_block_with_avgpool, (160, 128, 160, 128, 160, 128)),   # in: 14 x 14 x 576
        (inception_block_with_avgpool, (96, 128, 192, 160, 192, 128)),    # in: 14 x 14 x 576
        (inception_block_pass_through, (0, 128, 192, 192, 256, 0)),       # in: 14 x 14 x 576
        (inception_block_with_avgpool, (352, 192, 320, 160, 224, 128)),   # in: 7 x 7 x 1024
        (inception_block_with_maxpool, (352, 192, 320, 192, 224, 128)),   # in: 7 x 7 x 1024
    )
    for block, widths in plan:
        x = block(x, *(widths + (bnTimeConst,)))

    # Global average, then classifier.
    pooled = AveragePooling(filter_shape=(7, 7))(x)   # in: 7 x 7 x 1024
    return Dense(labelDim, init=he_normal())(pooled)  # in: 1 x 1 x 1024
def bn_inceptionv2_cifar_model(input, labelDim, bnTimeConst):
    """Chain a six-layer conv stem and inception blocks into a classifier.

    Three wiring mistakes of the previous version are corrected so that
    every constructed layer is actually part of the returned graph:

    * ``inception3a`` now consumes ``conv2c`` (it used to read ``conv2b``,
      leaving ``conv2c`` dangling);
    * ``inception4a`` now consumes ``pool1`` (it used to read
      ``inception3c``, skipping the first down-sampling);
    * the global pool now consumes ``inception5b`` (it used to read
      ``inception5a``, leaving ``inception5b`` dangling).

    Args:
        input: Network input node.
        labelDim: Number of units of the final ``Dense`` layer.
        bnTimeConst: Forwarded to every conv/inception helper.

    Returns:
        Output of the final ``Dense`` layer.
    """
    # Shape notes ("in: H x W x C") are the original author's; the rewired
    # chain is what makes the 16 x 16 and 8 x 8 notes consistent with the
    # two stride-2 poolings.

    # Stem: 3 x (3x3, 32) then 3 x (3x3, 64), all stride 1 and padded.
    conv1a = conv_bn_relu_layer(input, 32, (3, 3), (1, 1), True, bnTimeConst)   # in: 32 x 32 x 3
    conv1b = conv_bn_relu_layer(conv1a, 32, (3, 3), (1, 1), True, bnTimeConst)  # in: 32 x 32 x 32
    conv1c = conv_bn_relu_layer(conv1b, 32, (3, 3), (1, 1), True, bnTimeConst)  # in: 32 x 32 x 32
    conv2a = conv_bn_relu_layer(conv1c, 64, (3, 3), (1, 1), True, bnTimeConst)  # in: 32 x 32 x 32
    conv2b = conv_bn_relu_layer(conv2a, 64, (3, 3), (1, 1), True, bnTimeConst)  # in: 32 x 32 x 64
    conv2c = conv_bn_relu_layer(conv2b, 64, (3, 3), (1, 1), True, bnTimeConst)  # in: 32 x 32 x 64

    # Inception stage 3 (fix: start from conv2c, not conv2b).
    inception3a = inception_block1(conv2c, 32, 32, 32, 32, 48, 16, bnTimeConst)      # in: 32 x 32 x 64
    inception3b = inception_block1(inception3a, 0, 64, 80, 32, 48, 0, bnTimeConst)   # in: 32 x 32 x 128
    inception3c = inception_block1(inception3b, 0, 64, 80, 32, 48, 0, bnTimeConst)   # in: 32 x 32 x 128
    pool1 = AveragePooling(filter_shape=(3, 3), strides=(2, 2), pad=True)(inception3c)

    # Inception stage 4 (fix: start from pool1, not inception3c).
    inception4a = inception_block2(pool1, 96, 48, 64, 48, 64, 64, bnTimeConst)          # in: 16 x 16 x 256
    inception4b = inception_block2(inception4a, 96, 48, 64, 48, 64, 64, bnTimeConst)    # in: 16 x 16 x 256
    inception4c = inception_block2(inception4b, 96, 48, 64, 48, 64, 64, bnTimeConst)    # in: 16 x 16 x 256
    inception4d = inception_block2(inception4c, 48, 64, 96, 80, 96, 64, bnTimeConst)    # in: 16 x 16 x 288
    inception4e = inception_block2(inception4d, 0, 128, 192, 192, 256, 0, bnTimeConst)  # in: 16 x 16 x 288
    pool2 = AveragePooling(filter_shape=(3, 3), strides=(2, 2), pad=True)(inception4e)

    # Inception stage 5.
    inception5a = inception_block1(pool2, 176, 96, 160, 96, 112, 64, bnTimeConst)        # in: 8 x 8 x 512
    inception5b = inception_block1(inception5a, 176, 96, 160, 96, 112, 64, bnTimeConst)  # in: 8 x 8 x 512

    # Global average (fix: pool the last block, inception5b), then classify.
    pool3 = AveragePooling(filter_shape=(8, 8))(inception5b)  # in: 8 x 8 x 512
    z = Dense(labelDim, init=he_normal())(pool3)              # in: 1 x 1 x 512
    return z
def conv_dw(input, fillter_size, num_filters, strides=(1, 1), init=he_normal()):
    """Padded, bias-free convolution followed by ReLU.

    Args:
        input: Input node.
        fillter_size: Convolution filter shape (parameter name kept as-is
            because callers may pass it by keyword).
        num_filters: Number of output filters.
        strides: Convolution strides.
        init: Weight initializer.

    Returns:
        ``relu`` of the convolution output.
    """
    # NOTE(review): groups=1 is passed explicitly; if the "dw" in the name
    # means depthwise, the group count presumably should differ - confirm.
    conv = Convolution(fillter_size, num_filters, activation=None, init=init,
                       pad=True, strides=strides, bias=False, groups=1)
    convolved = conv(input)
    # NOTE(review): this prints on every call; looks like leftover debugging.
    print('r.shape ', convolved.shape)
    return relu(convolved)
def inceptionv1_cifar_model2(input, labelDim, bnTimeConst):
    """Chain two conv layers, nine inception blocks and two poolings.

    Fix: ``inception5a`` now consumes ``maxpool2``. Previously it read
    ``inception4e`` directly, so ``maxpool2`` was built but never used and
    the second stride-2 down-sampling was silently skipped, contradicting
    the 8 x 8 shape notes and the final 8 x 8 average pooling.

    Args:
        input: Network input node.
        labelDim: Number of units of the final ``Dense`` layer.
        bnTimeConst: Forwarded to every conv/inception helper.

    Returns:
        Output of the final ``Dense`` layer.
    """
    # Shape notes ("in: H x W x C") are carried over from the original
    # author's comments and have not been re-derived here.

    # Stem.
    conv1 = conv_bn_relu_layer(input, 32, (3, 3), (1, 1), True, bnTimeConst)  # in: 32 x 32 x 3
    conv2 = conv_bn_relu_layer(conv1, 32, (3, 3), (1, 1), True, bnTimeConst)  # in: 32 x 32 x 32

    # Inception stage 3.
    inception3a = inception_block_with_maxpool(conv2, 32, 32, 32, 32, 48, 16, bnTimeConst)        # in: 32 x 32 x 64
    inception3b = inception_block_with_maxpool(inception3a, 32, 32, 32, 32, 48, 16, bnTimeConst)  # in: 32 x 32 x 128
    maxpool1 = MaxPooling((3, 3), strides=(2, 2), pad=True)(inception3b)

    # Inception stage 4: five blocks with identical widths.
    inception4a = inception_block_with_maxpool(maxpool1, 96, 48, 64, 48, 64, 64, bnTimeConst)     # in: 16 x 16 x 128
    inception4b = inception_block_with_maxpool(inception4a, 96, 48, 64, 48, 64, 64, bnTimeConst)  # in: 16 x 16 x 288
    inception4c = inception_block_with_maxpool(inception4b, 96, 48, 64, 48, 64, 64, bnTimeConst)  # in: 16 x 16 x 288
    inception4d = inception_block_with_maxpool(inception4c, 96, 48, 64, 48, 64, 64, bnTimeConst)  # in: 16 x 16 x 288
    inception4e = inception_block_with_maxpool(inception4d, 96, 48, 64, 48, 64, 64, bnTimeConst)  # in: 16 x 16 x 288
    maxpool2 = MaxPooling((3, 3), strides=(2, 2), pad=True)(inception4e)

    # Inception stage 5 (fix: start from maxpool2, not inception4e).
    inception5a = inception_block_with_maxpool(maxpool2, 176, 96, 160, 96, 112, 64, bnTimeConst)     # in: 8 x 8 x 288
    inception5b = inception_block_with_maxpool(inception5a, 176, 96, 160, 96, 112, 64, bnTimeConst)  # in: 8 x 8 x 512

    # Global average, then classifier.
    pool1 = AveragePooling(filter_shape=(8, 8))(inception5b)  # in: 8 x 8 x 512
    z = Dense(labelDim, init=he_normal())(pool1)              # in: 1 x 1 x 512
    return z
def create_resnet_model(input, num_classes):
    """Build a three-stage residual network ending in a linear classifier.

    Args:
        input: Network input node.
        num_classes: Number of units of the final ``Dense`` layer.

    Returns:
        Output of the final ``Dense`` layer (no activation).
    """
    x = convolution_bn(input, (3, 3), 16)

    # Stage 1 keeps 16 filters; stages 2 and 3 first widen, then stack.
    x = resnet_basic_stack(x, 16, 3)
    for width in (32, 64):
        x = resnet_basic_inc(x, width)
        x = resnet_basic_stack(x, width, 2)

    pooled = GlobalAveragePooling()(x)
    classifier = Dense(num_classes, init=he_normal(), activation=None)
    return classifier(pooled)
def create_resnet_model(input, num_classes):
    """Build a three-stage residual network ending in a linear classifier.

    Args:
        input: Network input node.
        num_classes: Number of units of the final ``Dense`` layer.

    Returns:
        Output of the final ``Dense`` layer (no activation).
    """
    stem = convolution_bn(input, (3, 3), 16)

    # Stage 1: 16 filters, stack argument 3.
    stage_1 = resnet_basic_stack(stem, 16, 3)
    # Stage 2: widen to 32 filters, stack argument 2.
    stage_2 = resnet_basic_stack(resnet_basic_inc(stage_1, 32), 32, 2)
    # Stage 3: widen to 64 filters, stack argument 2.
    stage_3 = resnet_basic_stack(resnet_basic_inc(stage_2, 64), 64, 2)

    pooled = GlobalAveragePooling()(stage_3)
    return Dense(num_classes, init=he_normal(), activation=None)(pooled)
def create_resnet_model(input, num_classes):
    """Build a three-stage residual network ending in a linear classifier.

    Unlike a global-pooling variant, this one averages with a fixed
    8 x 8 window at stride 1.

    Args:
        input: Network input node.
        num_classes: Number of units of the final ``Dense`` layer.

    Returns:
        Output of the final ``Dense`` layer (no activation).
    """
    features = convolution_bn(input, (3, 3), 16)
    features = resnet_basic_stack(features, 16, 3)

    features = resnet_basic_inc(features, 32)
    features = resnet_basic_stack(features, 32, 2)

    features = resnet_basic_inc(features, 64)
    features = resnet_basic_stack(features, 64, 2)

    # Global average pooling via a fixed 8 x 8 window.
    average = AveragePooling(filter_shape=(8, 8), strides=(1, 1))
    pooled = average(features)
    return Dense(num_classes, init=he_normal(), activation=None)(pooled)
def create_cifar10_model(input, num_classes):
    """Build a three-stage network from the ``mobilenet_basic_*`` helpers.

    Args:
        input: Network input node.
        num_classes: Number of units of the final ``Dense`` layer.

    Returns:
        Output of the final ``Dense`` layer.
    """
    first_width, later_widths = 32, (64, 128)

    x = conv_bn_relu(input, (3, 3), first_width)
    x = mobilenet_basic_stack(x, 3, first_width)

    # Each later stage widens first, then applies a stack.
    for width in later_widths:
        x = mobilenet_basic_inc(x, width)
        x = mobilenet_basic_stack(x, 3, width)

    # Global average pooling and output.
    pooled = AveragePooling(filter_shape=(8, 8))(x)
    return Dense(num_classes, init=he_normal())(pooled)
def conv_bn_relu(layer_input, filter_size, num_filters, strides, init=he_normal(), name=''):
    """Apply ``conv_bn`` and then a ReLU named ``'<name>_relu'``.

    Args:
        layer_input: Input node.
        filter_size: Forwarded to ``conv_bn``.
        num_filters: Forwarded to ``conv_bn``.
        strides: Forwarded to ``conv_bn``.
        init: Weight initializer forwarded to ``conv_bn``.
        name: Forwarded to ``conv_bn`` and used as the ReLU name prefix.

    Returns:
        The ReLU node.
    """
    relu_name = '{}_relu'.format(name)
    normalized = conv_bn(layer_input, filter_size, num_filters, strides,
                         init, name=name)
    return relu(normalized, name=relu_name)
def constructCNN(self, cntk_input):
    """Build the network chosen by ``self.network_type``.

    The result is stored in ``self.cntk_model``; nothing is returned.
    Recognised types are 'idsia', 'self', 'resnet-56', 'resnet-32' and
    'resnet-20'. For any other value ``self.cntk_model`` stays ``None``.

    Args:
        cntk_input: Input node the network is applied to.
    """
    self.cntk_model = None
    kind = self.network_type

    # Second argument for cntk_resnet.create_model; depth = 6 * n + 2.
    resnet_stacks = {"resnet-56": 9, "resnet-32": 5, "resnet-20": 3}

    if kind == 'idsia':
        with C.layers.default_options(activation=C.relu):
            layers = [
                C.layers.Convolution((3,3), strides=(1,1), num_filters=100, pad=False,
                                     init=he_normal(), name="cntk_conv1"),
                C.layers.MaxPooling((2,2), strides=(2,2), name="cntk_pool1"),
                C.layers.Convolution((4,4), strides=(1,1), num_filters=150, pad=False,
                                     init=he_normal(), name="cntk_conv2"),
                C.layers.MaxPooling((2,2), strides=(2,2), name="cntk_pool2"),
                C.layers.Convolution((3,3), strides=(1,1), num_filters=250, pad=False,
                                     init=he_normal(), name="cntk_conv3"),
                C.layers.MaxPooling((2,2), strides=(2,2), name="cntk_pool3"),
                C.layers.Dense(200, init=he_normal(), name="cntk_fc1"),
                # Leave the softmax for now.
                C.layers.Dense(self.class_num, activation=None,
                               init=he_normal(), name="cntk_fc2"),
            ]
            self.cntk_model = C.layers.Sequential(layers)(cntk_input)
    elif kind == 'self':
        with C.layers.default_options(activation=C.relu):
            layers = [
                C.layers.Convolution((5,5), strides=(2,2), num_filters=64, pad=True,
                                     init=he_normal(), name="cntk_conv1"),
                C.layers.MaxPooling((2,2), strides=(2,2), name="cntk_pool1"),
                C.layers.Convolution((3,3), strides=(1,1), num_filters=256, pad=True,
                                     init=he_normal(), name="cntk_conv2"),
                C.layers.MaxPooling((2,2), strides=(2,2), name="cntk_pool2"),
                C.layers.Dense(2048, init=he_normal(), name="cntk_fc1"),
                C.layers.Dropout(0.5, name="cntk_dropout1"),
                # Leave the softmax for now.
                C.layers.Dense(self.class_num, activation=None,
                               init=he_normal(), name="cntk_fc2"),
            ]
            self.cntk_model = C.layers.Sequential(layers)(cntk_input)
    elif kind in resnet_stacks:
        self.cntk_model = cntk_resnet.create_model(
            cntk_input, resnet_stacks[kind], self.class_num)
def conv_bn_relu_layer(input, num_filters, filter_size, strides=(1, 1), pad=True, bnTimeConst=4096, init=he_normal()):
    """Bias-free convolution, batch normalization, then ReLU.

    Args:
        input: Input node.
        num_filters: Number of output filters.
        filter_size: Convolution filter shape.
        strides: Convolution strides.
        pad: Convolution padding flag.
        bnTimeConst: Passed as ``normalization_time_constant``.
        init: Weight initializer.

    Returns:
        The ReLU node.
    """
    conv_layer = Convolution(filter_size, num_filters, activation=None,
                             init=init, pad=pad, strides=strides, bias=False)
    convolved = conv_layer(input)
    bn_layer = BatchNormalization(map_rank=1,
                                  normalization_time_constant=bnTimeConst,
                                  use_cntk_engine=False)
    return relu(bn_layer(convolved))
def bn_inception_model(input, labelDim, bnTimeConst):
    """Chain a conv/pool stem and ten inception blocks into a classifier.

    Args:
        input: Network input node.
        labelDim: Number of units of the final ``Dense`` layer.
        bnTimeConst: Forwarded to every conv/inception helper.

    Returns:
        Output of the final ``Dense`` layer.
    """
    # Shape notes ("in: H x W x C") are carried over from the original
    # author's comments and have not been re-derived here.
    tc = bnTimeConst

    # Stem.
    net = conv_bn_relu_layer(input, 64, (7,7), (2,2), True, tc)           # in: 224 x 224 x 3
    net = MaxPooling(filter_shape=(3,3), strides=(2,2), pad=True)(net)    # in: 112 x 112 x 64
    net = conv_bn_relu_layer(net, 64, (1,1), (1,1), True, tc)             # in: 56 x 56 x 64
    net = conv_bn_relu_layer(net, 192, (3,3), (1,1), True, tc)            # in: 56 x 56 x 64
    net = MaxPooling(filter_shape=(3,3), strides=(2,2), pad=True)(net)    # in: 56 x 56 x 192

    # Inception stage 3.
    net = inception_block_with_avgpool(net, 64, 64, 64, 64, 96, 32, tc)   # in: 28 x 28 x 192
    net = inception_block_with_avgpool(net, 64, 64, 96, 64, 96, 64, tc)   # in: 28 x 28 x 256
    net = inception_block_pass_through(net, 0, 128, 160, 64, 96, 0, tc)   # in: 28 x 28 x 320

    # Inception stage 4.
    net = inception_block_with_avgpool(net, 224, 64, 96, 96, 128, 128, tc)     # in: 14 x 14 x 576
    net = inception_block_with_avgpool(net, 192, 96, 128, 96, 128, 128, tc)    # in: 14 x 14 x 576
    net = inception_block_with_avgpool(net, 160, 128, 160, 128, 160, 128, tc)  # in: 14 x 14 x 576
    net = inception_block_with_avgpool(net, 96, 128, 192, 160, 192, 128, tc)   # in: 14 x 14 x 576
    net = inception_block_pass_through(net, 0, 128, 192, 192, 256, 0, tc)      # in: 14 x 14 x 576

    # Inception stage 5.
    net = inception_block_with_avgpool(net, 352, 192, 320, 160, 224, 128, tc)  # in: 7 x 7 x 1024
    net = inception_block_with_maxpool(net, 352, 192, 320, 192, 224, 128, tc)  # in: 7 x 7 x 1024

    # Global average, then classifier.
    net = AveragePooling(filter_shape=(7,7))(net)   # in: 7 x 7 x 1024
    return Dense(labelDim, init=he_normal())(net)   # in: 1 x 1 x 1024
def conv_bn(layer_input, filter_size, num_filters, strides, init=he_normal(), name=''):
    """Padded convolution (with bias) followed by batch normalization.

    The convolution is named ``name`` and the batch-normalization layer
    ``'<name>_bn'``.

    Args:
        layer_input: Input node.
        filter_size: Convolution filter shape.
        num_filters: Number of output filters.
        strides: Convolution strides.
        init: Weight initializer.
        name: Base name for the two layers.

    Returns:
        The batch-normalized node (no activation applied).
    """
    conv = Convolution(filter_size, num_filters, activation=None, init=init,
                       pad=True, strides=strides, bias=True, name=name)
    convolved = conv(layer_input)
    batch_norm = BatchNormalization(map_rank=1,
                                    normalization_time_constant=4096,
                                    name='{}_bn'.format(name))
    return batch_norm(convolved)
def inception_v4_model(input, labelDim, bnTimeConst):
    """Chain 4 A-blocks, 7 B-blocks and 3 C-blocks with two reductions.

    Args:
        input: Network input node.
        labelDim: Number of units of the final ``Dense`` layer.
        bnTimeConst: Forwarded to every helper.

    Returns:
        Output of the final ``Dense`` layer.
    """
    # Shape notes ("in: H x W x C") are carried over from the original
    # author's comments and have not been re-derived here.
    x = pre_block(input, bnTimeConst)

    # Four identical Inception-A blocks (in: 32 x 32).
    for _ in range(4):
        x = Inception_A(x, 32, 32, 32, 64, 32, 64, bnTimeConst)
    x = reduction_A(x, 32, 32, 64, 64, bnTimeConst)  # in: 32 x 32 x 192

    # Seven identical Inception-B blocks (in: 16 x 16 x 288).
    for _ in range(7):
        x = Inception_B(x, 128, 32, 32, 64, 96, 32, 64, 96, bnTimeConst)
    x = reduction_B(x, 64, 64, 96, bnTimeConst)  # in: 16 x 16 x 352

    # Three identical Inception-C blocks (in: 8 x 8 x 512).
    for _ in range(3):
        x = Inception_C(x, 128, 128, 96, 64, 64, 128, 160, 64, bnTimeConst)

    pooled = AveragePooling(filter_shape=(8, 8))(x)   # in: 8 x 8 x 512
    return Dense(labelDim, init=he_normal())(pooled)  # in: 1 x 1 x 512
def gaussian(scale=1):
    """Return ``he_normal`` with its scale multiplied by ``sqrt(0.02)``.

    Args:
        scale: Multiplier applied before the ``sqrt(0.02)`` factor.

    Returns:
        The initializer produced by ``he_normal``.
    """
    effective_scale = scale * math.sqrt(0.02)
    return he_normal(scale=effective_scale)
def inception_v3_model(input, labelDim, dropRate, bnTimeConst):
    """Build the main classifier and an auxiliary classifier.

    The auxiliary branch is attached to the output of the eighth mixed
    block.

    Args:
        input: Network input node.
        labelDim: Number of units of both ``Dense`` output layers.
        dropRate: Dropout rate applied before the main classifier.
        bnTimeConst: Forwarded to every conv/inception helper.

    Returns:
        ``{'z': main_output, 'aux': auxiliary_output}``.
    """
    # Shape notes ("in: H x W x C") are carried over from the original
    # author's comments and have not been re-derived here.
    tc = bnTimeConst

    # Stem.
    x = conv_bn_relu_layer(input, 32, (3, 3), (2, 2), False, tc)         # in: 299 x 299 x 3
    x = conv_bn_relu_layer(x, 32, (3, 3), (1, 1), False, tc)             # in: 149 x 149 x 32
    x = conv_bn_relu_layer(x, 64, (3, 3), (1, 1), True, tc)              # in: 147 x 147 x 32
    x = MaxPooling(filter_shape=(3, 3), strides=(2, 2), pad=False)(x)    # in: 147 x 147 x 64
    x = conv_bn_relu_layer(x, 80, (1, 1), (1, 1), False, tc)             # in: 73 x 73 x 64
    x = conv_bn_relu_layer(x, 192, (3, 3), (1, 1), False, tc)            # in: 73 x 73 x 80
    x = MaxPooling(filter_shape=(3, 3), strides=(2, 2), pad=False)(x)    # in: 71 x 71 x 192

    # Inception blocks.
    x = inception_block_1(x, 64, [48, 64], [64, 96, 96], 32, tc)         # in: 35 x 35 x 192
    x = inception_block_1(x, 64, [48, 64], [64, 96, 96], 64, tc)         # in: 35 x 35 x 256
    x = inception_block_1(x, 64, [48, 64], [64, 96, 96], 64, tc)         # in: 35 x 35 x 288
    x = inception_block_2(x, 384, [64, 96, 96], tc)                      # in: 35 x 35 x 288
    x = inception_block_3(x, 192, [128, 128, 192], [128, 128, 128, 128, 192], 192, tc)  # in: 17 x 17 x 768
    x = inception_block_3(x, 192, [160, 160, 192], [160, 160, 160, 160, 192], 192, tc)  # in: 17 x 17 x 768
    x = inception_block_3(x, 192, [160, 160, 192], [160, 160, 160, 160, 192], 192, tc)  # in: 17 x 17 x 768
    x = inception_block_3(x, 192, [192, 192, 192], [192, 192, 192, 192, 192], 192, tc)  # in: 17 x 17 x 768
    aux_source = x  # output of the eighth mixed block feeds the auxiliary head
    x = inception_block_4(x, [192, 320], [192, 192, 192, 192], tc)                      # in: 17 x 17 x 768
    x = inception_block_5(x, 320, [384, 384, 384], [448, 384, 384, 384], 192, tc)       # in: 8 x 8 x 1280
    x = inception_block_5(x, 320, [384, 384, 384], [448, 384, 384, 384], 192, tc)       # in: 8 x 8 x 2048

    # Prediction.
    x = AveragePooling(filter_shape=(8, 8), pad=False)(x)  # in: 8 x 8 x 2048
    x = Dropout(dropout_rate=dropRate)(x)                  # in: 1 x 1 x 2048
    main_out = Dense(labelDim, init=he_normal())(x)        # in: 1 x 1 x 2048

    # Auxiliary.
    a = AveragePooling(filter_shape=(5, 5), strides=(3, 3), pad=False)(aux_source)  # in: 17 x 17 x 768
    a = conv_bn_relu_layer(a, 128, (1, 1), (1, 1), True, tc)                        # in: 5 x 5 x 768
    a = conv_bn_relu_layer(a, 768, (5, 5), (1, 1), False, tc)                       # in: 5 x 5 x 128
    aux_out = Dense(labelDim, init=he_normal())(a)                                  # in: 1 x 1 x 768

    return {'z': main_out, 'aux': aux_out}
def inception_v3_cifar_model(input, labelDim, bnTimeConst):
    """Build the main classifier and an auxiliary classifier.

    The auxiliary branch is attached to the output of the eighth mixed
    block. Dropout before the main classifier uses a fixed rate of 0.2.

    Args:
        input: Network input node.
        labelDim: Number of units of both ``Dense`` output layers.
        bnTimeConst: Forwarded to every conv/inception helper.

    Returns:
        ``{'z': main_output, 'aux': auxiliary_output}``.
    """
    # Shape notes ("in: H x W x C") are carried over from the original
    # author's comments and have not been re-derived here; some of them
    # (notably in the auxiliary branch) look copied from a larger model.
    tc = bnTimeConst

    # Stem.
    x = conv_bn_relu_layer(input, 32, (3, 3), (1, 1), True, tc)        # in: 32 x 32 x 3
    x = conv_bn_relu_layer(x, 32, (3, 3), (1, 1), True, tc)            # in: 32 x 32 x 32
    x = conv_bn_relu_layer(x, 64, (3, 3), (1, 1), True, tc)            # in: 32 x 32 x 32
    x = conv_bn_relu_layer(x, 80, (1, 1), (1, 1), True, tc)            # in: 32 x 32 x 64
    x = conv_bn_relu_layer(x, 128, (3, 3), (1, 1), True, tc)           # in: 32 x 32 x 80
    x = MaxPooling(filter_shape=(3, 3), strides=(2, 2), pad=True)(x)   # in: 32 x 32 x 128

    # Inception blocks.
    x = inception_block_1(x, 32, [32, 48], [48, 64, 64], 32, tc)       # in: 16 x 16 x 128
    x = inception_block_1(x, 32, [32, 48], [48, 64, 64], 64, tc)       # in: 16 x 16 x 160
    x = inception_block_1(x, 32, [32, 48], [48, 64, 64], 64, tc)       # in: 16 x 16 x 160
    # The original kept a disabled inception_block_2 alternative here;
    # the pass-through block below is the one actually used.
    x = inception_block_pass_through(x, 0, 64, 80, 32, 48, 0, tc)      # in: 16 x 16 x 160
    x = inception_block_3(x, 192, [48, 64, 64], [128, 128, 128, 128, 192], 192, tc)     # in: 8 x 8 x 256
    x = inception_block_3(x, 192, [160, 160, 192], [160, 160, 160, 160, 192], 192, tc)
    x = inception_block_3(x, 192, [160, 160, 192], [160, 160, 160, 160, 192], 192, tc)  # in: 8 x 8 x 768
    x = inception_block_3(x, 192, [192, 192, 192], [192, 192, 192, 192, 192], 192, tc)  # in: 8 x 8 x 768
    aux_source = x  # output of the eighth mixed block feeds the auxiliary head
    x = inception_block_3(x, 192, [192, 192, 192], [192, 192, 192, 192, 192], 192, tc)  # in: 8 x 8 x 768
    x = inception_block_5(x, 320, [384, 384, 384], [448, 384, 384, 384], 192, tc)       # in: 8 x 8 x 1280
    x = inception_block_5(x, 320, [384, 384, 384], [448, 384, 384, 384], 192, tc)       # in: 8 x 8 x 2048

    # Prediction.
    x = AveragePooling(filter_shape=(8, 8))(x)        # in: 8 x 8 x 2048
    x = Dropout(dropout_rate=0.2)(x)                  # in: 1 x 1 x 2048
    main_out = Dense(labelDim, init=he_normal())(x)   # in: 1 x 1 x 2048

    # Auxiliary.
    a = AveragePooling(filter_shape=(3, 3), strides=(1, 1), pad=True)(aux_source)  # in: 8 x 8 x 768
    a = conv_bn_relu_layer(a, 128, (1, 1), (1, 1), True, tc)
    a = conv_bn_relu_layer(a, 256, (3, 3), (1, 1), True, tc)
    aux_out = Dense(labelDim, init=he_normal())(a)

    return {'z': main_out, 'aux': aux_out}
def conv_bn_relu_layer(input, num_filters, filter_size, strides=(1,1), pad=True, bnTimeConst=4096, init=he_normal()):
    """Bias-free convolution, batch normalization, then ReLU.

    Args:
        input: Input node.
        num_filters: Number of output filters.
        filter_size: Convolution filter shape.
        strides: Convolution strides.
        pad: Convolution padding flag.
        bnTimeConst: Passed as ``normalization_time_constant``.
        init: Weight initializer.

    Returns:
        The ReLU node.
    """
    convolve = Convolution(filter_size, num_filters, activation=None,
                           init=init, pad=pad, strides=strides, bias=False)
    features = convolve(input)
    normalize = BatchNormalization(map_rank=1,
                                   normalization_time_constant=bnTimeConst,
                                   use_cntk_engine=False)
    features = normalize(features)
    return relu(features)
def inception_v3_model(input, labelDim, dropRate, bnTimeConst):
    """Build the main classifier and an auxiliary classifier.

    The auxiliary branch is attached to the output of the eighth mixed
    block.

    Args:
        input: Network input node.
        labelDim: Number of units of both ``Dense`` output layers.
        dropRate: Dropout rate applied before the main classifier.
        bnTimeConst: Forwarded to every conv/inception helper.

    Returns:
        ``{'z': main_output, 'aux': auxiliary_output}``.
    """
    # Shape notes ("in: H x W x C") are carried over from the original
    # author's comments and have not been re-derived here.

    # --- Stem -----------------------------------------------------------
    stem = conv_bn_relu_layer(input, 32, (3,3), (2,2), False, bnTimeConst)     # in: 299 x 299 x 3
    stem = conv_bn_relu_layer(stem, 32, (3,3), (1,1), False, bnTimeConst)      # in: 149 x 149 x 32
    stem = conv_bn_relu_layer(stem, 64, (3,3), (1,1), True, bnTimeConst)       # in: 147 x 147 x 32
    stem = MaxPooling(filter_shape=(3,3), strides=(2,2), pad=False)(stem)      # in: 147 x 147 x 64
    stem = conv_bn_relu_layer(stem, 80, (1,1), (1,1), False, bnTimeConst)      # in: 73 x 73 x 64
    stem = conv_bn_relu_layer(stem, 192, (3,3), (1,1), False, bnTimeConst)     # in: 73 x 73 x 80
    stem = MaxPooling(filter_shape=(3,3), strides=(2,2), pad=False)(stem)      # in: 71 x 71 x 192

    # --- Inception blocks -----------------------------------------------
    m = inception_block_1(stem, 64, [48, 64], [64, 96, 96], 32, bnTimeConst)   # in: 35 x 35 x 192
    m = inception_block_1(m, 64, [48, 64], [64, 96, 96], 64, bnTimeConst)      # in: 35 x 35 x 256
    m = inception_block_1(m, 64, [48, 64], [64, 96, 96], 64, bnTimeConst)      # in: 35 x 35 x 288
    m = inception_block_2(m, 384, [64, 96, 96], bnTimeConst)                   # in: 35 x 35 x 288
    m = inception_block_3(m, 192, [128, 128, 192], [128, 128, 128, 128, 192], 192, bnTimeConst)  # in: 17 x 17 x 768
    m = inception_block_3(m, 192, [160, 160, 192], [160, 160, 160, 160, 192], 192, bnTimeConst)  # in: 17 x 17 x 768
    m = inception_block_3(m, 192, [160, 160, 192], [160, 160, 160, 160, 192], 192, bnTimeConst)  # in: 17 x 17 x 768
    mixed8 = inception_block_3(m, 192, [192, 192, 192], [192, 192, 192, 192, 192], 192, bnTimeConst)  # in: 17 x 17 x 768
    m = inception_block_4(mixed8, [192, 320], [192, 192, 192, 192], bnTimeConst)                 # in: 17 x 17 x 768
    m = inception_block_5(m, 320, [384, 384, 384], [448, 384, 384, 384], 192, bnTimeConst)       # in: 8 x 8 x 1280
    m = inception_block_5(m, 320, [384, 384, 384], [448, 384, 384, 384], 192, bnTimeConst)       # in: 8 x 8 x 2048

    # --- Prediction -----------------------------------------------------
    pooled = AveragePooling(filter_shape=(8,8), pad=False)(m)   # in: 8 x 8 x 2048
    dropped = Dropout(dropout_rate=dropRate)(pooled)            # in: 1 x 1 x 2048
    z = Dense(labelDim, init=he_normal())(dropped)              # in: 1 x 1 x 2048

    # --- Auxiliary ------------------------------------------------------
    aux = AveragePooling(filter_shape=(5,5), strides=(3,3), pad=False)(mixed8)   # in: 17 x 17 x 768
    aux = conv_bn_relu_layer(aux, 128, (1,1), (1,1), True, bnTimeConst)          # in: 5 x 5 x 768
    aux = conv_bn_relu_layer(aux, 768, (5,5), (1,1), False, bnTimeConst)         # in: 5 x 5 x 128
    aux = Dense(labelDim, init=he_normal())(aux)                                 # in: 1 x 1 x 768

    return {'z': z, 'aux': aux}
def inception_v3_cifar_model(input, labelDim, bnTimeConst):
    """Build the main classifier and an auxiliary classifier.

    The auxiliary branch is attached to the output of the eighth mixed
    block. No dropout is applied in this variant.

    Args:
        input: Network input node.
        labelDim: Number of units of both ``Dense`` output layers.
        bnTimeConst: Forwarded to every conv/inception helper.

    Returns:
        ``{'z': main_output, 'aux': auxiliary_output}``.
    """
    # Shape notes ("in: H x W x C") are carried over from the original
    # author's comments and have not been re-derived here.
    tc = bnTimeConst

    # Stem.
    x = conv_bn_relu_layer(input, 32, (3,3), (1,1), True, tc)        # in: 32 x 32 x 3
    x = conv_bn_relu_layer(x, 32, (3,3), (1,1), True, tc)            # in: 32 x 32 x 32
    x = conv_bn_relu_layer(x, 64, (3,3), (1,1), True, tc)            # in: 32 x 32 x 32
    x = conv_bn_relu_layer(x, 80, (1,1), (1,1), True, tc)            # in: 32 x 32 x 64
    x = conv_bn_relu_layer(x, 128, (3,3), (1,1), True, tc)           # in: 32 x 32 x 80
    x = MaxPooling(filter_shape=(3,3), strides=(2,2), pad=True)(x)   # in: 32 x 32 x 128

    # Three identical inception_block_1 stages. Fresh list literals are
    # built on every iteration, as in the original call sites.
    for _ in range(3):                                               # in: 16 x 16 x 128 / 176 / 176
        x = inception_block_1(x, 32, [32, 48], [48, 64, 64], 32, tc)
    x = inception_block_pass_through(x, 0, 32, 48, 32, 48, 0, tc)    # in: 16 x 16 x 176

    # Three identical inception_block_3 stages (in: 8 x 8 x 256).
    for _ in range(3):
        x = inception_block_3(x, 64, [48, 64, 64], [48, 48, 48, 48, 64], 64, tc)
    x = inception_block_3(x, 80, [48, 64, 64], [48, 48, 48, 48, 64], 80, tc)        # in: 8 x 8 x 256
    aux_source = x  # output of the eighth mixed block feeds the auxiliary head
    x = inception_block_3(x, 128, [64, 128, 128], [64, 64, 64, 64, 128], 128, tc)   # in: 8 x 8 x 288

    # Two identical inception_block_5 stages (in: 8 x 8 x 512).
    for _ in range(2):
        x = inception_block_5(x, 128, [64, 128, 128], [64, 64, 64, 128], 128, tc)

    # Prediction.
    x = AveragePooling(filter_shape=(8,8))(x)         # in: 8 x 8 x 512
    main_out = Dense(labelDim, init=he_normal())(x)   # in: 1 x 1 x 512

    # Auxiliary.
    a = AveragePooling(filter_shape=(3,3), strides=(1,1), pad=True)(aux_source)  # in: 8 x 8 x 288
    a = conv_bn_relu_layer(a, 320, (1,1), (1,1), True, tc)                       # in: 8 x 8 x 288
    a = conv_bn_relu_layer(a, 512, (3,3), (1,1), True, tc)                       # in: 8 x 8 x 320
    aux_out = Dense(labelDim, init=he_normal())(a)                               # in: 8 x 8 x 512

    return {'z': main_out, 'aux': aux_out}
def conv_bn(input, filter_size, num_filters, strides=(1,1), init=he_normal()):
    """Padded, bias-free convolution followed by batch normalization.

    Args:
        input: Input node.
        filter_size: Convolution filter shape.
        num_filters: Number of output filters.
        strides: Convolution strides.
        init: Weight initializer.

    Returns:
        The batch-normalized node (no activation applied).
    """
    convolve = Convolution(filter_size, num_filters, activation=None,
                           init=init, pad=True, strides=strides, bias=False)
    features = convolve(input)
    normalize = BatchNormalization(map_rank=1,
                                   normalization_time_constant=4096,
                                   use_cntk_engine=False)
    return normalize(features)
def conv_bn_relu(input, filter_size, num_filters, strides=(1,1), init=he_normal()):
    """Apply ``conv_bn`` to ``input`` and pass the result through ReLU.

    Args:
        input: Input node.
        filter_size: Forwarded to ``conv_bn``.
        num_filters: Forwarded to ``conv_bn``.
        strides: Forwarded to ``conv_bn``.
        init: Weight initializer forwarded to ``conv_bn``.

    Returns:
        ``relu(conv_bn(...))``.
    """
    normalized = conv_bn(input, filter_size, num_filters, strides, init)
    activated = relu(normalized)
    return activated
def conv_bn_layer(input, out_feature_map_count, kernel_shape, strides, bn_time_const, b_value=0, sc_value=1):
    """Convolve ``input`` and apply batch normalization.

    Args:
        input: Input node; ``input.shape[0]`` is used as the channel count.
        out_feature_map_count: Number of output feature maps.
        kernel_shape: Indexable; ``[0]`` is the width, ``[1]`` the height.
        strides: Indexable; ``[0]`` is the vertical, ``[1]`` the horizontal stride.
        bn_time_const: Seventh positional argument of ``batch_normalization``.
        b_value: Initial value of the BN bias parameter.
        sc_value: Initial value of the BN scale parameter.

    Returns:
        The ``batch_normalization`` node.
    """
    channels_in = input.shape[0]
    width, height = kernel_shape[0], kernel_shape[1]
    v_step, h_step = strides[0], strides[1]

    # TODO: use RandomNormal to initialize, needs to be exposed in the python api
    kernel = parameter(
        shape=(out_feature_map_count, channels_in, height, width),
        init=he_normal())
    convolved = convolution(kernel, input, (channels_in, v_step, h_step))

    # TODO: initialize using b_value and sc_value, needs to be exposed in the python api
    # The shapes below are plain ints, exactly as the original's
    # parenthesised (non-tuple) expressions were.
    bn_bias = parameter(shape=out_feature_map_count, init=b_value)
    bn_scale = parameter(shape=out_feature_map_count, init=sc_value)
    mean = constant(0.0, out_feature_map_count)
    inv_std = constant(0.0, out_feature_map_count)

    # The sixth and seventh arguments are passed positionally, as in the
    # original (presumably `spatial` and the normalization time constant -
    # confirm against the installed batch_normalization signature).
    return batch_normalization(
        convolved, bn_scale, bn_bias, mean, inv_std,
        True, bn_time_const,
        use_cudnn_engine=True)