def augmentation(h, test, aug):
    """Optionally run ``F.image_augmentation`` on ``h``.

    Args:
        h: Input handed to ``F.image_augmentation``; the requested output
            shape is hard-coded to ``(1, 28, 28)``.
        test: Only consulted when ``aug`` is None, in which case
            augmentation is enabled iff ``test`` is falsy.
        aug: Truthy/falsy to force augmentation on/off, or None to derive
            the decision from ``test``.

    Returns:
        The augmented output, or ``h`` itself when augmentation is off.
    """
    enabled = (not test) if aug is None else aug
    if not enabled:
        return h
    return F.image_augmentation(
        h,
        shape=(1, 28, 28),
        pad=(0, 0),
        min_scale=0.9,
        max_scale=1.1,
        angle=0.3,
        aspect_ratio=1.3,
        distortion=0.1,
        flip_lr=False,
        flip_ud=False,
        brightness=0.5,
        brightness_each=False,
        contrast=1.5,
        contrast_center=0.5,
        contrast_each=False,
        noise=0.1,
        seed=0,
    )
def image_augmentation(image):
    """Randomly up-scale and left/right flip ``image``.

    The output shape requested from ``F.image_augmentation`` is
    ``image.shape`` and the scale is drawn between 1.0 and 286/256.
    The result is marked ``persistent`` before it is returned.
    """
    upper_scale = 286.0 / 256.0  # == 1.1171875
    augmented = F.image_augmentation(
        image,
        shape=image.shape,
        min_scale=1.0,
        max_scale=upper_scale,
        flip_lr=True,
    )
    augmented.persistent = True
    return augmented
def image_augmentation(image):
    """Randomly up-scale and left/right flip ``image`` with a fixed seed.

    The output shape requested from ``F.image_augmentation`` is
    ``image.shape``, the scale range is [1.0, 286/256], and the seed is
    read from the ``rng_seed`` name of the enclosing scope.
    """
    options = dict(
        shape=image.shape,
        min_scale=1.0,
        max_scale=286.0 / 256.0,  # == 1.1171875
        flip_lr=True,
        seed=rng_seed,
    )
    return F.image_augmentation(image, **options)
def vectorizer(x, maxh=256, test=False, output_hidden=False):
    """
    Building discriminator network which maps a (B, 1, 28, 28) input to
    a (B, 100).

    Args:
        x: Input variable.
        maxh: Channel count of the last convolution; earlier layers use
            ``maxh // 8``, ``maxh // 4`` and ``maxh // 2``. Must be >= 8.
        test: When False, the input is randomly scaled and shifted and
            batch normalization uses batch statistics.
        output_hidden: When True, also return the hidden activations.

    Returns:
        ``f`` (the output of the final affine layer), or
        ``(f, [c1, c2, c3, c4])`` when ``output_hidden`` is True.
    """
    # Define shortcut functions
    def bn(xx):
        # Batch normalization
        return PF.batch_normalization(xx, batch_stat=not test)

    def downsample2(xx, c):
        return PF.convolution(xx, c, (3, 3), pad=(1, 1), stride=(2, 2),
                              with_bias=False)

    # Floor division keeps the channel counts integral on Python 3 (true
    # division would hand floats to PF.convolution) and makes this check
    # actually reject maxh < 8.
    assert maxh // 8 > 0
    with nn.parameter_scope("dis"):
        # Augment the input only while training; the conv1 layer itself
        # is identical in both modes.
        if test:
            x2 = x
        else:
            x_ = F.image_augmentation(x, min_scale=0.9, max_scale=1.08)
            x2 = F.random_shift(x_, (2, 2))
        # (1, 28, 28) --> (32, 16, 16)   [channel counts shown for maxh=256]
        with nn.parameter_scope("conv1"):
            c1 = F.elu(bn(PF.convolution(x2, maxh // 8, (3, 3), pad=(3, 3),
                                         stride=(2, 2), with_bias=False)))
        # (32, 16, 16) --> (64, 8, 8)
        with nn.parameter_scope("conv2"):
            c2 = F.elu(bn(downsample2(c1, maxh // 4)))
        # (64, 8, 8) --> (128, 4, 4)
        with nn.parameter_scope("conv3"):
            c3 = F.elu(bn(downsample2(c2, maxh // 2)))
        # (128, 4, 4) --> (256, 4, 4)
        with nn.parameter_scope("conv4"):
            c4 = bn(PF.convolution(c3, maxh, (3, 3), pad=(1, 1),
                                   with_bias=False))
        # (256, 4, 4) --> (100,)
        with nn.parameter_scope("fc1"):
            f = PF.affine(c4, 100)
    if output_hidden:
        return f, [c1, c2, c3, c4]
    return f
def test_image_augmentation_forward(seed, shape, ctx, func_name):
    """Check output shapes of ``F.image_augmentation``.

    With default arguments the output keeps the input shape; with an
    explicit ``shape`` the output is ``(batch,) + shape``.
    """
    rng = np.random.RandomState(seed)
    data = rng.randn(16, 3, 8, 8).astype(np.float32)
    x = nn.Variable(data.shape)

    # NNabla forward, default arguments
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.image_augmentation(x)
    assert y.d.shape == data.shape

    # NNabla forward, every option set
    options = dict(
        shape=shape,
        pad=(2, 2),
        min_scale=0.8,
        max_scale=1.2,
        angle=0.2,
        aspect_ratio=1.1,
        distortion=0.1,
        flip_lr=True,
        flip_ud=False,
        brightness=0.1,
        brightness_each=True,
        contrast=1.1,
        contrast_center=0.5,
        contrast_each=True,
        noise=0.1,
        seed=0,
    )
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.image_augmentation(x, **options)
    batch_size = data.shape[0]
    assert y.d.shape == (batch_size,) + shape
def construct_networks(args, ops, arch_dict, image, test):
    """
    Build a network by stacking learned cells on top of a stem convolution.

    input:
        args: user arguments; reads num_cells, output_filter,
              additional_filters_on_retrain and auxiliary.
        ops: operations used in the network.
        arch_dict: a dictionary containing architecture information.
        image: Variable. Input images.
        test: bool. True if the network is for validation.
    output:
        (pred, aux_logits); aux_logits stays None unless the auxiliary
        head is built (training mode with args.auxiliary set).
    """
    total_cells = args.num_cells
    stem_filters = args.output_filter + args.additional_filters_on_retrain
    num_class = 10
    aux_logits = None

    # Training-only input augmentation: pad-and-crop, then random flip.
    if not test:
        padded = F.pad(image, (4, 4, 4, 4))
        image = F.random_crop(padded, shape=(image.shape))
        image = F.image_augmentation(image, flip_lr=True)
        image.need_grad = False

    with nn.parameter_scope("stem_conv1"):
        stem = PF.convolution(image, stem_filters, (3, 3), (1, 1),
                              with_bias=False)
        stem = PF.batch_normalization(stem, batch_stat=not test)

    cell_prev = cell_prev_prev = stem
    output_filter = stem_filters
    is_reduced_prev = False

    # Cells at these indices double the filter count and are flagged as
    # reduction cells.
    reduction_cells = [total_cells // 3, 2 * total_cells // 3]
    aux_cell = 2 * total_cells // 3

    for i in range(total_cells):
        is_reduced_curr = i in reduction_cells
        if is_reduced_curr:
            output_filter = 2 * output_filter

        y, is_reduced_curr, is_reduced_prev, output_filter = \
            constructing_learned_cell(args, ops, arch_dict, i,
                                      cell_prev_prev, cell_prev,
                                      output_filter, is_reduced_curr,
                                      is_reduced_prev, test)

        if i == aux_cell and args.auxiliary and not test:
            print("Using Aux Tower after cell_{}".format(i))
            aux_logits = construct_aux_head(y, num_class)

        # shift the two-cell history window
        cell_prev_prev = cell_prev
        cell_prev = y

    # pooling over the full spatial extent works as global average pooling
    y = F.average_pooling(y, y.shape[2:])

    with nn.parameter_scope("fc"):
        pred = PF.affine(y, num_class, with_bias=True)

    return pred, aux_logits
def test_image_augmentation_forward(seed, ctx, func_name):
    """Check output shapes of ``F.image_augmentation``.

    With default arguments the output keeps the input shape; with the
    explicit ``shape=(3, 5, 8)`` the output is ``(batch, 3, 5, 8)``.
    """
    rng = np.random.RandomState(seed)
    data = rng.randn(16, 3, 8, 8).astype(np.float32)
    x = nn.Variable(data.shape)

    # NNabla forward, default arguments
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.image_augmentation(x)
    assert y.d.shape == data.shape

    # NNabla forward, every option set
    shape = (3, 5, 8)
    options = dict(
        shape=shape,
        pad=(2, 2),
        min_scale=0.8,
        max_scale=1.2,
        angle=0.2,
        aspect_ratio=1.1,
        distortion=0.1,
        flip_lr=True,
        flip_ud=False,
        brightness=0.1,
        brightness_each=True,
        contrast=1.1,
        contrast_center=0.5,
        contrast_each=True,
        noise=0.1,
        seed=0,
    )
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.image_augmentation(x, **options)
    batch_size = data.shape[0]
    assert y.d.shape == (batch_size,) + shape
def resnet23_prediction(image, test=False, rng=None, ncls=10, nmaps=64,
                        act=F.relu):
    """
    Construct ResNet 23

    Args:
        image: Input variable. The fixed (4, 4) final pooling and the size
            comments below presume 32x32 inputs -- TODO confirm with callers.
        test: When False, the input is randomly augmented and batch
            normalization uses batch statistics.
        rng: Passed to the final ``PF.affine``.
        ncls: Number of output classes.
        nmaps: Channel count of the first convolution.
        act: Activation function applied after each unit.

    Returns:
        The output of the final affine layer.
    """
    # Residual Unit
    def res_unit(x, scope_name, rng, dn=False):
        C = x.shape[1]
        # Floor division: the bottleneck width must be an int; ``C / 2``
        # is a float on Python 3.
        half = C // 2
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Nonlinear
            with nn.parameter_scope("conv1"):
                h = PF.convolution(x, half, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = act(h)
            # Conv -> BN -> Nonlinear
            with nn.parameter_scope("conv2"):
                h = PF.convolution(h, half, kernel=(3, 3), pad=(1, 1),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = act(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Nonlinear
            h = act(F.add2(h, x, inplace=True))
            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
            return h

    # Conv -> BN -> Nonlinear
    with nn.parameter_scope("conv1"):
        # Preprocess
        if not test:
            image = F.image_augmentation(image, contrast=1.0,
                                         angle=0.25,
                                         flip_lr=True)
            image.need_grad = False
        h = PF.convolution(image, nmaps, kernel=(3, 3), pad=(1, 1),
                           with_bias=False)
        h = PF.batch_normalization(h, batch_stat=not test)
        h = act(h)

    h = res_unit(h, "conv2", rng, False)    # -> 32x32
    h = res_unit(h, "conv3", rng, True)     # -> 16x16
    h = res_unit(h, "conv4", rng, False)    # -> 16x16
    h = res_unit(h, "conv5", rng, True)     # -> 8x8
    h = res_unit(h, "conv6", rng, False)    # -> 8x8
    h = res_unit(h, "conv7", rng, True)     # -> 4x4
    h = res_unit(h, "conv8", rng, False)    # -> 4x4
    h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
    pred = PF.affine(h, ncls, rng=rng)

    return pred
def test_image_augmentation_forward(seed, shape, ctx, func_name):
    """Check output shapes and recomputation of ``F.image_augmentation``.

    Shapes are checked for default arguments and for a fully specified
    option set; recomputation is then checked with ``seed=0`` and with
    ``seed=-1``.
    """
    rng = np.random.RandomState(seed)
    data = rng.randn(16, 3, 8, 8).astype(np.float32)
    x = nn.Variable(data.shape)

    # NNabla forward, default arguments
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.image_augmentation(x)
    assert y.d.shape == data.shape

    # NNabla forward, every option set
    func_kargs = dict(
        shape=shape,
        pad=(2, 2),
        min_scale=0.8,
        max_scale=1.2,
        angle=0.2,
        aspect_ratio=1.1,
        distortion=0.1,
        flip_lr=True,
        flip_ud=False,
        brightness=0.1,
        brightness_each=True,
        contrast=1.1,
        contrast_center=0.5,
        contrast_each=True,
        noise=0.1,
        seed=0,
    )
    with nn.context_scope(ctx), nn.auto_forward():
        y = F.image_augmentation(x, **func_kargs)
    assert y.d.shape == (data.shape[0],) + shape

    # Checking recomputation, first with the seed of 0 used above and
    # then with seed=-1.
    from nbla_test_utils import recomputation_test
    for seed_value in (0, -1):
        func_kargs['seed'] = seed_value
        recomputation_test(rng=rng, func=F.image_augmentation, vinputs=[x],
                           func_args=[], func_kwargs=func_kargs, ctx=ctx)
def cnn_model_003(ctx, h, act=F.elu, do=True, test=False):
    """Build a CNN with a prediction head and a log-variance head.

    Args:
        ctx: Context under which the whole graph is built.
        h: Input variable; its shape is unpacked as four axes.
        act: Activation forwarded to every ``conv_unit``.
        do: Enables dropout after the first two blocks (training only).
        test: When False, the input is augmented, dropout may be applied
            and batch normalization uses batch statistics.

    Returns:
        ``(pred, log_var)``, both reshaped to two axes.
    """
    def pool_bn_dropout(v, bn_scope):
        # Max pooling -> BN -> optional dropout, closing a conv block
        v = F.max_pooling(v, (2, 2))
        with nn.parameter_scope(bn_scope):
            v = PF.batch_normalization(v, batch_stat=not test)
        if not test and do:
            v = F.dropout(v)
        return v

    def pooled_head(v, bn_scope):
        # 6x6 average pooling -> BN -> flatten to (batch, features)
        v = F.average_pooling(v, (6, 6))
        with nn.parameter_scope(bn_scope):
            v = PF.batch_normalization(v, batch_stat=not test)
        return F.reshape(v, (v.shape[0], np.prod(v.shape[1:])))

    with nn.context_scope(ctx):
        if not test:
            # Output shape uses the last axis for both spatial sizes.
            _, c, _, s = h.shape
            h = F.image_augmentation(h, (c, s, s),
                                     min_scale=1.0, max_scale=1.5,
                                     angle=0.5, aspect_ratio=1.3,
                                     distortion=0.2,
                                     flip_lr=True)

        # Convblock0
        for name in ("conv00", "conv01", "conv02"):
            h = conv_unit(h, name, 128, k=3, s=1, p=1, act=act, test=test)
        h = pool_bn_dropout(h, "bn0")  # 32 -> 16

        # Convblock 1
        for name in ("conv10", "conv11", "conv12"):
            h = conv_unit(h, name, 256, k=3, s=1, p=1, act=act, test=test)
        h = pool_bn_dropout(h, "bn1")  # 16 -> 8

        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0,
                      act=act, test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        u = h  # shared features for the uncertainty head

        # Convblock 3
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
        pred = pooled_head(h, "bn2")

        # Uncertainty
        u = conv_unit(u, "u0", 10, k=1, s=1, p=0, act=act, test=test)
        log_var = pooled_head(u, "u0bn")

        return pred, log_var
def image_preprocess(image, img_size=224, data_size=320, test=False):
    """Scale pixels to [-0.5, 0.5] and crop/augment to ``img_size``.

    Args:
        image: Input variable whose last two axes are (height, width);
            the crop below slices three channels on the axis before them.
        img_size: Side length of the square output image.
        data_size: Nominal stored image size; the test-time centre crop
            is 87.5% of it.
        test: True for a deterministic centre crop, False for random
            training augmentation.

    Returns:
        The preprocessed image variable.
    """
    h, w = image.shape[2:]
    image = image / 255.0
    if test:
        # Centre crop. Indices must be integers, and the offsets follow
        # the (C, H, W) axis order -- height offsets first, then width.
        crop = int(data_size * 0.875)  # Ratio of size is 87.5%
        top = (h - crop) // 2
        left = (w - crop) // 2
        image = F.slice(image,
                        (0, top, left),
                        (3, top + crop, left + crop),
                        (1, 1, 1))
        # Fixed 0.8 scale; with the defaults 280 * 0.8 == 224 == img_size.
        image = F.image_augmentation(image, (3, img_size, img_size),
                                     min_scale=0.8, max_scale=0.8)
    else:
        # Scale range is relative to the shorter side of the input.
        size = min(h, w)
        min_size = img_size * 1.1
        max_size = min_size * 2
        min_scale = min_size / size
        max_scale = max_size / size
        image = F.image_augmentation(image, (3, img_size, img_size),
                                     pad=(0, 0),
                                     min_scale=min_scale,
                                     max_scale=max_scale,
                                     angle=0.5, aspect_ratio=1.3,
                                     distortion=0.2,
                                     flip_lr=True, flip_ud=False,
                                     brightness=0.0, brightness_each=True,
                                     contrast=1.1, contrast_center=0.5,
                                     contrast_each=True,
                                     noise=0.0)
    image = image - 0.5
    return image
def image_preprocess(image, img_size=224):
    """Randomly augment ``image`` to a 3 x img_size x img_size crop.

    The scale range is chosen relative to the shorter of the last two
    axes of ``image``. After augmentation 128 is subtracted from every
    value.
    """
    height, width = image.shape[2:]
    shorter_side = min(height, width)

    # Smallest target is 1.1x the output size, largest is twice that.
    min_size = img_size * 1.1
    max_size = min_size * 2

    options = dict(
        min_scale=min_size / shorter_side,
        max_scale=max_size / shorter_side,
        angle=0.5,
        aspect_ratio=1.3,
        distortion=0.2,
        flip_lr=True,
        brightness=25.5,
        brightness_each=True,
        contrast=1.1,
        contrast_center=128.0,
        contrast_each=True,
        noise=25.5,
    )
    augmented = F.image_augmentation(image, (3, img_size, img_size),
                                     **options)
    return augmented - 128
def cifar10_resnet2rnn_prediction(image, maps=64, unrolls=[3, 3, 4],
                                  res_unit=res_unit_default, test=False):
    """
    Build a 10-class classifier from repeatedly applied residual units.

    A stem convolution is followed by ``len(unrolls)`` groups; group ``i``
    calls ``res_unit`` ``unrolls[i]`` times with the same scope name
    ``"block{i}"`` and the step index.

    References
    ----------
    Qianli Liao and Tomaso Poggio,
    "Bridging the Gaps Between Residual Learning, Recurrent Neural
    Networks and Visual Cortex", https://arxiv.org/abs/1604.03640
    """
    ncls = 10

    # Stem: Conv -> BN -> Relu
    with nn.parameter_scope("conv1"):
        # Preprocess
        image /= 255.0
        if not test:
            image = F.image_augmentation(image, contrast=1.0,
                                         angle=0.25,
                                         flip_lr=True)
            image.need_grad = False
        h = F.relu(
            PF.batch_normalization(
                PF.convolution(image, maps, kernel=(3, 3), pad=(1, 1),
                               with_bias=False),
                batch_stat=not test))

    # ResUnit2RNN
    for group, steps in enumerate(unrolls):
        scope = "block{}".format(group)
        for step in range(steps):
            h = res_unit(h, scope, step, test)

    # Pool over the whole remaining spatial extent, then classify.
    spatial = h.shape[-2:]
    h = F.average_pooling(h, kernel=spatial)
    return PF.affine(h, ncls)
def construct_architecture(image, num_class, num_cells, num_nodes, both_archs, output_filter, test):
    """
        Construct an architecture based on the given lists.
        Note that first 2 layers are stem conv and have nothing to do with node operations.

        Args:
            image: Input variable; randomly augmented when ``test`` is False.
            num_class: Output size of the final (and auxiliary) affine layer.
            num_cells: Total number of layers, counting the two stem
                convolutions (cells are built for indices 2..num_cells-1).
            num_nodes: Forwarded to ``construct_cell``.
            both_archs: Pair unpacked as ``(conv_arch, reduc_arch)``;
                ``reduc_arch`` is used for layers listed in ``pool_layers``.
            output_filter: Number of filters of the stem convolutions.
            test: When True, augmentation, dropout and the auxiliary head
                are skipped and batch normalization uses running statistics.

        Returns:
            ``(pred, aux_logits, used_weights)``: the classifier output,
            the auxiliary head output (None if the head was not built),
            and the set of parameter names recorded while building.
    """
    conv_arch, reduc_arch = both_archs

    aux_logits = None
    used_weights = set()

    # Candidate reduction layers at 1/3 and 2/3 of the depth; indices
    # that are not positive are dropped.
    pool_distance = num_cells // 3
    pool_layers = [pool_distance - 1, 2*pool_distance - 1]
    pool_layers = [_ for _ in pool_layers if _ > 0]

    if len(pool_layers) > 0:
        # the auxiliary head goes right after the last reduction layer
        aux_head_indices = [pool_layers[-1] + 1]
    else:
        # this should not happen, since num_cells needs to be more than 3.
        aux_head_indices = [1]

    ref_groups, required_indices = get_reference_layers(num_cells, pool_layers)
    # one list of previous outputs per group index in ref_groups
    prev_layers = [list() for _ in range(ref_groups[-1] + 1)]

    # Note that this implementation is slightly different from the one written by tensorflow.
    if not test:
        image = F.image_augmentation(
            image, angle=0.25, flip_lr=True)  # random_crop, min_scale
        image.need_grad = False
    x = image

    # --------------------------------------- 1st cell ---------------------------------------
    with nn.parameter_scope("stem_conv1"):
        x = PF.convolution(x, output_filter, (3, 3), (1, 1), with_bias=False)
        x = PF.batch_normalization(x, batch_stat=not test)
    used_weights.update(
        {"stem_conv1/conv/W", "stem_conv1/bn/gamma", "stem_conv1/bn/beta"})
    prev_layers[0].append(x)  # store to the "unpooled" layer

    # spatial reduction (this might be skipped)
    for i in range(1, len(required_indices[0])):
        curr_scope = "stem1_reduc{}".format(i)
        # each reduction doubles the channel count of its input
        x = factorized_reduction(x, 2*x.shape[1], curr_scope, test)

        local_used_weights = get_factorized_weights_name(curr_scope)
        used_weights.update(local_used_weights)
        prev_layers[i].append(x)

    # --------------------------------------- 2nd cell ---------------------------------------
    with nn.parameter_scope("stem_conv2"):
        # takes the unpooled output of the first stem, not the reduced x
        x = PF.convolution(
            prev_layers[0][-1], output_filter, (3, 3), (1, 1), with_bias=False)
        x = PF.batch_normalization(x, batch_stat=not test)
    used_weights.update(
        {"stem_conv2/conv/W", "stem_conv2/bn/gamma", "stem_conv2/bn/beta"})
    prev_layers[0].append(x)  # store to the "unpooled" layer

    # spatial reduction (this might be skipped)
    for i in range(1, len(required_indices[1])):
        curr_scope = "stem2_reduc{}".format(i)
        x = factorized_reduction(x, 2*x.shape[1], curr_scope, test)

        local_used_weights = get_factorized_weights_name(curr_scope)
        used_weights.update(local_used_weights)
        prev_layers[i].append(x)

    # ------------------------------- Normal / Reduction cells -------------------------------
    for layer_id in range(2, num_cells):
        using_layer_index = ref_groups[layer_id]
        required_index = list(required_indices[layer_id])
        required_index.sort()
        scope = 'w{}'.format(layer_id)

        if layer_id in pool_layers:
            architecture = reduc_arch
        else:
            architecture = conv_arch

        previous_outputs = prev_layers[using_layer_index]
        x, local_used_weights = construct_cell(
            previous_outputs, architecture, num_nodes, previous_outputs[-1].shape[1], scope, test)
        used_weights.update(local_used_weights)
        prev_layers[using_layer_index].append(x)

        required_index.remove(using_layer_index)  # discard an index used above

        # if this output (x) is reused as an input in other cells and
        # its shape needs to be changed, apply downsampling in advance
        for i in required_index:
            curr_scope = "scope{0}_reduc{1}".format(layer_id, i)
            x = factorized_reduction(x, 2*x.shape[1], curr_scope, test)
            local_used_weights = get_factorized_weights_name(curr_scope)
            used_weights.update(local_used_weights)
            prev_layers[i].append(x)

        # auxiliary head, to use the intermediate output for training
        if layer_id in aux_head_indices and not test:
            print("Using aux_head at layer {}".format(layer_id))
            aux_logits = F.relu(x)
            aux_logits = F.average_pooling(aux_logits, (5, 5), (3, 3))

            with nn.parameter_scope("proj"):
                aux_logits = PF.convolution(
                    aux_logits, 128, (3, 3), (1, 1), with_bias=False)
                aux_logits = PF.batch_normalization(
                    aux_logits, batch_stat=not test)
                aux_logits = F.relu(aux_logits)
            used_weights.update(
                {"proj/conv/W", "proj/bn/gamma", "proj/bn/beta"})

            # NOTE: despite its name, this scope holds a convolution
            with nn.parameter_scope("avg_pool"):
                aux_logits = PF.convolution(
                    aux_logits, 768, (3, 3), (1, 1), with_bias=False)
                aux_logits = PF.batch_normalization(
                    aux_logits, batch_stat=not test)
                aux_logits = F.relu(aux_logits)
            used_weights.update(
                {"avg_pool/conv/W", "avg_pool/bn/gamma", "avg_pool/bn/beta"})

            with nn.parameter_scope("additional_fc"):
                aux_logits = F.global_average_pooling(aux_logits)
                aux_logits = PF.affine(aux_logits, num_class, with_bias=False)
            used_weights.update({"additional_fc/affine/W"})

    # classify from the newest output of the last group
    x = F.global_average_pooling(prev_layers[-1][-1])

    if not test:
        dropout_rate = 0.5
        x = F.dropout(x, dropout_rate)

    with nn.parameter_scope("fc"):
        pred = PF.affine(x, num_class, with_bias=False)
        used_weights.add("fc/affine/W")

    return pred, aux_logits, used_weights
def construct_architecture(image, num_class, operations, output_filter, test, connect_patterns):
    """
        Architecture Construction.

        Args:
            image: Input variable; randomly augmented when ``test`` is False.
            num_class: Output size of the final affine layer.
            operations: One operation id per layer; each id is a key of
                the ``ops`` table below.
            output_filter: Number of filters used by every layer.
            test: When True, augmentation and dropout are skipped and
                batch normalization uses running statistics.
            connect_patterns: Per-layer skip-connection pattern, indexed
                like ``operations``.

        Returns:
            ``(pred, used_weights)``: the classifier output and the set of
            parameter names recorded while building.
    """
    # operation id -> layer constructor
    ops = {0: conv3x3,
           1: conv5x5,
           2: depthwise_separable_conv3x3,
           3: depthwise_separable_conv5x5,
           4: max_pool,
           5: average_pool}

    used_weights = set()

    pool_distance = len(operations) // 3
    pool_layers = [pool_distance - 1, 2 * pool_distance - 1]
    # keep only positive indices
    pool_layers = [idx for idx in pool_layers if idx > 0]

    # Assign each layer the number of pooling layers that precede it.
    ref_groups = len(operations) * [0]
    tmp_list = pool_layers + [len(operations) - 1]
    index = 0
    for n in range(len(operations)):
        if n <= tmp_list[index]:
            ref_groups[n] = index
        else:
            index += 1
            ref_groups[n] = index

    # elements in ref_groups tell you how many times you need to do pooling.
    # e.g. [0, 0, 0, 1, 1, 1, ..., 2] : the 1st layer needs no pooling,
    # but the last needs 2 poolings, to get spatially reduced variables.

    #required_indices = get_requirement_soft(ref_groups)
    required_indices = get_requirement_strict(ref_groups, connect_patterns, pool_layers)

    num_of_pooling = len(pool_layers)
    normal_layers = [list()]
    pooled_layers = [list() for j in range(num_of_pooling)]

    prev_layers = normal_layers + pooled_layers
    # prev_layer consists of: [[initial_size_layers], [1x pooled_layers], [2x pooled_layers], ...]

    if not test:
        image = F.image_augmentation(image, angle=0.25, flip_lr=True)
        image.need_grad = False
    x = image

    # next comes the basic operation. for the first layer,
    # just apply a convolution (to make the size of the input the same as that of successors)
    with nn.parameter_scope("stem_conv"):
        x = PF.convolution(x, output_filter, (3, 3), (1, 1), with_bias=False)
        x = PF.batch_normalization(x, batch_stat=not test)
    used_weights.update(
        {"stem_conv/conv/W", "stem_conv/bn/gamma", "stem_conv/bn/beta"})
    prev_layers[0].append(x)
    # "unpooled" variable is stored in normal_layers (prev_layers[0]).

    # then apply factorized reduction (kind of pooling),
    # but ONLY IF the spatially-reduced variable is required.
    # for example, when this layer has skip connection with latter layers.
    for j in range(1, len(prev_layers)):
        if required_indices[0][j]:
            nested_scope = "stem_pool_{}".format(j)
            # reduce the newest entry of the previous (less pooled) group
            reduced_var = factorized_reduction(prev_layers[j - 1][-1], output_filter, nested_scope, test)
            used_weights.update(get_factorized_weights_name(nested_scope))
        else:
            # dummy variable. Should never be used.
            # (appended so every group list keeps one entry per layer)
            reduced_var = nn.Variable([1, 1, 1, 1])
        prev_layers[j].append(reduced_var)
        # reduced (or "pooled") variable is stored in pooled_layers (prev_layers[1:]).

    # basically, repeat the same process, for whole layers.
    for i, elem in enumerate(operations):
        scope = 'w{}_{}'.format(i, elem)

        # basic operation (and connects it with previous layers if it has skip connections)
        using_layer_index = ref_groups[i]
        connect_pattern = connect_patterns[i]
        x, local_used_weights = apply_ops_and_connect(
            prev_layers[using_layer_index][-1], prev_layers[using_layer_index], connect_pattern, ops, elem, output_filter, scope, test)
        used_weights.update(local_used_weights)
        prev_layers[using_layer_index].append(x)

        # factorized reduction
        # (required_indices is offset by one because entry 0 is the stem)
        for j in range(using_layer_index + 1, len(prev_layers)):
            if required_indices[i + 1][j]:
                nested_scope = "{0}_pool{1}".format(scope, j)
                reduced_var = factorized_reduction(prev_layers[j - 1][-1], output_filter, nested_scope, test)
                used_weights.update(get_factorized_weights_name(nested_scope))
            else:
                reduced_var = nn.Variable([1, 1, 1, 1])  # dummy variable.
            prev_layers[j].append(reduced_var)

    # classify from the output of the last operation
    x = F.global_average_pooling(x)

    if not test:
        dropout_rate = 0.5
        x = F.dropout(x, dropout_rate)

    with nn.parameter_scope("fc"):
        pred = PF.affine(x, num_class, with_bias=False)
        used_weights.add("fc/affine/W")

    return pred, used_weights
def resnet56_prediction(image, test=False, ncls=10, nmaps=64, act=F.relu,
                        seed=0):
    """
    Construct ResNet 56

    A stem convolution is followed by three stages (16, 32 and 64
    channels) of nine residual units each. The first unit of the 32 and
    64 channel stages uses stride 2 and a strided 1x1 convolution on the
    shortcut.

    Returns:
        ``(pred, h)``: the affine output and the globally pooled features.
    """
    channels = [16, 32, 64]
    units_per_stage = 9

    # Residual Unit
    def res_unit(x, scope_name, c, i):
        # Only the first unit of a stage wider than 16 channels downsamples.
        downsample = i == 0 and c > 16
        first_stride = (2, 2) if downsample else (1, 1)
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Nonlinear
            with nn.parameter_scope("conv1"):
                out = PF.convolution(x, c, kernel=(3, 3), pad=(1, 1),
                                     stride=first_stride, with_bias=False)
                out = PF.batch_normalization(out, batch_stat=not test)
                out = act(out)
            # Conv -> BN
            with nn.parameter_scope("conv2"):
                out = PF.convolution(out, c, kernel=(3, 3), pad=(1, 1),
                                     with_bias=False)
                out = PF.batch_normalization(out, batch_stat=not test)
            shortcut = x
            if downsample:
                # Strided 1x1 conv so the shortcut matches the main path
                with nn.parameter_scope("conv3"):
                    shortcut = PF.convolution(x, c, kernel=(1, 1),
                                              pad=(0, 0), stride=(2, 2),
                                              with_bias=False)
            # Residual -> Nonlinear
            return act(F.add2(out, shortcut))

    # Conv -> BN -> Nonlinear
    with nn.parameter_scope("conv1"):
        # Preprocess
        if not test:
            image = F.image_augmentation(image, min_scale=0.8,
                                         max_scale=1.2, flip_lr=True,
                                         seed=seed)
            image.need_grad = False
        h = PF.convolution(image, channels[0], kernel=(3, 3), pad=(1, 1),
                           with_bias=False)
        h = PF.batch_normalization(h, batch_stat=not test)
        h = act(h)

    # Scope names run from "<c>_conv2" to "<c>_conv10" within each stage.
    for c in channels:
        for i in range(units_per_stage):
            h = res_unit(h, f"{c}_conv{i + 2}", c, i)

    h = F.global_average_pooling(h)  # -> 1x1
    if test:
        h.need_grad = False
    pred = PF.affine(h, ncls)

    return pred, h
def __call__(self, x, test=False):
    """Run ``model0`` then ``model1`` on ``x``.

    Unless ``test`` is truthy, the input is first passed through
    ``F.image_augmentation`` (left/right flip, angle 0.26).
    """
    if test:
        h = x
    else:
        h = F.image_augmentation(x, flip_lr=True, angle=0.26)
    for stage in (self.model0, self.model1):
        h = stage(h, test)
    return h
def cifar10_resnet23_prediction(image, ctx, test=False):
    """
    Construct ResNet 23

    Args:
        image: Input variable. The fixed (4, 4) final pooling and the size
            comments below presume 32x32 inputs -- TODO confirm with callers.
        ctx: Context under which the whole graph is built.
        test: When False, the input is randomly augmented and batch
            normalization uses batch statistics.

    Returns:
        The output of the final affine layer (10 classes).
    """
    # Residual Unit
    def res_unit(x, scope_name, rng, dn=False, test=False):
        C = x.shape[1]
        # Floor division: the bottleneck width must be an int; ``C / 2``
        # is a float on Python 3.
        half = C // 2
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(C, half, kernel=(1, 1)),
                    rng=rng)
                h = PF.convolution(x, half, kernel=(1, 1), pad=(0, 0),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(half, half, kernel=(3, 3)),
                    rng=rng)
                h = PF.convolution(h, half, kernel=(3, 3), pad=(1, 1),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(half, C, kernel=(1, 1)),
                    rng=rng)
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))

        return h

    # Random generator for using the same init parameters in all devices
    rng = np.random.RandomState(0)
    nmaps = 64
    ncls = 10

    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope("conv1"):
            # Preprocess
            if not test:
                image = F.image_augmentation(image, contrast=1.0,
                                             angle=0.25,
                                             flip_lr=True)
                image.need_grad = False

            w_init = UniformInitializer(
                calc_uniform_lim_glorot(3, nmaps, kernel=(3, 3)),
                rng=rng)
            h = PF.convolution(image, nmaps, kernel=(3, 3), pad=(1, 1),
                               w_init=w_init, with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)

        # ``test`` must be forwarded explicitly: res_unit's own ``test``
        # parameter shadows the outer one and defaults to False, which
        # would keep batch normalization in batch-stat mode at evaluation.
        h = res_unit(h, "conv2", rng, False, test=test)    # -> 32x32
        h = res_unit(h, "conv3", rng, True, test=test)     # -> 16x16
        h = res_unit(h, "conv4", rng, False, test=test)    # -> 16x16
        h = res_unit(h, "conv5", rng, True, test=test)     # -> 8x8
        h = res_unit(h, "conv6", rng, False, test=test)    # -> 8x8
        h = res_unit(h, "conv7", rng, True, test=test)     # -> 4x4
        h = res_unit(h, "conv8", rng, False, test=test)    # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1

        w_init = UniformInitializer(
            calc_uniform_lim_glorot(int(np.prod(h.shape[1:])), ncls,
                                    kernel=(1, 1)),
            rng=rng)
        pred = PF.affine(h, ncls, w_init=w_init)

    return pred
def cifar10_shift_prediction(image, maps=64, test=False, p=0, module="sc2"):
    """
    Construct ShiftNet

    Args:
        image: Input variable; divided by 255 in place. The fixed (4, 4)
            final pooling and the size comments below presume 32x32
            inputs -- TODO confirm with callers.
        maps: Channel count of the first convolution.
        test: When False, the input is randomly augmented, dropout may be
            applied and batch normalization uses batch statistics.
        p: Dropout is enabled in every unit when ``p > 0``.
        module: Unit type, ``"sc2"`` (shift-conv-shift-conv) or ``"csc"``
            (conv-shift-conv).

    Returns:
        The output of the final affine layer (10 classes).

    Raises:
        ValueError: If ``module`` is neither ``"sc2"`` nor ``"csc"``.
    """

    # Shift
    def shift(x, ksize=3):
        # Channels are split into groups of ``cpg``; each of the first
        # eight groups is a differently offset crop of the padded input,
        # and the remaining channels take the centred (unshifted) crop.
        maps = x.shape[1]
        cpg = maps // (ksize**2)

        x_pad = F.pad(x, (1, 1, 1, 1))
        b, c, h, w = x_pad.shape
        xs = []

        # Bottom shift
        i = 0
        xs += [x_pad[:, i * cpg:(i + 1) * cpg, :h - 2, 1:w - 1]]

        # Top shift
        i = 1
        xs += [x_pad[:, i * cpg:(i + 1) * cpg, 2:, 1:w - 1]]

        # Right shift
        i = 2
        xs += [x_pad[:, i * cpg:(i + 1) * cpg, 1:h - 1, :w - 2]]

        # Left shift
        i = 3
        xs += [x_pad[:, i * cpg:(i + 1) * cpg, 1:h - 1, 2:]]

        # Bottom Right shift
        i = 4
        xs += [x_pad[:, i * cpg:(i + 1) * cpg, :h - 2, :w - 2]]

        # Bottom Left shift
        i = 5
        xs += [x_pad[:, i * cpg:(i + 1) * cpg, :h - 2, 2:]]

        # Top Right shift
        i = 6
        xs += [x_pad[:, i * cpg:(i + 1) * cpg, 2:, :w - 2]]

        # Top Left shift
        i = 7
        xs += [x_pad[:, i * cpg:(i + 1) * cpg, 2:, 2:]]

        # No shift for the remaining channels
        i = 8
        xs += [x_pad[:, i * cpg:, 1:h - 1, 1:w - 1]]

        h = F.concatenate(*xs, axis=1)
        return h

    # Shift Units
    def sc2(x, scope_name, dn=False):
        C = x.shape[1]
        h = x
        with nn.parameter_scope(scope_name):
            with nn.parameter_scope("shift1"):  # no meaning but semantics
                h = shift(h)

            with nn.parameter_scope("conv1"):
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h, True)
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)

            with nn.parameter_scope("shift2"):  # no meaning but semantics
                h = shift(h)

            with nn.parameter_scope("conv2"):
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h, True)
                stride = (2, 2) if dn else (1, 1)
                if p > 0:
                    # NOTE(review): the rate is fixed at 0.5 and ``p``
                    # only acts as an on/off switch -- confirm intended.
                    h = F.dropout(h, p=0.5) if not test else h
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   stride=stride,
                                   with_bias=False)
        # Shortcut is average-pooled when the unit downsamples
        s = F.average_pooling(x, (2, 2)) if dn else x
        return h + s

    def csc(x, scope_name, dn=False):
        C = x.shape[1]
        h = x
        with nn.parameter_scope(scope_name):
            with nn.parameter_scope("conv1"):
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h, True)
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)

            with nn.parameter_scope("shift"):  # no meaning but semantics
                h = shift(h)

            with nn.parameter_scope("conv2"):
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h, True)
                stride = (2, 2) if dn else (1, 1)
                if p > 0:
                    # NOTE(review): the rate is fixed at 0.5 and ``p``
                    # only acts as an on/off switch -- confirm intended.
                    h = F.dropout(h, p=0.5) if not test else h
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   stride=stride,
                                   with_bias=False)
        # Shortcut is average-pooled when the unit downsamples
        s = F.average_pooling(x, (2, 2)) if dn else x
        return h + s

    def shift_unit(x, scope_name, dn=False):
        if module == "sc2":
            return sc2(x, scope_name, dn)
        if module == "csc":
            return csc(x, scope_name, dn)
        # Fail here with a clear message instead of returning None and
        # crashing inside the next unit.
        raise ValueError(
            "module must be 'sc2' or 'csc', got {!r}".format(module))

    ncls = 10

    # Conv -> BN -> Relu
    with nn.parameter_scope("conv1"):
        # Preprocess
        image /= 255.0
        if not test:
            image = F.image_augmentation(image, contrast=1.0,
                                         angle=0.25,
                                         flip_lr=True)
            image.need_grad = False
        h = PF.convolution(image, maps, kernel=(3, 3), pad=(1, 1),
                           with_bias=False)

    h = shift_unit(h, "conv2", False)    # -> 32x32
    h = shift_unit(h, "conv3", True)     # -> 16x16
    h = shift_unit(h, "conv4", False)    # -> 16x16
    h = shift_unit(h, "conv5", True)     # -> 8x8
    h = shift_unit(h, "conv6", False)    # -> 8x8
    h = shift_unit(h, "conv7", True)     # -> 4x4
    h = shift_unit(h, "conv8", False)    # -> 4x4
    h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
    pred = PF.affine(h, ncls)

    return pred
def cifar10_min_max_resnet23_prediction(image, maps=64,
                                        ql_min=0, ql_max=255,
                                        p_min_max=False,
                                        a_min_max=False,
                                        a_ema=False,
                                        ste_fine_grained=True,
                                        test=False):
    """
    Construct MinMaxNet using resnet23.

    Every convolution and the final affine layer use min-max quantized
    weights, and activations pass through ``PF.min_max_quantize``.
    ``a_min_max`` is forced to False when ``test`` is True.
    """
    # Activation min/max is never taken from the batch at test time.
    if test:
        a_min_max = False

    # Keyword sets shared by all quantized layers.
    weight_q = dict(ql_min_w=ql_min, ql_max_w=ql_max,
                    w_min_max=p_min_max,
                    ste_fine_grained_w=ste_fine_grained,
                    with_bias=False)
    act_q = dict(x_min_max=a_min_max, ema=a_ema,
                 ql_min=ql_min, ql_max=ql_max,
                 ste_fine_grained=ste_fine_grained)

    # Residual Unit
    def res_unit(x, scope_name, dn=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):
            # Conv -> Relu -> BN -> MinMaxQuantize
            with nn.parameter_scope("conv1"):
                h = PF.min_max_quantized_convolution(
                    x, C // 2, kernel=(1, 1), pad=(0, 0), **weight_q)
                h = F.relu(h)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = PF.min_max_quantize(h, **act_q)
            # Conv -> Relu -> BN -> MinMaxQuantize
            with nn.parameter_scope("conv2"):
                h = PF.min_max_quantized_convolution(
                    h, C // 2, kernel=(3, 3), pad=(1, 1), **weight_q)
                h = F.relu(h)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = PF.min_max_quantize(h, **act_q)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = PF.min_max_quantized_convolution(
                    h, C, kernel=(1, 1), pad=(0, 0), **weight_q)
                h = PF.batch_normalization(h, batch_stat=not test)
            # MinMaxQuantize -> Residual -> Relu
            h = PF.min_max_quantize(h, **act_q)
            h = F.relu(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))

        return h

    ncls = 10

    # Conv -> BN -> MinMaxQuantize -> Relu
    with nn.parameter_scope("conv1"):
        # Preprocess
        image /= 255.0
        if not test:
            image = F.image_augmentation(image, contrast=1.0,
                                         angle=0.25,
                                         flip_lr=True)
            image.need_grad = False
        h = PF.min_max_quantized_convolution(
            image, maps, kernel=(3, 3), pad=(1, 1), **weight_q)
        h = PF.batch_normalization(h, batch_stat=not test)
        h = PF.min_max_quantize(h, **act_q)
        h = F.relu(h)

    # (scope name, downsample) for each residual unit
    units = (("conv2", False),   # -> 32x32
             ("conv3", True),    # -> 16x16
             ("conv4", False),   # -> 16x16
             ("conv5", True),    # -> 8x8
             ("conv6", False),   # -> 8x8
             ("conv7", True),    # -> 4x4
             ("conv8", False))   # -> 4x4
    for scope_name, downsample in units:
        h = res_unit(h, scope_name, downsample)

    h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
    pred = PF.min_max_quantized_affine(
        h, ncls,
        quantize_b=True,
        ql_min_b=ql_min, ql_max_b=ql_max,
        b_min_max=p_min_max,
        ste_fine_grained_b=ste_fine_grained,
        **weight_q)
    return pred
def cifar10_inq_resnet23_prediction(image, maps=64, num_bits=4,
                                    inq_iterations=(5000, 6000, 7000,
                                                    8000, 9000),
                                    selection_algorithm='largest_abs',
                                    test=False):
    """
    Construct INQ Network using resnet23.

    Args:
        image: Input variable. It is scaled with ``image /= 255.0`` and,
            unless ``test`` is set, randomly augmented.
        maps: Number of output maps of the first convolution.
        num_bits: Forwarded as ``num_bits`` to every INQ layer.
        inq_iterations: Forwarded as ``inq_iterations`` to every INQ layer.
        selection_algorithm: Forwarded as ``selection_algorithm`` to every
            INQ layer.
        test: When true, augmentation is skipped and batch normalization is
            called with ``batch_stat=False``.

    Returns:
        Output of the final INQ affine layer (10 outputs).
    """
    # Shared INQ settings. ``num_bits`` is included so that the argument of
    # this function actually reaches the layers instead of being dropped.
    inq_kwargs = dict(num_bits=num_bits,
                      inq_iterations=inq_iterations,
                      selection_algorithm=selection_algorithm)

    # Residual Unit
    def res_unit(x, scope_name, dn=False):
        """Bottleneck residual unit; ``dn`` appends a 2x2 max pooling."""
        C = x.shape[1]
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                h = PF.inq_convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                                       with_bias=False, **inq_kwargs)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                h = PF.inq_convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                                       with_bias=False, **inq_kwargs)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = PF.inq_convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                       with_bias=False, **inq_kwargs)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

    ncls = 10

    # Conv -> BN -> Relu
    with nn.parameter_scope("conv1"):
        # Preprocess
        image /= 255.0
        if not test:
            image = F.image_augmentation(image, contrast=1.0,
                                         angle=0.25,
                                         flip_lr=True)
            image.need_grad = False
        h = PF.inq_convolution(image, maps, kernel=(3, 3), pad=(1, 1),
                               with_bias=False, **inq_kwargs)
        h = PF.batch_normalization(h, batch_stat=not test)
        h = F.relu(h)

    # Resolution notes assume a 32x32 input.
    h = res_unit(h, "conv2", False)    # -> 32x32
    h = res_unit(h, "conv3", True)     # -> 16x16
    h = res_unit(h, "conv4", False)    # -> 16x16
    h = res_unit(h, "conv5", True)     # -> 8x8
    h = res_unit(h, "conv6", False)    # -> 8x8
    h = res_unit(h, "conv7", True)     # -> 4x4
    h = res_unit(h, "conv8", False)    # -> 4x4
    h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
    pred = PF.inq_affine(h, ncls, **inq_kwargs)
    return pred
def cifar10_pow2_net_resnet23_prediction(image, maps=64,
                                         n=8, m=1,
                                         ste_fine_grained=True, test=False):
    """
    Construct Pow2Net using resnet23.

    Args:
        image: Input variable. It is scaled with ``image /= 255.0`` and,
            unless ``test`` is set, randomly augmented.
        maps: Number of output maps of the first convolution.
        n: Forwarded as ``n`` / ``n_w`` / ``n_b`` to the pow2 quantizers.
        m: Forwarded as ``m`` / ``m_w`` / ``m_b`` to the pow2 quantizers.
        ste_fine_grained: Forwarded to ``F.pow2_quantize``.
        test: When true, augmentation is skipped and batch normalization is
            called with ``batch_stat=False``.

    Returns:
        Output of the final pow2-quantized affine layer (10 outputs).
    """
    # Settings shared by every pow2-quantized parametric layer.
    layer_kwargs = dict(n_w=n, m_w=m, n_b=n, m_b=m)

    def quantized_conv(x, outmaps, kernel, pad):
        return PF.pow2_quantized_convolution(x, outmaps,
                                             kernel=kernel, pad=pad,
                                             with_bias=False,
                                             **layer_kwargs)

    def batch_norm(x):
        return PF.batch_normalization(x, batch_stat=not test)

    def quantize(x):
        return F.pow2_quantize(x, n=n, m=m,
                               ste_fine_grained=ste_fine_grained)

    def res_unit(x, scope_name, dn=False):
        """Bottleneck residual unit; ``dn`` appends a 2x2 max pooling."""
        channels = x.shape[1]
        bottleneck = channels // 2
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Pow2Quantize -> Relu
            with nn.parameter_scope("conv1"):
                h = quantized_conv(x, bottleneck, (1, 1), (0, 0))
                h = F.relu(quantize(batch_norm(h)))
            # Conv -> BN -> Pow2Quantize -> Relu
            with nn.parameter_scope("conv2"):
                h = quantized_conv(h, bottleneck, (3, 3), (1, 1))
                h = F.relu(quantize(batch_norm(h)))
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = batch_norm(quantized_conv(h, channels, (1, 1), (0, 0)))
            # Pow2Quantize -> Residual -> Relu
            h = F.relu(quantize(h) + x)
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

    ncls = 10

    # Conv -> BN -> Pow2Quantize -> Relu
    with nn.parameter_scope("conv1"):
        # Preprocess
        image /= 255.0
        if not test:
            image = F.image_augmentation(image, contrast=1.0,
                                         angle=0.25,
                                         flip_lr=True)
            image.need_grad = False
        h = quantized_conv(image, maps, (3, 3), (1, 1))
        h = F.relu(quantize(batch_norm(h)))

    # conv2 .. conv8; every second unit halves the resolution
    # (32 -> 16 -> 8 -> 4 when the input is 32x32).
    downsample_plan = (False, True, False, True, False, True, False)
    for index, downsample in enumerate(downsample_plan, start=2):
        h = res_unit(h, f"conv{index}", downsample)

    h = F.average_pooling(h, kernel=(4, 4))
    pred = PF.pow2_quantized_affine(h, ncls, **layer_kwargs)
    return pred
def cifar10_shuffle_prediction(image, maps=64, groups=1, test=False):
    """
    Construct ShuffleNet

    Args:
        image: Input variable. It is scaled with ``image /= 255.0`` and,
            unless ``test`` is set, randomly augmented.
        maps: Number of output maps of the first convolution.
        groups: Group count of the 1x1 group convolutions and of the channel
            shuffle; the channel count must be divisible by it.
        test: When true, augmentation is skipped and batch normalization is
            called with ``batch_stat=False``.

    Returns:
        Output of the final affine layer (10 outputs).
    """
    def batch_norm(x):
        return PF.batch_normalization(x, batch_stat=not test)

    def group_conv(x, outmaps):
        return PF.convolution(x, outmaps, kernel=(1, 1), pad=(0, 0),
                              group=groups, with_bias=False)

    def shuffle(x):
        """Interleave the channels of the ``groups`` channel groups."""
        batch, channels, height, width = x.shape
        assert channels % groups == 0
        # N, C, H, W -> N, g, C/g, H, W -> N, C/g, g, H, W -> N, C, H, W
        grouped = F.reshape(
            x, [batch, groups, channels // groups, height, width])
        swapped = F.transpose(grouped, [0, 2, 1, 3, 4])
        return F.reshape(swapped, [batch, channels, height, width])

    def shuffle_unit(x, scope_name, dn=False):
        """
        Figure. 2 (b) and (c) in https://arxiv.org/pdf/1707.01083.pdf

        With ``dn`` the depthwise convolution uses stride 2 and the shortcut
        is average-pooled and concatenated; otherwise it is added.
        """
        channels = x.shape[1]
        with nn.parameter_scope(scope_name):
            with nn.parameter_scope("gconv1"):
                h = F.relu(batch_norm(group_conv(x, channels)), True)
            with nn.parameter_scope("shuffle"):  # no meaning but semantics
                h = shuffle(h)
            with nn.parameter_scope("dconv"):
                stride = (2, 2) if dn else (1, 1)
                h = PF.depthwise_convolution(h, kernel=(3, 3), pad=(1, 1),
                                             stride=stride, with_bias=False)
                h = batch_norm(h)
            with nn.parameter_scope("gconv2"):
                h = batch_norm(group_conv(h, channels))
            if dn:
                shortcut = F.average_pooling(x, (2, 2))
                h = F.concatenate(h, shortcut, axis=1)
            else:
                h = h + x
            h = F.relu(h)
        return h

    ncls = 10

    # Conv -> BN -> Relu
    with nn.parameter_scope("conv1"):
        # Preprocess
        image /= 255.0
        if not test:
            image = F.image_augmentation(image, contrast=1.0,
                                         angle=0.25,
                                         flip_lr=True)
            image.need_grad = False
        h = PF.convolution(image, maps, kernel=(3, 3), pad=(1, 1),
                           with_bias=False)
        h = F.relu(batch_norm(h))

    # conv2 .. conv8; every second unit halves the resolution
    # (32 -> 16 -> 8 -> 4 when the input is 32x32).
    downsample_plan = (False, True, False, True, False, True, False)
    for index, downsample in enumerate(downsample_plan, start=2):
        h = shuffle_unit(h, f"conv{index}", downsample)

    h = F.average_pooling(h, kernel=(4, 4))
    pred = PF.affine(h, ncls)
    return pred
def cifar10_fp_net_resnet23_prediction(image, maps=64, test=False):
    """
    Construct Fixed-Point Net using resnet23. Fixed-Point Net quantizes
    weights and activations using FixedPointQuantize function.

    Args:
        image: Input variable. It is scaled with ``image /= 255.0`` and,
            unless ``test`` is set, randomly augmented.
        maps: Number of output maps of the first convolution.
        test: When true, augmentation is skipped and every batch
            normalization is called with ``batch_stat=False``.

    Returns:
        Output of the final fixed-point quantized affine layer (10 outputs).
    """
    # Residual Unit
    # NOTE: ``test`` is taken from the enclosing function and ``dn`` is the
    # third positional argument, so the calls below really select the
    # down-sampling units and honor test mode.
    def res_unit(x, scope_name, dn=False):
        """Bottleneck residual unit; ``dn`` appends a 2x2 max pooling."""
        C = x.shape[1]
        # Integer division: the result is used as a channel count.
        half = C // 2
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> FixedPointQuantize -> Relu
            with nn.parameter_scope("conv1"):
                h = PF.fixed_point_quantized_convolution(
                    x, half, kernel=(1, 1), pad=(0, 0), with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.fixed_point_quantize(h)
                h = F.relu(h)
            # Conv -> BN -> FixedPointQuantize -> Relu
            with nn.parameter_scope("conv2"):
                h = PF.fixed_point_quantized_convolution(
                    h, half, kernel=(3, 3), pad=(1, 1), with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.fixed_point_quantize(h)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = PF.fixed_point_quantized_convolution(
                    h, C, kernel=(1, 1), pad=(0, 0), with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> FixedPointQuantize -> Relu
            h = F.fixed_point_quantize(h)
            h = F.relu(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

    ncls = 10

    # Conv -> BN -> FixedPointQuantize
    with nn.parameter_scope("conv1"):
        # Preprocess
        image /= 255.0
        if not test:
            image = F.image_augmentation(image, contrast=1.0,
                                         angle=0.25,
                                         flip_lr=True)
            image.need_grad = False
        h = PF.fixed_point_quantized_convolution(
            image, maps, kernel=(3, 3), pad=(1, 1), with_bias=False)
        h = PF.batch_normalization(h, batch_stat=not test)
        h = F.fixed_point_quantize(h)

    # Resolution notes assume a 32x32 input.
    h = res_unit(h, "conv2", False)    # -> 32x32
    h = res_unit(h, "conv3", True)     # -> 16x16
    h = res_unit(h, "conv4", False)    # -> 16x16
    h = res_unit(h, "conv5", True)     # -> 8x8
    h = res_unit(h, "conv6", False)    # -> 8x8
    h = res_unit(h, "conv7", True)     # -> 4x4
    h = res_unit(h, "conv8", False)    # -> 4x4
    h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
    pred = PF.fixed_point_quantized_affine(h, ncls)
    return pred
def cifar10_svd_factorized_resnet23_prediction(image, maps=64, test=False,
                                               compression_ratio=0.0):
    """
    Construct Resnet23 with factorized affine and convolution

    The first convolution, the 3x3 convolution of every residual unit and
    the final affine layer are SVD-factorized; their rank is derived from
    ``compression_ratio``.

    Args:
        image: Input variable. It is scaled with ``image /= 255.0`` and,
            unless ``test`` is set, randomly augmented.
        maps: Number of output maps of the first convolution.
        test: When true, augmentation is skipped and batch normalization is
            called with ``batch_stat=False``.
        compression_ratio: Value ``cr`` used in the rank formulas of the
            factorized layers.

    Returns:
        Output of the final SVD affine layer (10 outputs).
    """
    def batch_norm(x):
        return PF.batch_normalization(x, batch_stat=not test)

    # SVD affine
    def svd_affine(x, n_outputs, cr):
        """Affine layer factorized to the rank implied by ``cr``."""
        existing_w = get_parameter('affine/W')
        existing_b = get_parameter('affine/b')
        # Reuse the weights found in the current scope, if any, as the
        # starting point of the factorization.
        uv_init = None if existing_w is None else existing_w.d

        # compute rank (size of intermediate activations)
        # to obtained desired reduction
        inshape = np.prod(x.shape[1:])
        outshape = np.prod(n_outputs)
        rank = int(
            np.floor((1 - cr) * inshape * outshape / (inshape + outshape)))

        # Initialize bias to existing b in affine if exists
        if existing_b is not None:
            b_new = get_parameter_or_create('svd_affine/b',
                                            existing_b.d.shape,
                                            need_grad=existing_b.need_grad)
            b_new.d = existing_b.d.copy()

        logger.info(
            "SVD affine created: input_shape = {}; output_shape = {}; compression = {}; rank = {};"
            .format(inshape, outshape, cr, rank))

        # create svd_affine initialized from W in current context if it exists
        return PF.svd_affine(x, n_outputs, rank, uv_init=uv_init)

    # SVD convolution
    def svd_convolution(x, n_outputs, kernel, pad, with_bias, cr):
        """Convolution factorized to the rank implied by ``cr``."""
        existing_w = get_parameter('conv/W')
        existing_b = get_parameter('conv/b')
        uv_init = None if existing_w is None else existing_w.d

        # compute rank (size of intermediate activations)
        # to obtained desired reduction
        inmaps = x.shape[1]
        outmaps = n_outputs
        Ksize = np.prod(kernel)
        rank = int(
            np.floor((1 - cr) * inmaps * outmaps * Ksize /
                     (inmaps * Ksize + inmaps * outmaps)))

        # Initialize bias to existing b in affine if exists
        if existing_b is not None:
            b_new = get_parameter_or_create('svd_conv/b',
                                            existing_b.d.shape,
                                            need_grad=existing_b.need_grad)
            b_new.d = existing_b.d.copy()

        logger.info(
            "SVD convolution created: inmaps = {}; outmaps = {}; compression = {}; rank = {};"
            .format(inmaps, outmaps, cr, rank))

        # create svd_convolution initialized from W in current context if it exists
        return PF.svd_convolution(x, n_outputs, kernel=kernel, r=rank,
                                  pad=pad, with_bias=with_bias,
                                  uv_init=uv_init)

    # Residual Unit
    def res_unit(x, scope_name, dn=False):
        """Bottleneck residual unit; ``dn`` appends a 2x2 max pooling."""
        channels = x.shape[1]
        bottleneck = channels // 2
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                h = PF.convolution(x, bottleneck, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = F.relu(batch_norm(h))
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                h = svd_convolution(h, bottleneck, kernel=(3, 3), pad=(1, 1),
                                    with_bias=False, cr=compression_ratio)
                h = F.relu(batch_norm(h))
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = PF.convolution(h, channels, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = batch_norm(h)
            # Residual -> Relu
            h = F.relu(h + x)
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

    ncls = 10

    # Conv -> BN -> Relu
    with nn.parameter_scope("conv1"):
        # Preprocess
        image /= 255.0
        if not test:
            image = F.image_augmentation(image, contrast=1.0,
                                         angle=0.25,
                                         flip_lr=True)
            image.need_grad = False
        h = svd_convolution(image, maps, kernel=(3, 3), pad=(1, 1),
                            with_bias=False, cr=compression_ratio)
        h = F.relu(batch_norm(h))

    # conv2 .. conv8; every second unit halves the resolution
    # (32 -> 16 -> 8 -> 4 when the input is 32x32).
    downsample_plan = (False, True, False, True, False, True, False)
    for index, downsample in enumerate(downsample_plan, start=2):
        h = res_unit(h, f"conv{index}", downsample)

    h = F.average_pooling(h, kernel=(4, 4))
    pred = svd_affine(h, ncls, compression_ratio)
    return pred
def resnet_prediction(image, test=False, ncls=2, nmaps=128, act=F.relu):
    """
    Construct a ResNet whose number of residual units follows the input size.

    Args:
        image: Input variable; its last axis length decides how many
            residual units are stacked.
        test: When true, augmentation is skipped and batch normalization is
            called with ``batch_stat=False``.
        ncls: Number of outputs of the final affine layer.
        nmaps: Number of output maps of the first convolution.
        act: Nonlinearity applied after the normalized convolutions and
            after each residual addition.

    Returns:
        Output of the final affine layer.
    """
    def conv_bn(x, outmaps, kernel, pad):
        conv = PF.convolution(x, outmaps, kernel=kernel, pad=pad,
                              with_bias=False)
        return PF.batch_normalization(conv, batch_stat=not test)

    # Residual Unit
    def res_unit(x, scope_name, dn=False):
        """Three-convolution residual unit; ``dn`` appends max pooling."""
        channels = x.shape[1]
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Nonlinear
            with nn.parameter_scope("conv1"):
                h = act(conv_bn(x, channels, (1, 1), (0, 0)))
            # Conv -> BN -> Nonlinear
            with nn.parameter_scope("conv2"):
                h = act(conv_bn(h, channels, (3, 3), (1, 1)))
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = conv_bn(h, channels, (1, 1), (0, 0))
            # Residual -> Nonlinear
            h = act(F.add2(h, x, inplace=False))
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

    # Conv -> BN -> Nonlinear
    with nn.parameter_scope("conv1"):
        # Preprocess
        if not test:
            image = F.image_augmentation(image, contrast=1.0,
                                         angle=0.25,
                                         flip_lr=True)
            image.need_grad = False
        h = act(conv_bn(image, nmaps, (3, 3), (1, 1)))

    image_size = image.shape[-1]
    log_size = np.log2(image_size)
    # Each stage is a plain unit followed by a down-sampling unit; the
    # down-sampling unit is left out when ``i`` equals ``log2(size) - 2``.
    for i in range(int(log_size) - 1):
        h = res_unit(h, f'conv{i*2+2}', False)
        if i != log_size - 2:
            h = res_unit(h, f'conv{i*2+3}', True)

    h = F.average_pooling(h, kernel=(4, 4))
    pred = PF.affine(h, ncls)
    return pred
def cifar10_resnet23_prediction(image, net="teacher", maps=64,
                                test=False):
    """
    Construct ResNet 23

    Args:
        image: Input variable. It is scaled with ``image /= 255.0`` and,
            unless ``test`` is set, randomly augmented.
        net: Name of the outermost parameter scope wrapping the network.
        maps: Number of output maps of the first convolution.
        test: When true, augmentation is skipped and batch normalization is
            called with ``batch_stat=False``.

    Returns:
        Output of the final affine layer (10 outputs).
    """
    # Residual Unit
    def res_unit(x, scope_name, dn=False):
        """Bottleneck residual unit; ``dn`` appends a 2x2 max pooling."""
        C = x.shape[1]
        # Integer division: the result is used as a channel count, and
        # ``C / 2`` would be a float.
        half = C // 2
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                h = PF.convolution(x, half, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                h = PF.convolution(h, half, kernel=(3, 3), pad=(1, 1),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

    ncls = 10
    with nn.parameter_scope(net):
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv1"):
            # Preprocess
            image /= 255.0
            if not test:
                image = F.image_augmentation(image, contrast=1.0,
                                             angle=0.25,
                                             flip_lr=True)
                image.need_grad = False
            h = PF.convolution(image, maps, kernel=(3, 3), pad=(1, 1),
                               with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)

        # Resolution notes assume a 32x32 input.
        h = res_unit(h, "conv2", False)    # -> 32x32
        h = res_unit(h, "conv3", True)     # -> 16x16
        h = res_unit(h, "conv4", False)    # -> 16x16
        h = res_unit(h, "conv5", True)     # -> 8x8
        h = res_unit(h, "conv6", False)    # -> 8x8
        h = res_unit(h, "conv7", True)     # -> 4x4
        h = res_unit(h, "conv8", False)    # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
        pred = PF.affine(h, ncls)

    return pred
def cifar100_resnet23_prediction(image, ctx, test=False):
    """
    Construct ResNet 23

    Args:
        image: Input variable; randomly augmented unless ``test`` is set.
        ctx: Context handed to ``nn.context_scope`` for the whole network.
        test: When true, augmentation is skipped and every batch
            normalization is called with ``batch_stat=False``.

    Returns:
        Output of the final affine layer (100 outputs).
    """
    # Residual Unit
    # NOTE: ``test`` is taken from the enclosing function so that the
    # residual units follow the same train/test mode as the first layer.
    def res_unit(x, scope_name, rng, dn=False):
        """Bottleneck residual unit; ``dn`` appends a 2x2 max pooling."""
        C = x.shape[1]
        # Integer division: the result is used as a channel count.
        half = C // 2
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(C, half, kernel=(1, 1)),
                    rng=rng)
                h = PF.convolution(x, half, kernel=(1, 1), pad=(0, 0),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(half, half, kernel=(3, 3)),
                    rng=rng)
                h = PF.convolution(h, half, kernel=(3, 3), pad=(1, 1),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(half, C, kernel=(1, 1)),
                    rng=rng)
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

    # Random generator for using the same init parameters in all devices
    rng = np.random.RandomState(0)
    nmaps = 384
    ncls = 100

    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope("conv1"):
            # Preprocess
            if not test:
                image = F.image_augmentation(image, contrast=1.0,
                                             angle=0.25,
                                             flip_lr=True)
                image.need_grad = False

            w_init = UniformInitializer(
                calc_uniform_lim_glorot(3, nmaps, kernel=(3, 3)),
                rng=rng)
            h = PF.convolution(image, nmaps, kernel=(3, 3), pad=(1, 1),
                               w_init=w_init, with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)

        # Resolution notes assume a 32x32 input.
        h = res_unit(h, "conv2", rng, False)    # -> 32x32
        h = res_unit(h, "conv3", rng, True)     # -> 16x16
        h = res_unit(h, "conv4", rng, False)    # -> 16x16
        h = res_unit(h, "conv5", rng, True)     # -> 8x8
        h = res_unit(h, "conv6", rng, False)    # -> 8x8
        h = res_unit(h, "conv7", rng, True)     # -> 4x4
        h = res_unit(h, "conv8", rng, False)    # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1

        w_init = UniformInitializer(
            calc_uniform_lim_glorot(int(np.prod(h.shape[1:])), ncls,
                                    kernel=(1, 1)),
            rng=rng)
        pred = PF.affine(h, ncls, w_init=w_init)

    return pred
def vgg16_prediction(image, test=False, ncls=10, seed=0):
    """
    Construct VGG16: five convolution blocks followed by three affine layers.

    Args:
        image: Input variable; randomly scaled and flipped unless ``test``
            is set.
        test: When true, the augmentation step is skipped.
        ncls: Number of outputs of the last affine layer.
        seed: Seed forwarded to ``F.image_augmentation``.

    Returns:
        Tuple ``(pred, h)``: the output of ``fc3`` and the ReLU output of
        ``fc2`` that feeds it.
    """
    # Preprocess
    if not test:
        image = F.image_augmentation(image, min_scale=0.8, max_scale=1.2,
                                     flip_lr=True, seed=seed)
        image.need_grad = False

    # Convolution layers: (output maps, number of 3x3 convolutions) per
    # block. Every convolution is followed by ReLU and every block by a
    # 2x2 max pooling.
    block_plan = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
    h = image
    for block_no, (outmaps, n_convs) in enumerate(block_plan, start=1):
        for conv_no in range(1, n_convs + 1):
            h = PF.convolution(h, outmaps, (3, 3), pad=(1, 1),
                               stride=(1, 1),
                               name=f"block{block_no}_conv{conv_no}")
            h = F.relu(h)
        h = F.max_pooling(h, (2, 2), stride=(2, 2))
    hidden = h

    # Fully-Connected layers
    h = F.relu(PF.affine(hidden, 4096, name="fc1"))
    h = F.relu(PF.affine(h, 4096, name="fc2"))
    pred = PF.affine(h, ncls, name="fc3")
    return pred, h
def cifar10_binary_net_resnet23_prediction(image, maps=64, test=False):
    """
    Construct BinaryNet using resnet23. Binary Net binarizes weights and
    activations.

    References:
        Courbariaux Matthieu, Bengio Yoshua, David Jean-Pierre,
        "BinaryConnect: Training Deep Neural Networks with binary weights
        during propagations", Advances in Neural Information Processing
        Systems 28 (NIPS 2015)

    Args:
        image: Input variable. It is scaled with ``image /= 255.0`` and,
            unless ``test`` is set, randomly augmented.
        maps: Number of output maps of the first convolution.
        test: When true, augmentation is skipped and every batch
            normalization is called with ``batch_stat=False``.

    Returns:
        Output of the final binary-connect affine layer (10 outputs).
    """
    # Residual Unit
    # NOTE: ``test`` is taken from the enclosing function and ``dn`` is the
    # third positional argument, so the calls below really select the
    # down-sampling units and honor test mode.
    def res_unit(x, scope_name, dn=False):
        """Bottleneck residual unit; ``dn`` appends a 2x2 max pooling."""
        C = x.shape[1]
        # Integer division: the result is used as a channel count.
        half = C // 2
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> BinaryTanh
            with nn.parameter_scope("conv1"):
                h = PF.binary_connect_convolution(
                    x, half, kernel=(1, 1), pad=(0, 0), with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.binary_tanh(h)
            # Conv -> BN -> BinaryTanh
            with nn.parameter_scope("conv2"):
                h = PF.binary_connect_convolution(
                    h, half, kernel=(3, 3), pad=(1, 1), with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.binary_tanh(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = PF.binary_connect_convolution(
                    h, C, kernel=(1, 1), pad=(0, 0), with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> BinaryTanh
            h = F.binary_tanh(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

    ncls = 10

    # Conv -> BN -> Binary_tanh
    with nn.parameter_scope("conv1"):
        # Preprocess
        image /= 255.0
        if not test:
            image = F.image_augmentation(image, contrast=1.0,
                                         angle=0.25,
                                         flip_lr=True)
            image.need_grad = False
        h = PF.binary_connect_convolution(
            image, maps, kernel=(3, 3), pad=(1, 1), with_bias=False)
        h = PF.batch_normalization(h, batch_stat=not test)
        h = F.binary_tanh(h)

    # Resolution notes assume a 32x32 input.
    h = res_unit(h, "conv2", False)    # -> 32x32
    h = res_unit(h, "conv3", True)     # -> 16x16
    h = res_unit(h, "conv4", False)    # -> 16x16
    h = res_unit(h, "conv5", True)     # -> 8x8
    h = res_unit(h, "conv6", False)    # -> 8x8
    h = res_unit(h, "conv7", True)     # -> 4x4
    h = res_unit(h, "conv8", False)    # -> 4x4
    h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
    pred = PF.binary_connect_affine(h, ncls)
    return pred
def cifar10_binary_weight_resnet23_prediction(image, maps=64, test=False):
    """
    Construct BinaryWeight using resnet23. Binary Weight binarizes weights,
    but uses the approximate coefficients to alleviate the binary
    quantization.

    References:
        Rastegari Mohammad, Ordonez Vicente, Redmon Joseph, and Farhadi Ali,
        "XNOR-Net: ImageNet Classification Using Binary Convolutional Neural
        Networks", arXiv:1603.05279

    Args:
        image: Input variable. It is scaled with ``image /= 255.0`` and,
            unless ``test`` is set, randomly augmented.
        maps: Number of output maps of the first convolution.
        test: When true, augmentation is skipped and every batch
            normalization is called with ``batch_stat=False``.

    Returns:
        Output of the final binary-weight affine layer (10 outputs).
    """
    # Residual Unit
    # NOTE: ``test`` is taken from the enclosing function and ``dn`` is the
    # third positional argument, so the calls below really select the
    # down-sampling units and honor test mode.
    def res_unit(x, scope_name, dn=False):
        """Bottleneck residual unit; ``dn`` appends a 2x2 max pooling."""
        C = x.shape[1]
        # Integer division: the result is used as a channel count.
        half = C // 2
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                h = PF.binary_weight_convolution(
                    x, half, kernel=(1, 1), pad=(0, 0), with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                h = PF.binary_weight_convolution(
                    h, half, kernel=(3, 3), pad=(1, 1), with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = PF.binary_weight_convolution(
                    h, C, kernel=(1, 1), pad=(0, 0), with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)

            # Maxpooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

    ncls = 10

    # Conv -> BN -> Relu
    with nn.parameter_scope("conv1"):
        # Preprocess
        image /= 255.0
        if not test:
            image = F.image_augmentation(image, contrast=1.0,
                                         angle=0.25,
                                         flip_lr=True)
            image.need_grad = False
        h = PF.binary_weight_convolution(
            image, maps, kernel=(3, 3), pad=(1, 1), with_bias=False)
        h = PF.batch_normalization(h, batch_stat=not test)
        h = F.relu(h)

    # Resolution notes assume a 32x32 input.
    h = res_unit(h, "conv2", False)    # -> 32x32
    h = res_unit(h, "conv3", True)     # -> 16x16
    h = res_unit(h, "conv4", False)    # -> 16x16
    h = res_unit(h, "conv5", True)     # -> 8x8
    h = res_unit(h, "conv6", False)    # -> 8x8
    h = res_unit(h, "conv7", True)     # -> 4x4
    h = res_unit(h, "conv8", False)    # -> 4x4
    h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
    pred = PF.binary_weight_affine(h, ncls)
    return pred
def single_image_augment(image):
    """
    Augment ``image`` and then shift it randomly.

    ``F.image_augmentation`` is applied first (``contrast=1.0``,
    ``angle=0.0``, ``flip_lr=True``), followed by ``F.random_shift`` with
    shifts ``(4, 4)`` and the ``"reflect"`` border mode.

    Args:
        image: Input variable to augment.

    Returns:
        The output of the random shift.
    """
    augmented = F.image_augmentation(image, contrast=1.0, angle=0.0,
                                     flip_lr=True)
    return F.random_shift(augmented, shifts=(4, 4), border_mode="reflect")