def __init__(self, nfm, first=False, strides=1, batch_norm=False):
    self.trunk = None
    self.side_path = None
    main_path = [
        Convolution(**conv_params(1, nfm, strides=strides, batch_norm=batch_norm)),
        Convolution(**conv_params(3, nfm, batch_norm=batch_norm)),
        Convolution(**conv_params(1, nfm * 4, relu=False, batch_norm=False))
    ]
    if first or strides == 2:
        self.side_path = Convolution(
            **conv_params(1, nfm * 4, strides=strides, relu=False, batch_norm=False))
    else:
        if batch_norm:
            main_path = [BatchNorm(), Activation(Rectlin())] + main_path
        else:
            main_path = [Activation(Rectlin())] + main_path

    if strides == 2:
        if batch_norm:
            self.trunk = Sequential([BatchNorm(), Activation(Rectlin())])
        else:
            self.trunk = Sequential([Activation(Rectlin())])

    self.main_path = Sequential(main_path)
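# A minimal sketch of how the trunk, main path, and side path built above are
# assumed to combine in the forward pass (the actual residual wiring lives
# elsewhere in the model; bottleneck_forward is a hypothetical helper):
def bottleneck_forward(x, trunk, main_path, side_path):
    t = x if trunk is None else trunk(x)                  # optional pre-activation
    shortcut = t if side_path is None else side_path(t)   # 1x1 projection or identity
    return main_path(t) + shortcut                        # residual sum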
def create_network():
    '''
    Define 3D convolutional network
    '''
    # Define initializers for weights and biases
    g1 = GaussianInit(mean=0., var=0.01)
    g5 = GaussianInit(mean=0., var=0.005)
    c0 = ConstantInit(val=0.)
    c1 = ConstantInit(val=1.)

    ax.Y.length = 101

    padding = {'D': 1, 'H': 1, 'W': 1, 'C': 0}
    strides = {'D': 2, 'H': 2, 'W': 2, 'C': 1}

    layers = [
        Convolution((3, 3, 3, 64), padding=padding, filter_init=g1,
                    bias_init=c0, activation=Rectlin()),
        Pooling((1, 2, 2), strides={'D': 1, 'H': 2, 'W': 2, 'C': 1}),
        Convolution((3, 3, 3, 128), padding=padding, filter_init=g1,
                    bias_init=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Convolution((3, 3, 3, 256), padding=padding, filter_init=g1,
                    bias_init=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Convolution((3, 3, 3, 256), padding=padding, filter_init=g1,
                    bias_init=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Convolution((3, 3, 3, 256), padding=padding, filter_init=g1,
                    bias_init=c1, activation=Rectlin()),
        Pooling((2, 2, 2), strides=strides),
        Affine(nout=2048, weight_init=g5, bias_init=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=2048, weight_init=g5, bias_init=c1, activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(axes=ax.Y, weight_init=g1, bias_init=c0, activation=Softmax())
    ]
    return Sequential(layers)
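# Volume bookkeeping for the network above, assuming a 16-frame 112x112 clip
# (an assumed input shape; the function itself does not fix it). The 3x3x3
# convolutions are padded to preserve size, so only the pools shrink the volume:
D, H, W = 16, 112, 112
D, H, W = D // 1, H // 2, W // 2   # Pooling((1, 2, 2)) -> 16 x 56 x 56
for _ in range(4):                 # four Pooling((2, 2, 2)) stages
    D, H, W = D // 2, H // 2, W // 2
print(D, H, W)                     # 1 x 3 x 3, flattened into the 2048-unit Affine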
def test_axis_preservation(conv1d_placeholder, output_size):
    """ Test that axes into a conv are the same as axes out"""
    conv_layer = Convolution((3, output_size), lambda x: 1)
    output = conv_layer(conv1d_placeholder)
    assert output.axes == conv1d_placeholder.axes, (
        "Output axes are not the same as input axes: "
        "{} != {}").format(output.axes, conv1d_placeholder.axes)
def __init__(self, inputs, stage_depth, batch_norm=True,
             activation=True, preprocess=True):
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    strides = [1 if cur == prev else 2
               for cur, prev in zip(nfms[1:], nfms[:-1])]

    layers = []
    if preprocess:
        layers = [Preprocess(functor=cifar_mean_subtract)]
    parallel_axis = inputs['image'].axes.batch_axes()
    with ng.metadata(device_id=('1', '2'), parallel=parallel_axis[0]):
        layers.append(Convolution(**conv_params(3, 16, batch_norm=batch_norm)))
        layers.append(f_module(nfms[0], first=True))
        for nfm, stride in zip(nfms[1:], strides):
            layers.append(f_module(nfm, strides=stride))
        if batch_norm:
            layers.append(BatchNorm())
        if activation:
            layers.append(Activation(Rectlin()))
        layers.append(Pool2D(8, strides=2, op='avg'))
        layers.append(Affine(axes=ax.Y,
                             weight_init=KaimingInit(),
                             batch_norm=batch_norm,
                             activation=Softmax()))
    self.layers = layers
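# The two comprehensions above expand as follows for, e.g., stage_depth = 2:
# sorted(list(range(3)) * 2) -> [0, 0, 1, 1, 2, 2], so
nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * 2)]
strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]
print(nfms)     # [16, 16, 32, 32, 64, 64]
print(strides)  # [1, 2, 1, 2, 1] -- downsample whenever the filter count doubles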
def __init__(self, inputs, dataset, stage_depth,
             batch_norm=False, activation=False, preprocess=False):
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    strides = [1 if cur == prev else 2
               for cur, prev in zip(nfms[1:], nfms[:-1])]

    layers = []
    if preprocess and dataset == 'cifar10':
        layers = [Preprocess(functor=cifar_mean_subtract)]
    layers.append(Convolution(**conv_params(3, 16, batch_norm=batch_norm)))
    layers.append(f_module(nfms[0], first=True, batch_norm=batch_norm))
    for nfm, stride in zip(nfms[1:], strides):
        layers.append(f_module(nfm, strides=stride, batch_norm=batch_norm))
    if batch_norm:
        layers.append(BatchNorm())
    if activation:
        layers.append(Activation(Rectlin()))
    layers.append(Pool2D(8, strides=2, op='avg'))
    if dataset == 'cifar10':
        ax.Y.length = 10
        layers.append(Affine(axes=ax.Y,
                             weight_init=KaimingInit(),
                             batch_norm=batch_norm,
                             activation=Softmax()))
    elif dataset == 'i1k':
        ax.Y.length = 1000
        layers.append(Affine(axes=ax.Y,
                             weight_init=KaimingInit(),
                             batch_norm=batch_norm,
                             activation=Softmax()))
    else:
        raise ValueError("Incorrect dataset provided")
    super(mini_residual_network, self).__init__(layers=layers)
def get_mp_sp(self, num_fils, net_type, direct=True, bottleneck=False, strides=1):
    if net_type == "cifar10":
        # Main path for CIFAR10 is fixed
        main_path = Sequential([
            Convolution(**conv_params(3, num_fils, strides=strides)),
            Convolution(**conv_params(3, num_fils, activation=None))
        ])
        # Side path: identity if direct, 1x1 convolution otherwise
        if direct:
            side_path = None
        else:
            side_path = Convolution(
                **conv_params(1, num_fils, strides=strides, activation=None))
    elif net_type == "i1k":
        # Main path for i1k depends on whether bottleneck is enabled
        if bottleneck:
            main_path = Sequential([
                Convolution(**conv_params(1, num_fils, strides=strides)),
                Convolution(**conv_params(3, num_fils)),
                Convolution(**conv_params(1, num_fils * 4, activation=None))
            ])
        else:
            main_path = Sequential([
                Convolution(**conv_params(3, num_fils, strides=strides)),
                Convolution(**conv_params(3, num_fils, activation=None))
            ])
        # Side path: identity if direct, 1x1 convolution otherwise
        if direct:
            side_path = None
        else:
            if bottleneck:
                side_path = Convolution(
                    **conv_params(1, num_fils * 4, strides=strides, activation=None))
            else:
                side_path = Convolution(
                    **conv_params(1, num_fils, strides=strides, activation=None))
    else:
        raise NameError(
            "Incorrect dataset. Should be --dataset cifar10 or --dataset i1k")
    return main_path, side_path
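# Channel bookkeeping for the i1k bottleneck above: with num_fils = 64, the
# main path runs 64 -> 64 -> 256 channels (the final 1x1 expands by 4), so a
# non-direct side path must also project to num_fils * 4 = 256 channels for
# the residual addition to be well-typed.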
def make_discriminator(bn=True, disc_activation=None, bias_init=None):
    conv_layers = [
        Convolution((4, 4, 128), filter_init, strides=2, padding=1,
                    activation=lrelu, batch_norm=False, bias_init=bias_init)]
    conv_layers.append(
        Convolution((4, 4, 256), filter_init, strides=2, padding=1,
                    activation=lrelu, batch_norm=bn, bias_init=bias_init))
    conv_layers.append(
        Convolution((4, 4, 512), filter_init, strides=2, padding=1,
                    activation=lrelu, batch_norm=bn, bias_init=bias_init))
    conv_layers.append(
        Convolution((4, 4, 1024), filter_init, strides=2, padding=1,
                    activation=lrelu, batch_norm=bn, bias_init=bias_init))
    conv_layers.append(
        Affine(weight_init=filter_init, activation=None, batch_norm=False,
               axes=ng.make_axes({"C": 1, "H": 1, "W": 1})))
    return Sequential(conv_layers, name="Discriminator")
def __init__(self, nfilters, filter_width, str_w, nbands, depth, hidden_size,
             batch_norm=False, batch_norm_affine=False, batch_norm_conv=False,
             to_ctc=True):
    self.to_ctc = to_ctc

    # Initializers
    gauss = GaussianInit(0.01)
    glorot = GlorotInit()

    # 1D Convolution layer
    padding = dict(pad_h=0, pad_w=filter_width // 2, pad_d=0)
    strides = dict(str_h=1, str_w=str_w, str_d=1)
    dilation = dict(dil_d=1, dil_h=1, dil_w=1)

    conv_layer = Convolution((nbands, filter_width, nfilters),
                             gauss,
                             bias_init=ConstantInit(0),
                             padding=padding,
                             strides=strides,
                             dilation=dilation,
                             activation=Rectlin(),
                             batch_norm=batch_norm_conv)

    # Add BiRNN layers
    deep_birnn = DeepBiRNN(depth, hidden_size, glorot, Rectlinclip(),
                           batch_norm=batch_norm)

    # Add a single affine layer
    fc = Affine(nout=hidden_size, weight_init=glorot,
                activation=Rectlinclip(), batch_norm=batch_norm_affine)

    # Add the final affine layer
    # Softmax output is computed within the CTC cost function,
    # so no activation is needed here.
    if self.to_ctc is False:
        activation = Softmax()
    else:
        activation = None
    final = Affine(axes=ax.Y, weight_init=glorot, activation=activation)

    layers = [conv_layer, deep_birnn, fc, final]
    super(Deepspeech, self).__init__(layers=layers)
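# Width bookkeeping for conv_layer above, assuming an odd filter_width and the
# usual strided-convolution size formula: with pad_w = filter_width // 2, the
# padded extent satisfies T + 2 * pad_w - filter_width = T - 1, so the layer
# emits floor((T - 1) / str_w) + 1 = ceil(T / str_w) time steps -- 'same'-style
# framing, downsampled only by the stride.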
def test_channel_axis_introduction(conv1d_no_channel_axis, output_size, channel_axis):
    """ Test that a channel axis is added when it doesn't exist in the input"""
    conv_layer = Convolution((3, output_size), lambda x: 1)
    output = conv_layer(conv1d_no_channel_axis)
    t_axes = conv1d_no_channel_axis.axes + channel_axis
    assert output.axes.is_equal_set(t_axes), (
        "Output axes are not input axes + channel axis: "
        "{} != {} + {}").format(output.axes, conv1d_no_channel_axis.axes, channel_axis)
def test_alternate_channel_axes(conv1d_placeholder, output_size, channel_axis):
    """ Test that channel axis names are modifiable"""
    channel_axis.name = "channel"
    assert len(conv1d_placeholder.axes.find_by_name("channel")) == 1

    conv_layer = Convolution((3, output_size), lambda x: 1)
    # The default channel axis name is gone, so the layer cannot find it
    with pytest.raises(IncompatibleAxesError):
        conv_layer(conv1d_placeholder)
    # Naming the channel axis explicitly succeeds
    output = conv_layer(conv1d_placeholder, channel_axes="channel")
    assert output.axes == conv1d_placeholder.axes
def __init__(self, branch_units, activation=Rectlin(),
             bias_init=UniformInit(low=-0.08, high=0.08),
             filter_init=XavierInit()):
    (p1, p2, p3, p4) = branch_units

    self.branch_1 = Convolution((1, 1, p1[0]), activation=activation,
                                bias_init=bias_init, filter_init=filter_init)
    self.branch_2 = [Convolution((1, 1, p2[0]), activation=activation,
                                 bias_init=bias_init, filter_init=filter_init),
                     Convolution((3, 3, p2[1]), activation=activation,
                                 bias_init=bias_init, filter_init=filter_init,
                                 padding=1)]
    self.branch_3 = [Convolution((1, 1, p3[0]), activation=activation,
                                 bias_init=bias_init, filter_init=filter_init),
                     Convolution((5, 5, p3[1]), activation=activation,
                                 bias_init=bias_init, filter_init=filter_init,
                                 padding=2)]
    # Pooling branch: 3x3 max pool followed by a 1x1 projection to p4[0] filters
    self.branch_4 = [Pool2D(fshape=3, padding=1, strides=1, op="max"),
                     Convolution((1, 1, p4[0]), activation=activation,
                                 bias_init=bias_init, filter_init=filter_init)]
def test_dilated_conv(dilation):
    """Test that the dilated convolution layer output matches expected.

    This test compares the maximum output value to an expected max output
    value. The expected value is computed based on the dilation parameter.
    The test also checks that the output size matches the expected size
    based on the dilation parameter value."""
    image_size = 3
    batch_size = 1
    init_val = 0.1
    conv_size = 3
    pad = 3
    N_filters = 1
    image_channels = 3
    model = Sequential([
        Convolution((conv_size, conv_size, N_filters),
                    filter_init=ConstantInit(val=init_val),
                    padding=pad,
                    dilation=dilation)
    ])
    X = np.ones(shape=(batch_size, 3, image_size, image_size))  # Create dummy image
    data = {'image': X, 'iteration': 1}
    data_size = OrderedDict([('N', batch_size), ('C', 3),
                             ('H', image_size), ('W', image_size)])
    ax = [ng.make_axis(length=data_size[k], name=k) for k in list(data_size.keys())]
    p_axes = ng.make_axes(ax)
    named_inputs = {'image': ng.placeholder(p_axes)}
    outputs = model(named_inputs['image'])
    named_outputs = {outputs.name: outputs}
    with closing(ngt.make_transformer()) as transformer:
        m = make_bound_computation(transformer, named_outputs, named_inputs)
        output = m(data)[list(m(data).keys())[0]]

    filter_size = dilation * (conv_size - 1) + 1  # Compute expected filter size
    # Compute the expected output size based on convolution parameters
    out_size = (image_size + 2 * pad - filter_size) + 1

    filt_tmp = np.zeros(filter_size)
    filt_tmp[0::dilation] = 1
    # max overlap between dilated filter and image (in 1-d)
    max_overlap = int(np.min([filter_size, image_size]))
    # Expected max output changes for different dilation parameter values
    exp_max_output = init_val * image_channels * (np.sum(filt_tmp[0:max_overlap]))**2

    assert int(10 * np.max(output)) == int(10 * exp_max_output), \
        ("Dilated conv max outputs do not match expected: "
         "{} != {}").format(np.max(output), exp_max_output)
    assert np.shape(output) == (batch_size, N_filters, out_size, out_size), \
        ("Dilated conv output is not expected size: "
         "{} != {}").format(np.shape(output),
                            (batch_size, N_filters, out_size, out_size))
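# Worked instance of the arithmetic above for dilation = 2 (illustrative
# values only): filter_size = 2 * (3 - 1) + 1 = 5 and, with image_size = 3
# and pad = 3, out_size = 3 + 6 - 5 + 1 = 5. The dilated taps [1, 0, 1, 0, 1]
# overlap the 3-pixel image in min(5, 3) = 3 positions, 2 of them nonzero, so
# exp_max_output = 0.1 * 3 * 2**2 = 1.2.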
def define_model(out_axis, filter_shapes=[5], n_filters=[32], init=KaimingInit()):
    assert len(filter_shapes) == len(n_filters)

    layers = []
    for e, (f, n) in enumerate(zip(filter_shapes, n_filters)):
        layers.append(Convolution(filter_shape=(f, n),
                                  filter_init=init,
                                  strides=1,
                                  padding="valid",
                                  dilation=1,
                                  activation=Rectlin(),
                                  batch_norm=True))

    affine_layer = Affine(weight_init=init, bias_init=init,
                          activation=Identity(), axes=out_axis)
    model = Sequential(layers + [affine_layer])

    return model
def make_generator_gp(bn=True, n_extra_layers=0, bias_init=None):
    deconv_layers = [
        Deconvolution((4, 4, 512), filter_init, strides=1, padding=0,
                      activation=relu, batch_norm=bn, bias_init=bias_init),
        Deconvolution((4, 4, 256), filter_init, strides=2, padding=1,
                      activation=relu, batch_norm=bn, bias_init=bias_init),
        Deconvolution((4, 4, 128), filter_init, strides=2, padding=1,
                      activation=relu, batch_norm=bn, bias_init=bias_init),
        Deconvolution((4, 4, 64), filter_init, strides=2, padding=1,
                      activation=relu, batch_norm=bn, bias_init=bias_init)
    ]
    for i in range(n_extra_layers):
        deconv_layers.append(
            Convolution((3, 3, 64), filter_init, strides=1, padding=1,
                        activation=lrelu, batch_norm=bn, bias_init=bias_init))
    deconv_layers.append(
        Deconvolution((4, 4, 3), filter_init, strides=2, padding=1,
                      activation=Tanh(), batch_norm=False, bias_init=bias_init))
    return Sequential(deconv_layers, name="Generator")
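# Spatial bookkeeping for the stack above, assuming a 1x1 latent input (the
# usual DCGAN-style setup; not fixed by this function) and the standard
# transposed-convolution size formula strides * (in - 1) + filter - 2 * padding.
# The optional 3x3 stride-1 pad-1 extra layers preserve size:
size = 1
size = 1 * (size - 1) + 4 - 0   # (4, 4, 512), strides=1, padding=0 -> 4
for _ in range(3):              # the three strides=2, padding=1 deconvolutions
    size = 2 * (size - 1) + 4 - 2
size = 2 * (size - 1) + 4 - 2   # final (4, 4, 3) deconvolution
print(size)                     # 64 -> a 64x64, 3-channel image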
def test_alternate_spatial_axes(conv1d_placeholder, output_size, width_axis):
    """ Test that spatial axis names are modifiable """
    width_axis.name = "time"
    assert len(conv1d_placeholder.axes.find_by_name("time")) == 1

    conv_layer = Convolution((3, output_size), lambda x: 1)
    with pytest.raises(IncompatibleAxesError):
        conv_layer(conv1d_placeholder)
    # As a dictionary
    output = conv_layer(conv1d_placeholder, spatial_axes={"W": "time"})
    assert output.axes == conv1d_placeholder.axes
    # As a tuple
    output = conv_layer(conv1d_placeholder, spatial_axes=("D", "H", "time"))
    assert output.axes == conv1d_placeholder.axes
def test_causal_convolution(conv1d_placeholder, spatial_onehot, output_size, width):
    """ Test that causal convolutions only operate on leftward inputs"""
    conv_layer = Convolution((3, output_size), lambda x: 1, padding="causal")
    output = conv_layer(conv1d_placeholder)
    output_width = output.axes.find_by_name("W")[0].length
    assert output_width == width, ("Causal convolution output width != "
                                   "input width: {} != {}").format(output_width, width)
    with executor(output, conv1d_placeholder) as comp:
        output_val = comp(spatial_onehot)
        # First 1 is at width // 2, so anything before that should be 0
        assert (output_val[:, :width // 2] == 0).all(), \
            "Acausal outputs in causal convolution"
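# A plain-NumPy sketch of what "causal" padding is assumed to mean above:
# pad filter - 1 zeros on the left only, so output[t] sees input[:t + 1].
import numpy as np

def causal_conv1d(x, w):
    x = np.concatenate([np.zeros(len(w) - 1), x])  # left padding only
    return np.array([x[t:t + len(w)] @ w for t in range(len(x) - len(w) + 1)])

print(causal_conv1d(np.array([0., 0., 1., 0.]), np.ones(3)))
# -> [0. 0. 1. 1.]: the impulse at t=2 affects outputs at t >= 2 only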
def test_same_convolution(conv1d_placeholder, spatial_onehot, output_size,
                          width, stride):
    """ Test that 'same' always results in out_size = np.ceil(in_size / stride) """
    conv_layer = Convolution((3, output_size), lambda x: 1,
                             strides=stride, padding="same")
    output = conv_layer(conv1d_placeholder)
    output_width = output.axes.find_by_name("W")[0].length
    assert output_width == np.ceil(width / float(stride)), \
        ("Same convolution output width != ceil(input_width / stride): "
         "{} != ceil({} / {})").format(output_width, width, stride)
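# E.g. for width = 5 (an illustrative value): stride 1 gives ceil(5 / 1) = 5,
# stride 2 gives ceil(5 / 2) = 3, and stride 3 gives ceil(5 / 3) = 2 -- the
# 'same' padding is chosen so the result is independent of the filter size.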
def make_discriminator_gp(bn=True, n_extra_layers=0, disc_activation=None,
                          bias_init=None):
    conv_layers = [
        Convolution((4, 4, 64), filter_init, strides=2, padding=1,
                    activation=lrelu, batch_norm=False, bias_init=bias_init)]
    for i in range(n_extra_layers):
        conv_layers.append(
            Convolution((3, 3, 64), filter_init, strides=1, padding=1,
                        activation=lrelu, batch_norm=bn, bias_init=bias_init))
    conv_layers.append(
        Convolution((4, 4, 128), filter_init, strides=2, padding=1,
                    activation=lrelu, batch_norm=bn, bias_init=bias_init))
    conv_layers.append(
        Convolution((4, 4, 256), filter_init, strides=2, padding=1,
                    activation=lrelu, batch_norm=bn, bias_init=bias_init))
    conv_layers.append(
        Convolution((4, 4, 512), filter_init, strides=2, padding=1,
                    activation=lrelu, batch_norm=bn, bias_init=bias_init))
    conv_layers.append(
        Convolution((4, 4, 1), filter_init, strides=1, padding=0,
                    activation=disc_activation, batch_norm=False,
                    bias_init=bias_init))
    return Sequential(conv_layers, name="Discriminator")
def __init__(self, branch_units=[(384,), (64, 96, 96)], name=None):
    """
    Second inception block with three branches, concatenated in the end
        1. 3x3 conv (stride=2, valid)
        2. 1x1 conv, 3x3 conv, 3x3 conv (stride=2, valid)
        3. 3x3 pool (stride=2, valid)
    Convolution(H, W, K): height, width, number of filters
    Mixed_6a layer
    """
    (p1, p2) = branch_units

    branch1 = Convolution(name=name + '_br1_3x3conv',
                          **conv_params(filter_shape=(3, 3, p1[0]),
                                        strides=2, padding=0))
    branch2 = Sequential([
        Convolution(name=name + '_br2_1x1conv',
                    **conv_params(filter_shape=(1, 1, p2[0]))),
        Convolution(name=name + '_br2_3x3conv1',
                    **conv_params(filter_shape=(3, 3, p2[1]), padding=1)),
        Convolution(name=name + '_br2_3x3conv2',
                    **conv_params(filter_shape=(3, 3, p2[2]),
                                  strides=2, padding=0))
    ])
    branch3 = Pooling(pool_shape=(3, 3), padding=0, strides=2,
                      pool_type="max", name=name + '_br3_maxpool')

    branches = [branch1, branch2, branch3]
    super(Inceptionv3_b2, self).__init__(name=name, branches=branches,
                                         mode='concat')
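# Shape check for Mixed_6a, assuming the customary 35x35x288 input from the
# preceding Mixed_5d block: every branch ends in a stride-2 valid 3x3 op, so
# (35 - 3) // 2 + 1 = 17, and the branches yield 17x17 maps with 384, 96, and
# 288 channels respectively, concatenating to the expected 17x17x768 output.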
def __init__(self, branch_units=[(192, 320), (192, 192, 192, 192)], name=None):
    """
    Fourth inception block with three branches, concatenated in the end
        1. 1x1 conv, 3x3 conv (stride=2, valid)
        2. 1x1 conv, 1x7 conv, 7x1 conv, 3x3 conv (stride=2, valid)
        3. 3x3 pool (stride=2, valid)
    Convolution(H, W, K): height, width, number of filters
    Mixed_7a layer
    """
    (p1, p2) = branch_units

    branch1 = Sequential([
        Convolution(name=name + '_br1_conv1x1',
                    **conv_params(filter_shape=(1, 1, p1[0]))),
        Convolution(name=name + '_br1_conv3x3',
                    **conv_params(filter_shape=(3, 3, p1[1]),
                                  strides=2, padding=0))
    ])
    branch2 = Sequential([
        Convolution(name=name + '_br2_conv1x1',
                    **conv_params(filter_shape=(1, 1, p2[0]))),
        Convolution(name=name + '_br2_conv1x7',
                    **conv_params(filter_shape=(1, 7, p2[1]),
                                  padding={'H': 0, 'W': 3, 'D': 0})),
        Convolution(name=name + '_br2_conv7x1',
                    **conv_params(filter_shape=(7, 1, p2[2]),
                                  padding={'H': 3, 'W': 0, 'D': 0})),
        Convolution(name=name + '_br2_conv3x3',
                    **conv_params(filter_shape=(3, 3, p2[3]),
                                  strides=2, padding=0))
    ])
    branch3 = Pooling(name=name + '_br3_maxpool', pool_shape=(3, 3),
                      padding=0, strides=2, pool_type="max")

    branches = [branch1, branch2, branch3]
    super(Inceptionv3_b4, self).__init__(name=name, branches=branches,
                                         mode='concat')
def __init__(self, branch_units=[(64,), (48, 64), (64, 96, 96), (64,)], name=None):
    """
    First inception block with four branches, concatenated in the end
        1. 1x1 conv
        2. 1x1 conv, 5x5 conv
        3. 1x1 conv, 3x3 conv, 3x3 conv
        4. 3x3 pool, 1x1 conv
    Convolution(H, W, K): height, width, number of filters
    Mixed_5b, Mixed_5c, Mixed_5d layers
    """
    (p1, p2, p3, p4) = branch_units

    branch1 = Convolution(name=name + '_br1_1x1conv',
                          **conv_params(filter_shape=(1, 1, p1[0])))
    branch2 = Sequential([
        Convolution(name=name + '_br2_1x1conv',
                    **conv_params(filter_shape=(1, 1, p2[0]))),
        Convolution(name=name + '_br2_5x5conv',
                    **conv_params(filter_shape=(5, 5, p2[1]), padding=2))
    ])
    branch3 = Sequential([
        Convolution(name=name + '_br3_1x1conv',
                    **conv_params(filter_shape=(1, 1, p3[0]))),
        Convolution(name=name + '_br3_3x3conv1',
                    **conv_params(filter_shape=(3, 3, p3[1]), padding=1)),
        Convolution(name=name + '_br3_3x3conv2',
                    **conv_params(filter_shape=(3, 3, p3[2]), padding=1))
    ])
    branch4 = Sequential([
        Pooling(name=name + '_br4_avgpool', pool_shape=(3, 3),
                padding=1, strides=1, pool_type="avg"),
        Convolution(name=name + '_br4_conv1x1',
                    **conv_params(filter_shape=(1, 1, p4[0])))
    ])

    branches = [branch1, branch2, branch3, branch4]
    super(Inceptionv3_b1, self).__init__(name=name, branches=branches,
                                         mode='concat')
def make_discriminator(bn=True, disc_activation=None):
    conv_layers = [
        Convolution((3, 3, 96), filter_init, strides=1, padding=1,
                    activation=lrelu, batch_norm=bn),
        Convolution((3, 3, 96), filter_init, strides=2, padding=1,
                    activation=lrelu, batch_norm=bn),
        Convolution((3, 3, 192), filter_init, strides=1, padding=1,
                    activation=lrelu, batch_norm=bn),
        Convolution((3, 3, 192), filter_init, strides=2, padding=1,
                    activation=lrelu, batch_norm=bn),
        Convolution((3, 3, 192), filter_init, strides=1, padding=1,
                    activation=lrelu, batch_norm=bn),
        Convolution((1, 1, 16), filter_init, strides=1, padding=0,
                    activation=lrelu, batch_norm=bn),
        Convolution((7, 7, 1), filter_init, strides=1, padding=0,
                    activation=disc_activation, batch_norm=False)
    ]
    return Sequential(conv_layers, name="Discriminator")
def __init__(self, stage_depth):
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    print(nfms)
    strides = [1 if cur == prev else 2
               for cur, prev in zip(nfms[1:], nfms[:-1])]

    layers = [Preprocess(functor=cifar_mean_subtract),
              Convolution(**conv_params(3, 16)),
              f_module(nfms[0], first=True)]
    for nfm, stride in zip(nfms[1:], strides):
        layers.append(f_module(nfm, strides=stride))

    layers.append(BatchNorm())
    layers.append(Activation(Rectlin()))
    layers.append(Pooling((8, 8), pool_type='avg'))
    layers.append(Affine(axes=ax.Y,
                         weight_init=KaimingInit(),
                         activation=Softmax()))
    super(residual_network, self).__init__(layers=layers)
def make_layers(use_large, vocab_size):
    if use_large:
        init = GaussianInit(0., 0.02)
    else:
        init = GaussianInit(0., 0.05)

    layers = []
    layers.append(make_embedding_layer(vocab_size))
    layers.append(lambda op: ng.map_roles(op, {'REC': 'W', 'F': 'C'}))

    kernel_sizes = [7, 7, 3, 3, 3, 3]
    pool_layer_idxs = [0, 1, 5]
    conv_nout = 1024 if use_large else 256
    fc_nout = 2048 if use_large else 1024
    for i in range(6):
        conv_layer = Convolution(**conv_params(kernel_sizes[i], conv_nout, init))
        layers.append(conv_layer)
        if i in pool_layer_idxs:
            pool_layer = Pooling(pool_shape=(3,), strides=3)
            layers.append(pool_layer)
    layers.append(Affine(nout=fc_nout, weight_init=init,
                         bias_init=ConstantInit(0.), activation=Rectlin()))
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(nout=fc_nout, weight_init=init,
                         bias_init=ConstantInit(0.), activation=Rectlin()))
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(axes=(ax.Y,), weight_init=init,
                         bias_init=ConstantInit(0.), activation=Softmax()))
    return layers
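# Sequence-length bookkeeping for the six conv blocks above, assuming the
# customary 1014-character input of this architecture (the function itself
# does not fix the length). Convolutions are valid; pools are width 3, stride 3:
length = 1014
for i, k in enumerate([7, 7, 3, 3, 3, 3]):
    length = length - k + 1   # valid convolution
    if i in (0, 1, 5):
        length //= 3          # Pooling(pool_shape=(3,), strides=3)
print(length)                 # 34 positions per filter reach the first Affine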
def __init__(self):
    super(ConvolutionLayer, self).__init__()
    self.layer = Convolution({'T': 1, 'R': 1, 'S': 1, 'K': 1},
                             ConstantInit(0.0),
                             {'str_d': 1, 'str_h': 1, 'str_w': 1},
                             {'pad_d': 0, 'pad_h': 0, 'pad_w': 0},
                             {'dil_d': 0, 'dil_h': 0, 'dil_w': 0},
                             bias_init=ConstantInit(0.0),
                             batch_norm=True)
def test_conv1d(transformer_factory, filter_width, num_filters, strides, padding,
                time_steps, feature_dimension, batch_size):
    dilation = 1  # reference conv does not support dilation

    F = ng.make_axis(name='F', length=feature_dimension)
    REC = ng.make_axis(name='REC', length=time_steps)
    N = ng.make_axis(name='N', length=batch_size)
    in_axes = ng.make_axes([F, REC, N])

    inputs = ng.placeholder(axes=in_axes)
    input_vals = np.random.randn(*in_axes.lengths)

    filter_init = GaussianInit()
    conv1d = Convolution((filter_width, num_filters), filter_init,
                         strides=strides, padding=padding, dilation=dilation,
                         bias_init=None, activation=Rectlin(), batch_norm=None)

    result_op = conv1d(inputs, channel_axes='F', spatial_axes={'W': 'REC'})

    with closing(ngt.make_transformer()) as transformer:
        result_comp = transformer.add_computation(ng.computation(result_op, inputs))
        filter_vals = transformer.add_computation(ng.computation(conv1d.conv.W))()

        result_ng = result_comp(input_vals)
        result_np = np.squeeze(reference_conv1d(input_vals, filter_vals,
                                                lambda x: np.maximum(0, x)))
        ng.testing.assert_allclose(result_ng, result_np)
# Model specification
def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(axes=x.axes[0],
                                    initial_value=np.array([[104., 119., 127.]]))
    y = ng.expand_dims((x - bgr_mean) / 255., ax.D, 1)
    return y


init_uni = UniformInit(-0.1, 0.1)

seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Convolution((5, 5, 16), filter_init=init_uni, activation=Rectlin()),
                   Pool2D(2, strides=2),
                   Convolution((5, 5, 32), filter_init=init_uni, activation=Rectlin()),
                   Pool2D(2, strides=2),
                   Affine(nout=500, weight_init=init_uni, activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax())])

######################
# Input specification
ax.C.length, ax.H.length, ax.W.length = train_set.shapes['image']
ax.D.length = 1
ax.N.length = args.batch_size
ax.Y.length = 10

# placeholders with descriptive names
# Model specification
def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(axes=[x.axes.channel_axis()],
                                    initial_value=np.array([104., 119., 127.]))
    return (x - bgr_mean) / 255.


init_uni = UniformInit(-0.1, 0.1)

seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Convolution((5, 5, 16), filter_init=init_uni,
                               activation=Rectlin(), batch_norm=args.use_batch_norm),
                   Pool2D(2, strides=2),
                   Convolution((5, 5, 32), filter_init=init_uni,
                               activation=Rectlin(), batch_norm=args.use_batch_norm),
                   Pool2D(2, strides=2),
                   Affine(nout=500, weight_init=init_uni,
                          activation=Rectlin(), batch_norm=args.use_batch_norm),
                   Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax())])

optimizer = GradientDescentMomentum(0.01, 0.9)
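# Quick numeric check of cifar_mean_subtract (values illustrative): a pixel
# equal to the BGR mean maps to 0, and a saturated 255 pixel maps to ~0.5:
import numpy as np
bgr_mean = np.array([104., 119., 127.])
print((bgr_mean - bgr_mean) / 255.)                       # [0. 0. 0.]
print((np.array([255., 255., 255.]) - bgr_mean) / 255.)   # ~[0.59 0.53 0.50]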
    }
}

train_set = ArrayIterator(train_data,
                          batch_size=args.batch_size,
                          total_iterations=args.num_iterations)
inputs = train_set.make_placeholders(include_iteration=True)
ax.Y.length = 1000  # number of outputs of last layer.

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# Setup model
seq1 = Sequential([Convolution((3, 3, 64), filter_init=GaussianInit(var=0.01),
                               bias_init=init, activation=Rectlin(), padding=1),
                   Pool2D(2, strides=2),
                   Convolution((3, 3, 128), filter_init=GaussianInit(var=0.01),
                               bias_init=init, activation=Rectlin(), padding=1),
                   Pool2D(2, strides=2),
                   Convolution((3, 3, 256), filter_init=GaussianInit(var=0.01),
                               bias_init=init, activation=Rectlin(), padding=1),
                   Convolution((3, 3, 256),
def __init__(self, net_type, resnet_size, bottleneck, num_resnet_mods):
    # For CIFAR10 dataset
    if net_type == 'cifar10':
        # Number of Filters
        num_fils = [16, 32, 64]
        # Network Layers
        layers = [
            # Subtracting mean as suggested in paper
            Preprocess(functor=cifar10_mean_subtract),
            # First Conv with 3x3 and stride=1
            Convolution(**conv_params(3, 16))
        ]
        first_resmod = True  # Indicates the first residual module
        # Loop 3 times for each filter.
        for fil in range(3):
            # Lay out n residual modules so that we have 2n layers.
            for resmods in range(num_resnet_mods):
                if resmods == 0:
                    if first_resmod:
                        # Strides=1 and Convolution side path
                        main_path, side_path = self.get_mp_sp(num_fils[fil],
                                                              net_type,
                                                              direct=False)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                        first_resmod = False
                    else:
                        # Strides=2 and Convolution side path
                        main_path, side_path = self.get_mp_sp(num_fils[fil],
                                                              net_type,
                                                              direct=False,
                                                              strides=2)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                else:
                    # Strides=1 and direct connection
                    main_path, side_path = self.get_mp_sp(num_fils[fil], net_type)
                    layers.append(ResidualModule(main_path, side_path))
                    layers.append(Activation(Rectlin()))
        # Do average pooling --> fully connected --> softmax.
        layers.append(Pooling([8, 8], pool_type='avg'))
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=True))
        layers.append(Activation(Softmax()))
    # For I1K dataset
    elif net_type == "i1k":
        # Number of Filters
        num_fils = [64, 128, 256, 512]
        # Number of residual modules we need to instantiate at each level
        num_resnet_mods = num_i1k_resmods(resnet_size)
        # Network layers
        layers = [
            # Subtracting mean
            Preprocess(functor=i1k_mean_subtract),
            # First Conv layer
            Convolution((7, 7, 64), strides=2, padding=3, batch_norm=True,
                        activation=Rectlin(), filter_init=KaimingInit()),
            # Max Pooling
            Pooling([3, 3], strides=2, pool_type='max', padding=1)
        ]
        # Indicates the first residual module, for which strides are 1
        first_resmod = True
        # Loop 4 times for each filter
        for fil in range(4):
            # Lay out residual modules as in num_resnet_mods list
            for resmods in range(num_resnet_mods[fil]):
                if resmods == 0:
                    if first_resmod:
                        # Strides=1 and Convolution side path
                        main_path, side_path = self.get_mp_sp(num_fils[fil],
                                                              net_type,
                                                              direct=False,
                                                              bottleneck=bottleneck)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                        first_resmod = False
                    else:
                        # Strides=2 and Convolution side path
                        main_path, side_path = self.get_mp_sp(num_fils[fil],
                                                              net_type,
                                                              direct=False,
                                                              bottleneck=bottleneck,
                                                              strides=2)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                else:
                    # Strides=1 and direct connection
                    main_path, side_path = self.get_mp_sp(num_fils[fil],
                                                          net_type,
                                                          bottleneck=bottleneck)
                    layers.append(ResidualModule(main_path, side_path))
                    layers.append(Activation(Rectlin()))
        # Do average pooling --> fully connected --> softmax.
        layers.append(Pooling([7, 7], pool_type='avg'))
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=True))
        layers.append(Activation(Softmax()))
    else:
        raise NameError(
            "Incorrect dataset. Should be --dataset cifar10 or --dataset i1k")
    super(BuildResnet, self).__init__(layers=layers)
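# Depth bookkeeping for the CIFAR10 branch above: each of the 3 filter sizes
# contributes num_resnet_mods residual modules of 2 convolutions each, plus
# the first 3x3 convolution and the final Affine -- the standard 6n + 2
# scheme. E.g. for num_resnet_mods = 3:
print(6 * 3 + 2)  # 20, i.e. a 20-layer CIFAR10 ResNet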