def conv_params(fil_size, num_fils, strides=1, batch_norm=True, activation=Rectlin()):
    return dict(filter_shape=(fil_size, fil_size, num_fils),
                filter_init=KaimingInit(),
                strides=strides,
                padding=(1 if fil_size > 1 else 0),
                batch_norm=batch_norm,
                activation=activation)
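# Usage sketch (not from the original source): conv_params returns a kwargs
# dict, so it is splatted into Convolution, as the ResNet builder below does.
layer = Convolution(**conv_params(3, 64, strides=2))  # 3x3, 64 filters, stride 2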
def __init__(self, branch_units, activation=Rectlin(),
             bias_init=UniformInit(low=-0.08, high=0.08),
             filter_init=XavierInit()):
    (p1, p2, p3, p4) = branch_units

    self.branch_1 = Convolution((1, 1, p1[0]), activation=activation,
                                bias_init=bias_init, filter_init=filter_init)

    self.branch_2 = [Convolution((1, 1, p2[0]), activation=activation,
                                 bias_init=bias_init, filter_init=filter_init),
                     Convolution((3, 3, p2[1]), activation=activation,
                                 bias_init=bias_init, filter_init=filter_init,
                                 padding=1)]

    self.branch_3 = [Convolution((1, 1, p3[0]), activation=activation,
                                 bias_init=bias_init, filter_init=filter_init),
                     Convolution((5, 5, p3[1]), activation=activation,
                                 bias_init=bias_init, filter_init=filter_init,
                                 padding=2)]

    self.branch_4 = [Pooling(pool_shape=(3, 3), padding=1, strides=1,
                             pool_type="max"),
                     Convolution((1, 1, p4[0]), activation=activation,
                                 bias_init=bias_init, filter_init=filter_init)]
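# Usage sketch, with branch sizes borrowed from the GoogLeNet stack below:
# each tuple holds the filter counts for one branch, in order: the 1x1 branch,
# the 1x1 -> 3x3 branch, the 1x1 -> 5x5 branch, and the pool -> 1x1 branch.
inception_3a = Inception([(64,), (96, 128), (16, 32), (32,)])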
def test_conv1d(transformer_factory, filter_width, num_filters, strides, padding,
                time_steps, feature_dimension, batch_size):
    dilation = 1  # reference conv does not support dilation

    F = ng.make_axis(name='F', length=feature_dimension)
    REC = ng.make_axis(name='REC', length=time_steps)
    N = ng.make_axis(name='N', length=batch_size)
    in_axes = ng.make_axes([F, REC, N])

    inputs = ng.placeholder(axes=in_axes)
    input_vals = np.random.randn(*in_axes.lengths)

    filter_init = GaussianInit()
    conv1d = Convolution((filter_width, num_filters), filter_init,
                         strides=strides, padding=padding, dilation=dilation,
                         bias_init=None, activation=Rectlin(), batch_norm=None)

    result_op = conv1d(inputs, channel_axes='F', spatial_axes={'W': 'REC'})

    with closing(ngt.make_transformer()) as transformer:
        result_comp = transformer.add_computation(ng.computation(result_op, inputs))
        filter_vals = transformer.add_computation(ng.computation(conv1d.conv.W))()

        result_ng = result_comp(input_vals)
        result_np = np.squeeze(reference_conv1d(input_vals, filter_vals,
                                                lambda x: np.maximum(0, x)))
        ng.testing.assert_allclose(result_ng, result_np)
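# For reference, the expected output length of the 1D convolution above follows
# the standard formula (a sanity-check helper, not part of the original test):
def conv1d_out_length(time_steps, filter_width, strides, padding, dilation=1):
    # effective filter width grows with dilation
    effective = dilation * (filter_width - 1) + 1
    return (time_steps + 2 * padding - effective) // strides + 1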
        branch_4_output = self.branch_4[0](in_obj)
        branch_4_output = self.branch_4[1](branch_4_output)

        outputs = [branch_1_output, branch_2_output,
                   branch_3_output, branch_4_output]

        # This does the equivalent of neon's merge-broadcast
        return ng.concat_along_axis(outputs, branch_1_output.axes.channel_axis())


seq1 = Sequential([Convolution((7, 7, 64), padding=3, strides=2,
                               activation=Rectlin(), bias_init=bias_init,
                               filter_init=XavierInit()),
                   Pooling(pool_shape=(3, 3), padding=1, strides=2,
                           pool_type='max'),
                   Convolution((1, 1, 64), activation=Rectlin(),
                               bias_init=bias_init, filter_init=XavierInit()),
                   Convolution((3, 3, 192), activation=Rectlin(),
                               bias_init=bias_init, filter_init=XavierInit(),
                               padding=1),
                   Pooling(pool_shape=(3, 3), padding=1, strides=2,
                           pool_type='max'),
                   Inception([(64,), (96, 128), (16, 32), (32,)]),
                   Inception([(128,), (128, 192), (32, 96), (64,)]),
inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification


def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(
        axes=x.axes.find_by_name('C'),
        initial_value=np.array([104., 119., 127.]))
    return (x - bgr_mean) / 255.


seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Affine(nout=200, weight_init=UniformInit(-0.1, 0.1),
                          activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=UniformInit(-0.1, 0.1),
                          activation=Softmax())])

optimizer = GradientDescentMomentum(0.1, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(inputs['label'], axis=ax.Y))
eval_outputs = dict(results=inference_prob, cross_ent_loss=eval_loss)

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
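    # A minimal sketch of the binding step, assuming the make_bound_computation
    # helper used in ngraph's neon examples (its exact signature is assumed here):
    train_computation = make_bound_computation(transformer, train_outputs, inputs)
    eval_computation = make_bound_computation(transformer, eval_outputs, inputs)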
######################
# Model specification


def cifar_mean_subtract(x):
    bgr_mean = ng.constant(
        const=np.array([104., 119., 127.]),
        axes=[x.axes.channel_axis()])
    return (x - bgr_mean) / 255.


init_uni = UniformInit(-0.1, 0.1)

seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Convolution((5, 5, 16), filter_init=init_uni,
                               activation=Rectlin(),
                               batch_norm=args.use_batch_norm),
                   Pooling((2, 2), strides=2),
                   Convolution((5, 5, 32), filter_init=init_uni,
                               activation=Rectlin(),
                               batch_norm=args.use_batch_norm),
                   Pooling((2, 2), strides=2),
                   Affine(nout=500, weight_init=init_uni,
                          activation=Rectlin(),
                          batch_norm=args.use_batch_norm),
                   Affine(axes=ax.Y, weight_init=init_uni,
                          activation=Softmax())])

optimizer = GradientDescentMomentum(0.01, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)
def __init__(self, net_type, resnet_size, bottleneck, num_resnet_mods,
             batch_norm=True):
    # For CIFAR10/CIFAR100 datasets
    if net_type in ('cifar10', 'cifar100'):
        # Number of filters at each stage
        num_fils = [16, 32, 64]
        # Network layers
        layers = [
            # Subtracting mean as suggested in paper
            Preprocess(functor=cifar10_mean_subtract),
            # First conv with 3x3 filters and strides=1
            Convolution(**conv_params(3, 16, batch_norm=batch_norm))]
        first_resmod = True  # Indicates the first residual module
        # Loop 3 times, once per filter size.
        for fil in range(3):
            # Lay out n residual modules so that we have 2n layers.
            for resmods in range(num_resnet_mods):
                if resmods == 0:
                    if first_resmod:
                        # Strides=1 and convolution side path
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type, direct=False,
                            batch_norm=batch_norm)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                        first_resmod = False
                    else:
                        # Strides=2 and convolution side path
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type, direct=False, strides=2,
                            batch_norm=batch_norm)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                else:
                    # Strides=1 and direct connection
                    main_path, side_path = self.get_mp_sp(
                        num_fils[fil], net_type, batch_norm=batch_norm)
                    layers.append(ResidualModule(main_path, side_path))
                    layers.append(Activation(Rectlin()))
        # Average pooling --> fully connected --> softmax
        layers.append(Pooling((8, 8), pool_type='avg'))
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=batch_norm))
        layers.append(Activation(Softmax()))
    # For I1K dataset
    elif net_type in ('i1k', 'i1k100'):
        # Number of filters at each stage
        num_fils = [64, 128, 256, 512]
        # Number of residual modules to instantiate at each level
        num_resnet_mods = num_i1k_resmods(resnet_size)
        # Network layers
        layers = [
            # Subtracting mean
            Preprocess(functor=i1k_mean_subtract),
            # First conv layer
            Convolution((7, 7, 64), strides=2, padding=3,
                        batch_norm=batch_norm, activation=Rectlin(),
                        filter_init=KaimingInit()),
            # Max pooling
            Pooling((3, 3), strides=2, pool_type='max', padding=1)]
        first_resmod = True  # Indicates the first residual module, which uses strides=1
        # Loop 4 times, once per filter size
        for fil in range(4):
            # Lay out residual modules as given by the num_resnet_mods list
            for resmods in range(num_resnet_mods[fil]):
                if resmods == 0:
                    if first_resmod:
                        # Strides=1 and convolution side path
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type, direct=False,
                            bottleneck=bottleneck, batch_norm=batch_norm)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                        first_resmod = False
                    else:
                        # Strides=2 and convolution side path
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type, direct=False,
                            bottleneck=bottleneck, strides=2,
                            batch_norm=batch_norm)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                else:
                    # Strides=1 and direct connection
                    main_path, side_path = self.get_mp_sp(
                        num_fils[fil], net_type, bottleneck=bottleneck,
                        batch_norm=batch_norm)
                    layers.append(ResidualModule(main_path, side_path))
                    layers.append(Activation(Rectlin()))
        # Average pooling --> fully connected --> softmax
        layers.append(Pooling((7, 7), pool_type='avg'))
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=batch_norm))
        layers.append(Activation(Softmax()))
    else:
        raise NameError(
            "Incorrect dataset. Should be --dataset cifar10 or --dataset i1k")
    super(BuildResnet, self).__init__(layers=layers)
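# Usage sketch (the call below is illustrative, not from the original source):
# CIFAR-10 ResNets have depth 6n + 2, so a 20-layer network uses n = 3
# residual modules per filter group.
n = (20 - 2) // 6
model = BuildResnet('cifar10', resnet_size=20, bottleneck=False,
                    num_resnet_mods=n)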
# Create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(valid_data, args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification
seq1 = Sequential([Preprocess(functor=lambda x: x / 255.),
                   Affine(nout=100, weight_init=GaussianInit(),
                          activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=GaussianInit(),
                          activation=Logistic())])

optimizer = GradientDescentMomentum(0.1, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_binary(train_prob,
                                     ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
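# Sketch of the usual continuation, mirroring the CIFAR example above; binary
# cross-entropy is chosen to match the Logistic output layer (an assumption):
eval_loss = ng.cross_entropy_binary(inference_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
eval_outputs = dict(results=inference_prob, cross_ent_loss=eval_loss)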
                        'axes': ('N', 'C', 'H', 'W')},
              'label': {'data': y_train,
                        'axes': ('N',)}}

train_set = ArrayIterator(train_data, batch_size=args.batch_size,
                          total_iterations=args.num_iterations)
inputs = train_set.make_placeholders(include_iteration=True)
ax.Y.length = 1000  # number of outputs of last layer

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# Setup model
seq1 = Sequential([Convolution((11, 11, 64), filter_init=GaussianInit(std=0.01),
                               bias_init=init, activation=Rectlin(),
                               padding=3, strides=4),
                   Pooling((3, 3), strides=2),
                   Convolution((5, 5, 192), filter_init=GaussianInit(std=0.01),
                               bias_init=init, activation=Rectlin(),
                               padding=2),
                   Pooling((3, 3), strides=2),
                   Convolution((3, 3, 384), filter_init=GaussianInit(std=0.03),
                               bias_init=init, activation=Rectlin(),
                               padding=1),
                   Convolution((3, 3, 256), filter_init=GaussianInit(std=0.03),
                               bias_init=init, activation=Rectlin(),
                               padding=1),
                   Convolution((3, 3, 256), filter_init=GaussianInit(std=0.03),
                               bias_init=init, activation=Rectlin(),
                               padding=1),
                   Pooling((3, 3), strides=2),