Inception([(64, ), (96, 128), (16, 32), (32, )]),
Inception([(128, ), (128, 192), (32, 96), (64, )]),
Pooling(pool_shape=(3, 3), padding=1, strides=2, pool_type='max'),
Inception([(192, ), (96, 208), (16, 48), (64, )]),
Inception([(160, ), (112, 224), (24, 64), (64, )]),
Inception([(128, ), (128, 256), (24, 64), (64, )]),
Inception([(112, ), (144, 288), (32, 64), (64, )]),
Inception([(256, ), (160, 320), (32, 128), (128, )]),
Pooling(pool_shape=(3, 3), padding=1, strides=2, pool_type='max'),
Inception([(256, ), (160, 320), (32, 128), (128, )]),
Inception([(384, ), (192, 384), (48, 128), (128, )]),
Pooling(pool_shape=(7, 7), strides=1, pool_type='avg'),
Affine(axes=ax.Y, weight_init=XavierInit(), bias_init=bias_init,
       activation=Softmax())])

lr_schedule = {'name': 'schedule',
               'base_lr': 0.01,
               'gamma': (1 / 250.)**(1 / 3.),
               'schedule': [22, 44, 65]}

optimizer = GradientDescentMomentum(lr_schedule, 0.0, wdecay=0.0005,
                                    iteration=inputs['iteration'])
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
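# A small side calculation (plain Python, not part of the example above) showing
# how the 'schedule' policy decays the learning rate: gamma is chosen as
# (1 / 250.)**(1 / 3.) so that after the three milestones (22, 44, 65, assumed
# here to be epochs) the rate has dropped by a total factor of 250.
def lr_at_epoch(epoch, base_lr=0.01, gamma=(1 / 250.)**(1 / 3.),
                milestones=(22, 44, 65)):
    # Multiply by gamma once for every milestone already passed.
    return base_lr * gamma ** sum(epoch >= m for m in milestones)

assert abs(lr_at_epoch(70) - 0.01 / 250.) < 1e-12  # all three steps applied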
out_axis = ng.make_axis(length=len(shakes.vocab) + 1, name="out_feature_axis")
in_axes = ng.make_axes([batch_axis, time_axis])
out_axes = ng.make_axes([batch_axis, time_axis])

# Build placeholders for the created axes
inputs = {'X': ng.placeholder(in_axes),
          'y': ng.placeholder(out_axes),
          'iteration': ng.placeholder(axes=())}

# Network Definition
if use_embedding is False:
    seq1 = Sequential([Preprocess(functor=expand_onehot),
                       LSTM(nout=recurrent_units, init=init_uni, backward=False,
                            reset_cells=True, activation=Logistic(),
                            gate_activation=Tanh(), return_sequence=True),
                       Affine(weight_init=init_uni, bias_init=init_uni,
                              activation=Softmax(), axes=out_axis)])
else:
    embedding_dim = 8
    seq1 = Sequential([LookupTable(len(shakes.vocab) + 1, embedding_dim,
                                   init_uni, update=True),
                       LSTM(nout=recurrent_units, init=init_uni, backward=False,
                            reset_cells=True, activation=Logistic(),
                            gate_activation=Tanh(), return_sequence=True),
                       Affine(weight_init=init_uni, bias_init=init_uni,
                              activation=Softmax(), axes=out_axis)])

# Optimizer
# Initial learning rate is 0.01 (base_lr).
# At iteration (num_iterations // 75), lr is multiplied by gamma (new lr = .95 * .01).
# At iteration (num_iterations * 2 // 75), it is reduced by gamma again.
# And so on.
no_steps = 75
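# One plausible way (a sketch, not necessarily the original continuation) to turn
# the comments above into a policy dict: place a milestone every
# num_iterations // no_steps iterations. The value of num_iterations is an
# assumption for illustration; gamma=0.95 and base_lr=0.01 come from the
# comments, and the dict layout mirrors the 'schedule' policy used elsewhere.
num_iterations = 7500  # assumed value
step = num_iterations // no_steps
schedule = [step * i for i in range(1, no_steps)]
learning_rate_policy = {'name': 'schedule',
                        'schedule': schedule,
                        'gamma': 0.95,
                        'base_lr': 0.01}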
ax.Y.length = 10

######################
# Model specification


def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(
        axes=x.axes.find_by_name('C'),
        initial_value=np.array([104., 119., 127.]))
    return (x - bgr_mean) / 255.


seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Affine(nout=200, weight_init=UniformInit(-0.1, 0.1),
                          activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=UniformInit(-0.1, 0.1),
                          activation=Softmax())])

optimizer = GradientDescentMomentum(0.1, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
    eval_loss = ng.cross_entropy_multi(inference_prob,
                                       ng.one_hot(inputs['label'], axis=ax.Y))

eval_outputs = dict(results=inference_prob, cross_ent_loss=eval_loss)

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer,
                                               train_outputs, inputs)
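# Quick numpy sanity check (illustrative only, independent of ngraph) of what
# cifar_mean_subtract computes: per-channel BGR mean removal followed by
# scaling, which lands pixel values roughly in [-0.5, 0.6].
bgr_mean_np = np.array([104., 119., 127.]).reshape(3, 1, 1)  # broadcast over C, H, W
fake_image = np.random.uniform(0., 255., size=(3, 32, 32))   # CHW toy input
normalized = (fake_image - bgr_mean_np) / 255.
assert normalized.min() >= -127. / 255. and normalized.max() <= 151. / 255.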
else:
    layer_0 = Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y))

if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size, init, activation=Tanh())
elif args.layer_type == "birnn":
    rlayer = BiRNN(hidden_size, init, activation=Tanh(),
                   return_sequence=True, sum_out=True)

# model initialization
seq1 = Sequential([
    layer_0,
    rlayer,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))
])

optimizer = RMSProp()
train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])
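# Side note as a numpy sketch: usebits=True is assumed here to follow the usual
# neon convention of reporting cross-entropy in bits (log base 2), the natural
# unit for per-character language models, rather than nats.
import numpy as np

toy_probs = np.array([0.7, 0.2, 0.1])          # predicted distribution
target_idx = 0                                 # true class
nats = -np.log(toy_probs[target_idx])
bits = -np.log2(toy_probs[target_idx])
assert np.isclose(bits, nats / np.log(2))      # bits = nats / ln(2)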
    return (x - bgr_mean) / 255.


init_uni = UniformInit(-0.1, 0.1)

seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Convolution((5, 5, 16), filter_init=init_uni,
                               activation=Rectlin(),
                               batch_norm=args.use_batch_norm),
                   Pooling((2, 2), strides=2),
                   Convolution((5, 5, 32), filter_init=init_uni,
                               activation=Rectlin(),
                               batch_norm=args.use_batch_norm),
                   Pooling((2, 2), strides=2),
                   Affine(nout=500, weight_init=init_uni,
                          activation=Rectlin(),
                          batch_norm=args.use_batch_norm),
                   Affine(axes=ax.Y, weight_init=init_uni,
                          activation=Softmax())])

optimizer = GradientDescentMomentum(0.01, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
    # errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
    #                       inputs['label'])
    eval_loss = ng.cross_entropy_multi(inference_prob,
                                       ng.one_hot(inputs['label'], axis=ax.Y))

eval_outputs = dict(results=inference_prob, cross_ent_loss=eval_loss)

# Now bind the computations we are interested in
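# Back-of-the-envelope shape check (assuming 32x32 CIFAR inputs and the
# apparent defaults of stride 1 and no padding for Convolution) of the spatial
# size reaching the first Affine layer:
def out_size(size, filt, stride=1, pad=0):
    return (size + 2 * pad - filt) // stride + 1

side = 32
side = out_size(side, 5)      # Convolution (5, 5, 16): 32 -> 28
side = out_size(side, 2, 2)   # Pooling (2, 2), strides=2: 28 -> 14
side = out_size(side, 5)      # Convolution (5, 5, 32): 14 -> 10
side = out_size(side, 2, 2)   # Pooling (2, 2), strides=2: 10 -> 5
assert side == 5              # 32 channels * 5 * 5 = 800 values into Affine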
def __init__(self, net_type, resnet_size, bottleneck, num_resnet_mods,
             batch_norm=True):
    # For CIFAR10 dataset
    if net_type in ('cifar10', 'cifar100'):
        # Number of filters
        num_fils = [16, 32, 64]
        # Network layers
        layers = [
            # Subtracting mean as suggested in paper
            Preprocess(functor=cifar10_mean_subtract),
            # First conv with 3x3 filters and strides=1
            Convolution(**conv_params(3, 16, batch_norm=batch_norm))]
        first_resmod = True  # Indicates the first residual module
        # Loop 3 times, once for each filter size.
        for fil in range(3):
            # Lay out num_resnet_mods residual modules, i.e. 2n conv layers per group.
            for resmods in range(num_resnet_mods):
                if resmods == 0:
                    if first_resmod:
                        # Strides=1 and convolution side path
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type, direct=False,
                            batch_norm=batch_norm)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                        first_resmod = False
                    else:
                        # Strides=2 and convolution side path
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type, direct=False, strides=2,
                            batch_norm=batch_norm)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                else:
                    # Strides=1 and direct connection
                    main_path, side_path = self.get_mp_sp(
                        num_fils[fil], net_type, batch_norm=batch_norm)
                    layers.append(ResidualModule(main_path, side_path))
                    layers.append(Activation(Rectlin()))
        # Average pooling --> fully connected --> softmax
        layers.append(Pooling((8, 8), pool_type='avg'))
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=batch_norm))
        layers.append(Activation(Softmax()))
    # For I1K dataset
    elif net_type in ('i1k', 'i1k100'):
        # Number of filters
        num_fils = [64, 128, 256, 512]
        # Number of residual modules to instantiate at each level
        num_resnet_mods = num_i1k_resmods(resnet_size)
        # Network layers
        layers = [
            # Subtracting mean
            Preprocess(functor=i1k_mean_subtract),
            # First conv layer
            Convolution((7, 7, 64), strides=2, padding=3,
                        batch_norm=batch_norm, activation=Rectlin(),
                        filter_init=KaimingInit()),
            # Max pooling
            Pooling((3, 3), strides=2, pool_type='max', padding=1)]
        first_resmod = True  # Indicates the first residual module, which uses strides=1
        # Loop 4 times, once for each filter size
        for fil in range(4):
            # Lay out residual modules as given by the num_resnet_mods list
            for resmods in range(num_resnet_mods[fil]):
                if resmods == 0:
                    if first_resmod:
                        # Strides=1 and convolution side path
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type, direct=False,
                            bottleneck=bottleneck, batch_norm=batch_norm)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                        first_resmod = False
                    else:
                        # Strides=2 and convolution side path
                        main_path, side_path = self.get_mp_sp(
                            num_fils[fil], net_type, direct=False,
                            bottleneck=bottleneck, strides=2,
                            batch_norm=batch_norm)
                        layers.append(ResidualModule(main_path, side_path))
                        layers.append(Activation(Rectlin()))
                else:
                    # Strides=1 and direct connection
                    main_path, side_path = self.get_mp_sp(
                        num_fils[fil], net_type, bottleneck=bottleneck,
                        batch_norm=batch_norm)
                    layers.append(ResidualModule(main_path, side_path))
                    layers.append(Activation(Rectlin()))
        # Average pooling --> fully connected --> softmax
        layers.append(Pooling((7, 7), pool_type='avg'))
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=batch_norm))
        layers.append(Activation(Softmax()))
    else:
        raise NameError("Incorrect dataset. --dataset should be one of "
                        "cifar10, cifar100, i1k, or i1k100")
    super(BuildResnet, self).__init__(layers=layers)
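# Depth bookkeeping for the CIFAR branch (a sanity sketch outside the class):
# each non-bottleneck ResidualModule holds 2 conv layers, there are
# num_resnet_mods modules in each of the 3 filter groups, plus the stem conv
# and the final Affine, giving the classic 6n + 2 layer count.
def cifar_resnet_depth(num_resnet_mods):
    return 6 * num_resnet_mods + 2

assert cifar_resnet_depth(3) == 20   # n=3 -> ResNet-20
assert cifar_resnet_depth(9) == 56   # n=9 -> ResNet-56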