def test_scope(layer_cls):
    """
    Test that creating layers inside and outside of a scope behaves correctly
    """
    scope1 = 's1'

    layer0 = layer_cls()
    with Layer.variable_scope(scope1):
        assert Layer.active_scope == scope1
        layer1 = layer_cls()
    layer2 = layer_cls()

    # have to call layers to initialize W
    x = layer0.get_input()
    layer0.get_layer()(x)
    layer1.get_layer()(x)
    layer2.get_layer()(x)

    for w in layer0.get_weights():
        assert w.scope is None

    for w in layer1.get_weights():
        assert w.scope == scope1, "found scope {} instead of {}".format(w.scope, scope1)

    for w in layer2.get_weights():
        assert w.scope is None
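# The `layer_cls` fixture is defined elsewhere in the test suite. For
# illustration only, a hypothetical wrapper exposing the get_input/get_layer/
# get_weights interface that test_scope relies on could look like the sketch
# below; the Linear layer, `dummy_init`, and the axis lengths are assumptions,
# not the real fixture.
class LinearWrapper(object):
    def __init__(self):
        self.layer = Linear(dummy_init, 10)

    def get_input(self):
        feature_axis = ng.make_axis(length=4, name='F')
        batch_axis = ng.make_axis(length=8, name='N')
        return ng.placeholder(axes=[feature_axis, batch_axis])

    def get_layer(self):
        return self.layer

    def get_weights(self):
        return (self.layer.W,)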
def test_inference_reuse_lstm(recurrent_input):
    layer = LSTM(10, dummy_init, activation=lambda x: x)
    layer(recurrent_input)
    train_params = (layer.W_input["f"], layer.W_recur["f"])
    with Layer.inference_mode_on():
        layer(recurrent_input)
        inference_params = (layer.W_input["f"], layer.W_recur["f"])

    for train_param, inference_param in zip(train_params, inference_params):
        assert train_param is inference_param
def test_inference_reuse_batch_norm(input_placeholder):
    layer = BatchNorm()
    layer(input_placeholder)
    train_params = (layer.gamma, layer.beta)
    with Layer.inference_mode_on():
        layer(input_placeholder)
        inference_params = (layer.gamma, layer.beta)

    for train_param, inference_param in zip(train_params, inference_params):
        assert train_param is inference_param
def test_inference_reuse_linear(input_placeholder):
    layer = Linear(dummy_init, 10)
    layer(input_placeholder)
    train_params = (layer.W, )
    with Layer.inference_mode_on():
        layer(input_placeholder)
        inference_params = (layer.W, )

    for train_param, inference_param in zip(train_params, inference_params):
        assert train_param is inference_param
def test_inference_reuse_lut(recurrent_axis, batch_axis):
    seq_input = ng.placeholder(axes=[recurrent_axis, batch_axis])
    layer = LookupTable(20, 10, dummy_init)
    layer(seq_input)
    train_params = (layer.W, )
    with Layer.inference_mode_on():
        layer(seq_input)
        inference_params = (layer.W, )

    for train_param, inference_param in zip(train_params, inference_params):
        assert train_param is inference_param
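# The fixtures used by the inference-reuse tests (dummy_init, input_placeholder,
# recurrent_input, recurrent_axis, batch_axis) are defined elsewhere in the test
# suite. A minimal sketch of what they might look like is given below; the axis
# names and lengths and the use of GaussianInit for `dummy_init` are assumptions
# for illustration only.
import pytest
import ngraph as ng
from ngraph.frontends.neon import GaussianInit

dummy_init = GaussianInit()


@pytest.fixture
def batch_axis():
    return ng.make_axis(length=8, name='N')


@pytest.fixture
def recurrent_axis():
    return ng.make_axis(length=5, name='REC')


@pytest.fixture
def input_placeholder(batch_axis):
    feature_axis = ng.make_axis(length=10, name='F')
    return ng.placeholder(axes=[feature_axis, batch_axis])


@pytest.fixture
def recurrent_input(recurrent_axis, batch_axis):
    feature_axis = ng.make_axis(length=10, name='F')
    return ng.placeholder(axes=[feature_axis, recurrent_axis, batch_axis])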
def build_regressor_computations():
    train_preds = predictions(encoder, affine_layer, inputs['X'])
    train_loss = ng.squared_L2(train_preds - inputs['y'])

    # Cost calculation
    batch_cost = ng.sequential([optimizer(train_loss),
                                ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    with Layer.inference_mode_on():
        eval_preds = predictions(encoder, affine_layer, inputs['X'])
        eval_loss = ng.mean(ng.squared_L2(eval_preds - inputs['y']), out_axes=())
    loss_computation = ng.computation([eval_loss], "all")

    return train_computation, loss_computation
def build_seq2seq_computations():
    # Training loss, optimizer
    train_decoded = recurrent_model.encode_and_decode(encoder, decoder,
                                                      inputs['X'], previous)
    train_loss = ng.squared_L2(target - train_decoded)
    batch_cost = ng.sequential([optimizer(train_loss),
                                ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    # Evaluation loss
    with Layer.inference_mode_on():
        eval_decoded = recurrent_model.encode_and_generate(encoder, decoder,
                                                           inputs['X'], in_axes)
        eval_loss = ng.mean(ng.squared_L2(target - eval_decoded), out_axes=())
    loss_computation = ng.computation([eval_loss], "all")

    return train_computation, loss_computation
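# A sketch of how the computations returned by the builder functions above
# might be bound to a transformer before training, mirroring the
# ngt.make_transformer / add_computation pattern used in the other examples in
# this collection. The variable names are illustrative only.
import ngraph.transformers as ngt

train_computation, loss_computation = build_seq2seq_computations()

transformer = ngt.make_transformer()
train_function = transformer.add_computation(train_computation)
loss_function = transformer.add_computation(loss_computation)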
def test_layer_caching():
    in_obj = ng.placeholder(())
    layer = SimpleLayer()
    out_train = layer(in_obj)
    out_train2 = layer(in_obj)
    with Layer.inference_mode_on():
        out_inference = layer(in_obj)
        out_inference2 = layer(in_obj)
    out_train3 = layer(in_obj)

    assert out_train is out_train2, "Training mode call not cached"
    assert out_inference is out_inference2, "Inference mode call not cached"
    assert out_train is not out_inference, "Training and inference mode calls are the same"
    assert out_train is out_train3, "Training mode not restored"
def test_inference_reuse_conv(conv_input_placeholder):
    fshape = {k: 1 for k in "TRSK"}
    dilation = {"dil_{}".format(k): 1 for k in "hwd"}
    padding = {"pad_{}".format(k): 0 for k in "hwd"}
    strides = {"str_{}".format(k): 1 for k in "hwd"}
    layer = ConvBase(fshape, dummy_init, dilation=dilation, strides=strides, padding=padding)
    layer(conv_input_placeholder)
    train_params = (layer.W, )
    with Layer.inference_mode_on():
        layer(conv_input_placeholder)
        inference_params = (layer.W, )

    for train_param, inference_param in zip(train_params, inference_params):
        assert train_param is inference_param
optimizer = Adam(learning_rate=2e-3)

print('compiling the graph')
# Cost set up
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])

# Predicted class is the one with the highest probability
# Required outputs: batch cost, train logits, labels, dropout value
train_outputs = dict(batch_cost=batch_cost,
                     inps=inputs['answer'],
                     logits=ng.stack(logits_concat, span, 1),
                     labels=inputs['answer'],
                     drop=dropout_val)

# Inference mode for the validation dataset
with Layer.inference_mode_on():
    eval_outputs = dict(logits=ng.stack(logits_concat, span, 1),
                        labels=inputs['answer'],
                        drop=drop_pointer)

# Now bind the computations we are interested in
print('generating transformer')
eval_frequency = 20
val_frequency = np.ceil(len(train['para']['data']) / params_dict['batch_size'])
train_error_frequency = 1000

# Create transformer
transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
valid_computation = make_bound_computation(transformer, eval_outputs, inputs)
    return optimizer


parser = NgraphArgparser(description='MLP GAN example')
args = parser.parse_args()

# model parameters
h_dim = 4
minibatch_discrimination = False
num_iterations = 600
batch_size = 12
num_examples = num_iterations * batch_size

# generator
g_scope = 'generator'
with Layer.variable_scope(g_scope):
    generator_layers = [affine_layer(h_dim, Rectlin(), name='g0'),
                        affine_layer(1, Identity(), name='g1')]
    generator = Sequential(generator_layers)

# discriminator
d_scope = 'discriminator'
with Layer.variable_scope(d_scope):
    discriminator_layers = [affine_layer(2 * h_dim, Tanh(), name='d0'),
                            affine_layer(2 * h_dim, Tanh(), name='d1')]
    if minibatch_discrimination:
        raise NotImplementedError
def build_regressor_prediction():
    with Layer.inference_mode_on():
        eval_preds = predictions(encoder, affine_layer, inputs['X'])
    return ng.computation([eval_preds], "all")
def build_generator_computation():
    with Layer.inference_mode_on():
        generated = recurrent_model.encode_and_generate(encoder, decoder,
                                                        inputs['X'], in_axes)
    return ng.computation([generated], "all")
def train_network(model, train_set, valid_set, batch_size, epochs, log_file):
    '''
    Trains the predefined network and saves the progress in the log file given
    in the arguments.

    model(object): Defines the model in Neon
    train_set(object): Defines the training set
    valid_set(object): Defines the validation set
    batch_size(int): Minibatch size
    epochs(int): Number of training epochs
    log_file(string): File name to store training logs for plotting
    '''

    # Form placeholders for inputs to the network
    # Iterations needed for learning rate schedule
    inputs = train_set.make_placeholders(include_iteration=True)

    # Convert labels into one-hot vectors
    one_hot_label = ng.one_hot(inputs['label'], axis=ax.Y)

    learning_rate_policy = {'name': 'schedule',
                            'schedule': list(np.arange(2, epochs, 2)),
                            'gamma': 0.6,
                            'base_lr': 0.001}

    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=0.9,
                                        wdecay=0.005,
                                        iteration=inputs['iteration'])

    # Define graph for training
    train_prob = model(inputs['video'])
    train_loss = ng.cross_entropy_multi(train_prob, one_hot_label)
    batch_cost = ng.sequential([optimizer(train_loss),
                                ng.mean(train_loss, out_axes=())])

    with closing(ngt.make_transformer()) as transformer:

        # Define graph for calculating validation set error and misclassification rate
        # Use inference mode for validation to avoid dropout in forward pass
        with Layer.inference_mode_on():
            inference_prob = model(inputs['video'])
            errors = ng.not_equal(ng.argmax(inference_prob), inputs['label'])
            eval_loss = ng.cross_entropy_multi(inference_prob, one_hot_label)
            eval_outputs = {'cross_ent_loss': eval_loss, 'misclass': errors}

        eval_computation = make_bound_computation(transformer, eval_outputs, inputs)

        train_outputs = {'batch_cost': batch_cost}
        train_computation = make_bound_computation(transformer, train_outputs, inputs)

        interval_cost = 0.0

        # Train in epochs
        logs = {'train': [], 'validation': [], 'misclass': []}
        for epoch in trange(epochs, desc='Epochs'):

            # Setup the training bar
            numBatches = train_set.ndata // batch_size
            tpbar = tqdm(unit='batches', ncols=100, total=numBatches, leave=False)

            train_set.reset()
            valid_set.reset()

            train_log = []
            for step, data in enumerate(train_set):
                data = dict(data)
                data['iteration'] = epoch  # learning schedule based on epochs
                output = train_computation(data)
                train_log.append(float(output['batch_cost']))

                tpbar.update(1)
                tpbar.set_description("Training {:0.4f}".format(float(output['batch_cost'])))
                interval_cost += float(output['batch_cost'])

            tqdm.write("Epoch {epch} complete. "
                       "Avg Train Cost {cost:0.4f}".format(epch=epoch,
                                                           cost=interval_cost / step))
            interval_cost = 0.0
            tpbar.close()

            validation_loss = run_validation(valid_set, eval_computation)
            tqdm.write("Avg losses: {}".format(validation_loss))

            logs['train'].append(train_log)
            logs['validation'].append(validation_loss['cross_ent_loss'])
            logs['misclass'].append(validation_loss['misclass'])

            # Save log data and plot at the end of each epoch
            with open(log_file, 'wb') as f:
                pickle.dump(logs, f)
            plot_logs(logs=logs)
def train_mnist_mlp(transformer_name, data_dir=None,
                    rng_seed=12, batch_size=128, train_iter=10, eval_iter=10):
    assert transformer_name in ['cpu', 'hetr']
    assert isinstance(rng_seed, int)

    # Apply this metadata to graph regardless of transformer,
    # but it is ignored for non-HeTr case
    hetr_device_ids = (0, 1)

    # use consistent rng seed between runs
    np.random.seed(rng_seed)

    # Data
    train_data, valid_data = MNIST(path=data_dir).load_data()
    train_set = ArrayIterator(train_data, batch_size, total_iterations=train_iter)
    valid_set = ArrayIterator(valid_data, batch_size)
    inputs = train_set.make_placeholders()
    ax.Y.length = 10

    # Model
    with ng.metadata(device_id=hetr_device_ids, parallel=ax.N):
        seq1 = Sequential([Preprocess(functor=lambda x: x / 255.),
                           Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
                           Affine(axes=ax.Y, weight_init=GaussianInit(), activation=Logistic())])

        train_prob = seq1(inputs['image'])
        train_loss = ng.cross_entropy_binary(train_prob,
                                             ng.one_hot(inputs['label'], axis=ax.Y))

        optimizer = GradientDescentMomentum(0.1, 0.9)
        batch_cost = ng.sequential([optimizer(train_loss),
                                    ng.mean(train_loss, out_axes=())])
        train_outputs = dict(batch_cost=batch_cost)

        with Layer.inference_mode_on():
            inference_prob = seq1(inputs['image'])
            errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                                  inputs['label'])
            eval_loss = ng.cross_entropy_binary(inference_prob,
                                                ng.one_hot(inputs['label'], axis=ax.Y))
            eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

    # Runtime
    with closing(ngt.make_transformer_factory(transformer_name)()) as transformer:
        train_computation = make_bound_computation(transformer, train_outputs, inputs)
        loss_computation = make_bound_computation(transformer, eval_outputs, inputs)

        train_costs = list()
        for step in range(train_iter):
            out = train_computation(next(train_set))
            train_costs.append(float(out['batch_cost']))

        ce_loss = list()
        for step in range(eval_iter):
            out = loss_computation(next(valid_set))
            ce_loss.append(np.mean(out['cross_ent_loss']))

        return train_costs, ce_loss
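# Example invocation of the helper above on the CPU transformer; the iteration
# counts and the printed summary are illustrative only.
if __name__ == "__main__":
    train_costs, ce_loss = train_mnist_mlp('cpu', train_iter=10, eval_iter=10)
    print("final train cost: {:.4f}".format(train_costs[-1]))
    print("mean validation cross-entropy: {:.4f}".format(float(np.mean(ce_loss))))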
    'base_lr': 0.1
}

optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    momentum_coef=0.9,
                                    wdecay=0.0001,
                                    iteration=inputs['iteration'])
label_indices = inputs['label']
train_loss = ng.cross_entropy_multi(resnet(inputs['image']),
                                    ng.one_hot(label_indices, axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

with Layer.inference_mode_on():
    inference_prob = resnet(inputs['image'])
    errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]), label_indices)
    eval_loss = ng.cross_entropy_multi(inference_prob,
                                       ng.one_hot(label_indices, axis=ax.Y))
    eval_loss_names = ['cross_ent_loss', 'misclass']
    eval_computation = ng.computation([eval_loss, errors], "all")

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_function = transformer.add_computation(train_computation)
eval_function = transformer.add_computation(eval_computation)

tpbar = tqdm(unit="batches", ncols=100, total=args.num_iterations)
interval_cost = 0.0
# Create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size)

# noise source
noise_dim = (2, 1, 3, 3)
noise_generator = Noise(train_set.ndata,
                        shape=noise_dim + (args.batch_size,),
                        seed=args.seed)

# generator network
g_scope = 'generator'
filter_init = GaussianInit(var=0.05)
relu = Rectlin(slope=0)

with Layer.variable_scope(g_scope) as scope:
    deconv_layers = [Deconvolution((1, 1, 16), filter_init, strides=1, padding=0,
                                   activation=relu, batch_norm=True),
                     Deconvolution((3, 3, 192), filter_init, strides=1, padding=0,
                                   activation=relu, batch_norm=True,
                                   deconv_out_shape=(1, 5, 5)),
                     Deconvolution((3, 3, 192),