def define_recurrent_layers(out_axes=None,
                            celltype='RNN',
                            recurrent_units=[32],
                            init=GlorotInit(),
                            return_sequence=True):
    layers = []
    for e, i in enumerate(recurrent_units):
        layer_return_sequence = e < len(recurrent_units) - 1 or return_sequence
        if celltype == 'RNN':
            layers.append(Recurrent(nout=i, init=init, backward=False,
                                    activation=Tanh(),
                                    return_sequence=layer_return_sequence))
        elif celltype == 'LSTM':
            layers.append(LSTM(nout=i, init=init, backward=False,
                               activation=Tanh(), gate_activation=Logistic(),
                               return_sequence=layer_return_sequence))
    if out_axes is not None:
        affine_layer = Affine(weight_init=init, bias_init=init,
                              activation=Identity(), axes=out_axes)
        layers.append(affine_layer)
    return layers
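# --- Usage sketch (not part of the original file; a hedged example) ---
# Assuming Sequential is available from the same neon frontend used in the other
# snippets below, a two-layer LSTM stack could be assembled like this. The names
# example_layers and example_model are illustrative only.
example_layers = define_recurrent_layers(celltype='LSTM',
                                         recurrent_units=[64, 64],
                                         init=GlorotInit(),
                                         return_sequence=False)
example_model = Sequential(example_layers)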
ax.Y.length = time_steps

# create iterator and placeholders for training data
train_set = TSPSequentialArrayIterator(data_arrays=tsp_data['train'],
                                       nfeatures=num_features,
                                       batch_size=args.batch_size,
                                       time_steps=time_steps,
                                       total_iterations=args.num_iterations)
inputs = train_set.make_placeholders()

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# build computational graph
enc = LSTM(args.hs, init, activation=Tanh(), reset_cells=True,
           gate_activation=Logistic(), return_sequence=True)
dec = LSTM(args.hs, init, activation=Tanh(), reset_cells=True,
           gate_activation=Logistic(), return_sequence=True)

if args.emb is True:
    # encoder input embedding
    hidden_feature_axis = ng.make_axis(length=args.hs, name='hidden_feature_axis')
    feature_axis = ng.make_axis(length=num_features, name='feature_axis')
    W_emb = ng.variable(axes=[hidden_feature_axis, feature_axis],
                        initial_value=init)
    emb_enc_inputs = ng.dot(W_emb, inputs['inp_txt'])

    # decoder input embedding
    emb_dec_input = []
    ax.N.length = args.batch_size
    for i in range(ax.N.length):
inputs = train_set.make_placeholders()
ax.Y.length = len(tree_bank_data.vocab)


def expand_onehot(x):
    return ng.one_hot(x, axis=ax.Y)


# weight initialization
init = UniformInit(low=-0.08, high=0.08)

if args.layer_type == "lstm":
    rlayer1 = LSTM(hidden_size, init, activation=Tanh(),
                   gate_activation=Logistic(), return_sequence=True)
    rlayer2 = LSTM(hidden_size, init, activation=Tanh(),
                   gate_activation=Logistic(), return_sequence=True)

# model initialization
seq1 = Sequential([Preprocess(functor=expand_onehot),
                   rlayer1,
                   rlayer2,
                   Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))])

optimizer = RMSProp(gradient_clip_value=gradient_clip_value)
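# --- Hedged continuation sketch (not in the original excerpt) ---
# One plausible way to wire the model into a training loss, following the pattern
# of the other snippets in this section. It assumes the iterator exposes
# 'inp_txt'/'tgt_txt' placeholders (as in the seq2seq example above); fwd_prob,
# train_loss, mean_cost, and updates are illustrative names.
fwd_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(fwd_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)
mean_cost = ng.mean(train_loss, out_axes=())
updates = optimizer(train_loss)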
                filter_init, strides=1, padding=1, activation=lrelu,
                batch_norm=True),
    Convolution((1, 1, 16), filter_init, strides=1, padding=0,
                activation=lrelu, batch_norm=True),
    Convolution((7, 7, 1), filter_init, strides=1, padding=0,
                activation=Logistic(), batch_norm=False)]

discriminator = Sequential(conv_layers, name="Discriminator")

# noise placeholder
N = ng.make_axis(name='N', length=args.batch_size)
noise_ax_names = 'CDHW'
noise_axes = ng.make_axes([ng.make_axis(name=nm, length=l)
                           for nm, l in zip(noise_ax_names, noise_dim)])
z_ax = noise_axes + N
z = ng.placeholder(axes=z_ax)

# image placeholder
C = ng.make_axis(name='C', length=1)
np.random.seed(args.rng_seed)

# Create the dataloader
train_data, valid_data = MNIST(args.data_dir).load_data()
train_set = ArrayIterator(train_data, args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(valid_data, args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = 10

######################
# Model specification
seq1 = Sequential([Preprocess(functor=lambda x: x / 255.),
                   Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=GaussianInit(), activation=Logistic())])

optimizer = GradientDescentMomentum(0.1, 0.9)
output_prob = seq1.train_outputs(inputs['image'])
errors = ng.not_equal(ng.argmax(output_prob, out_axes=[ax.N]), inputs['label'])
loss = ng.cross_entropy_binary(output_prob,
                               ng.one_hot(inputs['label'], axis=ax.Y))
mean_cost = ng.mean(loss, out_axes=())
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss, misclass_pct=errors)

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
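# --- Hedged continuation sketch (not part of the original excerpt) ---
# Binding the evaluation outputs and driving a few training steps, mirroring the
# runtime loop of the HeTr helper at the end of this section. num_steps is an
# illustrative name.
loss_computation = make_bound_computation(transformer, loss_outputs, inputs)

num_steps = 20  # illustrative step count
for step in range(num_steps):
    out = train_computation(next(train_set))
    print(step, float(out['batch_cost']))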
def check_lstm(seq_len, input_size, hidden_size,
               batch_size, init_func, return_seq=True, backward=False,
               reset_cells=False, num_iter=2):

    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, name='R')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng = LSTM(hidden_size, init_func, activation=Tanh(),
                       gate_activation=Logistic(),
                       reset_cells=reset_cells, return_sequence=return_seq,
                       backward=backward)

        out_ng = lstm_ng.train_outputs(inp_ng)

        fprop_neon_fun = ex.executor(out_ng, inp_ng)

        fprop_neon_list = []
        input_value_list = []

        for i in range(num_iter):
            # fprop on random inputs
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon = fprop_neon_fun(input_value).copy()

            if return_seq is True:
                fprop_neon = fprop_neon[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_list.append(fprop_neon)

            if reset_cells is False:
                # look at the last hidden states
                assert ng.testing.allclose(fprop_neon[:, -1].reshape(-1, 1),
                                           lstm_ng.h_init.value.get(None),
                                           rtol=rtol, atol=atol)

        # after the rnn graph has been executed, we can get the W values. Get copies
        # so shared values don't confuse derivatives.
        # concatenate weights to i, f, o, g together (in this order)
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon = [lstm_ng.W_input[k].value.get(None).copy().T for k in gates]
        Whh_neon = [lstm_ng.W_recur[k].value.get(None).copy().T for k in gates]
        bh_neon = [lstm_ng.b[k].value.get(None).copy() for k in gates]

        # reference numpy LSTM
        lstm_ref = RefLSTM()
        WLSTM = lstm_ref.init(input_size, hidden_size)

        # make the reference weights and biases the same as the neon model's
        WLSTM[0, :] = np.concatenate(bh_neon)
        WLSTM[1:input_size + 1, :] = np.concatenate(Wxh_neon, 1)
        WLSTM[input_size + 1:] = np.concatenate(Whh_neon, 1)

        # transpose input X and do fprop
        fprop_ref_list = []
        c0 = h0 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref, cprev, hprev,
             batch_cache) = lstm_ref.forward(inp_ref, WLSTM, c0, h0)
            if reset_cells is False:
                c0 = cprev
                h0 = hprev

            # the output needs a transpose as well
            Hout_ref = Hout_ref.reshape(seq_len * batch_size, hidden_size).T
            fprop_ref_list.append(Hout_ref)

        for i in range(num_iter):
            assert ng.testing.allclose(fprop_neon_list[i],
                                       fprop_ref_list[i], rtol=rtol, atol=atol)
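# --- Hedged usage sketch (not part of the original test module) ---
# check_lstm is normally driven by pytest parametrization; a direct call with small,
# illustrative sizes could look like the following. Reusing UniformInit here is an
# assumption; the real tests may use a different initializer.
def test_lstm_fprop_example():
    check_lstm(seq_len=5, input_size=3, hidden_size=4, batch_size=2,
               init_func=UniformInit(low=-0.08, high=0.08),
               return_seq=True, reset_cells=True)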
def check_stacked_lstm(seq_len, input_size, hidden_size,
                       batch_size, init_func, return_seq=True, backward=False,
                       reset_cells=False, num_iter=2):

    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, name='R')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng_1 = LSTM(hidden_size, init_func, activation=Tanh(),
                         gate_activation=Logistic(),
                         reset_cells=reset_cells, return_sequence=return_seq,
                         backward=backward)
        lstm_ng_2 = LSTM(hidden_size, init_func, activation=Tanh(),
                         gate_activation=Logistic(),
                         reset_cells=reset_cells, return_sequence=return_seq,
                         backward=backward)

        out_ng_1 = lstm_ng_1.train_outputs(inp_ng)
        out_ng_2 = lstm_ng_2.train_outputs(out_ng_1)

        fprop_neon_fun_2 = ex.executor(out_ng_2, inp_ng)

        # fprop on random inputs for multiple iterations
        fprop_neon_2_list = []
        input_value_list = []

        for i in range(num_iter):
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon_2 = fprop_neon_fun_2(input_value).copy()

            # comparing outputs
            if return_seq is True:
                fprop_neon_2 = fprop_neon_2[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_2_list.append(fprop_neon_2)

            if reset_cells is False:
                # look at the last hidden states
                assert ng.testing.allclose(fprop_neon_2[:, -1].reshape(-1, 1),
                                           lstm_ng_2.h_init.value.get(None),
                                           rtol=rtol, atol=atol)

        # after the rnn graph has been executed, we can get the W values. Get copies
        # so shared values don't confuse derivatives.
        # concatenate weights to i, f, o, g together (in this order)
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon_1 = \
            np.concatenate([lstm_ng_1.W_input[k].value.get(None).copy().T for k in gates], 1)
        Whh_neon_1 = \
            np.concatenate([lstm_ng_1.W_recur[k].value.get(None).copy().T for k in gates], 1)
        bh_neon_1 = \
            np.concatenate([lstm_ng_1.b[k].value.get(None).copy() for k in gates])
        Wxh_neon_2 = \
            np.concatenate([lstm_ng_2.W_input[k].value.get(None).copy().T for k in gates], 1)
        Whh_neon_2 = \
            np.concatenate([lstm_ng_2.W_recur[k].value.get(None).copy().T for k in gates], 1)
        bh_neon_2 = \
            np.concatenate([lstm_ng_2.b[k].value.get(None).copy() for k in gates])

        # reference numpy LSTM
        lstm_ref_1 = RefLSTM()
        lstm_ref_2 = RefLSTM()
        WLSTM_1 = lstm_ref_1.init(input_size, hidden_size)
        WLSTM_2 = lstm_ref_2.init(hidden_size, hidden_size)

        # make the reference weights and biases the same as the neon model's
        WLSTM_1[0, :] = bh_neon_1
        WLSTM_1[1:input_size + 1, :] = Wxh_neon_1
        WLSTM_1[input_size + 1:] = Whh_neon_1
        WLSTM_2[0, :] = bh_neon_2
        WLSTM_2[1:hidden_size + 1, :] = Wxh_neon_2
        WLSTM_2[hidden_size + 1:] = Whh_neon_2

        # transpose input X and do fprop
        fprop_ref_2_list = []
        c0_1 = h0_1 = None
        c0_2 = h0_2 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref_1, cprev_1, hprev_1,
             batch_cache) = lstm_ref_1.forward(inp_ref, WLSTM_1, c0_1, h0_1)
            (Hout_ref_2, cprev_2, hprev_2,
             batch_cache) = lstm_ref_2.forward(Hout_ref_1, WLSTM_2, c0_2, h0_2)

            if reset_cells is False:
                c0_1 = cprev_1
                h0_1 = hprev_1
                c0_2 = cprev_2
                h0_2 = hprev_2

            # the output needs a transpose as well
            Hout_ref_2 = Hout_ref_2.reshape(seq_len * batch_size, hidden_size).T
            fprop_ref_2_list.append(Hout_ref_2)

        for i in range(num_iter):
            assert ng.testing.allclose(fprop_neon_2_list[i],
                                       fprop_ref_2_list[i], rtol=rtol, atol=atol)
feature_axis = ng.make_axis(length=n_features, name="F")
out_axis = ng.make_axis(length=n_features, name="Fo")

in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
out_axes = ng.make_axes([batch_axis, time_axis, out_axis])

# Build placeholders for the created axes
inputs = dict(X=ng.placeholder(in_axes),
              y=ng.placeholder(out_axes),
              iteration=ng.placeholder(axes=()))

# define model
if args.modeltype == "TCN":
    affine_layer = Affine(axes=out_axis, weight_init=GaussianInit(0, 0.01),
                          activation=Logistic())

    model = Sequential([lambda op: ng.map_roles(op, {'F': 'C', 'REC': 'W'})] +
                       tcn(n_features, hidden_sizes, kernel_size=kernel_size,
                           dropout=dropout).layers +
                       [lambda op: ng.map_roles(op, {'C': 'F', 'W': 'REC'})] +
                       [affine_layer])
elif args.modeltype == "LSTM":
    model = Sequential(recurrent_model.define_model(out_axis, celltype=args.modeltype,
generator_layers = [affine_layer(h_dim, Rectlin(), name='g0'),
                    affine_layer(1, Identity(), name='g1')]
generator = Sequential(generator_layers)

# discriminator
discriminator_layers = [affine_layer(2 * h_dim, Tanh(), name='d0'),
                        affine_layer(2 * h_dim, Tanh(), name='d1')]
if minibatch_discrimination:
    raise NotImplementedError
else:
    discriminator_layers.append(affine_layer(2 * h_dim, Tanh(), name='d2'))
discriminator_layers.append(affine_layer(1, Logistic(), name='d3'))
discriminator = Sequential(discriminator_layers)

# TODO discriminator pre-training

# dataloader
np.random.seed(1)
toy_gan_data = ToyGAN(batch_size, num_iterations)
train_data = toy_gan_data.load_data()
train_set = ArrayIterator(train_data, batch_size, num_iterations)

# reset seed for weights
np.random.seed(2)

# build network graph
inputs = train_set.make_placeholders()
z_ax = noise_axes + N
z = ng.placeholder(axes=z_ax)

# image placeholder
C = ng.make_axis(name='C', length=1)
D = ng.make_axis(name='D', length=1)
H = ng.make_axis(name='H', length=28)
W = ng.make_axis(name='W', length=28)
image_axes = ng.make_axes([C, D, H, W, N])
image = ng.placeholder(axes=image_axes)

# DCGAN
if args.loss_type == "DCGAN":

    generator = make_generator(bn=True)
    discriminator = make_discriminator(bn=True, disc_activation=Logistic())

    # build network graph
    generated = generator(z)
    D1 = discriminator(image)
    D2 = discriminator(generated)

    weight_clip_value = None  # no weight clipping
    gp_scale = None  # no gradient penalty

    loss_d = -ng.log(D1) - ng.log(1 - D2)
    loss_g = -ng.log(D2)

# Wasserstein GAN
elif args.loss_type == "WGAN":
def train_mnist_mlp(transformer_name, data_dir=None,
                    rng_seed=12, batch_size=128, train_iter=10, eval_iter=10):
    assert transformer_name in ['cpu', 'hetr']
    assert isinstance(rng_seed, int)

    # Apply this metadata to graph regardless of transformer,
    # but it is ignored for non-HeTr case
    hetr_device_ids = (0, 1)

    # use consistent rng seed between runs
    np.random.seed(rng_seed)

    # Data
    train_data, valid_data = MNIST(path=data_dir).load_data()
    train_set = ArrayIterator(train_data, batch_size, total_iterations=train_iter)
    valid_set = ArrayIterator(valid_data, batch_size)
    inputs = train_set.make_placeholders()
    ax.Y.length = 10

    # Model
    with ng.metadata(device_id=hetr_device_ids, parallel=ax.N):
        seq1 = Sequential([Preprocess(functor=lambda x: x / 255.),
                           Affine(nout=100, weight_init=GaussianInit(), activation=Rectlin()),
                           Affine(axes=ax.Y, weight_init=GaussianInit(), activation=Logistic())])

        train_prob = seq1(inputs['image'])
        train_loss = ng.cross_entropy_binary(train_prob,
                                             ng.one_hot(inputs['label'], axis=ax.Y))

        optimizer = GradientDescentMomentum(0.1, 0.9)
        batch_cost = ng.sequential([optimizer(train_loss),
                                    ng.mean(train_loss, out_axes=())])
        train_outputs = dict(batch_cost=batch_cost)

        with Layer.inference_mode_on():
            inference_prob = seq1(inputs['image'])
        errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                              inputs['label'])
        eval_loss = ng.cross_entropy_binary(inference_prob,
                                            ng.one_hot(inputs['label'], axis=ax.Y))
        eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

    # Runtime
    with closing(ngt.make_transformer_factory(transformer_name)()) as transformer:
        train_computation = make_bound_computation(transformer, train_outputs, inputs)
        loss_computation = make_bound_computation(transformer, eval_outputs, inputs)

        train_costs = list()
        for step in range(train_iter):
            out = train_computation(next(train_set))
            train_costs.append(float(out['batch_cost']))

        ce_loss = list()
        for step in range(eval_iter):
            out = loss_computation(next(valid_set))
            ce_loss.append(np.mean(out['cross_ent_loss']))

    return train_costs, ce_loss
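# --- Hedged usage sketch (not from the original module) ---
# Run the helper on the single-device CPU transformer and inspect the returned
# learning curves; the variable names are illustrative.
costs, ce = train_mnist_mlp('cpu', batch_size=128, train_iter=10, eval_iter=10)
print("final batch cost: {:.4f}, mean eval CE: {:.4f}".format(costs[-1],
                                                              float(np.mean(ce))))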