def test_seq(self):
  X = K.placeholder((None, 28, 28, 1))
  f = N.Sequence([
      N.Conv(8, (3, 3), strides=1, pad='same'),
      N.Dimshuffle(pattern=(0, 3, 1, 2)),
      N.Flatten(outdim=2),
      N.Noise(level=0.3, noise_dims=None, noise_type='gaussian'),
      N.Dense(128, activation=tf.nn.relu),
      N.Dropout(level=0.3, noise_dims=None),
      N.Dense(10, activation=tf.nn.softmax)
  ])
  y = f(X)
  yT = f.T(y)
  f1 = K.function(X, y, defaults={K.is_training(): True})
  f2 = K.function(X, yT, defaults={K.is_training(): False})

  f = cPickle.loads(cPickle.dumps(f))
  y = f(X)
  yT = f.T(y)
  f3 = K.function(X, y, defaults={K.is_training(): True})
  f4 = K.function(X, yT, defaults={K.is_training(): False})

  x = np.random.rand(12, 28, 28, 1)
  self.assertEquals(f1(x).shape, (2688, 10))
  self.assertEquals(f3(x).shape, (2688, 10))
  self.assertEqual(np.round(f1(x).sum(), 4), np.round(f3(x).sum(), 4))
  self.assertEquals(y.shape.as_list(), [None, 10])

  self.assertEquals(f2(x).shape, (12, 28, 28, 1))
  self.assertEquals(f4(x).shape, (12, 28, 28, 1))
  self.assertEqual(str(f2(x).sum())[:4], str(f4(x).sum())[:4])
  self.assertEquals(yT.shape.as_list(), [None, 28, 28, 1])
def test_computational_graph3(self):
  # validate the number of updates found by ComputationGraph
  X = K.placeholder(shape=(None, 28, 28, 3))
  f = N.Sequence([
      N.Conv(32, 3, pad='same', activation=K.linear),
      N.BatchNorm(activation=K.relu),
      N.Flatten(outdim=2),
      N.Dense(16),
      N.BatchNorm(),
      N.Dense(10)
  ])
  K.set_training(True)
  y_train = f(X)
  K.set_training(False)
  y_score = f(X)
  self.assertTrue(K.get_shape(y_train) == K.get_shape(y_score) and
                  K.get_shape(y_score) == (None, 10))

  cc_train = K.ComputationGraph(y_train)
  cc_score = K.ComputationGraph(y_score)
  self.assertTrue(len(cc_score.updates) == 0)
  self.assertTrue(len(cc_train.updates) == 4)

  # create real function
  fn_train = K.function(X, y_train)
  fn_score = K.function(X, y_score)
  shape1 = fn_train(np.random.rand(12, 28, 28, 3)).shape
  shape2 = fn_score(np.random.rand(12, 28, 28, 3)).shape
  self.assertTrue(shape1 == shape2 and shape1 == (12, 10))
def test_simple_rnn(self):
  np.random.seed(12082518)
  x = np.random.rand(128, 8, 32)
  #
  X = K.placeholder(shape=(None, 8, 32))
  X1 = K.placeholder(shape=(None, 8, 32))
  X2 = K.placeholder(shape=(None, 8, 32))
  X3 = K.placeholder(shape=(None, 8, 33))
  f = N.RNN(32, activation=K.relu, input_mode='skip')
  #
  y = f(X, mask=K.ones(shape=(128, 8)))
  graph = K.ComputationGraph(y)
  self.assertEqual(len(graph.inputs), 1)
  f1 = K.function([X], y)
  x1 = f1(x)
  # ====== different placeholder ====== #
  y = f(X1)
  f2 = K.function([X1], y)
  x2 = f2(x)
  self.assertEqual(np.sum(x1[0] == x2[0]), np.prod(x1[0].shape))
  # ====== pickle load ====== #
  f = cPickle.loads(cPickle.dumps(f))
  y = f(X2)
  f2 = K.function([X2], y)
  x3 = f2(x)
  self.assertEqual(np.sum(x2[0] == x3[0]), np.prod(x2[0].shape))
  # ====== other input shape ====== #
  error_happen = False
  try:
    y = f(X3)
    f3 = K.function([X3], y)
    x3 = f3(np.random.rand(128, 8, 33))
  except (ValueError, Exception):
    error_happen = True
  self.assertTrue(error_happen)
def load():
  f = cPickle.load(open(U.get_modelpath('dummy.ai'), 'r'))
  y = f(X)
  yT = f.T(y)
  f1 = K.function(X, y)
  f2 = K.function(X, yT)
  _ = f1(x)
  print(_.shape, _.sum())
  _ = f2(x)
  print(_.shape, _.sum())
def test_dropout(self):
  x = K.placeholder((4, 6))
  f1 = N.Dropout(level=0.5, noise_dims=0, rescale=True)
  y = f1(x)
  f = K.function(x, y, defaults={K.is_training(): True})
  z = f(np.ones((4, 6)))
  z = z.tolist()
  self.assertTrue(all(i == z[0] for i in z))

  f1 = N.Dropout(level=0.5, noise_dims=1, rescale=True)
  y = f1(x)
  f = K.function(x, y, defaults={K.is_training(): True})
  z = f(np.ones((4, 6)))
  z = z.T.tolist()
  self.assertTrue(all(i == z[0] for i in z))
def test_noise(self):
  x = K.placeholder((2, 3))
  f1 = N.Noise(level=0.5, noise_dims=0, noise_type='gaussian')
  y = f1(x)
  f = K.function(x, y, defaults={K.is_training(): True})
  z = f(np.ones((2, 3)))
  z = z.tolist()
  self.assertTrue(all(i == z[0] for i in z))

  f1 = N.Noise(level=0.5, noise_dims=1, noise_type='gaussian')
  y = f1(x)
  f = K.function(x, y, defaults={K.is_training(): True})
  z = f(np.ones((2, 3)))
  z = z.T.tolist()
  self.assertTrue(all(i == z[0] for i in z))
def test_batch_norm(self):
  K.set_training(True)
  x = K.placeholder((None, 8, 12))
  y = N.BatchNorm()(x)
  f = K.function(x, y)
  z = f(np.random.rand(25, 8, 12))
  self.assertEquals(z.shape, (25, 8, 12))
  # ====== Not training ====== #
  K.set_training(False)
  x = K.placeholder((None, 8, 12))
  y = N.BatchNorm()(x)
  f = K.function(x, y)
  z = f(np.random.rand(25, 8, 12))
  self.assertEquals(z.shape, (25, 8, 12))
def test_rnn_decorator(self):
  @K.rnn_decorator(sequences='X', states='out')
  def rnn(X, out):
    return K.relu(X + out)

  y = rnn(K.ones(shape=(25, 12, 18, 8)), K.zeros(shape=(25, 18, 8)))
  f = K.function([], y)
  self.assertEqual(f()[0].shape, (25, 12, 18, 8))
def test_conv2D(self):
  x = K.placeholder((None, 28, 28, 3))
  f1 = N.Conv(16, (3, 3), strides=(2, 2), pad='same')
  y = f1(x)
  f = K.function(x, y)
  z = f(np.random.rand(12, 28, 28, 3))
  self.assertEquals(z.shape, (12, 14, 14, 16))
  self.assertEquals(y.shape.as_list(), [None, 14, 14, 16])
  # ====== transpose convolution ====== #
  y = f1.T(y)
  f = K.function(x, y)
  z = f(np.random.rand(12, 28, 28, 3))
  self.assertEquals(z.shape, (12, 28, 28, 3))
  self.assertEquals(y.shape.as_list(), [None, 28, 28, 3])
def test_load_save3(self):
  X = K.placeholder(shape=(None, 28, 28))
  ops = N.Sequence([
      N.Dimshuffle(pattern=(0, 1, 2, 'x')),
      N.Conv(8, (3, 3), strides=(1, 1), pad='same', activation=K.relu),
      K.pool2d,
      N.Flatten(outdim=2),
      N.Dense(64, activation=K.relu),
      N.Dense(10, activation=K.softmax)
  ])
  y = ops(X)
  f1 = K.function(X, y)

  ops_ = cPickle.loads(cPickle.dumps(ops, protocol=cPickle.HIGHEST_PROTOCOL))
  y_ = ops_(X)
  f2 = K.function(X, y_)

  x = np.random.rand(32, 28, 28)
  self.assertEqual(np.sum(f1(x) - f2(x)), 0.)
def test_dilatedConv(self):
  x = K.placeholder((None, 28, 28, 3))
  f1 = N.Conv(16, (3, 3), dilation=(2, 2))
  y = f1(x)
  f = K.function(x, y)
  z = f(np.random.rand(12, 28, 28, 3))
  self.assertEquals(z.shape, (12, 24, 24, 16))
  self.assertEquals(y.shape.as_list(), [None, 24, 24, 16])
def test_conv3D(self):
  x = K.placeholder((None, 28, 28, 28, 3))
  f1 = N.Conv(16, (3, 3, 3), strides=1, pad='valid')
  y = f1(x)
  f = K.function(x, y)
  z = f(np.random.rand(12, 28, 28, 28, 3))
  self.assertEquals(z.shape, (12, 26, 26, 26, 16))
  self.assertEquals(y.shape.as_list(), [None, 26, 26, 26, 16])
def test_flatten(self):
  x = K.placeholder(shape=(None, 8, 12, 25, 18))
  for i in range(1, 5):
    y = K.flatten(x, outdim=i)
    f = K.function(x, y)
    shape1 = K.get_shape(y)
    shape2 = f(np.random.rand(16, 8, 12, 25, 18)).shape
    self.assertEqual(len(shape1), len(shape2))
    self.assertTrue(all(i == j for i, j in zip(shape1, shape2)
                        if i is not None))
def _create_function(self):
  self._check_initialized()
  # ====== prediction function ====== #
  if 'pred' not in self._functions:
    f_pred = K.function(self._inputs, self._y_pred)
    self._functions['pred'] = f_pred
  # ====== training function ====== #
  if 'train' not in self._functions:
    # update optimizer arguments
    _ = inspect.getargspec(self._optimizer)
    optimizer_kwargs = {i: j for i, j in zip(reversed(_.args), reversed(_.defaults))}
    optimizer_kwargs.update(self._train_args)
    # update loss_function arguments
    _ = inspect.getargspec(self._loss)
    if _.defaults is not None:
      loss_kwargs = {i: j for i, j in zip(reversed(_.args), reversed(_.defaults))}
      loss_kwargs.update(self._train_args)
    else:
      loss_kwargs = {}
    # create cost, updates and function
    cost_train = K.mean(self._loss(self._y_train, self._outputs[0], **loss_kwargs))
    parameters = self._seq_ops.parameters
    updates = self._optimizer(cost_train, parameters, **optimizer_kwargs)
    f_train = K.function(self._inputs + self._outputs, cost_train, updates=updates)
    self._functions['train'] = f_train
  # ====== scoring function ====== #
  if 'score' not in self._functions:
    cost_pred = K.mean(self._metric(self._y_pred, self._outputs[0]))
    f_score = K.function(self._inputs + self._outputs, cost_pred)
    self._functions['score'] = f_score
def test_load_save2(self):
  K.set_training(True)
  X = K.placeholder((None, 1, 28, 28))

  f = N.Dense(128, activation=K.relu)
  y = f(X)
  yT = f.T(y)
  f1 = K.function(X, y)
  f2 = K.function(X, yT)

  f = cPickle.loads(cPickle.dumps(f))
  y = f(X)
  yT = f.T(y)
  f3 = K.function(X, y)
  f4 = K.function(X, yT)

  x = np.random.rand(12, 1, 28, 28)
  self.assertEqual(f1(x).sum(), f3(x).sum())
  self.assertEqual(f2(x).sum(), f4(x).sum())
def create():
  f = N.Sequence([
      N.Conv(8, (3, 3), strides=1, pad='same'),
      N.Dimshuffle(pattern=(0, 3, 1, 2)),
      N.FlattenLeft(outdim=2),
      N.Noise(level=0.3, noise_dims=None, noise_type='gaussian'),
      N.Dense(128, activation=K.relu),
      N.Dropout(level=0.3, noise_dims=None),
      N.Dense(10, activation=K.softmax)
  ], debug=True)
  y = f(X)
  yT = f.T(y)
  f1 = K.function(X, y)
  f2 = K.function(X, yT)
  cPickle.dump(f, open(U.get_modelpath('dummy.ai', override=True), 'w'))

  _ = f1(x)
  print(_.shape, _.sum())
  _ = f2(x)
  print(_.shape, _.sum())
def test_slice_ops(self):
  X = K.placeholder(shape=(None, 28, 28, 28, 3))
  f = N.Sequence([
      N.Conv(32, 3, pad='same', activation=K.linear),
      N.BatchNorm(activation=tf.nn.relu),
      N.Flatten(outdim=4)[:, 8:12, 18:25, 13:],
  ])
  y = f(X)
  fn = K.function(X, y)
  self.assertTrue(fn(np.random.rand(12, 28, 28, 28, 3)).shape[1:] ==
                  tuple(y.shape.as_list()[1:]))
  self.assertEqual(y.shape.as_list()[1:], [4, 7, 883])
def test_dense(self):
  x = K.placeholder((None, 10))
  f1 = N.Dense(20)
  f2 = N.Dense(30)
  y = f2(f1(x))
  y = f1.T(f2.T(y))
  f = K.function(x, y)
  x = f(np.random.rand(12, 10))
  self.assertEquals(x.shape, (12, 10))
  self.assertEquals(y.shape.as_list(), [None, 10])
def test_computational_graph2(self):
  np.random.seed(1208)
  X = K.variable(np.zeros((8, 12)), name='X')
  Y = K.variable(np.random.rand(12, 8), name='Y')
  Z = K.placeholder(shape=(8, 8), name='Z')
  a = K.dot(X, Y)
  add_roles(a, Auxiliary)
  a = a + Z

  g1 = K.ComputationGraph(a)
  self.assertEqual(len(g1.trainable_variables), 2)
  self.assertEqual(len(g1.placeholders), 1)
  self.assertEqual(len(g1.updates), 1)
  self.assertEqual(len(g1.auxiliary_variables), 1)

  f = K.function(Z, [a] + g1.auxiliary_variables)
  output = f(np.random.rand(8, 8))
  self.assertEqual(repr(np.sum(output[0]))[:5], "32.20")
  self.assertEqual(np.sum(output[1]), 0)
  self.assertEqual(np.unique(K.eval(X)).tolist(), [12.])
def test_cudnn_rnn_backend(self):
  if get_device() == 'cpu':
    return
  print()
  np.random.seed(1208)
  batch_size = 25
  hidden_size = 12
  X_linear = K.placeholder(shape=(None, 8, 32), name='X_linear')
  X_skip = K.placeholder(shape=(None, 8, 12), name='X_skip')
  for direction_mode in ['bidirectional', 'unidirectional']:
    for nb_layers in [1, 2, 3]:
      for rnn_mode in ['gru', 'lstm', 'rnn_tanh']:
        for input_mode in ['linear', 'skip']:
          if input_mode == 'linear':
            X = X_linear
            x = np.random.rand(batch_size, 8, 32)
          else:
            X = X_skip
            x = np.random.rand(batch_size, 8, 12)
          start = timeit.default_timer()
          y = K.rnn_dnn(X, hidden_size=hidden_size, rnn_mode=rnn_mode,
                        input_mode=input_mode, num_layers=nb_layers,
                        direction_mode=direction_mode)
          # perform function
          f = K.function(X, y)
          output = f(x)
          benchmark = timeit.default_timer() - start
          self.assertEqual(
              [list(i.shape) for i in output],
              [[batch_size if j is None else j for j in K.get_shape(i)]
               for i in y])
          print("*PASSED* [Layers]%s [Mode]%-8s [Input]%-6s [Direction]%s [Benchmark]%.4f" %
                (nb_layers, rnn_mode, input_mode, direction_mode, benchmark))
X = ds[FEATURE_NAME]
indices = ds['indices_%s' % FEATURE_NAME]
spkid = ds['spkid']
# ===========================================================================
# Load the model
# ===========================================================================
# ====== load the network ====== #
x_vec = N.deserialize(path=MODEL, force_restore_vars=True)
# ====== get output tensors ====== #
y_logit = x_vec()
y_proba = tf.nn.softmax(y_logit)
X = K.ComputationGraph(y_proba).placeholders[0]
z = K.ComputationGraph(y_proba).get(roles=N.Dense, scope='LatentOutput',
                                    beginning_scope=False)[0]
f_prob = K.function(inputs=X, outputs=y_proba, training=False)
f_z = K.function(inputs=X, outputs=z, training=False)
print('Inputs:', ctext(X, 'cyan'))
print('Predic:', ctext(y_proba, 'cyan'))
print('Latent:', ctext(z, 'cyan'))
# ===========================================================================
# Helper
# ===========================================================================
def evaluate_prediction(name_list, y_pred, y_true, title):

  def _report(y_p, y_t, pad=''):
    with catch_warnings_ignore(Warning):
      z_ = np.concatenate(y_p, axis=0)
      z = np.concatenate(y_t, axis=0)
      print(pad, '*** %s ***' % ctext('Frame-level', 'lightcyan'))
      print(pad, "#Samples:", ctext(len(z), 'cyan'))
      print(pad, "Log loss:", log_loss(y_true=z, y_pred=z_, labels=labels))
def test_cudnn_rnn_nnet(self):
  if get_device() == 'cpu':
    return
  print()
  np.random.seed(1208)
  batch_size = 6
  hidden_size = 4
  X_linear = K.placeholder(shape=(None, 3, 8), name='X_linear')
  X_skip = K.placeholder(shape=(None, 3, hidden_size), name='X_skip')
  for direction_mode in ['bidirectional', 'unidirectional']:
    is_bidirectional = direction_mode == 'bidirectional'
    for nb_layers in [2]:
      real_layers = nb_layers * 2 if is_bidirectional else nb_layers
      for rnn_mode in ['gru', 'lstm', 'rnn_relu', 'rnn_tanh']:
        for init_state, init_state_name in zip(
            [None,  # None init
             K.init.uniform,  # function init
             K.variable(np.random.rand(real_layers, 1, hidden_size)),  # variable
             K.variable(np.random.rand(real_layers, batch_size, hidden_size)),  # variable
             K.zeros(shape=(real_layers, 1, hidden_size)),
             K.ones(shape=(real_layers, batch_size, hidden_size))],
            ['None', 'Function', 'Var1', 'VarB', 'Tensor1', 'TensorB']):
          for input_mode in ['linear', 'skip']:
            if input_mode == 'linear':
              X = X_linear
              x = np.random.rand(batch_size, 3, 8)
            else:
              X = X_skip
              x = np.random.rand(batch_size, 3, hidden_size)
            start = timeit.default_timer()
            f = N.CudnnRNN(num_units=hidden_size, rnn_mode=rnn_mode,
                           input_mode=input_mode, num_layers=nb_layers,
                           direction_mode=direction_mode,
                           params_split=False, return_states=True)
            # perform function
            y = f(X, h0=init_state, c0=init_state)
            f = K.function(X, y)
            output = f(x)
            benchmark = timeit.default_timer() - start
            self.assertTrue([list(i.shape) for i in output] ==
                            [[batch_size if j is None else j
                              for j in K.get_shape(i)] for i in y])
            print("*PASSED* [Layers]%s [Mode]%-8s [Input]%-6s [Direction]%-12s [State]%s [Benchmark]%.4f" %
                  (nb_layers, rnn_mode, input_mode, direction_mode,
                   init_state_name, benchmark))
def test_gru(self):
  # ====== pre-define parameters ====== #
  W_in_to_updategate = random(28, 32)
  W_hid_to_updategate = random(32, 32)
  b_updategate = random(32)
  #
  W_in_to_resetgate = random(28, 32)
  W_hid_to_resetgate = random(32, 32)
  b_resetgate = random(32)
  #
  W_in_to_hidden_update = random(28, 32)
  W_hid_to_hidden_update = random(32, 32)
  b_hidden_update = random(32)
  #
  hid_init = random(1, 32)
  x = random(12, 28, 28)
  x_mask = np.random.randint(0, 2, size=(12, 28))
  # ====== odin ====== #
  X = K.placeholder(shape=(None, 28, 28), name='X')
  mask = K.placeholder(shape=(None, 28), name='mask', dtype='int32')
  f = N.Sequence([
      N.Merge([
          N.Dense(32, W_init=W_in_to_updategate, b_init=b_updategate,
                  activation=K.linear, name='update'),
          N.Dense(32, W_init=W_in_to_resetgate, b_init=b_resetgate,
                  activation=K.linear, name='reset'),
          N.Dense(32, W_init=W_in_to_hidden_update, b_init=b_hidden_update,
                  activation=K.linear, name='hidden')
      ], merge_function=K.concatenate),
      N.GRU(32, activation=K.tanh, gate_activation=K.sigmoid,
            W_hid_init=[W_hid_to_updategate, W_hid_to_resetgate,
                        W_hid_to_hidden_update],
            input_mode='skip')
  ])
  y = f(X, h0=hid_init, mask=mask)
  f = K.function([X, mask], y)
  out1 = f(x, x_mask)
  # ====== lasagne ====== #
  if get_backend() == 'tensorflow':
    self.assertTrue(repr(np.sum(out1))[:8] == repr(2490.0596)[:8])
    return
  l = lasagne.layers.InputLayer(shape=(None, 28, 28))
  l.input_var = X
  l_mask = lasagne.layers.InputLayer(shape=(None, 28))
  l_mask.input_var = mask
  l = lasagne.layers.GRULayer(
      l, num_units=32,
      updategate=lasagne.layers.Gate(W_cell=None,
                                     W_in=W_in_to_updategate,
                                     W_hid=W_hid_to_updategate,
                                     b=b_updategate,
                                     nonlinearity=lasagne.nonlinearities.sigmoid),
      resetgate=lasagne.layers.Gate(W_cell=None,
                                    W_in=W_in_to_resetgate,
                                    W_hid=W_hid_to_resetgate,
                                    b=b_resetgate,
                                    nonlinearity=lasagne.nonlinearities.sigmoid),
      hidden_update=lasagne.layers.Gate(W_cell=None,
                                        W_in=W_in_to_hidden_update,
                                        W_hid=W_hid_to_hidden_update,
                                        b=b_hidden_update,
                                        nonlinearity=lasagne.nonlinearities.tanh),
      hid_init=hid_init, mask_input=l_mask, precompute_input=True)
  y = lasagne.layers.get_output(l)
  f = K.function([X, mask], y)
  out2 = f(x, x_mask)
  # ====== test ====== #
  self.assertAlmostEqual(np.sum(np.abs(out1 - out2)), 0.)
    N.Noise(level=1.0, noise_type='gaussian'),
    N.Dense(num_units=512, activation=K.relu),
    N.BatchNorm(axes=0, activation=K.relu),
    N.Dense(num_units=512, activation=K.relu),
    N.BatchNorm(axes=0, activation=K.relu),
    N.Dense(num_units=np.prod(input_shape[1:]), activation=K.linear),
    N.Reshape(shape=([0],) + input_shape[1:])
], debug=True, name='DecoderNetwork')
# ===========================================================================
# Create model and objectives
# ===========================================================================
Z = f_encoder(X)
X_logits = f_decoder(Z)
X_probas = tf.nn.sigmoid(X_logits)
f_X = K.function(inputs=X, outputs=X_probas, training=True)

X_samples = f_decoder(tf.random_normal(shape=(25, args.dim),
                                       dtype=X_probas.dtype))
f_samples = K.function(inputs=[], outputs=X_samples, training=False)
# ====== `distortion` is the negative log likelihood ====== #
if args.loss == 'ce':
  loss = tf.losses.softmax_cross_entropy(onehot_labels=X, logits=X_logits)
elif args.loss == 'mse':
  loss = tf.losses.mean_squared_error(labels=X, predictions=X_probas)
elif args.loss == 'huber':
  loss = tf.losses.huber_loss(labels=X, predictions=X_probas)
elif args.loss == 'lglo':
  loss = tf.losses.log_loss(labels=X, predictions=X_probas)
# ===========================================================================
# Optimizing the network
K.set_training(True)
y_train = f(X)
K.set_training(False)
y_pred = f(X)

cost_train = K.mean(K.categorical_crossentropy(y_train, y_true))
cost_pred = K.mean(K.categorical_accuracy(y_pred, y_true))
cost_eval = K.mean(K.categorical_crossentropy(y_pred, y_true))
parameters = f.parameters
print('Parameters:', [p.name for p in parameters])

optz = K.optimizers.RMSProp()
updates = optz.get_updates(cost_train, parameters)

print("Build training function ...")
f_train = K.function([X, y_true], cost_train, updates=updates)
print("Build scoring function ...")
f_score = K.function([X, y_true], [cost_pred, cost_eval])
# ===========================================================================
# Create trainer
# ===========================================================================
print("Create trainer ...")
trainer = training.MainLoop(batch_size=32, seed=12082518, shuffle_level=2)
trainer.set_save(utils.get_modelpath('cifar10.ai', override=True), f)
trainer.set_task(f_train, [X_learn, y_learn], epoch=25, p=1, name='Train')
trainer.set_subtask(f_score, [X_test, y_test], freq=1, name='Valid')
trainer.set_callback([
    training.ProgressMonitor(name='Train', format='Results: {:.4f}'),
    training.ProgressMonitor(name='Valid', format='Results: {:.4f},{:.4f}'),
    # early stop based on crossentropy on test (not a right procedure,
def train(X, y_true, y_pred, train_data, valid_data=None, valid_freq=1.,
          patience=3, threshold=5, rollback=True,
          objectives=[tf.losses.softmax_cross_entropy],
          metrics=[0], training_metrics=[],
          l1_regu=0., l2_regu=0., parameters=[],
          prior_weights=None, sample_weights=None,
          batch_size=256, epochs=8, shuffle=True,
          optimizer='rmsprop', optz_kwargs={'lr': 0.001}, updates=None,
          init_vars=True, labels=None, seed=5218, verbose=2):
  """
  Parameters
  ----------
  rollback : bool (default: True)
      if True, allow rollback to the best checkpoint during training
  objectives : {callable, tensorflow.Tensor}
      if `callable`, the function must take `y_true`, and `y_pred`
      The objectives must be differentiable and used for training.
  metrics : {callable, tensorflow.Tensor, int}
      if `callable`, the function must take `y_true`, and `y_pred`
      The `metrics` is for monitoring the training process.
      if `int`, it is the index of the loss in `objectives`
      NOTE: the first metric in the list will be used for
      early-stopping (smaller is better).
  training_metrics : {callable, tensorflow.Tensor, int}
      if `int`, it is the index of the loss in `metrics`
  parameters : {list or tensorflow.Variables}
      All the parameters will be updated by the `optimizer`; if None or
      an empty list is given, use ComputationalGraph to get all variables
      with Parameters roles related to the objectives
  init_vars : bool (default: True)
      automatically initialize all variables
  labels : {None, list of string}
      Given labels for classification task
  seed : int
      specific random seed for reproducibility
  verbose : int
      0 - Turn off all log
      1 - only show notification
      2 - show notification, important log and summary
      3 - Show progress, summary, notification and logging
      4 - Show debug information and everything

  Return
  ------
  Function used for prediction
  """
  from odin import backend as K
  # ====== preprocess inputs ====== #
  X = as_tuple(X, t=K.is_tensor)
  y_true = as_tuple(y_true, t=K.is_tensor)
  y_pred = as_tuple(y_pred, t=K.is_tensor)
  # ====== parsing objectives and metrics ====== #
  # for training
  prior_weights = _preprocess_prior_weights(y_true=y_true,
                                            prior_weights=prior_weights)
  if prior_weights is not None:
    if sample_weights is not None:
      sample_weights = sample_weights + prior_weights
    else:
      sample_weights = prior_weights
  objectives = _preprocessing_losses(as_tuple(objectives), y_true, y_pred,
                                     sample_weights=sample_weights)
  # metrics for monitoring
  metrics = as_tuple(metrics)
  get_value = lambda x: np.mean(x)
  if len(metrics) > 0 and \
  (metrics[0] == tf.metrics.accuracy or
   metrics[0] == K.metrics.categorical_accuracy):
    get_value = lambda x: 1 - np.mean(x)
  metrics = _preprocessing_losses(metrics, y_true, y_pred,
                                  inherit_losses=objectives)
  # training_metrics
  training_metrics = _preprocessing_losses(as_tuple(training_metrics),
                                           y_true, y_pred,
                                           inherit_losses=metrics)
  # sum the objectives for differentiable
  if len(objectives) > 0:
    objectives = [sum(objectives) if len(objectives) > 1 else objectives[0]]
  # ====== preprocess optimizer and get updates ====== #
  if updates is None:  # not given updates
    if is_string(optimizer):
      optimizer = _parse_optimizer(optimizer)
      optimizer = optimizer(**optz_kwargs)
    elif not isinstance(optimizer, K.optimizers.Optimizer):
      raise ValueError("`optimizer` must be string - name of algorithm or "
                       "instance of odin.backend.optimizers.Optimizer")
    parameters = K.ComputationGraph(objectives).parameters \
        if len(parameters) == 0 else as_tuple(parameters, t=K.is_variable)
    # check objectives
    if len(objectives) == 0:
      raise RuntimeError("`objectives` must be given due to `updates=None`")
    weights = [p for p in parameters
               if K.role.has_roles(p, roles=K.role.Weight)]
    # l1 regularization
    if l1_regu > 0.:
      l1_norm = sum(tf.norm(w, ord=1) for w in weights)
      objectives[0] += l1_norm
    # l2 regularization
    if l2_regu > 0.:
      l2_norm = sum(tf.norm(w, ord=2) for w in weights)
      objectives[0] += l2_norm
    # update rules
    updates = optimizer.get_updates(objectives[0], parameters)
    # adding global norm and learning rate
    training_metrics.append(optimizer.norm)
    training_metrics.append(optimizer.lr)
  elif K.is_operation(updates):  # given updates
    optimizer = None
  else:
    raise ValueError("`updates` can be None or tensorflow Operation, "
                     "but given type: %s" % str(type(updates)))
  # ====== placeholders ====== #
  inputs_plh = []
  for plh in X:
    for i in (K.ComputationGraph(plh).placeholders
              if not K.is_placeholder(plh) else as_tuple(plh)):
      inputs_plh.append(i)
  outputs_plh = []
  for plh in y_true:  # no duplicated inputs (e.g. autoencoder X == y)
    if not K.is_placeholder(plh):
      plh = K.ComputationGraph(plh).placeholders
    for i in as_tuple(plh):
      if i not in inputs_plh:
        outputs_plh.append(i)
  inputs = inputs_plh + outputs_plh
  # ====== initialize variables ====== #
  if bool(init_vars):
    K.initialize_all_variables()
  # ====== creating function ====== #
  # training function
  f_train = K.function(inputs=inputs, outputs=objectives + training_metrics,
                       updates=updates, training=True)
  # scoring function
  f_score = None
  if len(metrics) > 0:
    f_score = K.function(inputs=inputs, outputs=metrics, training=False)
  # prediction function
  f_pred = K.function(inputs=inputs_plh,
                      outputs=y_pred[0] if len(y_pred) == 1 else y_pred,
                      training=False)
  # ====== preprocessing data ====== #
  train_data, valid_data = _preprocessing_data(train_data, valid_data)
  # print some debug information if necessary
  if verbose >= 4:
    print("%s %s %s" % (ctext("============", 'cyan'),
                        ctext("Prepare for Training", 'red'),
                        ctext("============", 'cyan')))
    print(ctext("Input placeholders:", 'yellow'))
    for i in inputs_plh:
      print(" * ", str(i))
    print(ctext("Output placeholders:", 'yellow'))
    for i in outputs_plh:
      print(" * ", str(i))
    print(ctext("Parameters:", 'yellow'))
    for p in parameters:
      print(" * ", p.name, '-', p.shape, ';', p.dtype.name)
    print(ctext("Optimizer:", 'yellow'))
    print(" * ", str(optimizer))
    print(" * Optimizer kwargs:", optz_kwargs)
    print(" * L1:", l1_regu)
    print(" * L2:", l2_regu)
    print(ctext("Training:", 'yellow'))
    print(" * Valid freq:", valid_freq)
    print(" * Patience:", patience)
    print(" * Threshold:", threshold)
    print(" * Rollback:", rollback)
    print(" * Batch size:", batch_size)
    print(" * Epoch:", epochs)
    print(" * Shuffle:", shuffle)
    print(" * Seed:", seed)
    print(ctext("Objectives:", 'yellow'))
    for o in objectives:
      print(" * ", str(o))
    print(ctext("Weights:", 'yellow'))
    print(" * Prior:", str(prior_weights))
    print(" * Sample:", str(sample_weights))
    print(ctext("Metrics:", 'yellow'))
    for m in metrics:
      print(" * ", str(m))
    print(ctext("Training metrics:", 'yellow'))
    for t in training_metrics:
      print(" * ", str(t))
    print(ctext("Training Data:", 'yellow'), str(train_data))
    print(ctext("Validating Data:", 'yellow'), str(valid_data))
    print(ctext("Labels:", 'yellow'), labels)
  # ====== create trainer ====== #
  callback_log = True if verbose > 0 else False
  trainer = MainLoop(batch_size=batch_size,
                     seed=seed if shuffle else None,
                     shuffle_level=2 if shuffle else 0,
                     allow_rollback=rollback,
                     verbose=verbose, labels=labels)
  trainer.set_checkpoint(path=None, obj=None, variables=parameters)
  # create callback
  callbacks = [NaNDetector(patience=patience, log=callback_log)]
  if valid_data is not None and f_score is not None:
    callbacks.append(
        EarlyStopGeneralizationLoss(task_name='valid', output_name=metrics[0],
                                    threshold=threshold, patience=patience,
                                    log=callback_log, get_value=get_value))
  trainer.set_callbacks(callbacks)
  # set the tasks
  trainer.set_train_task(func=f_train, data=train_data,
                         epoch=epochs, name='train')
  if valid_data is not None and f_score is not None:
    trainer.set_valid_task(func=f_score, data=valid_data,
                           freq=Timer(percentage=valid_freq),
                           name='valid')
  # running
  trainer.run()
  return f_pred
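# ---------------------------------------------------------------------------
# Minimal usage sketch for the `train` helper above. Hypothetical: the
# placeholders, toy model and toy data defined inside this sketch are
# illustrative only and not part of the original module; the keyword
# arguments simply mirror the signature above.
# ---------------------------------------------------------------------------
def _train_usage_sketch():
  X_in = K.placeholder(shape=(None, 20), name='X_in')
  y_in = K.placeholder(shape=(None, 5), name='y_in')
  y_out = N.Dense(5, activation=tf.nn.softmax)(X_in)
  x_data = np.random.rand(512, 20)
  y_data = np.eye(5)[np.random.randint(0, 5, size=512)]
  # `train` builds f_train/f_score internally and returns the prediction function
  f_pred = train(X_in, y_in, y_out,
                 train_data=(x_data, y_data), valid_data=None,
                 objectives=[tf.losses.softmax_cross_entropy],
                 metrics=[0],  # monitor the first objective for early-stopping
                 optimizer='rmsprop', optz_kwargs={'lr': 0.001},
                 batch_size=64, epochs=2, verbose=2)
  return f_pred(x_data[:32])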
def _initialize(self, X):
  # ====== check inputs dimensions ====== #
  if not hasattr(X, 'shape'):
    raise ValueError("`X` must have `shape` attribute.")
  feat_dim = np.prod(X.shape[1:])
  if self._feat_dim is None:
    self._feat_dim = feat_dim
  # validate input dimension
  if feat_dim != self._feat_dim:
    raise RuntimeError("Feature dimension mismatch %d and %d" %
                       (feat_dim, self.feat_dim))
  # check if tensorflow op initialized
  if hasattr(self, '_f_train'):
    return
  # ====== binary or multi-classes ====== #
  if self.nb_classes == 2:
    out_shape = (None,)
    fn_activation = tf.nn.sigmoid
    fn_loss = tf.losses.sigmoid_cross_entropy
    fn_acc = K.metrics.binary_accuracy
  else:
    out_shape = (None, self.nb_classes)
    fn_activation = tf.nn.softmax
    fn_loss = tf.losses.softmax_cross_entropy
    fn_acc = K.metrics.categorical_accuracy
  # ====== create model ====== #
  with tf.name_scope(self.name, 'logistic_regression'):
    # inputs
    self._X = K.placeholder(shape=(None, self.feat_dim), dtype=self.dtype,
                            name='%s_input' % self.name)
    self._y = K.placeholder(shape=out_shape, dtype=self.dtype,
                            name='%s_output' % self.name)
    # check the bias
    if is_number(self.fit_intercept):
      b_init = float(self.fit_intercept)
    elif self.fit_intercept is False or self.fit_intercept is None:
      b_init = None
    else:
      b_init = self.fit_intercept
    # create the model and initialize
    with K.variable_dtype(dtype=self.dtype):
      self._model = N.Dense(
          num_units=self.nb_classes,
          W_init=init_ops.glorot_uniform_initializer(seed=self._rand_state.randint()),
          b_init=b_init,
          activation=K.linear)
      y_logits = self._model(self._X)
    y_prob = fn_activation(y_logits)
    # applying class weights
    class_weights = tf.constant(value=self._class_weight, dtype=self.dtype,
                                name="class_weights")
    weights = tf.gather(class_weights,
                        tf.cast(self._y, 'int32') if self.nb_classes == 2
                        else tf.argmax(self._y, axis=-1))
    # optimizer
    params = [v for v in self._model.variables
              if has_roles(v, Weight) or has_roles(v, Bias)]
    losses = fn_loss(self._y, y_logits, weights=weights)
    l1_norm = tf.norm(self._model.get('W'), ord=1) if self.l1 > 0. else 0
    l2_norm = tf.norm(self._model.get('W'), ord=2) if self.l2 > 0. else 0
    losses = losses + self.l1 * l1_norm + self.l2 * l2_norm
    acc = fn_acc(self._y, y_prob)
    updates = self._optimizer.get_updates(losses, params)
    # create function
    if self.confusion_matrix:
      cm = K.metrics.confusion_matrix(y_true=self._y, y_pred=y_prob,
                                      labels=self.nb_classes)
    metrics = [losses, acc, cm] if self.confusion_matrix else [losses, acc]
    self._f_train = K.function(inputs=(self._X, self._y), outputs=metrics,
                               updates=updates, training=True)
    self._f_score = K.function(inputs=(self._X, self._y), outputs=metrics,
                               training=False)
    self._f_pred_prob = K.function(inputs=self._X, outputs=y_prob,
                                   training=False)
    self._f_pred_logit = K.function(inputs=self._X, outputs=y_logits,
                                    training=False)
  return self
def convolutional_vae(X, saved_states, **kwargs):
  """ convolutional_vae

  Return
  ------
  [y_encoder, y_decoder]

  States
  ------
  [f_inference (encoder), f_generative (decoder)]
  """
  n = kwargs.get('n', 10)
  batch_size = K.get_shape(X)[0]
  if batch_size is None:
    raise ValueError("You must specify batch_size dimension for the input placeholder.")
  # ====== init ====== #
  if saved_states is None:
    # Encoder
    f_inference = N.Sequence([
        N.Reshape(shape=(-1, 28, 28, 1)),
        N.Conv(num_filters=32, filter_size=3, strides=1, pad='valid',
               b_init=init_ops.constant_initializer(0.), activation=K.elu),
        N.Conv(num_filters=64, filter_size=5, strides=2, pad='same',
               b_init=init_ops.constant_initializer(0.), activation=K.elu),
        N.Dropout(level=0.1),
        N.Flatten(outdim=2),
        N.Dense(num_units=n * 2, b_init=None),
        N.BatchNorm(axes=0)
    ], debug=True, name='Encoder')
    # Decoder
    f_generative = N.Sequence([
        N.Dimshuffle(pattern=(0, 'x', 'x', 1)),
        N.TransposeConv(num_filters=64, filter_size=3, strides=1, pad='valid',
                        b_init=init_ops.constant_initializer(0.), activation=K.elu),
        N.TransposeConv(num_filters=32, filter_size=5, strides=2, pad='same',
                        b_init=init_ops.constant_initializer(0.), activation=K.elu),
        N.TransposeConv(num_filters=1, filter_size=13, strides=3, pad='valid',
                        b_init=None),
        N.BatchNorm(activation=K.linear),
        N.Flatten(outdim=3)
    ], debug=True, name="Decoder")
  else:
    f_inference, f_generative = saved_states
  # ====== Perform ====== #
  # Encoder
  y_encoder = f_inference(K.cast(X, 'float32'))
  mu = y_encoder[:, :n]
  sigma = K.softplus(y_encoder[:, n:])
  qz = Normal(mu=mu, sigma=sigma, name='Normal_qz')
  # Decoder
  z = Normal(mu=K.zeros(shape=(batch_size, n)),
             sigma=K.ones(shape=(batch_size, n)), name="Normal_pz")
  logits = f_generative(z)
  X_reconstruct = Bernoulli(logits=logits)
  # inference
  params = f_inference.parameters + f_generative.parameters
  inference = ed.KLqp(latent_vars={z: qz}, data={X_reconstruct: X})
  # ====== get cost for training ====== #
  # Bind p(x, z) and q(z | x) to the same placeholder for x.
  if K.is_training():
    import tensorflow as tf
    inference.initialize()
    if True:
      optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
      updates = optimizer.apply_gradients(
          optimizer.compute_gradients(inference.loss, var_list=params))
      init = tf.global_variables_initializer()
      init.run()
      f_train = K.function(X, inference.loss, updates)
    else:
      optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
      inference.initialize(optimizer=optimizer, var_list=params)
      init = tf.global_variables_initializer()
      init.run()
      f_train = lambda x: inference.update(feed_dict={X: x})['loss']
  samples = K.sigmoid(logits)
  return (samples, z, qz), (f_inference, f_generative)
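# ---------------------------------------------------------------------------
# Hypothetical sketch of invoking `convolutional_vae` directly (not taken from
# the original example): the function requires a static batch dimension, so
# the placeholder below is created with a fixed batch size, and `n` is the
# latent size read from **kwargs above. Names here are illustrative only.
# ---------------------------------------------------------------------------
def _vae_usage_sketch(batch_size=64, n=10):
  X_img = K.placeholder(shape=(batch_size, 28 * 28), name='X_vae_sketch')
  (samples, z, qz), states = convolutional_vae(X_img, saved_states=None, n=n)
  # `states` can be passed back via `saved_states` to reuse the same
  # encoder/decoder on another input tensor
  f_inference, f_generative = states
  return samples, f_inference, f_generative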
def test_odin_vs_lasagne(self):
  X1 = K.placeholder(shape=(None, 28, 28))
  X2 = K.placeholder(shape=(None, 784))

  def lasagne_net1():
    "FNN"
    i = lasagne.layers.InputLayer(shape=(None, 784))
    i.input_var = X2
    i = lasagne.layers.DenseLayer(i, num_units=32, W=random(784, 32), b=zeros(32),
        nonlinearity=lasagne.nonlinearities.rectify)
    i = lasagne.layers.DenseLayer(i, num_units=16, W=random(32, 16), b=zeros(16),
        nonlinearity=lasagne.nonlinearities.softmax)
    return X2, lasagne.layers.get_output(i)

  def odin_net1():
    "FNN"
    f = N.Sequence([
        N.Dense(32, W_init=random(784, 32), b_init=zeros(32), activation=K.relu),
        N.Dense(16, W_init=random(32, 16), b_init=zeros(16), activation=K.softmax)
    ])
    return X2, f(X2)

  def lasagne_net2():
    "CNN"
    i = lasagne.layers.InputLayer(shape=(None, 28, 28))
    i.input_var = X1
    i = lasagne.layers.DimshuffleLayer(i, (0, 'x', 1, 2))
    i = lasagne.layers.Conv2DLayer(i, 12, (3, 3), stride=(1, 1), pad='same',
        untie_biases=False, W=random(12, 1, 3, 3),
        nonlinearity=lasagne.nonlinearities.rectify)
    i = lasagne.layers.Pool2DLayer(i, pool_size=(2, 2), stride=None, mode='max',
        ignore_border=True)
    i = lasagne.layers.Conv2DLayer(i, 16, (3, 3), stride=(1, 1), pad='same',
        untie_biases=False, W=random(16, 12, 3, 3),
        nonlinearity=lasagne.nonlinearities.sigmoid)
    return X1, lasagne.layers.get_output(i)

  def odin_net2():
    "CNN"
    f = N.Sequence([
        N.Dimshuffle((0, 1, 2, 'x')),
        N.Conv(12, (3, 3), strides=(1, 1), pad='same', untie_biases=False,
               W_init=random(3, 3, 1, 12), activation=K.relu),
        N.Pool(pool_size=(2, 2), strides=None, mode='max'),
        N.Conv(16, (3, 3), strides=(1, 1), pad='same', untie_biases=False,
               W_init=random(3, 3, 12, 16), activation=K.sigmoid),
        N.Dimshuffle((0, 3, 1, 2))
    ])
    return X1, f(X1)

  def lasagne_net3():
    "RNN"
    i = lasagne.layers.InputLayer(shape=(None, 28, 28))
    i.input_var = X1
    W = [random(28, 32), random(32, 32), random(32), random_bin(12, 28)]
    i = lasagne.layers.RecurrentLayer(i, num_units=32,
        W_in_to_hid=W[0], W_hid_to_hid=W[1], b=W[2],
        nonlinearity=lasagne.nonlinearities.rectify,
        hid_init=zeros(1, 32), backwards=False, learn_init=False,
        gradient_steps=-1, grad_clipping=0, unroll_scan=False,
        precompute_input=True, mask_input=None, only_return_final=False)
    return X1, lasagne.layers.get_output(i)

  def odin_net3():
    "RNN"
    W = [random(28, 32), random(32, 32), random(32), random_bin(12, 28)]
    f = N.Sequence([
        N.Dense(num_units=32, W_init=W[0], b_init=W[2], activation=K.linear),
        N.RNN(num_units=32, activation=K.relu, W_init=W[1])
    ])
    return X1, f(X1, hid_init=zeros(1, 32))

  func_list = [
      (lasagne_net1, odin_net1),
      # (lasagne_net2, odin_net2),
      (lasagne_net3, odin_net3)
  ]
  print()
  for i, j in func_list:
    print('Test:', i.__name__, j.__name__)
    seed = np.random.randint(10e8)
    # ====== call the function ====== #
    np.random.seed(seed)
    i = i()
    np.random.seed(seed)
    j = j()
    # ====== create theano function ====== #
    f1 = K.function(i[0], i[1])
    f2 = K.function(j[0], j[1])
    shape = K.get_shape(i[0])
    # ====== get the output ====== #
    x = np.random.rand(*[12 if s is None else s for s in shape])
    y1 = f1(x)
    y2 = f2(x)
    self.assertEqual(y1.shape, y2.shape)
    self.assertAlmostEqual(np.sum(np.abs(y1 - y2)), 0.)
X1 = K.placeholder(shape=(10000, 1000), name='X1')
X2 = K.placeholder(shape=(10000, 1000), name='X2')
X3 = K.placeholder(shape=(10000, 2000), name='X3')
y1 = K.placeholder(shape=(1000, 2000), name='y1')
y2 = K.placeholder(shape=(2000, 3000), name='y2')
y3 = K.placeholder(shape=(3000, 4000), name='y3')
y4 = K.placeholder(shape=(4000, 5000), name='y4')

z = K.dot(X1, y1) + K.dot(X2, y1)
z = K.dot(z, y2)
z = K.dot(z, y3)
z = K.dot(z, y4)
print(z)
f = K.function([X1, X2, y1, y2, y3, y4], outputs=z)

X1 = X3[:, :1000]
X2 = X3[:, 1000:]
z1 = K.dot(X1, y1) + K.dot(X2, y1)
z1 = K.dot(z1, y2)
z1 = K.dot(z1, y3)
z1 = K.dot(z1, y4)
print(z1)
f1 = K.function([X3, y1, y2, y3, y4], outputs=z1)

v = [np.random.rand(*i.shape.as_list())
     for i in [X1, X2, X3, y1, y2, y3, y4]]
f(v[0], v[1], v[3], v[4], v[5], v[6])
f1(v[2], v[3], v[4], v[5], v[6])
def test_lstm(self):
  W_in_to_ingate = random(28, 32) / 12
  W_hid_to_ingate = random(32, 32) / 12
  b_ingate = random(32) / 12
  W_in_to_forgetgate = random(28, 32) / 12
  W_hid_to_forgetgate = random(32, 32) / 12
  b_forgetgate = random(32) / 12
  W_in_to_cell = random(28, 32) / 12
  W_hid_to_cell = random(32, 32) / 12
  b_cell = random(32) / 12
  W_in_to_outgate = random(28, 32) / 12
  W_hid_to_outgate = random(32, 32) / 12
  b_outgate = random(32) / 12
  W_cell_to_ingate = random(32) / 12
  W_cell_to_forgetgate = random(32) / 12
  W_cell_to_outgate = random(32) / 12
  cell_init = random(1, 32) / 12
  hid_init = random(1, 32) / 12
  # ====== pre-define parameters ====== #
  x = random(12, 28, 28)
  x_mask = np.random.randint(0, 2, size=(12, 28))
  # x_mask = np.ones(shape=(12, 28))
  # ====== odin ====== #
  X = K.placeholder(shape=(None, 28, 28), name='X')
  mask = K.placeholder(shape=(None, 28), name='mask', dtype='int32')
  f = N.Sequence([
      N.Merge([
          N.Dense(32, W_init=W_in_to_ingate, b_init=b_ingate, activation=K.linear),
          N.Dense(32, W_init=W_in_to_forgetgate, b_init=b_forgetgate, activation=K.linear),
          N.Dense(32, W_init=W_in_to_cell, b_init=b_cell, activation=K.linear),
          N.Dense(32, W_init=W_in_to_outgate, b_init=b_outgate, activation=K.linear)
      ], merge_function=K.concatenate),
      N.LSTM(32, activation=K.tanh, gate_activation=K.sigmoid,
             W_hid_init=[W_hid_to_ingate, W_hid_to_forgetgate,
                         W_hid_to_cell, W_hid_to_outgate],
             W_peepholes=[W_cell_to_ingate, W_cell_to_forgetgate,
                          W_cell_to_outgate],
             input_mode='skip', name='lstm')
  ])
  y = f(X, h0=hid_init, c0=cell_init, mask=mask)
  f = K.function([X, mask], y)
  out1 = f(x, x_mask)
  # ====== lasagne ====== #
  if get_backend() == 'tensorflow':
    self.assertTrue(repr(np.sum(out1))[:4] == repr(43.652363)[:4])
    return
  l = lasagne.layers.InputLayer(shape=(None, 28, 28))
  l.input_var = X
  l_mask = lasagne.layers.InputLayer(shape=(None, 28))
  l_mask.input_var = mask
  l = lasagne.layers.LSTMLayer(
      l, num_units=32,
      ingate=lasagne.layers.Gate(nonlinearity=lasagne.nonlinearities.sigmoid,
                                 W_in=W_in_to_ingate, W_hid=W_hid_to_ingate,
                                 W_cell=W_cell_to_ingate, b=b_ingate),
      forgetgate=lasagne.layers.Gate(nonlinearity=lasagne.nonlinearities.sigmoid,
                                     W_in=W_in_to_forgetgate, W_hid=W_hid_to_forgetgate,
                                     W_cell=W_cell_to_forgetgate, b=b_forgetgate),
      cell=lasagne.layers.Gate(nonlinearity=lasagne.nonlinearities.tanh,
                               W_in=W_in_to_cell, W_hid=W_hid_to_cell,
                               W_cell=None, b=b_cell),
      outgate=lasagne.layers.Gate(nonlinearity=lasagne.nonlinearities.sigmoid,
                                  W_in=W_in_to_outgate, W_hid=W_hid_to_outgate,
                                  W_cell=W_cell_to_outgate, b=b_outgate),
      nonlinearity=lasagne.nonlinearities.tanh,
      cell_init=cell_init, hid_init=hid_init, mask_input=l_mask,
      precompute_input=True, backwards=False)
  y = lasagne.layers.get_output(l)
  f = K.function([X, mask], y)
  out2 = f(x, x_mask)
  # ====== test ====== #
  self.assertAlmostEqual(np.sum(np.abs(out1 - out2)), 0.)
    N.Flatten(outdim=2),
    N.Dense(num_units=128, activation=K.relu),
    N.Dense(num_units=nb_labels, activation=K.softmax)
], debug=True)
y_pred = f(X)
params = [p for p in f.parameters if not has_roles(p, EmbeddingWeight)]
print('Params:', [p.name for p in params])

cost_train = K.mean(K.categorical_crossentropy(y_pred, y))
cost_score = K.mean(K.categorical_accuracy(y_pred, y))

opt = K.optimizers.RMSProp()
updates = opt.get_updates(cost_train, params)

print('Build training function ...')
f_train = K.function([X, y], cost_train, updates)
print('Build scoring function ...')
f_score = K.function([X, y], cost_score)

trainer = training.MainLoop(batch_size=128, seed=1208, shuffle_level=2)
trainer.set_task(f_train, (X_train, y_train), epoch=args['epoch'], name='train')
trainer.set_subtask(f_score, (X_valid, y_valid), freq=1., name='valid')
trainer.set_callback([
    training.ProgressMonitor('train', format='Train:{:.4f}'),
    training.ProgressMonitor('valid', format='Test:{:.4f}'),
    training.History()
])
trainer.run()
    K.variable(np.arange(1200, 2400).reshape(-1, 2))
]
outputs_info = K.zeros(shape=(1200,))
X = np.random.rand(600, 3000)
# ====== tf.scan ====== #
y = Scan2(doit, sequences=sequences, outputs_info=outputs_info,
          n_steps=None, backwards=True, name=None)
print('Scan:')
with utils.UnitTimer():
  f2 = K.function(sequences[0], y)
with utils.UnitTimer(12):
  for i in range(12):
    _ = f2(X)
print(np.sum(_))
# ====== unroll ====== #
y = Scan1(doit, sequences=sequences, outputs_info=outputs_info,
          n_steps=None, backwards=True, name=None)
print('Unroll:')
with utils.UnitTimer():
  f1 = K.function(sequences[0], y)
with utils.UnitTimer(12):
z = K.ComputationGraph(y_proba).get(roles=N.Dense, scope='LatentOutput',
                                    beginning_scope=False)[0]
print('Latent space:', ctext(z, 'cyan'))
# ====== create loss ====== #
ce = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=y_logit)
acc = K.metrics.categorical_accuracy(y_true=y, y_pred=y_proba)
# ====== params and optimizing ====== #
updates = K.optimizers.Adam(lr=0.0001, name='XAdam').minimize(
    loss=ce, roles=[K.role.TrainableParameter],
    exclude_roles=[K.role.InitialState],
    verbose=True)
K.initialize_all_variables()
#
# ====== Functions ====== #
print('Building training functions ...')
f_train = K.function(inputs, [ce, acc], updates=updates, training=True)
print('Building testing functions ...')
f_score = K.function(inputs, [ce, acc], training=False)
# Latent spaces
f_z = K.function(inputs=X, outputs=z, training=False)
# ===========================================================================
# Create trainer
# ===========================================================================
if TRAIN_MODEL:
  print('Start training ...')
  task = training.MainLoop(batch_size=args.batch, seed=120825,
                           shuffle_level=2, allow_rollback=True)
  task.set_checkpoint(MODEL_PATH, x_vec)
  task.set_callbacks([
      training.NaNDetector(),
                                     beginning_scope=False)[0]
print('Latent space:', ctext([z1, z2, z3], 'cyan'))
# ====== create loss ====== #
ce = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=y_logit)
acc = K.metrics.categorical_accuracy(y_true=y, y_pred=y_proba)
cm = K.metrics.confusion_matrix(y_true=y, y_pred=y_proba, labels=len(labels))
# ====== params and optimizing ====== #
updates = K.optimizers.Adam(lr=0.001).minimize(
    loss=ce, roles=[K.role.TrainableParameter],
    exclude_roles=[K.role.InitialState],
    verbose=True)
K.initialize_all_variables()
# ====== Functions ====== #
print('Building training functions ...')
f_train = K.function(inputs, [ce, acc, cm], updates=updates, training=True)
print('Building testing functions ...')
f_score = K.function(inputs, [ce, acc, cm], training=False)
print('Building predicting functions ...')
f_pred_proba = K.function(X, y_proba, training=False)
# Latent spaces
f_z1 = K.function(inputs=X, outputs=z1, training=False)
f_z2 = K.function(inputs=X, outputs=z2, training=False)
f_z3 = K.function(inputs=X, outputs=z3, training=False)
# ===========================================================================
# Training
# ===========================================================================
print('Start training ...')
task = training.MainLoop(batch_size=args.batch, seed=1234, shuffle_level=2,
outputs = model(*inputs)
# ====== create losses ====== #
ce = tf.losses.softmax_cross_entropy(inputs[-1], outputs['logit'])
acc = K.metrics.categorical_accuracy(outputs['prob'], inputs[-1])
cm = K.metrics.confusion_matrix(y_pred=outputs['prob'], y_true=inputs[-1],
                                labels=nb_labels)
# ====== create optimizer ====== #
optz = K.optimizers.Adam(lr=LEARNING_RATE)
parameters = model.parameters
print("#Parameters:", len(parameters))
updates = optz(ce, parameters)
K.initialize_all_variables()
# ====== function ====== #
print('Building training functions ...')
f_train = K.function(inputs, [ce, optz.norm, cm],
                     updates=updates, training=True)
print('Building testing functions ...')
f_test = K.function(inputs, [ce, acc, cm], training=False)
print('Building predicting functions ...')
f_pred = K.function(inputs[0], outputs['prob'], training=False)
# ===========================================================================
# Build trainer
# ===========================================================================
# ====== splitting the data ====== #
idx = np.arange(len(X_train), dtype='int32')
idx_train, idx_valid = train_valid_test_split(idx, train=0.8,
                                              inc_test=False, seed=1234)
X_valid = X_train[idx_valid]
y_valid = y_train[idx_valid]
X_train = X_train[idx_train]
y_train = y_train[idx_train]
z3 = K.ComputationGraph(y_proba).get(scope='LatentDense',
                                     beginning_scope=False)[0]
print('Latent space:', ctext([z1, z2, z3], 'cyan'))
# ====== create loss ====== #
ce = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=y_logit)
acc = K.metrics.categorical_accuracy(y_true=y, y_pred=y_proba)
cm = K.metrics.confusion_matrix(y_true=y, y_pred=y_proba, labels=len(labels))
# ====== params and optimizing ====== #
updates = K.optimizers.Adam(lr=0.001).minimize(
    loss=ce, roles=[K.role.TrainableParameter],
    exclude_roles=[K.role.InitialState],
    verbose=True)
K.initialize_all_variables()
# ====== Functions ====== #
print('Building training functions ...')
f_train = K.function(inputs, [ce, acc, cm], updates=updates, training=True)
print('Building testing functions ...')
f_score = K.function(inputs, [ce, acc, cm], training=False)
print('Building predicting functions ...')
f_pred_proba = K.function(X, y_proba, training=False)
# Latent spaces
f_z1 = K.function(inputs=X, outputs=z1, training=False)
f_z2 = K.function(inputs=X, outputs=z2, training=False)
f_z3 = K.function(inputs=X, outputs=z3, training=False)
# ===========================================================================
# Training
# ===========================================================================
print('Start training ...')
task = training.MainLoop(batch_size=args.batch, seed=120825, shuffle_level=2,
                         allow_rollback=True, labels=labels)
# ====== applying the NNOps ====== #
y_pred = ops(X)
if arg.rnn:
  loss = tf.losses.softmax_cross_entropy(y_onehot, ops(X, training=True))
else:
  loss = tf.losses.softmax_cross_entropy(y_onehot, y_pred)
acc = K.metrics.categorical_accuracy(y, y_pred, name="Acc")
cm = K.metrics.confusion_matrix(y_pred=y_pred, y_true=y, labels=10)
# ====== optimizer ====== #
optimizer = K.optimizers.Adam(lr=0.001)
updates = optimizer.minimize(loss, verbose=True)
# ====== initialize all variable ====== #
K.initialize_all_variables()
# ====== function ====== #
print('Building training functions ...')
f_train = K.function([X, y], [loss, optimizer.norm, cm],
                     updates=updates, training=True)
print('Building testing functions ...')
f_test = K.function([X, y], [loss, acc, cm], training=False)
print('Building predicting functions ...')
f_pred = K.function(X, y_pred, training=False)
# ===========================================================================
# Build trainer
# ===========================================================================
print('Start training ...')
# ====== some configurations ====== #
model_save_path = '/tmp/EXP_MNIST'
if os.path.exists(model_save_path):
  shutil.rmtree(model_save_path)
os.mkdir(model_save_path)
print("Save path:", model_save_path)
    N.Dense(10, activation=K.softmax)
])
ops = cPickle.loads(cPickle.dumps(ops))  # test if the ops is pickle-able

y_pred_train = ops(X_train)
y_pred_score = ops(X_score)

cost_train = K.mean(K.categorical_crossentropy(y_pred_train, y))
cost_test_1 = K.mean(K.categorical_crossentropy(y_pred_score, y))
cost_test_2 = K.mean(K.categorical_accuracy(y_pred_score, y))
cost_test_3 = K.confusion_matrix(y_pred_score, y, labels=range(10))

parameters = ops.parameters
optimizer = K.optimizers.RMSProp(lr=0.0001, clipnorm=100.)
updates = optimizer(cost_train, parameters)

print('Building training functions ...')
f_train = K.function([X_train, y], [cost_train, optimizer.norm],
                     updates=updates)
print('Building testing functions ...')
f_test = K.function([X_score, y], [cost_test_1, cost_test_2, cost_test_3])
# ====== normalize 0-1 ====== #
if False:
  print('Normalized data in range [0-1]')
  X_train = ds['X_train'][:]
  X_train = (X_train - np.min(X_train, 0)) / (np.max(X_train) - np.min(X_train))
  X_test = ds['X_test'][:]
  X_test = (X_test - np.min(X_test, 0)) / (np.max(X_test) - np.min(X_test))
# ====== Gaussian normalize ====== #
else:
  print('Gaussian normalized the data')
  X_train = ds['X_train'][:]
  X_train = (X_train - np.mean(X_train, 0)) / (np.std(X_train))