class ClassOutputLayer(NeuralLayer):

    def __init__(self, output_size, class_size):
        super(ClassOutputLayer, self).__init__("class_output")
        self.output_size = output_size
        self.class_size = class_size

    def prepare(self):
        # Output layers
        self.output_layer = Chain(self.input_dim).stack(Dense(self.output_size * self.class_size))
        self.softmax_layer = Softmax().initialize(input_dim=self.output_size)
        self.class_layer = Chain(self.input_dim).stack(Dense(self.class_size), Softmax3D())
        self.register_inner_layers(self.class_layer, self.output_layer)
        # Target tensor
        self.target_tensor = T.imatrix('target')
        self.register_external_targets(self.target_tensor)
        # arange cache
        self.arange_cache = theano.shared(np.arange(10 * 64), name="arange_cache")

    def compute_tensor(self, x):
        """
        :param x: (batch, time, vec)
        """
        # Target class
        class_matrix = self.target_tensor // self.output_size
        class_vector = class_matrix.reshape((-1,))
        # Target index
        target_matrix = self.target_tensor % self.output_size
        target_vector = target_matrix.reshape((-1,))
        # Input matrix
        input_matrix = x.reshape((-1, self.input_dim))
        # Output matrix
        output_tensor3d = self.output_layer.compute_tensor(x)
        output_matrix = output_tensor3d.reshape((-1, self.class_size, self.output_size))
        arange_vec = self.arange_cache[:output_matrix.shape[0]]
        sub_output_matrix = output_matrix[arange_vec, class_vector]
        # Softmax
        softmax_output_matrix = self.softmax_layer.compute_tensor(sub_output_matrix)
        # Class prediction
        class_output_matrix = self.class_layer.compute_tensor(x)
        # Costs
        output_cost = LMCost(softmax_output_matrix, target_vector).get()
        class_cost = LMCost(class_output_matrix, class_matrix).get()
        final_cost = output_cost + class_cost
        return final_cost
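The `//` and `%` arithmetic above factors each target word id into a class id and a within-class index. A minimal NumPy sketch of that factorization, using made-up sizes (class_size=4, output_size=5, i.e. 20 ids in total) purely for illustration:

import numpy as np

class_size, output_size = 4, 5                                  # hypothetical sizes
targets = np.array([[0, 7, 13], [19, 5, 6]], dtype=np.int32)    # (batch, time) word ids

class_matrix = targets // output_size   # which class each id belongs to
index_matrix = targets % output_size    # position of the id inside its class

# Recombining recovers the original ids, which is why the layer only needs a
# softmax over class_size classes plus a softmax over output_size entries.
assert np.array_equal(class_matrix * output_size + index_matrix, targets)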
def run(method, model_path):
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(128, 'relu'),
                Dense(128, 'relu'),
                Dense(10, 'linear'),
                Softmax())

    trainer = ScipyTrainer(model, method)
    annealer = LearningRateAnnealer()
    mnist = MiniBatches(MnistDataset(), batch_size=100)
    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(model_path)
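A hypothetical driver for the run() helper above. The method strings are assumptions (anything ScipyTrainer forwards to scipy.optimize should work), and the output paths are illustrative:

# Hypothetical usage of run(); method names and paths are assumptions.
if __name__ == '__main__':
    for method in ("L-BFGS-B", "CG"):
        run(method, "/tmp/mnist_scipy_%s.gz" % method.lower().replace("-", "_"))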
class ClassOutputLayer(NeuralLayer):

    def __init__(self, output_size, class_size):
        super(ClassOutputLayer, self).__init__("class_output")
        self.output_size = output_size
        self.class_size = class_size

    def setup(self):
        # Output layers
        self.output_layer = Chain(self.input_dim).stack(Dense(self.output_size * self.class_size))
        self.softmax_layer = Softmax().connect(input_dim=self.output_size)
        self.class_layer = Chain(self.input_dim).stack(Dense(self.class_size), Softmax3D())
        self.register_inner_layers(self.class_layer, self.output_layer)
        # Target tensor
        self.target_tensor = T.imatrix('target')
        self.register_external_targets(self.target_tensor)
        # arange cache
        self.arange_cache = theano.shared(np.arange(10 * 64), name="arange_cache")

    def output(self, x):
        """
        :param x: (batch, time, vec)
        """
        # Target class
        class_matrix = self.target_tensor // self.output_size
        class_vector = class_matrix.reshape((-1,))
        # Target index
        target_matrix = self.target_tensor % self.output_size
        target_vector = target_matrix.reshape((-1,))
        # Input matrix
        input_matrix = x.reshape((-1, self.input_dim))
        # Output matrix
        output_tensor3d = self.output_layer.output(x)
        output_matrix = output_tensor3d.reshape((-1, self.class_size, self.output_size))
        arange_vec = self.arange_cache[:output_matrix.shape[0]]
        sub_output_matrix = output_matrix[arange_vec, class_vector]
        # Softmax
        softmax_output_matrix = self.softmax_layer.output(sub_output_matrix)
        # Class prediction
        class_output_matrix = self.class_layer.output(x)
        # Costs
        output_cost = LMCost(softmax_output_matrix, target_vector).get()
        class_cost = LMCost(class_output_matrix, class_matrix).get()
        final_cost = output_cost + class_cost
        return final_cost
def run(initializer, model_path):
    model = NeuralClassifier(input_dim=28 * 28)
    for _ in range(6):
        model.stack(Dense(128, 'relu', init=initializer))
    model.stack(Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)
    annealer = LearningRateAnnealer(trainer)
    mnist = MiniBatches(MnistDataset(), batch_size=20)
    trainer.run(mnist, controllers=[annealer])

    model.save_params(model_path)
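A hypothetical driver for this run() variant. UniformInitializer appears in the Maxout snippet further below; its import path here, the scale value, and the output path are all assumptions:

# Hypothetical usage of run(); the import path, scale, and output path are assumptions.
from deepy.utils import UniformInitializer

if __name__ == '__main__':
    run(UniformInitializer(0.05), "/tmp/mnist_uniform_init.gz")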
def setup(self):
    """
    All code that creates parameters should be put into the 'setup' function.
    """
    self.output_dim = 10

    self.encoder = Chain(self.input_dim).stack(Dense(self.internal_layer_size, 'tanh'))
    self.decoder = Chain(self.internal_layer_size).stack(Dense(self.input_dim))
    self.classifier = Chain(self.internal_layer_size).stack(Dense(50, 'tanh'),
                                                            Dense(self.output_dim),
                                                            Softmax())
    self.register_inner_layers(self.encoder, self.decoder, self.classifier)

    self.target_input = T.ivector('target')
    self.register_external_inputs(self.target_input)
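The setup above only declares the sub-networks and the external target input; this snippet does not show how they are combined. A hedged sketch of a companion forward pass, pairing setup with output as in the variant class earlier in this section; the mean-squared reconstruction cost and the cross-entropy term are assumptions, not the original cost formulation:

def output(self, x):
    # Hypothetical forward pass: encode, then branch into reconstruction and
    # classification. The cost terms below are assumptions.
    encoded = self.encoder.output(x)
    reconstructed = self.decoder.output(encoded)
    probs = self.classifier.output(encoded)
    reconstruction_cost = T.mean(T.square(reconstructed - x))
    classification_cost = T.mean(T.nnet.categorical_crossentropy(probs, self.target_input))
    return reconstruction_cost + classification_cost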
default_model = os.path.join(os.path.dirname(__file__), "models", "deep_conv.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(
        # Reshape to 3D tensor
        Reshape((-1, 28, 28)),
        # Add a new dimension for convolution
        DimShuffle((0, 'x', 1, 2)),
        Convolution((4, 1, 5, 5), activation="relu"),
        Dropout(0.15),
        Convolution((8, 4, 5, 5), activation="relu"),
        Dropout(0.1),
        Convolution((16, 8, 3, 3), activation="relu"),
        Flatten(),
        Dropout(0.1),
        # As dimension information was lost, reveal it to the pipeline
        RevealDimension(16),
        Dense(10, 'linear'),
        Softmax())

    trainer = MomentumTrainer(model)
    annealer = LearningRateAnnealer()
    mnist = MiniBatches(MnistDataset(), batch_size=20)
    trainer.run(mnist, controllers=[annealer])

    model.save_params(default_model)
"""
Classify MNIST digits using a very deep, thin network.

Plain deep networks are very hard to train, as this case shows. Note that if
highway layers merely learn to pass information forward, in other words act as
transparent layers, then they would be meaningless.
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

model_path = os.path.join(os.path.dirname(__file__), "models", "baseline1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    for _ in range(20):
        model.stack(Dense(71, 'relu'))
    model.stack(Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)
    mnist = MiniBatches(MnistDataset(), batch_size=20)
    trainer.run(mnist, controllers=[LearningRateAnnealer()])

    model.save_params(model_path)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
For reference, this model should achieve a 1.50% error rate in 10 minutes
on an i7 CPU (8 threads).
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax, Dropout
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

default_model = os.path.join(os.path.dirname(__file__), "models", "mlp_dropout1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'),
                Dropout(0.5),
                Dense(256, 'relu'),
                Dropout(0.5),
                Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)
    annealer = LearningRateAnnealer()
    mnist = MiniBatches(MnistDataset(), batch_size=20)
    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(default_model)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax, Dropout, PRelu
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

default_model = os.path.join(os.path.dirname(__file__), "models", "mlp_prelu_dropout1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'linear'),
                PRelu(),
                Dropout(0.2),
                Dense(256, 'linear'),
                PRelu(),
                Dropout(0.2),
                Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)
    annealer = LearningRateAnnealer()
    mnist = MiniBatches(MnistDataset(), batch_size=20)
    trainer.run(mnist, controllers=[annealer])

    model.save_params(default_model)
batch_set = MiniBatches(dataset)

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=26, input_tensor=3)
    model.stack(RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.1),
                RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.3),
                RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.6),
                RNN(hidden_size=30, input_type="sequence", output_type="one", vector_core=0.9),
                Dense(4),
                Softmax())

    trainer = SGDTrainer(model)
    annealer = LearningRateAnnealer()

    trainer.run(batch_set.train_set(), batch_set.valid_set(), controllers=[annealer])
def clip_param_norm():
    for param in model.parameters:
        if param.name.startswith("W"):
            l2_norms = np.sqrt(np.sum(param.get_value() ** 2, axis=0, keepdims=True))
            desired_norms = np.clip(l2_norms, 0, L2NORM_LIMIT)
            scale = (desired_norms + EPSILON) / (l2_norms + EPSILON)
            param.set_value(param.get_value() * scale)

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.training_callbacks.append(clip_param_norm)
    model.stack(Dropout(0.2),
                Maxout(240, num_pieces=5, init=UniformInitializer(.005)),
                Maxout(240, num_pieces=5, init=UniformInitializer(.005)),
                Dense(10, 'linear', init=UniformInitializer(.005)),
                Softmax())

    trainer = MomentumTrainer(model, {"learning_rate": graph.shared(0.01),
                                      "momentum": 0.5})

    annealer = ExponentialLearningRateAnnealer(debug=True)

    mnist = MiniBatches(MnistDataset(), batch_size=100)

    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(default_model)
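The snippet above references module-level names defined elsewhere in the original script (L2NORM_LIMIT, EPSILON, graph, default_model). A hedged sketch of the two constants, purely so the callback's arithmetic can be read in isolation; the actual values in the source may differ:

# Assumed module-level constants; the real values live elsewhere in the script.
L2NORM_LIMIT = 1.9365   # max L2 norm allowed per weight column (illustrative value)
EPSILON = 1e-7          # avoids division by zero when a column norm is exactly 0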
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
This experiment setting is described in http://arxiv.org/pdf/1502.03167v3.pdf.

MNIST MLP baseline model.
The Gaussian initialization described in the paper did not converge; I have no idea why.
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax
from deepy.trainers import SGDTrainer

default_model = os.path.join(os.path.dirname(__file__), "models", "baseline1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(100, 'sigmoid'),
                Dense(100, 'sigmoid'),
                Dense(100, 'sigmoid'),
                Dense(10, 'linear'),
                Softmax())

    trainer = SGDTrainer(model)
    batches = MiniBatches(MnistDataset(), batch_size=60)
    trainer.run(batches, controllers=[])

    model.save_params(default_model)