Example #1
    def prepare(self):
        # Output layers
        self.output_layer = Chain(self.input_dim).stack(
            Dense(self.output_size * self.class_size))
        self.softmax_layer = Softmax().initialize(input_dim=self.output_size)

        self.class_layer = Chain(self.input_dim).stack(Dense(self.class_size),
                                                       Softmax3D())
        self.register_inner_layers(self.class_layer, self.output_layer)
        # Target tensor
        self.target_tensor = T.imatrix('target')
        self.register_external_targets(self.target_tensor)
        # arange cache
        self.arange_cache = theano.shared(np.arange(10 * 64),
                                          name="arange_cache")
Example #2
File: layers.py  Project: robi56/deepy
    def prepare(self):
        # Output layers
        self.output_layer = Chain(self.input_dim).stack(Dense(self.output_size * self.class_size))
        self.softmax_layer = Softmax().initialize(input_dim=self.output_size)

        self.class_layer = Chain(self.input_dim).stack(Dense(self.class_size), Softmax3D())
        self.register_inner_layers(self.class_layer, self.output_layer)
        # Target tensor
        self.target_tensor = T.imatrix("target")
        self.register_external_targets(self.target_tensor)
        # arange cache
        self.arange_cache = theano.shared(np.arange(10 * 64), name="arange_cache")
Example #3
class ClassOutputLayer(NeuralLayer):
    def __init__(self, output_size, class_size):
        super(ClassOutputLayer, self).__init__("class_output")
        self.output_size = output_size
        self.class_size = class_size

    def prepare(self):
        # Output layers
        self.output_layer = Chain(self.input_dim).stack(
            Dense(self.output_size * self.class_size))
        self.softmax_layer = Softmax().initialize(input_dim=self.output_size)

        self.class_layer = Chain(self.input_dim).stack(Dense(self.class_size),
                                                       Softmax3D())
        self.register_inner_layers(self.class_layer, self.output_layer)
        # Target tensor
        self.target_tensor = T.imatrix('target')
        self.register_external_targets(self.target_tensor)
        # arange cache
        self.arange_cache = theano.shared(np.arange(10 * 64),
                                          name="arange_cache")

    def compute_tensor(self, x):
        """
        :param x: (batch, time, vec)
        """
        # Target class
        class_matrix = self.target_tensor // self.output_size
        class_vector = class_matrix.reshape((-1, ))
        # Target index
        target_matrix = self.target_tensor % self.output_size
        target_vector = target_matrix.reshape((-1, ))
        # Input matrix
        input_matrix = x.reshape((-1, self.input_dim))
        # Output matrix
        output_tensor3d = self.output_layer.compute_tensor(x)
        output_matrix = output_tensor3d.reshape(
            (-1, self.class_size, self.output_size))
        arange_vec = self.arange_cache[:output_matrix.shape[0]]
        sub_output_matrix = output_matrix[arange_vec, class_vector]
        # Softmax
        softmax_output_matrix = self.softmax_layer.compute_tensor(
            sub_output_matrix)
        # Class prediction
        class_output_matrix = self.class_layer.compute_tensor(x)
        # Costs
        output_cost = LMCost(softmax_output_matrix, target_vector).get()
        class_cost = LMCost(class_output_matrix, class_matrix).get()
        final_cost = output_cost + class_cost

        return final_cost
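
The integer arithmetic above is the core of the class-factored softmax: every target id is split into a class (target // output_size) and a within-class index (target % output_size), so one large softmax over class_size * output_size outputs is replaced by two much smaller ones. A minimal NumPy sketch of that decomposition, with made-up sizes for illustration:

import numpy as np

output_size, class_size = 5, 3                 # hypothetical sizes
targets = np.array([[0, 7, 14], [3, 9, 11]])   # (batch, time) target ids in [0, 15)

class_matrix = targets // output_size          # which class each id falls into
index_matrix = targets % output_size           # position of the id inside its class

# The original id can always be recovered from the two factors
assert np.array_equal(class_matrix * output_size + index_matrix, targets)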
Example #4
def run(method, model_path):
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(128, 'relu'), Dense(128, 'relu'), Dense(10, 'linear'),
                Softmax())

    trainer = ScipyTrainer(model, method)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(MnistDataset(), batch_size=100)

    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(model_path)
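
A hypothetical invocation of the run() helper above; both the optimization method string and the output path are illustrative placeholders, not values taken from the original script:

if __name__ == '__main__':
    # "l-bfgs-b" and the path below are placeholders chosen for illustration
    run("l-bfgs-b", "/tmp/mnist_scipy_model.gz")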
Example #5
File: layers.py  Project: 52nlp/deepy
class ClassOutputLayer(NeuralLayer):

    def __init__(self, output_size, class_size):
        super(ClassOutputLayer, self).__init__("class_output")
        self.output_size = output_size
        self.class_size = class_size

    def setup(self):
        # Output layers
        self.output_layer = Chain(self.input_dim).stack(Dense(self.output_size * self.class_size))
        self.softmax_layer = Softmax().connect(input_dim=self.output_size)

        self.class_layer = Chain(self.input_dim).stack(Dense(self.class_size),
                                                       Softmax3D())
        self.register_inner_layers(self.class_layer, self.output_layer)
        # Target tensor
        self.target_tensor = T.imatrix('target')
        self.register_external_targets(self.target_tensor)
        # arange cache
        self.arange_cache = theano.shared(np.arange(10*64), name="arange_cache")


    def output(self, x):
        """
        :param x: (batch, time, vec)
        """
        # Target class
        class_matrix = self.target_tensor // self.output_size
        class_vector = class_matrix.reshape((-1,))
        # Target index
        target_matrix = self.target_tensor % self.output_size
        target_vector = target_matrix.reshape((-1,))
        # Input matrix
        input_matrix = x.reshape((-1, self.input_dim))
        # Output matrix
        output_tensor3d = self.output_layer.output(x)
        output_matrix = output_tensor3d.reshape((-1, self.class_size, self.output_size))
        arange_vec = self.arange_cache[:output_matrix.shape[0]]
        sub_output_matrix = output_matrix[arange_vec, class_vector]
        # Softmax
        softmax_output_matrix = self.softmax_layer.output(sub_output_matrix)
        # Class prediction
        class_output_matrix = self.class_layer.output(x)
        # Costs
        output_cost = LMCost(softmax_output_matrix, target_vector).get()
        class_cost = LMCost(class_output_matrix, class_matrix).get()
        final_cost = output_cost + class_cost

        return final_cost
Example #6
File: util.py  Project: slorr80/deepy
def run(initializer, model_path):
    model = NeuralClassifier(input_dim=28 * 28)
    for _ in range(6):
        model.stack(Dense(128, 'relu', init=initializer))
    model.stack(Dense(10, 'linear'), Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer(trainer)

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, controllers=[annealer])

    model.save_params(model_path)
Example #7
    def setup(self):
        """
        All code that creates parameters should be put into the 'setup' function.
        """
        self.output_dim = 10
        self.encoder = Chain(self.input_dim).stack(
            Dense(self.internal_layer_size, 'tanh'))
        self.decoder = Chain(self.internal_layer_size).stack(
            Dense(self.input_dim))
        self.classifier = Chain(self.internal_layer_size).stack(
            Dense(50, 'tanh'), Dense(self.output_dim), Softmax())

        self.register_inner_layers(self.encoder, self.decoder, self.classifier)

        self.target_input = T.ivector('target')
        self.register_external_inputs(self.target_input)
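
Following the convention stated in the docstring (all parameter-creating code lives in setup), a minimal custom layer might look like the sketch below. The layer name and sizes are illustrative, and only calls already used in these examples (Chain, Dense, Softmax, register_inner_layers) are assumed:

class TinyClassifierLayer(NeuralLayer):
    """Illustrative layer: one hidden projection followed by a softmax head."""

    def __init__(self, output_dim):
        super(TinyClassifierLayer, self).__init__("tiny_classifier")
        self.output_dim = output_dim

    def setup(self):
        # Parameters are created here, not in __init__
        self.core = Chain(self.input_dim).stack(
            Dense(64, 'tanh'), Dense(self.output_dim), Softmax())
        self.register_inner_layers(self.core)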
Example #8
default_model = os.path.join(os.path.dirname(__file__), "models",
                             "deep_conv.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(  # Reshape to 3D tensor
        Reshape((-1, 28, 28)),
        # Add a new dimension for convolution
        DimShuffle((0, 'x', 1, 2)),
        Convolution((4, 1, 5, 5), activation="relu"),
        Dropout(0.15),
        Convolution((8, 4, 5, 5), activation="relu"),
        Dropout(0.1),
        Convolution((16, 8, 3, 3), activation="relu"),
        Flatten(),
        Dropout(0.1),
        # As the dimension information was lost, reveal it to the pipeline
        RevealDimension(16),
        Dense(10, 'linear'),
        Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, controllers=[annealer])

    model.save_params(default_model)
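
A quick back-of-the-envelope check of the feature-map sizes flowing through the convolutional stack above, assuming 'valid' convolutions with stride 1 and no pooling (an assumption; the Convolution defaults are not shown in this snippet):

# 28x28 input through three valid convolutions with 5x5, 5x5 and 3x3 kernels
sizes = [28]
for kernel in (5, 5, 3):
    sizes.append(sizes[-1] - kernel + 1)    # valid convolution: n - k + 1
print(sizes)            # [28, 24, 20, 18]
print(16 * 18 * 18)     # 5184 features after Flatten, given 16 output channels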
Example #9
Classify MNIST digits using a very deep thin network.
Plain deep networks are very hard to train, as this case shows.

Note, however, that if highway layers merely learn to pass information forward,
in other words act as transparent layers, then they are meaningless.
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

model_path = os.path.join(os.path.dirname(__file__), "models", "baseline1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    for _ in range(20):
        model.stack(Dense(71, 'relu'))
    model.stack(Dense(10, 'linear'), Softmax())

    trainer = MomentumTrainer(model)

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, controllers=[LearningRateAnnealer()])

    model.save_params(model_path)
Example #10
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
For reference, this model should achieve a 1.50% error rate in about 10 minutes on an i7 CPU (8 threads).
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax, Dropout
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

default_model = os.path.join(os.path.dirname(__file__), "models",
                             "mlp_dropout1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'), Dropout(0.5), Dense(256, 'relu'),
                Dropout(0.5), Dense(10, 'linear'), Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(default_model)
Example #11
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax, Dropout, PRelu
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

default_model = os.path.join(os.path.dirname(__file__), "models",
                             "mlp_prelu_dropout1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'linear'), PRelu(), Dropout(0.2),
                Dense(256, 'linear'), PRelu(), Dropout(0.2),
                Dense(10, 'linear'), Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, controllers=[annealer])

    model.save_params(default_model)
Example #12
batch_set = MiniBatches(dataset)

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=26, input_tensor=3)
    model.stack(
        RNN(hidden_size=30,
            input_type="sequence",
            output_type="sequence",
            vector_core=0.1),
        RNN(hidden_size=30,
            input_type="sequence",
            output_type="sequence",
            vector_core=0.3),
        RNN(hidden_size=30,
            input_type="sequence",
            output_type="sequence",
            vector_core=0.6),
        RNN(hidden_size=30,
            input_type="sequence",
            output_type="one",
            vector_core=0.9), Dense(4), Softmax())

    trainer = SGDTrainer(model)

    annealer = LearningRateAnnealer()

    trainer.run(batch_set.train_set(),
                batch_set.valid_set(),
                controllers=[annealer])
Example #13
def clip_param_norm():
    for param in model.parameters:
        if param.name.startswith("W"):
            l2_norms = np.sqrt(
                np.sum(param.get_value()**2, axis=0, keepdims=True))
            desired_norms = np.clip(l2_norms, 0, L2NORM_LIMIT)
            scale = (desired_norms + EPSILON) / (l2_norms + EPSILON)
            param.set_value(param.get_value() * scale)


if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.training_callbacks.append(clip_param_norm)
    model.stack(Dropout(0.2),
                Maxout(240, num_pieces=5, init=UniformInitializer(.005)),
                Maxout(240, num_pieces=5, init=UniformInitializer(.005)),
                Dense(10, 'linear', init=UniformInitializer(.005)), Softmax())

    trainer = MomentumTrainer(model, {
        "learning_rate": graph.shared(0.01),
        "momentum": 0.5
    })

    annealer = ExponentialLearningRateAnnealer(debug=True)

    mnist = MiniBatches(MnistDataset(), batch_size=100)

    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(default_model)
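
The clip_param_norm callback above applies a max-norm constraint: each column of a weight matrix is rescaled so that its L2 norm does not exceed a limit. Below is a self-contained NumPy sketch of the same rescaling; the values of L2NORM_LIMIT and EPSILON are illustrative, since both constants are defined outside the snippet above:

import numpy as np

L2NORM_LIMIT = 1.9365   # illustrative limit
EPSILON = 1e-7          # illustrative epsilon to avoid division by zero

W = np.random.randn(784, 240)
l2_norms = np.sqrt(np.sum(W ** 2, axis=0, keepdims=True))  # per-column norms
desired_norms = np.clip(l2_norms, 0, L2NORM_LIMIT)
W_clipped = W * (desired_norms + EPSILON) / (l2_norms + EPSILON)

# Every column norm is now at most (approximately) L2NORM_LIMIT
assert np.all(np.sqrt(np.sum(W_clipped ** 2, axis=0)) <= L2NORM_LIMIT + 1e-6)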
Example #14
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This experiment setting is described in http://arxiv.org/pdf/1502.03167v3.pdf.
This is the MNIST MLP baseline model.
The Gaussian initialization described in the paper did not converge; the reason is unclear.
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax
from deepy.trainers import SGDTrainer

default_model = os.path.join(os.path.dirname(__file__), "models",
                             "baseline1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(100, 'sigmoid'), Dense(100, 'sigmoid'),
                Dense(100, 'sigmoid'), Dense(10, 'linear'), Softmax())

    trainer = SGDTrainer(model)

    batches = MiniBatches(MnistDataset(), batch_size=60)

    trainer.run(batches, controllers=[])

    model.save_params(default_model)