Example 1
    def __init__(self, input_dim, output_dim, hidden_size, init_ranges,
                 **kwargs):
        linear1 = LinearMaxout(input_dim=input_dim,
                               output_dim=hidden_size,
                               num_pieces=2,
                               name='linear1')
        linear2 = LinearMaxout(input_dim=hidden_size,
                               output_dim=hidden_size,
                               num_pieces=2,
                               name='linear2')
        linear3 = Linear(input_dim=hidden_size, output_dim=output_dim)
        logistic = Logistic()
        bricks = [
            linear1,
            BatchNormalization(input_dim=hidden_size, name='bn2'),
            linear2,
            BatchNormalization(input_dim=hidden_size, name='bnl'),
            linear3,
            logistic
        ]
        for init_range, b in zip(init_ranges, (linear1, linear2, linear3)):
            b.biases_init = initialization.Constant(0)
            b.weights_init = initialization.Uniform(width=init_range)

        kwargs.setdefault('use_bias', False)
        super(ConcatenateClassifier, self).__init__([b.apply for b in bricks],
                                                    **kwargs)
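
Nothing above allocates parameters: Blocks defers allocation and initialization until initialize() is called on the top-level brick, which is why it is enough to set weights_init/biases_init on the children here. A minimal usage sketch, assuming ConcatenateClassifier subclasses FeedforwardSequence (all numbers below are illustrative, not from the source):

# Hypothetical usage; dims and ranges are made up for illustration.
classifier = ConcatenateClassifier(
    input_dim=4096, output_dim=25, hidden_size=512,
    init_ranges=(0.02, 0.02, 0.02))
classifier.initialize()  # applies the weights_init/biases_init set in __init__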
Example 2
def create_model_brick():
    decoder = MLP(
        dims=[NLAT, GEN_HIDDEN, GEN_HIDDEN, GEN_HIDDEN, GEN_HIDDEN, INPUT_DIM],
        activations=[Sequence([BatchNormalization(GEN_HIDDEN).apply,
                               GEN_ACTIVATION().apply],
                              name='decoder_h1'),
                     Sequence([BatchNormalization(GEN_HIDDEN).apply,
                               GEN_ACTIVATION().apply],
                              name='decoder_h2'),
                     Sequence([BatchNormalization(GEN_HIDDEN).apply,
                               GEN_ACTIVATION().apply],
                              name='decoder_h3'),
                     Sequence([BatchNormalization(GEN_HIDDEN).apply,
                               GEN_ACTIVATION().apply],
                              name='decoder_h4'),
                     Identity(name='decoder_out')],
        use_bias=False,
        name='decoder')

    discriminator = Sequence(
        application_methods=[
            LinearMaxout(
                input_dim=INPUT_DIM,
                output_dim=DISC_HIDDEN,
                num_pieces=MAXOUT_PIECES,
                weights_init=GAUSSIAN_INIT,
                biases_init=ZERO_INIT,
                name='discriminator_h1').apply,
            LinearMaxout(
                input_dim=DISC_HIDDEN,
                output_dim=DISC_HIDDEN,
                num_pieces=MAXOUT_PIECES,
                weights_init=GAUSSIAN_INIT,
                biases_init=ZERO_INIT,
                name='discriminator_h2').apply,
            LinearMaxout(
                input_dim=DISC_HIDDEN,
                output_dim=DISC_HIDDEN,
                num_pieces=MAXOUT_PIECES,
                weights_init=GAUSSIAN_INIT,
                biases_init=ZERO_INIT,
                name='discriminator_h3').apply,
            Linear(
                input_dim=DISC_HIDDEN,
                output_dim=1,
                weights_init=GAUSSIAN_INIT,
                biases_init=ZERO_INIT,
                name='discriminator_out').apply],
        name='discriminator')

    gan = GAN(decoder=decoder, discriminator=discriminator,
              weights_init=GAUSSIAN_INIT, biases_init=ZERO_INIT, name='gan')
    gan.push_allocation_config()
    decoder.linear_transformations[-1].use_bias = True
    gan.initialize()

    return gan
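
The three-step ending is the standard Blocks idiom: push_allocation_config() propagates the top-level configuration (including use_bias=False) down to the children, a single child is then overridden, and only afterwards does initialize() allocate and initialize parameters. A self-contained sketch of the same ordering on a bare MLP (dims and constants are illustrative):

from blocks.bricks import MLP, Rectifier, Identity
from blocks.initialization import IsotropicGaussian, Constant

# Illustrative dims/constants; the point is the ordering of the three calls.
mlp = MLP(dims=[8, 16, 1],
          activations=[Rectifier(), Identity(name='out')],
          use_bias=False,
          weights_init=IsotropicGaussian(0.02),
          biases_init=Constant(0))
mlp.push_allocation_config()                    # push config down to children
mlp.linear_transformations[-1].use_bias = True  # then override a single child
mlp.initialize()                                # finally allocate + initialize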
Example 3
def test_linear_maxout():
    x = tensor.matrix()

    linear_maxout = LinearMaxout(input_dim=16, output_dim=8, num_pieces=3,
                                 weights_init=Constant(2),
                                 biases_init=Constant(1))
    y = linear_maxout.apply(x)
    linear_maxout.initialize()
    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(
        y.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((16, 24))) +
            numpy.ones((4, 24))).reshape(4, 8, 3).max(2))
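
The expected value can be traced by hand: LinearMaxout projects to output_dim * num_pieces = 24 units and takes the max over the pieces axis, so with all-ones inputs, weights of 2 and biases of 1 every entry is 16 * 2 + 1 = 33. A pure-NumPy restatement of the assertion:

import numpy

x = numpy.ones((4, 16))
W = 2 * numpy.ones((16, 8 * 3))          # weights_init=Constant(2)
b = numpy.ones(8 * 3)                    # biases_init=Constant(1)
pre = x.dot(W) + b                       # shape (4, 24), every entry 33
out = pre.reshape(4, 8, 3).max(axis=2)   # shape (4, 8): max over the 3 pieces
assert (out == 33).all()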
Example 4
    def __init__(self, visual_dim, textual_dim, output_dim, hidden_size,
                 init_ranges, **kwargs):
        (visual_range, textual_range, linear_range_1, linear_range_2,
         linear_range_3) = init_ranges
        visual_layer = FeedforwardSequence([
            BatchNormalization(input_dim=visual_dim).apply,
            LinearMaxout(
                input_dim=visual_dim,
                output_dim=hidden_size,
                weights_init=initialization.Uniform(width=visual_range),
                use_bias=False,
                biases_init=initialization.Constant(0),
                num_pieces=2).apply
        ], name='visual_layer')
        textual_layer = FeedforwardSequence([
            BatchNormalization(input_dim=textual_dim).apply,
            LinearMaxout(
                input_dim=textual_dim,
                output_dim=hidden_size,
                weights_init=initialization.Uniform(width=textual_range),
                biases_init=initialization.Constant(0),
                use_bias=False,
                num_pieces=2).apply
        ], name='textual_layer')
        logistic_mlp = MLPGenreClassifier(
            hidden_size, output_dim, hidden_size,
            [linear_range_1, linear_range_2, linear_range_3])
        # logistic_mlp = Sequence([
        #   BatchNormalization(input_dim=hidden_size, name='bn1').apply,
        #   Linear(hidden_size, output_dim, name='linear_output', use_bias=False,
        #          weights_init=initialization.Uniform(width=linear_range_1)).apply,
        #   Logistic().apply
        #], name='logistic_mlp')

        children = [visual_layer, textual_layer, logistic_mlp]
        kwargs.setdefault('use_bias', False)
        kwargs.setdefault('children', children)
        super(LinearSumClassifier, self).__init__(**kwargs)
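
Each modality branch batch-normalizes its input and then applies a two-piece maxout projection without a bias. A minimal standalone version of one branch (dims are made up, not from the source):

from blocks import initialization
from blocks.bricks import BatchNormalization, FeedforwardSequence, LinearMaxout

# Illustrative dims, not from the source.
branch = FeedforwardSequence([
    BatchNormalization(input_dim=32).apply,
    LinearMaxout(input_dim=32,
                 output_dim=16,
                 num_pieces=2,
                 use_bias=False,
                 weights_init=initialization.Uniform(width=0.1),
                 biases_init=initialization.Constant(0)).apply,
], name='branch')
branch.initialize()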
Example 5
def create_model_brick():
    encoder_mapping = MLP(
        dims=[2 * INPUT_DIM, GEN_HIDDEN, GEN_HIDDEN, NLAT],
        activations=[Sequence([BatchNormalization(GEN_HIDDEN).apply,
                               GEN_ACTIVATION().apply],
                              name='encoder_h1'),
                     Sequence([BatchNormalization(GEN_HIDDEN).apply,
                               GEN_ACTIVATION().apply],
                              name='encoder_h2'),
                     Identity(name='encoder_out')],
        use_bias=False,
        name='encoder_mapping')
    encoder = COVConditional(encoder_mapping, (INPUT_DIM,), name='encoder')

    decoder_mapping = MLP(
        dims=[NLAT, GEN_HIDDEN, GEN_HIDDEN, GEN_HIDDEN, GEN_HIDDEN, INPUT_DIM],
        activations=[Sequence([BatchNormalization(GEN_HIDDEN).apply,
                               GEN_ACTIVATION().apply],
                              name='decoder_h1'),
                     Sequence([BatchNormalization(GEN_HIDDEN).apply,
                               GEN_ACTIVATION().apply],
                              name='decoder_h2'),
                     Sequence([BatchNormalization(GEN_HIDDEN).apply,
                               GEN_ACTIVATION().apply],
                              name='decoder_h3'),
                     Sequence([BatchNormalization(GEN_HIDDEN).apply,
                               GEN_ACTIVATION().apply],
                              name='decoder_h4'),
                     Identity(name='decoder_out')],
        use_bias=False,
        name='decoder_mapping')
    decoder = DeterministicConditional(decoder_mapping, name='decoder')

    x_discriminator = Identity(name='x_discriminator')
    z_discriminator = Identity(name='z_discriminator')
    joint_discriminator = Sequence(
        application_methods=[
            LinearMaxout(input_dim=INPUT_DIM + NLAT,
                         output_dim=DISC_HIDDEN,
                         num_pieces=MAXOUT_PIECES,
                         weights_init=GAUSSIAN_INIT,
                         biases_init=ZERO_INIT,
                         name='discriminator_h1').apply,
            LinearMaxout(input_dim=DISC_HIDDEN,
                         output_dim=DISC_HIDDEN,
                         num_pieces=MAXOUT_PIECES,
                         weights_init=GAUSSIAN_INIT,
                         biases_init=ZERO_INIT,
                         name='discriminator_h2').apply,
            LinearMaxout(input_dim=DISC_HIDDEN,
                         output_dim=DISC_HIDDEN,
                         num_pieces=MAXOUT_PIECES,
                         weights_init=GAUSSIAN_INIT,
                         biases_init=ZERO_INIT,
                         name='discriminator_h3').apply,
            Linear(input_dim=DISC_HIDDEN,
                   output_dim=1,
                   weights_init=GAUSSIAN_INIT,
                   biases_init=ZERO_INIT,
                   name='discriminator_out').apply
        ],
        name='joint_discriminator')
    discriminator = XZJointDiscriminator(x_discriminator,
                                         z_discriminator,
                                         joint_discriminator,
                                         name='discriminator')

    ali = ALI(encoder=encoder,
              decoder=decoder,
              discriminator=discriminator,
              weights_init=GAUSSIAN_INIT,
              biases_init=ZERO_INIT,
              name='ali')
    ali.push_allocation_config()
    encoder_mapping.linear_transformations[-1].use_bias = True
    decoder_mapping.linear_transformations[-1].use_bias = True
    ali.initialize()

    print("Number of parameters in discriminator: {}".format(
        numpy.sum([numpy.prod(v.shape.eval()) for v in
                   Selector(ali.discriminator).get_parameters().values()])))
    print("Number of parameters in encoder: {}".format(
        numpy.sum([numpy.prod(v.shape.eval()) for v in
                   Selector(ali.encoder).get_parameters().values()])))
    print("Number of parameters in decoder: {}".format(
        numpy.sum([numpy.prod(v.shape.eval()) for v in
                   Selector(ali.decoder).get_parameters().values()])))

    return ali
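
The parameter-counting idiom in the print calls reappears in Example 7; it can be factored into a small helper (illustrative, not part of either source):

import numpy
from blocks.select import Selector

def count_parameters(brick):
    """Total number of scalar parameters reachable from `brick`."""
    return int(numpy.sum([numpy.prod(v.shape.eval())
                          for v in Selector(brick).get_parameters().values()]))

# e.g. print(count_parameters(ali.decoder))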
Example 6
class CharRNNModel(Initializable):
    """
    A model for testing that the components of my more complex models work.

    This is just a model that predicts one character at a time using an LSTM layer.
    """

    def __init__(self, config_dict, init_type="xavier", **kwargs):

        super(CharRNNModel, self).__init__(**kwargs)

        self.batch_size = config_dict["batch_size"]
        self.num_subwords = config_dict["num_subwords"]
        self.num_words = config_dict["num_words"]
        self.subword_embedding_size = config_dict["subword_embedding_size"]
        self.input_vocab_size = config_dict["input_vocab_size"]
        self.output_vocab_size = config_dict["output_vocab_size"]
        self.subword_RNN_hidden_state_size = config_dict["subword_RNN_hidden_state_size"]
        self.table_width = config_dict["table_width"]
        self.max_out_dim = config_dict["max_out_dim"]
        self.max_out_K = config_dict["max_out_K"]

        self.lookup = LookupTable(length=self.input_vocab_size, dim=self.subword_embedding_size, name="input_lookup")
        self.lookup.weights_init = Uniform(width=self.table_width)
        self.lookup.biases_init = Constant(0)

        if init_type == "xavier":
            linear_init = XavierInitializationOriginal(self.subword_embedding_size, self.subword_RNN_hidden_state_size)
            lstm_init = XavierInitializationOriginal(self.subword_embedding_size, self.subword_RNN_hidden_state_size)
        else:  # default is gaussian
            linear_init = IsotropicGaussian()
            lstm_init = IsotropicGaussian()

        # The `inputs` are then split in this order: Input gates, forget gates, cells and output gates
        self.linear_forward = Linear(
            input_dim=self.subword_embedding_size,
            output_dim=self.subword_RNN_hidden_state_size * 4,
            name="linear_forward",
            weights_init=linear_init,
            biases_init=Constant(0.0),
        )

        self.language_model = LSTM(
            dim=self.subword_RNN_hidden_state_size,
            activation=Tanh(),
            name="language_model_RNN",
            weights_init=lstm_init,
            biases_init=Constant(0.0),
        )

        self.max_out = LinearMaxout(
            self.subword_RNN_hidden_state_size,
            self.max_out_dim,
            self.max_out_K,
            name="max_out",
            weights_init=IsotropicGaussian(),
            biases_init=Constant(0.0),
        )

        self.softmax_linear = Linear(
            self.max_out_dim,
            self.output_vocab_size,
            name="soft_max_linear",
            weights_init=IsotropicGaussian(),
            biases_init=Constant(0.0),
        )

        self.softmax = NDimensionalSoftmax()

        self.children = [
            self.lookup,
            self.linear_forward,
            self.language_model,
            self.max_out,
            self.softmax_linear,
            self.softmax,
        ]

    @application(inputs=["features", "features_mask", "targets", "targets_mask"], outputs=["cost"])
    def apply(self, features, features_mask, targets, targets_mask):

        subword_embeddings = self.lookup.apply(features)
        # [0] = hidden states, [1] = cells
        sentence_embeddings = self.language_model.apply(
            self.linear_forward.apply(subword_embeddings),
            mask=features_mask)[0]

        linear_output = self.softmax_linear.apply(
            self.max_out.apply(sentence_embeddings))
        # Per-position cross-entropy, masked so padding does not contribute,
        # then averaged over the unmasked positions.
        cost = self.softmax.categorical_cross_entropy(
            targets, linear_output, extra_ndim=1)
        cost = (cost * targets_mask).sum() / targets_mask.sum()
        cost.name = "cost"
        return cost
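
The masked average at the end of apply is an ordinary weighted mean, with padded positions zeroed out before summation; in miniature:

import numpy

costs = numpy.array([[1.0, 2.0],
                     [3.0, 5.0]])                # per-(time, batch) cross-entropy
mask = numpy.array([[1.0, 1.0],
                    [1.0, 0.0]])                 # last position is padding
assert (costs * mask).sum() / mask.sum() == 2.0  # (1 + 2 + 3) / 3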
Example 7
    def create_model_brick(self):
        decoder = MLP(
            dims=[
                self._config["num_zdim"], self._config["gen_hidden_size"],
                self._config["gen_hidden_size"],
                self._config["gen_hidden_size"],
                self._config["gen_hidden_size"], self._config["num_xdim"]
            ],
            activations=[
                Sequence([
                    BatchNormalization(self._config["gen_hidden_size"]).apply,
                    self._config["gen_activation"]().apply
                ],
                         name='decoder_h1'),
                Sequence([
                    BatchNormalization(self._config["gen_hidden_size"]).apply,
                    self._config["gen_activation"]().apply
                ],
                         name='decoder_h2'),
                Sequence([
                    BatchNormalization(self._config["gen_hidden_size"]).apply,
                    self._config["gen_activation"]().apply
                ],
                         name='decoder_h3'),
                Sequence([
                    BatchNormalization(self._config["gen_hidden_size"]).apply,
                    self._config["gen_activation"]().apply
                ],
                         name='decoder_h4'),
                Identity(name='decoder_out')
            ],
            use_bias=False,
            name='decoder')

        discriminator = Sequence(
            application_methods=[
                LinearMaxout(input_dim=(self._config["num_xdim"] *
                                        self._config["num_packing"]),
                             output_dim=self._config["disc_hidden_size"],
                             num_pieces=self._config["disc_maxout_pieces"],
                             weights_init=IsotropicGaussian(
                                 self._config["weights_init_std"]),
                             biases_init=self._config["biases_init"],
                             name='discriminator_h1').apply,
                LinearMaxout(input_dim=self._config["disc_hidden_size"],
                             output_dim=self._config["disc_hidden_size"],
                             num_pieces=self._config["disc_maxout_pieces"],
                             weights_init=IsotropicGaussian(
                                 self._config["weights_init_std"]),
                             biases_init=self._config["biases_init"],
                             name='discriminator_h2').apply,
                LinearMaxout(input_dim=self._config["disc_hidden_size"],
                             output_dim=self._config["disc_hidden_size"],
                             num_pieces=self._config["disc_maxout_pieces"],
                             weights_init=IsotropicGaussian(
                                 self._config["weights_init_std"]),
                             biases_init=self._config["biases_init"],
                             name='discriminator_h3').apply,
                Linear(input_dim=self._config["disc_hidden_size"],
                       output_dim=1,
                       weights_init=IsotropicGaussian(
                           self._config["weights_init_std"]),
                       biases_init=self._config["biases_init"],
                       name='discriminator_out').apply
            ],
            name='discriminator')

        gan = PacGAN(decoder=decoder,
                     discriminator=discriminator,
                     weights_init=IsotropicGaussian(
                         self._config["weights_init_std"]),
                     biases_init=self._config["biases_init"],
                     name='gan')
        gan.push_allocation_config()
        decoder.linear_transformations[-1].use_bias = True
        gan.initialize()

        print("Number of parameters in discriminator: {}".format(
            numpy.sum([
                numpy.prod(v.shape.eval())
                for v in Selector(gan.discriminator).get_parameters().values()
            ])))
        print("Number of parameters in decoder: {}".format(
            numpy.sum([
                numpy.prod(v.shape.eval())
                for v in Selector(gan.decoder).get_parameters().values()
            ])))

        return gan