def __init__(self, input_dim, output_dim, hidden_size, init_ranges, **kwargs):
    linear1 = LinearMaxout(input_dim=input_dim, output_dim=hidden_size,
                           num_pieces=2, name='linear1')
    linear2 = LinearMaxout(input_dim=hidden_size, output_dim=hidden_size,
                           num_pieces=2, name='linear2')
    linear3 = Linear(input_dim=hidden_size, output_dim=output_dim)
    logistic = Logistic()
    bricks = [linear1,
              BatchNormalization(input_dim=hidden_size, name='bn1'),
              linear2,
              BatchNormalization(input_dim=hidden_size, name='bn2'),
              linear3,
              logistic]
    for init_range, b in zip(init_ranges, (linear1, linear2, linear3)):
        b.biases_init = initialization.Constant(0)
        b.weights_init = initialization.Uniform(width=init_range)
    kwargs.setdefault('use_bias', False)
    super(ConcatenateClassifier, self).__init__(
        [b.apply for b in bricks], **kwargs)
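# For intuition, a minimal NumPy sketch of the maxout computation that
# LinearMaxout performs: a linear map to output_dim * num_pieces features,
# then an elementwise max over each group of pieces. Shapes and names below
# are illustrative, not taken from the original code.
import numpy

def maxout_sketch(x, W, b, num_pieces):
    # (batch, input_dim) -> (batch, output_dim * num_pieces) -> max over pieces
    z = x.dot(W) + b
    batch, total = z.shape
    return z.reshape(batch, total // num_pieces, num_pieces).max(axis=2)

x = numpy.ones((4, 16))
W = numpy.random.randn(16, 8 * 2)                       # hidden_size=8, num_pieces=2
h = maxout_sketch(x, W, numpy.zeros(8 * 2), num_pieces=2)  # shape (4, 8)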
def create_model_brick():
    # Generator: four batch-normalized hidden layers and a linear output.
    decoder = MLP(
        dims=[NLAT, GEN_HIDDEN, GEN_HIDDEN, GEN_HIDDEN, GEN_HIDDEN, INPUT_DIM],
        activations=[
            Sequence([BatchNormalization(GEN_HIDDEN).apply,
                      GEN_ACTIVATION().apply], name='decoder_h1'),
            Sequence([BatchNormalization(GEN_HIDDEN).apply,
                      GEN_ACTIVATION().apply], name='decoder_h2'),
            Sequence([BatchNormalization(GEN_HIDDEN).apply,
                      GEN_ACTIVATION().apply], name='decoder_h3'),
            Sequence([BatchNormalization(GEN_HIDDEN).apply,
                      GEN_ACTIVATION().apply], name='decoder_h4'),
            Identity(name='decoder_out')],
        use_bias=False,
        name='decoder')

    # Discriminator: three maxout layers followed by a scalar linear output.
    discriminator = Sequence(
        application_methods=[
            LinearMaxout(input_dim=INPUT_DIM,
                         output_dim=DISC_HIDDEN,
                         num_pieces=MAXOUT_PIECES,
                         weights_init=GAUSSIAN_INIT,
                         biases_init=ZERO_INIT,
                         name='discriminator_h1').apply,
            LinearMaxout(input_dim=DISC_HIDDEN,
                         output_dim=DISC_HIDDEN,
                         num_pieces=MAXOUT_PIECES,
                         weights_init=GAUSSIAN_INIT,
                         biases_init=ZERO_INIT,
                         name='discriminator_h2').apply,
            LinearMaxout(input_dim=DISC_HIDDEN,
                         output_dim=DISC_HIDDEN,
                         num_pieces=MAXOUT_PIECES,
                         weights_init=GAUSSIAN_INIT,
                         biases_init=ZERO_INIT,
                         name='discriminator_h3').apply,
            Linear(input_dim=DISC_HIDDEN,
                   output_dim=1,
                   weights_init=GAUSSIAN_INIT,
                   biases_init=ZERO_INIT,
                   name='discriminator_out').apply],
        name='discriminator')

    gan = GAN(decoder=decoder, discriminator=discriminator,
              weights_init=GAUSSIAN_INIT, biases_init=ZERO_INIT, name='gan')
    gan.push_allocation_config()
    # The MLP was built with use_bias=False; re-enable the bias on the
    # output layer only, then initialize all parameters.
    decoder.linear_transformations[-1].use_bias = True
    gan.initialize()
    return gan
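# A minimal usage sketch, assuming the module-level constants (NLAT,
# INPUT_DIM, ...) are in scope and that the GAN brick exposes its `decoder`
# attribute; it compiles only the generator to map noise to samples.
import numpy
import theano
from theano import tensor

gan = create_model_brick()
z = tensor.matrix('z')
x_tilde = gan.decoder.apply(z)              # generator forward pass
sample_fn = theano.function([z], x_tilde)

noise = numpy.random.randn(64, NLAT).astype(theano.config.floatX)
samples = sample_fn(noise)                  # shape (64, INPUT_DIM)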
def test_linear_maxout():
    x = tensor.matrix()
    linear_maxout = LinearMaxout(input_dim=16, output_dim=8, num_pieces=3,
                                 weights_init=Constant(2),
                                 biases_init=Constant(1))
    y = linear_maxout.apply(x)
    linear_maxout.initialize()

    x_val = numpy.ones((4, 16), dtype=theano.config.floatX)
    assert_allclose(
        y.eval({x: x_val}),
        (x_val.dot(2 * numpy.ones((16, 24))) +
         numpy.ones((4, 24))).reshape(4, 8, 3).max(2))
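# The expected value can also be checked by hand: with an all-ones input of
# width 16, constant weights of 2 and a constant bias of 1, every one of the
# 24 pre-activations equals 16 * 2 + 1 = 33, so the max over each group of
# 3 pieces is uniformly 33. An equivalent closed-form assertion that could
# be appended to the test above:
assert_allclose(y.eval({x: x_val}), 33 * numpy.ones((4, 8)))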
def __init__(self, visual_dim, textual_dim, output_dim, hidden_size,
             init_ranges, **kwargs):
    (visual_range, textual_range, linear_range_1, linear_range_2,
     linear_range_3) = init_ranges
    visual_layer = FeedforwardSequence(
        [BatchNormalization(input_dim=visual_dim).apply,
         LinearMaxout(input_dim=visual_dim,
                      output_dim=hidden_size,
                      weights_init=initialization.Uniform(width=visual_range),
                      use_bias=False,
                      biases_init=initialization.Constant(0),
                      num_pieces=2).apply],
        name='visual_layer')
    textual_layer = FeedforwardSequence(
        [BatchNormalization(input_dim=textual_dim).apply,
         LinearMaxout(input_dim=textual_dim,
                      output_dim=hidden_size,
                      weights_init=initialization.Uniform(width=textual_range),
                      biases_init=initialization.Constant(0),
                      use_bias=False,
                      num_pieces=2).apply],
        name='textual_layer')
    logistic_mlp = MLPGenreClassifier(
        hidden_size, output_dim, hidden_size,
        [linear_range_1, linear_range_2, linear_range_3])
    # logistic_mlp = Sequence([
    #     BatchNormalization(input_dim=hidden_size, name='bn1').apply,
    #     Linear(hidden_size, output_dim, name='linear_output', use_bias=False,
    #            weights_init=initialization.Uniform(width=linear_range_1)).apply,
    #     Logistic().apply
    # ], name='logistic_mlp')

    children = [visual_layer, textual_layer, logistic_mlp]
    kwargs.setdefault('use_bias', False)
    kwargs.setdefault('children', children)
    super(LinearSumClassifier, self).__init__(**kwargs)
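# The apply method of LinearSumClassifier is not shown here. A plausible
# sketch, assuming the fusion is the elementwise sum the class name suggests
# and that blocks' `application` decorator is imported; this is an assumption,
# not the original implementation:
@application(inputs=['visual_input', 'textual_input'], outputs=['output'])
def apply(self, visual_input, textual_input):
    # Project each modality into the shared hidden space, sum, classify.
    visual = self.children[0].apply(visual_input)
    textual = self.children[1].apply(textual_input)
    return self.children[2].apply(visual + textual)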
def create_model_brick():
    # Encoder: maps observations to the latent space.
    encoder_mapping = MLP(
        dims=[2 * INPUT_DIM, GEN_HIDDEN, GEN_HIDDEN, NLAT],
        activations=[
            Sequence([BatchNormalization(GEN_HIDDEN).apply,
                      GEN_ACTIVATION().apply], name='encoder_h1'),
            Sequence([BatchNormalization(GEN_HIDDEN).apply,
                      GEN_ACTIVATION().apply], name='encoder_h2'),
            Identity(name='encoder_out')],
        use_bias=False,
        name='encoder_mapping')
    encoder = COVConditional(encoder_mapping, (INPUT_DIM,), name='encoder')

    # Decoder: maps latent samples back to input space.
    decoder_mapping = MLP(
        dims=[NLAT, GEN_HIDDEN, GEN_HIDDEN, GEN_HIDDEN, GEN_HIDDEN, INPUT_DIM],
        activations=[
            Sequence([BatchNormalization(GEN_HIDDEN).apply,
                      GEN_ACTIVATION().apply], name='decoder_h1'),
            Sequence([BatchNormalization(GEN_HIDDEN).apply,
                      GEN_ACTIVATION().apply], name='decoder_h2'),
            Sequence([BatchNormalization(GEN_HIDDEN).apply,
                      GEN_ACTIVATION().apply], name='decoder_h3'),
            Sequence([BatchNormalization(GEN_HIDDEN).apply,
                      GEN_ACTIVATION().apply], name='decoder_h4'),
            Identity(name='decoder_out')],
        use_bias=False,
        name='decoder_mapping')
    decoder = DeterministicConditional(decoder_mapping, name='decoder')

    # Discriminator: scores joint (x, z) pairs through three maxout layers.
    x_discriminator = Identity(name='x_discriminator')
    z_discriminator = Identity(name='z_discriminator')
    joint_discriminator = Sequence(
        application_methods=[
            LinearMaxout(input_dim=INPUT_DIM + NLAT,
                         output_dim=DISC_HIDDEN,
                         num_pieces=MAXOUT_PIECES,
                         weights_init=GAUSSIAN_INIT,
                         biases_init=ZERO_INIT,
                         name='discriminator_h1').apply,
            LinearMaxout(input_dim=DISC_HIDDEN,
                         output_dim=DISC_HIDDEN,
                         num_pieces=MAXOUT_PIECES,
                         weights_init=GAUSSIAN_INIT,
                         biases_init=ZERO_INIT,
                         name='discriminator_h2').apply,
            LinearMaxout(input_dim=DISC_HIDDEN,
                         output_dim=DISC_HIDDEN,
                         num_pieces=MAXOUT_PIECES,
                         weights_init=GAUSSIAN_INIT,
                         biases_init=ZERO_INIT,
                         name='discriminator_h3').apply,
            Linear(input_dim=DISC_HIDDEN,
                   output_dim=1,
                   weights_init=GAUSSIAN_INIT,
                   biases_init=ZERO_INIT,
                   name='discriminator_out').apply],
        name='joint_discriminator')
    discriminator = XZJointDiscriminator(
        x_discriminator, z_discriminator, joint_discriminator,
        name='discriminator')

    ali = ALI(encoder=encoder, decoder=decoder, discriminator=discriminator,
              weights_init=GAUSSIAN_INIT, biases_init=ZERO_INIT, name='ali')
    ali.push_allocation_config()
    # Both MLPs were built with use_bias=False; re-enable the bias on their
    # output layers only before initializing.
    encoder_mapping.linear_transformations[-1].use_bias = True
    decoder_mapping.linear_transformations[-1].use_bias = True
    ali.initialize()

    for name, brick in (('discriminator', ali.discriminator),
                        ('encoder', ali.encoder),
                        ('decoder', ali.decoder)):
        print("Number of parameters in {}: {}".format(
            name,
            numpy.sum([numpy.prod(v.shape.eval()) for v in
                       Selector(brick).get_parameters().values()])))

    return ali
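# For intuition about the shapes: the joint discriminator's first layer
# expects INPUT_DIM + NLAT inputs, so XZJointDiscriminator presumably scores
# feature-wise concatenated (x, z) pairs. A minimal sketch of that wiring,
# reusing the joint_discriminator brick defined inside create_model_brick
# above (the brick's exact implementation is assumed, not shown here):
from theano import tensor

x = tensor.matrix('x')                     # data,   (batch, INPUT_DIM)
z = tensor.matrix('z')                     # latent, (batch, NLAT)
xz = tensor.concatenate([x, z], axis=1)    # (batch, INPUT_DIM + NLAT)
score = joint_discriminator.apply(xz)      # (batch, 1), unbounded logit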
class CharRNNModel(Initializable):
    """A model for testing that the components of my more complex models work.

    This is just a model that predicts one character at a time using an
    LSTM layer.
    """

    def __init__(self, config_dict, init_type="xavier", **kwargs):
        super(CharRNNModel, self).__init__(**kwargs)

        self.batch_size = config_dict["batch_size"]
        self.num_subwords = config_dict["num_subwords"]
        self.num_words = config_dict["num_words"]
        self.subword_embedding_size = config_dict["subword_embedding_size"]
        self.input_vocab_size = config_dict["input_vocab_size"]
        self.output_vocab_size = config_dict["output_vocab_size"]
        self.subword_RNN_hidden_state_size = \
            config_dict["subword_RNN_hidden_state_size"]
        self.table_width = config_dict["table_width"]
        self.max_out_dim = config_dict["max_out_dim"]
        self.max_out_K = config_dict["max_out_K"]

        self.lookup = LookupTable(length=self.input_vocab_size,
                                  dim=self.subword_embedding_size,
                                  name="input_lookup")
        self.lookup.weights_init = Uniform(width=self.table_width)
        self.lookup.biases_init = Constant(0)

        if init_type == "xavier":
            linear_init = XavierInitializationOriginal(
                self.subword_embedding_size,
                self.subword_RNN_hidden_state_size)
            lstm_init = XavierInitializationOriginal(
                self.subword_embedding_size,
                self.subword_RNN_hidden_state_size)
        else:  # default is Gaussian
            linear_init = IsotropicGaussian()
            lstm_init = IsotropicGaussian()

        # The LSTM `inputs` are split in this order: input gates, forget
        # gates, cells and output gates, hence the factor of 4.
        self.linear_forward = Linear(
            input_dim=self.subword_embedding_size,
            output_dim=self.subword_RNN_hidden_state_size * 4,
            name="linear_forward",
            weights_init=linear_init,
            biases_init=Constant(0.0))

        self.language_model = LSTM(
            dim=self.subword_RNN_hidden_state_size,
            activation=Tanh(),
            name="language_model_RNN",
            weights_init=lstm_init,
            biases_init=Constant(0.0))

        self.max_out = LinearMaxout(
            self.subword_RNN_hidden_state_size,
            self.max_out_dim,
            self.max_out_K,
            name="max_out",
            weights_init=IsotropicGaussian(),
            biases_init=Constant(0.0))

        self.softmax_linear = Linear(
            self.max_out_dim,
            self.output_vocab_size,
            name="soft_max_linear",
            weights_init=IsotropicGaussian(),
            biases_init=Constant(0.0))

        self.softmax = NDimensionalSoftmax()

        self.children = [self.lookup, self.linear_forward,
                         self.language_model, self.max_out,
                         self.softmax_linear, self.softmax]

    @application(inputs=["features", "features_mask", "targets",
                         "targets_mask"],
                 outputs=["cost"])
    def apply(self, features, features_mask, targets, targets_mask):
        subword_embeddings = self.lookup.apply(features)
        # LSTM.apply returns [0] = hidden states, [1] = cells.
        sentence_embeddings = self.language_model.apply(
            self.linear_forward.apply(subword_embeddings),
            mask=features_mask)[0]
        linear_output = self.softmax_linear.apply(
            self.max_out.apply(sentence_embeddings))
        cost = self.softmax.categorical_cross_entropy(
            targets, linear_output, extra_ndim=1).mean()
        cost.name = "cost"
        # Since `cost` is already a scalar mean, weighting it by the mask and
        # renormalizing leaves its value unchanged.
        return ((cost * targets_mask).sum()) / targets_mask.sum()
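# A minimal instantiation sketch, assuming a self-consistent config (all
# values below are illustrative); init_type="gaussian" avoids depending on
# the custom XavierInitializationOriginal class.
from theano import tensor

config = {
    "batch_size": 32, "num_subwords": 1, "num_words": 1,
    "subword_embedding_size": 64, "input_vocab_size": 100,
    "output_vocab_size": 100, "subword_RNN_hidden_state_size": 128,
    "table_width": 0.08, "max_out_dim": 128, "max_out_K": 2,
}
model = CharRNNModel(config, init_type="gaussian", name="char_rnn")
model.initialize()

features = tensor.lmatrix("features")           # (time, batch) int indices
features_mask = tensor.matrix("features_mask")  # 1.0 where a step is valid
targets = tensor.lmatrix("targets")
targets_mask = tensor.matrix("targets_mask")
cost = model.apply(features, features_mask, targets, targets_mask)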
def create_model_brick(self):
    # Generator: four batch-normalized hidden layers and a linear output.
    decoder = MLP(
        dims=[self._config["num_zdim"],
              self._config["gen_hidden_size"],
              self._config["gen_hidden_size"],
              self._config["gen_hidden_size"],
              self._config["gen_hidden_size"],
              self._config["num_xdim"]],
        activations=[
            Sequence([BatchNormalization(self._config["gen_hidden_size"]).apply,
                      self._config["gen_activation"]().apply],
                     name='decoder_h1'),
            Sequence([BatchNormalization(self._config["gen_hidden_size"]).apply,
                      self._config["gen_activation"]().apply],
                     name='decoder_h2'),
            Sequence([BatchNormalization(self._config["gen_hidden_size"]).apply,
                      self._config["gen_activation"]().apply],
                     name='decoder_h3'),
            Sequence([BatchNormalization(self._config["gen_hidden_size"]).apply,
                      self._config["gen_activation"]().apply],
                     name='decoder_h4'),
            Identity(name='decoder_out')],
        use_bias=False,
        name='decoder')

    # Discriminator: packs num_packing samples along the feature axis,
    # hence the widened input dimension of the first maxout layer.
    discriminator = Sequence(
        application_methods=[
            LinearMaxout(
                input_dim=self._config["num_xdim"] * self._config["num_packing"],
                output_dim=self._config["disc_hidden_size"],
                num_pieces=self._config["disc_maxout_pieces"],
                weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                biases_init=self._config["biases_init"],
                name='discriminator_h1').apply,
            LinearMaxout(
                input_dim=self._config["disc_hidden_size"],
                output_dim=self._config["disc_hidden_size"],
                num_pieces=self._config["disc_maxout_pieces"],
                weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                biases_init=self._config["biases_init"],
                name='discriminator_h2').apply,
            LinearMaxout(
                input_dim=self._config["disc_hidden_size"],
                output_dim=self._config["disc_hidden_size"],
                num_pieces=self._config["disc_maxout_pieces"],
                weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                biases_init=self._config["biases_init"],
                name='discriminator_h3').apply,
            Linear(
                input_dim=self._config["disc_hidden_size"],
                output_dim=1,
                weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                biases_init=self._config["biases_init"],
                name='discriminator_out').apply],
        name='discriminator')

    gan = PacGAN(decoder=decoder, discriminator=discriminator,
                 weights_init=IsotropicGaussian(self._config["weights_init_std"]),
                 biases_init=self._config["biases_init"],
                 name='gan')
    gan.push_allocation_config()
    # Re-enable the bias on the generator's output layer only.
    decoder.linear_transformations[-1].use_bias = True
    gan.initialize()

    for name, brick in (('discriminator', gan.discriminator),
                        ('decoder', gan.decoder)):
        print("Number of parameters in {}: {}".format(
            name,
            numpy.sum([numpy.prod(v.shape.eval()) for v in
                       Selector(brick).get_parameters().values()])))

    return gan
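# The packing trick is what widens the discriminator input to
# num_xdim * num_packing: the discriminator scores groups of samples rather
# than single ones. A NumPy sketch of that grouping (function name is
# illustrative, not from the original code):
import numpy

def pack_samples(x, num_packing):
    # (batch, num_xdim) -> (batch // num_packing, num_xdim * num_packing):
    # consecutive samples are concatenated along the feature axis.
    batch, num_xdim = x.shape
    assert batch % num_packing == 0
    return x.reshape(batch // num_packing, num_xdim * num_packing)

x = numpy.random.randn(100, 2)
packed = pack_samples(x, num_packing=4)    # shape (25, 8)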