def __init__(self, config, **kwargs):
    super(Model, self).__init__(config, **kwargs)
    self.dest_mlp = MLP(
        activations=[Rectifier() for _ in config.dim_hidden_dest] + [Softmax()],
        dims=[config.dim_hidden[-1]] + config.dim_hidden_dest + [config.dim_output_dest],
        name='dest_mlp')
    self.time_mlp = MLP(
        activations=[Rectifier() for _ in config.dim_hidden_time] + [Softmax()],
        dims=[config.dim_hidden[-1]] + config.dim_hidden_time + [config.dim_output_time],
        name='time_mlp')

    self.dest_classes = theano.shared(numpy.array(
        config.dest_tgtcls, dtype=theano.config.floatX), name='dest_classes')
    self.time_classes = theano.shared(numpy.array(
        config.time_tgtcls, dtype=theano.config.floatX), name='time_classes')

    self.inputs.append('input_time')
    self.children.extend([self.dest_mlp, self.time_mlp])
def create_vae(x=None, batch=batch_size):
    x = T.matrix('features') if x is None else x
    x = x / 255.

    encoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[img_dim ** 2, hidden_dim, 2 * latent_dim],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='encoder')
    encoder.initialize()
    z_param = encoder.apply(x)
    z_mean, z_log_std = z_param[:, latent_dim:], z_param[:, :latent_dim]
    z = Sampling(theano_seed=seed).apply([z_mean, z_log_std], batch=batch)

    decoder = MLP(
        activations=[Rectifier(), Logistic()],
        dims=[latent_dim, hidden_dim, img_dim ** 2],
        weights_init=IsotropicGaussian(std=0.01, mean=0),
        biases_init=Constant(0.01),
        name='decoder')
    decoder.initialize()
    x_reconstruct = decoder.apply(z)

    cost = VAEloss().apply(x, x_reconstruct, z_mean, z_log_std)
    cost.name = 'vae_cost'
    return cost
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(input_dim=10000,
                          dim=500,
                          mlp_hidden_dims=[2000, 500, 4],
                          batch_size=100,
                          image_shape=(100, 100),
                          patch_shape=(28, 28),
                          weights_init=IsotropicGaussian(0.01),
                          biases_init=Constant(0))
    model.initialize()
    h, c = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [500, 100, 10],
                     weights_init=IsotropicGaussian(0.01),
                     biases_init=Constant(0))
    classifier.initialize()
    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)
    return cost, error_rate
def __init__(self, stack_dim=500, **kwargs):
    """Sole constructor.

    Args:
        stack_dim (int): Size of vectors on the stack.
    """
    super(PushDownSequenceContentAttention, self).__init__(**kwargs)
    self.stack_dim = stack_dim
    self.max_stack_depth = 25
    self.stack_op_names = self.state_names + ['weighted_averages']

    self.stack_pop_transformer = MLP(activations=[Logistic()], dims=None)
    self.stack_pop_transformers = Parallel(
        input_names=self.stack_op_names,
        prototype=self.stack_pop_transformer,
        name="stack_pop")

    self.stack_push_transformer = MLP(activations=[Logistic()], dims=None)
    self.stack_push_transformers = Parallel(
        input_names=self.stack_op_names,
        prototype=self.stack_push_transformer,
        name="stack_push")

    self.stack_input_transformer = Linear()
    self.stack_input_transformers = Parallel(
        input_names=self.stack_op_names,
        prototype=self.stack_input_transformer,
        name="stack_input")

    self.children.append(self.stack_pop_transformers)
    self.children.append(self.stack_push_transformers)
    self.children.append(self.stack_input_transformers)
def generation(z_list, n_latent, hu_decoder, n_out, y):
    logger.info('in generation: n_latent: %d, hu_decoder: %d', n_latent, hu_decoder)
    if hu_decoder == 0:
        return generation_simple(z_list, n_latent, n_out, y)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_latent, hu_decoder],
               name='latent_to_hidDecoder')
    initialize([mlp1])
    hid_to_out = Linear(name='hidDecoder_to_output',
                        input_dim=hu_decoder, output_dim=n_out)
    initialize([hid_to_out])
    mysigmoid = Logistic(name='y_hat_vae')
    agg_logpy_xz = 0.
    agg_y_hat = 0.
    for i, z in enumerate(z_list):
        y_hat = mysigmoid.apply(hid_to_out.apply(mlp1.apply(z)))  # reconstructed x
        agg_logpy_xz += cross_entropy_loss(y_hat, y)
        agg_y_hat += y_hat

    agg_logpy_xz /= len(z_list)
    agg_y_hat /= len(z_list)
    return agg_y_hat, agg_logpy_xz
def build_classifier(dimension):
    mlp = MLP([Tanh(), Tanh(), Softmax()], [784, 100, 50, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    return mlp
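# Hedged usage sketch (not part of the original source): applying the classifier
# returned by build_classifier above to a flattened MNIST batch and attaching a
# cross-entropy cost. The variable names here are illustrative assumptions.
x = tensor.matrix('features')
y = tensor.lmatrix('targets')
classifier = build_classifier(784)
probs = classifier.apply(tensor.flatten(x, outdim=2))
cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
cost.name = 'cost'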
def setup_model(configs):
    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5("features")
    tensor3 = theano.tensor.TensorType(config.floatX, (False,) * 3)
    locs = tensor3("locs")
    # shape: B x Classes
    target = T.ivector("targets")

    model = LSTMAttention(configs,
                          weights_init=Glorot(),
                          biases_init=Constant(0))
    model.initialize()

    (h, c, location, scale, alpha, patch, downn_sampled_input,
     conved_part_1, conved_part_2, pre_lstm) = model.apply(input_, locs)

    model.location = location
    model.scale = scale
    model.alpha = alpha
    model.patch = patch

    classifier = MLP(
        [Rectifier(), Softmax()],
        configs["classifier_dims"],
        weights_init=Glorot(),
        biases_init=Constant(0))
    classifier.initialize()

    probabilities = classifier.apply(h[-1])
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    cost.name = "CE"
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = "ER"
    model.cost = cost
    model.error_rate = error_rate
    model.probabilities = probabilities

    if configs["load_pretrained"]:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open("VGG_CNN_params.npz") as f:
            loaded = np.load(f)
            all_conv_params = loaded.keys()
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.pop(all_conv_params.index(param.name))
        print "the following parameters did not match: " + str(all_conv_params)

    if configs["test_model"]:
        print "TESTING THE MODEL: CHECK THE INPUT SIZE!"
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost],
                            on_unused_input="ignore",
                            allow_input_downcast=True)
        data = configs["get_streams"](configs["batch_size"])[0].get_epoch_iterator().next()
        f(data[1], data[0], data[2])
        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
def __init__(self, state_names, state_dims, sequence_dim, match_dim,
             state_transformer=None, sequence_transformer=None,
             energy_computer=None, weights_init=None, biases_init=None,
             **kwargs):
    super(SequenceContentAttention, self).__init__(**kwargs)
    update_instance(self, locals())

    self.state_transformers = Parallel(state_names, self.state_transformer,
                                       name="state_trans")
    if not self.sequence_transformer:
        self.sequence_transformer = MLP([Identity()], name="seq_trans")
    if not self.energy_computer:
        self.energy_computer = MLP([Identity()], name="energy_comp")
    self.children = [self.state_transformers, self.sequence_transformer,
                     self.energy_computer]
def __init__(self, n_out, dwin, vector_size, n_hidden_layer, **kwargs):
    super(ConvPoolNlp, self).__init__(**kwargs)
    self.vector_size = vector_size
    self.n_hidden_layer = n_hidden_layer
    self.dwin = dwin
    self.n_out = n_out

    self.rectifier = Rectifier()
    """
    self.convolution = Convolutional(filter_size=(1, self.filter_size),
                                     num_filters=self.num_filter, num_channels=1,
                                     weights_init=IsotropicGaussian(0.01),
                                     use_bias=False)
    """
    # second dimension is of fixed size sum(vect_size) less the filter_size borders
    self.mlp = MLP(activations=[Rectifier()] * len(self.n_hidden_layer) + [Identity()],
                   dims=[self.n_out] + self.n_hidden_layer + [2],
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0.))

    self.parameters = []
    self.children = []
    #self.children.append(self.lookup)
    #self.children.append(self.convolution)
    self.children.append(self.mlp)
    self.children.append(self.rectifier)
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):

    features = tensor.concatenate([
        features_hascar,
        means['cp'][features_cp[:, 0]],
        means['dep'][features_cp[:, 1]]
    ], axis=1)

    mlp = MLP(activations=[Rectifier(), Rectifier(), None],
              dims=[5, 50, 50, 1],
              weights_init=IsotropicGaussian(.1),
              biases_init=Constant(0),
              name='mlp')
    mlp.initialize()
    prediction = mlp.apply(features)
    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print input_var

    cg_dropout1 = apply_dropout(cg, [input_var[3], input_var[5]], .4)
    cost_dropout1 = cg_dropout1.outputs[0]

    return prediction, cost_dropout1, cg_dropout1.parameters, cost
def test_pylearn2_trainin():
    # Construct the model
    mlp = MLP(activations=[Sigmoid(), Sigmoid()], dims=[784, 100, 784],
              weights_init=IsotropicGaussian(), biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()

    block_cost = BlocksCost(cost)
    block_model = BlocksModel(mlp, (VectorSpace(dim=784), 'features'))

    # Load the data
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    # Silence Pylearn2's logger
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm
    sgd = SGD(learning_rate=0.01, cost=block_cost, batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      Flatten(
                          DataStream.default_stream(
                              mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 500)),
                          which_sources=('features',)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
def create_base_model(self, x, y, input_dim, interim_dim=30):
    # Create the output of the MLP
    mlp = MLP([Tanh(), Tanh(), Tanh()],
              [input_dim, 60, 60, interim_dim],
              weights_init=IsotropicGaussian(0.001),
              biases_init=Constant(0))
    mlp.initialize()
    inter = mlp.apply(x)

    fine_tuner = MLP([Logistic()],
                     [interim_dim, 1],
                     weights_init=IsotropicGaussian(0.001),
                     biases_init=Constant(0))
    fine_tuner.initialize()
    probs = fine_tuner.apply(inter)

    #sq_err = BinaryCrossEntropy()
    err = T.sqr(y.flatten() - probs.flatten())
    # cost = T.mean(err * y.flatten() * (1 - self.p) + err *
    #               (1 - y.flatten()) * self.p)
    cost = T.mean(err)
    #cost = sq_err.apply(probs.flatten(), y.flatten())
    # cost = T.mean(y.flatten() * T.log(probs.flatten()) +
    #               (1 - y.flatten()) * T.log(1 - probs.flatten()))
    cost.name = 'cost'
    pred_out = probs > 0.5
    mis_cost = T.sum(T.neq(y.flatten(), pred_out.flatten()))
    mis_cost.name = 'MisclassificationRate'
    return mlp, fine_tuner, cost, mis_cost
def create_model(self, x, y, input_dim, tol=10e-5):
    # Create the output of the MLP
    mlp = MLP([Rectifier(), Rectifier(), Logistic()],
              [input_dim, 100, 100, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    probs = mlp.apply(x)
    y = y.dimshuffle(0, 'x')

    # Create the if-else cost function
    true_p = (T.sum(y * probs) + tol) * 1.0 / (T.sum(y) + tol)
    true_n = (T.sum((1 - y) * (1 - probs)) + tol) * \
        1.0 / (T.sum(1 - y) + tol)
    #p = (T.sum(y) + tol) / (y.shape[0] + tol)
    theta = (1 - self.p) / self.p
    numerator = (1 + self.beta ** 2) * true_p
    denominator = self.beta ** 2 + theta + true_p - theta * true_n
    Fscore = numerator / denominator

    cost = -1 * Fscore
    cost.name = "cost"
    return mlp, cost, probs
def __init__(self, config, **kwargs):
    super(Model, self).__init__(**kwargs)
    self.config = config

    self.pre_context_embedder = ContextEmbedder(
        config.pre_embedder, name='pre_context_embedder')
    self.post_context_embedder = ContextEmbedder(
        config.post_embedder, name='post_context_embedder')

    in1 = 2 + sum(x[2] for x in config.pre_embedder.dim_embeddings)
    self.input_to_rec = MLP(activations=[Tanh()],
                            dims=[in1, config.hidden_state_dim],
                            name='input_to_rec')

    self.rec = LSTM(dim=config.hidden_state_dim, name='recurrent')

    in2 = config.hidden_state_dim + sum(
        x[2] for x in config.post_embedder.dim_embeddings)
    self.rec_to_output = MLP(activations=[Tanh()],
                             dims=[in2, 2],
                             name='rec_to_output')

    self.sequences = ['latitude', 'latitude_mask', 'longitude']
    self.context = self.pre_context_embedder.inputs + self.post_context_embedder.inputs
    self.inputs = self.sequences + self.context
    self.children = [self.pre_context_embedder, self.post_context_embedder,
                     self.input_to_rec, self.rec, self.rec_to_output]

    self.initial_state_ = shared_floatx_zeros((config.hidden_state_dim,),
                                              name="initial_state")
    self.initial_cells = shared_floatx_zeros((config.hidden_state_dim,),
                                             name="initial_cells")
def test_pylearn2_training():
    # Construct the model
    mlp = MLP(activations=[Sigmoid(), Sigmoid()], dims=[784, 100, 784],
              weights_init=IsotropicGaussian(), biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()

    # Load the data
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    x = tensor.matrix('features')
    block_cost = Pylearn2Cost(cost.apply(x, mlp.apply(x)))
    block_model = Pylearn2Model(mlp)

    # Silence Pylearn2's logger
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm
    sgd = SGD(learning_rate=0.01, cost=block_cost, batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Pylearn2Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
def build_mlp(features_car_cat, features_car_int, features_nocar_cat,
              features_nocar_int, features_cp, features_hascar, means, labels):

    prediction, _, _, _ = \
        build_mlp_onlyloc(features_car_cat, features_car_int,
                          features_nocar_cat, features_nocar_int,
                          features_cp, features_hascar, means, labels)

    mlp_crm = MLP(activations=[None],
                  dims=[1, 1],
                  weights_init=IsotropicGaussian(.1),
                  biases_init=Constant(0),
                  name='mlp_crm')
    mlp_crm.initialize()
    crm = features_nocar_int[:, 0][:, None]
    prediction = prediction * mlp_crm.apply(crm)

    cost = MAPECost().apply(labels, prediction)

    cg = ComputationGraph(cost)
    input_var = VariableFilter(roles=[INPUT])(cg.variables)
    print input_var

    cg_dropout = apply_dropout(cg, [input_var[7], input_var[5]], .4)
    cost_dropout = cg_dropout.outputs[0]

    return prediction, cost_dropout, cg_dropout.parameters, cost
def __init__(self, mlp, frame_size=401, k=20, const=1e-5, **kwargs):
    super(SPF0Emitter, self).__init__(**kwargs)
    self.mlp = mlp
    input_dim = self.mlp.output_dim
    self.const = const
    self.frame_size = frame_size

    mlp_gmm = GMMMLP(mlp=mlp, dim=(frame_size - 2) * k, k=k, const=const)
    self.gmm_emitter = GMMEmitter(gmmmlp=mlp_gmm,
                                  output_size=frame_size - 2,
                                  k=k,
                                  name="gmm_emitter")

    self.mu = MLP(activations=[Identity()],
                  dims=[input_dim, 1],
                  name=self.name + "_mu")
    self.sigma = MLP(activations=[SoftPlus()],
                     dims=[input_dim, 1],
                     name=self.name + "_sigma")
    self.binary = MLP(activations=[Logistic()],
                      dims=[input_dim, 1],
                      name=self.name + "_binary")

    self.children = [self.mlp, self.mu, self.sigma,
                     self.binary, self.gmm_emitter]
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"
    main_loop = MainLoop(
        GradientDescent(
            cost=cost, params=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) +
        [Timing(),
         FinishAfter(after_n_batches=num_batches),
         DataStreamMonitoring(
             [cost], get_data_stream(range(100, 200)),
             prefix="test"),
         TrainingDataMonitoring([cost], after_epoch=True),
         Dump(save_to),
         Printing()])
    main_loop.run()
    return main_loop
def build_mlp(features_int, features_cat, labels, labels_mean):

    inputs = tensor.concatenate([features_int, features_cat], axis=1)

    mlp = MLP(activations=[Rectifier(), Rectifier(), Rectifier(), None],
              dims=[337, 800, 1200, 1],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(1))
    mlp.initialize()

    prediction = mlp.apply(inputs)
    cost = MAPECost().apply(prediction, labels, labels_mean)

    cg = ComputationGraph(cost)
    #cg_dropout0 = apply_dropout(cg, [VariableFilter(roles=[INPUT])(cg.variables)[1]], .2)
    cg_dropout1 = apply_dropout(
        cg,
        [VariableFilter(roles=[OUTPUT])(cg.variables)[1],
         VariableFilter(roles=[OUTPUT])(cg.variables)[3],
         VariableFilter(roles=[OUTPUT])(cg.variables)[5]],
        .2)
    cost_dropout1 = cg_dropout1.outputs[0]

    return cost_dropout1, cg_dropout1.parameters, cost  #cost, cg.parameters, cost #
def construct_model(input_dim, output_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[input_dim] + hidden_dims + [2])

    weights = mlp.apply(r)
    final = tensor.dot(x, weights)

    cost = Softmax().categorical_cross_entropy(y, final).mean()

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply noise
    cg = ComputationGraph([cost, error_rate])
    noise_vars = VariableFilter(roles=[WEIGHT])(cg)
    apply_noise(cg, noise_vars, noise_std)
    [cost, error_rate] = cg.outputs

    return cost, error_rate
def test_mlp_use_bias_pushed_when_not_explicitly_specified():
    mlp = MLP(activations=[Tanh(), Tanh(), None],
              dims=[4, 5, 6, 7],
              prototype=Linear(use_bias=False),
              use_bias=True)
    mlp.push_allocation_config()
    assert [lin.use_bias for lin in mlp.linear_transformations]
def build_model_mnist():
    # CNN
    filter_size = (5, 5)
    activation = Rectifier().apply
    pooling_size = (2, 2)
    num_filters = 50
    layer0 = ConvolutionalLayer(activation=activation,
                                filter_size=filter_size,
                                num_filters=num_filters,
                                pooling_size=pooling_size,
                                weights_init=Uniform(width=0.1),
                                biases_init=Uniform(width=0.01),
                                name="layer_0")

    filter_size = (3, 3)
    activation = Rectifier().apply
    num_filters = 20
    layer1 = ConvolutionalLayer(activation=activation,
                                filter_size=filter_size,
                                num_filters=num_filters,
                                pooling_size=pooling_size,
                                weights_init=Uniform(width=0.1),
                                biases_init=Uniform(width=0.01),
                                name="layer_1")

    conv_layers = [layer0, layer1]
    convnet = ConvolutionalSequence(conv_layers, num_channels=1,
                                    image_size=(28, 28))
    convnet.initialize()

    output_dim = np.prod(convnet.get_dim('output'))

    mlp = MLP(activations=[Identity()], dims=[output_dim, 10],
              weights_init=Uniform(width=0.1),
              biases_init=Uniform(width=0.01),
              name="layer_2")
    mlp.initialize()

    classifier = Classifier(convnet, mlp)
    classifier.initialize()
    return classifier
def __init__(self, attended_dim, **kwargs):
    super(GRUInitialState, self).__init__(**kwargs)
    self.attended_dim = attended_dim
    self.initial_transformer = MLP(activations=[Tanh()],
                                   dims=[attended_dim, self.dim],
                                   name='state_initializer')
    self.children.append(self.initial_transformer)
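# Hedged companion sketch (an assumption about the surrounding class, not code
# taken from the original source): the initial_states application that a
# GRU-with-learned-initial-state typically pairs with the constructor above,
# feeding a slice of the attended sequence through initial_transformer.
@application
def initial_states(self, batch_size, *args, **kwargs):
    attended = kwargs['attended']
    initial_state = self.initial_transformer.apply(
        attended[0, :, -self.attended_dim:])
    return initial_state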
class AttentionReader(Initializable):
    def __init__(self, x_dim, dec_dim, channels, height, width, N, **kwargs):
        super(AttentionReader, self).__init__(name="reader", **kwargs)

        self.img_height = height
        self.img_width = width
        self.N = N
        self.x_dim = x_dim
        self.dec_dim = dec_dim
        self.output_dim = 2 * channels * N * N

        self.zoomer = ZoomableAttentionWindow(channels, height, width, N)
        self.readout = MLP(activations=[Identity()], dims=[dec_dim, 5], **kwargs)

        self.children = [self.readout]

    def get_dim(self, name):
        if name == 'input':
            return self.dec_dim
        elif name == 'x_dim':
            return self.x_dim
        elif name == 'output':
            return self.output_dim
        else:
            raise ValueError

    @application(inputs=['x', 'x_hat', 'h_dec'], outputs=['r'])
    def apply(self, x, x_hat, h_dec):
        l = self.readout.apply(h_dec)
        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)
        w = gamma * self.zoomer.read(x, center_y, center_x, delta, sigma)
        w_hat = gamma * self.zoomer.read(x_hat, center_y, center_x, delta, sigma)
        return T.concatenate([w, w_hat], axis=1)

    @application(inputs=['x', 'x_hat', 'h_dec'],
                 outputs=['r', 'center_y', 'center_x', 'delta'])
    def apply_detailed(self, x, x_hat, h_dec):
        l = self.readout.apply(h_dec)
        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)
        w = gamma * self.zoomer.read(x, center_y, center_x, delta, sigma)
        w_hat = gamma * self.zoomer.read(x_hat, center_y, center_x, delta, sigma)
        r = T.concatenate([w, w_hat], axis=1)
        return r, center_y, center_x, delta

    @application(inputs=['x', 'h_dec'],
                 outputs=['r', 'center_y', 'center_x', 'delta'])
    def apply_simple(self, x, h_dec):
        l = self.readout.apply(h_dec)
        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)
        r = gamma * self.zoomer.read(x, center_y, center_x, delta, sigma)
        return r, center_y, center_x, delta
def setup_model(configs):
    tensor5 = theano.tensor.TensorType(config.floatX, (False,) * 5)
    # shape: T x B x C x X x Y
    input_ = tensor5('features')
    # shape: B x Classes
    target = T.lmatrix('targets')
    model = LSTMAttention(
        configs,
        weights_init=Glorot(),
        biases_init=Constant(0))
    model.initialize()
    (h, c, location, scale, patch, downn_sampled_input,
     conved_part_1, conved_part_2, pre_lstm) = model.apply(input_)
    classifier = MLP(
        [Rectifier(), Logistic()],
        configs['classifier_dims'],
        weights_init=Glorot(),
        biases_init=Constant(0))
    classifier.initialize()
    probabilities = classifier.apply(h[-1])
    cost = BinaryCrossEntropy().apply(target, probabilities)
    cost.name = 'CE'
    error_rate = MisclassificationRate().apply(target, probabilities)
    error_rate.name = 'ER'
    model.cost = cost

    if configs['load_pretrained']:
        blocks_model = Model(model.cost)
        all_params = blocks_model.parameters
        with open('VGG_CNN_params.npz') as f:
            loaded = np.load(f)
            all_conv_params = loaded.keys()
            for param in all_params:
                if param.name in loaded.keys():
                    assert param.get_value().shape == loaded[param.name].shape
                    param.set_value(loaded[param.name])
                    all_conv_params.pop(all_conv_params.index(param.name))
        print "the following parameters did not match: " + str(all_conv_params)

    if configs['test_model']:
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.cost],
                            on_unused_input='ignore',
                            allow_input_downcast=True)
        data = np.random.randn(10, 40, 3, 224, 224)
        targs = np.random.randn(40, 101)
        f(data, targs)
        print "Test passed! ;)"

    model.monitorings = [cost, error_rate]

    return model
def test_snapshot():
    x = tensor.matrix('x')
    linear = MLP([Identity(), Identity()], [10, 10, 10],
                 weights_init=Constant(1), biases_init=Constant(2))
    linear.initialize()
    y = linear.apply(x)
    cg = ComputationGraph(y)
    snapshot = cg.get_snapshot(dict(x=numpy.zeros((1, 10), dtype=floatX)))
    assert len(snapshot) == 14
def test_extract_parameter_values():
    mlp = MLP([Identity(), Identity()], [10, 20, 10])
    mlp.allocate()
    param_values = extract_parameter_values(mlp)
    assert len(param_values) == 4
    assert isinstance(param_values['/mlp/linear_0.W'], numpy.ndarray)
    assert isinstance(param_values['/mlp/linear_0.b'], numpy.ndarray)
    assert isinstance(param_values['/mlp/linear_1.W'], numpy.ndarray)
    assert isinstance(param_values['/mlp/linear_1.b'], numpy.ndarray)
class MLP_conv_dense(Initializable):

    def __init__(self, n_layers_conv, n_layers_dense_lower, n_layers_dense_upper,
                 n_hidden_conv, n_hidden_dense_lower, n_hidden_dense_lower_output,
                 n_hidden_dense_upper,
                 spatial_width, n_colors, n_temporal_basis):
        """
        The multilayer perceptron, that provides temporal weighting coefficients
        for mu and sigma images. This consists of a lower segment with a
        convolutional MLP, and optionally with a dense MLP in parallel. The upper
        segment then consists of a per-pixel dense MLP (convolutional MLP with
        1x1 kernel).
        """
        super(MLP_conv_dense, self).__init__()

        self.n_colors = n_colors
        self.spatial_width = spatial_width
        self.n_hidden_dense_lower = n_hidden_dense_lower
        self.n_hidden_dense_lower_output = n_hidden_dense_lower_output
        self.n_hidden_conv = n_hidden_conv

        ## the lower layers
        self.mlp_conv = MultiLayerConvolution(n_layers_conv, n_hidden_conv,
                                              spatial_width, n_colors)
        self.children = [self.mlp_conv]
        if n_hidden_dense_lower > 0 and n_layers_dense_lower > 0:
            n_input = n_colors * spatial_width ** 2
            n_output = n_hidden_dense_lower_output * spatial_width ** 2
            self.mlp_dense_lower = MLP(
                [dense_nonlinearity] * n_layers_conv,
                [n_input] + [n_hidden_dense_lower] * (n_layers_conv - 1) + [n_output],
                name='MLP dense lower',
                weights_init=Orthogonal(), biases_init=Constant(0))
            self.children.append(self.mlp_dense_lower)
        else:
            n_hidden_dense_lower_output = 0

        ## the upper layers (applied to each pixel independently)
        n_output = n_colors * n_temporal_basis * 2  # "*2" for both mu and sigma
        self.mlp_dense_upper = MLP(
            [dense_nonlinearity] * (n_layers_dense_upper - 1) + [Identity()],
            [n_hidden_conv + n_hidden_dense_lower_output] +
            [n_hidden_dense_upper] * (n_layers_dense_upper - 1) + [n_output],
            name='MLP dense upper',
            weights_init=Orthogonal(), biases_init=Constant(0))
        self.children.append(self.mlp_dense_upper)

    @application
    def apply(self, X):
        """
        Take in noisy input image and output temporal coefficients for mu and sigma.
        """
        Y = self.mlp_conv.apply(X)
        Y = Y.dimshuffle(0, 2, 3, 1)
        if self.n_hidden_dense_lower > 0:
            n_images = X.shape[0]
            X = X.reshape((n_images, self.n_colors * self.spatial_width ** 2))
            Y_dense = self.mlp_dense_lower.apply(X)
            Y_dense = Y_dense.reshape((n_images, self.spatial_width,
                                       self.spatial_width,
                                       self.n_hidden_dense_lower_output))
            Y = T.concatenate([Y / T.sqrt(self.n_hidden_conv),
                               Y_dense / T.sqrt(self.n_hidden_dense_lower_output)],
                              axis=3)
        Z = self.mlp_dense_upper.apply(Y)
        return Z
def create_model(self):
    x = self.x
    input_dim = self.input_dim
    mlp = MLP([Logistic(), Logistic(), Tanh()],
              [input_dim, 100, 100, 1],
              weights_init=IsotropicGaussian(0.001),
              biases_init=Constant(0))
    mlp.initialize()
    self.mlp = mlp
    probs = mlp.apply(x)
    return probs
def test_inject_parameter_values():
    mlp = MLP([Identity()], [10, 10])
    mlp.allocate()
    param_values = {'/mlp/linear_0.W': 2 * numpy.ones((10, 10), dtype=floatX),
                    '/mlp/linear_0.b': 3 * numpy.ones(10, dtype=floatX)}
    inject_parameter_values(mlp, param_values)
    assert numpy.all(mlp.linear_transformations[0].params[0].get_value() == 2)
    assert numpy.all(mlp.linear_transformations[0].params[1].get_value() == 3)
def test_fully_layer():
    batch_size = 2
    x = T.tensor4()
    y = T.ivector()
    V = 200
    layer_conv = Convolutional(filter_size=(5, 5), num_filters=V,
                               name="toto",
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0))
    # try with no bias
    activation = Rectifier()
    pool = MaxPooling(pooling_size=(2, 2))

    convnet = ConvolutionalSequence([layer_conv, activation, pool],
                                    num_channels=15,
                                    image_size=(10, 10),
                                    name="conv_section")
    convnet.push_allocation_config()
    convnet.initialize()
    output = convnet.apply(x)
    batch_size = output.shape[0]
    output_dim = np.prod(convnet.get_dim('output'))
    result_conv = output.reshape((batch_size, output_dim))

    mlp = MLP(activations=[Rectifier().apply], dims=[output_dim, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0.0))
    mlp.initialize()
    output = mlp.apply(result_conv)
    cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))

    cg = ComputationGraph(cost)
    W = VariableFilter(roles=[WEIGHT])(cg.variables)
    B = VariableFilter(roles=[BIAS])(cg.variables)
    W = W[0]
    b = B[0]

    inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
    outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
    var_input = inputs_fully[0]
    var_output = outputs_fully[0]

    [d_W, d_S, d_b] = T.grad(cost, [W, var_output, b])

    d_b = d_b.dimshuffle(('x', 0))
    d_p = T.concatenate([d_W, d_b], axis=0)

    x_value = 1e3 * np.random.ranf((2, 15, 10, 10))
    f = theano.function([x, y], [var_input, d_S, d_p],
                        allow_input_downcast=True, on_unused_input='ignore')
    A, B, C = f(x_value, [5, 0])
    A = np.concatenate([A, np.ones((2, 1))], axis=1)
    print 'A', A.shape
    print 'B', B.shape
    print 'C', C.shape

    print lin.norm(C - np.dot(np.transpose(A), B), 'fro')

    return
def create_model(self):
    input_dim = self.input_dim
    x = self.x
    y = self.y
    p = self.p
    mask = self.mask
    hidden_dim = self.hidden_dim
    embedding_dim = self.embedding_dim
    lookup = LookupTable(self.dict_size, embedding_dim,
                         weights_init=IsotropicGaussian(0.001),
                         name='LookupTable')
    x_to_h = Linear(embedding_dim, hidden_dim * 4, name='x_to_h',
                    weights_init=IsotropicGaussian(0.001),
                    biases_init=Constant(0.0))
    lstm = LSTM(hidden_dim, name='lstm',
                weights_init=IsotropicGaussian(0.001),
                biases_init=Constant(0.0))
    h_to_o = MLP([Logistic()], [hidden_dim, 1],
                 weights_init=IsotropicGaussian(0.001),
                 biases_init=Constant(0),
                 name='h_to_o')

    lookup.initialize()
    x_to_h.initialize()
    lstm.initialize()
    h_to_o.initialize()

    embed = lookup.apply(x).reshape(
        (x.shape[0], x.shape[1], self.embedding_dim))
    embed.name = "embed_vec"
    x_transform = x_to_h.apply(embed.transpose(1, 0, 2))
    x_transform.name = "Transformed X"

    self.lookup = lookup
    self.x_to_h = x_to_h
    self.lstm = lstm
    self.h_to_o = h_to_o

    #if mask is None:
    h, c = lstm.apply(x_transform)
    #else:
    #    h, c = lstm.apply(x_transform, mask=mask)
    h.name = "hidden_state"
    c.name = "cell state"

    # only values of hidden units of the last timeframe are used for
    # the classification
    indices = T.sum(mask, axis=0) - 1
    rel_hid = h[indices, T.arange(h.shape[1])]
    out = self.h_to_o.apply(rel_hid)

    probs = out
    return probs
def main(save_to, num_epochs, bokeh=False):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      DataStream(mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if bokeh:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(
                       mnist_train.num_examples, 50)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
def _build_bricks(self, *args, **kwargs):
    # Build lookup tables
    self.word_embed = self._embed(len(self.dataset.word2index),
                                  self.config.word_embed_dim,
                                  name='word_embed')
    self.hashtag_embed = self._embed(len(self.dataset.hashtag2index),
                                     self.config.lstm_dim,
                                     name='hashtag_embed')
    # Build text encoder
    self.mlstm_ins = Linear(input_dim=self.config.word_embed_dim,
                            output_dim=4 * self.config.lstm_dim,
                            name='mlstm_in')
    self.mlstm_ins.weights_init = IsotropicGaussian(
        std=numpy.sqrt(2) / numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
    self.mlstm_ins.biases_init = Constant(0)
    self.mlstm_ins.initialize()
    self.mlstm = MLSTM(self.config.lstm_time, self.config.lstm_dim, shared=False)
    self.mlstm.weights_init = IsotropicGaussian(
        std=numpy.sqrt(2) / numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
    self.mlstm.biases_init = Constant(0)
    self.mlstm.initialize()
    self.hashtag2word = MLP(
        activations=[Tanh('hashtag2word_tanh')],
        dims=[self.config.lstm_dim, self.config.word_embed_dim],
        name='hashtag2word_mlp')
    self.hashtag2word.weights_init = IsotropicGaussian(
        std=1 / numpy.sqrt(self.config.word_embed_dim))
    self.hashtag2word.biases_init = Constant(0)
    self.hashtag2word.initialize()
    self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
    self.hashtag2word_bias.biases_init = Constant(0)
    self.hashtag2word_bias.initialize()
    # Build character embedding
    self.char_embed = self._embed(len(self.dataset.char2index),
                                  self.config.char_embed_dim,
                                  name='char_embed')
    # Build sparse word encoder
    self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                          output_dim=self.config.word_embed_dim,
                          name='rnn_in')
    self.rnn_ins.weights_init = IsotropicGaussian(
        std=numpy.sqrt(2) / numpy.sqrt(self.config.char_embed_dim + self.config.word_embed_dim))
    self.rnn_ins.biases_init = Constant(0)
    self.rnn_ins.initialize()
    self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim, activation=Tanh())
    self.rnn.weights_init = IsotropicGaussian(
        std=1 / numpy.sqrt(self.config.word_embed_dim))
    self.rnn.initialize()
def __init__(self, input_dim, hidden_dim, **kwargs):
    super(VariationalAutoEncoder, self).__init__(**kwargs)
    encoder_mlp = MLP([Sigmoid(), Identity()], [input_dim, 101, None])
    decoder_mlp = MLP([Sigmoid(), Sigmoid()], [hidden_dim, 101, input_dim])
    self.hidden_dim = hidden_dim
    self.encoder = VAEEncoder(encoder_mlp, hidden_dim)
    self.decoder = VAEDecoder(decoder_mlp)
    self.children = [self.encoder, self.decoder]
def test_serialization():
    # Create a simple brick with two parameters
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Check the data using numpy.load
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'])
    assert_allclose(numpy_data['mlp-linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['mlp-linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled
    mlp = load(f.name)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that only parameters are saved as NPY files
    mlp.random_data = numpy.random.rand(10)
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'])

    # Ensure that parameters can be loaded with correct names
    parameter_values = load_parameter_values(f.name)
    assert set(parameter_values.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])

    # Ensure that duplicate names are dealt with
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear.W', 'mlp-linear.W_2', 'pkl'])

    # Ensure warnings are raised when __main__ namespace objects are dumped
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)
def __init__(self, emb_dim, dim, dropout=0.0, def_word_gating="none",
             dropout_type="per_unit", compose_type="sum",
             word_dropout_weighting="no_weighting",
             shortcut_unk_and_excluded=False,
             num_input_words=-1, exclude_top_k=-1, vocab=None,
             **kwargs):
    self._dropout = dropout
    self._num_input_words = num_input_words
    self._exclude_top_K = exclude_top_k
    self._dropout_type = dropout_type
    self._compose_type = compose_type
    self._vocab = vocab
    self._shortcut_unk_and_excluded = shortcut_unk_and_excluded
    self._word_dropout_weighting = word_dropout_weighting
    self._def_word_gating = def_word_gating

    if def_word_gating not in {"none", "self_attention"}:
        raise NotImplementedError()

    if word_dropout_weighting not in {"no_weighting"}:
        raise NotImplementedError("Not implemented " + word_dropout_weighting)

    if dropout_type not in {"per_unit", "per_example", "per_word"}:
        raise NotImplementedError()

    children = []

    if self._def_word_gating == "self_attention":
        self._gate_mlp = Linear(dim, dim)
        self._gate_act = Logistic()
        children.extend([self._gate_mlp, self._gate_act])

    if compose_type == 'fully_connected_linear':
        self._def_state_compose = MLP(activations=[None],
                                      dims=[emb_dim + dim, emb_dim])
        children.append(self._def_state_compose)
    if compose_type == "gated_sum" or compose_type == "gated_transform_and_sum":
        if dropout_type == "per_word" or dropout_type == "per_example":
            raise RuntimeError("I dont think this combination makes much sense")

        self._compose_gate_mlp = Linear(dim + emb_dim, emb_dim,
                                        name='gate_linear')
        self._compose_gate_act = Logistic()
        children.extend([self._compose_gate_mlp, self._compose_gate_act])
    if compose_type == 'sum':
        if not emb_dim == dim:
            raise ValueError("Embedding has different dim! Cannot use compose_type='sum'")
    if compose_type == 'transform_and_sum' or compose_type == "gated_transform_and_sum":
        self._def_state_transform = Linear(dim, emb_dim, name='state_transform')
        children.append(self._def_state_transform)

    super(MeanPoolCombiner, self).__init__(children=children, **kwargs)
def __init__(self, representation_dim,
             representation_name='initial_state_representation', **kwargs):
    super(GRUSpecialInitialState, self).__init__(**kwargs)
    self.representation_dim = representation_dim
    self.representation_name = representation_name
    self.initial_transformer = MLP(activations=[Tanh()],
                                   dims=[representation_dim, self.dim],
                                   name='state_initializer')
    self.children.append(self.initial_transformer)
def __init__(self, attended_dim, context_dim, **kwargs):
    super(GRUInitialStateWithInitialStateConcatContext, self).__init__(**kwargs)
    self.attended_dim = attended_dim
    self.context_dim = context_dim
    self.initial_transformer = MLP(
        activations=[Tanh(), Tanh(), Tanh()],
        dims=[attended_dim + context_dim, 1000, 500, self.dim],
        name='state_initializer')
    self.children.append(self.initial_transformer)
def build_model(self, hidden_dim):
    board_input = T.vector('input')
    mlp = MLP(activations=[LeakyRectifier(0.1), LeakyRectifier(0.1)],
              dims=[9, hidden_dim, 9],
              weights_init=IsotropicGaussian(0.00001),
              biases_init=Constant(0.01))
    output = mlp.apply(board_input)
    masked_output = Softmax().apply(output * T.eq(board_input, 0) * 1000)
    mlp.initialize()
    cost, chosen = self.get_cost(masked_output)
    return board_input, mlp, cost, chosen, output
def __init__(self, attended_dim, **kwargs):
    super(LSTM2GO, self).__init__(**kwargs)
    self.attended_dim = attended_dim
    self.initial_transformer_s = MLP(activations=[Tanh()],
                                     dims=[attended_dim, self.dim],
                                     name='state_initializer')
    self.children.append(self.initial_transformer_s)
    self.initial_transformer_c = MLP(activations=[Tanh()],
                                     dims=[attended_dim, self.dim],
                                     name='cell_initializer')
    self.children.append(self.initial_transformer_c)
def prior_network(x, n_input, hu_encoder, n_latent):
    logger.info('In prior_network: n_input: %d, hu_encoder: %d', n_input, hu_encoder)
    mlp1 = MLP(activations=[Rectifier()], dims=[n_input, hu_encoder],
               name='prior_in_to_hidEncoder')
    initialize([mlp1])
    h_encoder = mlp1.apply(x)
    h_encoder = debug_print(h_encoder, 'h_encoder', False)
    lin1 = Linear(name='prior_hiddEncoder_to_latent_mu',
                  input_dim=hu_encoder, output_dim=n_latent)
    lin2 = Linear(name='prior_hiddEncoder_to_latent_sigma',
                  input_dim=hu_encoder, output_dim=n_latent)
    initialize([lin1])
    initialize([lin2], rndstd=0.001)
    mu = lin1.apply(h_encoder)
    log_sigma = lin2.apply(h_encoder)
    return mu, log_sigma
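# Hedged companion sketch (not part of the original source): drawing a latent
# sample from the (mu, log_sigma) returned by prior_network above via the usual
# reparameterization trick. MRG_RandomStreams is Theano's standard symbolic RNG;
# x, n_input, hu_encoder and n_latent are assumed to be defined by the caller.
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(seed=42)
mu, log_sigma = prior_network(x, n_input, hu_encoder, n_latent)
eps = srng.normal(mu.shape)
z = mu + T.exp(log_sigma) * eps  # one reparameterized sample of the latent variable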
def apply(self, input_, target):
    mlp = MLP(self.non_lins, self.dims,
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0),
              name=self.name)
    mlp.initialize()
    probs = mlp.apply(T.flatten(input_, outdim=2))
    probs.name = 'probs'
    cost = CategoricalCrossEntropy().apply(target.flatten(), probs)
    cost.name = "CE"
    self.outputs = {}
    self.outputs['probs'] = probs
    self.outputs['cost'] = cost
def test_serialization():
    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Ensure warnings are raised when __main__ namespace objects are dumped.
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp.foo, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)

    # Check the parameters.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])
    assert_allclose(numpy_data['/mlp/linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['/mlp/linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled.
    with open(f.name, 'rb') as ff:
        mlp = load(ff)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that duplicate names are dealt with.
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear.W', '/mlp/linear.W_2'])

    # Check when we don't dump the main object.
    with NamedTemporaryFile(delete=False) as f:
        dump(None, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_parameters'])
def construct_model(input_dim, output_dim):
    # Construct the model
    r = tensor.fmatrix('r')
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    nx = x.shape[0]
    nj = x.shape[1]  # also is r.shape[0]
    nr = r.shape[1]

    # r is nj x nr
    # x is nx x nj
    # y is nx x 1

    # r_rep is nx x nj x nr
    r_rep = r[None, :, :].repeat(axis=0, repeats=nx)
    # x3 is nx x nj x 1
    x3 = x[:, :, None]

    # concat is nx x nj x (nr + 1)
    concat = tensor.concatenate([r_rep, x3], axis=2)
    mlp_input = concat.reshape((nx * nj, nr + 1))

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[input_dim + 1] + hidden_dims + [output_dim])

    activations = mlp.apply(mlp_input)
    act_sh = activations.reshape((nx, nj, output_dim))
    final = act_sh.mean(axis=1)

    cost = Softmax().categorical_cross_entropy(y, final).mean()

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply noise
    cg = ComputationGraph([cost, error_rate])
    noise_vars = VariableFilter(roles=[WEIGHT])(cg)
    apply_noise(cg, noise_vars, noise_std)
    [cost_reg, error_rate_reg] = cg.outputs

    return cost_reg, error_rate_reg, cost, error_rate
def create_model():
    """Create the deep autoencoder model with Blocks, and load MNIST."""
    mlp = MLP(activations=[Logistic(), Logistic(), Logistic(), None,
                           Logistic(), Logistic(), Logistic(), Logistic()],
              dims=[784, 1000, 500, 250, 30, 250, 500, 1000, 784],
              weights_init=Sparse(15, IsotropicGaussian()),
              biases_init=Constant(0))
    mlp.initialize()

    x = tensor.matrix('features')
    x_hat = mlp.apply(tensor.flatten(x, outdim=2))
    squared_err = SquaredError().apply(tensor.flatten(x, outdim=2), x_hat)
    cost = BinaryCrossEntropy().apply(tensor.flatten(x, outdim=2), x_hat)
    return x, cost, squared_err
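# Hedged usage sketch (illustrative assumption, not from the original source):
# wiring the autoencoder cost returned by create_model above into a Blocks
# gradient-descent training algorithm; the learning rate is arbitrary.
x, cost, squared_err = create_model()
cg = ComputationGraph(cost)
algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                            step_rule=Scale(learning_rate=0.01))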
def setup_model():
    # shape: T x B x F
    input_ = T.tensor3('features')
    # shape: B
    target = T.lvector('targets')
    model = LSTMAttention(dim=256,
                          mlp_hidden_dims=[256, 4],
                          batch_size=100,
                          image_shape=(64, 64),
                          patch_shape=(16, 16),
                          weights_init=Glorot(),
                          biases_init=Constant(0))
    model.initialize()
    h, c, location, scale = model.apply(input_)
    classifier = MLP([Rectifier(), Softmax()], [256 * 2, 200, 10],
                     weights_init=Glorot(),
                     biases_init=Constant(0))
    model.h = h
    model.c = c
    model.location = location
    model.scale = scale
    classifier.initialize()

    probabilities = classifier.apply(T.concatenate([h[-1], c[-1]], axis=1))
    cost = CategoricalCrossEntropy().apply(target, probabilities)
    error_rate = MisclassificationRate().apply(target, probabilities)
    model.cost = cost

    location_x_0_avg = T.mean(location[0, :, 0])
    location_x_0_avg.name = 'location_x_0_avg'
    location_x_10_avg = T.mean(location[10, :, 0])
    location_x_10_avg.name = 'location_x_10_avg'
    location_x_20_avg = T.mean(location[-1, :, 0])
    location_x_20_avg.name = 'location_x_20_avg'
    scale_x_0_avg = T.mean(scale[0, :, 0])
    scale_x_0_avg.name = 'scale_x_0_avg'
    scale_x_10_avg = T.mean(scale[10, :, 0])
    scale_x_10_avg.name = 'scale_x_10_avg'
    scale_x_20_avg = T.mean(scale[-1, :, 0])
    scale_x_20_avg.name = 'scale_x_20_avg'
    monitorings = [error_rate,
                   location_x_0_avg, location_x_10_avg, location_x_20_avg,
                   scale_x_0_avg, scale_x_10_avg, scale_x_20_avg]
    model.monitorings = monitorings

    return model
def __init__(self, input_dim, h0_dim, s0_dim, h1_dim, output_dim):
    super(SeqToSeqLSTM, self).__init__()
    self.h0__input = MLP(
        [Tanh()],
        dims=[input_dim, h0_dim],
        weights_init=init.IsotropicGaussian(0.01),
        biases_init=init.IsotropicGaussian(0.3),
        name='MLP:h0__input')
    self.s0__h0_input = LSTMLayer(
        input_dim=h0_dim + input_dim,
        state_dim=s0_dim,
        name='LSTMLayer:s0__h0_input')
    self.h1__s0_h0_input = MLP(
        [Tanh()],
        dims=[s0_dim + h0_dim + input_dim, h1_dim],
        weights_init=init.IsotropicGaussian(0.01),
        biases_init=init.Constant(0.0),
        name='MLP:h1__s0_h0_input')
    self.output__h1_s0_h0_input = Linear(
        input_dim=h1_dim + s0_dim + h0_dim + input_dim,
        output_dim=output_dim,
        weights_init=init.IsotropicGaussian(0.01),
        biases_init=init.Constant(0.0),
        name='Linear:output__h1_s0_h0_input')
    self.children = [self.h0__input, self.s0__h0_input,
                     self.h1__s0_h0_input, self.output__h1_s0_h0_input]
class DGSRNN(BaseRecurrent, Initializable):
    def __init__(self, input_dim, state_dim, act, transition_h, tr_h_activations, **kwargs):
        super(DGSRNN, self).__init__(**kwargs)

        self.input_dim = input_dim
        self.state_dim = state_dim

        logistic = Logistic()
        self.inter = MLP(dims=[input_dim + state_dim] + transition_h,
                         activations=tr_h_activations,
                         name='inter')
        self.reset = MLP(dims=[transition_h[-1], state_dim],
                         activations=[logistic],
                         name='reset')
        self.update = MLP(dims=[transition_h[-1], state_dim],
                          activations=[act],
                          name='update')

        self.children = [self.inter, self.reset, self.update,
                         logistic, act] + tr_h_activations

        # init state
        self.params = [shared_floatx_zeros((state_dim,), name='init_state')]
        add_role(self.params[0], INITIAL_STATE)

    def get_dim(self, name):
        if name == 'state':
            return self.state_dim
        return super(DGSRNN, self).get_dim(name)

    @recurrent(sequences=['inputs', 'drop_updates_mask'], states=['state'],
               outputs=['state', 'reset'], contexts=[])
    def apply(self, inputs=None, drop_updates_mask=None, state=None):
        inter_v = self.inter.apply(tensor.concatenate([inputs, state], axis=1))
        reset_v = self.reset.apply(inter_v)
        update_v = self.update.apply(inter_v)

        reset_v = reset_v * drop_updates_mask

        new_state = state * (1 - reset_v) + reset_v * update_v

        return new_state, reset_v

    @application
    def initial_state(self, state_name, batch_size, *args, **kwargs):
        return tensor.repeat(self.params[0][None, :], repeats=batch_size, axis=0)
def setupNN(NNParam):
    NNWidth = NNParam['NNWidth']
    WeightStdDev = NNParam['WeightStdDev']
    L2Weight = NNParam['L2Weight']
    DropOutProb = NNParam['DropOutProb']
    InitialLearningRate = NNParam['InitialLearningRate']

    x = theano.tensor.concatenate([x0, x1, x2, x3], axis=1)

    mlp = MLP(activations=[Rectifier(), Rectifier(), Rectifier(), Rectifier(), Rectifier()],
              dims=[69 * 4, NNWidth, NNWidth, NNWidth, NNWidth, 100],
              weights_init=IsotropicGaussian(WeightStdDev),
              biases_init=Constant(0))
    x_forward = mlp.apply(x)

    mlp_sm = MLP(activations=[None], dims=[100, 39],
                 weights_init=IsotropicGaussian(WeightStdDev),
                 biases_init=Constant(0))
    y_hat_b = Softmax().apply(mlp_sm.apply(x_forward))

    mlp.initialize()
    mlp_sm.initialize()

    cg = blocks.graph.ComputationGraph(y_hat_b)
    parameters = list()
    for p in cg.parameters:
        parameters.append(p)

    weights = VariableFilter(roles=[blocks.roles.WEIGHT])(cg.variables)
    cg_dropout = blocks.graph.apply_dropout(cg, [weights[3]], DropOutProb)
    y_hat_b_do = cg_dropout.outputs[0]

    pred_b = theano.tensor.argmax(cg.outputs[0], axis=1)
    err_b = theano.tensor.mean(theano.tensor.eq(pred_b, y_b))

    cW = 0
    for W in weights:
        cW += (W ** 2).sum()

    cost = theano.tensor.mean(
        theano.tensor.nnet.categorical_crossentropy(y_hat_b_do, y_b)) + cW * L2Weight

    Learning_Rate_Decay = numpy.float32(0.98)
    learning_rate_theano = theano.shared(numpy.float32(InitialLearningRate),
                                         name='learning_rate')
    learning_rate_update = theano.function(
        inputs=[], outputs=learning_rate_theano,
        updates=[(learning_rate_theano, learning_rate_theano * Learning_Rate_Decay)])

    update_proc = momentum_sgd(cost, parameters, 0.8, learning_rate_theano)

    # train
    training_proc = theano.function(
        inputs=[shuffIdx], outputs=cost, updates=update_proc,
        givens={x0: tX[theano.tensor.flatten(shuffIdx[:, 0])],
                x1: tX[theano.tensor.flatten(shuffIdx[:, 1])],
                x2: tX[theano.tensor.flatten(shuffIdx[:, 2])],
                x3: tX[theano.tensor.flatten(shuffIdx[:, 3])],
                y_b: tYb[theano.tensor.flatten(shuffIdx[:, 1])]})

    # test
    test_on_testing_proc = theano.function(
        inputs=[shuffIdx], outputs=[err_b],
        givens={x0: vX[shuffIdx[:, 0]], x1: vX[shuffIdx[:, 1]],
                x2: vX[shuffIdx[:, 2]], x3: vX[shuffIdx[:, 3]],
                y_b: vYb[shuffIdx[:, 1]]})

    test_on_training_proc = theano.function(
        inputs=[shuffIdx], outputs=[err_b],
        givens={x0: tX[shuffIdx[:, 0]], x1: tX[shuffIdx[:, 1]],
                x2: tX[shuffIdx[:, 2]], x3: tX[shuffIdx[:, 3]],
                y_b: tYb[shuffIdx[:, 1]]})

    forward_proc = theano.function(inputs=[x0, x1, x2, x3], outputs=[x_forward])

    return (learning_rate_update, training_proc, test_on_testing_proc,
            test_on_training_proc, forward_proc)
class topicalq_transformer(Initializable):

    def __init__(self, vocab_size, topical_embedding_dim, state_dim, word_num,
                 batch_size, **kwargs):
        super(topicalq_transformer, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.word_embedding_dim = topical_embedding_dim
        self.state_dim = state_dim
        self.word_num = word_num
        self.batch_size = batch_size
        self.look_up = LookupTable(name='topical_embeddings')
        self.transformer = MLP(activations=[Tanh()],
                               dims=[self.word_embedding_dim * self.word_num,
                                     self.state_dim],
                               name='topical_transformer')
        self.children = [self.look_up, self.transformer]

    def _push_allocation_config(self):
        self.look_up.length = self.vocab_size
        self.look_up.dim = self.word_embedding_dim
        # do we have to push_config? remain unsure

    @application(inputs=['source_topical_word_sequence'],
                 outputs=['topical_embedding'])
    def apply(self, source_topical_word_sequence):
        # Time as first dimension
        source_topical_word_sequence = source_topical_word_sequence.T
        word_topical_embeddings = self.look_up.apply(source_topical_word_sequence)
        word_topical_embeddings = word_topical_embeddings.swapaxes(0, 1)
        # requires testing
        concatenated_topical_embeddings = tensor.reshape(
            word_topical_embeddings,
            [word_topical_embeddings.shape[0],
             word_topical_embeddings.shape[1] * word_topical_embeddings.shape[2]])
        topical_embedding = self.transformer.apply(concatenated_topical_embeddings)
        return topical_embedding
class SingleSoftmax(Initializable):
    def __init__(self, hidden_dim, n_classes, **kwargs):
        super(SingleSoftmax, self).__init__(**kwargs)

        self.hidden_dim = hidden_dim
        self.n_classes = n_classes

        self.mlp = MLP(activations=[Rectifier(), Softmax()],
                       dims=[hidden_dim, hidden_dim / 2, self.n_classes],
                       weights_init=Orthogonal(),
                       biases_init=Constant(0))
        self.softmax = Softmax()

        self.children = [self.mlp, self.softmax]

    # some day: @application(...) def feedback(self, h)

    @application(inputs=['cs', 'y'], outputs=['cost'])
    def cost(self, cs, y, n_patches):
        energies = [self.mlp.apply(cs[:, t, :])
                    for t in xrange(n_patches)]
        cross_entropies = [self.softmax.categorical_cross_entropy(y.flatten(), energy)
                           for energy in energies]
        error_rates = [T.neq(y, energy.argmax(axis=1)).mean(axis=0)
                       for energy in energies]
        # train on final prediction
        cost = util.named(cross_entropies[-1], "cost")
        # monitor final prediction
        self.add_auxiliary_variable(cross_entropies[-1], name="cross_entropy")
        self.add_auxiliary_variable(error_rates[-1], name="error_rate")
        return cost