def check_models(models):
    """Check if all models in the list are roughly the same."""
    layers_list = [get_all_layers(m) for m in models]
    n = len(layers_list[0])
    assert all(n == len(l) for l in layers_list)
    for layers in zip(*layers_list):
        first, *rest = layers
        assert all(check_layer(first, c) for c in rest)
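# Hedged usage sketch, not part of the original source: ``build_network`` is a
# hypothetical constructor returning a Lasagne output layer.  ``check_models``
# asserts that all networks have the same number of layers and that
# ``check_layer`` accepts every corresponding layer pair.
def _check_models_example(build_network, n_replicas=3):
    models = [build_network() for _ in range(n_replicas)]
    check_models(models)  # raises AssertionError on any mismatch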
def __init__(self, output_layer, description="", tags=None,
             predecessor_experiment=""):
    self.layers = get_all_layers(output_layer)
    self._deterministic_output_func = None
    self.train_iterations = 0
    self.description = description
    self.tags = none_to_list(tags)
    for tag in self.tags:
        if tag not in VALID_TAGS:
            raise ValueError("{} is not a valid tag!".format(tag))
    self.predecessor_experiment = predecessor_experiment
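# Hedged sketch, not from the original source: illustrates the tag validation
# in the constructor above.  ``Experiment`` and the tag names are hypothetical
# stand-ins for the real class and its ``VALID_TAGS`` whitelist.
# Experiment(output_layer, description="baseline", tags=["mnist"])  # ok if "mnist" is whitelisted
# Experiment(output_layer, tags=["no-such-tag"])                    # raises ValueError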
def __init__(self, *args, **kwargs):
    super(TrainerMixin, self).__init__(*args, **kwargs)
    input_var = tensor.tensor4('inputs')
    target_var = tensor.ivector('targets')
    loss, _ = loss_acc(self.model, input_var, target_var,
                       deterministic=False)
    layers = get_all_layers(self.model)
    decay = regularize_layer_params(layers, l2) * 0.0001
    loss = loss + decay
    params = get_all_params(self.model, trainable=True)
    updates = momentum(loss, params, momentum=0.9,
                       learning_rate=self.learning_rate)
    self.set_training(input_var, target_var, loss, updates)
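# Hedged sketch, not from the original source: ``TrainerMixin`` above expects
# the cooperating class to provide ``self.model`` (a Lasagne output layer),
# ``self.learning_rate`` and a ``set_training(input_var, target_var, loss,
# updates)`` method.  ``_ExperimentStub`` is a hypothetical minimal base
# illustrating that contract; the real base class is whatever the project uses.
import theano


class _ExperimentStub(object):
    def __init__(self, model, learning_rate=0.01):
        self.model = model
        self.learning_rate = learning_rate

    def set_training(self, input_var, target_var, loss, updates):
        # Compile a Theano training function from the pieces the mixin built.
        self.train_fn = theano.function([input_var, target_var], loss,
                                        updates=updates)


class TrainableExperiment(TrainerMixin, _ExperimentStub):
    pass

# exp = TrainableExperiment(output_layer, learning_rate=0.01)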
def fit(self, X, X_valid=None):
    m = 1
    # define the model
    x_in = layers.InputLayer((None, X.shape[1]))
    hid = x_in
    for i in range(self.nb_layers):
        hid = layers.DenseLayer(hid, num_units=self.nb_units * m / (2 ** i),
                                nonlinearity=nonlinearities.sigmoid)
    for i in range(self.nb_layers - 1):
        k = self.nb_layers - 2 - i
        hid = layers.DenseLayer(hid, num_units=self.nb_units * m / (2 ** k),
                                nonlinearity=nonlinearities.sigmoid)
    o = layers.DenseLayer(hid, num_units=X.shape[1],
                          nonlinearity=nonlinearities.sigmoid)
    model = LightweightModel([x_in], [o])
    all_layers = get_all_layers(o)
    self.all_layers = all_layers
    rng = rng_mrg.MRG_RandomStreams()

    def get_reconstruction_error(model, X, x_hat=None):
        if x_hat is None:
            x_hat, = model.get_output(X)
        return (-(X * T.log(x_hat) +
                  (1 - X) * T.log(1 - x_hat)).sum(axis=1).mean())

    def loss_function(model, tensors):
        X = tensors["X"]
        X_noisy = X
        #X_noisy = X * (rng.uniform(X.shape) < (1 - self.corruption))
        #if self.corruption_type == "masking_noise":
        #    X_noisy = corrupted_masking_noise(rng, X, self.corruption)
        #elif self.corruption_type == "salt_and_pepper":
        #    X_noisy = corrupted_salt_and_pepper(rng, X, self.corruption)
        x_hat, = model.get_output(X_noisy)
        # l1 = 0.01 * sum(T.abs_(layer.W).sum() for layer in all_layers[1:-1])
        l1 = 0
        diversity = 0
        return get_reconstruction_error(model, X, x_hat) + diversity + l1

    input_variables = dict(
        X=dict(tensor_type=T.matrix),
    )
    functions = dict(
        predict=dict(
            get_output=lambda model, X: model.get_output(X)[0],
            params=["X"]
        ),
        get_reconstruction_error=dict(
            get_output=get_reconstruction_error,
            params=["X"]
        )
    )
    for i, layer in enumerate(all_layers[1:-1]):
        functions["get_layer_{0}".format(i + 1)] = dict(
            get_output=lambda model, X: model.get_output(X)[0],
            params=["X"])

    class MyBatchOptimizer(BatchOptimizer):

        def iter_update(self, epoch, nb_batches, iter_update_batch):
            status = super(MyBatchOptimizer, self).iter_update(
                epoch, nb_batches, iter_update_batch)
            status["reconstruction_error_train"] = \
                capsule.get_reconstruction_error(X[0:100])
            if X_valid is not None:
                status["reconstruction_error_valid"] = \
                    capsule.get_reconstruction_error(X_valid[0:100])
            for i, layer in enumerate(all_layers[1:-1]):
                getter = getattr(capsule, "get_layer_{0}".format(i + 1))
                activations = getter(X)
                status["activations_{0}_mean".format(i + 1)] = activations.mean()
                status["activations_{0}_std".format(i + 1)] = activations.std()
            return status

    batch_optimizer = MyBatchOptimizer(
        verbose=1,
        max_nb_epochs=self.max_epochs,
        batch_size=self.batch_size,
        optimization_procedure=(
            updates.adagrad,
            {"learning_rate": self.learning_rate}
        ),
        whole_dataset_in_device=True
    )
    capsule = Capsule(
        input_variables,
        model,
        loss_function,
        functions=functions,
        batch_optimizer=batch_optimizer,
    )
    capsule.fit(X=X)
    self.capsule = capsule
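# Hedged usage sketch, not from the original source: ``DenseAutoencoder`` is a
# hypothetical name for the class owning the ``fit`` method above; the
# keyword arguments mirror the attributes that ``fit`` reads
# (nb_layers, nb_units, learning_rate, batch_size, max_epochs).
# ae = DenseAutoencoder(nb_layers=2, nb_units=256, learning_rate=0.01,
#                       batch_size=128, max_epochs=50)
# ae.fit(X_train, X_valid=X_valid)
# X_hat = ae.capsule.predict(X_train)  # ``predict`` comes from the functions dict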
def __init__(self, incoming, input_to_hidden, hidden_to_hidden,
             nonlinearity=nonlinearities.rectify,
             hid_init=init.Constant(0.),
             backwards=False,
             learn_init=False,
             gradient_steps=-1,
             grad_clipping=0,
             unroll_scan=False,
             precompute_input=True,
             mask_input=None,
             only_return_final=False,
             gamma=0.9,
             **kwargs):

    # This layer inherits from a MergeLayer, because it can have three
    # inputs - the layer input, the mask and the initial hidden state. We
    # will just provide the layer input as incomings, unless a mask input
    # or initial hidden state was provided.
    incomings = [incoming]
    self.mask_incoming_index = -1
    self.hid_init_incoming_index = -1
    if mask_input is not None:
        incomings.append(mask_input)
        self.mask_incoming_index = len(incomings)-1
    if isinstance(hid_init, Layer):
        incomings.append(hid_init)
        self.hid_init_incoming_index = len(incomings)-1

    super(CustomRecurrentLayerWithFastWeights, self).__init__(
        incomings, **kwargs)

    input_to_hidden_in_layers = \
        [layer for layer in helper.get_all_layers(input_to_hidden)
         if isinstance(layer, InputLayer)]
    if len(input_to_hidden_in_layers) != 1:
        raise ValueError(
            '`input_to_hidden` must have exactly one InputLayer, but it '
            'has {}'.format(len(input_to_hidden_in_layers)))

    hidden_to_hidden_in_lyrs = \
        [layer for layer in helper.get_all_layers(hidden_to_hidden)
         if isinstance(layer, InputLayer)]
    if len(hidden_to_hidden_in_lyrs) != 1:
        raise ValueError(
            '`hidden_to_hidden` must have exactly one InputLayer, but it '
            'has {}'.format(len(hidden_to_hidden_in_lyrs)))
    hidden_to_hidden_in_layer = hidden_to_hidden_in_lyrs[0]

    self.input_to_hidden = input_to_hidden
    self.hidden_to_hidden = hidden_to_hidden
    self.learn_init = learn_init
    self.backwards = backwards
    self.gradient_steps = gradient_steps
    self.grad_clipping = grad_clipping
    self.unroll_scan = unroll_scan
    self.precompute_input = precompute_input
    self.only_return_final = only_return_final
    self.gamma = gamma

    if unroll_scan and gradient_steps != -1:
        raise ValueError(
            "Gradient steps must be -1 when unroll_scan is true.")

    # Retrieve the dimensionality of the incoming layer
    input_shape = self.input_shapes[0]

    if unroll_scan and input_shape[1] is None:
        raise ValueError("Input sequence length cannot be specified as "
                         "None when unroll_scan is True")

    # Check that the input_to_hidden connection can appropriately handle
    # a first dimension of input_shape[0]*input_shape[1] when we will
    # precompute the input dot product
    if (self.precompute_input and
            input_to_hidden.output_shape[0] is not None and
            input_shape[0] is not None and
            input_shape[1] is not None and
            (input_to_hidden.output_shape[0] !=
             input_shape[0]*input_shape[1])):
        raise ValueError(
            'When precompute_input == True, '
            'input_to_hidden.output_shape[0] must equal '
            'incoming.output_shape[0]*incoming.output_shape[1] '
            '(i.e. batch_size*sequence_length) or be None but '
            'input_to_hidden.output_shape[0] = {} and '
            'incoming.output_shape[0]*incoming.output_shape[1] = '
            '{}'.format(input_to_hidden.output_shape[0],
                        input_shape[0]*input_shape[1]))

    # Check that the first dimension of input_to_hidden and
    # hidden_to_hidden's outputs match when we won't precompute the input
    # dot product
    if (not self.precompute_input and
            input_to_hidden.output_shape[0] is not None and
            hidden_to_hidden.output_shape[0] is not None and
            (input_to_hidden.output_shape[0] !=
             hidden_to_hidden.output_shape[0])):
        raise ValueError(
            'When precompute_input == False, '
            'input_to_hidden.output_shape[0] must equal '
            'hidden_to_hidden.output_shape[0] but '
            'input_to_hidden.output_shape[0] = {} and '
            'hidden_to_hidden.output_shape[0] = {}'.format(
                input_to_hidden.output_shape[0],
                hidden_to_hidden.output_shape[0]))

    # Check that input_to_hidden and hidden_to_hidden output shapes match,
    # but don't check a dimension if it's None for either shape
    if not all(s1 is None or s2 is None or s1 == s2
               for s1, s2 in zip(input_to_hidden.output_shape[1:],
                                 hidden_to_hidden.output_shape[1:])):
        raise ValueError("The output shape for input_to_hidden and "
                         "hidden_to_hidden must be equal after the first "
                         "dimension, but input_to_hidden.output_shape={} "
                         "and hidden_to_hidden.output_shape={}".format(
                             input_to_hidden.output_shape,
                             hidden_to_hidden.output_shape))

    # Check that input_to_hidden's output shape is the same as
    # hidden_to_hidden's input shape but don't check a dimension if it's
    # None for either shape
    h_to_h_input_shape = hidden_to_hidden_in_layer.output_shape
    if not all(s1 is None or s2 is None or s1 == s2
               for s1, s2 in zip(input_to_hidden.output_shape[1:],
                                 h_to_h_input_shape[1:])):
        raise ValueError(
            "The output shape for input_to_hidden must be equal to the "
            "input shape of hidden_to_hidden after the first dimension, "
            "but input_to_hidden.output_shape={} and "
            "hidden_to_hidden:input_layer.shape={}".format(
                input_to_hidden.output_shape, h_to_h_input_shape))

    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity

    # Initialize hidden state
    if isinstance(hid_init, Layer):
        self.hid_init = hid_init
    else:
        self.hid_init = self.add_param(
            hid_init, (1,) + hidden_to_hidden.output_shape[1:],
            name="hid_init", trainable=learn_init, regularizable=False)
def load_random_streams(model, path):
    """Load the random streams from a file into a model."""
    layers = [l for l in get_all_layers(model) if hasattr(l, '_srng')]
    with numpy.load(path) as fobj:
        for i, layer in enumerate(layers):
            layer._srng = RandomStreams(fobj[f'seed_{i}'].item())
def get_random_streams(model):
    """Return a list with all ``RandomStreams`` in the model."""
    return [l._srng for l in get_all_layers(model) if hasattr(l, '_srng')]
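# Hedged sketch, not from the original source: a possible counterpart to
# ``load_random_streams`` above.  Because a fresh seed is easier to record than
# to recover from an existing ``RandomStreams`` object, this version reseeds
# every stream and saves the seeds under the ``seed_{i}`` keys the loader
# expects.  The function name and ``rng`` argument are assumptions.
def reseed_and_save_random_streams(model, path, rng=None):
    rng = numpy.random.RandomState() if rng is None else rng
    layers = [l for l in get_all_layers(model) if hasattr(l, '_srng')]
    seeds = {}
    for i, layer in enumerate(layers):
        seed = int(rng.randint(2 ** 30))
        layer._srng = RandomStreams(seed)
        seeds[f'seed_{i}'] = seed
    numpy.savez(path, **seeds)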
def __init__(self, incoming, input_to_hidden, hidden_to_hidden,
             nonlinearity=nonlinearities.rectify,
             hid_init=init.Constant(0.),
             backwards=False,
             learn_init=False,
             gradient_steps=-1,
             grad_clipping=0,
             unroll_scan=False,
             precompute_input=True,
             mask_input=None,
             only_return_final=False,
             **kwargs):

    # This layer inherits from a MergeLayer, because it can have three
    # inputs - the layer input, the mask and the initial hidden state. We
    # will just provide the layer input as incomings, unless a mask input
    # or initial hidden state was provided.
    incomings = [incoming]
    self.mask_incoming_index = -1
    self.hid_init_incoming_index = -1
    if mask_input is not None:
        incomings.append(mask_input)
        self.mask_incoming_index = len(incomings)-1
    if isinstance(hid_init, Layer):
        incomings.append(hid_init)
        self.hid_init_incoming_index = len(incomings)-1

    super(onlyRecurrentLayer, self).__init__(incomings, **kwargs)

    input_to_hidden_in_layers = \
        [layer for layer in helper.get_all_layers(input_to_hidden)
         if isinstance(layer, InputLayer)]
    if len(input_to_hidden_in_layers) != 1:
        raise ValueError(
            '`input_to_hidden` must have exactly one InputLayer, but it '
            'has {}'.format(len(input_to_hidden_in_layers)))

    hidden_to_hidden_in_lyrs = \
        [layer for layer in helper.get_all_layers(hidden_to_hidden)
         if isinstance(layer, InputLayer)]
    if len(hidden_to_hidden_in_lyrs) != 1:
        raise ValueError(
            '`hidden_to_hidden` must have exactly one InputLayer, but it '
            'has {}'.format(len(hidden_to_hidden_in_lyrs)))
    hidden_to_hidden_in_layer = hidden_to_hidden_in_lyrs[0]

    self.input_to_hidden = input_to_hidden
    self.hidden_to_hidden = hidden_to_hidden
    self.learn_init = learn_init
    self.backwards = backwards
    self.gradient_steps = gradient_steps
    self.grad_clipping = grad_clipping
    self.unroll_scan = unroll_scan
    self.precompute_input = precompute_input
    self.only_return_final = only_return_final

    if unroll_scan and gradient_steps != -1:
        raise ValueError(
            "Gradient steps must be -1 when unroll_scan is true.")

    # Retrieve the dimensionality of the incoming layer
    input_shape = self.input_shapes[0]

    if nonlinearity is None:
        self.nonlinearity = nonlinearities.identity
    else:
        self.nonlinearity = nonlinearity

    # Initialize hidden state
    if isinstance(hid_init, Layer):
        self.hid_init = hid_init
    else:
        self.hid_init = self.add_param(
            hid_init, (1,) + hidden_to_hidden.output_shape[1:],
            name="hid_init", trainable=learn_init, regularizable=False)
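# Hedged usage sketch, not from the original source, following the construction
# pattern of lasagne.layers.CustomRecurrentLayer: the input-to-hidden and
# hidden-to-hidden transforms are passed as small networks, each with exactly
# one InputLayer.  ``CustomRecurrentLayerWithFastWeights`` above takes the same
# arguments plus a ``gamma`` parameter; the shapes here are illustrative only.
from lasagne.layers import InputLayer, DenseLayer

n_batch, n_steps, n_in, n_hid = 2, 3, 4, 5
l_in = InputLayer((n_batch, n_steps, n_in))
l_in_to_hid = DenseLayer(InputLayer((None, n_in)), n_hid)
l_hid_to_hid = DenseLayer(InputLayer((None, n_hid)), n_hid)
l_rec = onlyRecurrentLayer(l_in, l_in_to_hid, l_hid_to_hid)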