Example #1
    def __init__(self,
                 inputs=None,
                 outputs=None,
                 params=None,
                 outdir='outputs/basic',
                 activation='rectifier',
                 weights_init='uniform',
                 weights_mean=0,
                 weights_std=5e-3,
                 weights_interval='glorot',
                 bias_init=0.0,
                 mrg=RNG_MRG.MRG_RandomStreams(1),
                 **kwargs):
        """
        Initialize a basic layer.

        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`)]
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `shape` will be a tuple representing the known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to the number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        outputs : int
            The dimensionality of the output for this model.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        activation : str or callable
            The activation function to use after the dot product going from input -> output. This can be a string
            representing an option from opendeep.utils.activation, or your own function as long as it is callable.
        weights_init : str
            Determines the method for initializing input -> output weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        """
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        initial_parameters = locals().copy()
        initial_parameters.pop('self')
        super(Dense, self).__init__(**initial_parameters)
        if self.inputs is None:
            return

        ##################
        # specifications #
        ##################
        if len(self.inputs) > 1:
            raise NotImplementedError(
                "Expected 1 input to Dense, found %d. Please merge inputs before passing "
                "to the Dense model!" % len(self.inputs))
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        if isinstance(input_shape, int):
            self.input_size = ((None, ) *
                               (self.input.ndim - 1)) + (input_shape, )
        else:
            self.input_size = input_shape
        assert self.input_size is not None, "Need to specify the shape for the last dimension of the input!"

        # We also only have 1 output
        assert self.output_size is not None, "Need to specify outputs size!"
        out_size = self.output_size[0]
        if isinstance(out_size, int):
            self.output_size = self.input_size[:-1] + (out_size, )
        else:
            self.output_size = out_size

        # activation function!
        activation_func = get_activation_function(activation)

        #########################################################
        # parameters - make sure to deal with input dictionary! #
        #########################################################
        W = self.params.get("W") or get_weights(
            weights_init=weights_init,
            shape=(self.input_size[-1], self.output_size[-1]),
            name="W",
            rng=mrg,
            # if gaussian
            mean=weights_mean,
            std=weights_std,
            # if uniform
            interval=weights_interval)

        b = self.params.get("b") or get_bias(
            shape=self.output_size[-1], name="b", init_values=bias_init)

        # Finally have the two parameters - weights matrix W and bias vector b. That is all!
        self.params = OrderedDict([("W", W), ("b", b)])

        ###############
        # computation #
        ###############
        # Here is the meat of the computation transforming input -> output
        # It simply involves a matrix multiplication of inputs*weights, adding the bias vector, and then passing
        # the result through our activation function (normally something nonlinear such as: max(0, output))
        self.output = activation_func(dot(self.input, W) + b)

        log.debug(
            "Initialized a basic fully-connected layer with shape %s and activation: %s",
            str((self.input_size[-1], self.output_size[-1])), str(activation))
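
For context, a minimal usage sketch for this layer follows. The import path, the `Dense` class name, and the way `inputs` is routed are assumptions based on this constructor, not a confirmed opendeep API:

import theano.tensor as T
from opendeep.models import Dense  # assumed import path

# A matrix input: unknown batch size, fixed feature size of 784.
x = T.matrix('x')
layer = Dense(inputs=[((None, 784), x)], outputs=512, activation='rectifier')
y = layer.output  # symbolic expression: rectifier(dot(x, W) + b)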
Example #2
    def __init__(self, inputs=None, params=None, outdir='outputs/conv1d',
                 n_filters=None, filter_size=None, stride=None, border_mode='valid',
                 weights_init='uniform', weights_interval='glorot', weights_mean=0, weights_std=5e-3,
                 bias_init=0,
                 activation='elu',
                 convolution='mc0',
                 mrg=RNG_MRG.MRG_RandomStreams(1),
                 **kwargs):
        """
        Initialize a 1-D convolutional layer.

        Parameters
        ----------
        inputs : tuple(shape, `Theano.TensorType`)
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `shape` will be a tuple representing the known
            sizes for each dimension in the `Theano.TensorType`. Shape of the incoming data:
            (batch_size, num_channels, data_dimensionality). Most likely, your channels
            will be 1. For example, batches of text will be of the form (N, 1, D) where N=examples in minibatch and
            D=dimensionality (chars, words, etc.)
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        n_filters : int
            The number of filters to use (convolution kernels).
        filter_size : int
            The size of the convolution filter.
        stride : int
            The distance between the receptive field centers of neighboring units. This is the 'stride' of the
            convolution operation.
        border_mode : str, one of 'valid', 'full'
            A string indicating the convolution border mode.
            If 'valid', the convolution is only computed where the input and the
            filter fully overlap.
            If 'full', the convolution is computed wherever the input and the
            filter overlap by at least one position.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        activation : str or Callable
            The activation function to apply to the layer. See opendeep.utils.activation for options.
        convolution : str or Callable
            The 1-dimensional convolution implementation to use. The default of 'mc0' is normally fine. See
            opendeep.utils.conv1d_implementations for alternatives. (This is necessary because Theano only
            supports 2D convolutions at the moment).
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.

        Notes
        -----
        Theano's default convolution function (`theano.tensor.nnet.conv.conv2d`)
        does not support the 'same' border mode, so this layer only accepts
        'valid' and 'full'; passing anything else raises a RuntimeError.
        """
        initial_parameters = locals().copy()
        initial_parameters.pop('self')
        super(Conv1D, self).__init__(**initial_parameters)
        if self.inputs is None:
            return

        ##################
        # specifications #
        ##################
        # expect input to be in the form (B, C, I) (batch, channel, input data)
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        assert self.input.ndim == 3, "Expected 3D input variable with form (batch, channel, input_data)"
        assert len(input_shape) == 3, "Expected 3D input shape with form (batch, channel, input_data)"

        n_channels = input_shape[1]

        filter_shape = (n_filters, n_channels, filter_size)

        # activation function!
        activation_func = get_activation_function(activation)

        # convolution function!
        convolution_func = get_conv1d_function(convolution)

        outshape = ConvOp.getOutputShape(
            inshp=(input_shape[-1],),
            kshp=(filter_size,),
            stride=(stride,),
            mode=border_mode
        )
        self.output_size = (input_shape[0], n_filters) + outshape

        ##########
        # Params #
        ##########
        W = self.params.get(
            "W",
            get_weights(weights_init=weights_init,
                        shape=filter_shape,
                        name="W",
                        rng=mrg,
                        # if gaussian
                        mean=weights_mean,
                        std=weights_std,
                        # if uniform
                        interval=weights_interval)
        )

        b = self.params.get(
            "b",
            get_bias(shape=(n_filters,), name="b", init_values=bias_init)
        )

        # Finally have the two parameters!
        self.params = OrderedDict([("W", W), ("b", b)])

        ########################
        # Computational Graph! #
        ########################
        if border_mode in ['valid', 'full']:
            conved = convolution_func(self.input,
                                      W,
                                      subsample=(stride,),
                                      image_shape=input_shape,
                                      filter_shape=filter_shape,
                                      border_mode=border_mode)
        else:
            log.error("Invalid border mode: '%s'" % border_mode)
            raise RuntimeError("Invalid border mode: '%s'" % border_mode)

        self.output = activation_func(conved + b.dimshuffle('x', 0, 'x'))
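
A minimal usage sketch, under the same caveats (the import path and class name are assumptions based on this constructor):

import theano.tensor as T
from opendeep.models import Conv1D  # assumed import path

# Batches of 1-channel sequences: (batch, channel, data), e.g. text with D=100.
x = T.tensor3('x')
conv = Conv1D(inputs=[((None, 1, 100), x)], n_filters=32, filter_size=5,
              stride=1, border_mode='valid', activation='elu')
feature_maps = conv.output  # shape (batch, 32, output_length)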
Example #3
    def __init__(self, inputs=None, hiddens=None, params=None, outdir='outputs/lstm/',
                 activation='relu', gate_activation='sigmoid',
                 mrg=RNG_MRG.MRG_RandomStreams(1),
                 weights_init='uniform', weights_interval='glorot', weights_mean=0, weights_std=5e-3,
                 bias_init=0.0,
                 r_weights_init='identity', r_weights_interval='glorot', r_weights_mean=0, r_weights_std=5e-3,
                 r_bias_init=0.0,
                 direction='forward',
                 clip_recurrent_grads=False):
        """
        Initialize an LSTM.

        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`)]
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. The `inputs` variables are expected to be of the form
            (timesteps, batch, data). `shape` will be a tuple representing the known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to the number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        hiddens : int or Tuple of (shape, `Theano.TensorType`)
            Int for the number of hidden units to use, or a tuple of shape, expression to route the starting
            hidden values from elsewhere.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str
            The location to produce outputs from training or running the :class:`LSTM`. If None, nothing will be saved.
        activation : str or callable
            The nonlinear (or linear) activation to perform for the hidden units.
            This activation function should be appropriate for the output unit types, i.e. 'sigmoid' for binary.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        gate_activation : str or callable
            The activation to perform for the hidden gates (default sigmoid).
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        weights_init : str
            Determines the method for initializing input-hidden model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        r_weights_init : str
            Determines the method for initializing recurrent hidden-hidden model weights.
            See opendeep.utils.nnet for options.
        r_weights_interval : str or float
            If Uniform `r_weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        r_weights_mean : float
            If Gaussian `r_weights_init`, the mean value to use.
        r_weights_std : float
            If Gaussian `r_weights_init`, the standard deviation to use.
        r_bias_init : float
            The initial value to use for the recurrent bias parameter. Most often, the default of 0.0 is preferred.
        direction : str
            The direction this recurrent model should go over its inputs. Can be 'forward', 'backward', or
            'bidirectional'. In the case of 'bidirectional', it will make two passes over the sequence,
            computing two sets of hiddens and adding them together.
        clip_recurrent_grads : False or float, optional
            Whether to clip the gradients for the parameters that unroll over timesteps (such as the weights
            connecting previous hidden states to the current hidden state, and not the weights from current
            input to hiddens). If it is a float, the gradients for the weights will be hard clipped to the range
            `+-clip_recurrent_grads`.
        """
        initial_parameters = locals().copy()
        initial_parameters.pop('self')
        super(LSTM, self).__init__(**initial_parameters)

        ##################
        # specifications #
        ##################
        backward = direction.lower() == 'backward'
        bidirectional = direction.lower() == 'bidirectional'

        ########################
        # activation functions #
        ########################
        # recurrent hidden activation functions!
        self.hidden_activation_func = get_activation_function(activation)
        self.gate_activation_func = get_activation_function(gate_activation)

        ##########
        # inputs #
        ##########
        # inputs are expected to have the shape (n_timesteps, batch_size, data)
        if len(self.inputs) > 1:
            raise NotImplementedError("Expected 1 input, found %d. Please merge inputs before passing "
                                      "to the model!" % len(self.inputs))
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        if isinstance(input_shape, int):
            self.input_size = ((None,) * (self.input.ndim - 1)) + (input_shape,)
        else:
            self.input_size = input_shape
        assert self.input_size is not None, "Need to specify the shape for at least the last dimension of the input!"
        # input is 3D tensor of (timesteps, batch_size, data_dim)
        # if input is 2D tensor, assume it is of the form (timesteps, data_dim) i.e. batch_size is 1. Convert to 3D.
        # if input is > 3D tensor, assume it is of form (timesteps, batch_size, data...) and flatten to 3D.
        if self.input.ndim == 1:
            self.input = unbroadcast(self.input.dimshuffle(0, 'x', 'x'), 1, 2)

        elif self.input.ndim == 2:
            self.input = unbroadcast(self.input.dimshuffle(0, 'x', 1), 1)

        elif self.input.ndim > 3:
            flat_in = Flatten((self.input_size, self.input), ndim=3)
            self.input = flat_in.get_outputs()
            self.input_size = flat_in.output_size

        ###########
        # hiddens #
        ###########
        # have only 1 hiddens
        assert len(self.hiddens) == 1, "Expected 1 `hiddens` param, found %d" % len(self.hiddens)
        self.hiddens = self.hiddens[0]
        # if hiddens is an int (hidden size parameter, not routing info)
        h_init = None
        if isinstance(self.hiddens, int):
            self.hidden_size = self.hiddens
        elif isinstance(self.hiddens, tuple):
            hidden_shape, h_init = self.hiddens
            if isinstance(hidden_shape, int):
                self.hidden_size = hidden_shape
            else:
                self.hidden_size = hidden_shape[-1]
        else:
            raise AssertionError("Hiddens need to be an int or tuple of (shape, theano_expression), found %s" %
                                 type(self.hiddens))

        # output shape is going to be 3D with (timesteps, batch_size, hidden_size)
        self.output_size = (None, None, self.hidden_size)

        ##########################################################
        # parameters - make sure to deal with params dict input! #
        ##########################################################
        # all input-to-hidden weights
        W_c, W_i, W_f, W_o = [
            self.params.get(
                "W_%s" % sub,
                get_weights(weights_init=weights_init,
                            shape=(self.input_size[-1], self.hidden_size),
                            name="W_%s" % sub,
                            # if gaussian
                            mean=weights_mean,
                            std=weights_std,
                            # if uniform
                            interval=weights_interval)
            )
            for sub in ['c', 'i', 'f', 'o']
        ]
        # all hidden-to-hidden weights
        U_c, U_i, U_f, U_o = [
            self.params.get(
                "U_%s" % sub,
                get_weights(weights_init=r_weights_init,
                            shape=(self.hidden_size, self.hidden_size),
                            name="U_%s" % sub,
                            # if gaussian
                            mean=r_weights_mean,
                            std=r_weights_std,
                            # if uniform
                            interval=r_weights_interval)
            )
            for sub in ['c', 'i', 'f', 'o']
        ]
        # if bidirectional, make hidden-to-hidden weights again to go the opposite direction
        U_c_b, U_i_b, U_f_b, U_o_b = None, None, None, None
        if bidirectional:
            U_c_b, U_i_b, U_f_b, U_o_b = [
                self.params.get(
                    "U_%s_b" % sub,
                    get_weights(weights_init=r_weights_init,
                                shape=(self.hidden_size, self.hidden_size),
                                name="U_%s_b" % sub,
                                # if gaussian
                                mean=r_weights_mean,
                                std=r_weights_std,
                                # if uniform
                                interval=r_weights_interval)
                )
                for sub in ['c', 'i', 'f', 'o']
            ]
        # biases
        b_c, b_i, b_f, b_o = [
            self.params.get(
                "b_%s" % sub,
                get_bias(shape=(self.hidden_size,),
                         name="b_%s" % sub,
                         init_values=bias_init)
            )
            for sub in ['c', 'i', 'f', 'o']
        ]
        # clip gradients if we are doing that
        recurrent_params = [U_c, U_i, U_f, U_o, U_c_b, U_i_b, U_f_b, U_o_b]
        if clip_recurrent_grads:
            clip = abs(clip_recurrent_grads)
            U_c, U_i, U_f, U_o, U_c_b, U_i_b, U_f_b, U_o_b = [
                grad_clip(param, -clip, clip) if param is not None
                else None
                for param in recurrent_params
            ]

        # put all the parameters into our dictionary
        self.params = {
            "W_c": W_c,
            "W_i": W_i,
            "W_f": W_f,
            "W_o": W_o,

            "U_c": U_c,
            "U_i": U_i,
            "U_f": U_f,
            "U_o": U_o,

            "b_c": b_c,
            "b_i": b_i,
            "b_f": b_f,
            "b_o": b_o,
        }
        if bidirectional:
            self.params.update(
                {
                    "U_c_b": U_c_b,
                    "U_i_b": U_i_b,
                    "U_f_b": U_f_b,
                    "U_o_b": U_o_b,
                }
            )

        # make h_init the right sized tensor
        if h_init is None:
            h_init = zeros_like(dot(self.input[0], W_c))

        c_init = zeros_like(dot(self.input[0], W_c))

        ###############
        # computation #
        ###############
        # move some computation outside of scan to speed it up!
        x_c = dot(self.input, W_c) + b_c
        x_i = dot(self.input, W_i) + b_i
        x_f = dot(self.input, W_f) + b_f
        x_o = dot(self.input, W_o) + b_o

        # now do the recurrent stuff
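        # `self.recurrent_step` is defined elsewhere in the class; for a standard LSTM it
        # would compute something like the following per timestep (a sketch, not the
        # verbatim implementation):
        #   i_t = gate_act(x_i[t] + dot(h_tm1, U_i))                  # input gate
        #   f_t = gate_act(x_f[t] + dot(h_tm1, U_f))                  # forget gate
        #   o_t = gate_act(x_o[t] + dot(h_tm1, U_o))                  # output gate
        #   c_t = f_t * c_tm1 + i_t * act(x_c[t] + dot(h_tm1, U_c))   # cell state
        #   h_t = o_t * act(c_t)                                      # hidden state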
        (self.hiddens, _), self.updates = scan(
            fn=self.recurrent_step,
            sequences=[x_c, x_i, x_f, x_o],
            outputs_info=[h_init, c_init],
            non_sequences=[U_c, U_i, U_f, U_o],
            go_backwards=backward,
            name="lstm_scan",
            strict=True
        )

        # if bidirectional, do the same in reverse!
        if bidirectional:
            (hiddens_b, _), updates_b = scan(
                fn=self.recurrent_step,
                sequences=[x_c, x_i, x_f, x_o],
                outputs_info=[h_init, c_init],
                non_sequences=[U_c_b, U_i_b, U_f_b, U_o_b],
                go_backwards=not backward,
                name="lstm_scan_back",
                strict=True
            )
            # flip the hiddens to be the right direction
            hiddens_b = hiddens_b[::-1]
            # update stuff
            self.updates.update(updates_b)
            self.hiddens += hiddens_b

        log.info("Initialized an LSTM!")
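
A minimal usage sketch (the import path and class name are assumptions based on this constructor):

import theano.tensor as T
from opendeep.models import LSTM  # assumed import path

# Sequences arranged as (timesteps, batch, features).
x = T.tensor3('x')
lstm = LSTM(inputs=[((None, None, 28), x)], hiddens=128,
            direction='bidirectional', clip_recurrent_grads=5.)
h = lstm.hiddens  # 3D: (timesteps, batch, hidden_size)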
Example #4
    def __init__(self,
                 inputs=None,
                 hiddens=None,
                 params=None,
                 outdir='outputs/gru/',
                 activation='relu',
                 gate_activation='sigmoid',
                 mrg=RNG_MRG.MRG_RandomStreams(1),
                 weights_init='uniform',
                 weights_interval='glorot',
                 weights_mean=0,
                 weights_std=5e-3,
                 bias_init=0.0,
                 r_weights_init='identity',
                 r_weights_interval='glorot',
                 r_weights_mean=0,
                 r_weights_std=5e-3,
                 r_bias_init=0.0,
                 direction='forward',
                 clip_recurrent_grads=False):
        """
        Initialize a GRU layer.

        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`)]
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. The `inputs` variables are expected to be of the form
            (timesteps, batch, data). `shape` will be a tuple representing the known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to the number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        hiddens : int or Tuple of (shape, `Theano.TensorType`)
            Int for the number of hidden units to use, or a tuple of shape, expression to route the starting
            hidden values from elsewhere.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str
            The location to produce outputs from training or running the :class:`GRU`. If None, nothing will be saved.
        activation : str or callable
            The nonlinear (or linear) activation to perform for the hidden units.
            This activation function should be appropriate for the output unit types, i.e. 'sigmoid' for binary.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        gate_activation : str or callable
            The activation to perform for the hidden gates (default sigmoid).
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        weights_init : str
            Determines the method for initializing input-hidden model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        r_weights_init : str
            Determines the method for initializing recurrent hidden-hidden model weights.
            See opendeep.utils.nnet for options.
        r_weights_interval : str or float
            If Uniform `r_weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        r_weights_mean : float
            If Gaussian `r_weights_init`, the mean value to use.
        r_weights_std : float
            If Gaussian `r_weights_init`, the standard deviation to use.
        r_bias_init : float
            The initial value to use for the recurrent bias parameter. Most often, the default of 0.0 is preferred.
        direction : str
            The direction this recurrent model should go over its inputs. Can be 'forward', 'backward', or
            'bidirectional'. In the case of 'bidirectional', it will make two passes over the sequence,
            computing two sets of hiddens and adding them together.
        clip_recurrent_grads : False or float, optional
            Whether to clip the gradients for the parameters that unroll over timesteps (such as the weights
            connecting previous hidden states to the current hidden state, and not the weights from current
            input to hiddens). If it is a float, the gradients for the weights will be hard clipped to the range
            `+-clip_recurrent_grads`.
        """
        initial_parameters = locals().copy()
        initial_parameters.pop('self')
        super(GRU, self).__init__(**initial_parameters)

        ##################
        # specifications #
        ##################
        backward = direction.lower() == 'backward'
        bidirectional = direction.lower() == 'bidirectional'

        ########################
        # activation functions #
        ########################
        # recurrent hidden activation functions!
        self.hidden_activation_func = get_activation_function(activation)
        self.gate_activation_func = get_activation_function(gate_activation)

        ##########
        # inputs #
        ##########
        # inputs are expected to have the shape (n_timesteps, batch_size, data)
        if len(self.inputs) > 1:
            raise NotImplementedError(
                "Expected 1 input, found %d. Please merge inputs before passing "
                "to the model!" % len(self.inputs))
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        self.input_shape, self.input = self.inputs[0]
        if isinstance(self.input_shape, int):
            self.input_shape = ((None, ) *
                                (self.input.ndim - 1)) + (self.input_shape, )
        assert self.input_shape is not None, "Need to specify the shape for at least the last dimension of the input!"
        # input is 3D tensor of (timesteps, batch_size, data_dim)
        # if input is 2D tensor, assume it is of the form (timesteps, data_dim) i.e. batch_size is 1. Convert to 3D.
        # if input is > 3D tensor, assume it is of form (timesteps, batch_size, data...) and flatten to 3D.
        if self.input.ndim == 1:
            self.input = unbroadcast(self.input.dimshuffle(0, 'x', 'x'), 1, 2)

        elif self.input.ndim == 2:
            self.input = unbroadcast(self.input.dimshuffle(0, 'x', 1), 1)

        elif self.input.ndim > 3:
            flat_in = Flatten((self.input_shape, self.input), ndim=3)
            self.input = flat_in.get_outputs()
            self.input_shape = flat_in.output_size

        ###########
        # hiddens #
        ###########
        # have only 1 hiddens
        assert len(self.hiddens) == 1, "Expected 1 `hiddens` param, found %d" % len(self.hiddens)
        self.hiddens = self.hiddens[0]
        # if hiddens is an int (hidden size parameter, not routing info)
        h_init = None
        if isinstance(self.hiddens, int):
            self.hidden_size = self.hiddens
        elif isinstance(self.hiddens, tuple):
            hidden_shape, h_init = self.hiddens
            if isinstance(hidden_shape, int):
                self.hidden_size = hidden_shape
            else:
                self.hidden_size = hidden_shape[-1]
        else:
            raise AssertionError(
                "Hiddens need to be an int or tuple of (shape, theano_expression), found %s"
                % type(self.hiddens))

        # output shape is going to be 3D with (timesteps, batch_size, hidden_size)
        self.output_size = (None, None, self.hidden_size)

        ##########################################################
        # parameters - make sure to deal with params dict input! #
        ##########################################################
        # all input-to-hidden weights
        W_z, W_r, W_h = [
            self.params.get(
                "W_%s" % sub,
                get_weights(
                    weights_init=weights_init,
                    shape=(self.input_shape[-1], self.hidden_size),
                    name="W_%s" % sub,
                    # if gaussian
                    mean=weights_mean,
                    std=weights_std,
                    # if uniform
                    interval=weights_interval)) for sub in ['z', 'r', 'h']
        ]
        # all hidden-to-hidden (one direction) weights
        U_z, U_r, U_h = [
            self.params.get(
                "U_%s" % sub,
                get_weights(
                    weights_init=r_weights_init,
                    shape=(self.hidden_size, self.hidden_size),
                    name="U_%s" % sub,
                    # if gaussian
                    mean=r_weights_mean,
                    std=r_weights_std,
                    # if uniform
                    interval=r_weights_interval)) for sub in ['z', 'r', 'h']
        ]
        # if bidirectional, make hidden-to-hidden weights again to go the opposite direction
        U_z_b, U_r_b, U_h_b = None, None, None
        if bidirectional:
            U_z_b, U_r_b, U_h_b = [
                self.params.get(
                    "U_%s_b" % sub,
                    get_weights(
                        weights_init=r_weights_init,
                        shape=(self.hidden_size, self.hidden_size),
                        name="U_%s_b" % sub,
                        # if gaussian
                        mean=r_weights_mean,
                        std=r_weights_std,
                        # if uniform
                        interval=r_weights_interval))
                for sub in ['z', 'r', 'h']
            ]
        # biases
        b_z, b_r, b_h = [
            self.params.get(
                "b_%s" % sub,
                get_bias(shape=(self.hidden_size, ),
                         name="b_%s" % sub,
                         init_values=bias_init)) for sub in ['z', 'r', 'h']
        ]
        # clip gradients if we are doing that
        r_params = [U_z, U_r, U_h, U_z_b, U_r_b, U_h_b]
        if clip_recurrent_grads:
            clip = abs(clip_recurrent_grads)
            U_z, U_r, U_h, U_z_b, U_r_b, U_h_b = [
                grad_clip(param, -clip, clip) if param is not None else None
                for param in r_params
            ]

        # put all the parameters into our dictionary
        self.params = {
            "W_z": W_z,
            "W_r": W_r,
            "W_h": W_h,
            "U_z": U_z,
            "U_r": U_r,
            "U_h": U_h,
            "b_z": b_z,
            "b_r": b_r,
            "b_h": b_h,
        }
        if bidirectional:
            self.params.update({
                "U_z_b": U_z_b,
                "U_r_b": U_r_b,
                "U_h_b": U_h_b,
            })

        # make h_init the right sized tensor
        if h_init is None:
            h_init = zeros_like(dot(self.input[0], W_h))

        ###############
        # computation #
        ###############
        # move some computation outside of scan to speed it up!
        x_z = dot(self.input, W_z) + b_z
        x_r = dot(self.input, W_r) + b_r
        x_h = dot(self.input, W_h) + b_h

        # now do the recurrent stuff
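        # `self.recurrent_step` is defined elsewhere in the class; for a standard GRU it
        # would compute something like the following per timestep (a sketch, not the
        # verbatim implementation):
        #   z_t = gate_act(x_z[t] + dot(h_tm1, U_z))           # update gate
        #   r_t = gate_act(x_r[t] + dot(h_tm1, U_r))           # reset gate
        #   h_c = act(x_h[t] + r_t * dot(h_tm1, U_h))          # candidate hidden
        #   h_t = (1 - z_t) * h_tm1 + z_t * h_c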
        self.hiddens, self.updates = scan(fn=self.recurrent_step,
                                          sequences=[x_z, x_r, x_h],
                                          outputs_info=[h_init],
                                          non_sequences=[U_z, U_r, U_h],
                                          go_backwards=backward,
                                          name="gru_scan",
                                          strict=True)

        # if bidirectional, do the same in reverse!
        if bidirectional:
            hiddens_b, updates_b = scan(fn=self.recurrent_step,
                                        sequences=[x_z, x_r, x_h],
                                        outputs_info=[h_init],
                                        non_sequences=[U_z_b, U_r_b, U_h_b],
                                        go_backwards=not backward,
                                        name="gru_scan_back",
                                        strict=True)
            # flip the hiddens to be the right direction
            hiddens_b = hiddens_b[::-1]
            # update stuff
            self.updates.update(updates_b)
            self.hiddens += hiddens_b

        log.info("Initialized a GRU!")
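
A minimal usage sketch (the import path and class name are assumptions based on this constructor):

import theano.tensor as T
from opendeep.models import GRU  # assumed import path

x = T.tensor3('x')  # (timesteps, batch, features)
gru = GRU(inputs=[((None, None, 28), x)], hiddens=128, direction='forward')
h = gru.hiddens  # 3D: (timesteps, batch, hidden_size)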
Example #5
    def __init__(self, inputs=None, params=None, outdir='outputs/conv2d',
                 n_filters=None, filter_size=None, stride=(1, 1), border_mode='valid',
                 weights_init='uniform', weights_interval='glorot', weights_mean=0, weights_std=5e-3,
                 bias_init=0,
                 activation='elu',
                 convolution='conv2d',
                 mrg=RNG_MRG.MRG_RandomStreams(1),
                 **kwargs):
        """
        Initialize a 2-dimensional convolutional layer.

        Parameters
        ----------
        inputs : tuple(shape, `Theano.TensorType`)
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `shape` will be a tuple representing the known
            sizes for each dimension in the `Theano.TensorType`. Shape of the incoming data:
            (batch_size, num_channels, input_height, input_width).
            If elements of the shape are None, they can be inferred, but then border_mode can't be 'same'.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        n_filters : int
            The number of filters to use (convolution kernels).
        filter_size : tuple(int) or int
            (filter_height, filter_width). If it is an int, size will be duplicated across height and width.
        stride : tuple(int)
            The distance between the receptive field centers of neighboring units. This is the 'stride' of the
            convolution operation.
        border_mode : str, one of 'valid', 'full'
            A string indicating the convolution border mode.
            If 'valid', the convolution is only computed where the input and the
            filter fully overlap.
            If 'full', the convolution is computed wherever the input and the
            filter overlap by at least one position.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        activation : str or Callable
            The activation function to apply to the layer. See opendeep.utils.activation for options.
        convolution : str or Callable
            The 2-dimensional convolution implementation to use. The default of 'conv2d' is normally fine because it
            uses theano's tensor.nnet.conv.conv2d, which cherry-picks the best implementation with a meta-optimizer if
            you set the theano configuration flag 'optimizer_including=conv_meta'. Otherwise, you could pass a
            callable function, such as cudnn or cuda-convnet if you don't want to use the meta-optimizer.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.

        Notes
        -----
        Theano's default convolution function (`theano.tensor.nnet.conv.conv2d`)
        does not support the 'same' border mode, so this layer only accepts
        'valid' and 'full'; passing anything else raises a RuntimeError.
        """
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        initial_parameters = locals().copy()
        initial_parameters.pop('self')
        super(Conv2D, self).__init__(**initial_parameters)

        ##################
        # specifications #
        ##################
        # expect input to be in the form (B, C, 0, 1) (batch, channel, rows, cols)
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        assert self.input.ndim == 4, "Expected 4D input variable with form (batch, channel, rows, cols)"
        assert len(input_shape) == 4, "Expected 4D input shape with form (batch, channel, rows, cols)"

        n_channels = input_shape[1]

        if isinstance(filter_size, int):
            filter_size = (filter_size, )*2

        # activation function!
        activation_func = get_activation_function(activation)

        # convolution function!
        if convolution == 'conv2d':
            # using the theano flag optimizer_including=conv_meta will let this conv function optimize itself.
            convolution_func = conv2d
        else:
            assert callable(convolution), "Input convolution was not 'conv2d' and was not Callable."
            convolution_func = convolution

        # filter shape should be in the form (num_filters, num_channels, filter_size[0], filter_size[1])

        outshape = ConvOp.getOutputShape(
            inshp=input_shape[-2:],
            kshp=filter_size,
            stride=stride,
            mode=border_mode
        )
        self.output_size = (input_shape[0], n_filters) + outshape

        filter_shape = (n_filters, n_channels) + filter_size

        ##########
        # Params #
        ##########
        W = self.params.get(
            "W",
            get_weights(weights_init=weights_init,
                        shape=filter_shape,
                        name="W",
                        rng=mrg,
                        # if gaussian
                        mean=weights_mean,
                        std=weights_std,
                        # if uniform
                        interval=weights_interval)
        )

        b = self.params.get(
            "b",
            get_bias(shape=(n_filters, ), name="b", init_values=bias_init)
        )

        # Finally have the two parameters!
        self.params = OrderedDict([("W", W), ("b", b)])

        ########################
        # Computational Graph! #
        ########################
        if border_mode in ['valid', 'full']:
            conved = convolution_func(self.input,
                                      W,
                                      subsample=stride,
                                      image_shape=input_shape,
                                      filter_shape=filter_shape,
                                      border_mode=border_mode)
        else:
            raise RuntimeError("Invalid border mode: '%s'" % border_mode)

        self.output = activation_func(conved + b.dimshuffle('x', 0, 'x', 'x'))
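
A minimal usage sketch (the import path and class name are assumptions based on this constructor):

import theano.tensor as T
from opendeep.models import Conv2D  # assumed import path

# Image batches as (batch, channels, rows, cols), e.g. MNIST.
x = T.tensor4('x')
conv = Conv2D(inputs=[((None, 1, 28, 28), x)], n_filters=20, filter_size=(5, 5),
              stride=(1, 1), border_mode='valid', activation='elu')
feature_maps = conv.output  # shape (batch, 20, 24, 24) for a 'valid' 5x5 convolution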
Example #6

    def __init__(self, inputs=None, hiddens=None, params=None, outdir='outputs/rbm/',
                 visible_activation='sigmoid', hidden_activation='sigmoid',
                 weights_init='uniform', weights_mean=0, weights_std=5e-3, weights_interval='glorot',
                 bias_init=0.0,
                 mrg=RNG_MRG.MRG_RandomStreams(1),
                 k=15):
        """
        RBM constructor. Defines the parameters of the model along with
        basic operations for inferring hidden from visible (and vice-versa),
        as well as for performing CD updates.

        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`)]
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `shape` will be a tuple representing the known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to the number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        hiddens : int or Tuple of (shape, `Theano.TensorType`)
            Int for the number of hidden units to use, or a tuple of shape, expression to route the starting
            hidden values from elsewhere.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        visible_activation : str or callable
            The nonlinear (or linear) visible activation to perform after the dot product from hiddens -> visible layer.
            This activation function should be appropriate for the input unit types, i.e. 'sigmoid' for binary inputs.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The nonlinear (or linear) hidden activation to perform after the dot product from visible -> hiddens layer.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        mrg : random
            A random number generator that is used when sampling. The RBM is a probabilistic model, so it relies a lot
            on sampling. I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        k : int
            The k number of steps used for CD-k or PCD-k with Gibbs sampling. Basically, the number of samples
            generated from the model to train against reconstructing the original input.
        """
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        initial_parameters = locals().copy()
        initial_parameters.pop('self')
        super(RBM, self).__init__(**initial_parameters)

        ##################
        # specifications #
        ##################
        if len(self.inputs) > 1:
            raise NotImplementedError("Expected 1 input to RBM, found %d. Please merge inputs before passing "
                                      "to the model!" % len(self.inputs))
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        if isinstance(input_shape, int):
            self.input_size = ((None,) * (self.input.ndim - 1)) + (input_shape,)
        else:
            self.input_size = input_shape
        assert self.input_size is not None, "Need to specify the shape for the last dimension of the input!"

        # our output space is the same as the input space
        self.output_size = self.input_size

        # grab hiddens
        # have only 1 hiddens
        assert len(self.hiddens) == 1, "Expected 1 `hiddens` param, found %d" % len(self.hiddens)
        self.hiddens = self.hiddens[0]
        if isinstance(self.hiddens, int):
            hidden_size = self.hiddens
            hiddens_init = None
        elif isinstance(self.hiddens, tuple):
            hidden_shape, hiddens_init = self.hiddens
            if isinstance(hidden_shape, int):
                hidden_size = hidden_shape
            else:
                hidden_size = hidden_shape[-1]
        else:
            raise AssertionError("Hiddens need to be an int or tuple of (shape, theano_expression), found %s" %
                                 type(self.hiddens))

        # other specifications
        # visible activation function!
        self.visible_activation_func = get_activation_function(visible_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.visible_activation_func):
            self.visible_sampling = mrg.binomial
        else:
            # TODO: implement non-binary activation
            log.error("Non-binary visible activation not supported yet!")
            raise NotImplementedError("Non-binary visible activation not supported yet!")

        # hidden activation function!
        self.hidden_activation_func = get_activation_function(hidden_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.hidden_activation_func):
            self.hidden_sampling = mrg.binomial
        else:
            # TODO: implement non-binary activation
            log.error("Non-binary hidden activation not supported yet!")
            raise NotImplementedError("Non-binary hidden activation not supported yet!")

        ##########################################################
        # parameters - make sure to deal with params dict input! #
        ##########################################################
        self.W = self.params.get(
            "W",
            get_weights(weights_init=weights_init,
                        shape=(self.input_size[-1], hidden_size),
                        name="W",
                        rng=mrg,
                        # if gaussian
                        mean=weights_mean,
                        std=weights_std,
                        # if uniform
                        interval=weights_interval)
        )
        self.b_v = self.params.get(
            "b_v",
            get_bias(shape=self.input_size[-1], name="b_v", init_values=bias_init)
        )
        self.b_h = self.params.get(
            "b_h",
            get_bias(shape=hidden_size, name="b_h", init_values=bias_init)
        )

        # Finally have the parameters
        self.params = {"W": self.W, "b_v": self.b_v, "b_h": self.b_h}

        ###############
        # computation #
        ###############
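        # `_gibbs_step_vhv` / `_gibbs_step_hvh` are defined elsewhere in the class. For a
        # binary RBM, one visible-hidden-visible step presumably looks like (a sketch, not
        # the verbatim implementation):
        #   mean_h = hidden_act(dot(v, W) + b_h);    h = hidden_sampling(p=mean_h)
        #   mean_v = visible_act(dot(h, W.T) + b_v); v = visible_sampling(p=mean_v)
        # returning (mean_v, v, mean_h, h), which matches the unpacking below and the
        # `mean_v, _, _, _` call further down.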
        # initialize from visibles if we aren't generating from some hiddens
        if hiddens_init is None:
            [_, v_chain, _, h_chain], self.updates = theano.scan(fn=self._gibbs_step_vhv,
                                                                 outputs_info=[None, self.input, None, None],
                                                                 n_steps=k)
        # initialize from hiddens
        else:
            [_, v_chain, _, h_chain], self.updates = theano.scan(fn=self._gibbs_step_hvh,
                                                                 outputs_info=[None, None, None, hiddens_init],
                                                                 n_steps=k)

        self.v_sample = v_chain[-1]
        self.h_sample = h_chain[-1]

        mean_v, _, _, _ = self._gibbs_step_vhv(self.v_sample)

        # the free-energy cost function!
        # consider v_sample constant when computing gradients on the cost function.
        # disconnected_grad removes v_sample from the gradient graph entirely; to treat
        # its gradient as zero instead, use theano.gradient.zero_grad
        v_sample_constant = theano.gradient.disconnected_grad(self.v_sample)
        # v_sample_constant = v_sample
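        # For reference, the free energy of a binary RBM is
        #   F(v) = -dot(v, b_v) - sum(softplus(dot(v, W) + b_h))
        # so the cost below is the usual CD-k objective (positive phase minus negative phase).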
        self.cost = (self.free_energy(self.input) - self.free_energy(v_sample_constant)) / self.input.shape[0]

        log.debug("Initialized an RBM shape %s",
                  str((self.input_size, hidden_size)))
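
A minimal usage sketch (the import path and class name are assumptions based on this constructor):

import theano.tensor as T
from opendeep.models import RBM  # assumed import path

x = T.matrix('x')  # (batch, visible_size), e.g. binarized MNIST
rbm = RBM(inputs=[((None, 784), x)], hiddens=500, k=15)
# rbm.cost is the CD-k objective; rbm.v_sample / rbm.h_sample are the ends of the Gibbs chain.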
    def __init__(self,
                 inputs=None,
                 hiddens=None,
                 params=None,
                 outdir='outputs/rbm/',
                 visible_activation='sigmoid',
                 hidden_activation='sigmoid',
                 weights_init='uniform',
                 weights_mean=0,
                 weights_std=5e-3,
                 weights_interval='glorot',
                 bias_init=0.0,
                 mrg=RNG_MRG.MRG_RandomStreams(1),
                 k=15):
        """
        RBM constructor. Defines the parameters of the model along with
        basic operations for inferring hidden from visible (and vice-versa),
        as well as for performing CD updates.

        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`)]
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. The `inputs` variables are expected to be of the form (timesteps, batch, data).
            `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        hiddens : int or Tuple of (shape, `Theano.TensorType`)
            An int for the number of hidden units to use, or a tuple of (shape, expression) to route the
            starting hidden values from elsewhere.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        visible_activation : str or callable
            The nonlinear (or linear) visible activation to perform after the dot product from hiddens -> visible layer.
            This activation function should be appropriate for the input unit types, i.e. 'sigmoid' for binary inputs.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        hidden_activation : str or callable
            The nonlinear (or linear) hidden activation to perform after the dot product from visible -> hiddens layer.
            See opendeep.utils.activation for a list of available activation functions. Alternatively, you can pass
            your own function to be used as long as it is callable.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        mrg : random
            A random number generator that is used when sampling. The RBM is a probabilistic model, so it relies a lot
            on sampling. I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        k : int
            The k number of steps used for CD-k or PCD-k with Gibbs sampling. Basically, the number of samples
            generated from the model to train against reconstructing the original input.
        """
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        initial_parameters = locals().copy()
        initial_parameters.pop('self')
        super(RBM, self).__init__(**initial_parameters)

        ##################
        # specifications #
        ##################
        if len(self.inputs) > 1:
            raise NotImplementedError(
                "Expected 1 input to RBM, found %d. Please merge inputs before passing "
                "to the model!" % len(self.inputs))
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        if isinstance(input_shape, int):
            self.input_size = ((None, ) *
                               (self.input.ndim - 1)) + (input_shape, )
        else:
            self.input_size = input_shape
        assert self.input_size is not None, "Need to specify the shape for the last dimension of the input!"

        # our output space is the same as the input space
        self.output_size = self.input_size

        # grab hiddens
        # have only 1 hiddens
        assert len(self.hiddens) == 1, "Expected 1 `hiddens` param, found %d" % len(self.hiddens)
        self.hiddens = self.hiddens[0]
        if isinstance(self.hiddens, int):
            hidden_size = self.hiddens
            hiddens_init = None
        elif isinstance(self.hiddens, tuple):
            hidden_shape, hiddens_init = self.hiddens
            if isinstance(hidden_shape, int):
                hidden_size = hidden_shape
            else:
                hidden_size = hidden_shape[-1]
        else:
            raise AssertionError(
                "Hiddens need to be an int or tuple of (shape, theano_expression), found %s"
                % type(self.hiddens))

        # other specifications
        # visible activation function!
        self.visible_activation_func = get_activation_function(
            visible_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.visible_activation_func):
            self.visible_sampling = mrg.binomial
        else:
            # TODO: implement non-binary activation
            log.error("Non-binary visible activation not supported yet!")
            raise NotImplementedError(
                "Non-binary visible activation not supported yet!")

        # hidden activation function!
        self.hidden_activation_func = get_activation_function(
            hidden_activation)

        # make sure the sampling functions are appropriate for the activation functions.
        if is_binary(self.hidden_activation_func):
            self.hidden_sampling = mrg.binomial
        else:
            # TODO: implement non-binary activation
            log.error("Non-binary hidden activation not supported yet!")
            raise NotImplementedError(
                "Non-binary hidden activation not supported yet!")

        ####################################################
        # parameters - make sure to deal with params_hook! #
        ####################################################
        self.W = self.params.get(
            "W",
            get_weights(
                weights_init=weights_init,
                shape=(self.input_size[-1], hidden_size),
                name="W",
                rng=mrg,
                # if gaussian
                mean=weights_mean,
                std=weights_std,
                # if uniform
                interval=weights_interval))
        self.b_v = self.params.get(
            "b_v",
            get_bias(shape=self.input_size[-1],
                     name="b_v",
                     init_values=bias_init))
        self.b_h = self.params.get(
            "b_h",
            get_bias(shape=hidden_size, name="b_h", init_values=bias_init))

        # Finally have the parameters
        self.params = {"W": self.W, "b_v": self.b_v, "b_h": self.b_h}

        ###############
        # computation #
        ###############
        # initialize from visibles if we aren't generating from some hiddens
        if hiddens_init is None:
            [_, v_chain, _, h_chain], self.updates = theano.scan(
                fn=self._gibbs_step_vhv,
                outputs_info=[None, self.input, None, None],
                n_steps=k)
        # initialize from hiddens
        else:
            [_, v_chain, _, h_chain], self.updates = theano.scan(
                fn=self._gibbs_step_hvh,
                outputs_info=[None, None, None, hiddens_init],
                n_steps=k)

        self.v_sample = v_chain[-1]
        self.h_sample = h_chain[-1]

        mean_v, _, _, _ = self._gibbs_step_vhv(self.v_sample)

        # the free-energy cost function!
        # consider v_sample constant when computing gradients on the cost function
        # this keeps v_sample from being considered in the gradient at all; to propagate
        # a gradient of 0 instead, use theano.gradient.zero_grad
        v_sample_constant = theano.gradient.disconnected_grad(self.v_sample)
        # v_sample_constant = v_sample
        self.cost = (self.free_energy(self.input) -
                     self.free_energy(v_sample_constant)) / self.input.shape[0]

        log.debug("Initialized an RBM shape %s",
                  str((self.input_size, hidden_size)))
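The `free_energy` method referenced by the cost above is not shown in this snippet. For a binary-binary RBM it is conventionally F(v) = -v.b_v - sum_j softplus((v.W + b_h)_j); a sketch under that assumption (not the library's exact code) could be:

    def free_energy(self, v):
        # conventional RBM free energy, summed over the batch so that the
        # cost above divides through to a per-example mean
        # (assumes `import theano.tensor as T`)
        visible_term = T.dot(v, self.b_v)
        hidden_term = T.sum(T.nnet.softplus(T.dot(v, self.W) + self.b_h), axis=1)
        return T.sum(-visible_term - hidden_term)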
Example #8
    def __init__(self,
                 inputs=None,
                 params=None,
                 outdir='outputs/conv1d',
                 n_filters=None,
                 filter_size=None,
                 stride=None,
                 border_mode='valid',
                 weights_init='uniform',
                 weights_interval='glorot',
                 weights_mean=0,
                 weights_std=5e-3,
                 bias_init=0,
                 activation='elu',
                 convolution='mc0',
                 mrg=RNG_MRG.MRG_RandomStreams(1),
                 **kwargs):
        """
        Initialize a 1-D convolutional layer.

        Parameters
        ----------
        inputs : tuple(shape, `Theano.TensorType`)
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. Shape of the incoming data:
            (batch_size, num_channels, data_dimensionality). Most likely, your channels
            will be 1. For example, batches of text will be of the form (N, 1, D) where N=examples in minibatch and
            D=dimensionality (chars, words, etc.)
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        n_filters : int
            The number of filters to use (convolution kernels).
        filter_size : int
            The size of the convolution filter.
        stride : int
            The distance between the receptive field centers of neighboring units. This is the 'stride' of the
            convolution operation.
        border_mode : str, one of 'valid', 'full'
            A string indicating the convolution border mode.
            If 'valid', the convolution is only computed where the input and the
            filter fully overlap.
            If 'full', the convolution is computed wherever the input and the
            filter overlap by at least one position.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        activation : str or Callable
            The activation function to apply to the layer. See opendeep.utils.activation for options.
        convolution : str or Callable
            The 1-dimensional convolution implementation to use. The default of 'mc0' is normally fine. See
            opendeep.utils.conv1d_implementations for alternatives. (This is necessary because Theano only
            supports 2D convolutions at the moment).
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.

        Notes
        -----
        Theano's default convolution function (`theano.tensor.nnet.conv.conv2d`)
        does not support the 'same' border mode, so this layer only accepts
        'valid' and 'full'. A 'same' convolution could be emulated by performing
        a 'full' convolution and cropping the result, at some cost in performance.
        """
        initial_parameters = locals().copy()
        initial_parameters.pop('self')
        super(Conv1D, self).__init__(**initial_parameters)
        if self.inputs is None:
            return

        ##################
        # specifications #
        ##################
        # grab info from the inputs_hook, or from parameters
        # expect input to be in the form (B, C, I) (batch, channel, input data)
        # inputs_hook is a tuple of (Shape, Input)
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        assert self.input.ndim == 3, "Expected 3D input variable with form (batch, channel, input_data)"
        assert len(input_shape) == 3, "Expected 3D input shape with form (batch, channel, input_data)"

        n_channels = input_shape[1]

        filter_shape = (n_filters, n_channels, filter_size)

        # activation function!
        activation_func = get_activation_function(activation)

        # convolution function!
        convolution_func = get_conv1d_function(convolution)

        outshape = ConvOp.getOutputShape(inshp=(input_shape[-1], ),
                                         kshp=(filter_size, ),
                                         stride=(stride, ),
                                         mode=border_mode)
        self.output_size = (input_shape[0], n_filters) + outshape
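        # e.g. with border_mode='valid', an input of length 100, filter_size=5, and
        # stride=1 gives outshape == (96,), so output_size == (batch, n_filters, 96)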

        ##########
        # Params #
        ##########
        W = self.params.get(
            "W",
            get_weights(
                weights_init=weights_init,
                shape=filter_shape,
                name="W",
                rng=mrg,
                # if gaussian
                mean=weights_mean,
                std=weights_std,
                # if uniform
                interval=weights_interval))

        b = self.params.get(
            "b", get_bias(shape=(n_filters, ), name="b",
                          init_values=bias_init))

        # Finally have the two parameters!
        self.params = OrderedDict([("W", W), ("b", b)])

        ########################
        # Computational Graph! #
        ########################
        if border_mode in ['valid', 'full']:
            conved = convolution_func(self.input,
                                      W,
                                      subsample=(stride, ),
                                      image_shape=input_shape,
                                      filter_shape=filter_shape,
                                      border_mode=border_mode)
        else:
            log.error("Invalid border mode: '%s'" % border_mode)
            raise RuntimeError("Invalid border mode: '%s'" % border_mode)

        self.output = activation_func(conved + b.dimshuffle('x', 0, 'x'))
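As a usage sketch (hypothetical shapes and variable names; a sketch, not canonical usage):

    import theano.tensor as T
    x = T.tensor3('x')  # (batch, channel, length)
    layer = Conv1D(inputs=((None, 1, 100), x), n_filters=32, filter_size=5, stride=1)
    # with the default border_mode='valid', layer.output_size == (None, 32, 96)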
Example #9
    def __init__(self,
                 inputs=None,
                 params=None,
                 outdir='outputs/conv2d',
                 n_filters=None,
                 filter_size=None,
                 stride=(1, 1),
                 border_mode='valid',
                 weights_init='uniform',
                 weights_interval='glorot',
                 weights_mean=0,
                 weights_std=5e-3,
                 bias_init=0,
                 activation='elu',
                 convolution='conv2d',
                 mrg=RNG_MRG.MRG_RandomStreams(1),
                 **kwargs):
        """
        Initialize a 2-dimensional convolutional layer.

        Parameters
        ----------
        inputs : tuple(shape, `Theano.TensorType`)
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. Shape of the incoming data:
            (batch_size, num_channels, input_height, input_width).
            If parts of the shape are None, they can be inferred; note, however, that border_mode can't be 'same'.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        n_filters : int
            The number of filters to use (convolution kernels).
        filter_size : tuple(int) or int
            (filter_height, filter_width). If it is an int, size will be duplicated across height and width.
        stride : tuple(int)
            The distance between the receptive field centers of neighboring units. This is the 'stride' of the
            convolution operation.
        border_mode : str, one of 'valid', 'full'
            A string indicating the convolution border mode.
            If 'valid', the convolution is only computed where the input and the
            filter fully overlap.
            If 'full', the convolution is computed wherever the input and the
            filter overlap by at least one position.
        weights_init : str
            Determines the method for initializing model weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        activation : str or Callable
            The activation function to apply to the layer. See opendeep.utils.activation for options.
        convolution : str or Callable
            The 2-dimensional convolution implementation to use. The default of 'conv2d' is normally fine because it
            uses theano's tensor.nnet.conv.conv2d, which cherry-picks the best implementation with a meta-optimizer if
            you set the theano configuration flag 'optimizer_including=conv_meta'. Otherwise, you could pass a
            callable function, such as cudnn or cuda-convnet if you don't want to use the meta-optimizer.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.

        Notes
        -----
        Theano's default convolution function (`theano.tensor.nnet.conv.conv2d`)
        does not support the 'same' border mode, so this layer only accepts
        'valid' and 'full'. A 'same' convolution could be emulated by performing
        a 'full' convolution and cropping the result, at some cost in performance.
        """
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        # (note: comparing strings with `is not` is a bug; copy locals and pop 'self' instead)
        initial_parameters = locals().copy()
        initial_parameters.pop('self')
        super(Conv2D, self).__init__(**initial_parameters)

        ##################
        # specifications #
        ##################
        # expect input to be in the form (B, C, 0, 1) (batch, channel, rows, cols)
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        assert self.input.ndim == 4, "Expected 4D input variable with form (batch, channel, rows, cols)"
        assert len(input_shape) == 4, "Expected 4D input shape with form (batch, channel, rows, cols)"

        n_channels = input_shape[1]

        if isinstance(filter_size, int):
            filter_size = (filter_size, ) * 2

        # activation function!
        activation_func = get_activation_function(activation)

        # convolution function!
        if convolution == 'conv2d':
            # using the theano flag optimizer_including=conv_meta will let this conv function optimize itself.
            convolution_func = conv2d
        else:
            assert callable(convolution), "Input convolution was not 'conv2d' and was not Callable."
            convolution_func = convolution

        # filter shape should be in the form (num_filters, num_channels, filter_size[0], filter_size[1])

        outshape = ConvOp.getOutputShape(inshp=input_shape[-2:],
                                         kshp=filter_size,
                                         stride=stride,
                                         mode=border_mode)
        self.output_size = (input_shape[0], n_filters) + outshape
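        # e.g. with border_mode='valid', a (None, 1, 28, 28) input, filter_size=(5, 5),
        # and stride=(1, 1) gives outshape == (24, 24), so output_size == (None, n_filters, 24, 24)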

        filter_shape = (n_filters, n_channels) + filter_size

        ##########
        # Params #
        ##########
        W = self.params.get(
            "W",
            get_weights(
                weights_init=weights_init,
                shape=filter_shape,
                name="W",
                rng=mrg,
                # if gaussian
                mean=weights_mean,
                std=weights_std,
                # if uniform
                interval=weights_interval))

        b = self.params.get(
            "b", get_bias(shape=(n_filters, ), name="b",
                          init_values=bias_init))

        # Finally have the two parameters!
        self.params = OrderedDict([("W", W), ("b", b)])

        ########################
        # Computational Graph! #
        ########################
        if border_mode in ['valid', 'full']:
            conved = convolution_func(self.input,
                                      W,
                                      subsample=stride,
                                      image_shape=input_shape,
                                      filter_shape=filter_shape,
                                      border_mode=border_mode)
        else:
            raise RuntimeError("Invalid border mode: '%s'" % border_mode)

        self.output = activation_func(conved + b.dimshuffle('x', 0, 'x', 'x'))
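As a usage sketch (hypothetical shapes and variable names; a sketch, not canonical usage):

    import theano.tensor as T
    images = T.tensor4('images')  # (batch, channel, rows, cols)
    conv = Conv2D(inputs=((None, 1, 28, 28), images), n_filters=20, filter_size=(5, 5))
    # conv.output applies the default 'elu' activation; conv.output_size == (None, 20, 24, 24)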
Example #10
    def __init__(self, inputs=None, outputs=None, params=None, outdir='outputs/basic',
                 activation='rectifier',
                 weights_init='uniform', weights_mean=0, weights_std=5e-3, weights_interval='glorot',
                 bias_init=0.0,
                 mrg=RNG_MRG.MRG_RandomStreams(1),
                 **kwargs):
        """
        Initialize a basic layer.

        Parameters
        ----------
        inputs : List of [tuple(shape, `Theano.TensorType`)]
            The dimensionality of the inputs for this model, and the routing information for the model
            to accept inputs from elsewhere. `shape` will be a monad tuple representing known
            sizes for each dimension in the `Theano.TensorType`. The length of `shape` should be equal to number of
            dimensions in `Theano.TensorType`, where the shape element is an integer representing the size for its
            dimension, or None if the shape isn't known. For example, if you have a matrix with unknown batch size
            but fixed feature size of 784, `shape` would be: (None, 784). The full form of `inputs` would be:
            [((None, 784), <TensorType(float32, matrix)>)].
        outputs : int
            The dimensionality of the output for this model.
        params : Dict(string_name: theano SharedVariable), optional
            A dictionary of model parameters (shared theano variables) that you should use when constructing
            this model (instead of initializing your own shared variables). This parameter is useful when you want to
            have two versions of the model that use the same parameters - such as siamese networks or pretraining some
            weights.
        outdir : str
            The directory you want outputs (parameters, images, etc.) to save to. If None, nothing will
            be saved.
        activation : str or callable
            The activation function to use after the dot product going from input -> output. This can be a string
            representing an option from opendeep.utils.activation, or your own function as long as it is callable.
        weights_init : str
            Determines the method for initializing input -> output weights. See opendeep.utils.nnet for options.
        weights_interval : str or float
            If Uniform `weights_init`, the +- interval to use. See opendeep.utils.nnet for options.
        weights_mean : float
            If Gaussian `weights_init`, the mean value to use.
        weights_std : float
            If Gaussian `weights_init`, the standard deviation to use.
        bias_init : float
            The initial value to use for the bias parameter. Most often, the default of 0.0 is preferred.
        mrg : random
            A random number generator that is used when adding noise.
            I recommend using Theano's sandbox.rng_mrg.MRG_RandomStreams.
        """
        # init Model to combine the defaults and config dictionaries with the initial parameters.
        initial_parameters = locals().copy()
        initial_parameters.pop('self')
        super(Dense, self).__init__(**initial_parameters)
        if self.inputs is None:
            return

        ##################
        # specifications #
        ##################
        if len(self.inputs) > 1:
            raise NotImplementedError("Expected 1 input to Dense, found %d. Please merge inputs before passing "
                                      "to the Dense model!" % len(self.inputs))
        # self.inputs is a list of all the input expressions (we enforce only 1, so self.inputs[0] is the input)
        input_shape, self.input = self.inputs[0]
        if isinstance(input_shape, int):
            self.input_size = ((None, ) * (self.input.ndim-1)) + (input_shape, )
        else:
            self.input_size = input_shape
        assert self.input_size is not None, "Need to specify the shape for the last dimension of the input!"

        # We also only have 1 output
        assert self.output_size is not None, "Need to specify outputs size!"
        out_size = self.output_size[0]
        if isinstance(out_size, int):
            self.output_size = self.input_size[:-1] + (out_size,)
        else:
            self.output_size = out_size

        # activation function!
        activation_func = get_activation_function(activation)

        #########################################################
        # parameters - make sure to deal with input dictionary! #
        #########################################################
        W = self.params.get(
            "W",
            get_weights(weights_init=weights_init,
                        shape=(self.input_size[-1], self.output_size[-1]),
                        name="W",
                        rng=mrg,
                        # if gaussian
                        mean=weights_mean,
                        std=weights_std,
                        # if uniform
                        interval=weights_interval)
        )

        b = self.params.get(
            "b",
            get_bias(shape=self.output_size[-1], name="b", init_values=bias_init)
        )

        # Finally have the two parameters - weights matrix W and bias vector b. That is all!
        self.params = OrderedDict([("W", W), ("b", b)])

        ###############
        # computation #
        ###############
        # Here is the meat of the computation transforming input -> output
        # It simply involves a matrix multiplication of inputs*weights, adding the bias vector, and then passing
        # the result through our activation function (normally something nonlinear such as: max(0, output))
        self.output = activation_func(dot(self.input, W) + b)

        log.debug("Initialized a basic fully-connected layer with shape %s and activation: %s",
                  str((self.input_size[-1], self.output_size[-1])), str(activation))
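A usage sketch for this layer (hypothetical names; a sketch, not canonical usage):

    import theano.tensor as T
    x = T.matrix('x')  # (batch, 784) with unknown batch size
    dense = Dense(inputs=[((None, 784), x)], outputs=512, activation='rectifier')
    # dense.output == rectifier(x.dot(W) + b), with W of shape (784, 512) and b of shape (512,)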