    def testConv1DOutputSize(self):
        try:
            x = ftensor3('x')
            # batch, channels, dim
            s = (None, 15, 94)
            filters = 25
            filter_size = 2
            padding = 2
            stride = 2
            conv1 = Conv1D(inputs=(s, x),
                           n_filters=filters,
                           filter_size=filter_size,
                           padding=padding,
                           stride=stride,
                           outdir=None)
            f1 = function(inputs=[x],
                          outputs=conv1.get_outputs().shape,
                          allow_input_downcast=True)
            x1 = np.ones((100, 15, 94))
            outs = f1(x1)
            self.compareSizes(outs=outs,
                              output_size=conv1.output_size,
                              in_size=s,
                              batches=100)
        finally:
            if 'x' in locals():
                del x
            if 'conv1' in locals():
                del conv1
            if 'f1' in locals():
                del f1
            if 'outs' in locals():
                del outs
            if 'x1' in locals():
                del x1
Example #2
    def testConv2DOutputSize(self):
        try:
            x = ftensor4('x')
            # batch, channels, height, width
            s = (None, 3, 25, 32)
            filters = 25
            filter_size = 5
            padding = 3
            stride = 3
            conv1 = Conv2D(inputs=(s, x), n_filters=filters, filter_size=filter_size, padding=padding, stride=stride,
                           outdir=None)
            f1 = function(inputs=[x], outputs=conv1.get_outputs().shape, allow_input_downcast=True)
            x1 = np.ones((100, 3, 25, 32))
            outs = f1(x1)
            self.compareSizes(outs=outs, output_size=conv1.output_size, in_size=s, batches=100)

        finally:
            if 'x' in locals():
                del x
            if 'conv1' in locals():
                del conv1
            if 'f1' in locals():
                del f1
            if 'outs' in locals():
                del outs
            if 'x1' in locals():
                del x1
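
For reference, both tests above boil down to the standard convolution arithmetic out = (in + 2*padding - filter_size) // stride + 1. The sketch below is plain Python and only an illustration of the expected numbers; whether compareSizes verifies exactly these values depends on the Conv1D/Conv2D implementations.

def conv_out_len(in_len, filter_size, padding, stride):
    # standard strided/padded convolution output-length formula
    return (in_len + 2 * padding - filter_size) // stride + 1

print(conv_out_len(94, 2, 2, 2))                             # Conv1D case above: 49
print(conv_out_len(25, 5, 3, 3), conv_out_len(32, 5, 3, 3))  # Conv2D case above: 9 12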
Example #3
    def compile_run_fn(self):
        """
        Compile the f_run function used by `run()` to compute the model's outputs given its inputs,
        and set it as the `self.f_run` attribute.

        .. note::
            The run function defaults to::

                self.f_run = function(inputs  = raise_to_list(self.get_inputs()),
                                      outputs = self.get_outputs(),
                                      updates = self.get_updates(),
                                      name    = 'f_run')
        """
        if not hasattr(self, 'f_run'):
            log.debug("Compiling f_run...")
            t = time.time()
            self.f_run = function(inputs  = raise_to_list(self.get_inputs()),
                                  outputs = self.get_outputs(),
                                  updates = self.get_updates(),
                                  name    = 'f_run')
            log.debug("Compilation done. Took %s", make_time_units_string(time.time() - t))
        else:
            log.warn('f_run already exists!')
Example #5
    def generate(self, initial=None, n_steps=None):
        """
        Generate visible inputs from the model for `n_steps` and starting at recurrent hidden state `initial`.

        Parameters
        ----------
        initial : tensor
            Recurrent hidden state to start generation from.
        n_steps : int
            Number of generation steps to do.

        Returns
        -------
        tuple(array_like, array_like)
            The generated inputs and the ending recurrent hidden states.
        """
        # compile the generate function!
        if not hasattr(self, 'f_generate'):
            log.debug("compiling f_generate...")
            self.f_generate = function(inputs=[self.generate_u0, self.n_steps],
                                       outputs=[self.x_ts, self.u_t],
                                       updates=self.updates_generate)
            log.debug("compilation done!")

        # test against None explicitly; `or` is ambiguous when `initial` is an array
        initial = initial if initial is not None else self.u0.eval()
        n_steps = n_steps if n_steps is not None else self.generate_n_steps
        return self.f_generate(initial, n_steps)
Example #6
    def run(self, input):
        """
        This method will return the Prototype's output (run through the `f_run` function), given an input. The input
        comes from all unique inputs to the models in the Prototype as calculated from `get_inputs()` and the outputs
        computed similarly from `get_outputs`.

        Try to avoid re-compiling the theano function created for run - check a `hasattr(self, 'f_run')` or
        something similar first.

        Parameters
        ----------
        input: array_like
            Theano/numpy tensor-like object that is the input into the model's computation graph.

        Returns
        -------
        array_like
            Theano/numpy tensor-like object that is the output of the model's computation graph.
        """
        # make sure the input is raised to a list - we are going to splat it!
        input = raise_to_list(input)
        # first check if we already made an f_run function
        if hasattr(self, 'f_run'):
            return self.f_run(*input)
        # otherwise, compile it!
        else:
            inputs = self.get_inputs()
            outputs = self.get_outputs()
            updates = self.get_updates()
            t = time.time()
            log.info("Compiling f_run...")
            self.f_run = function(inputs=inputs, outputs=outputs, updates=updates, name="f_run")
            log.info("Compilation done! Took %s", make_time_units_string(time.time() - t))
            return self.f_run(*input)
def _compile_csl_fn():
    """
    BUG HERE, not doing properly by chains (still has the bug, I don't see it)
    This is taking too much GPU mem

    mean: N(# of chains)*K(samples per chain)*D(data dim)
    minibatch: M(# of examples)*D (data dim)
    M * N matrix where each element is LL of one example against one chain.

    This function is for computing CSL over parallel chains of minibatches.

    Returns
    -------
    theano function
        Function computing M * N matrix where each element is LL of one example against one chain.
    """
    # when means is a 3D tensor (N, K, D)
    # When there are N chains, each chain having K samples of dimension D
    log.debug('building theano fn for Bernoulli CSL')
    means = T.tensor3('chains')
    minibatch = T.matrix('inputs')

    # how many chains CSL average over
    N = 5
    # minibatch size
    M = 10
    # data dim
    D = 784
    minibatch.tag.test_value = as_floatX(numpy.random.binomial(1, 0.5, size=(M, D)))
    # chain length
    K = 100
    means.tag.test_value = as_floatX(numpy.random.uniform(size=(N, K, D)))

    # computing LL

    # the length of each chain
    sample_size = means.shape[1]

    _minibatch = minibatch.dimshuffle(0, 'x', 'x', 1)
    _means = means.dimshuffle('x', 0, 1, 2)

    A = T.log(sample_size)
    B = _minibatch * T.log(_means) + (1. - _minibatch) * T.log(1. - _means)
    C = B.sum(axis=3)
    D = log_sum_exp_theano(C, axis=2)
    E = D - A
    # G = E.mean(axis=1)
    f = function(
        inputs=[minibatch, means],
        outputs=E,
        name='CSL_independent_bernoulli_fn'
    )
    return f
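
For clarity, the M * N log-likelihood matrix described in the docstring can be written directly in NumPy. The helper below is a hypothetical reference implementation (not part of OpenDeep) that mirrors the broadcasting and log-sum-exp steps used above.

import numpy

def bernoulli_csl_numpy(minibatch, means):
    # minibatch: (M, D) binary data; means: (N, K, D) Bernoulli means, K samples per chain
    x = minibatch[:, None, None, :]                                       # (M, 1, 1, D)
    mu = numpy.clip(means, 1e-10, 1. - 1e-5)[None, :, :, :]               # (1, N, K, D), clipped to avoid log(0)
    ll = (x * numpy.log(mu) + (1. - x) * numpy.log(1. - mu)).sum(axis=3)  # (M, N, K)
    # log of the average likelihood over the K samples in each chain:
    # numerically stable log-sum-exp over axis 2, then subtract log(K)
    m = ll.max(axis=2, keepdims=True)
    log_sum = m[:, :, 0] + numpy.log(numpy.exp(ll - m).sum(axis=2))
    return log_sum - numpy.log(means.shape[1])                            # (M, N)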
def _compile_csl_fn_v2(mu):
    """
    p(x) = sum_h p(x|h)p(h) where p(x|h) is independent Bernoulli with
    a vector mu, mu_i for dim_i

    This function is for computing CSL over minibatches (in a single chain).

    Parameters
    ----------
    mu : array_like
        mu is (N,D) numpy array

    Returns
    -------
    theano function
        Function computing the Bernoulli CSL log likelihood.
    """
    #
    log.debug('building theano fn for Bernoulli CSL')
    x = T.fmatrix('inputs')
    x.tag.test_value = as_floatX(numpy.random.uniform(size=(10, 784)))
    mu = numpy.clip(mu, 1e-10, (1 - (1e-5)))
    mu = mu[None, :, :]
    inner_1 = numpy.log(mu)
    inner_2 = numpy.log(1. - mu)

    k = mu.shape[1]
    D = mu.shape[2]

    # there are two terms in the log(p(x|mu))

    term_1 = -T.log(k)
    c = T.sum(x.dimshuffle(0, 'x', 1) * inner_1 +
              (1. - x.dimshuffle(0, 'x', 1)) * inner_2,
              axis=2)
    debug = c.sum(axis=1)
    term_2 = log_sum_exp_theano(c, axis=1)

    log_likelihood = term_1 + term_2
    f = function([x], log_likelihood, name='CSL_independent_bernoulli_fn')
    return f
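
A brief usage sketch for the function above, with hypothetical shapes and values; it assumes the module-level imports already in use here (numpy, Theano).

# mu: (N, D) Bernoulli means, e.g. N model samples of D-dimensional data
mu = numpy.random.uniform(0.1, 0.9, size=(100, 784))
csl_fn = _compile_csl_fn_v2(mu)
# x: (M, D) binary minibatch; the compiled function returns one log-likelihood estimate per example
x = numpy.random.binomial(1, 0.5, size=(10, 784)).astype('float32')
log_likelihoods = csl_fn(x)   # shape (10,)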
Example #11
    def run(self, input):
        """
        This method will return the model's output (run through the function), given an input. In the case that
        input_hooks or hidden_hooks are used, the function should use them appropriately and assume they are the input.

        Try to avoid re-compiling the theano function created for run - check a hasattr(self, 'f_run') or
        something similar first. I recommend creating your theano f_run in a create_computation_graph method
        to be called after the class initializes.
        ------------------

        :param input: Theano/numpy tensor-like object that is the input into the model's computation graph.
        :type input: tensor

        :return: Theano/numpy tensor-like object that is the output of the model's computation graph.
        :rtype: tensor
        """
        # set any noise switches to zero
        if len(self.get_noise_switch()) > 0:
            vals = [switch.get_value() for switch in self.get_noise_switch()]
            for switch in self.get_noise_switch():
                switch.set_value(0.)

        # check if the run function is already compiled, otherwise compile it!
        if not hasattr(self, 'f_run'):
            log.debug("Compiling f_run...")
            t = time.time()
            self.f_run = function(inputs  = raise_to_list(self.get_inputs()),
                                  outputs = self.get_outputs(),
                                  updates = self.get_updates())
            log.debug("Compilation done. Took %s", make_time_units_string(time.time() - t))

        # because we use the splat to account for multiple inputs to the function, make sure input is a list.
        input = raise_to_list(input)
        # return the results of the run function!
        output = self.f_run(*input)

        # reset the noise switches
        if len(self.get_noise_switch()) > 0:
            for switch, val in zip(self.get_noise_switch(), vals):
                switch.set_value(val)

        return output
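
The save/zero/restore dance around the noise switches above is what makes run() deterministic for a model trained with dropout-style noise. Below is a minimal standalone sketch of that pattern (an illustration only), wrapped in try/finally so the switches are always restored.

import theano

switches = [theano.shared(1.0, name='dropout_switch')]   # stand-ins for get_noise_switch()
saved = [s.get_value() for s in switches]
for s in switches:
    s.set_value(0.)               # turn the noise off for a deterministic pass
try:
    pass                          # ... call the compiled run function here ...
finally:
    for s, v in zip(switches, saved):
        s.set_value(v)            # always restore the original switch values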
Example #12
    def generate(self, initial=None, n_steps=None):
        """
        Generate visible inputs from the model for n_steps and starting at recurrent hidden state initial

        :param initial: recurrent hidden state to start generation from
        :type initial: tensor

        :param n_steps: number of generation steps to do
        :type n_steps: int

        :return: the generated inputs and the ending recurrent hidden state
        :rtype: matrix, matrix
        """
        # compile the generate function!
        if not hasattr(self, 'f_generate'):
            self.f_generate = function(inputs=[self.generate_u0, self.n_steps],
                                       outputs=[self.v_ts, self.u_t],
                                       updates=self.updates_generate)

        initial = initial if initial is not None else self.u0.eval()
        n_steps = n_steps if n_steps is not None else self.generate_n_steps
        return self.f_generate(initial, n_steps)
Example #13
File: rnn_rbm.py  Project: 52nlp/OpenDeep
    def generate(self, initial=None, n_steps=None):
        """
        Generate visible inputs from the model for n_steps and starting at recurrent hidden state initial

        :param initial: recurrent hidden state to start generation from
        :type initial: tensor

        :param n_steps: number of generation steps to do
        :type n_steps: int

        :return: the generated inputs and the ending recurrent hidden state
        :rtype: matrix, matrix
        """
        # compile the generate function!
        if not hasattr(self, 'f_generate'):
            self.f_generate = function(inputs=[self.generate_u0, self.n_steps],
                                       outputs=[self.v_ts, self.u_t],
                                       updates=self.updates_generate)

        initial = initial if initial is not None else self.u0.eval()
        n_steps = n_steps if n_steps is not None else self.generate_n_steps
        return self.f_generate(initial, n_steps)
Example #14
    def run(self, input):
        """
        This method will return the Prototype's output (run through the `f_run` function), given an input. The input
        comes from all unique inputs to the models in the Prototype as calculated from `get_inputs()` and the outputs
        computed similarly from `get_outputs`.

        Try to avoid re-compiling the theano function created for run - check a `hasattr(self, 'f_run')` or
        something similar first.

        Parameters
        ----------
        input: array_like
            Theano/numpy tensor-like object that is the input into the model's computation graph.

        Returns
        -------
        array_like
            Theano/numpy tensor-like object that is the output of the model's computation graph.
        """
        # make sure the input is raised to a list - we are going to splat it!
        input = raise_to_list(input)
        # first check if we already made an f_run function
        if hasattr(self, 'f_run'):
            return self.f_run(*input)
        # otherwise, compile it!
        else:
            inputs = self.get_inputs()
            outputs = self.get_outputs()
            updates = self.get_updates()
            t = time.time()
            log.info("Compiling f_run...")
            self.f_run = function(inputs=inputs,
                                  outputs=outputs,
                                  updates=updates,
                                  name="f_run")
            log.info("Compilation done! Took %s",
                     make_time_units_string(time.time() - t))
            return self.f_run(*input)
Example #15
    def train(self, monitor_channels=None, train_outservice=None, plot=None, continue_training=False):
        """
        This method performs the training. It is an online training method that iterates over minibatches
        from the dataset for a number of epochs, updating parameters after each minibatch.

        You can interrupt training with a KeyboardInterrupt and it should exit/save parameters gracefully.

        Parameters
        ----------
        monitor_channels : list(MonitorsChannel or Monitor), optional
            The list of channels or monitors containing monitor expressions/variables to compile and evaluate
            on the data.
        train_outservice : OutService, optional
            The OutService to use for the automatically created train_cost monitor. Default of None just outputs
            to logs.
        plot : Plot, optional
            The Plot object to use if we want to graph the outputs (uses bokeh server).
        continue_training : bool
            Whether to continue training from a previous point.
        """
        ###############################################
        # theano index variable to use on the dataset #
        ###############################################
        # index to a [mini]batch - both start and end
        data_idx = T.iscalar('data_index')
        data_end_idx = T.iscalar('data_end_index')
        function_input = [data_idx, data_end_idx]
        batch_slice = slice(data_idx, data_end_idx)

        # compute number of minibatches for training, validation and testing
        # shapes is list of list - input list of datasets to optimizer (for multiple inputs), and each dataset
        # could be a list of shared variables (like multiple sequences from files)
        train_data_shapes = raise_to_list(self.dataset.getDataShape(TRAIN))
        valid_data_shapes = raise_to_list(self.dataset.getDataShape(VALID))
        test_data_shapes = raise_to_list(self.dataset.getDataShape(TEST))

        # train_batches is going to be lists of tuples that contain the start and end indices for train data.
        # this is more useful in the case of datasets that are lists of sequences, so that the start and end
        # indices can make sure a batch does not cross the sequence boundary on the concatenated data
        train_data_lens = [shape[0] for shape in train_data_shapes]
        self.train_batches = self._get_batch_indices(train_data_lens)

        if valid_data_shapes is not None:
            valid_data_lens = [shape[0] for shape in valid_data_shapes]
            self.valid_batches = self._get_batch_indices(valid_data_lens)
        else:
            self.valid_batches = None
        if test_data_shapes is not None:
            test_data_lens = [shape[0] for shape in test_data_shapes]
            self.test_batches = self._get_batch_indices(test_data_lens)
        else:
            self.test_batches = None

        # create the givens for the input function as pairs of (input_variable: sliced_data)
        train_givens = self._get_givens_subset(TRAIN, batch_slice)
        valid_givens = self._get_givens_subset(VALID, batch_slice)
        test_givens = self._get_givens_subset(TEST, batch_slice)

        # Now time to create the gradient updates for the model - make sure to handle the possible
        # list of costs used for pretraining of certain parts of the model.
        train_costs = raise_to_list(self.model.get_train_cost())
        train_updates = []
        self.gradients = []
        for i, train_cost in enumerate(train_costs):
            # Now create the training cost function for the model to use while training - update parameters
            # gradient!
            gradients, _ = self.model.get_gradient(cost=train_cost)
            self.gradients.append(gradients)

            # Calculate the optimizer updates each run
            # This is where the magic happens for a lot of sub-implementations of SGD!
            # It tells how to update the params each training epoch
            gradient_updates = self.get_updates(gradients)

            # Combine the updates from the model also if applicable
            updates = self.model.get_updates()
            if updates:
                updates.update(gradient_updates)
            else:
                updates = gradient_updates
            train_updates.append(updates)

        # grab the model parameters to use during training
        self.params = self.model.get_params()
        log.info("%s params: %s", str(type(self.model)), str(self.params))

        # deal with the monitor channels if they were given (or take them from the plot)
        if monitor_channels is None and plot is not None and len(plot.channels) > 0:
            monitor_channels = plot.channels
        self.train_monitors_dict = {}
        self.valid_monitors_dict = {}
        self.test_monitors_dict = {}
        self.train_monitors_outservice_dict = {}
        self.valid_monitors_outservice_dict = {}
        self.test_monitors_outservice_dict = {}
        if monitor_channels:
            # collapse the appropriate monitors into their (name, expression, out_service) tuples
            train_collapsed = collapse_channels(monitor_channels, train=True)
            valid_collapsed = collapse_channels(monitor_channels, valid=True)
            test_collapsed  = collapse_channels(monitor_channels, test=True)
            # get name: expression dictionary
            self.train_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in train_collapsed])
            self.valid_monitors_dict = OrderedDict([(name, expression) for name, expression, _ in valid_collapsed])
            self.test_monitors_dict  = OrderedDict([(name, expression) for name, expression, _ in test_collapsed])
            # get name: outservice dictionary
            self.train_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in train_collapsed])
            self.valid_monitors_outservice_dict = OrderedDict([(name, out) for name, _, out in valid_collapsed])
            self.test_monitors_outservice_dict  = OrderedDict([(name, out) for name, _, out in test_collapsed])
        # finally deal with an outservice provided to monitor training cost
        self.train_outservice = train_outservice
        # remove redundant files made by the fileservice for the train monitor.
        # TODO: THIS FEELS LIKE A HACK. I don't like it.
        if isinstance(self.train_outservice, FileService):
            os.remove(self.train_outservice.valid_filename)
            os.remove(self.train_outservice.test_filename)

        #######################################
        # compile train and monitor functions #
        #######################################
        train_functions = []
        for i in range(len(train_costs)):
            updates = train_updates[i]
            train_cost = train_costs[i]
            # Compile the training function!
            log.info('Compiling f_learn %d/%d function for model %s...', i + 1, len(train_updates),
                     str(type(self.model)))
            t = time.time()

            f_learn = function(inputs=function_input,
                               updates=updates,
                               outputs=[train_cost] + self.train_monitors_dict.values(),
                               givens=train_givens,
                               name='f_learn_%d' % i)

            log.info('f_learn compilation took %s', make_time_units_string(time.time() - t))
            train_functions.append(f_learn)

        # figure out if we want valid and test
        self.valid_flag = (self.dataset.getSubset(VALID)[0] is not None) and (len(self.valid_monitors_dict) > 0)
        self.test_flag = (self.dataset.getSubset(TEST)[0] is not None) and (len(self.test_monitors_dict) > 0)
        # Now compile the monitor functions!
        log.debug("Compiling monitor functions...")
        monitor_t = time.time()
        # valid monitors
        if self.valid_flag:
            self.valid_monitor_function = function(
                inputs=function_input,
                updates=self.model.get_updates(),
                outputs=self.valid_monitors_dict.values(),
                givens=valid_givens,
                name='valid_monitor_function'
            )
        else:
            self.valid_monitor_function = None

        # test monitors
        if self.test_flag:
            self.test_monitor_function = function(
                inputs=function_input,
                updates=self.model.get_updates(),
                outputs=self.test_monitors_dict.values(),
                givens=test_givens,
                name='test_monitor_function'
            )
        else:
            self.test_monitor_function = None

        log.debug("Compilation done. Took %s", make_time_units_string(time.time() - monitor_t))

        ##################
        # start training #
        ##################
        # make sure to deal with a list of train_cost functions - for layer-wise pretraining!
        # this list of training functions was created during __init__()
        start_time = time.time()
        for func_i, train_function in enumerate(train_functions):
            log.info("-----------TRAINING %s function %d/%d FOR %d EPOCHS (continue_training=%s)-----------",
                     str(type(self.model)), func_i + 1, len(train_functions), self.n_epoch, str(continue_training))

            log.debug("Train dataset size is: %s", self.dataset.getDataShape(TRAIN))
            if self.dataset.getSubset(VALID)[0] is not None:
                log.debug("Valid dataset size is: %s", self.dataset.getDataShape(VALID))
            if self.dataset.getSubset(TEST)[0] is not None:
                log.debug("Test dataset size is: %s", self.dataset.getDataShape(TEST))

            self.STOP = False
            self.epoch_counter = 0
            if not continue_training:
                # reset any decay params
                for decay_param in self.get_decay_params():
                    decay_param.reset()

            self.times = []
            self.best_cost = numpy.inf
            self.best_params = None
            self.patience = 0

            t = time.time()

            while not self.STOP:
                try:
                    self.STOP = self._perform_one_epoch(train_function, plot)
                except KeyboardInterrupt:
                    log.info("STOPPING EARLY FROM KEYBOARDINTERRUPT")
                    self.STOP = True

            # save params
            if self.best_params is not None:
                log.debug("Restoring best model parameters...")
                set_shared_values(self.params, self.best_params)
            log.debug("Saving model parameters...")
            self.model.save_params('trained_epoch_' + str(self.epoch_counter) + '.pkl')

            log.info("------------TRAIN TIME TOOK %s---------", make_time_units_string(time.time() - t))

        log.info("------------TOTAL %s TRAIN TIME TOOK %s---------",
                 str(type(self.model)), make_time_units_string(time.time() - start_time))
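
The (start, end) pairs built above exist so that a minibatch never straddles a sequence boundary in the concatenated data. The sketch below captures that intent but is an assumption, not OpenDeep's actual _get_batch_indices (which presumably also honors minimum_batch_size for the leftover tail of each sequence).

def batch_indices(sequence_lengths, batch_size):
    # (start, end) pairs into the concatenated data that never cross a sequence boundary
    indices, offset = [], 0
    for length in sequence_lengths:
        for start in range(0, length - batch_size + 1, batch_size):
            indices.append((offset + start, offset + start + batch_size))
        offset += length
    return indices

print(batch_indices([10, 7], 4))   # [(0, 4), (4, 8), (10, 14)]; the short tail of each sequence is dropped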
Example #16
    def __init__(self,
                 model,
                 dataset,
                 iterator_class=SequentialIterator,
                 config=None,
                 defaults=_defaults,
                 rng=None,
                 n_epoch=None,
                 batch_size=None,
                 minimum_batch_size=None,
                 save_frequency=None,
                 early_stop_threshold=None,
                 early_stop_length=None,
                 learning_rate=None,
                 lr_decay=None,
                 lr_factor=None,
                 momentum=None,
                 momentum_decay=None,
                 momentum_factor=None,
                 nesterov_momentum=None,
                 flag_para_load=None):
        # superclass init
        super(SGD, self).__init__(config=config, defaults=defaults)
        # config and defaults are now combined in self.args! yay!

        self.model = model
        self.dataset = dataset
        self.iterator = iterator_class

        # Training epochs - how many times to iterate over the whole dataset
        self.n_epoch = n_epoch or self.args.get('n_epoch')

        # Dataset iteration batch sizes - number of examples in each calculation
        self.batch_size = batch_size or self.args.get('batch_size')
        self.minimum_batch_size = minimum_batch_size or self.args.get(
            'minimum_batch_size')

        # Number of epochs between saving model parameters
        self.save_frequency = save_frequency or self.args.get('save_frequency')

        # Early stopping threshold and patience - by how much does the cost have to improve over a number of epochs
        self.early_stop_threshold = early_stop_threshold or self.args.get(
            'early_stop_threshold')
        self.early_stop_length = early_stop_length or self.args.get(
            'early_stop_length')

        # Learning rate - how drastic of a step do the parameters change
        lr = learning_rate or self.args.get('learning_rate')
        self.learning_rate = sharedX(lr, 'learning_rate')
        self.lr_scalers = self.model.get_lr_scalers()
        if lr_decay or self.args.get('lr_decay'):
            self.learning_rate_decay = get_decay_function(
                lr_decay or self.args.get('lr_decay'),
                self.learning_rate,
                self.learning_rate.get_value(),
                lr_factor or self.args.get('lr_factor'))

        # Momentum - smoothing over the parameter changes (see Hinton)
        self.momentum = sharedX(momentum or self.args.get('momentum'),
                                'momentum')
        if self.args.get('momentum_decay'):
            self.momentum_decay = get_decay_function(
                momentum_decay or self.args.get('momentum_decay'),
                self.momentum,
                self.momentum.get_value(),
                momentum_factor or self.args.get('momentum_factor'))
        self.nesterov_momentum = nesterov_momentum or self.args.get('nesterov_momentum')

        # RNG for working on random iterator
        if rng is None:
            random.seed(123)
            self.rng = random
        else:
            self.rng = rng

        self.params = self.model.get_params()

        # Now create the training cost function for the model to use while training - update parameters
        log.info("%s params: %s", str(type(self.model)), str(self.params))
        # gradient!
        gradient = grad(self.model.get_train_cost(), self.params)
        grads = OrderedDict(zip(self.params, gradient))

        # Calculate the optimizer updates each run
        # This is where the magic happens for a lot of sub-implementations of SGD, including AdaDelta!
        # It tells how to update the params each training epoch
        gradient_updates = self.get_updates(grads)

        # Combine the updates from the model also if applicable
        train_updates = model.get_updates()
        if train_updates:
            train_updates.update(gradient_updates)
        else:
            train_updates = gradient_updates

        # Compile the training function!
        log.info('Compiling f_learn function for model %s...',
                 str(type(self.model)))
        t = time.time()
        self.f_learn = function(inputs=model.get_inputs(),
                                updates=train_updates,
                                outputs=self.model.get_train_cost(),
                                name='f_learn')
        log.info('f_learn compilation took %s',
                 make_time_units_string(time.time() - t))

        # Determine if this function is unsupervised or not by looking at the number of inputs to the f_learn function.
        # If there is only one input, it is unsupervised, otherwise, it is supervised.
        # This workaround was provided by Pascal Lamblin on the theano-users google group
        num_inputs = len(
            [i for i in self.f_learn.maker.inputs if not i.shared])
        if num_inputs == 1:
            log.debug("Model is unsupervised: 1 input to f_learn.")
            self.unsupervised = True
        elif num_inputs == 2:
            log.debug("Model is supervised: 2 inputs to f_learn.")
            self.unsupervised = False
        else:
            log.error(
                "Number of inputs to f_learn on model %s was %s. Needs to be 1 for unsupervised or 2 for supervised.",
                str(type(self.model)), str(num_inputs))
            raise AssertionError(
                "Number of inputs to f_learn on model %s was %s. Needs to be 1 for unsupervised or 2 for supervised."
                % (str(type(self.model)), str(num_inputs)))

        # grab the function(s) to use to monitor different model values during training
        self.monitors = self.model.get_monitors()
Example #17
    def __init__(self,
                 config=None,
                 defaults=_defaults,
                 inputs_hook=None,
                 hiddens_hook=None,
                 params_hook=None,
                 input_size=None,
                 hidden_size=None,
                 corruption_level=None,
                 hidden_activation=None,
                 visible_activation=None,
                 cost_function=None):
        # Now, initialize with Model class to combine config and defaults!
        # Here, defaults is defined via a dictionary. However, you could also
        # pass a filename to a JSON or YAML file with the same format.
        super(DenoisingAutoencoder, self).__init__(config=config,
                                                   defaults=defaults)
        # Any parameter from the 'config' will overwrite the 'defaults' dictionary.
        # These parameters are now accessible from the 'self.args' variable!

        # When accessing model parameters, it is best practice to try to find the parameters
        # explicitly passed first, and then go to the 'self.args' configuration.

        # Define model hyperparameters
        # deal with the inputs_hook and hiddens_hook for the size parameters!
        # if the hook exists, grab the size from the first element of the tuple.
        if inputs_hook:
            input_size = inputs_hook[0]
        # otherwise, grab the size from the configurations.
        else:
            input_size = input_size or self.args.get('input_size')
        if hiddens_hook:
            hidden_size = hiddens_hook[0]
        else:
            hidden_size = hidden_size or self.args.get('hidden_size')

        corruption_level = corruption_level or self.args.get(
            'corruption_level')

        # use the helper methods to grab appropriate activation functions from names!
        hidden_act_name = hidden_activation or self.args.get(
            'hidden_activation')
        hidden_activation = get_activation_function(hidden_act_name)
        visible_act_name = visible_activation or self.args.get(
            'visible_activation')
        visible_activation = get_activation_function(visible_act_name)

        # do the same for the cost function
        cost_func_name = cost_function or self.args.get('cost_function')
        cost_function = get_cost_function(cost_func_name)

        # Now, define the symbolic input to the model (Theano)
        # We use a matrix rather than a vector so that minibatch processing can be done in parallel.
        # Make sure to deal with 'inputs_hook' if it exists!
        if inputs_hook:
            # grab the new input variable from the inputs_hook tuple
            x = inputs_hook[1]
        else:
            x = T.fmatrix("X")
        self.inputs = [x]

        # Build the model's parameters - a weight matrix and two bias vectors
        # Make sure to deal with 'params_hook' if it exists!
        if params_hook:
            # check to see if it contains the three necessary variables
            assert len(params_hook) == 3, "Not correct number of params to DAE, needs 3!"
            W, b0, b1 = params_hook
        else:
            W = get_weights_uniform(shape=(input_size, hidden_size), name="W")
            b0 = get_bias(shape=input_size, name="b0")
            b1 = get_bias(shape=hidden_size, name="b1")
        self.params = [W, b0, b1]

        # Perform the computation for a denoising autoencoder!
        # first, add noise (corrupt) the input
        corrupted_input = salt_and_pepper(input=x,
                                          corruption_level=corruption_level)
        # next, compute the hidden layer given the inputs (the encoding function)
        # We don't need to worry about hiddens_hook during training, because we can't
        # compute a cost without having the input!
        # hiddens_hook is more for the predict function and linking methods below.
        hiddens = hidden_activation(T.dot(corrupted_input, W) + b1)
        # finally, create the reconstruction from the hidden layer (we tie the weights with W.T)
        reconstruction = visible_activation(T.dot(hiddens, W.T) + b0)
        # the training cost is reconstruction error
        self.train_cost = cost_function(output=reconstruction, target=x)

        # Compile everything into a Theano function for prediction!
        # When using real-world data in predictions, we wouldn't corrupt the input first.
        # Therefore, create another version of the hiddens and reconstruction without adding the noise.
        # Here is where we would handle hiddens_hook because this is a generative model!
        # For the predict function, it would take in the hiddens instead of the input variable x.
        if hiddens_hook:
            self.hiddens = hiddens_hook[1]
        else:
            self.hiddens = hidden_activation(T.dot(x, W) + b1)
        # make the reconstruction (generated) from the hiddens
        self.recon_predict = visible_activation(T.dot(self.hiddens, W.T) + b0)
        # now compile the predict function accordingly - if it used x or hiddens as the input.
        if hiddens_hook:
            self.f_predict = function(inputs=[self.hiddens],
                                      outputs=self.recon_predict)
        else:
            self.f_predict = function(inputs=[x], outputs=self.recon_predict)
Example #18
    def __init__(self, inputs_hook=None, hiddens_hook=None, params_hook=None,
                 input_size=28*28, hidden_size=1000, noise_level=0.4,
                 hidden_activation='tanh', visible_activation='sigmoid', cost_function='binary_crossentropy'):
        # initialize the Model superclass
        super(DenoisingAutoencoder, self).__init__(
            **{arg: val for (arg, val) in locals().iteritems() if arg != 'self'}
        )

        # Define model hyperparameters
        # deal with the inputs_hook and hiddens_hook for the size parameters!
        # if the hook exists, grab the size from the first element of the tuple.
        if self.inputs_hook is not None:
            assert len(self.inputs_hook) == 2, "Was expecting inputs_hook to be a tuple."
            self.input_size = inputs_hook[0]

        if self.hiddens_hook is not None:
            assert len(self.hiddens_hook) == 2, "was expecting hiddens_hook to be a tuple."
            hidden_size = hiddens_hook[0]


        # use the helper methods to grab appropriate activation functions from names!
        hidden_activation  = get_activation_function(hidden_activation)
        visible_activation = get_activation_function(visible_activation)

        # do the same for the cost function
        cost_function = get_cost_function(cost_function)

        # Now, define the symbolic input to the model (Theano)
        # We use a matrix rather than a vector so that minibatch processing can be done in parallel.
        # Make sure to deal with 'inputs_hook' if it exists!
        if self.inputs_hook is not None:
            # grab the new input variable from the inputs_hook tuple
            x = self.inputs_hook[1]
        else:
            x = T.matrix("X")
        self.inputs = [x]

        # Build the model's parameters - a weight matrix and two bias vectors
        # Make sure to deal with 'params_hook' if it exists!
        if self.params_hook:
            # check to see if it contains the three necessary variables
            assert len(self.params_hook) == 3, "Not correct number of params to DAE, needs 3!"
            W, b0, b1 = self.params_hook
        else:
            W  = get_weights_uniform(shape=(self.input_size, hidden_size), name="W")
            b0 = get_bias(shape=self.input_size, name="b0")
            b1 = get_bias(shape=hidden_size, name="b1")
        self.params = [W, b0, b1]

        # Perform the computation for a denoising autoencoder!
        # first, add noise (corrupt) the input
        corrupted_input = salt_and_pepper(input=x, noise_level=noise_level)
        # next, run the hidden layer given the inputs (the encoding function)
        # We don't need to worry about hiddens_hook during training, because we can't
        # run a cost without having the input!
        # hiddens_hook is more for the run function and linking methods below.
        hiddens = hidden_activation(T.dot(corrupted_input, W) + b1)
        # finally, create the reconstruction from the hidden layer (we tie the weights with W.T)
        reconstruction = visible_activation(T.dot(hiddens, W.T) + b0)
        # the training cost is reconstruction error
        self.train_cost = cost_function(output=reconstruction, target=x)

        # Compile everything into a Theano function for prediction!
        # When using real-world data in predictions, we wouldn't corrupt the input first.
        # Therefore, create another version of the hiddens and reconstruction without adding the noise.
        # Here is where we would handle hiddens_hook because this is a generative model!
        # For the run function, it would take in the hiddens instead of the input variable x.
        if self.hiddens_hook is not None:
            self.hiddens = self.hiddens_hook[1]
        else:
            self.hiddens = hidden_activation(T.dot(x, W) + b1)
        # make the reconstruction (generated) from the hiddens
        self.recon_predict = visible_activation(T.dot(self.hiddens, W.T) + b0)
        # now compile the run function accordingly - if it used x or hiddens as the input.
        if self.hiddens_hook is not None:
            self.f_run = function(inputs=[self.hiddens], outputs=self.recon_predict)
        else:
            self.f_run = function(inputs=[x], outputs=self.recon_predict)
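
To make the graph above concrete, here is a NumPy-only sketch of the same forward pass: approximate salt-and-pepper corruption, a tanh encoder, a sigmoid decoder with tied weights, and a binary cross-entropy reconstruction cost. The corruption and cost are simplified stand-ins for OpenDeep's salt_and_pepper and binary_crossentropy, so treat this as an illustration rather than the library's implementation.

import numpy

rng = numpy.random.RandomState(0)
x = rng.binomial(1, 0.5, size=(5, 784)).astype('float64')            # tiny binary "minibatch"
W = rng.uniform(-0.05, 0.05, size=(784, 1000))                        # weight matrix, tied for decoding
b0 = numpy.zeros(784)                                                 # visible bias
b1 = numpy.zeros(1000)                                                # hidden bias

mask = rng.binomial(1, 0.4, size=x.shape).astype(bool)                # entries to corrupt (noise_level=0.4)
corrupted = numpy.where(mask, rng.binomial(1, 0.5, size=x.shape), x)  # approximate salt-and-pepper noise
hiddens = numpy.tanh(corrupted.dot(W) + b1)                           # encoder
recon = 1.0 / (1.0 + numpy.exp(-(hiddens.dot(W.T) + b0)))             # decoder with W.T (tied weights)
recon = numpy.clip(recon, 1e-7, 1 - 1e-7)                             # guard the logs below
cost = -numpy.mean(x * numpy.log(recon) + (1 - x) * numpy.log(1 - recon))  # binary cross-entropy
print(cost)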
Example #19
    def _build_computation_graph(self):
        ###################### BUILD NETWORK ##########################
        # whether or not to mirror the input images before feeding them into the network
        if self.flag_datalayer:
            layer_1_input = mirror_images(
                input=self.x,
                image_shape=(self.batch_size, 3, 256, 256),  # bc01 format
                cropsize=227,
                rand=self.rand,
                flag_rand=self.rand_crop)
        else:
            layer_1_input = self.x  # 4D tensor (going to be in bc01 format)

        # Start with 5 convolutional pooling layers
        log.debug("convpool layer 1...")
        convpool_layer1 = ConvPoolLayer(
            inputs_hook=((self.batch_size, 3, 227, 227), layer_1_input),
            filter_shape=(96, 3, 11, 11),
            convstride=4,
            padsize=0,
            group=1,
            poolsize=3,
            poolstride=2,
            bias_init=0.0,
            local_response_normalization=True)
        # Add this layer's parameters!
        self.params += convpool_layer1.get_params()

        log.debug("convpool layer 2...")
        convpool_layer2 = ConvPoolLayer(
            inputs_hook=((self.batch_size, 96, 27, 27), convpool_layer1.get_outputs()),
            filter_shape=(256, 96, 5, 5),
            convstride=1,
            padsize=2,
            group=2,
            poolsize=3,
            poolstride=2,
            bias_init=0.1,
            local_response_normalization=True)
        # Add this layer's parameters!
        self.params += convpool_layer2.get_params()

        log.debug("convpool layer 3...")
        convpool_layer3 = ConvPoolLayer(
            inputs_hook=((self.batch_size, 256, 13, 13),
                         convpool_layer2.get_outputs()),
            filter_shape=(384, 256, 3, 3),
            convstride=1,
            padsize=1,
            group=1,
            poolsize=1,
            poolstride=0,
            bias_init=0.0,
            local_response_normalization=False)
        # Add this layer's parameters!
        self.params += convpool_layer3.get_params()

        log.debug("convpool layer 4...")
        convpool_layer4 = ConvPoolLayer(
            inputs_hook=((self.batch_size, 384, 13, 13),
                         convpool_layer3.get_outputs()),
            filter_shape=(384, 384, 3, 3),
            convstride=1,
            padsize=1,
            group=2,
            poolsize=1,
            poolstride=0,
            bias_init=0.1,
            local_response_normalization=False)
        # Add this layer's parameters!
        self.params += convpool_layer4.get_params()

        log.debug("convpool layer 5...")
        convpool_layer5 = ConvPoolLayer(
            inputs_hook=((self.batch_size, 384, 13, 13),
                         convpool_layer4.get_outputs()),
            filter_shape=(256, 384, 3, 3),
            convstride=1,
            padsize=1,
            group=2,
            poolsize=3,
            poolstride=2,
            bias_init=0.0,
            local_response_normalization=False)
        # Add this layer's parameters!
        self.params += convpool_layer5.get_params()

        # Now onto the fully-connected layers!
        fc_config = {
            'activation': 'rectifier',   # type of activation function to use for output
            'weights_init': 'gaussian',  # either 'gaussian' or 'uniform' - how to initialize weights
            'weights_mean': 0.0,         # mean for gaussian weights init
            'weights_std': 0.005,        # standard deviation for gaussian weights init
            'bias_init': 0.0             # how to initialize the bias parameter
        }
        log.debug("fully connected layer 1 (model layer 6)...")
        # we want to have dropout applied to the training version, but not the test version.
        fc_layer6_input = T.flatten(convpool_layer5.get_outputs(), 2)
        fc_layer6 = BasicLayer(inputs_hook=(9216, fc_layer6_input),
                               output_size=4096,
                               noise='dropout',
                               noise_level=0.5,
                               **fc_config)
        # Add this layer's parameters!
        self.params += fc_layer6.get_params()
        # Add the dropout noise switch
        self.noise_switches += fc_layer6.get_noise_switch()

        log.debug("fully connected layer 2 (model layer 7)...")
        fc_layer7 = BasicLayer(inputs_hook=(4096, fc_layer6.get_outputs()),
                               output_size=4096,
                               noise='dropout',
                               noise_level=0.5,
                               **fc_config)

        # Add this layer's parameters!
        self.params += fc_layer7.get_params()
        # Add the dropout noise switch
        self.noise_switches += fc_layer7.get_noise_switch()

        # last layer is a softmax prediction output layer
        softmax_config = {
            'weights_init': 'gaussian',
            'weights_mean': 0.0,
            'weights_std': 0.005,
            'bias_init': 0.0
        }
        log.debug("softmax classification layer (model layer 8)...")
        softmax_layer8 = SoftmaxLayer(inputs_hook=(4096,
                                                   fc_layer7.get_outputs()),
                                      output_size=1000,
                                      **softmax_config)

        # Add this layer's parameters!
        self.params += softmax_layer8.get_params()

        # finally the softmax output from the whole thing!
        self.output = softmax_layer8.get_outputs()
        self.targets = softmax_layer8.get_targets()

        #####################
        # Cost and monitors #
        #####################
        self.train_cost = softmax_layer8.negative_log_likelihood()
        cost = softmax_layer8.negative_log_likelihood()
        errors = softmax_layer8.errors()
        train_errors = softmax_layer8.errors()

        self.monitors = OrderedDict([('cost', cost), ('errors', errors),
                                     ('dropout_errors', train_errors)])

        #########################
        # Compile the functions #
        #########################
        log.debug("Compiling functions!")
        t = time.time()
        log.debug("f_run...")
        # use the actual argmax from the classification
        self.f_run = function(inputs=[self.x],
                              outputs=softmax_layer8.get_argmax_prediction())
        log.debug("compilation took %s",
                  make_time_units_string(time.time() - t))
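
As a cross-check on the hard-coded sizes above (the 27 and 13 in the inputs_hook tuples and the 9216 input to fc_layer6), the usual conv/pool arithmetic reproduces them. This is a plain-Python sketch and assumes ConvPoolLayer follows the standard formulas.

def conv_out(size, filt, stride, pad):
    return (size + 2 * pad - filt) // stride + 1

def pool_out(size, pool, stride):
    return (size - pool) // stride + 1 if stride > 0 else size

s = pool_out(conv_out(227, 11, 4, 0), 3, 2)   # layer 1: 227 -> 55 -> 27
s = pool_out(conv_out(s, 5, 1, 2), 3, 2)      # layer 2: 27 -> 27 -> 13
s = conv_out(s, 3, 1, 1)                      # layer 3 keeps 13 (poolsize 1)
s = conv_out(s, 3, 1, 1)                      # layer 4 keeps 13
s = pool_out(conv_out(s, 3, 1, 1), 3, 2)      # layer 5: 13 -> 13 -> 6
print(256 * s * s)                            # 9216, the fc_layer6 input size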
Example #20
    def __init__(self,
                 inputs_hook=None,
                 hiddens_hook=None,
                 params_hook=None,
                 input_size=28 * 28,
                 hidden_size=1000,
                 noise_level=0.4,
                 hidden_activation='tanh',
                 visible_activation='sigmoid',
                 cost_function='binary_crossentropy'):
        # initialize the Model superclass
        super(DenoisingAutoencoder, self).__init__(**{
            arg: val
            for (arg, val) in locals().iteritems() if arg != 'self'
        })

        # Define model hyperparameters
        # deal with the inputs_hook and hiddens_hook for the size parameters!
        # if the hook exists, grab the size from the first element of the tuple.
        if self.inputs_hook is not None:
            assert len(self.inputs_hook) == 2, "Was expecting inputs_hook to be a tuple."
            self.input_size = inputs_hook[0]

        if self.hiddens_hook is not None:
            assert len(self.hiddens_hook) == 2, "was expecting hiddens_hook to be a tuple."
            hidden_size = hiddens_hook[0]

        # use the helper methods to grab appropriate activation functions from names!
        hidden_activation = get_activation_function(hidden_activation)
        visible_activation = get_activation_function(visible_activation)

        # do the same for the cost function
        cost_function = get_cost_function(cost_function)

        # Now, define the symbolic input to the model (Theano)
        # We use a matrix rather than a vector so that minibatch processing can be done in parallel.
        # Make sure to deal with 'inputs_hook' if it exists!
        if self.inputs_hook is not None:
            # grab the new input variable from the inputs_hook tuple
            x = self.inputs_hook[1]
        else:
            x = T.matrix("X")
        self.inputs = [x]

        # Build the model's parameters - a weight matrix and two bias vectors
        # Make sure to deal with 'params_hook' if it exists!
        if self.params_hook:
            # check to see if it contains the three necessary variables
            assert len(self.params_hook) == 3, "Not correct number of params to DAE, needs 3!"
            W, b0, b1 = self.params_hook
        else:
            W = get_weights_uniform(shape=(self.input_size, hidden_size),
                                    name="W")
            b0 = get_bias(shape=self.input_size, name="b0")
            b1 = get_bias(shape=hidden_size, name="b1")
        self.params = [W, b0, b1]

        # Perform the computation for a denoising autoencoder!
        # first, add noise (corrupt) the input
        corrupted_input = salt_and_pepper(input=x, noise_level=noise_level)
        # next, run the hidden layer given the inputs (the encoding function)
        # We don't need to worry about hiddens_hook during training, because we can't
        # run a cost without having the input!
        # hiddens_hook is more for the run function and linking methods below.
        hiddens = hidden_activation(T.dot(corrupted_input, W) + b1)
        # finally, create the reconstruction from the hidden layer (we tie the weights with W.T)
        reconstruction = visible_activation(T.dot(hiddens, W.T) + b0)
        # the training cost is reconstruction error
        self.train_cost = cost_function(output=reconstruction, target=x)

        # Compile everything into a Theano function for prediction!
        # When using real-world data in predictions, we wouldn't corrupt the input first.
        # Therefore, create another version of the hiddens and reconstruction without adding the noise.
        # Here is where we would handle hiddens_hook because this is a generative model!
        # For the run function, it would take in the hiddens instead of the input variable x.
        if self.hiddens_hook is not None:
            self.hiddens = self.hiddens_hook[1]
        else:
            self.hiddens = hidden_activation(T.dot(x, W) + b1)
        # make the reconstruction (generated) from the hiddens
        self.recon_predict = visible_activation(T.dot(self.hiddens, W.T) + b0)
        # now compile the run function accordingly - if it used x or hiddens as the input.
        if self.hiddens_hook is not None:
            self.f_run = function(inputs=[self.hiddens],
                                  outputs=self.recon_predict)
        else:
            self.f_run = function(inputs=[x], outputs=self.recon_predict)
    def build_computation_graph(self):
        #################
        # Build the GSN #
        #################
        log.debug("Building GSN graphs...")

        # GSN for training - with noise specified in initialization
        # if there is no hiddens_hook, build the GSN normally using the input X
        if not self.hiddens_flag:
            p_X_chain, _ = self.build_gsn(add_noise=self.add_noise)

        # if there is a hiddens_hook, we want to change the order layers are updated and make this purely
        # generative from the hiddens
        else:
            p_X_chain, _ = self.build_gsn(hiddens=self.hiddens, add_noise=self.add_noise, reverse=True)

        # GSN for prediction - same as above but no noise
        # deal with hiddens_hook exactly as above.
        if not self.hiddens_flag:
            p_X_chain_recon, recon_hiddens = self.build_gsn(add_noise=False)
        else:
            p_X_chain_recon, recon_hiddens = self.build_gsn(hiddens=self.hiddens, add_noise=False, reverse=True)

        ####################
        # Costs and output #
        ####################
        log.debug('Cost w.r.t p(X|...) at every step in the graph for the GSN')
        # use the noisy ones for training cost
        costs          = [self.cost_function(output=rX, target=self.X, **self.cost_args) for rX in p_X_chain]
        self.show_cost = costs[-1]  # for a monitor to show progress
        cost           = numpy.sum(costs)  # THIS IS THE TRAINING COST - RECONSTRUCTION OF OUTPUT FROM NOISY GRAPH

        # use the non-noisy graph for prediction
        gsn_costs_recon = [self.cost_function(output=rX, target=self.X, **self.cost_args) for rX in p_X_chain_recon]
        # another monitor, same as self.show_cost but on the non-noisy graph.
        self.monitor    = gsn_costs_recon[-1]
        # this should be considered the main output of the computation, the sample after the
        # last walkback from the non-noisy graph.
        output     = p_X_chain_recon[-1]
        # these should be considered the model's hidden representation - the hidden representation after
        # the last walkback from the non-noisy graph.
        hiddens    = recon_hiddens

        train_mse = T.mean(T.sqr(p_X_chain[-1] - self.X), axis=0)
        train_mse = T.mean(train_mse)

        mse = T.mean(T.sqr(p_X_chain_recon[-1] - self.X), axis=0)
        mse = T.mean(mse)

        monitors = OrderedDict([('noisy_recon_cost', self.show_cost),
                                ('recon_cost', self.monitor),
                                ('mse', mse),
                                ('train_mse', train_mse)])

        ############
        # Sampling #
        ############
        # the input to the sampling function
        X_sample = T.matrix("X_sampling")
        self.network_state_input = [X_sample] + [T.matrix("H_sampling_"+str(i+1)) for i in range(self.layers)]
       
        # "Output" state of the network (noisy)
        # initialized with input, then we apply updates
        self.network_state_output = [X_sample] + self.network_state_input[1:]
        visible_pX_chain = []
    
        # ONE update
        log.debug("Performing one walkback in network state sampling.")
        self.update_layers(self.network_state_output, visible_pX_chain, add_noise=True, reverse=False)

        #####################################################
        #     Create the run and monitor functions      #
        #####################################################
        log.debug("Compiling functions...")
        t = time.time()

        # doesn't make sense to have this if there is a hiddens_hook
        if not self.hiddens_flag:
            # THIS IS THE MAIN PREDICT FUNCTION - takes in a real matrix and produces the output from the non-noisy
            # computation graph
            log.debug("f_run...")
            self.f_run = function(inputs  = [self.X],
                                  outputs = output,
                                  name    = 'gsn_f_run')

        # this is a helper function - it corrupts inputs when testing the non-noisy graph (aka before feeding the
        # input to f_run)
        log.debug("f_noise...")
        self.f_noise = function(inputs  = [self.X],
                                outputs = self.input_noise(self.X),
                                name    = 'gsn_f_noise')

        # the sampling function, for creating lots of samples from the computational graph. (mostly for log-likelihood
        # or visualization)
        log.debug("f_sample...")
        if self.layers == 1: 
            self.f_sample = function(inputs  = [X_sample],
                                     outputs = visible_pX_chain[-1],
                                     name    = 'gsn_f_sample_single_layer')
        else:
            # Theano emits an "unused input" warning here: the first odd layers' inputs are
            # not used, since those layers are computed directly from the even layers.
            self.f_sample = function(inputs  = self.network_state_input,
                                     outputs = self.network_state_output + visible_pX_chain,
                                     name    = 'gsn_f_sample')

        log.debug("GSN compiling done. Took %s", make_time_units_string(time.time() - t))

        return cost, monitors, output, hiddens
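A short, hedged usage sketch for the compiled GSN functions above: gsn stands in for an already-built model exposing f_noise and f_run, and the test batch is made up for illustration.

import numpy as np

test_x = np.random.rand(10, 784).astype('float32')    # hypothetical test batch
noisy_x = gsn.f_noise(test_x)          # corrupt inputs with the model's input noise
reconstruction = gsn.f_run(noisy_x)    # reconstruct through the non-noisy graph
print(np.mean((reconstruction - test_x) ** 2))         # reconstruction MSE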
Example #22
    def train(self, continue_training=False):
        """
        This method performs the training!!!
        :param continue_training:
        :type continue_training:
        :return:
        :rtype:
        """
        # grab the model parameters to use during training
        self.params = self.model.get_params()
        log.info("%s params: %s", str(type(self.model)), str(self.params))

        ###############################################
        # theano index variable to use on the dataset #
        ###############################################
        # index to a [mini]batch - both start and end
        data_idx = T.iscalar('data_index')
        data_end_idx = T.iscalar('data_end_index')
        batch_slice = slice(data_idx, data_end_idx)

        # compute number of minibatches for training, validation and testing
        # shapes is list of list - input list of datasets to optimizer (for multiple inputs), and each dataset
        # could be a list of shared variables (like multiple sequences from files)
        train_data_shapes = raise_to_list(self.dataset.getDataShape(TRAIN))
        valid_data_shapes = raise_to_list(self.dataset.getDataShape(VALID))
        test_data_shapes  = raise_to_list(self.dataset.getDataShape(TEST))

        # train_batches is going to be lists of tuples that contain the start and end indices for train data
        train_data_lens = [shape[0] for shape in train_data_shapes]
        self.train_batches = self.get_batch_indices(train_data_lens)

        if valid_data_shapes is not None:
            valid_data_lens = [shape[0] for shape in valid_data_shapes]
            self.valid_batches = self.get_batch_indices(valid_data_lens)
        else:
            self.valid_batches = None
        if test_data_shapes is not None:
            test_data_lens = [shape[0] for shape in test_data_shapes]
            self.test_batches = self.get_batch_indices(test_data_lens)
        else:
            self.test_batches = None

        # translate the data_idx into the givens for the model
        model_inputs = raise_to_list(self.model.get_inputs())
        model_targets = raise_to_list(self.model.get_targets())

        train_data, train_labels = self.dataset.getSubset(TRAIN)
        train_givens = OrderedDict(zip(model_inputs, [train_data[batch_slice]]))
        if model_targets is not None and len(model_targets) > 0:
            train_givens.update(OrderedDict(zip(model_targets, [train_labels[batch_slice]])))

        valid_data, valid_labels = self.dataset.getSubset(VALID)
        valid_givens = OrderedDict(zip(model_inputs, [valid_data[batch_slice]]))
        if model_targets is not None and len(model_targets) > 0:
            valid_givens.update(OrderedDict(zip(model_targets, [valid_labels[batch_slice]])))

        test_data, test_labels = self.dataset.getSubset(TEST)
        test_givens = OrderedDict(zip(model_inputs, [test_data[batch_slice]]))
        if model_targets is not None and len(model_targets) > 0:
            test_givens.update(OrderedDict(zip(model_targets, [test_labels[batch_slice]])))

        # Now time to create the training cost functions for the model - make sure to handle the possible
        # list of costs used for pretraining of certain parts of the model.
        train_costs = raise_to_list(self.model.get_train_cost())
        self.train_functions = []
        for i, train_cost in enumerate(train_costs):
            # Now create the training cost function for the model to use while training - update parameters
            # gradient!
            gradients, _ = self.model.get_gradient(cost=train_cost)

            # Calculate the optimizer updates each run
            # This is where the magic happens for a lot of sub-implementations of SGD, including AdaDelta!
            # It tells how to update the params each training epoch
            gradient_updates = self.get_updates(gradients)

            # Combine the updates from the model also if applicable
            train_updates = self.model.get_updates()
            if train_updates:
                train_updates.update(gradient_updates)
            else:
                train_updates = gradient_updates

            # Compile the training function!
            log.info('Compiling f_learn %d/%d function for model %s...', i + 1, len(train_costs),
                     str(type(self.model)))
            t = time.time()

            f_learn = function(inputs=[data_idx, data_end_idx],
                               updates=train_updates,
                               outputs=train_cost,
                               givens=train_givens,
                               name='f_learn_%d' % i)

            log.info('f_learn compilation took %s', make_time_units_string(time.time() - t))
            self.train_functions.append(f_learn)

        # grab the expression(s) to use to monitor different model values during training
        log.debug("Compiling monitor functions...")
        monitor_t = time.time()
        self.monitors = OrderedDict(self.model.get_monitors())
        self.monitor_names = self.monitors.keys()
        if len(self.monitors.keys()) > 0:
            self.train_monitor_function = function(
                inputs=[data_idx, data_end_idx],
                updates=self.model.get_updates(),
                outputs=self.monitors.values(),
                givens=train_givens,
                name="train_monitor_function"
            )
        if len(self.monitors.keys()) > 0:
            self.valid_monitor_function = function(
                inputs=[data_idx, data_end_idx],
                updates=self.model.get_updates(),
                outputs=self.monitors.values(),
                givens=valid_givens,
                name="valid_monitor_function"
            )
        if len(self.monitors.keys()) > 0:
            self.test_monitor_function = function(
                inputs=[data_idx, data_end_idx],
                updates=self.model.get_updates(),
                outputs=self.monitors.values(),
                givens=test_givens,
                name="test_monitor_function"
            )
        log.debug("Compilation done. Took %s", make_time_units_string(time.time() - monitor_t))

        self.noise_switches = raise_to_list(self.model.get_noise_switch())

        ##################
        # start training #
        ##################
        # make sure to deal with a list of train_cost functions - for layer-wise pretraining!
        # this list of training functions was created during __init__()
        start_time = time.time()
        for func_i, train_function in enumerate(self.train_functions):
            log.info("-----------TRAINING %s function %d/%d FOR %d EPOCHS (continue_training=%s)-----------",
                     str(type(self.model)), func_i + 1, len(self.train_functions), self.n_epoch, str(continue_training))

            log.debug("Train dataset size is: %s", self.dataset.getDataShape(TRAIN))
            if self.dataset.hasSubset(VALID):
                log.debug("Valid dataset size is: %s", self.dataset.getDataShape(VALID))
            if self.dataset.hasSubset(TEST):
                log.debug("Test dataset size is: %s", self.dataset.getDataShape(TEST))

            self.STOP = False
            self.epoch_counter = 0
            if not continue_training:
                # reset the learning rate
                if hasattr(self, 'learning_rate_decay') and self.learning_rate_decay:
                    self.learning_rate_decay.reset()
                # reset the other model decaying functions
                for decay_param in self.model.get_decay_params():
                    decay_param.reset()

            self.times = []
            self.best_cost = numpy.inf
            self.best_params = None
            self.patience = 0

            t = time.time()

            while not self.STOP:
                try:
                    self.STOP = self._perform_one_epoch(train_function)
                except KeyboardInterrupt:
                    log.info("STOPPING EARLY FROM KEYBOARDINTERRUPT")
                    self.STOP = True

            # save params
            if self.best_params is not None:
                log.debug("Restoring best model parameters...")
                set_shared_values(self.params, self.best_params)
            log.debug("Saving model parameters...")
            self.model.save_params('trained_epoch_' + str(self.epoch_counter) + '.pkl')

            log.info("------------TRAIN TIME TOOK %s---------", make_time_units_string(time.time() - t))

        log.info("------------TOTAL %s TRAIN TIME TOOK %s---------",
                 str(type(self.model)), make_time_units_string(time.time() - start_time))
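To illustrate the data_idx / data_end_idx mechanism used above, here is a minimal sketch (not the library's implementation) of building (start, end) minibatch index pairs and slicing a shared dataset through Theano givens; make_batch_indices and the toy data are illustrative assumptions.

import numpy as np
import theano
import theano.tensor as T

def make_batch_indices(n_examples, batch_size):
    # (start, end) pairs covering the dataset; the final batch may be smaller
    return [(i, min(i + batch_size, n_examples))
            for i in range(0, n_examples, batch_size)]

data = theano.shared(np.random.rand(1000, 784).astype(theano.config.floatX))
x = T.matrix('x')
start, end = T.iscalar('start'), T.iscalar('end')
batch_mean = theano.function(inputs=[start, end],
                             outputs=x.mean(),
                             givens={x: data[start:end]})
means = [batch_mean(s, e) for s, e in make_batch_indices(1000, 128)]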
Example #23
    def _build_computation_graph(self):
        ###################### BUILD NETWORK ##########################
        # whether or not to mirror the input images before feeding them into the network
        if self.flag_datalayer:
            layer_1_input = mirror_images(input=self.x,
                                          image_shape=(self.batch_size, 3, 256, 256),  # bc01 format
                                          cropsize=227,
                                          rand=self.rand,
                                          flag_rand=self.rand_crop)
        else:
            layer_1_input = self.x  # 4D tensor (going to be in bc01 format)

        # Start with 5 convolutional pooling layers
        log.debug("convpool layer 1...")
        convpool_layer1 = ConvPoolLayer(inputs_hook=((self.batch_size, 3, 227, 227), layer_1_input),
                                        filter_shape=(96, 3, 11, 11),
                                        convstride=4,
                                        padsize=0,
                                        group=1,
                                        poolsize=3,
                                        poolstride=2,
                                        bias_init=0.0,
                                        local_response_normalization=True)
        # Add this layer's parameters!
        self.params += convpool_layer1.get_params()

        log.debug("convpool layer 2...")
        convpool_layer2 = ConvPoolLayer(inputs_hook=((self.batch_size, 96, 27, 27), convpool_layer1.get_outputs()),
                                        filter_shape=(256, 96, 5, 5),
                                        convstride=1,
                                        padsize=2,
                                        group=2,
                                        poolsize=3,
                                        poolstride=2,
                                        bias_init=0.1,
                                        local_response_normalization=True)
        # Add this layer's parameters!
        self.params += convpool_layer2.get_params()

        log.debug("convpool layer 3...")
        convpool_layer3 = ConvPoolLayer(inputs_hook=((self.batch_size, 256, 13, 13), convpool_layer2.get_outputs()),
                                        filter_shape=(384, 256, 3, 3),
                                        convstride=1,
                                        padsize=1,
                                        group=1,
                                        poolsize=1,
                                        poolstride=0,
                                        bias_init=0.0,
                                        local_response_normalization=False)
        # Add this layer's parameters!
        self.params += convpool_layer3.get_params()

        log.debug("convpool layer 4...")
        convpool_layer4 = ConvPoolLayer(inputs_hook=((self.batch_size, 384, 13, 13), convpool_layer3.get_outputs()),
                                        filter_shape=(384, 384, 3, 3),
                                        convstride=1,
                                        padsize=1,
                                        group=2,
                                        poolsize=1,
                                        poolstride=0,
                                        bias_init=0.1,
                                        local_response_normalization=False)
        # Add this layer's parameters!
        self.params += convpool_layer4.get_params()

        log.debug("convpool layer 5...")
        convpool_layer5 = ConvPoolLayer(inputs_hook=((self.batch_size, 384, 13, 13), convpool_layer4.get_outputs()),
                                        filter_shape=(256, 384, 3, 3),
                                        convstride=1,
                                        padsize=1,
                                        group=2,
                                        poolsize=3,
                                        poolstride=2,
                                        bias_init=0.0,
                                        local_response_normalization=False)
        # Add this layer's parameters!
        self.params += convpool_layer5.get_params()

        # Now onto the fully-connected layers!
        fc_config = {
            'activation': 'rectifier',  # activation function for the fully-connected layers
            'weights_init': 'gaussian',  # either 'gaussian' or 'uniform' - how to initialize weights
            'weights_mean': 0.0,  # mean for gaussian weights init
            'weights_std': 0.005,  # standard deviation for gaussian weights init
            'bias_init': 0.0  # how to initialize the bias parameter
        }
        log.debug("fully connected layer 1 (model layer 6)...")
        # we want to have dropout applied to the training version, but not the test version.
        fc_layer6_input = T.flatten(convpool_layer5.get_outputs(), 2)
        fc_layer6 = BasicLayer(inputs_hook=(9216, fc_layer6_input),
                               output_size=4096,
                               noise='dropout',
                               noise_level=0.5,
                               **fc_config)
        # Add this layer's parameters!
        self.params += fc_layer6.get_params()
        # Add the dropout noise switch
        self.noise_switches += fc_layer6.get_noise_switch()

        log.debug("fully connected layer 2 (model layer 7)...")
        fc_layer7 = BasicLayer(inputs_hook=(4096, fc_layer6.get_outputs()),
                               output_size=4096,
                               noise='dropout',
                               noise_level=0.5,
                               **fc_config)

        # Add this layer's parameters!
        self.params += fc_layer7.get_params()
        # Add the dropout noise switch
        self.noise_switches += fc_layer7.get_noise_switch()

        # last layer is a softmax prediction output layer
        softmax_config = {
            'weights_init': 'gaussian',
            'weights_mean': 0.0,
            'weights_std': 0.005,
            'bias_init': 0.0
        }
        log.debug("softmax classification layer (model layer 8)...")
        softmax_layer8 = SoftmaxLayer(inputs_hook=(4096, fc_layer7.get_outputs()),
                                      output_size=1000,
                                      **softmax_config)

        # Add this layer's parameters!
        self.params += softmax_layer8.get_params()

        # finally the softmax output from the whole thing!
        self.output = softmax_layer8.get_outputs()
        self.targets = softmax_layer8.get_targets()

        #####################
        # Cost and monitors #
        #####################
        self.train_cost = softmax_layer8.negative_log_likelihood()
        cost = softmax_layer8.negative_log_likelihood()
        errors = softmax_layer8.errors()
        train_errors = softmax_layer8.errors()

        self.monitors = OrderedDict([('cost', cost), ('errors', errors), ('dropout_errors', train_errors)])

        #########################
        # Compile the functions #
        #########################
        log.debug("Compiling functions!")
        t = time.time()
        log.debug("f_run...")
        # use the actual argmax from the classification
        self.f_run = function(inputs=[self.x], outputs=softmax_layer8.get_argmax_prediction())
        log.debug("compilation took %s", make_time_units_string(time.time() - t))