Exemplo n.º 1
0
def test_contains_inf():
    """
    Tests that pylearn2.utils.contains_inf correctly
    identifies `np.inf` values in an array.
    """
    arr = np.random.random(100)
    assert not contains_inf(arr)
    arr[0] = np.nan
    assert not contains_inf(arr)
    arr[1] = np.inf
    assert contains_inf(arr)
    arr[1] = -np.inf
    assert contains_inf(arr)
Exemplo n.º 2
0
 def main_loop(self):
     self.algorithm.setup(agent=self.agent, environment=self.environment)
     i = 0
     for param in self.agent.get_params():
         assert not contains_nan(param.get_value()), (i, param.name)
         assert not contains_inf(param.get_value()), (i, param.name)
     while True:
         rval = self.algorithm.train()
         assert rval is None
         i += 1
         for param in self.agent.get_params():
             assert not contains_nan(param.get_value()), (i, param.name)
             assert not contains_inf(param.get_value()), (i, param.name)
         if i % 1000 == 0:
             serial.save(self.save_path, self.agent)
             logger.info('saved!')
Exemplo n.º 3
0
 def main_loop(self):
     self.algorithm.setup(agent=self.agent, environment=self.environment)
     i = 0
     for param in self.agent.get_params():
         assert not contains_nan(param.get_value()), (i, param.name)
         assert not contains_inf(param.get_value()), (i, param.name)
     while True:
         rval = self.algorithm.train()
         assert rval is None
         i += 1
         for param in self.agent.get_params():
             assert not contains_nan(param.get_value()), (i, param.name)
             assert not contains_inf(param.get_value()), (i, param.name)
         if i % 1000 == 0:
             serial.save(self.save_path, self.agent)
             logger.info('saved!')
Exemplo n.º 4
0
        def do_check_on(var, nd, f, is_input):
            """
            Checks `var` for NaNs / Infs. If detected, raises an exception
            and / or prints information about `nd`, `f`, and `is_input` to
            help the user determine the cause of the invalid values.

            Parameters
            ----------
            var : numpy.ndarray
                The value to be checked.
            nd : theano.gof.Apply
                The Apply node being executed
            f : callable
                The thunk for the apply node
            is_input : bool
                If True, `var` is an input to `nd`.
                If False, it is an output.
            """
            error = False
            if nan_is_error:
                if contains_nan(var):
                    logger.error('NaN detected')
                    error = True
            if inf_is_error:
                if contains_inf(var):
                    logger.error('Inf detected')
                    error = True
            if big_is_error:
                if np.abs(var).max() > 1e10:
                    logger.error('Big value detected')
                    error = True
            if error:
                if is_input:
                    logger.error('In an input')
                else:
                    logger.error('In an output')
                logger.error('Inputs: ')
                for ivar, ival in zip(nd.inputs, f.inputs):
                    logger.error('var')
                    logger.error(ivar)
                    logger.error(theano.printing.min_informative_str(ivar))
                    logger.error('val')
                    logger.error(ival)
                logger.error('Node:')
                logger.error(nd)
                assert False
Exemplo n.º 5
0
        def do_check_on(var, nd, f, is_input):
            """
            Checks `var` for NaNs / Infs. If detected, raises an exception
            and / or prints information about `nd`, `f`, and `is_input` to
            help the user determine the cause of the invalid values.

            Parameters
            ----------
            var : numpy.ndarray
                The value to be checked.
            nd : theano.gof.Apply
                The Apply node being executed
            f : callable
                The thunk for the apply node
            is_input : bool
                If True, `var` is an input to `nd`.
                If False, it is an output.
            """
            error = False
            if nan_is_error:
                if contains_nan(var):
                    logger.error('NaN detected')
                    error = True
            if inf_is_error:
                if contains_inf(var):
                    logger.error('Inf detected')
                    error = True
            if big_is_error:
                if np.abs(var).max() > 1e10:
                    logger.error('Big value detected')
                    error = True
            if error:
                if is_input:
                    logger.error('In an input')
                else:
                    logger.error('In an output')
                logger.error('Inputs: ')
                for ivar, ival in zip(nd.inputs, f.inputs):
                    logger.error('var')
                    logger.error(ivar)
                    logger.error(theano.printing.min_informative_str(ivar))
                    logger.error('val')
                    logger.error(ival)
                logger.error('Node:')
                logger.error(nd)
                assert False
def stochastic_max_pool_bc01(bc01, pool_shape, pool_stride, image_shape, rng = None):
    """
    .. todo::
        WRITEME properly
    Stochastic max pooling for training as defined in:
    Stochastic Pooling for Regularization of Deep Convolutional Neural Networks
    Matthew D. Zeiler, Rob Fergus
    Parameters
    ----------
    bc01 : theano 4-tensor
        in format (batch size, channels, rows, cols),
        IMPORTANT: All values should be positive
    pool_shape : tuple
        shape of the pool region (rows, cols)
    pool_stride : tuple
        strides between pooling regions (row stride, col stride)
    image_shape : tuple
        avoid doing some of the arithmetic in theano
    rng : theano random stream
    """
    r, c = image_shape
    pr, pc = pool_shape
    rs, cs = pool_stride

    batch = bc01.shape[0]
    channel = bc01.shape[1]

    rng = make_theano_rng(rng, 2022, which_method='multinomial')

    # Compute index in pooled space of last needed pool
    # (needed = each input pixel must appear in at least one pool)
    def last_pool(im_shp, p_shp, p_strd):
        rval = int(numpy.ceil(float(im_shp - p_shp) / p_strd))
        assert p_strd * rval + p_shp >= im_shp
        assert p_strd * (rval - 1) + p_shp < im_shp
        return rval
    # Compute starting row of the last pool
    last_pool_r = last_pool(image_shape[0] ,pool_shape[0], pool_stride[0]) * pool_stride[0]
    # Compute number of rows needed in image for all indexes to work out
    required_r = last_pool_r + pr

    last_pool_c = last_pool(image_shape[1] ,pool_shape[1], pool_stride[1]) * pool_stride[1]
    required_c = last_pool_c + pc

    # final result shape
    res_r = int(numpy.floor(last_pool_r/rs)) + 1
    res_c = int(numpy.floor(last_pool_c/cs)) + 1

    for bc01v in get_debug_values(bc01):
        assert not contains_inf(bc01v)
        assert bc01v.shape[2] == image_shape[0]
        assert bc01v.shape[3] == image_shape[1]

    # padding
    padded = tensor.alloc(0.0, batch, channel, required_r, required_c)
    name = bc01.name
    if name is None:
        name = 'anon_bc01'
    bc01 = tensor.set_subtensor(padded[:,:, 0:r, 0:c], bc01)
    bc01.name = 'zero_padded_' + name

    # unraveling
    window = tensor.alloc(0.0, batch, channel, res_r, res_c, pr, pc)
    window.name = 'unravlled_winodows_' + name

    for row_within_pool in xrange(pool_shape[0]):
        row_stop = last_pool_r + row_within_pool + 1
        for col_within_pool in xrange(pool_shape[1]):
            col_stop = last_pool_c + col_within_pool + 1
            win_cell = bc01[:,:,row_within_pool:row_stop:rs, col_within_pool:col_stop:cs]
            window  =  tensor.set_subtensor(window[:,:,:,:, row_within_pool, col_within_pool], win_cell)

    # find the norm
    norm = window.sum(axis = [4, 5])
    norm = tensor.switch(tensor.eq(norm, 0.0), 1.0, norm)
    norm = window / norm.dimshuffle(0, 1, 2, 3, 'x', 'x')
    # get prob
    prob = rng.multinomial(pvals = norm.reshape((batch * channel * res_r * res_c, pr * pc)), dtype='float32')
    # select
    res = (window * prob.reshape((batch, channel, res_r, res_c,  pr, pc))).max(axis=5).max(axis=4)
    res.name = 'pooled_' + name

    return tensor.cast(res, theano.config.floatX)
Exemplo n.º 7
0
def main():
    """
    .. todo::

        WRITEME
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out")
    parser.add_argument("model_paths", nargs='+')
    options = parser.parse_args()
    model_paths = options.model_paths

    if options.out is not None:
      import matplotlib
      matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print 'generating names...'
    model_names = [model_path.replace('.pkl', '!') for model_path in
            model_paths]
    model_names = unique_substrings(model_names, min_size=10)
    model_names = [model_name.replace('!','') for model_name in
            model_names]
    print '...done'

    for i, arg in enumerate(model_paths):
        try:
            model = serial.load(arg)
        except Exception:
            if arg.endswith('.yaml'):
                print >> sys.stderr, arg + " is a yaml config file," + \
                "you need to load a trained model."
                quit(-1)
            raise
        this_model_channels = model.monitor.channels

        if len(sys.argv) > 2:
            postfix = ":" + model_names[i]
        else:
            postfix = ""

        for channel in this_model_channels:
            channels[channel+postfix] = this_model_channels[channel]
        del model
        gc.collect()


    while True:
        # Make a list of short codes for each channel so user can specify them
        # easily
        tag_generator = _TagGenerator()
        codebook = {}
        sorted_codes = []
        for channel_name in sorted(channels,
                key = number_aware_alphabetical_key):
            code = tag_generator.get_tag()
            codebook[code] = channel_name
            codebook['<'+channel_name+'>'] = channel_name
            sorted_codes.append(code)

        x_axis = 'example'
        print 'set x_axis to example'

        if len(channels.values()) == 0:
            print "there are no channels to plot"
            break

        # If there is more than one channel in the monitor ask which ones to
        # plot
        prompt = len(channels.values()) > 1

        if prompt:

            # Display the codebook
            for code in sorted_codes:
                print code + '. ' + codebook[code]

            print

            print "Put e, b, s or h in the list somewhere to plot " + \
                    "epochs, batches, seconds, or hours, respectively."
            response = raw_input('Enter a list of channels to plot ' + \
                    '(example: A, C,F-G, h, <test_err>) or q to quit' + \
                    ' or o for options: ')

            if response == 'o':
                print '1: smooth all channels'
                print 'any other response: do nothing, go back to plotting'
                response = raw_input('Enter your choice: ')
                if response == '1':
                    for channel in channels.values():
                        k = 5
                        new_val_record = []
                        for i in xrange(len(channel.val_record)):
                            new_val = 0.
                            count = 0.
                            for j in xrange(max(0, i-k), i+1):
                                new_val += channel.val_record[j]
                                count += 1.
                            new_val_record.append(new_val / count)
                        channel.val_record = new_val_record
                continue

            if response == 'q':
                break

            #Remove spaces
            response = response.replace(' ','')

            #Split into list
            codes = response.split(',')

            final_codes = set([])

            for code in codes:
                if code == 'e':
                    x_axis = 'epoch'
                    continue
                elif code == 'b':
                    x_axis = 'batche'
                elif code == 's':
                    x_axis = 'second'
                elif code == 'h':
                    x_axis = 'hour'
                elif code.startswith('<'):
                    assert code.endswith('>')
                    final_codes.add(code)
                elif code.find('-') != -1:
                    #The current list element is a range of codes

                    rng = code.split('-')

                    if len(rng) != 2:
                        print "Input not understood: "+code
                        quit(-1)

                    found = False
                    for i in xrange(len(sorted_codes)):
                        if sorted_codes[i] == rng[0]:
                            found = True
                            break

                    if not found:
                        print "Invalid code: "+rng[0]
                        quit(-1)

                    found = False
                    for j in xrange(i,len(sorted_codes)):
                        if sorted_codes[j] == rng[1]:
                            found = True
                            break

                    if not found:
                        print "Invalid code: "+rng[1]
                        quit(-1)

                    final_codes = final_codes.union(set(sorted_codes[i:j+1]))
                else:
                    #The current list element is just a single code
                    final_codes = final_codes.union(set([code]))
            # end for code in codes
        else:
            final_codes ,= set(codebook.keys())

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        styles = list(colors)
        styles += [color+'--' for color in colors]
        styles += [color+':' for color in colors]

        fig = plt.figure()
        ax = plt.subplot(1,1,1)

        # plot the requested channels
        for idx, code in enumerate(sorted(final_codes)):

            channel_name= codebook[code]
            channel = channels[channel_name]

            y = np.asarray(channel.val_record)

            if contains_nan(y):
                print channel_name + ' contains NaNs'

            if contains_inf(y):
                print channel_name + 'contains infinite values'

            if x_axis == 'example':
                x = np.asarray(channel.example_record)
            elif x_axis == 'batche':
                x = np.asarray(channel.batch_record)
            elif x_axis == 'epoch':
                try:
                    x = np.asarray(channel.epoch_record)
                except AttributeError:
                    # older saved monitors won't have epoch_record
                    x = np.arange(len(channel.batch_record))
            elif x_axis == 'second':
                x = np.asarray(channel.time_record)
            elif x_axis == 'hour':
                x = np.asarray(channel.time_record) / 3600.
            else:
                assert False


            ax.plot( x,
                      y,
                      styles[idx % len(styles)],
                      marker = '.', # add point margers to lines
                      label = channel_name)

        plt.xlabel('# '+x_axis+'s')
        ax.ticklabel_format( scilimits = (-3,3), axis = 'both')

        handles, labels = ax.get_legend_handles_labels()
        lgd = ax.legend(handles, labels, loc='upper center',
                bbox_to_anchor=(0.5,-0.1))
        # 0.046 is the size of 1 legend box
        fig.subplots_adjust(bottom=0.11 + 0.046 * len(final_codes))

        if options.out is None:
          plt.show()
        else:
          plt.savefig(options.out)

        if not prompt:
            break
Exemplo n.º 8
0
def main():
    """
    .. todo::

        WRITEME
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out")
    parser.add_argument("model_paths", nargs='+')
    parser.add_argument("--yrange",
                        help='The y-range to be used for plotting, e.g.  0:1')

    options = parser.parse_args()
    model_paths = options.model_paths

    if options.out is not None:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print('generating names...')
    model_names = [
        model_path.replace('.pkl', '!') for model_path in model_paths
    ]
    model_names = unique_substrings(model_names, min_size=10)
    model_names = [model_name.replace('!', '') for model_name in model_names]
    print('...done')

    for i, arg in enumerate(model_paths):
        try:
            model = serial.load(arg)
        except Exception:
            if arg.endswith('.yaml'):
                print(sys.stderr,
                      arg + " is a yaml config file," +
                      "you need to load a trained model.",
                      file=sys.stderr)
                quit(-1)
            raise
        this_model_channels = model.monitor.channels

        if len(sys.argv) > 2:
            postfix = ":" + model_names[i]
        else:
            postfix = ""

        for channel in this_model_channels:
            channels[channel + postfix] = this_model_channels[channel]
        del model
        gc.collect()

    while True:
        # Make a list of short codes for each channel so user can specify them
        # easily
        tag_generator = _TagGenerator()
        codebook = {}
        sorted_codes = []
        for channel_name in sorted(channels,
                                   key=number_aware_alphabetical_key):
            code = tag_generator.get_tag()
            codebook[code] = channel_name
            codebook['<' + channel_name + '>'] = channel_name
            sorted_codes.append(code)

        x_axis = 'example'
        print('set x_axis to example')

        if len(channels.values()) == 0:
            print("there are no channels to plot")
            break

        # If there is more than one channel in the monitor ask which ones to
        # plot
        prompt = len(channels.values()) > 1

        if prompt:

            # Display the codebook
            for code in sorted_codes:
                print(code + '. ' + codebook[code])

            print()

            print("Put e, b, s or h in the list somewhere to plot " +
                  "epochs, batches, seconds, or hours, respectively.")
            response = input('Enter a list of channels to plot ' + \
                    '(example: A, C,F-G, h, <test_err>) or q to quit' + \
                    ' or o for options: ')

            if response == 'o':
                print('1: smooth all channels')
                print('any other response: do nothing, go back to plotting')
                response = input('Enter your choice: ')
                if response == '1':
                    for channel in channels.values():
                        k = 5
                        new_val_record = []
                        for i in xrange(len(channel.val_record)):
                            new_val = 0.
                            count = 0.
                            for j in xrange(max(0, i - k), i + 1):
                                new_val += channel.val_record[j]
                                count += 1.
                            new_val_record.append(new_val / count)
                        channel.val_record = new_val_record
                continue

            if response == 'q':
                break

            #Remove spaces
            response = response.replace(' ', '')

            #Split into list
            codes = response.split(',')

            final_codes = set([])

            for code in codes:
                if code == 'e':
                    x_axis = 'epoch'
                    continue
                elif code == 'b':
                    x_axis = 'batche'
                elif code == 's':
                    x_axis = 'second'
                elif code == 'h':
                    x_axis = 'hour'
                elif code.startswith('<'):
                    assert code.endswith('>')
                    final_codes.add(code)
                elif code.find('-') != -1:
                    #The current list element is a range of codes

                    rng = code.split('-')

                    if len(rng) != 2:
                        print("Input not understood: " + code)
                        quit(-1)

                    found = False
                    for i in xrange(len(sorted_codes)):
                        if sorted_codes[i] == rng[0]:
                            found = True
                            break

                    if not found:
                        print("Invalid code: " + rng[0])
                        quit(-1)

                    found = False
                    for j in xrange(i, len(sorted_codes)):
                        if sorted_codes[j] == rng[1]:
                            found = True
                            break

                    if not found:
                        print("Invalid code: " + rng[1])
                        quit(-1)

                    final_codes = final_codes.union(set(sorted_codes[i:j + 1]))
                else:
                    #The current list element is just a single code
                    final_codes = final_codes.union(set([code]))
            # end for code in codes
        else:
            final_codes, = set(codebook.keys())

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        styles = list(colors)
        styles += [color + '--' for color in colors]
        styles += [color + ':' for color in colors]

        fig = plt.figure()
        ax = plt.subplot(1, 1, 1)

        # plot the requested channels
        for idx, code in enumerate(sorted(final_codes)):

            channel_name = codebook[code]
            channel = channels[channel_name]

            y = np.asarray(channel.val_record)

            if contains_nan(y):
                print(channel_name + ' contains NaNs')

            if contains_inf(y):
                print(channel_name + 'contains infinite values')

            if x_axis == 'example':
                x = np.asarray(channel.example_record)
            elif x_axis == 'batche':
                x = np.asarray(channel.batch_record)
            elif x_axis == 'epoch':
                try:
                    x = np.asarray(channel.epoch_record)
                except AttributeError:
                    # older saved monitors won't have epoch_record
                    x = np.arange(len(channel.batch_record))
            elif x_axis == 'second':
                x = np.asarray(channel.time_record)
            elif x_axis == 'hour':
                x = np.asarray(channel.time_record) / 3600.
            else:
                assert False

            ax.plot(
                x,
                y,
                styles[idx % len(styles)],
                marker='.',  # add point margers to lines
                label=channel_name)

        plt.xlabel('# ' + x_axis + 's')
        ax.ticklabel_format(scilimits=(-3, 3), axis='both')

        handles, labels = ax.get_legend_handles_labels()
        lgd = ax.legend(handles,
                        labels,
                        loc='upper center',
                        bbox_to_anchor=(0.5, -0.1))
        # 0.046 is the size of 1 legend box
        fig.subplots_adjust(bottom=0.11 + 0.046 * len(final_codes))

        if (options.yrange is not None):
            ymin, ymax = map(float, options.yrange.split(':'))
            plt.ylim(ymin, ymax)

        if options.out is None:
            plt.show()
        else:
            plt.savefig(options.out)

        if not prompt:
            break
Exemplo n.º 9
0
    def setup(self, model, dataset):
        """
        Compiles the theano functions needed for the train method.

        Parameters
        ----------
        model : a Model instance
        dataset : Dataset
        """
        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [param for param in model.get_params()
                      if contains_inf(param.get_value())]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: "+str(inf_params))
        if any([contains_nan(param.get_value())
                for param in model.get_params()]):
            nan_params = [param for param in model.get_params()
                          if contains_nan(param.get_value())]
            raise ValueError("These params are NaN: "+str(nan_params))
        self.model = model

        self._synchronize_batch_size(model)
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        self.monitor._sanity_check()

        # test if force batch size and batch size
        has_force_batch_size = getattr(model, "force_batch_size", False)
        train_dataset_is_uneven = \
            dataset.get_num_examples() % self.batch_size != 0

        has_monitoring_datasets = \
            self.monitoring_dataset is not None and \
            self.monitoring_dataset.values() > 0

        if has_monitoring_datasets:
            monitoring_datasets_are_uneven = \
                any(d.get_num_examples() % self.batch_size
                    != 0 for d in self.monitoring_dataset.values())
        else:
            monitoring_datasets_are_uneven = False  # or True it doesn't matter

        if has_force_batch_size and train_dataset_is_uneven and \
           not has_uniform_batch_size(self.train_iteration_mode):

            raise ValueError("Dataset size is not a multiple of batch size."
                             "You should set train_iteration_mode (and "
                             "maybe monitor_iteration_mode) to "
                             "even_sequential, even_shuffled_sequential or "
                             "even_batchwise_shuffled_sequential")

        if has_force_batch_size and has_monitoring_datasets and \
           monitoring_datasets_are_uneven and \
           not has_uniform_batch_size(self.monitor_iteration_mode):

            raise ValueError("Dataset size is not a multiple of batch size."
                             "You should set monitor_iteration_mode to "
                             "even_sequential, even_shuffled_sequential or "
                             "even_batchwise_shuffled_sequential")

        data_specs = self.cost.get_data_specs(self.model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space.
        # We want that so that if the same space/source is specified
        # more than once in data_specs, only one Theano Variable
        # is generated for it, and the corresponding value is passed
        # only once to the compiled Theano function.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = '%s[%s]' % (self.__class__.__name__, source)
            arg = space.make_theano_batch(name=name,
                                          batch_size=self.batch_size)
            theano_args.append(arg)
        theano_args = tuple(theano_args)

        # Methods of `self.cost` need args to be passed in a format compatible
        # with data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = self.cost.expr(model, nested_args,
                                    ** fixed_var_descr.fixed_vars)

        if cost_value is not None and cost_value.name is None:
            # Concatenate the name of all tensors in theano_args !?
            cost_value.name = 'objective'

        learning_rate = self.learning_rate
        params = list(model.get_params())
        assert len(params) > 0
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i

        grads, updates = self.cost.get_gradients(model, nested_args,
                                                 ** fixed_var_descr.fixed_vars)
        if not isinstance(grads, OrderedDict):
            raise TypeError(str(type(self.cost)) + ".get_gradients returned " +
                            "something with" + str(type(grads)) + "as its " +
                            "first member. Expected OrderedDict.")

        for param in grads:
            assert param in params
        for param in params:
            assert param in grads

        for param in grads:
            if grads[param].name is None and cost_value is not None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': cost_value.name,
                                      'paramname': param.name})
            assert grads[param].dtype == param.dtype

        lr_scalers = model.get_lr_scalers()

        for key in lr_scalers:
            if key not in params:
                raise ValueError("Tried to scale the learning rate on " +\
                        str(key)+" which is not an optimization parameter.")

        log.info('Parameter and initial learning rate summary:')
        for param in params:
            param_name = param.name
            if param_name is None:
                param_name = 'anon_param'
            lr = learning_rate.get_value() * lr_scalers.get(param,1.)
            log.info('\t' + param_name + ': ' + str(lr))

        if self.learning_rule:
            updates.update(self.learning_rule.get_updates(
                learning_rate, grads, lr_scalers))
        else:
            # Use standard SGD updates with fixed learning rate.
            updates.update( dict(safe_zip(params, [param - learning_rate * \
                lr_scalers.get(param, 1.) * grads[param]
                                    for param in params])))

        for param in params:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        model.modify_updates(updates)
        for param in params:
            update = updates[param]
            if update.name is None:
                update.name = 'censor(sgd_update(' + param.name + '))'
            for update_val in get_debug_values(update):
                if contains_inf(update_val):
                    raise ValueError("debug value of %s contains infs" %
                            update.name)
                if contains_nan(update_val):
                    raise ValueError("debug value of %s contains nans" %
                            update.name)


        # Set up monitor to model the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost.
        # We have to do that after learning_rule.get_updates has been
        # called, since it may have an effect on
        # learning_rule.add_channels_to_monitor (that is currently the case
        # for AdaDelta and RMSProp).
        self._setup_monitor()

        with log_timing(log, 'Compiling sgd_update'):
            self.sgd_update = function(theano_args,
                                       updates=updates,
                                       name='sgd_update',
                                       on_unused_input='ignore',
                                       mode=self.theano_function_mode)
        self.params = params
Exemplo n.º 10
0
    def setup(self, model, dataset):
        """
        Compiles the theano functions needed for the train method.

        Parameters
        ----------
        model : a Model instance
        dataset : Dataset
        """
        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [param for param in model.get_params() if contains_inf(param.get_value())]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: " + str(inf_params))
        if any([contains_nan(param.get_value()) for param in model.get_params()]):
            nan_params = [param for param in model.get_params() if contains_nan(param.get_value())]
            raise ValueError("These params are NaN: " + str(nan_params))
        self.model = model

        self._synchronize_batch_size(model)
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        self.monitor._sanity_check()

        # test if force batch size and batch size
        has_force_batch_size = getattr(model, "force_batch_size", False)
        train_dataset_is_uneven = dataset.get_num_examples() % self.batch_size != 0

        has_monitoring_datasets = self.monitoring_dataset is not None and self.monitoring_dataset.values() > 0

        if has_monitoring_datasets:
            monitoring_datasets_are_uneven = any(
                d.get_num_examples() % self.batch_size != 0 for d in self.monitoring_dataset.values()
            )
        else:
            monitoring_datasets_are_uneven = False  # or True it doesn't matter

        if has_force_batch_size and train_dataset_is_uneven and not has_uniform_batch_size(self.train_iteration_mode):

            raise ValueError(
                "Dataset size is not a multiple of batch size."
                "You should set train_iteration_mode (and "
                "maybe monitor_iteration_mode) to "
                "even_sequential, even_shuffled_sequential or "
                "even_batchwise_shuffled_sequential"
            )

        if (
            has_force_batch_size
            and has_monitoring_datasets
            and monitoring_datasets_are_uneven
            and not has_uniform_batch_size(self.monitor_iteration_mode)
        ):

            raise ValueError(
                "Dataset size is not a multiple of batch size."
                "You should set monitor_iteration_mode to "
                "even_sequential, even_shuffled_sequential or "
                "even_batchwise_shuffled_sequential"
            )

        data_specs = self.cost.get_data_specs(self.model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space.
        # We want that so that if the same space/source is specified
        # more than once in data_specs, only one Theano Variable
        # is generated for it, and the corresponding value is passed
        # only once to the compiled Theano function.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = "%s[%s]" % (self.__class__.__name__, source)
            arg = space.make_theano_batch(name=name, batch_size=self.batch_size)
            theano_args.append(arg)
        theano_args = tuple(theano_args)

        # Methods of `self.cost` need args to be passed in a format compatible
        # with data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = self.cost.expr(model, nested_args, **fixed_var_descr.fixed_vars)

        if cost_value is not None and cost_value.name is None:
            # Concatenate the name of all tensors in theano_args !?
            cost_value.name = "objective"

        # Set up monitor to model the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost
        learning_rate = self.learning_rate
        if self.monitoring_dataset is not None:
            if self.monitoring_batch_size is None and self.monitoring_batches is None:
                self.monitoring_batch_size = self.batch_size
                self.monitoring_batches = self.batches_per_iter
            self.monitor.setup(
                dataset=self.monitoring_dataset,
                cost=self.cost,
                batch_size=self.monitoring_batch_size,
                num_batches=self.monitoring_batches,
                extra_costs=self.monitoring_costs,
                mode=self.monitor_iteration_mode,
            )
            dataset_name = self.monitoring_dataset.keys()[0]
            monitoring_dataset = self.monitoring_dataset[dataset_name]
            # TODO: have Monitor support non-data-dependent channels
            self.monitor.add_channel(
                name="learning_rate",
                ipt=None,
                val=learning_rate,
                data_specs=(NullSpace(), ""),
                dataset=monitoring_dataset,
            )

            if self.learning_rule:
                self.learning_rule.add_channels_to_monitor(self.monitor, monitoring_dataset)

        params = list(model.get_params())
        assert len(params) > 0
        for i, param in enumerate(params):
            if param.name is None:
                param.name = "sgd_params[%d]" % i

        grads, updates = self.cost.get_gradients(model, nested_args, **fixed_var_descr.fixed_vars)
        if not isinstance(grads, OrderedDict):
            raise TypeError(
                str(type(self.cost))
                + ".get_gradients returned "
                + "something with"
                + str(type(grads))
                + "as its "
                + "first member. Expected OrderedDict."
            )

        for param in grads:
            assert param in params
        for param in params:
            assert param in grads

        for param in grads:
            if grads[param].name is None and cost_value is not None:
                grads[param].name = "grad(%(costname)s, %(paramname)s)" % {
                    "costname": cost_value.name,
                    "paramname": param.name,
                }
            assert grads[param].dtype == param.dtype

        lr_scalers = model.get_lr_scalers()

        for key in lr_scalers:
            if key not in params:
                raise ValueError(
                    "Tried to scale the learning rate on " + str(key) + " which is not an optimization parameter."
                )

        log.info("Parameter and initial learning rate summary:")
        for param in params:
            param_name = param.name
            if param_name is None:
                param_name = "anon_param"
            lr = learning_rate.get_value() * lr_scalers.get(param, 1.0)
            log.info("\t" + param_name + ": " + str(lr))

        if self.learning_rule:
            updates.update(self.learning_rule.get_updates(learning_rate, grads, lr_scalers))
        else:
            # Use standard SGD updates with fixed learning rate.
            updates.update(
                dict(
                    safe_zip(
                        params, [param - learning_rate * lr_scalers.get(param, 1.0) * grads[param] for param in params]
                    )
                )
            )

        for param in params:
            if updates[param].name is None:
                updates[param].name = "sgd_update(" + param.name + ")"
        model.modify_updates(updates)
        for param in params:
            update = updates[param]
            if update.name is None:
                update.name = "censor(sgd_update(" + param.name + "))"
            for update_val in get_debug_values(update):
                if contains_inf(update_val):
                    raise ValueError("debug value of %s contains infs" % update.name)
                if contains_nan(update_val):
                    raise ValueError("debug value of %s contains nans" % update.name)

        with log_timing(log, "Compiling sgd_update"):
            self.sgd_update = function(
                theano_args,
                updates=updates,
                name="sgd_update",
                on_unused_input="ignore",
                mode=self.theano_function_mode,
            )
        self.params = params
Exemplo n.º 11
0
def stochastic_max_pool_bc01(bc01, pool_shape, pool_stride, image_shape, rng = None):
    """
    .. todo::

        WRITEME properly

    Stochastic max pooling for training as defined in:

    Stochastic Pooling for Regularization of Deep Convolutional Neural Networks
    Matthew D. Zeiler, Rob Fergus

    Parameters
    ----------
    bc01 : theano 4-tensor
        in format (batch size, channels, rows, cols),
        IMPORTANT: All values should be positive
    pool_shape : tuple
        shape of the pool region (rows, cols)
    pool_stride : tuple
        strides between pooling regions (row stride, col stride)
    image_shape : tuple
        avoid doing some of the arithmetic in theano
    rng : theano random stream
    """
    r, c = image_shape
    pr, pc = pool_shape
    rs, cs = pool_stride

    batch = bc01.shape[0]
    channel = bc01.shape[1]

    rng = make_theano_rng(rng, 2022, which_method='multinomial')

    # Compute index in pooled space of last needed pool
    # (needed = each input pixel must appear in at least one pool)
    def last_pool(im_shp, p_shp, p_strd):
        rval = int(numpy.ceil(float(im_shp - p_shp) / p_strd))
        assert p_strd * rval + p_shp >= im_shp
        assert p_strd * (rval - 1) + p_shp < im_shp
        return rval
    # Compute starting row of the last pool
    last_pool_r = last_pool(image_shape[0] ,pool_shape[0], pool_stride[0]) * pool_stride[0]
    # Compute number of rows needed in image for all indexes to work out
    required_r = last_pool_r + pr

    last_pool_c = last_pool(image_shape[1] ,pool_shape[1], pool_stride[1]) * pool_stride[1]
    required_c = last_pool_c + pc

    # final result shape
    res_r = int(numpy.floor(last_pool_r/rs)) + 1
    res_c = int(numpy.floor(last_pool_c/cs)) + 1

    for bc01v in get_debug_values(bc01):
        assert not contains_inf(bc01v)
        assert bc01v.shape[2] == image_shape[0]
        assert bc01v.shape[3] == image_shape[1]

    # padding
    padded = tensor.alloc(0.0, batch, channel, required_r, required_c)
    name = bc01.name
    if name is None:
        name = 'anon_bc01'
    bc01 = tensor.set_subtensor(padded[:,:, 0:r, 0:c], bc01)
    bc01.name = 'zero_padded_' + name

    # unraveling
    window = tensor.alloc(0.0, batch, channel, res_r, res_c, pr, pc)
    window.name = 'unravlled_winodows_' + name

    for row_within_pool in xrange(pool_shape[0]):
        row_stop = last_pool_r + row_within_pool + 1
        for col_within_pool in xrange(pool_shape[1]):
            col_stop = last_pool_c + col_within_pool + 1
            win_cell = bc01[:,:,row_within_pool:row_stop:rs, col_within_pool:col_stop:cs]
            window  =  tensor.set_subtensor(window[:,:,:,:, row_within_pool, col_within_pool], win_cell)

    # find the norm
    norm = window.sum(axis = [4, 5])
    norm = tensor.switch(tensor.eq(norm, 0.0), 1.0, norm)
    norm = window / norm.dimshuffle(0, 1, 2, 3, 'x', 'x')
    # get prob
    prob = rng.multinomial(pvals = norm.reshape((batch * channel * res_r * res_c, pr * pc)), dtype='float32')
    # select
    res = (window * prob.reshape((batch, channel, res_r, res_c,  pr, pc))).max(axis=5).max(axis=4)
    res.name = 'pooled_' + name

    return tensor.cast(res, theano.config.floatX)
Exemplo n.º 12
0
def weighted_max_pool_bc01(bc01, pool_shape, pool_stride, image_shape, rng=None):
    """
    This implements test time probability weighted pooling defined in:

    Stochastic Pooling for Regularization of Deep Convolutional Neural Networks
    Matthew D. Zeiler, Rob Fergus

    Parameters
    ----------
    bc01 : theano 4-tensor
        minibatch in format (batch size, channels, rows, cols),
        IMPORTANT: All values should be poitivie
    pool_shape : theano 4-tensor
        shape of the pool region (rows, cols)
    pool_stride : tuple
        strides between pooling regions (row stride, col stride)
    image_shape : tuple
        avoid doing some of the arithmetic in theano
    """
    r, c = image_shape
    pr, pc = pool_shape
    rs, cs = pool_stride

    batch = bc01.shape[0]
    channel = bc01.shape[1]

    rng = make_theano_rng(rng, 2022, which_method="multinomial")

    # Compute index in pooled space of last needed pool
    # (needed = each input pixel must appear in at least one pool)
    def last_pool(im_shp, p_shp, p_strd):
        rval = int(numpy.ceil(float(im_shp - p_shp) / p_strd))
        assert p_strd * rval + p_shp >= im_shp
        assert p_strd * (rval - 1) + p_shp < im_shp
        return rval

    # Compute starting row of the last pool
    last_pool_r = last_pool(image_shape[0], pool_shape[0], pool_stride[0]) * pool_stride[0]
    # Compute number of rows needed in image for all indexes to work out
    required_r = last_pool_r + pr

    last_pool_c = last_pool(image_shape[1], pool_shape[1], pool_stride[1]) * pool_stride[1]
    required_c = last_pool_c + pc

    # final result shape
    res_r = int(numpy.floor(last_pool_r / rs)) + 1
    res_c = int(numpy.floor(last_pool_c / cs)) + 1

    for bc01v in get_debug_values(bc01):
        assert not contains_inf(bc01v)
        assert bc01v.shape[2] == image_shape[0]
        assert bc01v.shape[3] == image_shape[1]

    # padding
    padded = tensor.alloc(0.0, batch, channel, required_r, required_c)
    name = bc01.name
    if name is None:
        name = "anon_bc01"
    bc01 = tensor.set_subtensor(padded[:, :, 0:r, 0:c], bc01)
    bc01.name = "zero_padded_" + name

    # unraveling
    window = tensor.alloc(0.0, batch, channel, res_r, res_c, pr, pc)
    window.name = "unravlled_winodows_" + name

    for row_within_pool in xrange(pool_shape[0]):
        row_stop = last_pool_r + row_within_pool + 1
        for col_within_pool in xrange(pool_shape[1]):
            col_stop = last_pool_c + col_within_pool + 1
            win_cell = bc01[:, :, row_within_pool:row_stop:rs, col_within_pool:col_stop:cs]
            window = tensor.set_subtensor(window[:, :, :, :, row_within_pool, col_within_pool], win_cell)

    # find the norm
    norm = window.sum(axis=[4, 5])
    norm = tensor.switch(tensor.eq(norm, 0.0), 1.0, norm)
    norm = window / norm.dimshuffle(0, 1, 2, 3, "x", "x")
    # average
    res = (window * norm).sum(axis=[4, 5])
    res.name = "pooled_" + name

    return res.reshape((batch, channel, res_r, res_c))
Exemplo n.º 13
0
def main():
    """
    .. todo::

        WRITEME
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out")
    parser.add_argument("model_paths", nargs='+')
    parser.add_argument("--yrange", help='The y-range to be used for plotting, e.g.  0:1')
    
    options = parser.parse_args()
    model_paths = options.model_paths

    if options.out is not None:
      import matplotlib
      matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print('generating names...')
    model_names = [model_path.replace('.pkl', '!') for model_path in
            model_paths]
    model_names = unique_substrings(model_names, min_size=10)
    model_names = [model_name.replace('!','') for model_name in
            model_names]
    print('...done')

    for i, arg in enumerate(model_paths):
        try:
            model = serial.load(arg)
        except Exception:
            if arg.endswith('.yaml'):
                print(sys.stderr, arg + " is a yaml config file," + 
                      "you need to load a trained model.", file=sys.stderr)
                quit(-1)
            raise
        this_model_channels = model.monitor.channels

        if len(sys.argv) > 2:
            postfix = ":" + model_names[i]
        else:
            postfix = ""

        for channel in this_model_channels:
            channels[channel+postfix] = this_model_channels[channel]
        del model
        gc.collect()


    while True:
        # Make a list of short codes for each channel so user can specify them
        # easily
        tag_generator = _TagGenerator()
        codebook = {}
        sorted_codes = []
        for channel_name in sorted(channels,
                key = number_aware_alphabetical_key):
            code = tag_generator.get_tag()
            codebook[code] = channel_name
            codebook['<'+channel_name+'>'] = channel_name
            sorted_codes.append(code)

        x_axis = 'example'
        print('set x_axis to example')

        if len(channels.values()) == 0:
            print("there are no channels to plot")
            break

        # If there is more than one channel in the monitor ask which ones to
        # plot
        prompt = len(channels.values()) > 1

        if prompt:

            # Display the codebook
            for code in sorted_codes:
                print(code + '. ' + codebook[code])

            print()

            print("Put e, b, s or h in the list somewhere to plot " + 
                    "epochs, batches, seconds, or hours, respectively.")
            response = input('Enter a list of channels to plot ' + \
                    '(example: A, C,F-G, h, <test_err>) or q to quit' + \
                    ' or o for options: ')

            if response == 'o':
                print('1: smooth all channels')
                print('any other response: do nothing, go back to plotting')
                response = input('Enter your choice: ')
                if response == '1':
                    for channel in channels.values():
                        k = 5
                        new_val_record = []
                        for i in xrange(len(channel.val_record)):
                            new_val = 0.
                            count = 0.
                            for j in xrange(max(0, i-k), i+1):
                                new_val += channel.val_record[j]
                                count += 1.
                            new_val_record.append(new_val / count)
                        channel.val_record = new_val_record
                continue

            if response == 'q':
                break

            #Remove spaces
            response = response.replace(' ','')

            #Split into list
            codes = response.split(',')

            final_codes = set([])

            for code in codes:
                if code == 'e':
                    x_axis = 'epoch'
                    continue
                elif code == 'b':
                    x_axis = 'batche'
                elif code == 's':
                    x_axis = 'second'
                elif code == 'h':
                    x_axis = 'hour'
                elif code.startswith('<'):
                    assert code.endswith('>')
                    final_codes.add(code)
                elif code.find('-') != -1:
                    #The current list element is a range of codes

                    rng = code.split('-')

                    if len(rng) != 2:
                        print("Input not understood: "+code)
                        quit(-1)

                    found = False
                    for i in xrange(len(sorted_codes)):
                        if sorted_codes[i] == rng[0]:
                            found = True
                            break

                    if not found:
                        print("Invalid code: "+rng[0])
                        quit(-1)

                    found = False
                    for j in xrange(i,len(sorted_codes)):
                        if sorted_codes[j] == rng[1]:
                            found = True
                            break

                    if not found:
                        print("Invalid code: "+rng[1])
                        quit(-1)

                    final_codes = final_codes.union(set(sorted_codes[i:j+1]))
                else:
                    #The current list element is just a single code
                    final_codes = final_codes.union(set([code]))
            # end for code in codes
        else:
            final_codes ,= set(codebook.keys())

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        styles = list(colors)
        styles += [color+'--' for color in colors]
        styles += [color+':' for color in colors]

        fig = plt.figure()
        ax = plt.subplot(1,1,1)

        # plot the requested channels
        for idx, code in enumerate(sorted(final_codes)):

            channel_name= codebook[code]
            channel = channels[channel_name]

            y = np.asarray(channel.val_record)

            if contains_nan(y):
                print(channel_name + ' contains NaNs')

            if contains_inf(y):
                print(channel_name + 'contains infinite values')

            if x_axis == 'example':
                x = np.asarray(channel.example_record)
            elif x_axis == 'batche':
                x = np.asarray(channel.batch_record)
            elif x_axis == 'epoch':
                try:
                    x = np.asarray(channel.epoch_record)
                except AttributeError:
                    # older saved monitors won't have epoch_record
                    x = np.arange(len(channel.batch_record))
            elif x_axis == 'second':
                x = np.asarray(channel.time_record)
            elif x_axis == 'hour':
                x = np.asarray(channel.time_record) / 3600.
            else:
                assert False


            ax.plot( x,
                      y,
                      styles[idx % len(styles)],
                      marker = '.', # add point margers to lines
                      label = channel_name)

        plt.xlabel('# '+x_axis+'s')
        ax.ticklabel_format( scilimits = (-3,3), axis = 'both')

        handles, labels = ax.get_legend_handles_labels()
        lgd = ax.legend(handles, labels, loc = 'upper left',
               bbox_to_anchor = (1.05, 1.02))

        # Get the axis positions and the height and width of the legend

        plt.draw()       
        ax_pos = ax.get_position()
        pad_width = ax_pos.x0 * fig.get_size_inches()[0]
        pad_height = ax_pos.y0 * fig.get_size_inches()[1]
        dpi = fig.get_dpi()
        lgd_width = ax.get_legend().get_frame().get_width() / dpi 
        lgd_height = ax.get_legend().get_frame().get_height() / dpi 

        # Adjust the bounding box to encompass both legend and axis.  Axis should be 3x3 inches.
        # I had trouble getting everything to align vertically.

        ax_width = 3
        ax_height = 3
        total_width = 2*pad_width + ax_width + lgd_width
        total_height = 2*pad_height + np.maximum(ax_height, lgd_height)

        fig.set_size_inches(total_width, total_height)
        ax.set_position([pad_width/total_width, 1-6*pad_height/total_height, ax_width/total_width, ax_height/total_height])

        if(options.yrange is not None):
            ymin, ymax = map(float, options.yrange.split(':'))
            plt.ylim(ymin, ymax)
        
        if options.out is None:
          plt.show()
        else:
          plt.savefig(options.out)

        if not prompt:
            break