Example #1
    def __call__(self, trainer=None):
        # set up a reporter
        reporter = reporter_module.Reporter()
        reporter.add_observer(self.name, self.target)

        with reporter:
            with configuration.using_config('train', False):
                with configuration.using_config('lmt', True):
                    with configuration.using_config('lmt-fc', True):
                        with configuration.using_config('exact', True):
                            with configuration.using_config(
                                    'cudnn_deterministic', True):
                                self.evaluate(
                                    os.path.join(trainer.out, 'margin.npy'))
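Every example on this page revolves around configuration.using_config (also exposed as chainer.using_config), which temporarily overrides one field of chainer.config and restores it when the with block exits. A minimal sketch, assuming only that Chainer is installed:

import chainer
from chainer import configuration

print(chainer.config.train)                   # True by default
with configuration.using_config('train', False):
    print(chainer.config.train)               # False inside the block
print(chainer.config.train)                   # True again after the block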
Example #2
    def __call__(self, trainer=None):
        """Executes the evaluator extension.
        Unlike usual extensions, this extension can be executed without passing
        a trainer object. This extension reports the performance on validation
        dataset using the :func:`~chainer.report` function. Thus, users can use
        this extension independently from any trainer by manually configuring
        a :class:`~chainer.Reporter` object.
        Args:
            trainer (~chainer.training.Trainer): Trainer object that invokes
                this extension. It can be omitted in case of calling this
                extension manually.
        Returns:
            dict: Result dictionary that contains mean statistics of values
            reported by the evaluation function.
        """
        # set up a reporter
        reporter = reporter_module.Reporter()
        if self.name is not None:
            prefix = self.name + '/'
        else:
            prefix = ''
        for name, target in six.iteritems(self._targets):
            reporter.add_observer(prefix + name, target)
            reporter.add_observers(prefix + name,
                                   target.namedlinks(skipself=True))

        with reporter:
            with configuration.using_config('train', False):
                result = self.evaluate_roc(trainer=trainer)

        reporter_module.report(result)
        return result
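As the docstring above notes, an evaluator can be driven without a trainer. Below is a minimal sketch of that pattern using the stock chainer.training.extensions.Evaluator; the toy model and dataset are assumptions made purely for illustration:

import numpy as np
import chainer.links as L
from chainer import iterators
from chainer.training import extensions

model = L.Classifier(L.Linear(3, 2))                       # toy model
val_data = [(np.zeros(3, np.float32), np.int32(0))] * 4    # toy dataset
val_iter = iterators.SerialIterator(val_data, batch_size=2, repeat=False)

evaluator = extensions.Evaluator(val_iter, model)
result = evaluator()   # no trainer object needed
print(result)          # e.g. {'main/loss': ..., 'main/accuracy': ...}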
Example #3
def force_backprop_mode():
    """Enable back-propagation for Variable whose volatile is auto.

    When you want to enable back-propagation in :func:`no_backprop_mode`,
    call this method. In this context, a :class:`~chainer.Variable` object
    whose ``volatile`` attribute is ``'auto'`` behaves like a **non-volatile**
    variable. That means you can override :func:`no_backprop_mode` in this
    context.

    If you call this method outside of a :func:`no_backprop_mode` context, it
    changes nothing: a :class:`~chainer.Variable` object with
    ``volatile='auto'`` already behaves like a non-volatile variable by
    default.

    In this example, the volatility of ``x`` and ``y`` is ``'auto'``. In
    :func:`no_backprop_mode` context, ``y`` does not have a computational graph
    but in :func:`force_backprop_mode` it has a graph.

    >>> with chainer.no_backprop_mode():
    ...   # Variable with volatile='auto' behaves like volatile='on'
    ...   with chainer.force_backprop_mode():
    ...     # Variable with volatile='auto' behaves like volatile='off'
    ...     y = x + 1

    .. seealso::

       See :func:`no_backprop_mode` for details of back-prop mode.

    """
    return configuration.using_config('enable_backprop', True)
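A minimal runnable sketch of the behaviour described above, written against the newer enable_backprop-based API (no volatile flag): force_backprop_mode re-enables graph construction inside no_backprop_mode.

import numpy as np
import chainer

x = chainer.Variable(np.array([1.0], dtype=np.float32))
with chainer.no_backprop_mode():
    with chainer.force_backprop_mode():
        y = 2 * x          # the graph is recorded despite no_backprop_mode
y.backward()
print(x.grad)              # [2.]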
Example #4
def numerical_grad(f, inputs, grad_outputs, eps=1e-3):
    """Computes numerical gradient by finite differences.

    This function is used to implement gradient check. For usage example, see
    unit tests of :mod:`chainer.functions`.

    Args:
        f (function): Python function with no arguments that runs forward
            computation and returns the result.
        inputs (tuple of arrays): Tuple of arrays that should be treated as
            inputs. Each element of them is slightly modified to realize
            numerical gradient by finite differences.
        grad_outputs (tuple of arrays): Tuple of arrays that are treated as
            output gradients.
        eps (float): Epsilon value of finite differences.

    Returns:
        tuple: Numerical gradient arrays corresponding to ``inputs``.

    """
    assert eps > 0
    inputs = tuple(inputs)
    grad_outputs = tuple(grad_outputs)
    gpu = any(isinstance(x, cuda.ndarray) for x in inputs + grad_outputs)
    cpu = any(isinstance(x, numpy.ndarray) for x in inputs + grad_outputs)

    if gpu and cpu:
        raise RuntimeError('Do not mix GPU and CPU arrays in `numerical_grad`')

    if gpu:
        xp = cuda.cupy
        numerical_grad_kernel = cuda.reduce(
            'T y1, T y2, U gy, T eps', 'V gxi',
            '(y1 - y2) * gy', 'a + b', 'gxi += a / (eps * 2)', '0',
            'numerical_grad_kernel'
        )
    else:
        xp = numpy
    grads = [xp.zeros_like(x) for x in inputs]

    with configuration.using_config('type_check', False):
        for x, gx in six.moves.zip(inputs, grads):
            for i in numpy.ndindex(x.shape):
                orig = x[i].copy()  # hold original value
                x[i] = orig + eps
                ys1 = _copy_arrays(f())
                x[i] = orig - eps
                ys2 = _copy_arrays(f())
                x[i] = orig
                for y1, y2, gy in six.moves.zip(ys1, ys2, grad_outputs):
                    if gy is not None:
                        if (gpu and isinstance(y1, cuda.ndarray) and
                                isinstance(y2, cuda.ndarray) and
                                isinstance(gy, cuda.ndarray)):
                            numerical_grad_kernel(y1, y2, gy, eps, gx[i])
                        else:
                            dot = ((y1 - y2) * gy).sum()
                            gx[i] += dot / (2 * eps)

    return grads
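A small gradient-check sketch using numerical_grad, with a hypothetical toy forward function f that reads the input array in place, as the signature above requires:

import numpy as np
from chainer import gradient_check

x = np.random.rand(3).astype(np.float32)
gy = np.ones_like(x)            # upstream gradient

def f():
    return (x ** 2,)            # no-argument forward pass reading x in place

gx, = gradient_check.numerical_grad(f, (x,), (gy,))
# The analytic gradient of y = x**2 with gy = 1 is 2 * x.
np.testing.assert_allclose(gx, 2 * x, rtol=1e-2, atol=1e-2)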
Example #5
def fixed_batch_renormalization(x, gamma, beta, mean, var, eps=2e-5):
    warnings.warn(
        'fixed_batch_renormalization is deprecated. '
        'Use fixed_batch_normalization instead.', DeprecationWarning)
    with configuration.using_config('train', False):
        return batch_normalization.fixed_batch_normalization(
            x, gamma, beta, mean, var, eps)
Example #6
    def evaluate(self, snapshot_name=''):
        current_device = cuda.get_device_from_id(self.args.gpu)
        with current_device:
            gt_data = []
            pred_data = []

            for i, batch in enumerate(
                    tqdm(self.data_iterator,
                         total=len(self.data_loader) // self.args.batchsize)):
                image, gt_bboxes, gt_labels = batch[0]
                gt_data.append((gt_bboxes, gt_labels))
                # if self.args.gpu is not None:
                #     image = cuda.to_gpu(image, current_device)

                with cuda.Device(self.args.gpu):
                    with configuration.using_config('train', False):
                        bboxes, labels, scores = self.model.predict(
                            image.copy()[None, ...])
                        if len(bboxes[0]) == 0:
                            bboxes = [np.zeros((1, 4), dtype=np.float32)]
                            labels = [np.zeros((1, ), dtype=np.int32)]
                            scores = [np.zeros((1, ), dtype=np.float32)]
                        pred_data.append((bboxes[0], labels[0], scores[0]))
                        # TODO handle empty predictions!!

            bboxes, labels, scores = zip(*pred_data)
            gt_bboxes, gt_labels = concat_examples(gt_data)
            result = eval_detection_voc(bboxes, labels, scores, gt_bboxes,
                                        gt_labels, None)
            map = result['map']

            self.save_eval_results(snapshot_name, map)
Example #7
def force_backprop_mode():
    """Make a context manager which enables back-propagation.

    When you want to enable back-propagation in :func:`no_backprop_mode`, call
    this method. A :class:`~chainer.Variable` created in this context always
    has a computational graph unless overridden by deeper contexts. If you call
    this method outside of :func:`no_backprop_mode` context, it changes
    nothing.

    In the following example, ``y`` has a computational graph and calling
    :func:`~chainer.Variable.backward` on ``y`` will compute and accumulate the
    gradients of the variables in the graph, in this case only ``x``.

    >>> x = chainer.Variable(np.array([1,], 'f'))
    >>> with chainer.no_backprop_mode():
    ...     with chainer.force_backprop_mode():
    ...         y = x + 1
    >>> y.backward()
    >>> x.grad
    array([ 1.], dtype=float32)

    .. seealso::

       See :func:`no_backprop_mode` for details on disabled back-propagation
       mode.

    """
    return configuration.using_config('enable_backprop', True)
Example #8
def fixed_batch_normalization(x, gamma, beta, mean, var, eps=2e-5,
                              use_cudnn=True):
    """Batch normalization function with fixed statistics.

    This is a variant of batch normalization, where the mean and variance
    statistics are given by the caller as fixed variables. This is
    used on testing mode of the batch normalization layer, where batch
    statistics cannot be used for prediction consistency.

    Args:
        x (Variable): Input variable.
        gamma (Variable): Scaling parameter of normalized data.
        beta (Variable): Shifting parameter of scaled normalized data.
        mean (Variable): Shifting parameter of input.
        var (Variable): Square of scaling parameter of input.
        eps (float): Epsilon value for numerical stability.
        use_cudnn (bool): If ``True`` and cuDNN is enabled, then this function
            uses cuDNN as the core implementation.

    .. seealso::
       :func:`functions.batch_normalization`,
       :class:`links.BatchNormalization`

    """
    with configuration.using_config('train', False):
        return BatchNormalizationFunction(eps, None, None, 0.0,
                                          use_cudnn)(x, gamma, beta, mean, var)
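A short sketch of calling the functional API with precomputed statistics, as one would at test time; the mean and variance here are stand-ins computed from the batch itself rather than real running statistics:

import numpy as np
import chainer.functions as F

x = np.random.randn(4, 3).astype(np.float32)     # batch of 4, 3 channels
gamma = np.ones(3, dtype=np.float32)
beta = np.zeros(3, dtype=np.float32)
mean = x.mean(axis=0)                            # stand-in running mean
var = x.var(axis=0)                              # stand-in running variance

y = F.fixed_batch_normalization(x, gamma, beta, mean, var)
print(y.shape)                                   # (4, 3)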
Example #9
def no_backprop_mode():
    """Make a context manager which disables back-propagation.

    In this context, Chainer does not make a computational graph. It has the
    benefit of reducing memory consumption. However, a
    :class:`~chainer.Variable` created in this context does not hold a
    reference to the :class:`~chainer.FunctionNode` that created itself so no
    gradients are accumulated by :func:`~chainer.Variable.backward`.

    In the following example, ``y`` is created in this context, which means
    that calling :func:`~chainer.Variable.backward` on ``y`` has no effect on
    the gradients of ``x``.

    >>> x = chainer.Variable(np.array([1,], 'f'))
    >>> with chainer.no_backprop_mode():
    ...     y = x + 1
    >>> y.backward()
    >>> x.grad is None
    True

    .. seealso::

       See :func:`force_backprop_mode` for details on how to override this
       context.

    """
    return configuration.using_config('enable_backprop', False)
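Many of the snippets on this page combine this context with using_config('train', False) when evaluating a model. A minimal sketch of that idiom; the Linear link is just a placeholder model:

import numpy as np
import chainer
import chainer.links as L

model = L.Linear(3, 2)                    # placeholder model
x = np.zeros((1, 3), dtype=np.float32)

with chainer.using_config('train', False), chainer.no_backprop_mode():
    y = model(x)

print(y.creator is None)                  # True: no graph was recorded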
Example #10
    def evaluate(self):
        results = []
        with chainer.cuda.Device(self.args.gpu):
            for i, line in enumerate(tqdm.tqdm(self.lines)):
                image_file = line[0]
                labels = self.xp.array(line[1:], dtype=self.xp.int32)
                labels = labels.reshape((-1, self.args.num_labels))
                image = self.load_image(image_file)
                with configuration.using_config('train', False):
                    predictions, crops, grids = self.net(image[self.xp.newaxis,
                                                               ...])

                words, gt_words = self.calc_accuracy(predictions, labels)
                results.append([words, gt_words])

                if self.save_rois:
                    image = self.xp.asarray(image)
                    self.bbox_plotter.xp = self.xp
                    self.bbox_plotter.render_rois(predictions, crops, grids, i,
                                                  image)

        self.print_results()

        with open(os.path.join(self.model_dir, "eval_results.csv"),
                  "w") as results_file:
            writer = csv.writer(results_file, delimiter=',')
            writer.writerows(results)
Example #11
def force_backprop_mode():
    """Make a context manager which enables back-propagation.

    When you want to enable back-propagation in :func:`no_backprop_mode`, call
    this method. A :class:`~chainer.Variable` created in this context always
    has a computational graph unless overridden by deeper contexts. If you call
    this method outside of :func:`no_backprop_mode` context, it changes
    nothing.

    In the following example, ``y`` has a computational graph and calling
    :func:`~chainer.Variable.backward` on ``y`` will compute and accumulate the
    gradients of the variables in the graph, in this case only ``x``.

    >>> x = chainer.Variable(np.array([1,], 'f'))
    >>> with chainer.no_backprop_mode():
    ...     with chainer.force_backprop_mode():
    ...         y = x + 1
    >>> y.backward()
    >>> x.grad
    array([ 1.], dtype=float32)

    .. seealso::

       See :func:`no_backprop_mode` for details on disabled back-propagation
       mode.

    """
    return configuration.using_config('enable_backprop', True)
Example #12
def no_backprop_mode():
    """Make a context manager which disables back-propagation.

    In this context, Chainer does not make a computational graph. It has the
    benefit of reducing memory consumption. However, a
    :class:`~chainer.Variable` created in this context does not hold a
    reference to the :class:`~chainer.FunctionNode` that created itself so no
    gradients are accumulated by :func:`~chainer.Variable.backward`.

    In the following example, ``y`` is created in this context, which means
    that calling :func:`~chainer.Variable.backward` on ``y`` has no effect on
    the gradients of ``x``.

    >>> x = chainer.Variable(np.array([1,], 'f'))
    >>> with chainer.no_backprop_mode():
    ...     y = x + 1
    >>> y.backward()
    >>> x.grad is None
    True

    .. seealso::

       See :func:`force_backprop_mode` for details on how to override this
       context.

    """
    return configuration.using_config('enable_backprop', False)
Example #13
def numerical_grad(f, inputs, grad_outputs, eps=1e-3):
    """Computes numerical gradient by finite differences.

    This function is used to implement gradient check. For usage example, see
    unit tests of :mod:`chainer.functions`.

    Args:
        f (function): Python function with no arguments that runs forward
            computation and returns the result.
        inputs (tuple of arrays): Tuple of arrays that should be treated as
            inputs. Each element of them is slightly modified to realize
            numerical gradient by finite differences.
        grad_outputs (tuple of arrays): Tuple of arrays that are treated as
            output gradients.
        eps (float): Epsilon value of finite differences.

    Returns:
        tuple: Numerical gradient arrays corresponding to ``inputs``.

    """
    assert eps > 0
    inputs = tuple(inputs)
    grad_outputs = tuple(grad_outputs)
    gpu = any(isinstance(x, cuda.ndarray) for x in inputs + grad_outputs)
    cpu = any(isinstance(x, numpy.ndarray) for x in inputs + grad_outputs)

    if gpu and cpu:
        raise RuntimeError('Do not mix GPU and CPU arrays in `numerical_grad`')

    if gpu:
        xp = cuda.cupy
        numerical_grad_kernel = cuda.reduce(
            'T y1, T y2, U gy, T eps', 'V gxi',
            '(y1 - y2) * gy', 'a + b', 'gxi += a / (eps * 2)', '0',
            'numerical_grad_kernel'
        )
    else:
        xp = numpy
    grads = [xp.zeros_like(x) for x in inputs]

    with configuration.using_config('type_check', False):
        for x, gx in six.moves.zip(inputs, grads):
            for i in numpy.ndindex(x.shape):
                orig = x[i].copy()  # hold original value
                x[i] = orig + eps
                ys1 = _copy_arrays(f())
                x[i] = orig - eps
                ys2 = _copy_arrays(f())
                x[i] = orig
                for y1, y2, gy in six.moves.zip(ys1, ys2, grad_outputs):
                    if gy is not None:
                        if (gpu and isinstance(y1, cuda.ndarray) and
                                isinstance(y2, cuda.ndarray) and
                                isinstance(gy, cuda.ndarray)):
                            numerical_grad_kernel(y1, y2, gy, eps, gx[i])
                        else:
                            dot = ((y1 - y2) * gy).sum()
                            gx[i] += dot / (2 * eps)

    return grads
Example #14
def fixed_batch_renormalization(x, gamma, beta, mean, var, eps=2e-5):
    warnings.warn(
        'fixed_batch_renormalization is deprecated. '
        'Use fixed_batch_normalization instead.',
        DeprecationWarning)
    with configuration.using_config('train', False):
        return batch_normalization.fixed_batch_normalization(
            x, gamma, beta, mean, var, eps
        )
Example #15
    def test_forward_cpu2(self):
        y_dyn = self.chain.dynamic_call(self.x)
        x2 = 2*self.x
        # todo: add a new config so that we can still use 'train'
        with configuration.using_config('train', False):
            y_static1 = self.chain.static_call(x2)
            y_static1.grad = y_static1.data.copy()
            y_static1.backward()

            schedule_manager = self.chain.schedule_manager
            print("sched 1: ", schedule_manager)
            y_static = self.chain.static_call(self.x)
        chainer.testing.assert_allclose(y_dyn.data, y_static.data)
Example #16
    def test_forward_cpu2(self):
        y_dyn = self.chain.dynamic_call(self.x)
        x2 = 2 * self.x
        # todo: add a new config so that we can still use 'train'
        with configuration.using_config('train', False):
            y_static1 = self.chain.static_call(x2)
            y_static1.grad = y_static1.data.copy()
            y_static1.backward()

            schedule_manager = self.chain.schedule_manager
            print('sched 1: ', schedule_manager)
            y_static = self.chain.static_call(self.x)
        chainer.testing.assert_allclose(y_dyn.data, y_static.data)
Example #17
    def predict_core(self, model, batch):
        in_arrays = self.converter(batch, self.device)

        with function.no_backprop_mode():
            with configuration.using_config('train', False):

                if isinstance(in_arrays, tuple):
                    y = model(*in_arrays)
                elif isinstance(in_arrays, dict):
                    y = model(**in_arrays)
                else:
                    y = model(in_arrays)

        return _variable_to_array(y, to_cpu=self.to_cpu)
Example #18
    def evaluate(model, iter):
        # Evaluation routine to be used for validation and test.
        evaluator = model.copy()  # to use different state
        evaluator.predictor.reset_state()  # initialize state
        sum_perp = 0
        data_count = 0
        # Enable evaluation mode.
        with configuration.using_config('train', False):
            # This is optional but can reduce computational overhead.
            with chainer.using_config('enable_backprop', False):
                for batch in copy.copy(iter):
                    x, t = convert.concat_examples(batch, args.gpu)
                    loss = evaluator(x, t)
                    sum_perp += loss.array
                    data_count += 1
        return np.exp(float(sum_perp) / data_count)
Example #19
def test(model, dataset, inv_vocab, device=-1, batchsize=128):
    """
    Predict without evaluating. Refer to :func:`test` for information about
    the arguments.

    Returns:
        numpy.ndarray: Prediction probability whose size is `data size` x
            `number of labels`.

    """
    if device >= 0:
        model.to_gpu(device)

    it = SerialIterator(dataset, batchsize, repeat=False, shuffle=False)

    results = []
    for batch in it:
        in_arrays = convert(batch, device)
        with chainer.function.no_backprop_mode(), using_config('train', False):
            y, z_prob, z = model.forward(in_arrays['xs'])
            loss, loss_encoder, sparsity, coherence, regressor_cost, loss_generator = \
                model.calc_loss(y, z, z_prob, in_arrays['ys'])
        loss = to_cpu(loss.data)
        loss_encoder = to_cpu(loss_encoder.data)
        sparsity = to_cpu(sparsity)
        coherence = to_cpu(coherence)
        regressor_cost = to_cpu(regressor_cost)
        loss_generator = to_cpu(loss_generator.data)
        y = to_cpu(y.data).tolist()
        z = [to_cpu(zi).tolist() for zi in z]
        xs = [to_cpu(xi).tolist() for xi in in_arrays['xs']]

        results.extend(({
            'x': xs[i],
            'z': list(map(int, z[i])),
            'y': y[i],
            'text': [inv_vocab[t] for t in xs[i]],
            'rationale': [inv_vocab[t] if zt > 0.5 else '_'
                          for t, zt in zip(xs[i], z[i])],
            'loss': float(loss[i]),
            'loss_encoder': float(loss_encoder[i]),
            'sparsity_cost': float(sparsity[i]),
            'coherence': float(coherence[i]),
            'regressor_cost': float(regressor_cost[i]),
            'loss_generator': float(loss_generator[i])
        } for i in range(len(y))))
    return results
Example #20
def run_train_loop(optimizer, train_iter, test_iter, test_count, epoch,
                   device):
    model = optimizer.target

    train_count = 0
    sum_accuracy = 0
    sum_loss = 0
    while train_iter.epoch < epoch:
        batch = train_iter.next()
        # Reduce learning rate by 0.5 every 25 epochs.
        if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch:
            optimizer.lr *= 0.5
            print('Reducing learning rate to: {}'.format(optimizer.lr))

        x_array, t_array = convert.concat_examples(batch, device)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array, requires_grad=False)
        optimizer.update(model, x, t)
        train_count += len(t)
        sum_loss += float(model.loss.array) * len(t)
        sum_accuracy += float(model.accuracy.array) * len(t)

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            train_count = 0
            sum_accuracy = 0
            sum_loss = 0
            model.predictor.train = False
            # It is good practice to turn off train mode during evaluation.
            with configuration.using_config('train', False):
                for batch in test_iter:
                    x_array, t_array = convert.concat_examples(batch, device)
                    x = chainer.Variable(x_array)
                    t = chainer.Variable(t_array, requires_grad=False)
                    loss = model(x, t)
                    sum_loss += float(loss.array) * len(t)
                    sum_accuracy += float(model.accuracy.array) * len(t)

            test_iter.reset()
            model.predictor.train = True
            print('test mean  loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0
Example #21
    def __call__(self, trainer):
        # set up a reporter
        reporter = reporter_module.Reporter()
        if hasattr(self, 'name'):
            prefix = self.name + '/'
        else:
            prefix = ''
        for name, target in six.iteritems(self.targets):
            reporter.add_observer(prefix + name, target)
            reporter.add_observers(prefix + name,
                                   target.namedlinks(skipself=True))

        with reporter:
            with configuration.using_config('train', False):
                result = self.evaluate(trainer)

        reporter_module.report(result)
        return result
Example #22
def no_backprop_mode():
    """Make a context manager which disables back-propagation.

    In this context, Chainer does not make a computational graph.
    :class:`~chainer.Variable` created in this context does not have
    reference to the :class:`~chainer.Function` which created the variable.
    So, you cannot compute gradient with :func:`~chainer.Variable.backward`.
    Instead memory consumption is reduced.

    In this example, ``y`` is created in this context. So you cannot compute
    gradients for it with :func:`~chainer.Variable.backward`.

    >>> x = chainer.Variable(np.array([1,], 'f'))
    >>> with chainer.no_backprop_mode():
    ...   y = x + 1

    """
    return configuration.using_config('enable_backprop', False)
Example #23
def no_backprop_mode():
    """Make a context manager which disables back-propagation.

    In this context, Chainer does not make a computational graph.
    :class:`~chainer.Variable` created in this context does not have
    reference to the :class:`~chainer.Function` which created the variable.
    So, you cannot compute gradient with :func:`~chainer.Variable.backward`.
    Instead memory consumption is reduced.

    In this example, ``y`` is created in this context. So you cannot compute
    gradients for it with :func:`~chainer.Variable.backward`.

    >>> x = chainer.Variable(numpy.array([1,], 'f'))
    >>> with chainer.no_backprop_mode():
    ...    y = x + 1

    """
    return configuration.using_config('enable_backprop', False)
Example #24
def evaluate_rationale(model, dataset, device=-1, batchsize=128):
    if device >= 0:
        model.to_gpu(device)
    it = SerialIterator(dataset, batchsize, repeat=False, shuffle=False)

    tot_mse = 0.0
    accum_precision = 0.0  # for calculating macro precision
    true_positives = 0.0  # for calculating micro precision
    chosen_ratios = 0.0  # for calculating micro precision
    tot_z, tot_n, tot_t = 1e-10, 1e-10, 1e-10
    for batch in it:
        in_arrays = convert(batch, device)
        with chainer.function.no_backprop_mode(), using_config('train', False):
            pred, z_prob, z = model.forward(in_arrays['xs'])
            regressor_cost = model.calc_loss(pred, z, z_prob, in_arrays['ys'])[4]
        regressor_cost = to_cpu(regressor_cost)
        z = [to_cpu(zi).tolist() for zi in z]

        tot_mse += regressor_cost.sum()

        for bi, zi in zip(batch, z):
            true_z = bi['zs']
            nzi = sum(zi)
            tp = np.sum(np.logical_and(zi, true_z))
            if nzi == 0:
                # precision is undefined when there is 0 prediction
                continue
            accum_precision += tp / float(nzi)
            tot_n += 1
            true_positives += tp
            tot_z += nzi
            chosen_ratios += nzi / float(len(zi))
            tot_t += len(zi)

    result = {
        "mse": tot_mse/len(dataset),
        "macro_precision": accum_precision / tot_n,
        "micro_precision": true_positives / tot_z,
        "micro_chosen_ratio": tot_z / tot_t,
        "macro_chosen_ratio": chosen_ratios / tot_n,
    }
    return result
Example #25
    def __call__(self, trainer=None):
        """Executes the evaluator extension.
        Unlike usual extensions, this extension can be executed without passing
        a trainer object. This extension reports the performance on validation
        dataset using the :func:`~chainer.report` function. Thus, users can use
        this extension independently from any trainer by manually configuring
        a :class:`~chainer.Reporter` object.
        Args:
            trainer (~chainer.training.Trainer): Trainer object that invokes
                this extension. It can be omitted in case of calling this
                extension manually.
        Returns:
            dict: Result dictionary that contains mean statistics of values
                reported by the evaluation function.
        """
        with configuration.using_config('train', False):
            result = self.evaluate()

        reporter_module.report(result)
        return result
Example #26
def predict_see(image):
    image = Image.fromarray(image)
    image = preprocess_image(image, xp, image_size)
    with configuration.using_config('train', False):
        predictions, crops, grids = network(image[xp.newaxis, ...])

    predictions = F.concat(
        [F.expand_dims(prediction, axis=0) for prediction in predictions],
        axis=0)

    classification = F.softmax(predictions, axis=2)
    classification = classification.data
    classification = xp.argmax(classification, axis=2)
    classification = xp.transpose(classification, (1, 0))

    word = strip_prediction(classification, xp, args.blank_symbol)[0]

    word = "".join(map(lambda x: chr(char_map[str(x)]), word))

    return word
Example #27
def no_backprop_mode():
    """Disable back-propagation for Variable whose volatile is auto.

    In the default setting a :class:`~chainer.Variable` object whose
    ``volatile`` attribute is ``'auto'`` behaves like a **non-volatile**
    variable. That means such a :class:`~chainer.Variable` object builds a
    computational graph, consumes memory to store the graph, and you can
    execute back-propagation for it. With this context such a
    :class:`~chainer.Variable` object behaves like a **volatile** variable.
    So, you can easily switch training and evaluation.

    In this example, the volatility of ``x`` and ``y`` is ``'auto'``. So, ``y``
    does not have a computational graph.

    >>> x = chainer.Variable(numpy.array([1,], 'f'), volatile='auto')
    >>> with chainer.no_backprop_mode():
    ...    y = x + 1

    """
    return configuration.using_config('enable_backprop', False)
Example #28
def process(image, network, char_map, xp, args):
    with configuration.using_config('train', False):
        predictions, crops, grids = network(image[xp.newaxis, ...])

    # extract class scores for each word
    words = OrderedDict({})

    predictions = F.concat(
        [F.expand_dims(prediction, axis=0) for prediction in predictions],
        axis=0)

    classification = F.softmax(predictions, axis=2)
    classification = classification.data
    classification = xp.argmax(classification, axis=2)
    classification = xp.transpose(classification, (1, 0))

    words = strip_prediction(classification, xp, args.blank_symbol)
    words = " ".join([
        "".join(map(lambda x: chr(char_map[str(x)]), word)) for word in words
    ])
    return words
Example #29
    def __call__(self, trainer=None):
        # set up a reporter
        reporter = reporter_module.Reporter()
        reporter.add_observer(self.name, self.target)

        with reporter:
            with configuration.using_config('cudnn_deterministic', True):
                with configuration.using_config('train', False):
                    with configuration.using_config('lmt', True):
                        with configuration.using_config('lmt-fc', True):
                            with configuration.using_config('exact', True):
                                upper = self.calculate_upper_lipschitz()
                    with configuration.using_config('lmt', False):
                        with configuration.using_config('lmt-fc', False):
                            with configuration.using_config('exact', False):
                                if not self.nograd:
                                    loc = self.calculate_local_lipschitz()
                                    glo = self.calculate_global_lipschitz()
                                adv = self.calculate_adversarial_perturbation()
        print('\revaluation end, saving result', flush=True)
        if self.nograd:
            values = np.array(list(zip(upper, adv)))
        else:
            values = np.array(list(zip(upper, glo, loc, adv)))
        output_dir = self.output_dir or trainer.out
        filename = pathlib.Path(output_dir) / 'inequlaity_{0}.npy'.format(
            self.attack_name)
        np.save(str(filename), values)
        print('\rassertions start', flush=True)
        if self.nograd:
            for up, ad in zip(upper, adv):
                assert up <= ad
        else:
            for up, gl, lo, ad in zip(upper, glo, loc, adv):
                assert up <= gl
                assert gl <= lo
                assert up <= ad
Example #30
def force_backprop_mode():
    """Make a context manager which enables back-propagation.

    When you want to enable back-propagation in :func:`no_backprop_mode`,
    call this method. A :class:`~chainer.Variable` created in this context
    always has a computational graph.
    If you call this method outside of :func:`no_backprop_mode` context, it
    changes nothing.

    In this example, ``y`` has a computational graph and ``y.backward``
    computes gradients of variables in the graph.

    >>> with chainer.no_backprop_mode():
    ...   with chainer.force_backprop_mode():
    ...     y = x + 1

    .. seealso::

       See :func:`no_backprop_mode` for details of back-prop mode.

    """
    return configuration.using_config('enable_backprop', True)
Example #31
def force_backprop_mode():
    """Make a context manager which enables back-propagation.

    When you want to enable back-propagation in :func:`no_backprop_mode`,
    call this method. A :class:`~chainer.Variable` created in this context
    always has a computational graph.
    If you call this method outside of :func:`no_backprop_mode` context, it
    changes nothing.

    In this example, ``y`` has a computational graph and ``y.backward``
    computes gradients of variables in the graph.

    >>> with chainer.no_backprop_mode():
    ...   with chainer.force_backprop_mode():
    ...     y = x + 1

    .. seealso::

       See :func:`no_backprop_mode` for details of back-prop mode.

    """
    return configuration.using_config('enable_backprop', True)
Example #32
def run_train_loop(
        optimizer, train_iter, test_iter, train_count, test_count, epoch,
        device):
    model = optimizer.target

    sum_accuracy = 0
    sum_loss = 0
    while train_iter.epoch < epoch:
        batch = train_iter.next()
        x_array, t_array = convert.concat_examples(batch, device)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array, requires_grad=False)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.array) * len(t)
        sum_accuracy += float(model.accuracy.array) * len(t)

        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            # It is good practice to turn off train mode during evaluation.
            with configuration.using_config('train', False):
                for batch in test_iter:
                    x_array, t_array = convert.concat_examples(
                        batch, device)
                    x = chainer.Variable(x_array)
                    t = chainer.Variable(t_array, requires_grad=False)
                    loss = model(x, t)
                    sum_loss += float(loss.array) * len(t)
                    sum_accuracy += float(model.accuracy.array) * len(t)

            test_iter.reset()
            print('test mean  loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0
Example #33
def force_backprop_mode():
    """Make a context manager which enables back-propagation.

    When you want to enable back-propagation in :func:`no_backprop_mode`, call
    this method. A :class:`~chainer.Variable` created in this context always
    has a computational graph unless overridden by deeper contexts. If you call
    this method outside of :func:`no_backprop_mode` context, it changes
    nothing.

    In the following example, ``y`` has a computational graph and calling
    :func:`~chainer.Variable.backward` on ``y`` will compute and accumulate the
    gradients of the variables in the graph, in this case only ``x``.

    >>> x = chainer.Variable(np.array([1,], np.float32))
    >>> with chainer.no_backprop_mode():
    ...     with chainer.force_backprop_mode():
    ...         y = x + 1
    >>> y.backward()
    >>> x.grad
    array([1.], dtype=float32)

    .. note::

       ``chainer.force_backprop_mode()`` implicitly applies ChainerX's
       counterpart :func:`chainerx.force_backprop_mode()`, but not vice versa.
       Also, setting ``enable_backprop`` :ref:`configuration <configuration>`
       does not affect ChainerX.

    .. seealso::

       See :func:`chainer.no_backprop_mode` for details on disabled
       back-propagation mode.

    """
    c = configuration.using_config('enable_backprop', True)
    if chainerx.is_available():
        return _BackpropModeContext((c, chainerx.force_backprop_mode()))
    return _BackpropModeContext((c,))
Example #34
def force_backprop_mode():
    """Make a context manager which enables back-propagation.

    When you want to enable back-propagation in :func:`no_backprop_mode`, call
    this method. A :class:`~chainer.Variable` created in this context always
    has a computational graph unless overridden by deeper contexts. If you call
    this method outside of :func:`no_backprop_mode` context, it changes
    nothing.

    In the following example, ``y`` has a computational graph and calling
    :func:`~chainer.Variable.backward` on ``y`` will compute and accumulate the
    gradients of the variables in the graph, in this case only ``x``.

    >>> x = chainer.Variable(np.array([1,], np.float32))
    >>> with chainer.no_backprop_mode():
    ...     with chainer.force_backprop_mode():
    ...         y = x + 1
    >>> y.backward()
    >>> x.grad
    array([1.], dtype=float32)

    .. note::

       ``chainer.force_backprop_mode()`` implicitly applies ChainerX's
       counterpart :func:`chainerx.force_backprop_mode()`, but not vice versa.
       Also, setting ``enable_backprop`` :ref:`configuration <configuration>`
       does not affect ChainerX.

    .. seealso::

       See :func:`chainer.no_backprop_mode` for details on disabled
       back-propagation mode.

    """
    c = configuration.using_config('enable_backprop', True)
    if chainerx.is_available():
        return _BackpropModeContext((c, chainerx.force_backprop_mode()))
    return _BackpropModeContext((c, ))
Example #35
def main():
    mc_iteration = 10
    mc_samples = np.random.rand(1, 10, 2).astype(np.float32)
    mc_samples = np.repeat(mc_samples, mc_iteration, axis=0)

    _mean, _var = _calc_uncertanty_from_mc_samples(mc_samples)

    print('numpy')
    print(_mean)
    print(_var)
    print('------')

    mean = chainer.functions.mean(mc_samples, axis=0)
    var = mc_samples - mean
    var = chainer.functions.mean(chainer.functions.square(var), axis=0)

    mean = mean.data
    var = var.data

    print('chainer')
    print(mean)
    print(var)

    print((np.abs(mean - _mean)))
    print((np.abs(var - _var)))
    print('------')

    sampler = MCSampler(lambda x: x, mc_iteration, lambda x: x, None, None)
    with configuration.using_config('train', False):
        mean, var = sampler(mc_samples[0])

    print('mc_sampler')
    print(mean)
    print(var)

    print((np.abs(mean - _mean)))
    print((np.abs(var - _var)))
    print('------')
Example #36
    def evaluate(model, iter):
        # Evaluation routine to be used for validation and test.
        evaluator = model.copy()  # to use different state
        evaluator.rnn.reset_state()  # initialize state
        sum_perp = 0
        data_count = 0
        words = []
        labels = []
        lossfun = softmax_cross_entropy.softmax_cross_entropy
        with configuration.using_config('train', False):
            for batch in copy.copy(iter):
                word, label = convert.concat_examples(batch, args.gpu)
                words.append(word)
                labels.append(label)
                data_count += 1
            outputs = evaluator(words)

            for ind in range(len(outputs)):
                y = outputs[ind]
                label = labels[ind]
                loss = lossfun(y, label)
                sum_perp += loss.data
        return np.exp(float(sum_perp) / data_count)
Example #37
def no_backprop_mode():
    """Make a context manager which disables back-propagation.

    In this context, Chainer does not make a computational graph. It has the
    benefit of reducing memory consumption. However, a
    :class:`~chainer.Variable` created in this context does not hold a
    reference to the :class:`~chainer.FunctionNode` that created itself so no
    gradients are accumulated by :func:`~chainer.Variable.backward`.

    In the following example, ``y`` is created in this context, which means
    that calling :func:`~chainer.Variable.backward` on ``y`` has no effect on
    the gradients of ``x``.

    >>> x = chainer.Variable(np.array([1,], np.float32))
    >>> with chainer.no_backprop_mode():
    ...     y = x + 1
    >>> y.backward()
    >>> x.grad is None
    True

    .. note::

       ``chainer.no_backprop_mode()`` implicitly applies ChainerX's
       counterpart :func:`chainerx.no_backprop_mode()`, but not vice versa.
       Also, setting ``enable_backprop`` :ref:`configuration <configuration>`
       does not affect ChainerX.

    .. seealso::

       See :func:`chainer.force_backprop_mode` for details on how to override
       this context.

    """
    c = configuration.using_config('enable_backprop', False)
    if chainerx.is_available():
        return _BackpropModeContext((c, chainerx.no_backprop_mode()))
    return _BackpropModeContext((c,))
Example #38
def no_backprop_mode():
    """Make a context manager which disables back-propagation.

    In this context, Chainer does not make a computational graph. It has the
    benefit of reducing memory consumption. However, a
    :class:`~chainer.Variable` created in this context does not hold a
    reference to the :class:`~chainer.FunctionNode` that created itself so no
    gradients are accumulated by :func:`~chainer.Variable.backward`.

    In the following example, ``y`` is created in this context, which means
    that calling :func:`~chainer.Variable.backward` on ``y`` has no effect on
    the gradients of ``x``.

    >>> x = chainer.Variable(np.array([1,], np.float32))
    >>> with chainer.no_backprop_mode():
    ...     y = x + 1
    >>> y.backward()
    >>> x.grad is None
    True

    .. note::

       ``chainer.no_backprop_mode()`` implicitly applies ChainerX's
       counterpart :func:`chainerx.no_backprop_mode()`, but not vice versa.
       Also, setting ``enable_backprop`` :ref:`configuration <configuration>`
       does not affect ChainerX.

    .. seealso::

       See :func:`chainer.force_backprop_mode` for details on how to override
       this context.

    """
    c = configuration.using_config('enable_backprop', False)
    if chainerx.is_available():
        return _BackpropModeContext((c, chainerx.no_backprop_mode()))
    return _BackpropModeContext((c, ))
Example #39
def fixed_batch_normalization(x, gamma, beta, mean, var, eps=2e-5):
    """Batch normalization function with fixed statistics.

    This is a variant of batch normalization, where the mean and variance
    statistics are given by the caller as fixed variables. This is
    used on testing mode of the batch normalization layer, where batch
    statistics cannot be used for prediction consistency.

    Args:
        x (Variable): Input variable.
        gamma (Variable): Scaling parameter of normalized data.
        beta (Variable): Shifting parameter of scaled normalized data.
        mean (Variable): Shifting parameter of input.
        var (Variable): Square of scaling parameter of input.
        eps (float): Epsilon value for numerical stability.

    .. seealso::
       :func:`functions.batch_normalization`,
       :class:`links.BatchNormalization`

    """
    with configuration.using_config('train', False):
        return BatchNormalizationFunction(eps, None, None, 0.0)(
            x, gamma, beta, mean, var)
Example #40
    def __call__(self, trainer=None):
        """Executes the evaluator extension.

        Unlike usual extensions, this extension can be executed without passing
        a trainer object. This extension reports the performance on validation
        dataset using the :func:`~chainer.report` function. Thus, users can use
        this extension independently from any trainer by manually configuring
        a :class:`~chainer.Reporter` object.

        Args:
            trainer (~chainer.training.Trainer): Trainer object that invokes
                this extension. It can be omitted in case of calling this
                extension manually.

        Returns:
            dict: Result dictionary that contains mean statistics of values
                reported by the evaluation function.

        """
        # set up a reporter
        reporter = reporter_module.Reporter()
        if hasattr(self, 'name'):
            prefix = self.name + '/'
        else:
            prefix = ''
        for name, target in six.iteritems(self._targets):
            reporter.add_observer(prefix + name, target)
            reporter.add_observers(prefix + name,
                                   target.namedlinks(skipself=True))

        with reporter:
            with configuration.using_config('train', False):
                result = self.evaluate()

        reporter_module.report(result)
        return result
Example #41
    def evaluate(model, iter):
        # Evaluation routine to be used for validation and test.
        evaluator = model.copy()  # to use different state
        evaluator.rnn.reset_state()  # initialize state
        sum_perp = 0
        data_count = 0
        words = []
        labels = []
        lossfun = softmax_cross_entropy.softmax_cross_entropy
        with configuration.using_config('train', False):
            iter.reset()
            for batch in iter:
                word, label = convert.concat_examples(batch, args.gpu)
                words.append(word)
                labels.append(label)
                data_count += 1
            outputs = evaluator(words)

            for ind in range(len(outputs)):
                y = outputs[ind]
                label = labels[ind]
                loss = lossfun(y, label)
                sum_perp += loss.array
        return np.exp(float(sum_perp) / data_count)
Example #42
    def __call__(self, x):
        with configuration.using_config('train', False):
            return super(_SingleArgumentFunctionTestMode, self).__call__(x)
Example #43
    def test_backward_cpu(self):
        chain = self.static_chain
        with configuration.using_config('train', False):
            self.check_backward(self.x, self.gy, chain)
Example #44
def fixed_batch_renormalization(x, gamma, beta, mean, var, eps=2e-5):
    with configuration.using_config('train', False):
        return BatchRenormalizationFunction(eps, None, None, 0.0)(
            x, gamma, beta, mean, var)
Example #45
def numerical_grad(
        f, inputs, grad_outputs, eps=1e-3,
        detect_nondifferentiable=False, diff_atol=0, diff_rtol=1e-2,
        center_outputs=None):
    """Computes numerical gradient by finite differences.

    This function is used to implement gradient check. For usage example, see
    unit tests of :mod:`chainer.functions`.

    By default, ``numerical_grad`` computes the gradient to the first order of
    ``eps``.

    Args:
        f (callable): Python function with no arguments that runs forward
            computation and returns the result.
        inputs (tuple of arrays): Tuple of arrays that should be treated as
            inputs. Each element of them is slightly modified to realize
            numerical gradient by finite differences.
        grad_outputs (tuple of arrays or scalars): Tuple of arrays or scalars
            that are treated as output gradients.
        eps (float): Epsilon value of finite differences.
        detect_nondifferentiable (bool):
            ``False`` by default.
            If ``True``, ``numerical_grad`` checks whether ``f`` is
            differentiable at ``inputs``.
            It requires evaluation of ``f`` at 5 points instead of 2.
            As a side effect, the accuracy of numerical gradient will be
            increased to the third order of ``eps``.
            If it turns out that ``f`` is non-differentiable at ``input``,
            ``numerical_grad`` raises
            :class:`~chainer.gradient_check.NondifferentiableError`.
        diff_atol (float):
            Absolute tolerance of fitting error of non-differentiable point
            detection.
        diff_rtol (float):
            Tolerance of fitting error of non-differentiable point detection
            relative to the output values of ``f``.
        center_outputs (tuple of arrays or None):
            Only used if ``detect_nondifferentiable`` is ``True``.
            If specified, these arrays are used as the outputs of ``f`` at
            ``inputs``.
            Otherwise, it is calculated.
            It can be used to reduce the computation if these arrays are
            already calculated before calling ``numerical_grad``.

    Returns:
        tuple: Numerical gradient arrays corresponding to ``inputs``.

    """
    # TODO(niboshi): Deprecate `center_outputs` argument.
    # If dtype of this argument is not float64, often the resolution is
    # insufficient for numerical gradient calculation. We might use it only
    # when its dtype is float64, but it would be better to simply remove it.
    center_outputs = None

    assert eps > 0
    assert isinstance(inputs, (tuple, list))
    for x in inputs:
        if x.dtype.kind != 'f':
            raise RuntimeError(
                'The dtype of input arrays must be kind of float')

    inputs = tuple(inputs)
    # Cast grad_outputs to float64
    grad_outputs = tuple([
        None if g is None
        else numpy.float64(g) if numpy.isscalar(g)
        else g.astype(numpy.float64)
        for g in grad_outputs])

    if not chainer.is_arrays_compatible(
            [a for a in inputs + grad_outputs if not numpy.isscalar(a)]):
        raise RuntimeError('Do not mix GPU and CPU arrays in `numerical_grad`')

    device = backend.get_device_from_array(*(inputs + grad_outputs))
    xp = device.xp

    if xp is cuda.cupy:
        numerical_grad_kernel_1 = cuda.reduce(
            'T y1, T y2, U gy, T eps', 'V gxi',
            '(y1 - y2) * gy', 'a + b', 'gxi += a / (eps * 2)', '0',
            'numerical_grad_kernel_1'
        )
        numerical_grad_kernel_3 = cuda.reduce(
            'T y1, T y2, T y3, T y4, U gy, T eps', 'V gxi',
            '(-y1 + 8 * y2 - 8 * y3 + y4) * gy',
            'a + b', 'gxi += a / (eps * 6)', '0',
            'numerical_grad_kernel_3'
        )

    if xp is chainerx:
        grads = [
            xp.zeros(x.shape, numpy.float64, device=x.device) for x in inputs]
    else:
        grads = [xp.zeros(x.shape, numpy.float64) for x in inputs]

    if detect_nondifferentiable:
        if center_outputs is None:
            ys0 = _copy_arrays(f())
        else:
            ys0 = center_outputs
        nout = len(ys0)
        shapes = [_.shape for _ in ys0]
        sizes = numpy.array([_.size for _ in ys0])
        cumsizes = numpy.cumsum(sizes)

    # Evaluate func at a single input
    def eval_func(x, i, delta, orig):
        x[i] = orig + delta
        y = _copy_arrays(f())
        assert len(y) == len(grad_outputs)
        assert all([
            gy is None
            for y_, gy in zip(y, grad_outputs)
            if y_ is None])
        assert all([
            gy is None or numpy.isscalar(gy) or y_.shape == gy.shape
            for y_, gy in zip(y, grad_outputs)])
        x[i] = orig
        return y

    # An iteration on a single input displacement
    def iterate_single_input(i_in, x, orig_x, i):
        orig = orig_x[i]
        # `yss` holds a list of output arrays for each of 2 or 5 sampling
        # points.
        if detect_nondifferentiable:
            yss = [
                eval_func(x, i, -eps * 1., orig),
                eval_func(x, i, -eps * .5, orig),
                ys0,
                eval_func(x, i, +eps * .5, orig),
                eval_func(x, i, +eps * 1., orig),
            ]
        else:
            yss = [
                eval_func(x, i, -eps * 1, orig),
                eval_func(x, i, +eps * 1, orig),
            ]

        if detect_nondifferentiable:
            # Detect non-differentiable point by quadratic fitting

            # Check for non-finite output.
            # If any single element in the output arrays has different
            # finiteness among sampled points, that means this is a
            # non-differentiable point.
            # If the function consistently generates non-finite values
            # around the point, we do not treat the point as
            # non-differentiable.
            # (Example: x<0 region for the logarithm function)
            any_nonfinite = False
            for i_out in range(nout):
                isfinites = [xp.isfinite(ys[i_out]) for ys in yss]
                if any((isfinites[0] != isfinites[i]).any()
                       for i in range(1, len(yss))):
                    s = six.StringIO()
                    s.write(
                        'Tried to compute the numeric gradient on a '
                        'non-differentiable point.\n\n')
                    s.write('i_in: {}\n'.format(i_in))
                    s.write('i_out: {}\n'.format(i_out))
                    s.write('x: {}\n'.format(inputs[i_in]))
                    s.write('index on x: {}\n'.format(i))
                    s.write('eps: {}\n'.format(eps))
                    s.write('y[x-eps  ]: {}\n'.format(yss[0][i_out]))
                    s.write('y[x-eps/2]: {}\n'.format(yss[1][i_out]))
                    s.write('y[x      ]: {}\n'.format(yss[2][i_out]))
                    s.write('y[x+eps/2]: {}\n'.format(yss[3][i_out]))
                    s.write('y[x+eps  ]: {}\n'.format(yss[4][i_out]))
                    raise NondifferentiableError(s.getvalue())

                any_nonfinite |= not all((_).all() for _ in isfinites)

            if not any_nonfinite:
                # Stack flattened outputs to make (5, *)-shaped 2D array
                ystack = xp.vstack(
                    [xp.hstack([y.ravel() for y in ys]) for ys in yss])
                assert ystack.ndim == 2 and ystack.shape[0] == len(yss)
                # Fit to quadratic
                if xp is not numpy:
                    ystack = _cpu._to_cpu(ystack)
                polyfit = numpy.polynomial.polynomial.polyfit
                _, (residuals, _, _, _) = polyfit(
                    range(len(yss)), ystack, deg=2, full=True)
                if xp is not numpy:
                    residuals = device.send(residuals)
                residuals = xp.sqrt(residuals / len(yss))

                # Check for error for each output array
                for i_out in range(nout):
                    size = sizes[i_out]
                    cumsize = cumsizes[i_out]
                    shape = shapes[i_out]
                    # TODO(niboshi): The following two lines could be
                    # rewritten using xp.stack, which is supported in
                    # NumPy>=1.10
                    ymax = xp.concatenate(
                        [ys[i_out][None] for ys in yss]).max(axis=0)
                    ymin = xp.concatenate(
                        [ys[i_out][None] for ys in yss]).min(axis=0)
                    # Restore the shape of flattened residual
                    res = residuals[cumsize - size:cumsize]
                    res = res.reshape(shape)
                    det = utils.force_array(
                        diff_atol + diff_rtol * (ymax - ymin) < res)
                    # Constant output = not nondifferentiable
                    det[ymax == ymin] = False
                    if det.any():
                        s = six.StringIO()
                        s.write(
                            'Tried to compute the numeric gradient on a '
                            'non-differentiable point.\n\n')
                        s.write('i_in: {}\n'.format(i_in))
                        s.write('i_out: {}\n'.format(i_out))
                        s.write('x: {}\n'.format(inputs[i_in]))
                        s.write('index on x: {}\n'.format(i))
                        s.write('eps: {}\n'.format(eps))
                        s.write('diff_rtol: {}\n'.format(diff_rtol))
                        s.write('diff_atol: {}\n'.format(diff_atol))
                        s.write('ymax: {}\n'.format(ymax))
                        s.write('ymin: {}\n'.format(ymin))
                        s.write(
                            'diff_atol + diff_rtol * (ymax-ymin): {}\n'.format(
                                diff_atol + diff_rtol * (ymax - ymin)))
                        s.write('fitting errors: {}\n'.format(res))
                        s.write('y[x-eps  ]: {}\n'.format(yss[0][i_out]))
                        s.write('y[x-eps/2]: {}\n'.format(yss[1][i_out]))
                        s.write('y[x      ]: {}\n'.format(yss[2][i_out]))
                        s.write('y[x+eps/2]: {}\n'.format(yss[3][i_out]))
                        s.write('y[x+eps  ]: {}\n'.format(yss[4][i_out]))
                        raise NondifferentiableError(s.getvalue())

        # Calculate numerical gradient
        for i_out, gy in enumerate(grad_outputs):
            if gy is None:
                continue
            if not numpy.isscalar(gy):
                gy = gy.astype(numpy.float64, copy=False)
            gpu_ = (xp is cuda.cupy and
                    all(isinstance(ys[i_out], cuda.ndarray)
                        for ys in yss))
            # If any output sample is None, all others must be.
            assert all([
                (yss[0][i_out] is None) == (yss[j][i_out] is None)
                for j in range(len(yss))])
            # If the output samples are None, the corresponding part of the
            # numeric gradient is treated as zero: skip the accumulation.
            if yss[0][i_out] is None:
                continue

            if len(yss) == 2:  # 1st order
                y0 = yss[0][i_out]
                y1 = yss[1][i_out]
                if gpu_:
                    numerical_grad_kernel_1(
                        y1, y0, xp.asarray(gy), eps, gx[i])
                else:
                    dot = ((y1 - y0) * gy).sum()
                    gx[i] = gx[i] + dot / (2 * eps)
            elif len(yss) == 5:  # 3rd order
                y0 = yss[0][i_out]
                y1 = yss[1][i_out]
                y2 = yss[3][i_out]
                y3 = yss[4][i_out]
                if gpu_:
                    numerical_grad_kernel_3(
                        y3, y2, y1, y0, gy, eps, gx[i])
                else:
                    num = -y3 + 8 * y2 - 8 * y1 + y0
                    dot = (num * gy).sum()
                    gx[i] = gx[i] + dot / (6 * eps)
            else:
                assert False

    # Calculate numeric gradient
    with configuration.using_config('type_check', False):
        for i_in, (x, gx) in enumerate(six.moves.zip(inputs, grads)):
            orig_x = x.copy()  # hold original value
            for i in numpy.ndindex(x.shape):
                iterate_single_input(i_in, x, orig_x, i)

    return [g.astype(x.dtype, copy=False)
            for g, x in six.moves.zip(grads, inputs)]
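
As a usage sketch (not part of the snippet above): ``numerical_grad`` perturbs
the input arrays in place and calls ``f`` with no arguments, so ``f`` must
recompute its outputs from the current values of those arrays. The quadratic
test function and the tolerances below are illustrative assumptions.

import numpy

from chainer import gradient_check

x = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float32)
gy = numpy.ones_like(x)

def f():
    # Recompute the outputs from the current (possibly perturbed) ``x``.
    return (x * x,)

gx, = gradient_check.numerical_grad(f, (x,), (gy,), eps=1e-3)
# The analytic gradient of ``sum(x ** 2 * gy)`` w.r.t. ``x`` is ``2 * x * gy``.
numpy.testing.assert_allclose(gx, 2 * x, rtol=1e-3, atol=1e-3)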
Пример #46
0
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--model', '-m', default='MLP',
                        help='Choose the model: MLP or MLPSideEffect')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    if args.model == 'MLP':
        model = L.Classifier(train_mnist.MLP(args.unit, 10))
    elif args.model == 'MLPSideEffect':
        model = L.Classifier(train_mnist.MLPSideEffect(args.unit, 10))
    if args.gpu >= 0:
        # Make the specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_count = len(train)
    test_count = len(test)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_accuracy = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.data) * len(t.data)
        sum_accuracy += float(model.accuracy.data) * len(t.data)

        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            # It is good practice to turn off train mode during evaluation.
            with configuration.using_config('train', False):
                for batch in test_iter:
                    x_array, t_array = convert.concat_examples(batch, args.gpu)
                    x = chainer.Variable(x_array)
                    t = chainer.Variable(t_array)
                    loss = model(x, t)
                    sum_loss += float(loss.data) * len(t.data)
                    sum_accuracy += float(model.accuracy.data) * len(t.data)

            test_iter.reset()
            print('test mean  loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
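
The parser above accepts a ``--resume`` option that the rest of the snippet
never reads. A minimal way to wire it up, assuming ``--resume`` names a
directory holding the ``mlp.model`` and ``mlp.state`` files written by the
``save_npz`` calls (this interpretation follows the next example and is not
part of the original code), would be to add after ``optimizer.setup(model)``:

    if args.resume:
        serializers.load_npz('{}/mlp.model'.format(args.resume), model)
        serializers.load_npz('{}/mlp.state'.format(args.resume), optimizer)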
Пример #47
0
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot using model '
                             'and state files in the specified directory')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    model = L.Classifier(train_mnist.MLP(args.unit, 10))
    if args.gpu >= 0:
        # Make the specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    if args.resume:
        # Resume from a snapshot
        serializers.load_npz('{}/mlp.model'.format(args.resume), model)
        serializers.load_npz('{}/mlp.state'.format(args.resume), optimizer)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_count = len(train)
    test_count = len(test)

    with MultiprocessIterator(train, args.batchsize) as train_iter, \
        MultiprocessIterator(test, args.batchsize,
                             repeat=False, shuffle=False) as test_iter:

        sum_accuracy = 0
        sum_loss = 0

        while train_iter.epoch < args.epoch:
            batch = train_iter.next()
            x, t = convert.concat_examples(batch, args.gpu)
            optimizer.update(model, x, t)
            sum_loss += float(model.loss.data) * len(t)
            sum_accuracy += float(model.accuracy.data) * len(t)

            if train_iter.is_new_epoch:
                print('epoch: {}'.format(train_iter.epoch))
                print('train mean loss: {}, accuracy: {}'.format(
                    sum_loss / train_count, sum_accuracy / train_count))
                # evaluation
                sum_accuracy = 0
                sum_loss = 0
                # Enable evaluation mode.
                with configuration.using_config('train', False):
                    # This is optional but can reduce computational overhead.
                    with chainer.using_config('enable_backprop', False):
                        for batch in test_iter:
                            x, t = convert.concat_examples(batch, args.gpu)
                            loss = model(x, t)
                            sum_loss += float(loss.data) * len(t)
                            sum_accuracy += float(model.accuracy.data) * len(t)

                test_iter.reset()
                print('test mean  loss: {}, accuracy: {}'.format(
                    sum_loss / test_count, sum_accuracy / test_count))
                sum_accuracy = 0
                sum_loss = 0

        # Save the model and the optimizer
        print('save the model')
        serializers.save_npz('{}/mlp.model'.format(args.out), model)
        print('save the optimizer')
        serializers.save_npz('{}/mlp.state'.format(args.out), optimizer)
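
(The ``with`` statement around ``MultiprocessIterator`` ensures the worker
processes are finalized when the block exits.) ``chainer.no_backprop_mode()``
is a shorthand for the ``using_config('enable_backprop', False)`` block used
above, so the evaluation loop can equivalently be written as, for example:

                with configuration.using_config('train', False), \
                        chainer.no_backprop_mode():
                    for batch in test_iter:
                        x, t = convert.concat_examples(batch, args.gpu)
                        loss = model(x, t)
                        sum_loss += float(loss.data) * len(t)
                        sum_accuracy += float(model.accuracy.data) * len(t)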
Пример #48
0
    def __call__(self, x):
        with configuration.using_config('use_cudnn', self.use_cudnn):
            return super(_SingleArgumentFunctionWithCudnn, self).__call__(x)
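
The wrapper above scopes the ``use_cudnn`` setting to a single function call.
The same pattern works directly at any call site; a minimal sketch (the
convolution call and the dummy arrays are illustrative, not taken from the
snippet):

import numpy
import chainer
import chainer.functions as F

x = numpy.zeros((1, 3, 32, 32), dtype=numpy.float32)
W = numpy.zeros((8, 3, 3, 3), dtype=numpy.float32)

with chainer.using_config('use_cudnn', 'never'):
    y = F.convolution_2d(x, W)  # cuDNN is never used inside this block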
Пример #49
0
def main():
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--device', '-d', type=str, default='-1',
                        help='Device specifier. Either ChainerX device '
                        'specifier or an integer. If non-negative integer, '
                        'CuPy arrays with specified device id are used. If '
                        'negative integer, NumPy arrays are used')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot using model '
                             'and state files in the specified directory')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    group = parser.add_argument_group('deprecated arguments')
    group.add_argument('--gpu', '-g', type=int, nargs='?', const=0,
                       help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = parse_device(args)

    print('Device: {}'.format(device))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train
    model = L.Classifier(train_mnist.MLP(args.unit, 10))
    model.to_device(device)
    device.use()

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    if args.resume:
        # Resume from a snapshot
        serializers.load_npz('{}/mlp.model'.format(args.resume), model)
        serializers.load_npz('{}/mlp.state'.format(args.resume), optimizer)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_count = len(train)
    test_count = len(test)

    with SerialIterator(train, args.batchsize) as train_iter, \
        SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False) as test_iter:

        sum_accuracy = 0
        sum_loss = 0

        while train_iter.epoch < args.epoch:
            batch = train_iter.next()
            x, t = convert.concat_examples(batch, device)
            optimizer.update(model, x, t)
            sum_loss += float(model.loss.array) * len(t)
            sum_accuracy += float(model.accuracy.array) * len(t)

            if train_iter.is_new_epoch:
                print('epoch: {}'.format(train_iter.epoch))
                print('train mean loss: {}, accuracy: {}'.format(
                    sum_loss / train_count, sum_accuracy / train_count))
                # evaluation
                sum_accuracy = 0
                sum_loss = 0
                # Enable evaluation mode.
                with configuration.using_config('train', False):
                    # This is optional but can reduce computational overhead.
                    with chainer.using_config('enable_backprop', False):
                        for batch in test_iter:
                            x, t = convert.concat_examples(batch, device)
                            loss = model(x, t)
                            sum_loss += float(loss.array) * len(t)
                            sum_accuracy += float(
                                model.accuracy.array) * len(t)

                test_iter.reset()
                print('test mean  loss: {}, accuracy: {}'.format(
                    sum_loss / test_count, sum_accuracy / test_count))
                sum_accuracy = 0
                sum_loss = 0

        # Save the model and the optimizer
        print('save the model')
        serializers.save_npz('{}/mlp.model'.format(args.out), model)
        print('save the optimizer')
        serializers.save_npz('{}/mlp.state'.format(args.out), optimizer)
Пример #50
0
    def __init__(self, debug):
        warnings.warn('chainer.DebugMode is deprecated. '
                      'Use chainer.using_config("debug", ...) instead.',
                      DeprecationWarning)
        self._using = using_config('debug', debug)
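
As the warning suggests, the deprecated ``DebugMode`` context is replaced by
the configuration mechanism; a minimal sketch of the recommended form:

import chainer

with chainer.using_config('debug', True):
    # Inside this block ``chainer.is_debug()`` returns True and extra
    # runtime checks (such as NaN detection) are enabled.
    assert chainer.is_debug()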
Пример #51
0
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--resume', '-r', type=str,
                        help='Directory that has `vgg.model` and `vgg.state`')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.test:
        train = train[:200]
        test = test[:200]

    train_count = len(train)
    test_count = len(test)

    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    if args.resume is not None:
        resume = args.resume
        if os.path.exists(resume):
            serializers.load_npz(os.path.join(resume, 'vgg.model'), model)
            serializers.load_npz(os.path.join(resume, 'vgg.state'), optimizer)
        else:
            raise ValueError(
                '`args.resume` ("{}") is specified,'
                ' but it does not exist.'.format(resume)
            )

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_acc = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        # Reduce learning rate by 0.5 every 25 epochs.
        if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch:
            optimizer.lr *= 0.5
            print('Reducing learning rate to: {}'.format(optimizer.lr))

        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.array) * len(t)
        sum_acc += float(model.accuracy.array) * len(t)

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_acc / train_count))
            sum_acc = 0
            sum_loss = 0
            # Enable evaluation mode.
            with configuration.using_config('train', False):
                # This is optional but can reduce computational overhead.
                with chainer.using_config('enable_backprop', False):
                    for batch in test_iter:
                        x, t = convert.concat_examples(batch, args.gpu)
                        x = chainer.Variable(x)
                        t = chainer.Variable(t)
                        loss = model(x, t)
                        sum_loss += float(loss.array) * len(t)
                        sum_acc += float(model.accuracy.array) * len(t)

            test_iter.reset()
            print('test mean  loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_acc / test_count))
            sum_acc = 0
            sum_loss = 0

    # Save the model and the optimizer
    out = args.out
    if not os.path.exists(out):
        os.makedirs(out)
    print('save the model')
    serializers.save_npz(os.path.join(out, 'vgg.model'), model)
    print('save the optimizer')
    serializers.save_npz(os.path.join(out, 'vgg.state'), optimizer)
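
The loop above halves ``optimizer.lr`` every 25 epochs by hand. Under the
Trainer abstraction the same schedule is usually expressed with an extension;
a minimal sketch, assuming a ``trainer`` object has been set up elsewhere:

from chainer.training import extensions

trainer.extend(
    extensions.ExponentialShift('lr', 0.5),  # multiply lr by 0.5
    trigger=(25, 'epoch'))                   # every 25 epochs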
Пример #53
0
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.test:
        train = train[:200]
        test = test[:200]

    train_count = len(train)
    test_count = len(test)

    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_accuracy = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        # Reduce learning rate by 0.5 every 25 epochs.
        if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch:
            optimizer.lr *= 0.5
            print('Reducing learning rate to: {}'.format(optimizer.lr))

        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.data) * len(t.data)
        sum_accuracy += float(model.accuracy.data) * len(t.data)

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            # It is good practice to turn off train mode during evaluation.
            # ``model.predictor.train`` is the Chainer v1-style switch; with
            # ``using_config('train', False)`` below it is only needed for
            # models that still read this attribute.
            model.predictor.train = False
            with configuration.using_config('train', False):
                for batch in test_iter:
                    x_array, t_array = convert.concat_examples(batch, args.gpu)
                    x = chainer.Variable(x_array)
                    t = chainer.Variable(t_array)
                    loss = model(x, t)
                    sum_loss += float(loss.data) * len(t.data)
                    sum_accuracy += float(model.accuracy.data) * len(t.data)

            test_iter.reset()
            model.predictor.train = True
            print('test mean  loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0

    # Save the model and the optimizer
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
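
The snippet above parses ``--out`` but still writes ``mlp.model`` and
``mlp.state`` into the current directory, even though the network is a VGG
classifier. A variant that mirrors the previous CIFAR example (an adaptation,
not part of the original; it assumes ``os`` is imported):

    out = args.out
    if not os.path.exists(out):
        os.makedirs(out)
    print('save the model')
    serializers.save_npz(os.path.join(out, 'vgg.model'), model)
    print('save the optimizer')
    serializers.save_npz(os.path.join(out, 'vgg.state'), optimizer)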