def __call__(self, trainer=None):
    # set up a reporter
    reporter = reporter_module.Reporter()
    reporter.add_observer(self.name, self.target)

    with reporter:
        with configuration.using_config('train', False):
            with configuration.using_config('lmt', True):
                with configuration.using_config('lmt-fc', True):
                    with configuration.using_config('exact', True):
                        with configuration.using_config(
                                'cudnn_deterministic', True):
                            self.evaluate(
                                os.path.join(trainer.out, 'margin.npy'))
def __call__(self, trainer=None): """Executes the evaluator extension. Unlike usual extensions, this extension can be executed without passing a trainer object. This extension reports the performance on validation dataset using the :func:`~chainer.report` function. Thus, users can use this extension independently from any trainer by manually configuring a :class:`~chainer.Reporter` object. Args: trainer (~chainer.training.Trainer): Trainer object that invokes this extension. It can be omitted in case of calling this extension manually. Returns: dict: Result dictionary that contains mean statistics of values reported by the evaluation function. """ # set up a reporter reporter = reporter_module.Reporter() if self.name is not None: prefix = self.name + '/' else: prefix = '' for name, target in six.iteritems(self._targets): reporter.add_observer(prefix + name, target) reporter.add_observers(prefix + name, target.namedlinks(skipself=True)) with reporter: with configuration.using_config('train', False): result = self.evaluate_roc(trainer=trainer) reporter_module.report(result) return result
def force_backprop_mode(): """Enable back-propagation for Variable whose volatile is auto. When you want to enable back-propagation in :func:`no_backprop_mode`, call this method. In this context, :class:`~chainer.Variable` object whose ``volatile`` attribute is ``'auto'`` behaves like a **volatile** variable. That means you can disable :func:`no_backprop_mode` in this context. If you call this method outside of :func:`no_backprop_mode` context, it changes nothing. :class:`~chainer.Variable` object with ``volatile='auto'`` behaves like a volatile variable by default. In this example, the volatility of ``x`` and ``y`` is ``'auto'``. In :func:`no_backprop_mode` context, ``y`` does not have a computational graph but in :func:`force_backprop_mode` it has a graph. >>> with chainer.no_backprop_mode(): ... # Variable with volatile='auto' behaves like volatile='on' ... with chainer.force_backprop_mode(): ... # Variable with volatile='auto' behaves like volatile='off' ... y = x + 1 .. seealso:: See :func:`no_backprop_mode` for details of back-prop mode. """ return configuration.using_config('enable_backprop', True)
def numerical_grad(f, inputs, grad_outputs, eps=1e-3):
    """Computes numerical gradient by finite differences.

    This function is used to implement gradient check. For usage example,
    see unit tests of :mod:`chainer.functions`.

    Args:
        f (function): Python function with no arguments that runs forward
            computation and returns the result.
        inputs (tuple of arrays): Tuple of arrays that should be treated as
            inputs. Each element of them is slightly modified to realize
            numerical gradient by finite differences.
        grad_outputs (tuple of arrays): Tuple of arrays that are treated as
            output gradients.
        eps (float): Epsilon value of finite differences.

    Returns:
        tuple: Numerical gradient arrays corresponding to ``inputs``.

    """
    assert eps > 0
    inputs = tuple(inputs)
    grad_outputs = tuple(grad_outputs)
    gpu = any(isinstance(x, cuda.ndarray) for x in inputs + grad_outputs)
    cpu = any(isinstance(x, numpy.ndarray) for x in inputs + grad_outputs)

    if gpu and cpu:
        raise RuntimeError(
            'Do not mix GPU and CPU arrays in `numerical_grad`')

    if gpu:
        xp = cuda.cupy
        numerical_grad_kernel = cuda.reduce(
            'T y1, T y2, U gy, T eps', 'V gxi',
            '(y1 - y2) * gy', 'a + b', 'gxi += a / (eps * 2)', '0',
            'numerical_grad_kernel'
        )
    else:
        xp = numpy
    grads = [xp.zeros_like(x) for x in inputs]

    with configuration.using_config('type_check', False):
        for x, gx in six.moves.zip(inputs, grads):
            for i in numpy.ndindex(x.shape):
                orig = x[i].copy()  # hold original value
                x[i] = orig + eps
                ys1 = _copy_arrays(f())
                x[i] = orig - eps
                ys2 = _copy_arrays(f())
                x[i] = orig

                for y1, y2, gy in six.moves.zip(ys1, ys2, grad_outputs):
                    if gy is not None:
                        if (gpu and isinstance(y1, cuda.ndarray)
                                and isinstance(y2, cuda.ndarray)
                                and isinstance(gy, cuda.ndarray)):
                            numerical_grad_kernel(y1, y2, gy, eps, gx[i])
                        else:
                            dot = ((y1 - y2) * gy).sum()
                            gx[i] += dot / (2 * eps)

    return grads
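# Hedged example (added): checking the analytic gradient of y = x**2 against
# numerical_grad above. `f` takes no arguments and returns a tuple of outputs;
# the input array is perturbed in place by numerical_grad.
import numpy

x = numpy.random.uniform(-1, 1, (3,)).astype(numpy.float32)
gy = numpy.ones_like(x)

def f():
    return (x ** 2,)

gx_numerical, = numerical_grad(f, (x,), (gy,), eps=1e-3)
numpy.testing.assert_allclose(gx_numerical, 2 * x * gy, rtol=1e-2, atol=1e-2)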
def fixed_batch_renormalization(x, gamma, beta, mean, var, eps=2e-5):
    warnings.warn(
        'fixed_batch_renormalization is deprecated. '
        'Use fixed_batch_normalization instead.',
        DeprecationWarning)
    with configuration.using_config('train', False):
        return batch_normalization.fixed_batch_normalization(
            x, gamma, beta, mean, var, eps)
def evaluate(self, snapshot_name=''):
    current_device = cuda.get_device_from_id(self.args.gpu)
    with current_device:
        gt_data = []
        pred_data = []
        for i, batch in enumerate(
                tqdm(self.data_iterator,
                     total=len(self.data_loader) // self.args.batchsize)):
            image, gt_bboxes, gt_labels = batch[0]
            gt_data.append((gt_bboxes, gt_labels))

            # if self.args.gpu is not None:
            #     image = cuda.to_gpu(image, current_device)

            with cuda.Device(self.args.gpu):
                with configuration.using_config('train', False):
                    bboxes, labels, scores = self.model.predict(
                        image.copy()[None, ...])

            if len(bboxes[0]) == 0:
                bboxes = [np.zeros((1, 4), dtype=np.float32)]
                labels = [np.zeros((1, ), dtype=np.int32)]
                scores = [np.zeros((1, ), dtype=np.float32)]
            pred_data.append((bboxes[0], labels[0], scores[0]))

        # TODO handle empty predictions!!
        bboxes, labels, scores = zip(*pred_data)
        gt_bboxes, gt_labels = concat_examples(gt_data)

        result = eval_detection_voc(
            bboxes, labels, scores, gt_bboxes, gt_labels, None)
        map = result['map']
        self.save_eval_results(snapshot_name, map)
def force_backprop_mode():
    """Make a context manager which enables back-propagation.

    When you want to enable back-propagation in :func:`no_backprop_mode`,
    call this method. A :class:`~chainer.Variable` created in this context
    always has a computational graph unless overridden by deeper contexts.
    If you call this method outside of :func:`no_backprop_mode` context, it
    changes nothing.

    In the following example, ``y`` has a computational graph and calling
    :func:`~chainer.Variable.backward` on ``y`` will compute and accumulate
    the gradients of the variables in the graph, in this case only ``x``.

    >>> x = chainer.Variable(np.array([1,], 'f'))
    >>> with chainer.no_backprop_mode():
    ...     with chainer.force_backprop_mode():
    ...         y = x + 1
    >>> y.backward()
    >>> x.grad
    array([ 1.], dtype=float32)

    .. seealso::

       See :func:`no_backprop_mode` for details on disabled back-propagation
       mode.

    """
    return configuration.using_config('enable_backprop', True)
def fixed_batch_normalization(x, gamma, beta, mean, var, eps=2e-5,
                              use_cudnn=True):
    """Batch normalization function with fixed statistics.

    This is a variant of batch normalization, where the mean and variance
    statistics are given by the caller as fixed variables. This is used on
    testing mode of the batch normalization layer, where batch statistics
    cannot be used for prediction consistency.

    Args:
        x (Variable): Input variable.
        gamma (Variable): Scaling parameter of normalized data.
        beta (Variable): Shifting parameter of scaled normalized data.
        mean (Variable): Shifting parameter of input.
        var (Variable): Square of scaling parameter of input.
        eps (float): Epsilon value for numerical stability.
        use_cudnn (bool): If ``True`` and cuDNN is enabled, then this
            function uses cuDNN as the core implementation.

    .. seealso::
       :func:`functions.batch_normalization`,
       :class:`links.BatchNormalization`

    """
    with configuration.using_config('train', False):
        return BatchNormalizationFunction(eps, None, None, 0.0, use_cudnn)(
            x, gamma, beta, mean, var)
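# Hedged usage sketch (added): normalizing with caller-supplied statistics, e.g.
# the running mean/variance accumulated during training. Shapes and values are
# illustrative; chainer.functions.fixed_batch_normalization is the public entry point.
import numpy as np
import chainer.functions as F

x = np.random.randn(5, 3).astype(np.float32)
gamma = np.ones(3, dtype=np.float32)
beta = np.zeros(3, dtype=np.float32)
running_mean = np.zeros(3, dtype=np.float32)
running_var = np.ones(3, dtype=np.float32)
y = F.fixed_batch_normalization(x, gamma, beta, running_mean, running_var, eps=2e-5)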
def no_backprop_mode():
    """Make a context manager which disables back-propagation.

    In this context, Chainer does not make a computational graph. It has the
    benefit of reducing memory consumption. However, a
    :class:`~chainer.Variable` created in this context does not hold a
    reference to the :class:`~chainer.FunctionNode` that created itself so no
    gradients are accumulated by :func:`~chainer.Variable.backward`.

    In the following example, ``y`` is created in this context, which means
    that calling :func:`~chainer.Variable.backward` on ``y`` has no effect on
    the gradients of ``x``.

    >>> x = chainer.Variable(np.array([1,], 'f'))
    >>> with chainer.no_backprop_mode():
    ...     y = x + 1
    >>> y.backward()
    >>> x.grad is None
    True

    .. seealso::

       See :func:`force_backprop_mode` for details on how to override this
       context.

    """
    return configuration.using_config('enable_backprop', False)
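# Hedged illustration (added, not from the source): a Variable produced under
# no_backprop_mode keeps no reference to the function that created it, so the
# graph is simply not recorded.
import numpy as np
import chainer

x = chainer.Variable(np.zeros((3,), np.float32))
with chainer.no_backprop_mode():
    y = x * 2
print(y.creator is None)  # True: no graph was recorded for y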
def evaluate(self):
    results = []
    with chainer.cuda.Device(self.args.gpu):
        for i, line in enumerate(tqdm.tqdm(self.lines)):
            image_file = line[0]
            labels = self.xp.array(line[1:], dtype=self.xp.int32)
            labels = labels.reshape((-1, self.args.num_labels))

            image = self.load_image(image_file)
            with configuration.using_config('train', False):
                predictions, crops, grids = self.net(
                    image[self.xp.newaxis, ...])

            words, gt_words = self.calc_accuracy(predictions, labels)
            results.append([words, gt_words])

            if self.save_rois:
                image = self.xp.asarray(image)
                self.bbox_plotter.xp = self.xp
                self.bbox_plotter.render_rois(
                    predictions, crops, grids, i, image)

    self.print_results()
    with open(os.path.join(self.model_dir, "eval_results.csv"), "w") as results_file:
        writer = csv.writer(results_file, delimiter=',')
        writer.writerows(results)
def test_forward_cpu2(self):
    y_dyn = self.chain.dynamic_call(self.x)
    x2 = 2 * self.x
    # todo: add a new config so that we can still use 'train'
    with configuration.using_config('train', False):
        y_static1 = self.chain.static_call(x2)
        y_static1.grad = y_static1.data.copy()
        y_static1.backward()
        schedule_manager = self.chain.schedule_manager
        print('sched 1: ', schedule_manager)
        y_static = self.chain.static_call(self.x)
    chainer.testing.assert_allclose(y_dyn.data, y_static.data)
def predict_core(self, model, batch):
    in_arrays = self.converter(batch, self.device)
    with function.no_backprop_mode():
        with configuration.using_config('train', False):
            if isinstance(in_arrays, tuple):
                y = model(*in_arrays)
            elif isinstance(in_arrays, dict):
                y = model(**in_arrays)
            else:
                y = model(in_arrays)
    return _variable_to_array(y, to_cpu=self.to_cpu)
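# Hedged, standalone restatement of the inference pattern above (added for
# illustration): disable graph construction and switch to test mode for one batch.
# `model` and `batch` here are placeholders, not names from the original code.
import chainer
from chainer.dataset import convert

def predict_batch(model, batch, device=-1):
    in_arrays = convert.concat_examples(batch, device)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        if isinstance(in_arrays, tuple):
            return model(*in_arrays)
        if isinstance(in_arrays, dict):
            return model(**in_arrays)
        return model(in_arrays)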
def evaluate(model, iter):
    # Evaluation routine to be used for validation and test.
    evaluator = model.copy()  # to use different state
    evaluator.predictor.reset_state()  # initialize state
    sum_perp = 0
    data_count = 0
    # Enable evaluation mode.
    with configuration.using_config('train', False):
        # This is optional but can reduce computational overhead.
        with chainer.using_config('enable_backprop', False):
            for batch in copy.copy(iter):
                x, t = convert.concat_examples(batch, args.gpu)
                loss = evaluator(x, t)
                sum_perp += loss.array
                data_count += 1
    return np.exp(float(sum_perp) / data_count)
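# Hedged note (added): the value returned above is exp of the average per-batch
# cross-entropy, i.e. an estimate of perplexity. A tiny numeric check with
# made-up per-batch losses:
import numpy as np

losses = np.array([4.2, 3.9, 4.0])              # mean cross-entropy per batch (nats)
perplexity = np.exp(losses.sum() / len(losses))
print(perplexity)                               # roughly 56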
def test(model, dataset, inv_vocab, device=-1, batchsize=128): """ Predict without evaluating. Refer :func:`test` for the information about arguments. Returns: numpy.ndarray: Prediction probability whose size is `data size` x `number of labels`. """ if device >= 0: model.to_gpu(device) it = SerialIterator(dataset, batchsize, repeat=False, shuffle=False) results = [] for batch in it: in_arrays = convert(batch, device) with chainer.function.no_backprop_mode(), using_config('train', False): y, z_prob, z = model.forward(in_arrays['xs']) loss, loss_encoder, sparsity, coherence, regressor_cost, loss_generator = \ model.calc_loss(y, z, z_prob, in_arrays['ys']) loss = to_cpu(loss.data) loss_encoder = to_cpu(loss_encoder.data) sparsity = to_cpu(sparsity) coherence = to_cpu(coherence) regressor_cost = to_cpu(regressor_cost) loss_generator = to_cpu(loss_generator.data) y = to_cpu(y.data).tolist() z = [to_cpu(zi).tolist() for zi in z] xs = [to_cpu(xi).tolist() for xi in in_arrays['xs']] results.extend(({ 'x': xs[i], 'z': list(map(int, z[i])), 'y': y[i], 'text': [inv_vocab[t] for t in xs[i]], 'rationale': [inv_vocab[t] if zt > 0.5 else '_' for t, zt in zip(xs[i], z[i])], 'loss': float(loss[i]), 'loss_encoder': float(loss_encoder[i]), 'sparsity_cost': float(sparsity[i]), 'coherence': float(coherence[i]), 'regressor_cost': float(regressor_cost[i]), 'loss_generator': float(loss_generator[i]) } for i in range(len(y)))) return results
def run_train_loop(optimizer, train_iter, test_iter, test_count, epoch, device): model = optimizer.target train_count = 0 sum_accuracy = 0 sum_loss = 0 while train_iter.epoch < epoch: batch = train_iter.next() # Reduce learning rate by 0.5 every 25 epochs. if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch: optimizer.lr *= 0.5 print('Reducing learning rate to: {}'.format(optimizer.lr)) x_array, t_array = convert.concat_examples(batch, device) x = chainer.Variable(x_array) t = chainer.Variable(t_array, requires_grad=False) optimizer.update(model, x, t) train_count += len(t) sum_loss += float(model.loss.array) * len(t) sum_accuracy += float(model.accuracy.array) * len(t) if train_iter.is_new_epoch: print('epoch: {}'.format(train_iter.epoch)) print('train mean loss: {}, accuracy: {}'.format( sum_loss / train_count, sum_accuracy / train_count)) # evaluation train_count = 0 sum_accuracy = 0 sum_loss = 0 model.predictor.train = False # It is good practice to turn off train mode during evaluation. with configuration.using_config('train', False): for batch in test_iter: x_array, t_array = convert.concat_examples(batch, device) x = chainer.Variable(x_array) t = chainer.Variable(t_array, requires_grad=False) loss = model(x, t) sum_loss += float(loss.array) * len(t) sum_accuracy += float(model.accuracy.array) * len(t) test_iter.reset() model.predictor.train = True print('test mean loss: {}, accuracy: {}'.format( sum_loss / test_count, sum_accuracy / test_count)) sum_accuracy = 0 sum_loss = 0
def __call__(self, trainer):
    # set up a reporter
    reporter = reporter_module.Reporter()
    if hasattr(self, 'name'):
        prefix = self.name + '/'
    else:
        prefix = ''
    for name, target in six.iteritems(self.targets):
        reporter.add_observer(prefix + name, target)
        reporter.add_observers(
            prefix + name, target.namedlinks(skipself=True))

    with reporter:
        with configuration.using_config('train', False):
            result = self.evaluate(trainer)

    reporter_module.report(result)
    return result
def no_backprop_mode():
    """Make a context manager which disables back-propagation.

    In this context, Chainer does not make a computational graph. A
    :class:`~chainer.Variable` created in this context does not have a
    reference to the :class:`~chainer.Function` which created the variable,
    so you cannot compute gradients with :func:`~chainer.Variable.backward`.
    Instead, memory consumption is reduced.

    In this example, ``y`` is created in this context, so you cannot call
    :func:`~chainer.Variable.backward`.

    >>> x = chainer.Variable(np.array([1,], 'f'))
    >>> with chainer.no_backprop_mode():
    ...     y = x + 1

    """
    return configuration.using_config('enable_backprop', False)
def evaluate_rationale(model, dataset, device=-1, batchsize=128): if device >= 0: model.to_gpu(device) it = SerialIterator(dataset, batchsize, repeat=False, shuffle=False) tot_mse = 0.0 accum_precision = 0.0 # for calculating macro precision true_positives = 0.0 # for calculating micro precision chosen_ratios = 0.0 # for calculating micro precision tot_z, tot_n, tot_t = 1e-10, 1e-10, 1e-10 for batch in it: in_arrays = convert(batch, device) with chainer.function.no_backprop_mode(), using_config('train', False): pred, z_prob, z = model.forward(in_arrays['xs']) regressor_cost = model.calc_loss(pred, z, z_prob, in_arrays['ys'])[4] regressor_cost = to_cpu(regressor_cost) z = [to_cpu(zi).tolist() for zi in z] tot_mse += regressor_cost.sum() for bi, zi in zip(batch, z): true_z = bi['zs'] nzi = sum(zi) tp = np.sum(np.logical_and(zi, true_z)) if nzi == 0: # precision is undefined when there is 0 prediction continue accum_precision += tp / float(nzi) tot_n += 1 true_positives += tp tot_z += nzi chosen_ratios += nzi / float(len(zi)) tot_t += len(zi) result = { "mse": tot_mse/len(dataset), "macro_precision": accum_precision / tot_n, "micro_precision": true_positives / tot_z, "micro_chosen_ratio": tot_z / tot_t, "macro_chosen_ratio": chosen_ratios / tot_n, } return result
def __call__(self, trainer=None):
    """Executes the evaluator extension.

    Unlike usual extensions, this extension can be executed without passing
    a trainer object. This extension reports the performance on the
    validation dataset using the :func:`~chainer.report` function. Thus,
    users can use this extension independently from any trainer by manually
    configuring a :class:`~chainer.Reporter` object.

    Args:
        trainer (~chainer.training.Trainer): Trainer object that invokes
            this extension. It can be omitted in case of calling this
            extension manually.

    Returns:
        dict: Result dictionary that contains mean statistics of values
        reported by the evaluation function.

    """
    with configuration.using_config('train', False):
        result = self.evaluate()

    reporter_module.report(result)
    return result
def predict_see(image):
    image = Image.fromarray(image)
    image = preprocess_image(image, xp, image_size)

    with configuration.using_config('train', False):
        predictions, crops, grids = network(image[xp.newaxis, ...])

    predictions = F.concat(
        [F.expand_dims(prediction, axis=0) for prediction in predictions],
        axis=0)

    classification = F.softmax(predictions, axis=2)
    classification = classification.data
    classification = xp.argmax(classification, axis=2)
    classification = xp.transpose(classification, (1, 0))

    word = strip_prediction(classification, xp, args.blank_symbol)[0]
    word = "".join(map(lambda x: chr(char_map[str(x)]), word))
    return word
def no_backprop_mode():
    """Disable back-propagation for Variable whose volatile is auto.

    In the default setting a :class:`~chainer.Variable` object whose
    ``volatile`` attribute is ``'auto'`` behaves like a **non-volatile**
    variable. That means such a :class:`~chainer.Variable` object builds a
    computational graph, consumes memory to store the graph, and you can
    execute back-propagation for it. With this context such a
    :class:`~chainer.Variable` object behaves like a **volatile** variable.
    So, you can easily switch training and evaluation.

    In this example, the volatility of ``x`` and ``y`` is ``'auto'``. So,
    ``y`` does not have a computational graph.

    >>> x = chainer.Variable(numpy.array([1,], 'f'), volatile='auto')
    >>> with chainer.no_backprop_mode():
    ...     y = x + 1

    """
    return configuration.using_config('enable_backprop', False)
def process(image, network, char_map, xp, args):
    with configuration.using_config('train', False):
        predictions, crops, grids = network(image[xp.newaxis, ...])

    # extract class scores for each word
    words = OrderedDict({})
    predictions = F.concat(
        [F.expand_dims(prediction, axis=0) for prediction in predictions],
        axis=0)

    classification = F.softmax(predictions, axis=2)
    classification = classification.data
    classification = xp.argmax(classification, axis=2)
    classification = xp.transpose(classification, (1, 0))

    words = strip_prediction(classification, xp, args.blank_symbol)
    words = " ".join([
        "".join(map(lambda x: chr(char_map[str(x)]), word))
        for word in words
    ])
    return words
def __call__(self, trainer=None): # set up a reporter reporter = reporter_module.Reporter() reporter.add_observer(self.name, self.target) with reporter: with configuration.using_config('cudnn_deterministic', True): with configuration.using_config('train', False): with configuration.using_config('lmt', True): with configuration.using_config('lmt-fc', True): with configuration.using_config('exact', True): upper = self.calculate_upper_lipschitz() with configuration.using_config('lmt', False): with configuration.using_config('lmt-fc', False): with configuration.using_config('exact', False): if not self.nograd: loc = self.calculate_local_lipschitz() glo = self.calculate_global_lipschitz() adv = self.calculate_adversarial_perturbation() print('\revaluation end, saving result', flush=True) if self.nograd: values = np.array(list(zip(upper, adv))) else: values = np.array(list(zip(upper, glo, loc, adv))) output_dir = self.output_dir or trainer.out filename = pathlib.Path(output_dir) / 'inequlaity_{0}.npy'.format( self.attack_name) np.save(str(filename), values) print('\rassertions start', flush=True) if self.nograd: for up, ad in zip(upper, adv): assert up <= ad else: for up, gl, lo, ad in zip(upper, glo, loc, adv): assert up <= gl assert gl <= lo assert up <= ad
def force_backprop_mode():
    """Make a context manager which enables back-propagation.

    When you want to enable back-propagation in :func:`no_backprop_mode`,
    call this method. A :class:`~chainer.Variable` created in this context
    always has a computational graph. If you call this method outside of
    :func:`no_backprop_mode` context, it changes nothing.

    In this example, ``y`` has a computational graph and ``y.backward``
    computes gradients of variables in the graph.

    >>> with chainer.no_backprop_mode():
    ...     with chainer.force_backprop_mode():
    ...         y = x + 1

    .. seealso::

       See :func:`no_backprop_mode` for details of back-prop mode.

    """
    return configuration.using_config('enable_backprop', True)
def run_train_loop(
        optimizer, train_iter, test_iter, train_count, test_count, epoch,
        device):
    model = optimizer.target

    sum_accuracy = 0
    sum_loss = 0
    while train_iter.epoch < epoch:
        batch = train_iter.next()
        x_array, t_array = convert.concat_examples(batch, device)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array, requires_grad=False)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.array) * len(t)
        sum_accuracy += float(model.accuracy.array) * len(t)

        if train_iter.is_new_epoch:
            print('epoch: ', train_iter.epoch)
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            # It is good practice to turn off train mode during evaluation.
            with configuration.using_config('train', False):
                for batch in test_iter:
                    x_array, t_array = convert.concat_examples(
                        batch, device)
                    x = chainer.Variable(x_array)
                    t = chainer.Variable(t_array, requires_grad=False)
                    loss = model(x, t)
                    sum_loss += float(loss.array) * len(t)
                    sum_accuracy += float(model.accuracy.array) * len(t)

            test_iter.reset()
            print('test mean loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0
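# Hedged usage sketch (added): wiring run_train_loop up with a toy classifier and
# the MNIST iterators; hyper-parameter values are illustrative only.
import chainer
import chainer.links as L
from chainer import iterators, optimizers

train, test = chainer.datasets.get_mnist()
model = L.Classifier(L.Linear(784, 10))
optimizer = optimizers.Adam()
optimizer.setup(model)
train_iter = iterators.SerialIterator(train, 100)
test_iter = iterators.SerialIterator(test, 100, repeat=False, shuffle=False)
run_train_loop(optimizer, train_iter, test_iter,
               train_count=len(train), test_count=len(test), epoch=1, device=-1)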
def force_backprop_mode():
    """Make a context manager which enables back-propagation.

    When you want to enable back-propagation in :func:`no_backprop_mode`,
    call this method. A :class:`~chainer.Variable` created in this context
    always has a computational graph unless overridden by deeper contexts.
    If you call this method outside of :func:`no_backprop_mode` context, it
    changes nothing.

    In the following example, ``y`` has a computational graph and calling
    :func:`~chainer.Variable.backward` on ``y`` will compute and accumulate
    the gradients of the variables in the graph, in this case only ``x``.

    >>> x = chainer.Variable(np.array([1,], np.float32))
    >>> with chainer.no_backprop_mode():
    ...     with chainer.force_backprop_mode():
    ...         y = x + 1
    >>> y.backward()
    >>> x.grad
    array([1.], dtype=float32)

    .. note::

       ``chainer.force_backprop_mode()`` implicitly applies ChainerX's
       counterpart :func:`chainerx.force_backprop_mode()`, but not vice
       versa. Also, setting ``enable_backprop`` :ref:`configuration
       <configuration>` does not affect ChainerX.

    .. seealso::

       See :func:`chainer.no_backprop_mode` for details on disabled
       back-propagation mode.

    """
    c = configuration.using_config('enable_backprop', True)
    if chainerx.is_available():
        return _BackpropModeContext((c, chainerx.force_backprop_mode()))
    return _BackpropModeContext((c,))
def main():
    mc_iteration = 10

    mc_samples = np.random.rand(1, 10, 2).astype(np.float32)
    mc_samples = np.repeat(mc_samples, mc_iteration, axis=0)

    _mean, _var = _calc_uncertanty_from_mc_samples(mc_samples)
    print('numpy')
    print(_mean)
    print(_var)
    print('------')

    mean = chainer.functions.mean(mc_samples, axis=0)
    var = mc_samples - mean
    var = chainer.functions.mean(chainer.functions.square(var), axis=0)
    mean = mean.data
    var = var.data
    print('chainer')
    print(mean)
    print(var)
    print((np.abs(mean - _mean)))
    print((np.abs(var - _var)))
    print('------')

    sampler = MCSampler(lambda x: x, mc_iteration, lambda x: x, None, None)
    with configuration.using_config('train', False):
        mean, var = sampler(mc_samples[0])
    print('mc_sampler')
    print(mean)
    print(var)
    print((np.abs(mean - _mean)))
    print((np.abs(var - _var)))
    print('------')
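# Hedged sketch (added): _calc_uncertanty_from_mc_samples is not defined in this
# snippet; one plausible definition consistent with how main() uses it is the
# per-element mean and variance across the Monte-Carlo axis (axis 0). The real
# helper may differ.
import numpy as np

def _calc_uncertanty_from_mc_samples(mc_samples):
    mean = np.mean(mc_samples, axis=0)
    var = np.mean((mc_samples - mean) ** 2, axis=0)
    return mean, var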
def evaluate(model, iter):
    # Evaluation routine to be used for validation and test.
    evaluator = model.copy()  # to use different state
    evaluator.rnn.reset_state()  # initialize state
    sum_perp = 0
    data_count = 0
    words = []
    labels = []
    lossfun = softmax_cross_entropy.softmax_cross_entropy
    with configuration.using_config('train', False):
        for batch in copy.copy(iter):
            word, label = convert.concat_examples(batch, args.gpu)
            words.append(word)
            labels.append(label)
            data_count += 1
        outputs = evaluator(words)
        for ind in range(len(outputs)):
            y = outputs[ind]
            label = labels[ind]
            loss = lossfun(y, label)
            sum_perp += loss.data
    return np.exp(float(sum_perp) / data_count)
def no_backprop_mode():
    """Make a context manager which disables back-propagation.

    In this context, Chainer does not make a computational graph. It has the
    benefit of reducing memory consumption. However, a
    :class:`~chainer.Variable` created in this context does not hold a
    reference to the :class:`~chainer.FunctionNode` that created itself so no
    gradients are accumulated by :func:`~chainer.Variable.backward`.

    In the following example, ``y`` is created in this context, which means
    that calling :func:`~chainer.Variable.backward` on ``y`` has no effect on
    the gradients of ``x``.

    >>> x = chainer.Variable(np.array([1,], np.float32))
    >>> with chainer.no_backprop_mode():
    ...     y = x + 1
    >>> y.backward()
    >>> x.grad is None
    True

    .. note::

       ``chainer.no_backprop_mode()`` implicitly applies ChainerX's
       counterpart :func:`chainerx.no_backprop_mode()`, but not vice versa.
       Also, setting ``enable_backprop`` :ref:`configuration <configuration>`
       does not affect ChainerX.

    .. seealso::

       See :func:`chainer.force_backprop_mode` for details on how to
       override this context.

    """
    c = configuration.using_config('enable_backprop', False)
    if chainerx.is_available():
        return _BackpropModeContext((c, chainerx.no_backprop_mode()))
    return _BackpropModeContext((c,))
def fixed_batch_normalization(x, gamma, beta, mean, var, eps=2e-5):
    """Batch normalization function with fixed statistics.

    This is a variant of batch normalization, where the mean and variance
    statistics are given by the caller as fixed variables. This is used on
    testing mode of the batch normalization layer, where batch statistics
    cannot be used for prediction consistency.

    Args:
        x (Variable): Input variable.
        gamma (Variable): Scaling parameter of normalized data.
        beta (Variable): Shifting parameter of scaled normalized data.
        mean (Variable): Shifting parameter of input.
        var (Variable): Square of scaling parameter of input.
        eps (float): Epsilon value for numerical stability.

    .. seealso::
       :func:`functions.batch_normalization`,
       :class:`links.BatchNormalization`

    """
    with configuration.using_config('train', False):
        return BatchNormalizationFunction(eps, None, None, 0.0)(
            x, gamma, beta, mean, var)
def __call__(self, trainer=None):
    """Executes the evaluator extension.

    Unlike usual extensions, this extension can be executed without passing
    a trainer object. This extension reports the performance on the
    validation dataset using the :func:`~chainer.report` function. Thus,
    users can use this extension independently from any trainer by manually
    configuring a :class:`~chainer.Reporter` object.

    Args:
        trainer (~chainer.training.Trainer): Trainer object that invokes
            this extension. It can be omitted in case of calling this
            extension manually.

    Returns:
        dict: Result dictionary that contains mean statistics of values
        reported by the evaluation function.

    """
    # set up a reporter
    reporter = reporter_module.Reporter()
    if hasattr(self, 'name'):
        prefix = self.name + '/'
    else:
        prefix = ''
    for name, target in six.iteritems(self._targets):
        reporter.add_observer(prefix + name, target)
        reporter.add_observers(
            prefix + name, target.namedlinks(skipself=True))

    with reporter:
        with configuration.using_config('train', False):
            result = self.evaluate()

    reporter_module.report(result)
    return result
def evaluate(model, iter):
    # Evaluation routine to be used for validation and test.
    evaluator = model.copy()  # to use different state
    evaluator.rnn.reset_state()  # initialize state
    sum_perp = 0
    data_count = 0
    words = []
    labels = []
    lossfun = softmax_cross_entropy.softmax_cross_entropy
    with configuration.using_config('train', False):
        iter.reset()
        for batch in iter:
            word, label = convert.concat_examples(batch, args.gpu)
            words.append(word)
            labels.append(label)
            data_count += 1
        outputs = evaluator(words)
        for ind in range(len(outputs)):
            y = outputs[ind]
            label = labels[ind]
            loss = lossfun(y, label)
            sum_perp += loss.array
    return np.exp(float(sum_perp) / data_count)
def __call__(self, x):
    with configuration.using_config('train', False):
        return super(_SingleArgumentFunctionTestMode, self).__call__(x)
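# Hedged sketch (added, not from the source): a plausible base class that the
# test-mode wrapper above could extend -- it binds a function plus extra arguments
# and applies them to a single input. The actual definition may differ.
class _SingleArgumentFunction(object):

    def __init__(self, func, *args, **kwargs):
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def __call__(self, x):
        return self.func(x, *self.args, **self.kwargs)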
def test_backward_cpu(self):
    chain = self.static_chain
    with configuration.using_config('train', False):
        self.check_backward(self.x, self.gy, chain)
def fixed_batch_renormalization(x, gamma, beta, mean, var, eps=2e-5):
    with configuration.using_config('train', False):
        return BatchRenormalizationFunction(eps, None, None, 0.0)(
            x, gamma, beta, mean, var)
def numerical_grad( f, inputs, grad_outputs, eps=1e-3, detect_nondifferentiable=False, diff_atol=0, diff_rtol=1e-2, center_outputs=None): """Computes numerical gradient by finite differences. This function is used to implement gradient check. For usage example, see unit tests of :mod:`chainer.functions`. By default, ``numerical_grad`` computes the gradient to the first order of ``eps``. Args: f (callable): Python function with no arguments that runs forward computation and returns the result. inputs (tuple of arrays): Tuple of arrays that should be treated as inputs. Each element of them is slightly modified to realize numerical gradient by finite differences. grad_outputs (tuple of arrays or scalars): Tuple of arrays or scalars that are treated as output gradients. eps (float): Epsilon value of finite differences. detect_nondifferentiable (bool): ``False`` by default. If ``True``, ``numerical_grad`` checks whether ``f`` is differentiable at ``inputs``. It requires evaluation of ``f`` at 5 points instead of 2. As a side effect, the accuracy of numerical gradient will be increased to the third order of ``eps``. If it turns out that ``f`` is non-differentiable at ``input``, ``numerical_grad`` raises :class:`~chainer.gradient_check.NondifferentiableError`. diff_atol (float): Absolute tolerance of fitting error of non-differentiable point detection. diff_rtol (float): Tolerance of fitting error of non-differentiable point detection relative to the output values of ``f``. center_outputs (tuple of arrays or None): Only used if ``detect_nondifferentiable`` is ``True``. If specified, these arrays are used as the outputs of ``f`` at ``inputs``. Otherwise, it is calculated. It can be used to reduce the computation if these arrays are already calculated before calling ``numerical_grad``. Returns: tuple: Numerical gradient arrays corresponding to ``inputs``. """ # TODO(niboshi): Deprecate `center_outputs` argument. # If dtype of this argument is not float64, often the resolution is # insufficient for numerical gradient calculation. We might use it only # when its dtype is float64, but it would be better to simply remove it. 
center_outputs = None assert eps > 0 assert isinstance(inputs, (tuple, list)) for x in inputs: if x.dtype.kind != 'f': raise RuntimeError( 'The dtype of input arrays must be kind of float') inputs = tuple(inputs) # Cast grad_outputs to float64 grad_outputs = tuple([ None if g is None else numpy.float64(g) if numpy.isscalar(g) else g.astype(numpy.float64) for g in grad_outputs]) if not chainer.is_arrays_compatible( [a for a in inputs + grad_outputs if not numpy.isscalar(a)]): raise RuntimeError('Do not mix GPU and CPU arrays in `numerical_grad`') device = backend.get_device_from_array(*(inputs + grad_outputs)) xp = device.xp if xp is cuda.cupy: numerical_grad_kernel_1 = cuda.reduce( 'T y1, T y2, U gy, T eps', 'V gxi', '(y1 - y2) * gy', 'a + b', 'gxi += a / (eps * 2)', '0', 'numerical_grad_kernel_1' ) numerical_grad_kernel_3 = cuda.reduce( 'T y1, T y2, T y3, T y4, U gy, T eps', 'V gxi', '(-y1 + 8 * y2 - 8 * y3 + y4) * gy', 'a + b', 'gxi += a / (eps * 6)', '0', 'numerical_grad_kernel_3' ) if xp is chainerx: grads = [ xp.zeros(x.shape, numpy.float64, device=x.device) for x in inputs] else: grads = [xp.zeros(x.shape, numpy.float64) for x in inputs] if detect_nondifferentiable: if center_outputs is None: ys0 = _copy_arrays(f()) else: ys0 = center_outputs nout = len(ys0) shapes = [_.shape for _ in ys0] sizes = numpy.array([_.size for _ in ys0]) cumsizes = numpy.cumsum(sizes) # Evaluate func at a single input def eval_func(x, i, delta, orig): x[i] = orig + delta y = _copy_arrays(f()) assert len(y) == len(grad_outputs) assert all([ gy is None for y_, gy in zip(y, grad_outputs) if y_ is None]) assert all([ gy is None or numpy.isscalar(gy) or y_.shape == gy.shape for y_, gy in zip(y, grad_outputs)]) x[i] = orig return y # An iteration on a single input displacement def iterate_single_input(i_in, x, orig_x, i): orig = orig_x[i] # `yss` holds a list of output arrays for each of 2 or 5 sampling # points. if detect_nondifferentiable: yss = [ eval_func(x, i, -eps * 1., orig), eval_func(x, i, -eps * .5, orig), ys0, eval_func(x, i, +eps * .5, orig), eval_func(x, i, +eps * 1., orig), ] else: yss = [ eval_func(x, i, -eps * 1, orig), eval_func(x, i, +eps * 1, orig), ] if detect_nondifferentiable: # Detect non-differentiable point by quadratic fitting # Check for non-finite output. # If any single element in the output arrays has different # finiteness among sampled points, that means this is a # non-differentiable point. # If the function consistently generates non-finite values # around the point, we do not treat the point as # non-differentiable. 
# (Example: x<0 region for the logarithm function) any_nonfinite = False for i_out in range(nout): isfinites = [xp.isfinite(ys[i_out]) for ys in yss] if any((isfinites[0] != isfinites[i]).any() for i in range(1, len(yss))): s = six.StringIO() s.write( 'Tried to compute the numeric gradient on a ' 'non-differentiable point.\n\n') s.write('i_in: {}\n'.format(i_in)) s.write('i_out: {}\n'.format(i_out)) s.write('x: {}\n'.format(inputs[i_in])) s.write('index on x: {}\n'.format(i)) s.write('eps: {}\n'.format(eps)) s.write('y[x-eps ]: {}\n'.format(yss[0][i_out])) s.write('y[x-eps/2]: {}\n'.format(yss[1][i_out])) s.write('y[x ]: {}\n'.format(yss[2][i_out])) s.write('y[x+eps/2]: {}\n'.format(yss[3][i_out])) s.write('y[x+eps ]: {}\n'.format(yss[4][i_out])) raise NondifferentiableError(s.getvalue()) any_nonfinite |= not all((_).all() for _ in isfinites) if not any_nonfinite: # Stack flattened outputs to make (5, *)-shaped 2D array ystack = xp.vstack( [xp.hstack([y.ravel() for y in ys]) for ys in yss]) assert ystack.ndim == 2 and ystack.shape[0] == len(yss) # Fit to quadratic if xp is not numpy: ystack = _cpu._to_cpu(ystack) polyfit = numpy.polynomial.polynomial.polyfit _, (residuals, _, _, _) = polyfit( range(len(yss)), ystack, deg=2, full=True) if xp is not numpy: residuals = device.send(residuals) residuals = xp.sqrt(residuals / len(yss)) # Check for error for each output array for i_out in range(nout): size = sizes[i_out] cumsize = cumsizes[i_out] shape = shapes[i_out] # TODO(niboshi): The following two lines could be # rewritten using xp.stack, which is supported in # NumPy>=1.10 ymax = xp.concatenate( [ys[i_out][None] for ys in yss]).max(axis=0) ymin = xp.concatenate( [ys[i_out][None] for ys in yss]).min(axis=0) # Restore the shape of flattened residual res = residuals[cumsize - size:cumsize] res = res.reshape(shape) det = utils.force_array( diff_atol + diff_rtol * (ymax - ymin) < res) # Constant output = not nondifferentiable det[ymax == ymin] = False if det.any(): s = six.StringIO() s.write( 'Tried to compute the numeric gradient on a ' 'non-differentiable point.\n\n') s.write('i_in: {}\n'.format(i_in)) s.write('i_out: {}\n'.format(i_out)) s.write('x: {}\n'.format(inputs[i_in])) s.write('index on x: {}\n'.format(i)) s.write('eps: {}\n'.format(eps)) s.write('diff_rtol: {}\n'.format(diff_rtol)) s.write('diff_atol: {}\n'.format(diff_atol)) s.write('ymax: {}\n'.format(ymax)) s.write('ymin: {}\n'.format(ymin)) s.write( 'diff_atol + diff_rtol * (ymax-ymin): {}\n'.format( diff_atol + diff_rtol * (ymax - ymin))) s.write('fitting errors: {}\n'.format(res)) s.write('y[x-eps ]: {}\n'.format(yss[0][i_out])) s.write('y[x-eps/2]: {}\n'.format(yss[1][i_out])) s.write('y[x ]: {}\n'.format(yss[2][i_out])) s.write('y[x+eps/2]: {}\n'.format(yss[3][i_out])) s.write('y[x+eps ]: {}\n'.format(yss[4][i_out])) raise NondifferentiableError(s.getvalue()) # Calculate numerical gradient for i_out, gy in enumerate(grad_outputs): if gy is None: continue if not numpy.isscalar(gy): gy = gy.astype(numpy.float64, copy=False) gpu_ = (xp is cuda.cupy and all(isinstance(ys[i_out], cuda.ndarray) for ys in yss)) # If any output sample is None, all others must be. assert all([ (yss[0][i_out] is None) == (yss[j][i_out] is None) for j in range(len(yss))]) # If outputs samples are None, the part of numeric gradient for # this output is considered as zero: skip the accumulation. 
if yss[0][i_out] is None: continue if len(yss) == 2: # 1st order y0 = yss[0][i_out] y1 = yss[1][i_out] if gpu_: numerical_grad_kernel_1( y1, y0, xp.asarray(gy), eps, gx[i]) else: dot = ((y1 - y0) * gy).sum() gx[i] = gx[i] + dot / (2 * eps) elif len(yss) == 5: # 3rd order y0 = yss[0][i_out] y1 = yss[1][i_out] y2 = yss[3][i_out] y3 = yss[4][i_out] if gpu_: numerical_grad_kernel_3( y3, y2, y1, y0, gy, eps, gx[i]) else: num = -y3 + 8 * y2 - 8 * y1 + y0 dot = (num * gy).sum() gx[i] = gx[i] + dot / (6 * eps) else: assert False # Calculate numeric gradient with configuration.using_config('type_check', False): for i_in, (x, gx) in enumerate(six.moves.zip(inputs, grads)): orig_x = x.copy() # hold original value for i in numpy.ndindex(x.shape): iterate_single_input(i_in, x, orig_x, i) return [g.astype(x.dtype, copy=False) for g, x in six.moves.zip(grads, inputs)]
def main(): parser = argparse.ArgumentParser(description='Chainer example: MNIST') parser.add_argument('--batchsize', '-b', type=int, default=100, help='Number of images in each mini-batch') parser.add_argument('--epoch', '-e', type=int, default=20, help='Number of sweeps over the dataset to train') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)') parser.add_argument('--out', '-o', default='result', help='Directory to output the result') parser.add_argument('--model', '-m', default='MLP', help='Choose the model: MLP or MLPSideEffect') parser.add_argument('--resume', '-r', default='', help='Resume the training from snapshot') parser.add_argument('--unit', '-u', type=int, default=1000, help='Number of units') args = parser.parse_args() print('GPU: {}'.format(args.gpu)) print('# unit: {}'.format(args.unit)) print('# Minibatch-size: {}'.format(args.batchsize)) print('# epoch: {}'.format(args.epoch)) print('') # Set up a neural network to train if args.model == 'MLP': model = L.Classifier(train_mnist.MLP(args.unit, 10)) elif args.model == 'MLPSideEffect': model = L.Classifier(train_mnist.MLPSideEffect(args.unit, 10)) if args.gpu >= 0: # Make a speciied GPU current chainer.cuda.get_device_from_id(args.gpu).use() model.to_gpu() # Copy the model to the GPU # Setup an optimizer optimizer = chainer.optimizers.Adam() optimizer.setup(model) # Load the MNIST dataset train, test = chainer.datasets.get_mnist() train_count = len(train) test_count = len(test) train_iter = chainer.iterators.SerialIterator(train, args.batchsize) test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False) sum_accuracy = 0 sum_loss = 0 while train_iter.epoch < args.epoch: batch = train_iter.next() x_array, t_array = convert.concat_examples(batch, args.gpu) x = chainer.Variable(x_array) t = chainer.Variable(t_array) optimizer.update(model, x, t) sum_loss += float(model.loss.data) * len(t.data) sum_accuracy += float(model.accuracy.data) * len(t.data) if train_iter.is_new_epoch: print('epoch: ', train_iter.epoch) print('train mean loss: {}, accuracy: {}'.format( sum_loss / train_count, sum_accuracy / train_count)) # evaluation sum_accuracy = 0 sum_loss = 0 # It is good practice to turn off train mode during evaluation. with configuration.using_config('train', False): for batch in test_iter: x_array, t_array = convert.concat_examples(batch, args.gpu) x = chainer.Variable(x_array) t = chainer.Variable(t_array) loss = model(x, t) sum_loss += float(loss.data) * len(t.data) sum_accuracy += float(model.accuracy.data) * len(t.data) test_iter.reset() print('test mean loss: {}, accuracy: {}'.format( sum_loss / test_count, sum_accuracy / test_count)) sum_accuracy = 0 sum_loss = 0 # Save the model and the optimizer print('save the model') serializers.save_npz('mlp.model', model) print('save the optimizer') serializers.save_npz('mlp.state', optimizer)
def main(): parser = argparse.ArgumentParser(description='Chainer example: MNIST') parser.add_argument('--batchsize', '-b', type=int, default=100, help='Number of images in each mini-batch') parser.add_argument('--epoch', '-e', type=int, default=20, help='Number of sweeps over the dataset to train') parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)') parser.add_argument('--out', '-o', default='result', help='Directory to output the result') parser.add_argument('--resume', '-r', default='', help='Resume the training from snapshot using model ' 'and state files in the specified directory') parser.add_argument('--unit', '-u', type=int, default=1000, help='Number of units') args = parser.parse_args() print('GPU: {}'.format(args.gpu)) print('# unit: {}'.format(args.unit)) print('# Minibatch-size: {}'.format(args.batchsize)) print('# epoch: {}'.format(args.epoch)) print('') # Set up a neural network to train model = L.Classifier(train_mnist.MLP(args.unit, 10)) if args.gpu >= 0: # Make a speciied GPU current chainer.backends.cuda.get_device_from_id(args.gpu).use() model.to_gpu() # Copy the model to the GPU # Setup an optimizer optimizer = chainer.optimizers.Adam() optimizer.setup(model) if args.resume: # Resume from a snapshot serializers.load_npz('{}/mlp.model'.format(args.resume), model) serializers.load_npz('{}/mlp.state'.format(args.resume), optimizer) # Load the MNIST dataset train, test = chainer.datasets.get_mnist() train_count = len(train) test_count = len(test) with MultiprocessIterator(train, args.batchsize) as train_iter, \ MultiprocessIterator(test, args.batchsize, repeat=False, shuffle=False) as test_iter: sum_accuracy = 0 sum_loss = 0 while train_iter.epoch < args.epoch: batch = train_iter.next() x, t = convert.concat_examples(batch, args.gpu) optimizer.update(model, x, t) sum_loss += float(model.loss.data) * len(t) sum_accuracy += float(model.accuracy.data) * len(t) if train_iter.is_new_epoch: print('epoch: {}'.format(train_iter.epoch)) print('train mean loss: {}, accuracy: {}'.format( sum_loss / train_count, sum_accuracy / train_count)) # evaluation sum_accuracy = 0 sum_loss = 0 # Enable evaluation mode. with configuration.using_config('train', False): # This is optional but can reduce computational overhead. with chainer.using_config('enable_backprop', False): for batch in test_iter: x, t = convert.concat_examples(batch, args.gpu) loss = model(x, t) sum_loss += float(loss.data) * len(t) sum_accuracy += float(model.accuracy.data) * len(t) test_iter.reset() print('test mean loss: {}, accuracy: {}'.format( sum_loss / test_count, sum_accuracy / test_count)) sum_accuracy = 0 sum_loss = 0 # Save the model and the optimizer print('save the model') serializers.save_npz('{}/mlp.model'.format(args.out), model) print('save the optimizer') serializers.save_npz('{}/mlp.state'.format(args.out), optimizer)
def __call__(self, x):
    with configuration.using_config('use_cudnn', self.use_cudnn):
        return super(_SingleArgumentFunctionWithCudnn, self).__call__(x)
def main(): parser = argparse.ArgumentParser(description='Chainer example: MNIST') parser.add_argument('--batchsize', '-b', type=int, default=100, help='Number of images in each mini-batch') parser.add_argument('--epoch', '-e', type=int, default=20, help='Number of sweeps over the dataset to train') parser.add_argument('--device', '-d', type=str, default='-1', help='Device specifier. Either ChainerX device ' 'specifier or an integer. If non-negative integer, ' 'CuPy arrays with specified device id are used. If ' 'negative integer, NumPy arrays are used') parser.add_argument('--out', '-o', default='result', help='Directory to output the result') parser.add_argument('--resume', '-r', default='', help='Resume the training from snapshot using model ' 'and state files in the specified directory') parser.add_argument('--unit', '-u', type=int, default=1000, help='Number of units') group = parser.add_argument_group('deprecated arguments') group.add_argument('--gpu', '-g', type=int, nargs='?', const=0, help='GPU ID (negative value indicates CPU)') args = parser.parse_args() device = parse_device(args) print('Device: {}'.format(device)) print('# unit: {}'.format(args.unit)) print('# Minibatch-size: {}'.format(args.batchsize)) print('# epoch: {}'.format(args.epoch)) print('') # Set up a neural network to train model = L.Classifier(train_mnist.MLP(args.unit, 10)) model.to_device(device) device.use() # Setup an optimizer optimizer = chainer.optimizers.Adam() optimizer.setup(model) if args.resume: # Resume from a snapshot serializers.load_npz('{}/mlp.model'.format(args.resume), model) serializers.load_npz('{}/mlp.state'.format(args.resume), optimizer) # Load the MNIST dataset train, test = chainer.datasets.get_mnist() train_count = len(train) test_count = len(test) with SerialIterator(train, args.batchsize) as train_iter, \ SerialIterator( test, args.batchsize, repeat=False, shuffle=False) as test_iter: sum_accuracy = 0 sum_loss = 0 while train_iter.epoch < args.epoch: batch = train_iter.next() x, t = convert.concat_examples(batch, device) optimizer.update(model, x, t) sum_loss += float(model.loss.array) * len(t) sum_accuracy += float(model.accuracy.array) * len(t) if train_iter.is_new_epoch: print('epoch: {}'.format(train_iter.epoch)) print('train mean loss: {}, accuracy: {}'.format( sum_loss / train_count, sum_accuracy / train_count)) # evaluation sum_accuracy = 0 sum_loss = 0 # Enable evaluation mode. with configuration.using_config('train', False): # This is optional but can reduce computational overhead. with chainer.using_config('enable_backprop', False): for batch in test_iter: x, t = convert.concat_examples(batch, device) loss = model(x, t) sum_loss += float(loss.array) * len(t) sum_accuracy += float( model.accuracy.array) * len(t) test_iter.reset() print('test mean loss: {}, accuracy: {}'.format( sum_loss / test_count, sum_accuracy / test_count)) sum_accuracy = 0 sum_loss = 0 # Save the model and the optimizer print('save the model') serializers.save_npz('{}/mlp.model'.format(args.out), model) print('save the optimizer') serializers.save_npz('{}/mlp.state'.format(args.out), optimizer)
def __init__(self, debug):
    warnings.warn('chainer.DebugMode is deprecated. '
                  'Use chainer.using_config("debug", ...) instead.',
                  DeprecationWarning)
    self._using = using_config('debug', debug)
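# Hedged example (added): the replacement suggested by the deprecation warning above.
import chainer

with chainer.using_config('debug', True):
    pass  # run forward/backward here; debug-mode checks (e.g. NaN detection) apply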
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--resume', '-r', type=str,
                        help='Directory that has `vgg.model` and `vgg.state`')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.test:
        train = train[:200]
        test = test[:200]

    train_count = len(train)
    test_count = len(test)

    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    if args.resume is not None:
        resume = args.resume
        if os.path.exists(resume):
            serializers.load_npz(os.path.join(resume, 'vgg.model'), model)
            serializers.load_npz(os.path.join(resume, 'vgg.state'), optimizer)
        else:
            raise ValueError(
                '`args.resume` ("{}") is specified,'
                ' but it does not exist.'.format(resume)
            )

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_acc = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        # Reduce learning rate by 0.5 every 25 epochs.
        if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch:
            optimizer.lr *= 0.5
            print('Reducing learning rate to: {}'.format(optimizer.lr))

        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.array) * len(t)
        sum_acc += float(model.accuracy.array) * len(t)

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_acc / train_count))
            sum_acc = 0
            sum_loss = 0
            # Enable evaluation mode.
            with configuration.using_config('train', False):
                # This is optional but can reduce computational overhead.
                with chainer.using_config('enable_backprop', False):
                    for batch in test_iter:
                        x, t = convert.concat_examples(batch, args.gpu)
                        x = chainer.Variable(x)
                        t = chainer.Variable(t)
                        loss = model(x, t)
                        sum_loss += float(loss.array) * len(t)
                        sum_acc += float(model.accuracy.array) * len(t)
            test_iter.reset()
            print('test mean loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_acc / test_count))
            sum_acc = 0
            sum_loss = 0

    # Save the model and the optimizer
    out = args.out
    if not os.path.exists(out):
        os.makedirs(out)
    print('save the model')
    serializers.save_npz(os.path.join(out, 'vgg.model'), model)
    print('save the optimizer')
    serializers.save_npz(os.path.join(out, 'vgg.state'), optimizer)
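# A quick illustrative check (not part of the example) of what the schedule
# above produces: the learning rate is halved once per 25 epochs, so with the
# default --learnrate 0.05 and --epoch 300 it is halved 12 times.
lr = 0.05
for epoch in range(1, 301):
    if epoch % 25 == 0:
        lr *= 0.5
print(lr)  # 0.05 / 2 ** 12, roughly 1.22e-05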
def main():
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.test:
        train = train[:200]
        test = test[:200]

    train_count = len(train)
    test_count = len(test)

    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    sum_accuracy = 0
    sum_loss = 0

    while train_iter.epoch < args.epoch:
        batch = train_iter.next()
        # Reduce learning rate by 0.5 every 25 epochs.
        if train_iter.epoch % 25 == 0 and train_iter.is_new_epoch:
            optimizer.lr *= 0.5
            print('Reducing learning rate to: {}'.format(optimizer.lr))

        x_array, t_array = convert.concat_examples(batch, args.gpu)
        x = chainer.Variable(x_array)
        t = chainer.Variable(t_array)
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.data) * len(t.data)
        sum_accuracy += float(model.accuracy.data) * len(t.data)

        if train_iter.is_new_epoch:
            print('epoch: {}'.format(train_iter.epoch))
            print('train mean loss: {}, accuracy: {}'.format(
                sum_loss / train_count, sum_accuracy / train_count))
            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            model.predictor.train = False
            # It is good practice to turn off train mode during evaluation.
            with configuration.using_config('train', False):
                for batch in test_iter:
                    x_array, t_array = convert.concat_examples(batch, args.gpu)
                    x = chainer.Variable(x_array)
                    t = chainer.Variable(t_array)
                    loss = model(x, t)
                    sum_loss += float(loss.data) * len(t.data)
                    sum_accuracy += float(model.accuracy.data) * len(t.data)
            test_iter.reset()
            model.predictor.train = True
            print('test mean loss: {}, accuracy: {}'.format(
                sum_loss / test_count, sum_accuracy / test_count))
            sum_accuracy = 0
            sum_loss = 0

    # Save the model and the optimizer into the --out directory
    if not os.path.exists(args.out):
        os.makedirs(args.out)
    print('save the model')
    serializers.save_npz(os.path.join(args.out, 'vgg.model'), model)
    print('save the optimizer')
    serializers.save_npz(os.path.join(args.out, 'vgg.state'), optimizer)
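# The snippet above still flips a legacy `train` attribute on the predictor
# around evaluation.  Below is a minimal sketch of the config-based pattern,
# where the link consults `chainer.config.train` instead, so only
# `using_config('train', False)` is needed at evaluation time.  The small MLP
# is illustrative and is not the VGG model used in the example.
import chainer
import chainer.functions as F
import chainer.links as L


class MLP(chainer.Chain):

    def __init__(self, n_units, n_out):
        super(MLP, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(None, n_units)
            self.l2 = L.Linear(None, n_out)

    def __call__(self, x):
        h = F.relu(self.l1(x))
        # F.dropout reads chainer.config.train internally; it is a no-op when
        # train mode is disabled, so no manual `self.train` flag is needed.
        h = F.dropout(h)
        return self.l2(h)


# At evaluation time:
#     with chainer.using_config('train', False):
#         y = model(x)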