def evaluate(self):
    """Score the ``'main'`` target on the ``'main'`` iterator and report mAP.

    Predictions are collected with ``apply_to_iterator`` using
    ``self.predict_func`` and scored by ``eval_multi_label_classification``.
    When ``self.label_names`` is set, one ``ap/<label_name>`` entry is
    reported per label; labels with no entry in ``result['ap']`` get NaN.

    Returns:
        dict: Observation filled in by ``reporter.report``.
    """
    main_iterator = self._iterators['main']
    target = self._targets['main']

    # Rewind a resettable iterator in place; otherwise iterate a shallow copy.
    if hasattr(main_iterator, 'reset'):
        main_iterator.reset()
        it = main_iterator
    else:
        it = copy.copy(main_iterator)

    in_values, out_values, rest_values = apply_to_iterator(
        self.predict_func, it)
    # The inputs are not needed for scoring; drop the reference explicitly.
    del in_values

    pred_labels, pred_scores = out_values
    gt_labels, = rest_values

    result = eval_multi_label_classification(
        pred_labels, pred_scores, gt_labels)

    report = {'map': result['map']}
    if self.label_names is not None:
        for index, label_name in enumerate(self.label_names):
            key = 'ap/{:s}'.format(label_name)
            try:
                report[key] = result['ap'][index]
            except IndexError:
                report[key] = np.nan

    observation = {}
    with reporter.report_scope(observation):
        reporter.report(report, target)
    return observation
def __call__(self, trainer):
    """Run the evaluation and dump max-location / max-bound tables.

    Overrides ``extensions.Evaluator.__call__``. A fresh reporter is set up
    with every target (and its child links) registered as observers, then
    ``self.evaluate()`` is run inside it.

    Args:
        trainer: Trainer invoking this extension; its ``out`` directory
            receives the patch-image and ``features`` sub-directories.

    Returns:
        The ``result`` object returned by ``self.evaluate()``.
    """
    # Set up a reporter with every target registered under its prefix.
    reporter = reporter_module.Reporter()
    prefix = self.name + '/' if hasattr(self, 'name') else ''
    for name, target in six.iteritems(self._targets):
        observer_name = prefix + name
        reporter.add_observer(observer_name, target)
        reporter.add_observers(observer_name,
                               target.namedlinks(skipself=True))

    with reporter:
        # Create the patch directory first; presumably evaluate() writes
        # into it -- TODO confirm.
        self.patch_image_dir = os.path.join(trainer.out, self.layer_name)
        if not os.path.exists(self.patch_image_dir):
            os.makedirs(self.patch_image_dir)

        result, locs, bounds = self.evaluate()

        outputdir = os.path.join(trainer.out, 'features')
        if not os.path.exists(outputdir):
            os.makedirs(outputdir)

        if locs:
            loc_path = os.path.join(
                outputdir, 'maxloc_' + self.layer_name + '.txt')
            self.save_tuple_list(loc_path, locs)
        if bounds:
            bounds_path = os.path.join(
                outputdir, 'maxbounds_' + self.layer_name + '.txt')
            self.save_tuple_list(bounds_path, bounds)

    reporter_module.report(result)
    return result
def evaluate(self):
    """Compute corpus BLEU over ``self.test_data`` in a two-rank setup.

    Rank 0 only calls ``self.model.translate`` and discards the output
    (presumably to drive its half of the model-parallel pipeline -- TODO
    confirm). Rank 1 collects the hypotheses, computes BLEU, reports it
    under ``'bleu'`` and prints the elapsed time.

    Returns:
        dict: Observation; contains the BLEU entry only on rank 1.
    """
    started = time.time()
    with chainer.no_backprop_mode():
        references = []
        hypotheses = []
        observation = {}
        with reporter.report_scope(observation):
            for offset in range(0, len(self.test_data), self.batch):
                chunk = self.test_data[offset:offset + self.batch]
                src, trg = zip(*chunk)
                references.extend([[t.tolist()] for t in trg])
                src = [chainer.dataset.to_device(self.device, x)
                       for x in src]

                rank = self.comm.rank
                if rank == 0:
                    self.model.translate(src, self.max_length)
                elif rank == 1:
                    translated = self.model.translate(src, self.max_length)
                    hypotheses.extend([y.tolist() for y in translated])

            if self.comm.rank == 1:
                smoothing = bleu_score.SmoothingFunction().method1
                bleu = bleu_score.corpus_bleu(
                    references, hypotheses, smoothing_function=smoothing)
                reporter.report({'bleu': bleu}, self.model)
    finished = time.time()

    if self.comm.rank == 1:
        print("BleuEvaluator(single)::evaluate(): "
              "took {:.3f} [s]".format(finished - started))
        sys.stdout.flush()
    return observation
def __call__(self, trainer=None):
    """Run the evaluator and report its result.

    This extension does not use the trainer, so it can also be called
    manually: the result is published through ``reporter_module.report``,
    which lets a caller collect it with its own reporter.

    Args:
        trainer (~chainer.training.Trainer): Trainer object that invokes
            this extension. May be omitted when calling manually.

    Returns:
        dict: The result returned by ``self.evaluate()``.
    """
    # A dedicated reporter with every target and its child links observed.
    reporter = reporter_module.Reporter()
    prefix = self.name + '/' if hasattr(self, 'name') else ''
    for name, target in six.iteritems(self._targets):
        observer_name = prefix + name
        reporter.add_observer(observer_name, target)
        reporter.add_observers(observer_name,
                               target.namedlinks(skipself=True))

    with reporter:
        result = self.evaluate()

    reporter_module.report(result)
    return result
def __call__(self, *inputs):
    """Compute the decoder-side sequence loss and report loss/perplexity.

    Args:
        inputs: Flat tuple whose first half holds source sequences and whose
            second half holds target sequences. The sources are only used
            here to determine the batch size.

    Returns:
        ~chainer.Variable: Summed cross-entropy divided by the batch size.
    """
    half = len(inputs) // 2
    xs = [x[::-1] for x in inputs[:half]]
    ys = inputs[half:]
    batch = len(xs)

    # A single zero id serves as both the start and the end marker.
    eos = self.xp.zeros(1, self.xp.int32)
    ys_in = [F.concat([eos, y], axis=0) for y in ys]
    ys_out = [F.concat([y, eos], axis=0) for y in ys]

    eys = sequence_embed(self.embed_y, ys_in)

    # Receive hidden states from the encoder process and decode.
    _, _, decoded, _ = self.mn_decoder(eys)

    # Concatenating first means only one matrix multiplication is needed.
    flat_outputs = F.concat(decoded, axis=0)
    flat_targets = F.concat(ys_out, axis=0)
    token_losses = F.softmax_cross_entropy(
        self.W(flat_outputs), flat_targets, reduce='no')
    loss = F.sum(token_losses) / batch
    reporter.report({'loss': loss.data}, self)

    n_words = flat_targets.shape[0]
    perp = self.xp.exp(loss.data * batch / n_words)
    reporter.report({'perp': perp}, self)
    return loss
def __call__(self, *inputs):
    """Compute the encoder-decoder sequence loss and report loss/perplexity.

    Args:
        inputs: Flat tuple whose first half holds source sequences and whose
            second half holds target sequences. Each source is reversed
            before being embedded.

    Returns:
        ~chainer.Variable: Summed cross-entropy divided by the batch size.
    """
    half = len(inputs) // 2
    xs = [x[::-1] for x in inputs[:half]]
    ys = inputs[half:]

    # A single zero id serves as both the start and the end marker.
    eos = self.xp.zeros(1, self.xp.int32)
    ys_in = [F.concat([eos, y], axis=0) for y in ys]
    ys_out = [F.concat([y, eos], axis=0) for y in ys]

    # Both xs and ys_in are lists of arrays.
    exs = sequence_embed(self.embed_x, xs)
    eys = sequence_embed(self.embed_y, ys_in)

    batch = len(xs)
    # None represents a zero initial state for the encoder.
    hx, cx, _ = self.encoder(None, None, exs)
    _, _, decoded = self.decoder(hx, cx, eys)

    # Concatenating first means only one matrix multiplication is needed.
    flat_outputs = F.concat(decoded, axis=0)
    flat_targets = F.concat(ys_out, axis=0)
    token_losses = F.softmax_cross_entropy(
        self.W(flat_outputs), flat_targets, reduce='no')
    loss = F.sum(token_losses) / batch
    reporter.report({'loss': loss.data}, self)

    n_words = flat_targets.shape[0]
    perp = self.xp.exp(loss.data * batch / n_words)
    reporter.report({'perp': perp}, self)
    return loss
def __call__(self, x, t):
    """Return the mean squared error of the two-layer network on ``(x, t)``.

    The loss is reported under ``'loss'``. As a side effect, the first row
    of the output is appended to the module-level ``pred_list``.
    """
    hidden = F.relu(self.l1(x))
    y = self.l2(hidden)

    loss = F.mean_squared_error(y, t)
    reporter.report({'loss': loss}, self)

    pred_list.append(y.data[0])
    return loss
def evaluate(self):
    """Evaluate projected 3D bounding boxes on the ``'main'`` iterator.

    ``target.predict`` is applied to every sample and the predicted points
    and scores are scored against the ground-truth points by
    ``eval_projected_3d_bbox_single``. The whole result dict is reported.

    Returns:
        dict: Observation filled in by ``reporter.report``.
    """
    main_iterator = self._iterators['main']
    target = self._targets['main']

    # Rewind a resettable iterator in place; otherwise iterate a shallow copy.
    if hasattr(main_iterator, 'reset'):
        main_iterator.reset()
        it = main_iterator
    else:
        it = copy.copy(main_iterator)

    in_values, out_values, rest_values = apply_to_iterator(
        target.predict, it)
    # The inputs are not needed for scoring; drop the reference explicitly.
    del in_values

    # The label streams are unpacked (which checks the arity) but not used.
    points, _labels, scores = out_values
    gt_points, _gt_labels = rest_values

    report = eval_projected_3d_bbox_single(
        points, scores, gt_points,
        self.vertex, self.intrinsics, diam=self.diam)

    observation = {}
    with reporter.report_scope(observation):
        reporter.report(report, target)
    return observation
def __call__(self, *args):
    """Compute the loss for a minibatch and optionally the accuracy.

    Args:
        args (list of ~chainer.Variable): Input minibatch. Every element
            but the last is passed to the predictor as a feature; the last
            element is the ground-truth label.

    Returns:
        ~chainer.Variable: Loss value.
    """
    assert len(args) >= 2
    features, t = args[:-1], args[-1]

    # Clear the previous results before running the predictor.
    self.y = None
    self.loss = None
    self.accuracy = None

    self.y = self.predictor(*features)
    self.loss = self.lossfun(self.y, t)
    reporter.report({'loss': self.loss}, self)

    if self.compute_accuracy:
        self.accuracy = self.accfun(self.y, t)
        reporter.report({'accuracy': self.accuracy}, self)
    return self.loss
def __call__(self, x, t):
    """Return ``lossfun(predictor(x), t)`` and report it under ``'loss'``.

    When ``self.accfun`` is set, the accuracy is computed from the same
    prediction and reported under ``'accuracy'``.
    """
    prediction = self.predictor(x)

    loss = self.lossfun(prediction, t)
    reporter.report({'loss': loss}, self)

    if self.accfun is not None:
        reporter.report({'accuracy': self.accfun(prediction, t)}, self)
    return loss
def __call__(self, x, context):
    """Compute the loss between each context embedding and its centre ``x``.

    ``x`` is broadcast along the second axis of the embedded context so
    every context position is paired with its centre id, then both are
    flattened before being handed to ``self.loss_func``.

    Returns:
        ~chainer.Variable: Loss value, also reported under ``'loss'``.
    """
    e = self.embed(context)
    dims = e.data.shape
    n_pairs = dims[0] * dims[1]

    x = F.broadcast_to(x[:, None], (dims[0], dims[1]))
    e = F.reshape(e, (n_pairs, dims[2]))
    x = F.reshape(x, (n_pairs,))

    loss = self.loss_func(e, x)
    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, *args):
    """Compute the loss for ``(x, t, ignore)`` and report it.

    Only the first three positional arguments are used; any extras are
    dropped. ``ignore`` is forwarded to ``self.lossfun`` unchanged.

    Returns:
        ~chainer.Variable: Loss value.
    """
    x, t, ignore = args[:3]

    # Clear the previous results before running the predictor.
    self.y = None
    self.loss = None
    self.pre_rec = None

    self.y = self.predictor(x)
    self.loss = self.lossfun(self.y, t, ignore)

    reporter.report({'loss': self.loss}, self)
    return self.loss
def __call__(self, x, contexts):
    """Compute the loss between each context embedding and its centre ``x``.

    The embedded contexts have three axes, unpacked below as
    ``(batch_size, n_context, n_units)``. ``x`` is repeated ``n_context``
    times so it lines up with the flattened context embeddings.

    Returns:
        ~chainer.Variable: Loss value, also reported under ``'loss'``.
    """
    e = self.embed(contexts)
    batch_size, n_context, n_units = e.shape
    n_pairs = batch_size * n_context

    x = F.broadcast_to(x[:, None], (batch_size, n_context))
    e = F.reshape(e, (n_pairs, n_units))
    x = F.reshape(x, (n_pairs,))

    loss = self.loss_func(e, x)
    reporter.report({'loss': loss}, self)
    return loss
def forward(self, xs, ys):
    """Return the softmax cross-entropy of ``self.predict(xs)`` against ``ys``.

    ``ys`` is concatenated along axis 0 to line up with the concatenated
    predictions. Loss and accuracy are reported as raw arrays.
    """
    predictions = self.predict(xs)
    truths = F.concat(ys, axis=0)

    loss = F.softmax_cross_entropy(predictions, truths)
    accuracy = F.accuracy(predictions, truths)

    reporter.report({'loss': loss.data}, self)
    reporter.report({'accuracy': accuracy.data}, self)
    return loss
def forward(self, *args, **kwargs):
    """Compute the loss for an input/label pair and optionally the accuracy.

    Args:
        args (list of ~chainer.Variable): Input minibatch.
        kwargs (dict of ~chainer.Variable): Input minibatch.

    When ``label_key`` is an ``int``, the corresponding element of ``args``
    is the ground-truth label; when it is a ``str``, the matching entry of
    ``kwargs`` is used. Everything else is passed to the predictor.

    .. note::
        ``y``, ``loss`` and ``accuracy`` are reset to ``None`` before the
        predictor runs. Variables stored on those attributes keep their
        whole computation graph alive, so clearing them first frees the
        previous iteration's interim values.

    Returns:
        ~chainer.Variable: Loss value.

    Raises:
        ValueError: If an integer ``label_key`` is out of bounds or a string
            ``label_key`` is missing from ``kwargs``.
    """
    label_key = self.label_key
    if isinstance(label_key, int):
        if not (-len(args) <= label_key < len(args)):
            raise ValueError('Label key %d is out of bounds' % label_key)
        t = args[label_key]
        if label_key == -1:
            # ``args[label_key + 1:]`` would be ``args[0:]`` here.
            args = args[:-1]
        else:
            args = args[:label_key] + args[label_key + 1:]
    elif isinstance(label_key, str):
        if label_key not in kwargs:
            raise ValueError('Label key "%s" is not found' % label_key)
        t = kwargs.pop(label_key)

    self.y = None
    self.loss = None
    self.accuracy = None

    self.y = self.predictor(*args, **kwargs)
    self.loss = self.lossfun(self.y, t)
    reporter.report({'loss': self.loss}, self)

    if self.compute_accuracy:
        self.accuracy = self.accfun(self.y, t)
        reporter.report({'accuracy': self.accuracy}, self)
    return self.loss
def __call__(self, x, context):
    """Compute the loss between each context embedding and its centre ``x``.

    ``x`` is broadcast over the second axis of the embedded context and
    both are flattened before being handed to ``self.loss_func``.

    Returns:
        ~chainer.Variable: Loss value, also reported under ``'loss'``.
    """
    e = self.embed(context)
    dims = e.shape
    n_rows, n_cols, width = dims[0], dims[1], dims[2]

    x = F.broadcast_to(x[:, None], (n_rows, n_cols))
    e = F.reshape(e, (n_rows * n_cols, width))
    x = F.reshape(x, (n_rows * n_cols,))

    loss = self.loss_func(e, x)
    # Open question from the original author: shouldn't the loss be
    # divided by the batch size?
    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, x, context):
    """Compute the loss between char-RNN context encodings and ``x``.

    ``x`` is repeated once per context position and flattened; ``context``
    is flattened over its first two axes and encoded by
    ``self.rnn.charRNN``.

    Returns:
        ~chainer.Variable: Loss value, also reported under ``'loss'``.
    """
    n_rows, n_cols = context.shape[0], context.shape[1]
    n_pairs = n_rows * n_cols

    x = F.broadcast_to(x[:, None], (n_rows, n_cols))
    x = F.reshape(x, (n_pairs,))

    context = context.reshape(n_pairs)
    e = self.rnn.charRNN(context)

    loss = self.loss_func(e, x)
    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, trainer):
    """Execute the statistics extension.

    Collects statistics for the current state of the parameters. This only
    updates the internal summary unless the trigger fires; when it does,
    the mean of the summary is reported and the summary is reset for the
    next accumulation.

    Args:
        trainer (~chainer.training.Trainer): Associated trainer that invoked
            this extension.
    """
    statistics = {}
    prefix = self._prefix + '/' if self._prefix else ''

    for link in self._links:
        link_name = getattr(link, 'name', 'None')
        for param_name, param in link.namedparams():
            for attr_name in self._attrs:
                if not self._statistics:
                    # Nothing to compute; do not touch the attribute.
                    continue

                # Flatten once per attribute: the statistic functions should
                # make no assumption about the axes, and neither the
                # flattening nor the NaN scan depends on the function.
                params = getattr(param, attr_name).ravel()
                has_nan = bool(
                    self._skip_nan_params
                    and backend.get_array_module(params).isnan(params).any())

                for function_name, function in \
                        six.iteritems(self._statistics):
                    value = numpy.nan if has_nan else function(params)
                    key = self.report_key_template.format(
                        prefix=prefix,
                        link_name=link_name,
                        param_name=param_name,
                        attr_name=attr_name,
                        function_name=function_name
                    )
                    if (isinstance(value, chainer.get_array_types())
                            and value.size > 1):
                        # Append integer indices to the key when the
                        # statistic function returns multiple values.
                        statistics.update({'{}/{}'.format(key, i): v
                                           for i, v in enumerate(value)})
                    else:
                        statistics[key] = value

    self._summary.add(statistics)

    if self._trigger(trainer):
        reporter.report(self._summary.compute_mean())
        self._summary = reporter.DictSummary()  # Clear summary
def __call__(self, x, context):
    """Compute the loss between the averaged context encoding and ``x``.

    ``context`` is flattened over its first two axes, encoded by
    ``self.rnn.charRNN``, reshaped back and averaged over axis 1 before
    being compared with ``x`` by ``self.loss_func``.

    Returns:
        ~chainer.Variable: Loss value, also reported under ``'loss'``.
    """
    n_rows, n_cols = context.shape[0], context.shape[1]

    flat_context = context.reshape(n_rows * n_cols)
    e = self.rnn.charRNN(flat_context)
    e = F.reshape(e, (n_rows, n_cols, e.shape[1]))

    # Mean over the context axis, written as a sum times a reciprocal.
    h = F.sum(e, axis=1) * (1. / n_cols)

    loss = self.loss_func(h, x)
    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, trainer):
    """Accumulate a numerator/denominator pair and report their ratio.

    Iterations whose observation lacks either key are skipped entirely.
    When the trigger fires, ``numerator / denominator`` is reported under
    ``self._result_key`` and both accumulators are reset to zero.

    Args:
        trainer: Trainer whose ``observation`` supplies the two values.
    """
    observation = trainer.observation
    if (self._numerator_key not in observation
            or self._denominator_key not in observation):
        return

    self._numerator += observation[self._numerator_key]
    self._denominator += observation[self._denominator_key]

    if not self._trigger(trainer):
        return

    result = float(self._numerator) / self._denominator
    self._numerator = 0
    self._denominator = 0
    reporter.report({self._result_key: result})
def predict(self, xs):
    """Encode ``xs``, discretise with Gumbel-softmax and decode.

    The mean over the per-position maxima of the relaxed codes is reported
    under ``'maxp'``.

    Returns:
        tuple: ``(y_hat, exs)`` -- the decoder output and the second value
        returned by ``self._encode``.
    """
    # Encoding
    logits, exs = self._encode(xs)

    # Discretization
    codes = F.gumbel_softmax(logits, self.tau, axis=2)
    flat_codes = codes.reshape(-1, self.M * self.K)

    # The diagnostic needs no gradient.
    with chainer.no_backprop_mode():
        maxp = F.mean(F.max(codes, axis=2))
    reporter.report({'maxp': maxp.data}, self)

    # Decoding
    y_hat = self._decode(flat_codes)
    return y_hat, exs
def __call__(self, *args, **kwargs):
    """Compute the loss for an input/label pair and optionally the accuracy.

    Args:
        args (list of ~chainer.Variable): Input minibatch.
        kwargs (dict of ~chainer.Variable): Input minibatch.

    When ``label_key`` is an ``int``, the corresponding element of ``args``
    is the ground-truth label; when it is a ``str``, the matching entry of
    ``kwargs`` is used. Everything else is passed to the predictor.

    Returns:
        ~chainer.Variable: Loss value.

    Raises:
        ValueError: If an integer ``label_key`` is out of bounds or a string
            ``label_key`` is missing from ``kwargs``.
    """
    key = self.label_key
    if isinstance(key, int):
        n_args = len(args)
        if not (-n_args <= key < n_args):
            raise ValueError('Label key %d is out of bounds' % key)
        t = args[key]
        if key == -1:
            # ``args[key + 1:]`` would be ``args[0:]`` here.
            args = args[:-1]
        else:
            args = args[:key] + args[key + 1:]
    elif isinstance(key, str):
        if key not in kwargs:
            raise ValueError('Label key "%s" is not found' % key)
        t = kwargs.pop(key)

    # Clear the previous results before running the predictor.
    self.y = None
    self.loss = None
    self.accuracy = None

    self.y = self.predictor(*args, **kwargs)
    self.loss = self.lossfun(self.y, t)
    reporter.report({'loss': self.loss}, self)

    if self.compute_accuracy:
        self.accuracy = self.accfun(self.y, t)
        reporter.report({'accuracy': self.accuracy}, self)
    return self.loss
def __call__(self, x, context):
    """Compute the context/centre loss with the configured subword mode.

    The module-level ``args.subword`` selects how ``context`` is encoded:
    ``'rnn'`` flattens it and runs ``self.rnn.charRNN``; ``'none'`` embeds
    it with ``self.embed`` and flattens the embedding. Any other value
    leaves the encoding undefined and the call fails.

    Returns:
        ~chainer.Variable: Loss value, also reported under ``'loss'``.
    """
    n_rows, n_cols = context.shape[0], context.shape[1]

    x = F.broadcast_to(x[:, None], (n_rows, n_cols))
    x = F.reshape(x, (n_rows * n_cols,))

    if args.subword == 'rnn':
        context = context.reshape(context.shape[0] * context.shape[1])
        e = self.rnn.charRNN(context)
    if args.subword == 'none':
        e = self.embed(context)
        e = F.reshape(e, (e.shape[0] * e.shape[1], e.shape[2]))

    loss = self.loss_func(e, x)
    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, trainer):
    """Translate ``self.test_data`` in batches and report corpus BLEU.

    Args:
        trainer: Trainer invoking this extension (not used).
    """
    references = []
    hypotheses = []
    with chainer.no_backprop_mode():
        for offset in range(0, len(self.test_data), self.batch):
            chunk = self.test_data[offset:offset + self.batch]
            sources, targets = zip(*chunk)
            references.extend([[t.tolist()] for t in targets])

            sources = [chainer.dataset.to_device(self.device, x)
                       for x in sources]
            translated = self.model.translate(sources, self.max_length)
            hypotheses.extend([y.tolist() for y in translated])

    smoothing = bleu_score.SmoothingFunction().method1
    bleu = bleu_score.corpus_bleu(
        references, hypotheses, smoothing_function=smoothing)
    reporter.report({self.key: bleu})
def __call__(self, x, t):
    """Compute the softmax cross-entropy loss for images and labels.

    Args:
        x (~chainer.Variable): A variable with a batch of images.
        t (~chainer.Variable): A variable with the ground-truth labels.

    Returns:
        ~chainer.Variable: Loss value, also stored on ``self.loss``.
    """
    self.y = self.predictor(x)
    self.loss = F.softmax_cross_entropy(
        self.y, t,
        class_weight=self.class_weight,
        ignore_label=self.ignore_label)

    reporter.report({'loss': self.loss}, self)
    return self.loss
def evaluate(self):
    """Evaluate detections on the ``'main'`` iterator with the VOC metric.

    ``target.predict`` is applied to every sample. The dataset may yield
    either ``(bbox, label)`` or ``(bbox, label, difficult)`` annotations;
    in the former case difficult flags are passed as ``None``. When
    ``self.label_names`` is set, one ``ap/<label_name>`` entry is reported
    per label; labels with no entry in ``result['ap']`` get NaN.

    Returns:
        dict: Observation filled in by ``reporter.report``.
    """
    main_iterator = self._iterators['main']
    target = self._targets['main']

    # Rewind a resettable iterator in place; otherwise iterate a shallow copy.
    if hasattr(main_iterator, 'reset'):
        main_iterator.reset()
        it = main_iterator
    else:
        it = copy.copy(main_iterator)

    in_values, out_values, rest_values = apply_to_iterator(
        target.predict, it)
    # The inputs are not needed for scoring; drop the reference explicitly.
    del in_values

    pred_bboxes, pred_labels, pred_scores = out_values

    n_rest = len(rest_values)
    if n_rest == 3:
        gt_bboxes, gt_labels, gt_difficults = rest_values
    elif n_rest == 2:
        gt_bboxes, gt_labels = rest_values
        gt_difficults = None

    result = eval_detection_voc(
        pred_bboxes, pred_labels, pred_scores,
        gt_bboxes, gt_labels, gt_difficults,
        use_07_metric=self.use_07_metric)

    report = {'map': result['map']}
    if self.label_names is not None:
        for index, label_name in enumerate(self.label_names):
            key = 'ap/{:s}'.format(label_name)
            try:
                report[key] = result['ap'][index]
            except IndexError:
                report[key] = np.nan

    observation = {}
    with reporter.report_scope(observation):
        reporter.report(report, target)
    return observation
def __call__(self, xs, ys):
    """Compute the CRF loss for a batch of sequences and report label counts.

    Reports ``'loss'``, plus ``'correct'`` and ``'total'`` label counts
    obtained from the CRF's argmax decoding.

    Returns:
        ~chainer.Variable: CRF loss.
    """
    # Sequences must be sorted by descending length before transposing.
    order = numpy.argsort([-len(x) for x in xs]).astype('i')
    xs = [xs[i] for i in order]
    ys = [ys[i] for i in order]

    # After transposing, xs[t] is the batch of words at time step t.
    xs = F.transpose_sequence(xs)
    ys = F.transpose_sequence(ys)

    # One feature vector batch per time step.
    hs = [self.feature(x) for x in xs]

    loss = self.crf(hs, ys)
    reporter.report({'loss': loss.data}, self)

    # argmax yields the predicted label sequence.
    _, predict = self.crf.argmax(hs)

    correct = 0
    total = 0
    for gold, guessed in six.moves.zip(ys, predict):
        correct += self.xp.sum(gold.data == guessed)
        total += len(gold.data)

    reporter.report({'correct': correct}, self)
    reporter.report({'total': total}, self)
    return loss
def forward(self, imgs, captions):
    """Turn a batch of images and captions into a single loss.

    When ``self.finetune_feat_extractor`` is false, features are extracted
    with ``train`` set to ``False`` and back-propagation disabled, so the
    extractor behaves as it would at inference time and builds no graph.

    Returns:
        ~chainer.Variable: Language-model loss, also reported as ``'loss'``.
    """
    imgs = Variable(imgs)

    if not self.finetune_feat_extractor:
        # The extractor is not optimised: skip dropout and graph building.
        with chainer.using_config('train', False), \
                chainer.no_backprop_mode():
            img_feats = self.feat_extractor(imgs)
    else:
        img_feats = self.feat_extractor(imgs)

    loss = self.lang_model(img_feats, captions)

    # Reported so other trainer extensions can print, log and plot it.
    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, trainer):
    """Run the evaluation and save predictions and rankings.

    Overrides ``extensions.Evaluator.__call__``. ``pred.txt`` and
    ``ranking.txt`` are written into ``trainer.out``.

    Args:
        trainer: Trainer invoking this extension.

    Returns:
        The ``result`` object returned by ``self.evaluate()``.
    """
    # Set up a reporter with every target registered under its prefix.
    reporter = reporter_module.Reporter()
    prefix = self.name + '/' if hasattr(self, 'name') else ''
    for name, target in six.iteritems(self._targets):
        observer_name = prefix + name
        reporter.add_observer(observer_name, target)
        reporter.add_observers(observer_name,
                               target.namedlinks(skipself=True))

    with reporter:
        result, predictions, rankings = self.evaluate()

        pred_path = os.path.join(trainer.out, 'pred.txt')
        self.save_predictions(pred_path, predictions)
        ranking_path = os.path.join(trainer.out, 'ranking.txt')
        self.save_predictions(ranking_path, rankings)

    reporter_module.report(result)
    return result
def __call__(self, trainer):
    """Run the evaluation and optionally save features and top-N indices.

    Overrides ``extensions.Evaluator.__call__``. Output goes to the
    ``features`` directory under ``trainer.out``: the raw features as
    ``<layer_name>.npy`` when ``self.save_features`` is set, and the
    result of ``Vutil.get_argmax_N`` as ``top_<layer_name>.txt`` when
    ``self.top`` is not ``None``.

    Args:
        trainer: Trainer invoking this extension.

    Returns:
        The ``result`` object returned by ``self.evaluate()``.
    """
    # Set up a reporter with every target registered under its prefix.
    reporter = reporter_module.Reporter()
    prefix = self.name + '/' if hasattr(self, 'name') else ''
    for name, target in six.iteritems(self._targets):
        observer_name = prefix + name
        reporter.add_observer(observer_name, target)
        reporter.add_observers(observer_name,
                               target.namedlinks(skipself=True))

    with reporter:
        result, features = self.evaluate()

        outputdir = os.path.join(trainer.out, 'features')
        if not os.path.exists(outputdir):
            os.makedirs(outputdir)

        if self.save_features:
            # Save with the array module that owns ``features``.
            xp = cuda.get_array_module(features)
            feature_path = os.path.join(outputdir, self.layer_name + '.npy')
            xp.save(feature_path, features)

        if self.top is not None:
            top_N_args = Vutil.get_argmax_N(features, self.top)
            top_path = os.path.join(
                outputdir, 'top_' + self.layer_name + '.txt')
            ioutil.savetxt(top_path, top_N_args, fmt='%d', delimiter='\t')

    reporter_module.report(result)
    return result
def calc_score(self, df_truth, pred):
    """Report the log of the mean absolute error per coupling type.

    For every distinct value of ``df_truth['type']`` the log-MAE of
    ``scalar_coupling_constant - pred`` is stored under that type name;
    their average is stored under ``'ALL_LogMAE'``.

    Returns:
        dict: Observation filled in by ``reporter.report``.
    """
    target_types = list(set(df_truth['type']))
    diff = df_truth['scalar_coupling_constant'] - pred

    metrics = {}
    total = 0
    for target_type in target_types:
        mask = df_truth['type'] == target_type
        log_mae = np.log(np.mean(np.abs(diff[mask])))
        total += log_mae
        metrics[target_type] = log_mae
    metrics['ALL_LogMAE'] = total / len(target_types)

    observation = {}
    with reporter.report_scope(observation):
        reporter.report(metrics, self._targets['main'])
    return observation
def forward(self, **indata):
    """Compute the centre-detection loss and report its components.

    Args:
        indata: Keyword batch. ``'image'`` is fed to the detector and
            ``'hm'`` is compared with the last output's heat map; the whole
            dict is also passed to ``center_detection_loss``.

    Returns:
        The total loss returned by ``center_detection_loss``.
    """
    outputs = self.center_detector(indata['image'])

    loss, hm_loss, wh_loss, offset_loss, detail_losses = \
        center_detection_loss(
            outputs, indata,
            self.hm_weight, self.wh_weight, self.offset_weight,
            comm=self.comm)

    # Extra diagnostic: heat-map MAE of the last output.
    predicted_hm = outputs[-1]["hm"]
    hm_mae = F.mean_absolute_error(predicted_hm, indata["hm"])

    report = {
        'loss': loss,
        'hm_loss': hm_loss,
        'hm_pos_loss': detail_losses['hm_pos_loss'],
        'hm_neg_loss': detail_losses['hm_neg_loss'],
        'hm_mae': hm_mae,
        'wh_loss': wh_loss,
        'offset_loss': offset_loss,
    }
    reporter.report(report, self)
    return loss
def __call__(self, trainer):
    """Report BLEU and validation loss on the first 100 test samples.

    BLEU is reported under ``self.key[0]`` and the value returned by
    ``self.model.CalculateValLoss`` under ``self.key[1]``.

    Both metrics are computed on the same subset. Previously only the loop
    bound was limited to 100 while the batches were sliced from the full
    dataset, so a batch size that does not divide 100 let the last batch
    run past the subset and BLEU covered samples the loss did not.

    Args:
        trainer: Trainer invoking this extension (not used).
    """
    max_samples = 100
    subset = self.test_data[0:max_samples]

    with chainer.no_backprop_mode():
        references = []
        hypotheses = []
        for offset in range(0, len(subset), self.batch):
            sources, targets = zip(*subset[offset:offset + self.batch])
            # Only the first element of each target is used as reference.
            references.extend([[t[0].tolist()] for t in targets])
            sources = [
                chainer.dataset.to_device(self.device, x) for x in sources
            ]
            ys = self.model.translate(sources, self.max_length)
            hypotheses.extend([y.tolist() for y in ys])

        source, target = zip(*subset)
        loss = self.model.CalculateValLoss(source, target)

    bleu = bleu_score.corpus_bleu(
        references, hypotheses,
        smoothing_function=bleu_score.SmoothingFunction().method1)
    reporter.report({self.key[0]: bleu})
    reporter.report({self.key[1]: loss})
def __call__(self, x, labels):
    """Compute the scaled sigmoid cross-entropy for multi-label input.

    ``labels`` is iterated per sample; each entry indexes the classes to
    switch on in that sample's one-hot row. Loss, recall, precision and the
    prediction/positive counts from ``calc_prec_and_recall`` are reported.

    Returns:
        ~chainer.Variable: ``self.loss_scale`` times the cross-entropy.
    """
    x = BatchTransform(self.model.mean)(x)
    x = self.xp.array(x)
    scores = self.model(x)

    n_samples, n_class = scores.shape[:2]
    one_hot_labels = self.xp.zeros((n_samples, n_class), dtype=np.int32)
    for row, label in enumerate(labels):
        one_hot_labels[row, label] = 1

    # sigmoid_cross_entropy normalises by batch size and class count.
    # Removing the class-count factor works better, hence the loss_scale.
    loss = self.loss_scale * F.sigmoid_cross_entropy(scores, one_hot_labels)

    result = calc_prec_and_recall(scores, labels)

    reporter.report({'loss': loss}, self)
    for name in ('recall', 'precision', 'n_pred', 'n_pos'):
        reporter.report({name: result[name]}, self)
    return loss
def __call__(self, x):
    """Compute the negative beta-weighted ELBO estimate for ``x``.

    ``self.k`` latent samples are drawn from the encoder distribution. The
    loss is ``-(reconstr - beta * kl_penalty)``; the loss, both terms and
    ``beta`` are reported.

    Returns:
        ~chainer.Variable: Loss value.
    """
    # NOTE: every call mutates the global Chainer config (process-wide).
    chainer.global_config.dtype = numpy.float64

    q_z = self.encoder(x)
    z = q_z.sample(self.k)
    p_x = self.decoder(z, n_batch_axes=2)
    p_z = self.prior()

    # Repeat x along a new leading axis so it matches the k samples.
    tiled_x = F.broadcast_to(x[None, :], (self.k, ) + x.shape)
    reconstr = F.mean(p_x.log_prob(tiled_x))
    kl_penalty = F.mean(q_z.log_prob(z) - p_z.log_prob(z))

    loss = -(reconstr - self.beta * kl_penalty)

    reporter.report({'loss': loss}, self)
    reporter.report({'reconstr': reconstr}, self)
    reporter.report({'kl_penalty': kl_penalty}, self)
    reporter.report({'beta': self.beta}, self)
    return loss
def forward(self, x, t):
    """Compute a cross-entropy loss using only ONNX-exportable operations.

    ``t`` is multiplied element-wise with the log-softmax output and its
    argmax over axis 1 is used for accuracy, so it is presumably a one-hot
    target matrix -- TODO confirm. The divisor is wrapped in a named
    variable and exposed via ``self.extra_inputs``.

    Returns:
        ~chainer.Variable: Loss value, named ``'loss'``.
    """
    xp = cuda.get_array_module(x)
    y = self.predictor(x)
    log_softmax = F.log_softmax(y)

    # SelectItem is not supported by onnx-chainer.
    # TODO(hamaji): Support it?
    # log_prob = F.select_item(log_softmax, t)

    divisor = xp.array(t.size, xp.float32)
    batch_size = chainer.Variable(divisor, name='batch_size')
    self.extra_inputs = [batch_size]

    # TODO(hamaji): Currently, F.sum with axis=1 cannot be backpropped
    # properly, so both axes are reduced in a single call instead of
    # summing per sample and then over the batch.
    log_prob = F.sum(log_softmax * t, axis=(0, 1))
    loss = -log_prob / batch_size
    reporter.report({'loss': loss}, self)

    if self.compute_accuracy:
        hard_labels = xp.argmax(t, axis=1)
        acc = accuracy.accuracy(y, hard_labels)
        reporter.report({'accuracy': acc}, self)

    loss.name = 'loss'
    return loss
def __call__(self, x, t):
    """Compute the segmentation loss, adding an auxiliary loss in training.

    In training mode the predictor output is indexed as
    ``(auxiliary, main)`` and the returned loss is
    ``0.4 * aux_loss + main_loss``. Otherwise the output is used directly.
    The combined training loss is built once and the same variable is both
    reported and returned, instead of evaluating the expression twice.

    Args:
        x (~chainer.Variable): Batch of inputs.
        t (~chainer.Variable): Ground-truth labels.

    Returns:
        ~chainer.Variable: Loss value. ``self.loss`` always holds the main
        loss only; ``self.aux_loss`` is set in training mode.
    """
    self.y = self.predictor(x)

    if chainer.config.train:
        self.aux_loss = F.softmax_cross_entropy(
            self.y[0], t,
            class_weight=self.class_weight,
            ignore_label=self.ignore_label)
        self.loss = F.softmax_cross_entropy(
            self.y[1], t,
            class_weight=self.class_weight,
            ignore_label=self.ignore_label)
        total_loss = (self.aux_loss * 0.4) + self.loss
        reporter.report({'loss': total_loss}, self)
        return total_loss

    self.loss = F.softmax_cross_entropy(
        self.y, t,
        class_weight=self.class_weight,
        ignore_label=self.ignore_label)
    reporter.report({'loss': self.loss}, self)
    return self.loss
def __call__(self, x):
    """Compute the negative importance-weighted objective for ``x``.

    Reports the objective (``'obj'``), its sample variance across the
    batch (``'obj_var'``) and the negative ELBO from the same log-weights
    (``'obj_elbo'``).

    Returns:
        ~chainer.Variable: Negative mean log-probability estimate.
    """
    batchsize = x.shape[0]
    logw = self.compute_logw(x)

    # IWAE = log (1/k) sum_i w_i
    logp = F.logsumexp(logw, axis=0) - math.log(self.num_zsamples)
    logp_mean = F.sum(logp) / batchsize
    obj = -logp_mean

    # Unbiased variance of the per-sample estimates.
    centered = logp - F.broadcast_to(logp_mean, logp.shape)
    obj_var = F.sum(centered * centered) / (batchsize - 1)

    obj_elbo = -self.compute_elbo(logw)

    report = {'obj': obj, 'obj_var': obj_var, 'obj_elbo': obj_elbo}
    reporter.report(report, self)
    return obj
def __call__(self, xs, ilens, ys):
    """Run the E2E forward pass and combine the CTC and attention losses.

    ``self.mtlalpha`` weights the two losses. At ``0`` only the attention
    branch is evaluated and at ``1`` only the CTC branch; the skipped loss
    is ``None``. Metrics are reported only while the loss is below
    ``CTC_LOSS_THRESHOLD`` and not NaN; otherwise a warning is logged.

    :param xs: inputs passed to the encoder
    :param ilens: input lengths passed to the encoder
    :param ys: target sequences
    :return: ``self.loss``, or ``(loss, loss_ctc, loss_att, acc)`` when
        ``self.flag_return`` is set
    """
    alpha = self.mtlalpha

    # 1. encoder
    hs, ilens = self.enc(xs, ilens)

    # 2. CTC loss (skipped when the attention loss carries all the weight)
    loss_ctc = None if alpha == 0 else self.ctc(hs, ys)

    # 3. attention loss (skipped when the CTC loss carries all the weight)
    if alpha == 1:
        loss_att, acc = None, None
    else:
        loss_att, acc = self.dec(hs, ys)
    self.acc = acc

    # 4. multi-task combination
    if alpha == 0:
        self.loss = loss_att
    elif alpha == 1:
        self.loss = loss_ctc
    else:
        self.loss = alpha * loss_ctc + (1 - alpha) * loss_att

    loss_is_sane = (self.loss.data < CTC_LOSS_THRESHOLD
                    and not math.isnan(self.loss.data))
    if loss_is_sane:
        reporter.report({'loss_ctc': loss_ctc}, self)
        reporter.report({'loss_att': loss_att}, self)
        reporter.report({'acc': acc}, self)
        logging.info('mtl loss:' + str(self.loss.data))
        reporter.report({'loss': self.loss}, self)
    else:
        logging.warning('loss (=%f) is not correct', self.loss.data)

    if self.flag_return:
        return self.loss, loss_ctc, loss_att, acc
    return self.loss
def forward(self, *inputs):
    """Compute the label loss of a batch of trees using a thin stack.

    Args:
        inputs: Flat tuple made of six equally sized groups, in this order:
            lefts, rights, dests, labels, sequences and leaf_labels.

    Returns:
        The accumulated softmax cross-entropy divided by the number of
        labelled nodes. ``'loss'``, ``'total'`` and ``'correct'`` are
        reported as well.
    """
    # The six groups are laid out back to back, ``batch`` items each.
    batch = len(inputs) // 6
    lefts = inputs[0:batch]
    rights = inputs[batch:batch * 2]
    dests = inputs[batch * 2:batch * 3]
    labels = inputs[batch * 3:batch * 4]
    sequences = inputs[batch * 4:batch * 5]
    leaf_labels = inputs[batch * 5:batch * 6]

    inds = numpy.argsort([-len(l) for l in lefts])
    # Sort all arrays in descending order and transpose them
    lefts = F.transpose_sequence([lefts[i] for i in inds])
    rights = F.transpose_sequence([rights[i] for i in inds])
    dests = F.transpose_sequence([dests[i] for i in inds])
    labels = F.transpose_sequence([labels[i] for i in inds])
    sequences = F.transpose_sequence([sequences[i] for i in inds])
    leaf_labels = F.transpose_sequence([leaf_labels[i] for i in inds])

    batch = len(inds)
    maxlen = len(sequences)

    loss = 0
    count = 0
    correct = 0

    # One stack per sample with ``maxlen * 2`` slots of ``n_units`` each.
    stack = self.xp.zeros((batch, maxlen * 2, self.n_units), self.xp.float32)

    # Leaf pass: embed the i-th word of every sequence, score its label and
    # store the embedding in stack slot i.
    for i, (word, label) in enumerate(zip(sequences, leaf_labels)):
        # Transposed sequences shrink over time, so the batch size varies.
        batch = word.shape[0]
        es = self.leaf(word)
        ds = self.xp.full((batch, ), i, self.xp.int32)
        y = self.label(es)
        # Weight by the batch size so the final division by ``count`` is a
        # per-node average.
        loss += F.softmax_cross_entropy(y, label, normalize=False) * batch
        count += batch
        predict = self.xp.argmax(y.array, axis=1)
        correct += (predict == label.array).sum()
        stack = thin_stack.thin_stack_set(stack, ds, es)

    # Node pass: fetch both children from the stack, compose them, score
    # the label and write the result to slot ``dest``.
    for left, right, dest, label in zip(lefts, rights, dests, labels):
        l, stack = thin_stack.thin_stack_get(stack, left)
        r, stack = thin_stack.thin_stack_get(stack, right)
        o = self.node(l, r)
        y = self.label(o)
        batch = l.shape[0]
        loss += F.softmax_cross_entropy(y, label, normalize=False) * batch
        count += batch
        predict = self.xp.argmax(y.array, axis=1)
        correct += (predict == label.array).sum()
        stack = thin_stack.thin_stack_set(stack, dest, o)

    loss /= count
    reporter.report({'loss': loss}, self)
    reporter.report({'total': count}, self)
    reporter.report({'correct': correct}, self)
    return loss
def forward(self, *args, **kwargs):
    """Compute the distillation loss: hard-label loss plus attention transfer.

    The label is taken from ``args`` or ``kwargs`` according to
    ``self.label_key``. The teacher runs in test mode without
    back-propagation on the first remaining positional input. The loss is
    ``lossfun_hard(y_s, t) + beta / 2 * sum(attention losses)`` over three
    fixed teacher/student layer pairs.

    Returns:
        ~chainer.Variable: Loss value, also stored on ``self.loss``.

    Raises:
        ValueError: If ``label_key`` is out of bounds, not found, or neither
            an ``int`` nor a ``str``.
    """
    label_key = self.label_key
    if isinstance(label_key, int):
        if not (-len(args) <= label_key < len(args)):
            raise ValueError('Label key %d is out of bounds' % label_key)
        t = args[label_key]
        if label_key == -1:
            # ``args[label_key + 1:]`` would be ``args[0:]`` here.
            args = args[:-1]
        else:
            args = args[:label_key] + args[label_key + 1:]
    elif isinstance(label_key, str):
        if label_key not in kwargs:
            raise ValueError('Label key "%s" is not found' % label_key)
        t = kwargs.pop(label_key)
    else:
        raise ValueError("Invalid type: label_key")

    # The teacher only supplies attention maps; it is never trained here.
    with chainer.using_config('train', False), \
            chainer.using_config('enable_backprop', False):
        _, g_t = self.teacher(args[0])
    y_s, g_s = self.predictor(args[0], **kwargs)

    # (teacher layer, student layer) pairs whose attention is matched.
    attention_pair = [
        ('res2', 'fire3'),
        ('res3', 'fire5'),
        ('res4', 'fire9'),
    ]
    loss_at = []
    for t_layer, s_layer in attention_pair:
        loss_at.append(at_loss(g_t[t_layer], g_s[s_layer]))

    self.loss = self.lossfun_hard(y_s, t) + self.beta / 2 * sum(loss_at)
    self.y = y_s
    reporter.report({'loss': self.loss}, self)

    if self.compute_accuracy:
        self.accuracy = self.accfun(self.y, t)
        reporter.report({'accuracy': self.accuracy}, self)
    return self.loss
def update_core(self):
    """Run one update step over every registered optimizer.

    A single combined loss is back-propagated once and then every optimizer
    is stepped. The generated batch is also published through the
    module-level ``x_gen``.
    """
    global x_gen

    batch = self._iterators['main'].next()
    x_data = self.converter(batch, self.device)
    batchsize = x_data.shape[0]

    noise = cuda.cupy.random.normal(
        size=(batchsize, self.z_dim, 1, 1), dtype=np.float32)
    z = Variable(noise)
    x_gen = self.gen(z)

    # Feeding generated and real samples to the discriminator separately
    # makes batch normalization misbehave, so they go through as one batch.
    x = F.concat((x_gen, x_data), 0)
    y = self.dis(x)
    y_gen, y_data = F.split_axis(y, 2, 0)

    # sigmoid_cross_entropy(x, 0) == softplus(x)
    # sigmoid_cross_entropy(x, 1) == softplus(-x)
    loss_gen = F.sum(F.softplus(-y_gen))
    loss_data = F.sum(F.softplus(y_data))
    loss = (loss_gen + loss_data) / batchsize

    optimizers = self._optimizers
    for optimizer in optimizers.values():
        optimizer.target.cleargrads()

    # Compute all gradients in a single backward pass.
    loss.backward()

    for optimizer in optimizers.values():
        optimizer.update()

    reporter.report({
        'loss': loss,
        'loss_gen': loss_gen / batchsize,
        'loss_data': loss_data / batchsize,
    })
def evaluate(self):
    """Compute corpus BLEU over ``self.test_data`` and report it.

    The elapsed time is printed afterwards. With a communicator, the
    message is printed once per MPI rank count, each followed by a
    barrier; without one it is printed once.

    Returns:
        dict: Observation containing the ``'bleu'`` entry.
    """
    started = time.time()
    with chainer.no_backprop_mode():
        references = []
        hypotheses = []
        observation = {}
        with reporter.report_scope(observation):
            for offset in range(0, len(self.test_data), self.batch):
                chunk = self.test_data[offset:offset + self.batch]
                src, trg = zip(*chunk)
                references.extend([[t.tolist()] for t in trg])
                src = [chainer.dataset.to_device(self.device, x)
                       for x in src]
                translated = self.model.translate(src, self.max_length)
                hypotheses.extend([y.tolist() for y in translated])

            smoothing = bleu_score.SmoothingFunction().method1
            bleu = bleu_score.corpus_bleu(
                references, hypotheses, smoothing_function=smoothing)
            reporter.report({'bleu': bleu}, self.model)
    elapsed = time.time() - started

    if self.comm is not None:
        # This evaluator is called via chainermn.MultiNodeEvaluator
        for _ in range(0, self.comm.mpi_comm.size):
            print("BleuEvaluator::evaluate(): "
                  "took {:.3f} [s]".format(elapsed))
            sys.stdout.flush()
            self.comm.mpi_comm.Barrier()
    else:
        # This evaluator is called from a conventional Chainer extension
        print("BleuEvaluator(single)::evaluate(): "
              "took {:.3f} [s]".format(elapsed))
        sys.stdout.flush()
    return observation
def __call__(self, x, t):
    """Run the predictor on ``x`` and return the softmax cross-entropy loss.

    Every raw value in ``t`` that appears in the module-level ``num_labels``
    sequence is replaced by its position in that sequence. The result is
    cast to ``int32`` and moved to the GPU before the loss is computed.

    Args:
        x: input passed unchanged to ``self.predictor``.
        t: label array holding raw values listed in ``num_labels``.

    Returns:
        The loss from ``F.softmax_cross_entropy``, also reported as
        ``'loss'``.
    """
    # First run inference with the model being trained.
    y = self.predictor(x)

    raw = cuda.to_cpu(t)
    # Compare against the untouched ``raw`` array on every pass. Testing the
    # partially remapped array instead would remap a value a second time
    # whenever a later raw value equals an index that was already assigned.
    labels = raw
    for idx, pix_val in enumerate(num_labels):
        labels = np.where(raw == pix_val, int(idx), labels)

    # softmax_cross_entropy takes the ground truth as integer class indices.
    t = np.array(labels, dtype='int32')
    t = cuda.to_gpu(t)

    loss = F.softmax_cross_entropy(y, t)

    # Report to the training log.
    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, x, t):
    """Return classification loss plus the weighted transform penalty.

    Reports ``'cls_loss'``, ``'trans_loss'`` and ``'loss'``, and also
    ``'accuracy'`` when ``self.compute_accuracy`` is set.
    """
    logits, transform = self.calc(x)

    cls_loss = F.softmax_cross_entropy(logits, t)
    reporter.report({'cls_loss': cls_loss}, self)

    # Enforce the transformation as orthogonal matrix
    trans_loss1 = self.trans_lam1 * calc_trans_loss(transform)
    reporter.report({'trans_loss': trans_loss1}, self)

    total = cls_loss + trans_loss1
    reporter.report({'loss': total}, self)

    if self.compute_accuracy:
        reporter.report({'accuracy': F.accuracy(logits, t)}, self)
    return total
def __call__(self, x, t):
    """Return the MSE loss plus any enabled transform penalties.

    ``'loss'`` is reported twice: first the bare mean squared error, then
    the final total. Each penalty is added only when ``self.trans`` is
    truthy and its weight is non-negative.
    """
    prediction, trans1, trans2 = self.calc(x)

    total = functions.mean_squared_error(prediction, t)
    reporter.report({'loss': total}, self)

    # Enforce the transformation as orthogonal matrix
    if self.trans and self.trans_lam1 >= 0:
        penalty1 = self.trans_lam1 * calc_trans_loss(trans1)
        reporter.report({'trans_loss1': penalty1}, self)
        total = total + penalty1

    if self.trans and self.trans_lam2 >= 0:
        penalty2 = self.trans_lam2 * calc_trans_loss(trans2)
        reporter.report({'trans_loss2': penalty2}, self)
        total = total + penalty2

    reporter.report({'loss': total}, self)
    return total
def report_scores(self, y, t):
    """Report accuracy, mean Dice and every non-NaN per-entry Dice score.

    With ``self.nested_label`` set, a mean Dice is computed per label
    channel from consecutive two-channel slices of ``y``, and both arrays
    are reshaped before the accuracy is taken.
    """
    with chainer.no_backprop_mode():
        if not self.nested_label:
            dice = dice_coefficient(y, t)
        else:
            first = dice_coefficient(y[:, 0:2, ...], t[:, 0, ...])
            dice = mean_dice_coefficient(first)
            for ch in range(1, t.shape[1]):
                lo, hi = 2 * ch, 2 * (ch + 1)
                per_channel = dice_coefficient(y[:, lo:hi, ...],
                                               t[:, ch, ...])
                dice = F.concat(
                    (dice, mean_dice_coefficient(per_channel)), axis=0)
        mean_dice = mean_dice_coefficient(dice)

        if self.nested_label:
            b, c, h, w, d = t.shape
            y = F.reshape(y, (b, 2, h * c, w, d))
            t = F.reshape(t, (b, h * c, w, d))
        accuracy = F.accuracy(y, t)

    reporter.report({
        'acc': accuracy,
        'mean_dc': mean_dice
    })

    xp = cuda.get_array_module(y)
    n_scores = len(dice)
    for idx in range(n_scores):
        # Entries whose score is NaN are left out of the report.
        if xp.isnan(dice.data[idx]):
            continue
        reporter.report({'dc_{}'.format(idx): dice[idx]})
def hidden_layer(self, word_embed, section, tags, index):
    """Encode the sentences, score them with the CRF and decode labels.

    Args:
        word_embed: word embeddings, split into sentences along axis 0.
        section: sentence boundaries passed to ``F.split_axis``.
        tags: gold labels; rows are selected with ``index[:, 0]``.
        index: index of each word.

    Returns:
        tuple: ``(accuracy, loss, concat_predicts, ys)`` where ``ys`` is the
        third output of ``self.bi_word``.
    """
    sentences = F.split_axis(word_embed, section, axis=0)
    _, __, ys = self.bi_word(None, None, sentences)

    projected = self.l(F.concat(ys, axis=0))
    projected = F.split_axis(projected, section, 0)

    # Sort order by descending sentence length (argsort of negated lengths).
    neg_lengths = xp.array([-seq.shape[0] for seq in projected]).astype('i')
    inds = xp.argsort(neg_lengths)
    ysdes = permutate_list(projected, inds, inv=False)

    batch_ts = tags[index[:, 0]]
    gold = F.split_axis(batch_ts, section, 0)
    tsdes = permutate_list(gold, inds, inv=False)

    ysdes = F.transpose_sequence(ysdes)
    tsdes = F.transpose_sequence(tsdes)

    loss = self.crf(ysdes, tsdes)
    reporter.report({'loss': loss}, self)

    _, predicts = self.crf.argmax(ysdes)
    predicts = F.transpose_sequence(predicts)
    # Undo the permutation applied above.
    predicts = permutate_list(predicts, inds, inv=True)
    concat_predicts = F.concat(predicts, axis=0)

    n_correct = self.xp.sum(batch_ts == concat_predicts.data)
    accuracy = n_correct * 1.0 / batch_ts.shape[0]
    reporter.report({'accuracy': accuracy}, self)
    return accuracy, loss, concat_predicts, ys
def __call__(self, *args, **kwargs):
    """Compute the loss (and optionally the accuracy) of the predictor.

    The label is taken out of ``args`` (integer ``label_key``) or ``kwargs``
    (string ``label_key``); everything else is forwarded to
    ``self.predictor``, which returns ``(ah, y, attention)``.

    In training mode the loss is ``lossfun(ah, t) + lossfun(y, t)``;
    otherwise it is ``lossfun(y, t)`` alone.

    Returns:
        ``self.loss``.

    Raises:
        ValueError: if an integer key is out of range, a string key is
            missing from ``kwargs``, or the key is neither ``int`` nor
            ``str``.
    """
    if isinstance(self.label_key, int):
        if not (-len(args) <= self.label_key < len(args)):
            msg = 'Label key %d is out of bounds' % self.label_key
            raise ValueError(msg)
        t = args[self.label_key]
        if self.label_key == -1:
            args = args[:-1]
        else:
            args = args[:self.label_key] + args[self.label_key + 1:]
    elif isinstance(self.label_key, str):
        if self.label_key not in kwargs:
            msg = 'Label key "%s" is not found' % self.label_key
            raise ValueError(msg)
        t = kwargs[self.label_key]
        del kwargs[self.label_key]
    else:
        # Without this branch ``t`` stays unbound and the failure only
        # surfaces after the predictor has already run.
        raise ValueError("Invalid type: label_key")

    # Reset the cached results before running the predictor.
    self.ah = None
    self.y = None
    self.attention = None
    self.loss = None
    self.accuracy = None

    self.ah, self.y, self.attention = self.predictor(*args, **kwargs)

    if not chainer.config.train:
        self.loss = self.lossfun(self.y, t)
    else:
        att_loss = self.lossfun(self.ah, t)
        per_loss = self.lossfun(self.y, t)
        self.loss = att_loss + per_loss
    reporter.report({'loss': self.loss}, self)

    if self.compute_accuracy:
        self.accuracy = self.accfun(self.y, t)
        reporter.report({'accuracy': self.accuracy}, self)
    return self.loss
def __call__(self, loc, val, y, train=True):
    """Return the MSE loss plus the scaled regularization terms.

    Args:
        loc, val, y: forwarded to ``self.forward``; ``y`` is also the
            target of the mean squared error.
        train: forwarded to ``self.forward``.

    Returns:
        ``mse + kldt / self.total_nobs``, where ``kldt`` is the weighted sum
        of the six terms returned by ``self.forward`` after the prediction.
    """
    ret = self.forward(loc, val, y, train=train)
    pred, kld0, kld1, kldg, kldi, hypg, hypi = ret

    # Compute MSE loss
    mse = F.mean_squared_error(pred, y)
    rmse = F.sqrt(mse)  # Only used for reporting

    # Now compute the total KLD loss
    kldt = kld0 * self.lambda0 + kld1 * self.lambda1
    kldt += kldg + kldi + hypg + hypi

    # Total loss is MSE plus regularization losses
    loss = mse + kldt * (1.0 / self.total_nobs)

    # Log the errors
    logs = {
        'loss': loss,
        'rmse': rmse,
        'kld0': kld0,
        'kld1': kld1,
        'kldg': kldg,
        'kldi': kldi,
        'hypg': hypg,
        'hypi': hypi,
        'hypglv': F.sum(self.hyper_feat_lv_vec.b),
        'hypilv': F.sum(self.hyper_feat_delta_lv.b),
        'kldt': kldt,
        'bias': F.sum(self.bias_mu.b),
    }
    reporter.report(logs, self)
    return loss
def forward(self, xs1, xs2, xs3, ys, train=True):
    """Compute the pairwise ranking hinge loss and its accuracy.

    ``self.encoder`` scores the pairs ``(xs1, xs2)`` and ``(xs1, xs3)``; the
    score difference is multiplied by the direction given in ``ys`` and
    passed through ``relu(1 - ps)``.

    Args:
        xs1, xs2, xs3: inputs to ``self.encoder``.
        ys: one direction value per example.
        train: when true, report loss and accuracy and return the loss only.

    Returns:
        ``loss`` when ``train`` is true, otherwise ``(loss, accuracy)``.
    """
    with chainer.using_config('debug', self.debug):
        # initialization
        batch_size = len(ys)
        direction = F.reshape(F.concat(ys, axis=0), (batch_size, 1))
        label = F.concat(
            self.xp.array(self.xp.array(ys, self.xp.int32) < 0.,
                          self.xp.int32),
            axis=0)

        # calculate ranking score each pair
        f1 = self.encoder(xs1, xs2)
        f2 = self.encoder(xs1, xs3)

        # reflect direction of higher or lower ranking
        ps = (f1 - f2) * direction

        loss = F.sum(F.relu(1 - ps), axis=0)[0]
        accuracy = F.accuracy(F.concat([f1, f2], axis=1), label)

        if train:
            reporter.report({'loss': loss}, self)
            reporter.report({'accuracy': accuracy}, self)
            return loss
        return loss, accuracy
def __call__(self, x_STF, y_STF):
    """Average ``self.lossfun`` over the leading axis of the inputs.

    Args:
        x_STF: input; each slice ``x_STF[t]`` is reshaped to ``(1, -1, 1)``
            before being passed to ``self.predictor``.
        y_STF: targets, indexed along the same leading axis.

    Returns:
        The mean of the per-step losses, also reported as ``'loss'``.
    """
    n_steps = x_STF.shape[0]

    # add losses
    step_losses = (
        self.lossfun(self.predictor(x_STF[t].reshape(1, -1, 1)), y_STF[t])
        for t in range(n_steps)
    )
    loss = sum(step_losses)
    loss = loss / n_steps

    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, trainer=None):
    """Run the evaluation and report its result.

    The extension can be called without a trainer. The result is reported
    through :func:`~chainer.report`, so it can be used on its own by
    setting up a :class:`~chainer.Reporter` manually.

    Args:
        trainer (~chainer.training.Trainer): Trainer object that invokes
            this extension. It is not used here and may be omitted.

    Returns:
        dict: the value returned by ``self.evaluate()``.
    """
    # set up a reporter
    reporter = reporter_module.Reporter()
    prefix = '' if self.name is None else self.name + '/'

    for name, target in six.iteritems(self._targets):
        observer_name = prefix + name
        reporter.add_observer(observer_name, target)
        reporter.add_observers(observer_name,
                               target.namedlinks(skipself=True))

    # Evaluate with the 'train' configuration switched off.
    with reporter, configuration.using_config('train', False):
        result = self.evaluate()

    reporter_module.report(result)
    return result
def update_core(self):
    """Run one update of every optimizer on the ``gen.sub`` MSE loss.

    The converted batch is passed to ``self.gen.sub``; the mean squared
    error between its two outputs, divided by the batch size, is the loss.
    """
    minibatch = self._iterators['main'].next()
    x_data = self.converter(minibatch, self.device)
    batchsize = x_data.shape[0]

    # The real batch is used directly as the generator input.
    y_gen, x_gen = self.gen.sub(x_data)
    loss_gen = F.mean_squared_error(x_gen, y_gen)
    loss = loss_gen / batchsize

    optimizers = self._optimizers
    for opt in optimizers.values():
        opt.target.cleargrads()

    # compute gradients all at once
    loss.backward()

    for opt in optimizers.values():
        opt.update()

    # loss will be summarized and compute_mean() per epoch
    reporter.report({'loss': loss})
def update_core(self):
    """Run one discriminator step, then one generator step.

    The discriminator loss adds a gradient-norm term on the real batch,
    weighted by ``self.gamma``. After the generator step the parameters of
    ``self.averaged_generator`` are blended with the generator's using
    ``self.decay``, and ``self.alpha`` advances by ``self.delta`` up to 1.0.
    """
    batch = self.get_iterator("main").next()
    batchsize = len(batch)

    # Train the discriminator
    x_real = self.discriminator.wrap_array(batch)
    y_real = self.discriminator(x_real, self.stage, self.alpha)
    gradient = grad([y_real], [x_real], enable_double_backprop=True)[0]
    gradient_norm = sum(batch_l2_norm_squared(gradient)) / batchsize
    loss_grad = self.gamma * gradient_norm / 2

    z = self.generator.generate_latent(batchsize)
    # A second latent is drawn only when the mixing test passes.
    if self.mixing > random():
        mix_z = self.generator.generate_latent(batchsize)
    else:
        mix_z = None
    x_fake = self.generator(z, self.stage, self.alpha, mix_z)
    y_fake = self.discriminator(x_fake, self.stage, self.alpha)

    if self.lsgan:
        dis_term = (sum((y_real - 1)**2) + sum(y_fake**2)) / 2
    else:
        dis_term = sum(softplus(-y_real)) + sum(softplus(y_fake))
    loss_dis = dis_term / batchsize
    loss_dis += loss_grad

    # Cut the graph at the fake batch before backpropagating.
    x_fake.unchain_backward()
    self.discriminator.cleargrads()
    loss_dis.backward()
    self.discriminator_optimizer.update()

    # Train the generator
    z = self.generator.generate_latent(batchsize)
    if self.mixing > random():
        mix_z = self.generator.generate_latent(batchsize)
    else:
        mix_z = None
    x_fake = self.generator(z, self.stage, self.alpha, mix_z)
    y_fake = self.discriminator(x_fake, self.stage, self.alpha)

    if self.lsgan:
        gen_term = sum((y_fake - 1)**2) / 2
    else:
        gen_term = sum(softplus(-y_fake))
    loss_gen = gen_term / batchsize

    self.generator.cleargrads()
    loss_gen.backward()
    self.mapper_optimizer.update()
    self.generator_optimizer.update()

    # Blend each generator parameter into its averaged counterpart.
    param_pairs = zip(self.generator.params(),
                      self.averaged_generator.params())
    for raw, averaged in param_pairs:
        averaged.copydata((1 - self.decay) * raw + self.decay * averaged)

    report({"alpha": self.alpha})
    report({"loss (gen)": loss_gen})
    report({"loss (dis)": loss_dis})
    report({"loss (grad)": loss_grad})

    self.alpha = min(1.0, self.alpha + self.delta)
def __call__(self, x, x_length, ns, ns_length, label):
    """Return the max-margin loss plus the orthogonality penalty.

    Args:
        x (numpy.ndarray or cupy.ndarray): sequences of vocabulary indices
            in shape (batchsize, tokens)
        x_length (numpy.ndarray or cupy.ndarray): number of tokens in each
            batch index of ``x``
        ns (numpy.ndarray or cupy.ndarray): negative samples; sequences of
            vocabulary indices in shape
            (batchsize, n_negative_samples, tokens)
        ns_length (numpy.ndarray or cupy.ndarray): number of tokens in each
            negative sample in shape ``(batchsize, n_negative_samples)``
        label: Ignored

    Returns:
        chainer.Variable: ``loss_pred + loss_reg``; both parts and the total
        are reported.
    """
    z = self.sent_emb(x, x_length)
    p = self.pred_topic(z)

    # reconstructed sentence embedding r: (batchsize, feature size)
    r = F.matmul(p, self.T)

    # Embed the negative samples as length-normalized sums.
    bs, n_ns, _ = ns.shape
    flat_ns = ns.reshape(bs * n_ns, -1)
    flat_len = ns_length.astype(np.float32).reshape(-1, 1)
    n = F.sum(self.sent_emb.embed(flat_ns), axis=1) / flat_len
    if self.sent_emb.fix_embedding:
        n.unchain_backward()
    n = F.reshape(n, (bs, n_ns, -1))

    # Calculate contrastive max-margin loss
    # neg: (batchsize, n_ns)
    r_tiled = F.broadcast_to(F.reshape(r, (bs, 1, -1)), n.shape)
    neg = F.sum(r_tiled * n, axis=-1)
    pos = F.sum(r * z, axis=-1)
    pos = F.broadcast_to(F.reshape(pos, (bs, 1)), neg.shape)
    zeros = chainer.Variable(self.xp.zeros(neg.shape, dtype=p.dtype))
    loss_pred = F.sum(F.maximum(1. - pos + neg, zeros))
    reporter.report({'loss_pred': loss_pred}, self)

    # Penalize the distance between the normalized T's Gram matrix and
    # the identity.
    t_norm = F.normalize(self.T, axis=1)
    gram = F.matmul(t_norm, t_norm, transb=True)
    identity = self.xp.eye(self.T.shape[0], dtype=np.float32)
    distance = F.sqrt(F.sum(F.squared_difference(gram, identity)))
    loss_reg = self._orthogonality_penalty * distance
    reporter.report({'orthogonality_penalty': loss_reg}, self)

    loss = loss_pred + loss_reg
    reporter.report({'loss': loss}, self)
    return loss
def forward(self, x):
    """Return the triplet hinge loss on sampled log-probability gaps.

    The last axis of ``x`` holds anchor, target and negative at indices 0, 1
    and 2. ``self.k`` samples are drawn from the encoded anchor, and the loss
    is the mean of ``relu(bound + kl_target - kl_negative)``.
    """
    anchor, target, negative = (
        self.encoder(x[..., idx]) for idx in (0, 1, 2))

    z = anchor.sample(self.k)
    logq_anchor = anchor.log_prob(z)
    kl_target = logq_anchor - target.log_prob(z)
    kl_negative = logq_anchor - negative.log_prob(z)

    margin = self.bound + kl_target - kl_negative
    loss = F.mean(F.relu(margin))

    reporter.report({'loss': loss}, self)
    reporter.report({'kl_target': F.mean(kl_target)}, self)
    reporter.report({'kl_negative': F.mean(kl_negative)}, self)
    reporter.report({'bound': self.bound}, self)
    return loss
def forward(self, x):
    """Sum the traversal loss over every tree in ``x`` and report counts.

    Returns:
        The accumulated loss, also reported as ``'loss'`` together with the
        ``'total'`` and ``'correct'`` node counters filled by
        ``self.traverse``.
    """
    # Counters start at zero on first access.
    counters = collections.defaultdict(int)
    accum_loss = 0.0

    # Trees are processed one at a time because they cannot be batched.
    for tree in x:
        tree_loss, _ = self.traverse(tree, evaluate=counters)
        accum_loss += tree_loss

    reporter.report({'loss': accum_loss}, self)
    reporter.report({'total': counters['total_node']}, self)
    reporter.report({'correct': counters['correct_node']}, self)
    return accum_loss
def update_core(self):
    """Run one optimizer step with gradients accumulated over sub-batches.

    The batch is split into ``self._subdivisions`` interleaved sub-batches.
    Each one is converted, passed through the loss function and
    backpropagated before a single ``optimizer.update()``. The reported
    ``'loss'`` is the sum of the per-sub-batch losses; it is not divided by
    their number.

    Raises:
        TypeError: if the converter returns something other than a tuple or
            a dict.
    """
    batch = self._iterators['main'].next()

    # Sub-batch i takes every ``_subdivisions``-th example from offset i.
    in_arrays_list = [
        self.converter(batch[i::self._subdivisions], self.device)
        for i in range(self._subdivisions)
    ]

    optimizer = self._optimizers['main']
    loss_func = self.loss_func or optimizer.target
    loss_func.cleargrads()

    losses = []
    for in_arrays in in_arrays_list:
        if isinstance(in_arrays, tuple):
            in_vars = [variable.Variable(x) for x in in_arrays]
            loss = loss_func(*in_vars)
        elif isinstance(in_arrays, dict):
            in_vars = {
                key: variable.Variable(x)
                for key, x in six.iteritems(in_arrays)
            }
            # The dict is passed as one positional argument.
            loss = loss_func(in_vars)
        else:
            # Fail explicitly; carrying on would hit an unbound ``loss``.
            raise TypeError(
                'converter returned unsupported type: {}'.format(
                    type(in_arrays)))

        # Gradients add up across sub-batches until optimizer.update().
        loss.backward()
        # Keep a host-side copy of the loss for logging.
        losses.append(cuda.to_cpu(loss.data))

    optimizer.update()

    total_loss = 0.
    for value in losses:
        total_loss += value
    reporter.report({'loss': total_loss}, loss_func)