def update_core(self):
    """Run one data-parallel training step over every registered model.

    The batch drawn from the ``'main'`` iterator is split in an interleaved
    fashion (``batch[i::n]``) so that each model receives its own sub-batch,
    converted for that model's device.  Every model computes a loss, all
    losses are back-propagated, the gradients of the non-main models are
    added into the main model, the main optimizer is stepped, and finally
    the updated parameters are copied back to the other models.
    """
    main_optimizer = self.get_optimizer('main')
    model_main = main_optimizer.target
    replicas = {
        name: link
        for name, link in self._models.items()
        if link is not model_main
    }

    iterator = self.get_iterator('main')
    batch = iterator.next()

    # Split the batch into one interleaved sub-batch per model.
    num_models = len(self._models)
    inputs_by_key = {
        key: self.converter(batch[offset::num_models], self._devices[key])
        for offset, key in enumerate(six.iterkeys(self._models))
    }

    # Drop stale gradients before the forward pass to reduce memory.
    for link in six.itervalues(self._models):
        link.cleargrads()

    losses = []
    for key, link in six.iteritems(self._models):
        inputs = inputs_by_key[key]
        compute_loss = self.loss_func or link

        with function.force_backprop_mode():
            device_id = self._devices[key]
            if not 0 <= device_id:
                # Negative ids are passed on as ``None``.
                device_id = None
            with cuda.get_device_from_id(device_id):
                if isinstance(inputs, tuple):
                    loss = compute_loss(*inputs)
                elif isinstance(inputs, dict):
                    loss = compute_loss(**inputs)
                else:
                    loss = compute_loss(inputs)

        losses.append(loss)

    # Clear again: parameters that were uninitialized before the forward
    # pass (original note: "_uninitialized_params") now exist.
    for link in six.itervalues(self._models):
        link.cleargrads()

    for loss in losses:
        loss.backward(loss_scale=self.loss_scale)

    # Gather every replica's gradients into the main model.
    for link in six.itervalues(replicas):
        model_main.addgrads(link)

    main_optimizer.update()

    # Broadcast the freshly updated parameters back to the replicas.
    for link in six.itervalues(replicas):
        link.copyparams(model_main)

    if self.auto_new_epoch and iterator.is_new_epoch:
        main_optimizer.new_epoch(auto=True)
def backward(self, indexes, grad_outputs):
    """Recompute the wrapped function and differentiate it.

    The retained inputs are fed through ``self.func`` again with backprop
    forced on, and ``chainer.grad`` is then used to obtain the gradients
    of the recomputed outputs with respect to those inputs.

    Args:
        indexes: Not used by this implementation.
        grad_outputs: Gradients w.r.t. the outputs, forwarded to
            ``chainer.grad``.

    Returns:
        Whatever ``chainer.grad`` returns for the retained inputs.
    """
    retained_inputs = self.get_retained_inputs()

    with function.force_backprop_mode():
        recomputed_outputs = _call_func(self.func, retained_inputs)

    # ``enable_double_backprop=True`` keeps the returned gradients
    # differentiable, so they can be back-propagated through again.
    gradients = chainer.grad(
        recomputed_outputs,
        retained_inputs,
        grad_outputs=grad_outputs,
        enable_double_backprop=True,
    )
    return gradients
def update_core(self):
    """Perform a single synchronous multi-model update.

    Steps, in order: fetch a batch, hand each model an interleaved slice
    of it, compute one loss per model, back-propagate all losses, sum the
    gradients into the main model, step the main optimizer, and copy the
    main model's parameters to the remaining models.
    """

    def _evaluate(loss_fn, arrays):
        # Tuples are splatted positionally, dicts as keywords, anything
        # else is passed as a single argument.
        if isinstance(arrays, tuple):
            return loss_fn(*arrays)
        if isinstance(arrays, dict):
            return loss_fn(**arrays)
        return loss_fn(arrays)

    optimizer = self.get_optimizer('main')
    model_main = optimizer.target

    models_others = {}
    for key, candidate in self._models.items():
        if candidate is not model_main:
            models_others[key] = candidate

    iterator = self.get_iterator('main')
    batch = iterator.next()

    # Split the batch to sub-batches, one per model.
    stride = len(self._models)
    sub_batches = {}
    for start, key in enumerate(six.iterkeys(self._models)):
        device = self._devices[key]
        sub_batches[key] = self.converter(batch[start::stride], device)

    # For reducing memory
    for mdl in six.itervalues(self._models):
        mdl.cleargrads()

    losses = []
    for key, mdl in six.iteritems(self._models):
        arrays = sub_batches[key]
        loss_fn = self.loss_func or mdl

        with function.force_backprop_mode():
            raw_id = self._devices[key]
            dev_id = raw_id if 0 <= raw_id else None
            with cuda.get_device_from_id(dev_id):
                loss = _evaluate(loss_fn, arrays)

        losses.append(loss)

    # For _uninitialized_params
    for mdl in six.itervalues(self._models):
        mdl.cleargrads()

    for loss in losses:
        loss.backward(loss_scale=self.loss_scale)

    for mdl in six.itervalues(models_others):
        model_main.addgrads(mdl)

    optimizer.update()

    for mdl in six.itervalues(models_others):
        mdl.copyparams(model_main)

    is_epoch_boundary = self.auto_new_epoch and iterator.is_new_epoch
    if is_epoch_boundary:
        optimizer.new_epoch(auto=True)
def update_core(self):
    """Run one multi-GPU training step for the wrapped AU R-CNN model.

    The batch is converted on the CPU (device ``-1``), its leading two
    axes are merged, and the merged samples are distributed over the
    models in ``self._models``.  Each model produces ROI features, which
    are copied to the ``"main"`` device, concatenated and passed together
    with the labels to ``loss_head_module`` to obtain a single loss.
    After the backward pass the gradients of the non-main models are
    added into ``model_main.au_rcnn_train_chain``, the optimizer is
    stepped, and the parameters are copied back to the other models.

    NOTE(review): the nesting below was reconstructed from a flattened
    source; confirm against the original file.
    """
    # The optimizer target is the main wrapper; it exposes
    # ``au_rcnn_train_chain`` and ``loss_head_module`` (used below).
    optimizer = self.get_optimizer('main')
    model_main = optimizer.target
    loss_head_module = model_main.loss_head_module
    # Every registered model except the main wrapper's own
    # ``au_rcnn_train_chain``.
    models_others = {k: v for k, v in self._models.items() if v != model_main.au_rcnn_train_chain}
    batch = self.get_iterator('main').next()
    in_arrays = self.converter(batch, -1)
    images, bboxes, labels = in_arrays
    batch_size, T, channel, height, width = images.shape
    # Merge the batch and T axes: (B*T, C, H, W).
    images = images.reshape(batch_size * T, channel, height, width)
    # (B*T, BOX_NUM, 4)
    bboxes = bboxes.reshape(batch_size * T, config.BOX_NUM[self.database], 4)
    # Labels are only consumed by the loss head, so they go to the main
    # device and are left un-reshaped.
    labels = chainer.cuda.to_gpu(labels, device=self._devices["main"])

    # For reducing memory
    for model in six.itervalues(models_others):
        model.cleargrads()
    model_main.cleargrads()

    # Split the batch to sub-batches.
    n = len(self._models)
    in_arrays_list = {}
    # presumably ``split_list`` partitions the indices into ``n`` chunks;
    # verify against its definition.
    sub_index = self.split_list(list(range(batch_size * T)), n)
    # Keys are visited in string-sorted order here and in the forward
    # loop below, so the two loops agree on the ordering.
    for i, key in enumerate(sorted(self._models.keys(), key=lambda e:str(e))):
        # Devices missing from ``self._devices`` fall back to "main".
        in_arrays_list[key] = (F.copy(images[sub_index[i]], self._devices.get(key, self._devices["main"])),
                               F.copy(bboxes[sub_index[i]], self._devices.get(key, self._devices["main"])))

    # Original note: self._models are all au_rcnn_train_chain instances,
    # including the one on the main GPU.
    with function.force_backprop_mode():
        roi_feature_multi_gpu = []
        for model_key, au_rcnn_train_chain in sorted(self._models.items(), key=lambda e:str(e[0])):
            # NOTE: rebinds ``images``/``bboxes`` to the per-device slices.
            images, bboxes = in_arrays_list[model_key]
            assert int(images.data.device) == au_rcnn_train_chain._device_id
            # Original note: shape = (B*T//n, F, D)
            roi_feature = au_rcnn_train_chain(images, bboxes)
            roi_feature_multi_gpu.append(F.copy(roi_feature, self._devices["main"]))
        # Combine the per-device results along the sample axis.
        roi_feature = F.concat(roi_feature_multi_gpu, axis=0)
        # Restore the separate batch and T axes.
        roi_feature = roi_feature.reshape(batch_size, T, config.BOX_NUM[self.database],
                                          roi_feature.shape[-1])
        loss = loss_head_module(roi_feature, labels)
    # Clear gradients again right before the backward pass.
    model_main.cleargrads()
    for model in six.itervalues(self._models):
        model.cleargrads()
    loss.backward()
    for model in six.itervalues(models_others):
        model_main.au_rcnn_train_chain.addgrads(model)
    optimizer.update()
    # Only the main model is updated by the optimizer, so its parameters
    # are copied to every other model.
    for model in six.itervalues(models_others):
        model.copyparams(model_main.au_rcnn_train_chain)
def backward(self, indexes, grad_outputs):
    """Recompute the forgotten forward pass and back-propagate through it.

    Args:
        indexes: Not used by this implementation.
        grad_outputs: Gradient variables for each output of ``self.func``.

    Returns:
        tuple: ``grad_var`` of each freshly created input variable.

    Raises:
        RuntimeError: If ``chainer.config.enable_backprop`` is set, i.e.
            double backpropagation was requested.
    """
    # Double backprop is not allowed.
    if chainer.config.enable_backprop:
        raise RuntimeError('double backpropagation in functions.forget is '
                           'not allowed.')

    retained = self.get_retained_inputs()

    # Wrap the raw arrays in new variables that have no creators.
    detached_inputs = tuple(
        variable.Variable(retained_input.array)
        for retained_input in retained
    )

    with function.force_backprop_mode(), \
            chainer.using_config('in_recomputing', True):
        recomputed = _call_func(self.func, detached_inputs)
        assert len(recomputed) == len(grad_outputs)

    # Seed every recomputed output with the incoming gradient.
    for output, incoming_grad in zip(recomputed, grad_outputs):
        output.grad_var = incoming_grad

    # TODO(kataoka): use outer backward's `retain_grad` and `loss_scale`
    chainer.variable._backprop_to_all(recomputed, False, None)

    input_grads = tuple(x.grad_var for x in detached_inputs)
    return input_grads
def backward(self, indexes, grad_outputs):
    """Re-run the forgotten computation and back-propagate through it.

    Args:
        indexes: Not used by this implementation.
        grad_outputs: Gradient variables for each output of ``self.func``.

    Returns:
        tuple: ``grad_var`` of each freshly created input variable.

    Raises:
        RuntimeError: If ``chainer.config.enable_backprop`` is set, i.e.
            double backpropagation was requested.
    """
    # Double backprop is not allowed.
    if chainer.config.enable_backprop:
        raise RuntimeError('double backpropagation in functions.forget is '
                           'not allowed.')

    retained = self.get_retained_inputs()

    # Fresh variables without creators, sharing the retained arrays.
    fresh_inputs = tuple(variable.Variable(r.array) for r in retained)

    with function.force_backprop_mode():
        results = _call_func(self.func, fresh_inputs)
        assert len(results) == len(grad_outputs)
        multiple_outputs = len(results) > 1
        if multiple_outputs:
            # Tie all outputs to one function node so that a single
            # backward call is enough when `results` is a tuple.
            results = chainer.functions.identity(*results)

    for result, upstream_grad in zip(results, grad_outputs):
        result.grad_var = upstream_grad

    first_output = results[0]
    first_output.backward()

    return tuple(x.grad_var for x in fresh_inputs)
def export(model, args, directory=None,
           export_params=True, graph_name='Graph'):
    """(Experimental) Export a computational graph as Caffe format.

    The model is called once on ``args`` with backprop forced on and
    ``train`` set to ``False``; the resulting inputs and outputs are then
    handed to the Caffe retriever.

    Args:
        model (~chainer.Chain): The model object you want to export in
            Caffe format. It must be callable, because ``args`` is passed
            to it directly through the ``()`` accessor.
        args (list of ~chainer.Variable): The arguments given to the model
            directly. Exactly one argument is currently supported.
        directory (str): The directory used for saving the resulting Caffe
            model. If None, nothing is saved to the disk.
        export_params (bool): If True, all parameters of the model are
            exported as well. If False, the exported Caffe model does not
            include any parameter values.
        graph_name (str): A string to be used for the ``name`` field of
            the graph in the exported Caffe model.

    Raises:
        NotImplementedError: If ``args`` does not hold exactly one element.

    .. note::
        Currently, this function supports networks that are built from
        the following layer functions.

        - :func:`~chainer.functions.linear`
        - :func:`~chainer.functions.convolution_2d`
        - :func:`~chainer.functions.deconvolution_2d`
        - :func:`~chainer.functions.max_pooling_2d`
        - :func:`~chainer.functions.average_pooling_2d`
        - :func:`~chainer.functions.batch_normalization`
        - :func:`~chainer.functions.local_response_normalization`
        - :func:`~chainer.functions.relu`
        - :func:`~chainer.functions.leaky_relu`
        - :func:`~chainer.functions.concat`
        - :func:`~chainer.functions.softmax`
        - :func:`~chainer.functions.reshape`
        - :func:`~chainer.functions.add`

        This function can export at least the following networks.

        - GoogLeNet
        - ResNet
        - VGG

        This function uses testing (evaluation) mode.

    .. admonition:: Example

       >>> from chainer.exporters import caffe
       >>>
       >>> class Model(chainer.Chain):
       ...    def __init__(self):
       ...        super(Model, self).__init__()
       ...        with self.init_scope():
       ...            self.l1 = L.Convolution2D(None, 1, 1, 1, 0)
       ...            self.b2 = L.BatchNormalization(1)
       ...            self.l3 = L.Linear(None, 1)
       ...
       ...    def __call__(self, x):
       ...        h = F.relu(self.l1(x))
       ...        h = self.b2(h)
       ...        return self.l3(h)
       ...
       >>> x = chainer.Variable(np.zeros((1, 10, 10, 10), np.float32))
       >>> caffe.export(Model(), [x], None, True, 'test')

    """
    assert isinstance(args, (tuple, list))
    if len(args) != 1:
        raise NotImplementedError()
    for arg in args:
        assert isinstance(arg, variable.Variable)

    # Run the forward pass in evaluation mode while recording the graph.
    with function.force_backprop_mode(), chainer.using_config('train', False):
        outputs = model(*args)

    # Normalize a single output variable to a one-element list.
    if isinstance(outputs, variable.Variable):
        outputs = [outputs]
    assert isinstance(outputs, (tuple, list))
    for out in outputs:
        assert isinstance(out, variable.Variable)

    if directory is None:
        # Nothing is written to disk.
        prototxt_path = None
        caffemodel_path = None
    else:
        prototxt_path = os.path.join(directory, 'chainer_model.prototxt')
        if export_params:
            caffemodel_path = os.path.join(
                directory, 'chainer_model.caffemodel')
        else:
            caffemodel_path = None

    retrieve = _RetrieveAsCaffeModel(prototxt_path, caffemodel_path)
    retrieve(graph_name, args, outputs)
def backward(self, inputs, grads):
    """Recompute the forward pass and return gradients w.r.t. ``inputs``.

    Args:
        inputs: Raw input arrays; each one is wrapped in a new
            ``Variable`` before the recomputation.
        grads: Output gradients, handed to ``_DummyFunction``.

    Returns:
        tuple: The ``grad`` attribute of every wrapped input.
    """
    with function.force_backprop_mode():
        wrapped_inputs = [variable.Variable(array) for array in inputs]
        recomputed = self._call_func(wrapped_inputs)

        # Route the given output gradients through a dummy function node
        # and start the backward pass from its result.
        grad_injector = _DummyFunction(grads)
        sink = grad_injector(*recomputed)
        sink.backward()

    return tuple(wrapped.grad for wrapped in wrapped_inputs)
def export(model, args, directory=None,
           export_params=True, graph_name='Graph'):
    """(Experimental) Export a computational graph as Caffe format.

    The model is called once on ``args`` with backprop forced on and
    ``train`` set to ``False``; the resulting inputs and outputs are then
    handed to the Caffe retriever.

    Args:
        model (~chainer.Chain): The model object you want to export in
            Caffe format. It should have :meth:`__call__` method because
            the second argument ``args`` is directly given to the model by
            the ``()`` accessor.
        args (list of ~chainer.Variable): The arguments which are given to
            the model directly. Exactly one argument is currently
            supported.
        directory (str): The directory used for saving the resulting Caffe
            model. If None, nothing is saved to the disk.
        export_params (bool): If True, this function exports all the
            parameters included in the given model at the same time. If
            False, the exported Caffe model doesn't include any parameter
            values.
        graph_name (str): A string to be used for the ``name`` field of
            the graph in the exported Caffe model.

    Raises:
        NotImplementedError: If ``args`` does not hold exactly one element.

    .. note::
        Currently, this function supports networks that are created by the
        following layer functions.

        - :func:`~chainer.functions.linear`
        - :func:`~chainer.functions.convolution_2d`
        - :func:`~chainer.functions.deconvolution_2d`
        - :func:`~chainer.functions.max_pooling_2d`
        - :func:`~chainer.functions.average_pooling_2d`
        - :func:`~chainer.functions.batch_normalization`
        - :func:`~chainer.functions.local_response_normalization`
        - :func:`~chainer.functions.relu`
        - :func:`~chainer.functions.concat`
        - :func:`~chainer.functions.softmax`
        - :func:`~chainer.functions.reshape`
        - :func:`~chainer.functions.add`

        This function can export at least the following networks.

        - GoogLeNet
        - ResNet
        - VGG

        This function uses testing (evaluation) mode.

    .. admonition:: Example

       >>> from chainer.exporters import caffe
       >>>
       >>> class Model(chainer.Chain):
       ...    def __init__(self):
       ...        super(Model, self).__init__()
       ...        with self.init_scope():
       ...            self.l1 = L.Convolution2D(None, 1, 1, 1, 0)
       ...            self.b2 = L.BatchNormalization(1)
       ...            self.l3 = L.Linear(None, 1)
       ...
       ...    def __call__(self, x):
       ...        h = F.relu(self.l1(x))
       ...        h = self.b2(h)
       ...        return self.l3(h)
       ...
       >>> x = chainer.Variable(np.zeros((1, 10, 10, 10), np.float32))
       >>> caffe.export(Model(), [x], None, True, 'test')

    """
    # Flag the API as experimental before doing anything else.
    utils.experimental('chainer.exporters.caffe.export')

    assert isinstance(args, (tuple, list))
    if len(args) != 1:
        raise NotImplementedError()
    for arg in args:
        assert isinstance(arg, variable.Variable)

    # Run the forward pass in evaluation mode while recording the graph.
    with function.force_backprop_mode(), chainer.using_config('train', False):
        output = model(*args)

    # Normalize a single output variable to a one-element list.
    if isinstance(output, variable.Variable):
        output = [output]
    assert isinstance(output, (tuple, list))
    for out in output:
        assert isinstance(out, variable.Variable)

    # Both paths stay ``None`` unless a target directory is given; the
    # parameter file is requested only when ``export_params`` is true.
    prototxt = None
    caffemodel = None
    if directory is not None:
        prototxt = os.path.join(directory, 'chainer_model.prototxt')
        if export_params:
            caffemodel = os.path.join(directory, 'chainer_model.caffemodel')

    retriever = _RetrieveAsCaffeModel(prototxt, caffemodel)
    retriever(graph_name, args, output)
def update_core(self):
    """Run one data-parallel step and report the averaged NLL.

    Each model receives an interleaved slice of the batch, unpacked as
    ``(x, adj)``, and produces a negative log-likelihood via
    ``model.log_prob``.  When ``self.two_step`` is set, the loss is
    ``self.h_nll_weight * nll[0] + nll[1]``; otherwise it is ``nll``
    itself.  After the backward pass the gradients are summed into the
    main model, the mean loss (copied to the CPU) and the main model's
    ``z_var`` are reported, the optimizer is stepped, and the parameters
    are copied back to the other models.
    """
    main_optimizer = self.get_optimizer("main")
    model_main = main_optimizer.target
    replicas = {
        name: link
        for name, link in self._models.items()
        if link is not model_main
    }

    iterator = self.get_iterator("main")
    batch = iterator.next()

    # -- split the batch into one interleaved sub-batch per model --
    n_models = len(self._models)
    inputs_by_key = {}
    for offset, key in enumerate(six.iterkeys(self._models)):
        device = self._devices[key]
        inputs_by_key[key] = self.converter(batch[offset::n_models], device)

    # for reducing memory
    for link in six.itervalues(self._models):
        link.cleargrads()

    losses = []
    for key, link in six.iteritems(self._models):
        x, adj = inputs_by_key[key]

        with function.force_backprop_mode(), \
                chainer.using_device(self._devices[key]):
            z, sum_log_det_jacs = link(x, adj)
            nll = link.log_prob(z, sum_log_det_jacs)
            if self.two_step:
                loss = self.h_nll_weight * nll[0] + nll[1]
            else:
                loss = nll

        losses.append(loss)

    for link in six.itervalues(self._models):
        link.cleargrads()

    for loss in losses:
        loss.backward(loss_scale=self.loss_scale)

    for link in six.itervalues(replicas):
        model_main.addgrads(link)

    # Accumulate the per-model losses on the CPU (device -1) so they can
    # be averaged regardless of where each one was computed.
    loss_sum = 0.0
    for loss in losses:
        loss_sum += F.copy(loss, -1)
    mean_loss = loss_sum / len(losses)

    # Reported before the optimizer step, so ``z_var`` is the pre-update
    # value.
    observation = {
        "neg_log_likelihood": mean_loss,
        "z_var": model_main.z_var
    }
    chainer.report(observation)

    main_optimizer.update()

    for link in six.itervalues(replicas):
        link.copyparams(model_main)

    if self.auto_new_epoch and iterator.is_new_epoch:
        main_optimizer.new_epoch(auto=True)
def update_core(self):
    """Run one multi-device GAN update with gradient accumulation.

    For each of ``self.n_dis`` iterations the discriminator is updated:
    gradients are accumulated over ``self.n_accumulation`` passes on every
    device, added into the ``'main'`` discriminator, applied with the
    ``'opt_dis'`` optimizer, and the updated parameters are copied to the
    other devices.  The generator is updated the same way with
    ``'opt_gen'``, but only on the first iteration (``i == 0``).

    NOTE(review): the nesting below was reconstructed from a flattened
    source; in particular, confirm that the add-grads / optimizer steps
    sit outside the accumulation loops.
    """
    names = list(six.iterkeys(self.devices))
    gen_optimizer = self.get_optimizer('opt_gen')
    dis_optimizer = self.get_optimizer('opt_dis')
    for i in range(self.n_dis):
        # clear the gradients first
        for model in six.itervalues(self.models):
            model['gen'].cleargrads()
            model['dis'].cleargrads()
        # update D
        # first calculate the gradients
        for accumulation_index in range(self.n_accumulation):
            for name in names:
                with function.force_backprop_mode():
                    dev_id = self.devices[name]
                    # Negative ids are passed on as ``None``.
                    dev_id = dev_id if 0 <= dev_id else None
                    with cuda.get_device_from_id(dev_id):
                        gen = self.models[name]['gen']
                        dis = self.models[name]['dis']
                        xp = gen.xp
                        x_real, y_real = self.get_batch(xp)
                        batchsize = len(x_real)
                        dis_real = dis(x_real, y=y_real)
                        # Generate as many fake samples as real ones.
                        x_fake, y_fake = self._generete_samples(
                            gen=gen, n_gen_samples=batchsize)
                        dis_fake = dis(x_fake, y=y_fake)
                        # Detach the fake samples from the graph behind
                        # them before the discriminator's backward pass.
                        x_fake.unchain_backward()
                        # Divide by the number of accumulation passes so
                        # the summed gradients correspond to their mean.
                        loss_dis = self.loss_dis(
                            dis_fake=dis_fake, dis_real=dis_real) / float(
                            self.n_accumulation)
                        chainer.reporter.report({'loss_dis': loss_dis})
                        loss_dis.backward()
        # Gather discriminator gradients from the other devices.
        for name in names:
            if name != 'main':
                self.models['main']['dis'].addgrads(
                    self.models[name]['dis'])
        dis_optimizer.update()
        # ``SR`` is defined elsewhere; it is applied to the main
        # discriminator every ``self.n_SR`` iterations.
        if self.iteration % self.n_SR == 0:
            SR(self.models['main']['dis'])
        # Copy the updated discriminator to the other devices.
        for name in names:
            if name != 'main':
                self.models[name]['dis'].copyparams(
                    self.models['main']['dis'])
        # update G
        if i == 0:
            for model in six.itervalues(self.models):
                model['gen'].cleargrads()
                model['dis'].cleargrads()
            for accumulation_index in range(self.n_accumulation):
                for name in names:
                    with function.force_backprop_mode():
                        dev_id = self.devices[name]
                        dev_id = dev_id if 0 <= dev_id else None
                        with cuda.get_device_from_id(dev_id):
                            gen = self.models[name]['gen']
                            dis = self.models[name]['dis']
                            x_fake, y_fake = self._generete_samples(
                                gen=gen)
                            dis_fake = dis(x_fake, y=y_fake)
                            loss_gen = self.loss_gen(
                                dis_fake=dis_fake) / float(
                                self.n_accumulation)
                            chainer.reporter.report({'loss_gen': loss_gen})
                            loss_gen.backward()
            # Gather generator gradients from the other devices.
            for name in names:
                if name != 'main':
                    self.models['main']['gen'].addgrads(
                        self.models[name]['gen'])
            gen_optimizer.update()
            # Copy the updated generator to the other devices.
            for name in names:
                if name != 'main':
                    self.models[name]['gen'].copyparams(
                        self.models['main']['gen'])