def affine_backward(inputs, base_axis=1):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    w0 = inputs[2]
    base_axis += inputs[0].ndim * (base_axis < 0)
    ctx = nn.get_current_context()
    dfx = AffineDataGrad(ctx, base_axis)
    dfw = AffineFilterGrad(ctx, base_axis)
    dfx.xshape = x0.shape
    dfw.wshape = w0.shape
    dx0 = dfx(dy, w0)
    dw0 = dfw(dy, x0)
    if len(inputs) == 4:
        axes = [i for i in range(0, base_axis)]
        db0 = F.sum(dy, axes, keepdims=False)
        return dx0, dw0, db0
    else:
        return dx0, dw0
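# Usage sketch (assumption): affine_backward is normally invoked indirectly via
# nn.grad through the backward-function registry, but it can also be exercised
# directly with dummy Variables, provided AffineDataGrad/AffineFilterGrad and F
# are importable in this module. Shapes follow F.affine with base_axis=1:
# x: (B, Din), w: (Din, Dout), b: (Dout,), dy: (B, Dout).
import numpy as np
import nnabla as nn

B, Din, Dout = 4, 3, 5
dy = nn.Variable.from_numpy_array(np.random.randn(B, Dout).astype(np.float32))
x0 = nn.Variable.from_numpy_array(np.random.randn(B, Din).astype(np.float32))
w0 = nn.Variable.from_numpy_array(np.random.randn(Din, Dout).astype(np.float32))
b0 = nn.Variable.from_numpy_array(np.random.randn(Dout).astype(np.float32))
dx0, dw0, db0 = affine_backward([dy, x0, w0, b0], base_axis=1)
for v in (dx0, dw0, db0):
    v.forward()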
def fused_batch_normalization_backward(inputs, axes=(1,), decay_rate=0.9,
                                        eps=1e-05, batch_stat=True,
                                        nonlinearity='relu'):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    if nonlinearity not in ["", "relu"]:
        raise ValueError("nonlinearity must be either '' or 'relu'.")
    ctx = nn.get_current_context()
    df = FusedBatchNormalizationBackward(
        ctx, axes, decay_rate, eps, batch_stat, nonlinearity)
    dy = inputs[0]
    x0 = inputs[1]
    b0 = inputs[2]
    g0 = inputs[3]
    rm = inputs[4]
    rv = inputs[5]
    z0 = inputs[6] if len(inputs) == 7 else None
    df.is_add = z0 is not None
    y0 = get_output(x0, "FusedBatchNormalization")
    if df.is_add:
        dx0, db0, dg0, dz0 = df(dy, x0, b0, g0, rm, rv, y0, z0)
        return dx0, db0, dg0, None, None, dz0
    else:
        dx0, db0, dg0 = df(dy, x0, b0, g0, rm, rv, y0)
        return dx0, db0, dg0, None, None
def backward_impl(self, inputs, outputs, prop_down, accum):
    # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

    # Args
    kernel = self.forward_func.info.args["kernel"]
    stride = self.forward_func.info.args["stride"]
    ignore_border = self.forward_func.info.args["ignore_border"]
    pad = self.forward_func.info.args["pad"]
    channel_last = self.forward_func.info.args["channel_last"]

    # Inputs
    x0 = inputs[0].data
    dy = inputs[1].data
    # Outputs
    dx0 = outputs[0].data
    # Grads of inputs
    g_x0 = inputs[0].grad
    g_dy = inputs[1].grad
    # Grads of outputs
    g_dx0 = outputs[0].grad

    # Compute
    ctx = nn.get_current_context()
    backward_func = nn.function.MaxPoolingBackward(
        ctx, kernel, stride, ignore_border, pad, channel_last)
    if prop_down[1]:
        x0_ = nn.Variable(x0.shape).apply(
            data=x0, grad=g_x0, need_grad=True)
        dy_ = nn.Variable(dy.shape).apply(
            data=dy, grad=g_dy, need_grad=True)
        dx0_ = nn.Variable(dx0.shape).apply(data=dx0, grad=g_dx0)
        backward_func.setup([x0_, dy_], [dx0_])
        backward_func.backward([x0_, dy_], [dx0_], accum=accum)
def convolution_data_grad_backward(inputs, base_axis=1, pad=None, stride=None,
                                   dilation=None, group=1, channel_last=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    gdx = inputs[0]
    dy = inputs[1]
    w0 = inputs[2]
    ctx = nn.get_current_context()
    dfw = ConvolutionFilterGrad(ctx, base_axis, pad, stride,
                                dilation, group, channel_last)
    dfw.wshape = w0.shape

    gdy = F.convolution(gdx, w0, None, base_axis, pad,
                        stride, dilation, group, channel_last)
    gw0 = dfw(dy, gdx)
    return gdy, gw0
def _call_function(self, type_name, inputs, args):
    import nnabla.function_bases as FB
    function_expr = "FB.F.{type_name}(nn.{ctx}, **{args})".format(
        type_name=type_name, ctx=nn.get_current_context(), args=args)
    function = eval(function_expr)
    o = function(*inputs)
    return o
def deconvolution_filter_grad_backward(inputs, base_axis=1, pad=None, stride=None,
                                       dilation=None, group=1, channel_last=False,
                                       output_padding=None):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    gdw = inputs[0]
    dy = inputs[1]
    x0 = inputs[2]
    ctx = nn.get_current_context()
    dfx = DeconvolutionDataGrad(ctx, base_axis, pad, stride, dilation,
                                group, channel_last, output_padding)
    dfx.xshape = x0.shape

    gdy = F.deconvolution(x0, gdw, None, base_axis, pad, stride,
                          dilation, group, channel_last, output_padding)
    gx0 = dfx(dy, gdw)
    return gdy, gx0
def _create_function(inputs, f, batch_size):
    ctx = nn.get_current_context()
    function_proto = f

    # todo: arrange weight name for NNC

    if function_proto.type == "Reshape":
        # if batch_size = -1, something wrong?
        reshape_shape = resolve_reshape_params(
            inputs, function_proto, batch_size)
        function_instance = F.Reshape(
            ctx, shape=reshape_shape,
            inplace=function_proto.reshape_param.inplace)
    elif function_proto.type == 'Broadcast':
        shape = resolve_broadcast_params(inputs, function_proto, batch_size)
        function_instance = F.Broadcast(ctx, shape=shape)
    elif function_proto.type == "RepeatStart":
        raise NotImplementedError("Repeat not supported.")
        function_instance = F.Identity(ctx)
    elif function_proto.type == "RepeatEnd":
        raise NotImplementedError("Repeat not supported.")
        function_instance = F.Identity(ctx)
    elif function_proto.type == "RecurrentOutput":
        raise NotImplementedError("Recurrent not supported.")
        function_instance = F.Stack(
            ctx, axis=function_proto.recurrent_param.axis)
    elif function_proto.type == "RecurrentInput":
        raise NotImplementedError("Recurrent not supported.")
        function_instance = F.Split(
            ctx, axis=function_proto.recurrent_param.axis)
    elif function_proto.type == "Delay":
        raise NotImplementedError("Recurrent not supported.")
        function_instance = F.Identity(ctx)
    else:
        function_instance = _create_function_instance(ctx, function_proto)

    return function_instance
def scope_function():
    # turn off auto forward mode
    nn.set_auto_forward(False)

    # clear all parameters
    nn.clear_parameters()

    # keep context
    ctx = nn.get_current_context()

    yield

    # restore context
    nn.set_default_context(ctx)
def batch_normalization_backward(inputs, axes=(1,), decay_rate=0.9, eps=1e-05,
                                 batch_stat=True, no_scale=False, no_bias=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    ctx = nn.get_current_context()
    df = BatchNormalizationBackward(
        ctx, axes, decay_rate, eps, batch_stat, no_scale, no_bias)
    d_inputs = df(*inputs)
    return force_tuple(d_inputs) + (None, None)
def concatenate_backward(inputs, axis=None):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    axis = axis if axis is not None else len(dy.shape) - 1
    ctx = nn.get_current_context()
    df = ConcatenateDataGrad(ctx, axis=axis)
    df.xshapes = [x.shape for x in inputs[1:]]
    dx0 = df(dy)
    return dx0
def unpooling_backward(inputs, kernel, channel_last=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    ctx = nn.get_current_context()
    df = UnpoolingDataGrad(ctx, kernel, channel_last)
    df.xshape = x0.shape
    dx0 = df(dy)
    return dx0
def global_average_pooling_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    ctx = nn.get_current_context()
    pool = GlobalAveragePoolingDataGrad(ctx)
    pool.xshape = x0.shape
    dx0 = pool(dy)
    return dx0
def slice_backward(inputs, start=None, stop=None, step=None):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    ctx = nn.get_current_context()
    df = SliceDataGrad(ctx, start, stop, step)
    df.xshape = x0.shape
    dx0 = df(dy)
    return dx0
def deconvolution_backward(inputs, base_axis=1, pad=None, stride=None,
                           dilation=None, group=1, channel_last=False,
                           output_padding=None):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    w0 = inputs[2]
    base_axis += x0.ndim * (base_axis < 0)
    ctx = nn.get_current_context()
    dfx = DeconvolutionDataGrad(ctx, base_axis, pad, stride, dilation,
                                group, channel_last, output_padding)
    dfw = DeconvolutionFilterGrad(ctx, base_axis, pad, stride, dilation,
                                  group, channel_last, output_padding)
    dfx.xshape = x0.shape
    dfw.wshape = w0.shape
    dx0 = dfx(dy, w0)
    dw0 = dfw(dy, x0)
    if len(inputs) == 4:
        # Bias gradient: sum dy over all axes except the channel axis.
        if channel_last:
            axes = [i for i in range(dy.ndim - 1)]
        else:
            axes = [i for i in range(0, base_axis)] + \
                [i for i in range(base_axis + 1, dy.ndim)]
        db0 = F.sum(dy, axes, keepdims=False)
        return dx0, dw0, db0
    else:
        return dx0, dw0
def _create_function(self, function_proto):
    inputs = self._create_inputs(function_proto.input)
    function_instance = _create_function(
        nn.get_current_context(), inputs, function_proto, self.batch_size)
    outputs = function_instance(*inputs)
    if not isinstance(outputs, tuple):
        outputs = (outputs,)
    for i, name in enumerate(function_proto.output):
        try:
            var, _ = self.vseen[name]
        except:
            self.vseen[name] = (outputs[i], [0])
            continue
        var.rewire_on(outputs[i])
def measure_cpu_gpu_instant_load():
    # Get current cpu gpu load, as
    # load = [rank, cpu_load, nvidia_device_id, gpu_load]
    # result_arr: [load, load, ...]

    gpu_load = []
    if gpu_load_backend_ok:
        global gpu_a_load
        global gpu_m_count

        gpu_m_count += 1
        try:
            comm = current_communicator()
            if comm:
                index = comm.local_rank
            elif 'cuda' in str(nn.get_current_context().backend):
                index = 0
            else:
                raise Exception
            handler = pynvml.nvmlDeviceGetHandleByIndex(index)
            gpu_load = [[
                index,
                pynvml.nvmlDeviceGetUtilizationRates(handler).gpu
            ]]

            if index in gpu_a_load.keys():
                gpu_a_load[index]['name'] = pynvml.nvmlDeviceGetName(
                    handler).decode("utf-8")
                o_load = gpu_a_load[index]['load']
                n_load = gpu_load[0][1]
                gpu_a_load[index]['load'] = (
                    (gpu_m_count - 1) * o_load + n_load) / gpu_m_count
            else:
                gpu_a_load[index] = {
                    'name': pynvml.nvmlDeviceGetName(handler).decode("utf-8"),
                    'load': gpu_load[0][1]
                }

        except Exception:
            gpu_load = []

    if cpu_load_backend_ok:
        global p_handler
        cpu_load = p_handler.cpu_percent()
        callback.update_status(
            ('cpu_gpu_load', collect_and_shape_result(cpu_load, gpu_load)))
def interpolate_backward(inputs, output_size, mode, align_corners=True,
                         half_pixel=False, half_pixel_for_nn=False,
                         channel_last=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    ctx = nn.get_current_context()
    df = InterpolateDataGrad(ctx, output_size, mode, align_corners,
                             half_pixel, half_pixel_for_nn, channel_last)
    df.xshape = x0.shape
    dx0 = df(dy)
    return dx0
def embed_backward(inputs):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    w0 = inputs[2]
    ctx = nn.get_current_context()
    dfw = EmbedFilterGrad(ctx)
    dfw.wshape = w0.shape
    dw0 = dfw(dy, x0)
    return None, dw0
def affine_filter_grad_backward(inputs, base_axis=1):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    gdw = inputs[0]
    dy = inputs[1]
    x0 = inputs[2]
    ctx = nn.get_current_context()
    dfx = AffineDataGrad(ctx, base_axis)
    dfx.xshape = x0.shape

    gdy = F.affine(x0, gdw, None, base_axis)
    gx0 = dfx(dy, gdw)
    return gdy, gx0
def scope_function():
    # turn off auto forward mode
    nn.set_auto_forward(False)
    # clear all parameters
    nn.clear_parameters()
    # keep context
    ctx = nn.get_current_context()
    # use cached array
    nn.prefer_cached_array(True)
    # turn off re-computation
    nn.set_global_recompute(False)

    yield

    # restore context
    nn.set_default_context(ctx)
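# Usage sketch (assumption): a generator like scope_function above is typically
# registered as a pytest fixture so each test starts from a clean parameter
# scope and the default context is restored afterwards. The fixture name
# clean_nnabla_scope is hypothetical.
import pytest
import nnabla as nn


@pytest.fixture(autouse=True)
def clean_nnabla_scope():
    nn.set_auto_forward(False)
    nn.clear_parameters()
    ctx = nn.get_current_context()
    yield
    nn.set_default_context(ctx)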
def affine_data_grad_backward(inputs, base_axis=1):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    gdx = inputs[0]
    dy = inputs[1]
    w0 = inputs[2]
    ctx = nn.get_current_context()
    dfw = AffineFilterGrad(ctx, base_axis)
    dfw.wshape = w0.shape

    gdy = F.affine(gdx, w0, None, base_axis)
    gw0 = dfw(dy, gdx)
    return gdy, gw0
def pad_backward(inputs, pad_width, mode='constant', constant_value=0):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    if mode != "constant":
        raise NotImplementedError(
            "pad_backward (mode != constant) is not implemented.")
    dy = inputs[0]
    x0 = inputs[1]
    ctx = nn.get_current_context()
    # The constant value is always zero after the 1st-order derivative.
    df = PadDataGrad(ctx, pad_width, mode, constant_value=0)
    df.xshape = x0.shape
    dx0 = df(dy)
    return dx0
def _connect_on_gradient_graph(self, grad_vars, f):
    # 1. Accumulate gradients for variables used more than once; otherwise do nothing.
    vf_vb_map = grad_vars.pop(f)  # {VO_fwd: [VI_bwd]}
    grad_inputs = []
    for o in f.outputs:
        # Address `floating` variables, i.e., outputs no function takes as input.
        # e.g., when dx, db, dg = dBN(...), (db, dg) are not used afterwards.
        # v = vf_vb_map[o] if o in vf_vb_map else [None]
        v = vf_vb_map[o] if o in vf_vb_map else [0]
        if len(v) > 1:
            grad_inputs += [sum(v)]
            # grad_inputs += [F.add_n(v)]
        else:
            grad_inputs += v

    # 2. Look up the backward function.
    f_fwd_name = f.info.type_name
    if f_fwd_name not in registry:
        raise ValueError(
            "{} is not in the backward function registry".format(f_fwd_name))
    backward_func = registry[f_fwd_name]

    # 3. Connect.
    grad_inputs = grad_inputs + f.inputs
    ctx = nn.get_current_context()
    with nn.context_scope(ctx):
        grad_outputs = backward_func(grad_inputs, **f.info.args)
    grad_outputs = self._force_list(grad_outputs)

    # 4. Set each grad_output as a grad_input of the corresponding function.
    for inp, grad_out in zip(f.inputs, grad_outputs):
        if grad_out is None:
            continue
        if inp.parent not in grad_vars:
            grad_vars[inp.parent] = OrderedDict()
        if inp not in grad_vars[inp.parent]:
            grad_vars[inp.parent][inp] = [grad_out]
        else:
            grad_vars[inp.parent][inp] += [grad_out]
    return grad_outputs
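# Usage sketch (assumption): _connect_on_gradient_graph is the step that wires
# the registered backward functions into a gradient graph when nn.grad is
# called. A typical end-to-end use with the public nnabla API looks like this;
# the data and network are dummies for illustration.
import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF

x = nn.Variable.from_numpy_array(
    np.random.randn(4, 8).astype(np.float32)).apply(need_grad=True)
y = PF.affine(x, 16)
loss = F.mean(F.relu(y))
dx = nn.grad([loss], [x])[0]  # dL/dx as a Variable backed by a gradient graph
dx.forward()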
def max_pooling_backward_backward(inputs, kernel, stride=None, ignore_border=True,
                                  pad=None, channel_last=False):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    gdx = inputs[0]
    dy = inputs[1]
    x0 = inputs[2]
    ctx = nn.get_current_context()
    df = MaxPoolingBackwardDataGrad(
        ctx, kernel, stride, ignore_border, pad, channel_last)
    df.yshape = dy.shape
    gdy = df(gdx, x0)
    return gdy, None
def average_pooling_backward(inputs, kernel, stride=None, ignore_border=True,
                             pad=None, channel_last=False, including_pad=True):
    """
    Args:
      inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
      kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
      list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    ctx = nn.get_current_context()
    df = AveragePoolingDataGrad(
        ctx, kernel, stride, ignore_border, pad, channel_last, including_pad)
    df.xshape = x0.shape
    dx0 = df(dy)
    return dx0
# Now we can access the internals of mnist-collection/dcgan.py. The example
# already defines its hyperparameters, so we follow them.
source = inspect.getsource(I)
print(source[source.index("if __name__"):])

max_iter = 20000
learning_rate = 0.0002
batch_size = 64
weight_decay = 0.0001

# Set the context.
context = get_extension_context("cudnn", device_id=0, type_config="float")
nn.set_default_context(context)
nn.get_current_context()

# Build the fake path.
z = nn.Variable([batch_size, 100, 1, 1])
fake = I.generator(z)
fake.persistent = True  # Not to clear at backward
pred_fake = I.discriminator(fake)
loss_gen = F.mean(
    F.sigmoid_cross_entropy(pred_fake, F.constant(1, pred_fake.shape)))
fake_dis = fake.get_unlinked_variable(need_grad=True)
fake_dis.need_grad = True  # TODO: Workaround until v1.0.2
pred_fake_dis = I.discriminator(fake_dis)
loss_dis = F.mean(
    F.sigmoid_cross_entropy(pred_fake_dis, F.constant(0, pred_fake_dis.shape)))

# Build the real path.
def __init__(self):
    ctx = nn.get_current_context()
    if "half" in [x.split(":")[-1] for x in ctx.backend]:
        raise ValueError(
            "Half precision is not currently supported, context = {}".format(ctx))
def abs_max_recorder(x, M, training=True):
    ctx = nn.get_current_context()
    func = AbsMaxRecorder(ctx, training)
    return func(x, M)
def max_mva_recorder(x, M, decay=0.99, training=True):
    ctx = nn.get_current_context()
    func = MaxMvaRecorder(ctx, decay, training)
    return func(x, M)
def minmax_minmax_recorder(x, m, M, training=True):
    ctx = nn.get_current_context()
    func = MinMaxMinMaxRecorder(ctx, training)
    return func(x, m, M)
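# Usage sketch (assumption): the recorder wrappers above track activation
# statistics into externally provided buffer Variables during training.
# The buffer shapes below (a single min/max value per tensor) and the use of
# auto-forward mode are assumptions for illustration only.
import numpy as np
import nnabla as nn

nn.set_auto_forward(True)
x = nn.Variable.from_numpy_array(np.random.randn(8, 16).astype(np.float32))
m = nn.Variable.from_numpy_array(np.zeros((1,), dtype=np.float32))  # running min buffer (assumed shape)
M = nn.Variable.from_numpy_array(np.zeros((1,), dtype=np.float32))  # running max buffer (assumed shape)
y = minmax_minmax_recorder(x, m, M, training=True)
nn.set_auto_forward(False)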