def convert_layer_batchnormalization(self, layer_config: Dict[str, object], inputs: List[Variable]) -> List[Variable]:
    """Convert a Keras BatchNormalization layer into AxiswiseScale + AxiswiseBias.

    The stored statistics are folded into a single per-channel scale and bias:

        y = (x - mean) / sqrt(var + eps) * gamma + beta
          = x * gamma_div_std + beta_scaled

    where gamma_div_std = gamma / sqrt(var + eps) and
    beta_scaled = beta - mean * gamma_div_std.

    :param layer_config: the Keras layer's ``config`` dict (axis, epsilon,
        scale/center flags, name, ...)
    :param inputs: exactly one input variable
    :return: single-element list holding the normalized output variable
    """
    assert len(inputs) == 1
    x = inputs[0]
    name: str = layer_config["name"]
    # Keras stores the normalized axis as an index into the input's order.
    axis = x.order.axes[layer_config["axis"]]

    mean = self.weights[f"{name}/{name}/moving_mean:0"].value
    variance = self.weights[f"{name}/{name}/moving_variance:0"].value

    # When scale/center are disabled, gamma/beta default to identity values.
    gamma = (self.weights[f"{name}/{name}/gamma:0"].value
             if layer_config["scale"] else np.ones_like(variance))
    beta = (self.weights[f"{name}/{name}/beta:0"].value
            if layer_config["center"] else np.zeros_like(mean))

    gamma_div_std = gamma / np.sqrt(variance + layer_config["epsilon"])
    beta_scaled = beta - mean * gamma_div_std

    scaled, = AxiswiseScale(name + "_scale", axis=axis)(x, ConstantVariable(gamma_div_std, OrderC))
    y, = AxiswiseBias(name + "_bias", axis=axis)(scaled, ConstantVariable(beta_scaled, OrderC))
    return [y]
def _convert_batch_normalization(converter: KerasConverter, k_op: keras.layers.BatchNormalization):
    """Register WebDNN operators for a Keras BatchNormalization layer.

    Folds the layer's statistics into one axis-wise scale followed by one
    axis-wise bias:

        y = x * (gamma / sqrt(var + eps)) + (beta - mean * gamma / sqrt(var + eps))
    """
    x = converter.get_variable(converter.get_input_tensor(k_op)[0])
    axis = x.order.axes[k_op.axis]

    variance_data, mean_data = K.batch_get_value([k_op.moving_variance, k_op.moving_mean])

    # gamma/beta fall back to identity values when scale/center are disabled.
    gamma_data = K.batch_get_value([k_op.gamma])[0] if k_op.scale else np.ones_like(variance_data)
    beta_data = K.batch_get_value([k_op.beta])[0] if k_op.center else np.zeros_like(mean_data)

    scale_data = gamma_data / np.sqrt(variance_data + k_op.epsilon)
    bias_data = beta_data - mean_data * scale_data

    y, = AxiswiseScale(None, axis=axis)(x, ConstantVariable(scale_data, Order([axis])))
    y, = AxiswiseBias(None, axis=axis)(y, ConstantVariable(bias_data, Order([axis])))

    converter.set_variable(converter.get_output_tensor(k_op)[0], y)
def test_axiswise_scale():
    """
    before) s -+
               +-{AxiswiseScale}- y
            x -+

    after)  s -+
               +-{ElementwiseMul}- y
            x -+
    """
    scale = ConstantVariable(np.random.rand(3), OrderC)
    x = Variable((2, 3, 4, 5), OrderNCHW)
    y, = AxiswiseScale(None, axis=Axis.C)(x, scale)
    assert isinstance(y.output_from, AxiswiseScale)

    UpgradeOperatorType().optimize(Graph([x], [y]))

    # The pass must have rewritten the producer into an ElementwiseMul.
    assert isinstance(y.output_from, ElementwiseMul)
def test_conv_scale():
    """ConcatAffine must fold an AxiswiseScale following a Convolution2D into the filter."""
    for order_x, order_w in itertools.product(orders4, orders4):
        conv = Convolution2D(None, ksize=3, stride=1, padding=1)
        scale = AxiswiseScale(None, axis=Axis.C)

        x = Variable([8, 7, 6, 5], OrderNHWC)
        x.change_order(order_x)

        w = ConstantVariable(arange_shaped([4, 3, 3, 5]), OrderNHWC)
        w.change_order(order_w)
        w_data = w.data.copy()

        h, = conv(x, w)

        s = ConstantVariable(arange_shaped([h.shape_dict[Axis.C]]), OrderC)
        s_data = s.data.copy()

        y, = scale(h, s)

        graph = Graph([x], [y])
        graph, _ = ConcatAffine().optimize(graph)

        # Broadcast s along every axis of the filter except its N axis.
        n_pos = order_w.axes_dict[Axis.N]
        # noinspection PyTypeChecker
        expander = (None,) * n_pos + (Ellipsis,) + (None,) * (3 - n_pos)
        w_data_expected = w_data * s_data[expander]

        # The scale must be gone: a single Convolution2D remains and its
        # filter data absorbed the per-channel factor.
        ops = listup_operators(graph)
        assert len(ops) == 1 and isinstance(ops[0], Convolution2D)
        assert conv.outputs["y"] == y
        assert np.all(np.equal(w.data, w_data_expected))
def test_NHWC():
    """AxiswiseScale over the C axis of an NHWC tensor (constant scale)."""
    vx = np.random.rand(10, 6, 4, 8)
    vs = np.random.rand(8)
    # C is the trailing axis, so plain broadcasting gives the reference output.
    vy = vx * vs

    x = Variable(vx.shape, order=OrderNHWC)
    s = ConstantVariable(vs, order=OrderC)
    y, = AxiswiseScale(None, axis=Axis.C)(x, s)

    generate_kernel_test_case(
        description=f"AxiswiseScale for input OrderNHWC",
        backend=["webgpu", "webassembly", "fallback"],
        graph=Graph([x], [y]),
        inputs={x: vx},
        expected={y: vy}
    )
def test_major_axis():
    """AxiswiseScale when the scaled axis (C) is the leading axis of the input."""
    vx = np.random.rand(10, 6, 4, 8)
    vs = np.random.rand(10)
    # Reshape the scale to broadcast along the leading axis.
    vy = vx * vs.reshape(-1, 1, 1, 1)

    x = Variable(vx.shape, order=OrderCNHW)
    s = Variable(vs.shape, order=OrderC)
    y, = AxiswiseScale(None, axis=Axis.C)(x, s)

    generate_kernel_test_case(
        description=f"AxiswiseScale for major axis",
        backend=["webgpu", "webassembly", "fallback"],
        graph=Graph([x, s], [y]),
        inputs={x: vx, s: vs},
        expected={y: vy}
    )
def _convert_batch_normalization_function(
        converter: ChainerConverter,
        c_op: chainer.functions.normalization.batch_normalization.BatchNormalizationFunction):
    """Convert Chainer's BatchNormalizationFunction into AxiswiseScale + AxiswiseBias.

    Folds the normalization into two per-channel constants:

        y = (x - mean) / sqrt(var + eps) * gamma + beta
          = x * gamma_div_std + beta_scaled

        gamma_div_std = gamma / sqrt(var + eps)
        beta_scaled   = beta - mean * gamma_div_std

    :param converter: the active ChainerConverter
    :param c_op: the traced BatchNormalizationFunction node
    :raises ValueError: when the function node has neither 5 nor 3 inputs
    """
    x = converter.get_variable(c_op.inputs[0])
    gamma = converter.get_variable(c_op.inputs[1])
    beta = converter.get_variable(c_op.inputs[2])

    if len(c_op.inputs) == 5:
        # Test-mode call: mean and variance are passed as explicit inputs.
        # noinspection PyUnresolvedReferences
        mean_data = converter.get_variable(c_op.inputs[3]).data
        # noinspection PyUnresolvedReferences
        variance_data = converter.get_variable(c_op.inputs[4]).data
    elif len(c_op.inputs) == 3:
        # Train-mode call: statistics live on the function object itself.
        variance_data = c_op.running_var
        mean_data = c_op.running_mean
    else:
        raise ValueError(
            "inputs to BatchNormalizationFunction have to be 5 or 3.")
    # NOTE: removed a leftover `console.debug(variance_data)` that dumped the
    # whole variance array on every conversion.

    # Simplify scale and bias:
    #   y = x * gamma_div_std + beta_scaled
    # noinspection PyUnresolvedReferences
    gamma_div_std = gamma.data / np.sqrt(variance_data + c_op.eps)
    # noinspection PyUnresolvedReferences
    beta_scaled = beta.data - mean_data * gamma_div_std

    scale_out, = AxiswiseScale(None, axis=Axis.C)(
        x, ConstantVariable(gamma_div_std, OrderC))
    offset_out, = AxiswiseBias(None, axis=Axis.C)(
        scale_out, ConstantVariable(beta_scaled, OrderC))

    converter.set_variable(c_op.outputs[0](), offset_out)
def test_mix_order():
    """AxiswiseScale where the variable's memory order changes after graph construction."""
    vx = np.random.rand(10, 6, 4, 8)
    vs = np.random.rand(10)
    vy = vx * vs[:, None, None, None]

    x = Variable(vx.shape, order=OrderCNHW)
    s = ConstantVariable(vs, order=OrderC)
    y, = AxiswiseScale(None, axis=Axis.C)(x, s)

    # Switch x to NHWC after building the graph; move the C axis of the
    # reference input to the tail so the data matches the new order.
    x.change_order(OrderNHWC)
    vx = np.rollaxis(vx, 0, 4)

    generate_kernel_test_case(
        description=f"AxiswiseScale for mix order",
        backend=["webgpu"],
        graph=Graph([x], [y]),
        inputs={x: vx},
        expected={y: vy}
    )
def test_every_order():
    """AxiswiseScale must preserve every input dimension for all 4D orders."""
    orders_x = [OrderNHWC, OrderHWNC, OrderHWCN, OrderNCHW, OrderCNHW, OrderCHWN]
    axes = [Axis.C]
    default_order = {1: OrderC, 2: OrderNC, 4: OrderNHWC, Axis.C: OrderC}

    for order_x, axis in itertools.product(orders_x, axes):
        if axis not in order_x.axes:
            continue

        op = AxiswiseScale(None, axis=axis)
        x = Variable(np.arange(order_x.ndim) + 1, default_order[order_x.ndim])
        x.change_order(order_x)
        w = Variable((x.shape_dict[axis],), default_order[axis])
        y, = op(x, w)

        # Renamed from `axis` to avoid shadowing the loop variable.
        for out_axis in y.order.axes:
            assert y.shape_dict[out_axis] == x.shape_dict[out_axis]
def __call__(self, inputs: List[Variable]) -> Tuple[Variable]:
    """Build AxiswiseScale + AxiswiseBias from the five batch-norm inputs.

    Every input except x is folded into two fresh constants:

        gamma_div_std = gamma / sqrt(var + eps)
        beta_scaled   = beta - mean * gamma_div_std
        y = x * gamma_div_std + beta_scaled

    :param inputs: [x, gamma, beta, mean, variance]
    :return: one-element tuple holding the output variable
    """
    assert len(inputs) == 5
    x, gamma, beta, mean, variance = inputs

    gamma_div_std = gamma.data / np.sqrt(variance.data + self.cfunc.eps)
    beta_scaled = beta.data - mean.data * gamma_div_std

    gamma_div_std_const = ConstantVariable(gamma_div_std, OrderC)
    scale_out, = AxiswiseScale(generate_unique_name(self.cfunc.label),
                               axis=Axis.C)(x, gamma_div_std_const)
    self.hidden_vars.append(scale_out)
    self.hidden_consts.append(gamma_div_std_const)

    beta_scaled_const = ConstantVariable(beta_scaled, OrderC)
    offset_out, = AxiswiseBias(generate_unique_name(self.cfunc.label),
                               axis=Axis.C)(scale_out, beta_scaled_const)
    self.hidden_consts.append(beta_scaled_const)

    return offset_out,
def test_invalid_size():
    # w has 6 elements while x's C axis has size 5 — a size mismatch.
    # NOTE(review): no assertion is visible here; presumably the test is
    # decorated to expect the operator to raise — confirm against the file.
    op = AxiswiseScale(None, axis=Axis.C)
    data = Variable((2, 3, 4, 5), OrderNHWC)
    weight = Variable((6,), OrderC)
    y, = op(data, weight)