def _batch_norm(inputs, name, trainable=True, training=True):
    params_shape = [inputs.shape[1]]
    # Float32 required to avoid precision loss with fp16 input/output
    params_dtype = flow.float32 if inputs.dtype == flow.float16 else inputs.dtype

    if not flow.current_global_function_desc().IsTrainable() or not trainable:
        training = False

    with flow.scope.namespace(name):
        beta = flow.get_variable(
            name="beta",
            shape=params_shape,
            dtype=params_dtype,
            initializer=flow.zeros_initializer(),
            trainable=trainable,
            distribute=distribute_util.broadcast(),
        )

        gamma = flow.get_variable(
            name="gamma",
            shape=params_shape,
            dtype=params_dtype,
            initializer=flow.ones_initializer(),
            trainable=trainable,
            distribute=distribute_util.broadcast(),
        )

        moving_mean = flow.get_variable(
            name="moving_mean",
            shape=params_shape,
            dtype=params_dtype,
            initializer=flow.zeros_initializer(),
            trainable=False,
            distribute=distribute_util.broadcast(),
        )

        moving_variance = flow.get_variable(
            name="moving_variance",
            shape=params_shape,
            dtype=params_dtype,
            initializer=flow.ones_initializer(),
            trainable=False,
            distribute=distribute_util.broadcast(),
        )
    builder = (
        flow.user_op_builder(id_util.UniqueStr(name))
        .Op("normalization")
        .Input("x", [inputs])
        .Input("moving_mean", [moving_mean])
        .Input("moving_variance", [moving_variance])
        .Input("gamma", [gamma])
        .Input("beta", [beta])
        .Output("y")
        .Attr("axis", 1)
        .Attr("epsilon", 1.001e-5)
        .Attr("training", training)
        .Attr("momentum", 0.997)
    )
    if trainable and training:
        builder = builder.Output("mean").Output("inv_variance")
    return builder.Build().InferAndTryRun().RemoteBlobList()[0]
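A minimal usage sketch (not from the original source) of calling _batch_norm inside a OneFlow 0.x lazy job; the shape, job name, and type="predict" setting are illustrative assumptions.

import oneflow as flow
import oneflow.typing as tp

@flow.global_function(type="predict")
def bn_predict_job(x: tp.Numpy.Placeholder((4, 16, 8, 8))) -> tp.Numpy:
    # _batch_norm normalizes axis 1 (channels in NCHW) with the hard-coded
    # epsilon=1.001e-5 and momentum=0.997; inference mode is used here.
    return _batch_norm(x, name="bn1", trainable=False, training=False)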
Example 2
def find_or_create_module(module_name, create, reuse=False):
    assert callable(create)
    sess = session_ctx.GetDefaultSession()
    job_name = oneflow.current_global_function_desc().job_config_proto.job_name
    if job_name not in sess.job_name2module_name2module_:
        sess.job_name2module_name2module_[job_name] = {}
    module_name2module = sess.job_name2module_name2module_[job_name]
    if module_name not in module_name2module:
        module = create()
        assert isinstance(module, module_util.Module)
        module_name2module[module_name] = module
    elif not reuse:
        assert module_name not in sess.existed_module_names_, (
            "duplicated module_name `%s' in global_function `%s'" %
            (module_name, job_name))
    sess.existed_module_names_.add(module_name)
    return module_name2module[module_name]
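A hypothetical usage sketch for find_or_create_module (names are assumptions, not from the source): the create callable builds a module_util.Module once per job, and reuse=True returns the cached instance on later calls.

def make_module():
    return MyModule()  # MyModule: an assumed module_util.Module subclass

# The first call creates and caches the module for the current job.
m1 = find_or_create_module("shared_block", make_module, reuse=True)
# Subsequent calls with reuse=True return the same cached instance.
m2 = find_or_create_module("shared_block", make_module, reuse=True)
assert m1 is m2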
Example 3
def Foo():
    test_case.assertEqual(
        flow.current_global_function_desc().IsTrainable(), False)
    return flow.get_variable("w", (10, ),
                             initializer=flow.constant_initializer(1))
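For context, a hedged reconstruction of the test harness this closure likely sits in (everything except Foo is an assumption): the assertion checks that a type="predict" job reports IsTrainable() == False.

import oneflow as flow

@flow.global_function(type="predict")
def PredictJob():
    # No train config or optimizer is attached, so the current global
    # function descriptor is non-trainable inside this job.
    return Foo()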
Example 4
def batch_normalization(
    inputs: remote_blob_util.BlobDef,
    axis: int = -1,
    momentum: float = 0.99,
    epsilon: float = 0.001,
    center: bool = True,
    scale: bool = True,
    beta_initializer: Optional[op_conf_util.InitializerConf] = None,
    gamma_initializer: Optional[op_conf_util.InitializerConf] = None,
    beta_regularizer: Optional[op_conf_util.RegularizerConf] = None,
    gamma_regularizer: Optional[op_conf_util.RegularizerConf] = None,
    moving_mean_initializer: Optional[op_conf_util.InitializerConf] = None,
    moving_variance_initializer: Optional[op_conf_util.InitializerConf] = None,
    trainable: bool = True,
    training: bool = True,
    name: str = "BatchNorm",
) -> remote_blob_util.BlobDef:
    r"""Analogous to `tf.keras.layers.BatchNormalization <https://www.tensorflow.org/api_docs/python/tf/keras/layers/BatchNormalization>`_

    Args:
        inputs (remote_blob_util.BlobDef): Input `Blob`.
        axis (int, optional): An int specifying the axis to normalize. Defaults to -1, which normalizes the last axis.
        momentum (float, optional): A float specifying the momentum for the moving average. Defaults to 0.99.
        epsilon (float, optional): A small float added to the variance to avoid division by zero. Defaults to 0.001.
        center (bool, optional): A boolean specifying whether to add an offset (beta) to the normalized `Blob`. Defaults to True.
        scale (bool, optional): A boolean specifying whether to multiply the normalized `Blob` by gamma. Defaults to True.
        beta_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for beta. Defaults to None.
        gamma_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for gamma. Defaults to None.
        beta_regularizer (Optional[op_conf_util.RegularizerConf], optional): Regularizer for beta. Defaults to None.
        gamma_regularizer (Optional[op_conf_util.RegularizerConf], optional): Regularizer for gamma. Defaults to None.
        moving_mean_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for the moving mean. Defaults to None.
        moving_variance_initializer (Optional[op_conf_util.InitializerConf], optional): Initializer for the moving variance. Defaults to None.
        trainable (bool, optional): A boolean specifying whether the beta and gamma variables are trainable. Defaults to True.
        training (bool, optional): A boolean specifying whether the layer is in training mode. Defaults to True.
        name (str, optional): This layer's name. Defaults to "BatchNorm".

    Returns:
        remote_blob_util.BlobDef: A `Blob` with the same shape as the input.

    Raises:
        AssertionError: If `axis` is out of range for the input's dimensions.
    """
    if axis < 0:
        axis += len(inputs.shape)
    assert 0 <= axis < len(inputs.shape)

    params_shape = [inputs.shape[axis]]
    # Float32 required to avoid precision loss with fp16 input/output
    params_dtype = flow.float32 if inputs.dtype == flow.float16 else inputs.dtype

    if not flow.current_global_function_desc().IsTrainable() or not trainable:
        training = False

    with flow.scope.namespace(name):
        if center:
            beta = flow.get_variable(
                name="beta",
                shape=params_shape,
                dtype=params_dtype,
                initializer=beta_initializer or flow.zeros_initializer(),
                regularizer=beta_regularizer,
                trainable=trainable,
                distribute=distribute_util.broadcast(),
                reuse=False,
            )
        else:
            beta = flow.constant(0,
                                 dtype=params_dtype,
                                 shape=params_shape,
                                 name="beta")

        if scale:
            gamma = flow.get_variable(
                name="gamma",
                shape=params_shape,
                dtype=params_dtype,
                initializer=gamma_initializer or flow.ones_initializer(),
                regularizer=gamma_regularizer,
                trainable=trainable,
                distribute=distribute_util.broadcast(),
                reuse=False,
            )
        else:
            gamma = flow.constant(1,
                                  dtype=params_dtype,
                                  shape=params_shape,
                                  name="gamma")

        moving_mean = flow.get_variable(
            name="moving_mean",
            shape=params_shape,
            dtype=params_dtype,
            initializer=moving_mean_initializer or flow.zeros_initializer(),
            trainable=False,
            distribute=distribute_util.broadcast(),
            reuse=False,
        )

        moving_variance = flow.get_variable(
            name="moving_variance",
            shape=params_shape,
            dtype=params_dtype,
            initializer=moving_variance_initializer or flow.ones_initializer(),
            trainable=False,
            distribute=distribute_util.broadcast(),
            reuse=False,
        )

    if flow.current_scope().device_parallel_desc_symbol.device_tag == "cpu":
        if training:
            reduce_axis = [dim for dim in range(len(inputs.shape)) if dim != axis]
            mean, variance = flow.nn.moments(inputs,
                                             reduce_axis,
                                             keepdims=False)

            def update_moving(moving, this_batch):
                # Snapshot the old value with identity before assigning the
                # exponentially averaged update back to the variable.
                moving_identity = flow.identity(moving)
                flow.assign(
                    moving,
                    momentum * moving_identity + (1 - momentum) * this_batch)

            update_moving(moving_mean, mean)
            update_moving(moving_variance, variance)
        else:
            mean, variance = moving_mean, moving_variance

        return flow.nn.batch_normalization(
            x=inputs,
            mean=mean,
            variance=variance,
            offset=beta,
            scale=gamma,
            variance_epsilon=epsilon,
            axis=axis,
            name=name,
        )
    else:
        builder = (
            flow.user_op_builder(name)
            .Op("normalization")
            .Input("x", [inputs])
            .Input("moving_mean", [moving_mean])
            .Input("moving_variance", [moving_variance])
            .Input("gamma", [gamma])
            .Input("beta", [beta])
            .Output("y")
            .Attr("axis", axis)
            .Attr("epsilon", epsilon)
            .Attr("training", training)
            .Attr("momentum", momentum)
        )
        if trainable and training:
            builder = builder.Output("mean").Output("inv_variance")

        return builder.Build().InferAndTryRun().RemoteBlobList()[0]
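A short usage sketch (illustrative, not from the source) applying batch_normalization to an NCHW input in inference mode; shapes and names are assumptions.

import oneflow as flow
import oneflow.typing as tp

@flow.global_function(type="predict")
def eval_job(x: tp.Numpy.Placeholder((8, 3, 32, 32))) -> tp.Numpy:
    # NCHW layout, so normalize over the channel axis (axis=1);
    # training=False makes the op read the moving statistics.
    return batch_normalization(x, axis=1, trainable=False, training=False, name="bn")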
Example 5
def moving_average_min_max_observer(
    input: oneflow_api.BlobDesc,
    quantization_bit: int = 8,
    quantization_scheme: str = "symmetric",
    quantization_formula: str = "google",
    momentum: float = 0.95,
    name: Optional[str] = None,
) -> Tuple[oneflow_api.BlobDesc, oneflow_api.BlobDesc]:
    r"""Compute the quantization parameters based on the moving average of the input tensor's min and max values.

    First, compute the moving\_max and moving\_min values of the input tensor:

        if quantization_scheme == "symmetric": 

        .. math::

            & moving\_max = moving\_max * momentum + |max(input)| * (1 - momentum)

            & moving\_min = moving\_max

        elif quantization_scheme == "affine":

        .. math::

            & moving\_max = moving\_max * momentum + max(input) * (1 - momentum)

            & moving\_min = moving\_min * momentum + min(input) * (1 - momentum)

    The moving averages of the min and max values are initialized with the min and max of the first input batch.

    Then compute the scale and zero_point with the following equations:

        if quantization_scheme == "symmetric": 

        .. math::

            & denom = 2^{quantization\_bit - 1} - 1
            
            & scale = moving\_max / denom

            & zero\_point = 0

        elif quantization_scheme == "affine":

        .. math::

            & denom = 2^{quantization\_bit} - 1
            
            & scale = (moving\_max - moving\_min) / denom

            & zero\_point = -moving\_min / scale
    
    Args:
        input (oneflow_api.BlobDesc): The input tensor.
        quantization_bit (int): Quantize the input to intX/uintX, where X is in the range [2, 8]. Defaults to 8.
        quantization_scheme (str): "symmetric" or "affine", i.e. quantize to signed or unsigned integers. Defaults to "symmetric".
        quantization_formula (str): Supports "google" or "cambricon". Defaults to "google".
        momentum (float): Smoothing parameter for the exponential moving average. Defaults to 0.95.
        name (Optional[str]): This operator's name. Defaults to None.

    Returns:
        Tuple[oneflow_api.BlobDesc, oneflow_api.BlobDesc]: The scale and zero_point of input tensor.
    
    For example: 

    .. code-block:: python 

        from typing import Tuple

        import oneflow as flow
        import numpy as np
        import oneflow.typing as tp

        input_shape = (2, 3, 4)

        @flow.global_function(type="predict", function_config=flow.FunctionConfig())
        def QuantizeJob(
            input: tp.Numpy.Placeholder(input_shape, dtype=flow.float32)
        ) -> Tuple[tp.Numpy, tp.Numpy]:
            with flow.scope.placement("cpu", "0:0"):
                scale, zero_point = flow.quantization.moving_average_min_max_observer(
                    input, quantization_bit=8,
                    quantization_scheme="symmetric",
                    quantization_formula="google",
                    momentum=0.95,
                )
            return scale, zero_point

        input = (np.random.random(input_shape) - 0.5).astype(np.float32)
        scale, zero_point = QuantizeJob(input)

    """
    op_name = (name if name is not None else
               id_util.UniqueStr("MovingAverageMinMaxObserver_"))

    training = flow.current_global_function_desc().IsTrainable()

    with flow.scope.namespace(op_name):
        moving_max = flow.get_variable(
            "moving_max",
            shape=(1, ),
            dtype=input.dtype,
            initializer=flow.zeros_initializer(input.dtype),
            trainable=False,
        )
        moving_min = flow.get_variable(
            "moving_min",
            shape=(1, ),
            dtype=input.dtype,
            initializer=flow.zeros_initializer(input.dtype),
            trainable=False,
        )
        current_train_step = flow.get_variable(
            "current_train_step",
            shape=(1, ),
            dtype=flow.int64,
            initializer=flow.zeros_initializer(flow.int64),
            trainable=False,
        )
    stop_update_after_iters = 1
    scale, zero_point = (
        flow.user_op_builder(op_name)
        .Op("moving_average_min_max_observer")
        .Input("in", [input])
        .Input("current_train_step", [current_train_step])
        .Input("moving_max", [moving_max])
        .Input("moving_min", [moving_min])
        .Output("scale")
        .Output("zero_point")
        .Attr("training", training)
        .Attr("stop_update_after_iters", stop_update_after_iters)
        .Attr("quantization_bit", quantization_bit)
        .Attr("quantization_scheme", quantization_scheme)
        .Attr("quantization_formula", quantization_formula)
        .Attr("momentum", momentum)
        .Build()
        .InferAndTryRun()
        .RemoteBlobList()
    )

    return scale, zero_point
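A pure-Python sanity-check sketch of the scale/zero_point formulas in the docstring above (not part of the library):

def quant_params(moving_max, moving_min, quantization_bit=8, scheme="symmetric"):
    if scheme == "symmetric":
        denom = 2 ** (quantization_bit - 1) - 1   # e.g. 127 for 8 bits
        return moving_max / denom, 0.0
    # affine
    denom = 2 ** quantization_bit - 1             # e.g. 255 for 8 bits
    scale = (moving_max - moving_min) / denom
    return scale, -moving_min / scale

print(quant_params(1.27, -1.0))  # symmetric 8-bit: (0.01, 0.0)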
Example 6
def global_mode_current_scope():
    job_name = oneflow.current_global_function_desc().job_config_proto.job_name
    return session_ctx.GetDefaultSession().GetCurrentScope(job_name)