def apply_proximal_adagrad_d(var,
                             accum,
                             lr,
                             l1,
                             l2,
                             grad,
                             var_out,
                             accum_out,
                             use_locking=False,
                             kernel_name="apply_proximal_adagrad_d"):
    """
    Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.

    Parameters
    ----------
    var: dict
        input tensor contains shape and dtype attributes.
        only support float16, float32.
    accum: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    lr: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    l1: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    l2: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    grad: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    var_out: dict
        output tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    accum_out: dict
        output tensor contains shape and dtype attributes.
        Must have the same type as 'accum'.
    use_locking: bool
        default value is False
    kernel_name: str
        kernel name, default value is "apply_proximal_adagrad_d"

    Returns:
    None
    """
    _check_shape_is_same(var, accum, grad)

    input_dict = (var, accum, lr, l1, l2, grad)
    args = ApplyOpConfig.TensorArgs(input_dict,
                                    apply_proximal_adagrad_d_compute,
                                    [var_out, accum_out], 15)
    name = ApplyOpConfig.TensorName(all=('var', 'accum', 'lr', 'l1', 'l2',
                                         'grad'),
                                    scalar=('lr', 'l1', 'l2'),
                                    reuse=('var', 'accum'))
    common_apply_op_process(ApplyOpConfig(args, name), kernel_name)
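The docstring above does not spell out the update rule, so the following is a minimal NumPy sketch of the usual FOBOS-with-Adagrad-learning-rate update; the helper name _ref_proximal_adagrad is hypothetical, the formula is an assumption about the op's semantics, and this is not the TBE compute path.

import numpy as np

def _ref_proximal_adagrad(var, accum, lr, l1, l2, grad):
    # Adagrad accumulator and the per-element effective learning rate.
    accum = accum + grad * grad
    adj_lr = lr / np.sqrt(accum)
    # FOBOS proximal step with l1/l2 regularization.
    prox_v = var - adj_lr * grad
    var = np.sign(prox_v) / (1.0 + adj_lr * l2) * np.maximum(
        np.abs(prox_v) - adj_lr * l1, 0.0)
    return var, accum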
Example #2
def apply_adadelta_d(var,
                     accum,
                     accum_update,
                     lr,
                     rho,
                     epsilon,
                     grad,
                     var_out,
                     accum_out,
                     accum_update_out,
                     kernel_name="apply_adadelta_d"):
    """
    Update '*var' according to the adadelta scheme.

    accum = rho * accum + (1 - rho) * grad ** 2
    update = (accum_update + epsilon).sqrt() * (accum + epsilon).rsqrt() * grad
    accum_update = rho * accum_update + (1 - rho) * update.square()
    var -= update * lr

    Parameters:
    ----------
    var: the dict of input, only support float16, float32

    accum: the dict of accum, only support float16, float32

    accum_update: the dict of accum_update, only support float16, float32

    lr: the dict of lr, only support float16, float32

    rho: the dict of rho, only support float16, float32

    epsilon: the dict of epsilon, only support float16, float32

    grad: the dict of grad, only support float16, float32

    var_out: the dict of var output data

    accum_out: the dict of accum output data

    accum_update_out: the dict of accum_update output data

    kernel_name : cce kernel name, default value is "apply_adadelta_d"

    Returns
    -------
    None
    """
    input_dict = (var, accum, accum_update, lr, rho, epsilon, grad)

    args = ApplyOpConfig.TensorArgs(input_dict, apply_adadelta_d_compute,
                                    [var_out, accum_out, accum_update_out], 16)
    name = ApplyOpConfig.TensorName(all=('var', 'accum', 'accum_update', 'lr',
                                         'rho', 'epsilon', 'grad'),
                                    scalar=('lr', 'rho', 'epsilon'),
                                    reuse=('var', 'accum', 'accum_update'))

    common_apply_op_process(ApplyOpConfig(args, name), kernel_name)
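For reference, a minimal NumPy sketch of the Adadelta update rule quoted in the docstring above; the helper name _ref_adadelta is hypothetical and this is not the apply_adadelta_d_compute kernel.

import numpy as np

def _ref_adadelta(var, accum, accum_update, lr, rho, epsilon, grad):
    accum = rho * accum + (1.0 - rho) * grad ** 2
    update = np.sqrt(accum_update + epsilon) / np.sqrt(accum + epsilon) * grad
    accum_update = rho * accum_update + (1.0 - rho) * update ** 2
    var = var - update * lr
    return var, accum, accum_update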
Example #3
def apply_power_sign_d(var,
                       m,
                       lr,
                       logbase,
                       sign_decay,
                       beta,
                       grad,
                       var_out,
                       m_out,
                       kernel_name="apply_power_sign_d"):
    """
    Update '*var' according to the PowerSign update.

    Parameters:
    ----------
    var: dict of Variable, only support float16, float32
    m : dict of m, only support float16, float32
    lr : dict of lr, only support float16, float32
    logbase : dict of logbase, only support float16, float32
    sign_decay : dict of sign_decay, only support float16, float32
    grad : dict of grad, only support float16, float32
    beta : dict of beta, only support float16, float32
    var_out : dict of output, only support float16, float32
    m_out : dict of output, only support float16, float32
    kernel_name : cce kernel name, default value is "apply_power_sign_d"

    Algorithm :
    ----------
    m_t <- beta * m_{t-1} + (1 - beta) * grad
    update <- exp(logbase * sign_decay * sign(grad) * sign(m_t)) * grad
    variable <- variable - lr_t * update


    Returns
    ----------
    None
    """
    input_dict = (var, m, lr, logbase, sign_decay, beta, grad)

    check_list = ('float16', 'float32')
    dtype = var.get('dtype')
    check_dtype(dtype, check_list, param_name="var")
    dtype = dtype.lower()

    args = ApplyOpConfig.TensorArgs(input_dict, apply_power_sign_d_compute,
                                    [var_out, m_out],
                                    6 if dtype == 'float32' else 10)
    name = ApplyOpConfig.TensorName(all=('var', 'm', 'lr', 'logbase',
                                         'sign_decay', 'beta', 'grad'),
                                    scalar=('lr', 'logbase', 'sign_decay',
                                            'beta'),
                                    reuse=('m', 'var'))

    common_apply_op_process(ApplyOpConfig(args, name), kernel_name)
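For reference, a minimal NumPy sketch of the PowerSign rule from the Algorithm section above; _ref_power_sign is a hypothetical helper, not the TBE compute.

import numpy as np

def _ref_power_sign(var, m, lr, logbase, sign_decay, beta, grad):
    m = beta * m + (1.0 - beta) * grad
    update = np.exp(logbase * sign_decay * np.sign(grad) * np.sign(m)) * grad
    var = var - lr * update
    return var, m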
Example #4
def apply_proximal_gradient_descent(
        var,
        alpha,
        l1,
        l2,
        delta,
        out,
        kernel_name="apply_proximal_gradient_descent"):
    """
    Update '*var' as the FOBOS algorithm with a fixed learning rate.

    prox_v = var - alpha * delta
    var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}

    Parameters:
    ----------
    var: the dict of var, only support float16, float32
    alpha: the dict of alpha, only support float16, float32
    l1: the dict of l1, only support float16, float32
    l2: the dict of l2, only support float16, float32
    delta: the dict of delta, only support float16, float32
    out: the dict of output, only support float16, float32

    kernel_name : cce kernel name, default value is
        "apply_proximal_gradient_descent"

    Returns
    -------
    None
    """

    check_list = ('float16', 'float32')
    dtype = var.get('dtype')
    check_dtype(dtype, check_list, param_name="var")
    dtype = dtype.lower()

    input_dict = (var, alpha, l1, l2, delta)

    args = ApplyOpConfig.TensorArgs(input_dict,
                                    apply_proximal_gradient_descent_compute,
                                    out, 5 if dtype == 'float32' else 10)
    name = ApplyOpConfig.TensorName(all=('var', 'alpha', 'l1', 'l2', 'delta'),
                                    scalar=('alpha', 'l1', 'l2'),
                                    reuse=('var', ))
    options = ApplyOpConfig.TensorOptions(build=set_bool_storage_config())

    common_apply_op_process(ApplyOpConfig(args, name, options), kernel_name)
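A minimal NumPy sketch of the fixed-learning-rate FOBOS step quoted in the docstring above; _ref_proximal_gd is a hypothetical helper name.

import numpy as np

def _ref_proximal_gd(var, alpha, l1, l2, delta):
    prox_v = var - alpha * delta
    var = np.sign(prox_v) / (1.0 + alpha * l2) * np.maximum(
        np.abs(prox_v) - alpha * l1, 0.0)
    return var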
Example #5
def apply_add_sign_d(var,
                     m,
                     lr,
                     alpha,
                     sign_decay,
                     beta,
                     grad,
                     var_out,
                     m_out,
                     kernel_name="apply_add_sign_d"):
    """
    Update '*var' according to the AddSign update.

    m_t <- beta * m_{t-1} + (1 - beta) * g
    update <- (alpha + sign_decay * sign(g) * sign(m_t)) * g
    variable <- variable - lr_t * update

    Parameters:
    ----------
    var: the dict of var, support float16, float32
    m: the dict of m, support float16, float32
    lr: the dict of lr, support float16, float32
    alpha: the dict of alpha, support float16, float32
    sign_decay: the dict of sign_decay, support float16, float32
    beta: the dict of beta, support float16, float32
    grad: the dict of grad, support float16, float32
    var_out: the dict of var output data
    m_out: the dict of m output data
    kernel_name : cce kernel name, default value is "apply_add_sign_d"

    Returns
    -------
    None
    """

    input_dict = (var, m, lr, alpha, sign_decay, beta, grad)
    out = [var_out, m_out]
    args = ApplyOpConfig.TensorArgs(input_dict, apply_add_sign_d_compute, out,
                                    10)
    name = ApplyOpConfig.TensorName(all=('var', 'm', 'lr', 'alpha',
                                         'sign_decay', 'beta', 'grad'),
                                    scalar=('lr', 'alpha', 'sign_decay',
                                            'beta'),
                                    reuse=('var', 'm'))
    options = ApplyOpConfig.TensorOptions(build=set_bool_storage_config())
    common_apply_op_process(ApplyOpConfig(args, name, options), kernel_name)
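A minimal NumPy sketch of the AddSign rule quoted in the docstring above; _ref_add_sign is a hypothetical helper, not the apply_add_sign_d_compute kernel.

import numpy as np

def _ref_add_sign(var, m, lr, alpha, sign_decay, beta, grad):
    m = beta * m + (1.0 - beta) * grad
    update = (alpha + sign_decay * np.sign(grad) * np.sign(m)) * grad
    var = var - lr * update
    return var, m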
Example #6
File: sgd.py  Project: gekowa/ascend-opp
def sgd(parameters,
        gradient,
        learning_rate,
        accum,
        momentum,
        stat,
        update,
        dampening,
        weight_decay,
        nesterov,
        kernel_name="sgd"):
    """
    Update '*parameters' according to the SGD algorithm.

    accum = accum * momentum + grad
    if nesterov is True:
        parameters -= grad * lr + accum * momentum * lr
    else:
        parameters -= accum * lr

    Parameters:
    ----------
    parameters : mutable tensor parameters.

    gradient : tensor grad.

    learning_rate : scalar lr.

    accum: mutable tensor accum.

    momentum : scalar momentum.

    stat : mutable tensor stat.

    update: out dict.

    dampening: (float, optional): dampening for momentum (default: 0)

    weight_decay: weight decay (L2 penalty) (default: 0)

    nesterov: bool. If true, use nesterov computing grad,
    default value is False.

    kernel_name : cce kernel name, default value is "sgd" (optional).

    Returns:
    -------
    None
    """
    if nesterov and dampening != 0:
        raise RuntimeError("Nesterov requires zero dampening!")
    if weight_decay < 0:
        raise RuntimeError("weight_decay must >=0.")

    input_dict = (parameters, gradient, learning_rate, accum, momentum, stat)
    args = ApplyOpConfig.TensorArgs(
        input_dict,
        sgd_compute,
        update,
        17 if nesterov else 9,
    )

    name = ApplyOpConfig.TensorName(all=('parameters', 'gradient',
                                         'learning_rate', 'accum', 'momentum',
                                         'stat'),
                                    scalar=('learning_rate', 'momentum'),
                                    reuse=('accum', 'parameters', 'stat'))
    options = ApplyOpConfig.TensorOptions(
        attrs=[dampening, weight_decay, nesterov])
    common_apply_op_process(ApplyOpConfig(args, name, options), kernel_name)
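A minimal NumPy sketch of the SGD rule quoted in the docstring above. The dampening, weight_decay and stat inputs are passed through as attrs/tensors and are omitted here; _ref_sgd is a hypothetical helper, not the sgd_compute kernel.

import numpy as np

def _ref_sgd(parameters, gradient, learning_rate, accum, momentum, nesterov):
    accum = accum * momentum + gradient
    if nesterov:
        parameters = parameters - (gradient * learning_rate
                                   + accum * momentum * learning_rate)
    else:
        parameters = parameters - accum * learning_rate
    return parameters, accum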
Example #7
def apply_ftrl_d(var,
                 accum,
                 linear,
                 grad,
                 lr,
                 l1,
                 l2,
                 lr_power,
                 var_out,
                 accum_out,
                 linear_out,
                 kernel_name="apply_ftrl_d"):
    """
    Update '*var' according to the Ftrl-proximal algorithm.
    accum_new = accum + grad * grad
    linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
    quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
    var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
    accum = accum_new

    Parameters:
    ----------
    var : the dict of mutable tensor var, only support float16, float32

    accum : the dict of mutable tensor accum.
        Must have the same data type as `var`.

    linear : the dict of mutable tensor linear.
        Must have the same data type as `var`.

    grad : the dict of tensor grad. Must have the same data type as `var`.

    lr : the dict of scalar lr. Must have the same data type as `var`.

    l1 : the dict of scalar l1. Must have the same data type as `var`.

    l2 : the dict of scalar l2. Must have the same data type as `var`.

    lr_power : the dict of scalar lr_power.
        Must have the same data type as `var`.

    var_out: the dict of var output data.

    accum_out: the dict of accum output data.

    linear_out: the dict of linear output data

    kernel_name : cce kernel name, default value is "apply_ftrl_d".

    Returns
    -------
    None
    """

    input_dict = (var, accum, linear, grad, lr, l1, l2, lr_power)
    out = [var_out, accum_out, linear_out]
    args = ApplyOpConfig.TensorArgs(input_dict, apply_ftrl_d_compute, out, 15)
    name = ApplyOpConfig.TensorName(all=('var', 'accum', 'linear', 'grad',
                                         'lr', 'l1', 'l2', 'lr_power'),
                                    scalar=('lr', 'l1', 'l2', 'lr_power'),
                                    reuse=('var', 'accum', 'linear'))
    options = ApplyOpConfig.TensorOptions(build=set_bool_storage_config())

    common_apply_op_process(ApplyOpConfig(args, name, options), kernel_name)
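A minimal NumPy sketch of the Ftrl-proximal rule quoted in the docstring above; _ref_ftrl is a hypothetical helper, not the apply_ftrl_d_compute kernel.

import numpy as np

def _ref_ftrl(var, accum, linear, grad, lr, l1, l2, lr_power):
    accum_new = accum + grad * grad
    linear = linear + grad - (accum_new ** (-lr_power)
                              - accum ** (-lr_power)) / lr * var
    quadratic = 1.0 / (accum_new ** lr_power * lr) + 2.0 * l2
    var = np.where(np.abs(linear) > l1,
                   (np.sign(linear) * l1 - linear) / quadratic, 0.0)
    return var, accum_new, linear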
Example #8
def apply_momentum_d(var,
                     accum,
                     lr,
                     grad,
                     momentum,
                     var_out,
                     accum_out,
                     use_nesterov=False,
                     kernel_name="apply_momentum_d"):
    """
    Update '*var' according to the ApplyMomentum algorithm.

    accum = accum * momentum + grad
    if use_nesterov is True:
        var -= grad * lr + accum * momentum * lr
    else:
        var -= accum * lr

    Parameters:
    ----------
    var : the dict of mutable tensor var, only support float16, float32.

    accum : the dict of mutable tensor accum.
        Must have the same data type as `var`.

    lr : the dict of scalar lr. Must have the same data type as `var`.

    grad : the dict of tensor grad. Must have the same data type as `var`.

    momentum : the dict of scalar momentum.
        Must have the same data type as `var`.

    var_out : the dict of output var.

    accum_out : the dict of output accum.

    use_nesterov: bool. If true, use nesterov computing grad,
        default value is False.

    kernel_name : cce kernel name, default value is "apply_momentum_d".

    Returns
    -------
    None
    """

    input_dict = (var, accum, lr, grad, momentum)

    args = ApplyOpConfig.TensorArgs(
        input_dict,
        apply_momentum_compute_d,
        [var_out, accum_out],
        8 if use_nesterov else 6,
    )
    name = ApplyOpConfig.TensorName(all=('var', 'accum', 'lr', 'grad',
                                         'momentum'),
                                    scalar=('lr', 'momentum'),
                                    reuse=('accum', 'var'))
    options = ApplyOpConfig.TensorOptions(attrs=use_nesterov)

    common_apply_op_process(ApplyOpConfig(args, name, options), kernel_name)
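A minimal NumPy sketch of the ApplyMomentum rule quoted in the docstring above; _ref_apply_momentum is a hypothetical helper, not the apply_momentum_compute_d kernel.

import numpy as np

def _ref_apply_momentum(var, accum, lr, grad, momentum, use_nesterov):
    accum = accum * momentum + grad
    if use_nesterov:
        var = var - (grad * lr + accum * momentum * lr)
    else:
        var = var - accum * lr
    return var, accum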
Example #9
def apply_ftrl_v2_d(var,
                    accum,
                    linear,
                    grad,
                    lr,
                    l1,
                    l2,
                    l2_shrinkage,
                    lr_power,
                    var_out,
                    accum_out,
                    linear_out,
                    use_locking=False,
                    kernel_name="apply_ftrl_v2_d"):
    """
    Update '*var' according to the Ftrl-proximal algorithm.

    grad_with_shrinkage = grad + 2 * l2_shrinkage * var
    accum_new = accum + grad * grad
    linear += grad_with_shrinkage -
        (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
    x = l1 * linear.sign - linear
    y = accum_new^(-lr_power) / lr + 2 * l2
    var = x / y if |linear| > l1 else 0.0
    accum = accum_new

    Parameters:
    ----------
    var : the dict of mutable tensor var, only support float16, float32

    accum : the dict of mutable tensor accum.
        Must have the same data type as `var`.

    linear : the dict of mutable tensor linear.
        Must have the same data type as `var`.

    grad : the dict of tensor grad. Must have the same data type as `var`.

    lr : the dict of scalar lr. Must have the same data type as `var`.

    l1 : the dict of scalar l1. Must have the same data type as `var`.

    l2 : the dict of scalar l2. Must have the same data type as `var`.

    l2_shrinkage: the dict of scalar l2_shrinkage.
        Must have the same data type as `var`.

    lr_power : the dict of scalar lr_power.
        Must have the same data type as `var`.

    var_out : the dict of output var.

    accum_out : the dict of output accum.

    linear_out : the dict of output linear.

    use_locking : optional attr, default value is False.

    kernel_name : cce kernel name, default value is "apply_ftrl_v2_d".

    Returns
    -------
    None
    """
    input_dict = (var, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power)

    args = ApplyOpConfig.TensorArgs(input_dict, apply_ftrl_v2_d_compute,
                                    [var_out, accum_out, linear_out], 15)
    name = ApplyOpConfig.TensorName(all=('var', 'accum', 'linear', 'grad',
                                         'lr', 'l1', 'l2', 'l2_shrinkage',
                                         'lr_power'),
                                    scalar=('lr', 'l1', 'l2', 'l2_shrinkage',
                                            'lr_power'),
                                    reuse=('var', 'accum', 'linear'))
    options = ApplyOpConfig.TensorOptions(build=set_bool_storage_config())
    common_apply_op_process(ApplyOpConfig(args, name, options), kernel_name)
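A minimal NumPy sketch of the FtrlV2 rule (with l2 shrinkage) quoted in the docstring above; _ref_ftrl_v2 is a hypothetical helper, not the apply_ftrl_v2_d_compute kernel.

import numpy as np

def _ref_ftrl_v2(var, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power):
    grad_shr = grad + 2.0 * l2_shrinkage * var
    accum_new = accum + grad * grad
    linear = linear + grad_shr - (accum_new ** (-lr_power)
                                  - accum ** (-lr_power)) / lr * var
    x = l1 * np.sign(linear) - linear
    y = accum_new ** (-lr_power) / lr + 2.0 * l2
    var = np.where(np.abs(linear) > l1, x / y, 0.0)
    return var, accum_new, linear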
Example #10
def apply_adagrad_da_d(var,
                       gradient_accumulator,
                       gradient_squared_accumulator,
                       grad,
                       lr,
                       l1,
                       l2,
                       global_step,
                       var_out,
                       gradient_accumulator_out,
                       gradient_squared_accumulator_out,
                       use_locking=False,
                       kernel_name='apply_adagrad_da_d'):
    """
    Update '*var' according to the proximal Adagrad (AdagradDA) scheme.

    grad_accum += grad
    grad_squared_accum += grad * grad
    tmp_val = sign(grad_accum) * max{|grad_accum| - l1 * global_step, 0}
        if l1 > 0 else grad_accum
    x_value = -1 * lr * tmp_val
    y_value = l2 * global_step * lr + sqrt(grad_squared_accum)
    var = x_value / y_value

    Parameters:
    ----------
    var : the dict of mutable tensor var, only support float16, float32

    gradient_accumulator:
        the dict of mutable tensor gradient_accumulator,
        Must have the same data type as `var`.

    gradient_squared_accumulator :
        the dict of mutable tensor gradient_squared_accumulator,
        Must have the same data type as `var`.

    grad : the dict of tensor grad. Must have the same data type as `var`.

    lr : the dict of scalar lr. Must have the same data type as `var`.

    l1 : the dict of scalar l1. Must have the same data type as `var`.

    l2 : the dict of scalar l2. Must have the same data type as `var`.

    global_step : the dict of scalar global_step, only support int32.

    var_out : the dict of output.

    gradient_accumulator_out : the dict of output.

    gradient_squared_accumulator_out : the dict of output.

    use_locking : optional attr, default value is False.

    kernel_name : cce kernel name, default value is "apply_adagrad_da_d".

    Returns:
    -------
    None
    """
    # check dtype same
    stype_dict = (var, gradient_accumulator, gradient_squared_accumulator,
                  grad, lr, l1, l2)
    normalized_dtype_list = [None] * len(stype_dict)
    for i, d in enumerate(stype_dict):
        dtype = d.get('dtype')
        normalized_dtype_list[i] = dtype.lower()
    if any(elem != normalized_dtype_list[0] for elem in normalized_dtype_list):
        raise RuntimeError("All input data types must be the same")

    # check global_step dtype
    dtype = global_step.get("dtype").lower()
    check_dtype(dtype, ("int32", ), param_name="global_step")

    input_dict = (var, gradient_accumulator, gradient_squared_accumulator,
                  grad, lr, l1, l2, global_step)
    args = ApplyOpConfig.TensorArgs(
        input_dict, apply_adagrad_da_d_compute,
        [var_out, gradient_accumulator_out, gradient_squared_accumulator_out],
        15)
    name = ApplyOpConfig.TensorName(
        all=('var', 'gradient_accumulator', 'gradient_squared_accumulator',
             'grad', 'lr', 'l1', 'l2', 'global_step'),
        scalar=('lr', 'l1', 'l2', 'global_step'),
        reuse=('var', 'gradient_accumulator', 'gradient_squared_accumulator'))
    options = ApplyOpConfig.TensorOptions(build=set_bool_storage_config(),
                                          dtype=('float16', 'float32',
                                                 'int32'))
    common_apply_op_process(ApplyOpConfig(args, name, options),
                            kernel_name,
                            same_flag=False)
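A minimal NumPy sketch of the AdagradDA rule quoted in the docstring above; _ref_adagrad_da is a hypothetical helper, not the apply_adagrad_da_d_compute kernel.

import numpy as np

def _ref_adagrad_da(var, grad_accum, grad_sq_accum, grad, lr, l1, l2,
                    global_step):
    grad_accum = grad_accum + grad
    grad_sq_accum = grad_sq_accum + grad * grad
    if l1 > 0:
        tmp_val = np.sign(grad_accum) * np.maximum(
            np.abs(grad_accum) - l1 * global_step, 0.0)
    else:
        tmp_val = grad_accum
    x_value = -1.0 * lr * tmp_val
    y_value = l2 * global_step * lr + np.sqrt(grad_sq_accum)
    var = x_value / y_value
    return var, grad_accum, grad_sq_accum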
Example #11
def apply_centered_rms_prop_d(var,
                              mg,
                              ms,
                              mom,
                              lr,
                              rho,
                              momentum,
                              epsilon,
                              grad,
                              var_out,
                              mg_out,
                              ms_out,
                              mom_out,
                              kernel_name="apply_centered_rms_prop_d"):
    """
    Update '*var' according to the centered RMSProp algorithm.

    mean_square = decay * mean_square + (1-decay) * gradient ** 2
    mean_grad = decay * mean_grad + (1-decay) * gradient
    Delta = learning_rate*gradient/sqrt(mean_square+epsilon-mean_grad**2)
    mg_{t} <- rho * mg_{t-1} + (1-rho) * grad
    ms_{t} <- rho * ms_{t-1} + (1-rho) * grad * grad
    mom_{t} <- momentum*mom_{t-1}+lr*grad/sqrt(ms_{t}-mg{t}*mg{t}+epsilon)
    var_{t} <- var_{t-1} - mom_{t}

    Parameters:
    ----------
    var: dict of tensor var, include shape and dtype,
        dtype support float16 and float32.

    mg: dict of tensor mg(mean_grad), include shape and dtype,
        dtype support float16 and float32.

    ms: dict of tensor ms(mean_square), include shape and dtype,
        dtype support float16 and float32.

    mom: dict of tensor mom, include shape and dtype,
        dtype support float16 and float32.

    lr: dict of scalar lr(learning rate). Must have the same dtype as var.

    rho: dict of scalar rho(decay rate). Must have the same dtype as var.

    momentum: dict of scalar momentum. Must have the same dtype as var.

    epsilon: dict of scalar epsilon. Must have the same dtype as var.

    grad: dict of tensor grad. Must have the same dtype as var.

    var_out: the dict of var output, only support float16, float32

    mg_out: the dict of mg output, only support float16, float32

    ms_out: the dict of ms output, only support float16, float32

    mom_out: the dict of mom output, only support float16, float32

    kernel_name : cce kernel name, default value is "apply_centered_rms_prop_d".

    Returns
    -------
    None
    """

    input_dict = (var, mg, ms, mom, lr, rho, momentum, epsilon, grad)
    out = [var_out, mg_out, ms_out, mom_out]
    check_list = ('float16', 'float32')
    dtype = var.get('dtype')
    check_dtype(dtype, check_list, param_name="var")
    dtype = dtype.lower()

    args = ApplyOpConfig.TensorArgs(input_dict,
                                    apply_centered_rms_prop_d_compute, out,
                                    6 if dtype == "float32" else 12)
    name = ApplyOpConfig.TensorName(all=('var', 'mg', 'ms', 'mom', 'lr', 'rho',
                                         'momentum', 'epsilon', 'grad'),
                                    scalar=('lr', 'rho', 'momentum',
                                            'epsilon'),
                                    reuse=('mg', 'ms', 'mom', 'var'))

    common_apply_op_process(ApplyOpConfig(args, name), kernel_name)
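A minimal NumPy sketch of the centered RMSProp rule quoted in the docstring above; _ref_centered_rms_prop is a hypothetical helper, not the apply_centered_rms_prop_d_compute kernel.

import numpy as np

def _ref_centered_rms_prop(var, mg, ms, mom, lr, rho, momentum, epsilon, grad):
    mg = rho * mg + (1.0 - rho) * grad
    ms = rho * ms + (1.0 - rho) * grad * grad
    mom = momentum * mom + lr * grad / np.sqrt(ms - mg * mg + epsilon)
    var = var - mom
    return var, mg, ms, mom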
Example #12
def apply_adam_d(var,
                 m,
                 v,
                 beta1_power,
                 beta2_power,
                 lr,
                 beta1,
                 beta2,
                 epsilon,
                 grad,
                 var_out,
                 m_out,
                 v_out,
                 use_locking=False,
                 use_nesterov=False,
                 kernel_name="apply_adam_d"):
    """
    Update '*var' according to the Adam algorithm. The operator computes:

    lr_t = learning_rate*(sqrt(1-beta2_power)) / (1-beta1_power)
    m_t = m + (1-beta1)*(grad-m)
    v_t = v + (1-beta2)*(grad*grad-v)
    if use_nesterov == True:
        var_t = var - lr_t*(m_t*beta1 + (1-beta1)*grad) / (epsilon + sqrt(v_t))
    else:
        var_t = var - lr_t*m_t / (epsilon + sqrt(v_t))

    Parameters:
    ----------
    var: dict
        input tensor contains shape and dtype attributes.
        only support float16, float32.
    m: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    v: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    beta1_power: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    beta2_power: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    lr: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    beta1: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    beta2: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    epsilon: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    grad: dict
        input tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    var_out: dict
        output tensor contains shape and dtype attributes.
        Must have the same type as 'var'.
    m_out: dict
        output tensor contains shape and dtype attributes.
        Must have the same type as 'm'.
    v_out: dict
        output tensor contains shape and dtype attributes.
        Must have the same type as 'v'.
    use_locking: bool
        default value is False.
    use_nesterov: bool
        default value is False.
    kernel_name : str
        kernel name, default value is "apply_adam_d"

    Returns:
    None
    """
    input_dict = (var, m, v, beta1_power, beta2_power, lr, beta1, beta2,
                  epsilon, grad)

    args = ApplyOpConfig.TensorArgs(input_dict, apply_adam_d_compute,
                                    [var_out, m_out, v_out], 15)
    name = ApplyOpConfig.TensorName(all=('var', 'm', 'v', 'beta1_power',
                                         'beta2_power', 'lr', 'beta1', 'beta2',
                                         'epsilon', 'grad'),
                                    scalar=('lr', 'beta1_power', 'beta2_power',
                                            'beta1', 'beta2', 'epsilon'),
                                    reuse=('m', 'v', 'var'))
    options = ApplyOpConfig.TensorOptions(attrs=use_nesterov)
    common_apply_op_process(ApplyOpConfig(args, name, options), kernel_name)
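A minimal NumPy sketch of the Adam rule quoted in the docstring above; _ref_adam is a hypothetical helper, not the apply_adam_d_compute kernel.

import numpy as np

def _ref_adam(var, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon,
              grad, use_nesterov=False):
    lr_t = lr * np.sqrt(1.0 - beta2_power) / (1.0 - beta1_power)
    m_t = m + (1.0 - beta1) * (grad - m)
    v_t = v + (1.0 - beta2) * (grad * grad - v)
    if use_nesterov:
        var_t = var - lr_t * (m_t * beta1 + (1.0 - beta1) * grad) / (
            epsilon + np.sqrt(v_t))
    else:
        var_t = var - lr_t * m_t / (epsilon + np.sqrt(v_t))
    return var_t, m_t, v_t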
Example #13
def apply_ada_max_d(var,
                    m,
                    v,
                    beta1_power,
                    lr,
                    beta1,
                    beta2,
                    epsilon,
                    grad,
                    var_out,
                    m_out,
                    v_out,
                    kernel_name='apply_ada_max_d'):
    """
    Update '*var' according to the AdaMax algorithm.

    m_t <- beta1 * m_{t-1} + (1 - beta1) * g
    v_t <- max(beta2 * v_{t-1}, abs(g))
    variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon)

    Parameters:
    ----------
    var : the dict of mutable tensor var. Must be one of the following data types:
          `float32`, `float16`.

    m: the dict of mutable tensor m. Must have the same data type as `var`.

    v : the dict of mutable tensor v. Must have the same data type as `var`.

    beta1_power : the dict of scalar beta1_power.
        Must have the same data type as `var`.

    lr : the dict of scalar lr. Must have the same data type as `var`.

    beta1 : the dict of scalar beta1. Must have the same data type as `var`.

    beta2 : the dict of scalar beta2. Must have the same data type as `var`.

    epsilon : the dict of scalar epsilon. Must have the same data type as `var`.

    grad : the dict of tensor grad. Must have the same data type as `var`.

    var_out : the dict of var output.

    m_out : the dict of m output.

    v_out : the dict of v output.

    kernel_name : cce kernel name, default value is "apply_ada_max" (optional).

    Returns:
    -------
    None
    """

    input_dict = (var, m, v, beta1_power, lr, beta1, beta2, epsilon, grad)

    args = ApplyOpConfig.TensorArgs(input_dict, apply_ada_max_d_compute,
                                    [var_out, m_out, v_out], 14)
    name = ApplyOpConfig.TensorName(all=('var', 'm', 'v', 'beta1_power', 'lr',
                                         'beta1', 'beta2', 'epsilon', 'grad'),
                                    scalar=('lr', 'beta1_power', 'beta1',
                                            'beta2', 'epsilon'),
                                    reuse=('m', 'v', 'var'))

    common_apply_op_process(ApplyOpConfig(args, name), kernel_name)
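A minimal NumPy sketch of the AdaMax rule quoted in the docstring above; _ref_ada_max is a hypothetical helper, not the apply_ada_max_d_compute kernel.

import numpy as np

def _ref_ada_max(var, m, v, beta1_power, lr, beta1, beta2, epsilon, grad):
    m = beta1 * m + (1.0 - beta1) * grad
    v = np.maximum(beta2 * v, np.abs(grad))
    var = var - lr / (1.0 - beta1_power) * m / (v + epsilon)
    return var, m, v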
Example #14
def apply_keras_momentum_d(var,
                           accum,
                           lr,
                           grad,
                           momentum,
                           out_var,
                           out_accum,
                           use_locking=False,
                           use_nesterov=False,
                           kernel_name="apply_keras_momentum_d"):
    """
    Update '*var' according to the momentum scheme.

    accum = accum * momentum - grad * lr
    if use_nesterov is True:
        var = var + accum * momentum - grad * lr
    else:
        var = var + accum

    Parameters
    ----------
    var : dict of tensor var, include shape and dtype.

    accum : dict of tensor accum, include shape and dtype.

    lr: dict of scalar lr(learning rate), include shape and dtype.

    grad: dict of tensor grad, include shape and dtype.

    momentum: dict of scalar momentum, include shape and dtype.

    out_var: dict of updated var.

    out_accum: dict of updated accum.

    use_locking: bool, default value is False.

    use_nesterov: bool, default value is False.
                  If True, var will be updated by using Nesterov momentum.

    kernel_name :  kernel name, default value is "apply_keras_momentum_d"

    Returns
    -------
    None
    """

    input_dict = (var, accum, lr, grad, momentum)

    args = ApplyOpConfig.TensorArgs(
        input_dict,
        apply_keras_momentum_d_compute,
        [out_var, out_accum],
        6 if use_nesterov else 5,
    )
    name = ApplyOpConfig.TensorName(all=('var', 'accum', 'lr', 'grad',
                                         'momentum'),
                                    scalar=('lr', 'momentum'),
                                    reuse=())
    options = ApplyOpConfig.TensorOptions(attrs=use_nesterov)

    common_apply_op_process(ApplyOpConfig(args, name, options), kernel_name)
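A minimal NumPy sketch of the Keras momentum rule quoted in the docstring above; _ref_keras_momentum is a hypothetical helper, not the apply_keras_momentum_d_compute kernel.

import numpy as np

def _ref_keras_momentum(var, accum, lr, grad, momentum, use_nesterov):
    accum = accum * momentum - grad * lr
    if use_nesterov:
        var = var + accum * momentum - grad * lr
    else:
        var = var + accum
    return var, accum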
Example #15
def fused_mul_apply_momentum_extern(var,
                                    accum,
                                    lr,
                                    x1,
                                    momentum,
                                    x2,
                                    var_copy,
                                    out_fp32,
                                    out_fp16,
                                    out_accum,
                                    use_nesterov=False,
                                    kernel_name="fused_mul_apply_momentum"):
    """
    Update '*var' according to the ApplyMomentum algorithm.

    accum = accum * momentum + x1 * x2
    if use_nesterov is True:
        var -= (x1 * x2) * lr + accum * momentum * lr
    else:
        var -= accum * lr

    Parameters:
    ----------
    var : the dict of mutable tensor var, Dtype is float32.

    accum: the dict of mutable tensor accum.

    lr : the dict of scalar lr.

    x1 : the dict of tensor grad.

    momentum : the dict of scalar momentum.

    x2 : the dict of tensor grad.

    var_copy : the dict of mutable tensor var, Dtype is float16.

    out_fp32 : the dict of output. Dtype is float32.

    out_fp16 : the dict of output. Dtype is float16.

    out_accum : the dict of output. Dtype is same as input accum.

    use_nesterov: bool. If true, use nesterov computing grad,
                 default value is False.

    kernel_name : cce kernel name, default value is "fused_mul_apply_momentum".

    Returns
    -------
    None
    """
    var_dtype = var.get("dtype")
    op_utils.check_dtype(var_dtype, ("float32", ), param_name="var")
    var_copy_dtype = var_copy.get("dtype")
    op_utils.check_dtype(var_copy_dtype, ("float16", ), param_name="var_copy")
    input_dict = (var, accum, lr, x1, momentum, x2, var_copy)
    outputs = [out_fp32, out_fp16, out_accum]

    args = ApplyOpConfig.TensorArgs(
        input_dict,
        _fused_mul_apply_momentum_extern_compute,
        outputs,
        10 if use_nesterov else 8,
    )
    name = ApplyOpConfig.TensorName(all=('var', 'accum', 'lr', 'x1',
                                         'momentum', 'x2', 'var_copy'),
                                    scalar=('lr', 'momentum', 'x2'),
                                    reuse=('accum', 'var', 'var_copy'))
    options = ApplyOpConfig.TensorOptions(attrs=use_nesterov)

    common_apply_op_process(ApplyOpConfig(args, name, options),
                            kernel_name,
                            same_flag=False)
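A minimal NumPy sketch of the fused multiply-then-ApplyMomentum rule quoted in the docstring above, covering only the float32 var math (the float16 var_copy path is omitted); _ref_fused_mul_apply_momentum is a hypothetical helper, not the _fused_mul_apply_momentum_extern_compute kernel.

import numpy as np

def _ref_fused_mul_apply_momentum(var, accum, lr, x1, momentum, x2,
                                  use_nesterov=False):
    grad = x1 * x2  # the fused multiply that replaces a plain grad input
    accum = accum * momentum + grad
    if use_nesterov:
        var = var - (grad * lr + accum * momentum * lr)
    else:
        var = var - accum * lr
    return var, accum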