from mindspore import Parameter, nn
from mindspore.ops import operations as P
from mindspore.ops.operations import _inner_ops as inner


class TestScatterUpdateDynamicNet(nn.Cell):
    def __init__(self, inputx, indices, updates):
        super(TestScatterUpdateDynamicNet, self).__init__()
        self.scatter_update = P.ScatterUpdate()
        self.test_dynamic = inner.GpuConvertToDynamicShape()
        self.inputx = Parameter(inputx, name="inputx")
        self.indices = Parameter(indices, name="indices")
        self.updates = Parameter(updates, name="updates")
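For context, `inner.GpuConvertToDynamicShape` comes from MindSpore's internal ops (`_inner_ops`) and is used in GPU kernel tests to route a tensor through the dynamic-shape code path. A minimal, self-contained sketch of how such a net can be exercised; the construct body, shapes, and values below are my own illustration, not taken from the original test:

import numpy as np
from mindspore import Tensor, Parameter, context, nn
from mindspore.ops import operations as P
from mindspore.ops.operations import _inner_ops as inner

class ScatterUpdateDynamicDemo(nn.Cell):
    def __init__(self, inputx):
        super(ScatterUpdateDynamicDemo, self).__init__()
        self.scatter_update = P.ScatterUpdate()
        self.test_dynamic = inner.GpuConvertToDynamicShape()
        self.inputx = Parameter(inputx, name="inputx")

    def construct(self, indices, updates):
        # Illustrative wiring: mark indices/updates as dynamically shaped
        # before the op under test.
        indices = self.test_dynamic(indices)
        updates = self.test_dynamic(updates)
        return self.scatter_update(self.inputx, indices, updates)

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
net = ScatterUpdateDynamicDemo(Tensor(np.zeros((3, 4), np.float32)))
out = net(Tensor(np.array([0, 2], np.int32)),
          Tensor(np.ones((2, 4), np.float32)))  # rows 0 and 2 become ones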
class Net(nn.Cell):
    def __init__(self, strategy1=None, strategy2=None):
        super(Net, self).__init__()
        self.inputs = Parameter(Tensor(np.ones([32, 64, 128]).astype(np.float32)), "input")
        self.indices = Tensor(np.ones([4, 8]).astype(np.int32))
        self.updates = Tensor(np.ones([4, 8, 64, 128]).astype(np.float32))
        self.scatter_update = P.ScatterUpdate().shard(strategy1)
        self.add = P.TensorAdd().shard(strategy2)
        self.relu = P.ReLU()
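The `.shard()` calls above only attach parallel strategies; the net still has to be built under an auto-parallel context. A rough sketch of how it might be instantiated, where each tuple gives one slicing spec per operator input; the strategy values and device number are my own assumptions, not taken from the original test:

from mindspore import context

context.set_auto_parallel_context(device_num=8, parallel_mode="semi_auto_parallel")
strategy1 = ((1, 1, 1), (1, 1), (1, 1, 1, 1))   # ScatterUpdate: inputx, indices, updates
strategy2 = ((1, 1, 1), (1, 1, 1))              # TensorAdd: both operands
net = Net(strategy1, strategy2)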
Example #3
def _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking,
                         use_nesterov, target, beta1_power, beta2_power, beta1,
                         beta2, eps, lr, gradient, params, m, v, ps_parameter,
                         cache_enable):
    """Apply sparse lazy adam optimizer to the weight parameter when the gradient is sparse."""
    success = True
    indices = gradient.indices
    values = gradient.values
    # Parameter-server path: push the optimizer inputs (and their shapes) to the
    # server and pull the updated weight back instead of computing locally.
    if ps_parameter and not cache_enable:
        op_shape = P.Shape()
        shapes = (op_shape(params), op_shape(m), op_shape(v),
                  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr),
                  op_shape(beta1), op_shape(beta2), op_shape(eps),
                  op_shape(values), op_shape(indices))
        success = F.depend(
            success,
            pull(
                push((beta1_power, beta2_power, lr, beta1, beta2, eps, values,
                      indices), shapes), params))
        return success

    # With no separate target set, run the fused sparse optimizer kernel directly.
    if not target:
        success = F.depend(
            success,
            sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1,
                       beta2, eps, values, indices))
    else:
        # Otherwise compute the sparse step explicitly: gather the rows touched
        # by the gradient, update them, and scatter the results back.
        op_gather = P.Gather()
        op_sqrt = P.Sqrt()
        scatter_add = P.ScatterAdd(use_locking)
        scatter_update = P.ScatterUpdate(use_locking)

        m_slice = op_gather(m, indices, 0)
        v_slice = op_gather(v, indices, 0)

        # First and second moment updates for the gathered rows only (lazy update).
        next_m = m_slice * beta1 + values * (1 - beta1)
        next_v = v_slice * beta2 + values * values * (1 - beta2)

        # Bias-corrected step size, as in dense Adam.
        lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)

        if use_nesterov:
            m_temp = beta1 * next_m + values * (1 - beta1)
            param_update = m_temp / (op_sqrt(next_v) + eps)
        else:
            param_update = next_m / (op_sqrt(next_v) + eps)

        # Apply the step to the touched rows and write the new moments back in place.
        success = F.depend(success,
                           scatter_add(params, indices, -lr_t * param_update))
        success = F.depend(success, scatter_update(m, indices, next_m))
        success = F.depend(success, scatter_update(v, indices, next_v))

    return success
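As a sanity check on the gather/update/scatter branch above, here is a NumPy sketch of the same per-row arithmetic; the function and its names are mine, purely for illustration:

import numpy as np

def lazy_adam_rows(params, m, v, indices, values, lr,
                   beta1, beta2, eps, beta1_power, beta2_power, use_nesterov=False):
    # Only the rows listed in `indices` are touched; all other rows keep stale moments.
    m_slice, v_slice = m[indices], v[indices]
    next_m = m_slice * beta1 + values * (1 - beta1)
    next_v = v_slice * beta2 + values * values * (1 - beta2)
    lr_t = lr * np.sqrt(1 - beta2_power) / (1 - beta1_power)   # bias-corrected step size
    if use_nesterov:
        update = (beta1 * next_m + values * (1 - beta1)) / (np.sqrt(next_v) + eps)
    else:
        update = next_m / (np.sqrt(next_v) + eps)
    np.add.at(params, indices, -lr_t * update)   # analogue of ScatterAdd
    m[indices], v[indices] = next_m, next_v      # analogue of ScatterUpdate
    return params, m, v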
class TestScatterUpdateNet(nn.Cell):
    def __init__(self, inputx, indices, updates):
        super(TestScatterUpdateNet, self).__init__()
        self.scatter_update = P.ScatterUpdate()
        self.inputx = Parameter(inputx, name="inputx")
        self.indices = Parameter(indices, name="indices")
        self.updates = Parameter(updates, name="updates")
Example #5
class ScatterUpdateNet(nn.Cell):
    def __init__(self, input_x):
        super(ScatterUpdateNet, self).__init__()
        self.input_x = Parameter(input_x, name="para")
        self.scatter_update = P.ScatterUpdate()
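For readers unfamiliar with the op itself: `P.ScatterUpdate` overwrites the slices of a Parameter selected along axis 0 by `indices` with the given `updates`, leaving every other row untouched. A tiny illustrative run; the values are mine, and PyNative mode is assumed so the primitive can be called directly:

import numpy as np
from mindspore import Tensor, Parameter, context
from mindspore.ops import operations as P

context.set_context(mode=context.PYNATIVE_MODE)

x = Parameter(Tensor(np.arange(12, dtype=np.float32).reshape(4, 3)), name="x")
indices = Tensor(np.array([1, 3], np.int32))
updates = Tensor(np.zeros((2, 3), np.float32))

out = P.ScatterUpdate()(x, indices, updates)
# Rows 1 and 3 of x are now all zeros; rows 0 and 2 keep their original values.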
Example #6
class TestScatterUpdateDynamicNet2(nn.Cell):
    def __init__(self):
        super(TestScatterUpdateDynamicNet2, self).__init__()
        self.scatter_update = P.ScatterUpdate()
        self.test_dynamic = inner.GpuConvertToDynamicShape()