def compute_expect(data_sum, in_bn, head_active, in_active, layout):
    """Build the reference outputs ``[gamma_result, beta_result]``.

    The result of ``relu_grad_np(head_active, in_active)`` and ``in_bn`` are
    both cast to the dtype of ``data_sum`` before being handed to the
    ``bn_beta_grad_np`` / ``bn_gamma_grad_np`` helpers.

    Args:
        data_sum: Array whose dtype is used for the casts; also forwarded to
            ``bn_gamma_grad_np``.
        in_bn: Array cast to ``data_sum.dtype`` and forwarded to
            ``bn_gamma_grad_np``.
        head_active: First argument of ``relu_grad_np``.
        in_active: Second argument of ``relu_grad_np``.
        layout: Forwarded unchanged to both ``bn_*_grad_np`` helpers.

    Returns:
        A two-element list: the ``bn_gamma_grad_np`` result followed by the
        ``bn_beta_grad_np`` result.
    """
    target_dtype = data_sum.dtype
    relu_out = relu_grad_np(head_active, in_active).astype(target_dtype)
    bn_input = in_bn.astype(target_dtype)

    # NOTE(review): helper semantics are defined elsewhere; presumably these
    # are the beta/gamma gradients of a batch-norm layer -- confirm.
    beta_result = bn_beta_grad_np(relu_out, layout)
    gamma_result = bn_gamma_grad_np(relu_out, bn_input, data_sum, layout)
    return [gamma_result, beta_result]
def compute_expect(inshp_data, outshp_data):
    """Compute the two expected output arrays with NumPy.

    ``scale`` is the product of the first three entries of
    ``outshp_data.shape``. Both returned arrays are produced by the same chain
    of operations (the second branch repeats the first, with the operands of
    one multiplication swapped) and are cast to ``outshp_data.dtype``.

    Args:
        inshp_data: Array providing the working dtype for intermediate casts.
        outshp_data: Array providing ``scale`` (via its shape) and the dtype
            of the returned arrays; needs at least three dimensions because
            ``shape[0..2]`` are indexed.

    Returns:
        A two-element list ``[first, second]`` of arrays.
    """
    dims = outshp_data.shape
    scale = dims[0] * dims[1] * dims[2]

    # Term shared by both branches: relu_grad_np result scaled up, minus input.
    square_mean_a = np.divide(np.multiply(inshp_data, inshp_data), scale)
    relu_part = relu_grad_np(
        np.add(outshp_data, outshp_data), outshp_data
    ).astype(inshp_data.dtype)
    shifted = np.subtract(np.multiply(relu_part, scale), inshp_data)

    # First branch.
    centered_a = np.subtract(
        outshp_data.astype(inshp_data.dtype), np.divide(inshp_data, scale)
    )
    ratio_a = np.divide(np.multiply(centered_a, inshp_data), inshp_data)
    first = np.multiply(
        square_mean_a, np.subtract(shifted, ratio_a)
    ).astype(outshp_data.dtype)

    # Second branch: same pipeline, recomputed from the inputs.
    square_mean_b = np.divide(np.multiply(inshp_data, inshp_data), scale)
    input_mean_b = np.divide(inshp_data, scale)
    centered_b = np.subtract(
        outshp_data.astype(inshp_data.dtype), input_mean_b
    )
    ratio_b = np.divide(np.multiply(inshp_data, centered_b), inshp_data)
    second = np.multiply(
        square_mean_b, np.subtract(shifted, ratio_b)
    ).astype(outshp_data.dtype)

    return [first, second]