Exemplo n.º 1
0
def test_best_block_config(
    test_id,
    op_type,
    activation,
    kernel,
    stride,
    dilation,
    padding,
    in_shape,
    out_shape,
    layouts,
    acc_config,
    expected_block_configs,
):
    nhwc_to_nhcwb16 = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 1 / 16, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 16],
        [0, 0, 0, 0, 1],
    ]
    nhcwb16_to_nhwc = [
        [1, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0],
        [0, 0, 16, 0, 1, -16],
        [0, 0, 0, 0, 0, 1],
    ]
    ifm_matrix, ifm_offset, weight_matrix, weight_offset, _, _ = make_matrices(
        op_type, kernel, stride, padding, layouts[0], layouts[1], dilation,
        in_shape[3])

    ofm_channels = out_shape[3]
    ifm_channels = in_shape[3]

    if layouts[0] == "NHCWB16":
        in_shape = [
            int(math.ceil(n))
            for n in np.matmul(nhwc_to_nhcwb16, in_shape + (1, )).tolist()[:-1]
        ]
    if layouts[1] == "NHCWB16":
        out_shape = [
            int(math.ceil(n)) for n in np.matmul(nhwc_to_nhcwb16, out_shape +
                                                 (1, )).tolist()[:-1]
        ]

    propagator = cs.Propagator(ifm_matrix, ifm_offset)
    weight_propagator = cs.Propagator(weight_matrix, weight_offset)

    subkernels = ((kernel[0] + 7) // 8) * ((kernel[1] + 7) // 8)

    op_attrs = {
        "op": op_type,
        "activation": activation,
        "stride_h": stride[0],
        "stride_w": stride[1],
        "dilation_h": dilation[0],
        "dilation_w": dilation[1],
    }

    device_config = cs.EthosuDeviceConfig(acc_config)
    block_configs = device_config.get_valid_block_configs(
        propagator,
        op_attrs,
        out_shape,
        ofm_channels,
        ifm_channels,
        layouts[1],
        layouts[0],
        "int8",
        "int8",
        kernel[0],
        kernel[1],
    )

    output_quantum = [1, 1, 2, 8]
    if layouts[1] == "NHCWB16":
        output_quantum = [1, 1, 1, 2, 8]

    # Create EthosUPart
    te_subgraph = cs.TESubgraph([], None)
    part = cs.EthosuPart(
        te_subgraph,
        [propagator, weight_propagator],
        output_quantum,
        subkernels,
        block_configs,
        1,
    )

    order = [1, 2, 3, 4] if layouts[1] == "NHCWB16" else [1, 2, 4, 3, 0]
    stripes = [1] * len(output_quantum)
    offset = [0] * len(output_quantum)

    stripe_config = cs.StripeConfig(out_shape, out_shape, out_shape, order,
                                    stripes, offset)

    block = part.get_block_config(stripe_config)
    block_shape = tuple(int(a) for a in block.output_shape)

    assert block_shape in expected_block_configs[test_id]
Exemplo n.º 2
0
def test_conv_performance(
    accelerator,
    op_type,
    activation,
    kernel,
    stride,
    dilation,
    padding,
    in_shape,
    out_shape,
    block_shape,
    input_block_shape,
    expected,
):
    ifm_channels = in_shape[3]
    ifm_matrix, ifm_offset, weight_matrix, weight_offset, _, _ = make_matrices(
        op_type,
        kernel,
        stride,
        padding,
        "NHWC",
        "NHWC",
        dilation,
        ifm_channels,
    )

    propagator = cs.Propagator(ifm_matrix, ifm_offset)
    weight_propagator = cs.Propagator(weight_matrix, weight_offset)

    subkernels = ((kernel[0] + 7) // 8) * ((kernel[1] + 7) // 8)

    device_config = cs.EthosuDeviceConfig(accelerator)

    output_cycles = device_config._get_output_cycles(op_type, "", "int8", "int8", activation)
    output_cycles *= reduce(lambda a, b: a * b, block_shape, 1)
    is_partkernel = device_config.is_partkernel(
        op_type, ifm_channels, "int8", kernel[0] * kernel[1]
    )
    compute_cycles = device_config._estimate_compute_cycles_per_block(
        op_type,
        _Shape(block_shape),
        _Shape(input_block_shape),
        kernel[0],
        kernel[1],
        ifm_channels,
        "int8",
        is_partkernel,
    )
    block_configs = [
        cs.BlockConfig(input_block_shape, block_shape, compute_cycles, int(output_cycles))
    ]

    output_quantum = [1, 1, 2, 8]
    te_subgraph = cs.TESubgraph([], None)
    part = cs.EthosuPart(
        te_subgraph,
        [propagator, weight_propagator],
        output_quantum,
        subkernels,
        block_configs,
        1,
    )
    part.set_input(0, cs.Tensor(in_shape, "int8"))
    part.set_input(1, cs.Tensor([ifm_channels, kernel[0], kernel[1], out_shape[-1]], "int8"))
    part.set_output(cs.Tensor(out_shape, "int8"))

    stripes = [1] * len(output_quantum)
    offset = [0] * len(output_quantum)
    order = [1, 2, 3, 4]

    stripe_config = cs.StripeConfig(out_shape, out_shape, out_shape, order, stripes, offset)

    compute_cycles = part.get_performance_info(stripe_config, cs.BufferMode.ROLLING).compute_cycles
    tolerance = expected * 0.1

    assert expected - tolerance <= compute_cycles <= expected + tolerance