def GenerateMulCols(emitter, result_type, lhs_add, rhs_add, aligned, cols,
                    leftovers):
    """Emit the multiplication code for a single horizontal lhs strip.

    The emitted code consists of two parts:

    1. A `for` loop over `col_chunks`. Each iteration zips two groups of
       4 rhs rows (into `zipped_rhs_1` and `zipped_rhs_2`), advancing
       `rhs_chunk` by `chunk_size` after each zip, then calls the 1x8
       multiply routine built for 8 columns and advances
       `mul_result_chunk` by 8.
    2. A tail that depends on `cols`:
       - `cols > 4`: zip 4 rows, advance `rhs_chunk`, zip the remaining
         `cols - 4` rows, then call the 1x8 multiply routine built for
         `cols` columns.
       - `0 < cols <= 4`: zip `cols` rows and call the generic Nx8
         multiply routine built for 1 row and `cols` columns.
       - otherwise: nothing more is emitted.

    Args:
        emitter: Object providing `EmitOpenBracket`, `EmitCloseBracket`,
            `EmitCall` and `EmitAssignIncrement`.
        result_type: Forwarded to the multiply routine name builders and
            to `GetMul2Params` / `GetMulParams`.
        lhs_add: Forwarded to the multiply routine name builders.
        rhs_add: Forwarded to the multiply routine name builders.
        aligned: Forwarded to `zip_Nx8_neon.BuildName`.
        cols: Column count handled by the tail emitted after the loop.
        leftovers: Forwarded to `zip_Nx8_neon.BuildName`.
    """

    def zip_rhs(rows, destination):
        # Emit one rhs zip call for `rows` rows writing into `destination`.
        emitter.EmitCall(
            zip_Nx8_neon.BuildName(rows, leftovers, aligned),
            ['rhs_chunk', 'k', 'k', destination, 'lhs_offset', 'const_offset'])

    def advance_rhs():
        # Move the rhs pointer past the chunk that was just zipped.
        emitter.EmitAssignIncrement('rhs_chunk', 'chunk_size')

    # Main loop: two full 4-row zips followed by an 8-column multiply.
    emitter.EmitOpenBracket('for (int i = 0; i < col_chunks; ++i)')
    for destination in ('zipped_rhs_1', 'zipped_rhs_2'):
        zip_rhs(4, destination)
        advance_rhs()
    emitter.EmitCall(
        mul_1x8_Mx8_neon.BuildName(result_type, lhs_add, rhs_add, 8),
        GetMul2Params(result_type))
    emitter.EmitAssignIncrement('mul_result_chunk', 8)
    emitter.EmitCloseBracket()

    if cols > 4:
        # Tail wider than one zip: a full 4-row zip plus the remainder.
        zip_rhs(4, 'zipped_rhs_1')
        advance_rhs()
        zip_rhs(cols - 4, 'zipped_rhs_2')
        emitter.EmitCall(
            mul_1x8_Mx8_neon.BuildName(result_type, lhs_add, rhs_add, cols),
            GetMul2Params(result_type))
        return

    if cols > 0:
        # Tail fits in a single zip; use the generic Nx8 multiply routine.
        zip_rhs(cols, 'zipped_rhs_1')
        emitter.EmitCall(
            mul_Nx8_Mx8_neon.BuildName(result_type, lhs_add, rhs_add, 1, cols),
            GetMulParams(result_type))
# Example #2
# 0
def ZipName(rows, leftovers, aligned):
    """Return the name `zip_Nx8_neon.BuildName` builds for these arguments."""
    routine_name = zip_Nx8_neon.BuildName(rows, leftovers, aligned)
    return routine_name
def GenerateZipVector(emitter, aligned, leftovers):
    """Emit the call that zips one row of `lhs` into `zipped_lhs`.

    Args:
        emitter: Object providing `EmitCall`.
        aligned: Forwarded to `zip_Nx8_neon.BuildName`.
        leftovers: Forwarded to `zip_Nx8_neon.BuildName`.
    """
    call_arguments = ['lhs', 'k', 'k', 'zipped_lhs', 'rhs_offset', 0]
    emitter.EmitCall(
        zip_Nx8_neon.BuildName(1, leftovers, aligned),
        call_arguments)