Exemplo n.º 1
0
    def setUpClass(cls):
        """Create the parsers, kernels and machine models shared by the tests.

        Reads the x86 and AArch64 sample assembly files, passes each parsed
        file through ``reduce_to_section``, loads the CSX and TX2 machine
        models from ``MODULE_DATA_DIR`` and finally runs ``assign_src_dst``
        and ``assign_tp_lt`` on every instruction of both kernels.
        """
        # parsers and raw assembly sources
        cls.parser_x86 = ParserX86ATT()
        cls.parser_AArch64 = ParserAArch64()
        with open(cls._find_file('kernel_x86.s')) as x86_file:
            cls.code_x86 = x86_file.read()
        with open(cls._find_file('kernel_aarch64.s')) as aarch64_file:
            cls.code_AArch64 = aarch64_file.read()

        # parse the sources and reduce them to the analysed section
        parsed_x86 = cls.parser_x86.parse_file(cls.code_x86)
        cls.kernel_x86 = reduce_to_section(parsed_x86, 'x86')
        parsed_aarch64 = cls.parser_AArch64.parse_file(cls.code_AArch64)
        cls.kernel_AArch64 = reduce_to_section(parsed_aarch64, 'aarch64')

        # machine models and the semantics objects built on top of them
        data_dir = cls.MODULE_DATA_DIR
        cls.machine_model_csx = MachineModel(
            path_to_yaml=os.path.join(data_dir, 'csx.yml'))
        cls.machine_model_tx2 = MachineModel(
            path_to_yaml=os.path.join(data_dir, 'tx2.yml'))
        cls.semantics_csx = ArchSemantics(
            cls.machine_model_csx,
            path_to_yaml=os.path.join(data_dir, 'isa/x86.yml'))
        cls.semantics_tx2 = ArchSemantics(
            cls.machine_model_tx2,
            path_to_yaml=os.path.join(data_dir, 'isa/aarch64.yml'))
        cls.machine_model_zen = MachineModel(arch='zen1')

        # annotate every instruction of each kernel with its matching semantics
        annotation_plan = (
            (cls.semantics_csx, cls.kernel_x86),
            (cls.semantics_tx2, cls.kernel_AArch64),
        )
        for semantics, kernel in annotation_plan:
            for instruction in kernel:
                semantics.assign_src_dst(instruction)
                semantics.assign_tp_lt(instruction)
Exemplo n.º 2
0
    def test_marker_matching_AArch64(self):
        """Check AArch64 marker detection for all spellings of the markers.

        Every combination of start/end ``mov`` variant and ``.byte`` layout is
        wrapped around the same kernel; the section returned by
        ``reduce_to_section`` must equal the kernel parsed on its own with the
        matching ``start_line`` offset.
        """

        def count_lines(text):
            # number of non-empty lines in ``text``
            return len([line for line in text.split("\n") if line])

        # preparation: the marker byte sequence spread over one to four lines
        bytes_variations = [
            ".byte     213,3,32,31\n",
            ".byte     213,3,32\n" ".byte 31\n",
            ".byte     213,3\n" ".byte 32,31\n",
            ".byte     213\n" ".byte 3,32,31\n",
            ".byte     213,3\n" ".byte     32\n" ".byte     31\n",
            ".byte     213\n" ".byte     3,32\n" ".byte     31\n",
            ".byte     213\n" ".byte     3\n" ".byte     32,31\n",
            ".byte     213\n" ".byte     3\n" ".byte     32\n" ".byte     31\n",
        ]
        mov_start_variations = [
            "mov      x1, #111\n",
            "mov      x1, 111  // should work as well\n",
        ]
        mov_end_variations = [
            "mov      x1, #222 // preferred way\n",
            "mov      x1, 222\n",
        ]
        prologue = (
            "mov x12, xzr\n"
            "\tldp x9, x10, [sp, #16]      // 8-byte Folded Reload\n"
            "     .p2align    6\n"
        )
        kernel = (
            ".LBB0_28:\n"
            "fmul    v7.2d, v7.2d, v19.2d\n"
            "stp q0, q1, [x10, #-32]\n"
            "b.ne    .LBB0_28\n"
        )
        epilogue = (
            ".LBB0_29:   //   Parent Loop BB0_20 Depth=1\n"
            "bl    dummy\n"
        )
        kernel_length = count_lines(kernel)

        # actual tests
        for mov_start_var in mov_start_variations:
            for bytes_var_1 in bytes_variations:
                # everything that precedes the kernel in the sample
                header = prologue + mov_start_var + bytes_var_1
                for mov_end_var in mov_end_variations:
                    for bytes_var_2 in bytes_variations:
                        sample_code = (
                            header + kernel + mov_end_var + bytes_var_2 + epilogue
                        )
                        with self.subTest(
                            mov_start=mov_start_var,
                            bytes_start=bytes_var_1,
                            mov_end=mov_end_var,
                            bytes_end=bytes_var_2,
                        ):
                            sample_parsed = self.parser_AArch.parse_file(sample_code)
                            sample_kernel = reduce_to_section(sample_parsed, "AArch64")
                            self.assertEqual(len(sample_kernel), kernel_length)
                            # the kernel starts right after the header lines
                            parsed_kernel = self.parser_AArch.parse_file(
                                kernel, start_line=count_lines(header))
                            self.assertEqual(sample_kernel, parsed_kernel)
Exemplo n.º 3
0
    def test_marker_special_cases_x86(self):
        """Check x86 section extraction when markers are missing or at file edges.

        Each sample consists of an ignored prologue, the section expected back
        from ``reduce_to_section`` and an ignored epilogue.  The extracted
        section must have the expected number of lines and must equal the
        expected section parsed on its own with the matching ``start_line``.
        """
        bytes_line = (
            '.byte     100\n'
            '.byte     103\n'
            '.byte     144\n')
        start_marker = 'movl     $111, %ebx\n' + bytes_line
        end_marker = 'movl     $222, %ebx\n' + bytes_line
        prologue = (
            'movl    -88(%rbp), %r10d\n'
            'xorl    %r11d, %r11d\n'
            '.p2align 4,,10\n')
        kernel = (
            '.L3: #L3\n'
            'vmovsd  .LC1(%rip), %xmm0\n'
            'vmovsd  %xmm0, (%r15,%rcx,8)\n'
            'cmpl    %ecx, %ebx\n'
            'jle .L3\n')
        epilogue = (
            'leaq    -56(%rbp), %rsi\n'
            'movl    %r10d, -88(%rbp)\n'
            'call    timing\n')
        samples = [
            # (test name,
            #  ignored prologue, section to be extracted, ignored epilogue)
            ("markers",
             prologue + start_marker, kernel, end_marker + epilogue),
            ("marker at file start",
             start_marker, kernel, end_marker + epilogue),
            ("no start marker",
             '', prologue + kernel, end_marker + epilogue),
            ("marker at file end",
             prologue + start_marker, kernel, end_marker),
            ("no end marker",
             prologue + start_marker, kernel + epilogue, ''),
            ("empty kernel",
             prologue + start_marker, '', end_marker + epilogue),
        ]

        # ``section`` (not ``kernel``) so the template string above is not shadowed
        for test_name, pro, section, epi in samples:
            # one sub-test per sample so a failing sample does not hide the others
            with self.subTest(sample=test_name):
                code = pro + section + epi
                parsed = self.parser_x86.parse_file(code)
                test_kernel = reduce_to_section(parsed, 'x86')
                kernel_length = len(section.strip().split('\n')) if section else 0
                self.assertEqual(
                    len(test_kernel), kernel_length,
                    msg="Invalid extracted kernel length on {!r} sample".format(test_name))
                # the expected section starts right after the ignored prologue lines
                kernel_start = len(pro.strip().split('\n')) if pro else 0
                parsed_kernel = self.parser_x86.parse_file(
                    section, start_line=kernel_start)
                self.assertEqual(
                    test_kernel, parsed_kernel,
                    msg="Invalid extracted kernel on {!r}".format(test_name))
Exemplo n.º 4
0
    def test_marker_special_cases_AArch(self):
        """Check AArch64 section extraction when markers are missing or at file edges.

        Each sample consists of an ignored prologue, the section expected back
        from ``reduce_to_section`` and an ignored epilogue.  The extracted
        section must have the expected number of lines and must equal the
        expected section parsed on its own with the matching ``start_line``.
        """
        bytes_line = '.byte     213,3,32,31\n'
        start_marker = 'mov      x1, #111\n' + bytes_line
        end_marker = 'mov      x1, #222\n' + bytes_line
        prologue = (
            'dup v0.2d, x14\n'
            'neg x9, x9\n'
            '.p2align    6\n')
        kernel = (
            '.LBB0_28:\n'
            + 'fmul    v7.2d, v7.2d, v19.2d\n'
            + 'stp q0, q1, [x10, #-32]\n'
            + 'b.ne    .LBB0_28\n')
        epilogue = (
            '.LBB0_29:   //   Parent Loop BB0_20 Depth=1\n'
            'bl    dummy\n')

        samples = [
            # (test name,
            #  ignored prologue, section to be extracted, ignored epilogue)
            ("markers",
             prologue + start_marker, kernel, end_marker + epilogue),
            ("marker at file start",
             start_marker, kernel, end_marker + epilogue),
            ("no start marker",
             '', prologue + kernel, end_marker + epilogue),
            ("marker at file end",
             prologue + start_marker, kernel, end_marker),
            ("no end marker",
             prologue + start_marker, kernel + epilogue, ''),
            ("empty kernel",
             prologue + start_marker, '', end_marker + epilogue),
        ]

        # ``section`` (not ``kernel``) so the template string above is not shadowed
        for test_name, pro, section, epi in samples:
            # one sub-test per sample so a failing sample does not hide the others
            with self.subTest(sample=test_name):
                code = pro + section + epi
                parsed = self.parser_AArch.parse_file(code)
                test_kernel = reduce_to_section(parsed, 'AArch64')
                kernel_length = len(section.strip().split('\n')) if section else 0
                self.assertEqual(
                    len(test_kernel), kernel_length,
                    msg="Invalid extracted kernel length on {!r} sample".format(test_name))
                # the expected section starts right after the ignored prologue lines
                kernel_start = len(pro.strip().split('\n')) if pro else 0
                parsed_kernel = self.parser_AArch.parse_file(
                    section, start_line=kernel_start)
                self.assertEqual(
                    test_kernel, parsed_kernel,
                    msg="Invalid extracted kernel on {!r}".format(test_name))
Exemplo n.º 5
0
    def __init__(self, arch, code):
        """Parse ``code`` for ``arch`` and attach semantics to the reduced kernel.

        Builds a ``MachineModel`` and ``ArchSemantics`` for ``arch``, selects
        the parser matching the model's ISA, parses ``code``, reduces it with
        ``reduce_to_section`` and calls ``add_semantics`` on the result.

        :param arch: architecture identifier passed to ``MachineModel(arch=...)``
        :param code: assembly source handed to the parser's ``parse_file``
        :raises ValueError: if the machine model reports an ISA other than
            ``aarch64`` or ``x86`` (compared case-insensitively)
        """
        self.machine_model = MachineModel(arch=arch)
        self.semantics = ArchSemantics(self.machine_model)
        isa = self.machine_model.get_ISA().lower()
        if isa == 'aarch64':
            self.parser = ParserAArch64()
        elif isa == 'x86':
            self.parser = ParserX86ATT()
        else:
            # Without this branch ``self.parser`` would stay unset and the
            # parse call below would fail with an unrelated AttributeError.
            raise ValueError(
                'Unsupported ISA {!r} for architecture {!r}'.format(isa, arch))

        parsed_code = self.parser.parse_file(code)
        self.kernel = reduce_to_section(parsed_code,
                                        self.machine_model.get_ISA())
        self.semantics.add_semantics(self.kernel)
Exemplo n.º 6
0
    def test_marker_matching_x86(self):
        """Check x86 marker detection for all spellings of the markers.

        Every combination of start/end ``mov`` variant and ``.byte`` layout is
        wrapped around the same kernel; the section returned by
        ``reduce_to_section`` must equal the kernel parsed on its own with the
        matching ``start_line`` offset.
        """

        def count_lines(text):
            # number of non-empty lines in ``text``
            return len([line for line in text.split("\n") if line])

        # preparation: the marker byte sequence spread over one to three lines
        bytes_variations = [
            ".byte     100,103,144\n",
            ".byte     100,103\n" ".byte 144\n",
            ".byte     100\n" ".byte 103,144\n",
            (
                ".byte     100 # IACA MARKER UTILITY\n"
                ".byte     103 # IACA MARKER UTILITY\n"
                ".byte     144 # IACA MARKER UTILITY\n"
            ),
        ]
        mov_start_variations = [
            "movl      $111, %ebx # IACA START\n",
            "mov      $111, %ebx # IACA START\n",
        ]
        mov_end_variations = [
            "movl      $222, %ebx # IACA END\n",
            "mov      $222, %ebx # IACA END\n",
        ]
        prologue = "movl    -92(%rbp), %r11d\n" "movl      $111, %ebx\n"
        kernel = (
            "vfmadd132sd (%r15,%rcx,8), %xmm5, %xmm0\n"
            "vmovsd  %xmm0, (%r14,%rcx,8)\n"
            "cmpl    %ebx, %ecx\n"
            "jge .L8\n"
        )
        epilogue = ".LE9:\t\t#12.2\n" "call    dummy\n"
        kernel_length = count_lines(kernel)

        # actual tests
        for mov_start_var in mov_start_variations:
            for bytes_var_1 in bytes_variations:
                # everything that precedes the kernel in the sample
                header = prologue + mov_start_var + bytes_var_1
                for mov_end_var in mov_end_variations:
                    for bytes_var_2 in bytes_variations:
                        sample_code = (
                            header + kernel + mov_end_var + bytes_var_2 + epilogue
                        )
                        with self.subTest(
                            mov_start=mov_start_var,
                            bytes_start=bytes_var_1,
                            mov_end=mov_end_var,
                            bytes_end=bytes_var_2,
                        ):
                            sample_parsed = self.parser_x86.parse_file(sample_code)
                            sample_kernel = reduce_to_section(sample_parsed, "x86")
                            self.assertEqual(len(sample_kernel), kernel_length)
                            # the kernel starts right after the header lines
                            parsed_kernel = self.parser_x86.parse_file(
                                kernel, start_line=count_lines(header))
                            self.assertEqual(sample_kernel, parsed_kernel)
Exemplo n.º 7
0
def inspect(args, output_file=sys.stdout):
    """
    Does the actual throughput and critical path analysis of OSACA and prints it to the
    terminal.

    If no architecture is given, the ISA is detected from the code and the matching
    default architecture is used; when parsing then fails, the other ISA's default
    architecture is tried once. With an explicit architecture, a parsing failure prints
    the traceback to stderr and exits with status 1.

    :param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
    :param output_file: Define the stream for output, defaults to :class:`sys.stdout`
    :type output_file: stream, optional
    """
    # Read file
    code = args.file.read()

    # Detect ISA if necessary
    arch = args.arch if args.arch is not None else DEFAULT_ARCHS[BaseParser.detect_ISA(code)]
    print_arch_warning = not args.arch
    isa = MachineModel.get_isa_for_arch(arch)
    verbose = args.verbose
    ignore_unknown = args.ignore_unknown

    # Parse file
    parser = get_asm_parser(arch)
    try:
        parsed_code = parser.parse_file(code)
    except Exception:
        # Catch Exception rather than everything, so that KeyboardInterrupt and
        # SystemExit are not swallowed by the retry logic below.
        # probably the wrong parser based on heuristic
        if args.arch is None:
            # change ISA and try again
            if BaseParser.detect_ISA(code) == 'aarch64':
                arch = DEFAULT_ARCHS['x86']
            else:
                arch = DEFAULT_ARCHS['aarch64']
            isa = MachineModel.get_isa_for_arch(arch)
            parser = get_asm_parser(arch)
            parsed_code = parser.parse_file(code)
        else:
            traceback.print_exc(file=sys.stderr)
            sys.exit(1)

    # Reduce to marked kernel or chosen section and add semantics
    if args.lines:
        line_range = get_line_range(args.lines)
        kernel = [line for line in parsed_code if line['line_number'] in line_range]
        print_length_warning = False
    else:
        kernel = reduce_to_section(parsed_code, isa)
        # Print warning if kernel has no markers and is larger than threshold (100)
        print_length_warning = len(kernel) == len(parsed_code) and len(kernel) > 100
    machine_model = MachineModel(arch=arch)
    semantics = ArchSemantics(machine_model)
    semantics.add_semantics(kernel)
    # Do optimal schedule for kernel throughput if wished
    if not args.fixed:
        semantics.assign_optimal_throughput(kernel)

    # Create DiGraphs
    kernel_graph = KernelDG(kernel, parser, machine_model)
    if args.dotpath is not None:
        # a dotpath of '.' is forwarded as None
        kernel_graph.export_graph(args.dotpath if args.dotpath != '.' else None)
    # Print analysis
    frontend = Frontend(args.file.name, arch=arch)
    print(
        frontend.full_analysis(
            kernel,
            kernel_graph,
            ignore_unknown=ignore_unknown,
            arch_warning=print_arch_warning,
            length_warning=print_length_warning,
            verbose=verbose
        ),
        file=output_file,
    )
Exemplo n.º 8
0
 def test_marker_detection_x86(self):
     """The reduced x86 kernel must have 9 entries spanning lines 146 to 154."""
     marked_section = reduce_to_section(self.parsed_x86, "x86")
     self.assertEqual(len(marked_section), 9)
     first_entry, last_entry = marked_section[0], marked_section[-1]
     self.assertEqual(first_entry.line_number, 146)
     self.assertEqual(last_entry.line_number, 154)
Exemplo n.º 9
0
 def test_marker_detection_AArch64(self):
     """The reduced AArch64 kernel must have 138 entries spanning lines 307 to 444."""
     marked_section = reduce_to_section(self.parsed_AArch, "AArch64")
     self.assertEqual(len(marked_section), 138)
     first_entry, last_entry = marked_section[0], marked_section[-1]
     self.assertEqual(first_entry.line_number, 307)
     self.assertEqual(last_entry.line_number, 444)
Exemplo n.º 10
0
    def setUpClass(cls):
        """Create the parsers, kernels and machine models shared by the tests.

        Reads six sample assembly files, passes each parsed file through
        ``reduce_to_section``, loads the CSX, TX2 and A64FX machine models from
        ``MODULE_DATA_DIR`` and finally runs ``assign_src_dst`` and
        ``assign_tp_lt`` on every instruction of every kernel.
        """
        # set up parsers
        cls.parser_x86 = ParserX86ATT()
        cls.parser_AArch64 = ParserAArch64()

        # load the assembly sources: (class attribute, file name)
        source_files = (
            ("code_x86", "kernel_x86.s"),
            ("code_x86_memdep", "kernel_x86_memdep.s"),
            ("code_x86_long_LCD", "kernel_x86_long_LCD.s"),
            ("code_aarch64_memdep", "kernel_aarch64_memdep.s"),
            ("code_AArch64", "kernel_aarch64.s"),
            ("code_AArch64_SVE", "kernel_aarch64_sve.s"),
        )
        for attribute, file_name in source_files:
            with open(cls._find_file(file_name)) as source:
                setattr(cls, attribute, source.read())

        # parse and reduce: (class attribute, parser, source code, section ISA)
        kernel_specs = (
            ("kernel_x86", cls.parser_x86, cls.code_x86, "x86"),
            ("kernel_x86_memdep", cls.parser_x86, cls.code_x86_memdep, "x86"),
            ("kernel_x86_long_LCD", cls.parser_x86, cls.code_x86_long_LCD, "x86"),
            ("kernel_AArch64", cls.parser_AArch64, cls.code_AArch64, "aarch64"),
            ("kernel_aarch64_memdep", cls.parser_AArch64,
             cls.code_aarch64_memdep, "aarch64"),
            ("kernel_aarch64_SVE", cls.parser_AArch64,
             cls.code_AArch64_SVE, "aarch64"),
        )
        for attribute, asm_parser, source_code, section_isa in kernel_specs:
            parsed = asm_parser.parse_file(source_code)
            setattr(cls, attribute, reduce_to_section(parsed, section_isa))

        # set up machine models
        data_dir = cls.MODULE_DATA_DIR
        cls.machine_model_csx = MachineModel(
            path_to_yaml=os.path.join(data_dir, "csx.yml"))
        cls.machine_model_tx2 = MachineModel(
            path_to_yaml=os.path.join(data_dir, "tx2.yml"))
        cls.machine_model_a64fx = MachineModel(
            path_to_yaml=os.path.join(data_dir, "a64fx.yml"))
        cls.semantics_x86 = ISASemantics("x86")
        cls.semantics_csx = ArchSemantics(
            cls.machine_model_csx,
            path_to_yaml=os.path.join(data_dir, "isa/x86.yml"))
        cls.semantics_aarch64 = ISASemantics("aarch64")
        cls.semantics_tx2 = ArchSemantics(
            cls.machine_model_tx2,
            path_to_yaml=os.path.join(data_dir, "isa/aarch64.yml"))
        cls.semantics_a64fx = ArchSemantics(
            cls.machine_model_a64fx,
            path_to_yaml=os.path.join(data_dir, "isa/aarch64.yml"))
        cls.machine_model_zen = MachineModel(arch="zen1")

        # annotate every instruction of each kernel with its matching semantics
        annotation_plan = (
            (cls.semantics_csx, cls.kernel_x86),
            (cls.semantics_csx, cls.kernel_x86_memdep),
            (cls.semantics_csx, cls.kernel_x86_long_LCD),
            (cls.semantics_tx2, cls.kernel_AArch64),
            (cls.semantics_tx2, cls.kernel_aarch64_memdep),
            (cls.semantics_a64fx, cls.kernel_aarch64_SVE),
        )
        for semantics, kernel in annotation_plan:
            for instruction in kernel:
                semantics.assign_src_dst(instruction)
                semantics.assign_tp_lt(instruction)