def setUpClass(cls):
    """
    Prepare shared fixtures once for the whole test class.

    Reads and parses the x86 and AArch64 test kernels, reduces each of them with
    ``reduce_to_section``, loads the CSX, TX2 and Zen1 machine models and finally
    lets the matching semantics object assign source/destination operands and
    throughput/latency values to every instruction form of both kernels.
    """
    # parsers and raw assembly sources
    cls.parser_x86 = ParserX86ATT()
    cls.parser_AArch64 = ParserAArch64()
    with open(cls._find_file('kernel_x86.s')) as x86_file:
        cls.code_x86 = x86_file.read()
    with open(cls._find_file('kernel_aarch64.s')) as aarch64_file:
        cls.code_AArch64 = aarch64_file.read()
    parsed_x86 = cls.parser_x86.parse_file(cls.code_x86)
    cls.kernel_x86 = reduce_to_section(parsed_x86, 'x86')
    parsed_aarch64 = cls.parser_AArch64.parse_file(cls.code_AArch64)
    cls.kernel_AArch64 = reduce_to_section(parsed_aarch64, 'aarch64')

    # machine models and the semantics objects built on top of them
    csx_yaml = os.path.join(cls.MODULE_DATA_DIR, 'csx.yml')
    cls.machine_model_csx = MachineModel(path_to_yaml=csx_yaml)
    tx2_yaml = os.path.join(cls.MODULE_DATA_DIR, 'tx2.yml')
    cls.machine_model_tx2 = MachineModel(path_to_yaml=tx2_yaml)
    x86_isa_yaml = os.path.join(cls.MODULE_DATA_DIR, 'isa/x86.yml')
    cls.semantics_csx = ArchSemantics(cls.machine_model_csx, path_to_yaml=x86_isa_yaml)
    aarch64_isa_yaml = os.path.join(cls.MODULE_DATA_DIR, 'isa/aarch64.yml')
    cls.semantics_tx2 = ArchSemantics(cls.machine_model_tx2, path_to_yaml=aarch64_isa_yaml)
    cls.machine_model_zen = MachineModel(arch='zen1')

    # annotate every instruction form of both kernels
    for instruction_form in cls.kernel_x86:
        cls.semantics_csx.assign_src_dst(instruction_form)
        cls.semantics_csx.assign_tp_lt(instruction_form)
    for instruction_form in cls.kernel_AArch64:
        cls.semantics_tx2.assign_src_dst(instruction_form)
        cls.semantics_tx2.assign_tp_lt(instruction_form)
def setUpClass(self):
    """
    Prepare shared fixtures once for the whole test class.

    Parses the complete x86 and AArch64 test kernels (no section reduction),
    loads the CSX model from its YAML file and the TX2 model by arch key, and
    lets the matching semantics object assign source/destination operands and
    throughput/latency values to every instruction form of both kernels.
    """
    # parsers and parsed kernels
    self.parser_x86 = ParserX86ATT()
    self.parser_AArch64 = ParserAArch64()
    with open(self._find_file('kernel_x86.s')) as x86_file:
        x86_source = x86_file.read()
    with open(self._find_file('kernel_aarch64.s')) as aarch64_file:
        aarch64_source = aarch64_file.read()
    self.kernel_x86 = self.parser_x86.parse_file(x86_source)
    self.kernel_AArch64 = self.parser_AArch64.parse_file(aarch64_source)

    # machine models and the semantics objects built on top of them
    csx_yaml = os.path.join(self.MODULE_DATA_DIR, 'csx.yml')
    self.machine_model_csx = MachineModel(path_to_yaml=csx_yaml)
    self.machine_model_tx2 = MachineModel(arch='tx2')
    x86_isa_yaml = os.path.join(self.MODULE_DATA_DIR, 'isa/x86.yml')
    self.semantics_csx = ArchSemantics(self.machine_model_csx, path_to_yaml=x86_isa_yaml)
    aarch64_isa_yaml = os.path.join(self.MODULE_DATA_DIR, 'isa/aarch64.yml')
    self.semantics_tx2 = ArchSemantics(self.machine_model_tx2, path_to_yaml=aarch64_isa_yaml)

    # annotate every instruction form of both kernels
    for instruction_form in self.kernel_x86:
        self.semantics_csx.assign_src_dst(instruction_form)
        self.semantics_csx.assign_tp_lt(instruction_form)
    for instruction_form in self.kernel_AArch64:
        self.semantics_tx2.assign_src_dst(instruction_form)
        self.semantics_tx2.assign_tp_lt(instruction_form)
def sanity_check(arch: str, verbose=False, internet_check=False, output_file=sys.stdout):
    """
    Check the database of a micro-architecture for missing TP/LT values, for
    instructions that might be missing in the ISA DB and for duplicate instructions,
    then print a report.

    :param arch: micro-arch key to define DB to check
    :type arch: str
    :param verbose: verbose output flag, defaults to `False`
    :type verbose: bool, optional
    :param internet_check: indicates if OSACA should try to look up the src/dst
                           distribution on the internet, defaults to `False`
    :type internet_check: boolean, optional
    :param output_file: output stream specifying where to write output,
                        defaults to :class:`sys.stdout`
    :type output_file: stream, optional

    :return: `True` if neither missing port pressure entries nor bad operands were found
    """
    # architecture model and the ISA model it refers to
    arch_mm = MachineModel(arch=arch)
    instruction_forms = arch_mm["instruction_forms"]
    isa_mm = MachineModel(arch="isa/{}".format(arch_mm.get_ISA()))
    num_of_instr = len(instruction_forms)

    # findings from the architecture DB
    arch_findings = _check_sanity_arch_db(arch_mm, isa_mm, internet_check=internet_check)
    (
        missing_throughput,
        missing_latency,
        missing_port_pressure,
        suspicious_instructions,
        duplicate_instr_arch,
        bad_operand,
    ) = arch_findings
    # findings from the ISA DB
    duplicate_instr_isa, only_in_isa = _check_sanity_isa_db(arch_mm, isa_mm)

    # colors are only requested when the report goes to the default stdout stream
    use_colors = output_file == sys.stdout
    report = _get_sanity_report(
        num_of_instr,
        missing_throughput,
        missing_latency,
        missing_port_pressure,
        suspicious_instructions,
        duplicate_instr_arch,
        duplicate_instr_isa,
        only_in_isa,
        bad_operand,
        verbose=verbose,
        colors=use_colors,
    )
    print(report, file=output_file)

    # only these two categories decide the overall result
    return not (missing_port_pressure or bad_operand)
class KerncraftAPI(object):
    """
    Thin facade bundling machine model, parser, semantics and analysis results of
    one kernel for a given micro-architecture.
    """

    def __init__(self, arch, code):
        """
        Parse ``code`` for ``arch``, reduce it to the relevant section and annotate it.

        :param arch: micro-arch key used to load the machine model
        :type arch: str
        :param code: assembly code handed to the parser's ``parse_file``
        :type code: str
        :raises ValueError: if the ISA of the machine model is neither 'aarch64' nor 'x86'
        """
        self.machine_model = MachineModel(arch=arch)
        self.semantics = ArchSemantics(self.machine_model)
        isa = self.machine_model.get_ISA()
        isa_key = isa.lower()
        if isa_key == 'aarch64':
            self.parser = ParserAArch64()
        elif isa_key == 'x86':
            self.parser = ParserX86ATT()
        else:
            # Without a parser every later call would fail with an opaque
            # AttributeError, so reject the unsupported ISA right here.
            raise ValueError('Unsupported ISA: {}'.format(isa))
        parsed_code = self.parser.parse_file(code)
        self.kernel = reduce_to_section(parsed_code, isa)
        self.semantics.add_semantics(self.kernel)

    def create_output(self, verbose=False):
        """
        Build the full textual analysis report of the kernel.

        :param verbose: verbosity flag forwarded to the frontend, defaults to `False`
        :type verbose: bool, optional
        :returns: result of ``Frontend.full_analysis`` for the kernel
        """
        kernel_graph = KernelDG(self.kernel, self.parser, self.machine_model)
        frontend = Frontend(arch=self.machine_model.get_arch())
        return frontend.full_analysis(self.kernel, kernel_graph, verbose=verbose)

    def get_unmatched_instruction_ratio(self):
        """
        Return the fraction of instruction forms flagged with both unknown throughput
        and unknown latency.

        :returns: ratio in the range [0, 1]; 0.0 for an empty kernel
        """
        if not self.kernel:
            # nothing to match, so nothing is unmatched (avoids a ZeroDivisionError)
            return 0.0
        unmatched_counter = sum(
            1 for instruction in self.kernel
            if INSTR_FLAGS.TP_UNKWN in instruction['flags']
            and INSTR_FLAGS.LT_UNKWN in instruction['flags']
        )
        return unmatched_counter / len(self.kernel)

    def get_port_occupation_cycles(self):
        """
        Return the summed throughput values of the kernel keyed by port name.

        :returns: :class:`collections.OrderedDict` pairing the model's ``'ports'`` entries
                  with the values of ``get_throughput_sum`` in order
        """
        throughput_values = self.semantics.get_throughput_sum(self.kernel)
        port_names = self.machine_model['ports']
        return collections.OrderedDict(zip(port_names, throughput_values))

    def get_total_throughput(self):
        """Return the largest per-port value of the kernel's throughput sum."""
        return max(self.semantics.get_throughput_sum(self.kernel))

    def get_latency(self):
        """Return the tuple ``(loop-carried dependency latency, critical path latency)``."""
        return (self.get_lcd(), self.get_cp())

    def get_cp(self):
        """Return the sum of ``latency_cp`` over all nodes of the critical path."""
        kernel_graph = KernelDG(self.kernel, self.parser, self.machine_model)
        kernel_cp = kernel_graph.get_critical_path()
        return sum(node['latency_cp'] for node in kernel_cp)

    def get_lcd(self):
        """
        Return the latency of the longest loop-carried dependency chain.

        :returns: largest sum of ``latency_lcd`` over any dependency chain, 0.0 if
                  there is none
        """
        kernel_graph = KernelDG(self.kernel, self.parser, self.machine_model)
        lcd_dict = kernel_graph.get_loopcarried_dependencies()
        lcd = 0.0
        for dep in lcd_dict:
            chain_latency = sum(node['latency_lcd'] for node in lcd_dict[dep]['dependencies'])
            if chain_latency > lcd:
                lcd = chain_latency
        return lcd
def import_benchmark_output(arch, bench_type, filepath, output=sys.stdout):
    """
    Import benchmark results from micro-benchmarks into the machine model of ``arch``
    and dump the updated model.

    :param arch: target architecture key
    :type arch: str
    :param bench_type: key for defining type of benchmark output ("ibench" or "asmbench")
    :type bench_type: str
    :param filepath: filepath to the output file
    :type filepath: str
    :param output: output stream to dump, defaults to sys.stdout; if `None`, the dump
                   is printed instead
    :type output: stream
    :raises ValueError: if ``bench_type`` is not one of the supported types
    """
    supported_bench_outputs = ["ibench", "asmbench"]
    assert os.path.exists(filepath)
    if bench_type not in supported_bench_outputs:
        raise ValueError("Benchmark type is not supported.")

    with open(filepath, "r") as bench_file:
        input_data = bench_file.readlines()

    mm = MachineModel(arch)
    # bench_type was validated above, so exactly one of the two readers applies
    if bench_type == "ibench":
        read_entries = _get_ibench_output
    else:
        read_entries = _get_asmbench_output
    db_entries = read_entries(input_data, mm.get_ISA())

    # write entries to DB
    for key in db_entries:
        mm.set_instruction_entry(db_entries[key])

    if output is not None:
        mm.dump(stream=output)
    else:
        print(mm.dump())
def test_invalid_MachineModel(self):
    """MachineModel must reject missing, conflicting and non-existent model sources."""
    data_dir = self.MODULE_DATA_DIR

    # neither arch nor path given
    with self.assertRaises(ValueError):
        MachineModel()
    # arch and path given at the same time
    existing_yaml = os.path.join(data_dir, 'csx.yml')
    with self.assertRaises(ValueError):
        MachineModel(arch='CSX', path_to_yaml=existing_yaml)
    # unknown arch key
    with self.assertRaises(FileNotFoundError):
        MachineModel(arch='THE_MACHINE')
    # path to a file that does not exist
    missing_yaml = os.path.join(data_dir, 'THE_MACHINE.yml')
    with self.assertRaises(FileNotFoundError):
        MachineModel(path_to_yaml=missing_yaml)
def __init__(self, arch, code):
    """
    Parse ``code`` for ``arch``, reduce it to the relevant section and annotate it.

    :param arch: micro-arch key used to load the machine model
    :type arch: str
    :param code: assembly code handed to the parser's ``parse_file``
    :type code: str
    :raises ValueError: if the ISA of the machine model is neither 'aarch64' nor 'x86'
    """
    self.machine_model = MachineModel(arch=arch)
    self.semantics = ArchSemantics(self.machine_model)
    isa = self.machine_model.get_ISA()
    isa_key = isa.lower()
    if isa_key == 'aarch64':
        self.parser = ParserAArch64()
    elif isa_key == 'x86':
        self.parser = ParserX86ATT()
    else:
        # Without a parser the next statement would fail with an opaque
        # AttributeError, so reject the unsupported ISA explicitly.
        raise ValueError('Unsupported ISA: {}'.format(isa))
    parsed_code = self.parser.parse_file(code)
    self.kernel = reduce_to_section(parsed_code, isa)
    self.semantics.add_semantics(self.kernel)
def insert_byte_marker(args):
    """
    Insert byte markers into an assembly file using kerncraft and write the result
    back to the same file.

    Exits the process with status 1 if kerncraft cannot be imported.

    :param args: arguments given from :class:`~argparse.ArgumentParser` after parsing;
                 ``args.file`` (readable, with a ``name``) and ``args.arch`` are used
    """
    try:
        from kerncraft.incore_model import asm_instrumentation
    except ImportError:
        message = (
            "Module kerncraft not installed. Use 'pip install --user "
            "kerncraft' for installation.\nFor more information see "
            "https://github.com/RRZE-HPC/kerncraft"
        )
        print(message, file=sys.stderr)
        sys.exit(1)

    # kerncraft works on file-like objects, so wrap the source in in-memory streams
    source_stream = io.StringIO(args.file.read())
    result_stream = io.StringIO()
    asm_instrumentation(
        input_file=source_stream,
        output_file=result_stream,
        block_selection='manual',
        pointer_increment='auto_with_manual_fallback',
        isa=MachineModel.get_isa_for_arch(args.arch),
    )

    result_stream.seek(0)
    marked_source = result_stream.read()
    with open(args.file.name, 'w') as target:
        target.write(marked_source)
def test_architectures(self):
    """Run OSACA on the ISA-matching test kernel once for every supported micro-arch."""
    cli_parser = osaca.create_parser()
    for micro_arch in osaca.SUPPORTED_ARCHS:
        with self.subTest(micro_arch=micro_arch):
            # the kernel file is selected by the ISA the micro-arch belongs to
            kernel_name = 'kernel_{}.s'.format(MachineModel.get_isa_for_arch(micro_arch))
            kernel_path = self._find_test_file(kernel_name)
            cli_args = cli_parser.parse_args(['--arch', micro_arch, kernel_path])
            sink = StringIO()
            osaca.run(cli_args, output_file=sink)
def get_asm_parser(arch) -> BaseParser:
    """
    Create the assembly parser matching the ISA of a micro-architecture.

    :param arch: architecture code
    :type arch: str
    :returns: :class:`~osaca.parser.BaseParser` object for 'x86' and 'aarch64';
              `None` for any other ISA
    """
    parser_by_isa = {
        'x86': ParserX86ATT,
        'aarch64': ParserAArch64,
    }
    parser_cls = parser_by_isa.get(MachineModel.get_isa_for_arch(arch))
    if parser_cls is None:
        return None
    return parser_cls()
def test_hidden_load(self):
    """
    Check how many instruction forms receive the HIDDEN_LD flag for the full x86
    kernel and for two slices of it, using the hidden-load machine model.
    """
    machine_model_hld = MachineModel(
        path_to_yaml=self._find_file('hidden_load_machine_model.yml'))
    self.assertTrue(machine_model_hld.has_hidden_loads())
    semantics_hld = ArchSemantics(machine_model_hld)

    # Each kernel is parsed separately so that the three semantic passes work on
    # their own parse result. (The former extra full parse assigned to
    # kernel_hld_2 was overwritten immediately and has been dropped.)
    kernel_hld = self.parser_x86.parse_file(self.code_x86)
    kernel_hld_2 = self.parser_x86.parse_file(self.code_x86)[-3:]
    kernel_hld_3 = self.parser_x86.parse_file(self.code_x86)[5:8]

    semantics_hld.add_semantics(kernel_hld)
    semantics_hld.add_semantics(kernel_hld_2)
    semantics_hld.add_semantics(kernel_hld_3)

    num_hidden_loads = len(
        [x for x in kernel_hld if INSTR_FLAGS.HIDDEN_LD in x['flags']])
    num_hidden_loads_2 = len(
        [x for x in kernel_hld_2 if INSTR_FLAGS.HIDDEN_LD in x['flags']])
    num_hidden_loads_3 = len(
        [x for x in kernel_hld_3 if INSTR_FLAGS.HIDDEN_LD in x['flags']])
    self.assertEqual(num_hidden_loads, 1)
    self.assertEqual(num_hidden_loads_2, 0)
    self.assertEqual(num_hidden_loads_3, 1)
def test_MachineModel_getter(self):
    """Exercise the read accessors of the CSX and TX2 machine models."""
    csx = self.machine_model_csx
    tx2 = self.machine_model_tx2
    memory_operand = {
        'offset': None,
        'base': {'name': 'r12'},
        'index': {'name': 'rcx'},
        'scale': 8,
    }
    sample_operands = [{'memory': memory_operand}]

    # an unknown mnemonic yields no instruction entry on either model
    self.assertIsNone(csx.get_instruction('GETRESULT', sample_operands))
    self.assertIsNone(tx2.get_instruction('GETRESULT', sample_operands))

    self.assertEqual(csx.get_arch(), 'csx')
    self.assertEqual(tx2.get_arch(), 'tx2')

    self.assertEqual(csx.get_ISA(), 'x86')
    self.assertEqual(tx2.get_ISA(), 'aarch64')

    expected_ports = ['0', '0DV', '1', '2', '2D', '3', '3D', '4', '5', '6', '7']
    expected_data_ports = ['2D', '3D']
    self.assertEqual(csx.get_ports(), expected_ports)
    self.assertEqual(csx.get_data_ports(), expected_data_ports)

    self.assertFalse(tx2.has_hidden_loads())

    # the arch-to-ISA lookup is exercised with mixed-case keys
    self.assertEqual(MachineModel.get_isa_for_arch('CSX'), 'x86')
    self.assertEqual(MachineModel.get_isa_for_arch('tX2'), 'aarch64')
    with self.assertRaises(ValueError):
        self.assertIsNone(MachineModel.get_isa_for_arch('THE_MACHINE'))
def test_MachineModel_getter(self):
    """Exercise the read accessors of the CSX and TX2 machine models."""
    csx = self.machine_model_csx
    tx2 = self.machine_model_tx2
    memory_operand = {
        "offset": None,
        "base": {"name": "r12"},
        "index": {"name": "rcx"},
        "scale": 8,
    }
    sample_operands = [{"memory": memory_operand}]

    # an unknown mnemonic yields no instruction entry on either model
    self.assertIsNone(csx.get_instruction("GETRESULT", sample_operands))
    self.assertIsNone(tx2.get_instruction("GETRESULT", sample_operands))

    self.assertEqual(csx.get_arch(), "csx")
    self.assertEqual(tx2.get_arch(), "tx2")

    self.assertEqual(csx.get_ISA(), "x86")
    self.assertEqual(tx2.get_ISA(), "aarch64")

    expected_ports = ["0", "0DV", "1", "2", "2D", "3", "3D", "4", "5", "6", "7"]
    expected_data_ports = ["2D", "3D"]
    self.assertEqual(csx.get_ports(), expected_ports)
    self.assertEqual(csx.get_data_ports(), expected_data_ports)

    self.assertFalse(tx2.has_hidden_loads())

    # the arch-to-ISA lookup is exercised with mixed-case keys
    self.assertEqual(MachineModel.get_isa_for_arch("CSX"), "x86")
    self.assertEqual(MachineModel.get_isa_for_arch("tX2"), "aarch64")
    with self.assertRaises(ValueError):
        self.assertIsNone(MachineModel.get_isa_for_arch("THE_MACHINE"))
def __init__(self, filename='', arch=None, path_to_yaml=None):
    """
    Set up the frontend with exactly one source for the machine model.

    :param filename: path to the analyzed kernel file for documentation, defaults to ''
    :type filename: str, optional
    :param arch: micro-arch code for getting the machine model, defaults to None
    :type arch: str, optional
    :param path_to_yaml: path to the YAML file for getting the machine model, defaults to None
    :type path_to_yaml: str, optional
    :raises ValueError: if both or none of ``arch`` and ``path_to_yaml`` are given
    """
    self._filename = filename
    if arch and path_to_yaml:
        raise ValueError('Only one of arch and path_to_yaml is allowed.')
    if not (arch or path_to_yaml):
        raise ValueError('Either arch or path_to_yaml required.')

    if arch:
        # the stored arch name is lower-cased; the model is loaded with the key as given
        self._arch = arch.lower()
        self._machine_model = MachineModel(arch=arch, lazy=True)
    else:
        # no arch key available: take the name from the loaded model
        self._machine_model = MachineModel(path_to_yaml=path_to_yaml, lazy=True)
        self._arch = self._machine_model.get_arch()
def extract_model(tree, arch, skip_mem=True):
    """
    Build a machine model for one micro-architecture from an instruction XML tree.

    For every ``instruction`` element that carries an ``architecture`` child for
    ``arch``, the port pressure (from measurements and IACA data), the latency and
    the uop count are collected and stored in a new :class:`MachineModel`.

    :param tree: parsed XML providing ``findall``; presumably an ElementTree of the
                 uops.info instruction data -- confirm against the caller
    :param arch: micro-arch key; its upper-case form selects the ``architecture`` tags
    :type arch: str
    :param skip_mem: skip instructions which have an operand of type "mem",
                     defaults to `True`
    :type skip_mem: bool, optional
    :returns: the filled machine model, or `None` if no ISA could be determined for ``arch``
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except Exception:
        print("Skipping...", file=sys.stderr)
        return None
    mm = MachineModel(isa=isa)
    parser = get_parser(isa)
    arch_key = arch.upper()

    for instruction_tag in tree.findall(".//instruction"):
        mnemonic = instruction_tag.attrib["asm"]
        iform = instruction_tag.attrib["iform"]
        # reduce to second part if mnemonic contains a space (e.g., "REX CRC32")
        if " " in mnemonic:
            mnemonic = mnemonic.split(" ", 1)[1]

        # Extract parameter components
        try:
            parameters = extract_paramters(instruction_tag, parser, isa)
        except ValueError as e:
            print(e, file=sys.stderr)
            # Without parameters this instruction cannot be stored. Continuing would
            # either fail on an undefined name or silently reuse the parameters of
            # the previous instruction.
            continue
        if isa == "x86":
            parameters.reverse()

        # Extract port occupation, latency and uops
        port_pressure, throughput, latency, uops = [], None, None, None
        arch_tag = instruction_tag.find('architecture[@name="' + arch_key + '"]')
        if arch_tag is None:
            continue
        measurements = arch_tag.findall("measurement")
        # skip any instructions without port utilization
        if not any("ports" in m.attrib for m in measurements):
            print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
            continue
        # warn (without skipping) if the port-based TP of the first measurement is
        # larger than the smaller of its two measured TPs; the check above
        # guarantees that at least one measurement exists
        first_measurement = measurements[0].attrib
        if float(first_measurement["TP_ports"]) > min(
            float(first_measurement["TP_loop"]), float(first_measurement["TP_unrolled"])
        ):
            print(
                "Calculated TP is greater than measured TP.",
                iform,
                file=sys.stderr,
            )
        # skip if instruction contains memory operand
        if skip_mem and any(
            x.attrib["type"] == "mem" for x in instruction_tag.findall("operand")
        ):
            print("Contains memory operand, skip: ", iform, file=sys.stderr)
            continue

        # We collect all measurement and IACA information and compare them later
        for measurement_tag in arch_tag.iter("measurement"):
            attrib = measurement_tag.attrib
            if "TP_ports" in attrib:
                throughput = float(attrib["TP_ports"])
            else:
                # XML attributes are strings: convert before comparing, otherwise
                # min() compares str with float('inf') and raises TypeError.
                # "TP_unrolled" is the key read by the check above, "TP_unroll" is
                # kept for backward compatibility.
                throughput = min(
                    (
                        float(attrib[key])
                        for key in ("TP_loop", "TP_unrolled", "TP_unroll", "TP")
                        if key in attrib
                    ),
                    default=float('inf'),
                )
                if throughput == float('inf'):
                    throughput = None
            uops = int(attrib["uops"]) if "uops" in attrib else None
            if "ports" in attrib:
                port_pressure.append(port_pressure_from_tag_attributes(attrib))
            latencies = [
                int(l_tag.attrib["cycles"])
                for l_tag in measurement_tag.iter("latency")
                if "cycles" in l_tag.attrib
            ]
            if len(latencies) == 0:
                latencies = [
                    int(l_tag.attrib["max_cycles"])
                    for l_tag in measurement_tag.iter("latency")
                    if "max_cycles" in l_tag.attrib
                ]
            if latencies[1:] != latencies[:-1]:
                print(
                    "Contradicting latencies found, using smallest:",
                    iform,
                    latencies,
                    file=sys.stderr,
                )
            if latencies:
                latency = min(latencies)

        # Ordered by IACA version (newest last)
        for iaca_tag in sorted(
            arch_tag.iter("IACA"), key=lambda i: StrictVersion(i.attrib["version"])
        ):
            if "ports" in iaca_tag.attrib:
                port_pressure.append(port_pressure_from_tag_attributes(iaca_tag.attrib))

        # Check if all are equal
        if not port_pressure:
            # no data available for this architecture
            continue
        if port_pressure[1:] != port_pressure[:-1]:
            print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
        port_pressure = port_pressure[-1]

        # Adding Intel's 2D and 3D pipelines on Intel micro-architectures, without Ice Lake:
        if arch_key in intel_archs and arch_key not in ["ICL"]:
            if any(p["class"] == "memory" for p in parameters):
                # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
                # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
                port_23 = any("2" in pp[1] and "3" in pp[1] for pp in port_pressure)
                port_4 = any("4" in pp[1] for pp in port_pressure)
                # Add (x, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
                if port_23 and not port_4:
                    if (
                        arch_key in ["SNB", "IVB"]
                        and any(p.get('name', '') == 'ymm' for p in parameters)
                        and '128' not in mnemonic
                    ):
                        # x = 2 if SNB or IVB and ymm register in any operand and not '128' in
                        # instruction name
                        port2D3D_pressure = 2
                    else:
                        # otherwise x = 1
                        port2D3D_pressure = 1
                    port_pressure.append((port2D3D_pressure, ["2D", "3D"]))

        # Add missing ports:
        for ports in [pp[1] for pp in port_pressure]:
            for p in ports:
                mm.add_port(p)

        # the stored throughput is always derived from the final port pressure and
        # supersedes the value read from the measurements above
        throughput = max(mm.average_port_pressure(port_pressure))
        mm.set_instruction(mnemonic, parameters, latency, port_pressure, throughput, uops)
    # TODO eliminate entries which could be covered by automatic load / store expansion
    return mm
class Frontend(object):
    """Text report generator for throughput, critical path and LCD analysis of a kernel."""

    def __init__(self, filename='', arch=None, path_to_yaml=None):
        """
        Constructor method.

        :param filename: path to the analyzed kernel file for documentation, defaults to ''
        :type filename: str, optional
        :param arch: micro-arch code for getting the machine model, defaults to None
        :type arch: str, optional
        :param path_to_yaml: path to the YAML file for getting the machine model, defaults to None
        :type path_to_yaml: str, optional
        :raises ValueError: if both or none of ``arch`` and ``path_to_yaml`` are given
        """
        self._filename = filename
        if not arch and not path_to_yaml:
            raise ValueError('Either arch or path_to_yaml required.')
        if arch and path_to_yaml:
            raise ValueError('Only one of arch and path_to_yaml is allowed.')
        self._arch = arch
        if arch:
            self._arch = arch.lower()
            self._machine_model = MachineModel(arch=arch, lazy=True)
        elif path_to_yaml:
            self._machine_model = MachineModel(path_to_yaml=path_to_yaml, lazy=True)
            self._arch = self._machine_model.get_arch()

    def _is_comment(self, instruction_form):
        """
        Checks if instruction form is a comment-only line.

        :param instruction_form: instruction form to check
        :type instruction_form: `dict`
        :returns: `True` if comment line, `False` otherwise
        """
        return instruction_form['comment'] is not None and instruction_form['instruction'] is None

    def throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True):
        """
        Build throughput analysis only.

        :param kernel: Kernel to build throughput analysis for.
        :type kernel: list
        :param show_lineno: flag for showing the line number of instructions, defaults to `False`
        :type show_lineno: bool, optional
        :param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True`
        :type show_cmnts: bool, optional
        :returns: the report as a string
        """
        lineno_filler = ' ' if show_lineno else ''
        port_len = self._get_max_port_len(kernel)
        separator = '-' * sum([x + 3 for x in port_len]) + '-'
        # extend the rule by the width of the line number column when it is shown
        separator += '--' + len(str(kernel[-1]['line_number'])) * '-' if show_lineno else ''
        col_sep = '|'
        sep_list = self._get_separator_list(col_sep)
        headline = 'Port pressure in cycles'
        headline_str = '{{:^{}}}'.format(len(separator))

        s = '\n\nThroughput Analysis Report\n--------------------------\n'
        s += headline_str.format(headline) + '\n'
        s += lineno_filler + self._get_port_number_line(port_len) + '\n'
        s += separator + '\n'
        for instruction_form in kernel:
            line = '{:4d} {} {} {}'.format(
                instruction_form['line_number'],
                self._get_port_pressure(instruction_form['port_pressure'],
                                        port_len,
                                        separator=sep_list),
                self._get_flag_symbols(instruction_form['flags'])
                if instruction_form['instruction'] is not None else ' ',
                instruction_form['line'].strip().replace('\t', ' '),
            )
            # without line numbers, drop everything in front of the first column separator
            line = line if show_lineno else col_sep + col_sep.join(line.split(col_sep)[1:])
            if show_cmnts is False and self._is_comment(instruction_form):
                continue
            s += line + '\n'
        s += '\n'
        tp_sum = ArchSemantics.get_throughput_sum(kernel)
        s += lineno_filler + self._get_port_pressure(tp_sum, port_len, separator=' ') + '\n'
        return s

    def latency_analysis(self, cp_kernel, separator='|'):
        """
        Build a list-based CP analysis report.

        :param cp_kernel: loop kernel containing the CP information for each instruction form
        :type cp_kernel: list
        :param separator: separator symbol for the columns, defaults to '|'
        :type separator: str, optional
        :returns: the report as a string
        """
        s = '\n\nLatency Analysis Report\n-----------------------\n'
        for instruction_form in cp_kernel:
            s += ('{:4d} {} {:4.1f} {}{}{} {}'.format(
                instruction_form['line_number'],
                separator,
                instruction_form['latency_cp'],
                separator,
                'X' if INSTR_FLAGS.LT_UNKWN in instruction_form['flags'] else ' ',
                separator,
                instruction_form['line'],
            )) + '\n'
        # summary line: padding as wide as the longest line number, then the CP sum
        s += ('\n{:4} {} {:4.1f}'.format(
            ' ' * max([len(str(instr_form['line_number'])) for instr_form in cp_kernel]),
            ' ' * len(separator),
            sum([instr_form['latency_cp'] for instr_form in cp_kernel]),
        )) + '\n'
        return s

    def loopcarried_dependencies(self, dep_dict, separator='|'):
        """
        Build a list-based LCD analysis report.

        :param dep_dict: dictionary with first instruction in LCD as key and the deps as value
        :type dep_dict: dict
        :param separator: separator symbol for the columns, defaults to '|'
        :type separator: str, optional
        :returns: the report as a string
        """
        s = ('\n\nLoop-Carried Dependencies Analysis Report\n'
             + '-----------------------------------------\n')
        # TODO find a way to overcome padding for different tab-lengths
        for dep in dep_dict:
            s += '{:4d} {} {:4.1f} {} {:36}{} {}\n'.format(
                dep,
                separator,
                sum([instr_form['latency_lcd'] for instr_form in dep_dict[dep]['dependencies']]),
                separator,
                dep_dict[dep]['root']['line'].strip(),
                separator,
                [node['line_number'] for node in dep_dict[dep]['dependencies']],
            )
        return s

    def full_analysis(self,
                      kernel,
                      kernel_dg: KernelDG,
                      ignore_unknown=False,
                      arch_warning=False,
                      length_warning=False,
                      verbose=False):
        """
        Build the full analysis report including header, the symbol map, the
        combined TP/CP/LCD view and the list based LCD view.

        :param kernel: kernel to report on
        :type kernel: list
        :param kernel_dg: directed graph containing CP and LCD
        :type kernel_dg: :class:`~osaca.semantics.KernelDG`
        :param ignore_unknown: flag for ignore warning if performance data is missing, defaults to
            `False`
        :type ignore_unknown: boolean, optional
        :param arch_warning: flag for additional user warning to specify micro-arch
        :type arch_warning: boolean, optional
        :param length_warning: flag for additional user warning to specify the kernel length
        :type length_warning: boolean, optional
        :param verbose: flag for verbosity level, defaults to False; currently not
            evaluated by this method
        :type verbose: boolean, optional
        :returns: the report as a string
        """
        report = (self._header_report() + self._user_warnings(arch_warning, length_warning)
                  + self._symbol_map())
        critical_path = kernel_dg.get_critical_path()
        # both views below show the same dependencies, so they are fetched only once
        lcd = kernel_dg.get_loopcarried_dependencies()
        report += self.combined_view(kernel, critical_path, lcd, ignore_unknown)
        report += self.loopcarried_dependencies(lcd)
        return report

    def combined_view(self,
                      kernel,
                      cp_kernel: KernelDG,
                      dep_dict,
                      ignore_unknown=False,
                      show_cmnts=True):
        """
        Build combined view of kernel including port pressure (TP), a CP column and a
        LCD column.

        :param kernel: kernel to report on
        :type kernel: list
        :param cp_kernel: instruction forms on the critical path; it is iterated like a
            list of instruction forms here (``full_analysis`` passes the result of
            ``kernel_dg.get_critical_path()``)
        :param dep_dict: dictionary with first instruction in LCD as key and the deps as value
        :type dep_dict: dict
        :param ignore_unknown: flag for showing result despite of missing instructions, defaults to
            `False`
        :type ignore_unknown: bool, optional
        :param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True`
        :type show_cmnts: bool, optional
        :returns: the report as a string
        """
        s = '\n\nCombined Analysis Report\n------------------------\n'
        lineno_filler = ' '
        port_len = self._get_max_port_len(kernel)
        # Separator for ports
        separator = '-' * sum([x + 3 for x in port_len]) + '-'
        # ... for line numbers
        separator += '--' + len(str(kernel[-1]['line_number'])) * '-'
        col_sep = '|'
        # for LCD/CP column
        separator += '-' * (2 * 6 + len(col_sep)) + '-' * len(col_sep)
        sep_list = self._get_separator_list(col_sep)
        headline = 'Port pressure in cycles'
        headline_str = '{{:^{}}}'.format(len(separator))
        # Prepare CP/LCD variable
        cp_lines = [x['line_number'] for x in cp_kernel]
        sums = {}
        for dep in dep_dict:
            sums[dep] = sum(
                [instr_form['latency_lcd'] for instr_form in dep_dict[dep]['dependencies']])
        lcd_sum = max(sums.values()) if len(sums) > 0 else 0.0
        lcd_lines = []
        if len(dep_dict) > 0:
            # first dependency chain with the maximal latency
            longest_lcd = [line_no for line_no in sums if sums[line_no] == lcd_sum][0]
            lcd_lines = [d['line_number'] for d in dep_dict[longest_lcd]['dependencies']]

        s += headline_str.format(headline) + '\n'
        s += (
            (lineno_filler + self._get_port_number_line(port_len, separator=col_sep)
             + '{}{:^6}{}{:^6}{}'.format(col_sep, 'CP', col_sep, 'LCD', col_sep))
            + '\n' + separator + '\n')
        for instruction_form in kernel:
            if show_cmnts is False and self._is_comment(instruction_form):
                continue
            line_number = instruction_form['line_number']
            used_ports = [list(uops[1]) for uops in instruction_form['port_uops']]
            used_ports = list(set([p for uops_ports in used_ports for p in uops_ports]))
            s += '{:4d} {}{} {} {}\n'.format(
                line_number,
                self._get_port_pressure(instruction_form['port_pressure'], port_len, used_ports,
                                        sep_list),
                self._get_lcd_cp_ports(
                    instruction_form['line_number'],
                    cp_kernel if line_number in cp_lines else None,
                    # longest_lcd only exists if dep_dict is non-empty; in that case
                    # lcd_lines is empty as well and this lookup is never evaluated
                    dep_dict[longest_lcd] if line_number in lcd_lines else None,
                ),
                self._get_flag_symbols(instruction_form['flags'])
                if instruction_form['instruction'] is not None else ' ',
                instruction_form['line'].strip().replace('\t', ' '),
            )
        s += '\n'
        # check for unknown instructions and throw warning if called without --ignore-unknown
        if not ignore_unknown and INSTR_FLAGS.TP_UNKWN in [
                flag for instr in kernel for flag in instr['flags']
        ]:
            num_missing = len(
                [instr['flags'] for instr in kernel if INSTR_FLAGS.TP_UNKWN in instr['flags']])
            s += self._missing_instruction_error(num_missing)
        else:
            # lcd_sum already calculated before
            tp_sum = ArchSemantics.get_throughput_sum(kernel)
            cp_sum = sum([x['latency_cp'] for x in cp_kernel])
            s += (lineno_filler + self._get_port_pressure(tp_sum, port_len, separator=' ')
                  + ' {:^6} {:^6}\n'.format(cp_sum, lcd_sum))
        return s

    ####################
    # HELPER FUNCTIONS
    ####################

    def _missing_instruction_error(self, amount):
        """Returns the warning for if any instruction form in the analysis is missing."""
        s = (
            '------------------ WARNING: The performance data for {} instructions is missing.'
            '------------------\n'
            ' No final analysis is given. If you want to ignore this\n'
            ' warning and run the analysis anyway, start osaca with\n'
            ' --ignore-unknown flag.\n'
            '--------------------------------------------------------------------------------'
            '----------------{}\n').format(amount, '-' * len(str(amount)))
        return s

    def _user_warnings(self, arch_warning, length_warning):
        """Returns warning texts for giving the user more insight in what he is doing."""
        arch_text = (
            'WARNING: No micro-architecture was specified and a default uarch was used.\n'
            ' Specify the uarch with --arch. See --help for more information.\n'
        )
        length_text = (
            'WARNING: You are analyzing a large amount of instruction forms. Analysis '
            'across loops/block boundaries often do not make much sense.\n'
            ' Specify the kernel length with --length. See --help for more '
            'information.\n'
            ' If this is intentional, you can safely ignore this message.\n'
        )
        warnings = ''
        warnings += arch_text if arch_warning else ''
        warnings += length_text if length_warning else ''
        warnings += '\n'
        return warnings

    def _get_separator_list(self, separator, separator_2=' '):
        """
        Creates column view for separators in the TP/combined view.

        Neighbouring ports whose names contain the same first number are joined with
        ``separator_2``, all others with ``separator``.
        """
        separator_list = []
        for i in range(len(self._machine_model.get_ports()) - 1):
            match_1 = re.search(r'\d+', self._machine_model.get_ports()[i])
            match_2 = re.search(r'\d+', self._machine_model.get_ports()[i + 1])
            if match_1 is not None and match_2 is not None and match_1.group() == match_2.group():
                separator_list.append(separator_2)
            else:
                separator_list.append(separator)
        separator_list.append(separator)
        return separator_list

    def _get_flag_symbols(self, flag_obj):
        """Returns flags for a flag object of an instruction"""
        string_result = ''
        string_result += '*' if INSTR_FLAGS.NOT_BOUND in flag_obj else ''
        string_result += 'X' if INSTR_FLAGS.TP_UNKWN in flag_obj else ''
        string_result += 'P' if INSTR_FLAGS.HIDDEN_LD in flag_obj else ''
        # TODO add other flags
        string_result += ' ' if len(string_result) == 0 else ''
        return string_result

    def _get_port_pressure(self, ports, port_len, used_ports=None, separator='|'):
        """
        Returns line of port pressure for an instruction.

        :param ports: pressure value per port
        :param port_len: column width per port
        :param used_ports: names of ports that shall be printed even with zero pressure,
            defaults to no ports
        :type used_ports: list, optional
        :param separator: one separator for all columns or a list with one per column,
            defaults to '|'
        """
        # a fresh list per call instead of a shared mutable default argument
        used_ports = [] if used_ports is None else used_ports
        if not isinstance(separator, list):
            separator = [separator for x in ports]
        string_result = '{} '.format(separator[-1])
        for i in range(len(ports)):
            if float(ports[i]) == 0.0 and self._machine_model.get_ports()[i] not in used_ports:
                # leave the column of an unused port blank
                string_result += port_len[i] * ' ' + ' {} '.format(separator[i])
                continue
            # use as many decimals as fit into the column next to the integer part
            left_len = len(str(float(ports[i])).split('.')[0])
            substr = '{:' + str(left_len) + '.' + str(max(port_len[i] - left_len - 1, 0)) + 'f}'
            substr = substr.format(ports[i])
            string_result += (substr + ' {} '.format(separator[i])
                              if '.' in substr else '{:.1f}{} '.format(ports[i], separator[i]))
        return string_result[:-1]

    def _get_node_by_lineno(self, lineno, kernel):
        """Returns instruction form from kernel by its line number."""
        nodes = [instr for instr in kernel if instr['line_number'] == lineno]
        return nodes[0] if len(nodes) > 0 else None

    def _get_lcd_cp_ports(self, line_number, cp_dg, dependency, separator='|'):
        """Returns the CP and LCD line for one instruction."""
        lat_cp = lat_lcd = ''
        if cp_dg:
            lat_cp = float(self._get_node_by_lineno(line_number, cp_dg)['latency_cp'])
        if dependency:
            lat_lcd = float(
                self._get_node_by_lineno(line_number, dependency['dependencies'])['latency_lcd'])
        return '{} {:>4} {} {:>4} {}'.format(separator, lat_cp, separator, lat_lcd, separator)

    def _get_max_port_len(self, kernel):
        """Returns the maximal length needed to print all throughputs of the kernel."""
        port_len = [4 for x in self._machine_model.get_ports()]
        for instruction_form in kernel:
            for i, port in enumerate(instruction_form['port_pressure']):
                if len('{:.2f}'.format(port)) > port_len[i]:
                    port_len[i] = len('{:.2f}'.format(port))
        return port_len

    def _get_port_number_line(self, port_len, separator='|'):
        """Returns column view of port identificators of machine_model."""
        string_result = separator
        separator_list = self._get_separator_list(separator, '-')
        for i, length in enumerate(port_len):
            substr = '{:^' + str(length + 2) + 's}'
            string_result += substr.format(self._machine_model.get_ports()[i]) + separator_list[i]
        return string_result

    def _header_report(self):
        """Returns the header with version, analyzed file, architecture and UTC timestamp."""
        version = 'v0.3'
        adjust = 20
        header = ''
        header += 'Open Source Architecture Code Analyzer (OSACA) - {}\n'.format(version)
        header += 'Analyzed file:'.ljust(adjust) + '{}\n'.format(self._filename)
        header += 'Architecture:'.ljust(adjust) + '{}\n'.format(self._arch)
        header += 'Timestamp:'.ljust(adjust) + '{}\n'.format(
            dt.utcnow().strftime('%Y-%m-%d %H:%M:%S'))
        return header + '\n'

    def _symbol_map(self):
        """Returns the instruction flag map."""
        symbol_dict = {
            INSTR_FLAGS.NOT_BOUND: 'Instruction micro-ops not bound to a port',
            INSTR_FLAGS.TP_UNKWN: 'No throughput/latency information for this instruction in '
            + 'data file',
            INSTR_FLAGS.HIDDEN_LD: 'Throughput of LOAD operation can be hidden behind a past '
            + 'or future STORE instruction',
        }
        symbol_map = ''
        for flag in sorted(symbol_dict.keys()):
            symbol_map += ' {} - {}\n'.format(self._get_flag_symbols([flag]), symbol_dict[flag])
        return symbol_map

    def _port_binding_summary(self):
        raise NotImplementedError
def test_invalid_add(self):
    """Check that malformed additions to a machine model are rejected.

    An empty entry passed to ``set_instruction_entry`` is expected to raise
    ``KeyError``; calling ``set_instruction`` without arguments is expected
    to raise ``TypeError``.
    """
    empty_entry = {}
    with self.assertRaises(KeyError):
        model = MachineModel('csx')
        model.set_instruction_entry(empty_entry)
    with self.assertRaises(TypeError):
        model = MachineModel('csx')
        model.set_instruction()
def test_add_single_entry(self):
    """Check that adding one entry grows each model's form list by exactly one."""
    # Build all three models first, then record their sizes before any change.
    models = [MachineModel(arch) for arch in ("csx", "tx2", "zen1")]
    sizes_before = [len(model["instruction_forms"]) for model in models]

    new_entries = [self.entry_csx, self.entry_tx2, {"name": "empty_operation"}]
    for model, entry in zip(models, new_entries):
        model.set_instruction_entry(entry)

    # Compare only after every model has been modified (csx, tx2, zen1 order).
    growth = [
        len(model["instruction_forms"]) - before
        for model, before in zip(models, sizes_before)
    ]
    for added in growth:
        self.assertEqual(added, 1)
class Frontend(object):
    """Builds the textual analysis reports (TP, CP, LCD) for a kernel."""

    def __init__(self, filename="", arch=None, path_to_yaml=None):
        """
        Constructor method.

        :param filename: path to the analyzed kernel file for documentation, defaults to ''
        :type filename: str, optional
        :param arch: micro-arch code for getting the machine model, defaults to None
        :type arch: str, optional
        :param path_to_yaml: path to the YAML file for getting the machine model, defaults to None
        :type path_to_yaml: str, optional
        :raises ValueError: if neither or both of `arch` and `path_to_yaml` are given
        """
        self._filename = filename
        if not arch and not path_to_yaml:
            raise ValueError("Either arch or path_to_yaml required.")
        if arch and path_to_yaml:
            raise ValueError("Only one of arch and path_to_yaml is allowed.")
        if arch:
            self._arch = arch.lower()
            self._machine_model = MachineModel(arch=arch, lazy=True)
        else:
            # exactly one of the two is set, so path_to_yaml is truthy here
            self._machine_model = MachineModel(path_to_yaml=path_to_yaml, lazy=True)
            self._arch = self._machine_model.get_arch()

    def _is_comment(self, instruction_form):
        """
        Checks if instruction form is a comment-only line.

        :param instruction_form: instruction form to check
        :type instruction_form: `dict`
        :returns: `True` if comment line, `False` otherwise
        """
        return (instruction_form["comment"] is not None
                and instruction_form["instruction"] is None)

    def throughput_analysis(self, kernel, show_lineno=False, show_cmnts=True):
        """
        Build throughput analysis only.

        :param kernel: Kernel to build throughput analysis for.
        :type kernel: list
        :param show_lineno: flag for showing the line number of instructions, defaults to `False`
        :type show_lineno: bool, optional
        :param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True`
        :type show_cmnts: bool, optional
        :returns: the report as a single string
        """
        lineno_filler = " " if show_lineno else ""
        port_len = self._get_max_port_len(kernel)
        separator = "-" * sum([x + 3 for x in port_len]) + "-"
        if show_lineno:
            # widen the rule by the width of the last (largest) line number
            separator += "--" + len(str(kernel[-1]["line_number"])) * "-"
        col_sep = "|"
        sep_list = self._get_separator_list(col_sep)
        headline = "Port pressure in cycles"
        headline_str = "{{:^{}}}".format(len(separator))

        parts = [
            "\n\nThroughput Analysis Report\n--------------------------\n",
            headline_str.format(headline) + "\n",
            lineno_filler + self._get_port_number_line(port_len) + "\n",
            separator + "\n",
        ]
        for instruction_form in kernel:
            # skip comment-only lines before doing any formatting work
            if show_cmnts is False and self._is_comment(instruction_form):
                continue
            line = "{:4d} {} {} {}".format(
                instruction_form["line_number"],
                self._get_port_pressure(instruction_form["port_pressure"],
                                        port_len,
                                        separator=sep_list),
                self._get_flag_symbols(instruction_form["flags"])
                if instruction_form["instruction"] is not None else " ",
                instruction_form["line"].strip().replace("\t", " "),
            )
            if not show_lineno:
                # drop everything in front of the first column separator
                line = col_sep + col_sep.join(line.split(col_sep)[1:])
            parts.append(line + "\n")
        parts.append("\n")
        tp_sum = ArchSemantics.get_throughput_sum(kernel)
        parts.append(lineno_filler
                     + self._get_port_pressure(tp_sum, port_len, separator=" ")
                     + "\n")
        return "".join(parts)

    def latency_analysis(self, cp_kernel, separator="|"):
        """
        Build a list-based CP analysis report.

        An empty `cp_kernel` yields the header followed by a sum line of 0.0.

        :param cp_kernel: loop kernel containing the CP information for each instruction form
        :type cp_kernel: list
        :separator: separator symbol for the columns, defaults to '|'
        :type separator: str, optional
        :returns: the report as a single string
        """
        parts = ["\n\nLatency Analysis Report\n-----------------------\n"]
        for instruction_form in cp_kernel:
            parts.append("{:4d} {} {:4.1f} {}{}{} {}".format(
                instruction_form["line_number"],
                separator,
                instruction_form["latency_cp"],
                separator,
                "X" if INSTR_FLAGS.LT_UNKWN in instruction_form["flags"] else " ",
                separator,
                instruction_form["line"],
            ) + "\n")
        # default=0 keeps an empty critical path from raising ValueError in max()
        lineno_width = max(
            (len(str(instr_form["line_number"])) for instr_form in cp_kernel),
            default=0,
        )
        parts.append("\n{:4} {} {:4.1f}".format(
            " " * lineno_width,
            " " * len(separator),
            sum(instr_form["latency_cp"] for instr_form in cp_kernel),
        ) + "\n")
        return "".join(parts)

    def loopcarried_dependencies(self, dep_dict, separator="|"):
        """
        Build a list-based LCD analysis report.

        :param dep_dict: dictionary with first instruction in LCD as key and the deps as value
        :type dep_dict: dict
        :separator: separator symbol for the columns, defaults to '|'
        :type separator: str, optional
        :returns: the report as a single string
        """
        parts = ["\n\nLoop-Carried Dependencies Analysis Report\n"
                 "-----------------------------------------\n"]
        # TODO find a way to overcome padding for different tab-lengths
        for dep, info in dep_dict.items():
            parts.append("{:4d} {} {:4.1f} {} {:36}{} {}\n".format(
                dep,
                separator,
                info["latency"],
                separator,
                info["root"]["line"].strip(),
                separator,
                [node["line_number"] for node, _lat in info["dependencies"]],
            ))
        return "".join(parts)

    def full_analysis(
        self,
        kernel,
        kernel_dg: "KernelDG",
        ignore_unknown=False,
        arch_warning=False,
        length_warning=False,
        lcd_warning=False,
        verbose=False,
    ):
        """
        Build the full analysis report including header, the symbol map, the combined TP/CP/LCD
        view and the list based LCD view.

        :param kernel: kernel to report on
        :type kernel: list
        :param kernel_dg: directed graph containing CP and LCD
        :type kernel_dg: :class:`~osaca.semantics.KernelDG`
        :param ignore_unknown: flag for ignore warning if performance data is missing, defaults to
            `False`
        :type ignore_unknown: boolean, optional
        :param arch_warning: flag for additional user warning to specify micro-arch
        :type arch_warning: boolean, optional
        :param length_warning: flag for additional user warning to specify kernel length with
            --lines
        :type length_warning: boolean, optional
        :param lcd_warning: flag for additional user warning due to LCD analysis timed out
        :type lcd_warning: boolean, optional
        :param verbose: flag for verbosity level, defaults to False (currently unused here)
        :type verbose: boolean, optional
        :returns: the report as a single string
        """
        # Query the LCDs once so both views are built from the same result.
        lcd = kernel_dg.get_loopcarried_dependencies()
        return (self._header_report()
                + self._user_warnings_header(arch_warning, length_warning)
                + self._symbol_map()
                + self.combined_view(
                    kernel,
                    kernel_dg.get_critical_path(),
                    lcd,
                    ignore_unknown,
                )
                + self._user_warnings_footer(lcd_warning)
                + self.loopcarried_dependencies(lcd))

    def combined_view(self,
                      kernel,
                      cp_kernel: list,
                      dep_dict,
                      ignore_unknown=False,
                      show_cmnts=True):
        """
        Build combined view of kernel including port pressure (TP), a CP column and a
        LCD column.

        :param kernel: kernel to report on
        :type kernel: list
        :param cp_kernel: instruction forms on the critical path, each carrying
            ``line_number`` and ``latency_cp``
        :type cp_kernel: list
        :param dep_dict: dictionary with first instruction in LCD as key and the deps as value
        :type dep_dict: dict
        :param ignore_unknown: flag for showing result despite of missing instructions, defaults to
            `False`
        :type ignore_unknown: bool, optional
        :param show_cmnts: flag for showing comment-only lines in kernel, defaults to `True`
        :type show_cmnts: bool, optional
        :returns: the report as a single string
        """
        lineno_filler = " "
        port_len = self._get_max_port_len(kernel)
        # Separator for ports
        separator = "-" * sum([x + 3 for x in port_len]) + "-"
        # ... for line numbers
        separator += "--" + len(str(kernel[-1]["line_number"])) * "-"
        col_sep = "|"
        # for LCD/CP column
        separator += "-" * (2 * 6 + len(col_sep)) + "-" * len(col_sep)
        sep_list = self._get_separator_list(col_sep)
        headline = "Port pressure in cycles"
        headline_str = "{{:^{}}}".format(len(separator))
        # Prepare CP/LCD variable
        cp_lines = {x["line_number"] for x in cp_kernel}
        lcd_sum = 0.0
        lcd_lines = {}
        if dep_dict:
            # only the longest loop-carried dependency is shown in the LCD column
            longest_lcd = max(dep_dict, key=lambda ln: dep_dict[ln]['latency'])
            lcd_sum = dep_dict[longest_lcd]['latency']
            lcd_lines = {
                instr["line_number"]: lat
                for instr, lat in dep_dict[longest_lcd]["dependencies"]
            }

        parts = [
            "\n\nCombined Analysis Report\n------------------------\n",
            headline_str.format(headline) + "\n",
            lineno_filler
            + self._get_port_number_line(port_len, separator=col_sep)
            + "{}{:^6}{}{:^6}{}".format(col_sep, "CP", col_sep, "LCD", col_sep)
            + "\n" + separator + "\n",
        ]
        for instruction_form in kernel:
            if show_cmnts is False and self._is_comment(instruction_form):
                continue
            line_number = instruction_form["line_number"]
            # every port named by any uop of this instruction form
            used_ports = {
                p for uops in instruction_form["port_uops"] for p in uops[1]
            }
            parts.append("{:4d} {}{} {} {}\n".format(
                line_number,
                self._get_port_pressure(instruction_form["port_pressure"],
                                        port_len, used_ports, sep_list),
                self._get_lcd_cp_ports(
                    line_number,
                    cp_kernel if line_number in cp_lines else None,
                    lcd_lines.get(line_number),
                ),
                self._get_flag_symbols(instruction_form["flags"])
                if instruction_form["instruction"] is not None else " ",
                instruction_form["line"].strip().replace("\t", " "),
            ))
        parts.append("\n")
        # check for unknown instructions and throw warning if called without --ignore-unknown
        num_missing = 0
        if not ignore_unknown:
            num_missing = sum(
                1 for instr in kernel if INSTR_FLAGS.TP_UNKWN in instr["flags"])
        if num_missing:
            parts.append(self._missing_instruction_error(num_missing))
        else:
            # lcd_sum already calculated before
            tp_sum = ArchSemantics.get_throughput_sum(kernel)
            # if ALL instructions are unknown, take a line of 0s
            if not tp_sum:
                tp_sum = kernel[0]["port_pressure"]
            cp_sum = sum([x["latency_cp"] for x in cp_kernel])
            parts.append(lineno_filler
                         + self._get_port_pressure(tp_sum, port_len, separator=" ")
                         + " {:^6} {:^6}\n".format(cp_sum, lcd_sum))
        return "".join(parts)

    ####################
    # HELPER FUNCTIONS
    ####################

    def _missing_instruction_error(self, amount):
        """Returns the warning for if any instruction form in the analysis is missing."""
        s = (
            "------------------ WARNING: The performance data for {} instructions is missing."
            "------------------\n"
            " No final analysis is given. If you want to ignore this\n"
            " warning and run the analysis anyway, start osaca with\n"
            " --ignore-unknown flag.\n"
            "--------------------------------------------------------------------------------"
            "----------------{}\n").format(amount, "-" * len(str(amount)))
        return s

    def _user_warnings_header(self, arch_warning, length_warning):
        """Returns the warning texts shown above the analysis (uarch and kernel length)."""
        dashed_line = (
            "-------------------------------------------------------------------------"
            "------------------------\n")
        arch_text = (
            "-------------------------- WARNING: No micro-architecture was specified "
            "-------------------------\n"
            " A default uarch for this particular ISA was used. Specify "
            "the uarch with --arch.\n See --help for more information.\n" + dashed_line)
        # The option is spelled --lines (see the full_analysis docstring).
        length_text = (
            "----------------- WARNING: You are analyzing a large amount of instruction forms "
            "----------------\n Analysis across loops/block boundaries often do not make"
            " much sense.\n Specify the kernel length with --lines. See --help for more "
            "information.\n If this is intentional, you can safely ignore this message.\n"
            + dashed_line)
        warnings = ""
        warnings += arch_text if arch_warning else ""
        warnings += length_text if length_warning else ""
        warnings += "\n"
        return warnings

    def _user_warnings_footer(self, lcd_warning):
        """Returns the warning texts shown below the combined view (LCD timeout)."""
        dashed_line = (
            "-------------------------------------------------------------------------"
            "------------------------\n")
        lcd_text = (
            "-------------------------------- WARNING: LCD analysis timed out "
            "-------------------------------\n While searching for all dependency chains"
            " the analysis timed out and might be\n incomplete. Decrease the number of "
            "instructions or set the timeout threshold\n with --lcd-timeout. See --help"
            " for more information.\n" + dashed_line)
        warnings = "\n"
        warnings += lcd_text if lcd_warning else ""
        warnings += "\n"
        return warnings

    def _get_separator_list(self, separator, separator_2=" "):
        """Creates column view for separators in the TP/combined view.

        Neighbouring ports whose names contain the same first number get
        `separator_2` between them, all others get `separator`.
        """
        ports = self._machine_model.get_ports()
        separator_list = []
        for port, next_port in zip(ports, ports[1:]):
            match_1 = re.search(r"\d+", port)
            match_2 = re.search(r"\d+", next_port)
            if (match_1 is not None and match_2 is not None
                    and match_1.group() == match_2.group()):
                separator_list.append(separator_2)
            else:
                separator_list.append(separator)
        # closing separator after the last port column
        separator_list.append(separator)
        return separator_list

    def _get_flag_symbols(self, flag_obj):
        """Returns flags for a flag object of an instruction"""
        string_result = ""
        string_result += "*" if INSTR_FLAGS.NOT_BOUND in flag_obj else ""
        string_result += "X" if INSTR_FLAGS.TP_UNKWN in flag_obj else ""
        string_result += "P" if INSTR_FLAGS.HIDDEN_LD in flag_obj else ""
        # TODO add other flags
        string_result += " " if len(string_result) == 0 else ""
        return string_result

    def _get_port_pressure(self, ports, port_len, used_ports=None, separator="|"):
        """Returns line of port pressure for an instruction.

        :param ports: pressure value per port
        :param port_len: column width per port
        :param used_ports: ports that are printed even if their pressure is 0,
            defaults to none
        :param separator: one separator for all columns or a list with one per column
        """
        if used_ports is None:
            used_ports = ()
        if isinstance(separator, list):
            lead = separator[-1]
        else:
            # keep the scalar for the leading column so empty `ports` cannot fail
            lead = separator
            separator = [separator for _ in ports]
        string_result = "{} ".format(lead)
        for i, pressure in enumerate(ports):
            if float(pressure) == 0.0 and self._machine_model.get_ports(
            )[i] not in used_ports:
                # unused port: leave the column blank
                string_result += port_len[i] * " " + " {} ".format(separator[i])
                continue
            left_len = len(str(float(pressure)).split(".")[0])
            substr = "{:" + str(left_len) + "." + str(
                max(port_len[i] - left_len - 1, 0)) + "f}"
            substr = substr.format(pressure)
            string_result += (substr + " {} ".format(separator[i])
                              if "." in substr else "{:.1f}{} ".format(
                                  pressure, separator[i]))
        return string_result[:-1]

    def _get_node_by_lineno(self, lineno, kernel):
        """Returns instruction form from kernel by its line number, `None` if absent."""
        for instr in kernel:
            if instr["line_number"] == lineno:
                return instr
        return None

    def _get_lcd_cp_ports(self, line_number, cp_dg, dep_lat, separator="|"):
        """Returns the CP and LCD line for one instruction."""
        lat_cp = lat_lcd = ""
        if cp_dg:
            lat_cp = float(
                self._get_node_by_lineno(line_number, cp_dg)["latency_cp"])
        if dep_lat is not None:
            lat_lcd = float(dep_lat)
        return "{} {:>4} {} {:>4} {}".format(separator, lat_cp, separator,
                                             lat_lcd, separator)

    def _get_max_port_len(self, kernel):
        """Returns the maximal length needed to print all throughputs of the kernel."""
        # every column is at least 4 characters wide
        port_len = [4 for _ in self._machine_model.get_ports()]
        for instruction_form in kernel:
            for i, port in enumerate(instruction_form["port_pressure"]):
                port_len[i] = max(port_len[i], len("{:.2f}".format(port)))
        return port_len

    def _get_port_number_line(self, port_len, separator="|"):
        """Returns column view of port identificators of machine_model."""
        ports = self._machine_model.get_ports()
        separator_list = self._get_separator_list(separator, "-")
        string_result = separator
        for i, length in enumerate(port_len):
            substr = "{:^" + str(length + 2) + "s}"
            string_result += substr.format(ports[i]) + separator_list[i]
        return string_result

    def _header_report(self):
        """Returns header information (version, file, architecture, UTC timestamp)."""
        version = _get_version("__init__.py")
        adjust = 20
        header = ""
        header += "Open Source Architecture Code Analyzer (OSACA) - {}\n".format(
            version)
        header += "Analyzed file:".ljust(adjust) + "{}\n".format(
            self._filename)
        header += "Architecture:".ljust(adjust) + "{}\n".format(
            self._arch.upper())
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # switching needs a `timezone` import at file level.
        header += "Timestamp:".ljust(adjust) + "{}\n".format(
            dt.utcnow().strftime("%Y-%m-%d %H:%M:%S"))
        return header + "\n"

    def _symbol_map(self):
        """Returns the instruction flag map, one line per flag."""
        symbol_dict = {
            INSTR_FLAGS.NOT_BOUND:
            "Instruction micro-ops not bound to a port",
            INSTR_FLAGS.TP_UNKWN:
            "No throughput/latency information for this instruction in " +
            "data file",
            INSTR_FLAGS.HIDDEN_LD:
            "Throughput of LOAD operation can be hidden behind a past " +
            "or future STORE instruction",
        }
        symbol_map = ""
        for flag in sorted(symbol_dict):
            symbol_map += " {} - {}\n".format(self._get_flag_symbols([flag]),
                                              symbol_dict[flag])
        return symbol_map

    def _port_binding_summary(self):
        raise NotImplementedError
def extract_model(tree, arch, skip_mem=True):
    """Build a machine model for `arch` from an XML instruction tree.

    Instructions are skipped when their mnemonic contains a space, their
    operands cannot be extracted, there is no data for `arch`, no port
    utilization is given, the first measurement's ``TP_ports`` and ``TP``
    differ, or (with `skip_mem`) an operand is of type ``mem``.

    :param tree: parsed XML tree providing ``instruction`` elements
    :param arch: micro-architecture code; matched upper-cased against the
        ``name`` attribute of the ``architecture`` elements
    :type arch: str
    :param skip_mem: skip instructions with a memory operand, defaults to `True`
    :type skip_mem: bool, optional
    :returns: the filled machine model, or `None` if no ISA could be determined for `arch`
    """
    try:
        isa = MachineModel.get_isa_for_arch(arch)
    except Exception:
        print("Skipping...", file=sys.stderr)
        return None
    mm = MachineModel(isa=isa)
    parser = get_parser(isa)
    arch_upper = arch.upper()

    for instruction_tag in tree.findall('.//instruction'):
        mnemonic = instruction_tag.attrib['asm']
        iform = instruction_tag.attrib['iform']
        # skip any mnemonic which contain spaces (e.g., "REX CRC32")
        if ' ' in mnemonic:
            continue

        # Extract parameter components
        try:
            parameters = extract_paramters(instruction_tag, parser, isa)
            if isa == 'x86':
                parameters.reverse()
        except ValueError as e:
            print(e, file=sys.stderr)
            # Without operands there is nothing valid to store; going on would
            # reuse the previous instruction's parameters (or hit an unbound name).
            continue

        # Extract port occupation, throughput and latency
        port_pressure, throughput, latency, uops = [], None, None, None
        arch_tag = instruction_tag.find('architecture[@name="' + arch_upper + '"]')
        if arch_tag is None:
            continue
        # skip any instructions without port utilization
        if not any('ports' in x.attrib for x in arch_tag.findall('measurement')):
            print("Couldn't find port utilization, skip: ", iform, file=sys.stderr)
            continue
        # skip if computed and measured TP don't match
        # (all measurements are compared, but only the first result decides)
        if not [x.attrib['TP_ports'] == x.attrib['TP'] for x in arch_tag.findall('measurement')][
            0
        ]:
            print(
                "Calculated TP from port utilization doesn't match TP, skip: ",
                iform,
                file=sys.stderr,
            )
            continue
        # skip if instruction contains memory operand
        if skip_mem and any(
            x.attrib['type'] == 'mem' for x in instruction_tag.findall('operand')
        ):
            print("Contains memory operand, skip: ", iform, file=sys.stderr)
            continue

        # We collect all measurement and IACA information and compare them later
        for measurement_tag in arch_tag.iter('measurement'):
            attrib = measurement_tag.attrib
            if 'TP_ports' in attrib:
                throughput = attrib['TP_ports']
            else:
                throughput = attrib['TP'] if 'TP' in attrib else None
            uops = int(attrib['uops']) if 'uops' in attrib else None
            if 'ports' in attrib:
                port_pressure.append(port_pressure_from_tag_attributes(attrib))
            latencies = [
                int(l_tag.attrib['cycles'])
                for l_tag in measurement_tag.iter('latency')
                if 'cycles' in l_tag.attrib
            ]
            if len(latencies) == 0:
                # fall back to the upper bounds if no exact cycle count is given
                latencies = [
                    int(l_tag.attrib['max_cycles'])
                    for l_tag in measurement_tag.iter('latency')
                    if 'max_cycles' in l_tag.attrib
                ]
            # shifted comparison is true iff not all entries are equal
            if latencies[1:] != latencies[:-1]:
                print(
                    "Contradicting latencies found, using smallest:",
                    iform,
                    latencies,
                    file=sys.stderr,
                )
            if latencies:
                latency = min(latencies)

        # Ordered by IACA version (newest last)
        for iaca_tag in sorted(
            arch_tag.iter('IACA'), key=lambda i: StrictVersion(i.attrib['version'])
        ):
            if 'ports' in iaca_tag.attrib:
                port_pressure.append(port_pressure_from_tag_attributes(iaca_tag.attrib))

        # Check if all are equal
        if port_pressure:
            if port_pressure[1:] != port_pressure[:-1]:
                print("Contradicting port occupancies, using latest IACA:", iform, file=sys.stderr)
            port_pressure = port_pressure[-1]
        else:
            # print("No data available for this architecture:", mnemonic, file=sys.stderr)
            continue

        # Adding Intel's 2D and 3D pipelines on Intel µarchs, without Ice Lake:
        if arch_upper in intel_archs and arch_upper not in ['ICL']:
            if any(p['class'] == 'memory' for p in parameters):
                # We have a memory parameter, if ports 2 & 3 are present, also add 2D & 3D
                # TODO remove port7 on 'hsw' onward and split entries depending on addressing mode
                port_23 = False
                port_4 = False
                for pp in port_pressure:
                    if '2' in pp[1] and '3' in pp[1]:
                        port_23 = True
                    if '4' in pp[1]:
                        port_4 = True
                # Add (X, ['2D', '3D']) if load ports (2 & 3) are used, but not the store port (4)
                # X = 2 on SNB and IVB IFF used in combination with ymm register, otherwise X = 1
                if arch_upper in ['SNB', 'IVB'] and any(
                    p['class'] == 'register' and p['name'] == 'ymm' for p in parameters
                ):
                    data_port_throughput = 2
                else:
                    data_port_throughput = 1
                if port_23 and not port_4:
                    port_pressure.append((data_port_throughput, ['2D', '3D']))

        # Add missing ports:
        for ports in [pp[1] for pp in port_pressure]:
            for p in ports:
                mm.add_port(p)

        # the stored throughput is always derived from the port pressure
        throughput = max(mm.average_port_pressure(port_pressure))
        mm.set_instruction(mnemonic, parameters, latency, port_pressure, throughput, uops)
    # TODO eliminate entries which could be covered by automatic load / store expansion
    return mm
def test_machine_model_various_functions(self):
    """Exercise assorted MachineModel helpers on the x86 and AArch64 test DBs."""

    def mem(base, index=None, scale=1):
        # memory operand description in the key order used throughout this test
        return {"base": base, "offset": None, "index": index, "scale": scale}

    # dummy models must be constructible from the ISA name alone
    try:
        for isa in ("x86", "aarch64"):
            MachineModel(isa=isa)
    except ValueError:
        self.fail()
    test_mm_x86 = MachineModel(path_to_yaml=self._find_file("test_db_x86.yml"))
    test_mm_arm = MachineModel(path_to_yaml=self._find_file("test_db_aarch64.yml"))
    both_models = (test_mm_x86, test_mm_arm)

    # get_instruction without mnemonic
    for model in both_models:
        self.assertIsNone(model.get_instruction(None, []))

    # get_instruction from DB
    for mnemonic in (None, "NOT_IN_DB"):
        for model in both_models:
            self.assertIsNone(model.get_instruction(mnemonic, []))

    xmm_operands = [{"class": "register", "name": "xmm"} for _ in range(3)]
    instr_form_x86_1 = test_mm_x86.get_instruction("vaddpd", xmm_operands)
    self.assertEqual(instr_form_x86_1,
                     test_mm_x86.get_instruction("vaddpd", xmm_operands))
    self.assertEqual(
        test_mm_x86.get_instruction("jg", [{"class": "identifier"}]),
        test_mm_x86.get_instruction("jg", [{"class": "identifier"}]),
    )

    vec_operands = [
        {"class": "register", "prefix": "v", "shape": "s"} for _ in range(3)
    ]
    instr_form_arm_1 = test_mm_arm.get_instruction("fadd", vec_operands)
    self.assertEqual(instr_form_arm_1,
                     test_mm_arm.get_instruction("fadd", vec_operands))
    self.assertEqual(
        test_mm_arm.get_instruction("b.ne", [{"class": "identifier"}]),
        test_mm_arm.get_instruction("b.ne", [{"class": "identifier"}]),
    )

    # full instruction name
    self.assertEqual(
        MachineModel.get_full_instruction_name(instr_form_x86_1),
        "vaddpd register(name:xmm),register(name:xmm),register(name:xmm)",
    )
    self.assertEqual(
        MachineModel.get_full_instruction_name(instr_form_arm_1),
        "fadd register(prefix:v,shape:s),register(prefix:v,shape:s),"
        "register(prefix:v,shape:s)",
    )

    # get_store_tp
    self.assertEqual(
        test_mm_x86.get_store_throughput(mem({"name": "x"})),
        [[2, "237"], [2, "4"]],
    )
    self.assertEqual(
        test_mm_x86.get_store_throughput(
            mem({"prefix": "NOT_IN_DB"}, index="NOT_NONE")),
        [[1, "23"], [1, "4"]],
    )
    self.assertEqual(
        test_mm_arm.get_store_throughput(mem({"prefix": "x"})),
        [[2, "34"], [2, "5"]],
    )
    self.assertEqual(
        test_mm_arm.get_store_throughput(mem({"prefix": "NOT_IN_DB"})),
        [[1, "34"], [1, "5"]],
    )

    # get_store_lt
    self.assertEqual(
        test_mm_x86.get_store_latency(mem({"name": "x"}, scale="1")), 0)
    self.assertEqual(
        test_mm_arm.get_store_latency(mem({"prefix": "x"}, scale="1")), 0)

    # has_hidden_load
    self.assertFalse(test_mm_x86.has_hidden_loads())

    # default load tp
    self.assertEqual(
        test_mm_x86.get_load_throughput(mem({"name": "x"})),
        [[1, "23"], [1, ["2D", "3D"]]],
    )

    # adding port
    for model in both_models:
        model.add_port("dummyPort")

    # dump of DB
    with open("/dev/null", "w") as dev_null:
        for model in both_models:
            model.dump(stream=dev_null)
def setUpClass(cls):
    """Load the test kernels, machine models and semantics shared by all tests.

    Every kernel is reduced to its marked section and annotated with
    source/destination and throughput/latency information.
    """

    def read_kernel(file_name):
        with open(cls._find_file(file_name)) as f:
            return f.read()

    def data_path(relative):
        return os.path.join(cls.MODULE_DATA_DIR, relative)

    def annotate(semantics, kernel):
        for instruction_form in kernel:
            semantics.assign_src_dst(instruction_form)
            semantics.assign_tp_lt(instruction_form)

    # set up parser and kernels
    cls.parser_x86 = ParserX86ATT()
    cls.parser_AArch64 = ParserAArch64()
    cls.code_x86 = read_kernel("kernel_x86.s")
    cls.code_x86_memdep = read_kernel("kernel_x86_memdep.s")
    cls.code_x86_long_LCD = read_kernel("kernel_x86_long_LCD.s")
    cls.code_aarch64_memdep = read_kernel("kernel_aarch64_memdep.s")
    cls.code_AArch64 = read_kernel("kernel_aarch64.s")
    cls.code_AArch64_SVE = read_kernel("kernel_aarch64_sve.s")

    parse_x86 = cls.parser_x86.parse_file
    parse_arm = cls.parser_AArch64.parse_file
    cls.kernel_x86 = reduce_to_section(parse_x86(cls.code_x86), "x86")
    cls.kernel_x86_memdep = reduce_to_section(
        parse_x86(cls.code_x86_memdep), "x86")
    cls.kernel_x86_long_LCD = reduce_to_section(
        parse_x86(cls.code_x86_long_LCD), "x86")
    cls.kernel_AArch64 = reduce_to_section(
        parse_arm(cls.code_AArch64), "aarch64")
    cls.kernel_aarch64_memdep = reduce_to_section(
        parse_arm(cls.code_aarch64_memdep), "aarch64")
    cls.kernel_aarch64_SVE = reduce_to_section(
        parse_arm(cls.code_AArch64_SVE), "aarch64")

    # set up machine models
    cls.machine_model_csx = MachineModel(path_to_yaml=data_path("csx.yml"))
    cls.machine_model_tx2 = MachineModel(path_to_yaml=data_path("tx2.yml"))
    cls.machine_model_a64fx = MachineModel(path_to_yaml=data_path("a64fx.yml"))
    cls.semantics_x86 = ISASemantics("x86")
    cls.semantics_csx = ArchSemantics(
        cls.machine_model_csx, path_to_yaml=data_path("isa/x86.yml"))
    cls.semantics_aarch64 = ISASemantics("aarch64")
    cls.semantics_tx2 = ArchSemantics(
        cls.machine_model_tx2, path_to_yaml=data_path("isa/aarch64.yml"))
    cls.semantics_a64fx = ArchSemantics(
        cls.machine_model_a64fx, path_to_yaml=data_path("isa/aarch64.yml"))
    cls.machine_model_zen = MachineModel(arch="zen1")

    # annotate each kernel with the semantics of its target
    annotate(cls.semantics_csx, cls.kernel_x86)
    annotate(cls.semantics_csx, cls.kernel_x86_memdep)
    annotate(cls.semantics_csx, cls.kernel_x86_long_LCD)
    annotate(cls.semantics_tx2, cls.kernel_AArch64)
    annotate(cls.semantics_tx2, cls.kernel_aarch64_memdep)
    annotate(cls.semantics_a64fx, cls.kernel_aarch64_SVE)
def test_creation_by_name(self):
    """Check that ArchSemantics can be built from a model looked up by arch name."""
    try:
        ArchSemantics(MachineModel(arch='CSX'))
    except ValueError:
        self.fail()
def inspect(args, output_file=sys.stdout):
    """
    Does the actual throughput and critical path analysis of OSACA and prints it to the
    terminal.

    If no architecture was given and parsing with the detected ISA fails, parsing is
    retried once with the default architecture of the other ISA. If an architecture
    was given and parsing fails, the traceback is printed and the process exits with
    status 1.

    :param args: arguments given from :class:`~argparse.ArgumentParser` after parsing
    :param output_file: Define the stream for output, defaults to :class:`sys.stdout`
    :type output_file: stream, optional
    """
    # Read file
    code = args.file.read()
    # Detect ISA if necessary
    if args.arch is not None:
        arch = args.arch
    else:
        arch = DEFAULT_ARCHS[BaseParser.detect_ISA(code)]
    print_arch_warning = not args.arch
    isa = MachineModel.get_isa_for_arch(arch)
    verbose = args.verbose
    ignore_unknown = args.ignore_unknown

    # Parse file
    parser = get_asm_parser(arch)
    try:
        parsed_code = parser.parse_file(code)
    except Exception:
        # Only real errors trigger the fallback; KeyboardInterrupt and
        # SystemExit must propagate instead of starting a second parse.
        if args.arch is not None:
            traceback.print_exc(file=sys.stderr)
            sys.exit(1)
        # probably the wrong parser based on heuristic: change ISA and try again
        other_isa = 'x86' if BaseParser.detect_ISA(code) == 'aarch64' else 'aarch64'
        arch = DEFAULT_ARCHS[other_isa]
        isa = MachineModel.get_isa_for_arch(arch)
        parser = get_asm_parser(arch)
        parsed_code = parser.parse_file(code)

    # Reduce to marked kernel or chosen section and add semantics
    if args.lines:
        line_range = get_line_range(args.lines)
        kernel = [line for line in parsed_code if line['line_number'] in line_range]
        print_length_warning = False
    else:
        kernel = reduce_to_section(parsed_code, isa)
        # Print warning if kernel has no markers and is larger than threshold (100)
        print_length_warning = len(kernel) == len(parsed_code) and len(kernel) > 100
    machine_model = MachineModel(arch=arch)
    semantics = ArchSemantics(machine_model)
    semantics.add_semantics(kernel)
    # Do optimal schedule for kernel throughput if wished
    if not args.fixed:
        semantics.assign_optimal_throughput(kernel)

    # Create DiGraphs
    kernel_graph = KernelDG(kernel, parser, machine_model)
    if args.dotpath is not None:
        # '.' selects the default export location
        kernel_graph.export_graph(args.dotpath if args.dotpath != '.' else None)
    # Print analysis
    frontend = Frontend(args.file.name, arch=arch)
    print(
        frontend.full_analysis(
            kernel,
            kernel_graph,
            ignore_unknown=ignore_unknown,
            arch_warning=print_arch_warning,
            length_warning=print_length_warning,
            verbose=verbose
        ),
        file=output_file,
    )
def test_machine_model_various_functions(self):
    """Exercise assorted MachineModel helpers on the x86 and AArch64 test DBs."""

    def mem(base, index=None, scale=1):
        # memory operand description in the key order used throughout this test
        return {'base': base, 'offset': None, 'index': index, 'scale': scale}

    # dummy models must be constructible from the ISA name alone
    try:
        for isa in ('x86', 'aarch64'):
            MachineModel(isa=isa)
    except ValueError:
        self.fail()
    test_mm_x86 = MachineModel(path_to_yaml=self._find_file('test_db_x86.yml'))
    test_mm_arm = MachineModel(path_to_yaml=self._find_file('test_db_aarch64.yml'))
    both_models = (test_mm_x86, test_mm_arm)

    # get_instruction without mnemonic
    for model in both_models:
        self.assertIsNone(model.get_instruction(None, []))

    # get_instruction from DB
    for mnemonic in (None, 'NOT_IN_DB'):
        for model in both_models:
            self.assertIsNone(model.get_instruction(mnemonic, []))

    xmm_operands = [{'class': 'register', 'name': 'xmm'} for _ in range(3)]
    instr_form_x86_1 = test_mm_x86.get_instruction('vaddpd', xmm_operands)
    self.assertEqual(instr_form_x86_1,
                     test_mm_x86.get_instruction('vaddpd', xmm_operands))
    self.assertEqual(
        test_mm_x86.get_instruction('jg', [{'class': 'identifier'}]),
        test_mm_x86.get_instruction('jg', [{'class': 'identifier'}]),
    )

    vec_operands = [
        {'class': 'register', 'prefix': 'v', 'shape': 's'} for _ in range(3)
    ]
    instr_form_arm_1 = test_mm_arm.get_instruction('fadd', vec_operands)
    self.assertEqual(instr_form_arm_1,
                     test_mm_arm.get_instruction('fadd', vec_operands))
    self.assertEqual(
        test_mm_arm.get_instruction('b.ne', [{'class': 'identifier'}]),
        test_mm_arm.get_instruction('b.ne', [{'class': 'identifier'}]),
    )

    # full instruction name
    self.assertEqual(
        MachineModel.get_full_instruction_name(instr_form_x86_1),
        'vaddpd register(name:xmm),register(name:xmm),register(name:xmm)',
    )
    self.assertEqual(
        MachineModel.get_full_instruction_name(instr_form_arm_1),
        'fadd register(prefix:v,shape:s),register(prefix:v,shape:s),'
        'register(prefix:v,shape:s)',
    )

    # get_store_tp
    self.assertEqual(
        test_mm_x86.get_store_throughput(mem({'name': 'x'})),
        [[2, '237'], [2, '4']],
    )
    self.assertEqual(
        test_mm_x86.get_store_throughput(
            mem({'prefix': 'NOT_IN_DB'}, index='NOT_NONE')),
        [[1, '23'], [1, '4']],
    )
    self.assertEqual(
        test_mm_arm.get_store_throughput(mem({'prefix': 'x'})),
        [[2, '34'], [2, '5']],
    )
    self.assertEqual(
        test_mm_arm.get_store_throughput(mem({'prefix': 'NOT_IN_DB'})),
        [[1, '34'], [1, '5']],
    )

    # get_store_lt
    self.assertEqual(
        test_mm_x86.get_store_latency(mem({'name': 'x'}, scale='1')), 0)
    self.assertEqual(
        test_mm_arm.get_store_latency(mem({'prefix': 'x'}, scale='1')), 0)

    # has_hidden_load
    self.assertFalse(test_mm_x86.has_hidden_loads())

    # default load tp
    self.assertEqual(
        test_mm_x86.get_load_throughput(mem({'name': 'x'})),
        [[1, '23'], [1, ['2D', '3D']]],
    )

    # adding port
    for model in both_models:
        model.add_port('dummyPort')

    # dump of DB
    with open('/dev/null', 'w') as dev_null:
        for model in both_models:
            model.dump(stream=dev_null)
def test_add_single_entry(self):
    """Check that adding one entry grows each model's form list by exactly one."""
    # Build all three models first, then record their sizes before any change.
    models = [MachineModel(arch) for arch in ('csx', 'tx2', 'zen1')]
    sizes_before = [len(model['instruction_forms']) for model in models]

    new_entries = [self.entry_csx, self.entry_tx2, {'name': 'empty_operation'}]
    for model, entry in zip(models, new_entries):
        model.set_instruction_entry(entry)

    # Compare only after every model has been modified (csx, tx2, zen1 order).
    growth = [
        len(model['instruction_forms']) - before
        for model, before in zip(models, sizes_before)
    ]
    for added in growth:
        self.assertEqual(added, 1)