class TaskCompactReports(object):
    """Renumber reports so numbers run consecutively per sanitizer and category.

    Reports are counted in the order they reach ``process``. A report whose
    number differs from its running count gets its file renamed and its
    ``number`` and ``file_path`` attributes updated.
    """

    description = 'Compacting reports ...'

    def setup(self, options):
        """Create the printer and start with empty counters."""
        self.__printer = Printer(options)
        # sanitizer.category.count
        self.__data = {}

    def process(self, report):
        """Count ``report`` and rename its file if its number is out of sequence."""
        per_category = self.__data.setdefault(report.sanitizer.name_short, {})
        category = report.category_name
        expected_number = per_category.get(category, 0) + 1
        per_category[category] = expected_number

        # Nothing to do when the report already carries its running count.
        if expected_number == report.number:
            return

        old_description = str(report)
        old_path = report.file_path
        target_path = utils.files.report_file_path(
            os.path.dirname(old_path), expected_number)
        os.rename(old_path, target_path)
        report.number = expected_number
        report.file_path = target_path
        self.__printer.task_info(
            'renaming: ' + old_description + ' -> ' + str(report))
class TaskAnalyseReports(object):
    """Analyse reports and add (special) data"""

    description = 'Analysing reports ...'

    # Raw string: the pattern contains the regex escape \d, which is an
    # invalid escape sequence in a normal string literal.
    __tsan_data_race_global_location_pattern = re.compile(
        r"^ location is global '(?P<global_location>.+)' of size \d",
        re.IGNORECASE)

    def setup(self, options):
        """Create the printer and the sanitizer -> category -> analyser table."""
        self.__printer = Printer(options)
        self.__analysing_funcs = {
            'tsan': {
                'data race': self.__tsan_analyse_data_race
            }
        }

    def process(self, report):
        """Run the analyser registered for the report's sanitizer and category.

        Reports without a registered analyser are left untouched.
        """
        analyse = self.__analysing_funcs.get(
            report.sanitizer.name_short, {}).get(report.category_name)
        if analyse:
            analyse(report)

    def __tsan_analyse_data_race(self, report):
        """Scan the report file for a global location and store it.

        Every matching line overwrites
        ``report.special['tsan_data_race_global_location']``, so the last
        match in the file is the one that remains.
        """
        pattern = self.__tsan_data_race_global_location_pattern
        # The context manager closes the file; no explicit close() is needed.
        with open(report.file_path, 'r') as report_file:
            for line in report_file:
                match = pattern.search(line)
                if match:
                    self.__printer.task_info(
                        'found global location of ' + str(report))
                    report.special['tsan_data_race_global_location'] = (
                        match.group('global_location'))
class TaskSummary(object):
    """Print various stats about the reports"""

    def setup(self, options):
        """Create the printer and an empty statistics table."""
        self.__printer = Printer(options)
        # sanitizer_name.category_name.(new|old)
        self.__data = OrderedDict()

    def process(self, report):
        """Count the report as 'new' or 'old' under its sanitizer and category."""
        categories = self.__data.setdefault(
            report.sanitizer.name, OrderedDict())
        counts = categories.setdefault(
            report.category_name, {'new': 0, 'old': 0})
        if report.is_new:
            counts['new'] += 1
        else:
            counts['old'] += 1

    def teardown(self):
        """Print the totals per sanitizer and category, sorted by name."""
        out = self.__printer
        out.task_description('Summary:')
        if not self.__data:
            out.task_info('nothing found')
            return

        for sanitizer_name in sorted(self.__data):
            out.just_print(' ' + sanitizer_name + ':')
            categories = self.__data[sanitizer_name]
            for category_name in sorted(categories):
                counts = categories[category_name]
                new_count = counts['new']
                total = counts['old'] + new_count
                out.just_print(' ' + category_name + ': ' + str(total) +
                               ' (' + str(new_count) + ' new)')
        # NOTE(review): the source formatting was lost; the trailing blank
        # line is assumed to follow the whole listing - confirm.
        out.nl()
class TaskBuildSkeleton(object):
    """Mark the source positions named in reports and write skeleton files.

    Marks are collected in one ``Skeleton`` per source file while reports are
    processed; ``teardown`` writes each of them to ``<path>.skeleton`` below
    the skeleton root directory.
    """

    description = 'Building the skeleton ...'

    __root_dir_name = 'skeleton'
    __tsan_data_race_max_stack_depth = 3

    def setup(self, options):
        """Prepare the output directory, the printer and the dispatch table."""
        self.__root_dir_path = os.path.join(options.output_root_path,
                                            self.__root_dir_name)
        # NOTE(review): the second argument presumably asks for a fresh
        # (emptied) directory - confirm against utils.files.makedirs.
        utils.files.makedirs(self.__root_dir_path, True)
        self.__printer = Printer(options)
        # src_file_rel_path -> Skeleton
        self.__skeletons = {}
        self.__add_funcs = {
            'tsan': {
                'data race': self.__add_tsan_data_race
            }
        }

    def process(self, report):
        """Dispatch the report to the handler for its sanitizer and category.

        Reports without a registered handler are ignored.
        """
        add_func = self.__add_funcs.get(
            report.sanitizer.name_short, {}).get(report.category_name)
        if add_func is not None:
            add_func(report)

    def __add(self, report, stack_frame_id, stack_frame):
        """Mark the position of a complete stack frame in its file's skeleton."""
        if not stack_frame.complete:
            return
        file_rel_path = stack_frame.src_file_rel_path
        if file_rel_path not in self.__skeletons:
            self.__skeletons[file_rel_path] = Skeleton(
                stack_frame.src_file_path)
        self.__skeletons[file_rel_path].mark(
            stack_frame.line_num, stack_frame.char_pos,
            str(report) + ' - frame #' + str(stack_frame_id))

    def __add_tsan_data_race(self, report):
        """Add the topmost frames of every data-race stack of the report."""
        for stack in report.call_stacks:
            if 'tsan_data_race_type' not in stack.special:
                continue
            depth = min(len(stack.frames),
                        self.__tsan_data_race_max_stack_depth)
            for i in range(depth):
                self.__add(report, i, stack.frames[i])

    def teardown(self):
        """Write every collected skeleton to its ``.skeleton`` file."""
        for src_file_path, skeleton in self.__skeletons.items():
            skeleton_file_path = os.path.join(self.__root_dir_path,
                                              src_file_path + '.skeleton')
            self.__printer.task_info(
                'creating ' + skeleton_file_path + ' (' +
                str(skeleton.marked_lines_count) + ' lines)')
            utils.files.makedirs(os.path.dirname(skeleton_file_path))
            # The context manager closes the file; no explicit close() needed.
            with open(skeleton_file_path, 'w') as skeleton_file:
                skeleton.write(skeleton_file)
class TaskAddTSanContext(object):
    """Prepend source-code context to new TSan data-race report files.

    For every supported new report a buffer file is written that holds the
    context excerpts followed by the original report text; the buffer then
    replaces the report file.
    """

    description = 'Adding TSan context ...'

    __supported_sanitizer_name_short = 'tsan'
    __supported_category_names = ['data race']
    __max_stack_frames = 3  # max amount of lookups from the top of the call stack

    def setup(self, options):
        """Create the printer."""
        self.__printer = Printer(options)

    def process(self, report):
        """Rewrite the report file with context prepended.

        Reports that are not new, or whose sanitizer or category is not
        supported, are left untouched.
        """
        if not (report.is_new and
                report.sanitizer.name_short == self.__supported_sanitizer_name_short and
                report.category_name in self.__supported_category_names):
            return

        report_file_path = report.file_path
        buffer_file_path = report_file_path + '.buffer'
        self.__printer.task_info('adding context to ' + str(report))

        with open(buffer_file_path, 'w') as buffer_file:
            buffer_file.write('\n')
            for stack in report.call_stacks:
                if 'tsan_data_race_type' not in stack.special:
                    continue
                buffer_file.write(stack.title + '\n\n')
                depth = min(len(stack.frames), self.__max_stack_frames)
                for i in range(depth):
                    frame = stack.frames[i]
                    if not frame.complete:
                        continue
                    # TODO: find full function signature
                    func_signature = frame.func_name + '(...)'
                    source_line = SourceCodeLine(frame.src_file_path,
                                                 frame.line_num)
                    if source_line.line:
                        # The caret line points at the reported character
                        # position below the quoted source line.
                        buffer_file.write(
                            func_signature + ' {\n' + ' // ...\n' + '! ' +
                            source_line.line + '\n' +
                            (' ' * (frame.char_pos - source_line.indent + 1)) +
                            '^\n' + ' // ...\n' + '}\n\n')
            # Append the original report text below the context.
            with open(report_file_path, 'r') as report_file:
                for report_line in report_file:
                    buffer_file.write(report_line)

        # Both files are closed by their context managers at this point, so
        # the buffer can safely take the place of the report file.
        os.remove(report_file_path)
        os.rename(buffer_file_path, report_file_path)
class TaskCreateBlacklist(object):
    """Base class for creating blacklists; can be inherited to make more sense"""

    __blacklists_dir_name = 'blacklists'
    __blacklist_file_ending = '.blacklist'

    def _setup(self, options, sanitizer_name_short):
        """Compute the blacklist file path and reset the collected data.

        The file is named ``clang-<sanitizer_name_short>.blacklist`` and lives
        in the blacklists directory below ``options.output_root_path``.
        """
        self.__blacklist_file_path = os.path.join(
            options.output_root_path, self.__blacklists_dir_name,
            'clang-' + sanitizer_name_short + self.__blacklist_file_ending)
        self.__printer = Printer(options)
        # dir_name.file_name.func_name
        self.__data = OrderedDict()

    def _add_stack_frame(self, frame):
        """Remember the function of a complete stack frame (once per file)."""
        if not frame.complete:
            return
        files = self.__data.setdefault(frame.src_file_dir_rel_path,
                                       OrderedDict())
        func_names = files.setdefault(frame.src_file_name, [])
        func_name = frame.func_name
        if func_name not in func_names:
            func_names.append(func_name)
            # NOTE(review): the source formatting was lost; the message is
            # assumed to be printed only for newly added functions - confirm.
            self.__printer.task_info('adding ' + func_name + ' (' +
                                     frame.src_file_rel_path + ')')

    def teardown(self):
        """Write the blacklist file, sorted by directory, file and function."""
        rule = '# --------------------------------------------------------------------------- #\n'
        self.__printer.task_info('creating ' + self.__blacklist_file_path)
        utils.files.makedirs(os.path.dirname(self.__blacklist_file_path))
        # The context manager closes the file; no explicit close() is needed.
        with open(self.__blacklist_file_path, 'w') as blacklist_file:
            for dir_name in sorted(self.__data):
                # Pad the directory name so the closing '#' lines up with
                # the rules (no padding for names longer than 74 chars).
                blacklist_file.write(
                    rule +
                    '# ' + dir_name + (' ' * (74 - len(dir_name))) + '#\n' +
                    rule + '\n')
                files = self.__data[dir_name]
                for file_name in sorted(files):
                    blacklist_file.write('# ' + file_name + ' #\n\n')
                    for func_name in sorted(files[file_name]):
                        blacklist_file.write('fun:' + func_name + '\n')
                    blacklist_file.write('\n')
class TaskEliminateDuplicateReports(object):
    """Detect reports describing an already seen issue and remove them.

    Each report is reduced to a list of identifier strings. A report that
    shares an identifier with an earlier report is collected as a duplicate
    and removed from the bank in ``teardown``.
    """

    description = 'Eliminating duplicate reports ...'

    __tsan_data_race_max_stack_frames = 3

    def __init__(self, bank):
        """Remember the bank the duplicates are removed from."""
        self.__bank = bank

    def setup(self, options):
        """Create the printer and reset the bookkeeping."""
        self.__printer = Printer(options)
        self.__duplicate_reports = []
        self.__identifiers_funcs = {
            'tsan': {
                'data race': self.__tsan_data_race_identifiers,
                'thread leak': self.__tsan_thread_leak_identifiers
            }
        }
        # A set keeps the membership test per identifier at constant cost.
        # TODO: split into separate sets for sanitizers and categories
        self.__known_identifiers = set()

    def process(self, report):
        """Record the report's identifiers or mark the report as duplicate."""
        identifiers_func = self.__identifiers_funcs.get(
            report.sanitizer.name_short, {}).get(report.category_name)
        if not identifiers_func:
            self.__printer.bailout('unable to analyse ' + str(report))
            # NOTE(review): bailout presumably aborts the run; returning here
            # avoids a follow-up KeyError in case it does not.
            return

        identifiers = identifiers_func(report)
        if not identifiers:
            self.__printer.bailout('unable to extract identifiers from ' +
                                   str(report))
            return

        known = self.__known_identifiers
        if any(identifier in known for identifier in identifiers):
            self.__printer.task_info('removing ' + str(report))
            self.__duplicate_reports.append(report)
            return
        known.update(identifiers)

    def teardown(self):
        """Remove every collected duplicate from the bank."""
        for report in self.__duplicate_reports:
            self.__bank.remove_report(report)

    def __tsan_data_race_identifiers(self, report):
        """Build the identifiers of a TSan data-race report.

        Every stack carrying ``tsan_data_race_type`` contributes one fragment
        made of the race type, the byte count and the location of its topmost
        frames; falsy values are written as ``'?'``. One fragment yields one
        identifier, two fragments yield both concatenation orders. Any other
        number of fragments yields ``None``.
        """
        fragments = []
        for stack in report.call_stacks:
            if 'tsan_data_race_type' not in stack.special:
                continue
            fragment = [
                stack.special.get('tsan_data_race_type'),
                stack.special.get('tsan_data_race_bytes')
            ]
            depth = min(len(stack.frames),
                        self.__tsan_data_race_max_stack_frames)
            for i in range(depth):
                frame = stack.frames[i]
                fragment.extend([
                    frame.src_file_rel_path, frame.func_name,
                    frame.line_num, frame.char_pos
                ])
            fragments.append(':'.join(
                ['?' if not f else str(f) for f in fragment]))

        if len(fragments) == 1:
            return fragments
        if len(fragments) == 2:
            # either way is fine!
            return [
                fragments[0] + ':' + fragments[1],
                fragments[1] + ':' + fragments[0]
            ]
        return None

    def __tsan_thread_leak_identifiers(self, report):
        """Return the first non-empty leaked thread name as sole identifier.

        Returns ``None`` when no stack carries a thread name.
        """
        for stack in report.call_stacks:
            if stack.special.get('tsan_thread_leak_thread_name'):
                return [stack.special['tsan_thread_leak_thread_name']]
        return None
class ReportExtractor(object):
    """Base extractor class; has to be inherited in order to make sense"""

    __reports_dir_name = 'reports'
    # Raw string: the pattern contains regex escapes that are not valid
    # escape sequences in a normal string literal.
    __report_file_name_pattern = re.compile(r'^(?P<number>\d{5})\.report$',
                                            re.IGNORECASE)

    def __init__(self, options, sanitizer):
        """Store the collaborators and derive the sanitizer's reports dir."""
        self.__options = options
        self.__sanitizer = sanitizer
        self.__printer = Printer(options)
        # category.number
        self.__counters = {}
        self.__reports_dir_base_path = os.path.join(options.output_root_path,
                                                    self.__reports_dir_name,
                                                    sanitizer.name_short)
        self.__report_file = None
        self.__reports = []

    @property
    def reports(self):
        """Get reports and remove them from the extractor"""
        collected = self.__reports
        self.__reports = []
        return collected

    def _extract_start(self, report):
        """Open the report's file for writing, creating its directory first."""
        # Do not drop a still-open handle from an unfinished extraction.
        if self.__report_file:
            self.__report_file.close()
        utils.files.makedirs(os.path.dirname(report.file_path))
        self.__report_file = open(report.file_path, 'w')

    def _extract_continue(self, line):
        """Write ``line`` to the open report file.

        Returns ``False`` when no extraction is in progress, else ``True``.
        """
        if not self.__report_file:
            return False
        self.__report_file.write(line)
        return True

    def _extract_end(self):
        """Close the open report file, if any, and end the extraction."""
        if self.__report_file:
            self.__report_file.close()
        self.__report_file = None

    def __get_category_dir_path(self, category_name):
        """Return the directory holding the reports of ``category_name``.

        The directory name is the lower-cased category name with spaces
        replaced by dashes.
        """
        return os.path.join(self.__reports_dir_base_path,
                            category_name.lower().replace(' ', '-'))

    def __get_report_file_path(self, category_name, no):
        """Return the file path of report number ``no`` of the category."""
        return utils.files.report_file_path(
            self.__get_category_dir_path(category_name), no)

    def _make_and_add_report(self, is_new, category_name, number=None):
        """Create a report, queue it for ``reports`` and return it.

        Without ``number`` the next free number of the category is used. With
        an explicit ``number`` the category counter is raised to it when it is
        larger than the current counter.
        """
        if number is None:
            number = self.__counters.get(category_name, 0) + 1
            self.__counters[category_name] = number
        elif self.__counters.get(category_name, -1) < number:
            self.__counters[category_name] = number
        report = Report(self.__options, is_new, self.__sanitizer,
                        category_name, number,
                        self.__get_report_file_path(category_name, number))
        self.__printer.task_info('adding ' + str(report))
        self.__reports.append(report)
        return report

    def _collect_reports(self, category):
        """Add the report files already present in the category dir as old."""
        dir_path = self.__get_category_dir_path(category)
        if not os.path.isdir(dir_path):
            return
        for file_name in sorted(os.listdir(dir_path)):
            match = self.__report_file_name_pattern.search(file_name)
            if match:
                self._make_and_add_report(False, category,
                                          int(match.group('number')))
class TaskCreateCsvSummaries(object):
    """Summarise the reports and collect the info in CSV files"""

    description = 'Summarising the reports ...'

    __csv_base_dir_name = 'summaries'
    __csv_file_ending = '.csv'
    __csv_delimiter = ','
    __tsan_data_race_expected_funcs_per_report = 2

    def setup(self, options):
        """Create the printer, the summaries directory and the control table."""
        self.__printer = Printer(options)
        self.__csv_base_dir_path = os.path.join(options.output_root_path,
                                                self.__csv_base_dir_name)
        utils.files.makedirs(self.__csv_base_dir_path)
        # sanitizer.category.(header_func|process_func|csv)
        self.__controls = {
            'tsan': {
                'data race': {
                    'header_func': self.__header_tsan_data_race,
                    'process_func': self.__process_tsan_data_race
                }
            }
        }

    def process(self, report):
        """Append a row for the report to the CSV file of its category.

        The file is created, and its header written, when the first report of
        a category arrives. Reports of unconfigured categories are ignored.
        """
        sanitizer_name_short = report.sanitizer.name_short
        category_name = report.category_name
        controls = self.__controls.get(sanitizer_name_short,
                                       {}).get(category_name)
        if not controls:
            return

        if 'csv' not in controls:
            csv_file_path = os.path.join(
                self.__csv_base_dir_path,
                sanitizer_name_short + '-' +
                category_name.lower().replace(' ', '-') +
                self.__csv_file_ending)
            csv_file = open(csv_file_path, 'w')
            self.__printer.task_info('creating ' + csv_file_path)
            controls['csv'] = {
                'file': csv_file,
                'writer': csv.writer(csv_file, quoting=csv.QUOTE_MINIMAL)
            }
            if 'header_func' in controls:
                self.__write_row(sanitizer_name_short, category_name,
                                 controls['header_func']())

        if 'process_func' in controls:
            self.__write_row(sanitizer_name_short, category_name,
                             controls['process_func'](report))

    def teardown(self):
        """Close every CSV file that was opened while processing."""
        for categories in self.__controls.values():
            for controls in categories.values():
                # A category without any processed report never opened a file.
                csv_controls = controls.get('csv')
                if csv_controls:
                    csv_controls['file'].close()

    def __write_row(self, sanitizer_name_short, category_name, row):
        """Write a non-empty ``row`` to the category's CSV file, if open."""
        csv_controls = self.__controls.get(sanitizer_name_short, {}).get(
            category_name, {}).get('csv')
        if csv_controls and row:
            csv_controls['writer'].writerow([str(cell) for cell in row])

    def __header_tsan_data_race(self):
        """Return the header: id, both function blocks, global location.

        The second block lists the field names in reverse order, matching
        the reversed details of the second function in the data rows.
        """
        field_names = ['folder', 'file', 'function', 'op', 'size']
        return ['id'] + field_names + list(
            reversed(field_names)) + ['global location']

    def __process_tsan_data_race(self, report):
        """Return the CSV row describing a TSan data-race report.

        Each data-race stack with at least one frame contributes five cells
        taken from its top frame. Missing functions are padded with empty
        cells, falsy cells are written as ``'?'``, and the global location
        (or an empty string) is appended last.
        """
        row = [report.number]
        func_count = 0
        for stack in report.call_stacks:
            if 'tsan_data_race_type' not in stack.special or not stack.frames:
                continue
            func_count += 1
            top_frame = stack.frames[0]
            details = [
                top_frame.src_file_dir_rel_path, top_frame.src_file_name,
                top_frame.func_name,
                stack.special.get('tsan_data_race_type'),
                stack.special.get('tsan_data_race_bytes')
            ]
            # reversing the 2nd function improves the readability of the file
            if func_count == 2:
                details.reverse()
            row.extend(details)
        for _ in range(func_count,
                       self.__tsan_data_race_expected_funcs_per_report):
            row.extend([None, None, None, None, None])
        row = ['?' if not cell else cell for cell in row]
        row.append(report.special.get('tsan_data_race_global_location', ''))
        return row