def __init__(self, cov, code_unit):
    self.coverage = cov
    self.code_unit = code_unit

    self.filename = self.code_unit.filename
    actual_filename, source = self.find_source(self.filename)

    self.parser = CodeParser(
        text=source, filename=actual_filename,
        exclude=self.coverage._exclude_regex('exclude'))
    self.statements, self.excluded = self.parser.parse_source()

    executed = self.coverage.data.executed_lines(self.filename)
    exec1 = self.parser.first_lines(executed)
    self.missing = sorted(set(self.statements) - set(exec1))

    if self.coverage.data.has_arcs():
        self.no_branch = self.parser.lines_matching(
            join_regex(self.coverage.config.partial_list),
            join_regex(self.coverage.config.partial_always_list))
        n_branches = self.total_branches()
        mba = self.missing_branch_arcs()
        n_partial_branches = sum(
            [len(v) for k, v in iitems(mba) if k not in self.missing])
        n_missing_branches = sum([len(v) for k, v in iitems(mba)])
    else:
        n_branches = n_partial_branches = n_missing_branches = 0
        self.no_branch = set()

    self.numbers = Numbers(n_files=1,
                           n_statements=len(self.statements),
                           n_excluded=len(self.excluded),
                           n_missing=len(self.missing),
                           n_branches=n_branches,
                           n_partial_branches=n_partial_branches,
                           n_missing_branches=n_missing_branches)
def __init__(self, text=None, filename=None, exclude=None): """ Source can be provided as `text`, the text itself, or `filename`, from which the text will be read. Excluded lines are those that match `exclude`, a regex. """ self.filename = filename or '<code>' self.text = text if not self.text: try: sourcef = open_source(self.filename) try: self.text = sourcef.read() finally: sourcef.close() except IOError: _, err, _ = sys.exc_info() raise NoSource("No source for code: '%s': %s" % (self.filename, err)) if self.text and ord(self.text[0]) == 65279: self.text = self.text[1:] self.exclude = exclude self.show_tokens = False self.lines = self.text.split('\n') self.excluded = set() self.docstrings = set() self.classdefs = set() self.multiline = {} self.statement_starts = set() self._byte_parser = None
def __init__(self, text = None, filename = None, exclude = None):
    self.filename = filename or '<code>'
    self.text = text
    if not self.text:
        try:
            sourcef = open_source(self.filename)
            try:
                self.text = sourcef.read()
            finally:
                sourcef.close()
        except IOError:
            _, err, _ = sys.exc_info()
            raise NoSource("No source for code: '%s': %s" % (self.filename, err))

    if self.text and ord(self.text[0]) == 65279:
        self.text = self.text[1:]

    self.exclude = exclude

    self.show_tokens = False

    self.lines = self.text.split('\n')

    self.excluded = set()
    self.docstrings = set()
    self.classdefs = set()
    self.multiline = {}
    self.statement_starts = set()

    self._byte_parser = None
def __init__(self, text=None, filename=None, exclude=None):
    self.filename = filename or '<code>'
    self.text = text
    if not self.text:
        try:
            sourcef = open_source(self.filename)
            try:
                self.text = sourcef.read()
            finally:
                sourcef.close()
        except IOError:
            _, err, _ = sys.exc_info()
            raise NoSource("No source for code: '%s': %s" % (self.filename, err))

    if self.text and ord(self.text[0]) == 65279:
        self.text = self.text[1:]

    self.exclude = exclude

    self.show_tokens = False

    self.lines = self.text.split('\n')

    self.excluded = set()
    self.docstrings = set()
    self.classdefs = set()
    self.multiline = {}
    self.statement_starts = set()

    self._byte_parser = None
def __init__(self, text=None, filename=None, exclude=None): """ Source can be provided as `text`, the text itself, or `filename`, from which the text will be read. Excluded lines are those that match `exclude`, a regex. """ assert text or filename, "CodeParser needs either text or filename" self.filename = filename or "<code>" self.text = text if not self.text: try: sourcef = open_source(self.filename) try: self.text = sourcef.read() finally: sourcef.close() except IOError: _, err, _ = sys.exc_info() raise NoSource( "No source for code: '%s': %s" % (self.filename, err) ) # Scrap the BOM if it exists. if self.text and ord(self.text[0]) == 0xfeff: self.text = self.text[1:] self.exclude = exclude self.show_tokens = False # The text lines of the parsed code. self.lines = self.text.split('\n') # The line numbers of excluded lines of code. self.excluded = set() # The line numbers of docstring lines. self.docstrings = set() # The line numbers of class definitions. self.classdefs = set() # A dict mapping line numbers to (lo,hi) for multi-line statements. self.multiline = {} # The line numbers that start statements. self.statement_starts = set() # Lazily-created ByteParser self._byte_parser = None
def test_assert_same_elements(self):
    self.assertSameElements(set(), set())
    self.assertSameElements(set([1, 2, 3]), set([3, 1, 2]))
    self.assertRaises(AssertionError, self.assertSameElements,
        set([1, 2, 3]), set())
    self.assertRaises(AssertionError, self.assertSameElements,
        set([1, 2, 3]), set([4, 5, 6]))
def __init__(self, cov, code_unit):
    self.coverage = cov
    self.code_unit = code_unit

    self.filename = self.code_unit.filename
    ext = os.path.splitext(self.filename)[1]
    source = None
    if ext == '.py':
        if not os.path.exists(self.filename):
            source = self.coverage.file_locator.get_zip_data(self.filename)
            if not source:
                raise NoSource("No source for code: '%s'" % self.filename)

    self.parser = CodeParser(
        text=source, filename=self.filename,
        exclude=self.coverage._exclude_regex('exclude')
        )
    self.statements, self.excluded = self.parser.parse_source()

    # Identify missing statements.
    executed = self.coverage.data.executed_lines(self.filename)
    exec1 = self.parser.first_lines(executed)
    self.missing = sorted(set(self.statements) - set(exec1))

    if self.coverage.data.has_arcs():
        self.no_branch = self.parser.lines_matching(
            join_regex(self.coverage.config.partial_list),
            join_regex(self.coverage.config.partial_always_list)
            )
        n_branches = self.total_branches()
        mba = self.missing_branch_arcs()
        n_partial_branches = sum(
            [len(v) for k,v in iitems(mba) if k not in self.missing]
            )
        n_missing_branches = sum([len(v) for k,v in iitems(mba)])
    else:
        n_branches = n_partial_branches = n_missing_branches = 0
        self.no_branch = set()

    self.numbers = Numbers(
        n_files=1,
        n_statements=len(self.statements),
        n_excluded=len(self.excluded),
        n_missing=len(self.missing),
        n_branches=n_branches,
        n_partial_branches=n_partial_branches,
        n_missing_branches=n_missing_branches,
        )
def _arcs(self):
    chunks = self._split_into_chunks()

    byte_chunks = dict([ (c.byte, c) for c in chunks ])

    yield (-1, byte_chunks[0].line)

    for chunk in chunks:
        if not chunk.first:
            continue

        chunks_considered = set()
        chunks_to_consider = [chunk]
        while chunks_to_consider:
            this_chunk = chunks_to_consider.pop()
            chunks_considered.add(this_chunk)

            for ex in this_chunk.exits:
                if ex < 0:
                    yield (chunk.line, ex)
                else:
                    next_chunk = byte_chunks[ex]
                    if next_chunk in chunks_considered:
                        continue

                    backward_jump = next_chunk.byte < this_chunk.byte
                    if next_chunk.first or backward_jump:
                        if next_chunk.line != chunk.line:
                            yield (chunk.line, next_chunk.line)
                    else:
                        chunks_to_consider.append(next_chunk)
def _arcs(self): """Find the executable arcs in the code. Yields pairs: (from,to). From and to are integer line numbers. If from is < 0, then the arc is an entrance into the code object. If to is < 0, the arc is an exit from the code object. """ chunks = self._split_into_chunks() byte_chunks = dict([(c.byte, c) for c in chunks]) yield (-1, byte_chunks[0].line) for chunk in chunks: if not chunk.first: continue chunks_considered = set() chunks_to_consider = [chunk] while chunks_to_consider: this_chunk = chunks_to_consider.pop() chunks_considered.add(this_chunk) for ex in this_chunk.exits: if ex < 0: yield (chunk.line, ex) else: next_chunk = byte_chunks[ex] if next_chunk in chunks_considered: continue backward_jump = next_chunk.byte < this_chunk.byte if next_chunk.first or backward_jump: if next_chunk.line != chunk.line: yield (chunk.line, next_chunk.line) else: chunks_to_consider.append(next_chunk)
def _arcs(self):
    chunks = self._split_into_chunks()

    byte_chunks = dict([(c.byte, c) for c in chunks])

    yield (-1, byte_chunks[0].line)

    for chunk in chunks:
        if not chunk.first:
            continue

        chunks_considered = set()
        chunks_to_consider = [chunk]
        while chunks_to_consider:
            this_chunk = chunks_to_consider.pop()
            chunks_considered.add(this_chunk)

            for ex in this_chunk.exits:
                if ex < 0:
                    yield (chunk.line, ex)
                else:
                    next_chunk = byte_chunks[ex]
                    if next_chunk in chunks_considered:
                        continue

                    backward_jump = next_chunk.byte < this_chunk.byte
                    if next_chunk.first or backward_jump:
                        if next_chunk.line != chunk.line:
                            yield (chunk.line, next_chunk.line)
                    else:
                        chunks_to_consider.append(next_chunk)
def test_assert_same_elements(self):
    self.assertSameElements(set(), set())
    self.assertSameElements(set([1,2,3]), set([3,1,2]))
    self.assertRaises(AssertionError, self.assertSameElements,
        set([1,2,3]), set()
        )
    self.assertRaises(AssertionError, self.assertSameElements,
        set([1,2,3]), set([4,5,6])
        )
def lines_matching(self, *regexes):
    regex_c = re.compile(join_regex(regexes))
    matches = set()
    for i, ltext in enumerate(self.lines):
        if regex_c.search(ltext):
            matches.add(i + 1)
    return matches
def __init__(self, cov, code_unit):
    self.coverage = cov
    self.code_unit = code_unit

    self.filename = self.code_unit.filename
    ext = os.path.splitext(self.filename)[1]
    source = None
    if ext == '.py':
        if not os.path.exists(self.filename):
            source = self.coverage.file_locator.get_zip_data(self.filename)
            if not source:
                raise NoSource("No source for code: '%s'" % self.filename)

    self.parser = CodeParser(
        text=source, filename=self.filename,
        exclude=self.coverage._exclude_regex('exclude'))
    self.statements, self.excluded = self.parser.parse_source()

    # Identify missing statements.
    executed = self.coverage.data.executed_lines(self.filename)
    exec1 = self.parser.first_lines(executed)
    self.missing = sorted(set(self.statements) - set(exec1))

    if self.coverage.data.has_arcs():
        self.no_branch = self.parser.lines_matching(
            join_regex(self.coverage.config.partial_list),
            join_regex(self.coverage.config.partial_always_list))
        n_branches = self.total_branches()
        mba = self.missing_branch_arcs()
        n_partial_branches = sum(
            [len(v) for k, v in iitems(mba) if k not in self.missing])
        n_missing_branches = sum([len(v) for k, v in iitems(mba)])
    else:
        n_branches = n_partial_branches = n_missing_branches = 0
        self.no_branch = set()

    self.numbers = Numbers(
        n_files=1,
        n_statements=len(self.statements),
        n_excluded=len(self.excluded),
        n_missing=len(self.missing),
        n_branches=n_branches,
        n_partial_branches=n_partial_branches,
        n_missing_branches=n_missing_branches,
        )
def _opcode_set(*names):
    s = set()
    for name in names:
        try:
            s.add(_opcode(name))
        except KeyError:
            pass
    return s
def _opcode_set(*names): """Return a set of opcodes by the names in `names`.""" s = set() for name in names: try: s.add(_opcode(name)) except KeyError: pass return s
def __init__(self, cov, code_unit):
    self.coverage = cov
    self.code_unit = code_unit

    self.filename = self.code_unit.filename
    actual_filename, source = self.find_source(self.filename)

    self.parser = CodeParser(
        text=source, filename=actual_filename,
        exclude=self.coverage._exclude_regex('exclude')
        )
    self.statements, self.excluded = self.parser.parse_source()

    # Identify missing statements.
    executed = self.coverage.data.executed_lines(self.filename)
    exec1 = self.parser.first_lines(executed)
    self.missing = sorted(set(self.statements) - set(exec1))

    if self.coverage.data.has_arcs():
        self.no_branch = self.parser.lines_matching(
            join_regex(self.coverage.config.partial_list),
            join_regex(self.coverage.config.partial_always_list)
            )
        n_branches = self.total_branches()
        mba = self.missing_branch_arcs()
        n_partial_branches = sum(
            [len(v) for k,v in iitems(mba) if k not in self.missing]
            )
        n_missing_branches = sum([len(v) for k,v in iitems(mba)])
    else:
        n_branches = n_partial_branches = n_missing_branches = 0
        self.no_branch = set()

    self.numbers = Numbers(
        n_files=1,
        n_statements=len(self.statements),
        n_excluded=len(self.excluded),
        n_missing=len(self.missing),
        n_branches=n_branches,
        n_partial_branches=n_partial_branches,
        n_missing_branches=n_missing_branches,
        )
def missing_branch_arcs(self):
    missing = self.arcs_missing()
    branch_lines = set(self.branch_lines())
    mba = {}
    for l1, l2 in missing:
        if l1 in branch_lines:
            if l1 not in mba:
                mba[l1] = []
            mba[l1].append(l2)
    return mba
def _all_arcs(self): """Get the set of all arcs in this code object and its children. See `_arcs` for details. """ arcs = set() for bp in self.child_parsers(): arcs.update(bp._arcs()) return arcs
def first_lines(self, lines, *ignores):
    """Map the line numbers in `lines` to the correct first line of the
    statement.

    Skip any line mentioned in any of the sequences in `ignores`.

    Returns a set of the first lines.

    """
    ignore = set()
    for ign in ignores:
        ignore.update(ign)
    lset = set()
    for l in lines:
        if l in ignore:
            continue
        new_l = self.first_line(l)
        if new_l not in ignore:
            lset.add(new_l)
    return lset
def first_lines(self, lines, ignore=None):
    ignore = ignore or []
    lset = set()
    for l in lines:
        if l in ignore:
            continue
        new_l = self.first_line(l)
        if new_l not in ignore:
            lset.add(new_l)
    return sorted(lset)
def first_lines(self, lines, ignore = None):
    ignore = ignore or []
    lset = set()
    for l in lines:
        if l in ignore:
            continue
        new_l = self.first_line(l)
        if new_l not in ignore:
            lset.add(new_l)
    return sorted(lset)
def __init__(self, cov, code_unit):
    self.coverage = cov
    self.code_unit = code_unit

    self.filename = self.code_unit.filename
    ext = os.path.splitext(self.filename)[1]
    source = None
    if ext == '.py':
        if not os.path.exists(self.filename):
            source = self.coverage.file_locator.get_zip_data(self.filename)
            if not source:
                raise NoSource("No source for code: %r" % self.filename)

    self.parser = CodeParser(
        text=source, filename=self.filename,
        exclude=self.coverage.exclude_re
        )
    self.statements, self.excluded = self.parser.parse_source()

    # Identify missing statements.
    executed = self.coverage.data.executed_lines(self.filename)
    exec1 = self.parser.first_lines(executed)
    self.missing = sorted(set(self.statements) - set(exec1))

    if self.coverage.data.has_arcs():
        n_branches = self.total_branches()
        mba = self.missing_branch_arcs()
        n_missing_branches = sum([len(v) for v in mba.values()])
    else:
        n_branches = n_missing_branches = 0

    self.numbers = Numbers(
        n_files=1,
        n_statements=len(self.statements),
        n_excluded=len(self.excluded),
        n_missing=len(self.missing),
        n_branches=n_branches,
        n_missing_branches=n_missing_branches,
        )
def _find_statements(self):
    """Find the statements in `self.code`.

    Return a set of line numbers that start statements.  Recurses into all
    code objects reachable from `self.code`.

    """
    stmts = set()
    for bp in self.child_parsers():
        # Get all of the lineno information from this code.
        for _, l in bp._bytes_lines():
            stmts.add(l)
    return stmts
def test_find_python_files(self):
    self.make_file("sub/a.py")
    self.make_file("sub/b.py")
    self.make_file("sub/x.c")               # nope: not .py
    self.make_file("sub/ssub/__init__.py")
    self.make_file("sub/ssub/s.py")
    self.make_file("sub/ssub/~s.py")        # nope: editor effluvia
    self.make_file("sub/lab/exp.py")        # nope: no __init__.py
    py_files = set(find_python_files("sub"))
    self.assert_same_files(py_files, [
        "sub/a.py", "sub/b.py",
        "sub/ssub/__init__.py", "sub/ssub/s.py",
        ])
def __init__(self, cov, code_unit):
    self.coverage = cov
    self.code_unit = code_unit

    self.filename = self.code_unit.filename
    ext = os.path.splitext(self.filename)[1]
    source = None
    if ext == '.py':
        if not os.path.exists(self.filename):
            source = self.coverage.file_locator.get_zip_data(self.filename)
            if not source:
                raise NoSource("No source for code: %r" % self.filename)

    self.parser = CodeParser(text=source, filename=self.filename,
        exclude=self.coverage.exclude_re)
    self.statements, self.excluded = self.parser.parse_source()

    # Identify missing statements.
    executed = self.coverage.data.executed_lines(self.filename)
    exec1 = self.parser.first_lines(executed)
    self.missing = sorted(set(self.statements) - set(exec1))

    if self.coverage.data.has_arcs():
        n_branches = self.total_branches()
        mba = self.missing_branch_arcs()
        n_missing_branches = sum([len(v) for v in mba.values()])
    else:
        n_branches = n_missing_branches = 0

    self.numbers = Numbers(
        n_files=1,
        n_statements=len(self.statements),
        n_excluded=len(self.excluded),
        n_missing=len(self.missing),
        n_branches=n_branches,
        n_missing_branches=n_missing_branches,
        )
def _arcs(self): """Find the executable arcs in the code. Yields pairs: (from,to). From and to are integer line numbers. If from is < 0, then the arc is an entrance into the code object. If to is < 0, the arc is an exit from the code object. """ chunks = self._split_into_chunks() # A map from byte offsets to chunks jumped into. byte_chunks = dict([(c.byte, c) for c in chunks]) # There's always an entrance at the first chunk. yield (-1, byte_chunks[0].line) # Traverse from the first chunk in each line, and yield arcs where # the trace function will be invoked. for chunk in chunks: if not chunk.first: continue chunks_considered = set() chunks_to_consider = [chunk] while chunks_to_consider: # Get the chunk we're considering, and make sure we don't # consider it again this_chunk = chunks_to_consider.pop() chunks_considered.add(this_chunk) # For each exit, add the line number if the trace function # would be triggered, or add the chunk to those being # considered if not. for ex in this_chunk.exits: if ex < 0: yield (chunk.line, ex) else: next_chunk = byte_chunks[ex] if next_chunk in chunks_considered: continue # The trace function is invoked if visiting the first # bytecode in a line, or if the transition is a # backward jump. backward_jump = next_chunk.byte < this_chunk.byte if next_chunk.first or backward_jump: if next_chunk.line != chunk.line: yield (chunk.line, next_chunk.line) else: chunks_to_consider.append(next_chunk)
def lines_matching(self, *regexes): """Find the lines matching one of a list of regexes. Returns a set of line numbers, the lines that contain a match for one of the regexes in `regexes`. The entire line needn't match, just a part of it. """ regex_c = re.compile(join_regex(regexes)) matches = set() for i, ltext in enumerate(self.lines): if regex_c.search(ltext): matches.add(i+1) return matches
def lines_matching(self, *regexes): """Find the lines matching one of a list of regexes. Returns a set of line numbers, the lines that contain a match for one of the regexes in `regexes`. The entire line needn't match, just a part of it. """ regex_c = re.compile(join_regex(regexes)) matches = set() for i, ltext in enumerate(self.lines): if regex_c.search(ltext): matches.add(i + 1) return matches
def missing_branch_arcs(self): """Return arcs that weren't executed from branch lines. Returns {l1:[l2a,l2b,...], ...} """ missing = self.arcs_missing() branch_lines = set(self.branch_lines()) mba = {} for l1, l2 in missing: if l1 in branch_lines: if l1 not in mba: mba[l1] = [] mba[l1].append(l2) return mba
def source_token_lines(source):
    """Generate a series of lines, one for each line in `source`.

    Each line is a list of pairs, each pair is a token::

        [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]

    Each pair has a token class, and the token text.

    If you concatenate all the token texts, and then join them with
    newlines, you should have your original `source` back, with two
    differences: trailing whitespace is not preserved, and a final line
    with no newline is indistinguishable from a final line with a newline.

    """
    ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL])
    line = []
    col = 0
    source = source.expandtabs(8).replace('\r\n', '\n')
    tokgen = generate_tokens(source)
    for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen):
        mark_start = True
        for part in re.split('(\n)', ttext):
            if part == '\n':
                yield line
                line = []
                col = 0
                mark_end = False
            elif part == '':
                mark_end = False
            elif ttype in ws_tokens:
                mark_end = False
            else:
                if mark_start and scol > col:
                    line.append(("ws", " " * (scol - col)))
                    mark_start = False
                tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
                if ttype == token.NAME and keyword.iskeyword(ttext):
                    tok_class = "key"
                line.append((tok_class, part))
                mark_end = True
            scol = 0
        if mark_end:
            col = ecol

    if line:
        yield line
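# Hypothetical usage sketch: per the docstring, joining the token texts of
# each yielded line reconstructs that source line (minus trailing
# whitespace).  The sample code string is invented for illustration, and the
# function is assumed to be in scope as defined above.
sample = "def hello():\n    return 'hi'\n"
for token_line in source_token_lines(sample):
    print("".join(text for _, text in token_line))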
def test_should_trace_cache(self):
    # The tracers should only invoke should_trace once for each file name.
    # TODO: Might be better to do this with a mocked _should_trace,
    # rather than by examining debug output.

    # Make some files that invoke each other.
    self.make_file(
        "f1.py",
        """\
        def f1(x, f):
            return f(x)
        """)
    self.make_file(
        "f2.py",
        """\
        import f1

        def func(x):
            return f1.f1(x, otherfunc)

        def otherfunc(x):
            return x*x

        for i in range(10):
            func(i)
        """)

    # Trace one file, but not the other, and get the debug output.
    debug_out = StringIO()
    cov = coverage.coverage(include=["f1.py"], debug=['trace'], debug_file=debug_out)

    # Import the python file, executing it.
    self.start_import_stop(cov, "f2")

    # Grab all the filenames mentioned in debug output, there should be no
    # duplicates.
    trace_lines = [
        l for l in debug_out.getvalue().splitlines()
        if l.startswith("Tracing ") or l.startswith("Not tracing ")
    ]
    filenames = [re.search(r"'[^']+'", l).group() for l in trace_lines]
    self.assertEqual(len(filenames), len(set(filenames)))

    # Double-check that the tracing messages are in there somewhere.
    self.assertGreater(len(filenames), 5)
def first_lines(self, lines, ignore=None): """Map the line numbers in `lines` to the correct first line of the statement. Skip any line mentioned in `ignore`. Returns a sorted list of the first lines. """ ignore = ignore or [] lset = set() for l in lines: if l in ignore: continue new_l = self.first_line(l) if new_l not in ignore: lset.add(new_l) return sorted(lset)
def test_should_trace_cache(self):
    # The tracers should only invoke should_trace once for each file name.
    # TODO: Might be better to do this with a mocked _should_trace,
    # rather than by examining debug output.

    # Make some files that invoke each other.
    self.make_file("f1.py", """\
        def f1(x, f):
            return f(x)
        """)
    self.make_file("f2.py", """\
        import f1

        def func(x):
            return f1.f1(x, otherfunc)

        def otherfunc(x):
            return x*x

        for i in range(10):
            func(i)
        """)

    # Trace one file, but not the other, and get the debug output.
    debug_out = StringIO()
    cov = coverage.coverage(
        include=["f1.py"], debug=['trace'], debug_file=debug_out
        )

    # Import the python file, executing it.
    self.start_import_stop(cov, "f2")

    # Grab all the filenames mentioned in debug output, there should be no
    # duplicates.
    trace_lines = [
        l for l in debug_out.getvalue().splitlines()
        if l.startswith("Tracing ") or l.startswith("Not tracing ")
    ]
    filenames = [re.search(r"'[^']+'", l).group() for l in trace_lines]
    self.assertEqual(len(filenames), len(set(filenames)))

    # Double-check that the tracing messages are in there somewhere.
    self.assertGreater(len(filenames), 5)
def __init__(self, byte, line, first):
    self.byte = byte
    self.line = line
    self.first = first
    self.length = 0
    self.exits = set()
def __init__(self, byte, line=0):
    self.byte = byte
    self.line = line
    self.length = 0
    self.exits = set()
def _arcs(self): """Find the executable arcs in the code. Returns a set of pairs, (from,to). From and to are integer line numbers. If from is < 0, then the arc is an entrance into the code object. If to is < 0, the arc is an exit from the code object. """ chunks = self._split_into_chunks() # A map from byte offsets to chunks jumped into. byte_chunks = dict([(c.byte, c) for c in chunks]) # Build a map from byte offsets to actual lines reached. byte_lines = {} bytes_to_add = set([c.byte for c in chunks]) while bytes_to_add: byte_to_add = bytes_to_add.pop() if byte_to_add in byte_lines or byte_to_add < 0: continue # Which lines does this chunk lead to? bytes_considered = set() bytes_to_consider = [byte_to_add] lines = set() while bytes_to_consider: byte = bytes_to_consider.pop() bytes_considered.add(byte) # Find chunk for byte try: ch = byte_chunks[byte] except KeyError: for ch in chunks: if ch.byte <= byte < ch.byte+ch.length: break else: # No chunk for this byte! raise Exception("Couldn't find chunk @ %d" % byte) byte_chunks[byte] = ch # pylint: disable=W0631 if ch.line: lines.add(ch.line) else: for ex in ch.exits: if ex < 0: lines.add(ex) elif ex not in bytes_considered: bytes_to_consider.append(ex) bytes_to_add.update(ch.exits) byte_lines[byte_to_add] = lines # Figure out for each chunk where the exits go. arcs = set() for chunk in chunks: if chunk.line: for ex in chunk.exits: if ex < 0: exit_lines = [ex] else: exit_lines = byte_lines[ex] for exit_line in exit_lines: if chunk.line != exit_line: arcs.add((chunk.line, exit_line)) for line in byte_lines[0]: arcs.add((-1, line)) return arcs
def _arcs(self): """Find the executable arcs in the code. Returns a set of pairs, (from,to). From and to are integer line numbers. If from is < 0, then the arc is an entrance into the code object. If to is < 0, the arc is an exit from the code object. """ chunks = self._split_into_chunks() # A map from byte offsets to chunks jumped into. byte_chunks = dict([(c.byte, c) for c in chunks]) # Build a map from byte offsets to actual lines reached. byte_lines = {} bytes_to_add = set([c.byte for c in chunks]) while bytes_to_add: byte_to_add = bytes_to_add.pop() if byte_to_add in byte_lines or byte_to_add < 0: continue # Which lines does this chunk lead to? bytes_considered = set() bytes_to_consider = [byte_to_add] lines = set() while bytes_to_consider: byte = bytes_to_consider.pop() bytes_considered.add(byte) # Find chunk for byte try: ch = byte_chunks[byte] except KeyError: for ch in chunks: if ch.byte <= byte < ch.byte + ch.length: break else: # No chunk for this byte! raise Exception("Couldn't find chunk @ %d" % byte) byte_chunks[byte] = ch if ch.line: lines.add(ch.line) else: for ex in ch.exits: if ex < 0: lines.add(ex) elif ex not in bytes_considered: bytes_to_consider.append(ex) bytes_to_add.update(ch.exits) byte_lines[byte_to_add] = lines # Figure out for each chunk where the exits go. arcs = set() for chunk in chunks: if chunk.line: for ex in chunk.exits: if ex < 0: exit_lines = [ex] else: exit_lines = byte_lines[ex] for exit_line in exit_lines: if chunk.line != exit_line: arcs.add((chunk.line, exit_line)) for line in byte_lines[0]: arcs.add((-1, line)) return arcs
def assertSameElements(self, s1, s2):
    """Assert that the two arguments are equal as sets."""
    self.assertEqual(set(s1), set(s2))
def validate_chunks(self, chunks):
    """Validate the rule that chunks have a single entrance."""
    # starts is the entrances to the chunks
    starts = set([ch.byte for ch in chunks])
    for ch in chunks:
        assert all([(ex in starts or ex < 0) for ex in ch.exits])
def _opcode_set(*names): """Return a set of opcodes by the names in `names`.""" return set([_opcode(name) for name in names])
def _split_into_chunks(self):
    """Split the code object into a list of `Chunk` objects.

    Each chunk is only entered at its first instruction, though there can
    be many exits from a chunk.

    Returns a list of `Chunk` objects.

    """
    # The list of chunks so far, and the one we're working on.
    chunks = []
    chunk = None

    # A dict mapping byte offsets of line starts to the line numbers.
    bytes_lines_map = dict(self._bytes_lines())

    # The block stack: loops and try blocks get pushed here for the
    # implicit jumps that can occur.
    # Each entry is a tuple: (block type, destination)
    block_stack = []

    # Some op codes are followed by branches that should be ignored.  This
    # is a count of how many ignores are left.
    ignore_branch = 0

    # We have to handle the last two bytecodes specially.
    ult = penult = None

    # Get a set of all of the jump-to points.
    jump_to = set()
    bytecodes = list(ByteCodes(self.code.co_code))
    for bc in bytecodes:
        if bc.jump_to >= 0:
            jump_to.add(bc.jump_to)

    chunk_lineno = 0

    # Walk the byte codes building chunks.
    for bc in bytecodes:
        # Maybe have to start a new chunk
        start_new_chunk = False
        first_chunk = False
        if bc.offset in bytes_lines_map:
            # Start a new chunk for each source line number.
            start_new_chunk = True
            chunk_lineno = bytes_lines_map[bc.offset]
            first_chunk = True
        elif bc.offset in jump_to:
            # To make chunks have a single entrance, we have to make a new
            # chunk when we get to a place some bytecode jumps to.
            start_new_chunk = True
        elif bc.op in OPS_CHUNK_BEGIN:
            # Jumps deserve their own unnumbered chunk.  This fixes
            # problems with jumps to jumps getting confused.
            start_new_chunk = True

        if not chunk or start_new_chunk:
            if chunk:
                chunk.exits.add(bc.offset)
            chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
            chunks.append(chunk)

        # Look at the opcode
        if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
            if ignore_branch:
                # Someone earlier wanted us to ignore this branch.
                ignore_branch -= 1
            else:
                # The opcode has a jump, it's an exit for this chunk.
                chunk.exits.add(bc.jump_to)

        if bc.op in OPS_CODE_END:
            # The opcode can exit the code object.
            chunk.exits.add(-self.code.co_firstlineno)
        if bc.op in OPS_PUSH_BLOCK:
            # The opcode adds a block to the block_stack.
            block_stack.append((bc.op, bc.jump_to))
        if bc.op in OPS_POP_BLOCK:
            # The opcode pops a block from the block stack.
            block_stack.pop()
        if bc.op in OPS_CHUNK_END:
            # This opcode forces the end of the chunk.
            if bc.op == OP_BREAK_LOOP:
                # A break is implicit: jump where the top of the
                # block_stack points.
                chunk.exits.add(block_stack[-1][1])
            chunk = None
        if bc.op == OP_END_FINALLY:
            # For the finally clause we need to find the closest exception
            # block, and use its jump target as an exit.
            for block in reversed(block_stack):
                if block[0] in OPS_EXCEPT_BLOCKS:
                    chunk.exits.add(block[1])
                    break
        if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
            # This is an except clause.  We want to overlook the next
            # branch, so that except's don't count as branches.
            ignore_branch += 1

        penult = ult
        ult = bc

    if chunks:
        # The last two bytecodes could be a dummy "return None" that
        # shouldn't be counted as real code.  Every Python code object
        # seems to end with a return, and a "return None" is inserted if
        # there isn't an explicit return in the source.
        if ult and penult:
            if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
                if self.code.co_consts[penult.arg] is None:
                    # This is "return None", but is it dummy?  A real line
                    # would be a last chunk all by itself.
                    if chunks[-1].byte != penult.offset:
                        ex = -self.code.co_firstlineno
                        # Split the last chunk
                        last_chunk = chunks[-1]
                        last_chunk.exits.remove(ex)
                        last_chunk.exits.add(penult.offset)
                        chunk = Chunk(
                            penult.offset, last_chunk.line, False
                        )
                        chunk.exits.add(ex)
                        chunks.append(chunk)

        # Give all the chunks a length.
        chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
        for i in range(len(chunks)-1):
            chunks[i].length = chunks[i+1].byte - chunks[i].byte

    #self.validate_chunks(chunks)
    return chunks
def __init__(self, text, *contexts):
    """Construct a Templite with the given `text`.

    `contexts` are dictionaries of values to use for future renderings.
    These are good for filters and global values.

    """
    self.text = text
    self.context = {}
    for context in contexts:
        self.context.update(context)

    # We construct a function in source form, then compile it and hold onto
    # it, and execute it to render the template.
    code = CodeBuilder()

    code.add_line("def render(ctx, dot):")
    code.indent()
    vars_code = code.add_section()
    self.all_vars = set()
    self.loop_vars = set()
    code.add_line("result = []")
    code.add_line("a = result.append")
    code.add_line("e = result.extend")
    code.add_line("s = str")

    buffered = []
    def flush_output():
        """Force `buffered` to the code builder."""
        if len(buffered) == 1:
            code.add_line("a(%s)" % buffered[0])
        elif len(buffered) > 1:
            code.add_line("e([%s])" % ",".join(buffered))
        del buffered[:]

    # Split the text to form a list of tokens.
    toks = re.split(r"(?s)({{.*?}}|{%.*?%}|{#.*?#})", text)

    ops_stack = []
    for tok in toks:
        if tok.startswith('{{'):
            # An expression to evaluate.
            buffered.append("s(%s)" % self.expr_code(tok[2:-2].strip()))
        elif tok.startswith('{#'):
            # Comment: ignore it and move on.
            continue
        elif tok.startswith('{%'):
            # Action tag: split into words and parse further.
            flush_output()
            words = tok[2:-2].strip().split()
            if words[0] == 'if':
                # An if statement: evaluate the expression to determine if.
                assert len(words) == 2
                ops_stack.append('if')
                code.add_line("if %s:" % self.expr_code(words[1]))
                code.indent()
            elif words[0] == 'for':
                # A loop: iterate over expression result.
                assert len(words) == 4 and words[2] == 'in'
                ops_stack.append('for')
                self.loop_vars.add(words[1])
                code.add_line(
                    "for c_%s in %s:" % (
                        words[1],
                        self.expr_code(words[3])
                    )
                )
                code.indent()
            elif words[0].startswith('end'):
                # Endsomething.  Pop the ops stack
                end_what = words[0][3:]
                if ops_stack[-1] != end_what:
                    raise SyntaxError("Mismatched end tag: %r" % end_what)
                ops_stack.pop()
                code.dedent()
            else:
                raise SyntaxError("Don't understand tag: %r" % words[0])
        else:
            # Literal content.  If it isn't empty, output it.
            if tok:
                buffered.append("%r" % tok)
    flush_output()

    for var_name in self.all_vars - self.loop_vars:
        vars_code.add_line("c_%s = ctx[%r]" % (var_name, var_name))

    if ops_stack:
        raise SyntaxError("Unmatched action tag: %r" % ops_stack[-1])

    code.add_line("return ''.join(result)")
    code.dedent()
    self.render_function = code.get_function('render')
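# Hypothetical usage sketch: assuming the class also exposes a render(context)
# method (not shown in this excerpt) that invokes self.render_function, the
# template is compiled once here and can then be rendered against many
# contexts.  The template text and context values are invented for
# illustration.
greeting = Templite("Hello, {{name}}! You have {{count}} new messages.")
print(greeting.render({'name': "World", 'count': 3}))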