def test_bom(self):
    """A UTF-8 BOM determines the encoding, and brooks no contradiction."""
    # The BOM must be real bytes: a text literal with \xEF\xBB\xBF escapes is
    # not a byte-order mark on Python 3, so use bytes sources as the sibling
    # BOM tests do.
    # A BOM means utf-8.
    source = b"\xEF\xBB\xBFtext = 'hello'\n"
    self.assertEqual(source_encoding(source), 'utf-8-sig')
    # But it has to be the only authority.
    source = b"\xEF\xBB\xBF# coding: cp850\n"
    with self.assertRaises(SyntaxError):
        source_encoding(source)
def test_bom(self):
    """A BOM implies utf-8, and must be the only encoding authority."""
    bom = b"\xEF\xBB\xBF"
    # A BOM by itself means utf-8.
    self.assertEqual(source_encoding(bom + b"text = 'hello'\n"), 'utf-8-sig')
    # A BOM plus a contradicting coding declaration is a syntax error.
    with self.assertRaises(SyntaxError):
        source_encoding(bom + b"# coding: cp850\n")
def test_detect_source_encoding(self):
    """All the PEP 263 declaration forms should be recognized."""
    # Various forms from http://www.python.org/dev/peps/pep-0263/
    cases = [
        ("# coding=cp850\n\n", 'cp850'),
        ("#!/usr/bin/python\n# -*- coding: utf-8 -*-\n", 'utf-8'),
        ("#!/usr/bin/python\n# vim: set fileencoding=utf8 :\n", 'utf8'),
        ("# This Python file uses this encoding: utf-8\n", 'utf-8'),
    ]
    for source, expected in cases:
        self.assertEqual(source_encoding(source), expected)
def test_detect_source_encoding(self):
    """Every declaration sample should be detected as cp850."""
    for _, sample in ENCODING_DECLARATION_SOURCES:
        message = "Wrong encoding in %r" % sample
        self.assertEqual(source_encoding(sample), 'cp850', message)
def get_python_source(filename):
    """Return the source code, as unicode.

    Looks for `filename` on disk (trying both .py and .pyw on Windows), then
    inside zip files, decodes it using its declared encoding, and guarantees
    a trailing newline.  Raises NoSource if nothing can be found.
    """
    base, ext = os.path.splitext(filename)
    if ext == ".py" and env.WINDOWS:
        # On Windows a .py file may actually live as .pyw; try both.
        exts = [".py", ".pyw"]
    else:
        exts = [ext]

    for ext in exts:
        try_filename = base + ext
        if os.path.exists(try_filename):
            # A regular text file: open it.
            source = read_python_source(try_filename)
            break

        # Maybe it's in a zip file?
        source = get_zip_bytes(try_filename)
        if source is not None:
            break
    else:
        # Couldn't find source: no candidate extension matched on disk or in
        # a zip, so the for-else fires.
        exc_msg = "No source for code: '%s'.\n" % (filename,)
        exc_msg += "Aborting report output, consider using -i."
        raise NoSource(exc_msg)

    # Replace \f because of http://bugs.python.org/issue19035
    source = source.replace(b'\f', b' ')
    # `source` is bytes here; decode with the declared (or default) encoding,
    # substituting replacement characters rather than failing.
    source = source.decode(source_encoding(source), "replace")

    # Python code should always end with a line with a newline.
    if source and source[-1] != '\n':
        source += '\n'

    return source
def test_detect_source_encoding(self):
    """Each sample source should report its expected encoding."""
    for _, sample, expected in ENCODING_DECLARATION_SOURCES:
        detected = source_encoding(sample)
        self.assertEqual(detected, expected, "Wrong encoding in %r" % sample)
def get_python_source(filename):
    """Return the source code, as unicode.

    Looks for `filename` on disk (trying both .py and .pyw on Windows), then
    inside zip files, decodes it using its declared encoding, and guarantees
    a trailing newline.  Raises NoSource if nothing can be found.
    """
    base, ext = os.path.splitext(filename)
    if ext == ".py" and env.WINDOWS:
        # On Windows a .py file may actually live as .pyw; try both.
        exts = [".py", ".pyw"]
    else:
        exts = [ext]

    for ext in exts:
        try_filename = base + ext
        if os.path.exists(try_filename):
            # A regular text file: open it.
            source = read_python_source(try_filename)
            break

        # Maybe it's in a zip file?
        source = get_zip_bytes(try_filename)
        if source is not None:
            break
    else:
        # Couldn't find source.  BUG FIX: the message previously hard-coded
        # '(unknown)' instead of naming the missing file; interpolate the
        # actual filename so the error is actionable.
        exc_msg = f"No source for code: '{filename}'.\n"
        exc_msg += "Aborting report output, consider using -i."
        raise NoSource(exc_msg)

    # Replace \f because of http://bugs.python.org/issue19035
    source = source.replace(b'\f', b' ')
    # Decode the bytes using the declared (or default) encoding, replacing
    # undecodable bytes rather than failing.
    source = source.decode(source_encoding(source), "replace")

    # Python code should always end with a line with a newline.
    if source and source[-1] != '\n':
        source += '\n'

    return source
def parse_file(self, cu, analysis):
    """ Generate data for single file

    Builds a dict with the file's relative name, its source text, and a
    per-line coverage list (plus branch data when available), and appends it
    to self.source_files.  Handles both old-style (analysis has a `parser`)
    and new-style coverage.py analysis objects.
    """
    if hasattr(analysis, 'parser'):
        # Old coverage.py API: parser-based analysis.
        filename = cu.file_locator.relative_filename(cu.filename)
        source_lines = analysis.parser.lines
        with cu.source_file() as source_file:
            source = source_file.read()
        try:
            if sys.version_info < (3, 0):
                # On Python 2 the source is bytes; normalize to utf-8.
                encoding = source_encoding(source)
                if encoding != 'utf-8':
                    source = source.decode(encoding).encode('utf-8')
        except UnicodeDecodeError:
            # Best effort: skip files whose declared encoding is wrong.
            log.warn('Source file %s can not be properly decoded, skipping. '
                     'Please check if encoding declaration is ok',
                     basename(cu.filename))
            return
    else:
        # New coverage.py API: file_reporter-based analysis.
        if hasattr(cu, 'relative_filename'):
            filename = cu.relative_filename()
        else:
            filename = analysis.coverage.file_locator.relative_filename(cu.filename)
        source_lines = list(enumerate(analysis.file_reporter.source_token_lines()))
        source = analysis.file_reporter.source()

    # One hit entry per physical source line, 1-based.
    coverage_lines = [self.get_hits(i, analysis) for i in range(1, len(source_lines) + 1)]

    results = {
        'name': filename,
        'source': source,
        'coverage': coverage_lines,
    }
    # Branch data is optional; only include it when present.
    branches = self.get_arcs(analysis)
    if branches:
        results['branches'] = branches
    self.source_files.append(results)
def get_python_source(filename):
    """Return the source code, as unicode.

    Searches the disk (trying .pyw as well on Windows) and then zip files for
    the code, decodes it per its declared encoding, and guarantees a trailing
    newline.  Raises NoSource when nothing is found.
    """
    base, ext = os.path.splitext(filename)
    if ext == ".py" and env.WINDOWS:
        # On Windows a .py file may actually live as .pyw; try both.
        exts = [".py", ".pyw"]
    else:
        exts = [ext]

    for ext in exts:
        try_filename = base + ext
        if os.path.exists(try_filename):
            # A regular text file: open it.
            source = read_python_source(try_filename)
            break

        # Maybe it's in a zip file?
        source = get_zip_bytes(try_filename)
        if source is not None:
            break
    else:
        # Couldn't find source.  (for-else: no candidate matched.)
        raise NoSource("No source for code: '%s'." % filename)

    # Decode the bytes using the declared (or default) encoding, replacing
    # undecodable bytes rather than failing.
    source = source.decode(source_encoding(source), "replace")

    # Python code should always end with a line with a newline.
    if source and source[-1] != '\n':
        source += '\n'

    return source
def test_detect_source_encoding_not_in_comment(self):
    """A coding-like token outside a comment must not count as a declaration."""
    # NOTE: the directive was misspelled "pramga", which would stop it from
    # being honored by the coverage tooling; corrected to "pragma".
    if env.PYPY and env.PY3:            # pragma: no metacov
        # PyPy3 gets this case wrong. Not sure what I can do about it,
        # so skip the test.
        self.skipTest("PyPy3 is wrong about non-comment encoding. Skip it.")
    # Should not detect anything here
    source = b'def parse(src, encoding=None):\n pass'
    self.assertEqual(source_encoding(source), DEF_ENCODING)
def test_detect_source_encoding_not_in_comment(self):
    """A coding-like token outside a comment must not count as a declaration."""
    if env.PYPY and env.PY3:
        # PyPy3 gets this case wrong. Not sure what I can do about it,
        # so skip the test.
        # Use the unittest skip API instead of a bare `raise SkipTest`
        # (nose-style, no reason recorded), consistent with sibling tests.
        self.skipTest("PyPy3 is wrong about non-comment encoding. Skip it.")
    # Should not detect anything here
    source = b'def parse(src, encoding=None):\n pass'
    self.assertEqual(source_encoding(source), DEF_ENCODING)
def test_detect_source_encoding_not_in_comment(self):
    """An 'encoding=' outside a comment is not an encoding declaration."""
    if env.PYPY and env.PY3:
        # PyPy3 gets this case wrong. Not sure what I can do about it,
        # so skip the test.
        self.skipTest("PyPy3 is wrong about non-comment encoding. Skip it.")
    sample = b'def parse(src, encoding=None):\n pass'
    detected = source_encoding(sample)
    self.assertEqual(detected, DEF_ENCODING)
def source(self):
    """Return the unicode source text of this file, reading it lazily.

    The result is cached in self._source after the first call.
    """
    if self._source is None:
        self._source = get_python_source(self.filename)
        if env.PY2:
            # presumably get_python_source returns bytes on Python 2 here,
            # so decode with the declared encoding — TODO confirm.
            encoding = source_encoding(self._source)
            self._source = self._source.decode(encoding, "replace")
        assert isinstance(self._source, unicode_class)
    return self._source
def test_neuter_encoding_declaration(self):
    """Neutered sources should no longer declare any encoding."""
    for sample in ENCODING_DECLARATION_SOURCES:
        text = sample.decode("ascii")
        neutered = neuter_encoding_declaration(text).encode("ascii")
        self.assertEqual(
            source_encoding(neutered),
            DEF_ENCODING,
            "Wrong encoding in %r" % neutered,
        )
def parse_file(self, cu, analysis):
    """Build the coveralls data record for one source file and store it."""
    name = cu.file_locator.relative_filename(cu.filename)
    # One hit-count entry per physical line, 1-based.
    hits = [
        self.get_hits(line_num, analysis)
        for line_num in range(1, len(analysis.parser.lines) + 1)
    ]
    src_file = cu.source_file()
    try:
        src = src_file.read()
        if sys.version_info < (3, 0):
            # On Python 2 re-encode anything that isn't already utf-8.
            declared = source_encoding(src)
            if declared != 'utf-8':
                src = src.decode(declared).encode('utf-8')
    finally:
        src_file.close()
    record = {'name': name, 'source': src, 'coverage': hits}
    self.source_files.append(record)
def test_neuter_encoding_declaration(self):
    """Neutering alters exactly the expected lines and kills the declaration."""
    for lines_diff_expected, sample, _ in ENCODING_DECLARATION_SOURCES:
        neutered = neuter_encoding_declaration(sample.decode("ascii")).encode("ascii")

        original_lines = sample.splitlines()
        new_lines = neutered.splitlines()
        # Line count must be preserved so reported line numbers stay valid.
        assert len(original_lines) == len(new_lines)

        # Exactly the expected number of lines may differ.
        changed = sum(
            old != new for old, new in zip(original_lines, new_lines)
        )
        assert changed == lines_diff_expected

        # The neutered source will be detected as having no encoding
        # declaration.
        assert source_encoding(neutered) == DEF_ENCODING, "Wrong encoding in %r" % neutered
def parse_file(self, cu, analysis):
    """Assemble and append the report entry for a single covered file."""
    rel_name = cu.file_locator.relative_filename(cu.filename)
    line_count = len(analysis.parser.lines)
    coverage_data = [self.get_hits(n, analysis) for n in range(1, line_count + 1)]
    handle = cu.source_file()
    try:
        text = handle.read()
        if sys.version_info < (3, 0):
            # Normalize non-utf-8 sources to utf-8 on Python 2.
            declared = source_encoding(text)
            if declared != 'utf-8':
                text = text.decode(declared).encode('utf-8')
    finally:
        handle.close()
    self.source_files.append({
        'name': rel_name,
        'source': text,
        'coverage': coverage_data,
    })
def test_neuter_encoding_declaration(self):
    """Neutering changes only the declared lines and removes the declaration."""
    for lines_diff_expected, sample in ENCODING_DECLARATION_SOURCES:
        neutered = neuter_encoding_declaration(sample.decode("ascii")).encode("ascii")

        before = sample.splitlines()
        after = neutered.splitlines()
        # The neutered source should have the same number of lines.
        self.assertEqual(len(before), len(after))

        # Only one of the lines should be different.
        changed = sum(old != new for old, new in zip(before, after))
        self.assertEqual(lines_diff_expected, changed)

        # The neutered source will be detected as having no encoding
        # declaration.
        self.assertEqual(
            source_encoding(neutered),
            DEF_ENCODING,
            "Wrong encoding in %r" % neutered,
        )
def test_detect_source_encoding_not_in_comment(self):
    """A keyword argument named `encoding` is not a coding declaration."""
    # Should not detect anything here
    sample = 'def parse(src, encoding=None):\n pass'
    detected = source_encoding(sample)
    self.assertEqual(detected, 'ascii')
def test_unknown_encoding(self):
    """Declaring a codec Python doesn't know is a SyntaxError."""
    bogus = b"# coding: klingon\n"
    with pytest.raises(SyntaxError, match="unknown encoding: klingon"):
        source_encoding(bogus)
def source_encoding(self):
    """Return the encoding of this file's source, per the module-level
    source_encoding() applied to self.source()."""
    return source_encoding(self.source())
def test_dont_detect_source_encoding_on_third_line(self):
    """Per PEP 263, a declaration on line three is too late to count."""
    sample = b"\n\n# coding=cp850\n\n"
    detected = source_encoding(sample)
    self.assertEqual(detected, DEF_ENCODING)
def test_bom_is_wrong(self):
    """A BOM combined with a non-utf8 declaration must raise SyntaxError."""
    bad = b"\xEF\xBB\xBF# coding: cp850\n"
    with self.assertRaisesRegex(SyntaxError, "encoding problem: utf-8"):
        source_encoding(bad)
def test_detect_source_encoding_of_empty_file(self):
    """An empty file defaults to ascii — an important edge case."""
    detected = source_encoding("")
    self.assertEqual(detected, 'ascii')
def html_file(self, cu, analysis):
    """Generate an HTML report page for one source file.

    `cu` is the code unit, `analysis` its coverage analysis.  Skips the work
    entirely when the on-disk page is already up to date (hash match), and
    records index information for the summary page.
    """
    source_file = cu.source_file()
    try:
        source = source_file.read()
    finally:
        source_file.close()

    # If the file hasn't changed since the last report, reuse the old page.
    flat_rootname = cu.flat_rootname()
    this_hash = self.file_hash(source, cu)
    that_hash = self.status.file_hash(flat_rootname)
    if this_hash == that_hash:
        self.files.append(self.status.index_info(flat_rootname))
        return

    self.status.set_file_hash(flat_rootname, this_hash)

    # On Python 2, remember the encoding to write the HTML correctly, and
    # strip a UTF-8 BOM if one is present.
    if sys.version_info < (3, 0):
        encoding = source_encoding(source)
        if encoding.startswith('utf-8') and source[:3] == '\xef\xbb\xbf':
            source = source[3:]
            encoding = 'utf-8'

    nums = analysis.numbers
    missing_branch_arcs = analysis.missing_branch_arcs()

    # CSS classes that determine which lines are highlighted by default.
    c_run = 'run hide_run'
    c_exc = 'exc'
    c_mis = 'mis'
    c_par = 'par ' + c_run

    lines = []

    for lineno, line in enumerate(source_token_lines(source)):
        lineno += 1     # 1-based line numbers.
        # Classify the line: statement, excluded, missing, partial branch, run.
        line_class = []
        annotate_html = ''
        annotate_title = ''
        if lineno in analysis.statements:
            line_class.append('stm')
        if lineno in analysis.excluded:
            line_class.append(c_exc)
        elif lineno in analysis.missing:
            line_class.append(c_mis)
        elif self.arcs and lineno in missing_branch_arcs:
            line_class.append(c_par)
            # Annotate with the branch destinations that were never taken.
            annlines = []
            for b in missing_branch_arcs[lineno]:
                if b < 0:
                    annlines.append('exit')
                else:
                    annlines.append(str(b))
            annotate_html = ' '.join(annlines)
            if len(annlines) > 1:
                annotate_title = 'no jumps to these line numbers'
            elif len(annlines) == 1:
                annotate_title = 'no jump to this line number'
        elif lineno in analysis.statements:
            line_class.append(c_run)

        # Build the syntax-highlighted HTML for the line's tokens.
        html = []
        for tok_type, tok_text in line:
            if tok_type == 'ws':
                html.append(escape(tok_text))
            else:
                tok_html = escape(tok_text) or ' '
                html.append("<span class='%s'>%s</span>" % (tok_type, tok_html))

        lines.append({'html': ''.join(html), 'number': lineno, 'class': ' '.join(line_class) or 'pln', 'annotate': annotate_html, 'annotate_title': annotate_title})

    # Render and write the page for this file.
    html = spaceless(self.source_tmpl.render({'c_exc': c_exc, 'c_mis': c_mis, 'c_par': c_par, 'c_run': c_run, 'arcs': self.arcs, 'extra_css': self.extra_css, 'cu': cu, 'nums': nums, 'lines': lines}))

    if sys.version_info < (3, 0):
        html = html.decode(encoding)

    html_filename = flat_rootname + '.html'
    html_path = os.path.join(self.directory, html_filename)
    self.write_html(html_path, html)

    # Save this file's information for the index page.
    index_info = {'nums': nums, 'html_filename': html_filename, 'name': cu.name}
    self.files.append(index_info)
    self.status.set_index_info(flat_rootname, index_info)
def test_dont_detect_source_encoding_on_third_line(self):
    """Per PEP 263, a declaration on the third line doesn't count."""
    sample = b"\n\n# coding=cp850\n\n"
    detected = source_encoding(sample)
    assert detected == DEF_ENCODING
def test_detect_source_encoding_of_empty_file(self):
    """Empty input falls back to the default encoding — a key edge case."""
    detected = source_encoding(b"")
    self.assertEqual(detected, DEF_ENCODING)
def test_detect_source_encoding_of_empty_file(self):
    """Empty input falls back to the default encoding — a key edge case."""
    detected = source_encoding(b"")
    assert detected == DEF_ENCODING
def test_bom_with_encoding(self):
    """A BOM plus an agreeing utf-8 declaration is fine: utf-8-sig."""
    sample = b"\xEF\xBB\xBF# coding: utf-8\ntext = 'hello'\n"
    detected = source_encoding(sample)
    self.assertEqual(detected, 'utf-8-sig')
def test_bom(self):
    """A byte-order mark alone implies utf-8-sig."""
    sample = b"\xEF\xBB\xBFtext = 'hello'\n"
    detected = source_encoding(sample)
    assert detected == 'utf-8-sig'
def test_bom_with_encoding(self):
    """A BOM plus an agreeing utf-8 declaration yields utf-8-sig."""
    sample = b"\xEF\xBB\xBF# coding: utf-8\ntext = 'hello'\n"
    detected = source_encoding(sample)
    assert detected == 'utf-8-sig'
def test_bom_is_wrong(self):
    """A BOM contradicted by a non-utf8 declaration is a SyntaxError."""
    bad = b"\xEF\xBB\xBF# coding: cp850\n"
    with pytest.raises(SyntaxError, match="encoding problem: utf-8"):
        source_encoding(bad)
def test_detect_source_encoding_not_in_comment(self):
    """A keyword argument named `encoding` is not a coding declaration."""
    sample = b'def parse(src, encoding=None):\n pass'
    detected = source_encoding(sample)
    assert detected == DEF_ENCODING
def html_file(self, cu, analysis):
    """Generate an HTML file for one source file.

    `cu` is the code unit, `analysis` its coverage analysis.  Skips the work
    when the on-disk page is already current (hash match), and records index
    information for the summary page.
    """
    source_file = cu.source_file()
    try:
        source = source_file.read()
    finally:
        source_file.close()

    # Find out if the file on disk is already correct.
    flat_rootname = cu.flat_rootname()
    this_hash = self.file_hash(source, cu)
    that_hash = self.status.file_hash(flat_rootname)
    if this_hash == that_hash:
        # Nothing has changed to require the file to be reported again.
        self.files.append(self.status.index_info(flat_rootname))
        return

    self.status.set_file_hash(flat_rootname, this_hash)

    # If need be, determine the encoding of the source file. We use it
    # later to properly write the HTML.
    if sys.version_info < (3, 0):
        encoding = source_encoding(source)
        # Some UTF8 files have the dreaded UTF8 BOM. If so, junk it.
        if encoding.startswith("utf-8") and source[:3] == "\xef\xbb\xbf":
            source = source[3:]
            encoding = "utf-8"

    # Get the numbers for this file.
    nums = analysis.numbers

    if self.arcs:
        missing_branch_arcs = analysis.missing_branch_arcs()

    # These classes determine which lines are highlighted by default.
    c_run = "run hide_run"
    c_exc = "exc"
    c_mis = "mis"
    c_par = "par " + c_run

    lines = []

    for lineno, line in enumerate(source_token_lines(source)):
        lineno += 1     # 1-based line numbers.
        # Figure out how to mark this line.
        line_class = []
        annotate_html = ""
        annotate_title = ""
        if lineno in analysis.statements:
            line_class.append("stm")
        if lineno in analysis.excluded:
            line_class.append(c_exc)
        elif lineno in analysis.missing:
            line_class.append(c_mis)
        elif self.arcs and lineno in missing_branch_arcs:
            line_class.append(c_par)
            # Annotate with the branch destinations that were never taken.
            annlines = []
            for b in missing_branch_arcs[lineno]:
                if b < 0:
                    annlines.append("exit")
                else:
                    annlines.append(str(b))
            annotate_html = " ".join(annlines)
            if len(annlines) > 1:
                annotate_title = "no jumps to these line numbers"
            elif len(annlines) == 1:
                annotate_title = "no jump to this line number"
        elif lineno in analysis.statements:
            line_class.append(c_run)

        # Build the HTML for the line
        html = []
        for tok_type, tok_text in line:
            if tok_type == "ws":
                html.append(escape(tok_text))
            else:
                tok_html = escape(tok_text) or '&nbsp;'
                html.append("<span class='%s'>%s</span>" % (tok_type, tok_html))

        lines.append({
            'html': ''.join(html),
            'number': lineno,
            'class': ' '.join(line_class) or "pln",
            'annotate': annotate_html,
            'annotate_title': annotate_title,
        })

    # Write the HTML page for this file.
    html = spaceless(
        self.source_tmpl.render({
            'c_exc': c_exc,
            'c_mis': c_mis,
            'c_par': c_par,
            'c_run': c_run,
            'arcs': self.arcs,
            'extra_css': self.extra_css,
            'cu': cu,
            'nums': nums,
            'lines': lines,
        }))

    if sys.version_info < (3, 0):
        html = html.decode(encoding)

    html_filename = flat_rootname + ".html"
    html_path = os.path.join(self.directory, html_filename)
    self.write_html(html_path, html)

    # Save this file's information for the index file.
    index_info = {
        'nums': nums,
        'html_filename': html_filename,
        'name': cu.name,
    }
    self.files.append(index_info)
    self.status.set_index_info(flat_rootname, index_info)
def test_detect_source_encoding_on_second_line(self):
    """A declaration on line two still counts, even after a blank line."""
    sample = "\n# coding=cp850\n\n"
    detected = source_encoding(sample)
    self.assertEqual(detected, 'cp850')
def html_file(self, cu, analysis):
    """Generate an HTML file for one source file.

    `cu` is the code unit, `analysis` its coverage analysis.  Skips the work
    when the on-disk page is already current (hash match), and records index
    information for the summary page.
    """
    source_file = cu.source_file()
    try:
        source = source_file.read()
    finally:
        source_file.close()

    # Find out if the file on disk is already correct.
    flat_rootname = cu.flat_rootname()
    this_hash = self.file_hash(source, cu)
    that_hash = self.status.file_hash(flat_rootname)
    if this_hash == that_hash:
        # Nothing has changed to require the file to be reported again.
        self.files.append(self.status.index_info(flat_rootname))
        return

    self.status.set_file_hash(flat_rootname, this_hash)

    # If need be, determine the encoding of the source file. We use it
    # later to properly write the HTML.
    if sys.version_info < (3, 0):
        encoding = source_encoding(source)
        # Some UTF8 files have the dreaded UTF8 BOM. If so, junk it.
        if encoding.startswith("utf-8") and source[:3] == "\xef\xbb\xbf":
            source = source[3:]
            encoding = "utf-8"

    # Get the numbers for this file.
    nums = analysis.numbers

    if self.arcs:
        missing_branch_arcs = analysis.missing_branch_arcs()

    # These classes determine which lines are highlighted by default.
    c_run = "run hide_run"
    c_exc = "exc"
    c_mis = "mis"
    c_par = "par " + c_run

    lines = []

    for lineno, line in enumerate(source_token_lines(source)):
        lineno += 1     # 1-based line numbers.
        # Figure out how to mark this line.
        line_class = []
        annotate_html = ""
        annotate_title = ""
        if lineno in analysis.statements:
            line_class.append("stm")
        if lineno in analysis.excluded:
            line_class.append(c_exc)
        elif lineno in analysis.missing:
            line_class.append(c_mis)
        elif self.arcs and lineno in missing_branch_arcs:
            line_class.append(c_par)
            # Annotate with the branch destinations that were never taken.
            annlines = []
            for b in missing_branch_arcs[lineno]:
                if b < 0:
                    annlines.append("exit")
                else:
                    annlines.append(str(b))
            annotate_html = " ".join(annlines)
            if len(annlines) > 1:
                annotate_title = "no jumps to these line numbers"
            elif len(annlines) == 1:
                annotate_title = "no jump to this line number"
        elif lineno in analysis.statements:
            line_class.append(c_run)

        # Build the HTML for the line
        html = []
        for tok_type, tok_text in line:
            if tok_type == "ws":
                html.append(escape(tok_text))
            else:
                tok_html = escape(tok_text) or '&nbsp;'
                html.append(
                    "<span class='%s'>%s</span>" % (tok_type, tok_html)
                )

        lines.append({
            'html': ''.join(html),
            'number': lineno,
            'class': ' '.join(line_class) or "pln",
            'annotate': annotate_html,
            'annotate_title': annotate_title,
        })

    # Write the HTML page for this file.
    html = spaceless(self.source_tmpl.render({
        'c_exc': c_exc,
        'c_mis': c_mis,
        'c_par': c_par,
        'c_run': c_run,
        'arcs': self.arcs,
        'extra_css': self.extra_css,
        'cu': cu,
        'nums': nums,
        'lines': lines,
    }))

    if sys.version_info < (3, 0):
        html = html.decode(encoding)

    html_filename = flat_rootname + ".html"
    html_path = os.path.join(self.directory, html_filename)
    self.write_html(html_path, html)

    # Save this file's information for the index file.
    index_info = {
        'nums': nums,
        'html_filename': html_filename,
        'name': cu.name,
    }
    self.files.append(index_info)
    self.status.set_index_info(flat_rootname, index_info)
def test_dont_detect_source_encoding_on_third_line(self):
    """Per PEP 263, a declaration on the third line doesn't count."""
    sample = "\n\n# coding=cp850\n\n"
    detected = source_encoding(sample)
    self.assertEqual(detected, 'ascii')
def test_unknown_encoding(self):
    """Declaring a codec Python doesn't know is a SyntaxError."""
    bogus = b"# coding: klingon\n"
    with self.assertRaisesRegex(SyntaxError, "unknown encoding: klingon"):
        source_encoding(bogus)
def test_detect_source_encoding(self):
    """Each sample source should report its expected encoding."""
    for _, sample, want in ENCODING_DECLARATION_SOURCES:
        got = source_encoding(sample)
        assert got == want, f"Wrong encoding in {sample!r}"
def test_bom(self):
    """A byte-order mark alone implies utf-8-sig."""
    sample = b"\xEF\xBB\xBFtext = 'hello'\n"
    detected = source_encoding(sample)
    self.assertEqual(detected, 'utf-8-sig')
def test_detect_source_encoding_on_second_line(self):
    """A declaration on line two still counts, even after a blank line."""
    sample = b"\n# coding=cp850\n\n"
    detected = source_encoding(sample)
    self.assertEqual(detected, 'cp850')
def source_encoding(self, source):
    """Return the encoding declared in `source`, delegating to the
    module-level source_encoding() function."""
    return source_encoding(source)