def balanced_intervals(lines):
    """
    Finds intervals of balanced nesting syntax

    Works backwards from the last line: a window ``lines[a:b]`` is grown
    upwards until ``static.is_balanced_statement`` accepts it, the window is
    recorded, and the search restarts on the lines above it.

    Args:
        lines (List[str]): lines of source code

    Returns:
        List[Tuple[int, int]]: half-open ``(start, stop)`` index pairs into
            ``lines``, ordered from the first interval in the source to the
            last. An empty ``lines`` gives an empty list.

    Raises:
        exceptions.IncompleteParseError: if the top of ``lines`` is reached
            while the current window is still unbalanced.
    """
    intervals = []
    b = len(lines)
    while b > 0:
        a = b - 1
        # Move the head pointer up until the window becomes balanced. The
        # bounds check is evaluated first so that ``lines[a:b]`` is never
        # sliced with a negative start, which would wrap around to the end
        # of the list instead of meaning "before the first line".
        while a >= 0 and not static.is_balanced_statement(
                lines[a:b], only_tokens=True):
            a -= 1
        if a < 0:
            raise exceptions.IncompleteParseError(
                'ill-formed doctest: cannot find balanced ps1 lines.'
            )
        # we found a balanced interval
        intervals.append((a, b))
        # the next window ends where this one starts
        b = a
    # intervals were collected bottom-up; report them top-down
    intervals.reverse()
    return intervals
def _complete_source(line, state_indent, line_iter):
    """
    Helper generator that yields ``line`` followed by however many lines have
    to be taken from ``line_iter`` to complete the source statement.

    Args:
        line (str): first line of the statement. Once the first
            ``state_indent`` characters are dropped, its 4-character prefix
            must strip to ``>>>`` or ``...``.
        state_indent (int): slice offset removed from the front of every
            line to normalize indentation.
        line_iter (Iterator[Tuple[int, str]]): produces
            ``(line_idx, next_line)`` pairs. It is advanced in place, so the
            lines consumed here are removed from the caller's iterator.

    Yields:
        str: ``line`` itself, then each consumed line, unmodified.

    Raises:
        SyntaxError: if ``line_iter`` runs out before the statement is
            balanced, or if a consumed line has an unexpected prefix.
    """
    first_norm = line[state_indent:]  # Normalize line indentation
    first_prompt, first_code = first_norm[:4], first_norm[4:]
    assert first_prompt.strip() in {'>>>', '...'}, '{}'.format(first_prompt)
    yield line

    # Code fragments (prompt removed) gathered so far for this statement
    collected = [first_code]
    while True:
        if static.is_balanced_statement(collected):
            # the statement is complete; leave the rest of the iterator alone
            return
        try:
            lineno, raw_line = next(line_iter)
        except StopIteration:
            # Ran out of lines while the statement was still open
            raise SyntaxError('ill-formed doctest')
        dedented = raw_line[state_indent:]
        prompt = dedented[:4].strip()
        if prompt not in {'>>>', '...', ''}:  # nocover
            raise SyntaxError(
                'Bad indentation in doctest on line {}: {!r}'.format(
                    lineno, raw_line))
        collected.append(dedented[4:])
        yield raw_line
def _workaround_16806(self, ps1_linenos, exec_source_lines):
    """
    workaround for python issue 16806 (https://bugs.python.org/issue16806)

    The issue causes the lineno of a multiline string to be the line it ends
    on instead of the line it starts on. A patch for the issue exists at
    `https://github.com/python/cpython/pull/1800`.

    Args:
        ps1_linenos: sliceable sequence of candidate statement start indices
            into ``exec_source_lines``.
        exec_source_lines: the source lines those indices refer to.

    Returns:
        set: the corrected start indices.

    Notes:
        Starting from the end, consecutive pairs of indices are inspected
        together with the statement they delimit (the first statement
        examined goes from ``ps1_linenos[-1]`` to the end of the line list).
    """
    stop = len(exec_source_lines)
    corrected = []
    for start in ps1_linenos[::-1]:
        # `stop` is known to be right, but `start` may be wrong;
        # is_balanced_statement will be False iff `start` is wrong.
        while True:
            if static.is_balanced_statement(exec_source_lines[start:stop]):
                break
            # shift `start` down until it becomes correct
            start -= 1
        # record the corrected value
        corrected.append(start)
        # The corrected `start` is where the previous statement ends, so the
        # next `stop` is correct as well.
        stop = start
    return set(corrected)
def _workaround_16806(ps1_linenos, exec_source_lines):
    """
    workaround for python issue 16806 (https://bugs.python.org/issue16806)

    That issue makes the AST report the line a multiline string *ends* on,
    whereas the correct behavior is to report the line it *starts* on. Given
    the reported line numbers and the source they refer to, any line number
    that points at the end of a multiline string is moved back to the line
    where that string begins.

    Args:
        ps1_linenos (List[int]): AST provided line numbers that begin
            statements and may be affected by Python Issue #16806.
        exec_source_lines (List[str]): code referenced by ps1_linenos

    Returns:
        List[int]: new_ps1_lines
            Fixed `ps1_linenos` where multiline strings now point to the
            line where they begin.

    Note:
        A patch for this issue exists
        `<https://github.com/python/cpython/pull/1800>`_.

        This workaround is idempotent (i.e. a no-op) when the line numbers
        are already correct, so nothing should break when the bug is fixed.

        Starting from the end, look at consecutive pairs of indices to
        inspect the statement they correspond to (the first statement goes
        from ps1_linenos[-1] to the end of the line list).

    Example:
        >>> ps1_linenos = [0, 2, 3]
        >>> exec_source_lines = ["x = 1", "y = '''foo", " bar'''", "pass"]
        >>> DoctestParser._workaround_16806(ps1_linenos, exec_source_lines)
        [0, 1, 3]
    """
    fixed_starts = []
    upper = len(exec_source_lines)
    for lower in ps1_linenos[::-1]:
        # `upper` is trusted, `lower` might be off; is_balanced_statement
        # will be False iff `lower` is wrong.
        while True:
            chunk = exec_source_lines[lower:upper]
            if static.is_balanced_statement(chunk, only_tokens=True):
                break
            # shift `lower` down until it becomes correct
            lower -= 1
        fixed_starts.append(lower)
        # A correct `lower` is also the correct end of the statement that
        # precedes it, so it becomes the next `upper`.
        upper = lower
    # values were gathered last-to-first; hand them back in source order
    fixed_starts.reverse()
    return fixed_starts
def _complete_source(line, state_indent, line_iter):
    """
    Helper generator: yields ``line`` and then removes lines from
    ``line_iter`` for as long as they are needed to complete the source.

    Args:
        line (str): first line of the statement. After the first
            ``state_indent`` characters are dropped, its 4-character prefix
            must strip to ``>>>`` or ``...``.
        state_indent (int): slice offset removed from the front of every
            line to normalize indentation.
        line_iter (Iterator[Tuple[int, str]]): produces
            ``(line_idx, next_line)`` pairs. It is advanced in place, so the
            lines consumed here are removed from the caller's iterator.

    Yields:
        Tuple[str, str]: ``(raw_line, norm_line)`` for ``line`` and for each
            consumed line, where ``norm_line`` is the line with
            ``state_indent`` characters removed. When the triple-quote hack
            fires, both members are rewritten to carry a ``'... '`` prefix.

    Raises:
        SyntaxError: if a consumed line has an unexpected prefix and the
            triple-quote hack does not apply.
        exceptions.IncompleteParseError: if ``line_iter`` is exhausted while
            the collected source is still unbalanced.
    """
    norm_line = line[state_indent:]  # Normalize line indentation
    prefix = norm_line[:4]
    suffix = norm_line[4:]
    assert prefix.strip() in {'>>>', '...'}, '{}'.format(prefix)
    yield line, norm_line

    # Code fragments (prompt removed) collected so far for this statement
    source_parts = [suffix]

    # These hacks actually modify the input doctest slightly
    HACK_TRIPLE_QUOTE_FIX = True

    # NOTE: `DEBUG` is not defined in this block; it is read from the
    # enclosing scope.
    try:
        while not static.is_balanced_statement(source_parts, only_tokens=True):
            # Raises StopIteration when the iterator is exhausted; handled
            # by the `except` clause below.
            line_idx, next_line = next(line_iter)

            norm_line = next_line[state_indent:]
            prefix = norm_line[:4]
            suffix = norm_line[4:]

            if prefix.strip() not in {'>>>', '...', ''}:  # nocover
                error = True
                if HACK_TRIPLE_QUOTE_FIX:
                    # TODO: make a more robust patch
                    # If any part collected so far contains a triple quote,
                    # presumably this line is the interior of a multi-line
                    # string: rewrite it as a '... ' continuation line
                    # rather than rejecting it.
                    if any("'''" in s or '"""' in s for s in source_parts):
                        # print('HACK FIXING TRIPLE QUOTE')
                        next_line = next_line[:state_indent] + '... ' + norm_line
                        norm_line = '... ' + norm_line
                        prefix = ''
                        # NOTE(review): on this path the part appended to
                        # `source_parts` keeps the '... ' prefix, unlike the
                        # regular path which strips it - confirm intended.
                        suffix = norm_line
                        error = False
                if error:
                    if DEBUG:
                        print(' * !!!ERROR!!!')
                        print(' * source_parts = {!r}'.format(source_parts))
                        print(' * prefix = {!r}'.format(prefix))
                        print(' * norm_line = {!r}'.format(norm_line))
                        print(' * !!!!!!!!!!!!!')
                    raise SyntaxError(
                        'Bad indentation in doctest on line {}: {!r}'.format(
                            line_idx, next_line))
            source_parts.append(suffix)
            yield next_line, norm_line
    except StopIteration:
        # Every remaining line was consumed and the source never balanced.
        if DEBUG:
            import ubelt as ub
            print('<FAIL DID NOT COMPLETE SOURCE>')
            import traceback
            tb_text = traceback.format_exc()
            tb_text = ub.highlight_code(tb_text)
            tb_text = ub.indent(tb_text)
            print(tb_text)
            # print(' * line_iter = {!r}'.format(line_iter))
            print(' * state_indent = {!r}'.format(state_indent))
            print(' * line = {!r}'.format(line))
            # print('source =\n{}'.format('\n'.join(source_parts)))
            print('# Ensure that the following line should actually fail')
            print('source_parts = {}'.format(ub.repr2(source_parts, nl=2)))
            # Prints a snippet for re-checking `source_parts` by hand.
            # NOTE(review): the template text is kept exactly as found; its
            # statements run together on one line - confirm the intended
            # line breaks.
            print(
                ub.codeblock(r''' from xdoctest import static_analysis as static 
static.is_balanced_statement(source_parts, only_tokens=False) static.is_balanced_statement(source_parts, only_tokens=True) text = '\n'.join(source_parts) print(text) static.six_axt_parse(text) '''))
            print('</FAIL DID NOT COMPLETE SOURCE>')
            # sys.exit(1)
        # TODO: use AST to reparse all doctest parts to discover where the
        # syntax error in the doctest is and then raise it.
        raise exceptions.IncompleteParseError(
            'ill-formed doctest: all parts have been processed '
            'but the doctest source is not balanced')
    else:
        # Reached only when the loop ended because the source is balanced.
        if DEBUG > 1:
            import ubelt as ub
            print('<SUCCESS COMPLETED SOURCE>')
            # print(' * line_iter = {!r}'.format(line_iter))
            print('source_parts = {}'.format(ub.repr2(source_parts, nl=2)))
            print('</SUCCESS COMPLETED SOURCE>')