def get_next_unescaped_appearance(s, d1, search_from, next_char_not_word=False):
    """
    Return the index of the next occurrence of ``d1`` in ``s`` that is not
    preceded by a backslash.

    :param s: string to search.
    :param d1: substring to look for.
    :param search_from: index in ``s`` at which the search starts.
    :param next_char_not_word: if true, an occurrence is skipped when the
        character right after it satisfies ``can_be_used_in_command``.
    :return: index in ``s`` of the first accepted occurrence.
    :raises NotFound: if no acceptable occurrence exists.
    """
    while True:
        maybe = s.find(d1, search_from)
        if maybe == -1:
            raise NotFound()

        # Only inspect the previous character when there is one: for a match
        # at index 0, s[maybe - 1] would wrap around to the LAST character and
        # a trailing backslash would wrongly mark the match as escaped.
        if maybe > 0 and s[maybe - 1] == '\\':
            if 'space' in d1:
                w = Where(s, maybe, maybe + len(d1))
                msg = 'Skipping escaped sequence:\n\n' + w.__str__()
                logger.debug(msg)
            search_from = maybe + 1
            continue

        nextchar_i = maybe + len(d1)
        # NOTE(review): when the match ends the string, the "next char" is
        # taken to be 'o'; presumably that makes can_be_used_in_command()
        # true so the match is skipped -- confirm this is intended.
        nextchar = s[nextchar_i] if nextchar_i < len(s) else 'o'
        if next_char_not_word and can_be_used_in_command(nextchar):
            search_from = maybe + 1
            continue

        return maybe
def check_good_use_of_special_paragraphs(md, filename):
    """
    Check that every special paragraph in ``md`` is preceded by an empty line.

    A line is "special" when ``has_special_line_prefix`` returns a true value
    for it (the value is used as the matched prefix).

    :param md: the complete Markdown source.
    :param filename: attached to the error location via ``with_filename``.
    :raises DPSyntaxError: at the first special line whose previous line is
        not blank.
    """
    lines = md.split('\n')
    # i is the index of `line`; `prev` is the line just before it.
    for i, (prev, line) in enumerate(zip(lines, lines[1:]), 1):
        prefix = has_special_line_prefix(line)
        if prefix and prev.strip():
            msg = ('Wrong use of special paragraph indicator. You have '
                   'to leave an empty line before the special paragraph.')
            c = location(i, 1, md)
            where = Where(md, c, c + len(prefix)).with_filename(filename)
            raise DPSyntaxError(msg, where=where)

        # The list-indicator check below is deliberately disabled.
        if False:
            def looks_like_list_item(s):
                if s.startswith('--') or s.startswith('**'):
                    return False
                return s.startswith('-') or s.startswith('*')

            if looks_like_list_item(line):
                if prev.strip() and not looks_like_list_item(prev):
                    msg = ('Wrong use of list indicator. You have '
                           'to leave an empty line before the list.')
                    c = location(i, 1, md)
                    where = Where(md, c, c + 1).with_filename(filename)
                    raise DPSyntaxError(msg, where=where)
def bb(tokens, loc, s):
    """
    Call the wrapped action ``b`` on ``tokens`` and make sure that both the
    result and any DP error carry a ``Where`` built from ``(s, loc)``.

    ``b`` comes from the enclosing scope.

    :param tokens: tokens handed to ``b``.
    :param loc: character offset in ``s`` used as the location.
    :param s: the string being parsed.
    :return: the result of ``b(tokens)``; a named-tuple result without a
        ``where`` is replaced by a copy that has one.
    """
    where = Where(s, loc)
    try:
        try:
            res = b(tokens)
        except TypeError as e:
            token_list = list(tokens)
            listing = "\n".join("- %s " % str(t) for t in token_list)
            msg = 'Cannot invoke %r\nwith %d tokens:\n%s.' % (
                b, len(token_list), listing)
            raise_wrapped(TypeError, e, msg)
    except DPSyntaxError as e:
        if e.where is not None:
            raise
        # The location is set before str(e) is taken, as in the original.
        e.where = where
        raise DPSyntaxError(str(e), where=where)
    except DPSemanticError as e:
        if e.where is not None:
            raise
        raise DPSemanticError(str(e), where=where)
    except BaseException as e:
        raise_wrapped(DPInternalError, e, "Error while parsing.",
                      where=where.__str__(), tokens=tokens)

    needs_where = isnamedtupleinstance(res) and res.where is None
    if needs_where:
        res = get_copy_with_where(res, where=where)
    return res
def find_corrections(x, parents):
    """
    Collect the suggestions produced by ``correct(x, parents)`` into ``subs``.

    ``correct`` and ``subs`` come from the enclosing scope.  ``correct`` is
    expected to return an iterable of pairs ``(a, b)`` where ``b`` is a
    string and ``a`` is either a ``Where`` or a string that must occur in the
    text covered by ``x.where``; in the latter case its first occurrence is
    converted to a ``Where``.

    :return: ``x`` itself, unchanged.
    :raises DPInternalError: if a string ``a`` is not found in the text.
    """
    origin = x.where
    s = origin.string[origin.character:origin.character_end]

    for suggestion in correct(x, parents):
        a, b = suggestion
        if not isinstance(a, str):
            check_isinstance(a, Where)
            a_where = a
        else:
            if a not in s:
                msg = 'Invalid suggestion %s. Could not find piece %r in %r.' % (
                    suggestion, a, s)
                raise DPInternalError(msg)
            # Translate the offset inside the fragment to the full string.
            begin = origin.character + s.index(a)
            a_where = Where(origin.string, begin, begin + len(a))

        check_isinstance(b, str)
        subs.append((a_where, b))
    return x
def transform(x, parents):  # @UnusedVariable
    """
    Return a copy of ``x`` whose ``where`` refers to ``transform_original_s``.

    Positions in ``x.where.string`` are converted to (line, col), shifted by
    ``num_empty_lines_start`` lines (and, on the first line only, by
    ``len(extra_before)`` columns), and converted back to character offsets
    in ``transform_original_s``.  Those three names come from the enclosing
    scope.
    """
    w0 = x.where
    s0 = w0.string

    def translate(line, col):
        # The first line also had some initial white space removed.
        if line == 0:
            col += len(extra_before)
        # Account for the initial empty lines.
        return line + num_empty_lines_start, col

    def in_original(char):
        line, col = translate(*line_and_col(char, s0))
        return location(line, col, transform_original_s)

    character = in_original(w0.character)
    if w0.character_end is None:
        character_end = None
    else:
        character_end = in_original(w0.character_end)

    where = Where(string=transform_original_s,
                  character=character,
                  character_end=character_end)
    return get_copy_with_where(x, where)
def translate_where(where0, string):
    """
    Re-express ``where0`` in terms of ``string``.

    The (line, col) of ``where0.character`` (and of ``character_end`` when it
    is not None) is computed in ``where0.string`` and then mapped to a
    character offset in ``string``.

    :raises DPInternalError: (via ``raise_desc``) if the two strings do not
        have the same number of lines.
    :return: a new ``Where`` on ``string``.
    """
    string0 = where0.string
    nlines = string.count('\n') + 1
    nlines0 = string0.count('\n') + 1
    if nlines != nlines0:
        msg = 'I expected they have the same lines.'
        msg += '\n         string (%d lines): %r' % (nlines, string)
        msg += '\n  where0.string (%d lines): %r' % (nlines0, string0)
        raise_desc(DPInternalError, msg)

    def convert(char):
        line, col = line_and_col(char, string0)
        return location(line, col, string)

    character2 = convert(where0.character)
    end0 = where0.character_end
    character_end2 = None if end0 is None else convert(end0)

    return Where(string=string, character=character2,
                 character_end=character_end2)
def p(tokens, loc, s):
    """
    Parse action: run ``bb`` and make sure the result knows where it ends.

    ``bb`` and the parser element ``x`` come from the enclosing scope.
    After ``bb`` succeeds, ``x.tryParse(s, loc)`` is called again to obtain
    the end position; a named-tuple result with no ``where`` (or with no
    ``character_end``) gets a new ``Where`` spanning ``loc`` to that end.
    """
    res = bb(tokens, loc, s)
    # The parse was successful, so parsing again gives us the end location.
    character_end = x.tryParse(s, loc)

    if isnamedtupleinstance(res):
        if res.where is not None:
            check_isinstance(res.where, Where)
        if res.where is None or res.where.character_end is None:
            w2 = Where(s, character=loc, character_end=character_end)
            res = get_copy_with_where(res, where=w2)

    if do_extra_checks():
        if not isinstance(res, (float, int, str)) and res.where is None:
            msg = 'Found element with no where'
            raise_desc(ValueError, msg, res=res)

        if hasattr(res, 'where'):
            assert res.where.character_end is not None, \
                (res, isnamedtupleinstance(res))

    return res
def transform(x, parents):  # @UnusedVariable
    """
    Shrink ``x.where`` so that it excludes leading/trailing spaces and
    newlines of the text it covers.

    ``root`` (from the enclosing scope) is returned untouched, and so is any
    ``x`` whose covered text is already equal to its ``strip()``.
    """
    if x is root:
        return x

    w = x.where
    ws = w.string[w.character:w.character_end]
    if ws.strip() == ws:
        return x

    blanks = ' \n'
    # At most len(ws) - 1 characters are dropped on each side, so a fragment
    # made only of blanks is never trimmed past its last/first character.
    limit = len(ws) - 1
    ninitial = min(len(ws) - len(ws.lstrip(blanks)), limit)
    ntrailing = min(len(ws) - len(ws.rstrip(blanks)), limit)

    where2 = Where(w.string,
                   w.character + ninitial,
                   w.character_end - ntrailing)
    return get_copy_with_where(x, where2)
def make_list(x, where=None):
    """
    Build the list node of ``list_types`` matching the length of ``x``.

    For an empty ``x`` the zero-length type is built with a dummy field and
    the given ``where``.  Otherwise the node spans from the start of the
    first element to the end of the last one.

    :param x: sequence of elements, each exposing a ``where`` attribute.
    :param where: location used only for the empty list and error reports.
    :raises ValueError: if ``x`` is None.
    :raises DPInternalError: (via ``raise_wrapped``) on any other failure.
    """
    if x is None:
        raise ValueError()
    try:
        if not len(x):
            return list_types[0](dummy='dummy', where=where)

        ltype = list_types[len(x)]
        w1 = x[0].where
        w2 = x[-1].where

        if w1 is None or w2 is None:
            raise_desc(ValueError, 'Cannot create list', x=x)

        assert w2.character_end is not None
        w3 = Where(string=w1.string,
                   character=w1.character,
                   character_end=w2.character_end)

        return ltype(*tuple(x), where=w3)
    except BaseException as e:
        msg = 'Cannot create list'
        # x may be empty here (the failure can come from the empty-list
        # branch); x[-1] would then raise IndexError and hide the real error.
        x_last = x[-1] if len(x) else None
        raise_wrapped(DPInternalError, e, msg, x=x, where=where, x_last=x_last)
def assert_not_contains(s, what):
    """
    Raise ``DPSyntaxError`` pointing at the first occurrence of ``what`` in
    ``s``; return None when ``what`` does not occur.
    """
    position = s.find(what)
    if position == -1:
        return
    msg = 'Found forbidden sequence "%s".' % what
    where = Where(s, position, position + len(what))
    raise DPSyntaxError(msg, where=where)
def move_where_rec(x, parents):  # @UnusedVariable
    """
    Return a copy of ``x`` whose ``where`` is shifted by ``offset`` and
    refers to ``string0`` instead of the extracted fragment.

    ``string0``, ``offset`` and ``offset_end`` come from the enclosing scope;
    the fragment ``string0[offset:offset_end]`` must equal ``x.where.string``.
    """
    assert isnamedtuplewhere(x), type(x)
    local = x.where
    assert string0[offset:offset_end] == local.string
    shifted = Where(string0,
                    local.character + offset,
                    local.character_end + offset)
    return get_copy_with_where(x, shifted)
def replace_macros(s):
    '''
        Replaces placeholders of the type @@key or @@{key} in ``s``.

        Values are looked up in MCDPManualConstants.macros; a dotted key
        such as @@{MCDPConstants.name} is resolved as
        macros['MCDPConstants']['name'].

        Returns the string with the substitutions applied.
        Raises DPSyntaxError for an invalid placeholder or an unknown key.
    '''
    macros = MCDPManualConstants.macros

    class MyTemplate(Template):
        delimiter = '@@'
        idpattern = r'[_a-z][\._a-z0-9]*'

        def _invalid(self, mo):
            # Same line/column computation as string.Template._invalid, but
            # reported as a DPSyntaxError with a location in s.
            i = mo.start('invalid')
            lines = self.template[:i].splitlines(True)
            if not lines:
                colno = 1
                lineno = 1
            else:
                colno = i - len(''.join(lines[:-1]))
                lineno = len(lines)

            char = location(lineno - 1, colno - 1, s)
            w = Where(s, char)
            raise DPSyntaxError('Invalid placeholder', where=w)

    class Sub(object):
        """ Mapping wrapper that also resolves dotted keys ("a.b"). """

        def __init__(self, data):
            self.data = data

        def __getitem__(self, key):
            if key in self.data:
                return self.data[key]

            if '.' in key:
                i = key.index('.')
                first, last = key[:i], key[i + 1:]
                return self[first][last]

            raise KeyError(key)

    t = MyTemplate(s)

    try:
        s2 = t.substitute(Sub(macros))
    except KeyError as e:
        key = str(e).replace("'", "")
        logger.error('Could not find key %r' % key)

        # The placeholder may have been written bare (@@key) or with braces
        # (@@{key}), and for dotted keys the KeyError may only name one
        # component.  Try each spelling; fall back to the start of the
        # string rather than letting str.index raise ValueError and hide
        # the syntax error we want to report.
        delimiter = MyTemplate.delimiter
        char = 0
        for candidate in (delimiter + key, delimiter + '{' + key + '}', key):
            found = s.find(candidate)
            if found != -1:
                char = found
                break

        w = Where(s, char)
        msg = 'Key %r not found - maybe use braces?' % key
        raise DPSyntaxError(msg, where=w)
    return s2
def check_misspellings(s):
    """
    Raise ``DPSyntaxError`` at the first occurrence of a known misspelling
    ('mcpd' is checked before 'MCPD'); otherwise return ``s`` unchanged.
    """
    for typo in ('mcpd', 'MCPD'):
        c = s.find(typo)
        if c == -1:
            continue
        msg = 'Typo, you wrote MCPD rather than MCDP.'
        raise DPSyntaxError(msg, where=Where(s, c, c + len(typo)))
    return s
def get_balanced_brace(s):
    """
        Split ``s`` after its first balanced bracketed group.

        ``s`` must start with '{' or '['.  Returns a pair ``(a, b)`` with
        ``a + b == s`` where ``a`` starts with the opening bracket and ends
        with its matching closing bracket.  Brackets preceded by a backslash
        are ignored.

        Raises Malformed (via raise_desc) on a closing bracket that does not
        match, or if the group is never closed.
    """
    assert s[0] in ['{', '[']
    openers = ('{', '[')
    opener_for = {'}': '{', ']': '['}
    stack = []
    n = len(s)
    i = 0
    while i < n:
        ch = s[i]
        # An escaped bracket does not count: skip the backslash and it.
        if ch == '\\' and i < n - 1 and s[i + 1] in ('{', '[', '}', ']'):
            i += 2
            continue

        if ch in openers:
            stack.append(ch)
        elif ch in opener_for:
            if not stack or stack[-1] != opener_for[ch]:
                msg = 'One extra closing brace ' + ch
                msg += '\n\n' + Where(s, i).__str__()
                raise_desc(Malformed, msg, stack=stack, s=s)
            stack.pop()

        if not stack:
            # The group opened at index 0 has just been closed.
            a, b = s[:i + 1], s[i + 1:]
            break
        i += 1

    if stack:
        msg = 'Unmatched braces at the end of s (stack = %s)' % stack
        raise_desc(Malformed, msg, s=s)
    assert a[0] in ['{', '[']
    assert a[-1] in ['}', ']']
    assert a + b == s
    return a, b
def check_no_forbidden(s):  # pragma: no cover
    """
    Raise ``DPSyntaxError`` if ``s`` contains a tab or one of the forbidden
    sequences ('>=', '<=', '>>'); return None otherwise.

    The tab check comes first.  For a forbidden sequence the message lists
    the suggested substitutions.
    """
    tab_at = s.find('\t')
    if tab_at != -1:
        msg = "Tabs bring despair (e.g. Markdown does not recognize them.)"
        raise DPSyntaxError(msg, where=Where(s, tab_at))

    forbidden = {
        '>=': ['≥'],
        '<=': ['≤'],
        '>>': ['?']  # added by mistake by Atom autocompletion
    }
    for f, subs in forbidden.items():
        c = s.find(f)
        if c == -1:
            continue
        msg = 'Found forbidden sequence %r. This will not end well.' % f
        msg += ' Try one of these substitutions: %s' % format_list(subs)
        raise DPSyntaxError(msg, where=Where(s, c, c + len(f)))
def _invalid(self, mo):
    """
    Report an invalid placeholder as a ``DPSyntaxError`` located in ``s``.

    ``mo`` is the match object whose 'invalid' group matched; ``s`` comes
    from the enclosing scope.  The line/column are derived from
    ``self.template`` and converted to a character offset with ``location``.
    """
    i = mo.start('invalid')
    lines = self.template[:i].splitlines(True)
    if lines:
        lineno = len(lines)
        # Offset of i relative to the end of all but the last line.
        colno = i - sum(len(piece) for piece in lines[:-1])
    else:
        lineno = colno = 1

    char = location(lineno - 1, colno - 1, s)
    raise DPSyntaxError('Invalid placeholder', where=Where(s, char))
def check_no_forbidden(s, res, location0):
    """
    Record an error on ``res`` for each forbidden sequence ('>=', '<=',
    '>>') found in ``s``.

    Only the first occurrence of each sequence is reported, through
    ``res.note_error`` with a ``LocationInString`` built on ``location0``.
    Nothing is raised and nothing is returned.
    """
    forbidden = {
        '>=': ['≥'],
        '<=': ['≤'],
        '>>': ['?']  # added by mistake by Atom autocompletion
    }
    for f, subs in forbidden.items():
        c = s.find(f)
        if c == -1:
            continue
        msg = 'Found forbidden sequence %r. This will not end well.' % f
        msg += ' Try one of these substitutions: %s' % format_list(subs)
        where = Where(s, c, c + len(f))
        res.note_error(msg, LocationInString(where, location0))
def check_lists(s, res, location):
    """
    Warn about lines that look like the start of a list ('- ' or '* ') but
    are not preceded by an empty line.

    The first line is never checked.  Each offending line produces one
    ``res.note_warning`` located at the beginning of that line.
    """
    lines = s.split('\n')
    for i in range(1, len(lines)):
        if not lines[i].startswith(('- ', '* ')):
            continue
        if lines[i - 1].strip() == '':
            continue
        msg = 'It looks like here you wanted to start a list but you did not leave an empty line.'
        character = find_location(i, 0, s)
        where = Where(s, character)
        res.note_warning(msg, LocationInString(where, location))
def check_parsable(s):
    """
    Check that ``s`` is well-formed XML once wrapped in a single root tag.

    :param s: XML fragment (it may contain several top-level nodes).
    :return: None if the fragment parses.
    :raises DPSyntaxError: if the XML parser rejects the wrapped fragment.
        The location refers to the WRAPPED string, i.e. it includes the
        added opening tag.
    """
    from xml.etree import ElementTree as ET

    s = '<add-wrap-for-xml-parser>' + s + '</add-wrap-for-xml-parser>'
    try:
        ET.fromstring(s)
    except ET.ParseError as e:
        # Only ParseError carries .position; catching every Exception would
        # turn unrelated failures into an AttributeError here.
        line1, col1 = e.position
        line = line1 - 1
        # NOTE(review): the parser's column already appears to be 0-based
        # (it reports "column 0"), so "col1 - 1" may be off by one; kept for
        # compatibility but clamped so that it can never become negative.
        col = max(col1 - 1, 0)
        character = find_location(line, col, s)
        msg = 'Invalid XML: %s' % e
        where = Where(s, character)
        logger.error('line %s col %s' % (where.line, where.col))
        logger.error(where)
        raise DPSyntaxError(msg, where=where)
def location_from_stack(level):
    """
        Return a LocationInString for the source line being executed by one
        of our callers.

        level = 0: our caller
        level = 1: our caller's caller
        (levels 0 to 3 are supported)

        Raises NotImplementedError for any other level, or if the file of
        that frame does not exist on disk.
    """
    import inspect

    if level not in (0, 1, 2, 3):
        raise NotImplementedError(level)

    # currentframe() must be called here, in this function: the frame chain
    # is walked starting from our own frame.
    cf = inspect.currentframe()
    for _ in range(int(level) + 1):
        cf = cf.f_back

    assert cf is not None, level

    filename = inspect.getfile(cf)
    if not os.path.exists(filename):
        msg = 'Could not read %r' % filename
        raise NotImplementedError(msg)

    lineno = cf.f_lineno - 1
    # Use a context manager so that the file handle is closed right away.
    with open(filename) as f:
        string = f.read()
    if not string:
        raise Exception(filename)

    # The span covers the whole line, excluding its final newline.
    character = location(lineno, 0, string)
    character_end = location(lineno + 1, 0, string) - 1
    where = Where(string, character, character_end)

    lf = LocalFile(filename)
    return LocationInString(where, lf)
def do_preliminary_checks_and_fixes(s, res, location0):
    """
    Run the preliminary checks on ``s`` and return the fixed-up string.

    In order: tabs are either rejected (``check_no_tabs``) or, when
    ``MCDPConstants.allow_tabs`` is set, reported as a warning on ``res`` and
    expanded to ``MCDPConstants.tabsize`` spaces; then forbidden sequences
    are checked, comments removed, misspellings checked, and finally
    ``check_most_of_it_xml`` is applied.
    """
    if not MCDPConstants.allow_tabs:
        check_no_tabs(s)
    elif '\t' in s:
        first_tab = s.index('\t')
        msg = "Tabs bring despair (e.g. Markdown does not recognize them.)"
        tab_location = LocationInString(Where(s, first_tab), location0)
        # TODO: make augmented result
        res.note_warning(msg, tab_location)
        s = s.replace('\t', ' ' * MCDPConstants.tabsize)

    check_no_forbidden(s, res, location0)

    s = remove_comments(s)
    s = check_misspellings(s)
    # The list check is deliberately disabled.
    if False:
        check_lists(s, res, location0)
    s = check_most_of_it_xml(s, res, location0)
    return s
def suggestions_build_problem(x):
    """
    Yield indentation suggestions for an ``mcdp { ... }`` block.

    ``x`` must expose ``x.where`` (the span of the block) and
    ``x.rbrace.where`` (the position of the closing brace).  The indentation
    of the line containing the first ``mcdp`` token is the reference: the
    closing brace is aligned with it, and the other non-empty lines are
    aligned ``MCDPConstants.indent`` columns further in.

    Each yielded item is a pair ``(where, replacement)``: the text spanned
    by ``where`` should be replaced by ``replacement`` (an empty span means
    an insertion).

    :raises NotImplementedError: (via ``raise_desc``) if the first line
        contains the token ``mcdp`` twice.
    """
    x_string = x.where.string[x.where.character:x.where.character_end]
    offset = x.where.character
    TOKEN = 'mcdp'
    first_appearance_mcdp_in_sub = x_string.index(TOKEN)
    first_appearance_mcdp_in_orig = offset + first_appearance_mcdp_in_sub
    # Text of the line from its beginning up to and including the token.
    first_line = x.where.string[:first_appearance_mcdp_in_orig +
                                len(TOKEN)].split('\n')[-1]
    token_distance_from_newline = first_line.index(TOKEN)
    rest_of_first_line = first_line[token_distance_from_newline + len(TOKEN):]

    if TOKEN in rest_of_first_line:
        msg = 'I cannot deal with two "mcdp" in the same line.'
        # NotImplementedError is the exception class; the NotImplemented
        # constant that was used here before cannot be raised.
        raise_desc(NotImplementedError, msg, that_line=first_line)

    INDENT = MCDPConstants.indent
    TABSIZE = MCDPConstants.tabsize

    # Find the span made of all the complete lines that contain the block.
    before = x.where.string[:first_appearance_mcdp_in_orig]
    if '\n' in before:
        last_newline_before = list(findall('\n', before))[-1] + 1
    else:
        last_newline_before = 0

    rbrace = x.rbrace.where.character

    # string after the rbrace
    after = x.where.string[rbrace + 1:]
    if '\n' in after:
        first_newline_after = list(findall('\n', after))[0]
        first_newline_after += rbrace + 1
    else:
        first_newline_after = len(x.where.string)

    lines_containing_xstring_offset = last_newline_before
    lines_containing_xstring = x.where.string[
        lines_containing_xstring_offset:first_newline_after]

    # iterate over the lines
    line_infos = list(
        iterate_lines(lines_containing_xstring,
                      lines_containing_xstring_offset))

    initial_spaces = count_initial_spaces(line_infos[0].line_string, TABSIZE)
    for line_info in line_infos:
        # ignore if empty
        if 0 == len(line_info.line_string.strip()):
            continue

        # index of current line start in global string
        i = line_info.character
        that_line = line_info.line_string
        assert that_line == x.where.string[line_info.character:line_info.
                                           character_end]

        contains_rbrace = line_info.character <= rbrace < line_info.character_end
        if contains_rbrace:
            assert '}' in that_line
            align_at = initial_spaces
            before_rbrace = x.where.string[line_info.character:rbrace]
            before_rbrace_is_ws = is_all_whitespace(before_rbrace)

            if not before_rbrace_is_ws:
                # The brace shares its line with other text:
                # need to add a newline before it.
                w = Where(x.where.string, rbrace, rbrace)
                replace = '\n' + ' ' * align_at
                yield w, replace
                continue

        is_line_with_initial_mcdp = (line_info.character <=
                                     first_appearance_mcdp_in_orig <
                                     line_info.character_end)
        if is_line_with_initial_mcdp:
            # The first line is the reference; it is never re-indented.
            continue

        if contains_rbrace:
            align_at = initial_spaces
        else:
            align_at = initial_spaces + INDENT

        nspaces = count_initial_spaces(that_line, TABSIZE)
        if nspaces < align_at:
            # need to add some indentation at beginning of line
            w = Where(x.where.string, i, i)
            to_add = align_at - nspaces
            remaining = ' ' * to_add
            yield w, remaining

        if nspaces > align_at:
            remove = nspaces - align_at
            # XXX this should not include tabs... FIXME
            w = Where(x.where.string, i, i + remove)
            yield w, ''

        # NOTE(review): presumably stops at a nested "mcdp" -- confirm.
        if TOKEN in that_line:
            break
def substitute_command_ext(s, name, f, nargs, nopt):
    """
        Substitute every occurrence of the command

            \\name[opt1]...{arg1}{arg2}...

        in ``s`` with the value returned by ``f``.

        :param s: string to process.
        :param name: command name, without the backslash.
        :param f: called as ``f(args, opts)`` where both are tuples;
            ``args`` has ``nargs`` strings (brace contents) and ``opts`` has
            ``nopt`` entries, each the bracket contents or None when that
            option is absent.  It must not return None.
        :param nargs: number of mandatory ``{...}`` arguments.
        :param nopt: number of optional ``[...]`` arguments.
        :return: the string with all occurrences replaced.
        :raises DPSyntaxError: if a mandatory argument is missing.
    """
    lookfor = '\\' + name

    try:
        start = get_next_unescaped_appearance(s, lookfor, 0,
                                              next_char_not_word=True)
    except NotFound:
        return s
    assert s[start:].startswith(lookfor)

    before = s[:start]
    consume = consume0 = s[start + len(lookfor):]

    opts = []
    args = []

    for _ in range(nopt):
        consume = consume_whitespace(consume)
        if not consume or consume[0] != '[':
            # This option was not given.
            opt = None
        else:
            opt_string, consume = get_balanced_brace(consume)
            opt = opt_string[1:-1]  # remove brace
        opts.append(opt)

    for _ in range(nargs):
        consume = consume_whitespace(consume)
        if not consume or consume[0] != '{':
            # consume[:1] is '' when the input is exhausted; consume[0]
            # would raise IndexError and hide the syntax error.
            msg = 'Command %r: Expected {: got %r. opts=%s args=%s' % (
                name, consume[:1], opts, args)
            character = start
            character_end = len(s) - len(consume)
            where = Where(s, character, character_end)
            raise DPSyntaxError(msg, where=where)
        arg_string, consume2 = get_balanced_brace(consume)
        assert arg_string + consume2 == consume
        consume = consume2
        arg = arg_string[1:-1]  # remove brace
        args.append(arg)

    args = tuple(args)
    opts = tuple(opts)
    replace = f(args, opts)
    if replace is None:
        msg = 'function %s returned none' % f
        raise Exception(msg)

    assert consume0.endswith(consume)
    # Process the remaining occurrences in what is left of the string.
    after_tran = substitute_command_ext(consume, name, f, nargs, nopt)
    return before + replace + after_tran
def parse_wrap(expr, string):
    """
        Parse ``string`` with the pyparsing expression ``expr``.

        Comments are removed before parsing; the locations of the parsed
        elements are then translated back so that they refer to the original
        ``string``, and whitespace is fixed with ``fix_whitespace``.

        Returns a one-element list containing the result.

        Raises ValueError if ``string`` is a unicode object (a utf-8 encoded
        byte string is expected), and DPSyntaxError if there is nothing to
        parse or if the parser reports a parse error.  A DPSemanticError,
        RuntimeError or any other exception raised while parsing is passed
        to ``raise_wrapped`` with DPInternalError.

        This function is transparent to MemoryError (it is re-raised as is).
    """
    from .refinement import namedtuple_visitor_ext

    if isinstance(string, unicode):
        msg = 'The string is unicode. It should be a str with utf-8 encoding.'
        msg += '\n' + string.encode('utf-8').__repr__()
        raise ValueError(msg)

    check_isinstance(string, bytes)

    # Nice trick: the remove_comments doesn't change the number of lines
    # it only truncates them...
    # (translate_where() below relies on the two strings having the same
    # number of lines.)
    string0 = remove_comments(string)

    if not string0.strip():
        msg = 'Nothing to parse.'
        # Point at the end of the original string.
        where = Where(string, character=len(string))
        raise DPSyntaxError(msg, where=where)

    try:
        # Label passed to timeit() below.
        try:
            w = str(find_parsing_element(expr))
        except ValueError:
            w = '(unknown)'

        with timeit(w, MCDPConstants.parsing_too_slow_threshold):
            expr.parseWithTabs()

            parsed = expr.parseString(string0, parseAll=True)  # [0]

            def transform(x, parents):  # @UnusedVariable
                # Re-express the location of x (computed on string0) in
                # terms of the original string.
                if x.where is None:  # pragma: no cover
                    msg = 'Where is None for this element'
                    raise_desc(DPInternalError, msg, x=recursive_print(x),
                               all=recursive_print(parsed[0]))

                where = translate_where(x.where, string)
                return get_copy_with_where(x, where)

            parsed_transformed = namedtuple_visitor_ext(parsed[0], transform)

            if hasattr(parsed_transformed, 'where'):
                # could be an int, str
                assert_equal(parsed_transformed.where.string, string)

            res = fix_whitespace(parsed_transformed)
            return [res]

    except (ParseException, ParseFatalException) as e:
        # e.loc refers to string0; translate it to the original string.
        where1 = Where(string0, e.loc)
        where2 = translate_where(where1, string)

        s0 = e.__str__()
        check_isinstance(s0, bytes)
        s = s0
        e2 = DPSyntaxError(s, where=where2)
        # Python 2 three-argument raise: keeps the traceback of the
        # original parse error.
        raise DPSyntaxError, e2.args, sys.exc_info()[2]

    except DPSemanticError as e:
        msg = 'This should not throw a DPSemanticError'
        raise_wrapped(DPInternalError, e, msg, exc=sys.exc_info())
    except MemoryError as e:
        raise
    except RuntimeError as e:
        msg = 'RuntimeError %s while parsing string.' % (type(e).__name__)
        msg += '\n' + indent(string, 'string: ')
        # Passed to raise_wrapped as its ``compact`` option when the
        # recursion limit was hit.
        compact = 'maximum recursion depth' in str(e)
        raise_wrapped(DPInternalError, e, msg, compact=compact)
    except BaseException as e:
        msg = 'Unexpected exception %s while parsing string.' % (
            type(e).__name__)
        msg += '\n' + indent(string, 'string: ')
        raise_wrapped(DPInternalError, e, msg)
def assert_not_inside(substring, s):
    """
    Raise ``DPSyntaxError`` pointing at the first occurrence of
    ``substring`` in ``s``; return None when it does not occur.
    """
    i = s.find(substring)
    if i == -1:
        return
    w = Where(s, i, i + len(substring))
    msg = 'I found the forbidden substring %r in string.' % substring
    raise DPSyntaxError(msg, where=w)
def check_no_tabs(s):
    """
    Raise ``DPSyntaxError`` located at the first tab character in ``s``;
    return None if ``s`` contains no tabs.
    """
    first_tab = s.find('\t')
    if first_tab == -1:
        return
    msg = "Tabs bring despair (e.g. Markdown does not recognize them.)"
    raise DPSyntaxError(msg, where=Where(s, first_tab))