def find_all_generator(clazz, text, sub_string, word_boundary=False, word_boundary_chars=None):
    """Yield a span for every non-overlapping occurrence of sub_string in text.

    The search runs left to right and resumes just past each occurrence, so
    the yielded spans never overlap.  Each span is built with
    clazz._span(start, end) where ``end`` is the inclusive index of the last
    matched character.

    text - the string to search.
    sub_string - the string to look for.  An empty sub_string yields nothing.
    word_boundary - when True, only occurrences accepted by
      word_boundary_module.word_has_boundary() are yielded.
    word_boundary_chars - optional set of boundary characters; falls back to
      word_boundary_module.CHARS when None or empty.
    """
    check.check_string(text)
    check.check_string(sub_string)
    check.check_bool(word_boundary)
    check.check_set(word_boundary_chars, allow_none=True)

    word_boundary_chars = word_boundary_chars or word_boundary_module.CHARS
    sub_string_length = len(sub_string)
    if sub_string_length == 0:
        # str.find('') matches at every index without advancing, which would
        # make the loop below spin forever.  There is nothing to find.
        return

    i = 0
    while True:
        i = text.find(sub_string, i)
        if i < 0:
            return
        start = i
        end = i + sub_string_length - 1  # inclusive end index
        # Resume after this occurrence whether or not it gets yielded.
        i += sub_string_length
        if word_boundary:
            assert word_boundary_chars
            if not word_boundary_module.word_has_boundary(
                    text, start, end, boundary_chars=word_boundary_chars):
                continue
        yield clazz._span(start, end)
def replace_all(clazz, text, src_string, dst_string, word_boundary=False, word_boundary_chars=None):
    """Return text with every found occurrence of src_string swapped for dst_string.

    Occurrences are located with clazz.find_all(), passing word_boundary and
    word_boundary_chars straight through.  When nothing is found the original
    text is returned as is.
    """
    check.check_string(text)
    check.check_string(src_string)
    check.check_string(dst_string)
    check.check_bool(word_boundary)
    check.check_set(word_boundary_chars, allow_none=True)

    matches = clazz.find_all(text,
                             src_string,
                             word_boundary=word_boundary,
                             word_boundary_chars=word_boundary_chars)
    if not matches:
        return text

    pieces = []
    cursor = 0
    final_match = None
    for match in matches:
        # Untouched text between the previous match and this one, then the
        # replacement standing in for the match itself.
        pieces.append(text[cursor:match.start])
        pieces.append(dst_string)
        # Span ends are inclusive, so the next untouched char is end + 1.
        cursor = match.end + 1
        final_match = match
    if final_match:
        # Everything after the last match is carried over unchanged.
        pieces.append(text[final_match.end + 1:])
    return ''.join(pieces)
def replace(clazz, filename, replacements, backup=True, word_boundary=False, word_boundary_chars=None):
    """Apply the replacements dict to the utf-8 content of filename.

    The file is read, run through text_replace.replace() and written back
    with its existing mode only when the content actually changed.  When
    backup is true, file_util.backup(filename) is called before saving.

    Returns True if the file was rewritten, False if nothing changed.
    """
    check.check_string(filename)
    check.check_dict(replacements, check.STRING_TYPES, check.STRING_TYPES)
    check.check_bool(backup)
    check.check_bool(word_boundary)
    check.check_set(word_boundary_chars, allow_none=True)

    original = file_util.read(filename, codec='utf-8')
    updated = text_replace.replace(original,
                                   replacements,
                                   word_boundary=word_boundary,
                                   word_boundary_chars=word_boundary_chars)
    if original == updated:
        # Nothing to do; leave the file (and any backup) untouched.
        return False

    if backup:
        file_util.backup(filename)

    encoded = updated.encode('utf-8')
    file_mode = file_util.mode(filename)
    file_util.save(filename, content=encoded, mode=file_mode)
    return True
def from_sql_rows(clazz, rows, exclude=None):
    """Build a clazz instance from a list of sql row tuples.

    Each row is converted with the from_sql_row() method of the type named
    by the class attribute ``__value_type__`` and appended to a new, empty
    clazz instance.

    rows - list of tuples.
    exclude - optional set of strings passed through to from_sql_row().

    Raises AttributeError if clazz has no (or a falsy) ``__value_type__``.
    """
    check.check_list(rows, tuple)
    check.check_set(exclude, entry_type=check.STRING_TYPES, allow_none=True)

    item_type = getattr(clazz, '__value_type__', None)
    if not item_type:
        # The message needs the f prefix so the offending class is actually
        # interpolated instead of printing the literal text "{clazz}".
        raise AttributeError(f'No "__value_type__" attribute found in {clazz}')

    result = clazz()
    for row in rows:
        item = item_type.from_sql_row(row, exclude=exclude)
        result.append(item)
    return result
def __new__(clazz, name, unixpath, pythonpath, requires, variables):
    """Validate and normalize the fields, then defer to the first base class.

    unixpath and pythonpath may each be None/empty, a ':' separated string
    or a sequence of strings; both end up as lists whose entries went
    through file_path.normalize_sep().  A falsy requires becomes an empty
    set.  variables is passed through untouched.
    """
    check.check_string(name)

    unix_entries = unixpath if unixpath else []
    if check.is_string(unix_entries):
        unix_entries = unix_entries.split(':')
    check.check_string_seq(unix_entries)

    python_entries = pythonpath if pythonpath else []
    if check.is_string(python_entries):
        python_entries = python_entries.split(':')
    check.check_string_seq(python_entries)

    required = requires if requires else set()
    check.check_set(required)

    unix_entries = [file_path.normalize_sep(entry) for entry in unix_entries]
    python_entries = [file_path.normalize_sep(entry) for entry in python_entries]

    base = clazz.__bases__[0]
    return base.__new__(clazz, name, unix_entries, python_entries, required, variables)
def find_all(clazz, text, sub_string, word_boundary=False, word_boundary_chars=None):
    """Return a list of all the spans where sub_string occurs in text.

    This is the eager counterpart of clazz.find_all_generator(); all
    arguments are forwarded to it unchanged.
    """
    check.check_string(text)
    check.check_string(sub_string)
    check.check_bool(word_boundary)
    check.check_set(word_boundary_chars, allow_none=True)

    found = clazz.find_all_generator(text,
                                     sub_string,
                                     word_boundary=word_boundary,
                                     word_boundary_chars=word_boundary_chars)
    return list(found)
def replace(clazz, s, replacements, word_boundary=False, word_boundary_chars=None):
    """Apply every src -> dst pair of the replacements dict to string s.

    The pairs are applied one after another in dict iteration order via
    clazz.replace_all(), each one operating on the result of the previous
    one.  Returns the final string.
    """
    check.check_string(s)
    check.check_dict(replacements, check.STRING_TYPES, check.STRING_TYPES)
    check.check_bool(word_boundary)
    check.check_set(word_boundary_chars, allow_none=True)

    result = s
    for old, new in replacements.items():
        result = clazz.replace_all(result,
                                   old,
                                   new,
                                   word_boundary=word_boundary,
                                   word_boundary_chars=word_boundary_chars)
    return result
def word_has_boundary(clazz, text, start, end, boundary_chars=None):
    """Return True if text[start..end] is delimited by boundary characters.

    start and end are inclusive indices.  The character just before start
    and the one just after end must each be in boundary_chars; a side that
    touches the beginning or the end of text counts as a boundary.
    boundary_chars falls back to clazz.CHARS when None or empty.
    """
    check.check_string(text)
    check.check_int(start)
    check.check_int(end)
    check.check_set(boundary_chars, allow_none=True)

    chars = boundary_chars or clazz.CHARS

    # Left side: only inspected when there is a character before start.
    if start >= 1 and text[start - 1] not in chars:
        return False

    # Right side: only inspected when there is a character after end.
    last_index = len(text) - 1
    if end < last_index and text[end + 1] not in chars:
        return False

    return True