def english_tokens(line):
    r"""
    Tokenize a line of text that may contain inline ``$...$`` math.

    A ``$`` that is not preceded by a backslash toggles math mode.  Text
    outside of math is split on runs of whitespace and ``~`` and each word is
    cleaned up; every math span (including both dollar signs) is kept
    verbatim as a single token.

    Args:
        line (str): text to tokenize

    Returns:
        list: word and math tokens in order of appearance.  Text following
            the last math span is tokenized as well, and an unterminated
            math span (odd number of separators) is kept as one token
            running to the end of the line.
    """
    # Break line into math and english parts
    mathsep = ut.negative_lookbehind(re.escape('\\')) + re.escape('$')

    def clean_word(word):
        # Drop an opening `` and a closing '' quote pair, then trim commas
        # from both ends and periods from the right end.
        if word.startswith('``'):
            word = word[2:]
        if word.endswith("''"):
            word = word[:-2]
        return word.strip(',').rstrip('.')

    def split_english(text):
        # Words are separated by whitespace or ties (~); words that are empty
        # after cleaning are discarded.
        cleaned = (clean_word(part) for part in re.split(r'[~\s]+', text))
        return [word for word in cleaned if word]

    prev = 0
    in_math = False
    tokens = []
    for match in re.finditer(mathsep, line):
        if in_math:
            # Closing separator: the math token includes both dollar signs
            curr = match.end()
            tokens.append(line[prev:curr])
        else:
            # Opening separator: everything since the last math span is text
            curr = match.start()
            tokens.extend(split_english(line[prev:curr]))
        prev = curr
        in_math = not in_math

    # Whatever follows the final separator must not be lost
    tail = line[prev:]
    if in_math:
        # Unterminated math; the tail starts at the opening separator
        tokens.append(tail)
    else:
        tokens.extend(split_english(tail))
    return tokens
def modify_code_indent_formatdict(code, locals_):
    r"""
    Re-indent format values to match where their ``{key}`` appears in ``code``.

    Scans ``code`` for single-brace fields (doubled braces are skipped by the
    lookarounds) that are the first non-whitespace item on their line.  For
    each such key that is present in ``locals_``, the value is replaced with
    ``ut.indent_rest(value, indent)`` where ``indent`` is the leading
    whitespace of that line.  Each key is processed only once, using the
    indentation of its first occurrence.

    Args:
        code (str): template text containing ``{key}`` fields
        locals_ (dict): maps keys to text values; modified in place

    Returns:
        None
    """
    import re
    # Parse out search and replace locations in code
    ncl1 = ut.negative_lookbehind('{')
    ncl2 = ut.negative_lookahead('{')
    ncr1 = ut.negative_lookbehind('}')
    ncr2 = ut.negative_lookahead('}')
    left = ncl1 + '{' + ncl2
    right = ncr1 + '}' + ncr2
    fmtpat = left + ut.named_field('key', '[^}]*') + right
    # Only horizontal whitespace counts as indentation.  A plain \s+ can run
    # across a preceding blank line under re.MULTILINE and capture newlines.
    spacepat = ut.named_field('indent', r'^[^\S\r\n]+')
    pattern = spacepat + fmtpat
    seen_ = set()
    for m in re.finditer(pattern, code, flags=re.MULTILINE):
        groups = m.groupdict()
        indent = groups['indent']
        key = groups['key']
        if key in locals_ and key not in seen_:
            seen_.add(key)
            # NOTE(review): presumably indents every line after the first;
            # confirm against utool's indent_rest
            locals_[key] = ut.indent_rest(locals_[key], indent)
def separate_math(line):
    r"""
    Split a line into its plain-text segments and its ``$...$`` math segments.

    A ``$`` that is not preceded by a backslash toggles math mode.  Math
    segments include their dollar signs; an unterminated math segment runs to
    the end of the line.

    Args:
        line (str): text to split

    Returns:
        tuple: ``(english, math)`` lists of substrings in order of
            appearance.  Empty text segments (e.g. when the line starts with
            math) are omitted and are never reported as math.
    """
    # Break line into math and english parts
    mathsep = ut.negative_lookbehind(re.escape('\\')) + re.escape('$')
    # Segment boundaries: text ends where an opening separator starts and
    # math ends where the closing separator ends.
    pos = [0]
    for count, match in enumerate(re.finditer(mathsep, line)):
        pos.append(match.start() if count % 2 == 0 else match.end())
    pos.append(len(line))
    english = []
    math = []
    # Segments alternate text, math, text, ... starting with text
    for count, (left, right) in enumerate(ut.itertwo(pos)):
        segment = line[left:right]
        if count % 2 == 0:
            if segment:
                english.append(segment)
        else:
            math.append(segment)
    return english, math
def repr_single_for_md(s):
    r"""
    Represent ``s`` as a double-quoted string literal where possible.

    The representation from ``ut.reprfunc`` is used as the starting point.
    If it is single-quoted, the surrounding quotes are swapped for double
    quotes: inner double quotes that are not already preceded by a backslash
    are escaped, and escaped single quotes are unescaped.  Any other
    representation is returned unchanged.

    Args:
        s (str):

    Returns:
        str: str_repr

    CommandLine:
        python -m ibeis.templates.generate_notebook --exec-repr_single_for_md --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.templates.generate_notebook import *  # NOQA
        >>> s = '#HTML(\'<iframe src="%s" width=700 height=350></iframe>\' % pdf_fpath)'
        >>> result = repr_single_for_md(s)
        >>> print(result)
    """
    import utool as ut
    import re
    str_repr = ut.reprfunc(s)
    if not str_repr.startswith('\''):
        # Not a single-quoted representation; nothing to convert
        return str_repr
    bs_ = re.escape(ut.BACKSLASH)
    # Lookbehind that rejects a character preceded by a backslash
    no_bs = ut.negative_lookbehind(bs_)
    inside = str_repr[1:-1]
    # Escape double quotes
    inside = re.sub(no_bs + r'"', r'\\"', inside)
    # Unescape single quotes
    inside = re.sub(no_bs + bs_ + r"'", r"'", inside)
    # Append external double quotes
    return '"' + inside + '"'
import utool as ut
import re

# Value forwarded as the `vim` keyword to the utool regex helpers below
vim = 0

# Regex-escaped literal characters
SLASH = re.escape('\\')
LCURL = re.escape('{')
RCURL = re.escape('}')

# NOTE(review): despite the name, this class excludes the RIGHT curly brace
NOT_LCURL = '[^}]'
# NOTE(review): hard-codes vim=1 instead of using the module flag -- confirm
NOPREV_LCURL = ut.negative_lookbehind('{', vim=1)
ANY_NONGREEDY = '.' + ut.nongreedy_kleene_star(vim=vim)
NONEXT_BSLASH = ut.negative_lookahead(SLASH, vim=vim)


def GROUP(x):
    """Wrap the pattern fragment ``x`` in parentheses."""
    return ''.join(('(', x, ')'))


title = 'section'

# Candidate (title, ctags kind letter) pairs, currently disabled:
#tks = [
#    ('section', 's'),
#    ('subsection', 't'),
#    ('subsubsection', 'u'),
##]

#print('--langdef=tex')
#print('--langmap=tex:.tex')
# --regex-tex=/\\label\{([^}]*)\}/\1/l,label/
# --regex-tex=/\\label\{([^}]*)\}/\1/l,label/
# --regex-tex=/^\s*\\section\{([^}]*)\}/\1/s,section/
import utool as ut
import re

# Value forwarded as the `vim` keyword to the utool regex helpers below
vim = 0

# Regex-escaped literal characters
SLASH = re.escape('\\')
LCURL = re.escape('{')
RCURL = re.escape('}')

# NOTE(review): despite the name, this class excludes the RIGHT curly brace
NOT_LCURL = '[^}]'
# NOTE(review): hard-codes vim=1 instead of using the module flag -- confirm
NOPREV_LCURL = ut.negative_lookbehind('{', vim=1)
ANY_NONGREEDY = '.' + ut.nongreedy_kleene_star(vim=vim)
NONEXT_BSLASH = ut.negative_lookahead(SLASH, vim=vim)


def GROUP(x):
    """Wrap the pattern fragment ``x`` in parentheses."""
    return ''.join(('(', x, ')'))


title = 'section'

# Candidate (title, ctags kind letter) pairs, currently disabled:
#tks = [
#    ('section', 's'),
#    ('subsection', 't'),
#    ('subsubsection', 'u'),
##]

#print('--langdef=tex')
#print('--langmap=tex:.tex')
# --regex-tex=/\\label\{([^}]*)\}/\1/l,label/
# --regex-tex=/\\label\{([^}]*)\}/\1/l,label/
# --regex-tex=/^\s*\\section\{([^}]*)\}/\1/s,section/