示例#1
0
def read_tex(src):
    r"""Consume one expression from the token buffer and return it.

    The first token decides what is built:

    * a token starting with ``$`` becomes a ``TexEnv`` named ``$`` or ``$$``
      whose single content is the token with the delimiters sliced off;
    * a lone backslash introduces an ``item``, a ``begin`` or a generic
      command, read from the following token(s);
    * any other token starting with a backslash becomes a bare ``TexCmd``;
    * a token in ``ARG_START_TOKENS`` is handed to ``read_arg``;
    * everything else is returned unchanged.

    :param Buffer src: a buffer of tokens
    :return: the parsed expression, or the raw token for plain text
    """
    token = next(src)

    if token.startswith('$'):
        if token.startswith('$$'):
            delim = '$$'
        else:
            delim = '$'
        width = len(delim)
        # Strip the opening and closing delimiter from the math token.
        return TexEnv(delim, [token[width:-width]], nobegin=True)

    if token == '\\':
        if src.peek().startswith('item '):
            mode = 'command'
            # Name is taken from the peeked token before that token is
            # consumed; the text after the first space is the item body.
            cmd_name = src.peek()[:4]
            words = next(src).split(' ')[1:]
            body = TokenWithPosition.join(words, glue=' ').strip()
            expr = TexCmd(cmd_name, (), body)
        elif src.peek() == 'begin':
            # ``mode`` is the consumed token itself (equal to 'begin').
            mode = next(src)
            expr = TexEnv(Arg.parse(src.forward(3)).value)
        else:
            mode = 'command'
            candidate = next(src)
            expr = None
            # Split at the first whitespace character: the left part is the
            # command name, the remainder becomes its trailing content.
            for pos, char in enumerate(candidate):
                if char.isspace():
                    expr = TexCmd(candidate[:pos], (), candidate[pos+1:])
                    break
            if not expr:
                expr = TexCmd(candidate)

        # Collect every argument that immediately follows.
        while src.peek() in ARG_START_TOKENS:
            expr.args.append(read_tex(src))

        if mode == 'begin':
            read_env(src, expr)

        # A math token directly after the command is attached as content.
        if src.startswith('$'):
            expr.add_contents(read_tex(src))
        return expr

    if token.startswith('\\'):
        return TexCmd(token[1:])

    if token in ARG_START_TOKENS:
        return read_arg(src, token)

    return token
示例#2
0
文件: reader.py 项目: jdbrice/TexSoup
def read_tex(src):
    r"""Read next expression from buffer

    The first token decides what is built:

    * a token starting with ``$`` opens a math environment (``$`` or ``$$``)
      whose contents are filled in by ``read_math_env``;
    * a token starting with a backslash is a command: ``\item`` collects its
      text up to the next newline, ``\end`` or ``\item``; ``\begin`` creates
      a ``TexEnv`` and later reads its body with ``read_env``; anything else
      is a plain ``TexCmd``;
    * a token in ``ARG_START_TOKENS`` is delegated to ``read_arg``;
    * any other token is returned unchanged.

    :param Buffer src: a buffer of tokens
    :return: the parsed expression, or the raw token for plain text
    """
    c = next(src)
    if c.startswith('$'):
        name = '$$' if c.startswith('$$') else '$'
        expr = TexEnv(name, [], nobegin=True)
        return read_math_env(src, expr)
    if c.startswith('\\'):
        command = TokenWithPosition(c[1:], src.position)
        if command == 'item':
            # Raw strings: '\e' and '\i' are not valid escape sequences, so
            # the non-raw spelling triggers a SyntaxWarning on modern Python.
            terminators = ('\n', r'\end', r'\item')
            extra = src.forward_until(lambda text: any(
                text.startswith(stop) for stop in terminators))
            mode = 'command'
            expr = TexCmd(
                command, (),
                TokenWithPosition.join(extra.split(' '), glue=' ').strip())
        elif command == 'begin':
            mode = 'begin'
            # Peek at the environment name first, then step past it.
            # NOTE(review): forward(3) presumably skips '{', name, '}' -
            # confirm against the tokenizer.
            expr = TexEnv(src.peek(1))
            src.forward(3)
        else:
            mode, expr = 'command', TexCmd(command)

        # TODO: allow only one line break
        # TODO: should really be handled by tokenizer
        # Skip the whitespace between the command and a possible argument.
        candidate_index = src.num_forward_until(lambda s: not s.isspace())
        src.forward(candidate_index)

        while src.peek() in ARG_START_TOKENS:
            expr.args.append(read_tex(src))
        if not expr.args:
            # No argument followed: rewind so the skipped whitespace is kept.
            src.backward(candidate_index)
        if mode == 'begin':
            read_env(src, expr)
        return expr
    if c in ARG_START_TOKENS:
        return read_arg(src, c)
    return c
示例#3
0
def read_item(src):
    r"""Read the item content.

    There can be any number of whitespace characters between \item and the first
    non-whitespace character. However, after that first non-whitespace
    character, the item can only tolerate one successive line break at a time.

    \item can also take an argument.

    :param Buffer src: a buffer of tokens
    :return: contents of the item and any item arguments, as the tuple
        ``(extra, arg)`` where both elements are lists
    """
    def stringify(s):
        """Split on single spaces and rejoin with TokenWithPosition.join."""
        return TokenWithPosition.join(s.split(' '), glue=' ')

    def criterion(s):
        """Catch the first non-whitespace character"""
        return not any(s.startswith(substr) for substr in string.whitespace)

    # Item argument such as in description environment
    arg = []
    if src.peek() in ARG_START_TOKENS:
        c = next(src)
        arg.append(read_arg(src, c))

    # Leading run consumed up to the first non-whitespace token; a single
    # leading space is dropped.
    last = stringify(src.forward_until(criterion))
    if last.startswith(' '):
        last = last[1:]
    extra = [last]

    # Keep reading expressions until the buffer is exhausted, a blank line,
    # the next \item or an \end is reached, or the previously read piece
    # itself ended with a blank line. Raw strings are used because '\i' and
    # '\e' are not valid escape sequences.
    while (src.hasNext()
           and not src.startswith('\n\n')
           and not src.startswith(r'\item')
           and not src.startswith(r'\end')
           and not (hasattr(last, 'endswith') and last.endswith('\n\n')
                    and len(extra) > 1)):
        last = read_tex(src)
        extra.append(last)
    return extra, arg
示例#4
0
 def stringify(s):
     """Split ``s`` on single spaces and rejoin the pieces with one space.

     The join goes through ``TokenWithPosition.join`` rather than
     ``str.join``.
     """
     pieces = s.split(' ')
     return TokenWithPosition.join(pieces, glue=' ')