Example #1
File: lexer.py Project: pberkes/Bento
def scan_field_id(token, state, stream, lexdata):
    # When a candidate is found, do as follows:
    # - save the candidate
    # - eat any whitespace
    # - if next is colon, candidate is an identifier, emit both
    # identifier and colon
    candidate = token
    token = stream.peek()
    if token.type == "WS":
        token = stream.peek()
    if token.type == "COLON":
        # We do have an identifier, so replace the WORD token with the
        # right keyword token
        candidate = _new_token(META_FIELDS_ID[candidate.value], candidate)

    try:
        field_type = FIELD_TYPE[candidate.type]
    except KeyError:
        data = lexdata.splitlines()
        msg = ["Error while tokenizing %r (missing colon ?)" %  candidate.value]
        msg += ["    Line %d -> %r" % (candidate.lineno, data[candidate.lineno-1])]
        raise SyntaxError("\n".join(msg))
    try:
        state = _FIELD_TYPE_TO_STATE[field_type]
    except KeyError:
        raise ValueError("Unknown state transition for type %s" % field_type)

    queue = [candidate]
    queue.append(gen_next(stream))
    nxt = gen_next(stream)
    return queue, nxt, state
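
All of these examples advance their token streams through gen_next. As a point of reference, here is a minimal sketch of what such a helper typically looks like, assuming it is nothing more than a thin Python 2/3 compatibility wrapper around the iterator protocol (the real helper's module and exact definition in Bento may differ):

import sys

# Minimal sketch (assumption): gen_next advances a generator/iterator by one
# item and lets StopIteration propagate to the caller.
if sys.version_info[0] >= 3:
    def gen_next(gen):
        return gen.__next__()
else:
    def gen_next(gen):
        return gen.next()

The callers rely on StopIteration escaping from gen_next; see the try/except StopIteration blocks in the tokenizer functions below.
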
Example #2
File: lexer.py Project: pberkes/Bento
def _skip_ws(tok, stream, state, internal):
    while tok.type in ["NEWLINE", "WS"]:
        if tok.type == "NEWLINE" and len(internal.words_stack) == 0:
            nxt = stream.peek()
            if not nxt.type == "INDENT":
                state = "SCANNING_FIELD_ID"
            else:
                tok = gen_next(stream)
            return tok, state
        tok = gen_next(stream)
    return tok, state
Example #3
File: lexer.py Project: pberkes/Bento
def skip_until_eol(stream, t):
    try:
        prev = stream.previous()
    except ValueError:
        prev = None
    while t.type != "NEWLINE":
        t = gen_next(stream)
    # FIXME: ideally, we would like to remove EOL for comments which span the
    # full line, but we need access to the token before the comment delimiter
    # to do so, as we don't want to remove EOL for inline comments (e.g. 'foo #
    # comment')
    if prev and t.type == "NEWLINE" and prev.type in ('NEWLINE', 'INDENT'):
        t = gen_next(stream)
    return t
Example #4
File: utils.py Project: pberkes/Bento
def next(self):
    c = gen_next(self._gen)
    if len(self._cache) == 2:
        old, new = self._cache
        self._cache = [new]
    self._cache.append(c)
    return c
Example #5
File: utils.py Project: pberkes/Bento
def _peek_no_dummy(self):
    if self._cache:
        return self._cache
    else:
        i = gen_next(self._it)
        self._cache = i
        return i
Example #6
File: utils.py Project: pberkes/Bento
def next(self):
    if self._cache:
        i = self._cache
        self._cache = None
        return i
    else:
        return gen_next(self._it)
Example #7
File: lexer.py Project: pberkes/Bento
def multiline_tokenizer(token, state, stream, internal):
    stack = internal.stack
    queue = []

    if internal.stack_level is None:
        internal.stack_level = [len(internal.stack)]
    stack_level = internal.stack_level

    if token.type == "INDENT":
        stack.append(token)
        queue.insert(0, token)
    elif token.type == "DEDENT":
        stack.pop(0)
        if len(stack) < 1:
            state = "SCANNING_FIELD_ID"
        queue.insert(0, token)
    elif token.type == "NEWLINE":
        saved_newline = token
        # Case where there is a single, non indented line for the field, i.e.:
        # Description: a description
        if (len(stack) == stack_level[0] and stream.peek().type != "INDENT"):
            state = "SCANNING_FIELD_ID"
            internal.stack_level = None
        elif stream.peek().type == "DEDENT":
            try:
                while stream.peek().type == "DEDENT":
                    token = gen_next(stream)
                    queue.insert(0, token)
                    stack.pop()
            except StopIteration:
                pass
            if len(stack) == stack_level[0]:
                state = "SCANNING_FIELD_ID"
                internal.stack_level = None
            else:
                queue.append(saved_newline)
        else:
            queue.insert(0, token)
    else:
        queue.insert(0, token)

    try:
        token = gen_next(stream)
    except StopIteration:
        token = None
    return queue, token, state
Example #8
File: utils.py Project: pberkes/Bento
def _peek_dummy(self):
    if self._cache:
        return self._cache
    else:
        try:
            i = gen_next(self._it)
        except StopIteration:
            return self._dummy
        self._cache = i
        return i
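
Examples #5, #6 and #8 are methods of the Peeker wrapper used throughout the lexer (Peeker(stream) in post_process, Peeker(stream, EOF) in merge_escaped, both shown below). A rough sketch of how those pieces plausibly fit together; the constructor and the peek() dispatch are assumptions, while the other methods mirror the examples above. Example #4 looks like a variant that also remembers the previous token (cf. stream.previous() in example #3) and is left out here.

gen_next = next  # stand-in for Bento's helper, see the sketch after example #1

class Peeker(object):
    # Sketch (assumption): wraps an iterator with one-token lookahead and an
    # optional dummy token returned when peeking past the end of the stream.
    def __init__(self, it, dummy=None):
        self._it = iter(it)
        self._cache = None
        self._dummy = dummy

    def peek(self):
        if self._dummy is not None:
            return self._peek_dummy()
        return self._peek_no_dummy()

    def _peek_no_dummy(self):
        # As in example #5: cache the next item so peek() does not consume it.
        if self._cache:
            return self._cache
        i = gen_next(self._it)
        self._cache = i
        return i

    def _peek_dummy(self):
        # As in example #8: return the dummy token instead of raising at EOF.
        if self._cache:
            return self._cache
        try:
            i = gen_next(self._it)
        except StopIteration:
            return self._dummy
        self._cache = i
        return i

    def next(self):
        # As in example #6: serve the cached lookahead first.
        if self._cache:
            i = self._cache
            self._cache = None
            return i
        return gen_next(self._it)
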
Example #9
File: lexer.py Project: pberkes/Bento
def tokenize_conditional(stream, token):
    ret = []

    token.type = CONDITIONAL_ID[token.value]
    ret.append(token)

    queue = []
    nxt = stream.peek()
    if nxt.type not in ["COLON", "NEWLINE"]:
        while nxt.type not in ["COLON", "NEWLINE"]:
            if nxt.type not in ["WS"]:
                queue.append(nxt)
            nxt = gen_next(stream)
        queue.append(nxt)

    for q in queue:
        if q.value in CONDITIONAL_ID.keys():
            q.type = CONDITIONAL_ID[q.value]
        ret.append(q)

    return ret, gen_next(stream)
Example #10
File: lexer.py Project: pberkes/Bento
def singleline_tokenizer(token, state, stream):
    if token.type == "NEWLINE":
        state = "SCANNING_FIELD_ID"
        queue = []
    else:
        queue = [token]

    try:
        tok = gen_next(stream)
    except StopIteration:
        tok = None

    return queue, tok, state
Example #11
File: lexer.py Project: pberkes/Bento
def word_tokenizer(token, state, stream):
    queue = []
    state = "SCANNING_FIELD_ID"

    try:
        while token.type != "NEWLINE":
            if token.type == "WORD":
                queue.append(token)
            token = gen_next(stream)
    except StopIteration:
        token = None

    return queue, token, state
Example #12
File: lexer.py Project: pberkes/Bento
def comma_list_tokenizer(token, state, stream, internal):
    queue = []
    state = "SCANNING_FIELD_ID"

    def _filter_ws_before_comma(lst):
        ret = []
        for i, item in enumerate(lst):
            if item.type == "WS":
                if i + 1 < len(lst) and lst[i+1].type == "COMMA":
                    pass
                elif i > 0 and lst[i-1].type == "COMMA":
                    pass
                else:
                    ret.append(item)
            else:
                ret.append(item)
        return ret

    try:
        if token.type != "NEWLINE":
            token, state = _skip_ws(token, stream, state, internal)
        while token.type not in ("NEWLINE",):
            queue.append(token)
            token = gen_next(stream)
        # Eat newline
        token = gen_next(stream)
        if token.type == "INDENT":
            internal.stack.append(token)
            while token.type != "DEDENT":
                if token.type != "NEWLINE":
                    queue.append(token)
                token = gen_next(stream)
            if token.type == "DEDENT":
                internal.stack.pop(0)
            queue.append(token)
        return _filter_ws_before_comma(queue), gen_next(stream), state
    except StopIteration:
        return _filter_ws_before_comma(queue), None, "EOF"
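
The nested helper in example #12 drops whitespace tokens that sit directly next to a comma while keeping whitespace between plain words. A standalone toy run (the Tok class is an illustrative stand-in for the lexer's token objects, not part of the Bento sources):

class Tok(object):
    def __init__(self, type, value):
        self.type, self.value = type, value
    def __repr__(self):
        return "%s(%r)" % (self.type, self.value)

def _filter_ws_before_comma(lst):
    # Same logic as the nested helper above, copied out for illustration.
    ret = []
    for i, item in enumerate(lst):
        if item.type == "WS":
            if i + 1 < len(lst) and lst[i+1].type == "COMMA":
                pass
            elif i > 0 and lst[i-1].type == "COMMA":
                pass
            else:
                ret.append(item)
        else:
            ret.append(item)
    return ret

toks = [Tok("WORD", "foo"), Tok("WS", " "), Tok("COMMA", ","),
        Tok("WS", " "), Tok("WORD", "bar"), Tok("WS", " "), Tok("WORD", "baz")]
print(_filter_ws_before_comma(toks))
# -> [WORD('foo'), COMMA(','), WORD('bar'), WS(' '), WORD('baz')]
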
Example #13
File: lexer.py Project: pberkes/Bento
def merge_escaped(stream):
    stream = Peeker(stream, EOF)
    queue = []

    t = gen_next(stream)
    while t:
        if t.escaped:
            queue.append(t)
        else:
            if t.type == "WORD":
                if queue:
                    queue.append(t)
                    n = stream.peek()
                    if not n.escaped:
                        t.value = "".join([c.value for c in queue])
                        yield t
                        queue = []
                else:
                    n = stream.peek()
                    if n.escaped:
                        queue.append(t)
                    else:
                        yield t
            else:
                if queue:
                    queue[-1].value = "".join([c.value for c in queue])
                    queue[-1].type = "WORD"
                    yield queue[-1]
                    queue = []
                yield t
        try:
            t = gen_next(stream)
        except StopIteration:
            if queue:
                t.value = "".join([c.value for c in queue])
                t.type = "WORD"
                yield t
            return
Example #14
File: lexer.py Project: pberkes/Bento
def detect_escaped(stream):
    """Post process the given stream to generate escaped character for
    characters preceded by the escaping token."""
    for t in stream:
        if ESCAPING_CHAR[t.type]:
            try:
                t = gen_next(stream)
            except StopIteration:
                raise SyntaxError("EOF while escaping token %r (line %d)" %
                                  (t.value, t.lineno-1))
            t.escaped = True
        else:
            t.escaped = False
        yield t
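
detect_escaped and merge_escaped are generator passes meant to be chained: the first marks any token that follows an escaping character, the second glues runs of escaped fragments back into a single WORD token. A self-contained toy version of the marking pass (Tok and the ESCAPING_CHAR table below are illustrative stand-ins, not Bento's definitions):

class Tok(object):
    def __init__(self, type, value, lineno=1):
        self.type, self.value, self.lineno = type, value, lineno
        self.escaped = False

# Stand-in escape table: which token types act as an escaping character.
ESCAPING_CHAR = {"BACKSLASH": True, "WORD": False, "WS": False}

def toy_detect_escaped(stream):
    stream = iter(stream)
    for t in stream:
        if ESCAPING_CHAR[t.type]:
            # The escaping token itself is dropped; the token that follows it
            # is marked as escaped instead.
            try:
                t = next(stream)
            except StopIteration:
                raise SyntaxError("EOF while escaping token %r" % t.value)
            t.escaped = True
        else:
            t.escaped = False
        yield t

toks = [Tok("WORD", "foo"), Tok("BACKSLASH", "\\"), Tok("WS", " "), Tok("WORD", "bar")]
print([(t.value, t.escaped) for t in toy_detect_escaped(toks)])
# -> [('foo', False), (' ', True), ('bar', False)]
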
Example #15
File: lexer.py Project: pberkes/Bento
def post_process(stream, lexdata):
    # XXX: this is awfully complicated...
    class _Internal(object):
        def __init__(self):
            self.stack = []
            self.words_stack = []
            self.stack_level = None
    internal = _Internal()

    state = "SCANNING_FIELD_ID"

    stream = Peeker(stream)
    i = gen_next(stream)
    while i:
        if state == "SCANNING_FIELD_ID":
            if i.value in CONDITIONAL_ID.keys():
                queue, i = tokenize_conditional(stream, i)
                for q in queue:
                    yield q
            elif i.value in META_FIELDS_ID.keys():
                queue, i, state = scan_field_id(i, state, stream, lexdata)
                for q in queue:
                    yield q
            else:
                queue, i = find_next(i, stream, internal)
                for q in queue:
                    yield q
        elif state == "SCANNING_SINGLELINE_FIELD":
            queue, i, state = singleline_tokenizer(i, state, stream)
            for q in queue:
                yield q
        elif state == "SCANNING_MULTILINE_FIELD":
            queue, i, state = multiline_tokenizer(i, state, stream, internal)
            while len(queue) > 0:
                yield queue.pop()
        elif state == "SCANNING_WORD_FIELD":
            queue, i, state = word_tokenizer(i, state, stream)
            for t in queue:
                yield t
        elif state == "SCANNING_WORDS_FIELD":
            queue, i, state = words_tokenizer(i, state, stream, internal)
            for q in queue:
                yield q
        elif state == "SCANNING_COMMA_LIST_FIELD":
            queue, i, state = comma_list_tokenizer(i, state, stream, internal)
            for q in queue:
                yield q
        else:
            raise ValueError("Unknown state: %s" % state)
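
post_process is a hand-written state machine: each branch selects the tokenizer for the current state, and the tokenizer hands back both a queue of tokens to emit and the next state. The same driver shape can be expressed with a dispatch table; a sketch of that pattern with a toy handler (the names here are hypothetical, and the real handlers take extra arguments such as internal and lexdata):

def run_state_machine(stream, handlers, start):
    # Generic driver: look up the handler for the current state, emit the
    # tokens it queued, then move to the state it returns.
    state = start
    token = next(stream, None)
    while token is not None:
        try:
            handler = handlers[state]
        except KeyError:
            raise ValueError("Unknown state: %s" % state)
        queue, token, state = handler(token, stream)
        for q in queue:
            yield q

def passthrough(token, stream):
    # Toy handler: emit every token unchanged and stay in the same state.
    return [token], next(stream, None), "PASS"

print(list(run_state_machine(iter("abc"), {"PASS": passthrough}, start="PASS")))
# -> ['a', 'b', 'c']
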
Example #16
File: lexer.py Project: pberkes/Bento
def find_next(token, stream, internal):
    queue = []

    if token.type != "NEWLINE":
        if token.type == "INDENT":
            internal.stack.append(token)
        elif token.type == "DEDENT":
            internal.stack.pop(0)
        queue.append(token)

    try:
        tok = gen_next(stream)
    except StopIteration:
        tok = None

    return queue, tok
Example #17
File: lexer.py Project: pberkes/Bento
def words_tokenizer(token, state, stream, internal):
    token, state = _skip_ws(token, stream, state, internal)

    if state == "SCANNING_WORDS_FIELD":
        words_stack = internal.words_stack
        if token.type == "INDENT":
            words_stack.append(token)
        elif token.type == "DEDENT":
            words_stack.pop(0)
            if len(words_stack) < 1:
                state = "SCANNING_FIELD_ID"
                internal.words_stack = []
        queue = [token]
    else:
        queue = []
    try:
        tok = gen_next(stream)
    except StopIteration:
        tok = None
    return queue, tok, state
Example #18
File: lexer.py Project: pberkes/Bento
def token(self, *a, **kw):
    try:
        return gen_next(self.token_stream)
    except StopIteration:
        return None
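
Example #18 follows the ply lexer convention: token() returns the next token, or None once the underlying stream is exhausted, so a parser can simply loop until it sees None. A minimal stand-in showing that contract (the _DummyLexer class is illustrative, not Bento's):

class _DummyLexer(object):
    def __init__(self, token_stream):
        self.token_stream = iter(token_stream)

    def token(self, *a, **kw):
        # Same contract as above: next token, or None at end of input.
        try:
            return next(self.token_stream)
        except StopIteration:
            return None

lexer = _DummyLexer(["WORD", "COLON", "WORD", "NEWLINE"])
tok = lexer.token()
while tok is not None:
    print(tok)
    tok = lexer.token()
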
Example #19
File: lexer.py Project: pberkes/Bento
def indent_generator(toks):
    """Post process the given stream of tokens to generate INDENT/DEDENT
    tokens.
    
    Note
    ----
    Each generated token's value is the total amount of spaces from the
    beginning of the line.
    
    The way indentation tokens are generated is similar to how it works in
    Python."""
    stack = [0]

    # Dummy token to track the token just before the current one
    former = LexToken()
    former.type = "NEWLINE"
    former.value = "dummy"
    former.lineno = 0
    former.lexpos = -1

    def generate_dedent(stck, tok):
        amount = stck.pop(0)
        return new_dedent(amount, tok)

    for token in toks:
        if former.type == "NEWLINE":
            if token.type == "WS":
                indent = len(token.value)
            else:
                indent = 0

            if indent == stack[0]:
                former = token
                if indent > 0:
                    token = gen_next(toks)
                    former = token
                    yield token
                else:
                    yield former
            elif indent > stack[0]:
                stack.insert(0, indent)
                ind = new_indent(indent, token)
                former = ind
                yield ind
            elif indent < stack[0]:
                if indent not in stack:
                    raise ValueError("Wrong indent at line %d" % token.lineno)
                while stack[0] > indent:
                    former = generate_dedent(stack, token)
                    yield former
                if stack[0] > 0:
                    former = gen_next(toks)
                    yield former
                else:
                    former = token
                    yield token
        else:
            former = token
            yield token

    # Generate additional DEDENT so that the number of INDENT/DEDENT always
    # match
    while len(stack) > 1:
        former = generate_dedent(stack, token)
        yield former
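
As the docstring says, the INDENT/DEDENT bookkeeping mirrors Python's own tokenizer: a stack of indentation widths, one INDENT pushed when a line starts deeper than the top of the stack, one DEDENT popped for every level that a shallower line closes, plus trailing DEDENTs at end of input so the counts always match. A self-contained toy version working on plain text lines instead of LexToken objects:

def indent_events(lines):
    # Stack of indentation widths currently open; 0 is always at the bottom.
    stack = [0]
    for line in lines:
        if not line.strip():
            continue  # blank lines do not affect indentation
        indent = len(line) - len(line.lstrip(" "))
        if indent > stack[-1]:
            stack.append(indent)
            yield ("INDENT", indent)
        else:
            while indent < stack[-1]:
                # one DEDENT per indentation level that is being closed
                yield ("DEDENT", stack.pop())
            if indent != stack[-1]:
                raise ValueError("Inconsistent indentation: %r" % line)
        yield ("LINE", line.strip())
    # Close whatever is still open at end of input, like the loop above.
    while len(stack) > 1:
        yield ("DEDENT", stack.pop())

text = ["Description:",
        "    a multi-line",
        "        nested value",
        "Name: foo"]
for event in indent_events(text):
    print(event)
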