Example #1
 def tokenize_string_literal(self):
     """
     Tokenize a text string literal. Yields
     Token(TokenKind.STRING_LITERAL, 1, 7) for a stream
     containing '"string"'.
     """
     yield from self.tokenize_delimiter()
     start = self.stream.tell()
     read_char = self.stream.read(1)
     if read_char == '"':
         literal_start = self.stream.tell()
         read_char = self.stream.read(1)
         literal_end = self.stream.tell()
         while read_char and read_char not in '"':
             literal_end = self.stream.tell()
             read_char = self.stream.read(1)
         if not read_char:
             self.stream.seek(start)
             raise TokenizationError(
                 "Reached end of stream while reading string literal"
             )
         yield Token(TokenKind.STRING_LITERAL, literal_start, literal_end)
     else:
         self.stream.seek(start)
         raise TokenizationError(f"Expected string at {start}")
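
This example, like the ones that follow, leans on some shared machinery that is not shown: a Token value type, a TokenKind enumeration, a TokenizationError exception (plus a WrongFileModeError in example #9), and a tokenizer object whose self.stream is a seekable stream. A minimal sketch of those assumptions, reconstructed only from how the snippets use them, could look like:

import io
from collections import namedtuple
from enum import Enum, auto

# assumed definitions (not shown in the examples), reconstructed from usage
Token = namedtuple("Token", ["kind", "start", "end"])

class TokenKind(Enum):
    ROFF_BIN = auto()
    ROFF_ASC = auto()
    STRING_LITERAL = auto()
    NUMERIC_VALUE = auto()
    NAME = auto()
    BOOL = auto()
    BYTE = auto()
    INT = auto()
    FLOAT = auto()
    DOUBLE = auto()

class TokenizationError(Exception):
    pass

class WrongFileModeError(Exception):
    pass

class Tokenizer:
    # hypothetical holder (an assumption): the methods above only require
    # that self.stream is a seekable stream opened in the appropriate mode
    def __init__(self, stream):
        self.stream = stream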
Example #2
 def tokenize_delimiter(self):
     start = self.stream.tell()
     read_char = self.stream.read(1)
     if read_char != b"\0":
         self.stream.seek(start)
         raise TokenizationError(f"Expected delimiter at {start}")
     return iter([])
Example #3
 def binary_ended_tokenizer():
     tok = next(tokenizer())
     start = self.stream.tell()
     read_char = self.stream.read(1)
     if read_char != b"\0":
         self.stream.seek(start)
         raise TokenizationError(f"Expected delimiter at {start}")
     yield tok
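
Note that self and tokenizer are not defined inside binary_ended_tokenizer, so this reads as an inner closure: presumably the enclosing method receives another tokenizer, and this wrapper additionally requires the wrapped token to be followed by a binary b"\0" delimiter, the same check as in example #2.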
Example #4
 def tokenize_end_of_file(self):
     start = self.stream.tell()
     r = self.stream.read(1)
     if r:
         self.stream.seek(start)
         raise TokenizationError(
             f"Expected end of file or new tag at {start} got {r}"
         )
     return iter([])
Example #5
 def tokenize_end_of_file(self):
     yield from self.tokenize_delimiter()
     start = self.stream.tell()
     read_char = self.stream.read(1)
     if read_char:
         self.stream.seek(start)
         raise TokenizationError(
             f"Expected end of file or new tag at {start} got {read_char}"
         )
Example #6
    def word_tokenizer():
        start = stream.tell()

        token = stream.read(word_len)
        if token == word:
            end = stream.tell()
            yield Token(kind, end - word_len, end)
        else:
            stream.seek(start)
            raise TokenizationError(f"Token {repr(token)} did not match {word}")
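
Here stream, word, word_len, and kind are free variables, so word_tokenizer is presumably produced by an enclosing factory. A hypothetical sketch of such a factory (an assumption, not taken from the example), followed by a small usage demo:

def tokenize_word(stream, word, kind):
    # hypothetical factory: binds the free variables used by the
    # word_tokenizer closure shown above
    word_len = len(word)

    def word_tokenizer():
        start = stream.tell()

        token = stream.read(word_len)
        if token == word:
            end = stream.tell()
            yield Token(kind, end - word_len, end)
        else:
            stream.seek(start)
            raise TokenizationError(f"Token {repr(token)} did not match {word}")

    return word_tokenizer

# usage sketch: matches a literal word at the current stream position
stream = io.StringIO("roff-asc rest of file")
print(next(tokenize_word(stream, "roff-asc", TokenKind.ROFF_ASC)()))
# -> Token(kind=<TokenKind.ROFF_ASC: ...>, start=0, end=8)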
Example #7
    def tokenize_comment(self):
        """
        Tokenize a comment from the start of the stream.

        Note: does not actually yield a token for the comment,
        just returns an empty iterator after consuming the comment.

        """
        start = self.stream.tell()
        read_char = self.stream.read(1)
        if read_char == "#":
            read_char = self.stream.read(1)
            while read_char and read_char != "#":
                read_char = self.stream.read(1)
            if not read_char:
                self.stream.seek(start)
                raise TokenizationError("Reached end of stream while reading comment")
            return iter([])
        else:
            self.stream.seek(start)
            raise TokenizationError(f"Expected comment at {start}")
Example #8
    def tokenize_space(self):
        start = self.stream.tell()
        read_char = self.stream.read(1)
        if not read_char.isspace():
            self.stream.seek(start)
            raise TokenizationError(f"Expected space at {start}, got {read_char}")

        while read_char.isspace():
            first_non_space = self.stream.tell()
            read_char = self.stream.read(1)
        self.stream.seek(first_non_space)
        return iter([])
Example #9
def tokenize_header(stream):
    start = stream.tell()

    header = stream.read(8)
    # a bytes header means the stream was opened in binary mode, a str header
    # means text mode; comparing against both detects mode mismatches
    if header == b"roff-bin":
        read_char = stream.read(1)
        if read_char != b"\0":
            stream.seek(start)
            raise TokenizationError(
                f"Expected delimiter after header token got {read_char}")
        yield Token(TokenKind.ROFF_BIN, start, start + 8)
    elif header == "roff-asc":
        yield Token(TokenKind.ROFF_ASC, start, start + 8)
    elif header == b"roff-asc":
        raise WrongFileModeError(
            "Ascii formatted roff file was opened in binary mode!")
    elif header == "roff-bin":
        raise WrongFileModeError(
            "Binary formatted roff file was opened in text mode!")
    else:
        stream.seek(start)
        raise TokenizationError(f"Did not find roff header, got {header}.")
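
A small usage sketch for the header tokenizer, assuming the Token, TokenKind, and error definitions sketched after example #1:

stream = io.BytesIO(b"roff-bin\0" + b"rest of binary file")
print(next(tokenize_header(stream)))
# -> Token(kind=<TokenKind.ROFF_BIN: ...>, start=0, end=8); the stream is left
# just past the b"\0" delimiter, ready for the next tokenizer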
Example #10
 def tokenizer():
     start = self.stream.tell()
     last_alnum = start
     read_char = self.stream.read(1)
     while read_char and not read_char == b"\0":
         last_alnum = self.stream.tell()
         read_char = self.stream.read(1)
     if read_char != b"\0":
         self.stream.seek(start)
         raise TokenizationError(
             f"could not tokenize string at {start}")
     else:
         yield Token(kind, start, last_alnum)
Example #11
    def one_of_tokenizer():
        did_yield = False
        errors = []
        for tok in tokenizers:
            try:
                yield from tok()
                did_yield = True
                break
            except TokenizationError as err:
                errors.append(str(err))

        if not did_yield:
            raise TokenizationError(
                "Tokenization failed, due to one of\n*" + ("\n*".join(errors))
            )
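
tokenizers is likewise captured from an enclosing scope, presumably a combinator factory like the hypothetical one_of below (an assumption, not taken from the example). The combinator only works because every failing tokenizer seeks the stream back to its starting position before raising, so each alternative sees the stream exactly as the previous one found it:

def one_of(*tokenizers):
    # hypothetical factory: binds the alternatives iterated over by the
    # one_of_tokenizer closure shown above
    def one_of_tokenizer():
        did_yield = False
        errors = []
        for tok in tokenizers:
            try:
                yield from tok()
                did_yield = True
                break
            except TokenizationError as err:
                errors.append(str(err))

        if not did_yield:
            raise TokenizationError(
                "Tokenization failed, due to one of\n*" + ("\n*".join(errors))
            )

    return one_of_tokenizer

# usage sketch, reusing the hypothetical tokenize_word factory from example #6:
stream = io.StringIO("roff-asc")
header = one_of(
    tokenize_word(stream, "roff-bin", TokenKind.ROFF_BIN),
    tokenize_word(stream, "roff-asc", TokenKind.ROFF_ASC),
)
print(list(header()))  # the roff-bin alternative fails and rewinds, roff-asc matches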
Example #12
    def tokenize_name(self):
        yield from self.tokenize_delimiter()

        start = self.stream.tell()
        length = 0

        read_char = self.stream.read(1)
        while read_char and not read_char.isspace():
            length += 1
            read_char = self.stream.read(1)

        if length < 1:
            self.stream.seek(start)
            raise TokenizationError(f"could not tokenize name at {start}")

        yield Token(TokenKind.NAME, start, start + length)
Example #13
 def tokenize_numeric_value(self):
     """
     Tokenize any text numeric value. Yields
     Token(TokenKind.NUMERIC_VALUE, 0, 3) for a stream
     containing "1.0".
     """
     yield from self.tokenize_delimiter()
     start = self.stream.tell()
     end = start
     read_char = self.stream.read(1)
     if read_char and (read_char.isnumeric() or read_char == "-"):
         while read_char and (read_char.isnumeric() or read_char in ".eE+-"):
             end = self.stream.tell()
             read_char = self.stream.read(1)
     if end - start < 1:
         self.stream.seek(start)
         raise TokenizationError(f"Expected numeric value at {start}")
     else:
         self.stream.seek(end)
         yield Token(TokenKind.NUMERIC_VALUE, start, end)
Example #14
def tokenlen(tokenkind):
    """
    For fixed byte-size types, return the number of bytes used
    to store that type.

    :param tokenkind: A fixed byte-size type, e.g. TokenKind.BOOL.
    :returns: The number of bytes used for that type.
    """
    if tokenkind == TokenKind.BOOL:
        return 1
    elif tokenkind == TokenKind.BYTE:
        return 1
    elif tokenkind == TokenKind.INT:
        return 4
    elif tokenkind == TokenKind.FLOAT:
        return 4
    elif tokenkind == TokenKind.DOUBLE:
        return 8
    else:
        raise TokenizationError(
            f"Attempted to read non-fixed size type {tokenkind}")
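
A plausible companion to tokenlen, sketched here as an assumption rather than taken from the examples, pairs it with struct to decode the fixed-size binary value itself:

import struct

# hypothetical mapping from token kind to a struct format character; the "<"
# prefix assumes little-endian data, which the examples do not state
_STRUCT_FORMATS = {
    TokenKind.BOOL: "<B",
    TokenKind.BYTE: "<B",
    TokenKind.INT: "<i",
    TokenKind.FLOAT: "<f",
    TokenKind.DOUBLE: "<d",
}

def read_fixed_value(stream, tokenkind):
    # read exactly tokenlen(tokenkind) bytes and decode them
    raw = stream.read(tokenlen(tokenkind))
    return struct.unpack(_STRUCT_FORMATS[tokenkind], raw)[0]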