Example #1
    def tokenize(self, string):
        token_specs = [('SKIP', r'\r+|\s*\|[^\n]*'), ('NL', r'\n'),
                       ('KEYWORD', r'\[.+?\]'), ('WORD', r'[^ \n\t\r\f\v=]+'),
                       ('WS', r'[ \t\r\f\v]+'), ('EQ', r'='),
                       ('MISMATCH', r'.')]

        tokens, token_regex = tokenize_init(token_specs)

        for mo in re.finditer(token_regex, string, re.DOTALL):
            kind = mo.lastgroup

            if kind == 'SKIP':
                pass
            elif kind != 'MISMATCH':
                value = mo.group(kind)

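                # Bracketed keywords are normalised (lower case, '_' -> ' ')
                # and, when recognised, the normalised form becomes the kind.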
                if kind == 'KEYWORD':
                    keyword = value.lower().replace('_', ' ')

                    if keyword in KEYWORDS:
                        kind = keyword

                tokens.append(Token(kind, value, mo.start()))
            else:
                raise TokenizeError(string, mo.start())

        return tokens
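
All of the tokenize() examples in this section lean on two shared helpers: tokenize_init() turns the list of (name, regex) pairs into a token list plus a single combined pattern with one named group per token kind, and Token carries the kind, the matched text, and its offset in the input. The sketch below is a hypothetical reconstruction inferred purely from how the examples use these names; the real helpers may differ (for instance, the returned token list could be pre-seeded with a start-of-file marker).

import re  # the tokenize() methods use re.finditer() on the combined pattern
from collections import namedtuple

# Inferred from usage in the examples; not necessarily the real definitions.
Token = namedtuple('Token', ['kind', 'value', 'offset'])


def tokenize_init(token_specs):
    # Join all specs into one alternation with a named group per kind, so
    # that mo.lastgroup reports which spec matched at a given position.
    token_regex = '|'.join(
        '(?P<{}>{})'.format(name, regex) for name, regex in token_specs)

    return [], token_regex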
Example #2
    def tokenize(self, string):
        keywords = set([
            'BA_', 'BA_DEF_', 'BA_DEF_DEF_', 'BA_DEF_DEF_REL_', 'BA_DEF_REL_',
            'BA_DEF_SGTYPE_', 'BA_REL_', 'BA_SGTYPE_', 'BO_', 'BO_TX_BU_',
            'BS_', 'BU_', 'BU_BO_REL_', 'BU_EV_REL_', 'BU_SG_REL_', 'CAT_',
            'CAT_DEF_', 'CM_', 'ENVVAR_DATA_', 'EV_', 'EV_DATA_', 'FILTER',
            'NS_', 'NS_DESC_', 'SG_', 'SG_MUL_VAL_', 'SGTYPE_', 'SGTYPE_VAL_',
            'SIG_GROUP_', 'SIG_TYPE_REF_', 'SIG_VALTYPE_', 'SIGTYPE_VALTYPE_',
            'VAL_', 'VAL_TABLE_', 'VERSION'
        ])

        names = {
            'LPAREN': '(',
            'RPAREN': ')',
            'LBRACE': '[',
            'RBRACE': ']',
            'COMMA': ',',
            'AT': '@',
            'SCOLON': ';',
            'COLON': ':',
            'PIPE': '|',
            'SIGN': '+/-'
        }

        token_specs = [('SKIP', r'[ \r\n\t]+|//.*?\n'),
                       ('NUMBER', r'-?\d+\.?\d*([eE][+-]?\d+)?'),
                       ('WORD', r'[A-Za-z0-9_]+'),
                       ('STRING', r'"(\\"|[^"])*?"'), ('LPAREN', r'\('),
                       ('RPAREN', r'\)'), ('LBRACE', r'\['), ('RBRACE', r'\]'),
                       ('COMMA', r','), ('PIPE', r'\|'), ('AT', r'@'),
                       ('SIGN', r'[+-]'), ('SCOLON', r';'), ('COLON', r':'),
                       ('MISMATCH', r'.')]

        tokens, token_regex = tokenize_init(token_specs)

        for mo in re.finditer(token_regex, string, re.DOTALL):
            kind = mo.lastgroup

            if kind == 'SKIP':
                pass
            elif kind == 'STRING':
                value = mo.group(kind)[1:-1].replace('\\"', '"')
                tokens.append(Token(kind, value, mo.start()))
            elif kind != 'MISMATCH':
                value = mo.group(kind)

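                # Words matching a known keyword get their own token kind;
                # punctuation kinds are then mapped to their literal characters.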
                if value in keywords:
                    kind = value

                if kind in names:
                    kind = names[kind]

                tokens.append(Token(kind, value, mo.start()))
            else:
                raise TokenizeError(string, mo.start())

        return tokens
Example #3
    def test_tokenize_error(self):
        datas = [(2, 'hej', 'Invalid syntax at line 1, column 3: "he>>!<<j"'),
                 (0, 'a\nb\n', 'Invalid syntax at line 1, column 1: ">>!<<a"'),
                 (1, 'a\nb\n', 'Invalid syntax at line 1, column 2: "a>>!<<"'),
                 (2, 'a\nb\n', 'Invalid syntax at line 2, column 1: ">>!<<b"')]

        for offset, text, message in datas:
            with self.assertRaises(TokenizeError) as cm:
                raise TokenizeError(text, offset)

            self.assertEqual(cm.exception.text, text)
            self.assertEqual(cm.exception.offset, offset)
            self.assertEqual(str(cm.exception), message)
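
The test above pins down TokenizeError's observable behaviour: it keeps the offending text and offset, and its string form reports a 1-based line and column and marks the failing position with '>>!<<'. A minimal sketch that satisfies exactly these cases could look as follows; it is a reconstruction from the test, not necessarily the library's implementation.

class TokenizeError(Exception):
    # Reconstructed from the test cases above; the real class may differ.

    def __init__(self, text, offset):
        self.text = text
        self.offset = offset
        # 1-based line and column of the offending character.
        self.line = text.count('\n', 0, offset) + 1
        line_start = text.rfind('\n', 0, offset) + 1
        self.column = offset - line_start + 1
        line_end = text.find('\n', offset)

        if line_end == -1:
            line_end = len(text)

        marked_line = (text[line_start:offset]
                       + '>>!<<'
                       + text[offset:line_end])
        message = 'Invalid syntax at line {}, column {}: "{}"'.format(
            self.line, self.column, marked_line)
        super().__init__(message)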
Example #4
    def tokenize(self, string):
        names = {
            'LPAREN': '(',
            'RPAREN': ')',
            'LBRACE': '[',
            'RBRACE': ']',
            'COMMA': ',',
            'ASSIGN': '=',
            'ENUMS': '{ENUMS}',
            'SIGNALS': '{SIGNALS}',
            'SEND': '{SEND}',
            'RECEIVE': '{RECEIVE}',
            'SENDRECEIVE': '{SENDRECEIVE}',
            'U': '/u:',
            'F': '/f:',
            'O': '/o:',
            'MIN': '/min:',
            'MAX': '/max:',
            'D': '/d:',
            'LN': '/ln:',
            'E': '/e:',
            'P': '/p:',
            'M': '-m',
            'H': '-h',
            'B': '-b',
            'S': '-s',
            'T': '-t',
            'V': '-v'
        }

        re_string = r'"(\\"|[^"])*?"'

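        # The unit flag /u: may be followed by a quoted string or a bare
        # word, so its spec embeds re_string; the other flags match literally.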
        token_specs = [('SKIP', r'[ \r\n\t]+'), ('COMMENT', r'//.*?\n'),
                       ('NUMBER', r'-?\d+\.?[0-9A-F]*([eE][+-]?\d+)?'),
                       ('STRING', re_string),
                       ('U', r'/u:({}|\S+)'.format(re_string)), ('F', r'/f:'),
                       ('O', r'/o:'), ('MIN', r'/min:'), ('MAX', r'/max:'),
                       ('D', r'/d:'), ('LN', r'/ln:'), ('E', r'/e:'),
                       ('P', r'/p:'), ('M', r'\-m'), ('H', r'\-h'),
                       ('B', r'\-b'), ('S', r'\-s'), ('T', r'\-t'),
                       ('V', r'\-v'), ('LPAREN', r'\('), ('RPAREN', r'\)'),
                       ('LBRACE', r'\['), ('RBRACE', r'\]'), ('COMMA', r','),
                       ('ASSIGN', r'='), ('ENUMS', r'\{ENUMS\}'),
                       ('SIGNALS', r'\{SIGNALS\}'), ('SEND', r'\{SEND\}'),
                       ('RECEIVE', r'\{RECEIVE\}'),
                       ('SENDRECEIVE', r'\{SENDRECEIVE\}'),
                       ('WORD', r'[^\s=\(\]\-]+'), ('MISMATCH', r'.')]

        tokens, token_regex = tokenize_init(token_specs)

        for mo in re.finditer(token_regex, string, re.DOTALL):
            kind = mo.lastgroup

            if kind == 'SKIP':
                pass
            elif kind == 'STRING':
                value = mo.group(kind)[1:-1].replace('\\"', '"')
                tokens.append(Token(kind, value, mo.start()))
            elif kind != 'MISMATCH':
                value = mo.group(kind)

                if value in self.KEYWORDS:
                    kind = value

                if kind in names:
                    kind = names[kind]

                tokens.append(Token(kind, value, mo.start()))
            else:
                raise TokenizeError(string, mo.start())

        return tokens
Example #5
    def tokenize(self, text):
        raise TokenizeError(text, 5)
Example #6
    def tokenize(self, string):
        keywords = set([
            'FormatVersion',
            'Title',
            'Enum',
            'Sig',
            'ID',
            'Len',
            'Mux',
            'CycleTime',
            'Timeout',
            'MinInterval',
        ])

        names = {
            'LPAREN': '(',
            'RPAREN': ')',
            'LBRACE': '[',
            'RBRACE': ']',
            'COMMA': ',',
            'ASSIGN': '=',
            'ENUMS': '{ENUMS}',
            'SIGNALS': '{SIGNALS}',
            'SEND': '{SEND}',
            'RECEIVE': '{RECEIVE}',
            'SENDRECEIVE': '{SENDRECEIVE}',
            'U': '/u:',
            'F': '/f:',
            'O': '/o:',
            'MIN': '/min:',
            'MAX': '/max:',
            'D': '/d:',
            'LN': '/ln:',
            'E': '/e:',
            'M': '-m'
        }

        token_specs = [('SKIP', r'[ \r\n\t]+|//.*?\n'),
                       ('NUMBER', r'-?\d+\.?\d*([eE][+-]?\d+)?'),
                       ('WORD', r'[A-Za-z0-9_\*]+'),
                       ('STRING', r'"(\\"|[^"])*?"'), ('LPAREN', r'\('),
                       ('RPAREN', r'\)'), ('LBRACE', r'\['), ('RBRACE', r'\]'),
                       ('COMMA', r','), ('ASSIGN', r'='),
                       ('ENUMS', r'\{ENUMS\}'), ('SIGNALS', r'\{SIGNALS\}'),
                       ('SEND', r'\{SEND\}'), ('RECEIVE', r'\{RECEIVE\}'),
                       ('SENDRECEIVE', r'\{SENDRECEIVE\}'), ('U', r'/u:'),
                       ('F', r'/f:'), ('O', r'/o:'), ('MIN', r'/min:'),
                       ('MAX', r'/max:'), ('D', r'/d:'), ('LN', r'/ln:'),
                       ('E', r'/e:'), ('M', r'\-m'), ('MISMATCH', r'.')]

        tokens, token_regex = tokenize_init(token_specs)

        for mo in re.finditer(token_regex, string, re.DOTALL):
            kind = mo.lastgroup

            if kind == 'SKIP':
                pass
            elif kind == 'STRING':
                value = mo.group(kind)[1:-1].replace('\\"', '"')
                tokens.append(Token(kind, value, mo.start()))
            elif kind != 'MISMATCH':
                value = mo.group(kind)

                if value in keywords:
                    kind = value

                if kind in names:
                    kind = names[kind]

                tokens.append(Token(kind, value, mo.start()))
            else:
                raise TokenizeError(string, mo.start())

        return tokens
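
The tokenize() methods above share the same calling convention: pass the source text in and get a flat list of Token objects back, or a TokenizeError when the MISMATCH spec is hit. A hypothetical usage sketch follows; SymParser is just a stand-in name for whichever class carries the tokenize() from Example #6.

# SymParser is a hypothetical stand-in for the class that owns the
# tokenize() method from Example #6.
parser = SymParser()

try:
    for token in parser.tokenize('Title="Example"\n'):
        print(token.kind, repr(token.value), token.offset)
except TokenizeError as error:
    print(error)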