Example #1
    def tokenizer(self, source_file):
        # Assumes `import json` at module level, a Token class exposing
        # get_json_node(), and self.tokens_def as a list of
        # {'name': ..., 're': compiled regex} entries.
        startPos = 0
        source_json = {'lexic': []}
        i = 0
        while startPos < len(source_file):
            token_match = None
            token_found = None
            # Try each token definition in order; the first non-empty match wins.
            for el in self.tokens_def:
                match = el['re'].match(source_file, startPos)
                if match is not None and match.group() != '':
                    token_match = match
                    token_found = el['name']
                    break

            if token_match is None:
                print(i, ':', startPos, ': Unable to find a match')
                break

            token_value = token_match.group(0)
            token = Token(token_found, token_value, startPos)
            source_json['lexic'].append(token.get_json_node())
            print(i, ':', startPos, ':', len(token_value), ':"',
                  token_found, '":', token_value)
            startPos += len(token_value)
            i += 1

        return json.dumps(source_json, indent=3)
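
For context, here is a minimal, hypothetical harness showing the collaborators the method relies on: a tokens_def list of named, compiled regexes and a Token class with a get_json_node() method. The class name Lexer, the specific token definitions, and the shape of the JSON node are assumptions for illustration, not part of the original example.

import json
import re


class Token:
    # Assumed token holder; only the constructor arguments and
    # get_json_node() are implied by the example above.
    def __init__(self, name, value, pos):
        self.name, self.value, self.pos = name, value, pos

    def get_json_node(self):
        # The exact node layout is an assumption.
        return {'token': self.name, 'value': self.value, 'pos': self.pos}


class Lexer:
    # Assumed host class for the tokenizer method shown above.
    def __init__(self):
        # Order matters: the tokenizer takes the first non-empty match.
        self.tokens_def = [
            {'name': 'NUMBER', 're': re.compile(r'\d+')},
            {'name': 'IDENT',  're': re.compile(r'[A-Za-z_]\w*')},
            {'name': 'OP',     're': re.compile(r'[=+\-*/]')},
            {'name': 'WS',     're': re.compile(r'\s+')},
        ]

    # The tokenizer(self, source_file) method from the example above
    # would be pasted here.


# Example usage (once the tokenizer method is added to Lexer):
#   print(Lexer().tokenizer('x = 42 + y'))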