Example #1
from antlr4 import CommonTokenStream, InputStream
from JavaLexer import JavaLexer  # lexer generated by ANTLR from a Java grammar; module path may differ


def java_tokenize(line):
    # Wrap the input string in an ANTLR character stream.
    stream = InputStream(line)
    lexer = JavaLexer(stream)
    tokens = CommonTokenStream(lexer)
    # Buffer up to 100000 tokens from the lexer (fill() would consume the whole stream).
    tokens.fetch(100000)
    # Return the buffered antlr4 Token objects (includes the trailing EOF token).
    return tokens.tokens
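A minimal usage sketch, assuming the generated JavaLexer is importable and using a hypothetical one-line Java snippet: each returned entry is an antlr4 Token object, so the lexeme and its type id are read from its attributes.

toks = java_tokenize('int x = 42;')
for t in toks:
    # t is an antlr4 Token: .text is the lexeme, .type is the grammar's token type id.
    print(t.text, t.type)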
Example #2
from antlr4 import CommonTokenStream, InputStream
from JavaLexer import JavaLexer  # lexer generated by ANTLR from a Java grammar; module path may differ


def java_tokenize(line):
    # Wrap the input string in an ANTLR character stream.
    stream = InputStream(line)
    lexer = JavaLexer(stream)
    tokens = CommonTokenStream(lexer)
    # Buffer up to 100000 tokens from the lexer.
    tokens.fetch(100000)
    # Keep each token's text, in source order (a set would lose order and duplicates).
    token_list = [token.text for token in tokens.tokens]
    return token_list
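The same sketch for this variant, under the same import assumption: here the result is a plain list of strings rather than Token objects, which is convenient for building vocabularies or counting n-grams.

print(java_tokenize('int x = 42;'))
# e.g. ['int', 'x', '=', '42', ';', '<EOF>'] -- fetch() also buffers the trailing EOF token.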