Example #1
def execute(self, context):
    language = context.scene.hl_language
    lexer = find_lexer_class(language)

    code = context.active_object.data.body
    text = highlight(code, lexer(), RawTokenFormatter()).decode()
    process(text, context.active_object)
    return {'FINISHED'}
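
For reference, a minimal sketch of the raw token stream such a handler consumes (`find_lexer_class` is the standard pygments.lexers helper; only pygments is assumed):

from pygments import highlight
from pygments.lexers import find_lexer_class
from pygments.formatters import RawTokenFormatter

# Look the lexer up by its human-readable name, as execute() does.
lexer = find_lexer_class("Python")

raw = highlight("x = 1", lexer(), RawTokenFormatter()).decode()
print(raw)
# Each line is "<token type>" + TAB + repr(value), for example:
#   Token.Name                      'x'
#   Token.Operator                  '='
#   Token.Literal.Number.Integer    '1'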
Example #2
def generate_syntax_objects(code):

    print_time_stamp()
    make_materials()
    add_fonts()

    seq_yielder = get_unique_sequential_name()

    # process data
    code_as_raw = highlight(code, Python3Lexer(), RawTokenFormatter())
    pre_split_lines = code_as_raw.decode('utf-8')

    # split on the newline record: "Token.Text" + a literal tab + the
    # two-character repr '\n'; the tab is written explicitly here rather
    # than as a hidden tab character.
    post_split_lines = pre_split_lines.split("Token.Text\t'\\n'")

    # write to objects
    write_lines(post_split_lines, seq_yielder)
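
To see why this split yields one chunk per source line: RawTokenFormatter ends every physical line with a Token.Text record whose value is the two-character repr '\n'. A minimal sketch (newer Pygments versions may emit Token.Text.Whitespace for the newline instead):

from pygments import highlight
from pygments.lexers import Python3Lexer
from pygments.formatters import RawTokenFormatter

raw = highlight("a = 1\nb = 2\n", Python3Lexer(), RawTokenFormatter()).decode('utf-8')
# The separator is "Token.Text" + a real tab + the repr '\n' (quote,
# backslash, n, quote), written here with explicit escapes.
chunks = raw.split("Token.Text\t'\\n'")
print(len(chunks))  # one chunk per source line, plus a trailing remainder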
Example #3
def test_invalid_raw_token():
    # These should not throw exceptions.
    assert (highlight("Tolkien", RawTokenLexer(),
                      RawTokenFormatter()) == b"Token.Error\t'Tolkien\\n'\n")
    assert (highlight("Tolkien\t'x'", RawTokenLexer(),
                      RawTokenFormatter()) == b"Token\t'x'\n")
    assert (highlight(
        "Token.Text\t42", RawTokenLexer(),
        RawTokenFormatter()) == b"Token.Error\t'Token.Text\\t42\\n'\n")
    assert (highlight(
        "Token.Text\t'", RawTokenLexer(),
        RawTokenFormatter()) == b'Token.Error\t"Token.Text\\t\'\\n"\n')
    assert (highlight("Token.Text\t'α'", RawTokenLexer(),
                      RawTokenFormatter()) == b"Token.Text\t'\\u03b1'\n")
    assert (highlight("Token.Text\tu'α'", RawTokenLexer(),
                      RawTokenFormatter()) == b"Token.Text\t'\\u03b1'\n")
    assert (highlight(b"Token.Text\t'\xff'", RawTokenLexer(),
                      RawTokenFormatter()) == b"Token.Text\t'\\xff'\n")
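
These assertions rely on RawTokenLexer downgrading anything that does not parse as a Token.X<TAB>'repr' line to Token.Error. A minimal sketch of the same behaviour outside the test:

from pygments import highlight
from pygments.lexers.special import RawTokenLexer
from pygments.formatters import RawTokenFormatter

# A well-formed raw line survives the round trip unchanged...
print(highlight("Token.Keyword\t'def'", RawTokenLexer(), RawTokenFormatter()))
# b"Token.Keyword\t'def'\n"

# ...while a malformed line becomes a Token.Error record.
print(highlight("not a raw token line", RawTokenLexer(), RawTokenFormatter()))
# b"Token.Error\t'not a raw token line\\n'\n"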
Example #4
def test_raw_token():
    code = "2 + α"
    raw = highlight(code, PythonLexer(), RawTokenFormatter())
    html = highlight(code, PythonLexer(), HtmlFormatter())

    assert highlight(raw, RawTokenLexer(), RawTokenFormatter()) == raw
    assert highlight(raw, RawTokenLexer(), HtmlFormatter()) == html
    assert highlight(raw.decode(), RawTokenLexer(), HtmlFormatter()) == html

    raw_gz = highlight(code, PythonLexer(), RawTokenFormatter(compress="gz"))
    assert gzip.decompress(raw_gz) == raw
    assert highlight(raw_gz, RawTokenLexer(compress="gz"),
                     RawTokenFormatter()) == raw
    assert (highlight(raw_gz.decode("latin1"), RawTokenLexer(compress="gz"),
                      RawTokenFormatter()) == raw)

    raw_bz2 = highlight(code, PythonLexer(), RawTokenFormatter(compress="bz2"))
    assert bz2.decompress(raw_bz2) == raw
    assert highlight(raw_bz2, RawTokenLexer(compress="bz2"),
                     RawTokenFormatter()) == raw
    assert (highlight(raw_bz2.decode("latin1"), RawTokenLexer(compress="bz2"),
                      RawTokenFormatter()) == raw)
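
Outside the test, the compress option is useful when persisting large raw token streams; a minimal sketch (the file names are illustrative):

from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import RawTokenFormatter

with open("some_module.py") as f:            # hypothetical input file
    raw_gz = highlight(f.read(), PythonLexer(), RawTokenFormatter(compress="gz"))

with open("some_module.raw.gz", "wb") as f:  # RawTokenFormatter returns bytes
    f.write(raw_gz)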
Example #5
def extract_class_and_method(java_dir, base_path):
    """ 为每个java文件提取class,function,attribute,name.
        以文件的hash为key,存为json文件
    """
    if not os.path.isdir(java_dir):
        return None
    files = os.listdir(java_dir)
    names_dict = {}
    for f in files:
        h, i = os.path.splitext(f)
        if i == '.java':
            classnames = set()
            methodnames = set()
            attributenames = set()
            names = set()
            with open(os.path.join(java_dir, f)) as fl:
                cont = fl.read()
                x = highlight(cont, JavaLexer(), RawTokenFormatter())
                for y in str(x, encoding='utf-8').splitlines():
                    ys = y.split('\t')
                    if ys[0] == 'Token.Name.Class':
                        classnames.add(eval(ys[1]))
                    elif ys[0] == 'Token.Name.Function':
                        methodnames.add(eval(ys[1]))
                    elif ys[0] == 'Token.Name.Attribute':
                        attributenames.add(eval(ys[1]))
                    elif ys[0] == 'Token.Name':
                        names.add(eval(ys[1]))
            names_dict[h] = {
                'NC': list(classnames),
                'NF': list(methodnames),
                'NA': list(attributenames),
                'N': list(names)
            }
    repo = java_dir.strip('/').split('/')[-1]
    jf = os.path.join(base_path, '{}.names.json'.format(repo))
    with open(jf, 'w') as out:
        json.dump(names_dict, out)
    return names_dict
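
A hypothetical invocation, assuming a directory of .java files named by their hash:

# e.g. repos/myrepo/<hash>.java -> output/myrepo.names.json
names = extract_class_and_method('repos/myrepo', 'output')
# names['<hash>'] == {'NC': [...], 'NF': [...], 'NA': [...], 'N': [...]}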
Example #6
    if os.path.isabs(args.source_file):
        source_file = args.source_file
    else:
        source_file = os.path.realpath(
            os.path.join(os.getcwd(), args.source_file))

    if not args.verbose:
        args.verbose = 0
    setup_logger(args.verbose)

    log.debug('[PARSING] {}'.format(source_file))
    with open(source_file, 'r') as f:
        file_content = f.read()

    # `Pygments` lexing.
    lexed_content = highlight(file_content, CoqLexer(), RawTokenFormatter())

    # Load the entire file contents into RAM as a string.
    # IMPROVEMENT: enhance the `RawTokenFormatter` class to stream this data.
    parsed_content = str(lexed_content, encoding='utf-8').splitlines()
    del lexed_content

    # Regular expression matching a raw token line.
    regex = re.compile(r"Token\.((?:\w+\.?)+)\s['|\"](.*)['|\"]\n?")

    # Detect errors in file syntax
    parsing_errors = check_raw_token_syntax(parsed_content, regex)

    # If the line format does not fit with regex, log an error
    # and exit the application
    if len(parsing_errors):
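
For reference, the regex captures the dotted token-type path and the repr'd value from one raw line; a quick check (the token name here is illustrative):

import re

regex = re.compile(r"Token\.((?:\w+\.?)+)\s['|\"](.*)['|\"]\n?")
m = regex.match("Token.Keyword.Namespace\t'Require'")
print(m.group(1))  # Keyword.Namespace
print(m.group(2))  # Require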
Example #7
                syntax_width = create_syntax_block(caret, syntax_object)
                caret.x += syntax_width

        caret.x = 0.0

        print('----newline')
        caret.y -= line_height


# ----------------- main loop

print_time_stamp()
pymat = bpy.data.materials
make_materials(material_library)
add_fonts()

seq_yielder = get_unique_sequential_name()

# ----------------- make raw data

code = bpy.context.edit_text.as_string()
code_as_raw = highlight(code, Python3Lexer(), RawTokenFormatter())

# ----------------- process data

pre_split_lines = code_as_raw.decode('utf-8')
post_split_lines = pre_split_lines.split("Token.Text\t'\\n'")  # the separator contains a literal tab

# ----------------- write data

write_lines(post_split_lines)
Example #8
def pygment_mul_line(java_lines):
    string = '\n'.join(java_lines)
    if string == '':
        return list(), dict()
    x = highlight(string, JavaLexer(), RawTokenFormatter())
    x = str(x, encoding='utf-8')
    tokenList = list()
    variableDict = dict()
    nameNum, attNum, clsNum, fucNum = 0, 0, 0, 0
    otherDict = dict()
    floatNum, numberNum, strNum = 0, 0, 0
    for y in x.splitlines():
        ys = y.split('\t')
        # print(ys)
        s = eval(ys[1])
        if s == '\n':
            tokenList.append('<nl>')
        elif s == 'NewBlock':
            tokenList.append('<nb>')
        elif s.isspace():
            lines = s.count('\n')
            for _ in range(lines):
                tokenList.append('<nl>')
        elif "Token.Literal.Number.Float" == ys[0]:
            if s not in otherDict:
                sT = 'FLOAT{}'.format(floatNum)
                otherDict[s] = sT
                floatNum += 1
            tokenList.append(otherDict[s])
        elif ys[0].startswith('Token.Literal.Number'):
            if s not in otherDict:
                sT = 'NUMBER{}'.format(numberNum)
                otherDict[s] = sT
                numberNum += 1
            tokenList.append(otherDict[s])
        elif ys[0].startswith('Token.Literal.String'):
            if s not in otherDict:
                sT = 'STRING{}'.format(strNum)
                otherDict[s] = sT
                strNum += 1
            tokenList.append(otherDict[s])
        elif "Token.Name.Namespace" == ys[0]:
            tokenList.append('NAMESPACE')
        elif "Token.Comment.Single" == ys[0]:
            tokenList.append('SINGLE')
            tokenList.append('<nl>')
        elif "Token.Comment.Multiline" == ys[0]:
            lines = s.count('\n')
            for _ in range(lines):
                tokenList.append('COMMENT')
                tokenList.append('<nl>')
            tokenList.append('COMMENT')
        elif 'Token.Name.Decorator' == ys[0]:
            tokenList.append('@')
            tokenList.append(s[1:].lower())
        elif 'Token.Name' == ys[0]:
            if s not in variableDict:
                sT = 'n{}'.format(nameNum)
                variableDict[s] = sT
                nameNum += 1
            tokenList.append(s)
        elif 'Token.Name.Attribute' == ys[0]:
            if s not in variableDict:
                sT = 'a{}'.format(attNum)
                variableDict[s] = sT
                attNum += 1
            tokenList.append(s)
        elif 'Token.Name.Class' == ys[0]:
            if s not in variableDict:
                sT = 'c{}'.format(clsNum)
                variableDict[s] = sT
                clsNum += 1
            tokenList.append(s)
        elif 'Token.Name.Function' == ys[0]:
            if s not in variableDict:
                sT = 'f{}'.format(fucNum)
                variableDict[s] = sT
                fucNum += 1
            tokenList.append(s)
        else:
            a = s.splitlines()
            for i in a:
                if i != '' and not i.isspace():
                    tokenList.append(i)
                tokenList.append('<nl>')
            if a:
                tokenList.pop()  # drop the one '<nl>' appended too many
    return tokenList, variableDict
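
A hypothetical call on two lines of Java (exact placeholders depend on the token types Pygments assigns):

tokens, variables = pygment_mul_line([
    'int count = 0;',
    'count += 1;',
])
print(tokens)     # e.g. ['int', 'count', '=', 'NUMBER0', ';', '<nl>', ...]
print(variables)  # e.g. {'count': 'n0'}, names mapped to placeholder ids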
Example #9
def pygment_one_line(linestring):
    l = list()
    namelist = list()
    attributelist = list()
    classlist = list()
    functionlist = list()
    if len(linestring) < 1 or linestring.startswith(
            '+++') or linestring.startswith('---'):
        return l, namelist, attributelist, classlist, functionlist
    st = linestring[0]
    # print(st)
    linestring = linestring[1:].strip()
    if linestring == '':
        return l, namelist, attributelist, classlist, functionlist
    if st == '@':
        l.append((2, '<NewBlock>'))
        linestring = linestring[linestring.find('@@') + 3:].strip()
        if linestring == '':
            return l, namelist, attributelist, classlist, functionlist
        cls = 2
    elif st == ' ':
        cls = 2
    elif st == '-':
        cls = 1
    elif st == '+':
        cls = 3
    else:
        return l, namelist, attributelist, classlist, functionlist
    if linestring.startswith('/*') or linestring.startswith(
            '*') or linestring.endswith('*/'):
        l.append((cls, 'JAVADOC'))
        return l, namelist, attributelist, classlist, functionlist
    x = highlight(linestring, JavaLexer(), RawTokenFormatter())
    x = str(x, encoding='utf-8')
    for y in x.splitlines():
        ys = y.split('\t')
        # print(ys)
        s = eval(ys[1]).strip(' \t\n\r')
        if s != '':
            # print(ys)
            if "Token.Literal.Number.Float" == ys[0]:
                l.append((cls, 'FLOAT'))
            elif "Token.Literal.Number.Integer" == ys[0]:
                l.append((cls, 'INTEGER'))
            elif "Token.Literal.Number.Hex" == ys[0]:
                l.append((cls, 'HEX'))
            elif "Token.Literal.String" == ys[0]:
                l.append((cls, 'STRING'))
            elif "Token.Literal.String.Char" == ys[0]:
                l.append((cls, 'CHAR'))
            elif "Token.Name.Namespace" == ys[0]:
                l.append((cls, 'NAMESPACE'))
            elif "Token.Comment.Single" == ys[0]:
                l.append((cls, 'SINGLE'))
            elif "Token.Comment.Multiline" == ys[0]:
                l.append((cls, 'MULTILINE'))
            elif 'Token.Name.Decorator' == ys[0]:
                l.append((cls, 'DECORATOR'))
            elif 'Token.Name' == ys[0]:
                namelist.append(s)
                l.append((cls, s))
            elif 'Token.Name.Attribute' == ys[0]:
                attributelist.append(s)
                l.append((cls, s))
            elif 'Token.Name.Class' == ys[0]:
                classlist.append(s)
                l.append((cls, s))
            elif 'Token.Name.Function' == ys[0]:
                functionlist.append(s)
                l.append((cls, s))
            else:
                l.append((cls, s))
    # print(l)
    return l, namelist, attributelist, classlist, functionlist
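
A hypothetical call on a single diff line, where the leading '+' marks an added line (cls == 3):

tokens, names, attrs, classes, funcs = pygment_one_line('+int x = 1;')
print(tokens)  # e.g. [(3, 'int'), (3, 'x'), (3, '='), (3, 'INTEGER'), (3, ';')]
print(names)   # ['x']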
Example #10
def function Foo
    for i in range(5):
        print("hello world!")
end function

while i < 10
begin
    inc i
    print(i)
end

def function Bar
    for i in range(5):
        print("hello world!")
end function

goto 10
"""

print(highlight(code, FooLangLexer(), TerminalFormatter()))
input()

tokens = highlight(code, FooLangLexer(), RawTokenFormatter())

tokens = tokens.decode()

for token in tokens.split("\n"):
    foobar = token.split("\t")
    if len(foobar) == 2:
        print("{token:30}    {value}".format(token=foobar[0], value=foobar[1]))
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import RawTokenFormatter

code = """
for i in range(1, 11):
    print("Hello world!")
"""

tokens = highlight(code, PythonLexer(), RawTokenFormatter())

tokens = tokens.decode()

for token in tokens.split("\n"):
    foobar = token.split("\t")
    if len(foobar) == 2:
        print("{token:30}    {value}".format(token=foobar[0], value=foobar[1]))
Example #12
text.pack() # lay out the widget
code = u'print "hello" # comment' # a line of Python code
text.insert("end", code) # insert the text into the text widget

# configure the text widget's tags
text.tag_configure("Token.Keyword", foreground='blue', font=('arial', 10, 'bold'))
text.tag_configure("Token.Text", foreground='black', font=('arial', 10, 'normal'))
text.tag_configure("Token.Literal.String", foreground='red', font=('arial', 10, 'normal'))
text.tag_configure("Token.Comment", foreground='darkgreen', font=('arial', 10, 'normal'))

code = text.get("1.0", "end-1c") # get the text from the text widget
text.delete("1.0", "end") # delete all the text from the text widget

# first approach:
from pygments import highlight # returns formatted text
for line in highlight(code, PythonLexer(), RawTokenFormatter()).decode().split("\n"): # each raw-token line produced by PythonLexer() and RawTokenFormatter()
    pair = line.split("\t") # split the line on the tab character
    if pair != ['']: # if the pair is not empty
        (token, s) = pair
        print(token, eval(s)) # print to the console
        text.insert("end", eval(s), token) # insert tagged text into the widget

# second approach:
#from pygments import lex # lexical analyzer, returns an iterator of tokens
#for token, content in lex(code, PythonLexer()):
#    print(token, content)
#    text.insert("end", content, str(token))

root.mainloop() # main program loop
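
The commented-out second approach is usually simpler: pygments.lex yields (token, value) pairs directly, with no decode/split/eval round trip. A minimal sketch:

from pygments import lex
from pygments.lexers import PythonLexer

for token, content in lex('print("hello")  # comment', PythonLexer()):
    print(token, repr(content))
# e.g.:
# Token.Name.Builtin 'print'
# Token.Punctuation '('
# Token.Comment.Single '# comment'
# ...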
"""
![](fig.png)