def __preppy__vlhs__(s, NAME=token.NAME, ENDMARKER=token.ENDMARKER):
    L = []
    try:
        tokenize.tokenize(BytesIO(s.strip()).readline, lambda *a: L.append(a))
    except:
        return False
    return len(L) == 2 and L[0][0] == NAME and L[1][0] == ENDMARKER
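# Hedged Python 3 counterpart of the check above (a sketch, not the original
# project's code): the two-argument callback form of tokenize.tokenize() is
# Python 2 only, and on Python 3 the stream also yields NEWLINE/ENDMARKER
# tokens, so those are filtered out before comparing.
import io, token, tokenize

def _is_single_name(s):
    try:
        toks = list(tokenize.generate_tokens(io.StringIO(s.strip()).readline))
    except (tokenize.TokenError, IndentationError):
        return False
    kinds = [t.type for t in toks if t.type not in (token.NEWLINE, tokenize.NL)]
    return kinds == [token.NAME, token.ENDMARKER]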
def __call__(self):
    """ Parse and send the colored source. """
    # store line offsets in self.lines
    self.lines = [0, 0]
    pos = 0
    while True:
        pos = self.raw.find(b'\n', pos) + 1
        if not pos:
            break
        self.lines.append(pos)
    self.lines.append(len(self.raw))
    # parse the source and write it
    self.pos = 0
    text = BytesIO(self.raw)
    self.out.write(b'<pre class="python">\n')
    try:
        if six.PY2:
            tokenize.tokenize(text.readline, self.format_tokenizer)
        else:
            for args in tokenize.tokenize(text.readline):
                self.format_tokenizer(*args)
    except tokenize.TokenError as ex:
        msg = ex.args[0]
        line = ex.args[1][0]
        self.out.write(b"<h5 class='error'>ERROR: %s%s</h5>" % (
            msg, self.raw[self.lines[line]:]))
    self.out.write(b'\n</pre>\n')
    return safe_nativestring(self.out.getvalue())
def findUsages():
    global directory, objMap, sharedFolders
    suffixes = (".py", ".csv", ".tsv")
    directories = [directory]
    # avoid folders that will be processed anyhow
    for shared in sharedFolders:
        skip = False
        tmpS = shared + "/"
        for folder in directories:
            tmpD = folder + "/"
            if platform.system() in ('Microsoft', 'Windows'):
                tmpS = tmpS.lower()
                tmpD = tmpD.lower()
            if tmpS.startswith(tmpD):
                skip = True
                break
        if not skip:
            directories.append(shared)

    for directory in directories:
        for root, dirnames, filenames in os.walk(directory):
            for filename in filter(lambda x: x.endswith(suffixes), filenames):
                currentFile = open(os.path.join(root, filename))
                if filename.endswith(".py"):
                    tokenize.tokenize(currentFile.readline, handle_token)
                elif filename.endswith(".csv"):
                    handleDataFiles(currentFile, ",")
                elif filename.endswith(".tsv"):
                    handleDataFiles(currentFile, "\t")
                currentFile.close()

    currentFile = open(objMap)
    tokenize.tokenize(currentFile.readline, handle_token)
    currentFile.close()
def format(self): """ Parse and send the colored source. """ # store line offsets in self.lines self.lines = [0, 0] pos = 0 while 1: pos = string.find(self.raw, '\n', pos) + 1 if not pos: break self.lines.append(pos) self.lines.append(len(self.raw)) # parse the source and write it self.pos = 0 text = cStringIO.StringIO(self.raw) self.out.write(self.stylesheet) self.out.write('<pre class="code">\n') try: tokenize.tokenize(text.readline, self) except tokenize.TokenError, ex: msg = ex[0] line = ex[1][0] self.out.write("<h3>ERROR: %s</h3>%s\n" % ( msg, self.raw[self.lines[line]:])) if self.cover_flag: self.out.write('</span>') self.cover_flag = False
def format(self, filename): global HEADER # store line offsets in self.lines self.lines = [0, 0] pos = 0 while 1: pos = string.find(self.raw, '\n', pos) + 1 if not pos: break self.lines.append(pos) self.lines.append(len(self.raw)) # parse the source and write it self.pos = 0 text = cStringIO.StringIO(self.raw) HEADER = HEADER.replace("$FILE", filename) if LOCAL_CONVERT: HEADER = HEADER.replace("$HIDE_INFO", "display: none;") self.out.write(HEADER) try: tokenize.tokenize(text.readline, self) except tokenize.TokenError, ex: msg = ex[0] line = ex[1][0] self.out.write("<h3>ERROR: %s</h3>%s\n" % ( msg, self.raw[self.lines[line]:])) self.out.write('</font></pre>')
def filter(filename):
    global name, module_has_docstring

    path, name = os.path.split(filename)
    root, ext = os.path.splitext(name)

    output("namespace "+root+" {\n", (0, 0))

    # set module name for tok_eater to use if there's a module doc string
    name = root

    sys.stderr.write('Filtering "'+filename+'"...')
    f = open(filename)
    tokenize.tokenize(f.readline, tok_eater)
    f.close()
    print_comment((0, 0))

    output("\n", (0, 0))
    output("} // end of namespace\n", (0, 0))

    if not module_has_docstring:
        # Put in default namespace documentation
        output('/** \\namespace '+root+' \n', (0, 0))
        output(' \\brief Module "%s" */\n' % (root), (0, 0))

    for s in outbuffer:
        outfile.write(s)
def format(self): """ Parse and send the colorized source to output.""" # Store line offsets in self.lines self.lines = [0, 0] pos = 0 while 1: pos = string.find(self.raw, '\n', pos) + 1 if not pos: break self.lines.append(pos) self.lines.append(len(self.raw)) # Parse the source and write it self.pos = 0 text = cStringIO.StringIO(self.raw) self.out.write('<pre><font face="Lucida,Courier New">') try: tokenize.tokenize(text.readline, self) # self as handler callable except tokenize.TokenError, ex: msg = ex[0] line = ex[1][0] self.out.write("<h3>ERROR: %s</h3>%s\n" % (msg, self.raw[self.lines[line]:]))
def _read_block(input, startlineno): r"""Read an indented block of expressions startlineno is *zero* origined line number. pre:: input.readline # must have readline function Examples: #>>> _read_block(StringIO('\tfoo:\n'), 0) #0 >>> _read_block(StringIO('\tpost[]: True\n'), 0) ('post', [], [('True', 1)], 1) >>> _read_block(StringIO('\tpre: 5 + 6 > 10\n'), 0) ('pre', [], [('5 + 6 > 10', 1)], 1) >>> _read_block(StringIO('\tpost:\n\t\t5 + 6 < 12\n\t\t2 + 2 == 4\n'), 0) ('post', [], [('5 + 6 < 12', 2), ('2 + 2 == 4', 3)], 3) >>> _read_block(StringIO('\tpost[foo.bar]: # changes\n' \ ... '\t\tlen(foo.bar) > 0\n'), 0) ('post', [['foo', 'bar']], [('len ( foo . bar ) > 0', 2)], 2) Handles double colons (for re-structured text):: >>> _read_block(StringIO('\tpre:: 5 + 6 > 10\n'), 0) ('pre', [], [('5 + 6 > 10', 1)], 1) """ t = tokenizer(input, startlineno) try: tokenize.tokenize(input.readline, t.next) except Done: pass input.seek(t.offset) return (t.keyword, t.decls, t.exprs, t.endlineno)
def check_roundtrip(self, f): """ Test roundtrip for `untokenize`. `f` is an open file or a string. The source code in f is tokenized to both 5- and 2-tuples. Both sequences are converted back to source code via tokenize.untokenize(), and the latter tokenized again to 2-tuples. The test fails if the 3 pair tokenizations do not match. When untokenize bugs are fixed, untokenize with 5-tuples should reproduce code that does not contain a backslash continuation following spaces. A proper test should test this. """ # Get source code and original tokenizations if isinstance(f, str): code = f.encode('utf-8') else: code = f.read() f.close() readline = iter(code.splitlines(keepends=True)).__next__ tokens5 = list(tokenize(readline)) tokens2 = [tok[:2] for tok in tokens5] # Reproduce tokens2 from pairs bytes_from2 = untokenize(tokens2) readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__ tokens2_from2 = [tok[:2] for tok in tokenize(readline2)] self.assertEqual(tokens2_from2, tokens2) # Reproduce tokens2 from 5-tuples bytes_from5 = untokenize(tokens5) readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__ tokens2_from5 = [tok[:2] for tok in tokenize(readline5)] self.assertEqual(tokens2_from5, tokens2)
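# A minimal, hedged illustration of the 5-tuple vs 2-tuple round trips that the
# docstring above describes (this is not part of the test itself): full
# TokenInfo tuples reproduce the source exactly, while 2-tuples go through
# untokenize's compatibility mode, which regenerates equivalent but respaced code.
import io, tokenize

code = b"x = (1 + 2) * 3\n"
tokens5 = list(tokenize.tokenize(io.BytesIO(code).readline))
assert tokenize.untokenize(tokens5) == code                  # exact round trip
recoded = tokenize.untokenize(tok[:2] for tok in tokens5)    # respaced but equivalent
assert compile(recoded, "<roundtrip>", "exec") is not None   # still valid source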
def format(self, linenumber=True): """ Parse and send the colored source. """ # store line offsets in self.lines self.lines = [0, 0] pos = 0 while 1: pos = self.raw.find('\n', pos) + 1 if not pos: break self.lines.append(pos) self.lines.append(len(self.raw)) # write line numbers if linenumber: self.result.append('<table border="0"><tr><td align="right" valign="top">') self.result.append('<td align="right" valign="top"><pre><font face="Lucida,Courier New" color="%s">' % _colors[_TEXT]) for idx in range(1, len(self.lines)-1): self.result.append('%3d \n' % idx) self.result.append('</font></pre></td><td valign="top">') # parse the source and write it self.pos = 0 text = StringIO.StringIO(self.raw) self.result.append('<pre><font face="Lucida,Courier New">') try: tokenize.tokenize(text.readline, self) except tokenize.TokenError, ex: msg = ex[0] line = ex[1][0] self.result.append("<h3>ERROR: %s</h3>%s\n" % ( msg, self.raw[self.lines[line]:]))
def parse(self, source): """ Parse and send the colored source. """ self.source = string.expandtabs(source) self.tokenlist = [] # store line offsets in self.offset self.offset = [0, 0] self.lines = 0 pos = 0 while pos < len(self.source): self.lines = self.lines + 1 pos = string.find(self.source, '\n', pos) + 1 if not pos: break self.offset.append(pos) self.offset.append(len(self.source)) # parse the source self.pos = 0 text = cStringIO.StringIO(self.source) try: tokenize.tokenize(text.readline, self) except tokenize.TokenError, ex: msg = ex[0] line = ex[1][0] raise ParseError("ERROR %s\n%s" % ( msg, self.source[self.offset[line]:]))
def format(self, formatter): """ Parse and send the colored source. """ # store line offsets in self.lines self.lines = [0, 0] pos = 0 while 1: try: pos = self.raw.index('\n', pos) + 1 except ValueError: break self.lines.append(pos) self.lines.append(len(self.raw)) self.result = [] # collects output self._code_id = hash_new('sha1', self.raw.encode(config.charset)).hexdigest() self.result.append(formatter.code_area(1, self._code_id, 'ColorizedPython', self.show_num, self.num_start, self.num_step)) self.formatter = formatter self.result.append(formatter.code_line(1)) #len('%d' % (len(self.lines)-1, ))) # parse the source and write it self.pos = 0 text = StringIO.StringIO(self.raw) try: tokenize.tokenize(text.readline, self) except IndentationError, ex: msg = ex[0] errmsg = (self.formatter.linebreak() + self.formatter.strong(1) + "ERROR: %s" % msg + self.formatter.strong(0) + self.formatter.linebreak()) self.result.append(errmsg)
def colorize(self): """ Return an HTML string that renders the source code for the module that was specified in the constructor. """ # Initialize all our state variables self.pos = 0 self.cur_line = [] self.context = [] self.indents = [] self.lineno = 1 self.def_name = None # Load the module's text. self.text = open(self.module_filename).read() self.text = self.text.expandtabs().rstrip()+'\n' # Construct the line_offsets table. self.find_line_offsets() num_lines = self.text.count('\n')+1 self.linenum_size = len(`num_lines+1`) # Call the tokenizer, and send tokens to our `tokeneater()` # method. If anything goes wrong, then fall-back to using # the input text as-is (with no colorization). try: output = StringIO() self.out = output.write tokenize.tokenize(StringIO(self.text).readline, self.tokeneater) html = output.getvalue() except tokenize.TokenError, ex: html = self.text
def stringioize(self, string): """(internal) the following is really just a stupid hack to emulate the quirky behavior of the string tokenizer in java; it is a historical artifact that just isn't badly broken enough to require being removed yet. """ self.tokens = [] self._neg = None fd = StringIO.StringIO(string) tokenize.tokenize(fd.readline,self.eat) self.reset() sn = self.next() try: while sn.ttype != tokenize.ERRORTOKEN: sn = self.next() # this is the best part. It works completely by accident. # After 3 tries, you end up with a """ on the end of your # string, which is a multi-line string -- the tokenizer # will throw an exception for that (god knows why it # doesn't throw an exception for an EOF in a single-line # string...) self.stringioize(string+'"') except: pass # import traceback # traceback.print_exc() self.reset()
def __waiting(self, ttype, tstring, lineno): opts = self.__options # Do docstring extractions, if enabled if opts.docstrings and not opts.nodocstrings.get(self.__curfile): # module docstring? if self.__freshmodule: if ttype == tokenize.STRING: self.__addentry(safe_eval(tstring), lineno, isdocstring=1) self.__freshmodule = 0 elif ttype not in (tokenize.COMMENT, tokenize.NL): self.__freshmodule = 0 return # class docstring? if ttype == tokenize.NAME and tstring in ('class', 'def'): self.__state = self.__suiteseen return if ttype == tokenize.NAME and tstring in opts.keywords: self.__state = self.__keywordseen # In order to extract messages encapsulated in a string; for example, in view.mako: # var vLogTypeList = Array("${_('System')}", "${_('Traffic')}"); pattern = '(%s)\(.*\)' % '|'.join(opts.keywords) if ttype == tokenize.STRING and re.search(pattern, tstring): tstring = tstring.strip('\'"') tokenize.tokenize(StringIO.StringIO(tstring).readline, self)
def findMultiLineQuote(s):
    quotelist = []
    def eatToken(type, string, begin, end, _, quotelist=quotelist):
        if type == token.STRING and RE_MULTI_LINE_QUOTE_BEGIN.match(string):
            quotelist.append((string, begin, end))
    tokenize.tokenize(StringIO(s).readline, eatToken)
    return quotelist
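# Hedged Python 3 sketch of findMultiLineQuote above: the callback form of
# tokenize.tokenize() no longer exists, so this iterates generate_tokens()
# instead. RE_MULTI_LINE_QUOTE_BEGIN is assumed to be the regex used by the
# original snippet.
import io, token, tokenize

def find_multi_line_quote_py3(s):
    quotelist = []
    for tok in tokenize.generate_tokens(io.StringIO(s).readline):
        if tok.type == token.STRING and RE_MULTI_LINE_QUOTE_BEGIN.match(tok.string):
            quotelist.append((tok.string, tok.start, tok.end))
    return quotelist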
def format(self, showLineNums=0):
    """ Parse and send the colored source. """
    # store line offsets in self.lines
    self.lines = [0, 0]
    pos = 0
    while 1:
        pos = string.find(self.raw, '\n', pos) + 1
        if not pos:
            break
        self.lines.append(pos)
    self.lines.append(len(self.raw))
    # write line numbers
    if showLineNums:
        self.lineNums = cStringIO.StringIO()
        self.lineNums.write('<pre>')
        for idx in range(1, len(self.lines)-1):
            self.lineNums.write('%3d \n' % idx)
        self.lineNums.write('</pre>')
        #self.out.write('<pre>')
    # parse the source and write it
    self.pos = 0
    text = cStringIO.StringIO(self.raw)
    try:
        tokenize.tokenize(text.readline, self)
    except tokenize.TokenError, ex:
        msg = ex[0]
        line = ex[1][0]
        self.out.write('[ERROR: %s]<font color="red">%s</font>\n' % (
            msg, self.raw[self.lines[line]:]))
def format(self, formatter, form): ''' Parse and send the colored source. ''' # Store line offsets in self.lines self.lines = [0, 0] pos = 0 # Gather lines while 1: pos = string.find(self.raw, '\n', pos) + 1 if not pos: break self.lines.append(pos) self.lines.append(len(self.raw)) # Wrap text in a filelike object self.pos = 0 text = cStringIO.StringIO(self.raw) # Html start self.doPageStart() # Parse the source. ## Tokenize calls the __call__ ## function for each token till done. try: tokenize.tokenize(text.readline, self) except tokenize.TokenError, ex: msg = ex[0] line = ex[1][0] self.out.write("<h3>ERROR: %s</h3>%s\n" % ( msg, self.raw[self.lines[line]:]))
def format(self): """ Parse and send the colored source. """ # store line offsets in self.lines self.lines = [0, 0] pos = 0 while True: pos = string.find(self.raw, '\n', pos) + 1 if not pos: break self.lines.append(pos) self.lines.append(len(self.raw)) # parse the source and write it self.pos = 0 text = StringIO(self.raw) self.out.write('<pre class="python">\n') try: tokenize.tokenize(text.readline, self) except tokenize.TokenError as ex: msg = ex[0] line = ex[1][0] self.out.write("<h5 class='error>'ERROR: %s%s</h5>" % ( msg, self.raw[self.lines[line]:])) self.out.write('\n</pre>\n')
def readFile(file, state): """ readFile( filename, State-object) Open the config file 'filename' and pass file descriptor to the tokenizer. Returns: nothing """ # Get the directory name of the current config file #state.dir = file[:string.rfind(file, '/')]+'/' state.dir = os.path.dirname( file ) try: conf = open(file, 'r') except IOError: print "Error opening file '%s'" % file; log.log( "<parseConfig>readFile(), Error, Cannot open '%s' - skipping" % (file), 4 ) return # add this filename to the list of config files configfiles.append(file) # Let tokenize.tokenize() parse the file into tokens which it will pass to # state.tokeneater() which will parse the tokens and create something # meaningful. try: tokenize.tokenize(conf.readline, state.tokeneater) except tokenize.TokenError, msg: raise config.ParseFailure, "Syntax error, %s"%(msg)
def format(self, formatter): """ Parse and send the colored source. """ # store line offsets in self.lines self.lines = [0, 0] pos = 0 while 1: pos = self.raw.find('\n', pos) + 1 if not pos: break self.lines.append(pos) self.lines.append(len(self.raw)) self._code_id = sha.new(self.raw.encode(config.charset)).hexdigest() self.request.write(formatter.code_area(1, self._code_id, 'ColorizedPython', self.show_num, self.num_start, self.num_step)) self.formatter = formatter self.request.write(formatter.code_line(1)) #len('%d' % (len(self.lines)-1, ))) # parse the source and write it self.pos = 0 text = StringIO.StringIO(self.raw) try: tokenize.tokenize(text.readline, self) except tokenize.TokenError, ex: msg = ex[0] line = ex[1][0] self.request.write("<b>ERROR: %s</b><br>%s\n" % ( msg, self.formatter.text(self.raw[self.lines[line]:])))
def inspect_traceback(tb): """Inspect a traceback and its frame, returning source for the expression where the exception was raised, with simple variable replacement performed and the line on which the exception was raised marked with '>>' """ log.debug('inspect traceback %s', tb) # we only want the innermost frame, where the exception was raised while tb.tb_next: tb = tb.tb_next frame = tb.tb_frame lines, exc_line = tbsource(tb) # figure out the set of lines to grab. inspect_lines, mark_line = find_inspectable_lines(lines, exc_line) src = StringIO(textwrap.dedent(''.join(inspect_lines))) exp = Expander(frame.f_locals, frame.f_globals) while inspect_lines: try: tokenize.tokenize(src.readline, exp) except tokenize.TokenError, e: # this can happen if our inspectable region happens to butt up # against the end of a construct like a docstring with the closing # """ on separate line log.debug("Tokenizer error: %s", e) inspect_lines.pop(0) mark_line -= 1 src = StringIO(textwrap.dedent(''.join(inspect_lines))) exp = Expander(frame.f_locals, frame.f_globals) continue break
def format(self, formatter, form): """ Parse and send the colored source. """ # store line offsets in self.lines self.lines = [0, 0] pos = 0 while 1: pos = string.find(self.raw, '\n', pos) + 1 if not pos: break self.lines.append(pos) self.lines.append(len(self.raw)) # parse the source and write it self.pos = 0 text = cStringIO.StringIO(self.raw) self.out.write("""<?xml version="1.0" encoding="utf-8" ?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> <meta name="generator" content="Color.py" /> <title>%s</title> <link rel="stylesheet" href="../doc.css" type="text/css" /> </head> <body> <pre class="literal-block">""" % self.title) try: tokenize.tokenize(text.readline, self) except tokenize.TokenError, ex: msg = ex[0] line = ex[1][0] self.out.write("<h3>ERROR: %s</h3>%s\n" % ( msg, self.raw[self.lines[line]:]))
def check(file):
    if os.path.isdir(file) and not os.path.islink(file):
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if (os.path.isdir(fullname) and
                    not os.path.islink(fullname) or
                    os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    f = open(file)
    reset_globals()
    try:
        tokenize.tokenize(f.readline, tokeneater)
    except NannyNag, nag:
        badline = nag.get_lineno()
        return badline
def getTokens(command): """Return list of token tuples for command.""" # In case the command is unicode try encoding it if isinstance(command, str): try: command = command.encode('utf-8') except UnicodeEncodeError: pass # otherwise leave it alone f = StringIO(command) # tokens is a list of token tuples, each looking like: # (type, string, (srow, scol), (erow, ecol), line) tokens = [] # Can't use list comprehension: # tokens = [token for token in tokenize.generate_tokens(f.readline)] # because of need to append as much as possible before TokenError. try: ## This code wasn't backward compatible with Python 2.1.3. ## ## for token in tokenize.generate_tokens(f.readline): ## tokens.append(token) # This works with Python 2.1.3 (with nested_scopes). if not PY3: def eater(*args): tokens.append(args) tokenize.tokenize_loop(f.readline, eater) else: tokenize.tokenize(f.readline) except tokenize.TokenError: # This is due to a premature EOF, which we expect since we are # feeding in fragments of Python code. pass return tokens
def extracts(self): # calculate escapes make_escapes(self.options.escape) # calculate all keywords self.options.keywords.extend(default_keywords) # slurp through all the files eater = TokenEater(self.options) fp = self.pythonCode closep = 1 try: # eater.set_filename(self.filename) try: tokenize.tokenize(fp.readline, eater) except tokenize.TokenError, e: print >> sys.stderr, '%s: %s, line %d, column %d' % ( e[0], filename, e[1][0], e[1][1]) finally: if closep: fp.close() # write the output fp = sys.stdout closep = 0 res=[] try: res=eater.write(fp) finally: if closep: fp.close() return res
def py_strings(dir, domain="none", exclude=()): """Retrieve all Python messages from `dir` that are in the `domain`. """ eater = TokenEater() make_escapes(0) for filename in find_files( # We want to include cpy and vpy scripts as well # dir, '*.py', exclude=('extract.py', 'pygettext.py')+tuple(exclude)): # noqa dir, '*.*py', exclude=('extract.py', 'pygettext.py') + tuple(exclude) ): fp = codecs.open(filename, 'r', DEFAULT_CHARSET) try: eater.set_filename(filename) try: tokenize.tokenize(fp.readline, eater) except tokenize.TokenError, e: print >> sys.stderr, '%s: %s, line %d, column %d' % ( e[0], filename, e[1][0], e[1][1]) finally: fp.close() # One limitation of the Python message extractor is that it cannot # determine the domain of the string, since it is not contained anywhere # directly. The only way this could be done is by loading the module and # inspect the '_' function. For now we simply assume that all the found # strings have the domain the user specified. return eater.getCatalog()
def __call__(self, raw): """ Parse and send the colored source. """ self.out = cStringIO.StringIO() self.raw = raw.expandtabs().strip() # store line offsets in self.lines self.lines = [0, 0] pos = 0 while 1: pos = self.raw.find('\n', pos) + 1 if not pos: break self.lines.append(pos) self.lines.append(len(self.raw)) # # parse the source and write it self.pos = 0 text = cStringIO.StringIO(self.raw) self.out.write("<table width=100% cellpadding=0 cellspacing=0 " + """onclick="toggle_hidden('pysrc%d','toggle%d');"><tr> <td rowspan="3"> """ % (self.pysrcid, self.pysrcid) ) self.out.write("""<div class="pysrc" id="pysrc%dinv" style="display: none">...</div>"""% self.pysrcid) self.out.write('<div class="pysrc" id="pysrc%d" style="display: block ">'% self.pysrcid) try: tokenize.tokenize(text.readline, self.format) except tokenize.TokenError, ex: msg = ex[0] line = ex[1][0] print >> self.out, ("<h3>ERROR: %s</h3>%s" % (msg, self.raw[self.lines[line]:]))
def getblock(lines):
    """Extract the block of code at the top of the given list of lines."""
    blockfinder = BlockFinder()
    try:
        tokenize.tokenize(iter(lines).next, blockfinder.tokeneater)
    except (EndOfBlock, IndentationError):
        pass
    return lines[:blockfinder.last]
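# Hedged Python 3 sketch of getblock above (the callback form of
# tokenize.tokenize() is Python 2 only): generate_tokens() accepts a readline
# returning str, so the same list-of-lines iterator still works. BlockFinder
# and EndOfBlock are assumed to be the same helpers used by the original.
import tokenize

def getblock_py3(lines):
    blockfinder = BlockFinder()
    try:
        for tok in tokenize.generate_tokens(iter(lines).__next__):
            blockfinder.tokeneater(*tok)
    except (EndOfBlock, IndentationError):
        pass
    return lines[:blockfinder.last]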
def filter(inp, out, writer=HTMLWriter):
    out.write('<pre>\n')
    printer = TokenPrinter(writer(out.write).write).printtoken
    try:
        tokenize.tokenize(inp.readline, printer)
    except tokenize.TokenError:
        pass
    out.write('</pre>\n')
if token.LPAR <= toktype and toktype <= token.OP: toktype = token.OP elif toktype == token.NAME and keyword.iskeyword(toktext): toktype = _KEYWORD elif toktype == token.NAME and toktext in _colors.keys(): toktype = toktext style = _styles.get(toktype, _styles[_TEXT]) # send text out.write('<span class="%s">' % style) out.write(cgi.escape(toktext)) out.write('</span>') try: tokenize.tokenize(text.readline, call) except tokenize.TokenError, ex: msg = ex[0] line = ex[1][0] out.write("<h3>ERROR: %s</h3>%s\n" % (msg, raw[lines[line]:])) return 1 out.write('</font></pre></table>') info = str(source[1]) if os.path.isfile(info): sout = open(info) progs = sout.read() sout.close()
def tokenize_python(code, keep_comments=False, process_strings=True): assert isinstance(code, str) code = code.replace(r"\r", "") code = code.replace("\r", "") tokens = [] try: iterator = tokenize.tokenize(BytesIO(code.encode("utf-8")).readline) except SyntaxError as excep: raise SyntaxError(excep) removed_docstr = 0 while True: try: toktype, tok, _, _, line = next(iterator) except ( tokenize.TokenError, IndentationError, SyntaxError, UnicodeDecodeError, ) as e: raise ValueError( f'Impossible to parse tokens because of incorrect source code "{e}" ...' ) except StopIteration: raise Exception(f"End of iterator before ENDMARKER token.") if toktype == tokenize.ENCODING or toktype == tokenize.NL: continue elif toktype == tokenize.NEWLINE: if removed_docstr == 1: removed_docstr = 0 continue tokens.append("NEW_LINE") elif toktype == tokenize.COMMENT: if keep_comments: com = process_string( tok, PYTHON_CHAR2TOKEN, PYTHON_TOKEN2CHAR, True, do_whole_processing=process_strings, ) if len(com) > 0: tokens.append(com) else: continue elif toktype == tokenize.STRING: if tok == line.strip(): # docstring if not keep_comments: removed_docstr = 1 continue else: coms = process_string( tok, PYTHON_CHAR2TOKEN, PYTHON_TOKEN2CHAR, False, do_whole_processing=process_strings, ) if len(coms) > 0: tokens.append(coms) else: removed_docstr = 1 else: tokens.append( process_string( tok, PYTHON_CHAR2TOKEN, PYTHON_TOKEN2CHAR, False, do_whole_processing=process_strings, )) elif toktype == tokenize.INDENT: tokens.append("INDENT") elif toktype == tokenize.DEDENT: # empty block if tokens[-1] == "INDENT": tokens = tokens[:-1] else: tokens.append("DEDENT") elif toktype == tokenize.ENDMARKER: tokens.append("ENDMARKER") break else: tokens.append(tok) assert tokens[-1] == "ENDMARKER", "Error, no end marker" return tokens[:-1]
def preprocess(lines): lines = [l.rstrip() for l in lines] data = [(lnr, l) for lnr, l in enumerate(lines)] # Handle line continuation. no_backslash = [] while data: lnr, l = data.pop(0) while l.endswith("\\"): l = l[:-1] try: _, nextl = data.pop(0) l += nextl except IndexError: break no_backslash.append((lnr, l.strip())) variables = dict(mem for mem in inspect.getmembers(math) if not mem[0].startswith("_")) variables.update({c: Reg(c) for c in string.ascii_lowercase}) variables["pow"] = pow variables["math"] = math variables["data"] = Data num_instructions = 0 # Label pass. for lnr, l in no_backslash: if l.startswith("#") or not l: continue parse = re.search("^([a-zA-Z_][a-zA-Z0-9_]+)\s*(.*)", l) if parse is None: raise SyntaxError("Syntax error on line {}:\n{}".format( lnr + 1, lines[lnr])) ident, rest = parse.groups() if rest.startswith(":"): rest = rest[1:].lstrip() if not (rest.startswith("#") or not rest): raise SyntaxError( "Trailing characters after label on line {}:\n{}".format( lnr + 1, lines[lnr])) if ident in variables and isinstance(variables[ident], Label): raise SyntaxError( "Duplicate label name on line {}:\n{}".format( lnr + 1, lines[lnr])) variables[ident] = Label(num_instructions, ident) elif not rest.startswith("="): num_instructions += 1 # Read instructions and assignments. instructions = [] for lnr, l in no_backslash: if l.startswith("#") or not l: continue # Syntax already checked last time. ident, rest = re.search("^([a-zA-Z_][a-zA-Z0-9_]+)\s*(.*)", l).groups() # Strip comments (reuse Python's tokenizer to correctly handle comments in strings, etc). tokens = tokenize.tokenize(io.BytesIO(rest.encode("utf-8")).readline) stripped_tokens = [] for typ, tok, _, _, _ in tokens: if typ == tokenize.COMMENT: continue stripped_tokens.append((typ, tok)) rest = tokenize.untokenize(stripped_tokens).decode("utf-8") # Assignment. if rest.startswith("="): if ident in variables and isinstance(variables[ident], Label): raise SyntaxError( "Overwriting label name on line {}:\n{}".format( lnr + 1, lines[lnr])) variables[ident] = eval(rest[1:], variables) # Instruction. elif not rest.startswith(":"): args = list(eval("(None, {})".format(rest), variables)[1:]) check_instr_arguments(ident, args, lnr, lines) instructions.append(Instr(lnr, ident, args)) return instructions
def _remake_command(self, cmd, selector=None, receiver=None): from tokenize import tokenize, untokenize, NAME, OP, STRING DOT = (OP, '.') COLON = (OP, ':') COMMA = (OP, ',') OBRAC = (OP, '[') CBRAC = (OP, ']') OPAR = (OP, '(') CPAR = (OP, ')') from io import BytesIO recommand = [] if receiver: recommand += [(NAME, receiver), OBRAC, COLON, CBRAC, (OP, '='), ] try: cmd_encode = cmd.encode('utf-8') except AttributeError: cmd_encode = str(cmd).encode('utf-8') dims = len(self.shape) g = tokenize(BytesIO(cmd_encode).readline) if selector is None: screen_tokens = [COLON,] else: # try: # slicer_encode = selector.encode('utf-8') # except AttributeError: # slicer_encode = str(selector).encode('utf-8') # screen_tokens = [(toknum, tokval) for toknum, tokval, _, _, _ in tokenize(BytesIO(slicer_encode).readline)] screen_tokens = [(NAME, 'selector'), ] for toknum, tokval, _, _, _ in g: if toknum == NAME and tokval in self.data: # replace NAME tokens partial = [(NAME, 'self'), DOT, (NAME, 'data'), DOT, (NAME, tokval), OBRAC, ] partial += screen_tokens if len(self._groupnode._v_children[tokval].shape)>1: partial += [COMMA, COLON, ] if len(self._groupnode._v_children[tokval].shape)>2: partial += [COMMA, COLON, ] if len(self._groupnode._v_children[tokval].shape)>3: partial += [COMMA, COLON, ] partial += [CBRAC,] recommand.extend(partial) elif toknum == NAME and tokval in self.lookup: # replace NAME tokens partial = [(NAME, 'self'), DOT, (NAME, 'lookup'), DOT, (NAME, tokval), OBRAC, ] partial += screen_tokens if len(self._groupnode._v_children[tokval].shape) > 1: partial += [COMMA, COLON, ] if len(self._groupnode._v_children[tokval].shape) > 2: partial += [COMMA, COLON, ] if len(self._groupnode._v_children[tokval].shape) > 3: partial += [COMMA, COLON, ] partial += [CBRAC, ] recommand.extend(partial) else: recommand.append((toknum, tokval)) # print("<recommand>") # print(recommand) # print("</recommand>") ret = untokenize(recommand).decode('utf-8') from .util.aster import asterize return asterize(ret, mode="exec" if receiver is not None else "eval"), ret
def split_to_lines(self, source): lines = [] current_line = 1 current_col = 0 buffer = "" current_type = None source_io = io.BytesIO(source.encode()) formatter = Formatter() def readline(): return formatter.format( formatter.escape(source_io.readline().decode())).encode() tokens = tokenize.tokenize(readline) line = "" for token_info in tokens: token_type, token_string, start, end, _ = token_info lineno = start[0] if lineno == 0: # Encoding line continue if token_type == tokenize.ENDMARKER: # End of source if current_type is None: current_type = self.TOKEN_DEFAULT line += "<{}>{}</>".format(self._theme[current_type], buffer) lines.append(line) break if lineno > current_line: if current_type is None: current_type = self.TOKEN_DEFAULT diff = lineno - current_line if diff > 1: lines += [""] * (diff - 1) line += "<{}>{}</>".format(self._theme[current_type], buffer.rstrip("\n")) # New line lines.append(line) line = "" current_line = lineno current_col = 0 buffer = "" if token_string in self.KEYWORDS: new_type = self.TOKEN_KEYWORD elif token_string in self.BUILTINS or token_string == "self": new_type = self.TOKEN_BUILTIN elif token_type == tokenize.STRING: new_type = self.TOKEN_STRING elif token_type == tokenize.NUMBER: new_type = self.TOKEN_NUMBER elif token_type == tokenize.COMMENT: new_type = self.TOKEN_COMMENT elif token_type == tokenize.OP: new_type = self.TOKEN_OP elif token_type == tokenize.NEWLINE: continue else: new_type = self.TOKEN_DEFAULT if current_type is None: current_type = new_type if start[1] > current_col: buffer += token_info.line[current_col:start[1]] if current_type != new_type: line += "<{}>{}</>".format(self._theme[current_type], buffer) buffer = "" current_type = new_type if lineno < end[0]: # The token spans multiple lines token_lines = token_string.split("\n") line += "<{}>{}</>".format(self._theme[current_type], token_lines[0]) lines.append(line) for token_line in token_lines[1:-1]: lines.append("<{}>{}</>".format(self._theme[current_type], token_line)) current_line = end[0] buffer = token_lines[-1][:end[1]] line = "" continue buffer += token_string current_col = end[1] current_line = lineno return lines
def build_code_comment_pairs(self): if not self.functions: code_path = self.base_path / "code.pkl" if isfile(code_path): self.functions = pickle.load(open(code_path, "rb")) else: raise RuntimeWarning("Function dataset has not been built!!") filepath = self.base_path / "{}.pkl".format(self.name) code_filepath = self.base_path / "clean_code_data.pkl" if isfile(filepath) and isfile(code_filepath): self.code_comment_pairs = pickle.load(open(filepath, "rb")) self.clean_code_data = pickle.load(open(code_filepath, "rb")) return num_docs = 0 for idx, (identifier, code) in enumerate(tqdm(self.functions.items())): found_doc = False clean_code, clean_doc = list(), "" try: token_code = list( tok.tokenize(BytesIO(code.encode('utf-8')).readline)) for tok_type, token, (line, _), _, full_line in token_code: if tok_type == tok.COMMENT or tok_type == tok.ENCODING: continue if tok_type == tok.STRING and ("\"\"\"" in token or "'''" in token): full_line = full_line.strip() if full_line.endswith("'''") or full_line.endswith( "\"\"\""): for tok_type2, token2, ( line2, _), _, full_line2 in token_code: if line2 == line - 1 and "def" in full_line2: found_doc = True break elif line2 >= line: break if found_doc: clean_token = token.strip("\"\"\"").strip( "'''").strip("r\"\"\"").strip() double_newline = clean_token.find("\n\n") if double_newline > 1: clean_token = clean_token[:double_newline] param_idx = clean_token.find("Parameters\n") param_colon = clean_token.find("Parameters:\n") arrow_idx = clean_token.find(">>>") long_line = clean_token.find("----------\n") example_colon = clean_token.find("Example::\n") examples_colon = clean_token.find( "Examples::\n") refs_colon = clean_token.find("References::\n") examples = clean_token.find("Examples\n") example_Usage = clean_token.find( "Example Usage:\n") example_usage = clean_token.find( "Example usage:\n") requirements = clean_token.find( "Requirements\n") see_also_idx = clean_token.find("See Also\n") indices = [ s for s in [ param_idx, param_colon, arrow_idx, long_line, example_colon, examples, examples_colon, refs_colon, example_usage, example_Usage, requirements, see_also_idx ] if s >= 0 ] if len(indices) > 0: clean_doc += clean_token[:min(indices)] else: clean_doc += clean_token # if "----------" in clean_doc or "Example" in clean_doc: # print(clean_token) clean_doc = clean_doc.strip() if len(clean_doc) > 1: num_docs += 1 else: found_doc = False else: clean_code.append("<STRING>") elif tok_type == tok.NEWLINE or tok_type == tok.NL: clean_code.append("<NEWLINE>") elif tok_type == tok.INDENT: clean_code.append("<TAB>") elif tok_type == tok.DEDENT: clean_code.append("<UNTAB>") elif tok_type == tok.ENDMARKER: clean_code.append("<END>") elif tok_type == tok.NUMBER: number_sequence = clean_number(token) clean_code.extend(number_sequence) elif tok_type == tok.STRING: clean_code.append("<STRING>") elif tok_type == tok.NAME: if token in RESERVED_WORDS or token in BUILTIN_FUNCTIONS: clean_code.append(token) else: identifier_sequence = clean_identifier(token) clean_code.extend(identifier_sequence) else: clean_code.extend(token.split()) self.clean_code_data[identifier] = clean_code if found_doc: clean_doc = word_tokenize(clean_doc) clean_doct_str = " ".join(clean_doc) first_period = clean_doct_str.find(" . ") if 0 < first_period < 5: second_period = clean_doct_str.find( " . 
", first_period + 3) clean_doct_str = clean_doct_str[:second_period + 3] elif first_period > 0: clean_doct_str = clean_doct_str[:first_period + 3] clean_doc = clean_doct_str.split() clean_doc = superclean_docstring(clean_doc) if len(clean_code) <= 3000 and len(clean_doc) <= 300: clean_code = ["<BoC>"] + clean_code + ["<EoC>"] clean_doc = ["<BoL>"] + clean_doc + ["<EoL>"] self.code_comment_pairs[identifier] = (clean_code, clean_doc) except tok.TokenError as e: print(e) # sys.exit() code = [(name, code) for name, (code, comm) in self.code_comment_pairs.items()] print("Sorting code") code.sort(key=lambda tup: tup[1]) print("Code is sorted") list_of_dup_lists = list() for idx, (name1, code1) in enumerate(tqdm(code, desc="Finding dups")): if idx < len(code): dup_list = list() for (name2, code2) in code[idx + 1:]: codestr1 = " ".join(code1) codestr2 = " ".join(code2) if codestr1 == codestr2: dup_list.extend([name1, name2]) else: break if len(dup_list) > 0: dup_list = list(set(dup_list)) dup_list.sort( key=lambda tup: (len(tup[0]), tup[1], tup[0])) list_of_dup_lists.append(dup_list) prev_length = len(self.code_comment_pairs.keys()) for dup_list in list_of_dup_lists: for key in dup_list[1:]: if key in self.code_comment_pairs: del self.code_comment_pairs[key] new_length = len(self.code_comment_pairs.keys()) print("Code/comm had {} examples, now has {} examples".format( prev_length, new_length)) code = [(name, code) for name, code in self.clean_code_data.items()] print("Sorting code") code.sort(key=lambda tup: tup[1]) print("Code is sorted") list_of_dup_lists = list() for idx, (name1, code1) in enumerate(tqdm(code, desc="Finding dups")): if idx < len(code): dup_list = list() for (name2, code2) in code[idx + 1:]: codestr1 = " ".join(code1) codestr2 = " ".join(code2) if codestr1 == codestr2: dup_list.extend([name1, name2]) else: break if len(dup_list) > 0: dup_list = list(set(dup_list)) dup_list.sort( key=lambda tup: (len(tup[0]), tup[1], tup[0])) list_of_dup_lists.append(dup_list) prev_length = len(self.clean_code_data.keys()) for dup_list in list_of_dup_lists: for key in dup_list[1:]: if key in self.clean_code_data: del self.clean_code_data[key] new_length = len(self.clean_code_data.keys()) print("Full code had {} examples, now has {} examples".format( prev_length, new_length)) pickle.dump(self.code_comment_pairs, open(filepath, "wb")) pickle.dump(self.clean_code_data, open(code_filepath, "wb"))
def set_by_str(self, f):
    tk_list = list(tokenize(BytesIO(f.strip().encode('utf-8')).readline))[1:-1]
    self.token_list = [tk.string for tk in tk_list]
    self.type_list = [token.tok_name[tk.type] for tk in tk_list]
def text(self, etype, evalue, etb, context=5): """Return a nice text document describing the traceback.""" # some locals try: etype = etype.__name__ except AttributeError: pass Colors = self.Colors # just a shorthand + quicker name lookup ColorsNormal = Colors.Normal # used a lot col_scheme = self.color_scheme_table.active_scheme_name indent = ' ' * INDENT_SIZE em_normal = '%s\n%s%s' % (Colors.valEm, indent, ColorsNormal) undefined = '%sundefined%s' % (Colors.em, ColorsNormal) exc = '%s%s%s' % (Colors.excName, etype, ColorsNormal) # some internal-use functions def text_repr(value): """Hopefully pretty robust repr equivalent.""" # this is pretty horrible but should always return *something* try: return pydoc.text.repr(value) except KeyboardInterrupt: raise except: try: return repr(value) except KeyboardInterrupt: raise except: try: # all still in an except block so we catch # getattr raising name = getattr(value, '__name__', None) if name: # ick, recursion return text_repr(name) klass = getattr(value, '__class__', None) if klass: return '%s instance' % text_repr(klass) except KeyboardInterrupt: raise except: return 'UNRECOVERABLE REPR FAILURE' def eqrepr(value, repr=text_repr): return '=%s' % repr(value) def nullrepr(value, repr=text_repr): return '' # meat of the code begins try: etype = etype.__name__ except AttributeError: pass if self.long_header: # Header with the exception type, python version, and date pyver = 'Python ' + string.split( sys.version)[0] + ': ' + sys.executable date = time.ctime(time.time()) head = '%s%s%s\n%s%s%s\n%s' % (Colors.topline, '-' * 75, ColorsNormal, exc, ' ' * (75 - len(str(etype)) - len(pyver)), pyver, string.rjust(date, 75)) head += "\nA problem occured executing Python code. Here is the sequence of function"\ "\ncalls leading up to the error, with the most recent (innermost) call last." else: # Simplified header head = '%s%s%s\n%s%s' % ( Colors.topline, '-' * 75, ColorsNormal, exc, string.rjust('Traceback (most recent call last)', 75 - len(str(etype)))) frames = [] # Flush cache before calling inspect. This helps alleviate some of the # problems with python 2.3's inspect.py. linecache.checkcache() # Drop topmost frames if requested try: # Try the default getinnerframes and Alex's: Alex's fixes some # problems, but it generates empty tracebacks for console errors # (5 blanks lines) where none should be returned. #records = inspect.getinnerframes(etb, context)[self.tb_offset:] #print 'python records:', records # dbg records = _fixed_getinnerframes(etb, context, self.tb_offset) #print 'alex records:', records # dbg except: # FIXME: I've been getting many crash reports from python 2.3 # users, traceable to inspect.py. If I can find a small test-case # to reproduce this, I should either write a better workaround or # file a bug report against inspect (if that's the real problem). # So far, I haven't been able to find an isolated example to # reproduce the problem. 
inspect_error() traceback.print_exc(file=Term.cerr) info( '\nUnfortunately, your original traceback can not be constructed.\n' ) return '' # build some color string templates outside these nested loops tpl_link = '%s%%s%s' % (Colors.filenameEm, ColorsNormal) tpl_call = 'in %s%%s%s%%s%s' % (Colors.vName, Colors.valEm, ColorsNormal) tpl_call_fail = 'in %s%%s%s(***failed resolving arguments***)%s' % \ (Colors.vName, Colors.valEm, ColorsNormal) tpl_local_var = '%s%%s%s' % (Colors.vName, ColorsNormal) tpl_global_var = '%sglobal%s %s%%s%s' % (Colors.em, ColorsNormal, Colors.vName, ColorsNormal) tpl_name_val = '%%s %s= %%s%s' % (Colors.valEm, ColorsNormal) tpl_line = '%s%%s%s %%s' % (Colors.lineno, ColorsNormal) tpl_line_em = '%s%%s%s %%s%s' % (Colors.linenoEm, Colors.line, ColorsNormal) # now, loop over all records printing context and info abspath = os.path.abspath for frame, file, lnum, func, lines, index in records: #print '*** record:',file,lnum,func,lines,index # dbg try: file = file and abspath(file) or '?' except OSError: # if file is '<console>' or something not in the filesystem, # the abspath call will throw an OSError. Just ignore it and # keep the original file string. pass link = tpl_link % file try: args, varargs, varkw, locals = inspect.getargvalues(frame) except: # This can happen due to a bug in python2.3. We should be # able to remove this try/except when 2.4 becomes a # requirement. Bug details at http://python.org/sf/1005466 inspect_error() traceback.print_exc(file=Term.cerr) info("\nIPython's exception reporting continues...\n") if func == '?': call = '' else: # Decide whether to include variable details or not var_repr = self.include_vars and eqrepr or nullrepr try: call = tpl_call % ( func, inspect.formatargvalues( args, varargs, varkw, locals, formatvalue=var_repr)) except KeyError: # Very odd crash from inspect.formatargvalues(). The # scenario under which it appeared was a call to # view(array,scale) in NumTut.view.view(), where scale had # been defined as a scalar (it should be a tuple). Somehow # inspect messes up resolving the argument list of view() # and barfs out. At some point I should dig into this one # and file a bug report about it. inspect_error() traceback.print_exc(file=Term.cerr) info("\nIPython's exception reporting continues...\n") call = tpl_call_fail % func # Initialize a list of names on the current line, which the # tokenizer below will populate. names = [] def tokeneater(token_type, token, start, end, line): """Stateful tokeneater which builds dotted names. The list of names it appends to (from the enclosing scope) can contain repeated composite names. This is unavoidable, since there is no way to disambguate partial dotted structures until the full list is known. The caller is responsible for pruning the final list of duplicates before using it.""" # build composite names if token == '.': try: names[-1] += '.' # store state so the next token is added for x.y.z names tokeneater.name_cont = True return except IndexError: pass if token_type == tokenize.NAME and token not in keyword.kwlist: if tokeneater.name_cont: # Dotted names names[-1] += token tokeneater.name_cont = False else: # Regular new names. We append everything, the caller # will be responsible for pruning the list later. It's # very tricky to try to prune as we go, b/c composite # names can fool us. The pruning at the end is easy # to do (or the caller can print a list with repeated # names if so desired. 
names.append(token) elif token_type == tokenize.NEWLINE: raise IndexError # we need to store a bit of state in the tokenizer to build # dotted names tokeneater.name_cont = False def linereader(file=file, lnum=[lnum], getline=linecache.getline): line = getline(file, lnum[0]) lnum[0] += 1 return line # Build the list of names on this line of code where the exception # occurred. try: # This builds the names list in-place by capturing it from the # enclosing scope. tokenize.tokenize(linereader, tokeneater) except IndexError: # signals exit of tokenizer pass except tokenize.TokenError, msg: _m = ("An unexpected error occurred while tokenizing input\n" "The following traceback may be corrupted or invalid\n" "The error message is: %s\n" % msg) error(_m) # prune names list of duplicates, but keep the right order unique_names = uniq_stable(names) # Start loop over vars lvals = [] if self.include_vars: for name_full in unique_names: name_base = name_full.split('.', 1)[0] if name_base in frame.f_code.co_varnames: if locals.has_key(name_base): try: value = repr(eval(name_full, locals)) except: value = undefined else: value = undefined name = tpl_local_var % name_full else: if frame.f_globals.has_key(name_base): try: value = repr(eval(name_full, frame.f_globals)) except: value = undefined else: value = undefined name = tpl_global_var % name_full lvals.append(tpl_name_val % (name, value)) if lvals: lvals = '%s%s' % (indent, em_normal.join(lvals)) else: lvals = '' level = '%s %s\n' % (link, call) if index is None: frames.append(level) else: frames.append('%s%s' % (level, ''.join( _formatTracebackLines(lnum, index, lines, Colors, lvals, col_scheme))))
def tokenize_module(module):
    with module.stream() as stream:
        readline = stream.readline
        return list(tokenize.tokenize(readline))
eater = TokenEater(options) for filename in args: if filename == '-': if options.verbose: print _('Reading standard input') fp = sys.stdin closep = 0 else: if options.verbose: print _('Working on %s') % filename fp = open(filename) closep = 1 try: eater.set_filename(filename) try: tokenize.tokenize(fp.readline, eater) except tokenize.TokenError, e: print >> sys.stderr, '%s: %s, line %d, column %d' % ( e[0], filename, e[1][0], e[1][1]) finally: if closep: fp.close() # write the output if options.outfile == '-': fp = sys.stdout closep = 0 else: if options.outpath: options.outfile = os.path.join(options.outpath, options.outfile) fp = open(options.outfile, 'w')
def make_new_code_method_from_source(source, func_name, cls_name): tokens = [] attributes = set() using_self = False g = tokenize(BytesIO(source.encode("utf-8")).readline) for toknum, tokval, _, _, _ in g: # logger.debug((tok_name[toknum], tokval)) if using_self == "self": if toknum == OP and tokval == ".": using_self = tokval continue elif toknum == OP and tokval in (",", ")"): tokens.append((NAME, "self")) using_self = False else: raise NotImplementedError( f"self{tokval} not supported by Transonic") if using_self == ".": if toknum == NAME: using_self = False tokens.append((NAME, "self_" + tokval)) attributes.add(tokval) continue else: raise NotImplementedError if toknum == NAME and tokval == "self": using_self = "self" continue tokens.append((toknum, tokval)) attributes = sorted(attributes) attributes_self = ["self_" + attr for attr in attributes] index_self = tokens.index((NAME, "self")) tokens_attr = [] for ind, attr in enumerate(attributes_self): tokens_attr.append((NAME, attr)) tokens_attr.append((OP, ",")) if tokens[index_self + 1] == (OP, ","): del tokens[index_self + 1] tokens = tokens[:index_self] + tokens_attr + tokens[index_self + 1:] index_func_name = tokens.index((NAME, func_name)) name_new_func = f"__for_method__{cls_name}__{func_name}" tokens[index_func_name] = (NAME, name_new_func) # change recursive calls if func_name in attributes: attributes.remove(func_name) index_rec_calls = [ index for index, (name, value) in enumerate(tokens) if value == "self_" + func_name ] # delete the occurrence of "self_" + func_name in function parameter del tokens[index_rec_calls[0] + 1] del tokens[index_rec_calls[0]] # consider the two deletes offset = -2 # adapt all recurrence calls for ind in index_rec_calls[1:]: # adapt the index to the inserts and deletes ind += offset tokens[ind] = (tokens[ind][0], name_new_func) # put the attributes in parameter for attr in reversed(attributes): tokens.insert(ind + 2, (1, ",")) tokens.insert(ind + 2, (1, "self_" + attr)) # consider the inserts offset += len(attributes) * 2 new_code = untokenize(tokens).decode("utf-8") return new_code, attributes, name_new_func
def split(s): """Splits one last token that needs to be autocompleted.""" # Treat magics specially, since they don't follow python syntax # and require '%%' symbols to be preserved magic_match = re.search(r'%%?\w+$', s) if magic_match: return magic_match.group(0) s2 = s.rstrip() if s != s2: # If there is whitespace at the end of the string # the completion token is empty return '' tokens = [] # Remove front whitespace, somehow it confuses tokenizer s = s.lstrip() try: # Convert input into readline analog lines = s.split('\n') # Add '\n to all lines except last one. lines[:-1] = [line + '\n' for line in lines[:-1]] # tokenize.tokenize has a different signature in python2 and python3. # # In both cases, it's important to gather tokens as we go: many inputs from # users are often incomplete python expressions, which will land us in the # `tokenize.TokenError` case below with an unexpected EOF. if six.PY3: # For python3, we need to yield lines of bytes, but our input is unicode, # so we decode each as we go. line_iterator = (line.encode('utf8') for line in lines) for out in tokenize.tokenize(line_iterator.__next__): tokens.append(out) else: readline = (e for e in lines).next accumulate = lambda *args: tokens.append(args) tokenize.tokenize(readline, accumulate) except tokenize.TokenError: # Tokenizer failed, usually an indication of not-terminated strings. # Remove all quotes and return the last sequence of not-spaces if not tokens: s = s.replace('"', ' ').replace("'", ' ').split() return s[-1] if s else '' except Exception: # pylint: disable=broad-except # If all else fails, use poor's man tokenizer s = s.split() return s[-1] if s else '' # First we check if there is unfinished quoted string. for each in reversed(tokens): if each[_TOKEN_TYPE] == tokenize.ERRORTOKEN and each[_TOKEN] in { "'", '"', '"""', "'''" }: line = each[_TOKEN_END][0] - 1 col = each[_TOKEN_END][1] return lines[line][col:] start_token = _find_expression_start(tokens) if start_token >= len(tokens): # This prevents us from generating random completions when there is # no completion to be generated return _last_real_token(tokens) start_pos = tokens[start_token][_TOKEN_START] first_line_index = start_pos[0] - 1 if first_line_index >= len(lines): return _last_real_token(tokens) first_line = lines[first_line_index][start_pos[1]:] result = first_line + ''.join(lines[first_line_index + 1:]) return result
def map_params(s, params): try: if s.split()[-1].startswith("http"): s = " ".join(s.split()[0:-1]) except: pass # TODO: this is discord specific if s.endswith(">"): s = s[0:s.rfind("<")] g = tokenize(BytesIO(s.strip().encode('utf-8')).readline) # Go through the parsed tokens and eliminate tokens used for string formatting input_toks = [] for toknum, tokval, _, _, _ in g: if toknum > token.N_TOKENS or toknum in [ token.ENDMARKER, token.ENCODING, token.NEWLINE ]: continue input_toks.append((tokval, toknum)) # Create a dictionary with the implicit parameter values output_vals = OrderedDict() for var in params: output_vals[var] = params[var]["default"] # Go through the input intoks = deque([None]) idx_in = 0 idx_out = 0 while idx_in < len(input_toks) and idx_out < len(output_vals): args = input_toks[idx_in][0] param = list(output_vals.keys())[idx_out] # If '-' operator is found, then look ahead of the inputs if args == "-" and idx_in != len(input_toks) - 1: idx_in += 1 intoks.append(args) continue # Check if it's a number num_sign = 1 if intoks[-1] == "-": if input_toks[idx_in][1] != NUMBER: args = intoks[-1] + args else: num_sign = -1 if params[param]["type"] == "int": output_vals[param] = int(args) * num_sign elif params[param]["type"] == "float": output_vals[param] = float(args) * num_sign else: output_vals[param] = args idx_in += 1 idx_out += 1 intoks.append(args) return output_vals
def update_event(self, inp=-1):
    self.set_output_val(0, tokenize.tokenize(self.input(0)))
def get_docstring_and_rest(filename): """Separate ``filename`` content between docstring and the rest Strongly inspired from ast.get_docstring. Parameters ---------- filename: str The path to the file containing the code to be read Returns ------- docstring: str docstring of ``filename`` category: list list of categories specified by the "# category:" comment rest: str ``filename`` content without the docstring lineno: int the line number on which the code starts Notes ----- This function adapted from the sphinx-gallery project; license: BSD-3 https://github.com/sphinx-gallery/sphinx-gallery/ """ node, content = _parse_source_file(filename) # Find the category comment find_category = re.compile('^#\s*category:\s*(.*)$', re.MULTILINE) match = find_category.search(content) if match is not None: category = match.groups()[0] # remove this comment from the content content = find_category.sub('', content) else: category = None if node is None: return SYNTAX_ERROR_DOCSTRING, category, content, 1 if not isinstance(node, ast.Module): raise TypeError("This function only supports modules. " "You provided {0}".format(node.__class__.__name__)) try: # In python 3.7 module knows its docstring. # Everything else will raise an attribute error docstring = node.docstring import tokenize from io import BytesIO ts = tokenize.tokenize(BytesIO(content).readline) ds_lines = 0 # find the first string according to the tokenizer and get # it's end row for tk in ts: if tk.exact_type == 3: ds_lines, _ = tk.end break # grab the rest of the file rest = '\n'.join(content.split('\n')[ds_lines:]) lineno = ds_lines + 1 except AttributeError: # this block can be removed when python 3.6 support is dropped if node.body and isinstance(node.body[0], ast.Expr) and \ isinstance(node.body[0].value, ast.Str): docstring_node = node.body[0] docstring = docstring_node.value.s # python2.7: Code was read in bytes needs decoding to utf-8 # unless future unicode_literals is imported in source which # make ast output unicode strings if hasattr(docstring, 'decode') and not isinstance(docstring, six.text_type): docstring = docstring.decode('utf-8') lineno = docstring_node.lineno # The last line of the string. # This get the content of the file after the docstring last line # Note: 'maxsplit' argument is not a keyword argument in python2 rest = content.split('\n', lineno)[-1] lineno += 1 else: docstring, rest = '', '' if not docstring: raise ValueError(('Could not find docstring in file "{0}". ' 'A docstring is required for the example gallery.') .format(filename)) return docstring, category, rest, lineno
#!/usr/bin/env python3 import sys import tokenize import nbformat.v4 as nbf4 import re FILE = sys.argv[1] nb_cells = [] flag_nl, buffer, pline = 0, list(), -1 for token in tokenize.tokenize(open(FILE, 'rb').readline): # print(token) if token.end == (0, 0): continue # auto encoding if token.start[0] == 1 and token.line.startswith('#!'): continue # auto encoding if token.type == 4: continue # line break if token.start[0] > pline: buffer.append(token.line) pline = token.start[0] if token.type == 57: # comment if re.search('^# \*+ ', token.line): buffer.pop() block = "".join(buffer).strip() if block: nb_cells.append(nbf4.new_code_cell(source=block)) heading = token.line.replace('*', '#') nb_cells.append(nbf4.new_markdown_cell(source=heading.strip())) buffer.clear()
def _get_tokens(source: bytes) -> Sequence[tokenize.TokenInfo]:
    return tuple(tokenize.tokenize(io.BytesIO(source).readline))
def _compile(s, fname):
    tokens = tokenize.tokenize(s)
    t = parse.parse(s, tokens)
    r = encode.encode(fname, s, t)
    return r
def _tokenize_string(s):
    return tokenize.tokenize(io.BytesIO(s.encode("utf-8")).readline)
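# Example use of the helper above (a sketch; the helper name is taken from the
# snippet it follows): tokenize.tokenize() yields TokenInfo namedtuples, the
# first of which is the ENCODING token.
import tokenize

for tok in _tokenize_string("total = price * 1.2  # add VAT"):
    print(tokenize.tok_name[tok.type], repr(tok.string))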
def tokens(readline, tokeneater):
    for token in tokenize.tokenize(readline):
        yield tokeneater(*token)
def replacetokens(tokens, fullname): """Transform a stream of tokens from raw to Python 3. It is called by the custom module loading machinery to rewrite source/tokens between source decoding and compilation. Returns a generator of possibly rewritten tokens. The input token list may be mutated as part of processing. However, its changes do not necessarily match the output token stream. REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION OR CACHED FILES WON'T GET INVALIDATED PROPERLY. """ futureimpline = False # The following utility functions access the tokens list and i index of # the for i, t enumerate(tokens) loop below def _isop(j, *o): """Assert that tokens[j] is an OP with one of the given values""" try: return tokens[j].type == token.OP and tokens[j].string in o except IndexError: return False def _findargnofcall(n): """Find arg n of a call expression (start at 0) Returns index of the first token of that argument, or None if there is not that many arguments. Assumes that token[i + 1] is '('. """ nested = 0 for j in range(i + 2, len(tokens)): if _isop(j, ')', ']', '}'): # end of call, tuple, subscription or dict / set nested -= 1 if nested < 0: return None elif n == 0: # this is the starting position of arg return j elif _isop(j, '(', '[', '{'): nested += 1 elif _isop(j, ',') and nested == 0: n -= 1 return None def _ensureunicode(j): """Make sure the token at j is a unicode string This rewrites a string token to include the unicode literal prefix so the string transformer won't add the byte prefix. Ignores tokens that are not strings. Assumes bounds checking has already been done. """ st = tokens[j] if st.type == token.STRING and st.string.startswith(("'", '"')): tokens[j] = st._replace(string='u%s' % st.string) for i, t in enumerate(tokens): # Convert most string literals to byte literals. String literals # in Python 2 are bytes. String literals in Python 3 are unicode. # Most strings in Mercurial are bytes and unicode strings are rare. # Rather than rewrite all string literals to use ``b''`` to indicate # byte strings, we apply this token transformer to insert the ``b`` # prefix nearly everywhere. if t.type == token.STRING: s = t.string # Preserve docstrings as string literals. This is inconsistent # with regular unprefixed strings. However, the # "from __future__" parsing (which allows a module docstring to # exist before it) doesn't properly handle the docstring if it # is b''' prefixed, leading to a SyntaxError. We leave all # docstrings as unprefixed to avoid this. This means Mercurial # components touching docstrings need to handle unicode, # unfortunately. if s[0:3] in ("'''", '"""'): yield t continue # If the first character isn't a quote, it is likely a string # prefixing character (such as 'b', 'u', or 'r'. Ignore. if s[0] not in ("'", '"'): yield t continue # String literal. Prefix to make a b'' string. yield t._replace(string='b%s' % t.string) continue # Insert compatibility imports at "from __future__ import" line. # No '\n' should be added to preserve line numbers. 
if (t.type == token.NAME and t.string == 'import' and all(u.type == token.NAME for u in tokens[i - 2:i]) and [u.string for u in tokens[i - 2:i]] == ['from', '__future__']): futureimpline = True if t.type == token.NEWLINE and futureimpline: futureimpline = False if fullname == 'mercurial.pycompat': yield t continue r, c = t.start l = (b'; from mercurial.pycompat import ' b'delattr, getattr, hasattr, setattr, xrange, ' b'open, unicode\n') for u in tokenize.tokenize(io.BytesIO(l).readline): if u.type in (tokenize.ENCODING, token.ENDMARKER): continue yield u._replace(start=(r, c + u.start[1]), end=(r, c + u.end[1])) continue # This looks like a function call. if t.type == token.NAME and _isop(i + 1, '('): fn = t.string # *attr() builtins don't accept byte strings to 2nd argument. if (fn in ('getattr', 'setattr', 'hasattr', 'safehasattr') and not _isop(i - 1, '.')): arg1idx = _findargnofcall(1) if arg1idx is not None: _ensureunicode(arg1idx) # .encode() and .decode() on str/bytes/unicode don't accept # byte strings on Python 3. elif fn in ('encode', 'decode') and _isop(i - 1, '.'): for argn in range(2): argidx = _findargnofcall(argn) if argidx is not None: _ensureunicode(argidx) # It changes iteritems/values to items/values as they are not # present in Python 3 world. elif fn in ('iteritems', 'itervalues'): yield t._replace(string=fn[4:]) continue # Emit unmodified token. yield t
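replacetokens rewrites Mercurial's token stream before compilation; the same general pattern, boiled down to a toy that re-emits modified tokens through tokenize.untokenize (an illustrative sketch, not Mercurial's code):

import io
import tokenize

def add_bytes_prefix(source: bytes) -> bytes:
    # Prefix unprefixed string literals with b'' and round-trip the tokens.
    out = []
    for tok in tokenize.tokenize(io.BytesIO(source).readline):
        if tok.type == tokenize.STRING and tok.string[0] in "'\"":
            tok = tok._replace(string="b" + tok.string)
        out.append(tok)
    return tokenize.untokenize(out)

print(add_bytes_prefix(b"x = 'hello'").decode("utf-8"))  # x = b'hello'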
import os
import re
import tokenize

matcher = re.compile(r'def .*\(.*\):|class .*\(.*\):')
source_file = os.path.join(os.getcwd(), '../snickers/main.py')
#proc = subprocess.Popen("egrep 'def|class' {}".format(source_file), stdout=subprocess.PIPE)
#readline = proc.readline
readline = os.popen("egrep 'def|class' {}".format(source_file)).readline
# source_file_handle = open(source_file, 'rb')
# readline = source_file_handle.readline

# Types = type: def, params: [], parent_node: <pointer>
# Types = type: class, children: [], parent_node: <pointer>
module_objects = []
# os.popen() yields text lines, so use generate_tokens(); tokenize.tokenize()
# expects a readline that returns bytes.
tokens_g = tokenize.generate_tokens(readline)
look_for = 'class_or_function'
current_node = module_objects
indentation_count = 0

def extract_interesting_lines(string):
    ss = string.split("\n")

def state_append(c_node, obj, current_indentation):
    obj['parent_node'] = c_node
    obj['current_indentation'] = current_indentation
    if type(c_node) == list:
        c_node.append(obj)
    elif type(c_node) == dict:
        if c_node['type'] in ['class', 'def'] and obj['type'] in ['def', 'class']:
            c_node['children'].append(obj)
def __setTokens__(self): # g = tokenize(BytesIO(self._codes.encode('utf-8')).readline) # tokenize the string prev_num = -1 prev_val = None prev_end = -1 self.__ReplaceReserved__() # Split _codes line by line and identify each line ss = self._processed_codes.splitlines() # pdb.set_trace() for line in ss: try: # call python tokenize.tokenize and get the returned generator g g = tokenize(BytesIO(line.encode('utf-8')).readline) # tokenize the string try: for toknum, tokval, starrt, eend, _ in g: # pdb.set_trace() chop_start = 0 chop_end = len(tokval) - 1 # pdb.set_trace() # if the token type is NAME / OP / NUMBER and not only consists of [,)\-\"';\[\]|..+]+ if (toknum in [NAME, OP, NUMBER, ERRORTOKEN] and re.compile( r"^(?<![a-zA-Z])([\ ,):\"';\[\]}\{]+|\.\.+)(?![a-zA-Z])$").search(tokval) == None): # pdb.set_trace() # Take xx( / < / > as one token, instead of two, eg. xx and ( if (((prev_num == NAME and tokval == '(') or ( prev_val == '&' and (tokval == 'lt' or tokval == 'gt'))) and prev_end == starrt): self._tokens[-1] = self._tokens[-1] + tokval elif (tokval == '('): pass elif (toknum == NUMBER and int(tokval) in self._reserve_codes): self._tokens.append(self._reserve_codes[int(tokval)]) else: self._tokens.append(tokval) # For comment / string, code elif (toknum in [COMMENT, STRING]): # pdb.set_trace() if (toknum == STRING): # remove starting and ending ' / " while ((tokval[chop_start] == '"' or tokval[chop_start] == "'") and chop_start < chop_end): chop_start += 1 while ((tokval[chop_end] == '"' or tokval[chop_end] == "'") and chop_start < chop_end): chop_end -= 1 else: # remove starting # / ''' / """ while ((tokval[chop_start] == '#' and chop_start < chop_end) or (chop_end >= chop_start + 3 and tokval[chop_start:chop_start + 3] == "'''") or (chop_end >= chop_start + 3 and tokval[chop_start:chop_start + 3] == '"""')): if (tokval[chop_start] == '#'): chop_start += 1 else: chop_start += 3 if (chop_start < chop_end or (tokval[chop_start] not in ['#', "'", '"'])): words = CodesTokenizer(tokval[chop_start:chop_end + 1])._tokens if (words): self._tokens.extend(words) prev_num = toknum prev_val = tokval prev_end = eend except Exception as e: # print("Error in __setTokens__", e, line) # pdb.set_trace() pass except Exception as e: print("Error in __setTokens__", e, line) pdb.set_trace() pass
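__setTokens__ feeds each source line to the tokenizer separately and swallows errors from constructs that are incomplete when seen in isolation; a compact sketch of that per-line pattern (illustrative only, not this class's logic):

import io
import tokenize

def tokens_for_line(line):
    # Tokenize a single source line, tolerating unterminated strings and
    # open brackets by returning an empty token list instead of raising.
    try:
        return [
            tok for tok in tokenize.generate_tokens(io.StringIO(line).readline)
            if tok.type not in (tokenize.NEWLINE, tokenize.NL, tokenize.ENDMARKER)
        ]
    except (tokenize.TokenError, IndentationError):
        return []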
def old_mark_text_ranges(node, source: bytes): """ Node is an AST, source is corresponding source as string. Function adds recursively attributes end_lineno and end_col_offset to each node which has attributes lineno and col_offset. """ def _extract_tokens(tokens, lineno, col_offset, end_lineno, end_col_offset): return list( filter( (lambda tok: tok.start[0] >= lineno and (tok.start[1] >= col_offset or tok.start[0] > lineno ) and tok.end[0] <= end_lineno and (tok.end[1] <= end_col_offset or tok.end[0] < end_lineno ) and tok.string != ""), tokens, )) def _mark_text_ranges_rec(node, tokens, prelim_end_lineno, prelim_end_col_offset): """ Returns the earliest starting position found in given tree, this is convenient for internal handling of the siblings """ # set end markers to this node if "lineno" in node._attributes and "col_offset" in node._attributes: tokens = _extract_tokens( tokens, node.lineno, node.col_offset, prelim_end_lineno, prelim_end_col_offset, ) try: tokens = _mark_end_and_return_child_tokens( node, tokens, prelim_end_lineno, prelim_end_col_offset) except Exception: logging.getLogger("thonny").warning("Problem with marking %s", node) # fallback to incorrect marking instead of exception node.incorrect_range = True node.end_lineno = node.lineno node.end_col_offset = node.col_offset + 1 # mark its children, starting from last one # NB! need to sort children because eg. in dict literal all keys come first and then all values children = list(_get_ordered_child_nodes(node)) for child in reversed(children): (prelim_end_lineno, prelim_end_col_offset) = _mark_text_ranges_rec( child, tokens, prelim_end_lineno, prelim_end_col_offset) if "lineno" in node._attributes and "col_offset" in node._attributes: # new "front" is beginning of this node prelim_end_lineno = node.lineno prelim_end_col_offset = node.col_offset return (prelim_end_lineno, prelim_end_col_offset) def _strip_trailing_junk_from_expressions(tokens): while (tokens[-1].type not in ( token.RBRACE, token.RPAR, token.RSQB, token.NAME, token.NUMBER, token.STRING, token.ELLIPSIS, ) and tokens[-1].string != "..." # See https://bugs.python.org/issue31394 and tokens[-1].string not in ")}]" or tokens[-1].string in [ "and", "as", "assert", "class", "def", "del", "elif", "else", "except", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "not", "or", "try", "while", "with", "yield", ]): del tokens[-1] def _strip_trailing_extra_closers(tokens, remove_naked_comma): level = 0 for i in range(len(tokens)): if tokens[i].string in "({[": level += 1 elif tokens[i].string in ")}]": level -= 1 if level == 0 and tokens[i].string == "," and remove_naked_comma: tokens[:] = tokens[0:i] return if level < 0: tokens[:] = tokens[0:i] return def _strip_unclosed_brackets(tokens): level = 0 for i in range(len(tokens) - 1, -1, -1): if tokens[i].string in "({[": level -= 1 elif tokens[i].string in ")}]": level += 1 if level < 0: tokens[:] = tokens[0:i] level = 0 # keep going, there may be more unclosed brackets def _mark_end_and_return_child_tokens(node, tokens, prelim_end_lineno, prelim_end_col_offset): """ # shortcut node.end_lineno = prelim_end_lineno node.end_col_offset = prelim_end_col_offset return tokens """ # prelim_end_lineno and prelim_end_col_offset are the start of # next positioned node or end of source, ie. 
the suffix of given # range may contain keywords, commas and other stuff not belonging to current node # Function returns the list of tokens which cover all its children if isinstance(node, ast.stmt): # remove empty trailing lines while tokens[-1].type in ( tokenize.NL, tokenize.COMMENT, token.NEWLINE, token.INDENT, ) or tokens[-1].string in (":", "else", "elif", "finally", "except"): del tokens[-1] else: _strip_trailing_extra_closers( tokens, not isinstance(node, (ast.Tuple, ast.Lambda))) _strip_trailing_junk_from_expressions(tokens) _strip_unclosed_brackets(tokens) # set the end markers of this node node.end_lineno = tokens[-1].end[0] node.end_col_offset = tokens[-1].end[1] # Peel off some trailing tokens which can't be part any # positioned child node. # TODO: maybe cleaning from parent side is better than # _strip_trailing_junk_from_expressions # Remove trailing empty parens from no-arg call if isinstance(node, ast.Call) and _tokens_text(tokens[-2:]) == "()": del tokens[-2:] # Remove trailing full slice elif isinstance(node, ast.Subscript): if _tokens_text(tokens[-3:]) == "[:]": del tokens[-3:] elif _tokens_text(tokens[-4:]) == "[::]": del tokens[-4:] # Attribute name would confuse the "value" of Attribute elif isinstance(node, ast.Attribute): assert tokens[-1].type == token.NAME del tokens[-1] _strip_trailing_junk_from_expressions(tokens) return tokens all_tokens = list(tokenize.tokenize(io.BytesIO(source).readline)) source_lines = source.splitlines(True) _fix_ast_problems(node, source_lines, all_tokens) prelim_end_lineno = len(source_lines) prelim_end_col_offset = len(source_lines[len(source_lines) - 1]) _mark_text_ranges_rec(node, all_tokens, prelim_end_lineno, prelim_end_col_offset)
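The manual end-position marking above is only needed on older interpreters; since Python 3.8 the parser records end positions on AST nodes itself. A quick illustration:

import ast

tree = ast.parse("x = [1,\n     2]\n")
assign = tree.body[0]
# Every positioned node carries end_lineno / end_col_offset on Python 3.8+.
print(assign.lineno, assign.col_offset, assign.end_lineno, assign.end_col_offset)
# expected output: 1 0 2 7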
def main(): global default_keywords try: opts, args = getopt.getopt( sys.argv[1:], "ad:DEhk:Kno:p:S:Vvw:x:X:", [ "extract-all", "default-domain=", "escape", "help", "keyword=", "no-default-keywords", "add-location", "no-location", "output=", "output-dir=", "style=", "verbose", "version", "width=", "exclude-file=", "docstrings", "no-docstrings", ], ) except getopt.error as msg: usage(1, msg) # for holding option values class Options: # constants GNU = 1 SOLARIS = 2 # defaults extractall = 0 # FIXME: currently this option has no effect at all. escape = 0 keywords = [] outpath = "" outfile = "messages.pot" writelocations = 1 locationstyle = GNU verbose = 0 width = 78 excludefilename = "" docstrings = 0 nodocstrings = {} options = Options() locations = {"gnu": options.GNU, "solaris": options.SOLARIS} # parse options for opt, arg in opts: if opt in ("-h", "--help"): usage(0) elif opt in ("-a", "--extract-all"): options.extractall = 1 elif opt in ("-d", "--default-domain"): options.outfile = arg + ".pot" elif opt in ("-E", "--escape"): options.escape = 1 elif opt in ("-D", "--docstrings"): options.docstrings = 1 elif opt in ("-k", "--keyword"): options.keywords.append(arg) elif opt in ("-K", "--no-default-keywords"): default_keywords = [] elif opt in ("-n", "--add-location"): options.writelocations = 1 elif opt in ("--no-location", ): options.writelocations = 0 elif opt in ("-S", "--style"): options.locationstyle = locations.get(arg.lower()) if options.locationstyle is None: usage(1, _("Invalid value for --style: %s") % arg) elif opt in ("-o", "--output"): options.outfile = arg elif opt in ("-p", "--output-dir"): options.outpath = arg elif opt in ("-v", "--verbose"): options.verbose = 1 elif opt in ("-V", "--version"): print(_("pygettext.py (xgettext for Python) %s") % __version__) sys.exit(0) elif opt in ("-w", "--width"): try: options.width = int(arg) except ValueError: usage(1, _("--width argument must be an integer: %s") % arg) elif opt in ("-x", "--exclude-file"): options.excludefilename = arg elif opt in ("-X", "--no-docstrings"): fp = open(arg) try: while 1: line = fp.readline() if not line: break options.nodocstrings[line[:-1]] = 1 finally: fp.close() # calculate escapes make_escapes(not options.escape) # calculate all keywords options.keywords.extend(default_keywords) # initialize list of strings to exclude if options.excludefilename: try: fp = open(options.excludefilename) options.toexclude = fp.readlines() fp.close() except IOError: print(_("Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr) sys.exit(1) else: options.toexclude = [] # resolve args to module lists expanded = [] for arg in args: if arg == "-": expanded.append(arg) else: expanded.extend(getFilesForName(arg)) args = expanded # slurp through all the files eater = TokenEater(options) for filename in args: if filename == "-": if options.verbose: print(_("Reading standard input")) fp = sys.stdin.buffer closep = 0 else: if options.verbose: print(_("Working on %s") % filename) fp = open(filename, "rb") closep = 1 try: eater.set_filename(filename) try: tokens = tokenize.tokenize(fp.readline) for _token in tokens: eater(*_token) except tokenize.TokenError as e: print("%s: %s, line %d, column %d" % (e.args[0], filename, e.args[1][0], e.args[1][1]), file=sys.stderr) finally: if closep: fp.close() # write the output if options.outfile == "-": fp = sys.stdout closep = 0 else: if options.outpath: options.outfile = os.path.join(options.outpath, options.outfile) fp = open(options.outfile, "w") closep = 1 try: 
eater.write(fp) finally: if closep: fp.close()
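The loop above drives a TokenEater instance with eater(*_token), unpacking each TokenInfo into the old five-argument callback signature. A toy eater showing that calling convention (not pygettext's actual class):

import io
import tokenize

class StringCollector:
    # Records the literal argument of _( ... ) calls; illustrative only.
    def __init__(self):
        self.found = []
        self._want_string = False

    def __call__(self, ttype, tstring, start, end, line):
        if ttype == tokenize.NAME and tstring == "_":
            self._want_string = True
        elif self._want_string and ttype == tokenize.STRING:
            self.found.append(tstring)
            self._want_string = False
        elif ttype != tokenize.OP:
            self._want_string = False

eater = StringCollector()
for tok in tokenize.tokenize(io.BytesIO(b'print(_("hello"))\n').readline):
    eater(*tok)
print(eater.found)  # ['"hello"']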
def run(self): tokenize.tokenize(self.getline, self.tokeneater) # Remove trailing empty lines. lines = self.lines while lines and lines[-1] == "\n": lines.pop() # Sentinel. stats = self.stats stats.append((len(lines), 0)) # Map count of leading spaces to # we want. have2want = {} # Program after transformation. after = self.after = [] # Copy over initial empty lines -- there's nothing to do until # we see a line with *something* on it. i = stats[0][0] after.extend(lines[1:i]) for i in range(len(stats) - 1): thisstmt, thislevel = stats[i] nextstmt = stats[i + 1][0] have = getlspace(lines[thisstmt]) want = thislevel * 4 if want < 0: # A comment line. if have: # An indented comment line. If we saw the same # indentation before, reuse what it most recently # mapped to. want = have2want.get(have, -1) if want < 0: # Then it probably belongs to the next real stmt. for j in xrange(i + 1, len(stats) - 1): jline, jlevel = stats[j] if jlevel >= 0: if have == getlspace(lines[jline]): want = jlevel * 4 break if want < 0: # Maybe it's a hanging # comment like this one, # in which case we should shift it like its base # line got shifted. for j in xrange(i - 1, -1, -1): jline, jlevel = stats[j] if jlevel >= 0: want = have + getlspace(after[jline-1]) - \ getlspace(lines[jline]) break if want < 0: # Still no luck -- leave it alone. want = have else: want = 0 assert want >= 0 have2want[have] = want diff = want - have if diff == 0 or have == 0: after.extend(lines[thisstmt:nextstmt]) else: for line in lines[thisstmt:nextstmt]: if diff > 0: if line == "\n": after.append(line) else: after.append(" " * diff + line) else: remove = min(getlspace(line), -diff) after.append(line[remove:]) return self.raw != self.after
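run() relies on a getlspace helper that is not shown here; it presumably just counts a line's leading spaces, along these lines (assumed behaviour, not the original definition):

def getlspace(line):
    # number of leading space characters in the line
    i, n = 0, len(line)
    while i < n and line[i] == " ":
        i += 1
    return i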
def tokens(source_code: bytes) -> Iterator[TokenInfo]:
    """Return an iterator over the tokens in a python source string."""
    return tokenize(BytesIO(source_code).readline)
# python -m tokenize 180_language_tokenize.py
import tokenize

with tokenize.open('180_language_tokenize.py') as f:
    tokens = tokenize.generate_tokens(f.readline)
    for token in tokens:
        print(token)

import tokenize

with open('180_language_tokenize.py', 'rb') as f:
    tokens = tokenize.tokenize(f.readline)
    for token in tokens:
        print(token)
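Both variants rely on encoding detection: tokenize.open() opens the file using the encoding found by tokenize.detect_encoding(), which can also be called on its own. A small illustration:

import tokenize

with open('180_language_tokenize.py', 'rb') as f:
    encoding, first_lines = tokenize.detect_encoding(f.readline)
print(encoding)  # typically 'utf-8'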
def getblock(lines):
    """Extract the block of code at the top of the given list of lines."""
    try:
        tokenize.tokenize(ListReader(lines).readline, BlockFinder().tokeneater)
    except EndOfBlock, eob:
        return lines[:eob.args[0]]