示例#1
0
 def __init__(self, ignore_whitespace):
     """Set up lexer state.

     ignore_whitespace is stored on the instance and also forwarded to the
     ScopeStack that tracks indentation.
     """
     # Plain flags and containers first, then the scope tracker.
     self.ignore_whitespace = ignore_whitespace
     self.NewLine = False
     self.Mode = []
     self.scope = ScopeStack(ignore_whitespace)
 def setUp(self):
     """Bind a new default-constructed ScopeStack to self.scope."""
     self.scope = ScopeStack()
示例#3
0
class RazorLexer(object):
    """Encapsulates the razor token logic.

    The lexer keeps two rule tables (see create): one used while scanning
    template text and one used while scanning a code block. Each token
    handler receives the scanner and the matched text and returns the text
    to emit; several handlers return None to drop the token.
    """

    @staticmethod
    def create(ignore_whitespace=False):
        """Creates the rules bound to a new lexer instance.

        Returns a RazorLexer whose rules, multilineRules and lexer
        attributes have been populated.
        """
        lex = RazorLexer(ignore_whitespace)
        lex.rules = (
            (Token.NEWLINE, (r"[\r]?[\n][ \t]*", bind(lex.new_line))),
            (Token.ESCAPED, (r"@@", bind(lex.escaped))),
            (Token.LINECOMMENT, (r"@#[^\n]*?$", bind(lex.line_comment))),
            (Token.ONELINE, (r"@(?:import|from|model) .+$", bind(lex.one_line))),
            (Token.MULTILINE, (r"@\w*.*:$", bind(lex.multiline))),
            (Token.PARENEXPRESSION, (r"@!?\(", bind(lex.paren_expression))),
            (Token.EXPRESSION,
             (r"@!?(\w+(?:(?:\[.+\])|(?:\(.*\)))?(?:\.[a-zA-Z]+(?:(?:\[.+\])|(?:\(.*\)))?)*)", bind(lex.expression))),
            (Token.XMLFULLSTART, (r"[ \t]*<\w[^@\n]*?>", bind(lex.xml_start))),
            (Token.XMLSTART, (r"[ \t]*<\w[^@\n>]*", bind(lex.xml_start))),
            (Token.XMLEND, (r"[ \t]*</[^@\n]+[>]", bind(lex.xml_end))),
            (Token.XMLSELFCLOSE, (r"[^@]+/>[ \t]*", bind(lex.xml_self_close))),
            (Token.TEXT, (r"[^@\n<]+", bind(lex.text))),
        )
        lex.multilineRules = (
            (Token.EMPTYLINE, (r"[\r]?[\n][ \t]*$", bind(lex.empty_line))),
            (Token.EXPLICITMULTILINEEND, (r"[\r]?[\n][ \t]*\w*.*:@", bind(lex.multiline_end))),
            (Token.NEWLINE, (r"[\r]?[\n][ \t]*", bind(lex.new_line))),
            (Token.XMLFULLSTART, (r"[ \t]*<\w[^@\n]*?>", bind(lex.xml_start))),
            (Token.XMLSTART, (r"[ \t]*<\w[^@\n>]*", bind(lex.xml_start))),
            (Token.XMLEND, (r"[ \t]*</[^@\n]+[>]", bind(lex.xml_end))),
            (Token.XMLSELFCLOSE, (r"[^@]+/>[ \t]*", bind(lex.xml_self_close))),
            (Token.MULTILINE, (r"\w*.*:$", bind(lex.multiline))),
            (Token.PRINTLINE, (r"[ \t]*print[ \t]*[(][ \t]*['\"].*[\"'][ \t]*[)]", bind(lex.print_line))),
            (Token.CODE, (r".+", bind(lex.code))),
        )
        lex.lexer = sexylexer.Lexer(lex.rules, lex.multilineRules)
        return lex

    def __init__(self, ignore_whitespace):
        """Initialise lexer state; use create() to obtain a usable lexer."""
        self.scope = ScopeStack(ignore_whitespace)
        self.ignore_whitespace = ignore_whitespace
        # Stack of scanner modes saved by push_mode and restored by pop_mode.
        self.Mode = []
        # Set by new_line; consumed by the next handler that emits indentation.
        self.NewLine = False

    def scan(self, text):
        """Tokenize an input string"""
        if self.ignore_whitespace:
            return self.lexer.scan(text.lstrip())
        return self.lexer.scan(text)

    # Token Parsers
    @staticmethod
    def should_escape(token):
        """Returns false if this token should not be html escaped"""
        # Tokens look like "@..." or "@!..."; the "!" opts out of escaping.
        return token[1] != '!'

    def _with_pending_indent(self, text):
        """Prefix text with the scope indentation if a new line just started.

        Clears the NewLine flag when it was set, so the indentation is only
        emitted once per line.
        """
        if self.NewLine:
            self.NewLine = False
            return self.scope.indentstack.get_scope_indentation()[0] + text
        return text

    def xml_start(self, scanner, token):
        """Handles an opening tag: switches the scanner to text mode."""
        self.push_mode(scanner)
        scanner.Mode = sexylexer.ScannerMode.Text
        # <text> is a pseudo-tag and is stripped from the output.
        token = re.sub("[ \t]*<text>", "", token)
        return self._with_pending_indent(token.replace("'", "\\'"))

    def xml_end(self, scanner, token):
        """Handles a closing tag: restores the previously saved mode."""
        self.pop_mode(scanner)
        token = re.sub("[ \t]*</text>", "", token)
        return self._with_pending_indent(token.replace("'", "\\'"))

    def xml_self_close(self, scanner, token):
        """Handles a self-closing tag: restores the previously saved mode."""
        self.pop_mode(scanner)
        return self._with_pending_indent(token.replace("'", "\\'"))

    def paren_expression(self, scanner, token):
        """Performs paren matching to find the end of a parenthesis expression

        Advances the scanner past the matching ")" and returns the enclosed
        expression, wrapped in cgi.escape(str(...)) unless the token is "@!(".
        Raises sexylexer.InvalidTokenError if the parentheses are not
        balanced before the end of the line or input.
        """
        start = scanner._position
        plevel = 1
        end = start
        for c in scanner.input[start:]:
            if plevel == 0:
                # Halt when we close our braces
                break
            elif c == '(':
                plevel += 1
            elif c == ')':
                plevel -= 1
            elif c == '\n':
                # Halt at new line
                break
            end += 1
        # parse exception
        if plevel != 0:
            raise sexylexer.InvalidTokenError()
        scanner._position = end

        # end points just past the closing paren, which is not part of the
        # expression itself.
        inner = scanner.input[start:end - 1]
        # Our token here is either @!( or @(
        if not self.should_escape(token):
            return inner
        # We wrap the expression in a call to cgi.escape
        return "cgi.escape(str(" + inner + "))"

    def multiline(self, scanner, token):
        """Handles multiline expressions"""
        if token == "@:":
            self.Mode.append(sexylexer.ScannerMode.Text)

            # sketchy situation here.
            scanner.Mode = sexylexer.ScannerMode.CODE

            def pop_multiline():
                self.pop_mode(scanner)

            self.scope.indentstack.mark_scope(pop_multiline)
            # We have to move past the end of line (this is a special case)
            # $ matches at the end of a line so it should be just +1
            scanner._position += 1
            return None
        # NOTE: the "@helper" -> "def" conversion is not performed here.
        self.scope.enter_scope()
        return token.lstrip('@')

    def multiline_end(self, scanner, token):
        """Handles an explicit ":@" block terminator."""
        scanner.Mode = sexylexer.ScannerMode.Text
        self.pop_mode(scanner)
        scanner._position += 1
        return token.rstrip(':@')

    def escaped(self, scanner, token):
        """Escapes the @ token directly"""
        return "@"

    def expression(self, scanner, token):
        """Returns the expression, html escaped unless it starts with "@!"."""
        if not self.should_escape(token):
            return token[2:]
        return "cgi.escape(str(" + token[1:] + "))"

    def one_line(self, scanner, token):
        """Handles @import / @from / @model lines."""
        lower_token = token.lower()
        if lower_token.startswith("@model"):
            return "isinstance(model, " + token[token.rindex(' '):] + ")"
        return token[1:]

    def line_comment(self, scanner, token):
        """Ignores comments by returning None"""
        # Move the parser past the newline character
        scanner._position += 1
        return None

    def text(self, scanner, token):
        """Returns text escaped with ' escaped"""
        return token.replace("'", "\\'")

    def print_line(self, scanner, token):
        """Returns the string literal passed to a print(...) call."""
        self.pop_mode(scanner)
        token = re.match("([ \t]*print[ \t]*[(][ \t]*['\"])(.*)([\"'][ \t]*[)])", token).group(2)
        return self._with_pending_indent(token)

    def code(self, scanner, token):
        """Returns the code token unchanged"""
        return token

    def new_line(self, scanner, token):
        """Handles indention scope"""
        self.NewLine = True
        # Keep only the whitespace that follows the line break.
        nline = token.index('\n') + 1
        token = token[nline:]
        self.scope.handle_indentation(token)
        if self.ignore_whitespace:
            return ""
        return token[self.scope.indentstack.get_scope_indentation()[1]:]

    def empty_line(self, scanner, token):
        """Ignores an empty line by returning None"""
        return None

    def pop_mode(self, scanner):
        """Restores the most recently saved scanner mode, if any."""
        if self.Mode:
            scanner.Mode = self.Mode.pop()

    def push_mode(self, scanner):
        """Saves the scanner mode when inside code or an already nested mode."""
        if self.Mode or scanner.Mode == sexylexer.ScannerMode.CODE:
            self.Mode.append(scanner.Mode)
示例#4
0
 def setUp(self):
     """Bind a new default-constructed ScopeStack to self.scope."""
     self.scope = ScopeStack()
class ScopeStackTest(unittest.TestCase):
    """Exercises ScopeStack scope counting and scope-exit callbacks."""

    def setUp(self):
        """Give every test its own ScopeStack."""
        self.scope = ScopeStack()

    def testScopeStartsAtZero(self):
        """Tests that a new stack reports scope zero"""
        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists in Python 3.12.
        self.assertEqual(0, self.scope.get_scope(), "Scope didn't start at zero")

    def testCallback(self):
        """Tests that the scope stack will callback when not in a scope"""
        counter = CallbackCounter()

        def scopeCallback(counter):
            counter.count += 1

        callback = lambda: scopeCallback(counter)

        # Push a callback onto stack
        self.scope.handle_indentation("")
        self.scope.indentstack.mark_scope(callback)

        # Calls the stack with a deeper indent
        self.scope.handle_indentation(STEP)
        self.assertEqual(0, self.scope.get_scope())
        self.assertEqual(0, counter.count)

        # Falls back to the original scope
        self.scope.handle_indentation("")
        self.assertEqual(1, counter.count)

    def testSingleScope(self):
        """Tests that a single scope is registered correctly"""
        self.scope.handle_indentation("")
        self.scope.enter_scope()
        self.scope.handle_indentation(STEP)
        self.assertEqual(1, self.scope.get_scope())

        # Deeper indentation without enter_scope does not open a new scope.
        self.scope.handle_indentation(2 * STEP)
        self.assertEqual(1, self.scope.get_scope())

        self.scope.handle_indentation(STEP)
        self.assertEqual(1, self.scope.get_scope())

        self.scope.handle_indentation("")
        self.assertEqual(0, self.scope.get_scope())

    def testMultiScope(self):
        """Tests a multiscope callback is called correctly"""
        self.scope.handle_indentation("")
        self.assertEqual(0, self.scope.get_scope())
        self.scope.enter_scope()

        self.scope.handle_indentation(STEP)
        self.assertEqual(1, self.scope.get_scope())
        self.scope.enter_scope()

        self.scope.handle_indentation(2 * STEP)
        self.assertEqual(2, self.scope.get_scope())
        self.scope.enter_scope()

        # enter_scope followed by the same indentation leaves the count at 2.
        self.scope.handle_indentation(2 * STEP)
        self.assertEqual(2, self.scope.get_scope())

        self.scope.handle_indentation(STEP)
        self.assertEqual(1, self.scope.get_scope())

        self.scope.handle_indentation("")
        self.assertEqual(0, self.scope.get_scope())
示例#6
0
文件: lex.py 项目: pate/pyRazor
 def __init__(self, ignore_whitespace):
   """Store the whitespace flag and build the indentation ScopeStack.

   ignore_whitespace is kept on the instance and also passed to ScopeStack.
   """
   self.ignore_whitespace = ignore_whitespace
   self.scope = ScopeStack(ignore_whitespace)
示例#7
0
class ScopeStackTest(unittest.TestCase):
    """Exercises ScopeStack scope counting and scope-exit callbacks."""

    def setUp(self):
        """Give every test its own ScopeStack."""
        self.scope = ScopeStack()

    def testScopeStartsAtZero(self):
        """Tests that a new stack reports scope zero"""
        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists in Python 3.12.
        self.assertEqual(0, self.scope.getScope(),
                         "Scope didn't start at zero")

    def testCallback(self):
        """Tests that the scope stack will callback when not in a scope"""
        counter = CallbackCounter()

        def scopeCallback(counter):
            counter.count += 1

        callback = lambda: scopeCallback(counter)

        # Push a callback onto stack
        self.scope.handleIndentation(0)
        self.scope.indentstack.markScope(callback)

        # Calls the stack with a deeper indent
        self.scope.handleIndentation(STEP)
        self.assertEqual(0, self.scope.getScope())
        self.assertEqual(0, counter.count)

        # Falls back to the original scope
        self.scope.handleIndentation(0)
        self.assertEqual(1, counter.count)

    def testSingleScope(self):
        """Tests that a single scope is registered correctly"""
        self.scope.handleIndentation(0)
        self.scope.enterScope()
        self.scope.handleIndentation(STEP)
        self.assertEqual(1, self.scope.getScope())

        # Deeper indentation without enterScope does not open a new scope.
        self.scope.handleIndentation(2 * STEP)
        self.assertEqual(1, self.scope.getScope())

        self.scope.handleIndentation(STEP)
        self.assertEqual(1, self.scope.getScope())

        self.scope.handleIndentation(0)
        self.assertEqual(0, self.scope.getScope())

    def testMultiScope(self):
        """Tests a multiscope callback is called correctly"""
        self.scope.handleIndentation(0)
        self.assertEqual(0, self.scope.getScope())
        self.scope.enterScope()

        self.scope.handleIndentation(STEP)
        self.assertEqual(1, self.scope.getScope())
        self.scope.enterScope()

        self.scope.handleIndentation(2 * STEP)
        self.assertEqual(2, self.scope.getScope())
        self.scope.enterScope()

        # enterScope followed by the same indentation leaves the count at 2.
        self.scope.handleIndentation(2 * STEP)
        self.assertEqual(2, self.scope.getScope())

        self.scope.handleIndentation(STEP)
        self.assertEqual(1, self.scope.getScope())

        self.scope.handleIndentation(0)
        self.assertEqual(0, self.scope.getScope())
示例#8
0
 def __init__(self, ignore_whitespace):
     """Store the whitespace flag and build the indentation ScopeStack.

     ignore_whitespace is kept on the instance and also passed to ScopeStack.
     """
     self.ignore_whitespace = ignore_whitespace
     self.scope = ScopeStack(ignore_whitespace)
示例#9
0
文件: lex.py 项目: pate/pyRazor
class RazorLexer(object):
  """Encapsulates the razor token logic.

  Each token handler receives the scanner and the matched text and returns
  the text to emit; several handlers return None to drop the token.
  """
  @staticmethod
  def create(ignore_whitespace = False):
    """Creates the rules bound to a new lexer instance"""
    lex = RazorLexer(ignore_whitespace)
    lex.rules = (
        (Token.NEWLINE, (r"[\r]?[\n][ \t]*", bind(lex.newline))),
        (Token.ESCAPED, (r"@@", bind(lex.escaped))),
        # Non-greedy so that two inline comments on one line do not swallow
        # the text between them.
        (Token.COMMENT, (r"@#.*?#@", bind(lex.comment))),
        (Token.LINECOMMENT, (r"@#.*$", bind(lex.linecomment))),
        (Token.ONELINE, (r"@(?:import|from|model) .+$", bind(lex.oneline))),
        (Token.MULTILINE, (r"@\w*.*:$", bind(lex.multiline))),
        (Token.PARENEXPRESSION, (r"@!?\(", bind(lex.paren_expression))),
        (Token.EXPRESSION, (r"@!?(\w+(?:(?:\[.+\])|(?:\(.*\)))?(?:\.[a-zA-Z]+(?:(?:\[.+\])|(?:\(.*\)))?)*)", bind(lex.expression))),
        (Token.TEXT, (r"[^@\r\n]+", bind(lex.text))),
    )
    lex.lexer = sexylexer.Lexer(lex.rules)
    return lex

  def __init__(self, ignore_whitespace):
    """Initialise lexer state; use create() to obtain a usable lexer."""
    self.scope = ScopeStack(ignore_whitespace)
    self.ignore_whitespace = ignore_whitespace

  def scan(self, text):
    """Tokenize an input string"""
    if self.ignore_whitespace:
      return self.lexer.scan(text.lstrip())
    return self.lexer.scan(text)

  # Token Parsers
  def shouldEscape(self, token):
    """Returns false if this token should not be html escaped"""
    # Tokens look like "@..." or "@!..."; the "!" opts out of escaping.
    return token[1] != '!'

  def paren_expression(self, scanner, token):
    """Performs paren matching to find the end of a parenthesis expression

    Advances the scanner past the matching ")" and returns the enclosed
    expression, wrapped in cgi.escape(str(...)) unless the token is "@!(".
    Raises sexylexer.InvalidTokenError if the parentheses are not balanced
    before the end of the line or input.
    """
    start = scanner._position
    plevel = 1
    end = start
    for c in scanner.input[start:]:
      if plevel == 0:
        # Halt when we close our braces
        break
      elif c == '(':
        plevel += 1
      elif c == ')':
        plevel -= 1
      elif c == '\n':
        # Halt at new line
        break
      end += 1
    # parse exception
    if plevel != 0:
      raise sexylexer.InvalidTokenError()
    scanner._position = end

    # end points just past the closing paren, which is not part of the
    # expression itself.
    inner = scanner.input[start:end-1]
    # Our token here is either @!( or @(
    if not self.shouldEscape(token):
      return inner
    # We wrap the expression in a call to cgi.escape
    return "cgi.escape(str(" + inner + "))"

  def multiline(self, scanner, token):
    """Handles multiline expressions"""
    if token == "@:":
      #TODO(alusco): Actually implement multiple rules instead of this
      #sketchy situation here.
      scanner.ignoreRules = True
      def pop_multiline():
        scanner.ignoreRules = False
      self.scope.indentstack.markScope(pop_multiline)
      # We have to move past the end of line (this is a special case)
      # $ matches at the end of a line so it should be just +1
      scanner._position += 1
      return None
    else:
      # Convert helper syntax to a real python function
      if token.lower().startswith("@helper"):
        # Replace only the keyword: lower-casing the whole token would
        # change the case of the helper's name and arguments.
        token = "@def" + token[len("@helper"):]
      self.scope.enterScope()
      return token[1:]

  def escaped(self, scanner, token):
    """Escapes the @ token directly"""
    return "@"

  def expression(self, scanner, token):
    """Returns the expression, html escaped unless it starts with "@!"."""
    if not self.shouldEscape(token):
      return token[2:]
    return "cgi.escape(str(" + token[1:] + "))"

  def oneline(self, scanner, token):
    """Handles @import / @from / @model lines."""
    lower_token = token.lower()
    if lower_token.startswith("@model"):
      return "isinstance(model, " + token[token.rindex(' '):] + ")"
    else:
      return token[1:]

  def comment(self, scanner, token):
    """Ignores inline comments returning None"""
    return None

  def linecomment(self, scanner, token):
    """Ignores comments by returning None"""
    # Move the parser past the newline character
    scanner._position += 1
    return None

  def text(self, scanner, token):
    """Returns text escaped with ' escaped"""
    return token.replace("'","\\'")

  def newline(self, scanner, token):
    """Handles indention scope"""
    # Keep only the whitespace that follows the line break.
    nline = token.index('\n')+1
    token = token[nline:]
    self.scope.handleIndentation(len(token))
    if self.ignore_whitespace:
      return ""
    return token[self.scope.indentstack.getScopeIndentation():]
示例#10
0
class RazorLexer(object):
    """Encapsulates the razor token logic.

    Each token handler receives the scanner and the matched text and returns
    the text to emit; several handlers return None to drop the token.
    """
    @staticmethod
    def create(ignore_whitespace=False):
        """Creates the rules bound to a new lexer instance"""
        lex = RazorLexer(ignore_whitespace)
        lex.rules = (
            (Token.NEWLINE, (r"[\r]?[\n][ \t]*", bind(lex.newline))),
            (Token.ESCAPED, (r"@@", bind(lex.escaped))),
            # Non-greedy so that two inline comments on one line do not
            # swallow the text between them.
            (Token.COMMENT, (r"@#.*?#@", bind(lex.comment))),
            (Token.LINECOMMENT, (r"@#.*$", bind(lex.linecomment))),
            (Token.ONELINE, (r"@(?:import|from|model) .+$",
                             bind(lex.oneline))),
            (Token.MULTILINE, (r"@\w*.*:$", bind(lex.multiline))),
            (Token.PARENEXPRESSION, (r"@!?\(", bind(lex.paren_expression))),
            (Token.EXPRESSION,
             (r"@!?(\w+(?:(?:\[.+\])|(?:\(.*\)))?(?:\.[a-zA-Z]+(?:(?:\[.+\])|(?:\(.*\)))?)*)",
              bind(lex.expression))),
            (Token.TEXT, (r"[^@\r\n]+", bind(lex.text))),
        )
        lex.lexer = sexylexer.Lexer(lex.rules)
        return lex

    def __init__(self, ignore_whitespace):
        """Initialise lexer state; use create() to obtain a usable lexer."""
        self.scope = ScopeStack(ignore_whitespace)
        self.ignore_whitespace = ignore_whitespace

    def scan(self, text):
        """Tokenize an input string"""
        if self.ignore_whitespace:
            return self.lexer.scan(text.lstrip())
        return self.lexer.scan(text)

    # Token Parsers
    def shouldEscape(self, token):
        """Returns false if this token should not be html escaped"""
        # Tokens look like "@..." or "@!..."; the "!" opts out of escaping.
        return token[1] != '!'

    def paren_expression(self, scanner, token):
        """Performs paren matching to find the end of a parenthesis expression

        Advances the scanner past the matching ")" and returns the enclosed
        expression, wrapped in cgi.escape(str(...)) unless the token is "@!(".
        Raises sexylexer.InvalidTokenError if the parentheses are not
        balanced before the end of the line or input.
        """
        start = scanner._position
        plevel = 1
        end = start
        for c in scanner.input[start:]:
            if plevel == 0:
                # Halt when we close our braces
                break
            elif c == '(':
                plevel += 1
            elif c == ')':
                plevel -= 1
            elif c == '\n':
                # Halt at new line
                break
            end += 1
        # parse exception
        if plevel != 0:
            raise sexylexer.InvalidTokenError()
        scanner._position = end

        # end points just past the closing paren, which is not part of the
        # expression itself.
        inner = scanner.input[start:end - 1]
        # Our token here is either @!( or @(
        if not self.shouldEscape(token):
            return inner
        # We wrap the expression in a call to cgi.escape
        return "cgi.escape(str(" + inner + "))"

    def multiline(self, scanner, token):
        """Handles multiline expressions"""
        if token == "@:":
            #TODO(alusco): Actually implement multiple rules instead of this
            #sketchy situation here.
            scanner.ignoreRules = True

            def pop_multiline():
                scanner.ignoreRules = False

            self.scope.indentstack.markScope(pop_multiline)
            # We have to move past the end of line (this is a special case)
            # $ matches at the end of a line so it should be just +1
            scanner._position += 1
            return None
        else:
            # Convert helper syntax to a real python function
            if token.lower().startswith("@helper"):
                # Replace only the keyword: lower-casing the whole token
                # would change the case of the helper's name and arguments.
                token = "@def" + token[len("@helper"):]
            self.scope.enterScope()
            return token[1:]

    def escaped(self, scanner, token):
        """Escapes the @ token directly"""
        return "@"

    def expression(self, scanner, token):
        """Returns the expression, html escaped unless it starts with "@!"."""
        if not self.shouldEscape(token):
            return token[2:]
        return "cgi.escape(str(" + token[1:] + "))"

    def oneline(self, scanner, token):
        """Handles @import / @from / @model lines."""
        lower_token = token.lower()
        if lower_token.startswith("@model"):
            return "isinstance(model, " + token[token.rindex(' '):] + ")"
        else:
            return token[1:]

    def comment(self, scanner, token):
        """Ignores inline comments returning None"""
        return None

    def linecomment(self, scanner, token):
        """Ignores comments by returning None"""
        # Move the parser past the newline character
        scanner._position += 1
        return None

    def text(self, scanner, token):
        """Returns text escaped with ' escaped"""
        return token.replace("'", "\\'")

    def newline(self, scanner, token):
        """Handles indention scope"""
        # Keep only the whitespace that follows the line break.
        nline = token.index('\n') + 1
        token = token[nline:]
        self.scope.handleIndentation(len(token))
        if self.ignore_whitespace:
            return ""
        return token[self.scope.indentstack.getScopeIndentation():]
示例#11
0
 def __init__(self):
     """Create the helper objects and the regex used to find scope names."""
     # Collaborators first; each is default-constructed.
     self.bytecodes = Bytecodes()
     self.preScope = Scope()
     self.scopeStack = ScopeStack()
     # Matches the identifier that is followed by "(" or ":".
     self.pattern = re.compile(r"""\s+(?P<name>[_a-zA-Z0-9]+)\s*(\(|:)""")
     # No indentation unit is known until one is assigned.
     self.indentTag = None
示例#12
0
class Disassemble(object):
    """Interleaves the lines of a Python source file with their bytecode.

    parsePyFile is the entry point: it reads the file, records the bytecode
    of every def/class scope and of the module, and returns the source text
    with "##"-prefixed bytecode lines after each source line.
    """

    def __init__(self):
        # Indentation unit of the file (e.g. four spaces); None until found.
        self.indentTag = None
        self.pattern = re.compile(r"""\s+(?P<name>[_a-zA-Z0-9]+)\s*(\(|:)""")
        self.scopeStack = ScopeStack()
        self.preScope = Scope()
        self.bytecodes = Bytecodes()

    #
    # Methods for handling indentation
    #
    def getIndent(self, line):
        """Return the leading run of spaces and tabs of line."""
        stripped = line.lstrip(" \t")
        return line[:len(line) - len(stripped)]

    def isIndented(self, line):
        """Return True if line starts with a space or a tab.

        An empty string is reported as not indented.
        """
        return line.startswith((" ", "\t"))

    def getIndentTag(self, source):
        """Record the indentation of the first indented line as indentTag.

        source maps line numbers to line text; lines are examined in
        line-number order. Does nothing if indentTag is already set.
        """
        if self.indentTag:
            return
        for lineNum, line in sorted(source.items()):
            if self.isIndented(line):
                self.indentTag = self.getIndent(line)
                return

    def getIndentLevel(self, line):
        """Return how many indentTag units line is indented by (0 if unknown)."""
        if not self.indentTag:
            return 0
        indent = self.getIndent(line)
        return indent.count(self.indentTag)

    #
    # Methods for handling scope names
    #
    def getScopeName(self, line):
        """Return the name defined on a def/class line, or None."""
        # Definitions written like 'def   f  (  a  ):  ' must be handled,
        # so a regular expression is the most practical approach.
        matchObj = self.pattern.search(line)
        if not matchObj:
            return None
        return matchObj.group("name")

    def getScope(self, line):
        """Build a Scope carrying the name and indent level found on line."""
        scope = Scope()
        scope.name = self.getScopeName(line)
        scope.indentLevel = self.getIndentLevel(line)
        return scope

    def containScopeTag(self, line):
        """Return True if line starts (after whitespace) with def or class.

        The keyword must be a whole word, so lines such as "default = 1" or
        "classes = []" are not mistaken for scope headers.
        """
        return re.match(r"(?:def|class)\b", line.strip()) is not None

    def enterNewScope(self, curScope):
        """Return True if curScope is indented deeper than preScope."""
        return curScope.indentLevel > self.preScope.indentLevel

    def exitLastScope(self, curScope):
        """Return True if curScope is indented less than preScope."""
        return curScope.indentLevel < self.preScope.indentLevel

    #
    # Methods for parsing the source file
    #
    def dis(self, scopeName=None):
        """Run sdis.dis and return what it printed as a list of lines.

        When scopeName is given, the first output line is dropped.
        """
        oldStdout = sys.stdout
        output = StringIO.StringIO()
        sys.stdout = output
        try:
            sdis.dis(scopeName)
            content = output.getvalue()
        finally:
            # Always give stdout back, even if the disassembler raises.
            sys.stdout = oldStdout
            output.close()
        lines = content.split("\n")
        if scopeName:
            return lines[1:]
        return lines

    def parseDis(self, bytecodes):
        """Print the fields of every bytecode line matched by bytecodePattern.

        NOTE(review): bytecodePattern is not assigned anywhere in this class;
        it must be set on the instance before calling this method.
        """
        for bytecode in bytecodes:
            if not bytecode:
                continue
            matchObj = self.bytecodePattern.search(bytecode)
            if not matchObj:
                print("can not parse")
                # Nothing to report for this line; skip it instead of
                # dereferencing the missing match.
                continue
            print("%s\t%s\t%s\t\t%s\t%s" % (
                matchObj.group("lineNum"),
                matchObj.group("opOffset"),
                matchObj.group("opCode"),
                matchObj.group("opArg"),
                matchObj.group("const"),
            ))

    def readPyFile(self, fileName):
        """Read fileName and return a dict mapping line number to line text."""
        with open(fileName) as pyFile:
            lines = pyFile.readlines()
        # In a .py file the first line is number 1, not 0, so a placeholder
        # is inserted at index 0.
        lines.insert(0, "*")
        pySource = dict(enumerate(lines))
        return pySource

    def getDesassembledSource(self, source):
        """Return the source text with bytecode lines after each source line.

        Lines are emitted in line-number order and joined with "\\r\\n".
        """
        result = []
        for lineNum, line in sorted(source.items()):
            if not line.strip():
                result.append("")
                continue

            # Line 0 is the placeholder added by readPyFile.
            if lineNum == 0:
                continue

            indentLevel = self.getIndentLevel(line)
            if not self.indentTag:
                preSpace = "##"
            else:
                preSpace = self.indentTag * indentLevel + "##"

            result.append(line.rstrip())
            bytecodes4line = self.bytecodes.getBytecodes4Line(lineNum)
            for bytecode in bytecodes4line:
                result.append(preSpace + bytecode.__str__())
        return "\r\n".join(result)

    def recognizeScope(self, source):
        """Disassemble every def/class scope in source, then the module.

        Each scope header line in source is tagged in place with a trailing
        " #<scopeName>" comment.
        """
        self.getIndentTag(source)
        # The scope stack depends on seeing lines in file order, so do not
        # rely on the dict's own iteration order.
        for lineNum, line in sorted(source.items()):
            if self.containScopeTag(line):
                curScope = self.getScope(line)
                self.scopeStack.popUntil(curScope)
                self.scopeStack.push(curScope)
                scopeName = "%s" % self.scopeStack
                # scopeName has the form $.A.func, but the dis tool only
                # accepts the form A.func.
                self.bytecodes.parseDisResult(scopeName, self.dis(scopeName[2:]))
                source[lineNum] = line.rstrip() + (" #%s" % scopeName)
        # Handle the bytecode of the module itself;
        # "$" is the module's own scopeName.
        self.bytecodes.parseDisResult("$", self.dis())

    def parsePyFile(self, fileName):
        """Read fileName and return its source annotated with bytecode."""
        pySource = self.readPyFile(fileName)
        sdis.read(fileName)
        self.recognizeScope(pySource)
        return self.getDesassembledSource(pySource)
示例#13
0
 def __init__(self):
     """Create the helper objects and the regex used to find scope names."""
     # Collaborators first; each is default-constructed.
     self.bytecodes = Bytecodes()
     self.preScope = Scope()
     self.scopeStack = ScopeStack()
     # Matches the identifier that is followed by "(" or ":".
     self.pattern = re.compile(r"""\s+(?P<name>[_a-zA-Z0-9]+)\s*(\(|:)""")
     # No indentation unit is known until one is assigned.
     self.indentTag = None
示例#14
0
class Disassemble(object):
    """Interleaves the lines of a Python source file with their bytecode.

    parsePyFile is the entry point: it reads the file, records the bytecode
    of every def/class scope and of the module, and returns the source text
    with '##'-prefixed bytecode lines after each source line.
    """

    def __init__(self):
        # Indentation unit of the file (e.g. four spaces); None until found.
        self.indentTag = None
        self.pattern = re.compile(r"""\s+(?P<name>[_a-zA-Z0-9]+)\s*(\(|:)""")
        self.scopeStack = ScopeStack()
        self.preScope = Scope()
        self.bytecodes = Bytecodes()

    #
    # Methods for handling indentation
    #
    def getIndent(self, line):
        """Return the leading run of spaces and tabs of line."""
        stripped = line.lstrip(' \t')
        return line[:len(line) - len(stripped)]

    def isIndented(self, line):
        """Return True if line starts with a space or a tab.

        An empty string is reported as not indented.
        """
        return line.startswith((' ', '\t'))

    def getIndentTag(self, source):
        """Record the indentation of the first indented line as indentTag.

        source maps line numbers to line text; lines are examined in
        line-number order. Does nothing if indentTag is already set.
        """
        if self.indentTag:
            return
        for lineNum, line in sorted(source.items()):
            if self.isIndented(line):
                self.indentTag = self.getIndent(line)
                return

    def getIndentLevel(self, line):
        """Return how many indentTag units line is indented by (0 if unknown)."""
        if not self.indentTag:
            return 0
        indent = self.getIndent(line)
        return indent.count(self.indentTag)

    #
    # Methods for handling scope names
    #
    def getScopeName(self, line):
        """Return the name defined on a def/class line, or None."""
        # Definitions written like 'def   f  (  a  ):  ' must be handled,
        # so a regular expression is the most practical approach.
        matchObj = self.pattern.search(line)
        if not matchObj:
            return None
        return matchObj.group('name')

    def getScope(self, line):
        """Build a Scope carrying the name and indent level found on line."""
        scope = Scope()
        scope.name = self.getScopeName(line)
        scope.indentLevel = self.getIndentLevel(line)
        return scope

    def containScopeTag(self, line):
        """Return True if line starts (after whitespace) with def or class.

        The keyword must be a whole word, so lines such as 'default = 1' or
        'classes = []' are not mistaken for scope headers.
        """
        return re.match(r'(?:def|class)\b', line.strip()) is not None

    def enterNewScope(self, curScope):
        """Return True if curScope is indented deeper than preScope."""
        return curScope.indentLevel > self.preScope.indentLevel

    def exitLastScope(self, curScope):
        """Return True if curScope is indented less than preScope."""
        return curScope.indentLevel < self.preScope.indentLevel

    #
    # Methods for parsing the source file
    #
    def dis(self, scopeName=None):
        """Run sdis.dis and return what it printed as a list of lines.

        When scopeName is given, the first output line is dropped.
        """
        oldStdout = sys.stdout
        output = StringIO.StringIO()
        sys.stdout = output
        try:
            sdis.dis(scopeName)
            content = output.getvalue()
        finally:
            # Always give stdout back, even if the disassembler raises.
            sys.stdout = oldStdout
            output.close()
        lines = content.split('\n')
        if scopeName:
            return lines[1:]
        return lines

    def parseDis(self, bytecodes):
        """Print the fields of every bytecode line matched by bytecodePattern.

        NOTE(review): bytecodePattern is not assigned anywhere in this class;
        it must be set on the instance before calling this method.
        """
        for bytecode in bytecodes:
            if not bytecode:
                continue
            matchObj = self.bytecodePattern.search(bytecode)
            if not matchObj:
                print('can not parse')
                # Nothing to report for this line; skip it instead of
                # dereferencing the missing match.
                continue
            print('%s\t%s\t%s\t\t%s\t%s' % (
                matchObj.group('lineNum'), matchObj.group('opOffset'),
                matchObj.group('opCode'), matchObj.group('opArg'),
                matchObj.group('const')))

    def readPyFile(self, fileName):
        """Read fileName and return a dict mapping line number to line text."""
        with open(fileName) as pyFile:
            lines = pyFile.readlines()
        # In a .py file the first line is number 1, not 0, so a placeholder
        # is inserted at index 0.
        lines.insert(0, '*')
        pySource = dict(enumerate(lines))
        return pySource

    def getDesassembledSource(self, source):
        """Return the source text with bytecode lines after each source line.

        Lines are emitted in line-number order and joined with '\\r\\n'.
        """
        result = []
        for lineNum, line in sorted(source.items()):
            if not line.strip():
                result.append('')
                continue

            # Line 0 is the placeholder added by readPyFile.
            if lineNum == 0:
                continue

            indentLevel = self.getIndentLevel(line)
            if not self.indentTag:
                preSpace = '##'
            else:
                preSpace = self.indentTag * indentLevel + '##'

            result.append(line.rstrip())
            bytecodes4line = self.bytecodes.getBytecodes4Line(lineNum)
            for bytecode in bytecodes4line:
                result.append(preSpace + bytecode.__str__())
        return '\r\n'.join(result)

    def recognizeScope(self, source):
        """Disassemble every def/class scope in source, then the module.

        Each scope header line in source is tagged in place with a trailing
        ' #<scopeName>' comment.
        """
        self.getIndentTag(source)
        # The scope stack depends on seeing lines in file order, so do not
        # rely on the dict's own iteration order.
        for lineNum, line in sorted(source.items()):
            if self.containScopeTag(line):
                curScope = self.getScope(line)
                self.scopeStack.popUntil(curScope)
                self.scopeStack.push(curScope)
                scopeName = '%s' % self.scopeStack
                # scopeName has the form $.A.func, but the dis tool only
                # accepts the form A.func.
                self.bytecodes.parseDisResult(scopeName,
                                              self.dis(scopeName[2:]))
                source[lineNum] = line.rstrip() + (" #%s" % scopeName)
        # Handle the bytecode of the module itself;
        # '$' is the module's own scopeName.
        self.bytecodes.parseDisResult('$', self.dis())

    def parsePyFile(self, fileName):
        """Read fileName and return its source annotated with bytecode."""
        pySource = self.readPyFile(fileName)
        sdis.read(fileName)
        self.recognizeScope(pySource)
        return self.getDesassembledSource(pySource)