Exemplos de Lexer em Python, exemplos de tools.Lexer em Python

Exemplo n.º 1

0

Exibir arquivo

 def setUp(self):
     '''
     Don't run Moses server here (just incremental tests)
     - Take all the .orig test files and create the IndexBuilders and ScopeAnalysts here
     '''
     self.testDir = Folder("./testing/test_files/")
     self.clearTextFiles = self.fileSort(
         self.testDir.baseFileNames("*.orig.js"))
     print("Files: " + str(self.clearTextFiles))
     self.obsfuscatedTextFiles = self.fileSort(
         self.testDir.baseFileNames("*.obs.js"))
     self.postTextFiles = self.fileSort(
         self.testDir.baseFileNames("*.post_input.js"))
     self.clearTextFiles = [
         os.path.join(self.testDir.path, file)
         for file in self.clearTextFiles
     ]
     self.obsfuscatedTextFiles = [
         os.path.join(self.testDir.path, file)
         for file in self.obsfuscatedTextFiles
     ]
     self.postTextFiles = [
         os.path.join(self.testDir.path, file)
         for file in self.postTextFiles
     ]
     #print(self.testDir.path)
     #print(self.clearTextFiles)
     self.clearLexed = [Lexer(file) for file in self.clearTextFiles]
     self.obsLexed = [Lexer(file) for file in self.obsfuscatedTextFiles]
     self.postText = [
         "".join(open(file, "r").readlines()) for file in self.postTextFiles
     ]

Exemplo n.º 2

0

Exibir arquivo

    def testFiles(self):
        tf = [1, 5, 6, 7, 8, 9, 10, 11]
        #tf = [11]

        for i in tf:
            print("-----------------------------------------------------")
            lexed = Lexer(self.fileList[i - 1])
            ib = IndexBuilder(lexed.tokenList)
            #print(ib)
            sa = ScopeAnalyst(self.fileList[i - 1])
            print(sa)
            nameCount = {}
            #TODO: Grab only the non-globals to look at (get the start key and look it up)
            for variable in sa.nameDefScope2pos.keys():
                start = sa.nameDefScope2pos[variable]
                name = variable[0]
                if (not sa.isGlobal[(name, start)]):
                    if (name in nameCount):
                        nameCount[name] += 1
                    else:
                        nameCount[name] = 1
                    print(
                        str(name) + " : " +
                        str(sa.nameDefScope2pos[variable]) + " -> " +
                        str(ib.revFlatMat[sa.nameDefScope2pos[variable]]) +
                        " Manual: " + str(self.file_definitions[i][name]))
                    assert (ib.revFlatMat[sa.nameDefScope2pos[variable]][0]
                            in self.file_definitions[i][name])

            #Finally make sure that the count of definitions matches our manual check.
            for name, count in nameCount.iteritems():
                print(name + " : " + str(count) + " =?= " +
                      str(len(self.file_definitions[i][name])))
                assert (len(self.file_definitions[i][name]) == count)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: r.py Projeto: renesugar/jsNaughty

def processFile(js_file_name):

    candidates = []

    lexer = Lexer(js_file_name)
    iBuilder = IndexBuilder(lexer.tokenList)

    scopeAnalyst = ScopeAnalyst(js_file_name)
    nameOrigin = scopeAnalyst.nameOrigin
    isGlobal = scopeAnalyst.isGlobal
    nameDefScope2pos = scopeAnalyst.nameDefScope2pos

    for (name, def_scope) in nameOrigin.iterkeys():
        pos = nameDefScope2pos[(name, def_scope)]

        (lin, col) = iBuilder.revFlatMat[pos]
        scope = iBuilder.revTokMap[(lin, col)]

        glb = isGlobal.get((name, pos), True)


        if name != 'TOKEN_LITERAL_STRING' and \
                name != 'TOKEN_LITERAL_NUMBER':
            candidates.append((scope, name, pos, (lin, col), glb, def_scope))

    print
    print
    for c in sorted(candidates, key=lambda e: e[0]):
        (scope, name, pos, (lin, col), glb, def_scope) = c

        if name == 'n' or name == 'calendarEventId':
            print '\t', scope, name, pos, (lin, col), glb
            print '\t\t', def_scope

Exemplo n.º 4

0

Exibir arquivo

Arquivo: sanityCheck.py Projeto: renesugar/jsNaughty

        def load(pth):
            lexer = Lexer(pth)
            iBuilder = IndexBuilder(lexer.tokenList)

            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)), pth))

            return (iBuilder, scopeAnalyst)

Exemplo n.º 5

0

Exibir arquivo

 def testFiles(self):
     #Known bugs:  The definitions of sum and numberEquals in test_file1 seem to be pointing to the wrong instance...
     i = 1
     lexed = Lexer(self.fileList[0])
     ib = IndexBuilder(lexed.tokenList)
     sa = ScopeAnalyst(self.fileList[0])
     for variable in sa.nameDefScope2pos.keys():
         print(
             str(variable[0]) + " : " + str(sa.nameDefScope2pos[variable]) +
             " -> " + str(ib.revFlatMat[sa.nameDefScope2pos[variable]]))

Exemplo n.º 6

0

Exibir arquivo

Arquivo: evalRenamingHelper.py Projeto: renesugar/jsNaughty

def processFile(l):
    
    js_file_name = l
    
    candidates = []
    
    if(True):    
    #try:
        print(js_file_name)
        lexer = Lexer(js_file_name)
        return IndexBuilder(lexer.tokenList)

Exemplo n.º 7

0

Exibir arquivo

Arquivo: testStats.py Projeto: renesugar/jsNaughty

 def testfileDebug(self):
     for f in self.fileList:
         print("---------------------------------- " + f +
               " ----------------------------------")
         orig = f + ".js"
         min = f + ".u.js"
         lo = Lexer(orig)
         lm = Lexer(min)
         print(
             "---------------------------------- original text ----------------------------------"
         )
         print(lo.programText)
         print(
             "---------------------------------- minified text ----------------------------------"
         )
         print(lm.programText)
         for id in self.ids:
             to_read = f + id + ".js"
             print("---------------------------------- " + to_read +
                   " ----------------------------------")
             lexed = Lexer(to_read)
             print(
                 "---------------------------------- text ----------------------------------"
             )
             print(lexed.programText)
             print(
                 "---------------------------------- tokenlist ----------------------------------"
             )
             print(lexed.tokenList)
             ib = IndexBuilder(lexed.tokenList)
             print(
                 "---------------------------------- IndexBuilder ----------------------------------"
             )
             print(ib)
             sa = ScopeAnalyst(to_read)
             print(
                 "---------------------------------- ScopeAnalyst ----------------------------------"
             )
             print(sa)

Exemplo n.º 8

0

Exibir arquivo

def processFile(l):

    js_file_name = l

    candidates = []

    try:
        lexer = Lexer(os.path.join(results_path, js_file_name))
        iBuilder = IndexBuilder(lexer.tokenList)

        scopeAnalyst = ScopeAnalyst(os.path.join(results_path, js_file_name))
        nameOrigin = scopeAnalyst.nameOrigin
        isGlobal = scopeAnalyst.isGlobal
        nameDefScope2pos = scopeAnalyst.nameDefScope2pos

        for (name, def_scope) in nameOrigin.iterkeys():
            pos = nameDefScope2pos[(name, def_scope)]

            (lin, col) = iBuilder.revFlatMat[pos]
            scope = iBuilder.revTokMap[(lin, col)]

            glb = isGlobal.get((name, pos), True)

            #             print name, def_scope, pos, scope, glb #, (lin,col)

            #             if not isGlobal.get((name, pos), True):
            #                 scope = def_scope.replace("\"","")
            #                 i = scope.find('[variables][_values]')
            #                 if i > -1:
            #                     scope = scope[:i+len('[variables][_values]')]
            #                 i = scope.find('[functions][_values]')
            #                 if i > -1:
            #                     scope = scope[:i+len('[functions][_values]')]

            if name != 'TOKEN_LITERAL_STRING' and \
                    name != 'TOKEN_LITERAL_NUMBER':
                candidates.append((scope, name, glb))

    except:
        return (js_file_name, None, 'ScopeAnalyst fail')


#     print 'candidates------------------'
#     for candidate in candidates:
#         print candidate

    return (js_file_name, 'OK', candidates)

Exemplo n.º 9

0

Exibir arquivo

Arquivo: runSimplifiedExperiment.py Projeto: renesugar/jsNaughty

def summarizeUnscopedTranslation(renaming_map, f_path, translation_strategy,
                                 output_path, base_name, name_candidates,
                                 name_positions, iBuilder):

    nc = []

    f_base = os.path.basename(f_path)
    training_strategy = f_base.split('.')[1]
    tmp_path = '%s.%s.js' % (f_base[:-3], translation_strategy)
    o_path = '%s.%s.unscoped.%s.js' % (base_name, training_strategy,
                                       translation_strategy)

    #     print f_path, f_base, training_strategy, tmp_path, o_path, base_name

    writeTmpLines(renameHashed(iBuilder, name_positions, renaming_map),
                  tmp_path)

    clear = Beautifier()
    ok = clear.run(tmp_path, os.path.join(output_path, o_path))
    if not ok:
        return False

    try:
        lexer = Lexer(os.path.join(output_path, o_path))
        iBuilder_local = IndexBuilder(lexer.tokenList)

        scopeAnalyst_local = ScopeAnalyst(os.path.join(output_path, o_path))
    except:
        return False

    nameOrigin = scopeAnalyst_local.nameOrigin
    isGlobal = scopeAnalyst_local.isGlobal

    for (name, def_scope) in nameOrigin.iterkeys():

        pos = scopeAnalyst_local.nameDefScope2pos[(name, def_scope)]

        if not False:  #isGlobal.get((name, pos), True):
            (lin, col) = iBuilder_local.revFlatMat[pos]
            (tok_lin, tok_col) = iBuilder_local.revTokMap[(lin, col)]

            nc.append(
                ('%s.unscoped.%s' % (training_strategy, translation_strategy),
                 def_scope, tok_lin, tok_col, isGlobal.get(
                     (name, pos), True), name, '', ''))

    return nc

Exemplo n.º 10

0

Exibir arquivo

Arquivo: buildTestCorpus.py Projeto: renesugar/jsNaughty

def processFile(l):

    js_file_path = l[0]

    pid = int(multiprocessing.current_process().ident)

    try:
        # Temp files to be created during processing
        path_tmp = 'tmp_%d.js' % pid
        path_tmp_b = 'tmp_%d.b.js' % pid

        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(path_tmp)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout
        clear = Beautifier()
        ok = clear.run(path_tmp, path_tmp_b)

        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Beautifier fail')

        try:
            iBuilder_clear = IndexBuilder(Lexer(path_tmp_b).tokenList)
        except:
            cleanup(pid)
            return (js_file_path, None, 'IndexBuilder fail')

        n_lines = len(iBuilder_clear.tokens)
        max_line_len = max([len(l) for l in iBuilder_clear.tokens])

        cleanup(pid)
        return (js_file_path, n_lines, max_line_len)

    except Exception, e:
        cleanup(pid)
        return (js_file_path, None, str(e))

Exemplo n.º 11

0

Exibir arquivo

Arquivo: countGlobals.py Projeto: renesugar/jsNaughty

def processFile(l):
    base_name = l[0]
    js_file_path = l[1]
    print(base_name)
    print(js_file_path)
    #if(True):
    try:
        lexed = Lexer(js_file_path)
        ib = IndexBuilder(lexed.tokenList)
        sa = ScopeAnalyst(js_file_path)
        #num globals = all in is_global == True + all unique names
        #in name2CharPositions not in is_global
        base_global = set(
            [name for name, value in sa.isGlobal.iteritems() if value == True])
        #Get all known names in the file.
        known_names = set([name for name, value in sa.isGlobal.iteritems()])
        for name, loc in ib.name2CharPositions.iteritems():
            if (name not in known_names):  #if never seen, its a global
                base_global.add(name)

        return [base_name, len(base_global)]
    except:
        return [base_name, None]

Exemplo n.º 12

0

Exibir arquivo

Arquivo: beautifyCorpus.py Projeto: renesugar/jsNaughty

def processFile(row):

    js_file_path = os.path.join(corpus_root, row[0])

    pid = int(multiprocessing.current_process().ident)
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]

    # Temp files to be created during processing
    temp_files = {
        'path_tmp': 'tmp_%d.js' % pid,
        'path_tmp_b': 'tmp_%d.b.js' % pid,
        'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
        'path_tmp_u': 'tmp_%d.u.js' % pid,
        'path_tmp_u_a': 'tmp_%d.u.a.js' % pid
    }

    try:
        # Pass through beautifier to fix layout:
        #         # - once through JSNice without renaming
        #         jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])
        #
        #         (ok, _out, _err) = jsNiceBeautifier.run(js_file_path,
        #                                                temp_files['path_tmp'])
        #         if not ok:
        #             cleanup(temp_files)
        #             return (js_file_path, False, 'JSNice Beautifier fail')
        #
        #
        #         # Weird JSNice renamings despite --no-rename
        #         try:
        #             before = set([token for (token, token_type) in
        #                           Lexer(js_file_path).tokenList
        #                           if is_token_subtype(token_type, Token.Name)])
        #             after = set([token for (token, token_type) in
        #                           Lexer(temp_files['path_tmp']).tokenList
        #                           if is_token_subtype(token_type, Token.Name)])
        #
        #             if not before == after:
        #                 return (js_file_path, False, 'Weird JSNice renaming')
        #
        #         except:
        #             cleanup(temp_files)
        #             return (js_file_path, False, 'Lexer fail')

        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')

        # - and another time through uglifyjs pretty print only
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'], temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, False, 'Beautifier fail')

        # Minify
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, False, 'Uglifier fail')

        # Num tokens before vs after
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, False, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, False, 'Num tokens mismatch')

        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, False, 'Aligner fail')

        # Check if minification resulted in any change
        # It's not very interesting otherwise
        if open(temp_files['path_tmp_b_a']).read() == \
                open(temp_files['path_tmp_u_a']).read():
            cleanup(temp_files)
            return (js_file_path, False, 'Not minified')

        try:
            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            _iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, False, 'IndexBuilder fail')

        # Store original and uglified versions
        ok = clear.run(temp_files['path_tmp_b_a'],
                       os.path.join(output_path, '%s.js' % base_name))
        if not ok:
            cleanup(temp_files)
            cleanupProcessed(base_name)
            return (js_file_path, False, 'Beautifier fail')

        ok = clear.run(temp_files['path_tmp_u_a'],
                       os.path.join(output_path, '%s.u.js' % base_name))
        if not ok:
            cleanup(temp_files)
            cleanupProcessed(base_name)
            return (js_file_path, False, 'Beautifier fail')

        cleanup(temp_files)
        return (js_file_path, True, 'OK')

    except Exception, e:
        cleanup(temp_files)
        return (js_file_path, False, str(e))

Exemplo n.º 13

0

Exibir arquivo

Arquivo: prepCorpus.py Projeto: renesugar/jsNaughty

def processFile(js_file_path):

    try:

        # Num tokens before vs after
        try:
            tok1 = Lexer(os.path.join(files_root, 'orig',
                                      js_file_path)).tokenList
            tok2 = Lexer(os.path.join(files_root, 'no_renaming',
                                      js_file_path)).tokenList
            #             tok3 = Lexer(os.path.join(files_root, 'basic_renaming', js_file_path)).tokenList
            #             tok4 = Lexer(os.path.join(files_root, 'normalized', js_file_path)).tokenList
            tok5 = Lexer(
                os.path.join(files_root, 'hash_def_one_renaming',
                             js_file_path)).tokenList
            tok6 = Lexer(
                os.path.join(files_root, 'hash_def_two_renaming',
                             js_file_path)).tokenList
        except:
            return (js_file_path, None, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(set([len(tok1), len(tok2), len(tok5), len(tok6)])) == 1:
            return (js_file_path, None, 'Num tokens mismatch')

        clear = Beautifier()
        # Align minified and clear files, in case the beautifier
        # did something weird
        aligner = Aligner()

        (aligned1, aligned2) = aligner.web_align(tok1, tok2)

        (ok, beautified1, _err) = clear.web_run(aligned1)
        tok11 = WebLexer(beautified1).tokenList

        (ok, beautified2, _err) = clear.web_run(aligned2)
        tok22 = WebLexer(beautified2).tokenList

        (aligned5, aligned2) = aligner.web_align(tok5, tok2)

        (ok, beautified5, _err) = clear.web_run(aligned5)
        tok55 = WebLexer(beautified5).tokenList

        (aligned6, aligned2) = aligner.web_align(tok6, tok2)

        (ok, beautified6, _err) = clear.web_run(aligned6)
        tok66 = WebLexer(beautified6).tokenList

        #         try:
        #             aligner = Aligner()
        #             # This is already the baseline corpus, no (smart) renaming yet
        #             aligner.align(temp_files['path_tmp_b'],
        #                           temp_files['path_tmp_u'])
        #         except:
        #             return (js_file_path, None, 'Aligner fail')

        try:
            iBuilder1 = IndexBuilder(tok11)
            iBuilder2 = IndexBuilder(tok22)
            #             iBuilder3 = IndexBuilder(tok3)
            #             iBuilder4 = IndexBuilder(tok4)
            iBuilder5 = IndexBuilder(tok55)
            iBuilder6 = IndexBuilder(tok66)
        except:
            return (js_file_path, None, 'IndexBuilder fail')

        # Check that at least one variable was renamed during minification
        orig_names = set([
            token for line in iBuilder1.tokens for (token_type, token) in line
            if is_token_subtype(token_type, Token.Name)
        ])
        ugly_names = set([
            token for line in iBuilder2.tokens for (token_type, token) in line
            if is_token_subtype(token_type, Token.Name)
        ])
        if not len(orig_names.difference(ugly_names)):
            return (js_file_path, None, 'Not minified')

        orig = []
        no_renaming = []
        #         basic_renaming = []
        #         normalized = []
        hash_def_one_renaming = []
        hash_def_two_renaming = []

        for _line_idx, line in enumerate(iBuilder1.tokens):
            orig.append(' '.join([t for (_tt, t) in line]) + "\n")

        for _line_idx, line in enumerate(iBuilder2.tokens):
            no_renaming.append(' '.join([t for (_tt, t) in line]) + "\n")


#         for _line_idx, line in enumerate(iBuilder3.tokens):
#             basic_renaming.append(' '.join([t for (_tt,t) in line]) + "\n")

#         for _line_idx, line in enumerate(iBuilder4.tokens):
#             normalized.append(' '.join([t for (_tt,t) in line]) + "\n")

        for _line_idx, line in enumerate(iBuilder5.tokens):
            hash_def_one_renaming.append(' '.join([t for (_tt, t) in line]) +
                                         "\n")

        for _line_idx, line in enumerate(iBuilder6.tokens):
            hash_def_two_renaming.append(' '.join([t for (_tt, t) in line]) +
                                         "\n")

        return (
            js_file_path,
            orig,
            no_renaming,
            #                 basic_renaming,
            #                 normalized,
            hash_def_one_renaming,
            hash_def_two_renaming)

    except Exception, e:
        return (js_file_path, None, str(e))

Exemplo n.º 14

0

Exibir arquivo

Arquivo: renameAndUglify.py Projeto: renesugar/jsNaughty

def processFile(l):
    
    js_file_path = l[0]
    
    if js_file_path in seen:
        return (js_file_path, None, 'Skipped')
    
    pid = int(multiprocessing.current_process().ident)
    
    # Temp files to be created during processing
    temp_files = {'path_tmp': 'tmp_%d.js' % pid,
                  'path_tmp_b': 'tmp_%d.b.js' % pid,
                  'path_tmp_b_n': 'tmp_%d.b.n.js' % pid,
                  'path_tmp_u': 'tmp_%d.u.js' % pid,
                  'path_tmp_u_n': 'tmp_%d.u.n.js' % pid,
                  'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
                  'path_tmp_u_a': 'tmp_%d.u.a.js' % pid}
    
    try:        
        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')
        
        
        # Pass through beautifier to fix layout:
        # - once through JSNice without renaming
#         jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])
#         
#         (ok, _out, _err) = jsNiceBeautifier.run(temp_files['path_tmp'], 
#                                                 temp_files['path_tmp_b_n'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'JSNice Beautifier fail')
        
        
#         # - and another time through uglifyjs pretty print only 
#         clear = Beautifier()
#         ok = clear.run(temp_files['path_tmp_b_n'], 
#                        temp_files['path_tmp_b'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Beautifier fail')
        
#         # JSNice is down! 
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'], 
                       temp_files['path_tmp_b_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')
        # Normalize
        norm = Normalizer()
        ok = norm.run(os.path.join(os.path.dirname(os.path.realpath(__file__)), 
                                 temp_files['path_tmp_b_n']),
                      False, 
                      temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')
        
        
        
        # Minify
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'], 
                      temp_files['path_tmp_u_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Uglifier fail')
        # Normalize
        norm = Normalizer()
        ok = norm.run(os.path.join(os.path.dirname(os.path.realpath(__file__)), 
                                 temp_files['path_tmp_u_n']),
                      False, 
                      temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')
        
        
        
        # Num tokens before vs after
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Lexer fail')
        
        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, None, 'Num tokens mismatch')
        
        
        # Align minified and clear files, in case the beautifier 
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(temp_files['path_tmp_b'], 
                          temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Aligner fail')
        
        try:
            lex_clear = Lexer(temp_files['path_tmp_b_a'])
            iBuilder_clear = IndexBuilder(lex_clear.tokenList)
            
            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')
        
        
        
        # Normalize
        norm = Normalizer()
        ok = norm.run(os.path.join(os.path.dirname(os.path.realpath(__file__)), 
                                 temp_files['path_tmp_b']),
                      True, 
                      temp_files['path_tmp_u_n'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Normalizer fail')
        
        try:
            lex_norm = Lexer(temp_files['path_tmp_u_n'])
            iBuilder_norm = IndexBuilder(lex_norm.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')
        
        normalized = []
        for line_idx, line in enumerate(iBuilder_norm.tokens):
            normalized.append(' '.join([t for (_tt,t) in line]) + "\n")
        
        
        
        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers. 
        # Note: start_index is a flat (unidimensional) index, 
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                                 os.path.dirname(os.path.realpath(__file__)), 
                                 temp_files['path_tmp_u_a']))
#             _name2defScope = scopeAnalyst.resolve_scope()
#             _isGlobal = scopeAnalyst.isGlobal
#             _name2useScope = scopeAnalyst.resolve_use_scope()
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')
        
        orig = []
        no_renaming = []
        
        for line_idx, line in enumerate(iBuilder_ugly.tokens):
            orig.append(' '.join([t for (_tt,t) in \
                                  iBuilder_clear.tokens[line_idx]]) + "\n")
            
            no_renaming.append(' '.join([t for (_tt,t) in line]) + "\n")
            
#         # Simple renaming: disambiguate overloaded names using scope id
        basic_renaming = renameUsingScopeId(scopeAnalyst, 
                                            iBuilder_ugly)
        
        # More complicated renaming: collect the context around  
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
#         hash_renaming = renameUsingHashAllPrec(scopeAnalyst, 
#                                                 iBuilder_ugly,
#                                                 debug=True)
        
        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst, 
                                                   iBuilder_ugly, 
                                                   twoLines=False,
                                                   debug=False)

        hash_def_two_renaming = renameUsingHashDefLine(scopeAnalyst, 
                                                    iBuilder_ugly, 
                                                    twoLines=True,
                                                    debug=False)

        cleanup(temp_files)
        return (js_file_path,
                orig, 
                no_renaming, 
                basic_renaming,
                normalized, 
#                 hash_renaming,
                hash_def_one_renaming,
                hash_def_two_renaming)
        
    except Exception, e:
        cleanup(temp_files)
        return (js_file_path, None, str(e))

Exemplo n.º 15

0

Exibir arquivo

Arquivo: hashRenaming.py Projeto: renesugar/jsNaughty


    
input_file = os.path.abspath(sys.argv[1])
output_file = os.path.abspath(sys.argv[2])
mode = int(sys.argv[3])


prepro = Preprocessor(input_file)
prepro.write_temp_file('tmp.js')

clear = Beautifier()
ok = clear.run('tmp.js', 
               'tmp.b.js')
  
lexer = Lexer('tmp.b.js')
iBuilder = IndexBuilder(lexer.tokenList)

scopeAnalyst = ScopeAnalyst(os.path.join(
                         os.path.dirname(os.path.realpath(__file__)), 
                         'tmp.b.js'))

hash_renaming = renameUsingHashDefLine(scopeAnalyst, 
                                   iBuilder, 
                                   twoLines=False,
                                   debug=mode)

with open(output_file, 'w') as f:
    f.writelines(hash_renaming)
# writeTmpLines(hash_renaming, output_file)

Exemplo n.º 16

0

Exibir arquivo

    def deobfuscateJS(self, obfuscatedCode, transactionID):
        proxy = xmlrpclib.ServerProxy("http://godeep.cs.ucdavis.edu:8080/RPC2")

        mosesParams = {}
        candidates = []
        baseDir = "/home/ccasal/temp/"
        tempFile = baseDir + str(transactionID) + "_temp.js"
        lm_path = "/data/bogdanv/deobfuscator/experiments/corpora/corpus.lm.970k/js.blm.lm"

        preproFile = baseDir + str(transactionID) + "_prepro.js"
        beautFile = baseDir + str(transactionID) + "_beaut.js"

        # Strip comments, replace literals, etc
        try:
            prepro = WebPreprocessor(obfuscatedCode)
            #TODO replace with: prepro = WebPreprocessor(text)
            prepro.write_temp_file(preproFile)
        except:
            cleanup([preproFile])
            print("Preprocessor failed")
            return ("Preprocessor Failed")

        clear = Beautifier()
        #TODO: Need a text version of beautifier to avoid the file read and write.
        #(ok, beautText, err) = clear.webRun(preproText)
        ok = clear.run(preproFile, beautFile)
        print(ok)
        if (not ok):
            cleanup([preproFile, beautFile])
            return ("Beautifier Failed")
            #quit()

        try:
            lex_ugly = Lexer(beautFile)
            iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup([preproFile, beautFile])
            print("IndexBuilder fail")
            return ("IndexBuilder Failed")

        lex_ugly.write_temp_file(tempFile)

        #Do Scope related tasks
        #a raw text version
        try:
            scopeAnalyst = ScopeAnalyst(tempFile)
        except:
            cleanup({"temp": tempFile})
            print("ScopeAnalyst Fail")
            return ("ScopeAnalyst Failed")

        #Do Rename related tasks
        #In our case, I don't think we need to actually do anything for no_renaming
        #no_renaming = []
        #for _line_idx, line in enumerate(iBuilder_ugly.tokens):
        #    no_renaming.append(' '.join([t for (_tt,t) in line]) + "\n")

        #Hash_def_one_renaming
        #beautText = renameUsingHashDefLine(scopeAnalyst,
        #                                               iBuilder_ugly,
        #                                               twoLines=False,
        #                                                debug=False)
        print(lex_ugly.collapsedText)
        mosesParams["text"] = lex_ugly.collapsedText
        mosesParams["align"] = "true"
        mosesParams["report-all-factors"] = "true"

        results = proxy.translate(
            mosesParams)  # __request("translate", mosesParams)
        rawText = Postprocessor(results["nbest"])
        translation = rawText.getProcessedOutput()

        #Send to output:
        cleanup([preproFile, beautFile, tempFile])
        return (translation)

Exemplo n.º 17

0

Exibir arquivo

Arquivo: runSimplifiedExperiment.py Projeto: renesugar/jsNaughty

def processFile(l):

    js_file_path = l[0]
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]

    pid = int(multiprocessing.current_process().ident)

    temp_files = {
        'path_tmp': 'tmp_%d.js' % pid,
        'path_tmp_b': 'tmp_%d.b.js' % pid,
        'path_tmp_b_1': 'tmp_%d.b.1.js' % pid,
        'path_tmp_b_2': 'tmp_%d.b.2.js' % pid,
        'path_tmp_b_a': 'tmp_%d.b.a.js' % pid,
        'path_tmp_u': 'tmp_%d.u.js' % pid,
        'path_tmp_u_a': 'tmp_%d.u.a.js' % pid,
        'path_tmp_unugly': 'tmp_%d.n2p.js' % pid,
        'path_tmp_unugly_1': 'tmp_%d.n2p.1.js' % pid,
        'path_tmp_unugly_2': 'tmp_%d.n2p.2.js' % pid,
        'path_tmp_jsnice': 'tmp_%d.jsnice.js' % pid,
        'f2': 'tmp_%d.no_renaming.js' % pid,
        #                   'f3': 'tmp_%d.basic_renaming.js' % pid,
        #                   'f4': 'tmp_%d.hash_renaming.js' % pid,
        'f5': 'tmp_%d.hash_def_one_renaming.js' % pid,
        #                   'f6': 'tmp_%d.hash_def_two_renaming.js' % pid,
        'f7': 'tmp_%d.hash_def_one_renaming_fb.js' % pid,
        'path_orig': os.path.join(output_path, '%s.js' % base_name),
        'path_ugly': os.path.join(output_path, '%s.u.js' % base_name),
        'path_unugly': os.path.join(output_path, '%s.n2p.js' % base_name),
        'path_jsnice': os.path.join(output_path, '%s.jsnice.js' % base_name)
    }

    #     for strategy in ['js', 'lm.js', 'len.js', 'freqlen.js']:
    #         for renaming in ['no_renaming', 'hash_def_one_renaming']:
    #             temp_files['path_tmp_%s_%s' % (renaming, strategy)] = \
    #                     'tmp_%d.%s.%s' % (pid, renaming, strategy)

    candidates = []

    #     if True:
    try:

        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(temp_files['path_tmp'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout
        clear = Beautifier()
        ok = clear.run(temp_files['path_tmp'], temp_files['path_tmp_b'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

#         # Pass through beautifier to fix layout
#         clear = Beautifier()
#         ok = clear.run(temp_files['path_tmp'],
#                        temp_files['path_tmp_b_1'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Beautifier fail')
#
#         jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])
#
#         (ok, _out, _err) = jsNiceBeautifier.run(temp_files['path_tmp_b_1'],
#                                                 temp_files['path_tmp_b_2'])
#         if not ok:
#             cleanup(temp_files)
#             print js_file_path, _err
#             return (js_file_path, None, 'JSNice Beautifier fail')
#
#         ok = clear.run(temp_files['path_tmp_b_2'],
#                        temp_files['path_tmp_b'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Beautifier fail')
#
#
#         # Weird JSNice renamings despite --no-rename
#         try:
#             before = set([token for (token, token_type) in
#                           Lexer(temp_files['path_tmp_b_1']).tokenList
#                           if is_token_subtype(token_type, Token.Name)])
#             after = set([token for (token, token_type) in
#                           Lexer(temp_files['path_tmp_b']).tokenList
#                           if is_token_subtype(token_type, Token.Name)])
#
#             if not before == after:
#                 return (js_file_path, None, 'Weird JSNice renaming')
#
#         except:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Lexer fail')

# Minify
        ugly = Uglifier()
        ok = ugly.run(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Uglifier fail')

        # Num tokens before vs after
        try:
            tok_clear = Lexer(temp_files['path_tmp_b']).tokenList
            tok_ugly = Lexer(temp_files['path_tmp_u']).tokenList
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(temp_files)
            return (js_file_path, None, 'Num tokens mismatch')

        # Align minified and clear files, in case the beautifier
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(temp_files['path_tmp_b'], temp_files['path_tmp_u'])
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'Aligner fail')



        if open(temp_files['path_tmp_b']).read() == \
                open(temp_files['path_tmp_u']).read():
            cleanup(temp_files)
            return (js_file_path, None, 'Not minified')

        try:
            lex_ugly = Lexer(temp_files['path_tmp_u_a'])
            iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')

        ############################################################
        # From now on only work with path_tmp_b_a and path_tmp_u_a
        ############################################################

        # Store original and uglified versions
        ok = clear.run(temp_files['path_tmp_b_a'], temp_files['path_orig'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        ok = clear.run(temp_files['path_tmp_u_a'], temp_files['path_ugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

        # Run the JSNice from http://www.nice2predict.org
        unuglifyJS = UnuglifyJS()
        (ok, _out, _err) = unuglifyJS.run(temp_files['path_tmp_u_a'],
                                          temp_files['path_tmp_unugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Nice2Predict fail')

        ok = clear.run(temp_files['path_tmp_unugly'],
                       temp_files['path_unugly'])
        if not ok:
            cleanup(temp_files)
            return (js_file_path, None, 'Beautifier fail')

#         ok = clear.run(temp_files['path_tmp_unugly'],
#                        temp_files['path_tmp_unugly_1'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Beautifier fail')
#
#         (ok, _out, _err) = jsNiceBeautifier.run(temp_files['path_tmp_unugly_1'],
#                                                 temp_files['path_tmp_unugly_2'])
#         if not ok:
#             cleanup(temp_files)
#             print js_file_path, _err
#             return (js_file_path, None, 'JSNice Beautifier fail')
#
#         ok = clear.run(temp_files['path_tmp_unugly_2'],
#                        temp_files['path_unugly'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Beautifier fail')

        try:
            lexer = Lexer(temp_files['path_unugly'])
            iBuilder = IndexBuilder(lexer.tokenList)
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'IndexBuilder fail')

        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             temp_files['path_unugly']))
            nameOrigin = scopeAnalyst.nameOrigin
            isGlobal = scopeAnalyst.isGlobal

            for (name, def_scope) in nameOrigin.iterkeys():

                pos = scopeAnalyst.nameDefScope2pos[(name, def_scope)]
                (lin, col) = iBuilder.revFlatMat[pos]
                (tok_lin, tok_col) = iBuilder.revTokMap[(lin, col)]

                candidates.append(('Nice2Predict', def_scope, tok_lin, tok_col,
                                   isGlobal.get((name, pos),
                                                True), name, '', ''))
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')

#         # Run the JSNice from http://www.jsnice.org
#         jsNice = JSNice()
#         (ok, _out, _err) = jsNice.run(temp_files['path_tmp_u_a'],
#                                       temp_files['path_tmp_jsnice'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'JSNice fail')
#
#         ok = clear.run(temp_files['path_tmp_jsnice'],
#                        temp_files['path_jsnice'])
#         if not ok:
#             cleanup(temp_files)
#             return (js_file_path, None, 'Beautifier fail')
#
#         try:
#             lexer = Lexer(temp_files['path_jsnice'])
#             iBuilder = IndexBuilder(lexer.tokenList)
#         except:
#             cleanup(temp_files)
#             return (js_file_path, None, 'IndexBuilder fail')
#
#         try:
#             scopeAnalyst = ScopeAnalyst(os.path.join(
#                                  os.path.dirname(os.path.realpath(__file__)),
#                                  temp_files['path_jsnice']))
#             nameOrigin = scopeAnalyst.nameOrigin
#             isGlobal = scopeAnalyst.isGlobal
#
#             for (name, def_scope) in nameOrigin.iterkeys():
#
#                 pos = scopeAnalyst.nameDefScope2pos[(name, def_scope)]
#                 (lin,col) = iBuilder.revFlatMat[pos]
#                 (tok_lin,tok_col) = iBuilder.revTokMap[(lin,col)]
#
#                 candidates.append(('JSNice', def_scope,
#                                    tok_lin, tok_col,
#                                    isGlobal.get((name, pos), True),
#                                    name, '',''))
#         except:
#             cleanup(temp_files)
#             return (js_file_path, None, 'ScopeAnalyst fail')

# Compute scoping: name2scope is a dictionary where keys
# are (name, start_index) tuples and values are scope identifiers.
# Note: start_index is a flat (unidimensional) index,
# not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             temp_files['path_tmp_u_a']))
        except:
            cleanup(temp_files)
            return (js_file_path, None, 'ScopeAnalyst fail')

        # Baseline translation: No renaming, no scoping
        no_renaming = []
        for _line_idx, line in enumerate(iBuilder_ugly.tokens):
            no_renaming.append(' '.join([t for (_tt, t) in line]) + "\n")

        with open(temp_files['f2'], 'w') as f_no_renaming:
            f_no_renaming.writelines(no_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                           'train.no_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation_no_renaming,
         _err) = moses.run(temp_files['f2'])

        nc = processTranslationUnscoped(translation_no_renaming, iBuilder_ugly,
                                        lm_path, temp_files['f2'], output_path,
                                        base_name)
        if nc:
            candidates += nc

#  translation, iBuilder, lm_path,
#                                f_path, output_path, base_name
# Default translation: No renaming
#         no_renaming = []
#         for _line_idx, line in enumerate(iBuilder_ugly.tokens):
#             no_renaming.append(' '.join([t for (_tt,t) in line]) + "\n")
#
#         with open(temp_files['f2'], 'w') as f_no_renaming:
#             f_no_renaming.writelines(no_renaming)
#
#         moses = MosesDecoder(ini_path=os.path.join(ini_path, \
#                            'train.no_renaming', 'tuning', 'moses.ini'))
#         (_moses_ok, translation, _err) = moses.run(temp_files['f2'])

        nc = processTranslationScoped(translation_no_renaming, iBuilder_ugly,
                                      scopeAnalyst, lm_path, temp_files['f2'],
                                      output_path, base_name)
        if nc:
            candidates += nc

        # More complicated renaming: collect the context around
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst,
                                                       iBuilder_ugly,
                                                       twoLines=False,
                                                       debug=False)
        with open(temp_files['f5'], 'w') as f_hash_def_one_renaming:
            f_hash_def_one_renaming.writelines(hash_def_one_renaming)

#        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
#                           'train.hash_def_one_renaming', 'tuning', 'moses.ini'))
#        (_moses_ok,
#            translation_hash_renaming,
#            _err) = moses.run(temp_files['f5'])

        mosesParams = {}
        mosesParams["text"] = hash_def_one_renaming  #lex_ugly.collapsedText
        #mosesParams["align"] = "true"
        #mosesParams["report-all-factors"] = "true"

        mresults = proxy.translate(
            mosesParams)  # __request("translate", mosesParams)
        rawText = Postprocessor(mresults["nbest"])
        translation_hash_renaming = rawText.getProcessedOutput()

        nc = processTranslationScoped(translation_hash_renaming, iBuilder_ugly,
                                      scopeAnalyst, lm_path, temp_files['f5'],
                                      output_path, base_name)
        if nc:
            candidates += nc


#        nc = processTranslationScopedFallback(translation_hash_renaming,
#                                              translation_no_renaming,
#                                              iBuilder_ugly,
#                                              scopeAnalyst,
#                                              lm_path,
#                                              temp_files['f7'],
#                                              output_path,
#                                              base_name)
#        if nc:
#            candidates += nc

        cleanup(temp_files)
        cleanupRenamed(pid)
        return (js_file_path, 'OK', candidates)

    except Exception, e:
        cleanup(temp_files)
        cleanupRenamed(pid)
        return (js_file_path, None, str(e).replace("\n", ""))

Exemplo n.º 18

0

Exibir arquivo

def processFile(l):
    
    def localCleanup(output_path, base_names):
        for base_name in base_names:
            tryRemove(os.path.join(output_path, base_name))
    
    js_file_path = l[0]
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]
    
    pid = int(multiprocessing.current_process().ident)

    candidates = []
    
    try:
#     if True:
        # Temp files to be created during processing
        path_tmp = 'tmp_%d.js' % (pid)
        path_tmp_b = 'tmp_%d.b.js' % (pid)
        path_tmp_b_a = 'tmp_%d.b.a.js' % (pid)
        path_tmp_u = 'tmp_%d.u.js' % (pid)
        path_tmp_u_a = 'tmp_%d.u.a.js' % (pid)
        path_tmp_unugly = 'tmp_%d.n2p.js' % (pid)
        path_tmp_jsnice = 'tmp_%d.jsnice.js' % (pid)
        
        f2 = 'tmp_%d.no_renaming.js' % (pid)
        f3 = 'tmp_%d.basic_renaming.js' % (pid)
        f4 = 'tmp_%d.hash_renaming.js' % (pid)
        f5 = 'tmp_%d.hash_def_one_renaming.js' % (pid)
        f6 = 'tmp_%d.hash_def_two_renaming.js' % (pid)
        
        path_orig = '%s.js' % (base_name)
        path_ugly = '%s.u.js' % (base_name)
        path_unugly = '%s.n2p.js' % (base_name)
        path_jsnice = '%s.jsnice.js' % (base_name)
        
        # Strip comments, replace literals, etc
        try:
            prepro = Preprocessor(os.path.join(corpus_root, js_file_path))
            prepro.write_temp_file(path_tmp)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Preprocessor fail')
        
        # Pass through beautifier to fix layout
        clear = Beautifier()
        ok = clear.run(path_tmp, path_tmp_b+'.tmp1')
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Beautifier 1 fail')
         
        jsNiceBeautifier = JSNice(flags=['--no-types', '--no-rename'])
        
        (ok, _out, _err) = jsNiceBeautifier.run(path_tmp_b+'.tmp1', path_tmp_b+'.tmp2')
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'JSNice Beautifier 1 fail')

        ok = clear.run(path_tmp_b+'.tmp2', path_tmp_b)
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Beautifier 1 fail')
         
        # Minify
        ugly = Uglifier()
        ok = ugly.run(path_tmp_b, path_tmp_u)
        
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'Uglifier fail')
        
        # Num tokens before vs after
        try:
            tok_clear = Lexer(path_tmp_b).tokenList
            tok_ugly = Lexer(path_tmp_u).tokenList
        except:
            cleanup(pid)
            return (js_file_path, None, 'Lexer fail')
       
        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            cleanup(pid)
            return (js_file_path, None, 'Num tokens mismatch')
        
        # Align minified and clear files, in case the beautifier 
        # did something weird
        try:
            aligner = Aligner()
            # This is already the baseline corpus, no (smart) renaming yet
            aligner.align(path_tmp_b, path_tmp_u)
        except:
            cleanup(pid)
            return (js_file_path, None, 'Aligner fail')
        
        try:
#             iBuilder_clear = IndexBuilder(Lexer(path_tmp_b_a).tokenList)
            iBuilder_ugly = IndexBuilder(Lexer(path_tmp_u_a).tokenList)
        except:
            cleanup(pid)
            return (js_file_path, None, 'IndexBuilder fail')
        
        
        # Store original and uglified versions
        ok = clear.run(path_tmp_u_a, os.path.join(output_path, path_ugly))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly])
            return (js_file_path, None, 'Beautifier 2 fail')
        
        ok = clear.run(path_tmp_b_a, os.path.join(output_path, path_orig))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig])
            return (js_file_path, None, 'Beautifier 3 fail')
        
        
        # Run the JSNice from http://www.nice2predict.org
        unuglifyJS = UnuglifyJS()
        (ok, _out, _err) = unuglifyJS.run(path_tmp_b_a, path_tmp_unugly)
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig])
            return (js_file_path, None, 'Nice2Predict fail')
        
        ok = clear.run(path_tmp_unugly, path_tmp_unugly+'.tmp1')
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'Beautifier 4 fail')
        
        (ok, _out, _err) = jsNiceBeautifier.run(path_tmp_unugly+'.tmp1', path_tmp_unugly+'.tmp2')
        if not ok:
            cleanup(pid)
            return (js_file_path, None, 'JSNice Beautifier 2 fail')
    
        ok = clear.run(path_tmp_unugly+'.tmp2', os.path.join(output_path, path_unugly))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'Beautifier 4 fail')

        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                                 os.path.dirname(os.path.realpath(__file__)), 
                                 path_tmp_unugly))
            nameOrigin = scopeAnalyst.nameOrigin
            for (name, def_scope) in nameOrigin.iterkeys():
                candidates.append(('Nice2Predict', def_scope, name, '', ''))
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'ScopeAnalyst fail')
    
    
    
        # Run the JSNice from http://www.jsnice.org
        jsNice = JSNice()
        (ok, _out, _err) = jsNice.run(path_tmp_b_a, path_tmp_jsnice)
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, path_unugly])
            return (js_file_path, None, 'JSNice fail')

        ok = clear.run(path_tmp_jsnice, os.path.join(output_path, path_jsnice))
        if not ok:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'Beautifier 5 fail')
        
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                                 os.path.dirname(os.path.realpath(__file__)), 
                                 path_tmp_jsnice))
            nameOrigin = scopeAnalyst.nameOrigin
            for (name, def_scope) in nameOrigin.iterkeys():
                candidates.append(('JSNice', def_scope, name, '', ''))
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'ScopeAnalyst fail')
        
        
        
        # Compute scoping: name2scope is a dictionary where keys
        # are (name, start_index) tuples and values are scope identifiers. 
        # Note: start_index is a flat (unidimensional) index, 
        # not a (line_chr_idx, col_chr_idx) index.
        try:
            scopeAnalyst = ScopeAnalyst(os.path.join(
                                 os.path.dirname(os.path.realpath(__file__)), 
                                 path_tmp_u_a))
            _name2defScope = scopeAnalyst.resolve_scope()
            _isGlobal = scopeAnalyst.isGlobal
            _name2useScope = scopeAnalyst.resolve_use_scope()
        except:
            cleanup(pid)
            localCleanup(output_path, [path_ugly, path_orig, \
                                       path_unugly, path_jsnice])
            return (js_file_path, None, 'ScopeAnalyst fail')
        
        
        no_renaming = []
        for _line_idx, line in enumerate(iBuilder_ugly.tokens):
            no_renaming.append(' '.join([t for (_tt,t) in line]) + "\n")
        
        with open(f2, 'w') as f_no_renaming:
            f_no_renaming.writelines(no_renaming)
        
        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                           'train.no_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f2)

        nc = processTranslation(translation, iBuilder_ugly, 
                       scopeAnalyst, lm_path, f2,
                       output_path, base_name, clear)
        if nc:
            candidates += nc
        
        
        # Simple renaming: disambiguate overloaded names using scope id
        basic_renaming = renameUsingScopeId(scopeAnalyst, iBuilder_ugly)
        with open(f3, 'w') as f_basic_renaming:
            f_basic_renaming.writelines(basic_renaming)
        
        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                           'train.basic_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f3)
        nc = processTranslation(translation, iBuilder_ugly, 
                       scopeAnalyst, lm_path, f3,
                       output_path, base_name, clear)
        if nc:
            candidates += nc
            
        
        # More complicated renaming: collect the context around  
        # each name (global variables, API calls, punctuation)
        # and build a hash of the concatenation.
        hash_renaming = renameUsingHashAllPrec(scopeAnalyst, 
                                               iBuilder_ugly,
                                               debug=False)
#         print hash_renaming
        with open(f4, 'w') as f_hash_renaming:
            f_hash_renaming.writelines(hash_renaming)
        
        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                           'train.hash_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f4)
        
        nc = processTranslation(translation, iBuilder_ugly, 
                       scopeAnalyst, lm_path, f4,
                       output_path, base_name, clear)
        if nc:
            candidates += nc
        
        hash_def_one_renaming = renameUsingHashDefLine(scopeAnalyst, 
                                                   iBuilder_ugly, 
                                                   twoLines=False,
                                                   debug=False)
        with open(f5, 'w') as f_hash_def_one_renaming:
            f_hash_def_one_renaming.writelines(hash_def_one_renaming)

        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                           'train.hash_def_one_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f5)
        
        nc = processTranslation(translation, iBuilder_ugly, 
                       scopeAnalyst, lm_path, f5,
                       output_path, base_name, clear)
        if nc:
            candidates += nc
            

        hash_def_two_renaming = renameUsingHashDefLine(scopeAnalyst, 
                                                   iBuilder_ugly, 
                                                   twoLines=True,
                                                   debug=False)
        with open(f6, 'w') as f_hash_def_two_renaming: 
            f_hash_def_two_renaming.writelines(hash_def_two_renaming)
        
        moses = MosesDecoder(ini_path=os.path.join(ini_path, \
                           'train.hash_def_two_renaming', 'tuning', 'moses.ini'))
        (_moses_ok, translation, _err) = moses.run(f6)
        
        nc = processTranslation(translation, iBuilder_ugly, 
                       scopeAnalyst, lm_path, f6,
                       output_path, base_name, clear)
        if nc:
            candidates += nc
            
        
        cleanup(pid)
        cleanupRenamed(pid)
        return (js_file_path, 'OK', candidates)


    except Exception, e:
        cleanup(pid)
        cleanupRenamed(pid)
        return (js_file_path, None, str(e).replace("\n", ""))

Exemplo n.º 19

0

Exibir arquivo

    def testFiles(self):
        #TODO: Automated checks against the files.
        #Known bugs:  The definitions of sum and numberEquals in test_file1 seem to be pointing to the wrong instance...
        i = 1
        for nextFile in self.fileList:
            print(nextFile)
            lexed = Lexer(nextFile)
            ib = IndexBuilder(lexed.tokenList)
            sa = ScopeAnalyst(nextFile)
            s_min = ScopeAnalyst(
                os.path.join(self.testDir.path, "test_file1.obs.js"))
            #print(s_min.name2defScope)
            #print("TokenList----------------------------------------------------------------")
            #print(lexed.tokenList)
            #print("Index Builder----------------------------------------------------------------")
            #print(ib)
            #print("Scope Analyst----------------------------------------------------------------")
            #print(sa)
            vm = VariableMetrics(sa, ib, lexed.tokenList)
            #print("VM----------------------------------------------------------------")
            #print(vm)
            #print("VM----------------------------------------------------------------")
            for var in vm.getVariables():
                print(var)
                print(
                    "Num Lines,Max Lines,Global Def,Global Usage,For,While,Literal Def,Literal Usage,Max Length Line,Ave Line Length"
                )
                print vm.getNameMetrics(var)

            #Automated tests:
            csv_file = os.path.join(self.testDir.path,
                                    "test_file" + str(i) + ".csv")
            print(csv_file)
            if (os.path.exists(csv_file)):
                with open(csv_file, 'r') as f:
                    csv_reader = csv.reader(f, delimiter=",")
                    #Skip header
                    next(csv_reader, None)
                    for row in csv_reader:
                        key = (row[0], row[1])
                        print(key)
                        (num_lines, max_lines, external_def, external_use,
                         in_for, in_while, literal_def, literal_use,
                         max_length_line,
                         ave_line_length) = vm.getNameMetrics(key)
                        self.assertTrue(num_lines == int(row[2]))
                        self.assertTrue(max_lines == int(row[3]))
                        self.assertTrue(external_def == self.asBool(row[4]))
                        self.assertTrue(external_use == int(row[5]))
                        self.assertTrue(in_for == int(row[6]))
                        self.assertTrue(in_while == int(row[7]))
                        self.assertTrue(literal_def == self.asBool(row[8]))
                        self.assertTrue(literal_use == int(row[9]))
                        self.assertTrue(max_length_line == int(row[10]))
                        self.assertAlmostEqual(ave_line_length,
                                               float(row[11]),
                                               places=3)

            else:
                print("no manually annotated csv file for: " + nextFile)

            break