# Run one JavaScript file through the renaming-evaluation pipeline.
#
# `l` is only indexed once: l[0] is used as the file path, joined onto
# `corpus_root`.  The steps visible below are:
#   WebLMPreprocessor -> Beautifier -> Uglifier -> Aligner -> Beautifier
#   -> WebLexer -> IndexBuilder, then UnuglifyJS (Nice2Predict) on the
#   minified temp file, then, for every (r_strategy, proxy) in `proxies`,
#   PreRenamer -> WebMosesDecoder -> MosesParser and, for every strategy in
#   CS.all(), ConsistencyController / PostRenamer / TranslationSummarizer.
#
# Return values visible in the code (first element is always js_file_path):
#   (js_file_path, None, reason)                  handled step failure
#   (js_file_path, 'OK', candidates, model_rows)  success
#   (js_file_path, None, message, model_rows)     outer `except` handler
# Note that the two failure shapes have different lengths (3 vs 4).
#
# `corpus_root`, `output_path`, `proxies` and `lm_path` are not defined in
# this function -- presumably module-level globals; verify.
#
# The code is Python 2 (`dict.iteritems`, `except Exception, e`).
#
# NOTE(review): as it stands this function is not valid Python.  Several
# lines appear to have been lost (`try:` / `except:` lines, closing
# brackets, the bodies of the `with open(...)` blocks).  Each suspected gap
# is marked with a NOTE(review) below; restore the missing lines from
# version control rather than guessing at them.
def processFile(l):

    js_file_path = l[0]
    base_name = os.path.splitext(os.path.basename(js_file_path))[0]

    # File names (later prefixed with output_path) for every intermediate
    # artefact this function saves.
    temp_files = {
        'orig': '%s.js' % base_name,
        'minified': '%s.u.js' % base_name,
        'n2p': '%s.n2p.js' % base_name
    # NOTE(review): the closing `}` of the dict literal seems to be missing here.

    # One entry per renaming strategy, plus one per
    # (renaming strategy, consistency strategy) pair.
    for r_strategy in RS.all():
        temp_files['%s' % (r_strategy)] = \
                    '%s.%s.js' % (base_name, r_strategy)

        for c_strategy in CS.all():
            temp_files['%s_%s' % (r_strategy, c_strategy)] = \
                    '%s.%s.%s.js' % (base_name, r_strategy, c_strategy)

    # Only values of existing keys are reassigned, so the dict does not
    # change size while it is being iterated.
    for k, v in temp_files.iteritems():
        temp_files[k] = os.path.join(output_path, v)

    candidates = []
    #Minified Name -> Original Name (name, def_scope) -> (name, def_scope)
    min_name_map = {}
    #Hashed Name -> Minified Name (name, def_scope) -> (name, def_scope)
    hash_name_map = {}
    #Minified Name -> jsnice name  (name, def_scope) -> (name, def_scope)
    jsnice_name_map = {}
    #Output Lines for the suggestion_model.csv
    # (never appended to in the visible code; the code that filled it is
    # commented out near the end of the function)
    model_rows = []

    # NOTE(review): the deeper indentation from here on, together with the
    # `except Exception, e:` at the end of the function, suggests that a
    # `try:` line is missing here.
        js_text = open(os.path.join(corpus_root, js_file_path), 'r').read()

        # Strip comments, replace literals, etc
        # NOTE(review): a `try:` before and an `except` clause after the
        # next two statements appear to be missing.
            prepro = WebLMPreprocessor(js_text)
            prepro_text = str(prepro)
            return (js_file_path, None, 'Preprocessor fail')

        # Pass through beautifier to fix layout
        clear = Beautifier()
        (ok, tmp_beautified_text, _err) = clear.web_run(prepro_text)
        if not ok:
            return (js_file_path, None, 'Beautifier fail')

        # Minify
        ugly = Uglifier()
        (ok, tmp_minified_text, _err) = ugly.web_run(tmp_beautified_text)
        if not ok:
            return (js_file_path, None, 'Uglifier fail')

        # Align minified and clear files, in case the beautifier
        # did something weird
        # NOTE(review): `try:` / `except` lines and the arguments of
        # web_align(...) (the call is never closed) appear to be missing.
            aligner = Aligner()
            (aligned_clear, aligned_minified) = aligner.web_align(
            return (js_file_path, None, 'Aligner fail')

        # Pass through beautifier to fix layout
        # (the same Beautifier instance is applied to both aligned texts)
        (ok, beautified_text, _err) = clear.web_run(aligned_clear)
        if not ok:
            return (js_file_path, None, 'Beautifier fail')
        (ok, minified_text, _err) = clear.web_run(aligned_minified)
        if not ok:
            return (js_file_path, None, 'Beautifier fail')

        # Num tokens before vs after
        # NOTE(review): `try:` / `except` lines appear to be missing around
        # the lexer calls.
            lex_clear = WebLexer(beautified_text)
            tok_clear = lex_clear.tokenList

            lex_ugly = WebLexer(minified_text)
            tok_ugly = lex_ugly.tokenList
            return (js_file_path, None, 'Lexer fail')

        # For now only work with minified files that have
        # the same number of tokens as the originals
        if not len(tok_clear) == len(tok_ugly):
            return (js_file_path, None, 'Num tokens mismatch')

        # Identical text means minification changed nothing; skip the file.
        if beautified_text == minified_text:
            return (js_file_path, None, 'Not minified')

        #    iBuilder_clear = IndexBuilder(lex_clear.tokenList)
        #    return (js_file_path, None, "IndexBuilder fail on original file.")

        # NOTE(review): `try:` / `except` lines appear to be missing here.
            iBuilder_ugly = IndexBuilder(lex_ugly.tokenList)
            return (js_file_path, None, 'IndexBuilder fail')

        # NOTE(review): both `with` blocks below have no body; the write
        # calls appear to be missing.
        with open(temp_files['orig'], 'w') as f:

        with open(temp_files['minified'], 'w') as f:

#         try:
#             orig_lexer = WebLexer(beautified_text)
#             orig_iBuilder = IndexBuilder(orig_lexer.tokenList)
#             orig_scopeAnalyst = WebScopeAnalyst(beautified_text)
#         except:
#             return (js_file_path, None, 'IndexBuilder/Scoper fail on original')

#     Nice2Predict

# BV: Next block left out until I figure out the pipe issue
# BV: Update: I couldn't pipe input to N2P. TODO: FIX
# Run the JSNice from http://www.nice2predict.org
        # UnuglifyJS is given the path of the minified temp file, not its text.
        unuglifyJS = UnuglifyJS()
        (ok, n2p_text, _err) = unuglifyJS.run(temp_files['minified'])
        if not ok:
            return (js_file_path, None, 'Nice2Predict fail')

        (ok, n2p_text_beautified, _err) = clear.web_run(n2p_text)
        if not ok:
            return (js_file_path, None, 'Beautifier fail')

        # NOTE(review): the body of this `with` block, and the `try:` /
        # `except` lines around the three statements that follow, appear
        # to be missing.
        with open(temp_files['n2p'], 'w') as f:

            n2p_lexer = WebLexer(n2p_text_beautified)
            n2p_iBuilder = IndexBuilder(n2p_lexer.tokenList)
            n2p_scopeAnalyst = WebScopeAnalyst(n2p_text_beautified)
            return (js_file_path, None, 'IndexBuilder / ScopeAnalyst fail')

        # Save some translation stats to compare different methods
        # Rows for Nice2Predict are tagged ['n2p', ''] in the two leading
        # columns.
        ts = TranslationSummarizer()
        candidates += [['n2p', ''] + x for x in ts.compute_summary_unscoped(
            n2p_iBuilder, n2p_scopeAnalyst)]

        # All other JSNaughty variants

        # NOTE(review): `try:` / `except` lines appear to be missing around
        # each of the two WebScopeAnalyst constructions below.
            scopeAnalyst = WebScopeAnalyst(minified_text)
            return (js_file_path, None, 'ScopeAnalyst minified fail')

            scopeAnalyst_clear = WebScopeAnalyst(beautified_text)
            return (js_file_path, None, 'ScopeAnalyst clear fail')

        #if(not check(iBuilder_clear, scopeAnalyst_clear, n2p_iBuilder, n2p_scopeAnalyst)):
        #    return (js_file_path, None, 'JsNice restructured file. Skipping..')

        #Map the original names to the minified counterparts.
        # The keys of each name2defScope are sorted by their second
        # component and then paired up purely by index, so the mapping is
        # only attempted when all three lists have the same length.
        orderedVarsNew = sorted(scopeAnalyst.name2defScope.keys(),
                                key=lambda x: x[1])
        orderedVarsOld = sorted(scopeAnalyst_clear.name2defScope.keys(),
                                key=lambda x: x[1])
        orderedVarsN2p = sorted(n2p_scopeAnalyst.name2defScope.keys(),
                                key=lambda x: x[1])

        if (len(orderedVarsOld) != len(orderedVarsNew)):
            return (js_file_path, None,
                    "Old and New Name lists different length")

        if (len(orderedVarsOld) != len(orderedVarsN2p)):
            return (js_file_path, None,
                    "JsNice and Old Name lists different length")

        for i in range(0, len(orderedVarsOld)):
            name_old = orderedVarsOld[i][0]
            def_scope_old = scopeAnalyst_clear.name2defScope[orderedVarsOld[i]]

            name_new = orderedVarsNew[i][0]
            def_scope_new = scopeAnalyst.name2defScope[orderedVarsNew[i]]
            min_name_map[(name_new, def_scope_new)] = (name_old, def_scope_old)

            name_n2p = orderedVarsN2p[i][0]
            # NOTE(review): this reads the minified analyst (scopeAnalyst /
            # orderedVarsNew), not n2p_scopeAnalyst / orderedVarsN2p, so
            # def_scope_n2p equals def_scope_new -- confirm this is intended.
            def_scope_n2p = scopeAnalyst.name2defScope[orderedVarsNew[i]]
            # NOTE(review): the tuple below is never closed; its second
            # element appears to be missing.
            jsnice_name_map[(name_new, def_scope_new)] = (name_n2p,

        #Once we have the scopeAnalyst, iBuilder, and tokenlist for the minified
        #version, we can get the name properties
#        vm = VariableMetrics(scopeAnalyst, iBuilder_ugly, lex_ugly.tokenList)
#        variableKeySet = vm.getVariables()
#        for variableKey in variableKeySet:
#            name_features[variableKey] = vm.getNameMetrics(variableKey)

        # NOTE(review): prepHelpers is unpacked into three values further
        # down, and `position_names` is used later without being assigned
        # anywhere in the visible code -- a middle element of this unpack
        # appears to be missing.
        (_name_positions, \
         _use_scopes) = prepHelpers(iBuilder_ugly, scopeAnalyst)

        # Try different renaming strategies (hash, etc)
        for r_strategy, proxy in proxies:

            # Rename input prior to translation
            preRen = PreRenamer()
            after_text = preRen.rename(r_strategy, iBuilder_ugly, scopeAnalyst)

            (ok, beautified_after_text, _err) = clear.web_run(after_text)
            if not ok:
                return (js_file_path, None, 'Beautifier fail')

            # Save renamed input to disk for future inspection
            # NOTE(review): the body of this `with` block appears to be missing.
            with open(temp_files['%s' % (r_strategy)], 'w') as f:

            # Lexer / index / scope information for the renamed text.
            a_lexer = WebLexer(beautified_after_text)
            a_iBuilder = IndexBuilder(a_lexer.tokenList)
            a_scopeAnalyst = WebScopeAnalyst(beautified_after_text)

            if (r_strategy == RS.HASH_ONE or r_strategy == RS.HASH_TWO):
                #                 try:
                #                     scopeAnalyst_hash = WebScopeAnalyst(beautified_after_text) #This should be beautified_after_text instead of after_text
                #                 except:
                #                     return (js_file_path, None, "ScopeAnalyst hash fail")

                #Map the hashed names to the minified counterparts.
                # Same index-based pairing as for the name maps above.
                orderedVarsMin = sorted(scopeAnalyst.name2defScope.keys(),
                                        key=lambda x: x[1])
                orderedVarsHash = sorted(a_scopeAnalyst.name2defScope.keys(),
                                         key=lambda x: x[1])

                if (len(orderedVarsMin) != len(orderedVarsHash)):
                    return (js_file_path, None,
                            "Hash and Min lists different length")

                for i in range(0, len(orderedVarsHash)):
                    name_hash = orderedVarsHash[i][0]
                    # NOTE(review): the two subscripts below are never
                    # closed, and the last line of this loop is the tail of
                    # an assignment whose beginning is missing (presumably
                    # into hash_name_map -- verify).
                    def_scope_hash = a_scopeAnalyst.name2defScope[

                    name_min = orderedVarsMin[i][0]
                    def_scope_min = scopeAnalyst.name2defScope[
                                   def_scope_hash)] = (name_min, def_scope_min)

            # We can switch this back once we train models on a corpus with literals
            # lx = WebLexer(a_iBuilder.get_text())
            lx = WebLexer(a_iBuilder.get_text_wo_literals())

            # Translate renamed input
            md = WebMosesDecoder(proxy)
            (ok, translation, _err) = md.run(lx.collapsedText)
            if not ok:
                return (js_file_path, None, 'Moses translation fail')

            (a_name_positions, a_position_names,
             a_use_scopes) = prepHelpers(a_iBuilder, a_scopeAnalyst)

            # Summary rows collected for this renaming strategy.
            nc = []

            if translation is not None:
                # Parse moses output
                mp = MosesParser()

                # NOTE(review): this call is never closed; its remaining
                # argument(s) appear to be missing.  getMosesTranslation in
                # this file passes a_position_names as the third argument.
                name_candidates = mp.parse(translation, a_iBuilder,
                # name_candidates is a dictionary of dictionaries:
                # keys are (name, def_scope) tuples;
                # values are suggested translations with the sets
                # of line numbers on which they appear.

                # Update name_candidates with some default values
                # (in this case the translation without any renaming)
                # if the translation is empty
                if r_strategy == RS.NONE:
                    # RS.NONE should always be first, by construction
                    name_candidates_default = name_candidates
                    scopeAnalyst_default = a_scopeAnalyst
                    iBuilder_default = a_iBuilder
                    # NOTE(review): the loop below is badly damaged: the
                    # `iteritems(` call and the nameDefScope2pos subscript
                    # are never closed, two tuple unpacks have lost their
                    # opening halves, and the final `setdefault` is split.
                    # As indented, the loop also sits inside the RS.NONE
                    # branch, where it would map the defaults onto
                    # themselves -- a lost `else:` is one possibility;
                    # verify against version control.
                    for key_default, suggestions in name_candidates_default.iteritems(
                        #                         (name_default, def_scope_default) = key_default

                        pos_default = scopeAnalyst_default.nameDefScope2pos[
                        (lin, col) = iBuilder_default.revFlatMat[pos_default]
                         line_idx) = iBuilder_default.revTokMap[(lin, col)]

                         def_scope) = a_position_names[line_num][line_idx]
                        key = (name, def_scope)

                        for name_translation, lines in suggestions.iteritems():
                            name_candidates.setdefault(key, {})
                                name_translation, set([]))

                # **** BV: This might be all we need to combine Naughty & Nice
                # Iterate over a deep copy so that name_candidates itself can
                # be extended inside the loop.
                name_candidates_copy = deepcopy(name_candidates)
                for key, suggestions in name_candidates_copy.iteritems():

                    # NOTE(review): the second lookup below has lost its
                    # opening `(name_n2p,` and, presumably, an `else:` line.
                    # What remains resolves the key through hash_name_map
                    # (falling back to the key itself) before the
                    # jsnice_name_map lookup.
                    if r_strategy == RS.NONE:
                        (name_n2p, def_scope_n2p) = jsnice_name_map[key]
                         def_scope_n2p) = jsnice_name_map[hash_name_map.get(
                             key, key)]

                    # Register the Nice2Predict name as a candidate with an
                    # empty line set.  name_translation and lines are not
                    # used in the visible body of this loop.
                    for name_translation, lines in suggestions.iteritems():
                        name_candidates.setdefault(key, {})
                        name_candidates[key].setdefault(name_n2p, set([]))

                cc = ConsistencyController(debug_mode=False)
                ts = TranslationSummarizer()

                # An identifier may have been translated inconsistently
                # across different lines (Moses treats each line independently).
                # Try different strategies to resolve inconsistencies, if any
                for c_strategy in CS.all():

                    # Compute renaming map (x -> length, y -> width, ...)
                    # Note that x,y here are names after (hash) renaming
                    (temp_renaming_map, seen) = cc.computeRenaming(
                        c_strategy, name_candidates, a_name_positions,
                        a_use_scopes, a_iBuilder, lm_path, {}, hash_name_map)

                    # After computeRenaming, we have both the entropies stored
                    # if we are in LMDrop strategy and have the suggestions
                    # frequency from name_candidates.  Fill in suggestion_Features
                    #                    if(c_strategy == CS.LMDROP and r_strategy not in suggestion_features):
                    #                        assert(cc.suggestion_cache != None)
                    #                        suggestion_features[r_strategy] = {}
                    #                        """
                    #                        name_candidates: dict
                    #                            name_candidates[(name, def_scope)][name_translation]
                    #                            = set of line numbers in the translation
                    #                        """
                    #                        for variableKey, suggestionDictionary in name_candidates.iteritems():
                    #                            for suggestionName, linesSuggested in suggestionDictionary.iteritems():
                    #                                # I need to revert variableKey[0] in the suggestion from its hash to its original minified name.
                    #                                if(r_strategy == RS.HASH_ONE or r_strategy == RS.HASH_TWO):
                    #                                    unhashedKey = hash_name_map[variableKey]
                    #                                    suggestionKey = (unhashedKey[0], unhashedKey[1], suggestionName)
                    #                                else:
                    #                                    suggestionKey = (variableKey[0], variableKey[1], suggestionName)
                    #                                entropyVals = cc.suggestion_cache.getEntropyStats(variableKey, suggestionName)
                    #                                if(entropyVals != (ENTROPY_ERR, ENTROPY_ERR, ENTROPY_ERR, ENTROPY_ERR)):
                    #                                    suggestionValue = [len(linesSuggested)] + \
                    #                                                       list(getSuggestionStats(suggestionName)) + \
                    #                                                       list(entropyVals)
                    #                                    suggestion_features[r_strategy][suggestionKey] = suggestionValue

                    # Fall back on original names in input, if
                    # no translation was suggested
                    # NOTE(review): `position_names` is not assigned anywhere
                    # in the visible code (see the prepHelpers unpack before
                    # the strategy loop).
                    postRen = PostRenamer()
                    renaming_map = postRen.updateRenamingMap(
                        a_name_positions, position_names, a_use_scopes,
                        temp_renaming_map, seen, r_strategy)

                    # Apply renaming map and save output for future inspection
                    renamed_text = postRen.applyRenaming(
                        a_iBuilder, a_name_positions, renaming_map)

                    (ok, beautified_renamed_text,
                     _err) = clear.web_run(renamed_text)
                    if not ok:
                        return (js_file_path, None, 'Beautifier fail')
                    # NOTE(review): the body of this `with` block appears to
                    # be missing.
                    with open(temp_files['%s_%s' % (r_strategy, c_strategy)],
                              'w') as f:

                    # Save some stats about which names were renamed to what
                    # This is what enables the comparison between the different
                    # methods.
                    # NOTE(review): the call and the list comprehension below
                    # are never closed; the trailing argument(s) appear to be
                    # missing.
                    r = [[c_strategy] + x for x in ts.compute_summary_scoped(
                        renaming_map, name_candidates, a_iBuilder,

                    # An empty summary is treated as a failure of the whole file.
                    if not r:
                        return (js_file_path, None, 'Compute summary failed')
                    nc += r

            # Prefix each collected row with the renaming strategy.
            if nc:
                candidates += [[r_strategy] + x for x in nc]

        #create the rows for the suggestion_model.csv

#        for r_strategy in RS.all():
#            for suggestionKey, s_feat in suggestion_features[r_strategy].iteritems():
#                variableKey = (suggestionKey[0], suggestionKey[1])
#                original_name = min_name_map[variableKey][0]
#                js_nice_name = jsnice_name_map[variableKey][0]
#                n_feat = list(name_features[variableKey])
#                #Convert the def_scope to an equivalent, but smaller, easier to read key: (line_num, token_num)
#                newKey = scopeAnalyst.nameDefScope2pos[variableKey]
#                (keyLine, keyToken) = iBuilder_ugly.revFlatMat[newKey]
#                model_rows.append([original_name, r_strategy, suggestionKey[0], keyLine, keyToken, suggestionKey[2], js_nice_name] + n_feat + s_feat)

        return (js_file_path, 'OK', candidates, model_rows)

    # Catch-all: the exception text, with newlines removed, is returned in
    # the slot the step-failure returns use for their reason string.
    except Exception, e:
        return (js_file_path, None, str(e).replace("\n", ""), model_rows)
# Pre-rename the minified text with one renaming strategy, send it to Moses
# via segmentedTranslation(), and parse the suggestions with MosesParser.
#
# Names the body uses that look like parameters: proxy, r_strategy, RS,
# a_beautifier, iBuilder_ugly, scopeAnalyst_ugly, debug_mode (the prose
# below also mentions `start`).  `segmentedTranslation` and
# `SEGMENTED_TRANS_SIZE` are not defined in this function -- presumably
# module-level names; verify.
#
# Every return is a 14-tuple.  On success it is
#   (True, "", translation, name_candidates, a_iBuilder, a_scopeAnalyst,
#    a_name_positions, a_position_names, a_use_scopes, hash_name_map,
#    rn_time, m_time, lex_time, post_start)
# On failure the first element is False, the second is an error message,
# and the four timing slots are 0.
#
# NOTE(review): as it stands this function is not valid Python.  The
# parameter list is cut off after `proxy,`, the docstring prose below has
# no string delimiters, and several `try:` / `except` lines, call
# arguments and `if (debug_mode):` bodies appear to be missing.  Restore
# the missing lines from version control rather than guessing at them.
# NOTE(review): the prose lists the last four outputs as
# `pre_time, rn_time, m_time, post_start`, but the final return yields
# `rn_time, m_time, lex_time, post_start`.
def getMosesTranslation(proxy,
    A helper function so that we can run multiple different renaming
    strategies through moses in a more modular and hopefully parallelizable
    manner.  It performs hashing/no hashing preparation of the file for
    the renaming strategy specified by r_stategy, and then calls the
    appropriate moses_server.
    proxy: A pointer to which port the appropriate moses server is listening in on
    for this particular renaming strategy.

    r_strategy: One of the renaming strategies from RenamingStrategies
    RS: A renaming strategies object.
    a_beautifier: a beautify object to make sure the renamed text is 
    cleanly formatted.
    iBuilder_ugly: Index Builder for the minified file.
    scopeAnalyst_ugly: Scope Analyst for the minified file.
    start: The starting time for the preprocessing step.  Used for performance
    debug_mode: Print debug information? (True/False - defaults to False)
    (status, error, translation, name_candidates, 
            a_iBuilder, a_scopeAnalyst, a_name_positions, 
            a_position_names, a_use_scopes, hash_name_map,
            pre_time, rn_time, m_time, post_start)
    status: Did this complete without error?  If False, then the rest of the output
    besides error will be empty/null.
    error: What is the reason for the failure?  If status is True (successful
    completion) this is "".
    translation: The raw Moses output
    name_candidates: The set of Moses suggestions for this renaming
    a_iBuilder,a_scopeAnalyst: Index Builder and Scope Analyst for this renaming
    a_name_positions, a_posistion_names, a_use_scopes: Addition tracking info
    hash_name_map: a map from the hashed names to the original minified names 
    rn_time, m_time, lex_time, post_start: The duration of the
    renaming, Moses translation steps, and lexing steps along with the start time for the
    postprocessing of the Moses output. 
    # Start of the renaming phase; rn_time is measured from here.
    rn_start = time.time()

    #We need both the base_text and the hashed_text.
    preRen = PreRenamer()
    # NOTE(review): this `if` has no body, the rename(...) call below is
    # never closed, and the `return` after it is unconditional as written;
    # a debug print and `try:` / `except` lines appear to be missing.
    if (debug_mode):
    #We always need the non hashed names as a fallback.
        after_text = preRen.rename(r_strategy, iBuilder_ugly,
        return (False, "Renaming failed for " + str(r_strategy), "", {}, None,
                None, {}, {}, {}, {}, 0, 0, 0, 0)

    (ok, beautified_after_text, _err) = a_beautifier.web_run(after_text)
    if not ok:
        return (False,
                "Beautifier failed on the renamed text for " + str(r_strategy),
                "", {}, None, None, {}, {}, {}, {}, 0, 0, 0, 0)

    # Align hashed and non hashed  files, in case the beautifier
    # line wrapped the extended lines.
    # NOTE(review): `try:` / `except` lines and the arguments of
    # web_align(...) (the call is never closed) appear to be missing.
        aligner = Aligner()
        (aligned_after, aligned_before) = aligner.web_align(
        return (False,
                "Aligner failed on the renamed text for " + str(r_strategy),
                "", {}, None, None, {}, {}, {}, {}, 0, 0, 0, 0)

    #print("--------Aligned After-------")

    # Lexer / index / scope information for the aligned, renamed text.
    a_lexer = WebLexer(aligned_after)
    a_iBuilder = IndexBuilder(a_lexer.tokenList)
    a_scopeAnalyst = WebScopeAnalyst(aligned_after)

    # Only filled in for the two hash strategies; stays empty otherwise.
    hash_name_map = {}

    if (r_strategy == RS.HASH_ONE or r_strategy == RS.HASH_TWO):

        #Something below here is buggy...
        # The keys of both name2defScope maps are sorted by their second
        # component and paired up purely by index, so the mapping is only
        # attempted when both lists have the same length.
        orderedVarsMin = sorted(scopeAnalyst_ugly.name2defScope.keys(),
                                key=lambda x: x[1])
        orderedVarsHash = sorted(a_scopeAnalyst.name2defScope.keys(),
                                 key=lambda x: x[1])
        #print("Min len: " + str(len(orderedVarsMin)))
        #print("Hash len: " + str(len(orderedVarsHash)))
        if (len(orderedVarsMin) != len(orderedVarsHash)):
            return (False, "Mismatch between minified and hashed names.", "",
                    {}, a_iBuilder, a_scopeAnalyst, {}, {}, {}, {}, 0, 0, 0, 0)

        for i in range(0, len(orderedVarsHash)):
            name_hash = orderedVarsHash[i][0]
            def_scope_hash = a_scopeAnalyst.name2defScope[orderedVarsHash[i]]

            name_min = orderedVarsMin[i][0]
            def_scope_min = scopeAnalyst_ugly.name2defScope[orderedVarsMin[i]]
            # NOTE(review): the tuple below is never closed; its second
            # element appears to be missing.
            hash_name_map[(name_hash, def_scope_hash)] = (name_min,

    if (debug_mode):
        print("HASH NAME MAP LEN: " + str(len(hash_name_map)))

    # We can switch this back once we train models on a corpus with literals
    # lx = WebLexer(a_iBuilder.get_text())
    lx = WebLexer(a_iBuilder.get_text_wo_literals())
    #print("-----------------Moses In ----------------------")
    #line_subset = a_scopeAnalyst.getMinifiableLines(a_iBuilder)
    #line_list = sorted(list(line_subset))
    #line_map = {}
    #m_line = 0
    #for next_line in line_list:
    #    line_map[m_line] = next_line
    #    m_line += 1
    #lx = WebLexer(a_iBuilder.get_text_on_lines_wo_literals(line_subset))

    #Performance measures -> wrap up the preprocessing/ renaming
    end = time.time()
    rn_time = end - rn_start
    m_start = time.time()
    #    print("Invoking Moses.")
    #    print(lx.collapsedText)
    # Translate renamed input
    #md = WebMosesDecoder(proxy)
    #(ok, translation, _err) = md.run(lx.collapsedText)
    (ok, translation, _err) = segmentedTranslation(lx, SEGMENTED_TRANS_SIZE,
                                                   proxy, debug_mode)
    # Unlike the earlier failure returns, this one still hands back the
    # translation, the index builder, the scope analyst and hash_name_map.
    if not ok:
        return (False, "Moses server failed for " + str(r_strategy),
                translation, {}, a_iBuilder, a_scopeAnalyst, {}, {}, {},
                hash_name_map, 0, 0, 0, 0)

    m_end = time.time()
    m_time = m_end - m_start

    # Only the start time of post-processing is recorded here; it is
    # returned to the caller as post_start.
    post_start = time.time()

    (a_name_positions, a_position_names,
     a_use_scopes) = prepHelpers(a_iBuilder, a_scopeAnalyst)

    # NOTE(review): in the visible code name_candidates is only assigned
    # inside this branch, so the final return would fail with an unbound
    # local when translation is None -- confirm whether an initialisation
    # was lost.
    if translation is not None:
        # Parse moses output
        mp = MosesParser()
        # NOTE(review): this `if` has no body; a debug print appears to be
        # missing.
        if (debug_mode):

        name_candidates = mp.parse(translation, a_iBuilder,
                                   a_position_names)  #,

        #A slightly modified version of parse to remap the moses
        #output lines to the correct original lines.
        #name_candidates = mp.parse_subset(translation,
        #                                  a_iBuilder,
        #                                  a_position_names,
        #                                  line_map)

    # Lexing time is the sum of the build_time attributes of the two
    # WebLexer objects created above.
    lex_time = lx.build_time + a_lexer.build_time
    return (True, "", translation, name_candidates, a_iBuilder, a_scopeAnalyst,
            a_name_positions, a_position_names, a_use_scopes, hash_name_map,
            rn_time, m_time, lex_time, post_start)