def run_on_js(filename, passes, js_engine, jcache):
    """Run the JS optimizer passes over ``filename`` and return the output path.

    When the file carries a ``// EMSCRIPTEN_GENERATED_FUNCTIONS`` metadata
    line, only the code between the ``// EMSCRIPTEN_START_FUNCS`` and
    ``// EMSCRIPTEN_END_FUNCS`` markers is optimized and the surrounding text
    (``pre``/``post``) is written back untouched; without metadata the whole
    file is processed.  The code is split into chunks, each chunk is written
    to a temp file and passed to ``run_on_chunk`` (in a process pool when
    there is more than one chunk and more than one core), and the chunk
    outputs are concatenated into the result file.

    Args:
      filename: path of the JS file to optimize.
      passes: a pass name, or a list of pass names, appended to each
        optimizer command line.
      js_engine: list forming the command prefix used to launch JS_OPTIMIZER.
      jcache: ``True`` (meaning ``shared.JCache``) or a cache object to reuse
        previously optimized chunks; a falsy value disables caching.

    Returns:
      The path of the written output file, which ends in ``.jo.js``.
    """
    if isinstance(jcache, bool) and jcache:
        jcache = shared.JCache
    if jcache:
        shared.JCache.ensure()

    if isinstance(passes, str):
        passes = [passes]

    with open(filename) as src:
        js = src.read()
    if os.linesep != '\n':
        js = js.replace(os.linesep, '\n')  # we assume \n in the splitting code

    # Find suffix
    suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS'
    suffix_start = js.find(suffix_marker)
    suffix = ''
    if suffix_start >= 0:
        suffix_end = js.find('\n', suffix_start)
        suffix = js[suffix_start:suffix_end] + '\n'
        # If there is metadata, we will run only on the generated functions.
        # If there isn't, we will run on everything.
        # NOTE(review): eval() of text read from the input file. The result is
        # not used further down in this function; the call is kept so that a
        # malformed metadata line still fails here. Only safe for trusted input.
        generated = set(eval(suffix[len(suffix_marker) + 1:]))

    # Find markers
    start_funcs_marker = '// EMSCRIPTEN_START_FUNCS\n'
    end_funcs_marker = '// EMSCRIPTEN_END_FUNCS\n'
    start_funcs = js.find(start_funcs_marker)
    end_funcs = js.rfind(end_funcs_marker)
    assert (start_funcs >= 0) == (end_funcs >= 0) == (not not suffix)
    asm_registerize = 'asm' in passes and 'registerize' in passes
    if asm_registerize:
        start_asm_marker = '// EMSCRIPTEN_START_ASM\n'
        end_asm_marker = '// EMSCRIPTEN_END_ASM\n'
        start_asm = js.find(start_asm_marker)
        end_asm = js.rfind(end_asm_marker)
        assert (start_asm >= 0) == (end_asm >= 0)

    if not suffix and jcache:
        # JCache cannot be used without metadata, since it might reorder stuff,
        # and that's dangerous since only generated can be reordered.
        # This means jcache does not work after closure compiler runs, for
        # example. But you won't get much benefit from jcache with closure
        # anyhow (since closure is likely the longest part of the build).
        if DEBUG:
            print >> sys.stderr, 'js optimizer: no metadata, so disabling jcache'
        jcache = False

    if suffix:
        funcs_begin = start_funcs + len(start_funcs_marker)
        if not asm_registerize:
            pre = js[:funcs_begin]
            post = js[end_funcs + len(end_funcs_marker):]
            js = js[funcs_begin:end_funcs]
            if 'asm' not in passes:
                # can have Module[..] and inlining prevention code, push those to post
                def is_final(line):
                    return len(line) > 0 and (
                        line.startswith(('Module[', 'if (globalScope)')) or
                        line.endswith('["X"]=1;'))

                lines = js.split('\n')
                finals = [line for line in lines if is_final(line)]
                js = '\n'.join([line for line in lines if not is_final(line)])
                post = '\n'.join(finals) + '\n' + post
            post = end_funcs_marker + post
        else:
            # We need to split out the asm shell as well, for minification
            pre = js[:start_asm + len(start_asm_marker)]
            post = js[end_asm:]
            asm_shell = js[start_asm + len(start_asm_marker):funcs_begin] + ''' EMSCRIPTEN_FUNCS(); ''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)]
            js = js[funcs_begin:end_funcs]

            minifier = Minifier(js, js_engine)
            asm_shell_pre, asm_shell_post = minifier.minify_shell(
                asm_shell, 'compress' in passes).split('EMSCRIPTEN_FUNCS();')
            asm_shell_post = asm_shell_post.replace('});', '})')
            pre += asm_shell_pre + '\n' + start_funcs_marker
            post = end_funcs_marker + asm_shell_post + post

            minify_info = minifier.serialize()
            #if DEBUG: print >> sys.stderr, 'minify info:', minify_info
        # remove suffix if no longer needed
        if 'last' in passes:
            suffix_start = post.find(suffix_marker)
            suffix_end = post.find('\n', suffix_start)
            post = post[:suffix_start] + post[suffix_end:]
    else:
        pre = ''
        post = ''

    # Pick where to split into chunks, so that (1) they do not oom in
    # node/uglify, and (2) we can run them in parallel.
    # If we have metadata, we split only the generated code, and save the pre
    # and post on the side (and do not optimize them).
    parts = js.split('\n}\n')
    last_index = len(parts) - 1
    funcs = []
    for i, func in enumerate(parts):
        if i < last_index:
            func += '\n}\n'  # last part needs no }
        m = func_sig.search(func)
        if m:
            ident = m.group(2)
        elif suffix:
            continue  # ignore whitespace
        else:
            ident = 'anon_%d' % i
        assert ident
        funcs.append((ident, func))
    parts = None
    total_size = len(js)
    js = None

    cores = int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count())
    intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
    chunk_size = min(MAX_CHUNK_SIZE,
                     max(MIN_CHUNK_SIZE, total_size // intended_num_chunks))

    chunks = shared.chunkify(
        funcs, chunk_size, jcache.get_cachename('jsopt') if jcache else None)

    cached_outputs = []
    if jcache:
        # load chunks from cache where we can # TODO: ignore small chunks
        uncached = []
        for chunk in chunks:
            keys = [chunk]
            shortkey = shared.JCache.get_shortkey(keys)  # TODO: share shortkeys with later code
            out = shared.JCache.get(shortkey, keys)
            if out:
                cached_outputs.append(out)
            else:
                uncached.append(chunk)
        chunks = uncached
        if len(cached_outputs) > 0:
            if DEBUG:
                print >> sys.stderr, ' loading %d jsfuncchunks from jcache' % len(cached_outputs)

    def write_chunk(chunk, i):
        # Write one chunk plus the trailer the optimizer expects; return its path.
        temp_file = temp_files.get('.jsfunc_%d.js' % i).name
        with open(temp_file, 'w') as out:
            out.write(chunk)
            out.write(suffix_marker)
            if asm_registerize:
                out.write('\n')
                out.write('// MINIFY_INFO:' + minify_info)
        return temp_file

    filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]

    if len(filenames) > 0:
        # XXX Use '--nocrankshaft' to disable crankshaft to work around v8 bug
        # 1895, needed for older v8/node (node 0.6.8+ should be ok)
        commands = [js_engine + [JS_OPTIMIZER, chunk_file, 'noPrintMetadata'] + passes
                    for chunk_file in filenames]
        #print [' '.join(command) for command in commands]

        # Never start more workers than there are chunk files. (This used to
        # compare against the list itself, which on Python 2 never lowered
        # the core count.)
        cores = min(cores, len(filenames))
        if len(chunks) > 1 and cores >= 2:
            # We can parallelize
            if DEBUG:
                print >> sys.stderr, 'splitting up js optimization into %d chunks of size %d, using %d cores (total: %.2f MB)' % (
                    len(chunks), chunk_size, cores, total_size / (1024 * 1024.))
            pool = multiprocessing.Pool(processes=cores)
            filenames = pool.map(run_on_chunk, commands, chunksize=1)
        else:
            # We can't parallize, but still break into chunks to avoid
            # uglify/node memory issues
            if len(chunks) > 1 and DEBUG:
                print >> sys.stderr, 'splitting up js optimization into %d chunks of size %d' % (
                    len(chunks), chunk_size)
            filenames = [run_on_chunk(command) for command in commands]
    else:
        filenames = []

    for out_file in filenames:
        temp_files.note(out_file)

    # NOTE(review): historical behavior, kept for compatibility: when at least
    # one chunk was processed, the result is written next to the LAST chunk
    # output rather than next to the input file.
    if filenames:
        filename = filenames[-1]
    filename += '.jo.js'

    with open(filename, 'w') as f:
        f.write(pre)
        for out_file in filenames:
            with open(out_file) as chunk_out:
                f.write(chunk_out.read())
            f.write('\n')
        if jcache:
            for cached in cached_outputs:
                f.write(cached)  # TODO: preserve order
                f.write('\n')
        f.write(post)
        # No need to write suffix: if there was one, it is inside post which
        # exists when suffix is there
        f.write('\n')

    if jcache:
        # save chunks to cache
        for chunk, out_file in zip(chunks, filenames):
            keys = [chunk]
            shortkey = shared.JCache.get_shortkey(keys)
            with open(out_file) as chunk_out:
                shared.JCache.set(shortkey, keys, chunk_out.read())
        if DEBUG and len(chunks) > 0:
            print >> sys.stderr, ' saving %d jsfuncchunks to jcache' % len(chunks)

    return filename
def run_on_js(filename, passes, js_engine, source_map=False, extra_info=None, just_split=False, just_concat=False):
    """Run the JS optimizer passes over ``filename`` and return the output path.

    The code between the start/end function markers is split into chunks,
    each chunk is written to a temp file and processed by ``run_on_chunk``
    (in a process pool when there are several chunks and cores), and the
    results are written back between the ``pre`` and ``post`` parts of the
    file.  The ``closure``, ``cleanup`` and ``splitMemory`` passes are applied
    here to the shell code (everything outside the asm module) rather than
    being forwarded with the other passes.

    Args:
      filename: path of the JS file to optimize.
      passes: a pass name, or a list of pass names.
      js_engine: list forming the command prefix used to launch JS_OPTIMIZER.
      source_map: when true, a single core is used, ``--debug`` is added to
        the optimizer command line and the native optimizer is not allowed.
      extra_info: optional dict written to every chunk file as a
        ``// EXTRA_INFO:`` JSON trailer (merged into the minifier info when
        global names are minified).
      just_split: forwarded to ``split_funcs``; when true its pieces are used
        as the chunks directly instead of being re-chunked.
      just_concat: when true the chunk outputs are concatenated as-is instead
        of being split into functions and sorted.

    Returns:
      The path of the written output file, which ends in ``.jo.js``.

    Exits the process with status 1 when the function markers or the
    generated-functions suffix are missing.
    """
    if isinstance(passes, str):
        passes = [passes]

    with open(filename) as src:
        js = src.read()
    if os.linesep != '\n':
        js = js.replace(os.linesep, '\n')  # we assume \n in the splitting code

    # Find suffix
    suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS'
    suffix_start = js.find(suffix_marker)
    suffix = ''
    if suffix_start >= 0:
        suffix_end = js.find('\n', suffix_start)
        suffix = js[suffix_start:suffix_end] + '\n'
        # if there is metadata, we will run only on the generated functions.
        # If there isn't, we will run on everything.

    # Find markers
    start_funcs = js.find(start_funcs_marker)
    end_funcs = js.rfind(end_funcs_marker)

    if start_funcs < 0 or end_funcs < start_funcs or not suffix:
        logging.critical('Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s, suffix_start: %s' % (start_funcs, end_funcs, suffix_start))
        sys.exit(1)

    minify_globals = 'minifyNames' in passes and 'asm' in passes
    if minify_globals:
        # Global names are minified here via the Minifier; the optimizer
        # itself is then only asked to minify locals.
        passes = ['minifyLocals' if p == 'minifyNames' else p for p in passes]
        start_asm = js.find(start_asm_marker)
        end_asm = js.rfind(end_asm_marker)
        assert (start_asm >= 0) == (end_asm >= 0)

    closure = 'closure' in passes
    if closure:
        passes = [p for p in passes if p != 'closure']  # we will do it manually

    cleanup = 'cleanup' in passes
    if cleanup:
        passes = [p for p in passes if p != 'cleanup']  # we will do it manually

    split_memory = 'splitMemory' in passes

    funcs_begin = start_funcs + len(start_funcs_marker)
    if not minify_globals:
        pre = js[:funcs_begin]
        post = js[end_funcs + len(end_funcs_marker):]
        js = js[funcs_begin:end_funcs]
        if 'asm' not in passes:
            # can have Module[..] and inlining prevention code, push those to post
            def is_final(line):
                return len(line) > 0 and (
                    line.startswith(('Module[', 'if (globalScope)')) or
                    line.endswith('["X"]=1;'))

            lines = js.split('\n')
            finals = [line for line in lines if is_final(line)]
            js = '\n'.join([line for line in lines if not is_final(line)])
            post = '\n'.join(finals) + '\n' + post
        post = end_funcs_marker + post
    else:
        # We need to split out the asm shell as well, for minification
        pre = js[:start_asm + len(start_asm_marker)]
        post = js[end_asm:]
        asm_shell = js[start_asm + len(start_asm_marker):funcs_begin] + ''' EMSCRIPTEN_FUNCS(); ''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)]
        js = js[funcs_begin:end_funcs]

        # we assume there is a maximum of one new name per line
        minifier = Minifier(js, js_engine)

        # Some "passes" are really minifier options: consume them here so
        # they are not forwarded to the optimizer.
        forwarded = []
        for p in passes:
            if p.startswith('symbolMap='):
                minifier.symbols_file = p.split('=')[1]
            elif p == 'profilingFuncs':
                minifier.profiling_funcs = True
            else:
                forwarded.append(p)
        passes = forwarded

        asm_shell_pre, asm_shell_post = minifier.minify_shell(
            asm_shell, 'minifyWhitespace' in passes, source_map).split('EMSCRIPTEN_FUNCS();')
        asm_shell_post = asm_shell_post.replace('});', '})')
        pre += asm_shell_pre + '\n' + start_funcs_marker
        post = end_funcs_marker + asm_shell_post + post

        minify_info = minifier.serialize()
        #if DEBUG: print >> sys.stderr, 'minify info:', minify_info
    # remove suffix if no longer needed
    if suffix and 'last' in passes:
        suffix_start = post.find(suffix_marker)
        suffix_end = post.find('\n', suffix_start)
        post = post[:suffix_start] + post[suffix_end:]

    total_size = len(js)
    funcs = split_funcs(js, just_split)
    js = None

    # if we are making source maps, we want our debug numbering to start from
    # the top of the file, so avoid breaking the JS into chunks
    cores = 1 if source_map else int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count())

    if not just_split:
        intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
        chunk_size = min(MAX_CHUNK_SIZE,
                         max(MIN_CHUNK_SIZE, total_size // intended_num_chunks))
        chunks = shared.chunkify(funcs, chunk_size)
    else:
        # keep same chunks as before
        chunks = [entry[1] for entry in funcs]

    chunks = [chunk for chunk in chunks if len(chunk) > 0]
    if DEBUG and len(chunks) > 0:
        print >> sys.stderr, 'chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(map(len, chunks)), '-', min(map(len, chunks))
    funcs = None

    def write_chunk(chunk, i):
        # Write one chunk plus the trailer the optimizer expects; return its path.
        temp_file = temp_files.get('.jsfunc_%d.js' % i).name
        with open(temp_file, 'w') as out:
            out.write(chunk)
            out.write(suffix_marker)
            if minify_globals:
                if extra_info:
                    for key, value in extra_info.iteritems():
                        assert key not in minify_info or value == minify_info[key], [key, value, minify_info[key]]
                        minify_info[key] = value
                out.write('\n')
                out.write('// EXTRA_INFO:' + json.dumps(minify_info))
            elif extra_info:
                out.write('\n')
                out.write('// EXTRA_INFO:' + json.dumps(extra_info))
        return temp_file

    filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]

    if len(filenames) > 0:
        if not use_native(passes, source_map) or not get_native_optimizer():
            debug_flag = ['--debug'] if source_map else []
            commands = [js_engine + [JS_OPTIMIZER, chunk_file, 'noPrintMetadata'] + debug_flag + passes
                        for chunk_file in filenames]
        else:
            # use the native optimizer
            shared.logging.debug('js optimizer using native')
            assert not source_map  # XXX need to use js optimizer
            commands = [[get_native_optimizer(), chunk_file] + passes
                        for chunk_file in filenames]
        #print [' '.join(command) for command in commands]

        cores = min(cores, len(filenames))
        if len(chunks) > 1 and cores >= 2:
            # We can parallelize
            if DEBUG:
                print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size / (1024 * 1024.))
            pool = multiprocessing.Pool(processes=cores)
            filenames = pool.map(run_on_chunk, commands, chunksize=1)
            try:
                # Shut the pool down explicitly; otherwise the worker
                # processes stay alive until they are lazily collected.
                pool.terminate()
                pool.join()
            except Exception as e:
                # Tearing the pool down is best-effort: a failure here must
                # not discard the results that were already computed.
                logging.debug('Attempting to tear down multiprocessing pool failed with an exception: ' + str(e))
        else:
            # We can't parallize, but still break into chunks to avoid
            # uglify/node memory issues
            if len(chunks) > 1 and DEBUG:
                print >> sys.stderr, 'splitting up js optimization into %d chunks' % (len(chunks))
            filenames = [run_on_chunk(command) for command in commands]
    else:
        filenames = []

    for out_file in filenames:
        temp_files.note(out_file)

    if closure or cleanup or split_memory:
        # run on the shell code, everything but what we js-optimize
        shell_start_marker = '// EMSCRIPTEN_START_ASM\n'
        shell_end_marker = '// EMSCRIPTEN_END_ASM\n'
        # Placeholder standing in for the whole asm module while the shell is
        # processed; it is located again in the output further down.
        cl_sep = 'wakaUnknownBefore(); var asm=wakaUnknownAfter(global,env,buffer)\n'

        cle = temp_files.get('.cl.js').name
        pre_1, pre_2 = pre.split(shell_start_marker)
        post_1, post_2 = post.split(shell_end_marker)
        with open(cle, 'w') as shell_file:
            shell_file.write(pre_1)
            shell_file.write(cl_sep)
            shell_file.write(post_2)
        cld = cle
        if split_memory:
            if DEBUG:
                print >> sys.stderr, 'running splitMemory on shell code'
            cld = run_on_chunk(js_engine + [JS_OPTIMIZER, cld, 'splitMemoryShell'])
            with open(cld, 'a') as shell_file:
                shell_file.write(suffix_marker)
        if closure:
            if DEBUG:
                print >> sys.stderr, 'running closure on shell code'
            cld = shared.Building.closure_compiler(cld, pretty='minifyWhitespace' not in passes)
            temp_files.note(cld)
        elif cleanup:
            if DEBUG:
                print >> sys.stderr, 'running cleanup on shell code'
            cleaned = cld + '.cl.js'
            temp_files.note(cleaned)
            cleanup_cmd = js_engine + [JS_OPTIMIZER, cld, 'noPrintMetadata', 'JSDCE'] + (['minifyWhitespace'] if 'minifyWhitespace' in passes else [])
            with open(cleaned, 'w') as cleaned_out:
                proc = subprocess.Popen(cleanup_cmd, stdout=cleaned_out)
                proc.communicate()
            assert proc.returncode == 0
            cld = cleaned
        with open(cld) as shell_file:
            coutput = shell_file.read()
        # Put the asm module back where the placeholder call was emitted.
        coutput = coutput.replace('wakaUnknownBefore();', shell_start_marker)
        after = 'wakaUnknownAfter'
        start = coutput.find(after)
        end = coutput.find(')', start)
        pre = coutput[:start] + '(function(global,env,buffer) {\n' + pre_2[pre_2.find('{') + 1:]
        post = post_1 + shell_end_marker + coutput[end + 1:]

    # NOTE(review): historical behavior, kept for compatibility: when at least
    # one chunk was processed, the result is written next to the LAST chunk
    # output rather than next to the input file.
    if filenames:
        filename = filenames[-1]
    filename += '.jo.js'

    with open(filename, 'w') as f:
        f.write(pre)
        pre = None

        if not just_concat:
            # sort functions by size, to make diffing easier and to improve aot times
            funcs = []
            for out_file in filenames:
                with open(out_file) as chunk_out:
                    funcs.extend(split_funcs(chunk_out.read(), False))
            if not os.environ.get('EMCC_NO_OPT_SORT'):
                # Largest body first; equal sizes ordered by descending name.
                funcs.sort(key=lambda entry: (len(entry[1]), entry[0]), reverse=True)

            if 'last' in passes and len(funcs) > 0:
                count = funcs[0][1].count('\n')
                if count > 3000:
                    print >> sys.stderr, 'warning: Output contains some very large functions (%s lines in %s), consider building source files with -Os or -Oz, and/or trying OUTLINING_LIMIT to break them up (see settings.js; note that the parameter there affects AST nodes, while we measure lines here, so the two may not match up)' % (count, funcs[0][0])

            for func in funcs:
                f.write(func[1])
            funcs = None
        else:
            # just concat the outputs
            for out_file in filenames:
                with open(out_file) as chunk_out:
                    f.write(chunk_out.read())
        f.write('\n')
        f.write(post)
        # No need to write suffix: if there was one, it is inside post which
        # exists when suffix is there
        f.write('\n')

    return filename
def run_on_js(filename, passes, js_engine, source_map=False, extra_info=None, just_split=False, just_concat=False): if type(passes) == str: passes = [passes] js = open(filename).read() if os.linesep != '\n': js = js.replace(os.linesep, '\n') # we assume \n in the splitting code # Find suffix suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS' suffix_start = js.find(suffix_marker) suffix = '' if suffix_start >= 0: suffix_end = js.find('\n', suffix_start) suffix = js[suffix_start:suffix_end] + '\n' # if there is metadata, we will run only on the generated functions. If there isn't, we will run on everything. # Find markers start_funcs = js.find(start_funcs_marker) end_funcs = js.rfind(end_funcs_marker) if start_funcs < 0 or end_funcs < start_funcs or not suffix: logging.critical( 'Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s, suffix_start: %s' % (start_funcs, end_funcs, suffix_start)) sys.exit(1) minify_globals = 'minifyNames' in passes and 'asm' in passes if minify_globals: passes = map(lambda p: p if p != 'minifyNames' else 'minifyLocals', passes) start_asm = js.find(start_asm_marker) end_asm = js.rfind(end_asm_marker) assert (start_asm >= 0) == (end_asm >= 0) closure = 'closure' in passes if closure: passes = filter(lambda p: p != 'closure', passes) # we will do it manually cleanup = 'cleanup' in passes if cleanup: passes = filter(lambda p: p != 'cleanup', passes) # we will do it manually split_memory = 'splitMemory' in passes if not minify_globals: pre = js[:start_funcs + len(start_funcs_marker)] post = js[end_funcs + len(end_funcs_marker):] js = js[start_funcs + len(start_funcs_marker):end_funcs] if 'asm' not in passes: # can have Module[..] 
and inlining prevention code, push those to post class Finals: buf = [] def process(line): if len(line) > 0 and (line.startswith( ('Module[', 'if (globalScope)')) or line.endswith('["X"]=1;')): Finals.buf.append(line) return False return True js = '\n'.join(filter(process, js.split('\n'))) post = '\n'.join(Finals.buf) + '\n' + post post = end_funcs_marker + post else: # We need to split out the asm shell as well, for minification pre = js[:start_asm + len(start_asm_marker)] post = js[end_asm:] asm_shell = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] + ''' EMSCRIPTEN_FUNCS(); ''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)] js = js[start_funcs + len(start_funcs_marker):end_funcs] # we assume there is a maximum of one new name per line minifier = Minifier(js, js_engine) def check_symbol_mapping(p): if p.startswith('symbolMap='): minifier.symbols_file = p.split('=')[1] return False if p == 'profilingFuncs': minifier.profiling_funcs = True return False return True passes = filter(check_symbol_mapping, passes) asm_shell_pre, asm_shell_post = minifier.minify_shell( asm_shell, 'minifyWhitespace' in passes, source_map).split('EMSCRIPTEN_FUNCS();') asm_shell_post = asm_shell_post.replace('});', '})') pre += asm_shell_pre + '\n' + start_funcs_marker post = end_funcs_marker + asm_shell_post + post minify_info = minifier.serialize() #if DEBUG: print >> sys.stderr, 'minify info:', minify_info # remove suffix if no longer needed if suffix and 'last' in passes: suffix_start = post.find(suffix_marker) suffix_end = post.find('\n', suffix_start) post = post[:suffix_start] + post[suffix_end:] total_size = len(js) funcs = split_funcs(js, just_split) js = None # if we are making source maps, we want our debug numbering to start from the # top of the file, so avoid breaking the JS into chunks cores = 1 if source_map else int( os.environ.get('EMCC_CORES') or multiprocessing.cpu_count()) if not just_split: intended_num_chunks = 
int(round(cores * NUM_CHUNKS_PER_CORE)) chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks)) chunks = shared.chunkify(funcs, chunk_size) else: # keep same chunks as before chunks = map(lambda f: f[1], funcs) chunks = filter(lambda chunk: len(chunk) > 0, chunks) if DEBUG and len(chunks) > 0: print >> sys.stderr, 'chunkification: num funcs:', len( funcs), 'actual num chunks:', len( chunks), 'chunk size range:', max(map(len, chunks)), '-', min( map(len, chunks)) funcs = None if len(chunks) > 0: def write_chunk(chunk, i): temp_file = temp_files.get('.jsfunc_%d.js' % i).name f = open(temp_file, 'w') f.write(chunk) f.write(suffix_marker) if minify_globals: if extra_info: for key, value in extra_info.iteritems(): assert key not in minify_info or value == minify_info[ key], [key, value, minify_info[key]] minify_info[key] = value f.write('\n') f.write('// EXTRA_INFO:' + json.dumps(minify_info)) elif extra_info: f.write('\n') f.write('// EXTRA_INFO:' + json.dumps(extra_info)) f.close() return temp_file filenames = [write_chunk(chunks[i], i) for i in range(len(chunks))] else: filenames = [] with ToolchainProfiler.profile_block('run_optimizer'): if len(filenames) > 0: if not use_native(passes, source_map) or not get_native_optimizer(): commands = map( lambda filename: js_engine + [JS_OPTIMIZER, filename, 'noPrintMetadata'] + (['--debug'] if source_map else []) + passes, filenames) else: # use the native optimizer shared.logging.debug('js optimizer using native') assert not source_map # XXX need to use js optimizer commands = map( lambda filename: [get_native_optimizer(), filename] + passes, filenames) #print [' '.join(command) for command in commands] cores = min(cores, len(filenames)) if len(chunks) > 1 and cores >= 2: # We can parallelize if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % ( len(chunks), cores, total_size / (1024 * 1024.)) pool = 
multiprocessing.Pool(processes=cores) filenames = pool.map(run_on_chunk, commands, chunksize=1) try: # Shut down the pool, since otherwise processes are left alive and would only be lazily terminated, # and in other parts of the toolchain we also build up multiprocessing pools. pool.terminate() pool.join() except Exception, e: # On Windows we get occassional "Access is denied" errors when attempting to tear down the pool, ignore these. logging.debug( 'Attempting to tear down multiprocessing pool failed with an exception: ' + str(e)) else: # We can't parallize, but still break into chunks to avoid uglify/node memory issues if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % ( len(chunks)) filenames = [run_on_chunk(command) for command in commands] else:
def run_on_js(filename, gen_hash_info=False):
    """Run the duplicate function eliminator over ``filename``.

    The code between the start/end function markers is split into chunks,
    each chunk is written to a temp file and DUPLICATE_FUNCTION_ELIMINATOR is
    run on it through ``run_on_chunk`` (in a process pool when there are
    several chunks and cores).  The functions are then written back, largest
    first, between the ``pre`` and ``post`` parts of the file.

    Args:
      filename: path of the JS file to process.
      gen_hash_info: when true the tool is run with ``--gen-hash-info``, the
        unmodified chunk files are written back and the per-chunk tool outputs
        are handed to ``write_equivalent_fn_hash_to_file``.  When false the
        text from the last ``//`` of the input onwards is treated as the hash
        info, appended to every chunk, and the tool is run with
        ``--use-hash-info``.

    Returns:
      The path of the written output file, which ends in ``.jo.js``.

    Exits the process with status 1 when the function markers are missing.
    """
    js_engine = shared.NODE_JS

    with open(filename) as src:
        js = src.read()
    if os.linesep != '\n':
        js = js.replace(os.linesep, '\n')  # we assume \n in the splitting code

    equivalentfn_hash_info = None
    passed_in_filename = filename

    # Find markers
    start_funcs = js.find(start_funcs_marker)
    end_funcs = js.rfind(end_funcs_marker)

    if start_funcs < 0 or end_funcs < start_funcs:
        logging.critical(
            'Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s)'
            % (start_funcs, end_funcs))
        sys.exit(1)

    funcs_begin = start_funcs + len(start_funcs_marker)
    if not gen_hash_info:
        equivalentfn_hash_info = js[js.rfind('//'):]

        start_asm = js.find(start_asm_marker)
        end_asm = js.rfind(end_asm_marker)
        assert (start_asm >= 0) == (end_asm >= 0)

        # We need to split out the asm shell as well, for minification
        pre = js[:start_asm + len(start_asm_marker)]
        post = js[end_asm:]
        asm_shell = js[start_asm + len(start_asm_marker):funcs_begin] + ''' EMSCRIPTEN_FUNCS(); ''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)]
        js = js[funcs_begin:end_funcs]

        # we assume there is a maximum of one new name per line
        asm_shell_pre, asm_shell_post = process_shell(
            js, js_engine, asm_shell,
            equivalentfn_hash_info).split('EMSCRIPTEN_FUNCS();')
        asm_shell_post = asm_shell_post.replace('});', '})')
        pre += asm_shell_pre + '\n' + start_funcs_marker
        post = end_funcs_marker + asm_shell_post + post

        # We don't need the extra info at the end
        post = post[:post.rfind('//')].strip()
    else:
        pre = js[:funcs_begin]
        post = js[end_funcs + len(end_funcs_marker):]
        js = js[funcs_begin:end_funcs]
        post = end_funcs_marker + post

    total_size = len(js)
    funcs = split_funcs(js, False)
    js = None

    cores = int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count())
    intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
    chunk_size = min(MAX_CHUNK_SIZE,
                     max(MIN_CHUNK_SIZE, total_size // intended_num_chunks))
    chunks = shared.chunkify(funcs, chunk_size)

    chunks = [chunk for chunk in chunks if len(chunk) > 0]
    if DEBUG and len(chunks) > 0:
        print >> sys.stderr, 'chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(map(len, chunks)), '-', min(map(len, chunks))
    funcs = None

    def write_chunk(chunk, i):
        # Write one chunk (plus the hash info when using it); return its path.
        temp_file = temp_files.get('.jsfunc_%d.js' % i).name
        with open(temp_file, 'w') as out:
            out.write(chunk)
            if not gen_hash_info:
                out.write('\n')
                out.write(equivalentfn_hash_info)
        return temp_file

    filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]

    old_filenames = filenames[:]
    if len(filenames) > 0:
        mode_flag = '--gen-hash-info' if gen_hash_info else '--use-hash-info'
        commands = [js_engine + [DUPLICATE_FUNCTION_ELIMINATOR, chunk_file,
                                 mode_flag, '--no-minimize-whitespace']
                    for chunk_file in filenames]

        if DEBUG and commands is not None:
            print >> sys.stderr, [
                ' '.join(command if command is not None else '(null)')
                for command in commands
            ]

        cores = min(cores, len(filenames))
        if len(chunks) > 1 and cores >= 2:
            # We can parallelize
            if DEBUG:
                print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (
                    len(chunks), cores, total_size / (1024 * 1024.))
            pool = multiprocessing.Pool(processes=cores)
            filenames = pool.map(run_on_chunk, commands, chunksize=1)
            try:
                # Shut the pool down explicitly; otherwise the worker
                # processes stay alive until they are lazily collected.
                pool.terminate()
                pool.join()
            except Exception as e:
                # Tearing the pool down is best-effort: a failure here must
                # not discard the results that were already computed.
                logging.debug('Attempting to tear down multiprocessing pool failed with an exception: ' + str(e))
        else:
            # We can't parallize, but still break into chunks to avoid
            # uglify/node memory issues
            if len(chunks) > 1 and DEBUG:
                print >> sys.stderr, 'splitting up js optimization into %d chunks' % (
                    len(chunks))
            filenames = [run_on_chunk(command) for command in commands]
    else:
        filenames = []

    json_files = []

    # We're going to be coalescing the files back at the end
    # Just replace the file list with the ones provided in
    # the command list - and save off the generated Json
    if gen_hash_info:
        json_files = filenames[:]
        filenames = old_filenames[:]

    for out_file in filenames:
        temp_files.note(out_file)

    # NOTE(review): historical behavior, kept for compatibility: when at least
    # one chunk exists, the result is written next to the LAST chunk file
    # rather than next to the input file (whose path is kept in
    # passed_in_filename).
    if filenames:
        filename = filenames[-1]
    filename += '.jo.js'

    with open(filename, 'w') as f:
        f.write(pre)
        pre = None

        # sort functions by size, to make diffing easier and to improve aot times
        funcs = []
        for out_file in filenames:
            with open(out_file) as chunk_out:
                funcs.extend(split_funcs(chunk_out.read(), False))
        if not os.environ.get('EMCC_NO_OPT_SORT'):
            # Largest body first; equal sizes ordered by descending name.
            funcs.sort(key=lambda entry: (len(entry[1]), entry[0]), reverse=True)

        for func in funcs:
            f.write(func[1])
        funcs = None

        f.write('\n')
        f.write(post)
        # No need to write suffix: if there was one, it is inside post which
        # exists when suffix is there
        f.write('\n')

        if gen_hash_info and len(json_files) > 0:
            write_equivalent_fn_hash_to_file(f, json_files, passed_in_filename)

    return filename
def run_on_js(filename, passes, js_engine, jcache, source_map=False, extra_info=None, just_split=False, just_concat=False):
  """Run the JS optimizer `passes` over a JS file, chunk by chunk.

  The input is cut into `pre` (everything up to and including the
  start-of-functions marker), the function bodies, and `post` (everything
  after them). Only the function bodies are chunked, written to temp files and
  handed to the optimizer (through a multiprocessing pool when there is more
  than one chunk and at least two cores are usable); `pre` and `post` are
  written back around the optimized functions. When neither the
  generated-functions suffix nor the function markers are found, the whole
  file is treated as function bodies and `pre`/`post` are empty.

  Args:
    filename: path of the JS file to read.
    passes: a pass name or a list of pass names. 'closure' and 'cleanup' are
      removed from the list and handled here on the shell code. 'minifyNames'
      together with 'asm' is replaced by 'minifyLocals' and enables
      minification of the asm shell; in that mode 'symbolMap=<file>' entries
      are consumed and stored on the minifier.
    js_engine: list holding the command prefix used to launch JS_OPTIMIZER.
    jcache: True to use shared.JCache, or a cache object; falsy disables
      caching. Caching is switched off when the generated code cannot be
      located.
    source_map: forwarded to Minifier.minify_shell; when true, '--debug' is
      added to the optimizer command and a single core is used.
    extra_info: optional dict appended to every chunk as a '// EXTRA_INFO:'
      JSON line (merged into the minifier info when minifying globals).
    just_split: treat every non-empty input line as its own chunk instead of
      splitting on function boundaries and re-chunking.
    just_concat: concatenate the optimizer outputs as they are instead of
      re-splitting them into functions and sorting those.

  Returns:
    Path of the written file: the last chunk output name (or `filename` when
    no chunk was run) with '.jo.js' appended.
  """
  if isinstance(jcache, bool) and jcache: jcache = shared.JCache
  if jcache: shared.JCache.ensure()

  if isinstance(passes, str):
    passes = [passes]

  # Read through a context manager so the handle is closed deterministically.
  with open(filename) as infile:
    js = infile.read()
  if os.linesep != '\n':
    js = js.replace(os.linesep, '\n') # we assume \n in the splitting code

  # Find suffix
  suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS'
  suffix_start = js.find(suffix_marker)
  suffix = ''
  if suffix_start >= 0:
    suffix_end = js.find('\n', suffix_start)
    suffix = js[suffix_start:suffix_end] + '\n'
    # if there is metadata, we will run only on the generated functions. If there isn't, we will run on everything.
    # NOTE(review): eval() runs on text taken from the input file and its result is
    # not used below; this is only acceptable while the input is trusted compiler output.
    generated = set(eval(suffix[len(suffix_marker) + 1:]))

  # Find markers
  start_funcs = js.find(start_funcs_marker)
  end_funcs = js.rfind(end_funcs_marker)

  know_generated = suffix or start_funcs >= 0

  minify_globals = 'minifyNames' in passes and 'asm' in passes
  if minify_globals:
    passes = ['minifyLocals' if p == 'minifyNames' else p for p in passes]
    start_asm = js.find(start_asm_marker)
    end_asm = js.rfind(end_asm_marker)
    assert (start_asm >= 0) == (end_asm >= 0)

  closure = 'closure' in passes
  if closure:
    passes = [p for p in passes if p != 'closure'] # we will do it manually

  cleanup = 'cleanup' in passes
  if cleanup:
    passes = [p for p in passes if p != 'cleanup'] # we will do it manually

  if not know_generated and jcache:
    # JCache cannot be used without metadata, since it might reorder stuff, and that's dangerous since only generated can be reordered
    # This means jcache does not work after closure compiler runs, for example. But you won't get much benefit from jcache with closure
    # anyhow (since closure is likely the longest part of the build).
    if DEBUG: print >> sys.stderr, 'js optimizer: no metadata, so disabling jcache'
    jcache = False

  if know_generated:
    if not minify_globals:
      pre = js[:start_funcs + len(start_funcs_marker)]
      post = js[end_funcs + len(end_funcs_marker):]
      js = js[start_funcs + len(start_funcs_marker):end_funcs]
      if 'asm' not in passes:
        # can have Module[..] and inlining prevention code, push those to post
        class Finals:
          buf = []
        def process(line):
          if len(line) > 0 and (line.startswith(('Module[', 'if (globalScope)')) or line.endswith('["X"]=1;')):
            Finals.buf.append(line)
            return False
          return True
        js = '\n'.join(filter(process, js.split('\n')))
        post = '\n'.join(Finals.buf) + '\n' + post
      post = end_funcs_marker + post
    else:
      # We need to split out the asm shell as well, for minification
      pre = js[:start_asm + len(start_asm_marker)]
      post = js[end_asm:]
      # The shell is everything inside the asm module except the functions,
      # with a placeholder call standing in for them.
      asm_shell = (js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] +
                   ''' EMSCRIPTEN_FUNCS(); ''' +
                   js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)])
      js = js[start_funcs + len(start_funcs_marker):end_funcs]

      # we assume there is a maximum of one new name per line
      minifier = Minifier(js, js_engine)
      def check_symbol_mapping(p):
        if p.startswith('symbolMap='):
          minifier.symbols_file = p.split('=')[1]
          return False
        return True
      passes = filter(check_symbol_mapping, passes)
      asm_shell_pre, asm_shell_post = minifier.minify_shell(asm_shell, 'minifyWhitespace' in passes, source_map).split('EMSCRIPTEN_FUNCS();')
      asm_shell_post = asm_shell_post.replace('});', '})')
      pre += asm_shell_pre + '\n' + start_funcs_marker
      post = end_funcs_marker + asm_shell_post + post

      minify_info = minifier.serialize()
      #if DEBUG: print >> sys.stderr, 'minify info:', minify_info
    # remove suffix if no longer needed
    if suffix and 'last' in passes:
      suffix_start = post.find(suffix_marker)
      suffix_end = post.find('\n', suffix_start)
      post = post[:suffix_start] + post[suffix_end:]

  else:
    pre = ''
    post = ''

  def split_funcs(js, just_split=False):
    # Returns a list of (identifier, text) pairs, one per function.
    if just_split: return map(lambda line: ('(json)', line), js.split('\n'))
    # Pick where to split into chunks, so that (1) they do not oom in node/uglify, and (2) we can run them in parallel
    # If we have metadata, we split only the generated code, and save the pre and post on the side (and do not optimize them)
    parts = map(lambda part: part, js.split('\n}\n'))
    funcs = []
    for i in range(len(parts)):
      func = parts[i]
      if i < len(parts) - 1: func += '\n}\n' # last part needs no }
      m = func_sig.search(func)
      if m:
        ident = m.group(1)
      else:
        if know_generated: continue # ignore whitespace
        ident = 'anon_%d' % i
      assert ident
      funcs.append((ident, func))
    return funcs

  total_size = len(js)
  funcs = split_funcs(js, just_split)
  js = None

  # if we are making source maps, we want our debug numbering to start from the
  # top of the file, so avoid breaking the JS into chunks
  cores = 1 if source_map else int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count())

  if not just_split:
    intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
    chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
    chunks = shared.chunkify(funcs, chunk_size, jcache.get_cachename('jsopt') if jcache else None)
  else:
    # keep same chunks as before
    chunks = map(lambda f: f[1], funcs)

  chunks = filter(lambda chunk: len(chunk) > 0, chunks)
  if DEBUG and len(chunks) > 0: print >> sys.stderr, 'chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(map(len, chunks)), '-', min(map(len, chunks))
  funcs = None

  if jcache:
    # load chunks from cache where we can # TODO: ignore small chunks
    cached_outputs = []
    def load_from_cache(chunk):
      # Filter predicate: keeps only the chunks that still have to be optimized.
      keys = [chunk]
      shortkey = shared.JCache.get_shortkey(keys) # TODO: share shortkeys with later code
      out = shared.JCache.get(shortkey, keys)
      if out:
        cached_outputs.append(out)
        return False
      return True
    chunks = filter(load_from_cache, chunks)
    if len(cached_outputs) > 0:
      if DEBUG: print >> sys.stderr, ' loading %d jsfuncchunks from jcache' % len(cached_outputs)
  else:
    cached_outputs = []

  if len(chunks) > 0:
    def write_chunk(chunk, i):
      # Writes one chunk plus the metadata lines the optimizer reads, returns the temp path.
      temp_file = temp_files.get('.jsfunc_%d.js' % i).name
      with open(temp_file, 'w') as f:
        f.write(chunk)
        f.write(suffix_marker)
        if minify_globals:
          if extra_info:
            for key, value in extra_info.iteritems():
              assert key not in minify_info or value == minify_info[key], [key, value, minify_info[key]]
              minify_info[key] = value
          f.write('\n')
          f.write('// EXTRA_INFO:' + json.dumps(minify_info))
        elif extra_info:
          f.write('\n')
          f.write('// EXTRA_INFO:' + json.dumps(extra_info))
      return temp_file
    filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]
  else:
    filenames = []

  if len(filenames) > 0:
    if not use_native(passes, source_map) or not get_native_optimizer():
      commands = map(lambda chunk_file: js_engine +
          [JS_OPTIMIZER, chunk_file, 'noPrintMetadata'] +
          (['--debug'] if source_map else []) + passes, filenames)
    else:
      # use the native optimizer
      shared.logging.debug('js optimizer using native')
      assert not source_map # XXX need to use js optimizer
      commands = map(lambda chunk_file: [get_native_optimizer(), chunk_file] + passes, filenames)
    #print [' '.join(command) for command in commands]

    cores = min(cores, len(filenames))
    if len(chunks) > 1 and cores >= 2:
      # We can parallelize
      if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size / (1024 * 1024.))
      pool = multiprocessing.Pool(processes=cores)
      filenames = pool.map(run_on_chunk, commands, chunksize=1)
    else:
      # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
      if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % (len(chunks))
      filenames = [run_on_chunk(command) for command in commands]
  else:
    filenames = []

  # NOTE: this loop rebinds `filename`; when any chunk was run, the output path
  # below is derived from the last chunk output rather than from the input path.
  for filename in filenames: temp_files.note(filename)

  if closure or cleanup:
    # run on the shell code, everything but what we js-optimize
    start_asm = '// EMSCRIPTEN_START_ASM\n'
    end_asm = '// EMSCRIPTEN_END_ASM\n'
    cl_sep = 'wakaUnknownBefore(); var asm=wakaUnknownAfter(global,env,buffer)\n'

    cle = temp_files.get('.cl.js').name
    with open(cle, 'w') as c:
      pre_1, pre_2 = pre.split(start_asm)
      post_1, post_2 = post.split(end_asm)
      c.write(pre_1)
      c.write(cl_sep)
      c.write(post_2)
    if closure:
      if DEBUG: print >> sys.stderr, 'running closure on shell code'
      cld = shared.Building.closure_compiler(cle, pretty='minifyWhitespace' not in passes)
    else:
      if DEBUG: print >> sys.stderr, 'running cleanup on shell code'
      cld = cle + '.js'
      # Close our copy of the redirect target once the child has finished.
      with open(cld, 'w') as cld_out:
        subprocess.Popen(js_engine + [JS_OPTIMIZER, cle, 'noPrintMetadata'] + (['minifyWhitespace'] if 'minifyWhitespace' in passes else []), stdout=cld_out).communicate()
    temp_files.note(cld)
    with open(cld) as cld_in:
      coutput = cld_in.read()
    # Put the asm module back in place of the placeholder calls.
    coutput = coutput.replace('wakaUnknownBefore();', start_asm)
    after = 'wakaUnknownAfter'
    start = coutput.find(after)
    end = coutput.find(')', start)
    pre = coutput[:start] + '(function(global,env,buffer) {\n' + pre_2[pre_2.find('{') + 1:]
    post = post_1 + end_asm + coutput[end + 1:]

  filename += '.jo.js'
  with open(filename, 'w') as f:
    f.write(pre)
    pre = None

    if not just_concat:
      # sort functions by size, to make diffing easier and to improve aot times
      funcses = []
      for out_file in filenames:
        with open(out_file) as chunk_out:
          funcses.append(split_funcs(chunk_out.read()))
      funcs = [item for sublist in funcses for item in sublist]
      funcses = None
      def sorter(x, y):
        # Largest body first; ties broken by identifier, descending.
        diff = len(y[1]) - len(x[1])
        if diff != 0: return diff
        if x[0] < y[0]: return 1
        elif x[0] > y[0]: return -1
        return 0
      funcs.sort(sorter)

      if 'last' in passes and len(funcs) > 0:
        # funcs[0] is the largest function after sorting
        count = funcs[0][1].count('\n')
        if count > 3000:
          print >> sys.stderr, 'warning: Output contains some very large functions (%s lines in %s), consider building source files with -Os or -Oz, and/or trying OUTLINING_LIMIT to break them up (see settings.js; note that the parameter there affects AST nodes, while we measure lines here, so the two may not match up)' % (count, funcs[0][0])

      for func in funcs:
        f.write(func[1])
      funcs = None
    else:
      # just concat the outputs
      for out_file in filenames:
        with open(out_file) as chunk_out:
          f.write(chunk_out.read())
      assert not jcache
    f.write('\n')
    if jcache:
      for cached in cached_outputs:
        f.write(cached) # TODO: preserve order
        f.write('\n')
    f.write(post)
    # No need to write suffix: if there was one, it is inside post which exists when suffix is there
    f.write('\n')

  if jcache:
    # save chunks to cache
    for i in range(len(chunks)):
      chunk = chunks[i]
      keys = [chunk]
      shortkey = shared.JCache.get_shortkey(keys)
      with open(filenames[i]) as chunk_out:
        shared.JCache.set(shortkey, keys, chunk_out.read())
    if DEBUG and len(chunks) > 0: print >> sys.stderr, ' saving %d jsfuncchunks to jcache' % len(chunks)

  return filename
def run_on_js(filename, passes, js_engine, jcache):
  """Run the JS optimizer `passes` over a JS file, chunk by chunk.

  When the generated-functions suffix is present, the input is cut into `pre`
  (everything up to and including the start-of-functions marker), the function
  bodies, and `post` (everything after them); only the function bodies are
  chunked and optimized. Without the suffix the whole file is treated as
  function bodies and `pre`/`post` are empty. Chunks are written to temp
  files and run through JS_OPTIMIZER, in a multiprocessing pool when there is
  more than one chunk and at least two cores are usable.

  Args:
    filename: path of the JS file to read.
    passes: a pass name or a list of pass names. 'closure' is removed from the
      list and run here on the shell code. 'registerizeAndMinify' together
      with 'asm' is replaced by 'registerize' and enables minification of the
      asm shell.
    js_engine: list holding the command prefix used to launch JS_OPTIMIZER.
    jcache: True to use shared.JCache, or a cache object; falsy disables
      caching. Caching is switched off when the input has no suffix metadata.

  Returns:
    Path of the written file: the last chunk output name (or `filename` when
    no chunk was run) with '.jo.js' appended.
  """
  if isinstance(jcache, bool) and jcache: jcache = shared.JCache
  if jcache: shared.JCache.ensure()

  if isinstance(passes, str):
    passes = [passes]

  # Read through a context manager so the handle is closed deterministically.
  with open(filename) as infile:
    js = infile.read()
  if os.linesep != '\n':
    js = js.replace(os.linesep, '\n') # we assume \n in the splitting code

  # Find suffix
  suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS'
  suffix_start = js.find(suffix_marker)
  suffix = ''
  if suffix_start >= 0:
    suffix_end = js.find('\n', suffix_start)
    suffix = js[suffix_start:suffix_end] + '\n'
    # if there is metadata, we will run only on the generated functions. If there isn't, we will run on everything.
    # NOTE(review): eval() runs on text taken from the input file and its result is
    # not used below; this is only acceptable while the input is trusted compiler output.
    generated = set(eval(suffix[len(suffix_marker)+1:]))

  # Find markers
  start_funcs_marker = '// EMSCRIPTEN_START_FUNCS\n'
  end_funcs_marker = '// EMSCRIPTEN_END_FUNCS\n'
  start_funcs = js.find(start_funcs_marker)
  end_funcs = js.rfind(end_funcs_marker)
  #assert (start_funcs >= 0) == (end_funcs >= 0) == (not not suffix)

  minify_globals = 'registerizeAndMinify' in passes and 'asm' in passes
  if minify_globals:
    passes = ['registerize' if p == 'registerizeAndMinify' else p for p in passes]
    start_asm_marker = '// EMSCRIPTEN_START_ASM\n'
    end_asm_marker = '// EMSCRIPTEN_END_ASM\n'
    start_asm = js.find(start_asm_marker)
    end_asm = js.rfind(end_asm_marker)
    assert (start_asm >= 0) == (end_asm >= 0)

  closure = 'closure' in passes
  if closure:
    passes = [p for p in passes if p != 'closure'] # we will do it manually

  if not suffix and jcache:
    # JCache cannot be used without metadata, since it might reorder stuff, and that's dangerous since only generated can be reordered
    # This means jcache does not work after closure compiler runs, for example. But you won't get much benefit from jcache with closure
    # anyhow (since closure is likely the longest part of the build).
    if DEBUG: print >>sys.stderr, 'js optimizer: no metadata, so disabling jcache'
    jcache = False

  if suffix:
    if not minify_globals:
      pre = js[:start_funcs + len(start_funcs_marker)]
      post = js[end_funcs + len(end_funcs_marker):]
      js = js[start_funcs + len(start_funcs_marker):end_funcs]
      if 'asm' not in passes:
        # can have Module[..] and inlining prevention code, push those to post
        class Finals:
          buf = []
        def process(line):
          if len(line) > 0 and (line.startswith(('Module[', 'if (globalScope)')) or line.endswith('["X"]=1;')):
            Finals.buf.append(line)
            return False
          return True
        js = '\n'.join(filter(process, js.split('\n')))
        post = '\n'.join(Finals.buf) + '\n' + post
      post = end_funcs_marker + post
    else:
      # We need to split out the asm shell as well, for minification
      pre = js[:start_asm + len(start_asm_marker)]
      post = js[end_asm:]
      # The shell is everything inside the asm module except the functions,
      # with a placeholder call standing in for them.
      asm_shell = (js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] +
                   ''' EMSCRIPTEN_FUNCS(); ''' +
                   js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)])
      js = js[start_funcs + len(start_funcs_marker):end_funcs]

      minifier = Minifier(js, js_engine)
      asm_shell_pre, asm_shell_post = minifier.minify_shell(asm_shell, 'minifyWhitespace' in passes).split('EMSCRIPTEN_FUNCS();')
      asm_shell_post = asm_shell_post.replace('});', '})')
      pre += asm_shell_pre + '\n' + start_funcs_marker
      post = end_funcs_marker + asm_shell_post + post

      minify_info = minifier.serialize()
      #if DEBUG: print >> sys.stderr, 'minify info:', minify_info
    # remove suffix if no longer needed
    if 'last' in passes:
      suffix_start = post.find(suffix_marker)
      suffix_end = post.find('\n', suffix_start)
      post = post[:suffix_start] + post[suffix_end:]

  else:
    pre = ''
    post = ''

  # Pick where to split into chunks, so that (1) they do not oom in node/uglify, and (2) we can run them in parallel
  # If we have metadata, we split only the generated code, and save the pre and post on the side (and do not optimize them)
  parts = map(lambda part: part, js.split('\n}\n'))
  funcs = []
  for i in range(len(parts)):
    func = parts[i]
    if i < len(parts)-1: func += '\n}\n' # last part needs no }
    m = func_sig.search(func)
    if m:
      ident = m.group(2)
    else:
      if suffix: continue # ignore whitespace
      ident = 'anon_%d' % i
    assert ident
    funcs.append((ident, func))
  parts = None
  total_size = len(js)
  js = None

  cores = int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count())
  intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
  chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))

  chunks = shared.chunkify(funcs, chunk_size, jcache.get_cachename('jsopt') if jcache else None)

  if jcache:
    # load chunks from cache where we can # TODO: ignore small chunks
    cached_outputs = []
    def load_from_cache(chunk):
      # Filter predicate: keeps only the chunks that still have to be optimized.
      keys = [chunk]
      shortkey = shared.JCache.get_shortkey(keys) # TODO: share shortkeys with later code
      out = shared.JCache.get(shortkey, keys)
      if out:
        cached_outputs.append(out)
        return False
      return True
    chunks = filter(load_from_cache, chunks)
    if len(cached_outputs) > 0:
      if DEBUG: print >> sys.stderr, ' loading %d jsfuncchunks from jcache' % len(cached_outputs)
  else:
    cached_outputs = []

  if len(chunks) > 0:
    def write_chunk(chunk, i):
      # Writes one chunk plus the metadata lines the optimizer reads, returns the temp path.
      temp_file = temp_files.get('.jsfunc_%d.js' % i).name
      with open(temp_file, 'w') as f:
        f.write(chunk)
        f.write(suffix_marker)
        if minify_globals:
          f.write('\n')
          f.write('// EXTRA_INFO:' + minify_info)
      return temp_file
    filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]
  else:
    filenames = []

  if len(filenames) > 0:
    # XXX Use '--nocrankshaft' to disable crankshaft to work around v8 bug 1895, needed for older v8/node (node 0.6.8+ should be ok)
    commands = map(lambda chunk_file: js_engine + [JS_OPTIMIZER, chunk_file, 'noPrintMetadata'] + passes, filenames)
    #print [' '.join(command) for command in commands]

    # Never start more workers than there are chunk files. This must compare
    # against the count: comparing an int with the list itself leaves `cores`
    # uncapped on Python 2 and raises TypeError on Python 3.
    cores = min(cores, len(filenames))
    if len(chunks) > 1 and cores >= 2:
      # We can parallelize
      if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks of size %d, using %d cores (total: %.2f MB)' % (len(chunks), chunk_size, cores, total_size/(1024*1024.))
      pool = multiprocessing.Pool(processes=cores)
      filenames = pool.map(run_on_chunk, commands, chunksize=1)
    else:
      # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
      if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks of size %d' % (len(chunks), chunk_size)
      filenames = [run_on_chunk(command) for command in commands]
  else:
    filenames = []

  # NOTE: this loop rebinds `filename`; when any chunk was run, the output path
  # below is derived from the last chunk output rather than from the input path.
  for filename in filenames: temp_files.note(filename)

  if closure:
    # run closure on the shell code, everything but what we js-optimize
    start_asm = '// EMSCRIPTEN_START_ASM\n'
    end_asm = '// EMSCRIPTEN_END_ASM\n'
    closure_sep = 'wakaUnknownBefore(); var asm=wakaUnknownAfter(global,env,buffer)\n'

    closuree = temp_files.get('.closure.js').name
    with open(closuree, 'w') as c:
      pre_1, pre_2 = pre.split(start_asm)
      post_1, post_2 = post.split(end_asm)
      c.write(pre_1)
      c.write(closure_sep)
      c.write(post_2)
    closured = shared.Building.closure_compiler(closuree, pretty='minifyWhitespace' not in passes)
    temp_files.note(closured)
    with open(closured) as closure_out:
      coutput = closure_out.read()
    # Drop the placeholder call and splice the asm module back in where the
    # second placeholder was left.
    coutput = coutput.replace('wakaUnknownBefore();', '')
    after = 'wakaUnknownAfter'
    start = coutput.find(after)
    end = coutput.find(')', start)
    pre = coutput[:start] + '(function(global,env,buffer) {\n' + start_asm + pre_2[pre_2.find('{')+1:]
    post = post_1[:post_1.rfind('}')] + '\n' + end_asm + '\n})' + coutput[end+1:]

  filename += '.jo.js'
  with open(filename, 'w') as f:
    f.write(pre)
    for out_file in filenames:
      with open(out_file) as chunk_out:
        f.write(chunk_out.read())
      f.write('\n')
    if jcache:
      for cached in cached_outputs:
        f.write(cached) # TODO: preserve order
        f.write('\n')
    f.write(post)
    # No need to write suffix: if there was one, it is inside post which exists when suffix is there
    f.write('\n')

  if jcache:
    # save chunks to cache
    for i in range(len(chunks)):
      chunk = chunks[i]
      keys = [chunk]
      shortkey = shared.JCache.get_shortkey(keys)
      with open(filenames[i]) as chunk_out:
        shared.JCache.set(shortkey, keys, chunk_out.read())
    if DEBUG and len(chunks) > 0: print >> sys.stderr, ' saving %d jsfuncchunks to jcache' % len(chunks)

  return filename
def run_on_js(filename, passes, js_engine, source_map=False, extra_info=None, just_split=False, just_concat=False):
  """Run the JS optimizer `passes` over a JS file, chunk by chunk.

  The input is cut into `pre` (everything up to and including the
  start-of-functions marker), the function bodies, and `post` (everything
  after them). Only the function bodies are chunked, written to temp files and
  handed to the optimizer (through a multiprocessing pool when there is more
  than one chunk and at least two cores are usable); `pre` and `post` are
  written back around the optimized functions. When neither the
  generated-functions suffix nor the function markers are found, the whole
  file is treated as function bodies and `pre`/`post` are empty.

  Args:
    filename: path of the JS file to read.
    passes: a pass name or a list of pass names. "closure" and "cleanup" are
      removed from the list and handled here on the shell code. "minifyNames"
      together with "asm" is replaced by "minifyLocals" and enables
      minification of the asm shell; in that mode "symbolMap=<file>" and
      "profilingFuncs" entries are consumed and stored on the minifier.
    js_engine: list holding the command prefix used to launch JS_OPTIMIZER.
    source_map: forwarded to Minifier.minify_shell; when true, "--debug" is
      added to the optimizer command and a single core is used.
    extra_info: optional dict appended to every chunk as a "// EXTRA_INFO:"
      JSON line (merged into the minifier info when minifying globals).
    just_split: forwarded to split_funcs; when true the re-chunking step is
      skipped and every non-empty entry it returns is used as its own chunk.
    just_concat: concatenate the optimizer outputs as they are instead of
      re-splitting them into functions and sorting those.

  Returns:
    Path of the written file: the last chunk output name (or `filename` when
    no chunk was run) with ".jo.js" appended.
  """
  if isinstance(passes, str):
    passes = [passes]

  # Read through a context manager so the handle is closed deterministically.
  with open(filename) as infile:
    js = infile.read()
  if os.linesep != "\n":
    js = js.replace(os.linesep, "\n")  # we assume \n in the splitting code

  # Find suffix
  suffix_marker = "// EMSCRIPTEN_GENERATED_FUNCTIONS"
  suffix_start = js.find(suffix_marker)
  suffix = ""
  if suffix_start >= 0:
    suffix_end = js.find("\n", suffix_start)
    suffix = js[suffix_start:suffix_end] + "\n"
    # if there is metadata, we will run only on the generated functions. If there isn't, we will run on everything.
    # NOTE(review): eval() runs on text taken from the input file and its result is
    # not used below; this is only acceptable while the input is trusted compiler output.
    generated = set(eval(suffix[len(suffix_marker) + 1 :]))

  # Find markers
  start_funcs = js.find(start_funcs_marker)
  end_funcs = js.rfind(end_funcs_marker)

  know_generated = suffix or start_funcs >= 0

  minify_globals = "minifyNames" in passes and "asm" in passes
  if minify_globals:
    passes = ["minifyLocals" if p == "minifyNames" else p for p in passes]
    start_asm = js.find(start_asm_marker)
    end_asm = js.rfind(end_asm_marker)
    assert (start_asm >= 0) == (end_asm >= 0)

  closure = "closure" in passes
  if closure:
    passes = [p for p in passes if p != "closure"]  # we will do it manually

  cleanup = "cleanup" in passes
  if cleanup:
    passes = [p for p in passes if p != "cleanup"]  # we will do it manually

  if know_generated:
    if not minify_globals:
      pre = js[: start_funcs + len(start_funcs_marker)]
      post = js[end_funcs + len(end_funcs_marker) :]
      js = js[start_funcs + len(start_funcs_marker) : end_funcs]
      if "asm" not in passes:
        # can have Module[..] and inlining prevention code, push those to post
        class Finals:
          buf = []

        def process(line):
          if len(line) > 0 and (
            line.startswith(("Module[", "if (globalScope)")) or line.endswith('["X"]=1;')
          ):
            Finals.buf.append(line)
            return False
          return True

        js = "\n".join(filter(process, js.split("\n")))
        post = "\n".join(Finals.buf) + "\n" + post
      post = end_funcs_marker + post
    else:
      # We need to split out the asm shell as well, for minification
      pre = js[: start_asm + len(start_asm_marker)]
      post = js[end_asm:]
      # The shell is everything inside the asm module except the functions,
      # with a placeholder call standing in for them.
      asm_shell = (
        js[start_asm + len(start_asm_marker) : start_funcs + len(start_funcs_marker)]
        + """ EMSCRIPTEN_FUNCS(); """
        + js[end_funcs + len(end_funcs_marker) : end_asm + len(end_asm_marker)]
      )
      js = js[start_funcs + len(start_funcs_marker) : end_funcs]

      # we assume there is a maximum of one new name per line
      minifier = Minifier(js, js_engine)

      def check_symbol_mapping(p):
        if p.startswith("symbolMap="):
          minifier.symbols_file = p.split("=")[1]
          return False
        if p == "profilingFuncs":
          minifier.profiling_funcs = True
          return False
        return True

      passes = filter(check_symbol_mapping, passes)
      asm_shell_pre, asm_shell_post = minifier.minify_shell(
        asm_shell, "minifyWhitespace" in passes, source_map
      ).split("EMSCRIPTEN_FUNCS();")
      asm_shell_post = asm_shell_post.replace("});", "})")
      pre += asm_shell_pre + "\n" + start_funcs_marker
      post = end_funcs_marker + asm_shell_post + post

      minify_info = minifier.serialize()
      # if DEBUG: print >> sys.stderr, 'minify info:', minify_info
    # remove suffix if no longer needed
    if suffix and "last" in passes:
      suffix_start = post.find(suffix_marker)
      suffix_end = post.find("\n", suffix_start)
      post = post[:suffix_start] + post[suffix_end:]

  else:
    pre = ""
    post = ""

  total_size = len(js)
  funcs = split_funcs(js, just_split, know_generated)
  js = None

  # if we are making source maps, we want our debug numbering to start from the
  # top of the file, so avoid breaking the JS into chunks
  cores = 1 if source_map else int(os.environ.get("EMCC_CORES") or multiprocessing.cpu_count())

  if not just_split:
    intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
    chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
    chunks = shared.chunkify(funcs, chunk_size)
  else:
    # keep same chunks as before
    chunks = map(lambda f: f[1], funcs)

  chunks = filter(lambda chunk: len(chunk) > 0, chunks)
  if DEBUG and len(chunks) > 0:
    print >>sys.stderr, "chunkification: num funcs:", len(funcs), "actual num chunks:", len(
      chunks
    ), "chunk size range:", max(map(len, chunks)), "-", min(map(len, chunks))
  funcs = None

  if len(chunks) > 0:

    def write_chunk(chunk, i):
      # Writes one chunk plus the metadata lines the optimizer reads, returns the temp path.
      temp_file = temp_files.get(".jsfunc_%d.js" % i).name
      with open(temp_file, "w") as f:
        f.write(chunk)
        f.write(suffix_marker)
        if minify_globals:
          if extra_info:
            for key, value in extra_info.iteritems():
              assert key not in minify_info or value == minify_info[key], [key, value, minify_info[key]]
              minify_info[key] = value
          f.write("\n")
          f.write("// EXTRA_INFO:" + json.dumps(minify_info))
        elif extra_info:
          f.write("\n")
          f.write("// EXTRA_INFO:" + json.dumps(extra_info))
      return temp_file

    filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]
  else:
    filenames = []

  if len(filenames) > 0:
    if not use_native(passes, source_map) or not get_native_optimizer():
      commands = map(
        lambda chunk_file: js_engine
        + [JS_OPTIMIZER, chunk_file, "noPrintMetadata"]
        + (["--debug"] if source_map else [])
        + passes,
        filenames,
      )
    else:
      # use the native optimizer
      shared.logging.debug("js optimizer using native")
      assert not source_map  # XXX need to use js optimizer
      commands = map(lambda chunk_file: [get_native_optimizer(), chunk_file] + passes, filenames)
    # print [' '.join(command) for command in commands]

    cores = min(cores, len(filenames))
    if len(chunks) > 1 and cores >= 2:
      # We can parallelize
      if DEBUG:
        print >>sys.stderr, "splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)" % (
          len(chunks),
          cores,
          total_size / (1024 * 1024.0),
        )
      pool = multiprocessing.Pool(processes=cores)
      filenames = pool.map(run_on_chunk, commands, chunksize=1)
    else:
      # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
      if len(chunks) > 1 and DEBUG:
        print >>sys.stderr, "splitting up js optimization into %d chunks" % (len(chunks))
      filenames = [run_on_chunk(command) for command in commands]
  else:
    filenames = []

  # NOTE: this loop rebinds `filename`; when any chunk was run, the output path
  # below is derived from the last chunk output rather than from the input path.
  for filename in filenames:
    temp_files.note(filename)

  if closure or cleanup:
    # run on the shell code, everything but what we js-optimize
    start_asm = "// EMSCRIPTEN_START_ASM\n"
    end_asm = "// EMSCRIPTEN_END_ASM\n"
    cl_sep = "wakaUnknownBefore(); var asm=wakaUnknownAfter(global,env,buffer)\n"

    cle = temp_files.get(".cl.js").name
    with open(cle, "w") as c:
      pre_1, pre_2 = pre.split(start_asm)
      post_1, post_2 = post.split(end_asm)
      c.write(pre_1)
      c.write(cl_sep)
      c.write(post_2)
    if closure:
      if DEBUG:
        print >>sys.stderr, "running closure on shell code"
      cld = shared.Building.closure_compiler(cle, pretty="minifyWhitespace" not in passes)
    else:
      if DEBUG:
        print >>sys.stderr, "running cleanup on shell code"
      cld = cle + ".js"
      # Close our copy of the redirect target once the child has finished.
      with open(cld, "w") as cld_out:
        subprocess.Popen(
          js_engine
          + [JS_OPTIMIZER, cle, "noPrintMetadata"]
          + (["minifyWhitespace"] if "minifyWhitespace" in passes else []),
          stdout=cld_out,
        ).communicate()
    temp_files.note(cld)
    with open(cld) as cld_in:
      coutput = cld_in.read()
    # Put the asm module back in place of the placeholder calls.
    coutput = coutput.replace("wakaUnknownBefore();", start_asm)
    after = "wakaUnknownAfter"
    start = coutput.find(after)
    end = coutput.find(")", start)
    pre = coutput[:start] + "(function(global,env,buffer) {\n" + pre_2[pre_2.find("{") + 1 :]
    post = post_1 + end_asm + coutput[end + 1 :]

  filename += ".jo.js"
  with open(filename, "w") as f:
    f.write(pre)
    pre = None

    if not just_concat:
      # sort functions by size, to make diffing easier and to improve aot times
      funcses = []
      for out_file in filenames:
        with open(out_file) as chunk_out:
          funcses.append(split_funcs(chunk_out.read(), False, know_generated))
      funcs = [item for sublist in funcses for item in sublist]
      funcses = None

      def sorter(x, y):
        # Largest body first; ties broken by identifier, descending.
        diff = len(y[1]) - len(x[1])
        if diff != 0:
          return diff
        if x[0] < y[0]:
          return 1
        elif x[0] > y[0]:
          return -1
        return 0

      if not os.environ.get("EMCC_NO_OPT_SORT"):
        funcs.sort(sorter)

      if "last" in passes and len(funcs) > 0:
        # funcs[0] is the largest function when sorting is enabled
        count = funcs[0][1].count("\n")
        if count > 3000:
          print >>sys.stderr, "warning: Output contains some very large functions (%s lines in %s), consider building source files with -Os or -Oz, and/or trying OUTLINING_LIMIT to break them up (see settings.js; note that the parameter there affects AST nodes, while we measure lines here, so the two may not match up)" % (
            count,
            funcs[0][0],
          )

      for func in funcs:
        f.write(func[1])
      funcs = None
    else:
      # just concat the outputs
      for out_file in filenames:
        with open(out_file) as chunk_out:
          f.write(chunk_out.read())
    f.write("\n")
    f.write(post)
    # No need to write suffix: if there was one, it is inside post which exists when suffix is there
    f.write("\n")
  return filename
def run_on_js(filename, passes, js_engine, jcache, source_map=False, extra_info=None, just_split=False, just_concat=False):
  """Run the JS optimizer passes over the functions in `filename`.

  The input is split into `pre` (everything up to and including the start-funcs
  marker), the function bodies, and `post`. The function bodies are cut into
  chunks, each chunk is written to a temp file and processed by JS_OPTIMIZER
  (through a multiprocessing pool when there is more than one chunk and at least
  two cores), and pre + processed functions + post are written to a new file.

  Parameters:
    filename: path of the JS file to read.
    passes: a pass name or a list of pass names. 'closure' and 'cleanup' are
      removed from the list and run here on the shell code; 'minifyNames'
      together with 'asm' turns on global minification and is replaced by
      'minifyLocals'; a 'symbolMap=...' entry is consumed by the minifier.
    js_engine: list that is prepended to every optimizer command line.
    jcache: True (replaced by shared.JCache) or a cache object; when truthy,
      chunks are looked up in and saved to shared.JCache. It is switched off
      when no generated-functions metadata or start-funcs marker is found.
    source_map: when true, a single core is used, '--debug' is added to the
      optimizer command, and the flag is forwarded to minify_shell.
    extra_info: optional dict appended to every chunk as a '// EXTRA_INFO:'
      JSON line (merged into the minifier info when minifying globals).
    just_split: split the input on newlines instead of on function boundaries.
    just_concat: concatenate the chunk outputs as-is instead of re-splitting
      and sorting them; asserts that jcache is off.

  Returns:
    The path of the file that was written (it ends in '.jo.js').

  NOTE(review): the `for filename in filenames` loop below rebinds `filename`,
  so whenever at least one chunk was processed the output path is derived from
  the last chunk output, not from the input path - confirm this is intended.
  """
  if isinstance(jcache, bool) and jcache: jcache = shared.JCache
  if jcache: shared.JCache.ensure()

  if type(passes) == str:
    passes = [passes]

  js = open(filename).read()
  if os.linesep != '\n':
    js = js.replace(os.linesep, '\n') # we assume \n in the splitting code

  # Find suffix
  suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS'
  suffix_start = js.find(suffix_marker)
  suffix = ''
  if suffix_start >= 0:
    suffix_end = js.find('\n', suffix_start)
    suffix = js[suffix_start:suffix_end] + '\n'
    # if there is metadata, we will run only on the generated functions. If there isn't, we will run on everything.
    # NOTE(review): this evaluates text taken from the input file, and `generated` is not read again in this function.
    generated = set(eval(suffix[len(suffix_marker)+1:]))

  # Find markers
  start_funcs = js.find(start_funcs_marker)
  end_funcs = js.rfind(end_funcs_marker)

  # Either the metadata suffix or the start-funcs marker is enough to know where the generated code is.
  know_generated = suffix or start_funcs >= 0

  minify_globals = 'minifyNames' in passes and 'asm' in passes
  if minify_globals:
    # The per-chunk optimizer only minifies locals; globals are handled by the Minifier below.
    passes = map(lambda p: p if p != 'minifyNames' else 'minifyLocals', passes)
    start_asm = js.find(start_asm_marker)
    end_asm = js.rfind(end_asm_marker)
    assert (start_asm >= 0) == (end_asm >= 0)

  closure = 'closure' in passes
  if closure:
    passes = filter(lambda p: p != 'closure', passes) # we will do it manually

  cleanup = 'cleanup' in passes
  if cleanup:
    passes = filter(lambda p: p != 'cleanup', passes) # we will do it manually

  if not know_generated and jcache:
    # JCache cannot be used without metadata, since it might reorder stuff, and that's dangerous since only generated can be reordered
    # This means jcache does not work after closure compiler runs, for example. But you won't get much benefit from jcache with closure
    # anyhow (since closure is likely the longest part of the build).
    if DEBUG: print >>sys.stderr, 'js optimizer: no metadata, so disabling jcache'
    jcache = False

  if know_generated:
    if not minify_globals:
      pre = js[:start_funcs + len(start_funcs_marker)]
      post = js[end_funcs + len(end_funcs_marker):]
      js = js[start_funcs + len(start_funcs_marker):end_funcs]
      if 'asm' not in passes:
        # can have Module[..] and inlining prevention code, push those to post
        class Finals:
          buf = []
        # Filter predicate with a side effect: matching lines are moved into Finals.buf.
        def process(line):
          if len(line) > 0 and (line.startswith(('Module[', 'if (globalScope)')) or line.endswith('["X"]=1;')):
            Finals.buf.append(line)
            return False
          return True
        js = '\n'.join(filter(process, js.split('\n')))
        post = '\n'.join(Finals.buf) + '\n' + post
      post = end_funcs_marker + post
    else:
      # We need to split out the asm shell as well, for minification
      pre = js[:start_asm + len(start_asm_marker)]
      post = js[end_asm:]
      # The shell is the asm module with the function bodies replaced by a placeholder call.
      asm_shell = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] + ''' EMSCRIPTEN_FUNCS(); ''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)]
      js = js[start_funcs + len(start_funcs_marker):end_funcs]

      # we assume there is a maximum of one new name per line
      minifier = Minifier(js, js_engine)
      # Filter predicate with a side effect: a 'symbolMap=<file>' pass configures the minifier and is dropped.
      def check_symbol_mapping(p):
        if p.startswith('symbolMap='):
          minifier.symbols_file = p.split('=')[1]
          return False
        return True
      passes = filter(check_symbol_mapping, passes)
      # Split the minified shell back around the placeholder.
      asm_shell_pre, asm_shell_post = minifier.minify_shell(asm_shell, 'minifyWhitespace' in passes, source_map).split('EMSCRIPTEN_FUNCS();');
      asm_shell_post = asm_shell_post.replace('});', '})');
      pre += asm_shell_pre + '\n' + start_funcs_marker
      post = end_funcs_marker + asm_shell_post + post

      minify_info = minifier.serialize()
      #if DEBUG: print >> sys.stderr, 'minify info:', minify_info
    # remove suffix if no longer needed
    if suffix and 'last' in passes:
      suffix_start = post.find(suffix_marker)
      suffix_end = post.find('\n', suffix_start)
      post = post[:suffix_start] + post[suffix_end:]

  else:
    pre = ''
    post = ''

  # Returns a list of (identifier, text) pairs.
  def split_funcs(js, just_split=False):
    if just_split: return map(lambda line: ('(json)', line), js.split('\n'))
    # Pick where to split into chunks, so that (1) they do not oom in node/uglify, and (2) we can run them in parallel
    # If we have metadata, we split only the generated code, and save the pre and post on the side (and do not optimize them)
    parts = map(lambda part: part, js.split('\n}\n'))
    funcs = []
    for i in range(len(parts)):
      func = parts[i]
      if i < len(parts)-1: func += '\n}\n' # last part needs no }
      m = func_sig.search(func)
      if m:
        ident = m.group(1)
      else:
        if know_generated: continue # ignore whitespace
        ident = 'anon_%d' % i
      assert ident
      funcs.append((ident, func))
    return funcs

  total_size = len(js)
  funcs = split_funcs(js, just_split)
  js = None

  # if we are making source maps, we want our debug numbering to start from the
  # top of the file, so avoid breaking the JS into chunks
  cores = 1 if source_map else int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count())

  intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
  chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
  chunks = shared.chunkify(funcs, chunk_size, jcache.get_cachename('jsopt') if jcache else None)

  chunks = filter(lambda chunk: len(chunk) > 0, chunks)
  if DEBUG and len(chunks) > 0: print >> sys.stderr, 'chunkification: intended size:', chunk_size, 'num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(map(len, chunks)), '-', min(map(len, chunks))
  funcs = None

  if jcache:
    # load chunks from cache where we can # TODO: ignore small chunks
    cached_outputs = []
    # Filter predicate with a side effect: cache hits are collected and removed from `chunks`.
    def load_from_cache(chunk):
      keys = [chunk]
      shortkey = shared.JCache.get_shortkey(keys) # TODO: share shortkeys with later code
      out = shared.JCache.get(shortkey, keys)
      if out:
        cached_outputs.append(out)
        return False
      return True
    chunks = filter(load_from_cache, chunks)
    if len(cached_outputs) > 0:
      if DEBUG: print >> sys.stderr, ' loading %d jsfuncchunks from jcache' % len(cached_outputs)
  else:
    cached_outputs = []

  if len(chunks) > 0:
    # Writes one chunk, followed by the suffix marker and optional EXTRA_INFO line, to a temp file.
    def write_chunk(chunk, i):
      temp_file = temp_files.get('.jsfunc_%d.js' % i).name
      f = open(temp_file, 'w')
      f.write(chunk)
      f.write(suffix_marker)
      if minify_globals:
        if extra_info:
          # Merge the caller's info into the minifier info; conflicting values are a programming error.
          for key, value in extra_info.iteritems():
            assert key not in minify_info or value == minify_info[key], [key, value, minify_info[key]]
            minify_info[key] = value
        f.write('\n')
        f.write('// EXTRA_INFO:' + json.dumps(minify_info))
      elif extra_info:
        f.write('\n')
        f.write('// EXTRA_INFO:' + json.dumps(extra_info))
      f.close()
      return temp_file
    filenames = [write_chunk(chunks[i], i) for i in range(len(chunks))]
  else:
    filenames = []

  if len(filenames) > 0:
    # XXX Use '--nocrankshaft' to disable crankshaft to work around v8 bug 1895, needed for older v8/node (node 0.6.8+ should be ok)
    commands = map(lambda filename: js_engine + [JS_OPTIMIZER, filename, 'noPrintMetadata'] + (['--debug'] if source_map else []) + passes, filenames)
    #print [' '.join(command) for command in commands]

    cores = min(cores, len(filenames))
    if len(chunks) > 1 and cores >= 2:
      # We can parallelize
      if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks of size %d, using %d cores (total: %.2f MB)' % (len(chunks), chunk_size, cores, total_size/(1024*1024.))
      pool = multiprocessing.Pool(processes=cores)
      filenames = pool.map(run_on_chunk, commands, chunksize=1)
    else:
      # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
      if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks of size %d' % (len(chunks), chunk_size)
      filenames = [run_on_chunk(command) for command in commands]
  else:
    filenames = []

  # Note: this loop rebinds `filename` (see the docstring).
  for filename in filenames: temp_files.note(filename)

  if closure or cleanup:
    # run on the shell code, everything but what we js-optimize
    start_asm = '// EMSCRIPTEN_START_ASM\n'
    end_asm = '// EMSCRIPTEN_END_ASM\n'
    # Placeholder written in place of the asm module while the shell code is processed.
    cl_sep = 'wakaUnknownBefore(); var asm=wakaUnknownAfter(global,env,buffer)\n'

    cle = temp_files.get('.cl.js').name
    c = open(cle, 'w')
    pre_1, pre_2 = pre.split(start_asm)
    post_1, post_2 = post.split(end_asm)
    c.write(pre_1)
    c.write(cl_sep)
    c.write(post_2)
    c.close()
    if closure:
      if DEBUG: print >> sys.stderr, 'running closure on shell code'
      cld = shared.Building.closure_compiler(cle, pretty='minifyWhitespace' not in passes)
    else:
      if DEBUG: print >> sys.stderr, 'running cleanup on shell code'
      cld = cle + '.js'
      subprocess.Popen(js_engine + [JS_OPTIMIZER, cle, 'noPrintMetadata'] + (['minifyWhitespace'] if 'minifyWhitespace' in passes else []), stdout=open(cld, 'w')).communicate()
    temp_files.note(cld)
    coutput = open(cld).read()
    # Put the asm module back where the placeholder calls ended up.
    coutput = coutput.replace('wakaUnknownBefore();', start_asm)
    after = 'wakaUnknownAfter'
    start = coutput.find(after)
    end = coutput.find(')', start)
    pre = coutput[:start] + '(function(global,env,buffer) {\n' + pre_2[pre_2.find('{')+1:]
    post = post_1 + end_asm + coutput[end+1:]

  filename += '.jo.js'
  f = open(filename, 'w')
  f.write(pre);
  pre = None

  if not just_concat:
    # sort functions by size, to make diffing easier and to improve aot times
    funcses = []
    for out_file in filenames:
      funcses.append(split_funcs(open(out_file).read()))
    funcs = [item for sublist in funcses for item in sublist]
    funcses = None
    # cmp-style comparator: longer text first, ties broken by identifier in descending order.
    def sorter(x, y):
      diff = len(y[1]) - len(x[1])
      if diff != 0: return diff
      if x[0] < y[0]: return 1
      elif x[0] > y[0]: return -1
      return 0
    funcs.sort(sorter)

    if 'last' in passes and len(funcs) > 0:
      # funcs[0] is the largest function after the sort above.
      count = funcs[0][1].count('\n')
      if count > 3000:
        print >> sys.stderr, 'warning: Output contains some very large functions (%s lines in %s), consider building source files with -Os or -Oz, and/or trying OUTLINING_LIMIT to break them up (see settings.js; note that the parameter there affects AST nodes, while we measure lines here, so the two may not match up)' % (count, funcs[0][0])

    for func in funcs:
      f.write(func[1])
    funcs = None
  else:
    # just concat the outputs
    for out_file in filenames:
      f.write(open(out_file).read())
    assert not jcache
  f.write('\n')
  if jcache:
    for cached in cached_outputs:
      f.write(cached); # TODO: preserve order
      f.write('\n')
  f.write(post);
  # No need to write suffix: if there was one, it is inside post which exists when suffix is there
  f.write('\n')
  f.close()

  if jcache:
    # save chunks to cache
    # `chunks` now holds only the cache misses, which line up index-for-index with `filenames`.
    for i in range(len(chunks)):
      chunk = chunks[i]
      keys = [chunk]
      shortkey = shared.JCache.get_shortkey(keys)
      shared.JCache.set(shortkey, keys, open(filenames[i]).read())
    if DEBUG and len(chunks) > 0: print >> sys.stderr, ' saving %d jsfuncchunks to jcache' % len(chunks)

  return filename
def run_on_js(filename, gen_hash_info=False):
  """Run the duplicate function eliminator over the functions in `filename`.

  The text between start_funcs_marker and end_funcs_marker is split into
  chunks, each chunk is written to a temp file and processed by
  DUPLICATE_FUNCTION_ELIMINATOR under shared.NODE_JS, and the result is written
  to a new file.

  Parameters:
    filename: path of the JS file to read.
    gen_hash_info: when true, the tool is run with '--gen-hash-info', its
      outputs are kept as JSON files and handed to
      write_equivalent_fn_hash_to_file, and the unmodified chunk files are
      written back out. When false, everything from the last '//' in the input
      is treated as hash info: it is passed to process_shell, appended to every
      chunk, and the tool is run with '--use-hash-info'.

  Returns:
    The path of the file that was written (it ends in '.jo.js').

  Exits the process with status 1 when the function markers are missing or out
  of order.

  NOTE(review): the `for filename in filenames` loop below rebinds `filename`,
  so the output path is derived from the last chunk file whenever there is one;
  the original input path is kept in `passed_in_filename`.
  """
  js_engine=shared.NODE_JS

  js = open(filename).read()
  if os.linesep != '\n':
    js = js.replace(os.linesep, '\n') # we assume \n in the splitting code

  equivalentfn_hash_info = None
  passed_in_filename = filename

  # Find markers
  start_funcs = js.find(start_funcs_marker)
  end_funcs = js.rfind(end_funcs_marker)

  if start_funcs < 0 or end_funcs < start_funcs:
    logging.critical('Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s)' % (start_funcs, end_funcs))
    sys.exit(1)

  if not gen_hash_info:
    # Everything from the last '//' to the end of the input is taken as the hash info.
    equivalentfn_hash_info = js[js.rfind('//'):]

    start_asm = js.find(start_asm_marker)
    end_asm = js.rfind(end_asm_marker)
    assert (start_asm >= 0) == (end_asm >= 0)

    # We need to split out the asm shell as well, for minification
    pre = js[:start_asm + len(start_asm_marker)]
    post = js[end_asm:]
    asm_shell_pre = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)]
    # Prevent "uglify" from turning 0.0 into 0 in variables' initialization. To do this we first replace 0.0 with
    # ZERO$DOT$ZERO and then replace it back.
    asm_shell_pre = re.sub(r'(\S+\s*=\s*)0\.0', r'\1ZERO$DOT$ZERO', asm_shell_pre)
    asm_shell_post = js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)]
    # The shell is the asm module with the function bodies replaced by a placeholder call.
    asm_shell = asm_shell_pre + '\nEMSCRIPTEN_FUNCS();\n' + asm_shell_post
    js = js[start_funcs + len(start_funcs_marker):end_funcs]

    # we assume there is a maximum of one new name per line
    asm_shell_pre, asm_shell_post = process_shell(js, js_engine, asm_shell, equivalentfn_hash_info).split('EMSCRIPTEN_FUNCS();');
    asm_shell_pre = re.sub(r'(\S+\s*=\s*)ZERO\$DOT\$ZERO', r'\g<1>0.0', asm_shell_pre)
    asm_shell_post = asm_shell_post.replace('});', '})');
    pre += asm_shell_pre + '\n' + start_funcs_marker
    post = end_funcs_marker + asm_shell_post + post

    # This check is always true here, because we are already inside `if not gen_hash_info`.
    if not gen_hash_info:
      # We don't need the extra info at the end
      post = post[:post.rfind('//')].strip()
  else:
    pre = js[:start_funcs + len(start_funcs_marker)]
    post = js[end_funcs + len(end_funcs_marker):]
    js = js[start_funcs + len(start_funcs_marker):end_funcs]
    post = end_funcs_marker + post

  total_size = len(js)
  funcs = split_funcs(js, False)

  js = None

  # if we are making source maps, we want our debug numbering to start from the
  # top of the file, so avoid breaking the JS into chunks
  # NOTE(review): this function has no source-map option; the comment above looks inherited from elsewhere.
  cores = int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count())

  intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
  chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
  chunks = shared.chunkify(funcs, chunk_size)

  chunks = filter(lambda chunk: len(chunk) > 0, chunks)
  if DEBUG and len(chunks) > 0: print >> sys.stderr, 'chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(map(len, chunks)), '-', min(map(len, chunks))
  funcs = None

  if len(chunks) > 0:
    # Writes one chunk (plus the hash info when it is being consumed) to a temp file.
    def write_chunk(chunk, i):
      temp_file = temp_files.get('.jsfunc_%d.js' % i).name
      f = open(temp_file, 'w')
      f.write(chunk)
      if not gen_hash_info:
        f.write('\n')
        f.write(equivalentfn_hash_info)
      f.close()
      return temp_file
    filenames = [write_chunk(chunks[i], i) for i in range(len(chunks))]
  else:
    filenames = []

  # Remember the chunk inputs: with gen_hash_info they are what gets written back out.
  old_filenames = filenames[:]
  if len(filenames) > 0:
    commands = map(lambda filename: js_engine + [DUPLICATE_FUNCTION_ELIMINATOR, filename, '--gen-hash-info' if gen_hash_info else '--use-hash-info', '--no-minimize-whitespace'], filenames)

    if DEBUG and commands is not None:
      print >> sys.stderr, [' '.join(command if command is not None else '(null)') for command in commands]

    cores = min(cores, len(filenames))
    if len(chunks) > 1 and cores >= 2:
      # We can parallelize
      if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size/(1024*1024.))
      pool = shared.Building.get_multiprocessing_pool()
      filenames = pool.map(run_on_chunk, commands, chunksize=1)
    else:
      # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
      if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % (len(chunks))
      filenames = [run_on_chunk(command) for command in commands]
  else:
    filenames = []

  json_files = []

  # We're going to be coalescing the files back at the end
  # Just replace the file list with the ones provided in
  # the command list - and save off the generated Json
  if gen_hash_info:
    json_files = filenames[:]
    filenames = old_filenames[:]

  # Note: this loop rebinds `filename` (see the docstring).
  for filename in filenames: temp_files.note(filename)

  filename += '.jo.js'
  f = open(filename, 'w')
  f.write(pre);
  pre = None

  # sort functions by size, to make diffing easier and to improve aot times
  funcses = []
  for out_file in filenames:
    funcses.append(split_funcs(open(out_file).read(), False))
  funcs = [item for sublist in funcses for item in sublist]
  funcses = None
  # cmp-style comparator: longer text first, ties broken by identifier in descending order.
  def sorter(x, y):
    diff = len(y[1]) - len(x[1])
    if diff != 0: return diff
    if x[0] < y[0]: return 1
    elif x[0] > y[0]: return -1
    return 0
  if not os.environ.get('EMCC_NO_OPT_SORT'):
    funcs.sort(sorter)

  for func in funcs:
    f.write(func[1])
  funcs = None

  f.write('\n')
  f.write(post);
  # No need to write suffix: if there was one, it is inside post which exists when suffix is there
  f.write('\n')

  if gen_hash_info and len(json_files) > 0:
    write_equivalent_fn_hash_to_file(f, json_files, passed_in_filename)
  f.close()

  return filename
def run_on_js(filename, passes, js_engine, jcache):
    """Run the JS optimizer passes over ``filename`` and return the output path.

    The input is split into ``pre``, the generated function bodies and
    ``post``. The function bodies are cut into chunks, each chunk is written
    to a temp file and processed by JS_OPTIMIZER (through a multiprocessing
    pool when there is more than one chunk and at least two cores), and
    pre + chunk outputs + cached outputs + post are written to a new file.

    Parameters:
      filename: path of the JS file to read.
      passes: a pass name or a list of pass names. 'asm' together with
        'registerize' makes the asm shell go through the Minifier; 'last'
        strips the generated-functions suffix from the output.
      js_engine: list that is prepended to every optimizer command line.
      jcache: True (replaced by shared.JCache) or a cache object; when truthy,
        chunks are looked up in and saved to shared.JCache. It is switched
        off when the input carries no generated-functions metadata.

    Returns:
      The path of the file that was written (it ends in '.jo.js'). When at
      least one chunk was processed, that path is derived from the last chunk
      output rather than from the input path; this mirrors the previous
      behaviour, where the note-taking loop rebound ``filename``.
    """
    if isinstance(jcache, bool) and jcache:
        jcache = shared.JCache
    if jcache:
        shared.JCache.ensure()

    if isinstance(passes, str):
        passes = [passes]

    with open(filename) as src:
        js = src.read()
    if os.linesep != "\n":
        js = js.replace(os.linesep, "\n")  # we assume \n in the splitting code

    # Find suffix
    suffix_marker = "// EMSCRIPTEN_GENERATED_FUNCTIONS"
    suffix_start = js.find(suffix_marker)
    suffix = ""
    if suffix_start >= 0:
        suffix_end = js.find("\n", suffix_start)
        suffix = js[suffix_start:suffix_end] + "\n"
        # If there is metadata, we will run only on the generated functions.
        # If there isn't, we will run on everything.
        # SECURITY NOTE(review): this evaluates text taken from the input
        # file; only run this on trusted compiler output. The resulting set is
        # not read again here, but the call is kept so malformed metadata
        # still fails early.
        generated = set(eval(suffix[len(suffix_marker) + 1 :]))

    # Find markers
    start_funcs_marker = "// EMSCRIPTEN_START_FUNCS\n"
    end_funcs_marker = "// EMSCRIPTEN_END_FUNCS\n"
    start_funcs = js.find(start_funcs_marker)
    end_funcs = js.rfind(end_funcs_marker)
    # Markers and metadata must be either all present or all absent.
    assert (start_funcs >= 0) == (end_funcs >= 0) == (not not suffix)
    asm_registerize = "asm" in passes and "registerize" in passes
    if asm_registerize:
        start_asm_marker = "// EMSCRIPTEN_START_ASM\n"
        end_asm_marker = "// EMSCRIPTEN_END_ASM\n"
        start_asm = js.find(start_asm_marker)
        end_asm = js.rfind(end_asm_marker)
        assert (start_asm >= 0) == (end_asm >= 0)

    if not suffix and jcache:
        # JCache cannot be used without metadata, since it might reorder stuff, and that's dangerous since only generated can be reordered
        # This means jcache does not work after closure compiler runs, for example. But you won't get much benefit from jcache with closure
        # anyhow (since closure is likely the longest part of the build).
        if DEBUG:
            print >>sys.stderr, "js optimizer: no metadata, so disabling jcache"
        jcache = False

    if suffix:
        if not asm_registerize:
            pre = js[: start_funcs + len(start_funcs_marker)]
            post = js[end_funcs + len(end_funcs_marker) :]
            js = js[start_funcs + len(start_funcs_marker) : end_funcs]
            if "asm" not in passes:
                # can have Module[..] and inlining prevention code, push those to post
                finals = []
                kept = []
                for line in js.split("\n"):
                    if len(line) > 0 and (
                        line.startswith(("Module[", "if (globalScope)")) or line.endswith('["X"]=1;')
                    ):
                        finals.append(line)
                    else:
                        kept.append(line)
                js = "\n".join(kept)
                post = "\n".join(finals) + "\n" + post
            post = end_funcs_marker + post
        else:
            # We need to split out the asm shell as well, for minification
            pre = js[: start_asm + len(start_asm_marker)]
            post = js[end_asm:]
            # The shell is the asm module with the function bodies replaced
            # by a placeholder call.
            asm_shell = (
                js[start_asm + len(start_asm_marker) : start_funcs + len(start_funcs_marker)]
                + """ EMSCRIPTEN_FUNCS(); """
                + js[end_funcs + len(end_funcs_marker) : end_asm + len(end_asm_marker)]
            )
            js = js[start_funcs + len(start_funcs_marker) : end_funcs]

            minifier = Minifier(js, js_engine)
            asm_shell_pre, asm_shell_post = minifier.minify_shell(asm_shell, "compress" in passes).split(
                "EMSCRIPTEN_FUNCS();"
            )
            asm_shell_post = asm_shell_post.replace("});", "})")
            pre += asm_shell_pre + "\n" + start_funcs_marker
            post = end_funcs_marker + asm_shell_post + post

            minify_info = minifier.serialize()
        # remove suffix if no longer needed
        if "last" in passes:
            suffix_start = post.find(suffix_marker)
            suffix_end = post.find("\n", suffix_start)
            post = post[:suffix_start] + post[suffix_end:]
    else:
        pre = ""
        post = ""

    # Pick where to split into chunks, so that (1) they do not oom in node/uglify, and (2) we can run them in parallel
    # If we have metadata, we split only the generated code, and save the pre and post on the side (and do not optimize them)
    parts = js.split("\n}\n")
    last_index = len(parts) - 1
    funcs = []
    for i, func in enumerate(parts):
        if i < last_index:
            func += "\n}\n"  # last part needs no }
        m = func_sig.search(func)
        if m:
            ident = m.group(2)
        else:
            if suffix:
                continue  # ignore whitespace
            ident = "anon_%d" % i
        assert ident
        funcs.append((ident, func))
    parts = None
    total_size = len(js)
    js = None

    cores = int(os.environ.get("EMCC_CORES") or multiprocessing.cpu_count())
    intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
    chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))

    chunks = shared.chunkify(funcs, chunk_size, jcache.get_cachename("jsopt") if jcache else None)

    cached_outputs = []
    if jcache:
        # load chunks from cache where we can # TODO: ignore small chunks
        misses = []
        for chunk in chunks:
            keys = [chunk]
            shortkey = shared.JCache.get_shortkey(keys)  # TODO: share shortkeys with later code
            out = shared.JCache.get(shortkey, keys)
            if out:
                cached_outputs.append(out)
            else:
                misses.append(chunk)
        chunks = misses
        if len(cached_outputs) > 0:
            if DEBUG:
                print >>sys.stderr, " loading %d jsfuncchunks from jcache" % len(cached_outputs)

    def write_chunk(chunk, i):
        # Write one chunk, followed by the suffix marker and optional
        # minifier info, to a temp file and return its path.
        temp_file = temp_files.get(".jsfunc_%d.js" % i).name
        with open(temp_file, "w") as out:
            out.write(chunk)
            out.write(suffix_marker)
            if asm_registerize:
                out.write("\n")
                out.write("// MINIFY_INFO:" + minify_info)
        return temp_file

    filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]

    if len(filenames) > 0:
        # XXX Use '--nocrankshaft' to disable crankshaft to work around v8 bug 1895, needed for older v8/node (node 0.6.8+ should be ok)
        commands = [js_engine + [JS_OPTIMIZER, chunk_file, "noPrintMetadata"] + passes for chunk_file in filenames]

        # Never start more workers than there are chunk files. This used to
        # be min(cores, filenames), which compared an int with a list and so
        # never capped the core count.
        cores = min(cores, len(filenames))
        if len(chunks) > 1 and cores >= 2:
            # We can parallelize
            if DEBUG:
                print >>sys.stderr, "splitting up js optimization into %d chunks of size %d, using %d cores (total: %.2f MB)" % (
                    len(chunks),
                    chunk_size,
                    cores,
                    total_size / (1024 * 1024.0),
                )
            pool = multiprocessing.Pool(processes=cores)
            filenames = pool.map(run_on_chunk, commands, chunksize=1)
        else:
            # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
            if len(chunks) > 1 and DEBUG:
                print >>sys.stderr, "splitting up js optimization into %d chunks of size %d" % (len(chunks), chunk_size)
            filenames = [run_on_chunk(command) for command in commands]

    for chunk_output in filenames:
        temp_files.note(chunk_output)
    if len(filenames) > 0:
        # Kept from the previous implementation, whose loop variable shadowed
        # the parameter: the output is named after the last chunk output.
        # NOTE(review): confirm callers rely only on the returned path.
        filename = filenames[-1]

    filename += ".jo.js"
    with open(filename, "w") as f:
        f.write(pre)
        for out_file in filenames:
            with open(out_file) as chunk_in:
                f.write(chunk_in.read())
            f.write("\n")
        if jcache:
            for cached in cached_outputs:
                f.write(cached)  # TODO: preserve order
                f.write("\n")
        f.write(post)
        # No need to write suffix: if there was one, it is inside post which exists when suffix is there
        f.write("\n")

    if jcache:
        # save chunks to cache
        # `chunks` holds only the cache misses, which line up index-for-index
        # with `filenames`.
        for i, chunk in enumerate(chunks):
            keys = [chunk]
            shortkey = shared.JCache.get_shortkey(keys)
            with open(filenames[i]) as chunk_in:
                shared.JCache.set(shortkey, keys, chunk_in.read())
        if DEBUG and len(chunks) > 0:
            print >>sys.stderr, " saving %d jsfuncchunks to jcache" % len(chunks)

    return filename
def run_on_js(filename, gen_hash_info=False):
  """Split `filename` into chunks and run the duplicate function eliminator on them.

  The text between start_funcs_marker and end_funcs_marker is split into
  chunks, each chunk is written to a temp file, and
  DUPLICATE_FUNCTION_ELIMINATOR is run on every chunk under shared.NODE_JS
  (through a multiprocessing pool when there is more than one chunk and at
  least two cores).

  Parameters:
    filename: path of the JS file to read.
    gen_hash_info: when true the tool is run with '--gen-hash-info'. When
      false, everything from the last '//' in the input is treated as hash
      info: it is passed to process_shell, appended to every chunk, and the
      tool is run with '--use-hash-info'.

  Returns:
    None. This definition stops once the chunks have been processed; it does
    not write a combined output file.
    NOTE(review): other definitions of run_on_js in this file continue past
    this point and return a path - confirm whether the tail of this one was
    lost.

  Exits the process with status 1 when the function markers are missing or out
  of order.
  """
  js_engine = shared.NODE_JS

  with open(filename) as infile:
    js = infile.read()
  if os.linesep != '\n':
    js = js.replace(os.linesep, '\n') # we assume \n in the splitting code

  equivalentfn_hash_info = None
  passed_in_filename = filename

  # Find markers
  start_funcs = js.find(start_funcs_marker)
  end_funcs = js.rfind(end_funcs_marker)

  if start_funcs < 0 or end_funcs < start_funcs:
    logging.critical('Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s)' % (start_funcs, end_funcs))
    sys.exit(1)

  funcs_begin = start_funcs + len(start_funcs_marker)
  funcs_finish = end_funcs + len(end_funcs_marker)

  if not gen_hash_info:
    # Everything from the last '//' to the end of the input is taken as the hash info.
    equivalentfn_hash_info = js[js.rfind('//'):]

    start_asm = js.find(start_asm_marker)
    end_asm = js.rfind(end_asm_marker)
    assert (start_asm >= 0) == (end_asm >= 0)

    # We need to split out the asm shell as well, for minification
    pre = js[:start_asm + len(start_asm_marker)]
    post = js[end_asm:]
    # The shell is the asm module with the function bodies replaced by a placeholder call.
    asm_shell = js[start_asm + len(start_asm_marker):funcs_begin] + ''' EMSCRIPTEN_FUNCS(); ''' + js[funcs_finish:end_asm + len(end_asm_marker)]
    js = js[funcs_begin:end_funcs]

    # we assume there is a maximum of one new name per line
    asm_shell_pre, asm_shell_post = process_shell(js, js_engine, asm_shell, equivalentfn_hash_info).split('EMSCRIPTEN_FUNCS();')
    asm_shell_post = asm_shell_post.replace('});', '})')
    pre += asm_shell_pre + '\n' + start_funcs_marker
    post = end_funcs_marker + asm_shell_post + post

    # We don't need the extra info at the end
    post = post[:post.rfind('//')].strip()
  else:
    pre = js[:funcs_begin]
    post = js[funcs_finish:]
    js = js[funcs_begin:end_funcs]
    post = end_funcs_marker + post

  total_size = len(js)
  funcs = split_funcs(js, False)

  js = None

  cores = int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count())

  intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
  chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
  chunks = shared.chunkify(funcs, chunk_size)

  chunks = [chunk for chunk in chunks if len(chunk) > 0]
  if DEBUG and len(chunks) > 0: print >> sys.stderr, 'chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(map(len, chunks)), '-', min(map(len, chunks))
  funcs = None

  def write_chunk(chunk, i):
    # Write one chunk (plus the hash info when it is being consumed) to a temp file.
    temp_file = temp_files.get('.jsfunc_%d.js' % i).name
    with open(temp_file, 'w') as f:
      f.write(chunk)
      if not gen_hash_info:
        f.write('\n')
        f.write(equivalentfn_hash_info)
    return temp_file

  filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]

  old_filenames = filenames[:]
  if len(filenames) > 0:
    mode_flag = '--gen-hash-info' if gen_hash_info else '--use-hash-info'
    commands = [js_engine + [DUPLICATE_FUNCTION_ELIMINATOR, chunk_file, mode_flag, '--no-minimize-whitespace'] for chunk_file in filenames]

    if DEBUG and commands is not None:
      print >> sys.stderr, [' '.join(command if command is not None else '(null)') for command in commands]

    cores = min(cores, len(filenames))
    if len(chunks) > 1 and cores >= 2:
      # We can parallelize
      if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size/(1024*1024.))
      pool = multiprocessing.Pool(processes=cores)
      try:
        filenames = pool.map(run_on_chunk, commands, chunksize=1)
      finally:
        # Shut down the pool, since otherwise processes are left alive and would only be lazily terminated,
        # and in other parts of the toolchain we also build up multiprocessing pools. This runs in a
        # finally block so the workers are also reaped when a chunk fails.
        try:
          pool.terminate()
          pool.join()
        except Exception as e:
          # On Windows we get occasional "Access is denied" errors when attempting to tear down the pool, ignore these.
          logging.debug('Attempting to tear down multiprocessing pool failed with an exception: ' + str(e))
    else:
      # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
      if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % (len(chunks))
      filenames = [run_on_chunk(command) for command in commands]
def run_on_js(filename, gen_hash_info=False):
  """Split `filename` into chunks and run the duplicate function eliminator on them.

  The text between start_funcs_marker and end_funcs_marker is split into
  chunks, each chunk is written to a temp file, and
  DUPLICATE_FUNCTION_ELIMINATOR is run on every chunk under shared.NODE_JS
  (through a multiprocessing pool when there is more than one chunk and at
  least two cores).

  Parameters:
    filename: path of the JS file to read.
    gen_hash_info: when true the tool is run with '--gen-hash-info'. When
      false, everything from the last '//' in the input is treated as hash
      info: it is passed to process_shell, appended to every chunk, and the
      tool is run with '--use-hash-info'.

  Returns:
    None. This definition stops once the chunks have been processed; it does
    not write a combined output file.
    NOTE(review): other definitions of run_on_js in this file continue past
    this point and return a path - confirm whether the tail of this one was
    lost.

  Exits the process with status 1 when the function markers are missing or out
  of order.
  """
  js_engine = shared.NODE_JS

  with open(filename) as infile:
    js = infile.read()
  if os.linesep != '\n':
    js = js.replace(os.linesep, '\n')  # we assume \n in the splitting code

  equivalentfn_hash_info = None
  passed_in_filename = filename

  # Find markers
  start_funcs = js.find(start_funcs_marker)
  end_funcs = js.rfind(end_funcs_marker)

  if start_funcs < 0 or end_funcs < start_funcs:
    logging.critical(
        'Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s)'
        % (start_funcs, end_funcs))
    sys.exit(1)

  if not gen_hash_info:
    # Everything from the last '//' to the end of the input is taken as the hash info.
    equivalentfn_hash_info = js[js.rfind('//'):]

    start_asm = js.find(start_asm_marker)
    end_asm = js.rfind(end_asm_marker)
    assert (start_asm >= 0) == (end_asm >= 0)

    # We need to split out the asm shell as well, for minification
    pre = js[:start_asm + len(start_asm_marker)]
    post = js[end_asm:]
    # The shell is the asm module with the function bodies replaced by a placeholder call.
    asm_shell = js[start_asm + len(start_asm_marker):start_funcs +
                   len(start_funcs_marker)] + ''' EMSCRIPTEN_FUNCS(); ''' + js[
                       end_funcs + len(end_funcs_marker):end_asm +
                       len(end_asm_marker)]
    js = js[start_funcs + len(start_funcs_marker):end_funcs]

    # we assume there is a maximum of one new name per line
    asm_shell_pre, asm_shell_post = process_shell(
        js, js_engine, asm_shell,
        equivalentfn_hash_info).split('EMSCRIPTEN_FUNCS();')
    asm_shell_post = asm_shell_post.replace('});', '})')
    pre += asm_shell_pre + '\n' + start_funcs_marker
    post = end_funcs_marker + asm_shell_post + post

    # We don't need the extra info at the end
    post = post[:post.rfind('//')].strip()
  else:
    pre = js[:start_funcs + len(start_funcs_marker)]
    post = js[end_funcs + len(end_funcs_marker):]
    js = js[start_funcs + len(start_funcs_marker):end_funcs]
    post = end_funcs_marker + post

  total_size = len(js)
  funcs = split_funcs(js, False)

  js = None

  cores = int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count())

  intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
  chunk_size = min(MAX_CHUNK_SIZE,
                   max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
  chunks = shared.chunkify(funcs, chunk_size)

  chunks = [chunk for chunk in chunks if len(chunk) > 0]
  if DEBUG and len(chunks) > 0:
    print >> sys.stderr, 'chunkification: num funcs:', len(
        funcs), 'actual num chunks:', len(
            chunks), 'chunk size range:', max(map(len, chunks)), '-', min(
                map(len, chunks))
  funcs = None

  def write_chunk(chunk, i):
    # Write one chunk (plus the hash info when it is being consumed) to a temp file.
    temp_file = temp_files.get('.jsfunc_%d.js' % i).name
    with open(temp_file, 'w') as f:
      f.write(chunk)
      if not gen_hash_info:
        f.write('\n')
        f.write(equivalentfn_hash_info)
    return temp_file

  filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]

  old_filenames = filenames[:]
  if len(filenames) > 0:
    mode_flag = '--gen-hash-info' if gen_hash_info else '--use-hash-info'
    commands = [
        js_engine + [
            DUPLICATE_FUNCTION_ELIMINATOR, chunk_file, mode_flag,
            '--no-minimize-whitespace'
        ] for chunk_file in filenames
    ]

    if DEBUG and commands is not None:
      print >> sys.stderr, [
          ' '.join(command if command is not None else '(null)')
          for command in commands
      ]

    cores = min(cores, len(filenames))
    if len(chunks) > 1 and cores >= 2:
      # We can parallelize
      if DEBUG:
        print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (
            len(chunks), cores, total_size / (1024 * 1024.))
      pool = multiprocessing.Pool(processes=cores)
      try:
        filenames = pool.map(run_on_chunk, commands, chunksize=1)
      finally:
        # Shut down the pool, since otherwise processes are left alive and would only be lazily terminated,
        # and in other parts of the toolchain we also build up multiprocessing pools. This runs in a
        # finally block so the workers are also reaped when a chunk fails.
        try:
          pool.terminate()
          pool.join()
        except Exception as e:
          # On Windows we get occasional "Access is denied" errors when attempting to tear down the pool, ignore these.
          logging.debug(
              'Attempting to tear down multiprocessing pool failed with an exception: '
              + str(e))
    else:
      # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
      if len(chunks) > 1 and DEBUG:
        print >> sys.stderr, 'splitting up js optimization into %d chunks' % (
            len(chunks))
      filenames = [run_on_chunk(command) for command in commands]
def run_on_js(filename, passes, js_engine, source_map=False, extra_info=None, just_split=False, just_concat=False): if type(passes) == str: passes = [passes] js = open(filename).read() if os.linesep != '\n': js = js.replace(os.linesep, '\n') # we assume \n in the splitting code # Find suffix suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS' suffix_start = js.find(suffix_marker) suffix = '' if suffix_start >= 0: suffix_end = js.find('\n', suffix_start) suffix = js[suffix_start:suffix_end] + '\n' # if there is metadata, we will run only on the generated functions. If there isn't, we will run on everything. # Find markers start_funcs = js.find(start_funcs_marker) end_funcs = js.rfind(end_funcs_marker) if start_funcs < 0 or end_funcs < start_funcs or not suffix: logging.critical('Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s, suffix_start: %s' % (start_funcs, end_funcs, suffix_start)) sys.exit(1) minify_globals = 'minifyNames' in passes and 'asm' in passes if minify_globals: passes = map(lambda p: p if p != 'minifyNames' else 'minifyLocals', passes) start_asm = js.find(start_asm_marker) end_asm = js.rfind(end_asm_marker) assert (start_asm >= 0) == (end_asm >= 0) closure = 'closure' in passes if closure: passes = filter(lambda p: p != 'closure', passes) # we will do it manually cleanup = 'cleanup' in passes if cleanup: passes = filter(lambda p: p != 'cleanup', passes) # we will do it manually split_memory = 'splitMemory' in passes if not minify_globals: pre = js[:start_funcs + len(start_funcs_marker)] post = js[end_funcs + len(end_funcs_marker):] js = js[start_funcs + len(start_funcs_marker):end_funcs] if 'asm' not in passes: # can have Module[..] 
and inlining prevention code, push those to post class Finals: buf = [] def process(line): if len(line) > 0 and (line.startswith(('Module[', 'if (globalScope)')) or line.endswith('["X"]=1;')): Finals.buf.append(line) return False return True js = '\n'.join(filter(process, js.split('\n'))) post = '\n'.join(Finals.buf) + '\n' + post post = end_funcs_marker + post else: # We need to split out the asm shell as well, for minification pre = js[:start_asm + len(start_asm_marker)] post = js[end_asm:] asm_shell = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] + ''' EMSCRIPTEN_FUNCS(); ''' + js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)] js = js[start_funcs + len(start_funcs_marker):end_funcs] # we assume there is a maximum of one new name per line minifier = Minifier(js, js_engine) def check_symbol_mapping(p): if p.startswith('symbolMap='): minifier.symbols_file = p.split('=')[1] return False if p == 'profilingFuncs': minifier.profiling_funcs = True return False return True passes = filter(check_symbol_mapping, passes) asm_shell_pre, asm_shell_post = minifier.minify_shell(asm_shell, 'minifyWhitespace' in passes, source_map).split('EMSCRIPTEN_FUNCS();'); asm_shell_post = asm_shell_post.replace('});', '})'); pre += asm_shell_pre + '\n' + start_funcs_marker post = end_funcs_marker + asm_shell_post + post minify_info = minifier.serialize() #if DEBUG: print >> sys.stderr, 'minify info:', minify_info # remove suffix if no longer needed if suffix and 'last' in passes: suffix_start = post.find(suffix_marker) suffix_end = post.find('\n', suffix_start) post = post[:suffix_start] + post[suffix_end:] total_size = len(js) funcs = split_funcs(js, just_split) js = None # if we are making source maps, we want our debug numbering to start from the # top of the file, so avoid breaking the JS into chunks cores = 1 if source_map else int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count()) if not just_split: intended_num_chunks = 
int(round(cores * NUM_CHUNKS_PER_CORE)) chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks)) chunks = shared.chunkify(funcs, chunk_size) else: # keep same chunks as before chunks = map(lambda f: f[1], funcs) chunks = filter(lambda chunk: len(chunk) > 0, chunks) if DEBUG and len(chunks) > 0: print >> sys.stderr, 'chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(map(len, chunks)), '-', min(map(len, chunks)) funcs = None if len(chunks) > 0: def write_chunk(chunk, i): temp_file = temp_files.get('.jsfunc_%d.js' % i).name f = open(temp_file, 'w') f.write(chunk) f.write(suffix_marker) if minify_globals: if extra_info: for key, value in extra_info.iteritems(): assert key not in minify_info or value == minify_info[key], [key, value, minify_info[key]] minify_info[key] = value f.write('\n') f.write('// EXTRA_INFO:' + json.dumps(minify_info)) elif extra_info: f.write('\n') f.write('// EXTRA_INFO:' + json.dumps(extra_info)) f.close() return temp_file filenames = [write_chunk(chunks[i], i) for i in range(len(chunks))] else: filenames = [] with ToolchainProfiler.profile_block('run_optimizer'): if len(filenames) > 0: if not use_native(passes, source_map) or not get_native_optimizer(): commands = map(lambda filename: js_engine + [JS_OPTIMIZER, filename, 'noPrintMetadata'] + (['--debug'] if source_map else []) + passes, filenames) else: # use the native optimizer shared.logging.debug('js optimizer using native') assert not source_map # XXX need to use js optimizer commands = map(lambda filename: [get_native_optimizer(), filename] + passes, filenames) #print [' '.join(command) for command in commands] cores = min(cores, len(filenames)) if len(chunks) > 1 and cores >= 2: # We can parallelize if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size/(1024*1024.)) pool = multiprocessing.Pool(processes=cores) 
filenames = pool.map(run_on_chunk, commands, chunksize=1) try: # Shut down the pool, since otherwise processes are left alive and would only be lazily terminated, # and in other parts of the toolchain we also build up multiprocessing pools. pool.terminate() pool.join() except Exception, e: # On Windows we get occassional "Access is denied" errors when attempting to tear down the pool, ignore these. logging.debug('Attempting to tear down multiprocessing pool failed with an exception: ' + str(e)) else: # We can't parallize, but still break into chunks to avoid uglify/node memory issues if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % (len(chunks)) filenames = [run_on_chunk(command) for command in commands] else:
def run_on_js(filename, gen_hash_info=False):
    """Run the duplicate function eliminator over the functions of a JS file.

    The text between ``start_funcs_marker`` and ``end_funcs_marker`` is split
    into functions, grouped into chunks, and each chunk is written to a temp
    file and processed by ``DUPLICATE_FUNCTION_ELIMINATOR`` (run through
    ``shared.NODE_JS``), in parallel when more than one chunk and core are
    available.  The result is reassembled as ``pre + functions + post`` into
    a new ``*.jo.js`` file.

    Args:
        filename: Path of the JS file to process.
        gen_hash_info: When true, the tool is invoked with
            ``--gen-hash-info``; the tool outputs are handed to
            ``write_equivalent_fn_hash_to_file`` and the functions written
            out are the unmodified chunk inputs.  When false, the text from
            the last ``//`` to the end of the input is taken as the hash
            info, appended to every chunk, the tool is invoked with
            ``--use-hash-info``, and the asm shell is run through
            ``process_shell``.

    Returns:
        Path of the file that was written.

    Exits the process (``sys.exit(1)``) if the function markers are missing
    or out of order.
    """
    js_engine = shared.NODE_JS

    with open(filename) as src:
        js = src.read()
    if os.linesep != "\n":
        js = js.replace(os.linesep, "\n")  # we assume \n in the splitting code

    equivalentfn_hash_info = None
    passed_in_filename = filename

    # Find markers
    start_funcs = js.find(start_funcs_marker)
    end_funcs = js.rfind(end_funcs_marker)

    if start_funcs < 0 or end_funcs < start_funcs:
        logging.critical(
            "Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s)"
            % (start_funcs, end_funcs)
        )
        sys.exit(1)

    funcs_begin = start_funcs + len(start_funcs_marker)
    funcs_tail = end_funcs + len(end_funcs_marker)

    if not gen_hash_info:
        # Everything from the last "//" onwards is treated as the hash info.
        equivalentfn_hash_info = js[js.rfind("//"):]

        start_asm = js.find(start_asm_marker)
        end_asm = js.rfind(end_asm_marker)
        assert (start_asm >= 0) == (end_asm >= 0)

        # We need to split out the asm shell as well, for minification
        pre = js[:start_asm + len(start_asm_marker)]
        post = js[end_asm:]
        asm_shell = (
            js[start_asm + len(start_asm_marker):funcs_begin]
            + "\nEMSCRIPTEN_FUNCS();\n"
            + js[funcs_tail:end_asm + len(end_asm_marker)]
        )
        js = js[funcs_begin:end_funcs]

        # we assume there is a maximum of one new name per line
        asm_shell_pre, asm_shell_post = process_shell(
            js, js_engine, asm_shell, equivalentfn_hash_info
        ).split("EMSCRIPTEN_FUNCS();")
        asm_shell_post = asm_shell_post.replace("});", "})")
        pre += asm_shell_pre + "\n" + start_funcs_marker
        post = end_funcs_marker + asm_shell_post + post

        # We don't need the extra info at the end
        post = post[:post.rfind("//")].strip()
    else:
        pre = js[:funcs_begin]
        post = end_funcs_marker + js[funcs_tail:]
        js = js[funcs_begin:end_funcs]

    total_size = len(js)
    funcs = split_funcs(js, False)
    js = None  # release the large string before spawning workers

    cores = int(os.environ.get("EMCC_CORES") or multiprocessing.cpu_count())
    intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
    chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size // intended_num_chunks))

    chunks = [chunk for chunk in shared.chunkify(funcs, chunk_size) if len(chunk) > 0]
    if DEBUG and chunks:
        sizes = [len(chunk) for chunk in chunks]
        sys.stderr.write(
            "chunkification: num funcs: %d actual num chunks: %d chunk size range: %d - %d\n"
            % (len(funcs), len(chunks), max(sizes), min(sizes))
        )
    funcs = None

    def write_chunk(chunk, i):
        # Each chunk gets its own temp file; the hash info rides along at the
        # end of the chunk when we are consuming (not generating) it.
        temp_file = temp_files.get(".jsfunc_%d.js" % i).name
        with open(temp_file, "w") as out:
            out.write(chunk)
            if not gen_hash_info:
                out.write("\n")
                out.write(equivalentfn_hash_info)
        return temp_file

    filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]
    old_filenames = filenames[:]

    if filenames:
        mode_flag = "--gen-hash-info" if gen_hash_info else "--use-hash-info"
        commands = [
            js_engine + [DUPLICATE_FUNCTION_ELIMINATOR, chunk_file, mode_flag, "--no-minimize-whitespace"]
            for chunk_file in filenames
        ]

        if DEBUG:
            sys.stderr.write("%s\n" % ([" ".join(command) for command in commands],))

        cores = min(cores, len(filenames))
        if len(chunks) > 1 and cores >= 2:
            # We can parallelize
            if DEBUG:
                sys.stderr.write(
                    "splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)\n"
                    % (len(chunks), cores, total_size / (1024 * 1024.0))
                )
            pool = multiprocessing.Pool(processes=cores)
            try:
                filenames = pool.map(run_on_chunk, commands, chunksize=1)
            finally:
                # Shut down the pool explicitly; otherwise the worker
                # processes stay alive until they are lazily collected.
                try:
                    pool.terminate()
                    pool.join()
                except Exception as e:
                    # Best effort only: tearing the pool down can fail
                    # spuriously on some platforms; do not mask the result.
                    logging.debug(
                        "Attempting to tear down multiprocessing pool failed with an exception: " + str(e)
                    )
        else:
            # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
            if len(chunks) > 1 and DEBUG:
                sys.stderr.write("splitting up js optimization into %d chunks\n" % len(chunks))
            filenames = [run_on_chunk(command) for command in commands]

    json_files = []

    # We're going to be coalescing the files back at the end.
    # When generating hash info, the tool outputs are the JSON files; the
    # functions to write back are the original chunk inputs.
    if gen_hash_info:
        json_files = filenames[:]
        filenames = old_filenames[:]

    for chunk_file in filenames:
        temp_files.note(chunk_file)

    # NOTE(review): the output path is derived from the LAST chunk file when
    # there are chunks, and from the input path only when there are none.
    # This mirrors the historical behavior (the loop variable used to rebind
    # `filename`); callers are expected to use the returned path.
    out_base = filenames[-1] if filenames else filename
    out_filename = out_base + ".jo.js"

    with open(out_filename, "w") as f:
        f.write(pre)
        pre = None

        funcs = []
        for chunk_file in filenames:
            with open(chunk_file) as chunk_in:
                funcs.extend(split_funcs(chunk_in.read(), False))

        # sort functions by size, to make diffing easier and to improve aot times
        # (largest body first; ties broken by descending identifier)
        if not os.environ.get("EMCC_NO_OPT_SORT"):
            funcs.sort(key=lambda func: (len(func[1]), func[0]), reverse=True)

        for func in funcs:
            f.write(func[1])
        funcs = None

        f.write("\n")
        f.write(post)
        f.write("\n")

        if gen_hash_info and json_files:
            write_equivalent_fn_hash_to_file(f, json_files, passed_in_filename)

    return out_filename
def run_on_js(filename, passes, js_engine, jcache):
    """Run the JS optimizer passes over a JS file, chunked and optionally cached.

    If the file carries a ``// EMSCRIPTEN_GENERATED_FUNCTIONS`` metadata line,
    only the span covering the listed functions is optimized and the text
    before/after it is written back untouched; otherwise the whole file is
    optimized.  The code is split on ``'\\n}\\n'`` into functions, grouped
    into chunks via ``shared.chunkify``, and each chunk is written to a temp
    file and passed to ``run_on_chunk`` (in a process pool when there is more
    than one chunk and core).

    Args:
        filename: Path of the JS file to optimize.
        passes: A pass name or a list of pass names forwarded to the optimizer.
        js_engine: Command prefix (list) used to launch ``JS_OPTIMIZER``.
        jcache: ``True`` to use ``shared.JCache``, a cache object, or a falsy
            value to disable caching.  Caching is disabled when the file has
            no metadata line.

    Returns:
        Path of the file that was written.
    """
    if isinstance(jcache, bool) and jcache:
        jcache = shared.JCache
    if jcache:
        shared.JCache.ensure()

    if isinstance(passes, str):
        passes = [passes]

    with open(filename) as src:
        js = src.read()
    if os.linesep != '\n':
        js = js.replace(os.linesep, '\n')  # we assume \n in the splitting code

    # Find suffix
    suffix_marker = '// EMSCRIPTEN_GENERATED_FUNCTIONS'
    suffix_start = js.find(suffix_marker)
    suffix = ''
    if suffix_start >= 0:
        suffix = js[suffix_start:js.find('\n', suffix_start)] + '\n'
        # if there is metadata, we will run only on the generated functions.
        # If there isn't, we will run on everything.
        # NOTE(review): eval() executes whatever follows the marker in the
        # input file; this is only acceptable if the file is trusted compiler
        # output - confirm, or switch to a data-only parser.
        generated = set(eval(suffix[len(suffix_marker) + 1:]))

    if not suffix and jcache:
        # JCache cannot be used without metadata, since it might reorder stuff, and that's dangerous
        # since only generated can be reordered. This means jcache does not work after closure
        # compiler runs, for example. But you won't get much benefit from jcache with closure
        # anyhow (since closure is likely the longest part of the build).
        if DEBUG:
            sys.stderr.write('js optimizer: no metadata, so disabling jcache\n')
        jcache = False

    # If we process only generated code, find that and save the rest on the side
    func_sig = re.compile(r'( *)function (_[\w$]+)\(')
    if suffix:
        gen_start = 0
        gen_end = 0
        for m in func_sig.finditer(js):
            indent = m.group(1)
            ident = m.group(2)
            if ident not in generated:
                continue
            if not gen_start:
                gen_start = m.start()
                assert gen_start
            # end of this function: the closing brace at the same indent
            gen_end = js.find('\n%s}\n' % indent, m.end()) + (3 + len(indent))
        assert gen_end > gen_start
        pre = js[:gen_start]
        post = js[gen_end:]
        if 'last' in passes:
            post = post.replace(suffix, '')  # no need to write out the metadata - nothing after us needs it
        js = js[gen_start:gen_end]
    else:
        pre = ''
        post = ''

    # Pick where to split into chunks, so that (1) they do not oom in node/uglify, and (2) we can
    # run them in parallel. If we have metadata, we split only the generated code, and save the
    # pre and post on the side (and do not optimize them).
    parts = js.split('\n}\n')
    last_index = len(parts) - 1
    funcs = []
    for i, func in enumerate(parts):
        if i < last_index:
            func += '\n}\n'  # last part needs no }
        m = func_sig.search(func)
        if m:
            ident = m.group(2)
        elif suffix:
            continue  # ignore whitespace
        else:
            ident = 'anon_%d' % i
        assert ident
        funcs.append((ident, func))
    parts = None
    total_size = len(js)
    js = None

    cores = int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count())
    intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE))
    chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size // intended_num_chunks))

    chunks = shared.chunkify(funcs, chunk_size, jcache.get_cachename('jsopt') if jcache else None)

    cached_outputs = []
    if jcache:
        # load chunks from cache where we can # TODO: ignore small chunks
        uncached = []
        for chunk in chunks:
            keys = [chunk]
            shortkey = shared.JCache.get_shortkey(keys)  # TODO: share shortkeys with later code
            out = shared.JCache.get(shortkey, keys)
            if out:
                cached_outputs.append(out)
            else:
                uncached.append(chunk)
        chunks = uncached
        if DEBUG and cached_outputs:
            sys.stderr.write(' loading %d jsfuncchunks from jcache\n' % len(cached_outputs))

    def write_chunk(chunk, i):
        # The metadata suffix is appended so the optimizer sees it per chunk.
        temp_file = temp_files.get('.jsfunc_%d.js' % i).name
        with open(temp_file, 'w') as out:
            out.write(chunk)
            out.write(suffix)
        return temp_file

    filenames = [write_chunk(chunk, i) for i, chunk in enumerate(chunks)]

    if filenames:
        # XXX Use '--nocrankshaft' to disable crankshaft to work around v8 bug 1895, needed for
        # older v8/node (node 0.6.8+ should be ok)
        commands = [
            js_engine + [JS_OPTIMIZER, chunk_file, 'noPrintMetadata'] + passes
            for chunk_file in filenames
        ]

        # Never use more workers than there are chunk files.
        cores = min(cores, len(filenames))
        if len(chunks) > 1 and cores >= 2:
            # We can parallelize
            if DEBUG:
                sys.stderr.write(
                    'splitting up js optimization into %d chunks of size %d, using %d cores (total: %.2f MB)\n'
                    % (len(chunks), chunk_size, cores, total_size / (1024 * 1024.))
                )
            pool = multiprocessing.Pool(processes=cores)
            try:
                filenames = pool.map(run_on_chunk, commands, chunksize=1)
            finally:
                # Shut down the pool explicitly; otherwise the worker
                # processes stay alive until they are lazily collected.
                try:
                    pool.terminate()
                    pool.join()
                except Exception as e:
                    # Best effort only: a failed teardown must not mask the result.
                    sys.stderr.write('js optimizer: failed to tear down multiprocessing pool: %s\n' % e)
        else:
            # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
            if len(chunks) > 1 and DEBUG:
                sys.stderr.write(
                    'splitting up js optimization into %d chunks of size %d\n' % (len(chunks), chunk_size)
                )
            filenames = [run_on_chunk(command) for command in commands]

    for chunk_file in filenames:
        temp_files.note(chunk_file)

    # NOTE(review): the output path is derived from the LAST chunk output when
    # there are chunks, and from the input path only when there are none.
    # This mirrors the historical behavior (the loop variable used to rebind
    # `filename`); callers are expected to use the returned path.
    out_base = filenames[-1] if filenames else filename
    out_filename = out_base + '.jo.js'

    with open(out_filename, 'w') as f:
        f.write(pre)
        for out_file in filenames:
            with open(out_file) as chunk_in:
                f.write(chunk_in.read())
            f.write('\n')
        if jcache:
            for cached in cached_outputs:
                f.write(cached)  # TODO: preserve order
                f.write('\n')
        f.write(post)
        # No need to write suffix: if there was one, it is inside post which exists when suffix is there
        f.write('\n')

    if jcache:
        # save chunks to cache
        for chunk, out_file in zip(chunks, filenames):
            keys = [chunk]
            shortkey = shared.JCache.get_shortkey(keys)
            with open(out_file) as chunk_in:
                shared.JCache.set(shortkey, keys, chunk_in.read())
        if DEBUG and len(chunks) > 0:
            sys.stderr.write(' saving %d jsfuncchunks to jcache\n' % len(chunks))

    return out_filename