def encode_dir(dict_file, binjs_encode, in_path, out_path, skip_errors=True, copy_source=True):
    '''Compresses every .js file under in_path into a .binjs file under out_path.

    Walks in_path recursively, mirroring its directory structure into
    out_path. For each *.js file, runs the external binjs_encode tool to
    extract a JSON AST, then writes the compressed form next to it as
    <name>.binjs.

    Args:
        dict_file: path of the shared string dictionary (read with signature).
        binjs_encode: path to the binjs_encode executable used to parse JS.
        in_path: root directory of JavaScript sources.
        out_path: root directory for the .binjs output tree (created as needed).
        skip_errors: if True, files that fail to parse/encode are reported and
            skipped; if False, the exception propagates.
        copy_source: if True, also copies the original .js file into the
            destination directory.
    '''
    types = idl.parse_es6_idl()
    ty_script = types.interfaces['Script']
    string_dict = strings.read_dict(dict_file, with_signature=True)
    in_path = os.path.abspath(in_path)
    out_path = os.path.abspath(out_path)
    # binjs_encode insists on an --out directory; we only want its stdout AST,
    # so its file output goes to a throwaway temp directory.
    ignored_out_directory = tempfile.TemporaryDirectory()
    for root, _, sources in os.walk(in_path):
        # 1. Prepare destination directory
        suffix = os.path.relpath(root, in_path)
        dest_root = os.path.join(out_path, suffix)
        print('Encoding from {root} to {dest_root}'.format(
            root=root, dest_root=dest_root))
        os.makedirs(dest_root, exist_ok=True)
        for source in sources:
            source_path = os.path.join(root, source)
            # Was: source[-3:] == '.js' — endswith is the idiomatic form.
            if not source.endswith('.js'):
                print('...skipping {}'.format(source_path))
                continue
            # 2. Extract AST
            print('Preprocessing {}'.format(source_path))
            process = subprocess.run([
                binjs_encode, '--quiet', '--show-ast', '--in', source_path,
                '--out', ignored_out_directory.name
            ], capture_output=True)
            try:
                proggy = json.loads(process.stdout.decode('utf-8'))
                # 3. Encode
                dest_path = os.path.join(dest_root, source[:-3] + '.binjs')
                print('Encoding {source_path} => {dest_path}'.format(
                    source_path=source_path, dest_path=dest_path))
                # Was: an un-closed open(); the handle leaked on every
                # iteration and on any exception from format.write.
                with open(dest_path, 'wb') as dest_file:
                    format.write(types, string_dict, ty_script, proggy, dest_file)
                # 4. Copy source file
                if copy_source:
                    shutil.copy(source_path, dest_root)
            # Was: bare `except:`, which also swallowed KeyboardInterrupt
            # and SystemExit. Exception is the widest safe net here.
            except Exception:
                if skip_errors:
                    print('...does not parse')
                else:
                    raise
def read(types, string_dict, ty, inp):
    '''Decompresses ast from a byte stream and returns an AST.

    >>> import json
    >>> import ast, idl, strings
    >>> types = idl.parse_es6_idl()
    >>> ty_script = types.interfaces['Script']
    >>> tree_in = ast.load_test_ast('y5R7cnYctJv.js.dump')
    >>> #tree_in = ast.load_test_ast('three.min.js.dump')
    >>> string_dict = strings.prepare_dict(types, [(ty_script, tree_in)])
    >>> buf = io.BytesIO()
    >>> write(types, string_dict, ty_script, tree_in, buf)
    >>> buf.tell()
    1884
    >>> buf.seek(0)
    0
    >>> tree_out = read(types, string_dict, ty_script, buf)
    >>> #assert json.dumps(tree_in) == json.dumps(tree_out)
    >>> s_in = json.dumps(tree_in, indent=1).split('\\n')
    >>> s_out = json.dumps(tree_out, indent=1).split('\\n')
    >>> for i, (l_in, l_out) in enumerate(zip(s_in, s_out)):
    ...   if l_in != l_out:
    ...     print(f'{i:3d} {l_in}')
    ...     print(f'    {l_out}')
    ...     print('mismatch')
    ...     break

    Now try to round-trip something which uses laziness:

    >>> import opt
    >>> tree_in = opt.optimize(tree_in)
    lazified 1 functions
    >>> buf = io.BytesIO()
    >>> write(types, string_dict, ty_script, tree_in, buf)
    >>> buf.tell()
    1898
    >>> buf.seek(0)
    0
    >>> tree_out = read(types, string_dict, ty_script, buf)
    >>> assert json.dumps(tree_in) == json.dumps(tree_out)

    Now try to round-trip something which misses the dictionary:

    >>> del string_dict[-10:-3]
    >>> buf = io.BytesIO()
    >>> write(types, string_dict, ty_script, tree_in, buf)
    >>> buf.tell()
    1934
    >>> buf.seek(0)
    0
    >>> tree_out = read(types, string_dict, ty_script, buf)
    >>> assert json.dumps(tree_in) == json.dumps(tree_out)
    '''
    # Read the local string table. File-local strings take precedence:
    # they are prepended so lower indices resolve to them first.
    local_strings = strings.read_dict(inp, with_signature=False)
    string_dict = local_strings + string_dict
    # Read the probability models
    model_reader = encode.ModelReader(types, string_dict, inp)
    m = model_reader.read(ty)

    def read_piece(ty):
        # Decode one (possibly lazy-holed) subtree of type `ty` starting at
        # the current stream position. Recurses via restore_lazy_part for
        # each lazy function body that follows the subtree.
        tree = encode.decode(types, m, ty, inp)
        # Read the dictionary of lazy parts
        # TODO: We don't need this; it is implicit in the tree we just read.
        num_lazy_parts = bits.read_varint(inp)
        # Prefix-sum the sizes into relative offsets; entry i is the start of
        # lazy part i, entry i+1 its end.
        lazy_offsets = [0]
        for _ in range(num_lazy_parts):
            lazy_size = bits.read_varint(inp)
            lazy_offsets.append(lazy_offsets[-1] + lazy_size)
        # Rebase relative offsets onto absolute stream positions; the lazy
        # parts are laid out back-to-back starting at the current position.
        lazy_offsets = list(
            map(lambda offset: offset + inp.tell(), lazy_offsets))

        def restore_lazy_part(ty, attr, index):
            # Seek to lazy part `index`, decode it as the attribute's
            # resolved type, and verify we consumed exactly its slot.
            inp.seek(lazy_offsets[index])
            part = read_piece(attr.resolved_ty)
            assert inp.tell() == lazy_offsets[
                index + 1], f'{inp.tell()}, {lazy_offsets[index + 1]}'
            return part

        # Splice every lazy part back into its hole in the decoded tree.
        restorer = lazy.LazyMemberRestorer(types, restore_lazy_part)
        tree = restorer.replace(ty, tree)
        # Leave the stream positioned just past the last lazy part so the
        # caller (an outer read_piece) sees a fully-consumed region.
        inp.seek(lazy_offsets[-1])
        return tree

    tree = read_piece(ty)
    # Sanity-check the reconstructed tree against the IDL before returning.
    type_checker = tycheck.TypeChecker(types)
    type_checker.check_any(ty, tree)
    return tree
def decode(dict_file, in_file, out_file):
    '''Decompresses a binjs stream from in_file and dumps it as JSON.

    Args:
        dict_file: path of the shared string dictionary (read with signature).
        in_file: binary file object positioned at the start of the encoded AST.
        out_file: text file object the decoded AST is JSON-dumped into.
    '''
    es6_types = idl.parse_es6_idl()
    script_ty = es6_types.interfaces['Script']
    shared_strings = strings.read_dict(dict_file, with_signature=True)
    tree = format.read(es6_types, shared_strings, script_ty, in_file)
    json.dump(tree, out_file)
def encode(dict_file, in_file, out_file):
    '''Compresses a JSON-serialized AST from in_file into out_file.

    Args:
        dict_file: path of the shared string dictionary (read with signature).
        in_file: text file object containing the AST as JSON.
        out_file: binary file object the compressed stream is written to.
    '''
    types = idl.parse_es6_idl()
    ty_script = types.interfaces['Script']
    string_dict = strings.read_dict(dict_file, with_signature=True)
    # Was: json.loads(in_file.read()) — json.load streams straight from the
    # file object without materializing an intermediate string.
    proggy = json.load(in_file)
    format.write(types, string_dict, ty_script, proggy, out_file)