def unpack(pak_path, out_path):
  pak_dir = os.path.dirname(pak_path)
  pak_id = os.path.splitext(os.path.basename(pak_path))[0]
  data = data_pack.ReadDataPack(pak_path)

  # Associate numerical grit IDs to strings.
  # For example 120045 -> 'IDR_SETTINGS_ABOUT_PAGE_HTML'
  resource_ids = dict()
  resources_path = os.path.join(pak_dir, 'grit', pak_id + '.h')
  with open(resources_path) as resources_file:
    for line in resources_file:
      res = re.match(r'#define ([^ ]+) (\d+)', line)
      if res:
        resource_ids[int(res.group(2))] = res.group(1)
  assert resource_ids

  # Associate numerical string IDs to files.
  resource_filenames = dict()
  resources_map_path = os.path.join(pak_dir, 'grit', pak_id + '_map.cc')
  with open(resources_map_path) as resources_map:
    for line in resources_map:
      res = re.match(r' {"([^"]+)", ([^}]+)', line)
      if res:
        resource_filenames[res.group(2)] = res.group(1)
  assert resource_filenames

  # Extract packed files, while preserving directory structure.
  for (resource_id, text) in data.resources.iteritems():
    filename = resource_filenames[resource_ids[resource_id]]
    dirname = os.path.join(out_path, os.path.dirname(filename))
    if not os.path.exists(dirname):
      os.makedirs(dirname)
    with open(os.path.join(out_path, filename), 'w') as f:
      f.write(text)
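# A minimal usage sketch for unpack() above; the paths here are hypothetical.
# The function expects a grit-generated layout next to the .pak, i.e. a
# grit/<pak_id>.h with lines such as:
#   #define IDR_SETTINGS_ABOUT_PAGE_HTML 120045
# and a grit/<pak_id>_map.cc with entries such as:
#   {"settings/about_page.html", IDR_SETTINGS_ABOUT_PAGE_HTML},
# assuming grit is on sys.path:
import os
import re
from grit.format import data_pack

unpack('out/Release/settings_resources.pak', '/tmp/settings_unpacked')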
def _PrintMain(args):
  pak = data_pack.ReadDataPack(args.pak_file)
  encoding = 'binary'
  if pak.encoding == 1:
    encoding = 'utf-8'
  elif pak.encoding == 2:
    encoding = 'utf-16'
  else:
    encoding = '?' + str(pak.encoding)
  print 'Encoding:', encoding

  try_decode = encoding.startswith('utf')
  # Print IDs in ascending order, since that's the order in which they appear
  # in the file (order is lost by Python dict).
  for resource_id in sorted(pak.resources):
    data = pak.resources[resource_id]
    desc = '<binary>'
    if try_decode:
      try:
        desc = unicode(data, encoding)
        if len(desc) > 60:
          desc = desc[:60] + u'...'
        desc = desc.replace('\n', '\\n')
      except UnicodeDecodeError:
        pass
    sha1 = hashlib.sha1(data).hexdigest()[:10]
    line = u'Entry(id={}, len={}, sha1={}): {}'.format(
        resource_id, len(data), sha1, desc)
    print line.encode('utf-8')
def Unpack(pak_path, out_path, pak_base_dir, excludes):
  pak_dir = os.path.dirname(pak_path)
  pak_id = os.path.splitext(os.path.basename(pak_path))[0]
  data = data_pack.ReadDataPack(pak_path)

  # Associate numerical grit IDs to strings.
  # For example 120045 -> 'IDR_SETTINGS_ABOUT_PAGE_HTML'
  resource_ids = dict()
  resources_path = os.path.join(pak_dir, 'grit', pak_id + '.h')
  with open(resources_path) as resources_file:
    for line in resources_file:
      res = re.match(r'^#define (\S*).* (\d+)\)?$', line)
      if res:
        resource_ids[int(res.group(2))] = res.group(1)
  assert resource_ids

  # Associate numerical string IDs to files.
  resource_filenames = dict()
  resources_map_path = os.path.join(pak_dir, 'grit', pak_id + '_map.cc')
  with open(resources_map_path) as resources_map:
    for line in resources_map:
      res = ParseLine(line)
      if res:
        resource_filenames[res.group(2)] = res.group(1)
  assert resource_filenames

  root_dir = pak_base_dir if pak_base_dir else pak_dir
  # Extract packed files, while preserving directory structure.
  for (resource_id, text) in data.resources.iteritems():
    UnpackResource(root_dir, out_path, excludes or [],
                   resource_filenames[resource_ids[resource_id]], text)
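# ParseLine() and UnpackResource() are referenced above but defined elsewhere.
# A minimal sketch under the assumption that the generated _map.cc entries
# look like {"path/to/file", IDR_CONSTANT}, with an optional trailing
# true/false gzip flag captured as group(3) (used by the gzip-aware variant
# further below):
def ParseLine(line):
  return re.match(r'\s*{"([^"]+)",\s*([^,}\s]+)(?:,\s*(true|false))?', line)


def UnpackResource(root_dir, out_path, excludes, filename, text):
  # Skip resources whose path matches an exclude pattern.
  if any(re.search(pattern, filename) for pattern in excludes):
    return
  # This sketch assumes map paths are relative; the real helper presumably
  # resolves them against root_dir before re-rooting under out_path.
  dest = os.path.join(out_path, filename)
  dest_dir = os.path.dirname(dest)
  if not os.path.exists(dest_dir):
    os.makedirs(dest_dir)
  with open(dest, 'w') as f:
    f.write(text)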
def _ExtractMain(args):
  pak = data_pack.ReadDataPack(args.pak_file)
  for resource_id, payload in pak.resources.iteritems():
    path = os.path.join(args.output_dir, str(resource_id))
    with open(path, 'w') as f:
      f.write(payload)
def _ExtractMain(args):
  pak = data_pack.ReadDataPack(args.pak_file)
  if args.textual_id:
    info_dict = data_pack.ReadGrdInfo(args.pak_file)
  for resource_id, payload in pak.resources.items():
    filename = (info_dict[resource_id].textual_id
                if args.textual_id else str(resource_id))
    path = os.path.join(args.output_dir, filename)
    # Binary mode: resource payloads are raw bytes, not text.
    with open(path, 'wb') as f:
      f.write(payload)
def main():
  parser = optparse.OptionParser(
      usage='Usage: %prog --pak-file PAK_FILE --header HEADER --cpp CPP\n')
  parser.add_option('-i', '--pak-file', action='store', dest='pak_file',
                    help='The .pak file to be extracted.')
  parser.add_option('', '--header', action='store', dest='header_file',
                    help='Header file to be generated.')
  parser.add_option('', '--cpp', action='store', dest='cpp_file',
                    help='C++ file to be generated.')
  (options, _) = parser.parse_args()
  if (not options.pak_file or not options.header_file or
      not options.cpp_file):
    parser.print_help()
    sys.exit(-1)

  header_file = open(options.header_file, 'w+')
  cpp_file = open(options.cpp_file, 'w+')

  pak_contents = DataPack.ReadDataPack(options.pak_file)
  resourceIds = []
  header_contents = dict()
  cpp_contents = dict()

  definitions = []
  for (resId, data) in pak_contents.resources.iteritems():
    resourceIds.append(resId)
    hex_values = ['0x{0:02x}'.format(ord(char)) for char in data]
    f = lambda A, n=12: [A[i:i + n] for i in range(0, len(A), n)]
    hex_values_string = ',\n '.join(', '.join(x) for x in f(hex_values))
    cpp_definition = (
        'const unsigned char kResource%s[%d] = {\n %s \n};' %
        (str(resId), len(hex_values), hex_values_string))
    definitions.append(cpp_definition)

  header_file_contents = Template(header_template).substitute(header_contents)
  header_file.write(header_file_contents)
  header_file.close()

  map_initializer = []
  for resId in resourceIds:
    insert_statement = (
        'resources_.insert(std::pair<int, ResourceEntry>(\n'
        ' %s, ResourceEntry(kResource%s, arraysize(kResource%s))));')
    map_initializer.append(
        insert_statement % (str(resId), str(resId), str(resId)))

  cpp_contents['definitions'] = '\n'.join(definitions)
  cpp_contents['header_file_name'] = os.path.basename(options.header_file)
  cpp_contents['map_initializer'] = '\n '.join(map_initializer)

  cpp_file_contents = Template(cpp_template).substitute(cpp_contents)
  cpp_file.write(cpp_file_contents)
  cpp_file.close()
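# The chunking lambda above wraps the hex bytes 12 per row so the generated
# C++ array stays readable. For a two-byte resource 'ab' with a hypothetical
# id of 5, the emitted definition would look roughly like:
#   const unsigned char kResource5[2] = {
#     0x61, 0x62
#   };
# together with a map entry:
#   resources_.insert(std::pair<int, ResourceEntry>(
#     5, ResourceEntry(kResource5, arraysize(kResource5))));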
def generate(from_dir, to_dir, localizable_list_path, infoplist_template_path,
             resources_header_path, locales):
  """Generates the <locale>.lproj directories and files."""
  id_map = read_resources_header(resources_header_path)
  localizable_ids = read_id_list(localizable_list_path)
  infoplist_template = read_jinja2_template(infoplist_template_path)

  # Generate string files for each locale.
  for locale in locales:
    pack = data_pack.ReadDataPack(os.path.join(from_dir, '%s.pak' % locale))
    lproj_dir = format_lproj_dir(to_dir, locale)
    if not os.path.exists(lproj_dir):
      os.makedirs(lproj_dir)

    # Generate Localizable.strings.
    localizable_strings_path = os.path.join(lproj_dir, LOCALIZABLE_STRINGS)
    try:
      with codecs.open(localizable_strings_path, 'w', 'utf-16') as f:
        for id_str in localizable_ids:
          id_value = id_map.get(id_str)
          if not id_value:
            raise LocalizeException(
                'Could not find "%s" in %s' % (id_str, resources_header_path))
          localized_data = pack.resources.get(id_value)
          if not localized_data:
            raise LocalizeException(
                'Could not find localized string in %s for %s (%d)' %
                (localizable_strings_path, id_str, id_value))
          f.write(u'"%s" = "%s";\n' %
                  (id_str, decode_and_escape(localized_data)))
    except:
      sys.stderr.write('Error while creating %s\n' % localizable_strings_path)
      raise

    # Generate InfoPlist.strings.
    infoplist_strings_path = os.path.join(lproj_dir, INFOPLIST_STRINGS)
    try:
      with codecs.open(infoplist_strings_path, 'w', 'utf-16') as f:
        infoplist = infoplist_template.render(
            ids=LocalizedStringJinja2Adapter(id_map, pack))
        f.write(infoplist)
    except:
      sys.stderr.write('Error while creating %s\n' % infoplist_strings_path)
      raise
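# decode_and_escape() is one of several helpers assumed by generate() above
# (alongside read_resources_header, read_id_list, read_jinja2_template,
# format_lproj_dir and LocalizedStringJinja2Adapter). A minimal sketch,
# assuming the locale .pak payloads are UTF-8 and that the .strings format
# needs backslashes, double quotes and newlines escaped:
def decode_and_escape(data):
  text = data.decode('utf-8')
  return (text.replace('\\', '\\\\')
              .replace('"', '\\"')
              .replace('\n', '\\n'))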
def _ListMain(args):
  pak = data_pack.ReadDataPack(args.pak_file)
  if args.textual_id or args.path:
    info_dict = data_pack.ReadGrdInfo(args.pak_file)
    fmt = ''.join([
        '{id}', ' = {textual_id}' if args.textual_id else '',
        ' @ {path}' if args.path else '', '\n'
    ])
    for resource_id in sorted(pak.resources):
      item = info_dict[resource_id]
      args.output.write(
          fmt.format(textual_id=item.textual_id, id=item.id, path=item.path))
  else:
    for resource_id in sorted(pak.resources):
      args.output.write('%d\n' % resource_id)
def _PrintMain(args):
  pak = data_pack.ReadDataPack(args.pak_file)
  if args.textual_id:
    info_dict = data_pack.ReadGrdInfo(args.pak_file)
  output = args.output
  encoding = 'binary'
  if pak.encoding == 1:
    encoding = 'utf-8'
  elif pak.encoding == 2:
    encoding = 'utf-16'
  else:
    encoding = '?' + str(pak.encoding)
  output.write('version: {}\n'.format(pak.version))
  output.write('encoding: {}\n'.format(encoding))
  output.write('num_resources: {}\n'.format(len(pak.resources)))
  output.write('num_aliases: {}\n'.format(len(pak.aliases)))
  breakdown = ', '.join('{}: {}'.format(*x) for x in pak.sizes)
  output.write('total_size: {} ({})\n'.format(pak.sizes.total, breakdown))

  try_decode = args.decode and encoding.startswith('utf')
  # Print IDs in ascending order, since that's the order in which they appear
  # in the file (order is lost by Python dict).
  for resource_id in sorted(pak.resources):
    data = pak.resources[resource_id]
    canonical_id = pak.aliases.get(resource_id, resource_id)
    desc = '<data>'
    if try_decode:
      try:
        desc = six.text_type(data, encoding)
        if len(desc) > 60:
          desc = desc[:60] + u'...'
        desc = desc.replace('\n', '\\n')
      except UnicodeDecodeError:
        pass
    sha1 = hashlib.sha1(data).hexdigest()[:10]
    if args.textual_id:
      textual_id = info_dict[resource_id].textual_id
      canonical_textual_id = info_dict[canonical_id].textual_id
      output.write(
          u'Entry(id={}, canonical_id={}, size={}, sha1={}): {}\n'.format(
              textual_id, canonical_textual_id, len(data), sha1,
              desc).encode('utf-8'))
    else:
      output.write(
          u'Entry(id={}, canonical_id={}, size={}, sha1={}): {}\n'.format(
              resource_id, canonical_id, len(data), sha1,
              desc).encode('utf-8'))
def Unpack(pak_path, out_path):
  pak_dir = os.path.dirname(pak_path)
  pak_id = os.path.splitext(os.path.basename(pak_path))[0]
  data = data_pack.ReadDataPack(pak_path)

  # Associate numerical grit IDs to strings.
  # For example 120045 -> 'IDR_SETTINGS_ABOUT_PAGE_HTML'
  resource_ids = dict()
  resources_path = os.path.join(pak_dir, 'grit', pak_id + '.h')
  with open(resources_path) as resources_file:
    for line in resources_file:
      res = re.match(r'^#define (\S*).* (\d+)\)?$', line)
      if res:
        resource_ids[int(res.group(2))] = res.group(1)
  assert resource_ids

  # Associate numerical string IDs to files.
  resource_files = dict()
  resources_map_path = os.path.join(pak_dir, 'grit', pak_id + '_map.cc')
  with open(resources_map_path) as resources_map:
    for line in resources_map:
      res = ParseLine(line)
      if res:
        resource_files[res.group(2)] = ResourceFile(
            path=res.group(1), gzipped=res.group(3) == 'true')
  assert resource_files

  # Extract packed files, while preserving directory structure.
  for (resource_id, text) in data.resources.iteritems():
    resource_file = resource_files[resource_ids[resource_id]]
    file_path = resource_file.path
    file_gzipped = resource_file.gzipped
    file_dir = os.path.join(out_path, os.path.dirname(file_path))
    if not os.path.exists(file_dir):
      os.makedirs(file_dir)
    if file_gzipped:
      text = UngzipString(text)
    with open(os.path.join(out_path, file_path), 'w') as f:
      f.write(text)
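# UngzipString() above is assumed to reverse the gzip wrapping of resources
# whose map entry carries gzipped=true. A minimal sketch, assuming the
# payloads carry a standard gzip header and trailer:
import zlib

def UngzipString(data):
  # wbits = 16 + MAX_WBITS tells zlib to expect a gzip-framed stream.
  return zlib.decompress(data, 16 + zlib.MAX_WBITS)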
def _ListMain(args):
  resources, _ = data_pack.ReadDataPack(args.pak_file)
  for resource_id in sorted(resources.keys()):
    args.output.write('%d\n' % resource_id)
def PrintPakAnalysis(apk_filename, min_pak_resource_size, out_dir):
  """Print sizes of all resources in all pak files in |apk_filename|."""
  print
  print 'Analyzing pak files in %s...' % apk_filename

  # A structure for holding details about a pak file.
  Pak = collections.namedtuple(
      'Pak', ['filename', 'compress_size', 'file_size', 'resources'])

  # Build a list of Pak objects for each pak file.
  paks = []
  apk = zipfile.ZipFile(apk_filename, 'r')
  try:
    for i in (x for x in apk.infolist() if IsPakFileName(x.filename)):
      with tempfile.NamedTemporaryFile() as f:
        f.write(apk.read(i.filename))
        f.flush()
        paks.append(
            Pak(i.filename, i.compress_size, i.file_size,
                data_pack.ReadDataPack(f.name).resources))
  finally:
    apk.close()

  # Output the overall pak file summary.
  total_files = len(paks)
  total_compress_size = sum(pak.compress_size for pak in paks)
  total_file_size = sum(pak.file_size for pak in paks)
  print 'Total pak files: %d' % total_files
  print 'Total compressed size: %s' % _FormatBytes(total_compress_size)
  print 'Total uncompressed size: %s' % _FormatBytes(total_file_size)
  print

  if not paks:
    return

  # Output the table of details about all pak files.
  print '%25s%11s%21s%21s' % ('FILENAME', 'RESOURCES', 'COMPRESSED SIZE',
                              'UNCOMPRESSED SIZE')
  for pak in sorted(paks, key=operator.attrgetter('file_size'), reverse=True):
    print '%25s %10s %12s %6.2f%% %12s %6.2f%%' % (
        pak.filename, len(pak.resources), _FormatBytes(pak.compress_size),
        100.0 * pak.compress_size / total_compress_size,
        _FormatBytes(pak.file_size), 100.0 * pak.file_size / total_file_size)

  print
  print 'Analyzing pak resources in %s...' % apk_filename

  # Calculate aggregate stats about resources across pak files.
  resource_count_map = collections.defaultdict(int)
  resource_size_map = collections.defaultdict(int)
  seen_data_ids = set()
  alias_overhead_bytes = 4
  resource_overhead_bytes = 6
  for pak in paks:
    for k, v in pak.resources.iteritems():
      resource_count_map[k] += 1
      if id(v) not in seen_data_ids:
        seen_data_ids.add(id(v))
        resource_size_map[k] += resource_overhead_bytes + len(v)
      else:
        resource_size_map[k] += alias_overhead_bytes

  # Output the overall resource summary.
  total_resource_size = sum(resource_size_map.values())
  total_resource_count = len(resource_count_map)
  assert total_resource_size <= total_file_size
  print 'Total pak resources: %s' % total_resource_count
  print 'Total uncompressed resource size: %s' % _FormatBytes(
      total_resource_size)
  print

  if not out_dir or not os.path.isdir(out_dir):
    return
  resource_id_name_map, resources_id_header_map = _AnnotatePakResources(
      out_dir)

  # Output the table of details about all resources across pak files.
  print
  print '%56s %5s %17s' % ('RESOURCE', 'COUNT', 'UNCOMPRESSED SIZE')
  for i in sorted(resource_size_map, key=resource_size_map.get, reverse=True):
    if resource_size_map[i] < min_pak_resource_size:
      break
    print '%56s %5s %9s %6.2f%%' % (
        resource_id_name_map.get(i, i), resource_count_map[i],
        _FormatBytes(resource_size_map[i]),
        100.0 * resource_size_map[i] / total_resource_size)

  # Print breakdown on a per-grd file basis.
  size_by_header = collections.defaultdict(int)
  for resid, size in resource_size_map.iteritems():
    size_by_header[resources_id_header_map.get(resid, 'unknown')] += size

  print
  print '%80s %17s' % ('HEADER', 'UNCOMPRESSED SIZE')
  for header in sorted(size_by_header, key=size_by_header.get, reverse=True):
    if size_by_header[header] < min_pak_resource_size:
      break
    print '%80s %9s %6.2f%%' % (
        header, _FormatBytes(size_by_header[header]),
        100.0 * size_by_header[header] / total_resource_size)
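# _FormatBytes() is used throughout PrintPakAnalysis() but defined elsewhere.
# A minimal sketch that renders a byte count as a human-readable string:
def _FormatBytes(byts):
  if byts < 1024:
    return '%d B' % byts
  for unit in ('KiB', 'MiB', 'GiB'):
    byts /= 1024.0
    if byts < 1024:
      return '%.1f %s' % (byts, unit)
  return '%.1f TiB' % (byts / 1024.0)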
def _ListMain(args):
  pak = data_pack.ReadDataPack(args.pak_file)
  for resource_id in sorted(pak.resources):
    args.output.write('%d\n' % resource_id)
def _GetFilesInPak(pakname):
  '''Get a set of the files that were actually included in the .pak output.'''
  return set(data_pack.ReadDataPack(pakname).resources.values())