Example #1
    def run_analysis(self, manifest: Manifest, old_manifest: Manifest = None,
                     patch=True, resume=True, file_prefix_filter=None,
                     file_exclude_filter=None, file_install_tag=None,
                     processing_optimization=False) -> AnalysisResult:
        """
        Run analysis on manifest and old manifest (if not None) and return a result
        with a summary of the resources required to install the provided manifest.

        :param manifest: Manifest to install
        :param old_manifest: Old manifest to patch from (if applicable)
        :param patch: Patch instead of redownloading the entire file
        :param resume: Continue based on resume file if it exists
        :param file_prefix_filter: Only download files that start with this prefix
        :param file_exclude_filter: Exclude files with this prefix from download
        :param file_install_tag: Only install files with the specified tag
        :param processing_optimization: Attempt to optimize processing order and RAM usage
        :return: AnalysisResult
        """

        analysis_res = AnalysisResult()
        analysis_res.install_size = sum(fm.file_size for fm in manifest.file_manifest_list.elements)
        analysis_res.biggest_chunk = max(c.window_size for c in manifest.chunk_data_list.elements)
        analysis_res.biggest_file_size = max(f.file_size for f in manifest.file_manifest_list.elements)
        is_1mib = analysis_res.biggest_chunk == 1024 * 1024
        self.log.debug(f'Biggest chunk size: {analysis_res.biggest_chunk} bytes (== 1 MiB? {is_1mib})')

        self.log.debug('Creating manifest comparison...')
        mc = ManifestComparison.create(manifest, old_manifest)
        analysis_res.manifest_comparison = mc

        if resume and self.resume_file and os.path.exists(self.resume_file):
            self.log.info('Found previously interrupted download. Download will be resumed if possible.')
            try:
                missing = 0
                mismatch = 0
                completed_files = set()

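                # each line of the resume file is expected to be '<sha1 hex>:<filename>'
                # (inferred from the partition(':') parsing below)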
                for line in open(self.resume_file).readlines():
                    file_hash, _, filename = line.strip().partition(':')
                    _p = os.path.join(self.dl_dir, filename)
                    if not os.path.exists(_p):
                        self.log.debug(f'File does not exist but is in resume file: "{_p}"')
                        missing += 1
                    elif file_hash != manifest.file_manifest_list.get_file_by_path(filename).sha_hash.hex():
                        mismatch += 1
                    else:
                        completed_files.add(filename)

                if missing:
                    self.log.warning(f'{missing} previously completed file(s) are missing, they will be redownloaded.')
                if mismatch:
                    self.log.warning(f'{mismatch} existing file(s) have been changed and will be redownloaded.')

                # remove completed files from changed/added and move them to unchanged for the analysis.
                mc.added -= completed_files
                mc.changed -= completed_files
                mc.unchanged |= completed_files
                self.log.info(f'Skipping {len(completed_files)} files based on resume data.')
            except Exception as e:
                self.log.warning(f'Reading resume file failed: {e!r}, continuing as normal...')

        # Install tags are used for selective downloading, e.g. for language packs
        additional_deletion_tasks = []
        if file_install_tag is not None:
            if isinstance(file_install_tag, str):
                file_install_tag = [file_install_tag]

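            # keep a file if any requested tag is in its install tags; the empty
            # tag ('') additionally keeps files that carry no install tags at all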
            files_to_skip = set(i.filename for i in manifest.file_manifest_list.elements
                                if not any((fit in i.install_tags) or (not fit and not i.install_tags)
                                           for fit in file_install_tag))
            self.log.info(f'Found {len(files_to_skip)} files to skip based on install tag.')
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip
            for fname in sorted(files_to_skip):
                additional_deletion_tasks.append(FileTask(fname, delete=True, silent=True))

        # if include/exclude prefix has been set: mark all files that are not to be downloaded as unchanged
        if file_exclude_filter:
            if isinstance(file_exclude_filter, str):
                file_exclude_filter = [file_exclude_filter]

            file_exclude_filter = [f.lower() for f in file_exclude_filter]
            files_to_skip = set(i.filename for i in manifest.file_manifest_list.elements if
                                any(i.filename.lower().startswith(pfx) for pfx in file_exclude_filter))
            self.log.info(f'Found {len(files_to_skip)} files to skip based on exclude prefix.')
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        if file_prefix_filter:
            if isinstance(file_prefix_filter, str):
                file_prefix_filter = [file_prefix_filter]

            file_prefix_filter = [f.lower() for f in file_prefix_filter]
            files_to_skip = set(i.filename for i in manifest.file_manifest_list.elements if not
                                any(i.filename.lower().startswith(pfx) for pfx in file_prefix_filter))
            self.log.info(f'Found {len(files_to_skip)} files to skip based on include prefix(es).')
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        if file_prefix_filter or file_exclude_filter or file_install_tag:
            self.log.info(f'Remaining files after filtering: {len(mc.added) + len(mc.changed)}')
            # correct install size after filtering
            analysis_res.install_size = sum(fm.file_size for fm in manifest.file_manifest_list.elements
                                            if fm.filename in mc.added)

        if mc.removed:
            analysis_res.removed = len(mc.removed)
            self.log.debug(f'{analysis_res.removed} removed files')
        if mc.added:
            analysis_res.added = len(mc.added)
            self.log.debug(f'{analysis_res.added} added files')
        if mc.changed:
            analysis_res.changed = len(mc.changed)
            self.log.debug(f'{analysis_res.changed} changed files')
        if mc.unchanged:
            analysis_res.unchanged = len(mc.unchanged)
            self.log.debug(f'{analysis_res.unchanged} unchanged files')

        if processing_optimization and len(manifest.file_manifest_list.elements) > 100_000:
            self.log.warning('Manifest contains too many files, processing optimizations will be disabled.')
            processing_optimization = False
        elif processing_optimization:
            self.log.info('Processing order optimization is enabled, analysis may take a few seconds longer...')

        # count references to chunks for determining runtime cache size later
        references = Counter()
        fmlist = sorted(manifest.file_manifest_list.elements,
                        key=lambda a: a.filename.lower())

        for fm in fmlist:
            self.hash_map[fm.filename] = fm.sha_hash.hex()

            # chunks of unchanged files are not downloaded so we can skip them
            if fm.filename in mc.unchanged:
                analysis_res.unchanged += fm.file_size
                continue

            for cp in fm.chunk_parts:
                references[cp.guid_num] += 1

        if processing_optimization:
            s_time = time.time()
            # reorder the file manifest list to group files that share many chunks
            # 4 is mostly arbitrary but has shown in testing to be a good choice
            min_overlap = 4
            # ignore files with less than N chunk parts, this speeds things up dramatically
            cp_threshold = 5

            remaining_files = {fm.filename: {cp.guid_num for cp in fm.chunk_parts}
                               for fm in fmlist if fm.filename not in mc.unchanged}
            _fmlist = []

            # iterate over all files that will be downloaded and pair up those that share the most chunks
            for fm in fmlist:
                if fm.filename not in remaining_files:
                    continue

                _fmlist.append(fm)
                f_chunks = remaining_files.pop(fm.filename)
                if len(f_chunks) < cp_threshold:
                    continue

                best_overlap, match = 0, None
                for fname, chunks in remaining_files.items():
                    if len(chunks) < cp_threshold:
                        continue
                    overlap = len(f_chunks & chunks)
                    if overlap > min_overlap and overlap > best_overlap:
                        best_overlap, match = overlap, fname

                if match:
                    _fmlist.append(manifest.file_manifest_list.get_file_by_path(match))
                    remaining_files.pop(match)

            fmlist = _fmlist
            opt_delta = time.time() - s_time
            self.log.debug(f'Processing optimizations took {opt_delta:.01f} seconds.')

        # determine reusable chunks and prepare lookup table for reusable ones
        re_usable = defaultdict(dict)
        if old_manifest and mc.changed and patch:
            self.log.debug('Analyzing manifests for re-usable chunks...')
            for changed in mc.changed:
                old_file = old_manifest.file_manifest_list.get_file_by_path(changed)
                new_file = manifest.file_manifest_list.get_file_by_path(changed)

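                # for every chunk part in the old file, record (offset within the
                # file, part start within the chunk, part end within the chunk) so
                # new parts fully contained in an old part can be read from disk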
                existing_chunks = defaultdict(list)
                off = 0
                for cp in old_file.chunk_parts:
                    existing_chunks[cp.guid_num].append((off, cp.offset, cp.offset + cp.size))
                    off += cp.size

                for cp in new_file.chunk_parts:
                    key = (cp.guid_num, cp.offset, cp.size)
                    for file_o, cp_o, cp_end_o in existing_chunks[cp.guid_num]:
                        # check if new chunk part is wholly contained in the old chunk part
                        if cp_o <= cp.offset and (cp.offset + cp.size) <= cp_end_o:
                            references[cp.guid_num] -= 1
                            re_usable[changed][key] = file_o + (cp.offset - cp_o)
                            analysis_res.reuse_size += cp.size
                            break

        last_cache_size = current_cache_size = 0
        # set to determine whether a file is currently cached or not
        cached = set()
        # Using this secondary set is orders of magnitude faster than checking the deque.
        chunks_in_dl_list = set()
        # This is just used to count all unique guids that have been cached
        dl_cache_guids = set()

        # run through the list of files and create the download jobs and also determine minimum
        # runtime cache requirement by simulating adding/removing from cache during download.
        self.log.debug('Creating filetasks and chunktasks...')
        for current_file in fmlist:
            # skip unchanged and empty files
            if current_file.filename in mc.unchanged:
                continue
            elif not current_file.chunk_parts:
                self.tasks.append(FileTask(current_file.filename, empty=True))
                continue

            existing_chunks = re_usable.get(current_file.filename, None)
            chunk_tasks = []
            reused = 0

            for cp in current_file.chunk_parts:
                ct = ChunkTask(cp.guid_num, cp.offset, cp.size)

                # re-use the chunk from the existing file if we can
                if existing_chunks and (cp.guid_num, cp.offset, cp.size) in existing_chunks:
                    reused += 1
                    ct.chunk_file = current_file.filename
                    ct.chunk_offset = existing_chunks[(cp.guid_num, cp.offset, cp.size)]
                else:
                    # add to DL list if not already in it
                    if cp.guid_num not in chunks_in_dl_list:
                        self.chunks_to_dl.append(cp.guid_num)
                        chunks_in_dl_list.add(cp.guid_num)

                    # if chunk has more than one use or is already in cache,
                    # check if we need to add or remove it again.
                    if references[cp.guid_num] > 1 or cp.guid_num in cached:
                        references[cp.guid_num] -= 1

                        # delete from cache if no references left
                        if references[cp.guid_num] < 1:
                            current_cache_size -= analysis_res.biggest_chunk
                            cached.remove(cp.guid_num)
                            ct.cleanup = True
                        # add to cache if not already cached
                        elif cp.guid_num not in cached:
                            dl_cache_guids.add(cp.guid_num)
                            cached.add(cp.guid_num)
                            current_cache_size += analysis_res.biggest_chunk
                    else:
                        ct.cleanup = True

                chunk_tasks.append(ct)

            if reused:
                self.log.debug(f' + Reusing {reused} chunks from: {current_file.filename}')
                # open temporary file that will contain download + old file contents
                self.tasks.append(FileTask(current_file.filename + u'.tmp', fopen=True))
                self.tasks.extend(chunk_tasks)
                self.tasks.append(FileTask(current_file.filename + u'.tmp', close=True))
                # delete old file and rename temporary
                self.tasks.append(FileTask(current_file.filename, delete=True, rename=True,
                                           temporary_filename=current_file.filename + u'.tmp'))
            else:
                self.tasks.append(FileTask(current_file.filename, fopen=True))
                self.tasks.extend(chunk_tasks)
                self.tasks.append(FileTask(current_file.filename, close=True))

            # check if runtime cache size has changed
            if current_cache_size > last_cache_size:
                self.log.debug(f' * New maximum cache size: {current_cache_size / 1024 / 1024:.02f} MiB')
                last_cache_size = current_cache_size

        self.log.debug(f'Final cache size requirement: {last_cache_size / 1024 / 1024:.02f} MiB.')
        analysis_res.min_memory = last_cache_size + (1024 * 1024 * 32)  # add some padding just to be safe

        # Todo implement on-disk caching to avoid this issue.
        if analysis_res.min_memory > self.max_shared_memory:
            shared_mib = f'{self.max_shared_memory / 1024 / 1024:.01f} MiB'
            required_mib = f'{analysis_res.min_memory / 1024 / 1024:.01f} MiB'
            suggested_mib = round(self.max_shared_memory / 1024 / 1024 +
                                  (analysis_res.min_memory - self.max_shared_memory) / 1024 / 1024 + 32)
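            # note: suggested_mib simplifies to min_memory in MiB plus 32 MiB of padding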

            if processing_optimization:
                message = f'Try running legendary with "--enable-reordering --max-shared-memory {suggested_mib:.0f}"'
            else:
                message = 'Try running legendary with "--enable-reordering" to reduce memory usage, ' \
                          f'or use "--max-shared-memory {suggested_mib:.0f}" to increase the limit.'

            raise MemoryError(f'Current shared memory cache is smaller than required: {shared_mib} < {required_mib}. '
                              + message)

        # calculate actual dl and patch write size.
        analysis_res.dl_size = \
            sum(c.file_size for c in manifest.chunk_data_list.elements if c.guid_num in chunks_in_dl_list)
        analysis_res.uncompressed_dl_size = \
            sum(c.window_size for c in manifest.chunk_data_list.elements if c.guid_num in chunks_in_dl_list)

        # add jobs to remove files
        for fname in mc.removed:
            self.tasks.append(FileTask(fname, delete=True))
        self.tasks.extend(additional_deletion_tasks)

        analysis_res.num_chunks_cache = len(dl_cache_guids)
        self.chunk_data_list = manifest.chunk_data_list
        self.analysis = analysis_res

        return analysis_res
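
Both excerpts are methods on legendary's DLManager, so they assume instance state (self.dl_dir, self.resume_file, self.max_shared_memory, self.log, self.tasks, self.chunks_to_dl, self.hash_map) set up elsewhere. Below is a minimal, hedged sketch of driving the analysis, following Example #1's signature. The import paths, the DLManager constructor arguments, and the Manifest.read_all loader are assumptions about the surrounding library and may differ between versions; the AnalysisResult fields read at the end are exactly the ones populated above.

    # Hedged usage sketch -- the names below are assumptions about the
    # surrounding legendary codebase, not part of the excerpt itself.
    from legendary.downloader.manager import DLManager   # assumed import path
    from legendary.models.manifest import Manifest       # assumed import path

    # Constructor arguments are an assumption; run_analysis() itself only reads
    # dl_dir, resume_file, max_shared_memory, log, tasks, chunks_to_dl, hash_map.
    dlm = DLManager(download_dir='/tmp/game',
                    base_url='https://example.invalid/chunks',
                    resume_file='/tmp/game.resume',
                    max_shared_memory=1024 * 1024 * 1024)

    with open('new.manifest', 'rb') as f:
        new_manifest = Manifest.read_all(f.read())  # assumed loader
    with open('old.manifest', 'rb') as f:
        old_manifest = Manifest.read_all(f.read())

    result = dlm.run_analysis(manifest=new_manifest, old_manifest=old_manifest,
                              patch=True, resume=True,
                              file_install_tag=['', 'en-US'])  # '' also keeps untagged files

    print(f'Download size: {result.dl_size / 1024 / 1024:.02f} MiB (compressed)')
    print(f'Install size:  {result.install_size / 1024 / 1024:.02f} MiB')
    print(f'Reused from old install: {result.reuse_size / 1024 / 1024:.02f} MiB')
    print(f'Minimum shared-memory cache: {result.min_memory / 1024 / 1024:.02f} MiB')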
Example #2
    def run_analysis(self,
                     manifest: Manifest,
                     old_manifest: Manifest = None,
                     patch=True,
                     resume=True,
                     file_prefix_filter=None,
                     file_exclude_filter=None,
                     file_install_tag=None) -> AnalysisResult:
        """
        Run analysis on manifest and old manifest (if not None) and return a result
        with a summary of the resources required to install the provided manifest.

        :param manifest: Manifest to install
        :param old_manifest: Old manifest to patch from (if applicable)
        :param patch: Patch instead of redownloading the entire file
        :param resume: Continue based on resume file if it exists
        :param file_prefix_filter: Only download files that start with this prefix
        :param file_exclude_filter: Exclude files with this prefix from download
        :param file_install_tag: Only install files with the specified tag
        :return: AnalysisResult
        """

        analysis_res = AnalysisResult()
        analysis_res.install_size = sum(
            fm.file_size for fm in manifest.file_manifest_list.elements)
        analysis_res.biggest_chunk = max(
            c.window_size for c in manifest.chunk_data_list.elements)
        analysis_res.biggest_file_size = max(
            f.file_size for f in manifest.file_manifest_list.elements)
        is_1mib = analysis_res.biggest_chunk == 1024 * 1024
        self.log.debug(
            f'Biggest chunk size: {analysis_res.biggest_chunk} bytes (== 1 MiB? {is_1mib})'
        )

        self.log.debug('Creating manifest comparison...')
        mc = ManifestComparison.create(manifest, old_manifest)
        analysis_res.manifest_comparison = mc

        if resume and self.resume_file and os.path.exists(self.resume_file):
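            # note: unlike Example #1, this earlier variant treats every line as a
            # bare filename and does not verify that the file still exists or matches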
            try:
                completed_files = set(
                    i.strip() for i in open(self.resume_file).readlines())
                # remove completed files from changed/added and move them to unchanged for the analysis.
                mc.added -= completed_files
                mc.changed -= completed_files
                mc.unchanged |= completed_files
                self.log.debug(
                    f'Skipped {len(completed_files)} files based on resume data!'
                )
            except Exception as e:
                self.log.warning(
                    f'Reading resume file failed: {e!r}, continuing as normal...'
                )

        # Not entirely sure what install tags are used for; only some titles have them.
        # Let's add it for testing anyway.
        if file_install_tag:
            files_to_skip = set(i.filename
                                for i in manifest.file_manifest_list.elements
                                if file_install_tag not in i.install_tags)
            self.log.info(
                f'Found {len(files_to_skip)} files to skip based on install tag.'
            )
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        # if include/exclude prefix has been set: mark all files that are not to be downloaded as unchanged
        if file_exclude_filter:
            file_exclude_filter = file_exclude_filter.lower()
            files_to_skip = set(i for i in mc.added | mc.changed
                                if i.lower().startswith(file_exclude_filter))
            self.log.info(
                f'Found {len(files_to_skip)} files to skip based on exclude prefix.'
            )
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        if file_prefix_filter:
            file_prefix_filter = file_prefix_filter.lower()
            files_to_skip = set(
                i for i in mc.added | mc.changed
                if not i.lower().startswith(file_prefix_filter))
            self.log.info(
                f'Found {len(files_to_skip)} files to skip based on include prefix.'
            )
            mc.added -= files_to_skip
            mc.changed -= files_to_skip
            mc.unchanged |= files_to_skip

        if file_prefix_filter or file_exclude_filter or file_install_tag:
            self.log.info(
                f'Remaining files after filtering: {len(mc.added) + len(mc.changed)}'
            )
            # correct install size after filtering
            analysis_res.install_size = sum(
                fm.file_size for fm in manifest.file_manifest_list.elements
                if fm.filename in mc.added)

        if mc.removed:
            analysis_res.removed = len(mc.removed)
            self.log.debug(f'{analysis_res.removed} removed files')
        if mc.added:
            analysis_res.added = len(mc.added)
            self.log.debug(f'{analysis_res.added} added files')
        if mc.changed:
            analysis_res.changed = len(mc.changed)
            self.log.debug(f'{analysis_res.changed} changed files')
        if mc.unchanged:
            analysis_res.unchanged = len(mc.unchanged)
            self.log.debug(f'{analysis_res.unchanged} unchanged files')

        # count references to chunks for determining runtime cache size later
        references = Counter()
        for fm in manifest.file_manifest_list.elements:
            # chunks of unchanged files are not downloaded so we can skip them
            if fm.filename in mc.unchanged:
                analysis_res.unchanged += fm.file_size
                continue

            for cp in fm.chunk_parts:
                references[cp.guid_num] += 1

        # determine reusable chunks and prepare lookup table for reusable ones
        re_usable = defaultdict(dict)
        if old_manifest and mc.changed and patch:
            self.log.debug('Analyzing manifests for re-usable chunks...')
            for changed in mc.changed:
                old_file = old_manifest.file_manifest_list.get_file_by_path(
                    changed)
                new_file = manifest.file_manifest_list.get_file_by_path(
                    changed)

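                # exact-match reuse only: a chunk part is reused solely when its
                # (guid, offset, size) triple is unchanged (Example #1 relaxes this
                # to containment within an old chunk part)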
                existing_chunks = dict()
                off = 0
                for cp in old_file.chunk_parts:
                    existing_chunks[(cp.guid_num, cp.offset, cp.size)] = off
                    off += cp.size

                for cp in new_file.chunk_parts:
                    key = (cp.guid_num, cp.offset, cp.size)
                    if key in existing_chunks:
                        references[cp.guid_num] -= 1
                        re_usable[changed][key] = existing_chunks[key]
                        analysis_res.reuse_size += cp.size

        last_cache_size = current_cache_size = 0
        # set to determine whether a file is currently cached or not
        cached = set()
        # Using this secondary set is orders of magnitude faster than checking the deque.
        chunks_in_dl_list = set()
        # This is just used to count all unique guids that have been cached
        dl_cache_guids = set()

        # run through the list of files and create the download jobs and also determine minimum
        # runtime cache requirement by simulating adding/removing from cache during download.
        self.log.debug('Creating filetasks and chunktasks...')
        for current_file in sorted(manifest.file_manifest_list.elements,
                                   key=lambda a: a.filename.lower()):
            # skip unchanged and empty files
            if current_file.filename in mc.unchanged:
                continue
            elif not current_file.chunk_parts:
                self.tasks.append(FileTask(current_file.filename, empty=True))
                continue

            existing_chunks = re_usable.get(current_file.filename, None)
            chunk_tasks = []
            reused = 0

            for cp in current_file.chunk_parts:
                ct = ChunkTask(cp.guid_num, cp.offset, cp.size)

                # re-use the chunk from the existing file if we can
                if existing_chunks and (cp.guid_num, cp.offset,
                                        cp.size) in existing_chunks:
                    reused += 1
                    ct.chunk_file = current_file.filename
                    ct.chunk_offset = existing_chunks[(cp.guid_num, cp.offset,
                                                       cp.size)]
                else:
                    # add to DL list if not already in it
                    if cp.guid_num not in chunks_in_dl_list:
                        self.chunks_to_dl.append(cp.guid_num)
                        chunks_in_dl_list.add(cp.guid_num)

                    # if chunk has more than one use or is already in cache,
                    # check if we need to add or remove it again.
                    if references[cp.guid_num] > 1 or cp.guid_num in cached:
                        references[cp.guid_num] -= 1

                        # delete from cache if no references left
                        if references[cp.guid_num] < 1:
                            current_cache_size -= analysis_res.biggest_chunk
                            cached.remove(cp.guid_num)
                            ct.cleanup = True
                        # add to cache if not already cached
                        elif cp.guid_num not in cached:
                            dl_cache_guids.add(cp.guid_num)
                            cached.add(cp.guid_num)
                            current_cache_size += analysis_res.biggest_chunk
                    else:
                        ct.cleanup = True

                chunk_tasks.append(ct)

            if reused:
                self.log.debug(
                    f' + Reusing {reused} chunks from: {current_file.filename}'
                )
                # open temporary file that will contain download + old file contents
                self.tasks.append(
                    FileTask(current_file.filename + u'.tmp', fopen=True))
                self.tasks.extend(chunk_tasks)
                self.tasks.append(
                    FileTask(current_file.filename + u'.tmp', close=True))
                # delete old file and rename temporary
                self.tasks.append(
                    FileTask(current_file.filename,
                             delete=True,
                             rename=True,
                             temporary_filename=current_file.filename +
                             u'.tmp'))
            else:
                self.tasks.append(FileTask(current_file.filename, fopen=True))
                self.tasks.extend(chunk_tasks)
                self.tasks.append(FileTask(current_file.filename, close=True))

            # check if runtime cache size has changed
            if current_cache_size > last_cache_size:
                self.log.debug(
                    f' * New maximum cache size: {current_cache_size / 1024 / 1024:.02f} MiB'
                )
                last_cache_size = current_cache_size

        self.log.debug(
            f'Final cache size requirement: {last_cache_size / 1024 / 1024:.02f} MiB.'
        )
        analysis_res.min_memory = last_cache_size + (
            1024 * 1024 * 32)  # add some padding just to be safe

        # Todo implement on-disk caching to avoid this issue.
        if analysis_res.min_memory > self.max_shared_memory:
            shared_mib = f'{self.max_shared_memory / 1024 / 1024:.01f} MiB'
            required_mib = f'{analysis_res.min_memory / 1024 / 1024:.01f} MiB'
            raise MemoryError(
                f'Current shared memory cache is smaller than required! {shared_mib} < {required_mib}'
            )

        # calculate actual dl and patch write size.
        analysis_res.dl_size = \
            sum(c.file_size for c in manifest.chunk_data_list.elements if c.guid_num in chunks_in_dl_list)
        analysis_res.uncompressed_dl_size = \
            sum(c.window_size for c in manifest.chunk_data_list.elements if c.guid_num in chunks_in_dl_list)

        # add jobs to remove files
        for fname in mc.removed:
            self.tasks.append(FileTask(fname, delete=True))

        analysis_res.num_chunks_cache = len(dl_cache_guids)
        self.chunk_data_list = manifest.chunk_data_list
        self.analysis = analysis_res

        return analysis_res
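
Example #2 is an earlier revision of the same method: resume entries are trusted as bare filenames, install tags are matched as a single string, chunk reuse requires an exact (guid, offset, size) match instead of containment, and it lacks the processing-order optimization, the install-tag deletion tasks, and the shared-memory suggestion in the error message. Neither excerpt shows the writer side of the resume file; from Example #1's parser (partition(':') plus the hash_map filled in during analysis), a compatible writer might look like this hypothetical helper:

    # Hypothetical helper (not part of either excerpt): append a resume entry
    # in the '<sha1 hex>:<filename>' format that Example #1's parser expects.
    def record_completed_file(resume_file: str, sha1_hex: str, filename: str) -> None:
        with open(resume_file, 'a', encoding='utf-8') as rf:
            rf.write(f'{sha1_hex}:{filename}\n')

    # e.g. once a file has finished downloading:
    # record_completed_file(dlm.resume_file, dlm.hash_map[finished_file], finished_file)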