def read_file_or_url(in_file_or_url, path_searcher=None, encoding='utf-8', save_to_path=None, checksum=None):
    need_to_download = not utils.check_file_checksum(save_to_path, checksum)
    if not need_to_download:
        # if save_to_path contains the correct data just read it by recursively
        # calling read_file_or_url
        return read_file_or_url(save_to_path, encoding=encoding)

    match = protocol_header_re.match(in_file_or_url)
    if not match:  # it's a local file
        local_file_path = in_file_or_url
        if path_searcher is not None:
            local_file_path = path_searcher.find_file(local_file_path)
        if local_file_path:
            if 'Win' in utils.get_current_os_names():
                local_file_path = os.path.abspath(local_file_path)
            else:
                local_file_path = os.path.realpath(local_file_path)
        else:
            raise FileNotFoundError(f"Could not locate local file {in_file_or_url}")
        if encoding is None:
            with open(local_file_path, "rb") as fd:
                buffer = fd.read()
        else:
            with open(local_file_path, "r", encoding=encoding) as fd:
                buffer = fd.read()
    else:
        session = pyinstl.connectionBase.connection_factory().get_session(in_file_or_url)
        response = session.get(in_file_or_url, timeout=(33.05, 180.05))
        response.raise_for_status()
        buffer = response.text

    buffer = utils.unicodify(buffer)  # make sure text is unicode
    if save_to_path and in_file_or_url != save_to_path:
        with open(save_to_path, "w") as wfd:
            wfd.write(buffer)
    return buffer

def download_and_cache_file_or_url(in_url, cache_folder, translate_url_callback=None, expected_checksum=None):
    """ download file to given cache folder
        if a checksum is supplied and a file with that checksum exists in the
        cache folder - download can be avoided, otherwise download the file
        :return: path of the downloaded file
    """
    if os.path.isfile(cache_folder):  # happens sometimes...
        safe_remove_file(cache_folder)
    if not os.path.isdir(cache_folder):
        os.makedirs(cache_folder, exist_ok=True)

    url_file_name = last_url_item(in_url)
    cached_file_name = expected_checksum if expected_checksum else url_file_name
    cached_file_path = os.path.join(cache_folder, cached_file_name)
    if expected_checksum is None:  # no checksum? -> force download
        safe_remove_file(cached_file_path)

    if os.path.isfile(cached_file_path):  # file exists? -> make sure it has the right checksum
        if not utils.check_file_checksum(cached_file_path, expected_checksum):
            safe_remove_file(cached_file_path)

    if not os.path.isfile(cached_file_path):  # need to download
        contents_buffer = read_from_file_or_url(in_url, translate_url_callback, expected_checksum, encoding=None)
        if contents_buffer:
            with open(cached_file_path, "wb") as wfd:
                make_open_file_read_write_for_all(wfd)
                wfd.write(contents_buffer)
    return cached_file_path

def read_include_node(self, i_node, *args, **kwargs):
    if i_node.isScalar():
        kwargs['original-path-to-file'] = i_node.value
        resolved_file_name = config_vars.resolve_str(i_node.value)
        self.read_yaml_file(resolved_file_name, *args, **kwargs)
    elif i_node.isSequence():
        for sub_i_node in i_node:
            self.read_include_node(sub_i_node, *args, **kwargs)
    elif i_node.isMapping():
        if "url" in i_node:
            file_was_downloaded_and_read = False
            kwargs['original-path-to-file'] = i_node["url"].value
            resolved_file_url = config_vars.resolve_str(i_node["url"].value)
            expected_checksum = None
            if "checksum" in i_node:
                expected_checksum = config_vars.resolve_str(i_node["checksum"].value)

            try:
                file_path = utils.download_from_file_or_url(in_url=resolved_file_url,
                                                            config_vars=config_vars,
                                                            in_target_path=None,
                                                            translate_url_callback=connectionBase.translate_url,
                                                            cache_folder=self.get_aux_cache_dir(make_dir=True),
                                                            expected_checksum=expected_checksum)
                self.read_yaml_file(file_path, *args, **kwargs)
                file_was_downloaded_and_read = True
            except (FileNotFoundError, urllib.error.URLError):
                ignore = kwargs.get('ignore_if_not_exist', False)
                if ignore:
                    self.progress(f"'ignore_if_not_exist' specified, ignoring FileNotFoundError for {resolved_file_url}")
                else:
                    raise

            if "copy" in i_node and file_was_downloaded_and_read:
                self.batch_accum.set_current_section('post')
                for copy_destination in i_node["copy"]:
                    need_to_copy = True
                    destination_file_resolved_path = utils.ExpandAndResolvePath(config_vars.resolve_str(copy_destination.value))
                    if destination_file_resolved_path.is_file() and expected_checksum is not None:
                        checksums_match = utils.check_file_checksum(file_path=destination_file_resolved_path,
                                                                    expected_checksum=expected_checksum)
                        need_to_copy = not checksums_match
                    if need_to_copy:
                        self.batch_accum += MakeDir(destination_file_resolved_path.parent, chowner=True)
                        self.batch_accum += CopyFileToFile(file_path, destination_file_resolved_path,
                                                           hard_links=False, copy_owner=True)

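# Illustrative only: the three YAML shapes read_include_node accepts, matching
# the isScalar/isSequence/isMapping branches above. The key names "url",
# "checksum" and "copy" come from the code; the paths, URL and checksum values
# are invented.
EXAMPLE_INCLUDE_NODES = """
- more_definitions.yaml                 # scalar: resolved and read directly
- [first.yaml, second.yaml]             # sequence: each element handled recursively
- url: $(REPO_URL)/remote_defs.yaml     # mapping: downloaded to the aux cache dir,
  checksum: 1a2b3c4d5e6f                # verified against the checksum, and then
  copy:                                 # optionally copied in the 'post' section
    - $(INSTALL_DIR)/remote_defs.yaml
"""
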
def read_include_node(self, i_node, *args, **kwargs):
    if i_node.isScalar():
        resolved_file_name = var_stack.ResolveStrToStr(i_node.value)
        self.read_yaml_file(resolved_file_name, *args, **kwargs)
    elif i_node.isSequence():
        for sub_i_node in i_node:
            self.read_include_node(sub_i_node, *args, **kwargs)
    elif i_node.isMapping():
        if "url" in i_node:
            cached_files_dir = self.get_default_sync_dir(continue_dir="cache", make_dir=True)
            resolved_file_url = var_stack.ResolveStrToStr(i_node["url"].value)
            cached_file_path = None
            expected_checksum = None
            if "checksum" in i_node:
                expected_checksum = var_stack.ResolveStrToStr(i_node["checksum"].value)
                cached_file_path = os.path.join(cached_files_dir, expected_checksum)

            expected_signature = None
            public_key_text = None
            if "sig" in i_node:
                expected_signature = var_stack.ResolveStrToStr(i_node["sig"].value)
                public_key_text = self.provision_public_key_text()

            if expected_checksum is None:
                self.read_yaml_file(resolved_file_url, *args, **kwargs)
                cached_file_path = resolved_file_url
            else:
                try:
                    utils.download_from_file_or_url(resolved_file_url, cached_file_path,
                                                    connectionBase.translate_url, cache=True,
                                                    public_key=public_key_text,
                                                    textual_sig=expected_signature,
                                                    expected_checksum=expected_checksum)
                    self.read_yaml_file(cached_file_path, *args, **kwargs)
                except (FileNotFoundError, urllib.error.URLError):
                    ignore = kwargs.get('ignore_if_not_exist', False)
                    if ignore:
                        print("'ignore_if_not_exist' specified, ignoring FileNotFoundError for", resolved_file_url)
                    else:
                        raise

            if "copy" in i_node:
                self.batch_accum.set_current_section('post')
                for copy_destination in i_node["copy"]:
                    need_to_copy = True
                    destination_file_resolved_path = var_stack.ResolveStrToStr(copy_destination.value)
                    if os.path.isfile(destination_file_resolved_path) and expected_checksum is not None:
                        checksums_match = utils.check_file_checksum(file_path=destination_file_resolved_path,
                                                                    expected_checksum=expected_checksum)
                        need_to_copy = not checksums_match
                    if need_to_copy:
                        destination_folder, destination_file_name = os.path.split(copy_destination.value)
                        self.batch_accum += self.platform_helper.mkdir(destination_folder)
                        self.batch_accum += self.platform_helper.copy_tool.copy_file_to_file(
                            cached_file_path,
                            var_stack.ResolveStrToStr(copy_destination.value),
                            link_dest=True)

def can_copy_be_avoided(self, dir_item, source_items):
    retVal = False
    if "__REPAIR_INSTALLED_ITEMS__" not in self.main_install_targets:
        # look for Info.xml as first choice, Info.plist as second choice
        info_item = next((i for i in source_items if i.leaf == "Info.xml"), None) \
                    or next((i for i in source_items if i.leaf == "Info.plist"), None)
        if info_item:  # without an info item retVal stays False
            destination_folder = var_stack.ResolveStrToStr(self.current_destination_folder)
            dir_item_parent, dir_item_leaf = os.path.split(var_stack.ResolveStrToStr(dir_item.path))
            # e.g. dir_item.path "A/B" and info_item.path "A/B/C/Info.xml"
            # yield destination_folder/B/C/Info.xml
            info_item_abs_path = os.path.join(destination_folder, dir_item_leaf, info_item.path[len(dir_item.path)+1:])
            retVal = utils.check_file_checksum(info_item_abs_path, info_item.checksum)
    return retVal

def __call__(self, *args, **kwargs):
    PythonBatchCommandBase.__call__(self, *args, **kwargs)
    session = kwargs['session']
    with MakeDir(self.path.parent, report_own_progress=False) as dir_maker:
        dir_maker()
    with open(self.path, "wb") as fo:
        timeout_seconds = int(config_vars.get("CURL_MAX_TIME", 480))
        read_data = session.get(self.url, timeout=timeout_seconds)
        # must raise in case of an error. Server might return json/xml with error details, we do not want that
        read_data.raise_for_status()
        fo.write(read_data.content)
    checksum_ok = utils.check_file_checksum(self.path, self.checksum)
    if not checksum_ok:
        raise ValueError(f"bad checksum for {self.path} even after re-download")

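# A minimal usage sketch for the download command above. The class name
# DownloadFileAndCheckChecksum and its constructor arguments are assumptions
# for illustration; only the calling convention - a requests-style 'session'
# passed through kwargs - is taken from the code.
from pathlib import Path
import requests

url = "https://example.com/files/plugin.bundle"  # invented
path = Path("/tmp/plugin.bundle")
checksum = "1a2b3c4d5e6f"                        # invented

with requests.Session() as session:
    # context-manager use mirrors the MakeDir pattern in the code above
    with DownloadFileAndCheckChecksum(url, path, checksum) as dl:  # hypothetical ctor
        dl(session=session)  # raises ValueError if the checksum still mismatches
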
def unwtar_with_checks(tar_files, target_folder, tar_real_name):
    with utils.Timer_CM("unwtar_with_checks") as utc:
        ok_files = 0
        to_untar_files = 0
        with MultiFileReader("br", tar_files) as fd:
            with tarfile.open(fileobj=fd) as tar:
                the_pax_headers = tar.pax_headers
                for item in tar.getmembers():
                    checksum_good = utils.check_file_checksum(os.path.join(target_folder, item.path),
                                                              the_pax_headers[item.path])
                    if not checksum_good:
                        to_untar_files += 1
                        tar.extract(item, target_folder)
                    else:
                        ok_files += 1
        print(" ", "unwtar_with_checks:", tar_files[0], to_untar_files, "files unwtarred,", ok_files, "not unwtarred")

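# Usage sketch with invented part names: MultiFileReader presents the split
# .wtar parts as one stream, and members whose on-disk copy already matches
# the per-member checksum recorded in the tar's pax headers are skipped.
unwtar_with_checks(
    tar_files=["MyPackage.wtar.aa", "MyPackage.wtar.ab"],
    target_folder="/Applications/MyPackage",
    tar_real_name="MyPackage.wtar")
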
def read_file_or_url(in_file_or_url, config_vars, path_searcher=None, encoding='utf-8', save_to_path=None, checksum=None, connection_obj=None):
    need_to_download = not utils.check_file_checksum(save_to_path, checksum)
    if not need_to_download:
        # if save_to_path contains the correct data just read it by recursively
        # calling read_file_or_url
        return read_file_or_url(save_to_path, config_vars, encoding=encoding)

    match = protocol_header_re.match(os.fspath(in_file_or_url))
    actual_file_path = in_file_or_url
    if not match:  # it's a local file
        if path_searcher is not None:
            actual_file_path = path_searcher.find_file(actual_file_path)
        if actual_file_path:
            if 'Win' in utils.get_current_os_names():
                actual_file_path = os.path.abspath(actual_file_path)
            else:
                actual_file_path = os.path.realpath(actual_file_path)
        else:
            raise FileNotFoundError(f"Could not locate local file {in_file_or_url}")
        if encoding is None:
            read_mod = "rb"
        else:
            read_mod = "r"
        with open(actual_file_path, read_mod, encoding=encoding) as rdf:
            buffer = rdf.read()
    else:
        assert connection_obj, "no connection_obj given"
        session = connection_obj.get_session(in_file_or_url)
        response = session.get(in_file_or_url, timeout=(33.05, 180.05))
        response.raise_for_status()
        buffer = response.text

    buffer = utils.unicodify(buffer)  # make sure text is unicode
    if save_to_path and in_file_or_url != save_to_path:
        with open(save_to_path, "w") as wfd:
            utils.chown_chmod_on_fd(wfd)
            wfd.write(buffer)
    return buffer, actual_file_path

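# Usage sketches for read_file_or_url; paths, URL and checksum are invented.
# Local file: returns the decoded text and the path actually read.
text, resolved_path = read_file_or_url("defaults/main.yaml", config_vars)

# URL: a connection object exposing get_session() is required. Reusing
# pyinstl.connectionBase.connection_factory() here is an assumption based on
# the older variant above.
text, _ = read_file_or_url("https://example.com/index.yaml", config_vars,
                           save_to_path="/tmp/index.yaml",  # cache the result...
                           checksum="1a2b3c4d5e6f",         # ...and skip the fetch when it already matches
                           connection_obj=pyinstl.connectionBase.connection_factory())
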
def download_and_cache_file_or_url(in_url, config_vars, cache_folder: Path, translate_url_callback=None, expected_checksum=None):
    """ download file to given cache folder
        if a checksum is supplied and a file with that checksum exists in the
        cache folder - download can be avoided, otherwise download the file
        :return: path of the downloaded file
    """
    if cache_folder.is_file():  # happens sometimes...
        safe_remove_file(cache_folder)
    cache_folder.mkdir(parents=True, exist_ok=True)

    url_file_name = last_url_item(in_url)
    cached_file_name = expected_checksum if expected_checksum else url_file_name
    cached_file_path = cache_folder.joinpath(cached_file_name)
    if expected_checksum is None:  # no checksum? -> force download
        safe_remove_file(cached_file_path)

    if cached_file_path.is_file():  # file exists? -> make sure it has the right checksum
        if not utils.check_file_checksum(cached_file_path, expected_checksum):
            safe_remove_file(cached_file_path)

    if not cached_file_path.is_file():  # need to download
        contents_buffer = read_from_file_or_url(in_url, config_vars, translate_url_callback, expected_checksum, encoding=None)
        if contents_buffer:
            with open(cached_file_path, "wb") as wfd:
                chown_chmod_on_fd(wfd)
                wfd.write(contents_buffer)
    return cached_file_path

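# Usage sketch; the URL and checksum are invented. Because the cached file is
# named after the checksum when one is given, a repeated call finds the file,
# verifies it, and skips the download entirely.
from pathlib import Path

cached = download_and_cache_file_or_url(
    "https://example.com/repo/index.yaml", config_vars,
    cache_folder=Path("/var/cache/instl"),
    expected_checksum="1a2b3c4d5e6f")  # passing None forces a fresh download
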
def set_sync_locations_for_active_items(self):
    # get_sync_folders_and_sources_for_active_iids returns:
    #   [(iid, direct_sync_indicator, source, source_tag, install_folder), ...]
    # direct_sync_indicator will be None unless the item has a "direct_sync" section in index.yaml
    # source is the relative path as it appears in index.yaml
    # adjusted source is the source prefixed with $(SOURCE_PREFIX) -- if needed
    # source_tag is one of '!dir', '!dir_cont', '!file'
    # install_folder is where the sources should be copied to OR, in case of direct sync, where they should be synced to
    # install_folder will be None for those items that require only sync not copy (such as Icons)
    #
    # for each file item in the source this function will set the full path where to download the file: item.download_path
    # and the top folder common to all items in a single source: item.download_root
    sync_and_source = self.items_table.get_sync_folders_and_sources_for_active_iids()
    items_to_update = list()
    local_repo_sync_dir = os.fspath(config_vars["LOCAL_REPO_SYNC_DIR"])
    config_vars.setdefault("ALL_SYNC_DIRS", local_repo_sync_dir)
    for iid, direct_sync_indicator, source, source_tag, install_folder in sync_and_source:
        direct_sync = self.get_direct_sync_status_from_indicator(direct_sync_indicator)
        resolved_source_parts = source.split("/")
        if install_folder:
            resolved_install_folder = config_vars.resolve_str(install_folder)
        else:
            resolved_install_folder = install_folder

        if source_tag in ('!dir', '!dir_cont'):
            if direct_sync:
                # for direct-sync source, if one of the sources is Info.xml and it exists on disk AND source & file
                # have the same checksum, then no sync is needed at all. All the above is not relevant in repair mode.
                need_to_sync = True
                if not self.update_mode:
                    info_xml_item = self.info_map_table.get_file_item("/".join((source, "Info.xml")))
                    if info_xml_item:
                        info_xml_of_target = config_vars.resolve_str("/".join((resolved_install_folder, resolved_source_parts[-1], "Info.xml")))
                        need_to_sync = not utils.check_file_checksum(info_xml_of_target, info_xml_item.checksum)

                if need_to_sync:
                    config_vars["ALL_SYNC_DIRS"].append(resolved_install_folder)
                    item_paths = self.info_map_table.get_recursive_paths_in_dir(dir_path=source, what="any")
                    self.progress(f"mark for download {len(item_paths)} files of {iid}/{source}")
                    if source_tag == '!dir':
                        source_parent = "/".join(resolved_source_parts[:-1])
                        for item in item_paths:
                            items_to_update.append({
                                "_id": item['_id'],
                                "download_path": config_vars.resolve_str("/".join((resolved_install_folder, item['path'][len(source_parent) + 1:]))),
                                "download_root": config_vars.resolve_str("/".join((resolved_install_folder, resolved_source_parts[-1])))})
                    else:  # !dir_cont
                        source_parent = source
                        for item in item_paths:
                            items_to_update.append({
                                "_id": item['_id'],
                                "download_path": config_vars.resolve_str("/".join((resolved_install_folder, item['path'][len(source_parent) + 1:]))),
                                "download_root": resolved_install_folder})
                else:
                    num_ignored_files = self.info_map_table.ignore_file_paths_of_dir(dir_path=source)
                    if num_ignored_files < 1:
                        num_ignored_files = ""  # sqlite curs.rowcount does not always return the number of affected rows
                    self.progress(f"avoid download {num_ignored_files} files of {iid}, Info.xml has not changed")
            else:
                item_paths = self.info_map_table.get_recursive_paths_in_dir(dir_path=source)
                self.progress(f"mark for download {len(item_paths)} files of {iid}/{source}")
                for item in item_paths:
                    items_to_update.append({
                        "_id": item['_id'],
                        "download_path": config_vars.resolve_str("/".join((local_repo_sync_dir, item['path']))),
                        "download_root": None})
        elif source_tag == '!file':
            # if the file was wtarred and split it would have multiple items
            items_for_file = self.info_map_table.get_required_paths_for_file(source)
            self.progress(f"mark for download {len(items_for_file)} files of {iid}/{source}")
            if direct_sync:
                config_vars["ALL_SYNC_DIRS"].append(resolved_install_folder)
                for item in items_for_file:
                    items_to_update.append({
                        "_id": item['_id'],
                        "download_path": config_vars.resolve_str("/".join((resolved_install_folder, item['leaf']))),
                        "download_root": config_vars.resolve_str(item.download_path)})
            else:
                for item in items_for_file:
                    items_to_update.append({
                        "_id": item['_id'],
                        "download_path": config_vars.resolve_str("/".join((local_repo_sync_dir, item['path']))),
                        "download_root": None})  # no need to set item.download_root here - it will not be used
    self.info_map_table.update_downloads(items_to_update)

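# Worked example (invented paths) of the '!dir' vs '!dir_cont' arithmetic above,
# for an item "Plugins/Reverb/Data/p.dat" under source "Plugins/Reverb",
# direct-synced into install folder "/Library/Audio":
source = "Plugins/Reverb"
item_path = "Plugins/Reverb/Data/p.dat"
install = "/Library/Audio"

dir_parent = "/".join(source.split("/")[:-1])  # '!dir' strips only "Plugins"
assert "/".join((install, item_path[len(dir_parent) + 1:])) == "/Library/Audio/Reverb/Data/p.dat"
assert "/".join((install, item_path[len(source) + 1:])) == "/Library/Audio/Data/p.dat"  # '!dir_cont' drops the folder itself
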
def set_sync_locations_for_active_items(self):
    # get_sync_folders_and_sources_for_active_iids returns:
    #   [(iid, direct_sync_indicator, source, source_tag, install_folder), ...]
    # direct_sync_indicator will be None unless the item has a "direct_sync" section in index.yaml
    # source is the relative path as it appears in index.yaml
    # adjusted source is the source prefixed with $(SOURCE_PREFIX) -- if needed
    # source_tag is one of '!dir', '!dir_cont', '!file'
    # install_folder is where the sources should be copied to OR, in case of direct sync, where they should be synced to
    # install_folder will be None for those items that require only sync not copy (such as Icons)
    #
    # for each file item in the source this function will set the full path where to download the file: item.download_path
    # and the top folder common to all items in a single source: item.download_root
    sync_and_source = self.items_table.get_sync_folders_and_sources_for_active_iids()
    items_to_update = list()
    for iid, direct_sync_indicator, source, source_tag, install_folder in sync_and_source:
        direct_sync = self.get_direct_sync_status_from_indicator(direct_sync_indicator)
        resolved_source_parts = source.split("/")
        if install_folder:
            resolved_install_folder = var_stack.ResolveStrToStr(install_folder)
        else:
            resolved_install_folder = install_folder
        local_repo_sync_dir = var_stack.ResolveVarToStr("LOCAL_REPO_SYNC_DIR")

        if source_tag in ('!dir', '!dir_cont'):
            if direct_sync:
                # for direct-sync source, if one of the sources is Info.xml and it exists on disk AND source & file
                # have the same checksum, then no sync is needed at all. All the above is not relevant in repair mode.
                need_to_sync = True
                if "__REPAIR_INSTALLED_ITEMS__" not in self.main_install_targets:
                    info_xml_item = self.info_map_table.get_file_item("/".join((source, "Info.xml")))
                    if info_xml_item:
                        info_xml_of_target = var_stack.ResolveStrToStr("/".join((resolved_install_folder, resolved_source_parts[-1], "Info.xml")))
                        need_to_sync = not utils.check_file_checksum(info_xml_of_target, info_xml_item.checksum)

                if need_to_sync:
                    item_paths = self.info_map_table.get_file_paths_of_dir(dir_path=source)
                    if source_tag == '!dir':
                        source_parent = "/".join(resolved_source_parts[:-1])
                        for item in item_paths:
                            items_to_update.append({
                                "_id": item['_id'],
                                "download_path": var_stack.ResolveStrToStr("/".join((resolved_install_folder, item['path'][len(source_parent) + 1:]))),
                                "download_root": var_stack.ResolveStrToStr("/".join((resolved_install_folder, resolved_source_parts[-1])))})
                    else:  # !dir_cont
                        source_parent = source
                        for item in item_paths:
                            items_to_update.append({
                                "_id": item['_id'],
                                "download_path": var_stack.ResolveStrToStr("/".join((resolved_install_folder, item['path'][len(source_parent) + 1:]))),
                                "download_root": resolved_install_folder})
                else:
                    num_ignored_files = self.info_map_table.ignore_file_paths_of_dir(dir_path=source)
                    if num_ignored_files < 1:
                        num_ignored_files = ""  # sqlite curs.rowcount does not always return the number of affected rows
                    self.progress("avoid download {} files of {}, Info.xml has not changed".format(num_ignored_files, iid))
            else:
                item_paths = self.info_map_table.get_file_paths_of_dir(dir_path=source)
                for item in item_paths:
                    items_to_update.append({
                        "_id": item['_id'],
                        "download_path": var_stack.ResolveStrToStr("/".join((local_repo_sync_dir, item['path']))),
                        "download_root": None})
        elif source_tag == '!file':
            # if the file was wtarred and split it would have multiple items
            items_for_file = self.info_map_table.get_required_paths_for_file(source)
            if direct_sync:
                for item in items_for_file:
                    items_to_update.append({
                        "_id": item['_id'],
                        "download_path": var_stack.ResolveStrToStr("/".join((resolved_install_folder, item['leaf']))),
                        "download_root": var_stack.ResolveStrToStr(item.download_path)})
            else:
                for item in items_for_file:
                    items_to_update.append({
                        "_id": item['_id'],
                        "download_path": var_stack.ResolveStrToStr("/".join((local_repo_sync_dir, item['path']))),
                        "download_root": None})  # no need to set item.download_root here - it will not be used
    self.info_map_table.update_downloads(items_to_update)