def save_skeleton(path: PathLike, adjacency: List[List[int]], coordinates: np.ndarray):
    """Pickle a skeleton (adjacency lists plus coordinates) into a new file.

    The pickled object is a dict with the keys ``'adjacency'`` (stored as
    given) and ``'coordinates'`` (``coordinates.tolist()``).

    :param path: destination file; it must not exist yet.
    :param adjacency: adjacency lists of the skeleton.
    :param coordinates: coordinate array, converted to nested lists.
    :raises FileExistsError: if ``path`` already exists.
    """
    path = Path(path)
    skeleton = {'adjacency': adjacency, 'coordinates': coordinates.tolist()}
    # Exclusive-create mode refuses to overwrite in a single atomic step.
    # The former ``assert not path.exists()`` disappeared under ``python -O``
    # and left a gap between the check and the open.
    with path.open('xb') as file:
        pickle.dump(skeleton, file)
def write_loom(filename: PathLike, adata: AnnData, write_obsm_varm: bool = False):
    """Write ``adata`` to a loom file using ``loompy.create``.

    ``adata.var`` columns become row attributes and ``adata.obs`` columns
    become column attributes; each index is stored under its own name, or
    under ``"var_names"`` / ``"obs_names"`` when it is unnamed. ``adata.X`` and
    every layer are written transposed. An existing file is replaced.

    :param filename: destination loom file.
    :param adata: annotated data to export.
    :param write_obsm_varm: also export ``obsm`` / ``varm`` entries as
        attributes; when False and such entries exist, a warning is logged.
    :raises ValueError: if ``adata.X`` is None.
    """
    filename = Path(filename)

    def _axis_attrs(frame, index, fallback_name):
        # One array per annotation column, plus the index under its own name.
        attrs = {
            column: np.array(values)
            for column, values in frame.to_dict("list").items()
        }
        dim = fallback_name if index.name is None else index.name
        attrs[dim] = index.values
        return attrs

    row_attrs = _axis_attrs(adata.var, adata.var_names, "var_names")
    col_attrs = _axis_attrs(adata.obs, adata.obs_names, "obs_names")

    if adata.X is None:
        raise ValueError("loompy does not accept empty matrices as data")

    if write_obsm_varm:
        col_attrs.update({key: adata.obsm[key] for key in adata.obsm.keys()})
        row_attrs.update({key: adata.varm[key] for key in adata.varm.keys()})
    elif len(adata.obsm.keys()) > 0 or len(adata.varm.keys()) > 0:
        dropped = adata.obsm.keys() | adata.varm.keys()
        logger.warning(
            f"The loom file will lack these fields:\n"
            f"{dropped}\n"
            f"Use write_obsm_varm=True to export multi-dimensional annotations"
        )

    # The unnamed layer "" carries X.
    layers = {"": adata.X.T}
    layers.update({key: adata.layers[key].T for key in adata.layers.keys()})

    from loompy import create

    if filename.exists():
        filename.unlink()
    create(fspath(filename), layers, row_attrs=row_attrs, col_attrs=col_attrs)
def __init__(self, path: os.PathLike = pathlib.Path.home() / ".scidd_cache", name: str = "_SciDD_API_Cache.sqlite"):
    """Locate (and create if needed) the cache directory, then set up the database.

    :param path: directory that holds the cache file; created when missing.
    :param name: file name of the cache database inside ``path``.
    :raises OSError: if the directory cannot be created.
    :raises Exception: if ``path`` is a symlink whose target does not exist.
    """
    # Accept any path-like, plain strings included: the "/" join and the
    # exists()/is_symlink() calls below need a pathlib.Path.
    path = pathlib.Path(path)
    self._dbFilepath = path / name  # the full path + filename for the cache

    # create database path if needed
    if not path.exists():
        try:
            os.makedirs(path)
        except FileExistsError as e:
            # Raised e.g. for a dangling symlink; handled just below.
            logger.debug(
                f"Path '{path}' appears not to exist, but 'os.makedirs(path)' is raising FileExistsError: {e}"
            )
        except OSError as e:
            raise OSError(
                f"Unable to create specified path '{path}'; error: {e} ") from e

    # Path.exists() follows symlinks, so a symlink that does not "exist" is a
    # broken link. The earlier os.path.exists(os.readlink(path)) resolved a
    # relative link target against the current working directory instead of
    # the link's own directory.
    if path.is_symlink() and not path.exists():
        raise Exception(
            f"The path where the SciDD cache is expected ('{path}') is symlink pointing to a target that is no longer there. "
            + "Either remove the symlink or fix the destination.")

    self._initialize_database()
def create_dump_file(self, workingdirypath: PathLike, filepath: PathLike) -> bool:
    """Dump the configured MySQL database to ``filepath`` via ``mysqldump``.

    The dump is first written to ``<workingdirypath>/<filepath.stem>.sql``.
    For a ``.zip`` target it is then compressed into ``filepath`` and the
    temporary file removed; otherwise it is moved to ``filepath``.

    :param workingdirypath: existing directory (``Path``) for the temporary dump.
    :param filepath: target ``Path`` ending in .sql, .txt, .dump or .zip.
    :return: True on success; False on invalid arguments or any failure.
    """
    if not isinstance(filepath, Path):
        return False
    ext = filepath.suffix
    if ext not in ('.sql', '.txt', '.dump', '.zip'):
        return False
    # ``is_dir`` has to be *called*: the bare bound method is always truthy,
    # which silently disabled this check.
    if not isinstance(workingdirypath, Path) or not workingdirypath.is_dir():
        return False
    try:
        tmpfilepath = workingdirypath.joinpath(filepath.stem + '.sql')
        # Argument list + explicit stdout instead of a shell string, so that
        # spaces or shell metacharacters in the parameters or in the path
        # cannot break (or inject into) the command line.
        cmd = [
            'mysqldump',
            '-h', str(self.dbparams.db_host),
            '-P', str(self.dbparams.db_port),
            '-u', str(self.dbparams.db_user),
            '-p{}'.format(self.dbparams.db_passwd),
            '--skip-comments',
            str(self.dbparams.db_name),
        ]
        with open(tmpfilepath, 'wb') as dumpfile:
            subprocess.check_call(cmd, stdout=dumpfile)
        if ext == '.zip':
            with ZipFile(filepath, 'w', ZIP_DEFLATED) as outfile:
                outfile.write(tmpfilepath, tmpfilepath.name)
            tmpfilepath.unlink()
        else:
            shutil.move(str(tmpfilepath.absolute()), str(filepath.absolute()))
    except Exception:
        # Deliberate best-effort contract: callers only get a success flag.
        return False
    return True
def _add_header_to_file(
    path: PathLike,
    spdx_info: SpdxInfo,
    template: Template,
    template_is_commented: bool,
    style: Optional[str],
    force_multi: bool = False,
    out=sys.stdout,
) -> int:
    """Rewrite the header of the file at *path* and report the outcome on *out*.

    Returns 0 when the header was written or the file was skipped because no
    comment style could be determined, and 1 when the header could not be
    generated.
    """
    # pylint: disable=too-many-arguments

    def report(message):
        out.write(message)
        out.write("\n")

    if style is None:
        style = _get_comment_style(path)
        if style is None:
            report(_("Skipped unrecognised file {path}").format(path=path))
            return 0
    else:
        style = NAME_STYLE_MAP[style]

    # newline="" keeps the file's own line endings visible in the text.
    with path.open("r", encoding="utf-8", newline="") as fp:
        text = fp.read()

    # Remember the line ending so it can be restored on write, and work on
    # "\n"-normalised text in between.
    line_ending = detect_line_endings(text)
    text = text.replace(line_ending, "\n")

    try:
        output = find_and_replace_header(
            text,
            spdx_info,
            template=template,
            template_is_commented=template_is_commented,
            style=style,
            force_multi=force_multi,
        )
    except CommentCreateError:
        report(
            _("Error: Could not create comment for '{path}'").format(
                path=path))
        return 1
    except MissingSpdxInfo:
        report(
            _("Error: Generated comment header for '{path}' is missing"
              " copyright lines or license expressions. The template is"
              " probably incorrect. Did not write new header.").format(
                  path=path))
        return 1

    with path.open("w", encoding="utf-8", newline=line_ending) as fp:
        fp.write(output)
    # TODO: This may need to be rephrased more elegantly.
    report(_("Successfully changed header of {path}").format(path=path))
    return 0
def init_dir(adir: PathLike, exist_ok=False, parents=False, rmtree=False):
    """Create directory *adir* and return it as a ``Path``.

    With ``rmtree=True`` an already existing directory is deleted first.
    ``exist_ok`` and ``parents`` are handed to ``Path.mkdir`` unchanged, so an
    existing directory raises ``FileExistsError`` unless one of ``exist_ok``
    or ``rmtree`` is set.
    """
    target = Path(adir)
    if rmtree and target.is_dir():
        shutil.rmtree(target)
    target.mkdir(parents=parents, exist_ok=exist_ok)
    return target
def put_license_in_file(spdx_identifier: str,
                        root: PathLike = None,
                        destination: PathLike = None) -> None:
    """Download a license and put it in the correct file.

    This function exists solely for convenience.

    Without *destination* the text goes to ``<licenses dir>/<identifier>.txt``
    (the directory is created if needed). With an explicit *destination* a
    ``Valid-{License,Exception}-Identifier`` header is written before the
    license text.

    :param spdx_identifier: SPDX identifier of the license.
    :param root: The root of the project.
    :param destination: An override path for the destination of the license.
    :raises requests.RequestException: if the license could not be downloaded.
    :raises FileExistsError: if the license file already exists.
    """
    header = ""
    if destination is None:
        licenses_path = find_licenses_directory(root=root)
        licenses_path.mkdir(exist_ok=True)
        destination = licenses_path / "".join((spdx_identifier, ".txt"))
    else:
        is_exception = spdx_identifier in EXCEPTION_MAP
        header = ("Valid-{licexc}-Identifier: {identifier}\n"
                  "{licexc}-Text:\n\n".format(
                      identifier=spdx_identifier,
                      licexc="Exception" if is_exception else "License",
                  ))

    destination = Path(destination)
    if destination.exists():
        raise FileExistsError(errno.EEXIST, "File exists", str(destination))

    text = download_license(spdx_identifier)
    # License texts routinely contain non-ASCII characters; pin the encoding
    # instead of depending on the platform's locale default.
    with destination.open("w", encoding="utf-8") as fp:
        fp.write(header)
        fp.write(text)
def write_loom(filename: PathLike, adata: AnnData, write_obsm_varm: bool = False):
    """Write ``adata`` to a loom file using ``loompy.create``.

    ``adata.var`` / ``adata.obs`` columns become row / column attributes, with
    the indices stored as ``'var_names'`` / ``'obs_names'``. ``adata.X`` and
    all layers are written transposed. An existing file is replaced.

    :param filename: destination loom file.
    :param adata: annotated data to export.
    :param write_obsm_varm: also export ``obsm`` / ``varm`` entries; when
        False and such entries exist, a warning lists what is left out.
    :raises ValueError: if ``adata.X`` is None.
    """
    filename = Path(filename)
    row_attrs = {k: np.array(v) for k, v in adata.var.to_dict('list').items()}
    row_attrs['var_names'] = adata.var_names.values
    col_attrs = {k: np.array(v) for k, v in adata.obs.to_dict('list').items()}
    col_attrs['obs_names'] = adata.obs_names.values

    if adata.X is None:
        raise ValueError('loompy does not accept empty matrices as data')

    if write_obsm_varm:
        for key in adata.obsm.keys():
            col_attrs[key] = adata.obsm[key]
        for key in adata.varm.keys():
            row_attrs[key] = adata.varm[key]
    elif len(adata.obsm.keys()) > 0 or len(adata.varm.keys()) > 0:
        # ``keys() + keys()`` raises TypeError for dict-style key views;
        # building a list works for views and plain sequences alike.
        skipped = [*adata.obsm.keys(), *adata.varm.keys()]
        logger.warning(
            'The loom file will lack these fields:\n{}\n'
            'Use write_obsm_varm=True to export multi-dimensional annotations'
            .format(skipped))

    # The unnamed layer '' carries X.
    layers = {'': adata.X.T}
    for key in adata.layers.keys():
        layers[key] = adata.layers[key].T

    from loompy import create

    if filename.exists():
        filename.unlink()
    create(fspath(filename), layers, row_attrs=row_attrs, col_attrs=col_attrs)
def extract_download(
    url: str,
    directory: PathLike,
    filenames: [str] = None,
    known_hash: str = None,
    overwrite: bool = False,
):
    """Download a tar archive with pooch and unpack it into *directory*.

    The archive is stored as ``temp.tar.gz`` inside *directory* (created if
    missing). When *filenames* is given, only those members are extracted, and
    a member that already exists on disk is replaced only if *overwrite* is
    set; names missing from the archive are skipped. Without *filenames* the
    whole archive is extracted.

    :param url: location of the archive.
    :param directory: extraction target.
    :param filenames: archive members to extract (default: everything).
    :param known_hash: passed to ``pooch.retrieve`` unchanged.
    :param overwrite: replace already extracted members.
    """
    if not isinstance(directory, Path):
        directory = Path(directory)
    if filenames is None:
        filenames = []
    if not directory.exists():
        directory.mkdir(parents=True, exist_ok=True)

    temporary_filename = directory / 'temp.tar.gz'
    logging.debug(f'downloading {url} -> {temporary_filename}')
    # pooch joins ``fname`` onto its cache ``path``; handing it the full path
    # as ``fname`` only lands in ``directory`` when that path is absolute.
    temporary_filename = pooch.retrieve(
        url,
        known_hash=known_hash,
        fname=temporary_filename.name,
        path=temporary_filename.parent,
    )

    logging.debug(f'extracting {temporary_filename} -> {directory}')
    # NOTE(review): members are extracted without path filtering; only use
    # with archives from trusted sources.
    with tarfile.open(temporary_filename) as local_file:
        if filenames:
            # List the archive once instead of once per requested name.
            member_names = set(local_file.getnames())
            for filename in filenames:
                if filename not in member_names:
                    continue
                path = directory / filename
                if not path.exists() or overwrite:
                    if path.exists():
                        os.remove(path)
                    local_file.extract(filename, directory)
        else:
            local_file.extractall(directory)
def _write_aspect_lex(parsed_data: Union[str, PathLike], generated_aspect_lex: dict,
                      out_dir: PathLike):
    """Write the generated aspect lexicon, with example sentences, to a CSV.

    Every sentence of the parsed documents is checked (case-insensitively)
    for each term and each non-empty lemma of ``generated_aspect_lex``; hits
    are handed to ``_find_aspect_in_sentence``, which fills the example table.
    The result goes to ``<out_dir>/generated_aspect_lex.csv``.

    :param parsed_data: location of the parsed documents.
    :param generated_aspect_lex: mapping of term to lemma.
    :param out_dir: output directory; created if missing.
    """
    from pathlib import Path

    parsed_docs = _load_parsed_docs_from_dir(parsed_data)
    aspect_dict = {}
    max_examples = 20
    label = 'AS'
    for doc in parsed_docs.values():
        for sent_text, _ in doc.sent_iter():
            # Lower-case once per sentence rather than once per lexicon entry.
            lowered = sent_text.lower()
            for term, lemma in generated_aspect_lex.items():
                if term in lowered:
                    _find_aspect_in_sentence(term, lemma, sent_text, aspect_dict,
                                             label, max_examples, False)
                if lemma != '' and lemma in lowered:
                    _find_aspect_in_sentence(term, lemma, sent_text, aspect_dict,
                                             label, max_examples, True)

    # write aspect lex to file
    header_row = ["Term", "Alias1", "Alias2", "Alias3"]
    header_row.extend("Example" + str(k) for k in range(1, max_examples + 1))
    aspect_table = [header_row]
    for [term, lemma], sentences in aspect_dict.items():
        aspect_table.append([term, lemma, '', '', *sentences])

    # The annotation allows any path-like, but mkdir() and "/" need a Path.
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file_path = out_dir / 'generated_aspect_lex.csv'
    _write_table(aspect_table, out_file_path)
    print('Aspect lexicon written to {}'.format(out_file_path))
def plot_dataset(
        dataset: xr.Dataset,
        num_chains: int = 8,
        therm_frac: float = 0.,
        title: str = None,
        outdir: os.PathLike = None,
        subplots_kwargs: dict[str, Any] = None,
        plot_kwargs: dict[str, Any] = None,
        ext: str = 'png',
):
    """Plot every data variable of ``dataset`` and save one figure per variable.

    Each variable is drawn with ``plot_metric`` and saved as
    ``<outdir>/<key>.<ext>``. When ``outdir`` is None, a timestamped directory
    ``plots/plots-<timestamp>`` under the current working directory is created
    and used. Returns ``dataset`` unchanged.

    NOTE(review): a ``plot_kwargs`` dict passed by the caller is modified in
    place (its ``'color'`` entry is overwritten for every variable).
    """
    plot_kwargs = {} if plot_kwargs is None else plot_kwargs
    subplots_kwargs = {} if subplots_kwargs is None else subplots_kwargs
    if outdir is None:
        # Function-local import: ``os`` is a local name inside this function.
        import os
        tstamp = get_timestamp('%Y-%m-%d-%H%M%S')
        outdir = Path(os.getcwd()).joinpath('plots', f'plots-{tstamp}')
        outdir.mkdir(exist_ok=True, parents=True)

    for idx, (key, val) in enumerate(dataset.data_vars.items()):
        # Cycle through the colors 'C0' .. 'C8'.
        color = f'C{idx%9}'
        plot_kwargs['color'] = color
        # outdir=None: saving is handled below, not by plot_metric
        # (presumably that is what the argument controls -- verify).
        fig, subfigs, ax = plot_metric(
            val=val.values,
            key=str(key),
            title=title,
            outdir=None,
            therm_frac=therm_frac,
            num_chains=num_chains,
            plot_kwargs=plot_kwargs,
            subplots_kwargs=subplots_kwargs,
        )
        # ``outdir`` is never None here because of the default set above.
        if outdir is not None:
            outfile = Path(outdir).joinpath(f'{key}.{ext}')
            Path(outfile.parent).mkdir(exist_ok=True, parents=True)
            outfile = outfile.as_posix()

        if subfigs is not None:
            # edgecolor = plt.rcParams['axes.edgecolor']
            # Changes the global rcParams; the previous edge color is not restored.
            plt.rcParams['axes.edgecolor'] = plt.rcParams['axes.facecolor']
            ax = subfigs[0].subplots(1, 1)
            # ax = fig[1].subplots(constrained_layout=True)
            cbar_kwargs = {
                # 'location': 'top',
                # 'orientation': 'horizontal',
            }
            im = val.plot(ax=ax, cbar_kwargs=cbar_kwargs)
            # ax.set_ylim(0, )
            im.colorbar.set_label(f'{key}')  # , labelpad=1.25)
            sns.despine(subfigs[0], top=True, right=True, left=True, bottom=True)
            if outdir is not None:
                print(f'Saving figure to: {outfile}')
                # Saves whichever figure pyplot currently considers active.
                plt.savefig(outfile, dpi=400, bbox_inches='tight')
        else:
            fig.savefig(outfile, dpi=400, bbox_inches='tight')

    return dataset
def write_csvs(
    dirname: PathLike, adata: AnnData, skip_data: bool = True, sep: str = ','
):
    """See :meth:`~anndata.AnnData.write_csvs`.

    Writes ``obs``, ``var``, ``obsm`` and ``varm`` (and ``X`` unless
    ``skip_data``) as ``<dirname>/<key>.csv`` and every ``uns`` entry as
    ``<dirname>/uns/<key>.csv``. Sparse values, and values that cannot be
    turned into a DataFrame, are skipped with a ``WriteWarning``.
    """
    dirname = Path(dirname)
    if dirname.suffix == '.csv':
        dirname = dirname.with_suffix('')
    logger.info("writing '.csv' files to %s", dirname)

    dir_uns = dirname / 'uns'
    for folder in (dirname, dir_uns):
        if not folder.is_dir():
            folder.mkdir(parents=True, exist_ok=True)

    annotation_keys = {'obs', 'var', 'obsm', 'varm'}
    tables = {
        'obs': adata._obs,
        'var': adata._var,
        'obsm': adata._obsm.to_df(),
        'varm': adata._varm.to_df(),
    }
    if not skip_data:
        matrix = adata._X
        tables['X'] = pd.DataFrame(
            matrix.toarray() if issparse(matrix) else matrix
        )

    sparse_warning_pending = True
    for key, value in {**tables, **adata._uns}.items():
        if issparse(value):
            # Warn once, then silently skip further sparse entries.
            if sparse_warning_pending:
                warnings.warn(
                    'Omitting to write sparse annotation.', WriteWarning
                )
                sparse_warning_pending = False
            continue

        # Everything that is not one of the core tables goes into 'uns/'.
        is_core = key == 'X' or key in annotation_keys
        target = (dirname if is_core else dir_uns) / f'{key}.csv'

        if isinstance(value, pd.DataFrame):
            frame = value
        else:
            array = np.array(value)
            if np.ndim(array) == 0:
                # Promote scalars to a one-element array.
                array = array[None]
            try:
                frame = pd.DataFrame(array)
            except Exception as e:
                warnings.warn(
                    f'Omitting to write {key!r} of type {type(e)}.',
                    WriteWarning,
                )
                continue

        frame.to_csv(
            target,
            sep=sep,
            header=key in annotation_keys,
            index=key in {'obs', 'var'},
        )
def __init__(self, path: os.PathLike):
    """Remember *path* and load the JSON state stored there, if any.

    ``_state`` holds the decoded JSON document, or None when the file does
    not exist. ``_update`` starts out False.
    """
    self._path = path
    self._update = False
    if not path.exists():
        self._state = None
        return
    with path.open("r") as handle:
        self._state = json.load(handle)
def create_opmap_from_file(file_path: os.PathLike) -> Dict[str, int]:
    """Build an OPNAME -> OPCODE dictionary from a JSON remapping file.

    The JSON file must enumerate a complete opmap for the Python version it
    names: even if only a few opcodes were swapped, every operation needs a
    value.

    Parameters
    ----------
    file_path : os.PathLike
        The path to the JSON remapping file, which *must* look like this.

        .. code-block::

            {
                "python_version": "<major>.<minor>(.<patch>)",
                "remapped_opcodes": [
                    {"opcode": 1, "opname": "POP_TOP", "remapped_value": 5},
                    {"opcode": 2, "opname": "ROT_TWO", "remapped_value": 4},
                    ...
                ]
            }

    Returns
    -------
    Dict[str, int]
        A dictionary of OPNAME : OPCODE, e.g. ``{'POP_TOP': 5, 'ROT_TWO': 4}``.

    Raises
    ------
    FileNotFoundError
        If ``file_path`` does not exist.
    RuntimeError
        If ``validate_opmap`` rejects the map for the stated version.
    """
    if not file_path.exists():
        raise FileNotFoundError(file_path)

    with file_path.open("r") as remapping_file:
        document = json.load(remapping_file)

    version: str = document["python_version"]
    remappings: Dict[str, int] = {
        entry["opname"]: entry["remapped_value"]
        for entry in document["remapped_opcodes"]
    }

    if not validate_opmap(version, remappings):
        raise RuntimeError("[!] Opcode map is not valid!")
    return remappings
def write_csvs(dirname: PathLike,
               adata: AnnData,
               skip_data: bool = True,
               sep: str = ","):
    """See :meth:`~anndata.AnnData.write_csvs`.

    ``obs``, ``var``, ``obsm`` and ``varm`` (plus ``X`` unless ``skip_data``)
    are written to ``<dirname>/<key>.csv``; entries of ``uns`` are written to
    ``<dirname>/uns/<key>.csv``. Sparse values and values that cannot be
    converted to a DataFrame are skipped with a ``WriteWarning``.
    """
    dirname = Path(dirname)
    if dirname.suffix == ".csv":
        dirname = dirname.with_suffix("")
    logger.info(f"writing .csv files to {dirname}")

    dir_uns = dirname / "uns"
    for folder in (dirname, dir_uns):
        if not folder.is_dir():
            folder.mkdir(parents=True, exist_ok=True)

    with_header = {"obs", "var", "obsm", "varm"}
    with_index = {"obs", "var"}

    core = {
        "obs": adata._obs,
        "var": adata._var,
        "obsm": adata._obsm.to_df(),
        "varm": adata._varm.to_df(),
    }
    if not skip_data:
        data = adata._X
        core["X"] = pd.DataFrame(data.toarray() if issparse(data) else data)

    warned_about_sparse = False
    for key, value in {**core, **adata._uns}.items():
        if issparse(value):
            # Only the first sparse entry triggers the warning.
            if not warned_about_sparse:
                warnings.warn("Omitting to write sparse annotation.",
                              WriteWarning)
                warned_about_sparse = True
            continue

        # Core tables live in ``dirname``; everything else under ``uns``.
        belongs_to_uns = key != "X" and key not in with_header
        destination = (dir_uns if belongs_to_uns else dirname) / f"{key}.csv"

        if isinstance(value, pd.DataFrame):
            table = value
        else:
            as_array = np.array(value)
            if np.ndim(as_array) == 0:
                # Scalars become one-element arrays.
                as_array = as_array[None]
            try:
                table = pd.DataFrame(as_array)
            except Exception as e:
                warnings.warn(
                    f"Omitting to write {key!r} of type {type(e)}.",
                    WriteWarning,
                )
                continue

        table.to_csv(
            destination,
            sep=sep,
            header=key in with_header,
            index=key in with_index,
        )
def repository_root(path: PathLike = None) -> Path:
    """Return the closest ancestor directory of *path* that contains ``.git``.

    The search starts at *path* itself (or at its parent when *path* is a
    file) and defaults to the location of this module. If no ancestor has a
    ``.git`` entry, the topmost directory reached is returned.
    """
    start = __file__ if path is None else path
    current = start if isinstance(start, Path) else Path(start)
    if current.is_file():
        current = current.parent

    while True:
        has_git = any(child.name == '.git' for child in current.iterdir())
        # A directory that is its own parent is the top of the tree.
        if has_git or current == current.parent:
            return current
        current = current.parent
def download_mesh(url: str, directory: PathLike, overwrite: bool = False):
    """Make sure the mesh files ``fort.13`` / ``fort.14`` are in *directory*.

    The archive at *url* is fetched through ``extract_download`` when
    ``fort.14`` is missing or *overwrite* is set.

    :param url: location of the mesh archive.
    :param directory: target directory; created if missing.
    :param overwrite: download again and replace existing mesh files.
    :return: *directory* as a ``Path``.
    """
    if not isinstance(directory, Path):
        directory = Path(directory)
    if not directory.exists():
        directory.mkdir(parents=True, exist_ok=True)

    if overwrite or not (directory / 'fort.14').exists():
        logging.info(f'downloading mesh files to {directory}')
        # Forward ``overwrite``: without it extract_download keeps files that
        # already exist, so a forced re-download replaced nothing.
        extract_download(url, directory, ['fort.13', 'fort.14'],
                         overwrite=overwrite)
    return directory
def is_venv_python(interpreter: os.PathLike) -> bool:
    """Tell whether *interpreter* belongs to a virtual environment.

    True when a ``pyvenv.cfg`` sits two levels above the interpreter, or when
    the interpreter lies below the directory named by ``$VIRTUAL_ENV``.
    """
    interpreter = Path(interpreter)
    if (interpreter.parent.parent / "pyvenv.cfg").exists():
        return True

    active_env = os.getenv("VIRTUAL_ENV")
    if not active_env:
        return False
    try:
        interpreter.relative_to(active_env)
    except ValueError:
        # Not located inside the active environment.
        return False
    return True
def load_vol(filename: os.PathLike,
             dtype=np.float32,
             mmap_mode: str = None,
             shape: tuple = None) -> np.ndarray:
    """Read the raw volume stored in a `.vol` file.

    Unless `shape` is given, the image shape is taken from the accompanying
    `.vol.info` file (keys `NUM_Z`, `NUM_Y`, `NUM_X`).

    Parameters
    ----------
    filename : os.PathLike
        Path to the file.
    dtype : dtype, optional
        Numpy dtype of the data.
    mmap_mode : None, optional
        If not None, open the file using memory mapping. For more info on
        the modes, see:
        https://numpy.org/doc/stable/reference/generated/numpy.memmap.html
    shape : tuple, optional
        Tuple of three ints specifying the shape of the data (order: z, y, x).

    Returns
    -------
    result : np.ndarray
        Data stored in the file.

    Raises
    ------
    IOError
        If `filename` does not exist.
    ValueError
        If no shape was given and the info file is missing.
    """
    filename = Path(filename)
    if not filename.exists():
        raise IOError(f'No such file: {filename}')

    # The info file sits next to the volume: "<name>.vol" -> "<name>.vol.info".
    filename_info = filename.with_suffix(filename.suffix + '.info')
    if not shape:
        try:
            info = read_info(filename_info)
        except FileNotFoundError:
            raise ValueError(
                f'Info file not found: {filename_info.name}, specify '
                'the volume shape using the `shape` parameter.') from None
        shape = info['NUM_Z'], info['NUM_Y'], info['NUM_X']

    if mmap_mode:
        return np.memmap(filename, dtype=dtype, shape=shape, mode=mmap_mode)
    return np.fromfile(filename, dtype=dtype).reshape(shape)
def train(
        self,
        episodes: int,
        validation: int = None,
        save_path: PathLike = None,
        gamma: float = 1.0,
        epsilon: float = 0.2,
) -> None:
    """
    Train the model by self-play: in every episode two agents sharing
    ``self.model`` play one game and the model is updated after each step.

    :param episodes: number of games (episodes) to play.
    :param validation: run ``self.validate`` every ``validation`` episodes;
        None disables validation.
    :param save_path: path to save models and logs. A run-specific
        sub-directory is created below it and must not exist yet.
    :param gamma: γ - discount factor. Is used to balance immediate and future reward.
    :param epsilon: ε - chance to get random move in ε-greedy policy
    """
    if save_path:
        save_path = pathlib.Path(save_path) / f'SIGMOID_24_negative_reward_gamma_{gamma}_epsilon_{epsilon}_q_learning'
        # exist_ok=False: refuse to reuse the directory of an earlier run.
        save_path.mkdir(exist_ok=False, parents=False)

    for episode in tqdm(range(episodes)):
        # Validate (and checkpoint) whenever episode + 1 is a multiple of
        # ``validation``.
        if validation is not None and not (episode + 1) % validation:
            self.validate(path=save_path)
            if save_path:
                self.save(save_path, episode + 1)

        # Call the model's train() (presumably switches it to training mode
        # after validation -- TODO confirm).
        self.model.train()
        # Both players are driven by the same model.
        players = (self.agent_cls(self.model), self.agent_cls(self.model))
        game = bg.Game(players=players)

        with self.e_greedy_get_action(episode, epsilon=epsilon):
            for agent, new_board, prev_board, move, available_moves in game.play_step_by_step():
                agent: agents.NNAgent
                # Current estimate for the board before the move.
                pred_q = agent.estimate(board=prev_board)
                if new_board.status:
                    # A truthy status ends the episode: its value is the
                    # reward, and the reversed board is updated with the
                    # negated reward.
                    reward = new_board.status
                    self.update(pred_q, torch.Tensor([reward]))
                    with prev_board.reverse() as reversed_board:
                        self.update(agent.estimate(board=reversed_board), torch.Tensor([-reward]))
                    break
                else:
                    estimated_moves = list(agent.estimate_moves(available_moves=available_moves, board=prev_board))
                    # NOTE(review): the two unpacked names are not used below.
                    agent_checkers, opp_checkers = prev_board.to_schema()
                    if estimated_moves:
                        # Target: discounted best estimate among available moves.
                        max_q = np.max(estimated_moves)
                        new_q = gamma * max_q
                    else:
                        # No move could be estimated: use a fixed target of -1.
                        new_q = torch.Tensor([-1])
                    self.update(pred_q, new_q)
def open_dataset(source: os.PathLike, **kwargs) -> xr.Dataset:
    """Open *source* as an xarray dataset.

    URLs (anything starting with ``http``) are opened with the ``pydap``
    engine, paths ending in ``tif`` are read as a raster and returned as a
    dataset with the single variable ``elevation``, and everything else is
    opened with ``xr.open_dataset``. Extra keyword arguments are passed on to
    the opener.
    """
    logger.info("extracting dem from %s\n", source)
    if isinstance(source, pathlib.Path):
        source = source.as_posix()

    lowered = source.lower()
    is_url = lowered.startswith("http")

    if not is_url and lowered.endswith("tif"):  # GeoTiff
        data_array = xr.open_rasterio(source, parse_coordinates=True, **kwargs)
        named = data_array.to_dataset(name="elevation")
        return named.squeeze().reset_coords(drop=True)

    if is_url:  # URL
        kwargs["engine"] = "pydap"
    # URL or NetCDF
    return xr.open_dataset(source, **kwargs)
def validate_paths(src: PathLike,
                   dst: Optional[PathLike] = None,
                   date_fmt: Optional[str] = None) -> tuple[Path, Path]:
    """Check a source file and work out the absolute destination path.

    *dst* defaults to the directory of *src*. If the destination is an
    existing directory, the result points inside it and is named after the
    stem of *src* plus the current time formatted with *date_fmt* (nothing is
    appended when *date_fmt* is empty). Otherwise the destination is used as
    given, provided its parent directory exists.

    :raises FileNotFoundError: if *src* is not a file.
    :raises NotADirectoryError: if the destination's parent is not a directory.
    :return: absolute ``(source, destination)`` paths.
    """
    source = Path(src)
    target = Path(dst) if dst else source.parent
    stamp = datetime.now().strftime(date_fmt) if date_fmt else ''

    if not source.is_file():
        raise FileNotFoundError(f'Failed to locate specified file {source}')

    if target.is_dir():
        return source.absolute(), (target / (source.stem + stamp)).absolute()

    if not target.parent.is_dir():
        raise NotADirectoryError(
            f'Failed to find destination directory {target.parent}')
    return source.absolute(), target.absolute()
def _add_header_to_file(
    path: PathLike,
    spdx_info: SpdxInfo,
    template: Template,
    template_is_commented: bool,
    style: Optional[str],
    out=sys.stdout,
) -> int:
    """Rewrite the header of the file at *path* and report the outcome on *out*.

    Returns 0 when the new header was written and 1 when it could not be
    generated (the file is then left untouched).
    """
    # pylint: disable=too-many-arguments
    result = 0
    if style is not None:
        style = NAME_STYLE_MAP[style]
    else:
        style = _get_comment_style(path)

    # Read and write as UTF-8 explicitly: headers commonly contain non-ASCII
    # characters (e.g. the copyright sign), and the locale default encoding
    # differs between platforms.
    with path.open("r", encoding="utf-8") as fp:
        text = fp.read()

    try:
        output = find_and_replace_header(
            text,
            spdx_info,
            template=template,
            template_is_commented=template_is_commented,
            style=style,
        )
    except CommentCreateError:
        out.write(
            _("Error: Could not create comment for '{path}'").format(
                path=path))
        out.write("\n")
        result = 1
    except MissingSpdxInfo:
        out.write(
            _("Error: Generated comment header for '{path}' is missing"
              " copyright lines or license expressions. The template is"
              " probably incorrect. Did not write new header.").format(
                  path=path))
        out.write("\n")
        result = 1
    else:
        with path.open("w", encoding="utf-8") as fp:
            fp.write(output)
        # TODO: This may need to be rephrased more elegantly.
        out.write(_("Successfully changed header of {path}").format(path=path))
        out.write("\n")

    return result
def normalize(path: PathLike,
              steps: int = 5,
              chunk_size: int = 512,
              threshold: float = 0.20,
              cp_isolation: List[str] = None,
              cp_exclusion: List[str] = None,
              preemptive_behaviour: bool = True) -> CharsetMatch:
    """
    Take a (text-based) file path and try to create another file next to it, this time using UTF-8.

    The new file is named ``<stem>-<detected encoding><ext>`` and is written
    into the directory of *path*. Returns the best charset match; raises
    IOError when no encoding fits.
    """
    from os import fspath
    from os.path import dirname, join

    results = from_path(path, steps, chunk_size, threshold, cp_isolation,
                        cp_exclusion, preemptive_behaviour)

    # Normalise to a plain string so pathlib.Path arguments work as well.
    source = fspath(path)
    filename = basename(source)
    target_extensions = list(splitext(filename))

    if len(results) == 0:
        raise IOError(
            'Unable to normalize "{}", no encoding charset seems to fit.'.
            format(filename))

    result = results.best()
    target_extensions[0] += '-' + result.encoding  # type: ignore

    # Build the target from directory + new name. The former
    # ``path.replace(filename, ...)`` also rewrote directory components that
    # happened to contain the file name, and failed for Path objects.
    target = join(dirname(source), ''.join(target_extensions))
    with open(target, 'wb') as fp:
        fp.write(result.output())  # type: ignore

    return result  # type: ignore
def setup(
        self,  # pylint: disable=arguments-differ
        fig: mpl.figure.Figure,
        outfile: os.PathLike,
        dpi: _ty.Optional[float] = None,
        ndigit: _ty.Optional[int] = None,
) -> None:
    """Set the output file properties.

    Parameters
    ----------
    fig : `~matplotlib.figure.Figure`
        The figure to grab the rendered frames from.
    outfile : str
        The filename of the resulting movie file. Its extension, if any,
        becomes the frame format; the remainder is the frame prefix.
    dpi : float, optional
        The dpi of the output file. This, with the figure size,
        controls the size in pixels of the resulting movie file.
        Default is ``fig.dpi``.
    ndigit : int, optional
        Number of digits to leave space for in numbered file names.
    """
    outfile = os.fsdecode(outfile)
    # Split the extension off the *file name* only. ``rsplit('.', 1)`` on the
    # whole path mis-split names such as 'run.1/frames', where the only dot
    # belongs to a directory.
    frame_prefix, extension = os.path.splitext(outfile)
    if extension:
        self.frame_format = extension[1:]
    super().setup(fig, outfile, dpi, frame_prefix=frame_prefix)
    if ndigit is not None:
        self._ndigit = ndigit
    # Frame names: <prefix><zero-padded counter>.<format>
    self.fname_format_str = f'%s%%0{self._ndigit}d.%s'
def get_local_data_path(
    path: PathLike,
    download_if_missing: bool = True,
    base_url: str = DATA_URL,
    base_path: PathLike = DATA_DIR,
) -> PathLike:
    """Return the local file path of a dataset, downloading it if necessary.

    The dataset lives at ``base_url`` joined with ``path`` and is cached at
    ``base_path / path``. If the cached file is missing it is downloaded from
    the url, unless downloading has been disabled.

    Args:
        path: location of the dataset relative to ``base_url`` and ``base_path``
        download_if_missing: download the dataset if it is not present locally
        base_url: base url of data repository
        base_path: base path where the datasets are cached locally

    Returns:
        usable local path to the file

    Raises:
        IOError if file does not exist and download is set to False
    """
    url = urljoin(str(base_url), str(path))
    local_path = Path(base_path) / path
    create_data_dir(local_path.parent)

    if local_path.is_file():
        return local_path
    if not download_if_missing:
        raise IOError(f"Dataset {local_path} is missing.")
    download(url, local_path)
    return local_path
def download_zip_folder_from_google_drive(file_id: str,
                                          destination: os.PathLike,
                                          show_size: bool = False,
                                          skip_if_exists: bool = True):
    """Fetch a ZIP archive from Google Drive and unpack it into a folder.

    The archive is downloaded into a temporary directory; anything already at
    the destination is removed before extraction.

    Args:
        file_id (str): the Google Drive file ID
        destination (os.PathLike): the destination folder
        show_size (bool, optional): whether to display a progress bar. Defaults to False.
        skip_if_exists (bool, optional): if true, will do nothing when the destination path exists already. Defaults to True.
    """
    destination = URI(destination)
    already_there = skip_if_exists and destination.exists()
    if already_there:
        logger.info(
            f"Not downloading {file_id} to {destination} again because it already exists"
        )
        return

    with tempfile.TemporaryDirectory() as scratch_dir:
        archive_path = Path(scratch_dir) / f"{destination.name}.zip"
        logger.info(f"Downloading {file_id} to {archive_path}")
        gdd.download_file_from_google_drive(
            file_id=file_id,
            dest_path=archive_path,
            overwrite=True,
            showsize=show_size,
        )

        logger.info(f"Unzipping {archive_path} to {destination}")
        # Start from a clean destination; a missing one is not an error.
        shutil.rmtree(destination, ignore_errors=True)
        with zipfile.ZipFile(archive_path, "r") as archive:
            archive.extractall(destination, _get_members(archive))
        logger.info(f"Finished downloading {file_id} to {destination}")
def download_zip_folder(url: str,
                        destination: os.PathLike,
                        show_size: bool = False,
                        skip_if_exists: bool = True):
    """Fetch a ZIP archive from a URL and unpack it into a folder.

    The archive is first downloaded to a temporary location and then extracted
    to the target folder; anything already at the destination is removed
    before extraction.

    Args:
        url (str): the URL of the ZIP file
        destination (os.PathLike): the destination folder
        show_size (bool, optional): whether to display a progress bar. Defaults to False.
        skip_if_exists (bool, optional): if true, will do nothing when the destination path exists already. Defaults to True.
    """
    destination = URI(destination)
    already_there = skip_if_exists and destination.exists()
    if already_there:
        logger.info(
            f"Not downloading {url} to {destination} again because it already exists"
        )
        return

    with tempfile.TemporaryDirectory() as scratch_dir:
        archive_path = Path(scratch_dir) / f"{destination.name}.zip"
        download_file(url, archive_path, show_size)

        logger.info(f"Unzipping {archive_path} to {destination}")
        # Start from a clean destination; a missing one is not an error.
        shutil.rmtree(destination, ignore_errors=True)
        with zipfile.ZipFile(archive_path, "r") as archive:
            archive.extractall(destination, _get_members(archive))
        logger.info(f"Finished downloading {url} to {destination}")
def get_block_size(device: os.PathLike) -> int:
    """Look up the device block size (in bytes) in sysfs.

    This value is also used as the sector size in this script. If there's an
    error in looking up the value, 512 is used.

    ``device`` may be a str, bytes or path-like object, with or without a
    leading ``/dev/``.
    """
    device_path: typing.Union[str, bytes]
    if isinstance(device, os.PathLike):
        device_path = device.__fspath__()
    else:
        device_path = device

    if isinstance(device_path, bytes):
        # Decode up front: formatting a bytes device name into the sysfs path
        # below would produce ".../b'sda'/..." and never match a real device.
        device_path = os.fsdecode(device_path)
    elif not isinstance(device_path, str):
        # This should never be reached, as the spec for os.PathLike is that
        # __fspath__() returns either str or bytes.
        raise RuntimeError(
            "__fspath__() returned something other than str or bytes")

    match = re.match(r"(?:/dev/)?(\w+)", device_path)
    # The only way this assertion should fail is if the string given includes
    # whitespace, or has no characters at all.
    assert match is not None
    device_name = match.group(1)

    block_size_path = f"/sys/class/block/{device_name}/queue/logical_block_size"
    if not os.path.exists(block_size_path):
        log.warning(
            "'%s' is not a block device, defaulting to %d-byte sectors",
            device, DEFAULT_SECTOR_SIZE)
        log.debug("'%s' does not exist", block_size_path)
        return DEFAULT_SECTOR_SIZE
    with open(block_size_path, "r") as sys_block_size:
        return int(sys_block_size.read().strip())
def __init__(self, hamtide_dataset_directory: PathLike = None):
    """Select the HAMTIDE data source and prepare empty dataset slots.

    :param hamtide_dataset_directory: local directory with the NetCDF files;
        defaults to ``self.OPENDAP_URL``. A value that is not an existing
        local path is passed on to the parent class unchanged.
    :raises FileNotFoundError: if the directory exists but holds no ``*.nc`` file.
    :raises ValueError: if the value cannot be checked as a local path.
    """
    if hamtide_dataset_directory is None:
        hamtide_dataset_directory = self.OPENDAP_URL
    else:
        # Only the existence probe is guarded. FileNotFoundError is itself an
        # OSError, so raising it inside the ``try`` turned "no NetCDF files"
        # into the unrelated "must be a local path" ValueError.
        try:
            is_local = Path(hamtide_dataset_directory).exists()
        except OSError as error:
            raise ValueError('given resource must be a local path') from error

        if is_local:
            hamtide_dataset_directory = Path(hamtide_dataset_directory)
            if not any(hamtide_dataset_directory.glob('*.nc')):
                raise FileNotFoundError(
                    f'no NetCDF files found at '
                    f'"{hamtide_dataset_directory}"')

    super().__init__(hamtide_dataset_directory)

    # One empty {'path', 'dataset'} slot per constituent and variable.
    self.datasets = {
        variable: {
            constituent.lower(): {
                'path': None,
                'dataset': None
            }
            for constituent in self.constituents
        }
        for variable in ('elevation', 'velocity')
    }
def _aparc_aseg_projection(
        self,
        aparc_aseg_volume: os.PathLike,
        aparc_aseg_volume_path: os.PathLike,
        projection: np.ndarray,
        ras: Union[np.ndarray, list],
        fs_to_conn_indices_mapping: dict,
        background_volume: Volume,
        background_volume_path: os.PathLike,
        snapshot_name: str,
        conn_measure: Union[np.ndarray, list]):
    """Write one snapshot of the volume slice recoloured by a connectivity measure.

    The volume is sliced at ``ras`` (falling back to its center point when
    that raises IndexError). Every positive label in the slice is replaced by
    the ``conn_measure`` entry its mapping points to, or by -1 when the label
    has no mapping. The slice is written alone when
    ``background_volume_path`` is ``''``, otherwise on top of the matching
    background slice.
    """
    try:
        plane = aparc_aseg_volume.slice_volume(projection, ras)
    except IndexError:
        center = aparc_aseg_volume.get_center_point()
        plane = aparc_aseg_volume.slice_volume(projection, center)
        msg = "The volume center point has been used for %s snapshot of %s."
        self.logger.info(msg, projection, aparc_aseg_volume_path)
    x_axis_coords, y_axis_coords, aparc_aseg_matrix = plane

    for i, row in enumerate(aparc_aseg_matrix):
        for j, label in enumerate(row):
            if label > 0:
                # Unmapped labels are flagged with -1.
                aparc_aseg_matrix[i, j] = (
                    conn_measure[int(fs_to_conn_indices_mapping[label])]
                    if label in fs_to_conn_indices_mapping else -1
                )

    if background_volume_path == '':
        self.writer.write_matrix(
            x_axis_coords, y_axis_coords, aparc_aseg_matrix,
            self.generate_file_name(projection, snapshot_name), 'hot')
        return

    try:
        background = background_volume.slice_volume(projection, ras)
    except IndexError:
        # Falls back to the center point of the aparc+aseg volume.
        center = aparc_aseg_volume.get_center_point()
        background = background_volume.slice_volume(projection, center)
        self.logger.info("The volume center point has been used for %s snapshot of %s and %s.",
                         projection, aparc_aseg_volume_path, background_volume_path)
    bx_axis_coords, by_axis_coords, bvolume_matrix = background

    self.writer.write_2_matrices(
        bx_axis_coords, by_axis_coords, bvolume_matrix,
        x_axis_coords, y_axis_coords, aparc_aseg_matrix,
        self.generate_file_name(projection, snapshot_name))
import io import pathlib try: from os import PathLike, fspath, fsencode, fsdecode except ImportError: class PathLike(abc.ABC): """Abstract base class for implementing the file system path protocol.""" @abc.abstractmethod def __fspath__(self): """Return the file system path representation of the object.""" raise NotImplementedError PathLike.register(pathlib.Path) def fspath(path): """Return the string representation of the path. If str or bytes is passed in, it is returned unchanged. If __fspath__() returns something other than str or bytes then TypeError is raised. If this function is given something that is not str, bytes, or os.PathLike then TypeError is raised. """ if isinstance(path, (str, bytes)): return path if isinstance(path, pathlib.Path): return str(path)