def load_simulation(fn, simulation_type, find_outputs=False):
    """
    Build a simulation time-series object for a given simulation code.

    Parameters
    ----------
    fn : str, os.Pathlike, or byte (types supported by os.path.expandusers)
        Name of the data file or directory.

    simulation_type : str
        The simulation frontend to use, e.g. 'Enzo'.

    find_outputs : bool
        Defaults to False.

    Raises
    ------
    FileNotFoundError
        If fn is not found.

    yt.utilities.exceptions.YTSimulationNotIdentified
        If simulation_type is unknown.
    """
    # Resolve the on-disk location first so a missing path surfaces as
    # FileNotFoundError before any registry lookup happens.
    resolved = str(lookup_on_disk_data(fn))

    try:
        series_cls = simulation_time_series_registry[simulation_type]
    except KeyError as err:
        # Preserve the original KeyError as the cause for easier debugging.
        raise YTSimulationNotIdentified(simulation_type) from err

    return series_cls(resolved, find_outputs=find_outputs)
def load(fn, *args, **kwargs):
    """
    Load a Dataset or DatasetSeries object.
    The data format is automatically discovered, and the exact return type is the
    corresponding subclass of :class:`yt.data_objects.static_output.Dataset`.
    A :class:`yt.data_objects.time_series.DatasetSeries` is created if the first
    argument is a pattern.

    Parameters
    ----------
    fn : str, os.Pathlike, or byte (types supported by os.path.expandusers)
        A path to the data location. This can be a file name, directory name, a glob
        pattern, or a url (for data types that support it).

    Additional arguments, if any, are passed down to the return class.

    Returns
    -------
    :class:`yt.data_objects.static_output.Dataset` object
        If fn is a single path, create a Dataset from the appropriate subclass.

    :class:`yt.data_objects.time_series.DatasetSeries`
        If fn is a glob pattern (i.e. containing wildcards '[]?!*'), create a series.

    Raises
    ------
    FileNotFoundError
        If fn does not match any existing file or directory.

    yt.utilities.exceptions.YTUnidentifiedDataType
        If fn matches existing files or directories with undetermined format.

    yt.utilities.exceptions.YTAmbiguousDataType
        If the data format matches more than one class of similar specilization levels.
    """
    fn = os.path.expanduser(fn)

    # Any glob wildcard means the caller wants a whole time series, not a
    # single dataset; hand off to DatasetSeries (imported lazily to avoid a
    # circular import at module load time).
    if any(char in fn for char in "[]?!*"):
        from yt.data_objects.time_series import DatasetSeries

        return DatasetSeries(fn, *args, **kwargs)

    # Local paths must exist either in the current directory or under
    # yt.config.ytcfg['data_dir_directory']; lookup_on_disk_data raises
    # FileNotFoundError otherwise. URLs are passed through untouched.
    if not fn.startswith("http"):
        fn = str(lookup_on_disk_data(fn))

    # Ask every registered frontend whether it recognizes this data...
    candidates = [
        frontend
        for frontend in output_type_registry.values()
        if frontend._is_valid(fn, *args, **kwargs)
    ]
    # ...then keep only the most specialised front ends (lowest subclasses).
    candidates = find_lowest_subclasses(candidates)

    if len(candidates) > 1:
        raise YTAmbiguousDataType(fn, candidates)
    if not candidates:
        raise YTUnidentifiedDataType(fn, *args, **kwargs)
    return candidates[0](fn, *args, **kwargs)
def load_sample(fn, progressbar: bool = True, timeout=None, **kwargs):
    """
    Load sample data with yt.

    This is a simple wrapper around `yt.load` to include fetching data with pooch.

    The data registry table can be retrieved and visualized using
    `yt.sample_data.api.get_data_registry_table`.

    This function requires pandas and pooch to be installed.

    Parameters
    ----------
    fn : str
        The `filename` of the dataset to load, as defined in the data registry
        table.

    progressbar: bool
        display a progress bar (tqdm).

    timeout: float or int (optional)
        Maximal waiting time, in seconds, after which download is aborted.
        `None` means "no limit". This parameter is directly passed to down to
        requests.get via pooch.HTTPDownloader

    Any additional keyword argument is passed down to `yt.load`.
    Note that in case of collision with predefined keyword arguments as set in
    the data registry, the ones passed to this function take priority.

    Raises
    ------
    KeyError
        If fn is not found in the data registry.

    ValueError
        If the registry entry is missing a 'load_name', or if the requested
        sub-file does not exist after download.
    """
    from yt.sample_data.api import (
        _download_sample_data_file,
        _get_test_data_dir_path,
        get_data_registry_table,
    )

    pooch_logger = pooch.utils.get_logger()

    # `fn` may name a sub-file inside the sample dataset; split off the
    # top-level dataset name that the registry is keyed on.
    topdir, _, specific_file = str(fn).partition(os.path.sep)

    registry_table = get_data_registry_table()
    # PR 3089
    # note: in the future the registry table should be reindexed
    # so that the following line can be replaced with
    #
    # specs = registry_table.loc[fn]
    #
    # however we don't want to do it right now because the "filename" column is
    # currently incomplete
    #
    # NOTE(review): the f-string interpolation into `query` would break (or
    # misbehave) if a dataset name ever contained a single quote — presumably
    # registry names never do; verify against the registry schema.
    try:
        specs = registry_table.query(f"`filename` == '{topdir}'").iloc[0]
    except IndexError as err:
        raise KeyError(f"Could not find '{fn}' in the registry.") from err

    if not specs["load_name"]:
        raise ValueError(
            "Registry appears to be corrupted: could not find a 'load_name' entry for this dataset."
        )

    # Registry-provided load kwargs are defaults; caller-supplied ones win.
    kwargs = {**specs["load_kwargs"], **kwargs}

    try:
        data_dir = lookup_on_disk_data(fn)
    except FileNotFoundError:
        mylog.info("'%s' is not available locally. Looking up online.", fn)
    else:
        # if the data is already available locally, `load_sample`
        # only acts as a thin wrapper around `load`
        loadable_path = data_dir.joinpath(specs["load_name"], specific_file)
        mylog.info("Sample dataset found in '%s'", data_dir)
        if timeout is not None:
            mylog.info("Ignoring the `timeout` keyword argument received.")
        return load(loadable_path, **kwargs)

    try:
        save_dir = _get_test_data_dir_path()
    except FileNotFoundError:
        mylog.warning(
            "yt test data directory is not properly set up. "
            "Data will be saved to the current work directory instead.")
        save_dir = Path.cwd()

    # effectively silence the pooch's logger and create our own log instead
    pooch_logger.setLevel(100)
    mylog.info("Downloading from %s", specs["url"])

    # downloading via a pooch.Pooch instance behind the scenes
    filename = urlsplit(specs["url"]).path.split("/")[-1]
    tmp_file = _download_sample_data_file(filename, progressbar=progressbar, timeout=timeout)

    # pooch has functionalities to unpack downloaded archive files,
    # but it needs to be told in advance that we are downloading a tarball.
    # Since that information is not necessarily trivial to guess from the filename,
    # we rely on the standard library to perform a conditional unpacking instead.
    if tarfile.is_tarfile(tmp_file):
        mylog.info("Untaring downloaded file to '%s'", save_dir)
        with tarfile.open(tmp_file) as fh:
            fh.extractall(save_dir)
        os.remove(tmp_file)
    else:
        # BUGFIX: os.replace raises OSError when the destination is an
        # existing directory; the downloaded file must be moved *into*
        # save_dir under its own name, not onto save_dir itself.
        os.replace(tmp_file, os.path.join(save_dir, filename))

    loadable_path = Path.joinpath(save_dir, fn, specs["load_name"], specific_file)

    if specific_file and not loadable_path.exists():
        raise ValueError(f"Could not find file '{specific_file}'.")

    return load(loadable_path, **kwargs)