def getDebugDataset(config):
    """Loads a dataset from the config and makes it reasonably small.

    The config syntax works as in :func:`getSeqDataset`. See there for more
    extensive documentation.

    Parameters
    ----------
    config : dict
        An edflow config, with at least the keys ``debugdataset`` and nested
        inside it ``dataset`` and ``debug_length``, defining the basedataset
        and its size.

    Returns
    -------
    :class:`SubDataset`:
        A dataset based on the basedataset of the specified length.
    """
    key = "debugdataset"

    # Import and instantiate the base dataset named in the sub-config.
    dset_class = get_implementations_from_config(config[key], ["dataset"])["dataset"]
    full_dataset = dset_class(config=config)

    # Restrict to the first ``debug_length`` examples.
    debug_indices = np.arange(config[key]["debug_length"])
    return SubDataset(full_dataset, debug_indices)
def standalone_eval_csv_file(path_to_csv, callbacks, additional_kwargs=None):
    """Runs all given callbacks on the data in the :class:`EvalDataFolder`
    constructed from the given csv.

    Parameters
    ----------
    path_to_csv : str
        Path to the csv file.
    callbacks : list(str or Callable)
        Import commands used to construct the functions applied to the Data
        extracted from :attr:`path_to_csv`.
    additional_kwargs : dict, optional
        Keypath-value pairs added to the config, which is extracted from
        the ``model_outputs.csv``. Defaults to no additional updates.

    Returns
    -------
    The collected outputs of the callbacks.
    """
    import importlib
    from edflow.main import get_implementations_from_config
    from edflow.config import update_config
    import sys

    sys.path.append(os.getcwd())  # convenience: load implementations from cwd

    # Avoid the mutable-default-argument pitfall: a shared ``{}`` default
    # could leak state between calls if mutated downstream.
    if additional_kwargs is None:
        additional_kwargs = {}

    out_data = EvalDataFolder(path_to_csv)

    config = read_meta_data(path_to_csv)
    update_config(config, additional_kwargs)

    impl = get_implementations_from_config(config, ["dataset"])
    in_data = impl["dataset"](config)

    if not isinstance(callbacks, list):
        callbacks = [callbacks]

    outputs = []
    for cb in callbacks:
        if isinstance(cb, str):
            # Resolve "module.path.function" import strings to callables.
            module_name, func_name = cb.rsplit(".", 1)
            cb = getattr(importlib.import_module(module_name), func_name)
        outputs += [cb(os.path.dirname(path_to_csv), in_data, out_data, config)]

    return outputs
def getSeqDataset(config):
    """This allows to not define a dataset class, but use a baseclass and a
    `length` and `step` parameter in the supplied `config` to load and
    sequentialize a dataset.

    A config passed to edflow would then look like this:

    .. code-block:: yaml

        dataset: edflow.data.dataset.getSeqDataSet
        model: Some Model
        iterator: Some Iterator

        seqdataset:
            dataset: import.path.to.your.basedataset
            length: 3
            step: 1
            fid_key: fid

    ``getSeqDataSet`` will import the base ``dataset`` and pass it to
    :class:`SequenceDataset` together with ``length`` and ``step`` to make
    the actually used dataset.

    Parameters
    ----------
    config : dict
        An edflow config, with at least the keys ``seqdataset`` and nested
        inside it ``dataset``, ``length``, ``step`` and ``fid_key``.

    Returns
    -------
    :class:`SequenceDataset`
        A Sequence Dataset based on the basedataset.
    """
    ks = "seqdataset"

    # Import and instantiate the base dataset named in the sub-config.
    base_dset = get_implementations_from_config(config[ks], ["dataset"])["dataset"]
    base_dset = base_dset(config=config)

    S = SequenceDataset(
        base_dset,
        config[ks]["length"],
        config[ks]["step"],
        fid_key=config[ks]["fid_key"],
    )

    return S
def standalone_eval_csv_file(
    path_to_csv, callbacks, additional_kwargs=None, other_config=None
):
    """Runs all given callbacks on the data in the :class:`EvalDataFolder`
    constructed from the given csv.

    Parameters
    ----------
    path_to_csv : str
        Path to the csv file.
    callbacks : dict(name: str or Callable)
        Import commands used to construct the functions applied to the Data
        extracted from :attr:`path_to_csv`.
    additional_kwargs : dict, optional
        Keypath-value pairs added to the config, which is extracted from the
        ``model_outputs.csv``. These will overwrite parameters in the
        original config extracted from the csv.
    other_config : str, optional
        Path to additional config used to update the existing one as taken
        from the ``model_outputs.csv`` . Cannot overwrite the dataset. Only
        used for callbacks. Parameters in this other config will overwrite
        the parameters in the original config and those of the commandline
        arguments.

    Returns
    -------
    outputs : dict
        The collected outputs of the callbacks.
    """
    from edflow.main import get_implementations_from_config
    from edflow.config import update_config
    import yaml

    # Avoid the mutable-default-argument pitfall: construct a fresh dict
    # per call instead of sharing one ``{}`` across invocations.
    if additional_kwargs is None:
        additional_kwargs = {}

    if other_config is not None:
        with open(other_config, "r") as f:
            other_config = yaml.full_load(f)
    else:
        other_config = {}

    out_data = EvalDataFolder(path_to_csv)

    config = read_meta_data(path_to_csv)

    # The dataset is instantiated from the original config before any
    # updates are applied, so neither additional_kwargs nor other_config
    # can overwrite it.
    impl = get_implementations_from_config(config, ["dataset"])
    in_data = impl["dataset"](config)

    # Precedence (lowest to highest): csv config < commandline kwargs
    # < other_config.
    update_config(config, additional_kwargs)
    config.update(other_config)

    config_callbacks, callback_kwargs = config2cbdict(config)
    # Copy before merging so the caller's dict is not mutated in place.
    callbacks = dict(callbacks)
    callbacks.update(config_callbacks)
    callbacks = load_callbacks(callbacks)

    root = os.path.dirname(path_to_csv)
    outputs = apply_callbacks(
        callbacks, root, in_data, out_data, config, callback_kwargs
    )

    return outputs
def load_dataset(
    config, dataset_import_path="transition_network.encodings_dataset.encodings"
):
    """Instantiate a dataset implementation by its import path.

    Parameters
    ----------
    config : dict
        edflow config passed to the dataset constructor.
    dataset_import_path : str, optional
        Import path of the dataset implementation to load. Defaults to the
        transition-network encodings dataset, preserving the original
        hard-coded behavior.

    Returns
    -------
    The instantiated dataset.
    """
    impl = get_implementations_from_config(
        {"dataset": dataset_import_path}, ["dataset"]
    )
    return impl["dataset"](config)