Пример #1
0
def getDebugDataset(config):
    """Loads a dataset from the config and makes it reasonably small.

    The config syntax works as in :func:`getSeqDataset`. See there for
    more extensive documentation.

    Parameters
    ----------
    config : dict
        An edflow config, with at least the key
        ``debugdataset`` and nested inside it ``dataset`` and
        ``debug_length``, defining the basedataset and its size.

    Returns
    -------
    :class:`SubDataset`:
        A dataset based on the basedataset of the specified length.
    """

    ks = "debugdataset"
    # Import the base dataset class named in the nested config section,
    # then instantiate it with the full config (same pattern as
    # getSeqDataset).
    base_dset = get_implementations_from_config(config[ks],
                                                ["dataset"])["dataset"]
    base_dset = base_dset(config=config)

    # Restrict the dataset to its first ``debug_length`` examples.
    indices = np.arange(config[ks]["debug_length"])

    return SubDataset(base_dset, indices)
Пример #2
0
def standalone_eval_csv_file(path_to_csv, callbacks, additional_kwargs=None):
    """Runs all given callbacks on the data in the :class:`EvalDataFolder`
    constructed from the given csv.

    Parameters
    ----------
    path_to_csv : str
        Path to the csv file.
    callbacks : list(str or Callable)
        Import commands used to construct the functions applied to the Data
        extracted from :attr:`path_to_csv`.
    additional_kwargs : dict, optional
        Keypath-value pairs added to the config, which is extracted from
        the ``model_outputs.csv``. Defaults to no additional updates.

    Returns
    -------
    list
        The collected outputs of the callbacks.
    """

    import importlib
    from edflow.main import get_implementations_from_config
    from edflow.config import update_config

    import sys

    sys.path.append(os.getcwd())  # convenience: load implementations from cwd

    out_data = EvalDataFolder(path_to_csv)

    config = read_meta_data(path_to_csv)
    # ``or {}`` guards the None default; a ``{}`` default argument would be
    # shared (and mutable) across calls.
    update_config(config, additional_kwargs or {})

    impl = get_implementations_from_config(config, ["dataset"])
    in_data = impl["dataset"](config)

    # Allow passing a single callback without wrapping it in a list.
    if not isinstance(callbacks, list):
        callbacks = [callbacks]

    outputs = []
    for cb in callbacks:
        if isinstance(cb, str):
            # Resolve ``module.path.function`` import strings to callables.
            module_name, _, attr_name = cb.rpartition(".")
            module = importlib.import_module(module_name)
            cb = getattr(module, attr_name)

        outputs.append(
            cb(os.path.dirname(path_to_csv), in_data, out_data, config)
        )

    return outputs
Пример #3
0
def getSeqDataset(config):
    """This allows to not define a dataset class, but use a baseclass and a
    `length` and `step` parameter in the supplied `config` to load and
    sequentialize a dataset.

    A config passed to edflow would then look like this:

    .. code-block:: yaml

        dataset: edflow.data.dataset.getSeqDataset
        model: Some Model
        iterator: Some Iterator

        seqdataset:
                dataset: import.path.to.your.basedataset
                length: 3
                step: 1
                fid_key: fid

    ``getSeqDataset`` will import the base ``dataset`` and pass it to
    :class:`SequenceDataset` together with ``length`` and ``step`` to
    make the actually used dataset.

    Parameters
    ----------
    config : dict
        An edflow config, with at least the key
        ``seqdataset`` and nested inside it ``dataset``, ``length``,
        ``step`` and ``fid_key``.

    Returns
    -------
    :class:`SequenceDataset`
        A Sequence Dataset based on the basedataset.
    """

    ks = "seqdataset"
    # Import the base dataset class named in the nested config section,
    # then instantiate it with the full config.
    base_dset = get_implementations_from_config(config[ks],
                                                ["dataset"])["dataset"]
    base_dset = base_dset(config=config)

    S = SequenceDataset(
        base_dset,
        config[ks]["length"],
        config[ks]["step"],
        fid_key=config[ks]["fid_key"],
    )

    return S
Пример #4
0
def standalone_eval_csv_file(path_to_csv,
                             callbacks,
                             additional_kwargs=None,
                             other_config=None):
    """Runs all given callbacks on the data in the :class:`EvalDataFolder`
    constructed from the given csv.

    Parameters
    ----------
    path_to_csv : str
        Path to the csv file.
    callbacks : dict(name: str or Callable)
        Import commands used to construct the functions applied to the Data
        extracted from :attr:`path_to_csv`. The passed dict is not modified.
    additional_kwargs : dict, optional
        Keypath-value pairs added to the config, which is extracted from
        the ``model_outputs.csv``. These will overwrite parameters in the
        original config extracted from the csv.
    other_config : str, optional
        Path to additional config used to update the existing one as taken
        from the ``model_outputs.csv``. Cannot overwrite the dataset. Only
        used for callbacks. Parameters in this other config will overwrite
        the parameters in the original config and those of the commandline
        arguments.

    Returns
    -------
    outputs: dict
        The collected outputs of the callbacks.
    """

    from edflow.main import get_implementations_from_config
    from edflow.config import update_config
    import yaml

    # Load the optional override config; fall back to an empty override.
    if other_config is not None:
        with open(other_config, "r") as f:
            other_config = yaml.full_load(f)
    else:
        other_config = {}

    out_data = EvalDataFolder(path_to_csv)

    config = read_meta_data(path_to_csv)

    # Instantiate the dataset before applying any overrides, so neither
    # additional_kwargs nor other_config can change which dataset is used.
    impl = get_implementations_from_config(config, ["dataset"])
    in_data = impl["dataset"](config)

    # Override precedence (lowest to highest): csv config, commandline
    # kwargs, other_config. ``or {}`` guards the None default; a ``{}``
    # default argument would be shared (and mutable) across calls.
    update_config(config, additional_kwargs or {})
    config.update(other_config)

    config_callbacks, callback_kwargs = config2cbdict(config)
    # Copy before updating so the caller's dict is left untouched.
    callbacks = dict(callbacks)
    callbacks.update(config_callbacks)

    callbacks = load_callbacks(callbacks)

    root = os.path.dirname(path_to_csv)

    outputs = apply_callbacks(callbacks, root, in_data, out_data, config,
                              callback_kwargs)

    return outputs
Пример #5
0
def load_dataset(config):
    """Instantiate the hard-coded encodings dataset with the given config.

    Parameters
    ----------
    config : dict
        Config passed through to the dataset constructor.

    Returns
    -------
    The instantiated ``transition_network.encodings_dataset.encodings``
    dataset.
    """
    dataset_spec = {"dataset": "transition_network.encodings_dataset.encodings"}
    implementations = get_implementations_from_config(dataset_spec, ["dataset"])
    dataset_cls = implementations["dataset"]
    return dataset_cls(config)