示例#1
0
def get_data_loader(data_config: dict, split: str) -> Optional[DataLoader]:
    """
    Return the corresponding data loader.

    Can't be placed in the same file of loader interfaces as it causes import cycle.

    :param data_config: a dictionary containing configuration for data,
        entries keyed by a split name hold the per-split settings
        ("dir", "format", "labeled"), all other entries (including "type")
        are forwarded to the data loader.
    :param split: must be one of KNOWN_DATA_SPLITS
    :return: DataLoader or None, returns None if the split or dir is empty.
    :raises ValueError: if split is unknown, or if a configured data
        directory is not an existing directory.
    """
    if split not in KNOWN_DATA_SPLITS:
        raise ValueError(
            f"split must be one of {KNOWN_DATA_SPLITS}, got {split}")

    # the split may be absent, or declared without content
    # (e.g. an empty YAML entry is loaded as None)
    split_config = data_config.get(split)
    if not split_config:
        return None
    # covers None, "" and an empty list of directories
    data_dir_paths = split_config.get("dir")
    if not data_dir_paths:
        return None

    if isinstance(data_dir_paths, str):
        data_dir_paths = [data_dir_paths]
    # replace ~ with user home path
    data_dir_paths = [os.path.expanduser(path) for path in data_dir_paths]
    for data_dir_path in data_dir_paths:
        if not os.path.isdir(data_dir_path):
            raise ValueError(
                f"Data directory path {data_dir_path} for split {split}"
                f" is not a directory or does not exist")

    # prepare data loader config:
    # drop the per-split entries first so only the shared part is copied,
    # the copy keeps the caller's data_config untouched by the pop below
    data_loader_config = deepcopy({
        key: value
        for key, value in data_config.items()
        if key not in KNOWN_DATA_SPLITS
    })
    # the registry expects the loader class under "name" instead of "type"
    data_loader_config["name"] = data_loader_config.pop("type")

    default_args = dict(
        data_dir_paths=data_dir_paths,
        file_loader=REGISTRY.get(category=FILE_LOADER_CLASS,
                                 key=split_config["format"]),
        labeled=split_config["labeled"],
        # only the train split samples labels and leaves the seed unset,
        # the other splits use all labels with a fixed seed of 0
        sample_label="sample" if split == "train" else "all",
        seed=None if split == "train" else 0,
    )
    data_loader: DataLoader = REGISTRY.build_data_loader(
        config=data_loader_config, default_args=default_args)
    return data_loader
示例#2
0
def get_data_loader(data_config: dict, mode: str) -> Optional[DataLoader]:
    """
    Return the corresponding data loader.

    Can't be placed in the same file of loader interfaces as it causes import cycle.

    :param data_config: a dictionary containing configuration for data,
        "dir" maps each mode to its directory path(s), "format" selects
        the file loader, "labeled" and the remaining entries
        (including "type") are forwarded to the data loader.
    :param mode: string, must be train/valid/test
    :return: DataLoader or None, returns None if the data_dir_paths is empty
    :raises ValueError: if mode is not train/valid/test, or if a configured
        data directory is not an existing directory.
    """
    # raise explicitly instead of assert: asserts are removed under `python -O`,
    # which would let an unknown mode fall through and silently return None
    if mode not in ("train", "valid", "test"):
        raise ValueError(
            f"mode must be one of train/valid/test, got {mode}")

    # covers None, "" and an empty list of directories
    data_dir_paths = data_config["dir"].get(mode)
    if not data_dir_paths:
        return None
    if isinstance(data_dir_paths, str):
        data_dir_paths = [data_dir_paths]
    # replace ~ with user home path
    data_dir_paths = [os.path.expanduser(path) for path in data_dir_paths]
    for data_dir_path in data_dir_paths:
        if not os.path.isdir(data_dir_path):
            raise ValueError(
                f"Data directory path {data_dir_path} for mode {mode}"
                f" is not a directory or does not exist")

    # prepare data loader config on a copy so the caller's dict is not mutated
    data_loader_config = deepcopy(data_config)
    # "dir" and "format" are passed via default_args below instead
    data_loader_config.pop("dir")
    data_loader_config.pop("format")
    # the registry expects the loader class under "name" instead of "type"
    data_loader_config["name"] = data_loader_config.pop("type")

    default_args = dict(
        data_dir_paths=data_dir_paths,
        file_loader=REGISTRY.get(category=FILE_LOADER_CLASS,
                                 key=data_config["format"]),
        labeled=data_config["labeled"],
        # only train mode samples labels and leaves the seed unset,
        # the other modes use all labels with a fixed seed of 0
        sample_label="sample" if mode == "train" else "all",
        seed=None if mode == "train" else 0,
    )
    data_loader = REGISTRY.build_data_loader(config=data_loader_config,
                                             default_args=default_args)
    return data_loader