Python create_dataset示例

编程语言: Python

命名空间/包名称: catalyst.utils

方法/功能: create_dataset

hotexamples.com的示例: 2

Python create_dataset - 共找到 2 个示例。以下是从开源项目中提取的、评分最高的 catalyst.utils.create_dataset 真实 Python 代码示例。您可以为这些示例评分，帮助我们提高示例质量。

示例#1

显示文件

文件： tag2label.py 项目： vsokhatskyi/catalyst

def _prepare_df_from_dirs(in_dirs, tag_column_name, recursive: bool = False):
    """Build one dataframe of (tag, filepath) rows from several directories.

    Args:
        in_dirs: comma-separated string of directory paths; leading and
            trailing commas are ignored.
        tag_column_name: name given to the first column of every
            per-directory dataframe (the second column is ``"filepath"``).
        recursive: forwarded unchanged to ``create_dataset``.

    Returns:
        The per-directory dataframes concatenated with ``pd.concat`` and
        re-indexed from zero.

    Note:
        ``process_fn`` is handed to ``create_dataset``; presumably it is
        applied to every discovered file path -- confirm against
        ``create_dataset``.
    """
    splitted_dirs = in_dirs.strip(",").split(",")
    single_dir = len(splitted_dirs) == 1

    dfs = []
    for in_dir in splitted_dirs:
        if not in_dir.endswith("/"):
            in_dir = f"{in_dir}/"

        if single_dir:
            # remove all in_dir part from path
            replacement = ""
        else:
            # leaves last part of in_dir path,
            #  which identifies separate in_dir
            replacement = f"{in_dir.split('/')[-2]}/"

        # Bind the current directory explicitly (keyword-only defaults) rather
        # than reading the loop variable through a closure at call time.
        def process_fn(x, *, prefix=in_dir, replacement=replacement):
            # count=1: rewrite only the first occurrence of the directory
            # prefix; an unbounded replace would also mangle later path
            # segments that happen to contain the same text.
            return x.replace(prefix, replacement, 1)

        dataset = create_dataset(f"{in_dir}/**",
                                 process_fn=process_fn,
                                 recursive=recursive)

        dfs.append(
            create_dataframe(dataset, columns=[tag_column_name, "filepath"]))

    df = pd.concat(dfs).reset_index(drop=True)
    return df

示例#2

显示文件

文件： dataset.py 项目： catalyst-team/dl-course

def get_cat_dogs_dataset(
    dirs: str = "/app/data/data_cat_dogs/*",
    extension: str = "*.jpg",
    test_size: float = 0.2,
    random_state: int = 42,
    tag_file_path: tp.Optional[str] = None,
) -> tp.Tuple[tp.Dict[str, tp.Any], tp.Dict[str, tp.Any], int]:
    """Collect files, label them by class and split into train/valid records.

    Args:
        dirs: passed to ``utils.create_dataset`` as ``dirs``.
        extension: passed to ``utils.create_dataset`` as ``extension``.
        test_size: forwarded to ``utils.split_dataframe_train_test``.
        random_state: forwarded to ``utils.split_dataframe_train_test``.
        tag_file_path: when not ``None``, the class-to-label mapping is
            written to this path as JSON.

    Returns:
        A 3-tuple: train records, validation records (both produced by
        ``.to_dict("records")``) and the number of entries in the
        class-to-label mapping.

    NOTE(review): if the split results are pandas DataFrames,
    ``to_dict("records")`` yields a list of dicts, which does not match the
    ``Dict`` return annotation -- confirm.
    """
    files_by_class = utils.create_dataset(dirs=dirs, extension=extension)
    frame = utils.create_dataframe(
        files_by_class, columns=["class", "filepath"])
    class_to_label = utils.get_dataset_labeling(frame, "class")

    # Optionally persist the mapping so it can be reused elsewhere.
    if tag_file_path is not None:
        with open(tag_file_path, "w") as out:
            json.dump(class_to_label, out)

    labeled_frame = utils.map_dataframe(
        frame,
        tag_column="class",
        class_column="label",
        tag2class=class_to_label,
        verbose=False,
    )
    train_part, valid_part = utils.split_dataframe_train_test(
        labeled_frame, test_size=test_size, random_state=random_state)

    train_records = train_part.to_dict("records")
    valid_records = valid_part.to_dict("records")
    num_classes = len(class_to_label)
    return train_records, valid_records, num_classes