Exemplo n.º 1
0
def _extract_file(file_path: pathlib.Path) -> pathlib.Path:
    """Extract an archive next to itself and return the folder it lives in.

    Files whose suffix is not one of ``.zip``, ``.tar``, ``.gz`` or ``.tgz``
    are left untouched. For archives, only the members that are not already
    listed by a reader opened on the destination folder are written.

    :param file_path: Path of the (possibly compressed) file.
    :return: The parent folder of ``file_path``, which is also the folder the
        archive members are written into.
    """
    save_folder = file_path.parent
    if file_path.suffix not in ('.zip', '.tar', '.gz', '.tgz'):
        return save_folder

    reader = core.create_reader(file_path)
    compressed_files = set(reader.list_files())
    existed_files = set(core.create_reader(save_folder).list_files())
    uncompressed_files = compressed_files - existed_files
    if not uncompressed_files:
        # Every archive member is already listed in the destination folder.
        return save_folder

    logging.info(f'Extracting {file_path} to {save_folder.resolve()}')
    for p in tqdm.tqdm(uncompressed_files):
        out = save_folder / p
        # exist_ok=True replaces the former exists()-then-mkdir() pair, which
        # could fail if the directory appeared between the check and the call.
        out.parent.mkdir(parents=True, exist_ok=True)
        # NOTE(review): the handle returned by reader.open() is never closed
        # explicitly -- confirm whether the reader requires that.
        with out.open('wb') as f:
            f.write(reader.open(p).read())
    return save_folder
Exemplo n.º 2
0
def stanford_dogs():
    """Build the Stanford Dogs dataset from its Kaggle source.

    For every image listed by the reader, the annotation stored under
    ``annotations/Annotation/<image folder name>/<image stem>`` is parsed with
    ``object_detection.parse_voc_annotation`` and each resulting label is
    tagged with the image path.

    :return: A ``Dataset`` wrapping one row per parsed label, plus the reader.
    """
    reader = core.create_reader('https://www.kaggle.com/jessicali9530/stanford-dogs-dataset')
    entries = []
    for img in reader.list_images():
        # Annotations mirror the image layout: same folder name, same stem.
        xml_fp = '/'.join(('annotations/Annotation', img.parent.name, img.stem))
        labels = object_detection.parse_voc_annotation(reader.open(xml_fp))
        for label in labels:
            label.file_path = str(img)
            entries.append(label)
    frame = pd.DataFrame(entries)
    return Dataset(frame, reader)
Exemplo n.º 3
0
 def from_label_func(cls, data_path: Union[str, Sequence[str]],
                     label_func: Callable[[pathlib.Path], Optional[pathlib.Path]],
                     pixel_to_class_func: Callable[[core.Reader], Dict[Sequence[int], str]]):
     """Create a dataset from a function that maps an image path to its label file.

     :param data_path: Passed to ``core.create_reader`` to obtain the reader.
     :param label_func: Called once with each image path listed by the reader;
         returns the path of the matching label file, or a falsy value
         (e.g. ``None``) to skip that image.
     :param pixel_to_class_func: Called with the reader; its result is passed
         to ``Dataset`` as the pixel-to-class mapping.
     :return: The created dataset.
     """
     reader = core.create_reader(data_path)
     pairs = []
     for p in reader.list_images():
         # Evaluate label_func only once per image: calling it twice doubles
         # the work and could store a different value than the one tested.
         label_path = label_func(p)
         if label_path:
             pairs.append({'file_path': p, 'label_file_path': label_path})
     pixel_to_class = pixel_to_class_func(reader)
     return Dataset(pd.DataFrame(pairs), reader, pixel_to_class)
Exemplo n.º 4
0
def wheat():
    """Build the Global Wheat Detection dataset from its Kaggle source.

    Reads ``train.csv``, splits the textual ``bbox`` column on commas into
    four numbers, and divides them by the image ``width``/``height`` columns.
    The third and fourth numbers are added to the first two to obtain
    ``xmax``/``ymax``.

    :return: A ``Dataset`` with columns ``file_path``, ``xmin``, ``ymin``,
        ``xmax``, ``ymax`` and ``class_name``, plus the reader.
    """
    reader = core.create_reader('https://www.kaggle.com/c/global-wheat-detection')
    raw = pd.read_csv(reader.open('train.csv'))

    # The bbox cell is split into four string pieces; the first and last
    # still carry the surrounding brackets, which are stripped below.
    parts = raw.bbox.str.split(',', expand=True)
    xmin = parts[0].str.strip('[ ').astype(float) / raw.width
    ymin = parts[1].str.strip(' ').astype(float) / raw.height
    xmax = parts[2].str.strip(' ').astype(float) / raw.width + xmin
    ymax = parts[3].str.strip(' ]').astype(float) / raw.height + ymin

    columns = {
        'file_path': 'train/' + raw.image_id + '.jpg',
        'xmin': xmin,
        'ymin': ymin,
        'xmax': xmax,
        'ymax': ymax,
        'class_name': raw.source,
    }
    return Dataset(pd.DataFrame(columns), reader)
Exemplo n.º 5
0
def read_csv(data_path: Union[str, Sequence[str]], columns=None):
    """Read one or more CSV files through a reader into a single DataFrame.

    The file name opened for each entry of ``data_path`` is the text after
    the last ``/`` once ``#`` and ``?select=`` have been turned into ``/``
    and ``+`` into a space.

    :param data_path: One path/URL or a sequence of them.
    :param columns: Optional column names handed to ``pandas.read_csv`` as
        ``names``; when truthy, ``header=0`` is used instead of ``'infer'``.
    :return: A ``(DataFrame, reader)`` tuple; multiple files are concatenated
        row-wise with a fresh index.
    """
    if columns:
        header = 0
    else:
        header = 'infer'
    reader = core.create_reader(data_path)

    # Resolve every file name before any file is opened.
    filenames = []
    for path in core.listify(data_path):
        normalized = path.replace('#', '/').replace('?select=', '/')
        normalized = normalized.replace('+', ' ')
        filenames.append(normalized.rsplit('/', 1)[-1])

    frames = []
    for name in filenames:
        frames.append(pd.read_csv(reader.open(name), header=header, names=columns))

    if len(frames) == 1:
        return frames[0], reader
    return pd.concat(frames, axis=0, ignore_index=True), reader
Exemplo n.º 6
0
    def from_voc(cls, data_path: Union[str, Sequence[str]], image_folders: str,
                 annotation_folders: str):
        """Build a dataset from data stored in the VOC format.

        :param data_path: Either a URL or a local path. For the former, data will be downloaded automatically.
        :param image_folders: The folder(s) containing the example images; passed through ``core.listify``.
        :param annotation_folders: The folder(s) containing the annotations; passed through ``core.listify`` and paired positionally with ``image_folders``.
        :return: The created dataset.
        """
        reader = core.create_reader(data_path)
        folder_pairs = zip(core.listify(image_folders),
                           core.listify(annotation_folders))
        # One DataFrame per (image folder, annotation folder) pair.
        frames = [
            _parse_voc(reader, images, annotations)
            for images, annotations in folder_pairs
        ]
        combined = pd.concat(frames, axis=0, ignore_index=True)
        return cls(combined, reader)
Exemplo n.º 7
0
    def from_label_func(
            cls, data_path: Union[str, Sequence[str]],
            label_func: Callable[[pathlib.Path], str]) -> 'Dataset':
        """Build a dataset by asking a function for the class name of each image.

        :param data_path: Either a URL or a local path. For the former, data will be downloaded automatically.
        :param label_func: A function that takes an image path (an instance of :class:`pathlib.Path`) and returns a string class name, or a falsy value such as None to skip this image.
        :return: The created dataset.
        """
        reader = core.create_reader(data_path)
        rows = []
        for image_path in reader.list_images():
            class_name = label_func(image_path)
            if not class_name:
                # A falsy label means this image is skipped.
                continue
            rows.append({'file_path': image_path, 'class_name': class_name})
        return cls(pd.DataFrame(rows), reader)