Exemplo n.º 1
0
    def download(self):
        """Download every URL in ``self.urls`` into ``self.root``.

        Zip archives are extracted into ``self.root`` and the archive file
        is removed afterwards; other files are kept as downloaded.
        """
        # ``import urllib`` alone does not make ``urllib.request`` available
        # on Python 3 unless something else already imported it -- import
        # the submodule explicitly.
        import urllib.request
        import zipfile

        if self.check_exists():
            return

        # Create the target directory; a pre-existing one is fine.
        try:
            os.makedirs(self.root)
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        for url in self.urls:
            print('Downloading ' + url)
            data = urllib.request.urlopen(url)
            filename = url.rpartition('/')[2]
            file_path = os.path.join(self.root, filename)
            ext = os.path.splitext(file_path)[1]
            with open(file_path, 'wb') as f:
                f.write(data.read())
            if ext == '.zip':
                # Unzip in place and drop the now-redundant archive.
                with zipfile.ZipFile(file_path) as zip_f:
                    zip_f.extractall(self.root)
                os.unlink(file_path)

        print('Done!')
Exemplo n.º 2
0
    def download(self):
        """Download the EMNIST data if it doesn't exist in processed_folder already."""
        from six.moves import urllib
        import gzip
        import shutil
        import zipfile

        if self._check_exists():
            return

        # Create raw/processed directories.  Each directory gets its own
        # try/except: with a single shared block, an EEXIST raised for the
        # first directory would skip creation of the second one entirely.
        for folder in (self.raw_folder, self.processed_folder):
            try:
                os.makedirs(os.path.join(self.root, folder))
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise

        print('Downloading ' + self.url)
        data = urllib.request.urlopen(self.url)
        filename = self.url.rpartition('/')[2]
        raw_folder = os.path.join(self.root, self.raw_folder)
        file_path = os.path.join(raw_folder, filename)
        with open(file_path, 'wb') as f:
            f.write(data.read())

        print('Extracting zip archive')
        with zipfile.ZipFile(file_path) as zip_f:
            zip_f.extractall(raw_folder)
        os.unlink(file_path)
        # The EMNIST zip contains a 'gzip' directory of .gz member files;
        # decompress each into the raw folder, then drop the directory.
        gzip_folder = os.path.join(raw_folder, 'gzip')
        for gzip_file in os.listdir(gzip_folder):
            if gzip_file.endswith('.gz'):
                print('Extracting ' + gzip_file)
                with open(os.path.join(raw_folder, gzip_file.replace('.gz', '')), 'wb') as out_f, \
                        gzip.GzipFile(os.path.join(gzip_folder, gzip_file)) as gz_f:
                    out_f.write(gz_f.read())
        shutil.rmtree(gzip_folder)

        # Process each split and save as torch files.
        for split in self.splits:
            print('Processing ' + split)
            training_set = (
                read_image_file(os.path.join(raw_folder, 'emnist-{}-train-images-idx3-ubyte'.format(split))),
                read_label_file(os.path.join(raw_folder, 'emnist-{}-train-labels-idx1-ubyte'.format(split)))
            )
            test_set = (
                read_image_file(os.path.join(raw_folder, 'emnist-{}-test-images-idx3-ubyte'.format(split))),
                read_label_file(os.path.join(raw_folder, 'emnist-{}-test-labels-idx1-ubyte'.format(split)))
            )
            with open(os.path.join(self.root, self.processed_folder, self._training_file(split)), 'wb') as f:
                torch.save(training_set, f)
            with open(os.path.join(self.root, self.processed_folder, self._test_file(split)), 'wb') as f:
                torch.save(test_set, f)

        print('Done!')
    def save_dataset(self):
        """Download the raw gzip archives, decompress them, and save the
        train/test tensors under the processed folder.

        No-op when the processed files already exist.
        """
        import gzip

        if self._check_exists(self.processed_folder):
            return

        # Create raw/processed directories, one try/except each so that an
        # already-existing first directory cannot skip creation of the second.
        for folder in (self.raw_folder, self.processed_folder):
            try:
                os.makedirs(os.path.join(self.root, folder))
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise

        for url in self.urls:
            print('Downloading ' + url)
            # NOTE(review): ``urllib`` must come from a module-level import
            # here (sibling methods import ``six.moves.urllib`` locally) --
            # confirm it is in scope.
            data = urllib.request.urlopen(url)
            filename = url.rpartition('/')[2]
            file_path = os.path.join(self.root, self.raw_folder, filename)
            with open(file_path, 'wb') as f:
                f.write(data.read())
            # Decompress alongside the archive, then remove the .gz file.
            with open(file_path.replace('.gz', ''), 'wb') as out_f, \
                    gzip.GzipFile(file_path) as zip_f:
                out_f.write(zip_f.read())
            os.unlink(file_path)

        # process and save as torch files
        print('Processing...')

        training_set = (read_image_file(
            os.path.join(self.root, self.raw_folder,
                         'train-images-idx3-ubyte')),
                        read_label_file(
                            os.path.join(self.root, self.raw_folder,
                                         'train-labels-idx1-ubyte')))
        test_set = (read_image_file(
            os.path.join(self.root, self.raw_folder,
                         't10k-images-idx3-ubyte')),
                    read_label_file(
                        os.path.join(self.root, self.raw_folder,
                                     't10k-labels-idx1-ubyte')))
        with open(
                os.path.join(self.root, self.processed_folder,
                             self.training_file), 'wb') as f:
            torch.save(training_set, f)
        with open(
                os.path.join(self.root, self.processed_folder, self.test_file),
                'wb') as f:
            torch.save(test_set, f)

        print('Done!')
Exemplo n.º 4
0
    def download(self):
        """Download the Moving MNIST data if it doesn't exist in processed_folder already."""
        import urllib.request
        import gzip

        if self._check_exists():
            return

        # Create raw/processed directories, one try/except each so an
        # existing first directory cannot skip creation of the second.
        for folder in (self.raw_folder, self.processed_folder):
            try:
                os.makedirs(os.path.join(self.root, folder))
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise

        for url in self.urls:
            print('Downloading ' + url)
            data = urllib.request.urlopen(url)
            filename = url.rpartition('/')[2]
            file_path = os.path.join(self.root, self.raw_folder, filename)
            with open(file_path, 'wb') as f:
                f.write(data.read())
            # Decompress next to the archive, then remove the .gz file.
            with open(file_path.replace('.gz', ''), 'wb') as out_f, \
                    gzip.GzipFile(file_path) as zip_f:
                out_f.write(zip_f.read())
            os.unlink(file_path)

        # process and save as torch files
        print('Processing...')

        # Load the sequence array once (it was previously read from disk
        # twice) and slice it into train/test along the sequence axis.
        sequences = np.load(
            os.path.join(self.root, self.raw_folder,
                         'mnist_test_seq.npy')).swapaxes(0, 1)
        training_set = torch.from_numpy(sequences[:-self.split])
        test_set = torch.from_numpy(sequences[-self.split:])

        with open(
                os.path.join(self.root, self.processed_folder,
                             self.training_file), 'wb') as f:
            torch.save(training_set, f)
        with open(
                os.path.join(self.root, self.processed_folder, self.test_file),
                'wb') as f:
            torch.save(test_set, f)

        print('Done!')
Exemplo n.º 5
0
    def _download_files(self):
        for key, url in self.urls.items():
            filename = url.rpartition('/')[2]
            file_path = join(self.root, self.raw_folder, filename)

            if not os.path.exists(file_path) and\
              not os.path.exists(file_path.replace('.zip', '')):

                print('Downloading ' + url)
                data = urllib.request.urlopen(url)
                with open(file_path, 'wb') as f:
                    f.write(data.read())
Exemplo n.º 6
0
    def download(self):
        """Download the MusicNet data if it doesn't exist in ``raw_folder`` already."""
        from six.moves import urllib

        if self._check_exists():
            return

        # Create the raw folder; a pre-existing one is fine.
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        filename = self.url.rpartition('/')[2]
        file_path = os.path.join(self.root, self.raw_folder, filename)
        if not os.path.exists(file_path):
            print('Downloading ' + self.url)
            data = urllib.request.urlopen(self.url)
            with open(file_path, 'wb') as f:
                # stream the download to disk (it might not fit in memory!)
                while True:
                    chunk = data.read(16*1024)
                    if not chunk:
                        break
                    f.write(chunk)

        # Extract unless every expected folder is already in place.
        if not all(os.path.exists(os.path.join(self.root, folder))
                   for folder in self.extracted_folders):
            print('Extracting ' + filename)
            if call(["tar", "-xf", file_path, '-C', self.root, '--strip', '1']) != 0:
                raise OSError("Failed tarball extraction")

        # process and save as torch files
        print('Processing...')

        self.process_data(self.test_data)

        trees = self.process_labels(self.test_labels)
        with open(os.path.join(self.root, self.test_labels, self.test_tree), 'wb') as f:
            pickle.dump(trees, f)

        self.process_data(self.train_data)

        trees = self.process_labels(self.train_labels)
        with open(os.path.join(self.root, self.train_labels, self.train_tree), 'wb') as f:
            pickle.dump(trees, f)

        print('Download Complete')
Exemplo n.º 7
0
    def download(self):
        """Download the REDE data if it doesn't exist in `processed_folder` already."""
        from six.moves import urllib

        if self._check_exists():
            return

        # Create the raw and processed directories; an existing directory
        # is fine, anything else is re-raised.
        for subdir in (self.raw_folder, self.processed_folder):
            try:
                os.makedirs(os.path.join(self.root, subdir))
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise

        # Fetch every source file into the raw folder.
        for url in self.urls:
            print('Downloading ' + url)
            response = urllib.request.urlopen(url)
            target = os.path.join(self.root, self.raw_folder,
                                  url.rpartition('/')[2])
            with open(target, 'wb') as out:
                out.write(response.read())

        # Process and save as torch files.
        print('Processing...')

        full_set = (
            read_file(
                os.path.join(self.root, self.raw_folder,
                             '1848-62-111-images.pkl')),
            read_file(
                os.path.join(self.root, self.raw_folder,
                             '1848-5-parameters.pkl'))
            # read_file(os.path.join(self.root, self.raw_folder, '1848-frequencies_modes.pkl'))
        )
        with open(
                os.path.join(self.root, self.processed_folder, self.full_file),
                'wb') as f:
            torch.save(full_set, f)

        print('Done!')
Exemplo n.º 8
0
    def download(self):
        """Download the split files and raw archives, then unzip the archives
        into the processed folder."""
        import urllib.request
        import zipfile

        if self._check_exists():
            return

        # Create each directory in its own try/except: with one shared
        # block, an EEXIST from an existing directory would skip creation
        # of the directories after it.
        for folder in (self.splits_folder, self.raw_folder,
                       self.processed_folder):
            try:
                os.makedirs(os.path.join(self.root, folder))
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise

        for k, url in self.vinyals_split_sizes.items():
            print('== Downloading ' + url)
            data = urllib.request.urlopen(url)
            filename = url.rpartition('/')[-1]
            file_path = os.path.join(self.root, self.splits_folder, filename)
            with open(file_path, 'wb') as f:
                f.write(data.read())

        for url in self.urls:
            print('== Downloading ' + url)
            data = urllib.request.urlopen(url)
            filename = url.rpartition('/')[2]
            file_path = os.path.join(self.root, self.raw_folder, filename)
            with open(file_path, 'wb') as f:
                f.write(data.read())
            file_processed = os.path.join(self.root, self.processed_folder)
            print("== Unzip from " + file_path + " to " + file_processed)
            # Context manager closes the archive even if extraction fails.
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(file_processed)
        print("Download finished.")
Exemplo n.º 9
0
    def download(self):
        """Download the notMNIST data if it doesn't exist in processed_folder already."""
        import urllib.request

        if self._check_exists():
            return

        # Create raw/processed directories, one try/except each so an
        # existing raw folder cannot skip creation of the processed one.
        for folder in (self.raw_folder, self.processed_folder):
            try:
                os.makedirs(os.path.join(self.root, folder))
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise

        # Only download when the .mat file is not already present.
        if not os.path.exists(
                os.path.join(self.root, self.raw_folder,
                             'notMNIST_small.mat')):
            for url in self.urls:
                print('Downloading ' + url)
                data = urllib.request.urlopen(url)
                filename = url.rpartition('/')[2]
                file_path = os.path.join(self.root, self.raw_folder, filename)
                with open(file_path, 'wb') as f:
                    f.write(data.read())

        # process and save as torch files
        print('Processing...')
        import scipy.io as sio

        data = sio.loadmat(
            os.path.join(self.root, self.raw_folder, 'notMNIST_small.mat'))
        images = torch.ByteTensor(data['images']).permute(
            2, 0, 1)  # The data is stored as HxWxN, need to permute!
        labels = torch.LongTensor(data['labels'])

        data_set = (
            images,
            labels,
        )

        with open(
                os.path.join(self.root, self.processed_folder,
                             self.training_file), 'wb') as f:
            torch.save(data_set, f)

        print('Done!')
def download_url(url, folder, log=True):
    """Download `url` into `folder` and return the local file path.

    Args:
        url: remote address; the final path segment becomes the file name.
        folder: destination directory (created if necessary).
        log: when True, print a progress line before downloading.

    Returns:
        The path of the downloaded file.
    """
    if log:
        print('Downloading', url)

    makedirs(folder)

    response = urllib.request.urlopen(url)
    name = url.rpartition('/')[2]
    out_path = osp.join(folder, name)

    with open(out_path, 'wb') as out_file:
        out_file.write(response.read())

    return out_path
Exemplo n.º 11
0
    def download(self):
        """Download the CamVid data if it doesn't exist in processed_folder already."""
        self.raw_folder.mkdir(exist_ok=True, parents=True)
        self.processed_folder.mkdir(exist_ok=True, parents=True)

        print(f'Downloading {self.urls["raw"]}')

        data = urllib.request.urlopen(self.urls["raw"])
        # The temp file must be binary ('wb'): urlopen().read() returns
        # bytes, and writing bytes to a text-mode file raises TypeError.
        # Flush before handing the name to ZipFile so the archive on disk
        # is complete when it is reopened.
        with tempfile.NamedTemporaryFile('wb') as tmp:
            tmp.write(data.read())
            tmp.flush()
            with zipfile.ZipFile(tmp.name) as zip_f:
                zip_f.extractall(self.raw_folder)

        print(f'Downloading {self.urls["labels"]}')

        data = urllib.request.urlopen(self.urls["labels"])
        with tempfile.NamedTemporaryFile('wb') as tmp:
            tmp.write(data.read())
            tmp.flush()
            with zipfile.ZipFile(tmp.name) as zip_f:
                zip_f.extractall(self.raw_folder / 'LabeledApproved_full')

        print(f'Downloading {self.urls["classes"]}')

        data = urllib.request.urlopen(self.urls["classes"])
        with open(self.processed_folder / 'label_colors.txt', 'wb') as class_list:
            class_list.write(data.read())

        # process and save as torch files
        print('Processing...')

        self.class_to_idx, colours = self.read_label_file(self.processed_folder / 'label_colors.txt')

        with h5py.File(self.training_file, 'w') as f_train, h5py.File(self.test_file, 'w') as f_test:
            self.process_raw_image_files(self.raw_folder / '701_StillsRaw_full', f_train, f_test)
            self.process_label_image_files(self.raw_folder / 'LabeledApproved_full', colours, f_train, f_test)

        print('Done!')
Exemplo n.º 12
0
    def download(self):
        """Download the rar files data if it doesn't exist in processed_folder already."""
        from six.moves import urllib
        import rarfile

        if self._check_exists():
            return

        # Create raw/processed directories, one try/except each so an
        # existing raw folder cannot skip creation of the processed one.
        for folder in (self.raw_folder, self.processed_folder):
            try:
                os.makedirs(os.path.join(self.root, folder))
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise

        for url in self.urls:
            print('Downloading ' + url)
            data = urllib.request.urlopen(url)
            filename = url.rpartition('/')[-1]
            file_path = os.path.join(self.root, self.raw_folder, filename)
            with open(file_path, 'wb') as f:
                f.write(data.read())
            # Extract into the root-anchored raw folder.  (Previously this
            # extracted into ``self.raw_folder`` relative to the current
            # working directory, inconsistent with every other path here.)
            with rarfile.RarFile(file_path) as rar_f:
                rar_f.extractall(os.path.join(self.root, self.raw_folder))
            #os.unlink(file_path)

        # process and save as torch files
        print('Processing...')
        train_set = read_rar_file(
            rarfile.RarFile(
                os.path.join(self.root, self.raw_folder,
                             'experimental_dataset_2013.rar')))
        test_set = read_rar_file(
            rarfile.RarFile(
                os.path.join(self.root, self.raw_folder,
                             'icdar2013_benchmarking_dataset.rar')))
        with open(
                os.path.join(self.root, self.processed_folder,
                             self.training_file), 'wb') as f:
            torch.save(train_set, f)
        with open(
                os.path.join(self.root, self.processed_folder, self.test_file),
                'wb') as f:
            torch.save(test_set, f)

        print('Done!')
Exemplo n.º 13
0
def load_l8(path, use_hr, use_mr, use_lr):
    """Read the selected Landsat-8 bands from `path` as float32, clip the
    values to [0, 1], and return the normalized array."""
    selected = []
    if use_hr:
        selected += L8_BANDS_HR
    if use_mr:
        selected += L8_BANDS_MR
    if use_lr:
        selected += L8_BANDS_LR
    with rasterio.open(path) as src:
        raster = src.read(sorted(selected))
    raster = np.clip(raster.astype(np.float32), 0, 1)
    return normalize_L8(raster)
Exemplo n.º 14
0
def load_s2(path, use_hr, use_mr, use_lr):
    """Read the selected Sentinel-2 bands from `path` as float32, clip the
    values to [0, 10000], and return the normalized array."""
    selected = []
    if use_hr:
        selected += S2_BANDS_HR
    if use_mr:
        selected += S2_BANDS_MR
    if use_lr:
        selected += S2_BANDS_LR
    with rasterio.open(path) as src:
        raster = src.read(sorted(selected))
    raster = np.clip(raster.astype(np.float32), 0, 10000)
    return normalize_S2(raster)
Exemplo n.º 15
0
def load_s1(path, imgTransform):
    """Read the two Sentinel-1 bands from `path`, replace NaNs, clip to
    [-25, 0] dB, and (when no `imgTransform` is supplied) rescale each band
    into roughly [0, 1].  Returns a (2, H, W) float32 stack."""
    with rasterio.open(path) as src:
        vv = src.read(1)
        vh = src.read(2)

    bands = []
    for raw in (vv, vh):
        band = raw.astype(np.float32)
        band = np.nan_to_num(band)
        band = np.clip(band, -25, 0)
        if not imgTransform:
            # Map [-25, 0] -> [0, 1] when the caller does no transform.
            band /= 25
            band += 1
        bands.append(band)

    # band3 = abs(band2 - band1)
    # band3 /= 25

    return np.stack(bands)  # , band3))
Exemplo n.º 16
0
 def download_data(url,save_path):
     '''
     Download helper: fetches the raw data archive when it is not present.
     :param url: remote address of the archive.
     :param save_path: directory the archive is written into (created if missing).
     :return: True once the file has been written.
     '''
     print('downloading data from {}'.format(url))
     if not os.path.exists(save_path):
         os.makedirs(save_path)
     response = urllib.request.urlopen(url)
     with open(os.path.join(save_path, 'DD.zip'), 'wb') as out:
         out.write(response.read())
     return True
Exemplo n.º 17
0
    def download(self):
        """Download, unzip and cache the PhotoTour patch set for ``self.name``.

        No-op when the cached torch file already exists.  Otherwise the raw
        archive is fetched (unless already downloaded), extracted into
        ``self.data_dir``, and the parsed (images, info, matches) tuple is
        saved to ``self.data_file``.
        """
        from six.moves import urllib
        import zipfile

        print('\n-- Loading PhotoTour dataset: {}'.format(self.name))

        if self._check_exists():
            print('Found cached data {}'.format(self.data_file))
            return

        # Create the root directory; an already-existing one is fine.
        try:
            os.makedirs(self.root)
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        if not self._check_downloaded():
            url = self.urls[self.name]
            data = urllib.request.urlopen(url)
            filename = url.rpartition('/')[2]
            file_path = os.path.join(self.root, filename)

            print('Downloading {}\nDownloading {}\n\nIt might take while. '
                  'Please grab yourself a coffee and relax.\n'.format(
                      url, file_path))

            with open(file_path, 'wb') as f:
                f.write(data.read())

            print('Extracting data {}\n'.format(self.data_down))

            # Unzip into the data directory, then drop the archive.
            with zipfile.ZipFile(file_path, 'r') as z:
                z.extractall(self.data_dir)
            os.unlink(file_path)

        # process and save as torch files
        print('Caching data {}'.format(self.data_file))

        # Parse image patches, metadata and match lists into one tuple
        # and cache it as a single torch file.
        data_set = (read_image_file(self.data_dir, self.image_ext, self.size,
                                    self.lens[self.name]),
                    read_info_file(self.data_dir, self.info_file),
                    read_matches_files(self.data_dir, self.matches_files))

        with open(self.data_file, 'wb') as f:
            torch.save(data_set, f)
 def __init__(self, root, json, vocab, transform=None):
     """Set the path for images, captions and vocabulary wrapper.
     
     Args:
         root: image directory.
         json: coco annotation file path.
         vocab: vocabulary wrapper.
         transform: image transformer.
     """
     self.root = root
     # NOTE(review): the annotation file is parsed with ast.literal_eval,
     # so it must contain a Python-literal dict (JSON tokens like
     # true/false/null would fail) -- confirm the file format matches.
     with open(json, "r") as data:
         dict_data = ast.literal_eval(data.read())
     self.coco = dict_data
     # Annotation ids in file order; used for indexed access later.
     self.ids = list(self.coco.keys())
     self.vocab = vocab
     self.transform = transform
Exemplo n.º 19
0
    def data_download(self):
        """Download the VCTK data if it doesn't exist in processed_folder already.

        Returns:
            Tuple ``(raw_abs_dir, dset_abs_path, processed_abs_dirs, splits)``
            of the raw directory, the extracted dataset path, the per-split
            processed directories, and the split names.
        """
        import urllib.request
        import tarfile

        raw_abs_dir = os.path.join(self.root, self.raw_folder)
        splits = ['train', 'valid', 'test']
        processed_abs_dirs = [os.path.join(self.root, self.processed_folder, \
                                           split) for split in splits]
        dset_abs_path = os.path.join(self.root, self.raw_folder,
                                     self.dset_path)
        if self._check_exists():
            return raw_abs_dir, dset_abs_path, processed_abs_dirs, splits

        # Create the raw dir and every per-split processed dir.  Each gets
        # its own try/except: previously one already-existing split
        # directory raised EEXIST and skipped creation of the rest.
        for directory in [raw_abs_dir] + processed_abs_dirs:
            try:
                os.makedirs(directory)
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise

        url = self.url
        print('Downloading ' + url)
        filename = url.rpartition('/')[2]
        file_path = os.path.join(self.root, self.raw_folder, filename)
        if not os.path.isfile(file_path):
            data = urllib.request.urlopen(url)
            with open(file_path, 'wb') as f:
                f.write(data.read())
        if not os.path.exists(dset_abs_path):
            with tarfile.open(file_path) as tar_f:
                tar_f.extractall(raw_abs_dir)
        else:
            print("Using existing raw folder")
        if not self.dev_mode:
            # Drop the archive unless we keep it for development runs.
            os.unlink(file_path)
        return raw_abs_dir, dset_abs_path, processed_abs_dirs, splits
Exemplo n.º 20
0
    def download(self):
        """Download the MNIST data if it doesn't exist in processed_folder already."""
        from six.moves import urllib
        import gzip

        if self._check_exists():
            return

        # Create raw/processed directories, one try/except each: a shared
        # block would let an existing raw folder raise EEXIST and skip
        # creation of the processed folder.
        for folder in (self.raw_folder, self.processed_folder):
            try:
                os.makedirs(os.path.join(self.root, folder))
            except OSError as e:
                if e.errno == errno.EEXIST:
                    pass
                else:
                    raise

        for url in self.urls:
            print('Downloading ' + url)
            data = urllib.request.urlopen(url)
            filename = url.rpartition('/')[2]
            file_path = os.path.join(self.root, self.raw_folder, filename)
            with open(file_path, 'wb') as f:
                f.write(data.read())
            # Decompress next to the archive, then remove the .gz file.
            with open(file_path.replace('.gz', ''), 'wb') as out_f, \
                    gzip.GzipFile(file_path) as zip_f:
                out_f.write(zip_f.read())
            os.unlink(file_path)

        # process and save as torch files
        print('Processing...')

        training_set = (
            read_image_file(os.path.join(self.root, self.raw_folder, 'train-images-idx3-ubyte')),
            read_label_file(os.path.join(self.root, self.raw_folder, 'train-labels-idx1-ubyte'))
        )
        test_set = (
            read_image_file(os.path.join(self.root, self.raw_folder, 't10k-images-idx3-ubyte')),
            read_label_file(os.path.join(self.root, self.raw_folder, 't10k-labels-idx1-ubyte'))
        )
        with open(os.path.join(self.root, self.processed_folder, self.training_file), 'wb') as f:
            torch.save(training_set, f)
        with open(os.path.join(self.root, self.processed_folder, self.test_file), 'wb') as f:
            torch.save(test_set, f)

        print('Done!')
Exemplo n.º 21
0
def maybe_download(root):
    """Download and unzip the Omniglot background/evaluation splits into
    ``root`` unless they are already present.

    Returns:
        dict mapping 'meta_train'/'meta_test' to the extracted directories.
    """
    from six.moves import urllib
    import zipfile

    processed_path = os.path.join(root, 'processed')
    splits_dirs = {
        'meta_train': os.path.join(processed_path, 'images_background'),
        'meta_test': os.path.join(processed_path, 'images_evaluation')
    }
    if check_exists(splits_dirs):
        return splits_dirs

    # download files
    data_urls = [
        'https://github.com/brendenlake/omniglot/raw/master/python/images_background.zip',
        'https://github.com/brendenlake/omniglot/raw/master/python/images_evaluation.zip'
    ]

    raw_folder = 'raw'
    processed_folder = 'processed'
    # One try/except per directory: with a single shared block, an existing
    # 'raw' folder would raise EEXIST and skip creating 'processed'.
    for folder in (raw_folder, processed_folder):
        try:
            os.makedirs(os.path.join(root, folder))
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

    for url in data_urls:
        print('== Downloading ' + url)
        data = urllib.request.urlopen(url)
        filename = url.rpartition('/')[2]
        file_path = os.path.join(root, raw_folder, filename)
        with open(file_path, 'wb') as f:
            f.write(data.read())
        file_processed = os.path.join(root, processed_folder)
        print("== Unzip from " + file_path + " to " + file_processed)
        # Context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(file_processed)
    print("Download finished.")
    return splits_dirs
Exemplo n.º 22
0
def load_lc(path, no_savanna=False, igbp=True):
    """Load a land-cover raster and map it to zero-based DFC2020 labels.

    Args:
        path: Path to a raster file readable by rasterio.
        no_savanna: If True, drop the savanna class (3) and shift the
            classes above it down by one.
        igbp: If True, remap IGBP class ids through ``DFC2020_CLASSES``;
            otherwise use the raw values as int64.

    Returns:
        Label array with zero-based classes; pixels that were class 0
        become the ignore value 255.
    """
    # read the first band as the label map
    with rasterio.open(path) as src:
        labels = src.read(1)

    # convert IGBP to dfc2020 classes
    if igbp:
        labels = np.take(DFC2020_CLASSES, labels)
    else:
        labels = labels.astype(np.int64)

    # optionally remove the savanna class and close the gap
    if no_savanna:
        labels[labels == 3] = 0
        labels[labels > 3] -= 1

    # shift to zero-based labels; former zeros become the ignore mask
    labels -= 1
    labels[labels == -1] = 255
    return labels
Exemplo n.º 23
0
Arquivo: qm7.py Projeto: zizai/se3cnn
    def download(self):
        """Download the QM7 .mat file into ``self.root`` if not already present."""
        from six.moves import urllib

        if self._check_exists():
            return

        # create the target directory; tolerate an existing one
        try:
            os.makedirs(self.root)
        except OSError as e:
            # FIX: the original compared against ``os.errno.EEXIST``; the
            # ``os.errno`` alias was removed in Python 3.6, so reference the
            # ``errno`` module directly (as the rest of this file does).
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        print('Downloading ' + self.url)
        data = urllib.request.urlopen(self.url)
        file_path = os.path.join(self.root, self.mat_file)
        with open(file_path, 'wb') as f:
            f.write(data.read())

        print('Done!')
Exemplo n.º 24
0
    def download(self):
        """Download the dSprites archive and cache it as a torch training file.

        Skips all work when the processed file already exists; otherwise
        downloads ``self.url`` into the raw folder, loads the numpy archive
        and saves an ``(images, latent-class labels)`` tuple to the
        processed folder.
        """
        from six.moves import urllib
        import numpy as np

        if self._check_exists():
            return

        # Download files
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            # already-existing folders are fine on re-runs; anything else is fatal
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        print('Downloading ' + self.url)
        data = urllib.request.urlopen(self.url)
        filename = os.path.basename(self.url)
        file_path = os.path.join(self.root, self.raw_folder, filename)
        with open(file_path, 'wb') as f:
            f.write(data.read())

        # Process and save as torch files
        print('Processing...')

        with open(file_path, 'rb') as f:
            raw_dataset = np.load(f)
            # presumably the 'imgs' array is binary {0,1}, scaled here to
            # 0/255 byte images — TODO confirm against the archive contents
            training_set = (255 * torch.ByteTensor(raw_dataset['imgs']),
                            torch.ByteTensor(raw_dataset['latents_classes']))

        with open(
                os.path.join(self.root, self.processed_folder,
                             self.training_file), 'wb') as f:
            torch.save(training_set, f)

        print('Done!')
Exemplo n.º 25
0
    def download(self):
        """Fetch every raw archive listed in ``self.urls`` into the raw folder.

        Does nothing when the dataset is already present on disk.
        """
        from six.moves import urllib
        import gzip

        if self._check_exists():
            return

        # make sure the raw folder exists; a pre-existing one is fine
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        for url in self.urls:
            print('Downloading ' + url)
            response = urllib.request.urlopen(url)
            target = os.path.join(self.root, self.raw_folder,
                                  url.rpartition('/')[2])
            with open(target, 'wb') as out_file:
                out_file.write(response.read())
Exemplo n.º 26
0
    def __download(self):
        """
        Fetch the four KMNIST idx archives from the CODH server into
        ``self.__path``, unless the dataset was downloaded previously.
        """
        from six.moves import urllib

        if self.__check_exists():
            return

        print("Downloading KMNIST dataset")

        urls = [
            'http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-images-idx3-ubyte.gz',
            'http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz',
            'http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-images-idx3-ubyte.gz',
            'http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-labels-idx1-ubyte.gz',
        ]

        # ensure the destination directory exists; tolerate a pre-existing one
        try:
            os.makedirs(self.__path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        for url in urls:
            print('Downloading ' + url)
            response = urllib.request.urlopen(url)
            destination = os.path.join(self.__path, url.rpartition('/')[2])
            with open(destination, 'wb') as out_file:
                out_file.write(response.read())

        print('Done!')
Exemplo n.º 27
0
    def download(self):
        """Download the FGVC-Aircraft data if it doesn't exist already.

        Downloads the tarball next to ``self.root``, extracts it, renames the
        extracted folder to ``self.root`` when needed, and deletes the tarball.
        """
        from six.moves import urllib
        import tarfile

        if self._check_exists():
            return

        # prepare to download data to PARENT_DIR/fgvc-aircraft-2013.tar.gz
        print('Downloading %s ... (may take a few minutes)' % self.url)

        parent_dir = os.path.abspath(os.path.join(self.root, os.pardir))
        tar_name = self.url.rpartition('/')[-1]
        tar_path = os.path.join(parent_dir, tar_name)
        data = urllib.request.urlopen(self.url)

        # download .tar.gz file
        with open(tar_path, 'wb') as f:
            f.write(data.read())

        # extract .tar.gz to PARENT_DIR/fgvc-aircraft-2013b
        # FIX: the original used tar_path.strip('.tar.gz'), which strips any
        # of the characters ".targz" from BOTH ends of the path, not the
        # suffix — remove the exact suffix instead.
        suffix = '.tar.gz'
        if tar_path.endswith(suffix):
            data_folder = tar_path[:-len(suffix)]
        else:
            data_folder = tar_path
        print('Extracting %s to %s ... (may take a few minutes)' %
              (tar_path, data_folder))
        # context manager closes the tar handle even if extraction fails
        with tarfile.open(tar_path) as tar:
            tar.extractall(parent_dir)

        # if necessary, rename data folder to self.root
        if not os.path.samefile(data_folder, self.root):
            print('Renaming %s to %s ...' % (data_folder, self.root))
            os.rename(data_folder, self.root)

        # delete .tar.gz file
        print('Deleting %s ...' % tar_path)
        os.remove(tar_path)

        print('Done!')
Exemplo n.º 28
0
    def download(self):
        """Download CODH char shapes data if it doesn't exist in
        processed_folder already.

        For each book id, skips the download when the local zip content
        already passes the MD5 integrity check; otherwise downloads,
        extracts the zip into a same-named directory, and removes the zip.
        """

        from six.moves import urllib
        import zipfile

        # create the raw folder; a pre-existing directory is fine
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        for book_id in self.book_ids:
            url = self.download_url_format.format(book_id, book_id)
            filename = url.rpartition('/')[2]
            file_path = os.path.join(self.root, self.raw_folder, filename)

            if self._check_integrity(file_path, self.zips_md5[filename]):
                print('File already downloaded and verified: ' + filename)
                continue

            print('Downloading ' + url)
            # FIX: open the connection only after the integrity check — the
            # original called urlopen() before it, opening a network
            # connection even for books that were already verified.
            data = urllib.request.urlopen(url)
            with open(file_path, 'wb') as f:
                f.write(data.read())

            print('Extracting data: ' + filename)
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                target_dir = file_path.replace('.zip', '')
                zip_ref.extractall(target_dir)
            # remove download zip file
            os.unlink(file_path)
Exemplo n.º 29
0
    def download(self):
        """Download the MNIST data if it doesn't exist in processed_folder already.

        Builds a training set with the few-shot class (``self.few_shot_class``)
        filtered out, and a test set that is either EMNIST letters
        (``self.test_emnist``) or the held-out MNIST test samples; both are
        serialized with ``torch.save``.
        """
        from six.moves import urllib
        import gzip

        print("download: trying to download")
        if self._check_exists():
            print("download: already exists so exiting")
            return

        # download files
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            # existing directories are expected on re-runs
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        for url in self.urls:
            print('Downloading ' + url)
            data = urllib.request.urlopen(url)
            filename = url.rpartition('/')[2]
            file_path = os.path.join(self.root, self.raw_folder, filename)
            with open(file_path, 'wb') as f:
                f.write(data.read())
            # decompress next to the archive, then drop the archive
            with open(file_path.replace('.gz', ''), 'wb') as out_f, \
                    gzip.GzipFile(file_path) as zip_f:
                out_f.write(zip_f.read())
            os.unlink(file_path)

        # process and save as torch files
        print('Processing...')
        train_label, train_non_few_shot_ids, train_few_shot_ids = read_label_file(
            os.path.join(self.root, self.raw_folder,
                         'train-labels-idx1-ubyte'), self.few_shot_class)
        train_img = read_image_file(os.path.join(self.root, self.raw_folder,
                                                 'train-images-idx3-ubyte'),
                                    non_few_shot_ids=train_non_few_shot_ids)

        training_set = (train_img, train_label)

        test_label, test_non_few_shot_ids, test_few_shot_ids = read_label_file(
            os.path.join(self.root, self.raw_folder, 't10k-labels-idx1-ubyte'),
            self.few_shot_class)
        test_img = read_image_file(os.path.join(self.root, self.raw_folder,
                                                't10k-images-idx3-ubyte'),
                                   few_shot_ids=test_few_shot_ids)

        if self.test_emnist:
            print("Download: Entering Emnist test")
            from emnist import extract_test_samples
            images, labels = extract_test_samples('letters')
            print(images.shape)
            print(labels.shape)
            # shuffle the candidate test ids deterministically
            test_sample_ids = np.where(labels < 10)[0]
            np.random.seed(10)
            np.random.shuffle(test_sample_ids)

            print('test_sample_ids_len', len(test_sample_ids))
            # grab labels and images for the selected ids
            labels = labels[test_sample_ids]
            images = images[test_sample_ids]
            print("After selecting one class")
            print(images.shape)
            print(labels.shape)
            # BUG FIX: these were set literals ({...}), which discard order —
            # images and labels could come back swapped when the file is
            # loaded. Use (images, labels) tuples like the else-branch.
            if self.max_test_sample:
                test_set = (
                    torch.ByteTensor(list(images[:self.max_test_sample])).view(
                        -1, 28, 28),
                    torch.LongTensor(list(labels[:self.max_test_sample]))
                )
            else:
                test_set = (
                    torch.ByteTensor(list(images)).view(-1, 28, 28),
                    torch.LongTensor(list(labels))
                )
        else:
            if self.max_test_sample:
                print('testing max test sample')
                test_set = (test_img[:self.max_test_sample],
                            test_label[:self.max_test_sample])

            else:
                test_set = (test_img, test_label)
        print('confirming test size')
        with open(
                os.path.join(self.root, self.processed_folder,
                             self.training_file), 'wb') as f:
            torch.save(training_set, f)
        with open(
                os.path.join(self.root, self.processed_folder, self.test_file),
                'wb') as f:
            torch.save(test_set, f)

        print('Done!')
Exemplo n.º 30
0
    def download(self):
        """Download the MNISTM data.

        Fetches the pickled MNIST-M archive at ``self.url`` (skipped when the
        decompressed copy is already present), pairs its train/test images
        with label tensors taken from torchvision's MNIST, and serializes
        both splits with ``torch.save``.
        """
        # import essential packages
        from six.moves import urllib
        import gzip
        import pickle
        from torchvision import datasets

        # check if dataset already exists
        if self._check_exists():
            return

        # make data dirs
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            # already-existing folders are fine on re-runs
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        # download pkl files
        logging.info("Downloading " + self.url)
        filename = self.url.rpartition("/")[2]
        file_path = os.path.join(self.root, self.raw_folder, filename)
        if not os.path.exists(file_path.replace(".gz", "")):
            data = urllib.request.urlopen(self.url)
            with open(file_path, "wb") as f:
                f.write(data.read())
            # decompress next to the archive, then delete the archive
            with open(file_path.replace(".gz", ""), "wb") as out_f, gzip.GzipFile(
                file_path
            ) as zip_f:
                out_f.write(zip_f.read())
            os.unlink(file_path)

        # process and save as torch files
        logging.info("Processing...")

        # load MNIST-M images from pkl file (archive keys are bytes: b"train"/b"test")
        with open(file_path.replace(".gz", ""), "rb") as f:
            mnist_m_data = pickle.load(f, encoding="bytes")
        mnist_m_train_data = torch.ByteTensor(mnist_m_data[b"train"])
        mnist_m_test_data = torch.ByteTensor(mnist_m_data[b"test"])

        # get MNIST labels — presumably MNIST-M samples follow MNIST's
        # ordering so the labels line up; confirm against the dataset docs
        mnist_train_labels = datasets.MNIST(
            root=self.mnist_root, train=True, download=True
        ).targets
        mnist_test_labels = datasets.MNIST(
            root=self.mnist_root, train=False, download=True
        ).targets

        # save MNIST-M dataset
        training_set = (mnist_m_train_data, mnist_train_labels)
        test_set = (mnist_m_test_data, mnist_test_labels)
        with open(
            os.path.join(self.root, self.processed_folder, self.training_file), "wb"
        ) as f:
            torch.save(training_set, f)
        with open(
            os.path.join(self.root, self.processed_folder, self.test_file), "wb"
        ) as f:
            torch.save(test_set, f)

        logging.info("[DONE]")
Exemplo n.º 31
0
    def download(self):
        """Download the MNIST data if it doesn't exist in processed_folder already.

        After downloading and decompressing the four idx archives, samples
        whose label equals ``self.get_rid_of`` are filtered out of both the
        training and test splits before the tensors are serialized.
        """
        from six.moves import urllib
        import gzip

        if self._check_exists():
            return

        # download files
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            # existing directories are expected on re-runs
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        for url in self.urls:
            print('Downloading ' + url)
            data = urllib.request.urlopen(url)
            filename = url.rpartition('/')[2]
            file_path = os.path.join(self.root, self.raw_folder, filename)
            with open(file_path, 'wb') as f:
                f.write(data.read())
            # decompress next to the archive, then delete the archive
            with open(file_path.replace('.gz', ''), 'wb') as out_f, \
                    gzip.GzipFile(file_path) as zip_f:
                out_f.write(zip_f.read())
            os.unlink(file_path)

        # process and save as torch files (commented-out unfiltered variants
        # removed — the filtering read_*_file calls below supersede them)
        print('Processing...')

        train_label, train_remove_mask = read_label_file(
            os.path.join(self.root, self.raw_folder,
                         'train-labels-idx1-ubyte'),
            remove_label=self.get_rid_of)
        train_data = read_image_file(os.path.join(self.root, self.raw_folder,
                                                  'train-images-idx3-ubyte'),
                                     remove_mask=train_remove_mask)

        training_set = (train_data, train_label)

        test_label, test_remove_mask = read_label_file(
            os.path.join(self.root, self.raw_folder, 't10k-labels-idx1-ubyte'),
            remove_label=self.get_rid_of)
        test_data = read_image_file(os.path.join(self.root, self.raw_folder,
                                                 't10k-images-idx3-ubyte'),
                                    remove_mask=test_remove_mask)
        test_set = (test_data, test_label)

        with open(
                os.path.join(self.root, self.processed_folder,
                             self.training_file), 'wb') as f:
            torch.save(training_set, f)
        with open(
                os.path.join(self.root, self.processed_folder, self.test_file),
                'wb') as f:
            torch.save(test_set, f)

        print('Done!')
Exemplo n.º 32
0
    def download(self):
        """Download the EMNIST data if it doesn't exist in processed_folder already.

        Downloads the single EMNIST zip, decompresses the per-split .gz idx
        files it contains into the raw folder, then serializes an
        ``(images, labels)`` tuple per split for both train and test.
        """
        from six.moves import urllib
        import gzip
        import shutil
        import zipfile

        if self._check_exists():
            return

        # download files
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            # already-existing folders are fine on re-runs
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        print('Downloading ' + self.url)
        data = urllib.request.urlopen(self.url)
        filename = self.url.rpartition('/')[2]
        raw_folder = os.path.join(self.root, self.raw_folder)
        file_path = os.path.join(raw_folder, filename)
        with open(file_path, 'wb') as f:
            f.write(data.read())

        print('Extracting zip archive')
        with zipfile.ZipFile(file_path) as zip_f:
            zip_f.extractall(raw_folder)
        os.unlink(file_path)
        # the zip expands into a 'gzip' directory holding one .gz per idx
        # file; decompress each into raw_folder, then drop the directory
        gzip_folder = os.path.join(raw_folder, 'gzip')
        for gzip_file in os.listdir(gzip_folder):
            if gzip_file.endswith('.gz'):
                print('Extracting ' + gzip_file)
                with open(os.path.join(raw_folder, gzip_file.replace('.gz', '')), 'wb') as out_f, \
                        gzip.GzipFile(os.path.join(gzip_folder, gzip_file)) as zip_f:
                    out_f.write(zip_f.read())
        shutil.rmtree(gzip_folder)

        # process and save as torch files, one pair per split in self.splits
        for split in self.splits:
            print('Processing ' + split)
            training_set = (
                read_image_file(
                    os.path.join(
                        raw_folder,
                        'emnist-{}-train-images-idx3-ubyte'.format(split))),
                read_label_file(
                    os.path.join(
                        raw_folder,
                        'emnist-{}-train-labels-idx1-ubyte'.format(split))))
            test_set = (
                read_image_file(
                    os.path.join(
                        raw_folder,
                        'emnist-{}-test-images-idx3-ubyte'.format(split))),
                read_label_file(
                    os.path.join(
                        raw_folder,
                        'emnist-{}-test-labels-idx1-ubyte'.format(split))))
            with open(
                    os.path.join(self.root, self.processed_folder,
                                 self._training_file(split)), 'wb') as f:
                torch.save(training_set, f)
            with open(
                    os.path.join(self.root, self.processed_folder,
                                 self._test_file(split)), 'wb') as f:
                torch.save(test_set, f)

        print('Done!')
Exemplo n.º 33
0
    def download(self):
        """Download the MNIST-M data and cache it as torch files.

        The MNIST-M images come from a pickled archive at ``self.url``; the
        label tensors are taken from torchvision's MNIST splits. Both splits
        are serialized with ``torch.save``.
        """
        # import essential packages
        from six.moves import urllib
        import gzip
        import pickle
        from torchvision import datasets

        # check if dataset already exists
        if self._check_exists():
            return

        # make data dirs
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            # already-existing folders are fine on re-runs
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        # download pkl files (skipped when the decompressed copy exists)
        print('Downloading ' + self.url)
        filename = self.url.rpartition('/')[2]
        file_path = os.path.join(self.root, self.raw_folder, filename)
        if not os.path.exists(file_path.replace('.gz', '')):
            data = urllib.request.urlopen(self.url)
            with open(file_path, 'wb') as f:
                f.write(data.read())
            with open(file_path.replace('.gz', ''), 'wb') as out_f, \
                    gzip.GzipFile(file_path) as zip_f:
                out_f.write(zip_f.read())
            os.unlink(file_path)

        # process and save as torch files
        print('Processing...')

        # load MNIST-M images from pkl file (keys are bytes: b'train'/b'test')
        with open(file_path.replace('.gz', ''), "rb") as f:
            mnist_m_data = pickle.load(f, encoding='bytes')
        mnist_m_train_data = torch.ByteTensor(mnist_m_data[b'train'])
        mnist_m_test_data = torch.ByteTensor(mnist_m_data[b'test'])

        # get MNIST labels via .targets — .train_labels/.test_labels are
        # deprecated aliases removed in newer torchvision releases (and the
        # sibling MNIST-M loader in this file already uses .targets)
        mnist_train_labels = datasets.MNIST(root=self.mnist_root,
                                            train=True,
                                            download=True).targets
        mnist_test_labels = datasets.MNIST(root=self.mnist_root,
                                           train=False,
                                           download=True).targets

        # save MNIST-M dataset
        training_set = (mnist_m_train_data, mnist_train_labels)
        test_set = (mnist_m_test_data, mnist_test_labels)
        with open(os.path.join(self.root,
                               self.processed_folder,
                               self.training_file), 'wb') as f:
            torch.save(training_set, f)
        with open(os.path.join(self.root,
                               self.processed_folder,
                               self.test_file), 'wb') as f:
            torch.save(test_set, f)

        print('Done!')
    def download(self):
        """Download the training data into ``self.root`` and index it.

        Resumable: each stage (log csv, tar.gz, decompressed tar, extracted
        directory) is skipped when its output already exists. Side effects:
        sets ``self.data`` (per-ID metadata dicts parsed from train.csv) and
        ``self.files`` (per-object 4-band .fits path tuples).
        """
        if not os.path.isdir(self.root):
            os.makedirs(self.root)

        # stage 1: fetch the metadata log
        log_path = os.path.join(self.root, "train.csv")
        if not os.path.isfile(log_path):
            print("Download log...", flush=True)
            data = urllib.request.urlopen(self.url_train_log)
            with open(log_path, 'wb') as f:
                f.write(data.read())

        # column names for train.csv — the leading '' presumably matches an
        # unnamed index column (27 columns total); confirm against the file
        keys = [
            '', 'ID', 'x_crit', 'y_crit', 'source_ID', 'z_source', 'z_lens',
            'mag_source', 'ein_area', 'n_crit', 'r_source', 'crit_area',
            'n_pix_source', 'source_flux', 'n_pix_lens', 'lens_flux',
            'n_source_im', 'mag_eff', 'sb_contrast', 'color_diff', 'n_gal_3',
            'n_gal_5', 'n_gal_10', 'halo_mass', 'star_mass', 'mag_lens',
            'n_sources'
        ]
        assert len(keys) == 27
        with open(log_path, 'rt') as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            # keep complete rows only; rows containing the cell 'ID' are headers
            data = [x for x in reader if len(x) == 27 and not 'ID' in x]
            # every field is parsed as float; empty cells become NaN
            data = [{k: float(x) if x else math.nan
                     for k, x in zip(keys, xs)} for xs in data]
            self.data = {x['ID']: x for x in data}

        # stage 2: fetch the image tarball
        gz_path = os.path.join(self.root, "datapack2.0train.tar.gz")
        if not os.path.isfile(gz_path):
            print("Download...", flush=True)
            data = urllib.request.urlopen(self.url_train)
            with open(gz_path, 'wb') as f:
                f.write(data.read())

        # stage 3: gunzip to a plain tar
        tar_path = os.path.join(self.root, "datapack2.0train.tar")
        if not os.path.isfile(tar_path):
            print("Decompress...", flush=True)
            import gzip
            import shutil
            with gzip.open(gz_path, 'rb') as f_in:
                with open(tar_path, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)

        # stage 4: extract the tar
        dir_path = os.path.join(self.root, "datapack2.0train")
        if not os.path.isdir(dir_path):
            print("Extract...", flush=True)
            import tarfile
            tar = tarfile.open(tar_path)
            tar.extractall(dir_path)
            tar.close()

        # print("Open tar...", flush=True)
        # import tarfile
        # self.tar = tarfile.open(tar_path)

        # group per-band file lists into (EUC_VIS, EUC_J, EUC_Y, EUC_H)
        # tuples; sorting aligns the four bands by filename
        self.files = list(
            zip(*(sorted(
                glob.glob(
                    os.path.join(dir_path, "Public/{}/*.fits".format(band))))
                  for band in ("EUC_VIS", "EUC_J", "EUC_Y", "EUC_H"))))
        # sanity check: all four paths in each tuple share the same trailing
        # '-'-separated id component
        assert all(
            len({x.split('-')[-1]
                 for x in fs}) == 1 for fs in self.files)