Пример #1
0
    def process(self):
        """Extract the raw archive, build one collated dataset per split, and
        save the 10-fold cross-validation index tensors.

        Side effects: writes one ``(data, slices)`` file per split to
        ``self.processed_paths[i]`` and the fold index tuples to
        ``self.processed_paths[-1]``.
        """
        extract_zip(self.raw_paths[0], self.raw_dir, log=False)
        dir_path = os.path.join(self.raw_dir, 'pseudomonas')
        smiles_idx = {}  # SMILES string -> first-seen global row index
        data_lists = {}

        for i, split in enumerate(self.splits):
            # `dropna(axis=1)`: passing the axis positionally was deprecated
            # in pandas 1.x and removed in pandas 2.0.
            df = pd.read_csv(os.path.join(dir_path, split + '.csv')).drop(
                'id', axis=1, errors='ignore').dropna(axis=1)
            data_lists[split] = []

            for row in df.itertuples(False, None):
                # row[0] is the SMILES column; remember its global index so
                # the CV fold CSVs below can be mapped back to row positions.
                smiles_idx.setdefault(row[0], len(smiles_idx))
                data = self._process_row(*row)

                if self.pre_transform is not None:
                    data = self.pre_transform(data)

                if self.pre_filter is None or self.pre_filter(data):
                    data_lists[split].append(data)

            data, slices = self.collate(data_lists[split])
            torch.save((data, slices), self.processed_paths[i])

        splits = []

        # Translate the per-fold CSVs into index tensors via the SMILES table.
        for fold in range(10):
            idx = []

            for split in ['train', 'dev', 'test']:
                df = pd.read_csv(os.path.join(dir_path, 'train_cv',
                                              f'fold_{fold}', split + '.csv'))
                idx.append(torch.LongTensor(df['smiles'].map(
                    lambda s: smiles_idx[s])))

            splits.append(tuple(idx))

        torch.save(tuple(splits), self.processed_paths[-1])
Пример #2
0
 def download(self):
     """Fetch the ShapeNet archive, unpack it and install it as ``raw_dir``."""
     archive = download_url(self.url, self.root)
     extract_zip(archive, self.root)
     os.unlink(archive)
     # Swap the extracted 'shapenet' folder in place of the raw directory.
     shutil.rmtree(self.raw_dir)
     os.rename(osp.join(self.root, 'shapenet'), self.raw_dir)
Пример #3
0
 def download(self):
     """Download the resampled ModelNet40 archive and install it as ``raw_dir``."""
     zip_path = download_url(self.url, self.root)
     extract_zip(zip_path, self.root)
     os.unlink(zip_path)
     extracted = osp.join(self.root, "modelnet40_normal_resampled")
     # Replace the raw directory with the freshly extracted folder.
     shutil.rmtree(self.raw_dir)
     os.rename(extracted, self.raw_dir)
Пример #4
0
 def download(self):
     """Download the dataset archive and rename the extracted folder to ``raw_dir``."""
     archive = download_url(self.url, self.root)
     extract_zip(archive, self.root)
     os.unlink(archive)
     shutil.rmtree(self.raw_dir)
     # The archive extracts to a folder named after the URL's file stem.
     stem = self.url.split('/')[-1].split('.')[0]
     os.rename(osp.join(self.root, stem), self.raw_dir)
Пример #5
0
 def download(self):
     """Fetch the DBP15K archive from Google Drive and install it as ``raw_dir``."""
     archive = osp.join(self.root, 'raw.zip')
     gdd.download_file_from_google_drive(self.file_id, archive)
     extract_zip(archive, self.root)
     os.unlink(archive)
     # Swap the extracted 'DBP15K' folder in place of the raw directory.
     shutil.rmtree(self.raw_dir)
     os.rename(osp.join(self.root, 'DBP15K'), self.raw_dir)
    def process(self):
        """Read every ``.ply`` mesh, label it by category index, and split the
        samples 90/10 (per run of 100 files) into train and test sets."""
        folders = sorted(glob(osp.join(self.raw_dir, 'FaceTalk_*')))
        if not folders:
            # Raw archive has not been extracted yet.
            extract_zip(self.raw_paths[0], self.raw_dir, log=False)
            folders = sorted(glob(osp.join(self.raw_dir, 'FaceTalk_*')))

        train_list = []
        test_list = []
        for folder in folders:
            for label, category in enumerate(self.categories):
                mesh_paths = sorted(glob(osp.join(folder, category, '*.ply')))
                for pos, mesh_path in enumerate(mesh_paths):
                    sample = read_ply(mesh_path)
                    sample.y = torch.tensor([label], dtype=torch.long)
                    if self.pre_filter is not None and not self.pre_filter(sample):
                        continue
                    if self.pre_transform is not None:
                        sample = self.pre_transform(sample)

                    # First 90 of every 100 files train, the remaining 10 test.
                    bucket = train_list if (pos % 100) < 90 else test_list
                    bucket.append(sample)

        torch.save(self.collate(train_list), self.processed_paths[0])
        torch.save(self.collate(test_list), self.processed_paths[1])
Пример #7
0
 def download(self):
     """Download the ModelNet archive for ``self.name`` and install it as ``raw_dir``."""
     archive = download_url(self.urls[self.name], self.root)
     extract_zip(archive, self.root)
     os.unlink(archive)
     extracted = osp.join(self.root, 'ModelNet{}'.format(self.name))
     # Replace the raw directory with the extracted folder.
     shutil.rmtree(self.raw_dir)
     os.rename(extracted, self.raw_dir)
Пример #8
0
 def download(self):
     """Interactively download, extract and patch the S3DIS dataset into
     ``raw_dir``; recurses to retry if an existing ``raw_dir`` is incomplete."""
     raw_folders = os.listdir(self.raw_dir)
     if len(raw_folders) == 0:
         # Nothing extracted yet. Download the zip first, unless it is
         # already present on disk from a previous (partial) run.
         if not os.path.exists(osp.join(self.root, self.zip_name)):
             log.info("WARNING: You are downloading S3DIS dataset")
             log.info(
                 "Please, register yourself by filling up the form at {}".
                 format(self.form_url))
             log.info("***")
             log.info(
                 "Press any key to continue, or CTRL-C to exit. By continuing, you confirm filling up the form."
             )
             # Blocks until the user presses Enter, confirming registration.
             input("")
             gdown.download(self.download_url,
                            osp.join(self.root, self.zip_name),
                            quiet=False)
         extract_zip(os.path.join(self.root, self.zip_name), self.root)
         # Swap the extracted folder in place of the raw directory, then
         # apply the bundled patch to fix known defects in the raw data.
         shutil.rmtree(self.raw_dir)
         os.rename(osp.join(self.root, self.file_name), self.raw_dir)
         shutil.copy(self.path_file, self.raw_dir)
         cmd = "patch -ruN -p0 -d  {} < {}".format(
             self.raw_dir, osp.join(self.raw_dir, "s3dis.patch"))
         # NOTE(review): relies on a `patch` binary being on PATH — TODO
         # confirm this is documented as a prerequisite.
         os.system(cmd)
     else:
         # raw_dir exists: verify it holds all six expected area folders;
         # otherwise wipe it and restart the download from scratch.
         intersection = len(
             set(self.folders).intersection(set(raw_folders)))
         if intersection != 6:
             shutil.rmtree(self.raw_dir)
             os.makedirs(self.raw_dir)
             self.download()
    def process(self):
        """Extract the raw archive and repack every file under ``data_5_all``
        into a single chunked, gzip-compressed HDF5 file holding ``data`` and
        ``labels`` datasets (one chunk per sample)."""
        extract_zip(self.raw_paths[0], self.raw_dir, log=True)
        path = os.path.join(self.raw_dir, 'data_5_all')

        files = [os.path.join(path, f) for f in os.listdir(path)]
        N = len(files)

        # Probe the first file to learn the (transposed) per-sample shapes.
        x = np.load(files[0])
        data_shape = x['data'].T.shape
        label_shape = x['labels'].T.shape

        # The original wrapped the path in a single-argument os.path.join,
        # which is a no-op; use the path directly.
        with h5py.File(self.processed_paths[0], 'w') as hf:
            dset = hf.create_dataset("data",
                                     shape=(N,) + data_shape,
                                     chunks=(1,) + data_shape,
                                     dtype='f4',
                                     compression='gzip')
            lset = hf.create_dataset("labels",
                                     shape=(N,) + label_shape,
                                     chunks=(1,) + label_shape,
                                     dtype='f4',
                                     compression='gzip')

            # enumerate() replaces the manual `i = 0; i += 1` counter.
            for i, f in enumerate(tqdm(files)):
                x = np.load(f)
                # Transposed copies must be contiguous before HDF5 writes.
                dset[i] = np.ascontiguousarray(x['data'].T)
                lset[i] = np.ascontiguousarray(x['labels'].T)

        # The extracted directory is redundant once the HDF5 file exists.
        shutil.rmtree(path)
Пример #10
0
    def process(self):
        """Read every mesh under ``raw_dir`` and build train/test datasets
        according to ``self.split``.

        ``'interpolation'`` holds out 10 of every 100 meshes;
        ``'extrapolation'`` holds out the experiment folder named by
        ``self.test_exp``.

        Raises:
            RuntimeError: if ``self.split`` is neither of the two modes.
        """
        print('Processing...')
        fps = glob(osp.join(self.raw_dir, '*/*/*.ply'))
        if len(fps) == 0:
            # Raw archive has not been extracted yet.
            extract_zip(self.raw_paths[0], self.raw_dir, log=False)
            fps = glob(osp.join(self.raw_dir, '*/*/*.ply'))

        print("Number of meshes: {}".format(len(fps)))
        train_data_list, test_data_list = [], []
        for idx, fp in enumerate(tqdm(fps)):
            data = read_mesh(fp)
            if self.pre_transform is not None:
                data = self.pre_transform(data)

            if self.split == 'interpolation':
                # First 10 of every 100 meshes are held out for testing.
                if (idx % 100) < 10:
                    test_data_list.append(data)
                else:
                    train_data_list.append(data)
            elif self.split == 'extrapolation':
                # `osp.basename(osp.dirname(...))` replaces the non-portable
                # `fp.split('/')[-2]`, which broke on Windows separators.
                if osp.basename(osp.dirname(fp)) == self.test_exp:
                    test_data_list.append(data)
                else:
                    train_data_list.append(data)
            else:
                raise RuntimeError((
                    'Expected the split of interpolation or extrapolation, but'
                    ' found {}').format(self.split))

        torch.save(self.collate(train_data_list), self.processed_paths[0])
        torch.save(self.collate(test_data_list), self.processed_paths[1])
Пример #11
0
 def download(self):
     """Download and unpack every raw RGBD archive for the current mode."""
     target = osp.join(self.raw_dir, self.mode)
     log.info("Download elements in the file {}...".format(target))
     for url in self.dict_urls[self.mode]:
         archive = download_url(url, target, self.verbose)
         extract_zip(archive, target, self.verbose)
         os.unlink(archive)
 def download(self):
     """Download the WILLOW-ObjectClass archive and install it as ``raw_dir``."""
     archive = download_url(self.url, self.root)
     extract_zip(archive, self.root)
     os.unlink(archive)
     # Drop archive files the dataset loader never reads.
     for leftover in ('README', 'demo_showAnno.m'):
         os.unlink(osp.join(self.root, leftover))
     shutil.rmtree(self.raw_dir)
     os.rename(osp.join(self.root, 'WILLOW-ObjectClass'), self.raw_dir)
Пример #13
0
 def download(self):
     """Download the MovieLens archive for ``self.name`` and flatten its
     contents into ``raw_dir``.

     Raises:
         ValueError: if ``self.name`` is not a supported dataset name.
     """
     if self.name == 'ml-100k':
         url = 'http://files.grouplens.org/datasets/movielens/ml-100k.zip'
     else:
         # Previously `url` was left unbound here, producing an obscure
         # NameError on the download call; fail with a clear message instead.
         raise ValueError(f'Unknown dataset name: {self.name!r}')
     path = download_url(url, self.root)
     # `log` expects a boolean flag; the original passed `self.name`, which
     # only worked because a non-empty string is truthy.
     extract_zip(path=path, folder=self.raw_dir, log=True)
     os.unlink(path)
     # The zip extracts into a subfolder; move its files up into raw_dir.
     for file in glob.glob(os.path.join(self.raw_dir, self.name, '*')):
         shutil.move(file, self.raw_dir)
Пример #14
0
    def download(self):
        """Replace ``raw_dir`` with a freshly downloaded and extracted copy of
        the dataset."""
        # Start from a clean raw directory.
        shutil.rmtree(self.raw_dir)

        archive = download_url(self.dataset_url, self.raw_dir)
        extract_zip(archive, self.raw_dir)
        os.unlink(archive)
Пример #15
0
 def download(self):
     """Download and extract the archive; a no-op in test mode."""
     if self.is_test:
         return
     archive = download_url(self.url, self.root)
     extract_zip(archive, self.root)
     os.unlink(archive)
     shutil.rmtree(self.raw_dir)
     # The archive extracts to a folder named after the URL's file stem.
     stem = self.url.split("/")[-1].split(".")[0]
     os.rename(osp.join(self.root, stem), self.raw_dir)
Пример #16
0
 def download(self):
     """Download the (optionally cleaned) dataset zip and install it as ``raw_dir``."""
     base = self.cleaned_url if self.cleaned else self.url
     folder = osp.join(self.root, self.name)
     archive = download_url('{}/{}.zip'.format(base, self.name), folder)
     extract_zip(archive, folder)
     os.unlink(archive)
     # Replace the raw directory with the extracted dataset folder.
     shutil.rmtree(self.raw_dir)
     os.rename(osp.join(folder, self.name), self.raw_dir)
Пример #17
0
 def download(self):
     """Download the human-segmentation archive and rename it to ``raw_dir``."""
     archive = download_url(
         'https://www.dropbox.com/s/s3n05sw0zg27fz3/human_seg.tar.gz',
         self.root)
     extract_zip(archive, self.root)
     os.unlink(archive)
     # NOTE(review): no rmtree(self.raw_dir) before the rename, unlike
     # sibling datasets — presumably raw_dir does not exist yet; confirm.
     os.rename(osp.join(self.root, 'human_seg'), self.raw_dir)
Пример #18
0
    def download(self):
        """Download and extract the main archive, then fetch the
        'uncharacterized' index file and give it a readable name."""
        file_path = download_url(self.raw_url, self.raw_dir)
        extract_zip(file_path, self.raw_dir)
        os.unlink(file_path)

        # The return value of the second download was assigned but never
        # used; the file is renamed by its known on-disk name ('3195404').
        download_url(self.raw_url2, self.raw_dir)
        os.rename(osp.join(self.raw_dir, '3195404'),
                  osp.join(self.raw_dir, 'uncharacterized.txt'))
Пример #19
0
 def download(self):
     """Replace ``raw_dir`` with the network data, then fetch the label archive."""
     shutil.rmtree(self.raw_dir)
     net_archive = download_url(self.url, self.root)
     extract_zip(net_archive, self.root)
     os.rename(osp.join(self.root, 'net_aminer'), self.raw_dir)
     os.unlink(net_archive)
     # Labels come from a second archive, extracted directly into raw_dir.
     label_archive = download_url(self.y_url, self.raw_dir)
     extract_zip(label_archive, self.raw_dir)
     os.unlink(label_archive)
Пример #20
0
    def download(self):
        """Download the molecule archive plus the per-split index files."""
        shutil.rmtree(self.raw_dir)
        archive = download_url(self.url, self.root)
        extract_zip(archive, self.root)
        os.rename(osp.join(self.root, 'molecules'), self.raw_dir)
        os.unlink(archive)

        # Each split's index file lives at its own URL.
        for split in ['train', 'val', 'test']:
            download_url(self.split_url.format(split), self.raw_dir)
Пример #21
0
def download(raw_path):
    """Download the raw dataset archive from ``URL`` and extract it in place.

    :param raw_path: download path
    :type raw_path: str
    """
    archive = download_url(URL, raw_path)
    extract_zip(archive, raw_path)
    os.unlink(archive)
Пример #22
0
def download(raw_path):
    """Download the raw data from URL and normalize the extracted folder
    name to ``ml-10m``.

    :param raw_path: download path
    :type raw_path: str
    """
    path = download_url(URL, raw_path)
    extract_zip(path, raw_path)
    os.unlink(path)
    # os.path.join is portable, unlike manual '/'-concatenation in f-strings.
    os.rename(os.path.join(raw_path, 'ml-10M100K'),
              os.path.join(raw_path, 'ml-10m'))
Пример #23
0
 def download(self):
     """Download every file of the selected dataset, extracting archives
     according to their extension."""
     for fname in self.available_datasets[self.name]:
         fetched = download_url('{}/{}'.format(self.url, fname), self.raw_dir)
         # Check '.tar.gz' before '.gz' — the latter would also match it.
         if fname.endswith('.tar.gz'):
             extract_tar(fetched, self.raw_dir)
         elif fname.endswith('.gz'):
             extract_gz(fetched, self.raw_dir)
         elif fname.endswith('.zip'):
             extract_zip(fetched, self.raw_dir)
         os.unlink(fetched)
Пример #24
0
 def download(self):
     """Download the MAG archive and flatten the needed pieces of the
     extracted 'mag' tree into ``raw_dir``."""
     archive = download_url(self.url, self.raw_dir)
     extract_zip(archive, self.raw_dir)
     # Lift the required sub-folders and files out of 'mag/'.
     for sub in ['node-feat', 'node-label', 'relations']:
         shutil.move(osp.join(self.raw_dir, 'mag', 'raw', sub), self.raw_dir)
     shutil.move(osp.join(self.raw_dir, 'mag', 'split'), self.raw_dir)
     shutil.move(
         osp.join(self.raw_dir, 'mag', 'raw', 'num-node-dict.csv.gz'),
         self.raw_dir)
     # Discard the emptied tree and the downloaded zip.
     shutil.rmtree(osp.join(self.raw_dir, 'mag'))
     os.remove(osp.join(self.raw_dir, 'mag.zip'))
 def download(self):
     """Download the attributed-graph archive for ``self.name`` and lift its
     raw files into ``raw_dir``."""
     url = self.url.format(self.datasets[self.name])
     archive = download_url(url, self.raw_dir)
     extract_zip(archive, self.raw_dir)
     os.unlink(archive)
     # 'mag' extracts to a plain folder; all others carry an '.attr' suffix.
     extracted = (osp.join(self.raw_dir, self.name) if self.name == 'mag'
                  else osp.join(self.raw_dir, f'{self.name}.attr'))
     for name in self.raw_file_names:
         os.rename(osp.join(extracted, name), osp.join(self.raw_dir, name))
     shutil.rmtree(extracted)
Пример #26
0
 def download(self):
     """Download and unpack the raw RGBD archives for the current mode,
     skipping the work if they are already present."""
     target = osp.join(self.raw_dir, self.mode)
     if files_exist([target]):  # pragma: no cover
         log.warning("already downloaded {}".format(self.mode))
         return
     log.info("Download elements in the file {}...".format(target))
     for url in self.dict_urls[self.mode]:
         archive = download_url(url, target, self.verbose)
         extract_zip(archive, target, self.verbose)
         os.unlink(archive)
Пример #27
0
 def download(self):
     """Extract a manually supplied PartNet archive and install it as
     ``raw_dir``.

     Raises:
         FileExistsError: if the archive is missing from ``raw_dir``.
             NOTE(review): ``FileNotFoundError`` would be the natural type
             here; kept as-is so existing ``except`` clauses still match.
     """
     path = osp.join(self.raw_dir, self.dataset)
     if not osp.exists(path):
         raise FileExistsError(
             'PartNet can only downloaded via application. '
             'See details in https://cs.stanford.edu/~kaichun/partnet/')
     extract_zip(path, self.root)
     os.unlink(path)
     shutil.rmtree(self.raw_dir)
     # URLs always use '/', never `os.sep` (which is '\\' on Windows).
     name = self.url.split('/')[-1].split('.')[0]
     os.rename(osp.join(self.root, name), self.raw_dir)
Пример #28
0
 def download(self):
     """Download the OGB archive (with user confirmation) and move the
     extracted dataset into ``self.root``."""
     from ogb.utils.url import decide_download, download_url, extract_zip
     url = self.meta_info['url']
     # Guard clause: the helper asks the user whether to proceed.
     if not decide_download(url):
         return
     path = download_url(url, self.original_root)
     extract_zip(path, self.original_root)
     print(f'Removing {path}')
     os.unlink(path)
     print(f'Removing {self.root}')
     shutil.rmtree(self.root)
     print(f'Moving {osp.join(self.original_root, self.download_name)} to {self.root}')
     shutil.move(osp.join(self.original_root, self.download_name), self.root)
 def download(self):
     """Fetch the dataset zip from Google Drive and lift its raw files into
     ``raw_dir``."""
     from google_drive_downloader import GoogleDriveDownloader as gdd
     archive = osp.join(self.raw_dir, f'{self.name}.zip')
     gdd.download_file_from_google_drive(self.datasets[self.name], archive)
     extract_zip(archive, self.raw_dir)
     os.unlink(archive)
     # 'mag' extracts to a plain folder; all others carry an '.attr' suffix.
     extracted = (osp.join(self.raw_dir, self.name) if self.name == 'mag'
                  else osp.join(self.raw_dir, f'{self.name}.attr'))
     for name in self.raw_file_names:
         os.rename(osp.join(extracted, name), osp.join(self.raw_dir, name))
     shutil.rmtree(extracted)
Пример #30
0
    def download(self):
        """Download the ModelNet archive, install it as ``raw_dir`` and clean
        up any macOS metadata left in the zip."""
        archive = download_url(self.urls[self.name], self.root)
        extract_zip(archive, self.root)
        os.unlink(archive)
        shutil.rmtree(self.raw_dir)
        os.rename(osp.join(self.root, f'ModelNet{self.name}'), self.raw_dir)

        # ModelNet10's zip ships '__MACOSX' metadata created at compression.
        osx_dir = osp.join(self.root, '__MACOSX')
        if osp.exists(osx_dir):
            shutil.rmtree(osx_dir)