Exemplo n.º 1
0
    def process(self):
        """Convert the raw archive into collated train and test files.

        The first raw path is passed to ``extract_tar`` with ``raw_dir`` as
        the target. ``structures.xyz`` is read with ASE (all frames) and
        ``bandgaps.csv`` with ``np.loadtxt``. Each structure is paired with
        one loaded value (pairing stops at the shorter of the two sequences)
        and becomes a ``Data`` object with:

        * ``z``: atomic numbers as an ``int64`` tensor,
        * ``pos``: atomic positions as a ``float`` tensor,
        * ``y``: a one-element tensor holding the paired value.

        The first 10000 objects form the training split and the remainder
        the test split. ``pre_filter`` and ``pre_transform`` are applied to
        both splits when set (filtering first), and the collated splits are
        saved to ``processed_paths[0]`` and ``processed_paths[1]``.
        """
        from ase.io import read

        extract_tar(self.raw_paths[0], self.raw_dir, log=False)
        atoms_list = read(osp.join(self.raw_dir, 'structures.xyz'), index=':')
        gaps = np.loadtxt(osp.join(self.raw_dir, 'bandgaps.csv'))

        def to_data(atoms, gap):
            # Build one sample from an ASE structure and its scalar target.
            coords = torch.from_numpy(atoms.get_positions()).to(torch.float)
            numbers = torch.from_numpy(
                atoms.get_atomic_numbers()).to(torch.int64)
            target = torch.tensor([float(gap)])
            return Data(z=numbers, pos=coords, y=target)

        samples = [to_data(atoms, gap) for atoms, gap in zip(atoms_list, gaps)]

        # Fixed split point: the leading 10000 samples are used for training.
        train_split, test_split = samples[:10000], samples[10000:]

        if self.pre_filter is not None:
            train_split = list(filter(self.pre_filter, train_split))
            test_split = list(filter(self.pre_filter, test_split))

        if self.pre_transform is not None:
            train_split = list(map(self.pre_transform, train_split))
            test_split = list(map(self.pre_transform, test_split))

        train_blob = self.collate(train_split)
        torch.save(train_blob, self.processed_paths[0])
        test_blob = self.collate(test_split)
        torch.save(test_blob, self.processed_paths[1])
Exemplo n.º 2
0
 def download(self):
     """Fetch the dataset archive into ``raw_dir`` and unpack it there.

     ``processed_url`` is used when ``rdkit`` is ``None``; otherwise
     ``raw_url`` is used. The raw directory is echoed to stdout between two
     dashed separator lines, the archive is handed to ``extract_tar``, and
     the downloaded file is then deleted.
     """
     if rdkit is None:
         source_url = self.processed_url
     else:
         source_url = self.raw_url
     archive = download_url(source_url, self.raw_dir)
     print('------------', self.raw_dir, '--------------', sep='\n')
     extract_tar(archive, self.raw_dir)
     os.remove(archive)
Exemplo n.º 3
0
 def download(self):
     """Download every file listed for ``self.name`` into ``raw_dir``.

     Each entry of ``available_datasets[self.name]`` is fetched from
     ``{self.url}/{entry}``. Entries ending in ``.tar.gz`` are passed to
     ``extract_tar``, other entries ending in ``.gz`` to ``extract_gz``;
     anything else is not unpacked. The downloaded file is deleted in
     every case.
     """
     for filename in self.available_datasets[self.name]:
         archive = download_url(f'{self.url}/{filename}', self.raw_dir)
         # '.tar.gz' must be tested first because it also ends in '.gz'.
         if filename.endswith('.tar.gz'):
             unpack = extract_tar
         elif filename.endswith('.gz'):
             unpack = extract_gz
         else:
             unpack = None
         if unpack is not None:
             unpack(archive, self.raw_dir)
         os.remove(archive)
Exemplo n.º 4
0
 def download(self):
     """Download every file listed for ``self.name`` into ``raw_dir``.

     Each entry of ``available_datasets[self.name]`` is fetched from
     ``<self.url>/<entry>`` and the value returned by ``download_url`` is
     printed. Entries ending in ``.tar.gz`` are passed to ``extract_tar``,
     other entries ending in ``.gz`` to ``extract_gz``; anything else is
     not unpacked. The downloaded file is deleted in every case.
     """
     for filename in self.available_datasets[self.name]:
         remote = '{}/{}'.format(self.url, filename)
         archive = download_url(remote, self.raw_dir)
         print(archive)
         # '.tar.gz' must be tested first because it also ends in '.gz'.
         if filename.endswith('.tar.gz'):
             unpack = extract_tar
         elif filename.endswith('.gz'):
             unpack = extract_gz
         else:
             unpack = None
         if unpack is not None:
             unpack(archive, self.raw_dir)
         os.remove(archive)
 def download(self):
     """Download the files for ``self.name`` and install them as ``raw_dir``.

     Files listed in ``available_sets_df[self.name]`` are fetched from
     ``<self.url>/<entry>`` into ``<root>/<name>``. Entries ending in
     ``.tar.gz`` are passed to ``extract_tar``, other entries ending in
     ``.gz`` to ``extract_gz``; the downloaded file is deleted afterwards.
     Finally the current ``raw_dir`` tree is removed, the staging folder is
     renamed to ``raw_dir``, and the raw directory is printed.
     """
     staging_dir = osp.join(self.root, self.name)
     for filename in self.available_sets_df[self.name]:
         remote = '{}/{}'.format(self.url, filename)
         archive = download_url(remote, staging_dir)
         # '.tar.gz' must be tested first because it also ends in '.gz'.
         if filename.endswith('.tar.gz'):
             unpack = extract_tar
         elif filename.endswith('.gz'):
             unpack = extract_gz
         else:
             unpack = None
         if unpack is not None:
             unpack(archive, staging_dir)
         os.remove(archive)

     # Replace the existing raw directory with the freshly filled folder.
     shutil.rmtree(self.raw_dir)
     os.rename(staging_dir, self.raw_dir)
     print('Raw\n{}'.format(self.raw_dir))
Exemplo n.º 6
0
    def download(self):
        """Download images, annotations and split file into ``raw_dir``.

        Steps, in order:

        1. Fetch ``image_url``, pass the archive to ``extract_tar`` with
           ``mode='r'``, delete the archive, move
           ``TrainVal/VOCdevkit/VOC2011`` to ``images`` and remove the
           remaining ``TrainVal`` tree.
        2. Fetch ``annotation_url``, pass the archive to ``extract_tar``
           and delete it.
        3. Fetch ``split_url`` and rename the downloaded file to
           ``splits.npz``.
        """
        # 1) Images.
        image_archive = download_url(self.image_url, self.raw_dir)
        extract_tar(image_archive, self.raw_dir, mode='r')
        os.remove(image_archive)
        trainval_dir = osp.join(self.raw_dir, 'TrainVal')
        voc_dir = osp.join(trainval_dir, 'VOCdevkit', 'VOC2011')
        os.rename(voc_dir, osp.join(self.raw_dir, 'images'))
        shutil.rmtree(trainval_dir)

        # 2) Annotations.
        annotation_archive = download_url(self.annotation_url, self.raw_dir)
        extract_tar(annotation_archive, self.raw_dir)
        os.remove(annotation_archive)

        # 3) Splits: keep the downloaded file, only give it a fixed name.
        split_file = download_url(self.split_url, self.raw_dir)
        os.rename(split_file, osp.join(self.raw_dir, 'splits.npz'))
Exemplo n.º 7
0
 def download(self):
     """Download the archive at ``self.url`` into ``raw_dir`` and unpack it.

     The archive is stored under the last path component of ``self.url``.
     If a file with that name already exists in ``raw_dir`` it is reused;
     otherwise it is fetched with the system ``wget``. The archive is
     passed to ``extract_tar`` and then deleted.

     Raises:
         subprocess.CalledProcessError: If ``wget`` exits with a non-zero
             status.
         OSError: If ``wget`` cannot be started (e.g. it is not installed).
     """
     import subprocess

     filename = self.url.rpartition('/')[2]
     path = osp.join(self.raw_dir, filename)
     if osp.exists(path):  # pragma: no cover
         print(f"Using existing file {filename}")
     else:
         # - We need to use the system 'wget' because urllib does not work
         #   with Dropbox.
         print(f'Downloading {self.url}')
         try:
             # An argument list (no shell) keeps characters such as '&',
             # '?' or spaces in the URL/path from being interpreted by a
             # shell, and check=True surfaces a failed download instead of
             # silently continuing.
             subprocess.run(['wget', self.url, '-O', path, '-q'],
                            check=True)
         except (OSError, subprocess.CalledProcessError):
             # A failed wget may leave an empty or partial output file;
             # remove it so the next call does not reuse it as a valid
             # download.
             if osp.exists(path):
                 os.unlink(path)
             raise
     # - Extract the downloaded archive.
     extract_tar(path, self.raw_dir)
     # Delete the archive.
     os.unlink(path)
Exemplo n.º 8
0
 def download(self):
     """Download the archive into ``root`` and install it as ``raw_dir``.

     The file fetched from ``self.url`` is passed to ``extract_tar`` with
     ``root`` as the target and then deleted. The existing ``raw_dir`` tree
     is removed and ``<root>/nell_data`` is renamed to ``raw_dir``.
     """
     archive = download_url(self.url, self.root)
     extract_tar(archive, self.root)
     os.remove(archive)

     # Swap the 'nell_data' folder in place of the raw directory.
     extracted_dir = osp.join(self.root, 'nell_data')
     shutil.rmtree(self.raw_dir)
     os.rename(extracted_dir, self.raw_dir)
Exemplo n.º 9
0
 def download(self):
     """Fetch ``self.url`` into ``raw_dir``, unpack it and drop the archive.

     The path returned by ``download_url`` is passed to ``extract_tar``
     with ``raw_dir`` as the target and is deleted afterwards.
     """
     archive = download_url(self.url, self.raw_dir)
     extract_tar(archive, self.raw_dir)
     os.remove(archive)
Exemplo n.º 10
0
 def download(self):
     """Fetch ``self.raw_url`` into ``raw_dir``, unpack it and clean up.

     The path returned by ``download_url`` is passed to ``extract_tar``
     with ``raw_dir`` as the target and is deleted afterwards.
     """
     tar_path = download_url(self.raw_url, self.raw_dir)
     extract_tar(tar_path, self.raw_dir)
     os.unlink(tar_path)
Exemplo n.º 11
0
 def download(self):
     """Download the archive for ``self.name`` and unpack it into ``raw_dir``.

     The URL is ``self.url`` formatted with ``self.name``. The file is
     downloaded into ``root`` (not ``raw_dir``), passed to ``extract_tar``
     with ``raw_dir`` as the target, and then deleted.
     """
     remote = self.url.format(self.name)
     archive = download_url(remote, self.root)
     extract_tar(archive, self.raw_dir)
     os.remove(archive)
Exemplo n.º 12
0
 def download(self):
     """Fetch ``self.url`` into ``raw_dir``, unpack it and drop the archive.

     The path returned by ``download_url`` is passed to ``extract_tar``
     with ``raw_dir`` as the target and ``mode='r'``, then deleted.
     """
     archive = download_url(self.url, self.raw_dir)
     # NOTE(review): mode='r' presumably selects a non-gzip tar open mode;
     # confirm against extract_tar's signature.
     extract_tar(archive, self.raw_dir, mode='r')
     os.remove(archive)