def download(self):
    if self._check_integrity():
        print('Files already downloaded and verified')
        return
    download_and_extract_archive(self.url, self.root, filename=self.filename, md5=self.tgz_md5)
def __init__(self, root, train=True, transform=None, target_transform=None):
    super(Cifar100, self).__init__(root, transform=transform, target_transform=target_transform)

    base_folder = 'cifar-100-python'
    url = "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
    filename = "cifar-100-python.tar.gz"
    tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85'
    train_list = [
        ['train', '16019d7e3df5f24257cddd939b257f8d'],
    ]
    test_list = [
        ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'],
    ]
    meta = {
        'filename': 'meta',
        'key': 'fine_label_names',
        'md5': '7973b15100ade9c7d40fb424638fde48',
    }

    targets = []
    data = []
    self.train = train

    download_and_extract_archive(url, self.root, filename=filename, md5=tgz_md5)

    if self.train:
        downloaded_list = train_list
    else:
        downloaded_list = test_list

    for file_name, checksum in downloaded_list:
        file_path = os.path.join(self.root, base_folder, file_name)
        with open(file_path, 'rb') as f:
            entry = pickle.load(f, encoding='latin1')
            # img = pil_loader(file_path)
            data.extend(entry['data'])

    # CIFAR-100 ships a single pickle per split, so 'entry' from the loop above is
    # still the relevant batch here; reshape the raw rows into HWC images.
    data = np.vstack(data).reshape(-1, 3, 32, 32)
    data = data.transpose((0, 2, 3, 1))
    if 'labels' in entry:
        targets.extend(entry['labels'])
    else:
        targets.extend(entry['fine_labels'])

    self.data = data
    self.targets = targets
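# --- Hedged usage sketch (not part of the original snippet). It assumes the
# Cifar100 class above is importable (the module name 'cifar100_dataset' is made
# up here) and that, like torchvision's CIFAR datasets, it also defines
# __getitem__/__len__ so it can be wrapped in a DataLoader.
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from cifar100_dataset import Cifar100  # hypothetical module name

train_set = Cifar100(root='./data', train=True, transform=transforms.ToTensor())
train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
images, labels = next(iter(train_loader))  # one mini-batch of CIFAR-100 images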
def download(self) -> None:
    """Download the EMNIST data if it doesn't exist in processed_folder already."""
    import shutil

    if self._check_exists():
        return

    os.makedirs(self.raw_folder, exist_ok=True)
    os.makedirs(self.processed_folder, exist_ok=True)

    # download files
    print('Downloading and extracting zip archive')
    download_and_extract_archive(self.url, download_root=self.raw_folder,
                                 filename="emnist.zip",
                                 remove_finished=True, md5=self.md5)
    gzip_folder = os.path.join(self.raw_folder, 'gzip')
    for gzip_file in os.listdir(gzip_folder):
        if gzip_file.endswith('.gz'):
            extract_archive(os.path.join(gzip_folder, gzip_file), gzip_folder)

    # process and save as torch files
    for split in self.splits:
        print('Processing ' + split)

        training_set = (
            read_image_file(os.path.join(gzip_folder, 'emnist-{}-train-images-idx3-ubyte'.format(split))),
            read_label_file(os.path.join(gzip_folder, 'emnist-{}-train-labels-idx1-ubyte'.format(split)))
        )
        test_set = (
            read_image_file(os.path.join(gzip_folder, 'emnist-{}-test-images-idx3-ubyte'.format(split))),
            read_label_file(os.path.join(gzip_folder, 'emnist-{}-test-labels-idx1-ubyte'.format(split)))
        )
        with open(os.path.join(self.processed_folder, self._training_file(split)), 'wb') as f:
            torch.save(training_set, f)
        with open(os.path.join(self.processed_folder, self._test_file(split)), 'wb') as f:
            torch.save(test_set, f)
    shutil.rmtree(gzip_folder)

    print('Done!')
def download(self) -> None:
    """Download the MNIST data if it doesn't exist in processed_folder already."""
    if self._check_exists():
        return

    os.makedirs(self.raw_folder, exist_ok=True)
    os.makedirs(self.processed_folder, exist_ok=True)

    # download files
    for url, md5 in self.resources:
        filename = url.rpartition('/')[2]
        download_and_extract_archive(url, download_root=self.raw_folder, filename=filename, md5=md5)

    # process and save as torch files
    print('Processing...')

    training_set = (
        read_image_file(os.path.join(self.raw_folder, 'train-images-idx3-ubyte')),
        read_label_file(os.path.join(self.raw_folder, 'train-labels-idx1-ubyte'))
    )
    test_set = (
        read_image_file(os.path.join(self.raw_folder, 't10k-images-idx3-ubyte')),
        read_label_file(os.path.join(self.raw_folder, 't10k-labels-idx1-ubyte'))
    )
    with open(os.path.join(self.processed_folder, self.training_file), 'wb') as f:
        torch.save(training_set, f)
    with open(os.path.join(self.processed_folder, self.test_file), 'wb') as f:
        torch.save(test_set, f)

    print('Done!')
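# --- Hedged sketch (not part of the original snippet): how the .pt files written
# by download() above are typically read back in the classic torchvision MNIST
# layout. It reuses the same attributes (processed_folder, training_file,
# test_file); the helper name _load_split is made up here.
def _load_split(self, train=True):
    data_file = self.training_file if train else self.test_file
    # Each file stores the (images, labels) tuple saved with torch.save above.
    data, targets = torch.load(os.path.join(self.processed_folder, data_file))
    return data, targets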
# Append parent dir to sys path.
parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
os.sys.path.insert(0, parentdir)

MODEL_URL = 'https://s3.eu-central-1.amazonaws.com/nlp-cube/{}.tar.gz'
MODEL_LOCATION = 'corpus/trained_models/{}'

EMBEDDINGS_NAME = 'wiki.{}.vec'
FACEBOOK_EMBEDDINGS_URL = 'https://s3-us-west-1.amazonaws.com/fasttext-vectors/'
FACEBOOK_EMBEDDINGS_LOCATION = 'corpus/'

if __name__ == '__main__':
    parser = optparse.OptionParser()
    parser.add_option('--language', action='store', dest='language')
    (params, _) = parser.parse_args(sys.argv)

    if not params.language:
        print('\nRun the script in the following manner:\n'
              'python scripts/download_model.py --language ro\n')
        sys.exit(1)

    # Download Facebook embeddings for the provided language.
    name = EMBEDDINGS_NAME.format(params.language)
    language_url = FACEBOOK_EMBEDDINGS_URL + name
    location = FACEBOOK_EMBEDDINGS_LOCATION + name
    download_file(language_url, location)

    # Download model from S3 for the provided language.
    model_url = MODEL_URL.format(params.language)
    model_location = MODEL_LOCATION.format(params.language)
    download_and_extract_archive(model_url, model_location)
EMBEDDINGS_NAME = 'wiki.{}.vec'
FACEBOOK_EMBEDDINGS_URL = 'https://s3-us-west-1.amazonaws.com/fasttext-vectors/'
FACEBOOK_EMBEDDINGS_LOCATION = 'corpus/'

logger = logging.getLogger(__name__)

if __name__ == '__main__':
    parser = optparse.OptionParser()
    parser.add_option('--language', action='store', dest='language')
    (params, _) = parser.parse_args(sys.argv)

    if not params.language:
        print('\nRun the script in the following manner:\n'
              'python scripts/download_data.py --language ro\n')
        sys.exit(1)

    # Download Treebank CONLL Universal Dependencies data.
    download_and_extract_archive(TREEBANK_CONLL, TREEBANK_LOCATION)

    # Download test CONLL Universal Dependencies data.
    download_and_extract_archive(TEST_DATA_CONLL, TEST_DATA_LOCATION)

    # Download conll17_ud_eval script.
    download_file(UD_EVAL_SCRIPT, UD_EVAL_LOCATION)

    # Download Facebook embeddings for the provided language.
    name = EMBEDDINGS_NAME.format(params.language)
    language_url = FACEBOOK_EMBEDDINGS_URL + name
    location = FACEBOOK_EMBEDDINGS_LOCATION + name
    download_file(language_url, location)