def download_and_extract(group, msg=None, skip_existing=True):
    filtered_urls = filter_urls(group)
    print(
        f'Downloading {group if msg is None else msg} ({len(filtered_urls)} file{"s" if len(filtered_urls) != 1 else ""})...\n'
    )
    for url, dest, md5 in filtered_urls:
        # skip if path already exists
        if os.path.exists(dest) and skip_existing:
            print(
                f'Path {dest} exists; skipping.\n(To not skip, use option --overwrite.)'
            )
            continue
        # download archive file and check md5 hash
        fn = gdown.download(url=url, quiet=False)
        assert compute_md5(fn) == md5, 'Downloaded file failed MD5 hash! Exiting...'
        print('MD5 passed.')
        # make target path, extract, and delete archive
        os.makedirs(dest, exist_ok=True)
        gdown.extractall(path=fn, to=dest)
        os.remove(fn)
        print('Extracted.')
    print('\nDone.\n\n')
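# The snippet above relies on a `compute_md5` helper that is not shown.
# A minimal sketch of such a helper (hypothetical, not from the original
# source) using the standard library:
import hashlib

def compute_md5(filename, chunk_size=8192):
    # Hash the file in chunks so large archives never need to fit in memory.
    md5 = hashlib.md5()
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            md5.update(chunk)
    return md5.hexdigest()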
def setup_repository():
    # Downloading, extracting models.
    models_url = 'https://drive.google.com/uc?id=1DBIl8JyXEo6YdM9uNyo3vrv5T2WsYSXT'
    models_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               'resources', 'models')
    os.makedirs(models_path, exist_ok=True)
    md5 = '434775bebd64910e01f4198eab251666'
    models_archive_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'models.zip')
    gdown.cached_download(url=models_url, path=models_archive_path, md5=md5)
    gdown.extractall(path=models_archive_path, to=models_path)
    os.remove(models_archive_path)

    # Setting up the data folder with runtime_config.ini file
    data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'resources', 'data')
    os.makedirs(data_path, exist_ok=True)
    runtime_config_path = os.path.join(data_path, 'runtime_config.ini')
    if os.path.exists(runtime_config_path):
        os.remove(runtime_config_path)
    with open(runtime_config_path, 'w') as pfile:
        pfile.write("[Predictions]\n")
        pfile.write("non_overlapping=true\n")
        pfile.write("reconstruction_method=probabilities #probabilities, thresholding\n")
        pfile.write("reconstruction_order=resample_first #resample_first, resample_second\n")
        pfile.write("probability_threshold=0.4\n")
def download_file_with_progress(url, output_dir):
    """
    Utility borrowed from gpt-2-simple.
    """
    CHUNK_SIZE = 1024 * 1024

    # Google Drive links go through gdown; `extractall` is assumed to be
    # imported from gdown (i.e. gdown.extractall) in the original module.
    if 'drive.google.com' in url:
        cwd = os.getcwd()
        os.chdir(output_dir)
        downloaded_file = gdown.download(url)
        os.chdir(cwd)
        return extractall(os.path.join(output_dir, downloaded_file))

    filename = os.path.basename(urlparse(url).path)
    r = requests.get(url, stream=True)
    with open(os.path.join(output_dir, filename), 'wb') as f:
        with tqdm(ncols=100, desc='Downloading file from ' + url) as pbar:
            for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                f.write(chunk)
                # update by the bytes actually written; the final chunk is
                # usually smaller than CHUNK_SIZE
                pbar.update(len(chunk))
    return extractall(os.path.join(output_dir, filename))
def setup_repository():
    # Downloading, extracting models.
    models_url = 'https://drive.google.com/uc?id=1QJZWF9CzgOiYzjzsRSu2LOkrzi2S6j_U'
    models_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               'resources', 'models')
    os.makedirs(models_path, exist_ok=True)
    md5 = '8920cc50fee3505e958307fa11088c0d'
    models_archive_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'models.zip')
    gdown.cached_download(url=models_url, path=models_archive_path, md5=md5)
    gdown.extractall(path=models_archive_path, to=models_path)
    os.remove(models_archive_path)

    # Setting up the data folder with runtime_config.ini file
    data_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             'resources', 'data')
    os.makedirs(data_path, exist_ok=True)
    runtime_config_path = os.path.join(data_path, 'runtime_config.ini')
    if os.path.exists(runtime_config_path):
        os.remove(runtime_config_path)
    with open(runtime_config_path, 'w') as pfile:
        pfile.write("[Predictions]\n")
        pfile.write("non_overlapping=true\n")
        pfile.write("reconstruction_method=probabilities #probabilities, thresholding\n")
        pfile.write("reconstruction_order=resample_first #resample_first, resample_second\n")
        pfile.write("probability_threshold=0.4\n")
def setup_repository():
    # Downloading, extracting models.
    models_url = 'https://drive.google.com/uc?id=1ga08d8QQfAHOgTSKiPpIN7f_owuicNUA'
    models_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               'resources', 'models')
    os.makedirs(models_path, exist_ok=True)
    md5 = '55c66e000de9077e483635029f740901'
    models_archive_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'models.zip')
    gdown.cached_download(url=models_url, path=models_archive_path, md5=md5)
    gdown.extractall(path=models_archive_path, to=models_path)
    os.remove(models_archive_path)
def download_model(model_path):
    # download
    temp = 'cache/temp.zip'
    gdown.download(MODEL_URL, temp, quiet=False)
    # unzip
    print('unzipping file')
    gdown.extractall(temp)
    os.remove(temp)
    # rename
    os.rename(glob.glob('cache/*.pt')[0], model_path)
def download_taskonomy_features(save_dir: str):
    for task, url in config.taskonomy_features_urls.items():
        tmp_file_name = os.path.join("tmp", task + "_encoder_output" + ".zip")
        if not os.path.isdir(save_dir):
            os.makedirs(save_dir)
        if not os.path.isdir("tmp"):
            os.mkdir("tmp")
        if not os.path.isfile(tmp_file_name):
            gdown.download(url, tmp_file_name, quiet=False)
            gdown.extractall(tmp_file_name, save_dir)
        else:
            print(tmp_file_name, "already exists, skipping download")
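# `config.taskonomy_features_urls` above is assumed to map task names to
# Google Drive download URLs; its actual contents are not shown in the
# original. A hypothetical illustration of the expected shape:
taskonomy_features_urls = {
    "autoencoding": "https://drive.google.com/uc?id=<file-id>",
    "depth_euclidean": "https://drive.google.com/uc?id=<file-id>",
}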
def download_gdrive(url=None,
                    file_id=None,
                    file_name=None,
                    data_folder=None,
                    extract_all=False,
                    **kwargs):
    assert url or file_id, "Either google drive download url or file id must be specified."
    base_url = "https://drive.google.com/uc?id={file_id}"
    if url:
        file_id, is_download_link = gdown.parse_url.parse_url(url)
    elif file_id:
        url = base_url.format(file_id=file_id)

    # folder to save this particular file
    data_folder = data_folder if data_folder else file_id
    data_folder = os.path.join(get_data_root(), data_folder)
    if not os.path.exists(data_folder):
        os.makedirs(data_folder)

    # note the f-string prefix, so the file id is actually interpolated
    file_name = file_name if file_name else f"gdrive_{file_id}.zip"
    file_path = os.path.join(data_folder, file_name)
    if not os.path.exists(file_path):
        logging.info("Start to download files on Google Drive...")
        downloaded_file_path = gdown.download(url, **kwargs)
        os.rename(downloaded_file_path, file_path)

    if extract_all:
        logging.info("Extracting zip file...")
        files = gdown.extractall(file_path)
        return file_path, files
    else:
        return file_path
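# Hypothetical calls to the helper above (the file id is a placeholder and
# `get_data_root()` is assumed to be configured); extra keyword arguments
# are passed through to gdown.download:
file_path = download_gdrive(file_id="<drive-file-id>", file_name="weights.zip")
file_path, files = download_gdrive(url="https://drive.google.com/uc?id=<drive-file-id>",
                                   extract_all=True, quiet=False)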
def _download_SSF_model(model_name: str):
    url = SSF_model_urls[model_name]
    save_dir = os.path.join(Path.exp_dir(), "SSF")
    tmp_file_name = os.path.join("tmp", model_name + ".zip")
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    if not os.path.isdir("tmp"):
        os.mkdir("tmp")
    if not os.path.isfile(tmp_file_name):
        gdown.download(url, tmp_file_name, quiet=False)
        gdown.extractall(tmp_file_name, save_dir)
    else:
        print(tmp_file_name, "already exists, skipping download")
def download_examples():
    gdown.download(id="1AAcJ17ghTVcw_nRICX5IAwhqmtaRP6fd",
                   output='examples.zip')
    gdown.extractall('examples.zip', to='examples/')
parser = argparse.ArgumentParser()
parser.add_argument('--savedir', default='./')
parser.add_argument('--naturalproofs', action='store_true')
parser.add_argument('--tokenized', action='store_true')
parser.add_argument('--checkpoint', action='store_true')
parser.add_argument('--other', action='store_true')
args = parser.parse_args()

os.makedirs(args.savedir, exist_ok=True)

if args.naturalproofs:
    url = 'https://drive.google.com/uc?id=1vgohULQD7HfbotskkVX4li9YanIeG3u1'
    out = os.path.join(args.savedir, 'naturalproofs.tar.gz')
    gdown.download(url, out, quiet=False)
    gdown.extractall(out, os.path.join(args.savedir, 'data'))
    process = subprocess.Popen(
        ['python', 'naturalproofs/naturalproofs_stein.py',
         '--outdir', os.path.join(args.savedir, 'data')]
    ).wait()

if args.tokenized:
    url = 'https://drive.google.com/uc?id=1OCIvcCyKTyRJeV7QiHdtQQhPJ6QknMpV'
    out = os.path.join(args.savedir, 'tok.tar.gz')
    gdown.download(url, out, quiet=False)
    gdown.extractall(out, os.path.join(args.savedir, 'data'))

if args.checkpoint:
    url = 'https://drive.google.com/uc?id=1uIBeI7fw5vJBhDOl2WL3SbXWmzHgfK3W'
    out = os.path.join(args.savedir, 'ckpt.tar.gz')
    gdown.download(url, out, quiet=False)
    gdown.extractall(out, os.path.join(args.savedir, 'ckpt'))
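# Hypothetical invocation of the script above (the script's file name is an
# assumption, not given in the original):
#   python download.py --savedir ./out --naturalproofs --tokenized --checkpoint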
def download(self) -> None:
    # `self.root_dir` is assumed to be a path.Path (a str subclass), so the
    # string concatenation below is valid.
    if not self.root_dir.exists():
        url = "https://github.com/yuxng/YCB_Video_toolbox.git"
        cmd = f"git clone {url} {self.root_dir}"
        subprocess.check_call(shlex.split(cmd))
        gdown.extractall(self.root_dir + ".zip")
def postprocess(file: str):
    # Presumably a nested callback (e.g. passed to gdown.cached_download);
    # `cls._root_dir` comes from the enclosing classmethod's scope.
    gdown.extractall(file)
    file_extracted = path.Path(file).parent / "models"
    file_extracted.move(cls._root_dir)
def download_directory(url: str, directory: str, verbose: bool = True):
    filename = directory + "download.zip"
    gdown.download(url, filename, quiet=not verbose)
    gdown.extractall(filename)
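# Hypothetical usage of `download_directory`; the URL is a placeholder. Note
# that `directory` is concatenated directly rather than joined, so it should
# end with a path separator (e.g. "data/").
download_directory("https://drive.google.com/uc?id=<file-id>", "data/")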