def extract_files(result):
    """Annotate every listed file with a ``'path'`` key and return the list.

    Reads ``result['files']`` (an empty list when the key is absent) and,
    for each entry, stores a path built from the enclosing ``folder['path']``
    followed by the entry name with every ``'/'`` replaced by ``'_'``.
    Entries for which ``is_folder`` is true get a trailing ``'/'``.

    The entries are mutated in place and the same list object is returned.
    ``folder`` is not a parameter; it is resolved from the surrounding scope.
    """
    entries = result.get('files', [])
    for entry in entries:
        parent_path = folder['path']
        # Slashes inside a name would otherwise look like path separators.
        safe_name = entry['name'].replace('/', '_')
        suffix = '/' if is_folder(entry) else ''
        entry['path'] = '{}{}{}'.format(parent_path, safe_name, suffix)
    return entries
def recurse(file):
    """Return an observable for ``file`` and, for folders, its descendants.

    A non-folder yields a single one-element list containing ``file``.
    A folder yields that same one-element list first, concatenated with
    whatever ``list_folder_recursively(file)`` emits.
    """
    if not is_folder(file):
        return of([file])
    own_entry = of([file])
    descendants = list_folder_recursively(file)
    return concat(own_entry, descendants)
def recurse(file):
    """Return an observable for ``file`` and, for folders, its descendants.

    A non-folder yields a single one-element list containing ``file``.
    A folder yields that same one-element list first, concatenated with
    whatever ``list_folder_recursively(credentials, file, retries)`` emits.
    ``credentials`` and ``retries`` are resolved from the surrounding scope.
    """
    if not is_folder(file):
        return of([file])
    own_entry = of([file])
    descendants = list_folder_recursively(credentials, file, retries)
    return concat(own_entry, descendants)
def download_directory(file: dict,
                       destination: str,
                       matching: str = None,
                       delete_after_download: bool = False) -> Observable:
    """Download a file, or every matching file below a folder.

    Args:
        file: Dict describing the file or folder; its ``'path'`` is used as
            the prefix stripped from each descendant's path.
        destination: Local target path; made absolute before use.
        matching: Optional ``fnmatch`` pattern tested against each file's
            ``'path'``. A falsy value matches everything.
        delete_after_download: When true, ``delete_file`` is called on each
            file once its download has been emitted.

    Returns:
        For a folder, an observable of running statistics dicts with the
        keys ``progress``, ``total_files``, ``total_bytes``,
        ``downloaded_files`` and ``downloaded_bytes``. For a single file,
        whatever ``download_file(file, destination)`` returns.
    """
    destination = os.path.abspath(destination)

    def get_destination(f):
        # Path of f relative to the root being downloaded, re-rooted at
        # the destination directory.
        relative_path = f['path'][len(file['path']):]
        next_destination = '{}{}{}'.format(
            destination,
            # Avoid doubling the separator when destination ends with '/'.
            '' if destination[-1] == '/' else '/',
            relative_path)
        return next_destination

    def initial_stats(files):
        return {
            'progress': 0,
            'total_files': len(files),
            # Files without a 'size' entry count as zero bytes.
            'total_bytes': sum([int(f.get('size', 0)) for f in files]),
            'downloaded_files': 0,
            'downloaded_bytes': 0
        }

    def update_stats(stats, download):
        total_files = stats['total_files']
        total_bytes = stats['total_bytes']
        # A download counts as finished once it reports progress >= 1.
        downloaded_files = stats['downloaded_files'] + (
            0 if download['progress'] < 1 else 1)
        # NOTE(review): this adds every emission's 'downloaded_bytes';
        # presumably download_file reports per-emission increments rather
        # than cumulative totals - confirm.
        downloaded_bytes = stats['downloaded_bytes'] + download[
            'downloaded_bytes']
        if total_bytes:
            progress = downloaded_bytes / total_bytes
        elif total_files:
            # No file reports a size, so a byte ratio is undefined (it used
            # to raise ZeroDivisionError). Fall back to the file ratio.
            progress = downloaded_files / total_files
        else:
            progress = 0
        return {
            'progress': progress,
            'total_files': total_files,
            'total_bytes': total_bytes,
            'downloaded_files': downloaded_files,
            'downloaded_bytes': downloaded_bytes
        }

    def is_file_matching(f):
        return not matching or fnmatch.fnmatch(f['path'], matching)

    def delete_downloaded(downloaded):
        if delete_after_download:
            # Re-emit the download item once the delete observable emits.
            return delete_file(downloaded['file']).pipe(
                map(lambda _: downloaded))
        else:
            return of(downloaded)

    def filter_files(files):
        return [f for f in files if not is_folder(f) and is_file_matching(f)]

    if is_folder(file):
        return list_folder_recursively(file).pipe(
            map(lambda files: filter_files(files)),
            flat_map(lambda files: of(True).pipe(
                flat_map(lambda _: of(*files).pipe(
                    flat_map(lambda f: download_file(f, get_destination(f))),
                    flat_map(delete_downloaded))),
                # Fold every download emission into the running statistics.
                scan(update_stats, initial_stats(files)))))
    else:
        return download_file(file, destination)
def filter_files(files):
    """Drop duplicates, folders and non-matching files.

    Entries are de-duplicated by their ``'id'`` (the first occurrence wins
    and the original order is kept). Of the remaining entries, only those
    that are not folders and satisfy ``is_file_matching`` are returned.
    """
    first_by_id = {}
    for candidate in files:
        # setdefault keeps the earliest entry seen for each id.
        first_by_id.setdefault(candidate['id'], candidate)
    return [
        candidate for candidate in first_by_id.values()
        if not is_folder(candidate) and is_file_matching(candidate)
    ]
def download(credentials,
             file: dict,
             destination: str,
             matching: str = None,
             delete_after_download: bool = False,
             retries: int = 5) -> Observable:
    """Download a Drive file, or every matching file below a Drive folder.

    Args:
        credentials: Passed through to the Drive helpers (``get_service``,
            ``delete_file``, ``touch``, ``enqueue``,
            ``list_folder_recursively``).
        file: Dict describing the file or folder. ``'path'`` is used as the
            prefix stripped from descendants' paths; ``'id'`` and ``'size'``
            are read for each file that is downloaded.
        destination: Local target path; made absolute before use.
        matching: Optional ``fnmatch`` pattern tested against each file's
            ``'path'``. A falsy value matches everything.
        delete_after_download: When true, ``delete_file`` is invoked after
            each file's download stream completes.
        retries: Forwarded to ``enqueue`` for every file download.

    Returns:
        An observable of progress items built by ``progress(...)``. For a
        folder the per-file progress is aggregated into a single item per
        update; nothing is emitted when no file in the folder matches.
    """
    logging.debug('downloading {} to {}'.format(file, destination))
    destination = os.path.abspath(destination)

    # Shared by every progress item created below.
    default_message = 'Downloaded {downloaded_files} of {total_files} files ({downloaded} of {total})'
    message_key = 'tasks.drive.download_folder'

    def get_file_destination(f):
        # Path of f relative to the root being downloaded, re-rooted at
        # the destination directory.
        relative_path = f['path'][len(file['path']):]
        next_destination = '{}{}{}'.format(
            destination,
            # Avoid doubling the separator when destination ends with '/'.
            '' if destination[-1] == '/' else '/',
            relative_path)
        return next_destination

    def is_file_matching(f):
        return not matching or fnmatch.fnmatch(f['path'], matching)

    def delete_downloaded(f):
        if delete_after_download:
            # NOTE(review): this only catches an HttpError raised while the
            # delete observable is being created; a failure surfacing later,
            # on subscription, would propagate through the stream - confirm.
            try:
                # Swallow the delete result so it adds no progress items.
                return delete_file(credentials,
                                   f).pipe(flat_map(lambda _: empty()))
            except HttpError:
                # Report the file that failed, not the root being downloaded.
                logging.warning(
                    'Failed to delete downloaded file {}'.format(f))
                return empty()
        else:
            return empty()

    def filter_files(files):
        # The listing may contain the same file id more than once; keep the
        # first occurrence only.
        seen_file_ids = set()
        unique_files = []
        for f in files:
            file_id = f['id']
            if file_id not in seen_file_ids:
                seen_file_ids.add(file_id)
                unique_files.append(f)
        filtered = [
            f for f in unique_files
            if not is_folder(f) and is_file_matching(f)
        ]
        return filtered

    def download_file(f, dest):
        total_bytes = int(f['size'])

        def next_chunk(downloader):
            status, done = downloader.next_chunk()
            # Log the file and target of this chunk rather than the root
            # file/destination of the overall download.
            logging.debug('downloaded chunk from {} to {}: {}'.format(
                f, dest, status))
            return 1.0 if done else status.progress()

        def download_from_drive(destination_file):
            def create_downloader():
                request = get_service(credentials).files().get_media(
                    fileId=f['id'])
                return MediaIoBaseDownload(fd=destination_file,
                                           request=request,
                                           chunksize=CHUNK_SIZE)

            downloader = create_downloader()
            return forever().pipe(
                map(lambda _: next_chunk(downloader)),
                # inclusive=True lets the final value (1.0) through before
                # the stream completes.
                take_while(lambda p: p < 1, inclusive=True),
                flat_map(lambda p: progress(
                    default_message=default_message,
                    message_key=message_key,
                    downloaded_bytes=int(total_bytes * p),
                    downloaded=format_bytes(int(total_bytes * p)),
                    total_bytes=total_bytes,
                    total=format_bytes(total_bytes),
                    file=f)))

        def action():
            return using_file(file=dest,
                              mode='wb',
                              to_observable=download_from_drive)

        os.makedirs(os.path.dirname(dest), exist_ok=True)
        initial_progress = progress(
            default_message=default_message,
            message_key=message_key,
            downloaded_bytes=0,
            downloaded='0 bytes',
            total_bytes=total_bytes,
            total=format_bytes(total_bytes),
            file=f)
        touch_stream = interval(TOUCH_PERIOD).pipe(
            flat_map(lambda _: touch(credentials, f)))
        download_stream = enqueue(credentials,
                                  queue=_drive_downloads,
                                  action=action,
                                  retries=retries,
                                  description='Download {} to {}'.format(
                                      f, dest)).pipe(aside(touch_stream))
        return concat(initial_progress, download_stream,
                      delete_downloaded(f))

    def download_folder(folder):
        def aggregate_progress(progresses: list):
            # progresses holds the latest progress item of every file.
            total_files = len(progresses)
            total_bytes = sum([int(p.file['size']) for p in progresses])
            downloaded_files = len(
                [p for p in progresses if p.downloaded_bytes == p.total_bytes])
            downloaded_bytes = sum([p.downloaded_bytes for p in progresses])
            return progress(
                default_message=default_message,
                message_key=message_key,
                downloaded_files=downloaded_files,
                downloaded_bytes=downloaded_bytes,
                downloaded=format_bytes(downloaded_bytes),
                total_files=total_files,
                total_bytes=total_bytes,
                total=format_bytes(total_bytes))

        return list_folder_recursively(credentials, folder).pipe(
            map(lambda files: filter_files(files)),
            # Nothing to combine when no file matched.
            flat_map(lambda files: combine_latest(
                *[download_file(f, get_file_destination(f)) for f in files])
                     if files else empty()),
            flat_map(aggregate_progress))

    if is_folder(file):
        return download_folder(file)
    else:
        return download_file(file, destination)
def filter_files(files):
    """Return the entries that are not folders and pass ``is_file_matching``.

    The input order is preserved and a new list is returned.
    """
    kept = []
    for candidate in files:
        if is_folder(candidate):
            continue
        if is_file_matching(candidate):
            kept.append(candidate)
    return kept
def download(
    credentials,
    file: dict,
    destination: str,
    matching: str = None,
    delete_after_download: bool = False,
) -> Observable:
    """Download a Drive file, or every matching file below a Drive folder.

    Args:
        credentials: Passed through to the Drive helpers (``get_service``,
            ``delete_file``, ``touch``, ``enqueue``,
            ``list_folder_recursively``).
        file: Dict describing the file or folder. ``'path'`` is used as the
            prefix stripped from descendants' paths; ``'id'`` and ``'size'``
            are read for each file that is downloaded.
        destination: Local target path; made absolute before use.
        matching: Optional ``fnmatch`` pattern tested against each file's
            ``'path'``. A falsy value matches everything.
        delete_after_download: When true, ``delete_file`` is invoked after
            each file's download stream completes.

    Returns:
        For a single file, an observable of dicts with the keys
        ``downloaded_bytes``, ``total_bytes`` and ``file``. For a folder, an
        observable of aggregate dicts with the keys ``downloaded_files``,
        ``downloaded_bytes``, ``total_files`` and ``total_bytes``; nothing
        is emitted when no file in the folder matches.
    """
    logging.debug('downloading {} to {}'.format(file, destination))
    destination = os.path.abspath(destination)

    def get_file_destination(f):
        # Path of f relative to the root being downloaded, re-rooted at
        # the destination directory.
        relative_path = f['path'][len(file['path']):]
        next_destination = '{}{}{}'.format(
            destination,
            # Avoid doubling the separator when destination ends with '/'.
            '' if destination[-1] == '/' else '/',
            relative_path)
        return next_destination

    def is_file_matching(f):
        return not matching or fnmatch.fnmatch(f['path'], matching)

    def delete_downloaded(f):
        if delete_after_download:
            # Swallow the delete result so it adds no progress items.
            return delete_file(credentials,
                               f).pipe(flat_map(lambda _: empty()))
        else:
            return empty()

    def filter_files(files):
        return [f for f in files if not is_folder(f) and is_file_matching(f)]

    def download_file(f, dest):
        total_bytes = int(f['size'])

        def next_chunk(downloader):
            status, done = downloader.next_chunk()
            # Log the file and target of this chunk rather than the root
            # file/destination of the overall download.
            logging.debug('downloaded chunk from {} to {}: {}'.format(
                f, dest, status))
            return 1.0 if done else status.progress()

        def download_from_drive(destination_file):
            def create_downloader():
                request = get_service(credentials).files().get_media(
                    fileId=f['id'])
                return MediaIoBaseDownload(fd=destination_file,
                                           request=request,
                                           chunksize=CHUNK_SIZE)

            downloader = create_downloader()
            return forever().pipe(
                map(lambda _: next_chunk(downloader)),
                # inclusive=True lets the final value (1.0) through before
                # the stream completes.
                take_while(lambda progress: progress < 1, inclusive=True),
                map(
                    lambda progress: {
                        'downloaded_bytes': int(total_bytes * progress),
                        'total_bytes': total_bytes,
                        'file': f
                    }))

        def action():
            return using_file(file=dest,
                              mode='wb',
                              to_observable=download_from_drive)

        os.makedirs(os.path.dirname(dest), exist_ok=True)
        initial_progress_stream = of({
            'downloaded_bytes': 0,
            'total_bytes': total_bytes,
            'file': f
        })
        touch_stream = interval(TOUCH_PERIOD).pipe(
            flat_map(lambda _: touch(credentials, f)))
        download_stream = enqueue(credentials,
                                  queue=_drive_downloads,
                                  action=action,
                                  retries=0,
                                  description='Download {} to {}'.format(
                                      f, dest)).pipe(aside(touch_stream))
        return concat(initial_progress_stream, download_stream,
                      delete_downloaded(f))

    def download_folder(folder):
        def aggregate_progress(progresses: list):
            # progresses holds the latest progress dict of every file.
            total_files = len(progresses)
            total_bytes = sum([int(p['file']['size']) for p in progresses])
            downloaded_files = len([
                p for p in progresses
                if p['downloaded_bytes'] == p['total_bytes']
            ])
            downloaded_bytes = sum([p['downloaded_bytes'] for p in progresses])
            return {
                'downloaded_files': downloaded_files,
                'downloaded_bytes': downloaded_bytes,
                'total_files': total_files,
                'total_bytes': total_bytes
            }

        return list_folder_recursively(credentials, folder).pipe(
            map(lambda files: filter_files(files)),
            # combine_latest needs at least one source; complete without
            # emitting when no file matched.
            flat_map(lambda files: combine_latest(
                *[download_file(f, get_file_destination(f)) for f in files])
                     if files else empty()),
            map(aggregate_progress))

    if is_folder(file):
        return download_folder(file)
    else:
        return download_file(file, destination)