def _complete_resumable(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    url: str,
    bar: Bar,
    session: Any = requests,
    mtime: Optional[str] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target)
    token = tokens.get("access_token") if tokens else token
    headers = {'Authorization': f'Bearer {token}'}
    if mtime:
        headers['Modified-Time'] = mtime
    debug_step('completing resumable')
    resp = session.patch(url, headers=headers)
    resp.raise_for_status()
    bar.finish()
    debug_step('finished')
    return {'response': json.loads(resp.text), 'tokens': tokens}
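
# Hedged illustration: the callers below pass the fully parameterised
# completion URL, so the final PATCH issued here looks roughly like
# (placeholder values, exact path depends on _resumable_url):
#
#   PATCH {base}/stream/data.csv?chunk=end&id=8c2f...&group=p11-member-group
#
# This helper only finalises the upload and unpacks the JSON response.
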
def get_resumable(
    env: str,
    pnum: str,
    token: str,
    filename: Optional[str] = None,
    upload_id: Optional[str] = None,
    dev_url: Optional[str] = None,
    backend: str = 'files',
    is_dir: bool = False,
    key: Optional[str] = None,
    session: Any = requests,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    """
    List uploads which can be resumed.

    Returns
    -------
    dict, {overview: {filename, chunk_size, max_chunk, id}, tokens: {}}

    """
    if not dev_url:
        filename = f'/{quote(format_filename(filename))}' if filename else ''
        endpoint = f'resumables{filename}'
        url = file_api_url(env, pnum, backend, endpoint=endpoint)
    else:
        url = dev_url
    if upload_id:
        url = f'{url}?id={upload_id}'
    elif is_dir and key:
        url = f'{url}?key={quote(key, safe="")}'
    debug_step(f'fetching resumables info, using: {url}')
    tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target)
    token = tokens.get("access_token") if tokens else token
    headers = {'Authorization': f'Bearer {token}'}
    resp = session.get(url, headers=headers)
    resp.raise_for_status()
    data = json.loads(resp.text)
    return {'overview': data, 'tokens': tokens}
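
# Illustrative usage (a hedged sketch: the project number, token, and
# filename are placeholders; the response layout follows the docstring
# above):
#
#   res = get_resumable('prod', 'p11', token, filename='data.csv')
#   info = res['overview']  # filename, chunk_size, max_chunk, id
#   print(info.get('id'), info.get('max_chunk'))
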
def export_delete(
    env: str,
    pnum: str,
    token: str,
    filename: str,
    session: Any = requests,
    group: Optional[str] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target)
    token = tokens.get("access_token") if tokens else token
    endpoint = f'export/{filename}'
    url = file_api_url(env, pnum, 'files', endpoint=endpoint)
    headers = {'Authorization': f'Bearer {token}'}
    print(f'deleting: {filename}')
    resp = session.delete(url, headers=headers)
    resp.raise_for_status()
    return {'response': resp, 'tokens': tokens}
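
# Illustrative call (hedged; project number, token, and filename are
# placeholders). The raw requests.Response is returned under 'response',
# with any refreshed tokens under 'tokens':
#
#   out = export_delete('prod', 'p11', token, 'old-results.csv')
#   print(out['response'].status_code)
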
def _continue_resumable(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    to_resume: dict,
    group: Optional[str] = None,
    verify: bool = False,
    dev_url: Optional[str] = None,
    backend: str = 'files',
    is_dir: bool = False,
    session: Any = requests,
    set_mtime: bool = False,
    public_key: Optional[libnacl.public.PublicKey] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    """
    Continue a resumable upload, reading the file from the appropriate
    byte offset, chunk-by-chunk, and performing a PATCH request per chunk.
    Optionally verify the chunk md5 before resuming.

    """
    tokens = {}
    url = _resumable_url(env, pnum, filename, dev_url, backend, is_dir, group=group)
    headers = {'Authorization': f'Bearer {token}'}
    current_mtime = os.stat(filename).st_mtime if set_mtime else None
    if set_mtime:
        headers['Modified-Time'] = str(current_mtime)
    max_chunk = to_resume['max_chunk']
    chunksize = to_resume['chunk_size']
    previous_offset = to_resume['previous_offset']
    next_offset = to_resume['next_offset']
    upload_id = to_resume['id']
    server_chunk_md5 = str(to_resume['md5sum'])
    chunk_num = max_chunk + 1
    print(f'Resuming upload with id: {upload_id}')
    bar = _init_progress_bar(chunk_num, chunksize, filename)
    for chunk, enc_nonce, enc_key, ch_size in lazy_reader(
        filename,
        chunksize,
        previous_offset,
        next_offset,
        verify,
        server_chunk_md5,
        public_key=public_key,
    ):
        tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target)
        if tokens:
            token = tokens.get("access_token")
            refresh_token = tokens.get("refresh_token")
            refresh_target = get_claims(token).get('exp')
            headers['Authorization'] = f'Bearer {token}'
        if public_key:
            headers['Content-Type'] = 'application/octet-stream+nacl'
            headers['Nacl-Nonce'] = nacl_encode_header(enc_nonce)
            headers['Nacl-Key'] = nacl_encode_header(enc_key)
            headers['Nacl-Chunksize'] = str(ch_size)
        parameterised_url = f'{url}?chunk={chunk_num}&id={upload_id}'
        debug_step(f'sending chunk {chunk_num}, using {parameterised_url}')
        with Retry(session.patch, parameterised_url, headers, chunk) as retriable:
            if retriable.get("new_session"):
                session = retriable.get("new_session")
            resp = retriable.get("resp")
            resp.raise_for_status()
            data = json.loads(resp.text)
            bar.next()
            upload_id = data['id']
            chunk_num = data.get("max_chunk") + 1
    if not group:
        group = f'{pnum}-member-group'
    parameterised_url = f'{url}?chunk=end&id={upload_id}&group={group}'
    resp = _complete_resumable(
        env,
        pnum,
        filename,
        token,
        parameterised_url,
        bar,
        session=session,
        mtime=str(current_mtime) if current_mtime else None,
        api_key=api_key,
        refresh_token=refresh_token,
        refresh_target=refresh_target,
    )
    if not tokens:
        tokens = resp.get('tokens')
    return {
        'response': resp.get('response'),
        'tokens': tokens,
        'session': session,
    }
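
# Hedged sketch of the resume flow (placeholder names): the to_resume dict
# is typically the overview returned by get_resumable, assuming it carries
# the id, max_chunk, chunk_size, previous_offset, next_offset, and md5sum
# fields consumed above:
#
#   res = get_resumable('prod', 'p11', token, filename='data.csv')
#   _continue_resumable('prod', 'p11', 'data.csv', token, res['overview'])
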
def _start_resumable(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    chunksize: int,
    group: Optional[str] = None,
    dev_url: Optional[str] = None,
    stop_at: Optional[int] = None,
    backend: str = 'files',
    is_dir: bool = False,
    session: Any = requests,
    set_mtime: bool = False,
    public_key: Optional[libnacl.public.PublicKey] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    """
    Start a new resumable upload, reading a file chunk-by-chunk
    and performing a PATCH request per chunk.

    """
    tokens = {}
    url = _resumable_url(env, pnum, filename, dev_url, backend, is_dir, group=group)
    headers = {'Authorization': f'Bearer {token}'}
    current_mtime = os.stat(filename).st_mtime if set_mtime else None
    if set_mtime:
        headers['Modified-Time'] = str(current_mtime)
    chunk_num = 1
    for chunk, enc_nonce, enc_key, ch_size in lazy_reader(
        filename, chunksize, public_key=public_key
    ):
        tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target)
        if tokens:
            token = tokens.get("access_token")
            refresh_token = tokens.get("refresh_token")
            refresh_target = get_claims(token).get('exp')
            headers['Authorization'] = f'Bearer {token}'
        if public_key:
            headers['Content-Type'] = 'application/octet-stream+nacl'
            headers['Nacl-Nonce'] = nacl_encode_header(enc_nonce)
            headers['Nacl-Key'] = nacl_encode_header(enc_key)
            headers['Nacl-Chunksize'] = str(ch_size)
        if chunk_num == 1:
            parameterised_url = f'{url}?chunk={chunk_num}'
        else:
            parameterised_url = f'{url}?chunk={chunk_num}&id={upload_id}'
        debug_step(f'sending chunk {chunk_num}, using {parameterised_url}')
        with Retry(session.patch, parameterised_url, headers, chunk) as retriable:
            if retriable.get("new_session"):
                session = retriable.get("new_session")
            resp = retriable.get("resp")
            resp.raise_for_status()
            data = json.loads(resp.text)
            if chunk_num == 1:
                upload_id = data['id']
                print(f'Upload id: {upload_id}')
                bar = _init_progress_bar(chunk_num, chunksize, filename)
            bar.next()
            if stop_at and chunk_num == stop_at:
                print(f'stopping at chunk {chunk_num}')
                return {'response': data}
            chunk_num = data.get("max_chunk") + 1
    if not group:
        group = f'{pnum}-member-group'
    parameterised_url = f'{url}?chunk=end&id={upload_id}&group={group}'
    resp = _complete_resumable(
        env,
        pnum,
        filename,
        token,
        parameterised_url,
        bar,
        session=session,
        mtime=str(current_mtime) if current_mtime else None,
        api_key=api_key,
        refresh_token=refresh_token,
        refresh_target=refresh_target,
    )
    if not tokens:
        tokens = resp.get('tokens')
    return {
        'response': resp.get('response'),
        'tokens': tokens,
        'session': session,
    }
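
# Hedged sketch of a fresh resumable upload (placeholders throughout;
# the 10 MB chunk size is arbitrary). stop_at is useful in tests to
# abandon an upload midway and then exercise _continue_resumable:
#
#   out = _start_resumable('prod', 'p11', 'data.csv', token,
#                          chunksize=1024 * 1024 * 10)
#   print(out['response'])
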
def export_get(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    chunksize: int = 4096,
    etag: Optional[str] = None,
    dev_url: Optional[str] = None,
    backend: str = 'files',
    session: Any = requests,
    no_print_id: bool = False,
    set_mtime: bool = False,
    nobar: bool = False,
    target_dir: Optional[str] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
    public_key: Optional[libnacl.public.PublicKey] = None,
) -> dict:
    """
    Download a file to the current directory.

    Parameters
    ----------
    env: 'test', 'prod', or 'alt'
    pnum: project number
    filename: filename to download
    token: JWT
    chunksize: bytes per iteration
    etag: content reference for remote resource
    dev_url: development url
    backend: API backend
    session: requests.session
    no_print_id: suppress printing the download id
    set_mtime: set local file mtime to be the same as remote resource
    nobar: disable the progress bar
    target_dir: where to save the file locally
    api_key: client specific JWT allowing token refresh
    refresh_token: a JWT with which to obtain a new access token
    refresh_target: time around which to refresh (within a default range)
    public_key: encrypt/decrypt data on-the-fly

    """
    tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target)
    token = tokens.get("access_token") if tokens else token
    filemode = 'wb'
    current_file_size = None
    headers = {'Authorization': f'Bearer {token}'}
    if etag:
        debug_step(f'download_id: {etag}')
        filemode = 'ab'
        if os.path.lexists(filename):
            current_file_size = os.stat(filename).st_size
            debug_step(f'found {filename} with {current_file_size} bytes')
            headers['Range'] = f'bytes={current_file_size}-'
        else:
            debug_step(f'{filename} not found')
            headers['Range'] = 'bytes=0-'
    if dev_url:
        url = dev_url
    else:
        urlpath = '' if backend == 'survey' else 'export/'
        endpoint = f'{urlpath}{filename}'
        # make provision for unsatisfactory semantics
        service = 'files' if backend in ('export', 'files') else backend
        url = file_api_url(env, pnum, service, endpoint=endpoint)
    debug_step(f'fetching file info using: {url}')
    resp = session.head(url, headers=headers)
    resp.raise_for_status()
    try:
        download_id = resp.headers['Etag']
        if not no_print_id:
            print(f'Download id: {download_id}')
    except KeyError:
        print('Warning: could not retrieve download id, resumable download will not work')
        download_id = None
    total_file_size = int(resp.headers['Content-Length'])
    if not nobar:
        bar = _init_export_progress_bar(
            unquote(filename), current_file_size, total_file_size, chunksize
        )
    filename = filename if not target_dir else os.path.normpath(f'{target_dir}/{filename}')
    destination_dir = os.path.dirname(filename)
    if destination_dir and not os.path.lexists(destination_dir):
        debug_step(f'creating directory: {destination_dir}')
        os.makedirs(destination_dir)
    if public_key:
        debug_step('generating nonce and key')
        nonce = nacl_gen_nonce()
        key = nacl_gen_key()
        enc_nonce = nacl_encrypt_header(public_key, nonce)
        enc_key = nacl_encrypt_header(public_key, key)
        headers['Nacl-Nonce'] = nacl_encode_header(enc_nonce)
        headers['Nacl-Key'] = nacl_encode_header(enc_key)
        headers['Nacl-Chunksize'] = str(chunksize)
    with session.get(url, headers=headers, stream=True) as r:
        r.raise_for_status()
        with open(unquote(filename), filemode) as f:
            for chunk in r.iter_content(chunk_size=chunksize):
                if chunk:
                    if public_key:
                        chunk = nacl_decrypt_data(chunk, nonce, key)
                    f.write(chunk)
                    if not nobar:
                        bar.next()
    if not nobar:
        bar.next()
    if not nobar:
        bar.finish()
    if set_mtime:
        err = 'could not set Modified-Time'
        err_consequence = 'incremental sync will not work for this file'
        try:
            mtime = float(resp.headers.get('Modified-Time'))
            debug_step(f'setting mtime for {filename} to {mtime}')
            os.utime(filename, (mtime, mtime))
        except TypeError:
            print(f'{err}: {filename} - {err_consequence}')
            print('issue most likely due to not getting the correct header from the API')
            print(f'please report the issue: {HELP_URL}')
        except OSError:
            print(f'{err}: {filename} - {err_consequence}')
            print('issue due to local operating system problem')
    return {'filename': filename, 'tokens': tokens}
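
# Illustrative download (hedged; placeholders throughout). Re-running with
# the etag printed by an interrupted attempt resumes from the local byte
# count via the Range header logic above:
#
#   out = export_get('prod', 'p11', 'results.csv', token, set_mtime=True)
#   print(out['filename'])
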
def streamfile(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    chunksize: int = 4096,
    group: Optional[str] = None,
    backend: str = 'files',
    is_dir: bool = False,
    session: Any = requests,
    set_mtime: bool = False,
    public_key: Optional[libnacl.public.PublicKey] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    """
    Idempotent, lazy data upload from files.

    Parameters
    ----------
    env: 'test', 'prod', or 'alt'
    pnum: project number
    filename: path to file
    token: JWT, access token
    chunksize: bytes to read per chunk
    group: name of file group which should own the upload
    backend: which API backend to send data to
    is_dir: True if uploading a directory of files,
            will create a different URL structure
    session: e.g. requests.session
    set_mtime: if True, send information about the file's client-side mtime,
               asking the server to set it remotely
    public_key: encrypt data on-the-fly (with automatic server-side decryption)
    api_key: client specific JWT allowing token refresh
    refresh_token: a JWT with which to obtain a new access token
    refresh_target: time around which to refresh (within a default range)

    """
    tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target)
    token = tokens.get('access_token') if tokens else token
    if not group:
        group = f'{pnum}-member-group'
    resource = upload_resource_name(filename, is_dir, group=group)
    endpoint = f'stream/{resource}?group={group}'
    url = file_api_url(env, pnum, backend, endpoint=endpoint)
    headers = {'Authorization': f'Bearer {token}'}
    debug_step(f'streaming data to {url}')
    if set_mtime:
        current_mtime = os.stat(filename).st_mtime
        headers['Modified-Time'] = str(current_mtime)
    if public_key:
        nonce, key = nacl_gen_nonce(), nacl_gen_key()
        enc_nonce = nacl_encrypt_header(public_key, nonce)
        enc_key = nacl_encrypt_header(public_key, key)
        headers['Content-Type'] = 'application/octet-stream+nacl'
        headers['Nacl-Nonce'] = nacl_encode_header(enc_nonce)
        headers['Nacl-Key'] = nacl_encode_header(enc_key)
        headers['Nacl-Chunksize'] = str(chunksize)
    else:
        # so the lazy_reader knows to return bytes only
        nonce, key = True, True
    with Retry(
        session.put,
        url,
        headers,
        lazy_reader(
            filename,
            chunksize,
            with_progress=True,
            public_key=public_key,
            nonce=nonce,
            key=key,
        ),
    ) as retriable:
        if retriable.get("new_session"):
            session = retriable.get("new_session")
        resp = retriable.get("resp")
        resp.raise_for_status()
    return {'response': resp, 'tokens': tokens, 'session': session}
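
# Illustrative upload (hedged; placeholders throughout). Without an explicit
# group the upload is owned by the project member group:
#
#   out = streamfile('prod', 'p11', 'data.csv', token,
#                    group='p11-member-group', set_mtime=True)
#   print(out['response'].status_code)
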