def _delete_remote_resource(self, resource: str) -> str:
    """
    Choose a function, invoke it to delete a remote resource.
    """
    delete_funcs = {
        'export': export_delete,
        'import': import_delete,
    }
    debug_step(f'deleting: {resource}')
    resp = delete_funcs[self.remote_key](
        self.env,
        self.pnum,
        self.token,
        resource,
        session=self.session,
        group=self.group,
        api_key=self.api_key,
        refresh_token=self.refresh_token,
        refresh_target=self.refresh_target,
    )
    if resp.get('tokens'):
        self.token = resp.get('tokens').get('access_token')
        self.refresh_token = resp.get('tokens').get('refresh_token')
        self.refresh_target = get_claims(self.token).get('exp')
    return resource
def _transfer_local_to_remote(
    self,
    resource: str,
    integrity_reference: Optional[str] = None,
) -> str:
    """
    Upload a resource to the remote destination, either as a basic
    stream or as a resumable upload, depending on whether its size
    exceeds the chunk threshold.
    """
    if not os.path.lexists(resource):
        print(f'WARNING: could not find {resource} on local disk')
        return resource
    if os.stat(resource).st_size > self.chunk_threshold:
        resp = initiate_resumable(
            self.env,
            self.pnum,
            resource,
            self.token,
            chunksize=self.chunk_size,
            group=self.group,
            verify=True,
            is_dir=True,
            session=self.session,
            set_mtime=self.sync_mtime,
            public_key=self.public_key,
            api_key=self.api_key,
            refresh_token=self.refresh_token,
            refresh_target=self.refresh_target,
        )
    else:
        resp = streamfile(
            self.env,
            self.pnum,
            resource,
            self.token,
            group=self.group,
            is_dir=True,
            session=self.session,
            set_mtime=self.sync_mtime,
            public_key=self.public_key,
            api_key=self.api_key,
            refresh_token=self.refresh_token,
            refresh_target=self.refresh_target,
        )
    if resp.get("session"):
        debug_step("renewing session")
        self.session = resp.get("session")
    if resp.get('tokens'):
        self.token = resp.get('tokens').get('access_token')
        self.refresh_token = resp.get('tokens').get('refresh_token')
        self.refresh_target = get_claims(self.token).get('exp')
    return resource
def _transfer_remote_to_local(
    self,
    resource: str,
    integrity_reference: Optional[str] = None,
) -> str:
    """
    Download a resource from the remote location, resuming if local
    data is found, and if the integrity reference did not change
    since the first portion was downloaded.
    """
    target = os.path.dirname(resource)
    target = target if not self.target_dir else os.path.normpath(
        f'{self.target_dir}/{target}')
    if not os.path.lexists(target):
        debug_step(f'creating directory: {target}')
        os.makedirs(target)
    resp = export_get(
        self.env,
        self.pnum,
        resource,
        self.token,
        session=self.session,
        etag=integrity_reference,
        no_print_id=True,
        set_mtime=self.sync_mtime,
        backend=self.remote_key,
        target_dir=self.target_dir,
        api_key=self.api_key,
        refresh_token=self.refresh_token,
        refresh_target=self.refresh_target,
        public_key=self.public_key,
    )
    if resp.get('tokens'):
        self.token = resp.get('tokens').get('access_token')
        self.refresh_token = resp.get('tokens').get('refresh_token')
        self.refresh_target = get_claims(self.token).get('exp')
    return resource
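
# A minimal, standalone sketch (illustrative, not part of the client)
# of the routing rule applied by _transfer_local_to_remote above:
# resources larger than the chunk threshold go through the resumable
# protocol, smaller ones are streamed in a single request. The helper
# name and the default threshold are hypothetical; it relies on the
# module's existing os import.

def _example_choose_transfer(path: str, chunk_threshold: int = 1000 * 1000 * 1000) -> str:
    """Illustrative only: pick a transfer strategy by file size."""
    if os.stat(path).st_size > chunk_threshold:
        return 'resumable'  # would be handled by initiate_resumable(...)
    return 'stream'         # would be handled by streamfile(...)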
def _continue_resumable(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    to_resume: dict,
    group: Optional[str] = None,
    verify: bool = False,
    dev_url: Optional[str] = None,
    backend: str = 'files',
    is_dir: bool = False,
    session: Any = requests,
    set_mtime: bool = False,
    public_key: Optional[libnacl.public.PublicKey] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    """
    Continue a resumable upload, reading the file from the appropriate
    byte offset, chunk-by-chunk, and performing a PATCH request per
    chunk. Optional chunk md5 verification before resuming.
    """
    tokens = {}
    url = _resumable_url(env, pnum, filename, dev_url, backend, is_dir, group=group)
    headers = {'Authorization': f'Bearer {token}'}
    current_mtime = os.stat(filename).st_mtime if set_mtime else None
    if set_mtime:
        headers['Modified-Time'] = str(current_mtime)
    max_chunk = to_resume['max_chunk']
    chunksize = to_resume['chunk_size']
    previous_offset = to_resume['previous_offset']
    next_offset = to_resume['next_offset']
    upload_id = to_resume['id']
    server_chunk_md5 = str(to_resume['md5sum'])
    chunk_num = max_chunk + 1
    print(f'Resuming upload with id: {upload_id}')
    bar = _init_progress_bar(chunk_num, chunksize, filename)
    for chunk, enc_nonce, enc_key, ch_size in lazy_reader(
        filename,
        chunksize,
        previous_offset,
        next_offset,
        verify,
        server_chunk_md5,
        public_key=public_key,
    ):
        tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target)
        if tokens:
            token = tokens.get("access_token")
            refresh_token = tokens.get("refresh_token")
            refresh_target = get_claims(token).get('exp')
            headers['Authorization'] = f'Bearer {token}'
        if public_key:
            headers['Content-Type'] = 'application/octet-stream+nacl'
            headers['Nacl-Nonce'] = nacl_encode_header(enc_nonce)
            headers['Nacl-Key'] = nacl_encode_header(enc_key)
            headers['Nacl-Chunksize'] = str(ch_size)
        parameterised_url = '{0}?chunk={1}&id={2}'.format(
            url, str(chunk_num), upload_id)
        debug_step(f'sending chunk {chunk_num}, using {parameterised_url}')
        with Retry(session.patch, parameterised_url, headers, chunk) as retriable:
            if retriable.get("new_session"):
                session = retriable.get("new_session")
            resp = retriable.get("resp")
            resp.raise_for_status()
            data = json.loads(resp.text)
            bar.next()
            upload_id = data['id']
            chunk_num = data.get("max_chunk") + 1
    if not group:
        group = '{0}-member-group'.format(pnum)
    parameterised_url = '{0}?chunk={1}&id={2}&group={3}'.format(
        url, 'end', upload_id, group)
    resp = _complete_resumable(
        env,
        pnum,
        filename,
        token,
        parameterised_url,
        bar,
        session=session,
        mtime=str(current_mtime),
        api_key=api_key,
        refresh_token=refresh_token,
        refresh_target=refresh_target,
    )
    if not tokens:
        tokens = resp.get('tokens')
    return {
        'response': resp.get('response'),
        'tokens': tokens,
        'session': session,
    }
def _start_resumable(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    chunksize: int,
    group: Optional[str] = None,
    dev_url: Optional[str] = None,
    stop_at: Optional[int] = None,
    backend: str = 'files',
    is_dir: bool = False,
    session: Any = requests,
    set_mtime: bool = False,
    public_key: Optional[libnacl.public.PublicKey] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    """
    Start a new resumable upload, reading the file chunk-by-chunk,
    and performing a PATCH request per chunk.
    """
    tokens = {}
    url = _resumable_url(env, pnum, filename, dev_url, backend, is_dir, group=group)
    headers = {'Authorization': f'Bearer {token}'}
    current_mtime = os.stat(filename).st_mtime if set_mtime else None
    if set_mtime:
        headers['Modified-Time'] = str(current_mtime)
    chunk_num = 1
    for chunk, enc_nonce, enc_key, ch_size in lazy_reader(
        filename, chunksize, public_key=public_key,
    ):
        tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target)
        if tokens:
            token = tokens.get("access_token")
            refresh_token = tokens.get("refresh_token")
            refresh_target = get_claims(token).get('exp')
            headers['Authorization'] = f'Bearer {token}'
        if public_key:
            headers['Content-Type'] = 'application/octet-stream+nacl'
            headers['Nacl-Nonce'] = nacl_encode_header(enc_nonce)
            headers['Nacl-Key'] = nacl_encode_header(enc_key)
            headers['Nacl-Chunksize'] = str(ch_size)
        if chunk_num == 1:
            parameterised_url = '{0}?chunk={1}'.format(url, str(chunk_num))
        else:
            parameterised_url = '{0}?chunk={1}&id={2}'.format(
                url, str(chunk_num), upload_id)
        debug_step(f'sending chunk {chunk_num}, using {parameterised_url}')
        with Retry(session.patch, parameterised_url, headers, chunk) as retriable:
            if retriable.get("new_session"):
                session = retriable.get("new_session")
            resp = retriable.get("resp")
            resp.raise_for_status()
            data = json.loads(resp.text)
            if chunk_num == 1:
                upload_id = data['id']
                print('Upload id: {0}'.format(upload_id))
                bar = _init_progress_bar(chunk_num, chunksize, filename)
            bar.next()
            if stop_at and chunk_num == stop_at:
                print('stopping at chunk {0}'.format(chunk_num))
                return {'response': data}
            chunk_num = data.get("max_chunk") + 1
    if not group:
        group = '{0}-member-group'.format(pnum)
    parameterised_url = '{0}?chunk={1}&id={2}&group={3}'.format(
        url, 'end', upload_id, group)
    resp = _complete_resumable(
        env,
        pnum,
        filename,
        token,
        parameterised_url,
        bar,
        session=session,
        mtime=str(current_mtime),
        api_key=api_key,
        refresh_token=refresh_token,
        refresh_target=refresh_target,
    )
    if not tokens:
        tokens = resp.get('tokens')
    return {
        'response': resp.get('response'),
        'tokens': tokens,
        'session': session,
    }
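
# The resumable protocol driven by the two functions above addresses
# each chunk with query parameters: one PATCH request per chunk, then
# a final 'end' request to assemble the file. Illustrative exchange
# (URL, id, and group are placeholders):
#
#   PATCH <stream-url>?chunk=1                          -> {"id": <id>, ...}
#   PATCH <stream-url>?chunk=2&id=<id>
#   PATCH <stream-url>?chunk=end&id=<id>&group=p11-member-group
#
# _start_resumable learns the upload id from the first response, while
# _continue_resumable takes it from the server's resumables overview.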
def initiate_resumable(
    env: str,
    pnum: str,
    filename: str,
    token: str,
    chunksize: Optional[int] = None,
    new: bool = False,
    group: Optional[str] = None,
    verify: bool = False,
    upload_id: Optional[str] = None,
    dev_url: Optional[str] = None,
    stop_at: Optional[int] = None,
    backend: str = 'files',
    is_dir: bool = False,
    session: Any = requests,
    set_mtime: bool = False,
    public_key: Optional[libnacl.public.PublicKey] = None,
    api_key: Optional[str] = None,
    refresh_token: Optional[str] = None,
    refresh_target: Optional[int] = None,
) -> dict:
    """
    Perform a resumable upload, either by resuming a partial one,
    or by starting a new one.

    Parameters
    ----------
    env: 'test' or 'prod'
    pnum: project number
    filename: filename
    token: JWT
    chunksize: user specified chunk size in bytes
    new: flag to force a new upload, rather than resuming an existing one
    group: group owner after upload
    verify: verify md5 chunk integrity before resume
    upload_id: identifies the resumable
    dev_url: pass a complete url (useful for development)
    stop_at: chunk number at which to stop upload (useful for development)
    backend: API backend
    is_dir: bool, True if uploading a directory of files,
            will create a different URL structure
    session: requests.session
    set_mtime: if True send information about the file's client-side mtime,
               asking the server to set it remotely
    public_key: encrypt data on-the-fly (with automatic server-side decryption)
    api_key: client specific JWT allowing token refresh
    refresh_token: a JWT with which to obtain a new access token
    refresh_target: time around which to refresh (within a default range)

    """
    to_resume = False
    if not new:
        key = _resumable_key(is_dir, filename)
        data = get_resumable(
            env,
            pnum,
            token,
            filename,
            upload_id,
            dev_url,
            backend,
            is_dir=is_dir,
            key=key,
            session=session,
            api_key=api_key,
            refresh_token=refresh_token,
            refresh_target=refresh_target,
        )
        if data.get('tokens'):
            tokens = data.get('tokens')
            token = tokens.get("access_token")
            refresh_token = tokens.get("refresh_token")
            refresh_target = get_claims(token).get('exp')
        if data.get('overview', {}).get('id'):
            to_resume = data.get('overview')
    if dev_url:
        dev_url = dev_url.replace('resumables', 'stream')
    if to_resume:
        try:
            return _continue_resumable(
                env,
                pnum,
                filename,
                token,
                to_resume,
                group,
                verify,
                dev_url,
                backend,
                is_dir,
                session=session,
                set_mtime=set_mtime,
                public_key=public_key,
                api_key=api_key,
                refresh_token=refresh_token,
                refresh_target=refresh_target,
            )
        except Exception as e:
            print(e)
            return
    else:
        return _start_resumable(
            env,
            pnum,
            filename,
            token,
            chunksize,
            group,
            dev_url,
            stop_at,
            backend,
            is_dir,
            session=session,
            set_mtime=set_mtime,
            public_key=public_key,
            api_key=api_key,
            refresh_token=refresh_token,
            refresh_target=refresh_target,
        )
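
# Illustrative call (all values are placeholders): resume a partial
# upload of the file if the server has one, otherwise start a new
# upload with 50 MB chunks, verifying chunk md5sums before resuming.
#
#   resp = initiate_resumable(
#       'prod',
#       'p11',
#       '/data/large-file.tar',
#       token,
#       chunksize=1000 * 1000 * 50,
#       group='p11-member-group',
#       verify=True,
#   )
#   tokens = resp.get('tokens')  # may hold a refreshed token pair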
def cli(
    pnum: str,
    guide: str,
    env: str,
    group: str,
    basic: bool,
    upload: str,
    upload_id: str,
    resume_list: bool,
    resume_delete: str,
    resume_delete_all: bool,
    download: str,
    download_id: str,
    download_list: bool,
    version: bool,
    verbose: bool,
    config_show: bool,
    config_delete: bool,
    session_delete: bool,
    register: bool,
    ignore_prefixes: str,
    ignore_suffixes: str,
    upload_cache_show: bool,
    upload_cache_delete: str,
    upload_cache_delete_all: bool,
    cache_disable: bool,
    download_cache_show: str,
    download_cache_delete: str,
    download_cache_delete_all: bool,
    upload_sync: str,
    download_sync: str,
    cache_sync: bool,
    keep_missing: bool,
    keep_updated: bool,
    download_delete: str,
    api_key: str,
    encrypt: bool,
    chunk_size: int,
    resumable_threshold: int,
) -> None:
    """tacl - TSD API client."""
    if not env:
        env = "ec-prod" if pnum and pnum.startswith("ec") else "prod"
    token = None
    if verbose:
        os.environ['DEBUG'] = '1'

    # 1. Determine necessary authentication options
    if (upload or resume_list or resume_delete or resume_delete_all or upload_sync):
        if basic:
            requires_user_credentials, token_type = False, TOKENS[env]['upload']
        else:
            requires_user_credentials, token_type = True, TOKENS[env]['upload']
    elif download or download_list or download_sync or download_delete:
        if env == 'alt' and basic:
            requires_user_credentials, token_type = False, TOKENS[env]['download']
        elif env != 'alt' and basic:
            click.echo('download not authorized with basic auth')
            sys.exit(1)
        else:
            requires_user_credentials, token_type = True, TOKENS[env]['download']
    else:
        requires_user_credentials = False
    auth_method = "iam" if env.startswith("ec-") or (pnum and pnum.startswith("ec")) else "tsd"

    # 2. Try to get a valid access token
    if requires_user_credentials:
        check_api_connection(env)
        if not pnum:
            click.echo('missing pnum argument')
            sys.exit(1)
        auth_required = False
        debug_step(f'using login session with {env}:{pnum}:{token_type}')
        debug_step('checking if login session has expired')
        expired = session_is_expired(env, pnum, token_type)
        if expired:
            click.echo('your session has expired, please authenticate')
            auth_required = True
        debug_step('checking if login session will expire soon')
        expires_soon = session_expires_soon(env, pnum, token_type)
        if expires_soon:
            click.echo('your session expires soon')
            if click.confirm('Do you want to refresh your login session?'):
                auth_required = True
            else:
                auth_required = False
        if not expires_soon and expired:
            auth_required = True
        if not api_key:
            api_key = get_api_key(env, pnum)
        if auth_required:
            username, password, otp = get_user_credentials(env)
            token, refresh_token = get_jwt_two_factor_auth(
                env,
                pnum,
                api_key,
                username,
                password,
                otp,
                token_type,
                auth_method=auth_method,
            )
            if token:
                debug_step('updating login session')
                session_update(env, pnum, token_type, token, refresh_token)
        else:
            token = session_token(env, pnum, token_type)
            debug_step('using token from existing login session')
            refresh_token = session_refresh_token(env, pnum, token_type)
            if refresh_token:
                debug_step('using refresh token from existing login session')
                debug_step(f'refreshes remaining: {get_claims(refresh_token).get("counter")}')
                debug_step(refresh_token)
    elif not requires_user_credentials and basic:
        if not pnum:
            click.echo('missing pnum argument')
            sys.exit(1)
        check_api_connection(env)
        if not api_key:
            api_key = get_api_key(env, pnum)
        debug_step('using basic authentication')
        token, refresh_token = get_jwt_basic_auth(env, pnum, api_key)
    if (requires_user_credentials or basic) and not token:
        click.echo('authentication failed')
        sys.exit(1)
    # 3. Given a valid access token, perform a given action
    if token:
        refresh_target = get_claims(token).get('exp')
        if encrypt:
            if not LIBSODIUM_AVAILABLE:
                click.echo('libsodium system dependency missing - end-to-end encryption not available')
                public_key = None
            else:
                debug_step('Using end-to-end encryption')
                public_key = nacl_get_server_public_key(env, pnum, token)
        else:
            public_key = None
        group = f'{pnum}-member-group' if not group else group
        if upload:
            if os.path.isfile(upload):
                if upload_id or os.stat(upload).st_size > as_bytes(resumable_threshold):
                    debug_step('starting resumable upload')
                    resp = initiate_resumable(
                        env,
                        pnum,
                        upload,
                        token,
                        chunksize=as_bytes(chunk_size),
                        group=group,
                        verify=True,
                        upload_id=upload_id,
                        public_key=public_key,
                        api_key=api_key,
                        refresh_token=refresh_token,
                        refresh_target=refresh_target,
                    )
                else:
                    debug_step('starting upload')
                    resp = streamfile(
                        env,
                        pnum,
                        upload,
                        token,
                        group=group,
                        public_key=public_key,
                    )
            else:
                click.echo(f'uploading directory {upload}')
                upload = construct_correct_upload_path(upload)
                uploader = SerialDirectoryUploader(
                    env,
                    pnum,
                    upload,
                    token,
                    group,
                    prefixes=ignore_prefixes,
                    suffixes=ignore_suffixes,
                    use_cache=not cache_disable,
                    public_key=public_key,
                    chunk_size=as_bytes(chunk_size),
                    chunk_threshold=as_bytes(resumable_threshold),
                    api_key=api_key,
                    refresh_token=refresh_token,
                    refresh_target=refresh_target,
                )
                uploader.sync()
        elif upload_sync:
            if os.path.isfile(upload_sync):
                sys.exit('--upload-sync takes a directory as an argument')
            click.echo(f'uploading directory {upload_sync}')
            upload_sync = construct_correct_upload_path(upload_sync)
            syncer = SerialDirectoryUploadSynchroniser(
                env,
                pnum,
                upload_sync,
                token,
                group,
                prefixes=ignore_prefixes,
                suffixes=ignore_suffixes,
                use_cache=bool(cache_sync),
                sync_mtime=True,
                keep_missing=keep_missing,
                keep_updated=keep_updated,
                remote_key='import',
                public_key=public_key,
                chunk_size=as_bytes(chunk_size),
                chunk_threshold=as_bytes(resumable_threshold),
                api_key=api_key,
                refresh_token=refresh_token,
                refresh_target=refresh_target,
            )
            syncer.sync()
        elif resume_list:
            debug_step('listing resumables')
            overview = get_resumable(env, pnum, token)
            print_resumables_list(overview)
        elif resume_delete:
            filename = None
            debug_step('deleting resumable')
            delete_resumable(env, pnum, token, filename, resume_delete)
        elif resume_delete_all:
            debug_step('deleting all resumables')
            delete_all_resumables(env, pnum, token)
        elif download:
            filename = download
            debug_step('starting file export')
            resp = export_head(env, pnum, filename, token)
            if resp.headers.get('Content-Type') == 'directory':
                click.echo(f'downloading directory: {download}')
                downloader = SerialDirectoryDownloader(
                    env,
                    pnum,
                    download,
                    token,
                    prefixes=ignore_prefixes,
                    suffixes=ignore_suffixes,
                    use_cache=not cache_disable,
                    remote_key='export',
                    api_key=api_key,
                    refresh_token=refresh_token,
                    refresh_target=refresh_target,
                    public_key=public_key,
                )
                downloader.sync()
            else:
                export_get(
                    env,
                    pnum,
                    filename,
                    token,
                    etag=download_id,
                    public_key=public_key,
                )
        elif download_list:
            debug_step('listing export directory')
            data = export_list(env, pnum, token)
            print_export_list(data)
        elif download_delete:
            debug_step(f'deleting {download_delete}')
            export_delete(env, pnum, token, download_delete)
        elif download_sync:
            filename = download_sync
            debug_step('starting directory sync')
            resp = export_head(env, pnum, filename, token)
            if resp.headers.get('Content-Type') != 'directory':
                sys.exit('directory sync does not apply to files')
            syncer = SerialDirectoryDownloadSynchroniser(
                env,
                pnum,
                download_sync,
                token,
                prefixes=ignore_prefixes,
                suffixes=ignore_suffixes,
                use_cache=bool(cache_sync),
                sync_mtime=True,
                keep_missing=keep_missing,
                keep_updated=keep_updated,
                remote_key='export',
                api_key=api_key,
                refresh_token=refresh_token,
                refresh_target=refresh_target,
                public_key=public_key,
            )
            syncer.sync()
        return

    # 4. Optionally perform actions which do not require authentication
    else:
        if (upload_cache_show or upload_cache_delete or upload_cache_delete_all
                or download_cache_show or download_cache_delete
                or download_cache_delete_all) and not pnum:
            sys.exit('cache operations are project specific - missing pnum argument')

        # 4.1 Interact with config, sessions, and caches
        if config_show:
            print_config()
        elif config_delete:
            delete_config()
        elif session_delete:
            session_clear()
        elif upload_cache_show:
            cache = UploadCache(env, pnum)
            cache.print()
        elif upload_cache_delete:
            cache = UploadCache(env, pnum)
            cache.destroy(key=upload_cache_delete)
            delete_cache = UploadDeleteCache(env, pnum)
            delete_cache.destroy(key=upload_cache_delete)
        elif upload_cache_delete_all:
            cache = UploadCache(env, pnum)
            cache.destroy_all()
            delete_cache = UploadDeleteCache(env, pnum)
            delete_cache.destroy_all()
        elif download_cache_show:
            cache = DownloadCache(env, pnum)
            cache.print()
        elif download_cache_delete:
            cache = DownloadCache(env, pnum)
            cache.destroy(key=download_cache_delete)
            delete_cache = DownloadDeleteCache(env, pnum)
            delete_cache.destroy(key=download_cache_delete)
        elif download_cache_delete_all:
            cache = DownloadCache(env, pnum)
            cache.destroy_all()
            delete_cache = DownloadDeleteCache(env, pnum)
            delete_cache.destroy_all()

        # 4.2 Register a client
        elif register:
            prod = "1 - for normal production usage"
            fx = "2 - for use over fx03 network"
            test = "3 - for testing"
            ec_prod = "4 - for Educloud normal production usage"
            ec_test = "5 - for Educloud testing"
            prompt = "Choose the API environment by typing one of the following numbers"
            choice = input(f"""{prompt}:\n{prod}\n{fx}\n{test}\n{ec_prod}\n{ec_test}\n > """)
            if choice not in {'1', '2', '3', '4', '5'}:
                click.echo(f'Invalid choice: {choice} for API environment')
                sys.exit(1)
            choices = {
                '1': 'prod',
                '2': 'alt',
                '3': 'test',
                '4': 'ec-prod',
                '5': 'ec-test',
            }
            env = choices[choice]
            check_api_connection(env)
            username, password, otp = get_user_credentials(env)
            if env.startswith('ec-'):
                auth_method = 'iam'
                pnum = input('ec project > ')
            else:
                pnum = username.split('-')[0]
            key = get_tsd_api_key(env, pnum, username, password, otp, auth_method=auth_method)
            update_config(env, pnum, key)
            click.echo(f'Successfully registered for {pnum}, and API environment hosted at {ENV[env]}')

        # 4.3 Introspection
        elif version:
            print_version_info()
        elif guide:
            text = GUIDES.get(guide, f'no guide found for {guide}')
            click.echo(text)
        else:
            click.echo('tacl --help, for basic help')
            click.echo('tacl --guide topics, for extended help')
    return
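
# Illustrative invocations (flags inferred from the parameters above;
# see `tacl --help` for the authoritative list):
#
#   tacl --register                  # interactive client registration
#   tacl p11 --upload data.tar       # upload a file, or a directory tree
#   tacl p11 --download results.csv  # export a file from the project
#   tacl p11 --upload-sync ./mydir   # incremental directory upload sync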
def maybe_refresh(
    env: str,
    pnum: str,
    api_key: str,
    access_token: str,
    refresh_token: str,
    refresh_target: int,
    before_min: int = 5,
    after_min: int = 10,
    force: bool = False,
) -> dict:
    """
    Try to refresh an access token, using a refresh token. The refresh
    is attempted when the current time falls within a window around the
    time given by refresh_target: by default from 5 minutes before, to
    10 minutes after. If force == True, then a refresh will be
    performed regardless.

    Access and refresh tokens are returned to clients in pairs, with
    refresh tokens having a decrementing counter each time they are
    used. Each time a successful token refresh happens, the session
    will be updated with the new token pair. When the refresh token is
    exhausted, the last access token is issued without a new refresh
    token, which means that the next call to this function will be with
    refresh_token == None. When this happens, the function will return
    the access token provided by the caller, since it can no longer be
    refreshed. If for some reason the refresh operation fails, then the
    access token provided by the caller is returned.
    """
    if not refresh_token or not refresh_target:
        if access_token:
            debug_step('no refresh token provided, re-using current access token')
            return {'access_token': access_token}
        else:
            debug_step('no refresh or access token provided')
            return {}
    else:
        token_type = get_claims(access_token).get('name')
        target = datetime.fromtimestamp(refresh_target)
        now = datetime.now().timestamp()
        start = (target - timedelta(minutes=before_min)).timestamp()
        end = (target + timedelta(minutes=after_min)).timestamp()
        if (start <= now <= end) or force:
            if force:
                debug_step('forcing refresh')
            access, refresh = refresh_access_token(env, pnum, api_key, refresh_token)
            if access and refresh:
                session_update(env, pnum, token_type, access, refresh)
                debug_step(f"refreshes remaining: {get_claims(refresh).get('counter')}")
                return {'access_token': access, 'refresh_token': refresh}
            if access and not refresh:
                session_update(env, pnum, token_type, access, refresh)
                debug_step('refreshes remaining: 0')
                return {'access_token': access}
            else:
                session_update(env, pnum, token_type, access_token, refresh)
                debug_step('could not refresh, using existing access token')
    return {'access_token': access_token}
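
# The calling convention used by the chunk loops above, in isolation:
# any returned pair is folded back into the caller's local state
# before the token is reused (names are the caller's own variables).
#
#   tokens = maybe_refresh(env, pnum, api_key, token, refresh_token, refresh_target)
#   if tokens:
#       token = tokens.get('access_token')
#       refresh_token = tokens.get('refresh_token')
#       refresh_target = get_claims(token).get('exp')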