def get_data_dl_s3_client():
    """Create an S3 client for the user making the current request.

    The user id is the ``urs-user-id`` field that ``get_jwt_field`` extracts
    from the cookie variables of ``app.current_request``. That id is used to
    obtain the role session and to build the botocore config for the client.

    Returns:
        The ``s3`` client created from the role session.
    """
    request_headers = app.current_request.headers
    user_id = get_jwt_field(get_cookie_vars(request_headers), 'urs-user-id')
    session = get_role_session(user_id=user_id)
    return session.client('s3', config=bc_Config(**get_bcconfig(user_id)))
def get_bc_config_client(user_id):
    """Return the cached S3 client for ``user_id``, building it when needed.

    A client is built and stored in ``bc_client_cache`` when the user has no
    entry yet, or when the existing entry is at least 50 minutes old.

    Args:
        user_id: Key into ``bc_client_cache``; also passed to
            ``get_bcconfig`` and ``get_role_session``.

    Returns:
        The ``"client"`` value of the user's cache entry.
    """
    max_age_seconds = 50 * 60
    now = time.time()

    cached = user_id in bc_client_cache
    expired = (
        cached
        and now - bc_client_cache[user_id]["timestamp"] >= max_age_seconds
    )

    if expired:
        log.info(f"Replacing old bc_Config_client for user {user_id}")

    if expired or not cached:
        # New user or stale entry: generate a fresh client and timestamp it.
        config = bc_Config(**get_bcconfig(user_id))
        session = get_role_session(user_id=user_id)
        bc_client_cache[user_id] = {
            "client": session.client('s3', config=config),
            "timestamp": now,
        }

    return bc_client_cache[user_id]["client"]
def get_data_dl_s3_client():
    """Create an S3 client tagged with the requesting user's id.

    The user id is ``cookievars['urs-user-id']`` when ``get_cookie_vars``
    returns a truthy value for the current request headers, otherwise
    ``None``. The client's user agent embeds that id, and the signature
    version is set from ``S3_SIGNATURE_VERSION`` when that variable is set
    to a non-empty value.

    Returns:
        The ``s3`` client created from the role session.
    """
    cookievars = get_cookie_vars(app.current_request.headers)
    user_id = cookievars['urs-user-id'] if cookievars else None
    session = get_role_session(user_id=user_id)

    config_kwargs = {'user_agent': "Egress App for userid={0}".format(user_id)}
    signature_version = os.getenv('S3_SIGNATURE_VERSION')
    if signature_version:
        config_kwargs['signature_version'] = signature_version

    return session.client('s3', config=bc_Config(**config_kwargs))
def try_download_from_bucket(bucket, filename, user_profile):
    """Verify an S3 object is reachable and redirect to a presigned URL.

    Args:
        bucket: Name of the bucket holding the object.
        filename: Key of the object inside ``bucket``.
        user_profile: When a dict, the user id is taken from its
            ``'urs-user-id'`` key, falling back to ``'uid'``; otherwise the
            user id is ``None``.

    Returns:
        On success, the result of ``make_redriect`` with status 303 pointing
        at a presigned URL valid for 24 hours. If the bucket region lookup
        raises ``ClientError``, a 500 ``error.html`` response. If probing the
        object raises ``ClientError``, a 416 ``Response`` for an invalid
        range, or a 404 ``error.html`` response otherwise.
    """
    # Attempt to pull userid from profile
    user_id = None
    if isinstance(user_profile, dict):
        if 'urs-user-id' in user_profile:
            user_id = user_profile['urs-user-id']
        elif 'uid' in user_profile:
            user_id = user_profile['uid']
    log.info("User Id for download is {0}".format(user_id))

    is_in_region = check_in_region_request(
        app.current_request.context['identity']['sourceIp'])
    creds = get_role_creds(user_id, is_in_region)
    session = get_role_session(creds=creds, user_id=user_id)

    try:
        bucket_region = get_bucket_region(session, bucket)
    except ClientError as e:
        log.error(
            f'ClientError while {user_id} tried downloading {bucket}/{filename}: {e}'
        )
        template_vars = {
            'contentstring': 'There was a problem accessing download data.',
            'title': 'Data Not Available'
        }
        return make_html_response(template_vars, {}, 500, 'error.html')

    log.debug('this region: {}'.format(
        os.getenv('AWS_DEFAULT_REGION', 'env var doesnt exist')))
    if bucket_region != os.getenv('AWS_DEFAULT_REGION'):
        # The message is ONE literal (adjacent literals are joined at compile
        # time) so .format() fills every placeholder. With `"..." + "...".format()`
        # the call binds only to the second literal and {0}/{1}/{2} are logged raw.
        log.warning(
            "bucket {0} is in region {1}, we are in region {2}! "
            "This is double egress in Proxy mode!".format(
                bucket, bucket_region, os.getenv('AWS_DEFAULT_REGION')))

    # now that we know where the bucket is, connect in THAT region
    params = {'config': bc_Config(**get_bcconfig(user_id))}
    client = session.client('s3', bucket_region, **params)

    log.info("Attempting to download s3://{0}/{1}".format(bucket, filename))
    try:
        # Make sure this file exists, don't ACTUALLY download
        range_header = get_range_header_val()
        if not range_header:
            client.get_object(Bucket=bucket, Key=filename)
            redirheaders = {}
        else:
            client.get_object(Bucket=bucket, Key=filename, Range=range_header)
            redirheaders = {'Range': range_header}

        # Generate URL
        presigned_url = get_presigned_url(creds, bucket, filename,
                                          bucket_region, 24 * 3600, user_id)
        s3_host = urlparse(presigned_url).netloc
        log.debug("Presigned URL host was {0}".format(s3_host))
        log.info("Using REDIRECT because no PROXY in egresslambda")
        return make_redriect(presigned_url, redirheaders, 303)
    except ClientError as e:
        log.warning("Could not download s3://{0}/{1}: {2}".format(
            bucket, filename, e))
        # Watch for bad range request:
        if e.response['ResponseMetadata']['HTTPStatusCode'] == 416:
            return Response(body='Invalid Range', status_code=416, headers={})

        template_vars = {
            'contentstring': 'Could not find requested data.',
            'title': 'Data Not Available'
        }
        return make_html_response(template_vars, {}, 404, 'error.html')
def try_download_from_bucket(bucket, filename, user_profile):
    """Probe an S3 object and answer with a redirect to a presigned URL.

    Args:
        bucket: Name of the bucket holding the object.
        filename: Key of the object inside ``bucket``.
        user_profile: When a dict, the user id comes from ``'urs-user-id'``
            or, failing that, ``'uid'``; otherwise the user id is ``None``.

    Returns:
        The result of ``make_redriect`` (status 303) on success; a 500
        ``error.html`` response when the bucket location lookup raises
        ``ClientError``; a 416 ``Response`` for an invalid range; or a 404
        ``error.html`` response for any other ``ClientError`` while probing.
    """
    # Pull the user id from the profile, preferring 'urs-user-id' over 'uid'
    user_id = None
    if isinstance(user_profile, dict):
        for id_key in ('urs-user-id', 'uid'):
            if id_key in user_profile:
                user_id = user_profile[id_key]
                break
    log.info("User Id for download is {0}".format(user_id))

    source_ip = app.current_request.context['identity']['sourceIp']
    is_in_region = check_in_region_request(source_ip)
    creds = get_role_creds(user_id, is_in_region)
    session = get_role_session(creds=creds, user_id=user_id)

    config_kwargs = {
        "user_agent": "RAIN Egress App for userid={0}".format(user_id),
        "s3": {"addressing_style": "path"},
        "connect_timeout": 600,
        "read_timeout": 600,
        "retries": {"max_attempts": 10},
    }
    signature_version = os.getenv('S3_SIGNATURE_VERSION')
    if signature_version:
        config_kwargs['signature_version'] = signature_version

    # Figure out bucket region
    try:
        location = session.client('s3').get_bucket_location(Bucket=bucket)
        # A falsy location constraint is mapped to us-east-1
        bucket_region = location['LocationConstraint'] or 'us-east-1'
        log.debug("bucket {0} is in region {1}".format(bucket, bucket_region))
    except ClientError as e:
        # We hit here if the download role cannot access a bucket, or if it doesn't exist
        log.error("Coud not access download bucket {0}: {1}".format(bucket, e))
        return make_html_response(
            {
                'contentstring': 'There was a problem accessing download data.',
                'title': 'Data Not Available'
            },
            {},
            500,
            'error.html',
        )

    log.debug('this region: {}'.format(
        os.getenv('AWS_DEFAULT_REGION', 'env var doesnt exist')))
    local_region = os.getenv('AWS_DEFAULT_REGION')
    if bucket_region != local_region:
        log.warning(
            "bucket {0} is in region {1}, we are in region {2}! This is double egress in Proxy mode!"
            .format(bucket, bucket_region, local_region))

    # now that we know where the bucket is, connect in THAT region
    client = session.client('s3', bucket_region,
                            config=bc_Config(**config_kwargs))

    log.info("Attempting to download s3://{0}/{1}".format(bucket, filename))
    try:
        # Make sure this file exists, don't ACTUALLY download
        range_header = get_range_header_val()
        if range_header:
            client.get_object(Bucket=bucket, Key=filename, Range=range_header)
            redirheaders = {'Range': range_header}
        else:
            client.get_object(Bucket=bucket, Key=filename)
            redirheaders = {}

        # Generate URL
        presigned_url = get_presigned_url(creds, bucket, filename,
                                          bucket_region, 24 * 3600, user_id)
        log.debug("Presigned URL host was {0}".format(
            urlparse(presigned_url).netloc))
        log.info("Using REDIRECT because no PROXY in egresslambda")
        return make_redriect(presigned_url, redirheaders, 303)
    except ClientError as e:
        log.warning("Could not download s3://{0}/{1}: {2}".format(
            bucket, filename, e))
        # Watch for bad range request:
        if e.response['ResponseMetadata']['HTTPStatusCode'] == 416:
            return Response(body='Invalid Range', status_code=416, headers={})

        return make_html_response(
            {
                'contentstring': 'Could not find requested data.',
                'title': 'Data Not Available'
            },
            {},
            404,
            'error.html',
        )
def try_download_from_bucket(bucket, filename, user_profile, headers: dict):
    """Verify an S3 object is reachable and redirect to a presigned URL.

    Args:
        bucket: Name of the bucket holding the object.
        filename: Key of the object inside ``bucket``.
        user_profile: When a dict, the user id is taken from its
            ``'urs-user-id'`` key, falling back to ``'uid'``; otherwise the
            user id is ``None``.
        headers: Extra headers merged into the redirect headers when this is
            a dict; ignored otherwise.

    Returns:
        On success, the result of ``make_redirect`` with status 303 pointing
        at a presigned URL that expires in ``3600 - offset`` seconds, where
        ``offset`` is the second value returned by ``get_role_creds``. If the
        bucket region lookup raises ``ClientError``, an ``error.html``
        response carrying that error's HTTP status (400 when it cannot be
        read). If probing the object raises ``ClientError``, a 416
        ``Response`` for an invalid range, or a 404 ``error.html`` response
        otherwise.
    """
    # Attempt to pull userid from profile
    user_id = None
    if isinstance(user_profile, dict):
        if 'urs-user-id' in user_profile:
            user_id = user_profile['urs-user-id']
        elif 'uid' in user_profile:
            user_id = user_profile['uid']
    log.info("User Id for download is {0}".format(user_id))
    log_context(user_id=user_id)

    t0 = time.time()
    is_in_region = check_in_region_request(
        app.current_request.context['identity']['sourceIp'])
    t1 = time.time()
    creds, offset = get_role_creds(user_id, is_in_region)
    t2 = time.time()
    session = get_role_session(creds=creds, user_id=user_id)
    t3 = time.time()

    try:
        bucket_region = get_bucket_region(session, bucket)
        t4 = time.time()
    except ClientError as e:
        try:
            code = e.response['ResponseMetadata']['HTTPStatusCode']
        except (AttributeError, KeyError, IndexError):
            code = 400
        log.debug(f'response: {e.response}')
        log.error(
            f'ClientError while {user_id} tried downloading {bucket}/{filename}: {e}'
        )
        cumulus_log_message('failure', code, 'GET', {
            'reason': 'ClientError',
            's3': f'{bucket}/{filename}'
        })
        template_vars = {
            'contentstring': 'There was a problem accessing download data.',
            'title': 'Data Not Available',
            'requestid': get_request_id(),
        }
        # Pass a fresh dict rather than rebinding the `headers` parameter.
        return make_html_response(template_vars, {}, code, 'error.html')

    log.debug('this region: {}'.format(
        os.getenv('AWS_DEFAULT_REGION', 'env var doesnt exist')))
    if bucket_region != os.getenv('AWS_DEFAULT_REGION'):
        # The message is ONE literal (adjacent literals are joined at compile
        # time) so .format() fills every placeholder. With `"..." + "...".format()`
        # the call binds only to the second literal and {0}/{1}/{2} are logged raw.
        log.warning(
            "bucket {0} is in region {1}, we are in region {2}! "
            "This is double egress in Proxy mode!".format(
                bucket, bucket_region, os.getenv('AWS_DEFAULT_REGION')))

    client = get_bc_config_client(user_id)

    log.debug('timing for try_download_from_bucket(): ')
    log.debug('ET for check_in_region_request(): {}s'.format(t1 - t0))
    log.debug('ET for get_role_creds(): {}s'.format(t2 - t1))
    log.debug('ET for get_role_session(): {}s'.format(t3 - t2))
    log.debug('ET for get_bucket_region(): {}s'.format(t4 - t3))
    log.debug('ET for total: {}'.format(t4 - t0))

    log.info("Attempting to download s3://{0}/{1}".format(bucket, filename))
    try:
        # Make sure this file exists, don't ACTUALLY download
        range_header = get_range_header_val()
        if not range_header:
            if not os.getenv("SUPPRESS_HEAD"):
                client.head_object(Bucket=bucket, Key=filename)
            redirheaders = {}
        else:
            if not os.getenv("SUPPRESS_HEAD"):
                client.head_object(Bucket=bucket, Key=filename,
                                   Range=range_header)
            redirheaders = {'Range': range_header}

        expires_in = 3600 - offset
        redirheaders['Cache-Control'] = 'private, max-age={0}'.format(
            expires_in - 60)
        if isinstance(headers, dict):
            log.debug(f'adding {headers} to redirheaders {redirheaders}')
            redirheaders.update(headers)

        # Generate URL
        presigned_url = get_presigned_url(creds, bucket, filename,
                                          bucket_region, expires_in, user_id)
        s3_host = urlparse(presigned_url).netloc
        log.debug("Presigned URL host was {0}".format(s3_host))
        return make_redirect(presigned_url, redirheaders, 303)
    except ClientError as e:
        # Watch for bad range request:
        if e.response['ResponseMetadata']['HTTPStatusCode'] == 416:
            # cumulus uses this log message for metrics purposes.
            log.error(
                f"Invalid Range 416, Could not get range {get_range_header_val()} s3://{bucket}/{filename}: {e}"
            )
            cumulus_log_message(
                'failure', 416, 'GET', {
                    'reason': 'Invalid Range',
                    's3': f'{bucket}/{filename}',
                    'range': get_range_header_val()
                })
            return Response(body='Invalid Range', status_code=416, headers={})

        # cumulus uses this log message for metrics purposes.
        log.warning("Could not download s3://{0}/{1}: {2}".format(
            bucket, filename, e))
        template_vars = {
            'contentstring': 'Could not find requested data.',
            'title': 'Data Not Available',
            'requestid': get_request_id(),
        }
        cumulus_log_message(
            'failure', 404, 'GET', {
                'reason': 'Could not find requested data',
                's3': f'{bucket}/{filename}'
            })
        # Pass a fresh dict rather than rebinding the `headers` parameter.
        return make_html_response(template_vars, {}, 404, 'error.html')