def _sync_metadata(echo, metadata, datastore_root, attempt):
    if metadata.TYPE == 'local':
        def echo_none(*args, **kwargs):
            pass
        path = os.path.join(
            datastore_root,
            MetaflowDataStore.filename_with_attempt_prefix(
                'metadata.tgz', attempt))
        url = urlparse(path)
        bucket = url.netloc
        key = url.path.lstrip('/')
        s3, err = get_s3_client()
        try:
            s3.head_object(Bucket=bucket, Key=key)
            # If we are here, we can download the object
            with util.TempDir() as td:
                tar_file_path = os.path.join(td, 'metadata.tgz')
                with open(tar_file_path, 'wb') as f:
                    s3.download_fileobj(bucket, key, f)
                with tarfile.open(tar_file_path, 'r:gz') as tar:
                    tar.extractall(td)
                copy_tree(
                    os.path.join(td, DATASTORE_LOCAL_DIR),
                    LocalDataStore.get_datastore_root_from_config(echo_none),
                    update=True)
        except err as e:  # noqa F841
            pass
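# A minimal sketch of what get_s3_client() is assumed to provide, inferred
# purely from its usage above: a boto3 S3 client plus the exception class to
# catch, which is why `s3, err = get_s3_client()` followed by `except err`
# traps S3 client errors. The real metaflow.datastore.util.s3util
# implementation may differ (retries, custom endpoints, credentials handling);
# this is illustrative only.
def get_s3_client_sketch():
    import boto3
    from botocore.exceptions import ClientError
    return boto3.client('s3'), ClientError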
def _one_boto_op(self, op, url):
    error = ''
    for i in range(NUM_S3OP_RETRIES):
        tmp = NamedTemporaryFile(dir=self._tmpdir,
                                 prefix='metaflow.s3.one_file.',
                                 delete=False)
        try:
            s3, _ = get_s3_client()
            op(s3, tmp.name)
            return tmp.name
        except ClientError as err:
            error_code = s3op.normalize_client_error(err)
            if error_code == 404:
                raise MetaflowS3NotFound(url)
            elif error_code == 403:
                raise MetaflowS3AccessDenied(url)
            elif error_code == 'NoSuchBucket':
                raise MetaflowS3URLException(
                    "Specified S3 bucket doesn't exist.")
            error = str(err)
        except Exception as ex:
            # TODO specific error message for out of disk space
            error = str(ex)
        os.unlink(tmp.name)
        # add some jitter to make sure retries are not synchronized
        time.sleep(2**i + random.randint(0, 10))
    raise MetaflowS3Exception("S3 operation failed.\n"
                              "Key requested: %s\n"
                              "Error: %s" % (url, error))
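# Hypothetical caller sketch for _one_boto_op above: `op` is a callable taking
# the boto3 client and a temporary file path and issuing exactly one S3 call,
# while _one_boto_op supplies the retry loop, jitter, and mapping of error
# codes to Metaflow exceptions. The names `src` and `_download_one` are
# illustrative, not necessarily the surrounding class's real API.
def _download_one(self, src):
    def _download(s3, tmp_path):
        # A single boto3 call; retries and backoff happen in _one_boto_op.
        s3.download_file(src.netloc, src.path.lstrip('/'), tmp_path)
    return self._one_boto_op(_download, src.geturl())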
def task_finished(self,
                  step_name,
                  flow,
                  graph,
                  is_task_ok,
                  retry_count,
                  max_retries):
    if self.ds_root:
        # We have a local metadata service so we need to persist it to the
        # datastore. Note that the datastore is *always* s3 (see
        # runtime_task_created function).
        with util.TempDir() as td:
            tar_file_path = os.path.join(td, 'metadata.tgz')
            with tarfile.open(tar_file_path, 'w:gz') as tar:
                # The local metadata is stored in the local datastore
                # which, for batch jobs, is always the DATASTORE_LOCAL_DIR
                tar.add(DATASTORE_LOCAL_DIR)
            # At this point we upload what we need to s3
            s3, _ = get_s3_client()
            with open(tar_file_path, 'rb') as f:
                path = os.path.join(
                    self.ds_root,
                    MetaflowDataStore.filename_with_attempt_prefix(
                        'metadata.tgz', retry_count))
                url = urlparse(path)
                s3.upload_fileobj(f, url.netloc, url.path.lstrip('/'))
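# Both _sync_metadata and task_finished split an s3:// path into bucket and
# key via urlparse; a quick illustration (the path below is just an example):
from urllib.parse import urlparse

url = urlparse('s3://my-bucket/flow/run/step/task/0.metadata.tgz')
bucket = url.netloc            # 'my-bucket'
key = url.path.lstrip('/')     # 'flow/run/step/task/0.metadata.tgz'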
def worker(queue, mode):
    try:
        from metaflow.datastore.util.s3util import get_s3_client
        s3, _ = get_s3_client()
        while True:
            url = queue.get()
            if url is None:
                break
            if mode == 'download':
                tmp = NamedTemporaryFile(dir='.', delete=False)
                try:
                    s3.download_file(url.bucket, url.path, tmp.name)
                    os.rename(tmp.name, url.local)
                except:
                    # TODO specific error message for out of disk space
                    os.unlink(tmp.name)
                    raise
            else:
                s3.upload_file(url.local, url.bucket, url.path)
    except:
        traceback.print_exc()
        sys.exit(ERROR_WORKER_EXCEPTION)
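# Hypothetical driver sketch for the worker above: the queue is fed one URL
# object per transfer, followed by one None sentinel per worker so that each
# process breaks out of its loop. The names `run_workers`, `urls`, and
# `num_workers` are illustrative and not taken from the surrounding module.
from multiprocessing import Process, Queue

def run_workers(urls, mode, num_workers=4):
    queue = Queue()
    procs = [Process(target=worker, args=(queue, mode))
             for _ in range(num_workers)]
    for p in procs:
        p.start()
    for url in urls:
        queue.put(url)
    for _ in procs:
        queue.put(None)   # sentinel: one per worker process
    for p in procs:
        p.join()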
def reset_client(self, hard_reset=False):
    if hard_reset or self._s3_client is None:
        from metaflow.datastore.util.s3util import get_s3_client
        self._s3_client, self._s3_client_error = get_s3_client()
def worker(result_file_name, queue, mode):
    # Interpret mode; it can either be a single op or something like
    # info_download or info_upload which implies:
    #  - for download: we need to return the information as well
    #  - for upload: we need to not overwrite the file if it exists
    modes = mode.split('_')
    pre_op_info = False
    if len(modes) > 1:
        pre_op_info = True
        mode = modes[1]
    else:
        mode = modes[0]

    def op_info(url):
        try:
            head = s3.head_object(Bucket=url.bucket, Key=url.path)
            to_return = {
                'error': None,
                'size': head['ContentLength'],
                'content_type': head['ContentType'],
                'metadata': head['Metadata']}
        except ClientError as err:
            error_code = normalize_client_error(err)
            if error_code == 404:
                to_return = {'error': ERROR_URL_NOT_FOUND, 'raise_error': err}
            elif error_code == 403:
                to_return = {
                    'error': ERROR_URL_ACCESS_DENIED,
                    'raise_error': err}
            else:
                to_return = {'error': error_code, 'raise_error': err}
        return to_return

    with open(result_file_name, 'w') as result_file:
        try:
            from metaflow.datastore.util.s3util import get_s3_client
            s3, _ = get_s3_client()
            while True:
                url, idx = queue.get()
                if url is None:
                    break
                if mode == 'info':
                    result = op_info(url)
                    orig_error = result.get('raise_error', None)
                    if orig_error:
                        del result['raise_error']
                    with open(url.local, 'w') as f:
                        json.dump(result, f)
                elif mode == 'download':
                    result_info = None
                    is_missing = False
                    if pre_op_info:
                        result_info = op_info(url)
                        if result_info['error'] == ERROR_URL_NOT_FOUND:
                            is_missing = True
                            result_file.write(
                                "%d %d\n" % (idx, -ERROR_URL_NOT_FOUND))
                        elif result_info['error'] == ERROR_URL_ACCESS_DENIED:
                            is_missing = True
                            result_file.write(
                                "%d %d\n" % (idx, -ERROR_URL_ACCESS_DENIED))
                        elif result_info['error'] is not None:
                            raise result_info['raise_error']
                    if is_missing:
                        continue
                    tmp = NamedTemporaryFile(dir='.', delete=False)
                    try:
                        if url.range is None:
                            s3.download_file(url.bucket, url.path, tmp.name)
                        else:
                            # We do get_object. We don't actually do any retries
                            # here because the higher levels will do the retry if
                            # needed
                            resp = s3.get_object(
                                Bucket=url.bucket, Key=url.path, Range=url.range)
                            code = str(
                                resp['ResponseMetadata']['HTTPStatusCode'])
                            if code[0] == '2':
                                tmp.write(resp['Body'].read())
                            else:
                                # TODO: Better raised error
                                raise RuntimeError("Could not load file")
                        tmp.close()
                        os.rename(tmp.name, url.local)
                    except ClientError as err:
                        error_code = normalize_client_error(err)
                        if error_code == 404:
                            pass  # We skip this
                        else:
                            raise
                    except:
                        # TODO specific error message for out of disk space
                        tmp.close()
                        os.unlink(tmp.name)
                        raise
                    # If we have metadata that we retrieved, we also write it out
                    # to a file
                    if result_info:
                        with open('%s_meta' % url.local, mode='w') as f:
                            args = {'size': result_info['size']}
                            if result_info['content_type']:
                                args['content_type'] = result_info[
                                    'content_type']
                            if result_info['metadata'] is not None:
                                args['metadata'] = result_info['metadata']
                            json.dump(args, f)
                        # Finally, we push out the size to the result_pipe since
                        # the size is used for verification and other purposes and
                        # we want to avoid file operations for this simple process
                        result_file.write(
                            "%d %d\n" % (idx, result_info['size']))
                else:
                    # This is upload; if we have a pre_op, it means we do not
                    # want to overwrite
                    do_upload = False
                    if pre_op_info:
                        result_info = op_info(url)
                        if result_info['error'] == ERROR_URL_NOT_FOUND:
                            # We only upload if the file is not found
                            do_upload = True
                    else:
                        # No pre-op so we upload
                        do_upload = True
                    if do_upload:
                        extra = None
                        if url.content_type or url.metadata:
                            extra = {}
                            if url.content_type:
                                extra['ContentType'] = url.content_type
                            if url.metadata is not None:
                                extra['Metadata'] = url.metadata
                        s3.upload_file(
                            url.local, url.bucket, url.path, ExtraArgs=extra)
                        # We indicate that the file was uploaded
                        result_file.write("%d %d\n" % (idx, 0))
        except:
            traceback.print_exc()
            sys.exit(ERROR_WORKER_EXCEPTION)
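# normalize_client_error is referenced above but not shown; this is a plausible
# sketch, consistent with how callers use its result (comparing it both to
# integers such as 404/403 and to string codes such as 'NoSuchBucket'). The
# real implementation in s3op may differ.
def normalize_client_error_sketch(err):
    # botocore's ClientError carries the S3 error code as a string; return it
    # as an int when possible (e.g. '404' -> 404), otherwise as the raw string.
    error_code = err.response['Error']['Code']
    try:
        return int(error_code)
    except ValueError:
        return error_code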
def reset_client(self, hard_reset=False):
    from metaflow.datastore.util.s3util import get_s3_client
    if hard_reset or self.s3 is None:
        self.s3, _ = get_s3_client()
def reset_client(self, new_client=None, hard_reset=False):
    if new_client:
        self._s3_client = new_client
    if hard_reset or self._s3_client is None:
        from metaflow.datastore.util.s3util import get_s3_client
        self._s3_client, _ = get_s3_client()
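# Hypothetical usage sketch for the reset_client variants above: a caller can
# force a fresh boto3 client (hard_reset=True) before retrying a failed call.
# The method name `do_with_retries` and the attribute `self._s3_client` are
# illustrative assumptions, not necessarily the surrounding class's real API.
def do_with_retries(self, bucket, key, retries=3):
    for attempt in range(retries):
        try:
            return self._s3_client.head_object(Bucket=bucket, Key=key)
        except Exception:
            # Drop the possibly-broken client and build a new one before retrying.
            self.reset_client(hard_reset=True)
    raise RuntimeError('S3 call failed after %d attempts' % retries)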