Example #1
# Imports assumed from the surrounding Metaflow module; the exact module
# paths may vary between Metaflow versions.
import os
import tarfile
from distutils.dir_util import copy_tree
from urllib.parse import urlparse

from metaflow import util
from metaflow.datastore import MetaflowDataStore
from metaflow.datastore.local import LocalDataStore
from metaflow.datastore.util.s3util import get_s3_client
from metaflow.metaflow_config import DATASTORE_LOCAL_DIR


def _sync_metadata(echo, metadata, datastore_root, attempt):
    if metadata.TYPE == 'local':

        def echo_none(*args, **kwargs):
            pass

        path = os.path.join(
            datastore_root,
            MetaflowDataStore.filename_with_attempt_prefix(
                'metadata.tgz', attempt))
        # The path is an s3:// URI: netloc is the bucket, path is the key
        url = urlparse(path)
        bucket = url.netloc
        key = url.path.lstrip('/')
        # get_s3_client returns both the client and the exception class
        # (ClientError) to catch when an object is missing
        s3, err = get_s3_client()
        try:
            s3.head_object(Bucket=bucket, Key=key)
            # head_object succeeded, so the object exists and we can download it
            with util.TempDir() as td:
                tar_file_path = os.path.join(td, 'metadata.tgz')
                with open(tar_file_path, 'wb') as f:
                    s3.download_fileobj(bucket, key, f)
                with tarfile.open(tar_file_path, 'r:gz') as tar:
                    tar.extractall(td)
                copy_tree(
                    os.path.join(td, DATASTORE_LOCAL_DIR),
                    LocalDataStore.get_datastore_root_from_config(echo_none),
                    update=True)
        except err:
            # head_object raised: the metadata archive is absent (or not
            # accessible), so there is nothing to sync for this attempt
            pass
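
Example #1's check-then-download pattern can be reproduced with plain boto3, without Metaflow's helpers. A minimal stand-alone sketch (download_if_exists is an illustrative name, not part of Metaflow):

import boto3
from botocore.exceptions import ClientError

def download_if_exists(bucket, key, dest_path):
    s3 = boto3.client('s3')
    try:
        # head_object raises ClientError (e.g. on 404) when the key is missing
        # or inaccessible; this is the same signal _sync_metadata relies on
        s3.head_object(Bucket=bucket, Key=key)
    except ClientError:
        return False
    with open(dest_path, 'wb') as f:
        s3.download_fileobj(bucket, key, f)
    return True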
Example #2
# Method of a Metaflow step-decorator class (the enclosing class is omitted
# in this excerpt, hence the `self` parameter).
def task_finished(self, step_name, flow, graph, is_task_ok, retry_count, max_retries):
    if self.ds_root:
        # We have a local metadata service, so we need to persist it to the
        # datastore. Note that the datastore is *always* S3 (see the
        # runtime_task_created function).
        with util.TempDir() as td:
            tar_file_path = os.path.join(td, 'metadata.tgz')
            with tarfile.open(tar_file_path, 'w:gz') as tar:
                # The local metadata is stored in the local datastore
                # which, for batch jobs, is always DATASTORE_LOCAL_DIR
                tar.add(DATASTORE_LOCAL_DIR)
            # At this point we upload what we need to S3
            s3, _ = get_s3_client()
            with open(tar_file_path, 'rb') as f:
                path = os.path.join(
                    self.ds_root,
                    MetaflowDataStore.filename_with_attempt_prefix(
                        'metadata.tgz', retry_count))
                url = urlparse(path)
                s3.upload_fileobj(f, url.netloc, url.path.lstrip('/'))
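
The upload half can be sketched stand-alone in the same way: tar up a directory and push it to an s3:// destination, splitting the URI into bucket and key with urlparse just as both examples do. A minimal sketch, assuming only boto3 (pack_and_upload and src_dir are illustrative names, not Metaflow API):

import os
import tarfile
import tempfile
from urllib.parse import urlparse

import boto3

def pack_and_upload(src_dir, s3_path):
    # 's3://bucket/some/key.tgz' -> bucket in url.netloc, key in url.path
    url = urlparse(s3_path)
    s3 = boto3.client('s3')
    with tempfile.TemporaryDirectory() as td:
        tar_file_path = os.path.join(td, 'metadata.tgz')
        with tarfile.open(tar_file_path, 'w:gz') as tar:
            tar.add(src_dir)
        with open(tar_file_path, 'rb') as f:
            s3.upload_fileobj(f, url.netloc, url.path.lstrip('/'))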