Example #2
def test_s3_empty_serialize():
    s3 = storage.S3(bucket="bucket")
    serialized = S3Schema().dump(s3)

    assert serialized
    assert serialized["__version__"] == prefect.__version__
    assert serialized["bucket"]
    assert not serialized["key"]
    assert serialized["secrets"] == []
Example #3
def test_s3_serialize_with_flows():
    s3 = storage.S3(bucket="bucket", key="key", secrets=["hidden", "auth"])
    f = prefect.Flow("test")
    s3.flows["test"] = "key"
    serialized = S3Schema().dump(s3)

    assert serialized
    assert serialized["__version__"] == prefect.__version__
    assert serialized["bucket"] == "bucket"
    assert serialized["key"] == "key"
    assert serialized["flows"] == {"test": "key"}

    deserialized = S3Schema().load(serialized)
    assert f.name in deserialized
    assert deserialized.secrets == ["hidden", "auth"]
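For reference, a flow is normally attached to storage with add_flow rather than by writing into the flows mapping directly; a minimal sketch, assuming the Prefect 0.x Storage API:

s3 = storage.S3(bucket="bucket")
f = prefect.Flow("test")
key = s3.add_flow(f)   # registers the flow and records its key in s3.flows
assert f.name in s3    # Storage supports membership checks by flow name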
Example #4
def test_s3_full_serialize():
    s3 = storage.S3(
        bucket="bucket",
        key="key",
        secrets=["hidden", "auth"],
        labels=["foo", "bar"],
        add_default_labels=False,
    )
    serialized = S3Schema().dump(s3)

    assert serialized
    assert serialized["__version__"] == prefect.__version__
    assert serialized["bucket"] == "bucket"
    assert serialized["key"] == "key"
    assert serialized["secrets"] == ["hidden", "auth"]
Example #5
def configure_flow_storage(cluster: Cluster, secrets):
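    # Build Prefect flow storage for the cluster's configured protocol (S3 or Azure Blob).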
    if cluster.flow_storage_protocol == S3_PROTOCOL:
        key = secrets[cluster.flow_storage_options.key]
        secret = secrets[cluster.flow_storage_options.secret]
        flow_storage = storage.S3(
            bucket=cluster.flow_storage,
            client_options={
                "aws_access_key_id": key,
                "aws_secret_access_key": secret
            },
        )
        return flow_storage
    elif cluster.flow_storage_protocol == ABFS_PROTOCOL:
        secret = secrets[cluster.flow_storage_options.secret]
        flow_storage = storage.Azure(container=cluster.flow_storage,
                                     connection_string=secret)
        return flow_storage
    else:
        raise UnsupportedFlowStorage
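A rough illustration of how configure_flow_storage might be called; the cluster object and secret names below are hypothetical stand-ins, not part of the original project:

from types import SimpleNamespace

cluster = SimpleNamespace(  # hypothetical stand-in for the project's Cluster
    flow_storage_protocol=S3_PROTOCOL,  # constant from the module above
    flow_storage="my-flow-bucket",
    flow_storage_options=SimpleNamespace(key="aws-key-id", secret="aws-secret-key"),
)
secrets = {"aws-key-id": "AKIA...", "aws-secret-key": "..."}
flow_storage = configure_flow_storage(cluster, secrets)  # returns a storage.S3 instance
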
def register_recipe(recipe: BaseRecipe):
    flow_name = "test-noaa-flow"
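    # Minimal ECS task definition for the container that runs the flow.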
    definition = yaml.safe_load(
        """
        networkMode: awsvpc
        cpu: 1024
        memory: 2048
        containerDefinitions:
            - name: flow
        """
    )
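    # Stack outputs and flow tags come from project helpers defined elsewhere.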
    outputs = retrieve_stack_outputs()
    tags = generate_tags(flow_name)

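    # Point the recipe's target and input cache at S3 locations via fsspec.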
    fs_remote = S3FileSystem()
    target = FSSpecTarget(
        fs_remote,
        root_path=f"s3://{outputs['cache_bucket_name_output']}/target/oisst/",
    )
    recipe.target = target
    recipe.input_cache = CacheFSSpecTarget(
        fs_remote,
        root_path=(f"s3://{outputs['cache_bucket_name_output']}/cache/oisst/"),
    )
    recipe.metadata_cache = target

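    # Compile the recipe into a Prefect flow.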
    executor = PrefectPipelineExecutor()
    pipeline = recipe.to_pipelines()
    flow = executor.pipelines_to_plan(pipeline)

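    # Store the flow in S3 and run it on ECS with a Dask-on-Fargate executor.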
    flow.storage = storage.S3(bucket=outputs["storage_bucket_name_output"])
    flow.run_config = ECSRun(
        image=os.environ["BAKERY_IMAGE"],
        labels=json.loads(os.environ["PREFECT__CLOUD__AGENT__LABELS"]),
        task_definition=definition,
        run_task_kwargs={"tags": tags["tag_list"]},
    )
    flow.executor = DaskExecutor(
        cluster_class="dask_cloudprovider.aws.FargateCluster",
        cluster_kwargs={
            "image": os.environ["BAKERY_IMAGE"],
            "vpc": outputs["vpc_output"],
            "cluster_arn": outputs["cluster_arn_output"],
            "task_role_arn": outputs["task_role_arn_output"],
            "execution_role_arn": outputs["task_execution_role_arn_output"],
            "security_groups": [outputs["security_group_output"]],
            "scheduler_cpu": 256,
            "scheduler_mem": 512,
            "worker_cpu": 1024,
            "worker_mem": 2048,
            "scheduler_timeout": "15 minutes",
            "tags": tags["tag_dict"],
        },
        adapt_kwargs={"maximum": 10},
    )

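    # Wrap each task's run method with the project's set_log_level helper (sketched below).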
    for flow_task in flow.tasks:
        flow_task.run = set_log_level(flow_task.run)

    flow.name = flow_name
    project_name = os.environ["PREFECT_PROJECT"]
    flow.register(project_name=project_name)
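set_log_level is a project helper that is not shown in this example; one plausible shape for it, purely as an illustration, is a wrapper that raises the "prefect" logger's level before a task runs:

import functools
import logging

def set_log_level(run_method, level=logging.DEBUG):
    # Illustrative sketch only; the real helper may differ.
    @functools.wraps(run_method)
    def wrapper(*args, **kwargs):
        logging.getLogger("prefect").setLevel(level)
        return run_method(*args, **kwargs)

    return wrapper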