Example #1
def test_create_s3_storage_init_args():
    storage = S3(
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
        bucket="bucket",
        key="key",
    )
    assert storage
    assert storage.flows == dict()
    assert storage.aws_access_key_id == "id"
    assert storage.aws_secret_access_key == "secret"
    assert storage.aws_session_token == "session"
    assert storage.bucket == "bucket"
    assert storage.key == "key"
Example #2
def test_get_flow_s3_from_init_key_run(monkeypatch):
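    # Stub the S3 download and unpickling so the flow can be fetched and run locally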
    client = MagicMock()
    boto3 = MagicMock(download_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    f = Flow("test")

    monkeypatch.setattr("cloudpickle.loads", MagicMock(return_value=f))

    storage = S3(bucket="bucket", key="key")

    downloaded_flow = storage.get_flow()
    assert isinstance(downloaded_flow, Flow)
    state = downloaded_flow.run()
    assert state.is_successful()
Example #3
def test_get_flow_s3_not_in_storage(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(download_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    f = Flow("test")

    monkeypatch.setattr("cloudpickle.loads", MagicMock(return_value=f))

    storage = S3(bucket="bucket", key="test")

    assert f.name not in storage

    with pytest.raises(ValueError):
        storage.get_flow("test/test")
Example #4
def test_upload_flow_to_s3_client_error(monkeypatch):
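    # A ClientError from the mocked upload should propagate out of build()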
    client = MagicMock()
    boto3 = MagicMock(upload_fileobj=MagicMock(return_value=client))
    boto3.upload_fileobj.side_effect = ClientError({}, None)
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    storage = S3(bucket="bucket")

    f = Flow("test")
    assert f.name not in storage
    assert storage.add_flow(f)

    with pytest.raises(ClientError):
        storage.build()
    assert boto3.upload_fileobj.called
Example #5
def test_upload_flow_to_s3_key_format(monkeypatch):
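    # build() should upload under a key of the form "<flow name>/<suffix>"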
    client = MagicMock()
    boto3 = MagicMock(upload_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    storage = S3(bucket="bucket")

    f = Flow("test")
    assert storage.add_flow(f)
    assert storage.build()

    assert boto3.upload_fileobj.call_args[1]["Bucket"] == "bucket"
    key = boto3.upload_fileobj.call_args[1]["Key"].split("/")

    assert key[0] == "test"
    assert key[1]
Example #6
def test_create_s3_storage_init_args():
    storage = S3(
        bucket="bucket",
        key="key",
        client_options={"endpoint_url": "http://some-endpoint", "use_ssl": False,},
        secrets=["auth"],
    )
    assert storage
    assert storage.flows == dict()
    assert storage.bucket == "bucket"
    assert storage.key == "key"
    assert storage.client_options == {
        "endpoint_url": "http://some-endpoint",
        "use_ssl": False,
    }
    assert storage.secrets == ["auth"]
Example #7
def test_serialize_s3_storage():
    storage = S3(
        bucket="bucket",
        client_options={
            "endpoint_url": "http://some-endpoint",
            "use_ssl": False
        },
    )
    serialized_storage = storage.serialize()

    assert serialized_storage["type"] == "S3"
    assert serialized_storage["bucket"] == "bucket"
    assert serialized_storage["client_options"] == {
        "endpoint_url": "http://some-endpoint",
        "use_ssl": False,
    }
Example #8
def test_get_flow_s3_bucket_key(monkeypatch):
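    # The key given at init should be used for the download, even when a location is passed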
    client = MagicMock()
    boto3 = MagicMock(download_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    f = Flow("test")

    monkeypatch.setattr("cloudpickle.loads", MagicMock(return_value=f))

    storage = S3(bucket="bucket", key="key")

    assert f.name not in storage
    flow_location = storage.add_flow(f)

    assert storage.get_flow(flow_location)
    assert boto3.download_fileobj.call_args[1]["Bucket"] == "bucket"
    assert boto3.download_fileobj.call_args[1]["Key"] == "key"
Example #9
def test_get_flow_s3_from_init_key(monkeypatch):
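    # With a key configured, get_flow() can fetch the flow even though nothing was added locally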
    client = MagicMock()
    boto3 = MagicMock(download_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    f = Flow("test")

    monkeypatch.setattr("cloudpickle.loads", MagicMock(return_value=f))

    storage = S3(bucket="bucket", key="key")

    assert len(storage.flows) == 0
    assert len(storage._flows) == 0

    downloaded_flow = storage.get_flow()

    assert downloaded_flow.name == "test"
Example #10
def test_local_agent_deploy_processes_s3_storage(monkeypatch, runner_token):
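    # Deploying a flow run whose storage is S3 should launch a local subprocess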

    popen = MagicMock()
    monkeypatch.setattr("prefect.agent.local.agent.Popen", popen)

    agent = LocalAgent()
    agent.deploy_flow(
        flow_run=GraphQLResult(
            {
                "flow": GraphQLResult({"storage": S3(bucket="test").serialize()}),
                "id": "id",
            }
        )
    )

    assert popen.called
    assert len(agent.processes) == 1
Example #11
def test_upload_flow_to_s3_flow_byte_stream(monkeypatch):
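    # build() should hand boto3 an in-memory byte stream containing the pickled flow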
    client = MagicMock()
    boto3 = MagicMock(upload_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    storage = S3(bucket="bucket")

    f = Flow("test")
    assert storage.add_flow(f)
    assert storage.build()

    flow_as_bytes = boto3.upload_fileobj.call_args[0][0]
    assert isinstance(flow_as_bytes, io.BytesIO)

    new_flow = cloudpickle.loads(flow_as_bytes.read())
    assert new_flow.name == "test"

    state = new_flow.run()
    assert state.is_successful()
Example #12
def test_get_flow_s3(monkeypatch):
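    # get_flow() requires either a default key or an explicit flow location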
    client = MagicMock()
    boto3 = MagicMock(download_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    f = Flow("test")

    monkeypatch.setattr("cloudpickle.loads", MagicMock(return_value=f))

    storage = S3(bucket="bucket")

    with pytest.raises(ValueError):
        storage.get_flow()

    assert f.name not in storage
    flow_location = storage.add_flow(f)

    assert storage.get_flow(flow_location)
    assert boto3.download_fileobj.called
    assert f.name in storage
Example #13
def test_boto3_client_property(monkeypatch):
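    # Explicit credentials passed to S3() should be forwarded to boto3.client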
    client = MagicMock()
    boto3 = MagicMock(client=MagicMock(return_value=client))
    monkeypatch.setattr("boto3.client", boto3)

    storage = S3(
        bucket="bucket",
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
    )

    boto3_client = storage._boto3_client
    assert boto3_client
    boto3.assert_called_with(
        "s3",
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
    )
Example #14
def test_create_s3_storage_init_args():
    storage = S3(
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
        bucket="bucket",
        key="key",
        client_options={
            "endpoint_url": "http://some-endpoint",
            "use_ssl": False,
        },
    )
    assert storage
    assert storage.flows == dict()
    assert storage.aws_access_key_id == "id"
    assert storage.aws_secret_access_key == "secret"
    assert storage.aws_session_token == "session"
    assert storage.bucket == "bucket"
    assert storage.key == "key"
    assert storage.client_options == {
        "endpoint_url": "http://some-endpoint",
        "use_ssl": False,
    }
Example #15
def test_build_script_upload(monkeypatch):
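    # With stored_as_script=True, the local script file is uploaded via upload_file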
    client = MagicMock()
    boto3 = MagicMock(upload_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    storage = S3(
        bucket="bucket",
        stored_as_script=True,
        local_script_path="local.py",
        key="key",
    )

    f = Flow("test")
    assert f.name not in storage
    assert storage.add_flow(f)
    assert storage.build()
    assert f.name in storage

    assert boto3.upload_file.called
    assert boto3.upload_file.call_args[0] == ("local.py", "bucket", "key")

    boto3.upload_file.side_effect = ClientError({}, None)

    with pytest.raises(ClientError):
        storage.build()
Example #16
def test_get_flow_s3_runs(monkeypatch):
    client = MagicMock()
    boto3 = MagicMock(download_fileobj=MagicMock(return_value=client))
    monkeypatch.setattr("prefect.environments.storage.S3._boto3_client", boto3)

    f = Flow("test")

    monkeypatch.setattr("cloudpickle.loads", MagicMock(return_value=f))

    storage = S3(bucket="bucket")

    assert f.name not in storage
    flow_location = storage.add_flow(f)

    new_flow = storage.get_flow(flow_location)
    assert boto3.download_fileobj.called
    assert f.name in storage

    assert isinstance(new_flow, Flow)
    assert new_flow.name == "test"
    assert len(new_flow.tasks) == 0

    state = new_flow.run()
    assert state.is_successful()
Example #17
def test_boto3_client_property(monkeypatch):
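    # Without explicit credentials, values come from the AWS_CREDENTIALS secret in context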
    client = MagicMock()
    boto3 = MagicMock(client=MagicMock(return_value=client))
    monkeypatch.setattr("boto3.client", boto3)

    storage = S3(
        bucket="bucket",
        client_options={"endpoint_url": "http://some-endpoint", "use_ssl": False,},
    )

    credentials = dict(
        ACCESS_KEY="id", SECRET_ACCESS_KEY="secret", SESSION_TOKEN="session"
    )
    with context(secrets=dict(AWS_CREDENTIALS=credentials)):
        boto3_client = storage._boto3_client
    assert boto3_client
    boto3.assert_called_with(
        "s3",
        aws_access_key_id="id",
        aws_secret_access_key="secret",
        aws_session_token="session",
        endpoint_url="http://some-endpoint",
        use_ssl=False,
    )
Example #18
def test_serialize_s3_storage():
    storage = S3(bucket="bucket")
    serialized_storage = storage.serialize()

    assert serialized_storage["type"] == "S3"
    assert serialized_storage["bucket"] == "bucket"
Example #19
                     save_to_neo=True,
                     writers={})
    try:
        search = search_term
        job_name = "qanon"
        limit = 10000000
        for df in fh.search_time_range(
                tp=tp,
                Search=search,
                Since=datetime.strftime(start, "%Y-%m-%d %H:%M:%S"),
                Until=datetime.strftime(current, "%Y-%m-%d %H:%M:%S"),
                job_name=job_name,
                Limit=limit,
                stride_sec=30):
            logger.info('got: %s', len(df) if df is not None else 'None')
            logger.info('proceed to next df')
    except Exception as e:
        logger.error("job exception", exc_info=True)
        raise e
    logger.info("job finished")


schedule = IntervalSchedule(interval=timedelta(seconds=30))
storage = S3(bucket=S3_BUCKET)

#with Flow("covid-19 stream-single") as flow:
#with Flow("covid-19 stream", storage=storage, schedule=schedule) as flow:
with Flow("qanon stream", schedule=schedule) as flow:
    run_stream()
flow.run()
Example #20
import os

import boto3
from prefect import Parameter, Flow, task
from prefect.environments import LocalEnvironment
from prefect.environments.storage import S3

# testing boto directly
s3 = boto3.resource("s3")

for bucket in s3.buckets.all():
    print(bucket.name)


# testing boto via Prefect
@task
def say_hello(person: str) -> None:
    print("Hello, {}!".format(person))


with Flow("Say hi!") as flow:
    name = Parameter("name")
    say_hello(name)

storage = S3(bucket="my-prefect-flows")
# also tried, after setting AWS_CREDENTIALS:
# storage = S3(bucket="REDACTED", secrets=["AWS_CREDENTIALS"])

flow.storage = storage

flow.register(project_name="Demo")
Example #21
from prefect import Flow, task
from prefect.environments.storage import S3


@task
def extract():
    return [1, 2, 3]


@task
def transform(data):
    return [i * 10 for i in data]


@task
def load(data):
    print("Here's your data: {}".format(data))


with Flow("ETL-s3-reg-demo",
          storage=S3(
              bucket="my-prefect-flows",
              secrets=["AWS_CREDENTIALS"],
          )) as flow:
    e = extract()
    t = transform(e)
    l = load(t)

flow.register(project_name="Demo")
# print(flow_id)
Example #22
def test_create_s3_storage():
    storage = S3(bucket="test")
    assert storage
    assert storage.logger
Example #23
from prefect import Flow, task
from prefect.environments.storage import S3

# from prefect.environments import RemoteEnvironment


@task
def extract():
    """Get a list of data"""
    return [1, 2, 3]


@task
def transform(data):
    """Multiply the input by 10"""
    return [i * 10 for i in data]


@task
def load(data):
    """Print the data to indicate it was received"""
    print("Here's your data: {}".format(data))


with Flow("s3-storage", storage=S3(bucket="my-prefect-flows")) as flow:
    e = extract()
    t = transform(e)
    l = load(t)

flow.register(project_name="Demo")
Example #24
            },
        }),
        run,
    )
    assert env_vars["KEY1"] == "VAL1"
    assert env_vars["KEY2"] == "OVERRIDE"
    assert env_vars["PREFECT__LOGGING__LEVEL"] == "TEST"
    assert working_dir in env_vars["PYTHONPATH"]


@pytest.mark.parametrize(
    "storage",
    [
        Local(directory="test"),
        GCS(bucket="test"),
        S3(bucket="test"),
        Azure(container="test"),
        GitLab("test/repo", path="path/to/flow.py"),
        Bitbucket(project="PROJECT", repo="test-repo", path="test-flow.py"),
        CodeCommit("test/repo", path="path/to/flow.py"),
        Webhook(
            build_request_kwargs={"url": "test-service/upload"},
            build_request_http_method="POST",
            get_flow_request_kwargs={"url": "test-service/download"},
            get_flow_request_http_method="GET",
        ),
    ],
)
def test_local_agent_deploy_processes_valid_storage(storage, monkeypatch):
    popen = MagicMock()
    monkeypatch.setattr("prefect.agent.local.agent.Popen", popen)