示例#1
0
def test_add_flow_to_github_storage():
    """Adding a flow returns the configured path and makes its name a member."""
    gh_storage = GitHub(repo="test/repo", path="flow.py")
    flow = Flow("test")

    # The storage must not contain the flow before it is added.
    assert flow.name not in gh_storage

    location = gh_storage.add_flow(flow)
    assert location == "flow.py"
    assert flow.name in gh_storage
示例#2
0
def test_serialize_github_storage():
    """The serialized storage carries its type, repo, path and secrets."""
    payload = GitHub(
        repo="test/repo",
        path="flow.py",
        secrets=["auth"],
    ).serialize()

    # Checked one key at a time, in the same order as the constructor arguments.
    expected_pairs = (
        ("type", "GitHub"),
        ("repo", "test/repo"),
        ("path", "flow.py"),
        ("secrets", ["auth"]),
    )
    for key, expected in expected_pairs:
        assert payload[key] == expected
示例#3
0
def test_github_access_token_errors_if_provided_and_not_found(monkeypatch):
    """Creating the client raises when the named token secret is not in context."""
    client_cls_spy = MagicMock(wraps=github.Github)
    monkeypatch.setattr("github.Github", client_cls_spy)

    gh_storage = GitHub(
        repo="test/repo",
        path="flow.py",
        access_token_secret="MISSING",
    )

    # The context holds no secrets, so the error is expected to mention "MISSING".
    with context(secrets={}), pytest.raises(Exception, match="MISSING"):
        gh_storage._get_github_client()
示例#4
0
def test_add_flow_to_github_already_added():
    """Adding the same flow a second time raises ValueError."""
    gh_storage = GitHub(repo="test/repo", path="flow.py")
    flow = Flow("test")

    # First add succeeds and registers the flow name.
    assert flow.name not in gh_storage
    first_location = gh_storage.add_flow(flow)
    assert first_location == "flow.py"
    assert flow.name in gh_storage

    # Second add of the same flow is rejected.
    with pytest.raises(ValueError):
        gh_storage.add_flow(flow)
示例#5
0
def test_github_access_token_secret(monkeypatch, secret_name, secret_arg):
    """The secret value from context is passed as the client's first positional argument."""
    real_client_cls = github.Github
    client_cls_spy = MagicMock(wraps=github.Github)
    monkeypatch.setattr("github.Github", client_cls_spy)

    gh_storage = GitHub(
        repo="test/repo",
        path="flow.py",
        access_token_secret=secret_arg,
    )
    with context(secrets={secret_name: "TEST-VAL"}):
        client = gh_storage._get_github_client()

    # The spy wraps the real class, so a real client instance comes back.
    assert isinstance(client, real_client_cls)
    positional_args = client_cls_spy.call_args[0]
    assert positional_args[0] == "TEST-VAL"
示例#6
0
def test_github_base_url(monkeypatch):
    """A configured base_url is forwarded to the client constructor as a keyword."""
    real_client_cls = github.Github
    client_cls_spy = MagicMock(wraps=github.Github)
    monkeypatch.setattr("github.Github", client_cls_spy)

    storage_kwargs = dict(
        repo="test/repo",
        path="flow.py",
        access_token_secret="TEST",
        base_url="https://some-url",
    )
    gh_storage = GitHub(**storage_kwargs)

    with context(secrets={"TEST": "TEST-VAL"}):
        client = gh_storage._get_github_client()

    assert isinstance(client, real_client_cls)
    keyword_args = client_cls_spy.call_args[1]
    assert keyword_args["base_url"] == "https://some-url"
示例#7
0
def test_get_flow_github(monkeypatch):
    """A stored flow is fetched by name via the patched file extractor."""
    flow = Flow("test")

    # Replace the GitHub client class so no real client is built.
    monkeypatch.setattr("prefect.utilities.git.Github", MagicMock())

    extractor = MagicMock(return_value=flow)
    monkeypatch.setattr(
        "prefect.storage.github.extract_flow_from_file", extractor
    )

    gh_storage = GitHub(repo="test/repo", path="flow", ref="my_branch")

    assert flow.name not in gh_storage
    gh_storage.add_flow(flow)

    loaded = gh_storage.get_flow(flow.name)
    extractor_kwargs = extractor.call_args[1]
    assert extractor_kwargs["flow_name"] == flow.name
    assert loaded.run()
示例#8
0
def test_create_github_storage_init_args():
    """Constructor arguments are stored on the instance; flows start empty."""
    gh_storage = GitHub(
        repo="test/repo",
        path="flow.py",
        ref="my_branch",
        secrets=["auth"],
    )

    assert gh_storage
    assert gh_storage.flows == {}
    assert gh_storage.repo == "test/repo"
    assert gh_storage.path == "flow.py"
    assert gh_storage.ref == "my_branch"
    assert gh_storage.secrets == ["auth"]
示例#9
0
def test_github_client_property(monkeypatch):
    """The client property builds a client from the GITHUB_ACCESS_TOKEN secret."""
    client_cls_mock = MagicMock()
    monkeypatch.setattr("prefect.utilities.git.Github", client_cls_mock)

    gh_storage = GitHub(repo="test/repo", path="flow.py")

    token = "ACCESS_TOKEN"
    with context(secrets={"GITHUB_ACCESS_TOKEN": token}):
        client = gh_storage._github_client

    assert client
    client_cls_mock.assert_called_with("ACCESS_TOKEN")
示例#10
0
def test_get_flow(github_client, ref, caplog):
    """Check the repo, commit and file lookups made by ``get_flow`` and its log output.

    ``github_client``, ``ref`` and ``caplog`` are supplied by pytest fixtures
    or parametrization defined outside this block.
    """
    storage = GitHub(repo="test/repo", path="flow.py", ref=ref)
    storage.add_flow(Flow("test"))

    f = storage.get_flow("test")
    assert github_client.get_repo.call_args[0][0] == "test/repo"
    repo = github_client.get_repo.return_value

    # Parenthesized on purpose: `x == ref or "main"` parses as
    # `(x == ref) or "main"`, which is always truthy and can never fail.
    assert repo.get_commit.call_args[0][0] == (ref or "main")
    assert repo.get_contents.call_args[0][0] == "flow.py"
    # The file is requested at the resolved commit sha, not at the ref name.
    assert repo.get_contents.call_args[1]["ref"] == "mycommitsha"

    assert f.name == "test"
    state = f.run()
    assert state.is_successful()

    # The ref is only mentioned in the log line when one was given.
    msg = "Downloading flow from GitHub storage - repo: 'test/repo', path: 'flow.py'"
    if ref is not None:
        msg += f", ref: {ref!r}"
    assert msg in caplog.text
    assert "Flow successfully downloaded. Using commit: mycommitsha" in caplog.text
示例#11
0
def test_get_flow_missing_repo(github_client, caplog):
    """An unknown repo re-raises the GitHub error and logs a 'not found' message."""
    not_found = github.UnknownObjectException(404, {})
    github_client.get_repo.side_effect = not_found

    gh_storage = GitHub(repo="test/repo", path="flow.py")
    gh_storage.add_flow(Flow("test"))

    with pytest.raises(github.UnknownObjectException):
        gh_storage.get_flow("test")

    expected_log = "Repo 'test/repo' not found."
    assert expected_log in caplog.text
示例#12
0
def test_get_flow_missing_file(github_client, ref, caplog):
    """A missing file re-raises the GitHub error and logs repo, path and ref."""
    mocked_repo = github_client.get_repo.return_value
    mocked_repo.get_contents.side_effect = github.UnknownObjectException(404, {})

    gh_storage = GitHub(repo="test/repo", path="flow.py", ref=ref)
    gh_storage.add_flow(Flow("test"))

    # With no explicit ref, the log message is expected to name "main".
    effective_ref = ref or "main"

    with pytest.raises(github.UnknownObjectException):
        gh_storage.get_flow("test")

    expected_log = (
        f"File 'flow.py' not found in repo 'test/repo', ref {effective_ref!r}"
    )
    assert expected_log in caplog.text
示例#13
0
def test_get_flow_github(monkeypatch):
    """Storage without a path raises ValueError; with a path the flow is fetched."""
    flow = Flow("test")

    monkeypatch.setattr("prefect.utilities.git.Github", MagicMock())
    monkeypatch.setattr(
        "prefect.storage.github.extract_flow_from_file",
        MagicMock(return_value=flow),
    )

    # Either the construction or the lookup is expected to raise ValueError.
    with pytest.raises(ValueError):
        GitHub(repo="test/repo").get_flow()

    storage = GitHub(repo="test/repo", path="flow")
    assert flow.name not in storage

    location = storage.add_flow(flow)
    fetched = storage.get_flow(location, ref="my_branch")
    assert fetched.run()
示例#14
0
    """Multiply the input by 10"""
    return datum * 10


@task
def load(data):
    """Print the received data to show that it arrived."""
    print(f"Here's your data: {data}")


# Some configuration is required, see https://docs.prefect.io/orchestration/flow_config/overview.html
# The branch name, agent label and project name are read from environment
# variables with os.environ[...]; a missing variable raises KeyError as soon
# as this module is loaded.
with Flow(
        "ETL",
        # Flow source is fetched from GitHub at the branch given by the environment.
        storage=GitHub(
            repo="dylanbhughes/pgr_examples_2",
            path="my_flow.py",
            secrets=["GITHUB_ACCESS_TOKEN"],
            ref=os.environ["PREFECT_FLOW_BRANCH_NAME"],
        ),
        run_config=DockerRun(
            image="prefecthq/prefect:latest",
            labels=[os.environ["PREFECT_FLOW_LABEL"]],
            # The same variables are passed into the run's environment; this
            # module reads them again whenever it is loaded.
            env={
                "PREFECT_FLOW_BRANCH_NAME":
                os.environ["PREFECT_FLOW_BRANCH_NAME"],
                "PREFECT_FLOW_LABEL": os.environ["PREFECT_FLOW_LABEL"],
                "PREFECT_PROJECT_NAME": os.environ["PREFECT_PROJECT_NAME"],
            },
        ),
        executor=LocalDaskExecutor(scheduler="threads", num_workers=3),
) as flow:
    e = extract()
示例#15
0
# flows/my_flow.py

from prefect import task, Flow
from prefect.executors.dask import DaskExecutor
from prefect.storage import GitHub
from prefect.run_configs import KubernetesRun


@task
def get_data():
    """Return the sample integers 1 through 5 as a list."""
    return list(range(1, 6))


@task
def print_data(data):
    """Write ``data`` to standard output."""
    print(data)


# Flow whose source file lives in a GitHub repository: it is configured with a
# Dask executor pointed at a scheduler address and a Kubernetes run config.
with Flow("file-based-flow",
          executor=DaskExecutor("tcp://dask-scheduler:8786"),
          run_config=KubernetesRun(),
          storage=GitHub(
                  repo="pheadra/prefect",  # name of repo
                  path="flows/my_flow.py",  # location of flow file in repo
                  ref="main"
          )) as flow:
    # print_data consumes the output of get_data.
    data = get_data()
    print_data(data)
# Schedule with a 30-minute interval, attached to the flow below.
schedule = IntervalSchedule(interval=timedelta(minutes=30))
with Flow("Configurable Mapper", schedule=schedule) as flow:
    count = Parameter("count", default=10)
    sleep_length = Parameter("sleep_length", default=10)

    i = CreateIterable()(count=count)

    node1_1 = Node(
        name="Mapped Node",
        # Run name: the ordinal of i plus an emoji. emojis[i] is used while i is
        # a valid index; otherwise a random emoji is picked. The bound is strict
        # (<) because emojis[len(emojis)] would raise IndexError.
        task_run_name=lambda **kwargs: emojize(
            f"{convert(kwargs['i'], to='ordinal')} child {emojis[kwargs['i'] if kwargs['i'] < len(emojis) else random.randint(0, len(emojis) - 1)]}",
            use_aliases=True,
            variant="emoji_type",
        ),
    ).map(i=i, sleep_length=unmapped(sleep_length))

# Execution settings: a local environment with no labels and a threaded
# LocalDaskExecutor using 6 workers.
flow.environment = LocalEnvironment(
    labels=[],
    executor=LocalDaskExecutor(scheduler="threads", num_workers=6),
)

# The flow source is read from GitHub; the access token is looked up under the
# secret name given by access_token_secret.
flow.storage = GitHub(
    repo="znicholasbrown/project-schematics",
    path="flows/Configurable_Mapper.py",
    access_token_secret="NICHOLAS_GITHUB_ACCESS",
)

# flow.run(run_on_schedule=False)
# Register the flow under the named project.
flow.register(project_name="PROJECT: Schematics")
示例#17
0
                 project_name=prefect_project_name)
}

# Create one Prefect flow object per run (00 and 12) from the shared settings
flow_nwp_00 = create_flow_download(run=00, **settings)
flow_nwp_12 = create_flow_download(run=12, **settings)

flow_list = [flow_nwp_00, flow_nwp_12]
for flow in flow_list:
    # Configure how this code will be passed to the Prefect agents.
    # In this case, Prefect will get this file from GitHub.
    # The git ref comes from DATAFETCH__STORAGE__REPO__REF and falls back to "master".
    repo_ref = os.getenv("DATAFETCH__STORAGE__REPO__REF", default="master")
    print(f"Registering Using GitHub repo ref {repo_ref}")
    flow.storage = GitHub(
        repo="steph-ben/datafetch-config",  # name of repo
        ref=repo_ref,
        path="projects/gfs/fetch.py",  # location of flow file in repo
        secrets=["GITHUB_ACCESS_TOKEN"]  # name of personal access token secret
    )

    # Configure how this code will be executed.
    # In this case, Prefect will run this inside a Docker container.
    flow.run_config = DockerRun(image="stephben/datafetch")

    # Configure where task results will be stored.
    # See:
    #   - https://docs.prefect.io/core/concepts/results.html
    #   - https://docs.prefect.io/core/advanced_tutorials/using-results.html
    flow.result = PrefectResult()

# When run as a script, hand the configured flows to the CLI helper.
if __name__ == "__main__":
    show_prefect_cli_helper(flow_list=flow_list)
示例#18
0

@task
def first_task():
    """Log a normal and a sensitive message through ``my_logger``, then return 1."""
    for message in ("This is normal data", "This is sensitive data"):
        my_logger(message)
    return 1


@task
def second_task():
    """Log a normal and a sensitive message through ``my_logger``, then return 1."""
    for message in ("This is normal data", "This is sensitive data"):
        my_logger(message)
    return 1


@task
def third_task():
    """Raise a FAIL signal whose message is whatever ``my_logger`` returns."""
    from prefect.engine.signals import FAIL

    failure_message = my_logger("This is sensitive data")
    raise FAIL(message=failure_message)


# Flow stored on GitHub that chains the three tasks one after another.
with Flow("Filtered Logging Demo",
          storage=GitHub(repo="kmoonwright/utility_flows",
                         path="logging_demo/log_filter.py",
                         access_token_secret="GITHUB_ACCESS_TOKEN")) as flow:
    # No data is passed between tasks; ordering is expressed with upstream_tasks.
    first = first_task()
    second = second_task(upstream_tasks=[first])
    third_task(upstream_tasks=[second])

# Register the flow under the "logging-demo" project.
flow.register(project_name="logging-demo")
示例#19
0

# Two StartFlowRun tasks in the same project, each targeting a different flow
# by name: "ETL - Docker" and "ETL - Local".
cloud = StartFlowRun(
    flow_name="ETL - Docker",
    project_name="PGR Examples",
)
local = StartFlowRun(
    flow_name="ETL - Local",
    project_name="PGR Examples",
)

with Flow(
        "Orchestrator Flow",
        storage=GitHub(
            repo="dylanbhughes/pgr_examples_3",
            path="orchestrator.py",
            secrets=["GITHUB_ACCESS_TOKEN"],
        ),
        run_config=DockerRun(image="prefecthq/prefect:latest",
                             labels=["pgr docker"]),
        executor=LocalDaskExecutor(scheduler="threads", num_workers=3),
) as flow:
    input_string = Parameter(name="input_string", required=True)
    manual_switch = Parameter(name="cloud_or_local",
                              required=False,
                              default=None)
    cloud_or_local_result = run_locally_or_in_cloud(
        input_string=input_string, manual_switch=manual_switch)

    switch(
        cloud_or_local_result,
示例#20
0
@task(log_stdout=True)
def extract(input_string):
    """Print ``input_string`` and return the integers 1 through 6 as a list."""
    print(input_string)
    return list(range(1, 7))


@task
def transform(number):
    """Return ``number`` multiplied by 2."""
    doubled = number * 2
    return doubled


@task
def load(numbers):
    """Print a message reporting ``numbers`` as uploaded."""
    print("Uploaded {} to Snowflake".format(numbers))


# Flow stored on GitHub, configured with a LocalRun labelled "pgr local" and a
# threaded LocalDaskExecutor with 3 workers.
with Flow(
        "ETL - Local",
        storage=GitHub(
            repo="dylanbhughes/pgr_examples_3",
            path="local_flow.py",
            secrets=["GITHUB_ACCESS_TOKEN"],
        ),
        run_config=LocalRun(labels=["pgr local"]),
        executor=LocalDaskExecutor(scheduler="threads", num_workers=3),
) as flow:
    # input_string is a required parameter with no default.
    input_string = Parameter(name="input_string", required=True)
    numbers = extract(input_string=input_string)
    # transform is mapped over the extracted numbers; load receives the mapped result.
    tranformed_numbers = transform.map(numbers)
    result = load(numbers=tranformed_numbers)
示例#21
0
def test_create_github_storage():
    """A minimally configured storage is truthy and has a truthy logger."""
    gh_storage = GitHub(repo="test/repo", path="flow.py")
    for value in (gh_storage, gh_storage.logger):
        assert value
示例#22
0
from prefect.tasks.prefect.flow_run import StartFlowRun

new_flow = StartFlowRun(flow_name="Skip Flow", project_name="Jenny")


@task(name="")
def sleep_for_x(x):
    """Sleep for ``x`` seconds, then create a link artifact for a fixed FTP URL."""
    time.sleep(x)
    link_target = "ftp://ftp-server/my-file.csv"
    prefect.artifacts.create_link(link_target)


# Flow that sleeps for the number of seconds given by parameter "x"
# (default 22) and also contains the StartFlowRun task `new_flow`.
with Flow(name="Start Flow") as flow:
    x = Parameter('x', default=22, required=True)
    sleep_for_x(x)
    # No explicit dependency is declared between sleep_for_x and new_flow.
    new_flow()

# Kubernetes run configuration with CPU and memory requests.
flow.run_config = KubernetesRun(cpu_request=2, memory_request="2Gi")
# flow.run_config = LocalRun(

#     labels=['runConfig']
# )

flow.storage = GitHub(
    repo="bestdan/pifect",  # name of repo
    path="src/childFlow.py",  # location of flow file in repo
    access_token_secret=
    "Jen_Github_token"  # name of personal access token secret
)

# Register the flow; 'Jenny' is passed positionally (the project name used elsewhere in this file).
flow.register('Jenny')
示例#23
0
@prefect.task
def processing1(fp: str):
    """Log an info message saying processing1 is running on ``fp``."""
    prefect.context.get("logger").info(f"Doing some processing1 on {fp} ...")


@prefect.task
def processing2(fp: str):
    """Log an info message saying processing2 is running on ``fp``."""
    prefect.context.get("logger").info(f"Doing some processing2 on {fp} ...")


# Post-processing flow: both steps receive the "fp" parameter, and processing2
# is explicitly ordered after processing1.
with prefect.Flow("gfs-post-processing", result=PrefectResult()) as flow:
    fp = prefect.Parameter("fp")

    p1 = processing1(fp)
    p2 = processing2(fp)
    # No data flows from p1 to p2, so the ordering is declared explicitly.
    p2.set_upstream(p1)

# The git ref comes from DATAFETCH__STORAGE__REPO__REF and falls back to "master".
repo_ref = os.getenv("DATAFETCH__STORAGE__REPO__REF", default="master")
print(f"Registering Using GitHub repo ref {repo_ref}")
flow.storage = GitHub(repo="steph-ben/datafetch-config",
                      ref=repo_ref,
                      path="projects/gfs/post_process.py",
                      secrets=["GITHUB_ACCESS_TOKEN"])
# DockerRun with no arguments, i.e. whatever its defaults are.
flow.run_config = DockerRun()

# When run as a script, hand the flow to the CLI helper (imported lazily here).
if __name__ == "__main__":
    from datafetch.utils import show_prefect_cli_helper
    show_prefect_cli_helper(flow_list=[flow])
示例#24
0
# schedule = IntervalSchedule(interval=datetime.timedelta(minutes=1))  # schedule disabled to test faster
# With this interval schedule the flow would run every 1 minute.

# State handlers exist for flows as well as tasks, so there are two kinds:
# task state handlers and flow state handlers.
with Flow("my etl flow", state_handlers=[failed_alert]) as f:
    db_table = create_table()
    raw = get_complaint_data()
    # Functional API: the dependency is implied by using one task's output as
    # the next task's input.
    parsed = parse_complaint_data(raw)
    populate_table = store_complaints(parsed)
    # Imperative API: the dependency is declared explicitly.
    populate_table.set_upstream(db_table)

# Storage must be attached before registering; assigning it after
# f.register() does not affect the version that was already registered.
f.storage = GitHub(
    repo="ashwani021994/Learning-ML",  # name of repo
    path="/test_code_2.py")

f.register(project_name="tutorial1")

# Notes:
# 1) TriggerFailed is a subclass of the Failed class; it occurs when an upstream task fails.
# 2) A task's final status can be set explicitly to FAIL or SUCCESS by using
#    Prefect signals, instead of raising an exception as done above.
# 3) LocalResultHandler places the task result in the /prefect/results directory
#    (I was not able to find the prefect folder); the data is written as a pickle file.
# 4) Every call to f.register generates a new version of the flow; the older version appears in the archive.
# 5) Another advantage of caching on the server is that the result handler writes data to the database.
#    If the process dies midway locally, the cache is gone from memory; if the same happens on the
#    server, the cache can be retrieved from the results in the database.
示例#25
0
import os

from prefect import Flow, task, Parameter
from prefect.storage import GitHub
from prefect.run_configs import LocalRun


@task(log_stdout=True)
def greet(name):
    """Print a greeting for ``name``, using the GREETING env var or "Hello"."""
    salutation = os.getenv("GREETING", "Hello")
    print(f"{salutation}, {name}!")


# Single-task flow: greet the value of the "name" parameter.
with Flow("test-github") as flow:
    name = Parameter("name")
    greet(name)

# The flow source is read from GitHub; the repo is passed positionally.
flow.storage = GitHub("jcrist/prefect-hacking", path="test_github.py")
# GREETING is set in the run's environment, where greet() reads it.
flow.run_config = LocalRun(env={"GREETING": "Hello"})
示例#26
0
                          name="SQL-stuff"
                          # commit: bool = False,
                          )
#--------------------------------------------------------------
# Flow context
#--------------------------------------------------------------
with Flow("github_flow") as f:

    # The secret's name is taken from the Prefect config entry sql_server.password_var.
    password = EnvVarSecret(prefect.config.sql_server.password_var)

    # NOTE(review): this context lookup runs while the flow is being defined,
    # not inside a task, and `logger` is not used in this block - confirm it is needed.
    logger = prefect.context.get("logger")
    thing = Parameter("thing", default=["Thing 1"])
    d = dog(thing)

    s = sql_task(password=password)

    # view_sql consumes the output of sql_task.
    v = view_sql(s)

#--------------------------------------------------------------
# Closing Details
#--------------------------------------------------------------
# The run environment sets PREFECT__USER_CONFIG_PATH to a user-specific absolute path.
f.run_config = LocalRun(env={
    "PREFECT__USER_CONFIG_PATH":
    '/Users/peytonrunyan/TRP/prefect/config.toml'
})

f.storage = GitHub(repo="peyton-trp/prefect-test",
                   path="simple_flow.py",
                   secrets=["GITHUB_ACCESS_TOKEN"])

# "cat_flow" is passed positionally to register().
f.register("cat_flow")
示例#27
0
def build_example(path):
    """Build an example located at a specific path.

    The example is run twice: once in-process with ``exec`` to collect its
    docstring and ``Flow`` objects, and once as a subprocess to capture its
    standard output for the rendered page.

    Args:
        - path (str): the path to the example source file.

    Returns:
        - markdown (str): the rendered example in markdown
        - flows (dict): maps each flow name found in the example to the result
            of ``flow.serialize(build=True)``

    Raises:
        - ValueError: if the docstring header of the example cannot be located
        - subprocess.CalledProcessError: if running the example as a script
            exits with a non-zero status (``check=True``)
    """
    from prefect import Flow
    from prefect.storage import GitHub
    from prefect.run_configs import UniversalRun

    # Use the current commit (if specified in the environment)
    ref = os.getenv("GIT_SHA", "master")

    with open(path, "r", encoding="utf-8") as source_file:
        contents = source_file.read()

    # NOTE: exec runs the example's code in this process; only point this at
    # trusted example files.
    namespace = {}
    exec(contents, namespace)

    try:
        header = namespace["__doc__"]
        tree = ast.parse(contents)
        # Zero-based line index of the second top-level statement; the first
        # one is presumably the module docstring.
        offset = tree.body[1].lineno - 1
    except Exception as exc:
        raise ValueError(
            f"No docstring header found for example at {path}") from exc

    flows = {}
    relpath = os.path.relpath(path, start=ROOT)
    for obj in namespace.values():
        if not isinstance(obj, Flow):
            continue
        # Point every flow at its source file in the PrefectHQ/prefect repo.
        obj.storage = GitHub("PrefectHQ/prefect", path=relpath, ref=ref)
        if not obj.run_config:
            obj.run_config = UniversalRun()
        obj.run_config.labels.add("prefect-examples")
        flows[obj.name] = obj.serialize(build=True)

    # Source shown in the page: everything from `offset` onwards.
    source = "\n".join(contents.splitlines()[offset:]).strip()

    # `env` replaces the child's whole environment, so the example runs with
    # only the logging format variable set.
    res = subprocess.run(
        [sys.executable, path],
        capture_output=True,
        check=True,
        env={"PREFECT__LOGGING__FORMAT": "%(levelname)s | %(message)s"},
    )
    output = res.stdout.decode("utf-8").strip()

    # Multi-line `prefect register` command listing every flow name, sorted.
    register_lines = [
        "prefect register --json https://docs.prefect.io/examples.json"
    ]
    register_lines.extend(f"    --name {name!r}" for name in sorted(flows))
    register_lines.append("    --project 'Prefect Examples'")

    rendered = EXAMPLE_TEMPLATE.format(
        header=header,
        source=source,
        output=output,
        ref=ref,
        relpath=relpath,
        register_cmd=" \\\n".join(register_lines),
    ).lstrip()

    return rendered, flows
示例#28
0
#         flow_name="Random State Generator"
#     )(task_run_name=num_of_flow_runs)

# with Flow("Flow Run Generator") as flow3:
#     num_of_flows = Parameter("num_of_flows", default=5)
#     my_flow_runs = generate_list(num_of_flows)
#     create_flow_runs.map(my_flow_runs)

# flow3.storage = Local(add_default_labels=False)
# flow3.register(project_name="Demos")


# ATTEMPT 3
@task
def generate_list(length):
    """Return a list holding the flow name "Random State Generator" ``length`` times."""
    flow_names = []
    for _ in range(length):
        flow_names.append("Random State Generator")
    return flow_names


# StartFlowRun task with project and flow name given at construction; it is
# mapped over the generated list below.
create_flow_runs = StartFlowRun(project_name="State Generators",
                                flow_name="Random State Generator")

with Flow("Flow Run Generator") as flow3:
    # One mapped StartFlowRun per element of the generated list (default 5).
    num_of_flows = Parameter("num_of_flows", default=5)
    my_flow_runs = generate_list(num_of_flows)
    create_flow_runs.map(my_flow_runs)

# The flow source is read from GitHub; the token is looked up under the given secret name.
flow3.storage = GitHub(repo="kmoonwright/utility_flows",
                       path="state_generators/3_flow_run_generator.py",
                       access_token_secret="GITHUB_ACCESS_TOKEN")

flow3.register(project_name="State Generators")
示例#29
0
        "Meta Data": meta_data,
        "Time Series (15min)": data
    })


@task
def persist_data_in_influx(injector: Injector,
                           av_response: InterdayResponseModel,
                           secrets: Dict[str, str]):
    """Write the points derived from ``av_response`` to the bucket named in ``secrets``.

    The client is obtained from ``injector`` and the write API is created with
    ``SYNCHRONOUS``; the bucket is ``secrets['INFLUX_V2_BUCKET']``.
    """
    client = injector.get(InfluxDBClient)
    writer = client.write_api(SYNCHRONOUS)
    bucket = secrets['INFLUX_V2_BUCKET']
    points = interday_response_model_to_points(av_response)
    writer.write(bucket, record=points)


# Schedule with a 24-hour interval, passed to the flow below.
schedule = IntervalSchedule(interval=timedelta(hours=24))

# The schedule is passed as the second positional argument to Flow.
with Flow("scrap-stock", schedule) as flow:
    injector = create_secret_injector_task()
    # NOTE(review): token_renewal_result is not used by any later task in this block.
    token_renewal_result = renew_token_task(injector)
    secrets = fetch_secret_task('common', 'kv', injector)
    stocks = Parameter("stocks", default=["GOOGL", "MSFT"])
    # Mapped per stock symbol; `secrets` and `injector` are wrapped in
    # unmapped() and the per-stock responses are passed on to the write task.
    av_response = scrap_stock.map(stocks, secrets=unmapped(secrets))
    persist_data_in_influx.map(injector=unmapped(injector),
                               av_response=av_response,
                               secrets=unmapped(secrets))

flow.storage = GitHub(repo="piokra/prefect-tutorial", path="scrap_stock.py")

# Runs the flow in this process (not only registering it) when the module executes.
flow.run()
                        states: [{ flow_run_id: $flowRunId, state: $state }]
                        }
                    ) {
                        states {
                        id
                        status
                        message
                        }
                    }
                }
            """,
                                            variables={
                                                "flowRunId": prev_flow_run_id,
                                                "state": {
                                                    "type": "Skipped"
                                                }
                                            })


# Flow that logs the previous flow run's id and number and passes the number,
# together with a personal access token secret, to change_prev_flow_state.
with Flow("Previous Flow Run State Changer") as flow2:
    t1 = log_prev_flow_run_id()
    t2 = log_prev_num()
    secret = PrefectSecret("PERSONAL_ACCESS_TOKEN")
    t3 = change_prev_flow_state(t2, secret)
    # NOTE(review): prefect.context.get(...) is evaluated here, while the flow
    # is being defined, not inside a task at run time - confirm this is intended.
    create_link(prefect.context.get("prev_flow_run_id"))

# Explicit ordering: t1 runs before t2.
flow2.add_edge(t1, t2)
flow2.storage = GitHub(repo="kmoonwright/utility_flows",
                       path="state_generators/2_prev_run_state_changer.py",
                       access_token_secret="GITHUB_ACCESS_TOKEN")
flow2.register(project_name="State Generators")