示例#1
0
def generate_list():
    """Return a fresh list holding the integers 1, 2 and 3."""
    return list(range(1, 4))


@task
def do_something(n):
    """Return ``n`` unchanged (identity task)."""
    return n


@task
def fail(x):
    """Print ``x`` and then always raise ``ValueError``.

    Raises:
        ValueError: unconditionally, after printing.
    """
    print(x)
    raise ValueError()


# Result object passed as the flow-level `result`; its location is the
# template "{task_full_name}.pb".
result = LocalResult(location="{task_full_name}.pb")
with Flow(
    "Restart Me",
    storage=Local(
        stored_as_script=True,
        # NOTE(review): absolute, user-specific path -- this only resolves on
        # a machine where that exact file exists.
        path="/Users/josh/Desktop/code/Dummy-Flows/restartme.py",
    ),
    result=result,
) as flow:
    # generate_list has no @task decorator, so it is called right here while
    # the flow is being defined; do_something is then mapped over its return
    # value.
    lst = generate_list()
    d = do_something.map(lst)
    # fail prints its argument and always raises ValueError.
    fail(d)

# Attach a LocalEnvironment configured with a DaskExecutor to the flow.
environment = LocalEnvironment(executor=DaskExecutor())
flow.environment = environment
示例#2
0
def test_shell_initializes_and_multiline_output_optionally_returns_all_lines():
    """With ``return_all=True`` the task result is the list of output lines."""
    with Flow(name="test") as flow:
        shell_task = ShellTask(return_all=True)(
            command="echo -n 'hello world\n42'"
        )

    state = flow.run()

    assert state.is_successful()
    assert state.result[shell_task].result == ["hello world", "42"]
示例#3
0
def test_shell_runs_other_shells():
    """A ShellTask created with ``shell="zsh"`` reports ``zsh`` as $ZSH_NAME."""
    with Flow(name="test") as flow:
        shell_task = ShellTask(shell="zsh")(command="echo -n $ZSH_NAME")

    state = flow.run()

    assert state.is_successful()
    assert state.result[shell_task].result == "zsh"
示例#4
0
def test_shell_task_raises_fail_if_cmd_fails():
    """A failing command fails the flow and the task message mentions the exit code."""
    with Flow(name="test") as flow:
        shell_task = ShellTask()(command="ls surely_a_dir_that_doesnt_exist")

    state = flow.run()

    assert state.is_failed()
    task_message = str(state.result[shell_task].message)
    assert "Command failed with exit code" in task_message
示例#5
0
def test_shell_initializes_and_multiline_output_returns_last_line():
    """With default settings only the last line of the output is the result."""
    with Flow(name="test") as flow:
        shell_task = ShellTask()(command="echo -n 'hello world\n42'")

    state = flow.run()

    assert state.is_successful()
    assert state.result[shell_task].result == "42"
示例#6
0
    # dictionary values: https://docs.prefect.io/core/tutorials/task-guide.html#adding-tasks-to-flows

    # Cron clock generator: https://crontab-generator.org/
    # 0 6 * * * means run every day at 6am
    schedule = Schedule(clocks=[CronClock("0 6 * * *")])
    # alternatively: crontab -l to list all crontabs, or see generator to generate the crontab
    # there's also launchctl and using plist files.
    # there's also using the calendar.
    # View Python command with `ps`
    # Rename `ps` command: https://stackoverflow.com/a/49097964/2138773
    # https://github.com/dvarrazzo/py-setproctitle
    from setproctitle import setproctitle
    setproctitle('prefect: ds arxiv')

    # using the Imperative API: https://docs.prefect.io/core/concepts/flows.html#imperative-api
    with Flow('Build Arxiv', state_handlers=[slack_handler]) as flow:

        # Dates
        date_today = datetime.now().strftime('%Y-%m-%d')

        # Begin the flow. Will fail if len(df) = 0
        # FIXME: date_query is actually overwritten within the function to just be the max date. See function for details.
        df_full = df_get_arxiv(Constant(arx_list), Constant(arx_dict))
        filter_to_date = determine_filter_date(df_full)
        df = filter_df_arxiv(df=df_full, filter_to_date=filter_to_date)

        # Creating the Post folder, save the dataframe there, and build the rmd
        dir_post = create_dir_post(date_published=date_today)
        dir_post.set_dependencies(upstream_tasks=[df])
        written_df = write_df_to_csv(df=df, dir_post=dir_post)
        fp_post = copy_rmd_template(dir_post)
示例#7
0
    def test_no_raise_on_remote_env(self):
        """The dependency check returns None for a flow using RemoteEnvironment."""
        remote_flow = Flow("THIS IS A TEST", environment=RemoteEnvironment())
        outcome = healthchecks.environment_dependency_check([remote_flow])

        assert outcome is None
示例#8
0
def storage_flow_runner(service: str):
    """Build the 'Install Storage' flow for ``service``, run it, and return the run's result.

    Args:
        service: Value forwarded to ``create_service_bucket``.

    Returns:
        Whatever ``flow.run()`` returns.
    """
    install_flow = Flow('Install Storage')
    with install_flow:
        storage_installer = PrefectAddServiceStorage()
        storage_installer.create_service_bucket(service)

    return install_flow.run()
示例#9
0
from prefect import Flow, task, context
from schemas.customer import CustomerList
from converters import csv_converter


@task
def generate(schema, start, iterations):
    """Return ``schema.generate_list(iterations=iterations)``.

    Args:
        schema: Object exposing a ``generate_list(iterations=...)`` method.
        start: Accepted but not used by this function.
        iterations: Forwarded to ``schema.generate_list``.
    """
    return schema.generate_list(iterations=iterations)


@task
def convert_to_csv(data, header):
    """Return the result of ``csv_converter(data, header)``."""
    return csv_converter(data, header)


@task
def printer(data):
    """Print ``data`` to standard output; returns None."""
    print(data)


# Two generate -> convert_to_csv -> printer chains; they differ only in the
# CustomerList instance used and in the `header` argument (True vs. False).
with Flow("customer-list") as flow:
    # Both instances are created with the same seed.
    customer_list = CustomerList(seed="PATRICK")
    customer_list2 = CustomerList(seed="PATRICK")

    data = convert_to_csv(generate(customer_list, 0, 2), True)
    printer(data)

    # `data` is rebound here; the first chain was already added to the flow.
    data = convert_to_csv(generate(customer_list2, 0, 2), False)
    printer(data)

# Runs the flow at import time (there is no __main__ guard).
flow.run()
示例#10
0
import prefect
from prefect import task, Flow

@task
def hello_task():
    """Log "Hello, Cloud!" at INFO level via the logger stored in the Prefect context."""
    prefect.context.get("logger").info("Hello, Cloud!")

# Single-task flow.
with Flow("Hello World") as flow:
    hello_task()

# Register the flow under the 'Hello World' project, then start an agent for
# it. Both calls execute at import time (there is no __main__ guard).
flow.register(project_name='Hello World')
flow.run_agent()
示例#11
0
from prefect import task, Flow
from prefect.environments.execution import DaskKubernetesEnvironment
from prefect.environments.storage import Docker


@task
def first_task():
    """Return a list containing the integer 100 repeated 100 times."""
    return [100] * 100


@task
def compute(x):
    """Return ``x * 100``."""
    return x * 100


# Flow that maps `compute` three separate times over the output of
# `first_task`.
with Flow(
        "dktest",
        environment=DaskKubernetesEnvironment(min_workers=1, max_workers=3),
        storage=Docker(registry_url="joshmeek18", image_name="flows"),
) as flow:
    one = first_task()
    result = compute.map(one)
    result2 = compute.map(one)
    result3 = compute.map(one)

# Deployment is disabled; only the flow graph is rendered.
# flow.deploy(project_name="Demo")
flow.visualize()
示例#12
0
    default_shap = run_default_shap_impl(model_state, data_to_explain)
    match = compare_results_impl(default_shap, my_shap_distributed)
    if match is True:
        print("Results match!")
    else:
        print("Results don't match!")
    print('done')


# NOTE(review): `distributed` is not read anywhere in the visible part of the
# file -- confirm whether it is still needed.
distributed = True
if __name__ == '__main__':
    # Start a local Dask cluster with five workers and keep its scheduler
    # address (not used in the visible lines below).
    cluster = LocalCluster(n_workers=5)
    serv_address = cluster.scheduler.address
    # test()

    with Flow("shap pipeline") as flow:
        name = Parameter('name')
        # load data from CSV and get a dataframe
        df = etl(name)
        # Train randomforest model
        model_state = create_model(df)
        # get data to explain: returns test dataframe rows from start to end index
        data_to_explain = get_data_to_explain(model_state, 0, 5)
        # Run my serial (non-distributed) implementation of shap
        my_shap = run_my_shap(model_state, data_to_explain)
        # Run the distributed version
        my_shap_distributed = run_distributed_shap(model_state,
                                                   data_to_explain)
        # my_shap_distributed = run_distributed_shap(model_state, data_to_explain)
        # Run the default shap python library implementation
        default_shap = run_default_shap(model_state, data_to_explain)
示例#13
0
        return list(range(random.randint(1, 10)))


class Node(Task):
    """Task that simulates a slow unit of work which occasionally fails."""

    def run(self):
        """Sleep five seconds, then either fail or return a random-length list.

        Returns:
            ``list(range(k))`` where ``k`` is drawn from ``random.randint(1, 10)``.

        Raises:
            ValueError: when ``random.random()`` exceeds 0.99.
        """
        log = self.logger
        log.info(f'{self.name} running...')
        time.sleep(5)

        # Guard clause for the rare failure path.
        if random.random() > 0.99:
            raise ValueError(f'{self.name} failed :(')

        log.info(f'{self.name} complete.')
        length = random.randint(1, 10)
        return list(range(length))


# Schedule with a 30-minute interval, passed to the flow below.
schedule = IntervalSchedule(interval=timedelta(minutes=30))
with Flow("Long Flow Run", schedule=schedule) as Long_Flow_Run:
    # Root and Version are defined outside the visible part of this file.
    root = Root()
    version = Version()(upstream_tasks=[root])
    # Linear chain: every Node is mapped with the previous step as its only
    # upstream task, so no data is passed between nodes, only ordering.
    node1_1 = Node(name="Node 1_1").map(upstream_tasks=[version])
    node1_2 = Node(name="Node 1_2").map(upstream_tasks=[node1_1])
    node1_3 = Node(name="Node 1_3").map(upstream_tasks=[node1_2])
    node1_4 = Node(name="Node 1_4").map(upstream_tasks=[node1_3])
    node1_5 = Node(name="Node 1_5").map(upstream_tasks=[node1_4])
    node1_6 = Node(name="Node 1_6").map(upstream_tasks=[node1_5])
    node1_7 = Node(name="Node 1_7").map(upstream_tasks=[node1_6])
    node1_8 = Node(name="Node 1_8").map(upstream_tasks=[node1_7])
    node1_9 = Node(name="Node 1_9").map(upstream_tasks=[node1_8])
    node1_10 = Node(name="Node 1_10").map(upstream_tasks=[node1_9])
    node1_11 = Node(name="Node 1_11").map(upstream_tasks=[node1_10])
    node1_12 = Node(name="Node 1_12").map(upstream_tasks=[node1_11])
    node1_13 = Node(name="Node 1_13").map(upstream_tasks=[node1_12])
@task
def check_if_even(value):
    """Return True when ``value`` leaves no remainder on division by two."""
    remainder = value % 2
    return remainder == 0


@task
def print_odd(value):
    """Print ``"<value> is odd!"``; the value itself is not checked here."""
    print("{} is odd!".format(value))


@task
def print_even(value):
    """Print ``"<value> is even!"``; the value itself is not checked here."""
    print("{} is even!".format(value))


# Branching flow: `is_even` is the condition handed to `ifelse`, with `even`
# and `odd` as the two branch tasks.
with Flow("Check Even/Odd") as f:
    value = Parameter("value")
    is_even = check_if_even(value)

    even = print_even(value)
    odd = print_odd(value)

    # presumably runs `even` when the condition is true and `odd` otherwise
    # (ifelse is defined outside the visible lines) -- matches the expected
    # output noted below.
    ifelse(is_even, even, odd)

# Prints '2 is even!'
f.run(value=2)

# Prints '1 is odd!'
f.run(value=1)

f.visualize()
示例#15
0
            amt_metrics['AMT_ANALYST_HOLD'].get(date, pd.NA),
            'INS_ANALYST_SELL':
            ins_metrics['INS_ANALYST_SELL'].get(date, pd.NA),
            'INS_ANALYST_BUY':
            ins_metrics['INS_ANALYST_BUY'].get(date, pd.NA),
            'INS_ANALYST_HOLD':
            ins_metrics['INS_ANALYST_HOLD'].get(date, pd.NA)
        })
    return result


@task
def load(result_dict):
    """
    Print the results object passed in.

    Per the original note, the results hold two tables: INS metrics and AMT
    metrics. This function does not inspect them; it prints the argument
    as-is.

    :param result_dict: dict with results
    :return: None
    """
    print(result_dict)


if __name__ == '__main__':
    # extract -> transform -> load pipeline over a single MarketBeat page.
    with Flow('ms-etl') as flow:
        # The URL is split across two adjacent string literals, which Python
        # concatenates.
        url = ('https://www.marketbeat.com/stocks/'
               'NASDAQ/MSFT/price-target/?MostRecent=0')
        soup = extract(url)
        res = transform(soup)
        load(res)
    flow.run()
示例#16
0
from prefect import task, Flow, Parameter
from prefect.engine.result import NoResult

@task()
def vals():
    """Return the list ``[1, 2, 3]``."""
    return [1, 2, 3]

@task()
def ret(x):
    """Return the constant 1; the argument ``x`` is ignored."""
    return 1

# `ret` is mapped twice: once over the output of the `vals` task and once
# directly over the flow parameter `p`.
with Flow('a') as f:
    p = Parameter('p')
    v = vals()
    a = ret.map(v)
    b = ret.map(p)

# Registers the flow under project "Demo" at import time.
f.register(project_name="Demo")
示例#17
0
from prefect import Flow, task, unmapped, Parameter
from prefect.engine.results import LocalResult
from prefect.engine.executors import LocalDaskExecutor, DaskExecutor
from prefect.engine.cache_validators import all_parameters

# Flow-level result: the location template includes the flow name, the task
# name and the `x` / `y` values.
# NOTE(review): `all_parameters` is imported from cache_validators but passed
# as `validators` here -- confirm this is the intended argument.
lr = LocalResult(location="{flow_name}-{task_name}-{x}-{y}.pkl",
                 validators=all_parameters)


@task(log_stdout=True, checkpoint=True)
def add(x, y):
    """Return ``sum(x) + y``, falling back to ``x + y`` on ``TypeError``.

    Args:
        x: Either something ``sum`` accepts or a single value.
        y: Value added to the sum (or to ``x`` itself in the fallback).
    """
    print(f"add ran with {x} {y}")
    try:
        # First attempt: treat x as summable.
        return sum(x) + y
    except TypeError:
        # Reached when `sum(x) + y` raised TypeError (for example when x is
        # not iterable); add the two values directly instead.
        return x + y


# `add` is mapped over parameter `x` with `y` wrapped in `unmapped`, then
# called once more on the mapped output.
with Flow("iterated map", result=lr) as flow:
    y = unmapped(Parameter("y", default=7))
    x = Parameter("x", default=[1, 2, 3])
    mapped_result = add.map(x, y=y)
    # Non-mapped call: receives the whole mapped result as `x`.
    out = add(mapped_result, y)

if __name__ == "__main__":
    flow.run(executor=DaskExecutor())
示例#18
0
import prefect
from prefect import Flow, task
import time
from datetime import timedelta


@task(timeout=11)
def log_me():
    """Log "LOGGED" at INFO level via the context logger and return "LOGGER"."""
    prefect.context.get("logger").info("LOGGED")
    return "LOGGER"


# Single-task flow.
with Flow("loggin") as flow:
    log_me()

from prefect.environments import LocalEnvironment
from prefect.engine.executors import DaskExecutor

# Attach a LocalEnvironment configured with a DaskExecutor.
flow.environment = LocalEnvironment(executor=DaskExecutor())

# Registers the flow under project "Demo" at import time.
flow.register(project_name="Demo")
示例#19
0
    def test_no_raise_on_normal_flow(self):
        """The dependency check returns None for a flow with no explicit environment."""
        plain_flow = Flow("THIS IS A TEST")
        outcome = healthchecks.environment_dependency_check([plain_flow])

        assert outcome is None
示例#20
0
文件: build.py 项目: viral-nft/moap
"""This module holds the Prefect flow definition.

Description
-----------
This flow will pull Twitter trends and put them on a picture for NFT sale.

Author
------
Viral NFT <*****@*****.**>

Created
-------
March 30, 2021, 16:41:15
"""

from prefect import Flow

# NOTE: It is highly advised not to import `src.config` in this module.
from src.tasks import Trends, Tweets

###############################################################################
# Initialize flow.
# Create the flow object first, then add tasks inside its context below.
flow = Flow(name="Trend grabber and image generator")

trends = Trends()
# NOTE(review): `tweets` is instantiated but never called inside the flow
# context in the visible lines, so only `trends` is added to the flow here.
tweets = Tweets()

with flow:
    trends()
###############################################################################
示例#21
0
from prefect import task, Flow
from datetime import timedelta
from prefect.schedules import IntervalSchedule
import pendulum


@task
def say_hello():
    """Print "Hello, world!" to standard output; returns None."""
    print("Hello, world!")


# Daily interval schedule starting 2010-01-01.
schedule = IntervalSchedule(interval=timedelta(days=1),
                            start_date=pendulum.datetime(2010, 1, 1))

# The schedule is passed as the second positional argument of Flow.
with Flow("interval-schedule", schedule) as flow:
    say_hello()

flow.run(run_on_schedule=True)
# flow.register(project_name="Demo", version_group_id="custom_int")

# NOTE(review): scratch code -- the value returned by `pd.add(days=1)` is
# discarded, so if pendulum datetimes are immutable this has no effect. The
# name `pd` also collides with the usual pandas alias. Confirm it is needed.
pd = pendulum.datetime(2010, 1, 1)

pd.add(days=1)
示例#22
0
    else:
        append_write = "w"  # make a new file if not

    with open("./results/" + FILENAME_RESULTS, append_write) as f:
        f.write(f"TYPE: {INSTRUCTION_TYPE} \n")
        f.write(f"ACC DEV: {output['dev']['score']} \n")
        f.write(f"ACC TEST: {output['test']['score']} \n")
        f.write("=========================== \n \n")

    logger.info(f"TYPE: {INSTRUCTION_TYPE} \n")
    logger.info(f"ACC DEV: {output['dev']['score']} \n")
    logger.info(f"ACC TEST: {output['test']['score']} \n")
    logger.info("=========================== \n \n")


with Flow("Running the Transformers for Pair Classification") as flow1:
    with tags("train"):
        train_input = prepare_rico_task(train_path,
                                        type_instructions=INSTRUCTION_TYPE)
        train_dataset = prepare_rico_layout_lm_task(train_input["data"])
    with tags("dev"):
        dev_input = prepare_rico_task(dev_path,
                                      type_instructions=INSTRUCTION_TYPE)
        dev_dataset = prepare_rico_layout_lm_task(dev_input["data"])
    with tags("test"):
        test_input = prepare_rico_task(test_path,
                                       type_instructions=INSTRUCTION_TYPE)
        test_dataset = prepare_rico_layout_lm_task(test_input["data"])
    outputs = layout_lm_trainer_task(
        train_dataset=train_dataset,
        dev_dataset=dev_dataset,
示例#23
0
def test_shell_initializes_with_basic_cmd():
    """A command given at construction time is run when the task is called with no arguments."""
    with Flow(name="test") as flow:
        shell_task = ShellTask(command="echo -n 'hello world'")()

    state = flow.run()

    assert state.is_successful()
    assert state.result[shell_task].result == "hello world"
示例#24
0
from prefect import task, Flow, Parameter
import prefect

# Module-level logger obtained from Prefect's logging utilities; used by
# print_plus_one below.
logger = prefect.utilities.logging.get_logger()


@task
def print_plus_one(x):
    """Print ``x + 1`` and also emit it as a WARNING on the module-level logger."""
    print(x + 1)
    logger.warning(x + 1)


# Flow with a single parameter `x` that defaults to 2.
with Flow('default-param') as flow:
    x = Parameter('x', default=2)
    print_plus_one(x=x)

# Registers the flow under project "Demo" at import time.
flow.register(project_name="Demo")
示例#25
0
def test_shell_returns_none_if_empty_output():
    """A command that writes nothing to stdout yields a ``None`` result."""
    with Flow(name="test") as flow:
        shell_task = ShellTask()(command="ls > /dev/null")

    state = flow.run()

    assert state.is_successful()
    assert state.result[shell_task].result is None
示例#26
0
from prefect import task, Flow

@task
def say_hello():
    """Print "Hello, world!" to standard output; returns None."""
    print("Hello, world!")

# Single-task flow.
with Flow("Hello world flow") as flow:
    say_hello()

# Run immediately and keep the returned state object.
state = flow.run()
示例#27
0
def test_shell_raises_if_no_command_provided():
    """Running a ShellTask that was never given a command raises ``TypeError``."""
    with Flow(name="test") as flow:
        ShellTask()()

    # raise_on_exception lets the error propagate out of flow.run().
    with pytest.raises(TypeError), raise_on_exception():
        assert flow.run()
示例#28
0
        f'~/github/ds-arxiv/python/resources/rmd_template.Rmd')
    fp_post = os.path.join(dir_post, 'news.Rmd')
    shutil.copy(fp_template, fp_post)
    return fp_post


@task
def knit_rmd_to_html(fp_post, written_df: bool):
    """Render the R Markdown post to HTML with ``rmarkdown::render``.

    Args:
        fp_post: Path of the ``.Rmd`` file to render.
        written_df: Rendering is skipped unless this is truthy.

    Returns:
        None. As before, the exit status of ``Rscript`` is not checked.

    Raises:
        FileNotFoundError: if the ``Rscript`` executable cannot be found.
    """
    # Local import keeps this block self-contained.
    import subprocess

    if not written_df:
        return

    # Pass the command as an argument list so no shell is involved: a path
    # containing a single quote, spaces or shell metacharacters can no longer
    # break (or inject into) the command line.
    r_expression = f'rmarkdown::render("{fp_post}")'
    subprocess.run(['Rscript', '-e', r_expression])


if __name__ == '__main__':
    with Flow('parse_arxiv') as flow:

        # Default is to filter to yesterday's publications
        # NOTE(review): a fixed date literal is passed here rather than a
        # computed one -- confirm this is intentional.
        df = df_get_arxiv(arx_list, arx_dict, '2019-12-24')

        # NOTE(review): `today` is not used in the visible lines below.
        today = datetime.now().strftime('%Y-%m-%d')

        # Creating the Post folder, save the dataframe there, and build the rmd
        dir_post = create_dir_post()
        written_df = write_df_to_csv(df=df, dir_post=dir_post)
        fp_post = copy_rmd_template(dir_post)
        knit = knit_rmd_to_html(fp_post=fp_post, written_df=written_df)
        gcp = git_commit_push()

    flow.run()
示例#29
0
        'C': 'Amarela',
        'D': 'Parda',
        'E': 'Indigena',
        'F': "",
        ' ': ""
    })
    return filtro[['cor']]


@task
def join_data(df, idadecent, idadequadrado, cor, estcivil):
    """Concatenate the inputs column-wise, keep six columns, log a preview, write a CSV.

    The five arguments are concatenated along ``axis=1``. The first rows of
    the selected columns are logged as JSON through the Prefect context
    logger, and the table is written to ``enade_tratado.csv`` without the
    index. Returns None.
    """
    frames = [df, idadecent, idadequadrado, cor, estcivil]
    output_columns = [
        'CO_GRUPO', 'TP_SEXO', 'cor', 'estcivil', 'idadecent', 'idade2'
    ]

    final = pd.concat(frames, axis=1)[output_columns]

    prefect.context.get("logger").info(final.head().to_json())
    final.to_csv('enade_tratado.csv', index=False)


import os  # used only to read the agent token from the environment below

# Enade ETL flow: load the raw data, apply filters, derive the age, marital
# status and colour columns, then join everything and write the result.
with Flow("Enade", schedule) as flow:
    path = get_raw_data()
    filtro = aplica_filtros(path)
    idadecent = constroi_idade_centralizada(filtro)
    idadequadrado = constroi_idade_cent_quad(idadecent)
    estcivil = constroi_est_civil(filtro)
    cor = constroi_cor(filtro)
    j = join_data(filtro, idadecent, idadequadrado, cor, estcivil)

flow.register(project_name="igti", idempotency_key=flow.serialized_hash())
# The agent token is a credential and must not live in source control. Read
# it from the environment instead; when the variable is unset, None is passed
# and Prefect uses its own configured token.
# NOTE(review): the token previously committed here should be revoked.
flow.run_agent(token=os.environ.get("PREFECT__CLOUD__AGENT__AUTH_TOKEN"))
示例#30
0
from prefect import Flow, Parameter, task
from prefect.engine.signals import LOOP


@task(max_retries=5, retry_delay=timedelta(seconds=2))
def compute_large_fibonacci(M):
    """Return the largest Fibonacci number not exceeding ``M`` using task looping.

    Each iteration asks a remote service for the n-th Fibonacci number. While
    that number does not exceed ``M`` the task raises ``LOOP`` carrying the
    next index and the current number; the first time it exceeds ``M`` the
    previously stored number is returned.

    Args:
        M: Upper bound for the Fibonacci number.

    Returns:
        The last Fibonacci number seen that was not greater than ``M``.

    Raises:
        LOOP: to request another iteration with the updated payload.
        requests.RequestException: on timeout or a non-2xx response.
    """
    # we extract the accumulated task loop result from context
    loop_payload = prefect.context.get("task_loop_result", {})

    n = loop_payload.get("n", 1)
    fib = loop_payload.get("fib", 1)

    # The endpoint path had been mangled by e-mail obfuscation; this restores
    # the versioned service name. A timeout keeps a stalled connection from
    # hanging the task, and raise_for_status surfaces HTTP errors explicitly
    # instead of failing later on an unexpected payload.
    response = requests.post(
        "https://nemo.api.stdlib.com/fibonacci@0.0.1/",
        data={"nth": n},
        timeout=30,
    )
    response.raise_for_status()
    next_fib = response.json()

    if next_fib > M:
        return fib  # return statements end the loop

    raise LOOP(message=f"Fib {n}={next_fib}",
               result=dict(n=n + 1, fib=next_fib))


# Flow with a single parameter `M` fed to the looping task.
with Flow("fibonacci") as flow:
    M = Parameter("M")
    fib_num = compute_large_fibonacci(M)

# Run with M=100 and print the result stored for the looping task.
flow_state = flow.run(M=100)
print(flow_state.result[fib_num].result)  # 89