Example #1
def test_is_pickleable_after_start(self):
    # the executor must stay picklable even while its context is active
    e = LocalDaskExecutor()
    with e.start():
        post = cloudpickle.loads(cloudpickle.dumps(e))
        assert isinstance(post, LocalDaskExecutor)
Example #2
from datetime import timedelta

from prefect import Flow, Task
from prefect.engine.executors import LocalDaskExecutor
from prefect.environments import LocalEnvironment
from prefect.environments.storage import GitHub
from prefect.schedules import IntervalSchedule


# the snippet begins mid-method, so this class wrapper, its signature, and
# the failure condition are assumptions; Version and Root below are
# presumably similar Task subclasses
class Node(Task):
    def run(self, fail: bool = False):
        if fail:
            raise ValueError(f"{self.name} failed :(")
        else:
            self.logger.info(f"{self.name} complete.")
            return list(range(5))


storage = GitHub(
    repo="znicholasbrown/project-schematics",
    path="flows/CaptureProductMetrics.py",
    secrets=["GITHUB_AUTH_TOKEN"],
    ref="master",
)

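# LocalDaskExecutor(scheduler="threads") runs tasks on a local dask thread
# pool; num_workers caps how many tasks run at once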
environment = LocalEnvironment(
    labels=[],
    executor=LocalDaskExecutor(scheduler="threads", num_workers=6),
)

schedule = IntervalSchedule(interval=timedelta(minutes=5))
with Flow(
        "Capture Product Metrics",
        schedule=schedule,
        storage=storage,
        environment=environment,
) as flow:
    version = Version()

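    # calling a task instance inside the Flow block binds it to the flow;
    # upstream_tasks adds ordering-only dependencies (no data passed)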
    root = Root(checkpoint=False)(upstream_tasks=[version])

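    # both nodes depend only on root, so the thread pool can run them
    # concurrently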
    node1_1 = Node(name="Fetch Users")(upstream_tasks=[root])
    node1_2 = Node(name="Fetch Extra Params")(upstream_tasks=[root])
Example #3
def test_wait(self):
    e = LocalDaskExecutor()
    with e.start():
        # wait() resolves futures returned by submit(); plain values pass
        # through unchanged
        assert e.wait(1) == 1
        assert e.wait(prefect) is prefect
        assert e.wait(e.submit(lambda: 1)) == 1
        assert e.wait(e.submit(lambda x: x, 1)) == 1
        assert e.wait(e.submit(lambda x: x, x=1)) == 1
        assert e.wait(e.submit(lambda: prefect)) is prefect
Example #4
def test_is_pickleable(self):
    e = LocalDaskExecutor()
    post = cloudpickle.loads(cloudpickle.dumps(e))
    assert isinstance(post, LocalDaskExecutor)
Example #5
def test_responds_to_kwargs(self):
    e = LocalDaskExecutor(scheduler="threads")
    assert e.scheduler == "threads"
Example #6
def test_start_yields_cfg(self):
    # start() yields the dask configuration active while the executor runs
    with LocalDaskExecutor(scheduler="threads").start() as cfg:
        assert cfg["scheduler"] == "threads"
Example #7
def mproc_local():
    "Multiprocessing executor using local dask (not distributed cluster)"
    # "processes" uses dask's multiprocessing scheduler, so task inputs and
    # outputs must be picklable
    yield LocalDaskExecutor(scheduler="processes")
Example #8
                fastqs.append(os.path.join(dir_, f))
    result = " ".join(fastqs)
    print(result)
    return result


def partition_strategy():
    pass


if __name__ == '__main__':
    env = os.environ.copy()
    env["PATH"] = env["PATH"] + ":/home/kevin/anaconda3/envs/albacore/bin"
    runner = ShellRunner(env=env)
    cluster = LocalCluster()
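    # NOTE: LocalDaskExecutor does not accept a scheduler address; connecting
    # to the LocalCluster above would need
    # DaskExecutor(address=cluster.scheduler_address)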
    executor = LocalDaskExecutor(address=cluster.scheduler_address)

    # Data
    albacore_input = [
        "/home/kevin/bin/hydra_nanopore/tests/test_data/minion_sample_raw_data/Experiment_01/sample_02_local/pass/2",
        "/home/kevin/bin/hydra_nanopore/tests/test_data/minion_sample_raw_data/Experiment_01/sample_02_local/pass/3",
        "/home/kevin/bin/hydra_nanopore/tests/test_data/minion_sample_raw_data/Experiment_01/sample_02_local/pass/4",
        "/home/kevin/bin/hydra_nanopore/tests/test_data/minion_sample_raw_data/Experiment_01/sample_02_local/pass/5"
    ]
    albacore_output = [
        "/home/kevin/bin/hydra_nanopore/tests/test_data/output/2",
        "/home/kevin/bin/hydra_nanopore/tests/test_data/output/3",
        "/home/kevin/bin/hydra_nanopore/tests/test_data/output/4",
        "/home/kevin/bin/hydra_nanopore/tests/test_data/output/5"
    ]
Example #9
def test_create_fargate_task_environment_with_executor():
    executor = LocalDaskExecutor()
    environment = FargateTaskEnvironment(executor=executor)
    assert environment.executor is executor
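    # the environment simply stores the executor that the flow runner will
    # use when the flow executes on AWS Fargate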
Example #10
    load = postgres.load_datafile.map(datafile=downloads)
    # commit new data to database and clean up
    complete = postgres.complete_load()

    # make sure prep runs before load
    flow.add_edge(upstream_task=prep, downstream_task=load)
    # make sure load runs before complete
    flow.add_edge(upstream_task=load, downstream_task=complete)

if __name__ == "__main__":
    logger = prefect.context.get("logger")

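    # dask, mode, and reset_db are custom keys, presumably set in the user's
    # config.toml and read back through prefect.config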
    dask = prefect.config.dask
    mode = prefect.config.mode
    reset_db = prefect.config.reset_db

    all_datasets = dict(prefect.config.socrata.datasets)
    years = list(prefect.config.data.years)

    # use only year datasets if in full mode otherwise use all w/since
    if mode == 'full':
        run_datasets = dict((k, all_datasets[k]) for k in years)
    else:
        run_datasets = all_datasets

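    # the executor is chosen per run: flow.run() accepts one directly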
    logger.info(
        f"Starting \"{mode}\" flow for {', '.join(run_datasets.keys())}"
        f" {'and resetting db' if reset_db else ''}")
    state = flow.run(datasets=list(run_datasets.values()),
                     executor=LocalDaskExecutor() if dask else LocalExecutor())
Example #11
import prefect
from prefect import Flow, task
from prefect.engine.executors import LocalDaskExecutor, DaskExecutor
from prefect.engine.state import Failed
from prefect.environments import LocalEnvironment
from prefect.utilities.notifications import slack_notifier

flow_name = "logger-test"


@task(timeout=60 * 60 * 2)
def test():
    from prefect import context
    logger = context.get("logger")
    logger.info("this is a test")


with Flow(
        flow_name,
        environment=LocalEnvironment(LocalDaskExecutor()),
        # state_handlers=[slack_notifier(only_states=[Failed])],
) as flow:
    test()
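# registering requires a Prefect backend (Cloud or Server) with a project
# named "Demo"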
flow.register("Demo")
Example #12
import random
from time import sleep

from prefect import Flow, task
from prefect.engine.executors import DaskExecutor, LocalDaskExecutor
from prefect.environments import LocalEnvironment


# the snippet begins at dec; inc below is reconstructed by symmetry and is
# an assumption
@task
def inc(x):
    sleep(random.random() / 10)
    return x + 1


@task
def dec(x):
    sleep(random.random() / 10)
    return x - 1


@task
def add(x, y):
    sleep(random.random() / 10)
    return x + y


@task(name="sum")
def list_sum(arr):
    return sum(arr)


# executor = DaskExecutor(address="localhost:8786")
executor = LocalDaskExecutor()
with Flow("dask-example",
          environment=LocalEnvironment(executor=executor)) as flow:
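    # each map call fans out over 100 inputs, which the local dask scheduler
    # can run in parallel; add pairs the two mapped lists elementwise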
    incs = inc.map(x=range(100))
    decs = dec.map(x=range(100))
    adds = add.map(x=incs, y=decs)
    total = list_sum(adds)
# executor = DaskExecutor(address="tcp://10.254.248.214:8786")
# flow.run(executor=executor)
flow.register("Demo")
# flow.run_agent()
Example #13
from pathlib import Path

from prefect import Flow, Parameter
from prefect.engine.executors import LocalDaskExecutor
from prefect.engine.results import LocalResult
from prefect.environments import LocalEnvironment
from prefect.environments.storage import Docker
from prefect.tasks.secrets import PrefectSecret

import tasks

with Flow(name="CommonLit SQL-to-S3",
          storage=Docker(
              registry_url='.../prefect-flows/',
              base_image='.../prefect-flows/prefect:0.13.15-python3.8',
              python_dependencies=list(
                  map(str.strip, (Path(__file__).parent /
                                  'requirements.txt').open().readlines())),
              env_vars={'PYTHONPATH': '/opt:${PYTHONPATH}'},
              files={Path(__file__).parent / 'tasks.py': '/opt/tasks.py'}),
          environment=LocalEnvironment(executor=LocalDaskExecutor(
              scheduler='threads', num_workers=4),
                                       labels=["cae"]),
          result=LocalResult(dir='./results')) as flow:
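    # Parameter values are supplied (or fall back to defaults) at
    # flow-run time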
    prefect_secrets = PrefectSecret('COMMON_LIT_SECRETS')
    destination_directory = Parameter('destination_directory', default=None)
    tables = Parameter('tables', required=True)
    indexed_field = Parameter('indexed_field', default='id', required=True)
    starting_index = Parameter('starting_index', default=0, required=True)
    total_records_to_move = Parameter('total_records_to_move',
                                      default=0,
                                      required=True)
    number_of_records_in_batch = Parameter('number_of_records_in_batch',
                                           default=100000,
                                           required=False)
    max_concurrent_connections = Parameter('max_concurrent_connections',
                                           default=50)
Example #14
def test_prefect_executors(train_data, grid_search, parallel_columns):
    try:
        from prefect.engine.executors import DaskExecutor
        from prefect.engine.executors import LocalDaskExecutor
        from prefect.engine.executors import LocalExecutor
        from dask.distributed import Client
    except ImportError:
        print("`prefect` not installed, skipping the test...")
    else:
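        # start a dask.distributed cluster up-front for the
        # "dask_already_running" case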
        client = Client()

        executors = {
            "dask_already_running":
            DaskExecutor(address=client.scheduler.address),
            "local": LocalExecutor(),
            "local_dask": LocalDaskExecutor(),
            # DaskExecutor() with no address spins up its own temporary local
            # cluster, just to check the interface
            "dask_create_on_call": DaskExecutor(),
        }

        for executor_name, executor in executors.items():
            flow, state = run_model_selection(
                df=train_data,
                grid_search=grid_search,
                target_col_name="Quantity",
                frequency="D",
                partition_columns=["Product"],
                parallel_over_columns=parallel_columns,
                include_rules=None,
                exclude_rules=None,
                country_code_column="Holidays_code",
                output_path="",
                persist_cv_results=False,
                persist_cv_data=False,
                persist_model_reprs=False,
                persist_best_model=False,
                persist_partition=False,
                persist_model_selector_results=False,
                visualize_success=False,
                executor=executor,
            )
            assert state.is_successful()

            results = select_model_general(
                df=train_data,
                grid_search=grid_search,
                target_col_name="Quantity",
                frequency="D",
                partition_columns=["Product"],
                parallel_over_columns=parallel_columns,
                executor=executor,
                include_rules=None,
                exclude_rules=None,
                country_code_column="Holidays_code",
                output_path="",
                persist_cv_results=False,
                persist_cv_data=False,
                persist_model_reprs=False,
                persist_best_model=False,
                persist_partition=False,
                persist_model_selector_results=False,
            )

            assert len(results) == len(
                train_data[parallel_columns + ["Product"]].drop_duplicates())
            assert isinstance(results[0], ModelSelectorResult)

            if executor_name == "dask_already_running":
                client.shutdown()

        if client.status != "closed":
            client.shutdown()
Example #15
def test_scheduler_defaults_to_threads(self):
    e = LocalDaskExecutor()
    assert e.scheduler == "threads"
Example #16
def sync():
    "Synchronous dask (not dask.distributed) executor"
    # with no scheduler argument, LocalDaskExecutor defaults to "threads"
    yield LocalDaskExecutor()
Example #17
from prefect import task, Flow
from prefect.environments import LocalEnvironment
from prefect.engine.executors import LocalDaskExecutor

@task
def vals():
    return [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

@task
def printv(v):
    print(v)

with Flow("local-dask", environment=LocalEnvironment(executor=LocalDaskExecutor(nthreads=4))) as f:
    v = vals()
    printv.map(v)

f.register(project_name="Demo")