Example #1
def test_initial_checkpoint_write(n=2):
    """1. Launch a few apps and write the checkpoint once a few have completed
    """

    d = {}
    print("Launching : ", n)
    for i in range(0, n):
        d[i] = slow_double(i)
    print("Done launching")

    for i in range(0, n):
        d[i].result()
    print("Done sleeping")
    cpt_dir = parsl.dfk().checkpoint()

    cptpath = cpt_dir + '/dfk.pkl'
    print("Path exists : ", os.path.exists(cptpath))
    assert os.path.exists(cptpath), "DFK checkpoint missing: {0}".format(
        cptpath)

    cptpath = cpt_dir + '/tasks.pkl'
    print("Path exists : ", os.path.exists(cptpath))
    assert os.path.exists(cptpath), "Tasks checkpoint missing: {0}".format(
        cptpath)

    return parsl.dfk().run_dir
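Note: Examples 1, 10 and 24 assume a slow_double app defined elsewhere in the test module. A minimal sketch, assuming parsl's python_app decorator with caching enabled so results can be checkpointed:

from parsl import python_app

@python_app(cache=True)
def slow_double(x):
    # Sleep briefly so the checkpoint is written while work is "in flight"
    import time
    time.sleep(0.1)
    return x * 2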
Example #2
def test_initial_checkpoint_write(n=2):
    """1. Launch a few apps and write the checkpoint once a few have completed
    """
    config = fresh_config()
    config.checkpoint_mode = 'manual'
    parsl.load(config)
    results = launch_n_random(n)

    cpt_dir = parsl.dfk().checkpoint()

    cptpath = cpt_dir + '/dfk.pkl'
    print("Path exists : ", os.path.exists(cptpath))
    assert os.path.exists(cptpath), "DFK checkpoint missing: {0}".format(
        cptpath)

    cptpath = cpt_dir + '/tasks.pkl'
    print("Path exists : ", os.path.exists(cptpath))
    assert os.path.exists(cptpath), "Tasks checkpoint missing: {0}".format(
        cptpath)

    run_dir = parsl.dfk().run_dir

    parsl.dfk().cleanup()
    parsl.clear()

    return run_dir, results
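Note: Example 2 also assumes fresh_config (from the test configuration modules) and a launch_n_random helper. A possible sketch of the helper; the uuid_app name is hypothetical:

from parsl import python_app

@python_app(cache=True)
def uuid_app():
    import uuid
    return uuid.uuid4()

def launch_n_random(n=2):
    """Launch n apps and wait for all of them to complete."""
    futures = [uuid_app() for _ in range(n)]
    return [f.result() for f in futures]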
Example #3
def test_regression_stage_in_does_not_stage_out():
    no_stageout_config = Config(
        executors=[
            ThreadPoolExecutor(
                label='local_threads',
                storage_access=[NoOpTestingFileStaging(allow_stage_out=False)]
            )
        ],
    )

    parsl.load(no_stageout_config)

    f = open("test.4", "a")
    f.write("test")
    f.close()

    # Test that stage in does not invoke stage out. If stage out is
    # attempted, then the NoOpTestingFileStaging provider will raise
    # an exception which should propagate here.
    app_test_in(File("test.4")).result()

    # Test that stage out exceptions propagate to user code.
    with pytest.raises(NoOpError):
        touch("test.5", outputs=[File("test.5")]).result()

    parsl.dfk().cleanup()
    parsl.clear()
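Note: Examples 3 and 4 rely on NoOpTestingFileStaging and NoOpError, which are not shown. A rough sketch against parsl's Staging interface; the exact hook where the real test helper raises may differ:

from parsl.data_provider.staging import Staging

class NoOpError(RuntimeError):
    """Raised when a disallowed staging direction is exercised."""

class NoOpTestingFileStaging(Staging):
    def __init__(self, allow_stage_in=True, allow_stage_out=True):
        self.allow_stage_in = allow_stage_in
        self.allow_stage_out = allow_stage_out

    def can_stage_in(self, file):
        # Claim the file, but fail loudly if stage-in is disallowed
        if not self.allow_stage_in:
            raise NoOpError("stage in attempted")
        return True

    def can_stage_out(self, file):
        # Claim the file, but fail loudly if stage-out is disallowed
        if not self.allow_stage_out:
            raise NoOpError("stage out attempted")
        return True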
Example #4
def test_regression_stage_out_does_not_stage_in():
    no_stageout_config = Config(
        executors=[
            ThreadPoolExecutor(
                label='local_threads',
                storage_access=[NoOpTestingFileStaging(allow_stage_in=False)]
            )
        ]
    )

    parsl.load(no_stageout_config)

    # Test that the helper app runs with no staging
    touch("test.1", outputs=[]).result()

    # Test with stage-out, checking that provider stage in is never
    # invoked. If stage-in is invoked, then the NoOpTestingFileStaging
    # provider will raise an exception, which should propagate to
    # .result() here.
    touch("test.2", outputs=[File("test.2")]).result()

    # Test that stage-in exceptions propagate out to user code.
    with pytest.raises(NoOpError):
        touch("test.3", inputs=[File("test.3")]).result()

    parsl.dfk().cleanup()
    parsl.clear()
Example #5
def test_one_block():

    oneshot_provider = OneShotLocalProvider(
        channel=LocalChannel(),
        init_blocks=0,
        min_blocks=0,
        max_blocks=10,
        launcher=SimpleLauncher(),
    )

    config = Config(
        executors=[
            HighThroughputExecutor(
                label="htex_local",
                worker_debug=True,
                cores_per_worker=1,
                provider=oneshot_provider,
            )
        ],
        strategy='simple',
    )

    parsl.load(config)

    f = app()
    f.result()
    parsl.dfk().cleanup()
    parsl.clear()

    assert oneshot_provider.recorded_submits == 1
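Note: Example 5 assumes a OneShotLocalProvider that records how many blocks were submitted. A minimal sketch subclassing parsl's LocalProvider:

from parsl.providers import LocalProvider

class OneShotLocalProvider(LocalProvider):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.recorded_submits = 0

    def submit(self, *args, **kwargs):
        # Count each block submission, then delegate to LocalProvider
        self.recorded_submits += 1
        return super().submit(*args, **kwargs)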
Example #6
def test_wait_for_tasks():
    slow_app(5)
    slow_app(10)  # This task has a higher task ID and runs for a longer period
    slow_app(3)   # This task has a higher task ID but runs for a shorter period
    parsl.dfk().wait_for_current_tasks()
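Note: Example 6 assumes a slow_app that sleeps for the given duration; a minimal sketch:

from parsl import python_app

@python_app
def slow_app(duration):
    import time
    time.sleep(duration)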
Example #7
def test_row_counts():
    # this is imported here rather than at module level because
    # it isn't available in a plain parsl install, so this module
    # would otherwise fail to import and break even a basic test
    # run.
    import sqlalchemy
    from parsl.tests.configs.htex_local_alternate import fresh_config

    if os.path.exists("monitoring.db"):
        logger.info("Monitoring database already exists - deleting")
        os.remove("monitoring.db")

    logger.info("loading parsl")
    parsl.load(fresh_config())

    logger.info("invoking and waiting for result")
    assert this_app().result() == 5

    logger.info("cleaning up parsl")
    parsl.dfk().cleanup()
    parsl.clear()

    # at this point, we should find one row in the monitoring database.

    logger.info("checking database content")
    engine = sqlalchemy.create_engine("sqlite:///monitoring.db")
    with engine.begin() as connection:

        result = connection.execute("SELECT COUNT(*) FROM workflow")
        (c, ) = result.first()
        assert c == 1

        result = connection.execute("SELECT COUNT(*) FROM task")
        (c, ) = result.first()
        assert c == 1

        result = connection.execute("SELECT COUNT(*) FROM try")
        (c, ) = result.first()
        assert c == 1

        result = connection.execute("SELECT COUNT(*) FROM status, try "
                                    "WHERE status.task_id = try.task_id "
                                    "AND status.task_status_name='exec_done' "
                                    "AND task_try_time_running is NULL")
        (c, ) = result.first()
        assert c == 0

        # Two entries: one showing manager active, one inactive
        result = connection.execute("SELECT COUNT(*) FROM node")
        (c, ) = result.first()
        assert c == 2

        # There should be one block polling status
        # local provider has a status_polling_interval of 5s
        result = connection.execute("SELECT COUNT(*) FROM block")
        (c, ) = result.first()
        assert c >= 2

    logger.info("all done")
Example #8
def wait_for_task_completion(pytestconfig):
    """If we're in a config-file based mode, wait for task completion between
       each test. This will detect early on (by hanging) if particular test
       tasks are not finishing, rather than silently falling off the end of
       the test run with tasks still in progress.
       In local mode, this fixture does nothing, as there isn't anything
       reasonable to assume about DFK behaviour here.
    """
    config = pytestconfig.getoption('config')[0]
    yield
    if config != 'local':
        parsl.dfk().wait_for_current_tasks()
Example #9
def test_summary(caplog):

    parsl.load(fresh_config())

    succeed().result()
    fail().exception()

    parsl.dfk().cleanup()
    parsl.clear()

    assert "Summary of tasks in DFK:" in caplog.text
    assert "Tasks in state States.exec_done: 1" in caplog.text
    assert "Tasks in state States.failed: 1" in caplog.text
Example #10
def test_garbage_collect():
    """ Launches an app with a dependency and waits till it's done and asserts that
    the internal refs were wiped
    """
    x = slow_double(slow_double(10))

    if not x.done():
        assert parsl.dfk().tasks[x.tid]['app_fu'] == x, \
            "Tasks table should have app_fu ref before done"

    x.result()
    parsl.dfk().checkpoint()

    assert parsl.dfk().tasks[x.tid]['app_fu'] is None, \
        "Tasks should have app_fu ref wiped after task completion"
Example #11
def launch_tasks(n=10):
    """ Launch N arbitrary tasks onto an active job
    
    We will launch half the tasks with ranks on a single node, and the rest
    will be spread over 2 nodes.
    """
    dfk = parsl.dfk()
    name = list(dfk.executors.keys())[0]

    # Get the nodes requested per block and the worker slots per node
    nodes_requested = dfk.executors[name].provider.nodes_per_block
    slots = dfk.executors[name].max_workers

    x = platform(sleep=0).result()
    print(f"Nodes requested : {nodes_requested}")
    print(f"Slots per job : {slots}")
    print(f"Platform info : {x}")
    print("Slots available   : {}".format(dfk.executors[name].connected_workers))

    futures = []
    # Launch a mix of single node and 2 node tasks
    for i in range(n):
        if i % 2 == 0:
            x = mpi_hello(1, 4, i, mock=False)
        else:
            x = mpi_hello(2, 4, i, mock=False)
        futures.append(x)

    # wait for everything
    for i in futures:
        print(i.result())
        print(i.stdout, print_file(i.stdout))
Example #12
def load_dfk_session(request, pytestconfig):
    """Load a dfk around entire test suite, except in local mode.

    The special path `local` indicates that configuration will not come
    from a pytest managed configuration file; in that case, see
    load_dfk_local_module for module-level configuration management.
    """

    config = pytestconfig.getoption('config')[0]

    if config != 'local':
        spec = importlib.util.spec_from_file_location('', config)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        if DataFlowKernelLoader._dfk is not None:
            raise ValueError(
                "DFK didn't start as None - there was a DFK from somewhere already"
            )

        dfk = parsl.load(module.config)

        yield

        if parsl.dfk() != dfk:
            raise ValueError("DFK changed unexpectedly during test")
        dfk.cleanup()
        parsl.clear()
    else:
        yield
Example #13
def test_total_latency(n=100):
    d = {}

    dfk = parsl.dfk()
    name = list(dfk.executors.keys())[0]

    start = time.time()
    double(99999).result()
    delta = time.time() - start
    print("[{}] Priming done in {:=10.3f}ms".format(name, delta * 1000))

    launch_latency = []
    for i in range(0, n):
        start = time.time()
        d[i] = double(i)
        d[i].result()
        delta = time.time() - start
        launch_latency.append(delta * 1000)

    print("[{}] Latency min:{:=10.3f}ms max:{:=10.3f}ms avg:{:=10.3f}ms".format(name,
                                                                                min(launch_latency),
                                                                                max(launch_latency),
                                                                                sum(launch_latency) / len(launch_latency)))

    with open("latency.{}.pkl".format(name), 'wb') as f:
        pickle.dump(launch_latency, f)
    print("[TEST STATMS] test_parallel_for [SUCCESS]")
    return d
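Note: Examples 13 and 16 assume a trivial double app; a minimal sketch:

from parsl import python_app

@python_app
def double(x):
    return x * 2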
Example #14
def test_that_it_fails():
    x = dummy()
    failed = False
    try:
        x.result()
    except Exception as ex:
        print(ex)
        failed = True
    if not failed:
        raise Exception("The app somehow ran without a valid worker")

    assert parsl.dfk().config.executors[0]._executor_bad_state.is_set()

    # htex needs shutting down explicitly because dfk.cleanup() will not
    # do that, as it is in bad state
    parsl.dfk().config.executors[0].shutdown()
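Note: Example 14 assumes a dummy app that would succeed on a healthy worker; the failure being tested comes from the executor's bad state, not from the app body. A minimal sketch:

from parsl import python_app

@python_app
def dummy():
    pass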
Example #15
def test_platform(n=2, sleep_dur=10):
    """ This should sleep to make sure that concurrent apps will go to different workers
    on different nodes.
    """
    config = fresh_config()
    if config.executors[0].label == "htex_local":
        return

    parsl.load(config)

    dfk = parsl.dfk()
    name = list(dfk.executors.keys())[0]
    print("Trying to get executor : ", name)

    x = [platform(sleep=1) for i in range(2)]
    print([i.result() for i in x])

    print("Executor : ", dfk.executors[name])
    print("Connected   : ", dfk.executors[name].connected_workers)
    print("Outstanding : ", dfk.executors[name].outstanding)

    d = []
    for i in range(0, n):
        x = platform(sleep=sleep_dur)
        d.append(x)

    pinfo = set([i.result() for i in d])
    assert len(pinfo) == 2, "Expected two nodes, instead got {}".format(pinfo)

    print("Test passed")

    dfk.cleanup()
    parsl.clear()
    return True
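Note: Examples 15, 16, 17, 19 and 20 assume a platform app that sleeps and then reports the node it ran on; a minimal sketch (the imports sit inside the body because app bodies execute remotely):

from parsl import python_app

@python_app
def platform(sleep=10, stdout=None):
    import platform
    import time
    time.sleep(sleep)
    return platform.uname()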
Example #16
def test_manager_fail(n=2, dur=10):
    """ Test manager failure due to intermittent n/w loss.
    Run parsl on laptop, start the worker on a remote node.
    Once connected and task is executed, kill the network
    confirm that the interchange does not error out.
    """
    start = time.time()

    # Prime a worker
    double(5).result()

    # Launch the slow task to a failing node.
    x = platform(sleep=dur)
    x.result()

    # At this point we know that the worker is connected.
    # Now we need to kill the network to mimic a n/w failure.
    time.sleep(0.1)
    print('*' * 80)
    print("Manually kill the network to create a n/w failure")
    print("We should see suppression of the failure")
    print('*' * 80)

    print("Duration : {0}s".format(time.time() - start))
    print("[TEST STATUS] test_parallel_for [SUCCESS]")
    dfk = parsl.dfk()
    for i in range(200):
        print(dfk.executors['htex_local'].connected_workers)
        time.sleep(1)

    return True
Example #17
def test_platform(n=2, sleep_dur=10):
    """This should sleep to make sure that concurrent apps will go to
    different workers
    """

    dfk = parsl.dfk()
    name = list(dfk.executors.keys())[0]
    print("Trying to get executor : ", name)

    x = platform(sleep=0)
    print(x.result())

    print("Executor : ", dfk.executors[name])
    print("Connected   : ", dfk.executors[name].connected_workers)
    print("Outstanding : ", dfk.executors[name].outstanding)

    d = []
    for i in range(0, n):
        x = platform(sleep=sleep_dur)
        d.append(x)

    pinfo = set([i.result() for i in d])
    assert len(pinfo) == 2, "Expected two nodes, instead got {}".format(pinfo)

    print("Test passed")
    return True
Example #18
def test_resource(n=2):
    executors = parsl.dfk().executors
    executor = None
    for label in executors:
        if label != '_parsl_internal':
            executor = executors[label]
            break

    # Specify an incomplete set of resources ('disk' is missing)
    spec = {'cores': 2, 'memory': 1000}
    fut = double(n, parsl_resource_specification=spec)
    try:
        fut.result()
    except UnsupportedFeatureError:
        assert not isinstance(executor, WorkQueueExecutor)
    except Exception as e:
        assert isinstance(e, ExecutorError)

    # Specify resources with a wrong key
    # ('cpus' is incorrect, should be 'cores')
    spec = {'cpus': 2, 'memory': 1000, 'disk': 1000}
    fut = double(n, parsl_resource_specification=spec)
    try:
        fut.result()
    except UnsupportedFeatureError:
        assert not isinstance(executor, WorkQueueExecutor)
    except Exception as e:
        assert isinstance(e, ExecutorError)
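Note: Example 18 passes parsl_resource_specification at call time; with the WorkQueueExecutor the app signature must accept that keyword. A minimal sketch of such a double app:

from parsl import python_app

@python_app
def double(x, parsl_resource_specification={}):
    return x * 2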
Example #19
def test_platform(n=2):
    # sync
    logger = logging.getLogger("parsl.dataflow.dflow")
    skip_tags = ['Task', 'dependencies']
    for skip in skip_tags:
        skip_filter = SkipTasksFilter(skip)
        logger.addFilter(skip_filter)

    x = platform(sleep=0)
    print(x.result())

    d = []
    for i in range(0, n):
        x = platform(sleep=5)
        d.append(x)

    print(set([i.result() for i in d]))

    dfk = parsl.dfk()
    dfk.cleanup()

    with open("{}/parsl.log".format(dfk.run_dir)) as f:

        for line in f.readlines():
            if any(skip in line for skip in skip_tags):
                raise Exception("Logline {} contains a skip tag".format(line))
    return True
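Note: Example 19 assumes a SkipTasksFilter; a minimal sketch using the standard logging.Filter interface:

import logging

class SkipTasksFilter(logging.Filter):
    """Drop any log record whose message contains the given tag."""

    def __init__(self, tag):
        super().__init__()
        self.tag = tag

    def filter(self, record):
        # Returning False suppresses the record
        return self.tag not in record.getMessage()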
Example #20
def test_platform(n=2, sleep=1):

    dfk = parsl.dfk()
    # sync
    x = platform(sleep=0)
    print(x.result())

    name = list(dfk.executors.keys())[0]
    print("Trying to get executor : ", name)

    print("Executor : ", dfk.executors[name])
    print("Connected   : ", dfk.executors[name].connected_workers)
    print("Outstanding : ", dfk.executors[name].outstanding)
    d = []
    for i in range(0, n):
        x = platform(sleep=sleep)
        d.append(x)

    print("Connected   : ", dfk.executors[name].connected_workers)
    print("Outstanding : ", dfk.executors[name].outstanding)

    print(set([i.result() for i in d]))

    print("Connected   : ", dfk.executors[name].connected_workers)
    print("Outstanding : ", dfk.executors[name].outstanding)

    return True
Example #21
def load_dfk(config):
    """Load the dfk before running a test.

    The special path `local` indicates that whatever configuration is loaded
    locally in the test should not be replaced. Otherwise, it is expected that
    the supplied file contains a dictionary called `config`, which will be
    loaded before the test runs.

    Args:
        config (str) : path to config to load (this is a parameterized pytest fixture)
    """
    if config != 'local':
        spec = importlib.util.spec_from_file_location('', config)
        try:
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            module.config.run_dir = get_rundir()  # Give unique rundir; needed when running with -n=X where X > 1.

            if DataFlowKernelLoader._dfk is not None:
                raise ValueError("DFK didn't start as None - there was a DFK from somewhere already")

            parsl.clear()
            dfk = parsl.load(module.config)

            yield

            if parsl.dfk() != dfk:
                raise ValueError("DFK changed unexpectedly during test")
            dfk.cleanup()
            parsl.clear()
        except KeyError:
            pytest.skip('options in user_opts.py not configured for {}'.format(config))
    else:
        yield
Example #22
def _cleanup(self):
    """Close out any resources needed by the method server"""
    # Wait until all tasks have finished
    dfk = parsl.dfk()
    dfk.wait_for_current_tasks()
    logger.info(
        f"All tasks have completed for {self.__class__.__name__} on {self.ident}"
    )
Example #23
def setup(release, working_dir=None):
    _, _, sandbox = Sandbox(release=release).package()
    config = parsl.dfk().config
    for executor in config.executors:
        if isinstance(executor, ThreadPoolExecutor):
            shutil.copy(sandbox, os.path.join(os.environ['HOME'], '.parslcms'))
        else:
            executor.provider.channel.push_file(sandbox, '.parslcms')
Example #24
def test_garbage_collect():
    """ Launches an app with a dependency and waits till it's done and asserts that
    the internal refs were wiped
    """
    x = slow_double(slow_double(10))

    if not x.done():
        assert parsl.dfk().tasks[x.tid]['app_fu'] == x, \
            "Tasks table should have app_fu ref before done"

    x.result()
    if parsl.dfk().checkpoint_mode is not None:
        # Explicitly call checkpoint if checkpoint_mode is enabled, covering
        # cases like manual/periodic where checkpointing may be deferred.
        parsl.dfk().checkpoint()

    time.sleep(0.2)  # Give enough time for task wipes to work
    assert x.tid not in parsl.dfk().tasks, \
        "Task record should be wiped after task completion"
Example #25
def test_implicit_staging_https_additional_executor():
    """Test implicit staging for an HTTPS file.

    Create a remote input file (https) that points to unsorted.txt.
    """

    # unsorted_file = File('https://testbed.petrel.host/test/public/unsorted.txt')
    unsorted_file = File('https://gist.githubusercontent.com/yadudoc/7f21dd15e64a421990a46766bfa5359c/'
                         'raw/7fe04978ea44f807088c349f6ecb0f6ee350ec49/unsorted.txt')

    # Create a local file for output data
    sorted_file = File('sorted.txt')

    other_executor = parsl.ThreadPoolExecutor(label='other')

    parsl.dfk().add_executors([other_executor])

    f = sort_strings_additional_executor(inputs=[unsorted_file], outputs=[sorted_file])
    f.result()
Example #26
def test_scale_out():
    dfk = parsl.dfk()

    # Since we have init_blocks = 0, at this point we should have 0 managers
    print("Executor : ", dfk.executors['htex_local'])
    print("Before")
    print("Managers   : ", dfk.executors['htex_local'].connected_managers)
    print("Outstanding: \n", dfk.executors['htex_local'].outstanding)
    assert len(dfk.executors['htex_local'].connected_managers) == 0, \
        "Expected 0 managers at start"

    fus = [sleeper(i) for i in [3, 3, 25, 25, 50]]

    for i in range(2):
        fus[i].result()

    # At this point, since at least one task has been processed, we should have at least 1 manager
    print("Between")
    print("Managers   : ", dfk.executors['htex_local'].connected_managers)
    print("Outstanding: \n", dfk.executors['htex_local'].outstanding)
    assert len(dfk.executors['htex_local'].connected_managers) == 5, \
        "Expected 5 managers once tasks are running"

    time.sleep(1)
    print("running")
    print("Managers   : ", dfk.executors['htex_local'].connected_managers)
    print("Outstanding: \n", dfk.executors['htex_local'].outstanding)
    assert len(dfk.executors['htex_local'].connected_managers) == 5, \
        "Expected 5 managers 3 seconds after 2 tasks finished"

    time.sleep(21)
    print("Middle")
    print("Managers   : ", dfk.executors['htex_local'].connected_managers)
    print("Outstanding: \n", dfk.executors['htex_local'].outstanding)
    assert len(dfk.executors['htex_local'].connected_managers) == 3, \
        "Expected 3 managers before cleaning up"

    for i in range(2, 4):
        fus[i].result()
    time.sleep(21)
    print("Finalizing result")
    print("Managers   : ", dfk.executors['htex_local'].connected_managers)
    print("Outstanding: \n", dfk.executors['htex_local'].outstanding)
    assert len(dfk.executors['htex_local'].connected_managers) == 2, \
        "Expected 2 managers before finishing, lower bound by min_blocks"

    [x.result() for x in fus]
    print("Cleaning")
    print("Managers   : ", dfk.executors['htex_local'].connected_managers)
    print("Outstanding: \n", dfk.executors['htex_local'].outstanding)
    time.sleep(21)
    assert len(dfk.executors['htex_local'].connected_managers) == 2, \
        "Expected 2 managers when no tasks, lower bound by min_blocks"
Example #27
def test_1316_local_path_on_execution_side_sp2():
    """This test demonstrates the ability of a StagingProvider to set the
    local_path of a File on the execution side, but that the change does not
    modify the local_path of the corresponding submit side File, even when
    running in a single python process.
    """

    config = Config(executors=[ThreadPoolExecutor(storage_access=[SP2()])])

    file = File("sp2://test")

    parsl.load(config)
    p = observe_input_local_path(file).result()

    assert p == "./test1.tmp", "File object on the execution side gets the local_path set by the staging provider"

    assert not file.local_path, "The local_path on the submit side should not be set"

    parsl.dfk().cleanup()
    parsl.clear()
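Note: Examples 27 and 28 assume an SP2 staging provider for the sp2:// scheme that rewrites local_path on the execution side. A rough sketch using parsl's Staging.replace_task hook; the real test helper may differ:

from parsl.data_provider.staging import Staging

class SP2(Staging):
    def can_stage_in(self, file):
        return file.scheme == "sp2"

    def replace_task(self, dm, executor, file, f):
        # Wrap the task so local_path is set only on the execution-side
        # copy of the File, at task run time
        def wrapper(*args, **kwargs):
            file.local_path = "./test1.tmp"
            return f(*args, **kwargs)
        return wrapper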
Example #28
def test_1316_local_path_setting_preserves_dependency_sp2():
    config = Config(executors=[ThreadPoolExecutor(storage_access=[SP2()])])

    file = File("sp2://test")

    parsl.load(config)

    wc_app_future = wait_and_create(outputs=[file])
    data_future = wc_app_future.outputs[0]

    p = observe_input_local_path(data_future).result()

    assert wc_app_future.done(), "wait_and_create should finish before observe_input_local_path finishes"

    assert p == "./test1.tmp", "File object on the execution side gets the local_path set by the staging provider"

    assert not file.local_path, "The local_path on the submit side should not be set"

    parsl.dfk().cleanup()
    parsl.clear()
Example #29
def test_row_counts():
    # this is imported here rather than at module level because
    # it isn't available in a plain parsl install, so this module
    # would otherwise fail to import and break even a basic test
    # run.
    import sqlalchemy
    from parsl.tests.configs.htex_local_alternate import fresh_config

    if os.path.exists("monitoring.db"):
        logger.info("Monitoring database already exists - deleting")
        os.remove("monitoring.db")

    logger.info("loading parsl")
    parsl.load(fresh_config())

    logger.info("invoking and waiting for result")
    assert this_app().result() == 5

    logger.info("cleaning up parsl")
    parsl.dfk().cleanup()
    parsl.clear()

    # at this point, we should find one row in the monitoring database.

    logger.info("checking database content")
    engine = sqlalchemy.create_engine("sqlite:///monitoring.db")
    with engine.begin() as connection:

        result = connection.execute("SELECT COUNT(*) FROM workflow")
        (c, ) = result.first()
        assert c == 1

        result = connection.execute("SELECT COUNT(*) FROM task")
        (c, ) = result.first()
        assert c == 1

        result = connection.execute("SELECT COUNT(*) FROM try")
        (c, ) = result.first()
        assert c == 1

    logger.info("all done")
Example #30
def test_lazy_behavior():
    """Testing that lazy errors work"""

    config = fresh_config()
    parsl.load(config)

    @python_app
    def divide(a, b):
        return a / b

    futures = []
    for i in range(0, 10):
        futures.append(divide(10, 0))

    for f in futures:
        assert isinstance(f.exception(), ZeroDivisionError)
        assert f.done()

    parsl.dfk().cleanup()
    parsl.clear()