Exemplo n.º 1
0
def test_dmlscheduler_arbitrary_scheduling(scheduler):
    """
    Manually schedule events and check that all jobs are completed.

    Initialize jobs are added in three batches (two, two, then one). After
    each batch, `runners_run_next_jobs` is called repeatedly until
    `scheduler.processed` reaches the expected size. Every processed result
    must then carry a list of numpy arrays under ``results['weights']``.
    """
    first = make_initialize_job()
    second = make_initialize_job()
    scheduler.add_job(first)
    scheduler.add_job(second)
    # Only wait for the first result before queueing the next batch.
    while not scheduler.processed:
        scheduler.runners_run_next_jobs()
    third = make_initialize_job()
    fourth = make_initialize_job()
    scheduler.add_job(third)
    scheduler.add_job(fourth)
    while len(scheduler.processed) < 4:
        scheduler.runners_run_next_jobs()
    fifth = make_initialize_job()
    scheduler.add_job(fifth)
    while len(scheduler.processed) < 5:
        scheduler.runners_run_next_jobs()
    assert len(scheduler.processed) == 5, \
        "Jobs {} failed/not completed in time!".format(
            [result.job.job_type for result in scheduler.processed])
    while scheduler.processed:
        output = scheduler.processed.pop(0)
        initial_weights = output.results['weights']
        # isinstance is the idiomatic type check and also accepts subclasses.
        assert isinstance(initial_weights, list)
        assert isinstance(initial_weights[0], np.ndarray)
Exemplo n.º 2
0
def test_dmlrunner_uniform_initialization(runner):
    """
    Run two identical initialize jobs and check they yield matching weights.

    Both jobs are built with `make_initialize_job(small_filepath)` and run
    on the same runner; the resulting weight arrays are compared pairwise
    with `np.allclose`.
    """
    initialize_job = make_initialize_job(small_filepath)
    result = runner.run_job(initialize_job).results
    first_weights = result['weights']
    initialize_job = make_initialize_job(small_filepath)
    result = runner.run_job(initialize_job).results
    second_weights = result['weights']
    # zip() stops at the shorter input, so without this check a missing
    # trailing array in either result would go unnoticed.
    assert len(first_weights) == len(second_weights)
    assert all(
        np.allclose(arr1, arr2)
        for arr1, arr2 in zip(first_weights, second_weights)
    )
Exemplo n.º 3
0
def test_communication_manager_can_inform_new_job_to_the_optimizer(
        config_manager, ipfs_client):
    """
    Check that a NEW_SESSION event reaches the optimizer with the job intact.

    A Communication Manager and a scheduler are wired to each other, a
    serialized initialize job is sent through `inform`, and the job held by
    the resulting optimizer is compared field by field with the original.
    """
    comm_mgr = CommunicationManager()
    dml_scheduler = DMLScheduler(config_manager)
    comm_mgr.configure(dml_scheduler)
    dml_scheduler.configure(comm_mgr, ipfs_client)

    expected_job = make_initialize_job(make_model_json())
    for key, value in (('epochs', 10), ('batch_size', 128), ('split', .05)):
        expected_job.hyperparams[key] = value

    event_content = {
        "optimizer_params": {},
        "serialized_job": serialize_job(expected_job),
    }
    event = {
        TxEnum.KEY.name: None,
        TxEnum.CONTENT.name: event_content,
    }
    comm_mgr.inform(MessageEventTypes.NEW_SESSION.name, event)

    received_job = comm_mgr.optimizer.job
    compared_fields = (
        'weights',
        'serialized_model',
        'framework_type',
        'hyperparams',
        'label_column_name',
    )
    for field in compared_fields:
        assert getattr(received_job, field) == getattr(expected_job, field)
Exemplo n.º 4
0
def test_dmlrunner_same_train_job_with_split_1(runner, mnist_filepath):
    """
    Run split, initialize and train jobs in sequence with ``split`` set to 1.

    The split job's `session_filepath` and `datapoint_count`, together with
    the initialize job's weights, are fed into a train job. The train result
    must be successful, be tagged as a train job, and expose `weights`
    (list of numpy arrays), a numeric `omega` and a `train_stats` dict.
    """
    split_job = make_split_job(mnist_filepath)
    split_job.hyperparams['split'] = 1
    job_results = runner.run_job(split_job)
    session_filepath = job_results.results['session_filepath']
    datapoint_count = job_results.results['datapoint_count']
    initialize_job = make_initialize_job()
    initial_weights = runner.run_job(initialize_job).results['weights']
    train_job = make_train_job(
        initial_weights,
        session_filepath,
        datapoint_count
    )
    result = runner.run_job(train_job)
    assert result.status == 'successful'
    results = result.results
    new_weights = results['weights']
    omega = results['omega']
    train_stats = results['train_stats']
    # Compare strings by value: `is` checks object identity, which is not
    # guaranteed for two equal strings.
    assert result.job.job_type == JobTypes.JOB_TRAIN.name
    assert isinstance(new_weights, list)
    assert isinstance(new_weights[0], np.ndarray)
    assert isinstance(omega, (int, float))
    assert isinstance(train_stats, dict)
Exemplo n.º 5
0
def test_dmlscheduler_sanity(scheduler):
    """
    Check that the scheduling/running functionality is maintained.

    One initialize job is queued, then `runners_run_next_jobs` is called
    every 0.1 seconds until a result appears in `scheduler.processed`.
    That result must carry a list of numpy arrays under
    ``results['weights']``.
    """
    initialize_job = make_initialize_job()
    scheduler.add_job(initialize_job)
    scheduler.runners_run_next_jobs()
    while not scheduler.processed:
        time.sleep(0.1)
        scheduler.runners_run_next_jobs()
    output = scheduler.processed.pop(0)
    initial_weights = output.results['weights']
    # isinstance is the idiomatic type check and also accepts subclasses.
    assert isinstance(initial_weights, list)
    assert isinstance(initial_weights[0], np.ndarray)
Exemplo n.º 6
0
def test_dmlscheduler_cron(scheduler):
    """
    Test that the scheduler's cron works.

    Two initialize jobs are queued and the cron is started with a period of
    0.01 minutes. The test polls once per second, for at most 6 seconds,
    until both jobs show up in `scheduler.processed`, then stops the cron
    and checks each result's weights.
    """
    m = 2
    for _ in range(m):
        initialize_job = make_initialize_job()
        scheduler.add_job(initialize_job)
    scheduler.start_cron(period_in_mins=0.01)
    timeout = time.time() + 6
    while time.time() < timeout and len(scheduler.processed) != m:
        time.sleep(1)
    # Stop the cron before asserting so it is not left running on failure.
    scheduler.stop_cron()
    assert len(scheduler.processed) == m
    while scheduler.processed:
        output = scheduler.processed.pop(0)
        initial_weights = output.results['weights']
        # isinstance is the idiomatic type check and also accepts subclasses.
        assert isinstance(initial_weights, list)
        assert isinstance(initial_weights[0], np.ndarray)
Exemplo n.º 7
0
def test_dmlrunner_same_train_job_with_split_1(config_manager, mnist_filepath):
    """
    Run initialize, split and train jobs in sequence with ``split=1``.

    A fresh `DMLRunner` is built from `config_manager`. The initialize job's
    weights and the split job's `session_filepath` and `datapoint_count` are
    fed into a train job. The train result must be successful, be tagged as
    a train job, and expose `weights` (list of numpy arrays), a numeric
    `omega` and a `train_stats` dict.
    """
    model_json = make_model_json()
    hyperparams = make_hyperparams(split=1)
    runner = DMLRunner(config_manager)
    initialize_job = make_initialize_job(model_json)
    initial_weights = runner.run_job(initialize_job).results['weights']
    split_job = make_split_job(model_json, mnist_filepath)
    job_results = runner.run_job(split_job)
    session_filepath = job_results.results['session_filepath']
    datapoint_count = job_results.results['datapoint_count']
    train_job = make_train_job(model_json, initial_weights, hyperparams,
                               session_filepath, datapoint_count)
    result = runner.run_job(train_job)
    assert result.status == 'successful'
    results = result.results
    new_weights = results['weights']
    omega = results['omega']
    train_stats = results['train_stats']
    # Compare strings by value: `is` checks object identity, which is not
    # guaranteed for two equal strings.
    assert result.job.job_type == JobTypes.JOB_TRAIN.name
    assert isinstance(new_weights, list)
    assert isinstance(new_weights[0], np.ndarray)
    assert isinstance(omega, (int, float))
    assert isinstance(train_stats, dict)
Exemplo n.º 8
0
def init_dmlresult_obj(config_manager, small_filepath):
    """
    Build a DMLRunner, run one initialize job on it and return the result.

    The job is created from a fresh model JSON and `small_filepath`.
    """
    dml_runner = DMLRunner(config_manager)
    model_json = make_model_json()
    job = make_initialize_job(model_json, small_filepath)
    return dml_runner.run_job(job)
Exemplo n.º 9
0
def init_dmlresult_obj(runner, small_filepath):
    """
    Run one initialize job built from `small_filepath` on the given runner
    and return whatever `run_job` returns.
    """
    return runner.run_job(make_initialize_job(small_filepath))
Exemplo n.º 10
0
def init_dmlresult_obj(config_manager, small_uuid, dataset_manager):
    """
    Build a DMLRunner, run one initialize job on it and return the result.

    The dataset path is looked up under `small_uuid` in the dataset
    manager's mappings and passed to the job along with a fresh model JSON.
    """
    dml_runner = DMLRunner(config_manager)
    mappings = dataset_manager.get_mappings()
    dataset_path = mappings[small_uuid]
    job = make_initialize_job(make_model_json(), dataset_path)
    return dml_runner.run_job(job)