def test_dmlscheduler_arbitrary_scheduling(scheduler):
    """
    Manually schedule events and check that all jobs are completed.

    Jobs are added in waves (2, then 2 more, then 1) to exercise the
    scheduler picking up work that arrives while it is already running.
    """
    model_json = make_model_json()
    first = make_initialize_job(model_json)
    second = make_initialize_job(model_json)
    scheduler.add_job(first)
    scheduler.add_job(second)
    while len(scheduler.processed) == 0:
        scheduler.runners_run_next_jobs()
    third = make_initialize_job(model_json)
    fourth = make_initialize_job(model_json)
    scheduler.add_job(third)
    scheduler.add_job(fourth)
    while len(scheduler.processed) < 4:
        scheduler.runners_run_next_jobs()
    fifth = make_initialize_job(model_json)
    scheduler.add_job(fifth)
    while len(scheduler.processed) < 5:
        scheduler.runners_run_next_jobs()
    assert len(scheduler.processed) == 5, \
        "Jobs {} failed/not completed in time!".format([
        result.job.job_type for result in scheduler.processed])
    while scheduler.processed:
        output = scheduler.processed.pop(0)
        initial_weights = output.results['weights']
        # isinstance is the idiomatic type check; `type(x) == T` rejects
        # subclasses and is flagged by linters.
        assert isinstance(initial_weights, list)
        assert isinstance(initial_weights[0], np.ndarray)
# Example 2
def test_communication_manager_can_inform_new_job_to_the_optimizer(
        config_manager, ipfs_client):
    """
    Ensures that Communication Manager can tell the optimizer of something,
    and that the job will transfer correctly.
    """
    comm_manager = CommunicationManager()
    dml_scheduler = DMLScheduler(config_manager)
    comm_manager.configure(dml_scheduler)
    dml_scheduler.configure(comm_manager, ipfs_client)
    # Build the job we expect the optimizer to end up holding.
    expected_job = make_initialize_job(make_model_json())
    expected_job.hyperparams['epochs'] = 10
    expected_job.hyperparams['batch_size'] = 128
    expected_job.hyperparams['split'] = .05
    payload = {
        "optimizer_params": {},
        "serialized_job": serialize_job(expected_job),
    }
    new_session_event = {
        TxEnum.KEY.name: None,
        TxEnum.CONTENT.name: payload,
    }
    comm_manager.inform(MessageEventTypes.NEW_SESSION.name,
                        new_session_event)
    received_job = comm_manager.optimizer.job
    # The job deserialized by the optimizer must match the one we sent.
    for attr in ('weights', 'serialized_model', 'framework_type',
                 'hyperparams', 'label_column_name'):
        assert getattr(received_job, attr) == getattr(expected_job, attr)
# Example 3
def split_dmlresult_obj(config_manager, mnist_filepath):
    """Run a 75/25 split job over the MNIST data and return its result."""
    job = make_split_job(make_model_json(), mnist_filepath)
    job.hyperparams['split'] = 0.75
    return DMLRunner(config_manager).run_job(job)
# Example 4
def train_dmlresult_obj(config_manager, split_dmlresult_obj,
                        init_dmlresult_obj):
    """Run a training job built from prior init and split results."""
    split_results = split_dmlresult_obj.results
    job = make_train_job(
        make_model_json(),
        init_dmlresult_obj.results['weights'],
        make_hyperparams(split=1),
        split_results['session_filepath'],
        split_results['datapoint_count'],
    )
    return DMLRunner(config_manager).run_job(job)
# Example 5
def split_dmlresult_obj(config_manager, mnist_uuid, dataset_manager):
    """Resolve the MNIST path by uuid, run a 75/25 split, return the result."""
    mnist_filepath = dataset_manager.get_mappings()[mnist_uuid]
    job = make_split_job(make_model_json(), mnist_filepath)
    job.hyperparams['split'] = 0.75
    result = DMLRunner(config_manager).run_job(job)
    print(result)
    return result
def test_dmlscheduler_sanity(scheduler):
    """
    Check that the scheduling/running functionality is maintained.

    One initialize job is scheduled and polled until it completes (or a
    deadline passes), then its output weights are sanity-checked.
    """
    model_json = make_model_json()
    initialize_job = make_initialize_job(model_json)
    scheduler.add_job(initialize_job)
    scheduler.runners_run_next_jobs()
    # Bug fix: the original `timeout = time.time` bound the function
    # object instead of a deadline and never used it, so a stalled
    # scheduler hung the test forever.  Use the same 6s budget as the
    # cron test and fail loudly on expiry.
    timeout = time.time() + 6
    while not scheduler.processed and time.time() < timeout:
        time.sleep(0.1)
        scheduler.runners_run_next_jobs()
    assert scheduler.processed, "Job not completed in time!"
    output = scheduler.processed.pop(0)
    initial_weights = output.results['weights']
    # isinstance is the idiomatic type check.
    assert isinstance(initial_weights, list)
    assert isinstance(initial_weights[0], np.ndarray)
def test_dmlrunner_transform_and_split(
        config_manager, small_filepath):
    """Splitting a small dataset should create train/test CSVs on disk."""
    runner = DMLRunner(config_manager)
    split_job = make_split_job(make_model_json(), small_filepath)
    split_job.hyperparams['split'] = 0.75
    session_dir = runner.run_job(split_job).results['session_filepath']
    assert os.path.isdir(session_dir), \
        "Session folder does not exist!"
    train_csv = os.path.join(session_dir, 'train.csv')
    test_csv = os.path.join(session_dir, 'test.csv')
    assert os.path.isfile(train_csv) and os.path.isfile(test_csv), \
        "Training and test set not created!"
    train_df = pd.read_csv(train_csv)
    test_df = pd.read_csv(test_csv)
    assert len(train_df) == 6 and len(test_df) == 2, \
        "Train test split was not performed correctly."
def test_dmlscheduler_cron(scheduler):
    """
    Test that the scheduler's cron works.

    Two initialize jobs are queued, the cron is started on a short
    period, and we wait (bounded by a 6s deadline) for both to finish.
    """
    model_json = make_model_json()
    num_jobs = 2
    for _ in range(num_jobs):
        scheduler.add_job(make_initialize_job(model_json))
    scheduler.start_cron(period_in_mins=0.01)
    timeout = time.time() + 6
    while time.time() < timeout and len(scheduler.processed) != num_jobs:
        time.sleep(1)
    scheduler.stop_cron()
    assert len(scheduler.processed) == num_jobs
    while scheduler.processed:
        output = scheduler.processed.pop(0)
        initial_weights = output.results['weights']
        # isinstance is the idiomatic type check; `type(x) == T` rejects
        # subclasses and is flagged by linters.
        assert isinstance(initial_weights, list)
        assert isinstance(initial_weights[0], np.ndarray)
def test_dmlrunner_validate_job_returns_stats(
        config_manager, mnist_filepath, train_dmlresult_obj):
    """
    A validate job built from a successful training result should run
    successfully and report validation stats as a dict.
    """
    model_json = make_model_json()
    hyperparams = make_hyperparams()
    runner = DMLRunner(config_manager)
    train_result = train_dmlresult_obj
    assert train_result.status == 'successful'
    session_filepath = train_result.job.session_filepath
    datapoint_count = train_result.job.datapoint_count
    new_weights = train_result.results['weights']
    # Validate on the complement of the training split.
    hyperparams['split'] = 1 - hyperparams['split']
    validate_job = make_validate_job(model_json, new_weights, hyperparams,
                                     session_filepath, datapoint_count)
    result = runner.run_job(validate_job)
    assert result.status == 'successful'
    val_stats = result.results['val_stats']
    # Bug fix: use `==`, not `is` — string identity only held by CPython
    # interning accident.  isinstance replaces `type() ==`.
    assert result.job.job_type == JobTypes.JOB_VAL.name
    assert isinstance(val_stats, dict)
def test_dmlrunner_same_train_job_with_split_1(
        config_manager, mnist_filepath):
    """
    A train job on a split=1 session (all data in the train set) should
    succeed and return weights, omega, and train stats of the expected
    types.
    """
    model_json = make_model_json()
    hyperparams = make_hyperparams(split=1)
    runner = DMLRunner(config_manager)
    initialize_job = make_initialize_job(model_json)
    initial_weights = runner.run_job(initialize_job).results['weights']
    split_job = make_split_job(model_json, mnist_filepath)
    split_results = runner.run_job(split_job).results
    train_job = make_train_job(model_json, initial_weights, hyperparams,
                               split_results['session_filepath'],
                               split_results['datapoint_count'])
    result = runner.run_job(train_job)
    assert result.status == 'successful'
    results = result.results
    new_weights = results['weights']
    omega = results['omega']
    train_stats = results['train_stats']
    # Bug fix: use `==`, not `is` — comparing string values, not object
    # identity.  isinstance replaces the `type() ==` anti-pattern.
    assert result.job.job_type == JobTypes.JOB_TRAIN.name
    assert isinstance(new_weights, list)
    assert isinstance(new_weights[0], np.ndarray)
    assert isinstance(omega, (int, float))
    assert isinstance(train_stats, dict)
# Example 11
def init_dmlresult_obj(config_manager, small_filepath):
    """Run an initialize job on the small dataset and return its result."""
    init_job = make_initialize_job(make_model_json(), small_filepath)
    return DMLRunner(config_manager).run_job(init_job)
# Example 12
def init_dmlresult_obj(config_manager, small_uuid, dataset_manager):
    """Resolve the dataset path by uuid, then run an initialize job."""
    filepath = dataset_manager.get_mappings()[small_uuid]
    init_job = make_initialize_job(make_model_json(), filepath)
    return DMLRunner(config_manager).run_job(init_job)