Exemplo n.º 1
0
def test_noise_files(fake_input_dir):
    """Globbing must ignore stray .h5 files that are not real data files."""
    n_train_before = len(glob_train_files(fake_input_dir))
    n_adapt_before = len(glob_adapt_files(fake_input_dir, 10000))
    # Drop empty decoy files at several locations inside the dataset tree.
    noise_paths = [
        os.path.join(fake_input_dir, 'train/Xm1/', 'Xfake.h5'),
        os.path.join(fake_input_dir, 'train/', 'Xfake.h5'),
        os.path.join(fake_input_dir, 'adapt/', 'Xfake.h5'),
    ]
    for noise_path in noise_paths:
        with open(noise_path, 'w'):
            pass
    # The glob results must be unaffected by the decoys.
    assert len(glob_train_files(fake_input_dir)) == n_train_before
    assert len(glob_adapt_files(fake_input_dir, 10000)) == n_adapt_before
Exemplo n.º 2
0
def test_X_change_shape(tmpdir, fake_input_dir, output_dir, code_dir):
    """Prediction must survive X matrices whose column count shrinks."""
    # Configuration: work on a throwaway copy of the fixture dataset.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation: randomly truncate ~half of the files by 300 columns.
    all_files = (glob_adapt_files(test_input_dir, 10000) +
                 glob_train_files(test_input_dir))
    for path in all_files:
        X, t = load_h5_input_file(path)
        if np.random.rand() <= 0.5:
            X = X[:, :X.shape[1] - 300]
        os.unlink(path)
        save_h5_input_file(path, X, t, output_dim=2)

    # Evaluation: predict only (no score computed against ground truth).
    run_predict_on_test_dataset_and_output_score('test_X_change_shape',
                                                 code_dir,
                                                 fake_input_dir,
                                                 test_input_dir,
                                                 test_output_dir,
                                                 tests_log_path,
                                                 run_compute_score=False)
Exemplo n.º 3
0
def test_empty_X(tmpdir, fake_input_dir, output_dir, code_dir):
    """Scoring must survive a dataset where ~10% of the X matrices are empty."""
    # Configuration: work on a throwaway copy of the fixture dataset.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation: empty out roughly one file in ten.
    all_files = (glob_adapt_files(test_input_dir, 10000) +
                 glob_train_files(test_input_dir))
    for path in all_files:
        X, t = load_h5_input_file(path)
        if np.random.rand() <= 0.1:
            X = np.array([])
        os.unlink(path)
        save_h5_input_file(path, X, t, output_dim=2)

    # Evaluation (score is computed against the pristine fixture).
    run_predict_on_test_dataset_and_output_score('test_empty_X_10%', code_dir,
                                                 fake_input_dir,
                                                 test_input_dir,
                                                 test_output_dir,
                                                 tests_log_path)
Exemplo n.º 4
0
def test_no_in_h5(fake_input_dir, tmpdir, hist_length=64 * 12 * 12):
    """Loading must tolerate .h5 files that keep only X, nested one level deep.

    Each data file is rewritten so that only the X matrix survives, stored
    under /X/value/X/value with no 't' dataset at all; prediction matrices
    built from this corrupted copy must match the pristine ones step by step.
    """
    # Configuration: corrupt a throwaway copy, never the shared fixture.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation
    all_files = (glob_adapt_files(test_input_dir, 10000) +
                 glob_train_files(test_input_dir))
    for path in all_files:
        X, _ = load_h5_input_file(path)
        os.unlink(path)
        # Rebuild the file with X nested an extra group deeper and no 't'.
        with h5py.File(path, driver='core', mode='w') as h5file:
            nested = h5file.create_group('X').create_group('value')
            nested.create_group('X').create_dataset('value', data=X)

    # Evaluation: sequential loads from both trees must agree at every step.
    clean_cache_X = None
    clean_cache_t = None
    dirty_cache_X = None
    dirty_cache_t = None
    for eval_step in range(-1, 30, 1):
        expected_X, _, clean_cache_X, clean_cache_t = load_prediction_matrix(
            fake_input_dir, fake_input_dir, fake_input_dir, fake_input_dir,
            eval_step, eval_step - 1, hist_length, clean_cache_X,
            clean_cache_t)
        actual_X, _, dirty_cache_X, dirty_cache_t = load_prediction_matrix(
            test_input_dir, test_input_dir, test_input_dir, test_input_dir,
            eval_step, eval_step - 1, hist_length, dirty_cache_X,
            dirty_cache_t)
        np.testing.assert_array_equal(expected_X, actual_X)
Exemplo n.º 5
0
def test_load_predict_matrixes_corrupted_t(fake_input_dir,
                                           tmpdir,
                                           hist_length=64 * 12 * 12):
    """A corrupted t vector must not change the data matrix that is loaded."""
    # Configuration: corrupt a throwaway copy, never the shared fixture.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation: corrupt t in every file of the copy.
    all_files = (glob_adapt_files(test_input_dir, 10000) +
                 glob_train_files(test_input_dir))
    for path in all_files:
        X, t = load_h5_input_file(path)
        os.unlink(path)
        save_h5_input_file(path, X, corrupt_t(t), output_dim=2)

    # Evaluation: matrices from clean and corrupted trees must be identical.
    for eval_step in range(-1, 60, 5):
        expected = load_data_matrix(fake_input_dir,
                                    eval_step=eval_step,
                                    hist_length=hist_length)
        actual = load_data_matrix(test_input_dir,
                                  eval_step=eval_step,
                                  hist_length=hist_length)
        np.testing.assert_array_equal(expected, actual)
Exemplo n.º 6
0
def test_load_full_cache_matrixes_corrupted_t(fake_input_dir,
                                              tmpdir,
                                              hist_length=64 * 12 * 12):
    """Full-cache loading must cope with corrupted t vectors.

    X must be untouched, the non-NaN entries of the corrupted t must match
    the clean values, and filling the NaNs must reconstruct clean t exactly.
    """
    # Configuration: corrupt a throwaway copy, never the shared fixture.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation: corrupt t in every file of the copy.
    all_files = (glob_adapt_files(test_input_dir, 10000) +
                 glob_train_files(test_input_dir))
    for path in all_files:
        X, t = load_h5_input_file(path)
        os.unlink(path)
        save_h5_input_file(path, X, corrupt_t(t), output_dim=2)

    # Evaluation
    for eval_step in range(-1, 60, 5):
        clean_X, clean_t = load_full_cache_matrixes(
            fake_input_dir, eval_step=eval_step, steps_needed=hist_length)
        dirty_X, dirty_t = load_full_cache_matrixes(
            test_input_dir, eval_step=eval_step, steps_needed=hist_length)
        assert len(dirty_t) == len(clean_t)
        np.testing.assert_array_equal(clean_X, dirty_X)
        valid = ~np.isnan(dirty_t)
        np.testing.assert_array_equal(clean_t[valid], dirty_t[valid])
        np.testing.assert_array_equal(clean_t, fillna_t(dirty_t))
Exemplo n.º 7
0
def test_read_ndim_2(fake_input_dir):
    """Files saved with output_dim=2 must load back as 2-D X matrices.

    Fix: removed a leftover debug ``print(X.shape)`` that spammed the
    test output on every file.
    """
    train_files = glob_train_files(fake_input_dir)
    adapt_files = glob_adapt_files(fake_input_dir, 10000)

    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Round-trip every file through save_h5_input_file with output_dim=2.
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)
        X_copy, t_copy = load_h5_input_file(data_file)
        assert len(X_copy.shape) == 2
Exemplo n.º 8
0
def test_fill_timesteps_with_nan(tmpdir, fake_input_dir, output_dir, code_dir):
    """Prediction and scoring must survive adapt files padded with NaNs."""
    # Configuration: work on a throwaway copy of the fixture dataset.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation: only adapt files are corrupted; train data stays clean.
    for path in glob_adapt_files(test_input_dir, 10000):
        X, t = load_h5_input_file(path)
        X, t = fill_with_nan(X, t, None)
        os.unlink(path)
        save_h5_input_file(path, X, t, output_dim=2)

    # Evaluation (score is computed against the pristine fixture).
    run_predict_on_test_dataset_and_output_score(
        'test_fill_timesteps_with_nan', code_dir, fake_input_dir,
        test_input_dir, test_output_dir, tests_log_path)
Exemplo n.º 9
0
def test_X_change_shape_no_predict(fake_input_dir,
                                   tmpdir,
                                   hist_length=64 * 12 * 12):
    """load_prediction_matrix must not crash when X column counts vary.

    Fix: the original corrupted ``fake_input_dir`` in place, mutating the
    shared fixture and poisoning every later test that reuses it (every
    sibling test copies to ``tmpdir`` first, and ``tmpdir`` was accepted
    here but never used). Corruption now happens on a throwaway copy and
    the evaluation loads that copy.
    """
    # Configuration: corrupt a throwaway copy, never the shared fixture.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)

    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption: randomly drop 300 columns from ~half of the files.
        if np.random.rand() <= 0.5:
            keep_size = X.shape[1] - 300
            X = X[:, :keep_size]
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)

    # Evaluation: loading must succeed at every step on the corrupted copy.
    cache_X = None
    cache_t = None
    for eval_step in range(-1, 30, 1):
        original_X, last_time_step, cache_X, cache_t = load_prediction_matrix(
            test_input_dir, test_input_dir, test_input_dir, test_input_dir,
            eval_step, eval_step - 1, hist_length, cache_X, cache_t)
Exemplo n.º 10
0
def test_sequential_load_corrupted_t(fake_input_dir,
                                     tmpdir,
                                     corrupt_fn_train,
                                     corrupt_fn_adapt,
                                     hist_length=64 * 12 * 12):
    """Sequential prediction loads must be immune to the given corruptions.

    Train and adapt files are corrupted with their respective corruption
    functions; matrices loaded step by step from the corrupted tree must
    equal those loaded from the pristine tree.
    """
    # Configuration: corrupt a throwaway copy, never the shared fixture.
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)

    for path in adapt_files + train_files:
        X, t = load_h5_input_file(path)
        # Pick the corruption matching the file's role.
        corrupt = corrupt_fn_train if path in train_files else corrupt_fn_adapt
        X, t = corrupt(X, t)
        os.unlink(path)
        save_h5_input_file(path, X, t, output_dim=2)

    # Evaluation: sequential loads from both trees must agree at every step.
    clean_cache_X, clean_cache_t = None, None
    dirty_cache_X, dirty_cache_t = None, None
    for eval_step in range(-1, 30, 1):
        expected_X, _, clean_cache_X, clean_cache_t = load_prediction_matrix(
            fake_input_dir, fake_input_dir, fake_input_dir, fake_input_dir,
            eval_step, eval_step - 1, hist_length, clean_cache_X,
            clean_cache_t)
        actual_X, _, dirty_cache_X, dirty_cache_t = load_prediction_matrix(
            test_input_dir, test_input_dir, test_input_dir, test_input_dir,
            eval_step, eval_step - 1, hist_length, dirty_cache_X,
            dirty_cache_t)
        np.testing.assert_array_equal(expected_X, actual_X)
Exemplo n.º 11
0
def test_remove_half_lines(tmpdir, fake_input_dir, output_dir, code_dir):
    """Scoring must survive files from which half of the lines were removed.

    Fix: the reference-input argument to
    ``run_predict_on_test_dataset_and_output_score`` was ``test_input_dir``
    (the corrupted copy) instead of ``fake_input_dir`` (the pristine
    fixture), unlike every sibling test in this file — so the run was being
    evaluated against corrupted reference data.
    """
    # Configuration
    test_input_dir = os.path.join(str(tmpdir), 'test_input')
    test_output_dir = os.path.join(str(tmpdir), 'test_output')
    tests_log_path = os.path.join(code_dir, 'tests_scores_log.txt')
    shutil.copytree(fake_input_dir, test_input_dir)

    # Transformation
    train_files = glob_train_files(test_input_dir)
    adapt_files = glob_adapt_files(test_input_dir, 10000)

    for data_file in adapt_files + train_files:
        X, t = load_h5_input_file(data_file)
        # Corruption
        X, t = remove_half_lines(X, t)
        os.unlink(data_file)
        save_h5_input_file(data_file, X, t, output_dim=2)

    # Evaluation: score against the pristine fixture, like the other tests.
    run_predict_on_test_dataset_and_output_score('test_remove_half_lines',
                                                 code_dir, fake_input_dir,
                                                 test_input_dir,
                                                 test_output_dir,
                                                 tests_log_path)