Example #1
def test_new_img_like_side_effect():
    img1 = Nifti1Image(np.ones((2, 2, 2, 2)), affine=np.eye(4))
    hash1 = joblib.hash(img1)
    new_img_like(img1, np.ones((2, 2, 2, 2)), img1.affine.copy(),
                 copy_header=True)
    hash2 = joblib.hash(img1)
    assert_equal(hash1, hash2)
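These side-effect tests rely on joblib.hash being a content digest: equal data gives equal digests, and any in-place mutation changes the digest. A minimal sketch of that property, not taken from the original test suite:

import numpy as np
import joblib

a = np.ones((2, 2))
digest = joblib.hash(a)
assert joblib.hash(np.ones((2, 2))) == digest  # same content, same hash
a[0, 0] = 0.0
assert joblib.hash(a) != digest  # in-place mutation changes the hash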
Example #2
def test_joblib_cache():
    if not LooseVersion(nibabel.__version__) > LooseVersion('1.1.0'):
        # Old nibabel does not pickle
        raise SkipTest
    from sklearn.externals.joblib import hash, Memory
    mask = np.zeros((40, 40, 40))
    mask[20, 20, 20] = 1
    mask_img = Nifti1Image(mask, np.eye(4))

    with testing.write_tmp_imgs(mask_img, create_files=True)\
            as filename:
        masker = NiftiMasker(mask_img=filename)
        masker.fit()
        mask_hash = hash(masker.mask_img_)
        masker.mask_img_.get_data()
        assert_true(mask_hash == hash(masker.mask_img_))

    # Test a tricky issue with memmapped joblib.Memory that makes
    # imgs returned by inverse_transform impossible to save
    cachedir = mkdtemp()
    try:
        masker.memory = Memory(cachedir=cachedir, mmap_mode='r',
                               verbose=0)
        X = masker.transform(mask_img)
        # inverse_transform a first time, so that the result is cached
        out_img = masker.inverse_transform(X)
        out_img = masker.inverse_transform(X)
        out_img.to_filename(os.path.join(cachedir, 'test.nii'))
    finally:
        shutil.rmtree(cachedir, ignore_errors=True)
Example #3
def hash_X_y(X, y, n_samples=10, n_features=5):
    """Compute hash of the input arrays.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        The ``X`` array.

    y : ndarray, shape (n_samples)
        The ``y`` array.

    n_samples : int, optional
        The number of samples to use to compute the hash. Default is 10.

    n_features : int, optional
        The number of features to use to compute the hash. Default is 5.

    Returns
    -------
    X_hash: str
        Hash identifier of the ``X`` matrix.
    y_hash: str
        Hash identifier of the ``y`` matrix.
    """
    row_idx = slice(None, None, max(1, X.shape[0] // n_samples))
    col_idx = slice(None, None, max(1, X.shape[1] // n_features))

    return joblib.hash(X[row_idx, col_idx]), joblib.hash(y[row_idx])
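With the defaults above, hashing a (2000, 20) array only touches every 200th row and every 4th column, which keeps the hash cheap on large inputs. A short usage sketch with hypothetical data, relying only on the function defined above:

import numpy as np

X = np.arange(2000 * 20, dtype=float).reshape(2000, 20)
y = np.arange(2000)
X_hash, y_hash = hash_X_y(X, y)  # hashes X[::200, ::4] and y[::200]
assert (X_hash, y_hash) == hash_X_y(X, y)  # deterministic for equal data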
Example #5
def check_estimators_overwrite_params(name, Estimator):
    X, y = make_blobs(random_state=0, n_samples=9)
    y = multioutput_estimator_convert_y_2d(name, y)
    # some want non-negative input
    X -= X.min()
    with warnings.catch_warnings(record=True):
        # catch deprecation warnings
        estimator = Estimator()

    set_fast_parameters(estimator)
    set_random_state(estimator)

    # Make a physical copy of the original estimator parameters before fitting.
    params = estimator.get_params()
    original_params = deepcopy(params)

    # Fit the model
    estimator.fit(X, y)

    # Compare the state of the model parameters with the original parameters
    new_params = estimator.get_params()
    for param_name, original_value in original_params.items():
        new_value = new_params[param_name]

        # We should never change or mutate the internal state of input
        # parameters by default. To check this we use the joblib.hash
        # function that recursively introspects any subobjects to compute
        # a checksum. The only exception to this rule of immutable
        # constructor parameters is a possible RandomState instance, but in
        # this check we explicitly fix the random_state params recursively
        # to be integer seeds.
        assert_equal(hash(new_value), hash(original_value),
                     "Estimator %s should not change or mutate "
                     "the parameter %s from %s to %s during fit."
                     % (name, param_name, original_value, new_value))
Example #9
def test_check_estimator_clones():
    # check that check_estimator doesn't modify the estimator it receives
    from sklearn.datasets import load_iris
    iris = load_iris()

    for Estimator in [GaussianMixture, LinearRegression,
                      RandomForestClassifier, NMF, SGDClassifier,
                      MiniBatchKMeans]:
        with ignore_warnings(category=FutureWarning):
            # when 'est = SGDClassifier()'
            est = Estimator()
        set_checking_parameters(est)
        set_random_state(est)
        # without fitting
        old_hash = joblib.hash(est)
        check_estimator(est)
        assert_equal(old_hash, joblib.hash(est))

        with ignore_warnings(category=FutureWarning):
            # when 'est = SGDClassifier()'
            est = Estimator()
        set_checking_parameters(est)
        set_random_state(est)
        # with fitting
        est.fit(iris.data + 10, iris.target)
        old_hash = joblib.hash(est)
        check_estimator(est)
        assert_equal(old_hash, joblib.hash(est))
Example #10
def _check_integrity_atlas(atlas):
    # check that the folder exists
    atlas_directory = os.path.abspath(os.path.join('.', 'data', 'fmri', atlas))
    if os.path.isdir(atlas_directory):
        # compute the hash of the current data present on the disk
        filenames_atlas_current = np.array(
            glob.glob(os.path.join(atlas_directory, '*', '*', '*')),
            dtype=object)
        filenames_atlas_current.sort()
        current_hash = joblib.hash(filenames_atlas_current)

        # compute the expected hash from the data set which we provide
        filenames_atlas_expected = pd.read_csv(
            os.path.abspath(os.path.join('.', 'data', 'fmri_filename.csv')),
            index_col=0)[atlas].values
        for idx in range(filenames_atlas_expected.size):
            filenames_atlas_expected[idx] = os.path.abspath(
                filenames_atlas_expected[idx])
        filenames_atlas_expected.sort()
        expected_hash = joblib.hash(filenames_atlas_expected)

        if current_hash == expected_hash:
            return

        shutil.rmtree(atlas_directory)

    _download_fmri_data(atlas)
Example #11
def test_hash_X_y():
    rng = check_random_state(0)
    X = rng.randn(2000, 20)
    y = np.array([0] * 500 + [1] * 1500)
    assert hash_X_y(X, y, 10,
                    10) == (joblib.hash(X[::200, ::2]), joblib.hash(y[::200]))

    X = rng.randn(5, 2)
    y = np.array([0] * 2 + [1] * 3)
    # all data will be used in this case
    assert hash_X_y(X, y) == (joblib.hash(X), joblib.hash(y))
Example #13
def test_hash_X_y_pandas():
    pd = pytest.importorskip("pandas")
    rng = check_random_state(0)
    X = pd.DataFrame(rng.randn(2000, 20))
    y = pd.Series([0] * 500 + [1] * 1500)
    assert hash_X_y(X, y, 10, 10) == (joblib.hash(X.iloc[::200, ::2]),
                                      joblib.hash(y.iloc[::200]))

    X = pd.DataFrame(rng.randn(5, 2))
    y = pd.Series([0] * 2 + [1] * 3)
    # all data will be used in this case
    assert hash_X_y(X, y) == (joblib.hash(X), joblib.hash(y))
Example #14
def test_joblib_cache():
    from sklearn.externals.joblib import hash
    # Dummy mask
    mask = np.zeros((40, 40, 40))
    mask[20, 20, 20] = 1
    mask_img = Nifti1Image(mask, np.eye(4))

    with write_tmp_imgs(mask_img, create_files=True) as filename:
        masker = MultiNiftiMasker(mask_img=filename)
        masker.fit()
        mask_hash = hash(masker.mask_img_)
        masker.mask_img_.get_data()
        assert_true(mask_hash == hash(masker.mask_img_))
        # allow "filename" to be deleted on Windows
        del masker
Example #15
 def __init__(self, tmp_dir, array):
     h = hash(array)
     hashs[h] = array
     output_filename = os.path.join(tmp_dir, h)
     with lockfile.LockFile(output_filename):
         if not os.path.exists(output_filename):
             dump(array, output_filename)
     self.filename = output_filename
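The method above is shown out of its class: hashs and the class itself are defined elsewhere in the original source. A self-contained sketch of the likely surrounding context, where the class name PersistedArray and the registry dict are assumptions rather than the original code:

import os

import lockfile  # third-party file-locking helper used by the snippet
from joblib import dump, hash

hashs = {}  # content hash -> in-memory array registry (assumed)


class PersistedArray:  # hypothetical name for the original class
    def __init__(self, tmp_dir, array):
        h = hash(array)  # joblib content digest doubles as the file name
        hashs[h] = array
        output_filename = os.path.join(tmp_dir, h)
        # the lock prevents two workers from dumping the same array at once
        with lockfile.LockFile(output_filename):
            if not os.path.exists(output_filename):
                dump(array, output_filename)
        self.filename = output_filename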
Example #16
def test_joblib_cache():
    if not LooseVersion(nibabel.__version__) > LooseVersion('1.1.0'):
        # Old nibabel does not pickle
        raise SkipTest
    from sklearn.externals.joblib import hash
    # Dummy mask
    mask = np.zeros((40, 40, 40))
    mask[20, 20, 20] = 1
    mask_img = Nifti1Image(mask, np.eye(4))

    with write_tmp_imgs(mask_img, create_files=True)\
            as filename:
        masker = MultiNiftiMasker(mask_img=filename)
        masker.fit()
        mask_hash = hash(masker.mask_img_)
        masker.mask_img_.get_data()
        assert_true(mask_hash == hash(masker.mask_img_))
Example #18
def evaluate_one(model_class, parameters, cv_split):
    split_idx, (X_train, X_val, y_train, y_val) = cv_split
    model = model_class(**parameters).fit(X_train, y_train)

    train_score = model.score(X_train, y_train)
    validation_score = model.score(X_val, y_val)

    results = {
        'train_score': train_score,
        'val_score': validation_score,
        'parameters': parameters,
        'parameters_hash': hash(parameters),
    }
    return results
Example #19
def hash_X_y(X, y, n_samples=1000):
    """Compute hash of the input arrays.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        The ``X`` array.

    y : ndarray, shape (n_samples)
        The ``y`` array.

    n_samples : int, optional
        The number of samples used to compute the hash. Default is 1000.

    Returns
    -------
    X_hash: str
        Hash identifier of the ``X`` matrix.

    y_hash: str
        Hash identifier of the ``y`` matrix.
    """
    rng = np.random.RandomState(0)
    # sample n_samples individual (row, column) positions deterministically
    row_idx = rng.randint(X.shape[0], size=n_samples)
    col_idx = rng.randint(X.shape[1], size=n_samples)

    return joblib.hash(X[row_idx, col_idx]), joblib.hash(y[row_idx])
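Unlike the strided variant earlier on this page, this version hashes n_samples individual (row, column) positions drawn at random; the fixed RandomState(0) seed, re-created on every call, is what keeps the result reproducible. A small sketch with hypothetical data:

import numpy as np

X = np.arange(5000 * 10, dtype=float).reshape(5000, 10)
y = np.arange(5000)
assert hash_X_y(X, y) == hash_X_y(X, y)  # same data, same seed, same hashes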
Example #21
def test_copy_img_side_effect():
    img1 = Nifti1Image(np.ones((2, 2, 2, 2)), affine=np.eye(4))
    hash1 = joblib.hash(img1)
    niimg.copy_img(img1)
    hash2 = joblib.hash(img1)
    assert_equal(hash1, hash2)
Example #22
    joblib.dump((X_vali, y_vali, qid_vali), VALI_DATA)

if not os.path.exists(GRID_JOBS_FOLDER):
    os.makedirs(GRID_JOBS_FOLDER)

params = {
    'max_features': [10, 20, 50, 100],
    'max_depth': [2, 3, 4, 5],
    'subsample': [0.5, 0.8, 1.0],
    'loss': ['ls', 'huber', 'quantile'],
    'learning_rate': [0.05, 0.1, 0.5],
}

for i, param in enumerate(ParameterGrid(params)):
    params_description = json.dumps(param)
    job_id = joblib.hash(params_description)
    job_folder = GRID_JOBS_FOLDER + '/' + job_id
    if not os.path.exists(job_folder):
        os.makedirs(job_folder)
    with open(job_folder + '/parameters.json', 'wb') as f:
        f.write(params_description.encode('utf-8'))

    data_filenames = {'train': TRAIN_SAMPLE_DATA, 'validation': VALI_DATA}
    with open(job_folder + '/data.json', 'wb') as f:
        f.write(json.dumps(data_filenames).encode('utf-8'))

    cmd = 'qsub -V -cwd letor_gridpoint.py {}'.format(job_folder)
    os.system(cmd)

    # if i > 100:
    #     break
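Hashing the serialized parameters gives every grid point a stable folder name, so re-running the script re-uses existing job folders instead of duplicating them. A small sketch of that property; note that passing sort_keys=True to json.dumps (which the snippet above does not do) would also make the id robust to dict key order:

import json
import joblib

param = {'max_depth': 3, 'learning_rate': 0.1}
job_id = joblib.hash(json.dumps(param, sort_keys=True))
# identical parameters always map to the same job folder
assert job_id == joblib.hash(json.dumps(param, sort_keys=True))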
Example #23
 def __hash__(self):
     if self._hashvalue is None:
         self._hashvalue = int(joblib.hash(self._hashkey), 16)
     return self._hashvalue
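This method refers to self._hashkey and self._hashvalue, which are set elsewhere in its class. A minimal self-contained sketch of the lazy-caching pattern it implements, where the class name and constructor are assumptions:

import joblib


class HashableKey:  # hypothetical name for the original class
    def __init__(self, obj):
        self._hashkey = obj  # the object to digest, e.g. an ndarray
        self._hashvalue = None  # computed lazily on first __hash__ call

    def __hash__(self):
        if self._hashvalue is None:
            # joblib.hash returns a hex md5 digest; parse it as an integer
            self._hashvalue = int(joblib.hash(self._hashkey), 16)
        return self._hashvalue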