示例#1
0
def test_hashes_are_different_between_c_and_fortran_contiguous_arrays():
    """Hashing must distinguish C-contiguous from F-contiguous layouts."""
    random_state = np.random.RandomState(0)
    c_order = random_state.random_sample((10, 10))
    f_order = np.asfortranarray(c_order)
    # Same values, different memory layout -> different hashes expected.
    assert hash(c_order) != hash(f_order)
示例#2
0
def test_hash_object_dtype():
    """Object-dtype ndarrays with equal content must hash equally."""
    first = np.array([np.arange(n) for n in range(6)], dtype=object)
    second = np.array([np.arange(n) for n in range(6)], dtype=object)
    # Two independently built but equal arrays -> identical hashes.
    nose.tools.assert_equal(hash(first), hash(second))
示例#3
0
def test_hash_object_dtype():
    """Hashing of ndarrays with dtype ``object`` is deterministic."""
    left = np.array([np.arange(size) for size in range(6)], dtype=object)
    right = np.array([np.arange(size) for size in range(6)], dtype=object)
    assert hash(left) == hash(right)
示例#4
0
def test_hash_numpy_noncontiguous():
    """A non-contiguous view must not hash like its contiguous copies."""
    view = np.asarray(np.arange(6000).reshape((1000, 2, 3)), order='F')[:, :1, :]

    c_copy = np.ascontiguousarray(view)
    assert hash(view) != hash(c_copy)

    f_copy = np.asfortranarray(view)
    assert hash(view) != hash(f_copy)
示例#5
0
def test_hash_methods():
    """Hashing bound instance methods works and distinguishes instances."""
    # BUG FIX: `unicode` does not exist on Python 3; io.StringIO takes str.
    a = io.StringIO('a')
    # Two accesses of a.flush create distinct bound-method objects that
    # must nevertheless hash identically.
    assert hash(a.flush) == hash(a.flush)
    a1 = collections.deque(range(10))
    a2 = collections.deque(range(9))
    # Same method, different receivers -> different hashes.
    assert hash(a1.extend) != hash(a2.extend)
示例#6
0
def test_hash_object_dtype():
    """Equal object-dtype arrays hash to the same digest."""
    ragged = [np.arange(i) for i in range(6)]
    a = np.array(ragged, dtype=object)
    b = np.array([np.arange(i) for i in range(6)], dtype=object)
    assert hash(a) == hash(b)
示例#7
0
def test_hash_numpy_noncontiguous():
    """Strided views and contiguous copies must hash differently."""
    strided = np.asarray(np.arange(6000).reshape((1000, 2, 3)), order='F')[:, :1, :]
    packed_c = np.ascontiguousarray(strided)
    nose.tools.assert_not_equal(hash(strided), hash(packed_c))

    packed_f = np.asfortranarray(strided)
    nose.tools.assert_not_equal(hash(strided), hash(packed_f))
示例#8
0
def test_hash_methods():
    """Bound methods are hashable; identical bindings hash identically."""
    # BUG FIX: `unicode` is Python-2-only; on Python 3 StringIO accepts str.
    a = io.StringIO('a')
    assert hash(a.flush) == hash(a.flush)
    a1 = collections.deque(range(10))
    a2 = collections.deque(range(9))
    # Methods bound to unequal deques must hash differently.
    assert hash(a1.extend) != hash(a2.extend)
示例#9
0
 def test_set_decimal_hash():
     # Check that sets containing decimals hash consistently, even though
     # ordering is not guaranteed
     nose.tools.assert_equal(hash(set([Decimal(0),
                                       Decimal('NaN')])),
                             hash(set([Decimal('NaN'),
                                       Decimal(0)])))
示例#10
0
def test_trival_hash():
    """ Smoke test hash on various types.
    """
    # Same objects and ordering as before, just formatted compactly.
    obj_list = [1, 2, 1., 2., 1 + 1j, 2. + 1j,
                'a', 'b',
                (1, ), (1, 1, ), [1, ], [1, 1, ],
                {1: 1}, {1: 2}, {2: 1},
                None,
                gc.collect,
                [1, ].append,
                # Next 2 sets have unorderable elements in python 3.
                set(('a', 1)),
                set(('a', 1, ('a', 1))),
                # Next 2 dicts have unorderable type of keys in python 3.
                {'a': 1, 1: 2},
                {'a': 1, 1: 2, 'd': {'a': 1}}]
    for obj1 in obj_list:
        for obj2 in obj_list:
            # Hashes must match exactly when both names denote the same
            # object, and differ otherwise.
            yield nose.tools.assert_equal, hash(obj1) == hash(obj2), \
                obj1 is obj2
示例#11
0
def test_dict_hash():
    """Dictionary hashing must be independent of key ordering."""
    cached = KlassWithCachedMethod()

    filenames = ['#s12069__c_maps.nii.gz', '#s12158__c_maps.nii.gz',
                 '#s12258__c_maps.nii.gz', '#s12277__c_maps.nii.gz',
                 '#s12300__c_maps.nii.gz', '#s12401__c_maps.nii.gz',
                 '#s12430__c_maps.nii.gz', '#s13817__c_maps.nii.gz',
                 '#s13903__c_maps.nii.gz', '#s13916__c_maps.nii.gz',
                 '#s13981__c_maps.nii.gz', '#s13982__c_maps.nii.gz',
                 '#s13983__c_maps.nii.gz']
    d = {name: [33] for name in filenames}

    first = cached.f(d)
    second = cached.f(first)

    nose.tools.assert_equal(hash(first), hash(second))
示例#12
0
def test_hashing_pickling_error():
    """A non-picklable object must raise PicklingError with a clear message."""
    def local_closure():  # locally-defined functions cannot be pickled
        return 42

    with raises(pickle.PicklingError) as excinfo:
        hash(local_closure)
    excinfo.match('PicklingError while hashing')
示例#13
0
def test_dict_hash(tmpdir):
    """Dict hashes must not depend on the (unguaranteed) key order."""
    cached = KlassWithCachedMethod(tmpdir.strpath)

    keys = ['#s12069__c_maps.nii.gz', '#s12158__c_maps.nii.gz',
            '#s12258__c_maps.nii.gz', '#s12277__c_maps.nii.gz',
            '#s12300__c_maps.nii.gz', '#s12401__c_maps.nii.gz',
            '#s12430__c_maps.nii.gz', '#s13817__c_maps.nii.gz',
            '#s13903__c_maps.nii.gz', '#s13916__c_maps.nii.gz',
            '#s13981__c_maps.nii.gz', '#s13982__c_maps.nii.gz',
            '#s13983__c_maps.nii.gz']
    payload = {key: [33] for key in keys}

    round_one = cached.f(payload)
    round_two = cached.f(round_one)

    assert hash(round_one) == hash(round_two)
示例#14
0
def test_hash_methods():
    """Bound instance methods hash stably and per-instance."""
    # BUG FIX: `unicode` does not exist on Python 3; StringIO accepts str.
    a = io.StringIO('a')
    nose.tools.assert_equal(hash(a.flush), hash(a.flush))
    a1 = collections.deque(range(10))
    a2 = collections.deque(range(9))
    # Same method bound to unequal receivers -> different hashes.
    nose.tools.assert_not_equal(hash(a1.extend), hash(a2.extend))
示例#15
0
def test_hashes_are_different_between_c_and_fortran_contiguous_arrays():
    """C-order and F-order copies of the same data hash differently."""
    seeded = np.random.RandomState(0)
    in_c_order = seeded.random_sample((10, 10))
    in_f_order = np.asfortranarray(in_c_order)
    assert hash(in_c_order) != hash(in_f_order)
示例#16
0
def test_hash_memmap():
    """ Check that memmap and arrays hash identically if coerce_mmap is
        True.
    """
    filename = tempfile.mktemp(prefix='joblib_test_hash_memmap_')
    try:
        m = np.memmap(filename, shape=(10, 10), mode='w+')
        a = np.asarray(m)
        # nose-style generator test: hashes should be equal exactly when
        # coerce_mmap=True, so both settings are checked.
        for coerce_mmap in (False, True):
            yield (assert_equal,
                   hash(a, coerce_mmap=coerce_mmap) ==
                   hash(m, coerce_mmap=coerce_mmap),
                   coerce_mmap)
    finally:
        # Drop the memmap before unlinking so the file handle is closed.
        if 'm' in locals():
            del m
            # Force a garbage-collection cycle, to be certain that the
            # object is delete, and we don't run in a problem under
            # Windows with a file handle still open.
            gc.collect()
            try:
                os.unlink(filename)
            except OSError as e:
                # Under windows, some files don't get erased.
                if not os.name == 'nt':
                    raise e
示例#17
0
def test_hashing_pickling_error():
    """hash() must surface pickling failures as an informative error."""
    # A function defined inside another function cannot be pickled.
    def non_picklable():
        return 42

    with raises(pickle.PicklingError) as excinfo:
        hash(non_picklable)
    excinfo.match('PicklingError while hashing')
示例#18
0
def test_hash_methods():
    """Hashing instance methods works and discriminates between instances."""
    # BUG FIX: replaced Python-2-only `unicode('a')` with a plain str.
    a = io.StringIO('a')
    nose.tools.assert_equal(hash(a.flush), hash(a.flush))
    a1 = collections.deque(range(10))
    a2 = collections.deque(range(9))
    nose.tools.assert_not_equal(hash(a1.extend), hash(a2.extend))
示例#19
0
def test_hash_memmap():
    """ Check that memmap and arrays hash identically if coerce_mmap is
        True.
    """
    filename = tempfile.mktemp(prefix='joblib_test_hash_memmap_')
    try:
        m = np.memmap(filename, shape=(10, 10), mode='w+')
        a = np.asarray(m)
        # Generator (nose) test: equality of the two hashes must track the
        # coerce_mmap flag exactly.
        for coerce_mmap in (False, True):
            yield (nose.tools.assert_equal,
                   hash(a, coerce_mmap=coerce_mmap) == hash(
                       m, coerce_mmap=coerce_mmap), coerce_mmap)
    finally:
        # Release the memmap before deleting its backing file.
        if 'm' in locals():
            del m
            # Force a garbage-collection cycle, to be certain that the
            # object is delete, and we don't run in a problem under
            # Windows with a file handle still open.
            gc.collect()
            try:
                os.unlink(filename)
            except OSError as e:
                # Under windows, some files don't get erased.
                if not os.name == 'nt':
                    raise e
示例#20
0
def test_string():
    """Hashes of containers of equal strings must not depend on the
    strings' history (strings are immutable in Python)."""
    shared_key = 'foo'
    first = {shared_key: 'bar'}
    second = {shared_key: 'bar'}
    roundtripped = pickle.loads(pickle.dumps(second))
    assert hash([first, second]) == hash([first, roundtripped])
示例#21
0
def test_bound_methods_hash():
    """The same method on two instances of one class hashes identically."""
    first = Klass()
    second = Klass()
    hash_first = hash(filter_args(first.f, [], (1, )))
    hash_second = hash(filter_args(second.f, [], (1, )))
    assert hash_first == hash_second
示例#22
0
def test_bound_cached_methods_hash():
    """A cached method bound to two different instances of the same class
    must resolve to the same hash."""
    first = KlassWithCachedMethod()
    second = KlassWithCachedMethod()
    nose.tools.assert_equal(hash(filter_args(first.f.func, [], (1, ))),
                            hash(filter_args(second.f.func, [], (1, ))))
示例#23
0
def test_hash_numpy_noncontiguous():
    """A strided view hashes differently from its contiguous copies."""
    view = np.asarray(np.arange(6000).reshape((1000, 2, 3)),
                      order='F')[:, :1, :]
    assert hash(view) != hash(np.ascontiguousarray(view))

    assert hash(view) != hash(np.asfortranarray(view))
示例#24
0
def test_hash_numpy_noncontiguous():
    """Hashing must separate a non-contiguous view from contiguous copies."""
    strided = np.asarray(np.arange(6000).reshape((1000, 2, 3)),
                         order='F')[:, :1, :]
    c_contig = np.ascontiguousarray(strided)
    nose.tools.assert_not_equal(hash(strided), hash(c_contig))

    f_contig = np.asfortranarray(strided)
    nose.tools.assert_not_equal(hash(strided), hash(f_contig))
示例#25
0
def test_bound_cached_methods_hash():
    """Calling the same cached method on two instances of the same class
    yields identical argument hashes."""
    instance_a = KlassWithCachedMethod()
    instance_b = KlassWithCachedMethod()
    digest_a = hash(filter_args(instance_a.f.func, [], (1, )))
    digest_b = hash(filter_args(instance_b.f.func, [], (1, )))
    nose.tools.assert_equal(digest_a, digest_b)
示例#26
0
def test_hash_object_dtype():
    """Object-dtype ndarrays with equal contents hash to the same value."""
    left = np.array([np.arange(length) for length in range(6)], dtype=object)
    right = np.array([np.arange(length) for length in range(6)], dtype=object)

    nose.tools.assert_equal(hash(left), hash(right))
示例#27
0
def test_bound_cached_methods_hash(tmpdir_path):
    """Cached methods bound to different instances hash identically."""
    first = KlassWithCachedMethod(tmpdir_path)
    second = KlassWithCachedMethod(tmpdir_path)
    hash_first = hash(filter_args(first.f.func, [], (1, )))
    hash_second = hash(filter_args(second.f.func, [], (1, )))
    assert hash_first == hash_second
示例#28
0
    def predict(self, X):
        """Return a combined fingerprint of the test and training data.

        Both sparse inputs are densified, hashed with joblib, the digest
        characters are expanded to their ordinal codes, and the two code
        strings are folded together with the built-in ``hash``.
        """
        test_digest = hashing.hash(X.todense())
        test_digest = ''.join([str(ord(ch)) for ch in test_digest])

        train_digest = hashing.hash(self.train_data.todense())
        train_digest = ''.join([str(ord(ch)) for ch in train_digest])

        return hash(test_digest + train_digest)
示例#29
0
def test_bound_cached_methods_hash(tmpdir):
    """The same cached method on two distinct instances must hash alike."""
    obj_one = KlassWithCachedMethod(tmpdir.strpath)
    obj_two = KlassWithCachedMethod(tmpdir.strpath)
    assert (hash(filter_args(obj_one.f.func, [], (1, ))) ==
            hash(filter_args(obj_two.f.func, [], (1, ))))
示例#30
0
def test_dtype():
    """Equal dtypes hash alike regardless of their pickling history;
    guards cache invalidation with compound dtypes."""
    original = np.dtype([('f1', np.uint), ('f2', np.int32)])
    alias = original
    clone = pickle.loads(pickle.dumps(original))
    assert hash([original, clone]) == hash([original, alias])
示例#31
0
def test_string():
    """Containers of equal (immutable) strings hash identically, whatever
    the provenance of the strings."""
    word = 'foo'
    original = {word: 'bar'}
    twin = {word: 'bar'}
    pickled_twin = pickle.loads(pickle.dumps(twin))
    assert hash([original, twin]) == hash([original, pickled_twin])
示例#32
0
def test_bound_methods_hash():
    """Calling the same method on two instances of the same class must
    resolve to the same hash."""
    obj_one = Klass()
    obj_two = Klass()
    digest_one = hash(filter_args(obj_one.f, [], (1, )))
    digest_two = hash(filter_args(obj_two.f, [], (1, )))
    assert digest_one == digest_two
示例#33
0
def test_dtype():
    """Hashes of equal dtypes are identical, whatever their pickle
    history (cache invalidation with compound dtypes)."""
    base = np.dtype([('f1', np.uint), ('f2', np.int32)])
    same_object = base
    from_pickle = pickle.loads(pickle.dumps(base))
    assert hash([base, from_pickle]) == hash([base, same_object])
示例#34
0
def test_hash_numpy_arrays(three_np_arrays):
    """Array hashes agree exactly when array contents agree."""
    arr1, arr2, arr3 = three_np_arrays

    for left, right in itertools.product(three_np_arrays, repeat=2):
        hashes_match = hash(left) == hash(right)
        contents_match = np.all(left == right)
        assert hashes_match == contents_match

    # A transposed view has the same data but must hash differently.
    assert hash(arr1) != hash(arr1.T)
示例#35
0
def test_hash_numpy_dict_of_arrays(three_np_arrays):
    """Dict hashes depend on array contents, not the containing object."""
    first, second, third = three_np_arrays

    dict_a = {1: first, 2: second}
    dict_b = {1: second, 2: first}
    dict_c = {1: second, 2: third}

    assert hash(dict_a) == hash(dict_b)
    assert hash(dict_a) != hash(dict_c)
示例#36
0
def test_hash_numpy_dict_of_arrays(three_np_arrays):
    """Hashing dicts of arrays is content-based."""
    arr_a, arr_b, arr_c = three_np_arrays

    mapping_one = {1: arr_a, 2: arr_b}
    mapping_two = {1: arr_b, 2: arr_a}
    mapping_three = {1: arr_b, 2: arr_c}

    assert hash(mapping_one) == hash(mapping_two)
    assert hash(mapping_one) != hash(mapping_three)
示例#37
0
def test_numpy_datetime_array():
    """datetime64/timedelta64 arrays must hash (and differ from ints).

    memoryview is not supported for these dtypes; see
    https://github.com/joblib/joblib/issues/188 for more details.
    """
    reference_hash = hash(np.arange(10))
    candidates = (np.arange(0, 10, dtype=dtype)
                  for dtype in ['datetime64[s]', 'timedelta64[D]'])
    for candidate in candidates:
        nose.tools.assert_not_equal(hash(candidate), reference_hash)
示例#38
0
def test_hash_numpy_arrays(three_np_arrays):
    """Hash equality must track content equality over all array pairs."""
    arr1, arr2, arr3 = three_np_arrays

    for lhs, rhs in itertools.product(three_np_arrays, repeat=2):
        equal_hashes = hash(lhs) == hash(rhs)
        equal_values = np.all(lhs == rhs)
        assert equal_hashes == equal_values

    # Transposition changes layout, so the hash must change too.
    assert hash(arr1) != hash(arr1.T)
示例#39
0
def test_numpy_datetime_array():
    """Arrays with dtypes lacking memoryview support still hash.

    See https://github.com/joblib/joblib/issues/188 for more details.
    """
    int_array_hash = hash(np.arange(10))
    for dtype in ['datetime64[s]', 'timedelta64[D]']:
        assert hash(np.arange(0, 10, dtype=dtype)) != int_array_hash
示例#40
0
def test_hash_memmap(tmpdir, coerce_mmap):
    """Check that memmap and arrays hash identically if coerce_mmap is True."""
    filename = tmpdir.join('memmap_temp').strpath
    try:
        mmap_arr = np.memmap(filename, shape=(10, 10), mode='w+')
        plain_arr = np.asarray(mmap_arr)
        hashes_match = (hash(plain_arr, coerce_mmap=coerce_mmap) ==
                        hash(mmap_arr, coerce_mmap=coerce_mmap))
        # Hash equality must track the coerce_mmap flag exactly.
        assert hashes_match == coerce_mmap
    finally:
        if 'mmap_arr' in locals():
            del mmap_arr
            # Collect now so the memmap's file handle is really closed
            # before pytest removes tmpdir (matters on Windows).
            gc.collect()
示例#41
0
def test_hashes_stay_the_same():
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    # Seeded RNG so the float list below is reproducible across runs.
    rng = random.Random(42)
    to_hash_list = ['This is a string to hash',
                    u"C'est l\xe9t\xe9",
                    (123456, 54321, -98765),
                    [rng.random() for _ in range(5)],
                    [3, 'abc', None, MyClass(1, 2)],
                    {'abcde': 123, 'sadfas': [-9999, 2, 3]}]

    # These expected results have been generated with joblib 0.9.2
    # (one digest per to_hash_list entry, keyed by Python major version).
    expected_dict = {
        'py2': ['80436ada343b0d79a99bfd8883a96e45',
                '2ff3a25200eb6219f468de2640913c2d',
                '50d81c80af05061ac4dcdc2d5edee6d6',
                '536af09b66a087ed18b515acc17dc7fc',
                'b5547baee3f205fb763e8a97c130c054',
                'fc9314a39ff75b829498380850447047'],
        'py3': ['71b3f47df22cb19431d85d92d0b230b2',
                '2d8d189e9b2b0b2e384d93c868c0e576',
                'e205227dd82250871fa25aa0ec690aa3',
                '9e4e9bf9b91890c9734a6111a35e6633',
                '731fafc4405a6c192c0a85a58c9e7a93',
                'aeda150553d4bb5c69f0e69d51b0e2ef']}

    py_version_str = 'py3' if PY3 else 'py2'
    expected_list = expected_dict[py_version_str]

    # nose-style generator test: one yielded assertion per hashed object.
    for to_hash, expected in zip(to_hash_list, expected_list):
        yield assert_equal, hash(to_hash), expected
示例#42
0
def test_hashes_stay_the_same():
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    # Deterministic seed: the float list below must be reproducible.
    rng = random.Random(42)
    to_hash_list = ['This is a string to hash',
                    u"C'est l\xe9t\xe9",
                    (123456, 54321, -98765),
                    [rng.random() for _ in range(5)],
                    [3, 'abc', None,
                     TransportableException('the message', ValueError)],
                    {'abcde': 123, 'sadfas': [-9999, 2, 3]}]

    # These expected results have been generated with joblib 0.9.2
    # (one md5 digest per to_hash_list entry, per Python major version).
    expected_dict = {
        'py2': ['80436ada343b0d79a99bfd8883a96e45',
                '2ff3a25200eb6219f468de2640913c2d',
                '50d81c80af05061ac4dcdc2d5edee6d6',
                '536af09b66a087ed18b515acc17dc7fc',
                '123ffc6f13480767167e171a8e1f6f4a',
                'fc9314a39ff75b829498380850447047'],
        'py3': ['71b3f47df22cb19431d85d92d0b230b2',
                '2d8d189e9b2b0b2e384d93c868c0e576',
                'e205227dd82250871fa25aa0ec690aa3',
                '9e4e9bf9b91890c9734a6111a35e6633',
                '6065a3c48e842ea5dee2cfd0d6820ad6',
                'aeda150553d4bb5c69f0e69d51b0e2ef']}

    py_version_str = 'py3' if PY3 else 'py2'
    expected_list = expected_dict[py_version_str]

    # Generator (nose) test: one yielded check per object.
    for to_hash, expected in zip(to_hash_list, expected_list):
        yield assert_equal, hash(to_hash), expected
示例#43
0
def test_hashes_stay_the_same():
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    # Fixed seed keeps the random-float entry reproducible.
    rng = random.Random(42)
    to_hash_list = ['This is a string to hash',
                    u"C'est l\xe9t\xe9",
                    (123456, 54321, -98765),
                    [rng.random() for _ in range(5)],
                    [3, 'abc', None,
                     TransportableException('the message', ValueError)],
                    {'abcde': 123, 'sadfas': [-9999, 2, 3]}]

    # These expected results have been generated with joblib 0.9.2
    # (digest tables keyed by Python major version).
    expected_dict = {
        'py2': ['80436ada343b0d79a99bfd8883a96e45',
                '2ff3a25200eb6219f468de2640913c2d',
                '50d81c80af05061ac4dcdc2d5edee6d6',
                '536af09b66a087ed18b515acc17dc7fc',
                '123ffc6f13480767167e171a8e1f6f4a',
                'fc9314a39ff75b829498380850447047'],
        'py3': ['71b3f47df22cb19431d85d92d0b230b2',
                '2d8d189e9b2b0b2e384d93c868c0e576',
                'e205227dd82250871fa25aa0ec690aa3',
                '9e4e9bf9b91890c9734a6111a35e6633',
                '6065a3c48e842ea5dee2cfd0d6820ad6',
                'aeda150553d4bb5c69f0e69d51b0e2ef']}

    py_version_str = 'py3' if PY3_OR_LATER else 'py2'
    expected_list = expected_dict[py_version_str]

    # nose-style generator test: one yielded assertion per hashed object.
    for to_hash, expected in zip(to_hash_list, expected_list):
        yield assert_equal, hash(to_hash), expected
示例#44
0
def test_hashes_stay_the_same(to_hash, expected):
    """Hashes must not change across joblib versions; otherwise end users
    would have to regenerate their caches from scratch, which potentially
    means lengthy recomputations.

    Expected digests were generated with joblib 0.9.2.
    """
    assert hash(to_hash) == expected
示例#45
0
def test_hash_memmap(tmpdir, coerce_mmap):
    """Check that memmap and arrays hash identically if coerce_mmap is True."""
    filename = tmpdir.join('memmap_temp').strpath
    try:
        mapped = np.memmap(filename, shape=(10, 10), mode='w+')
        as_array = np.asarray(mapped)
        equal_hashes = (hash(as_array, coerce_mmap=coerce_mmap) ==
                        hash(mapped, coerce_mmap=coerce_mmap))
        assert equal_hashes == coerce_mmap
    finally:
        if 'mapped' in locals():
            del mapped
            # Force a collection so the memmap's handle is closed before
            # tmpdir cleanup (avoids open-file problems on Windows).
            gc.collect()
示例#46
0
def test_hashes_stay_the_same_with_numpy_objects():
    """Numpy-object hashes must stay stable across joblib versions."""
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    rng = np.random.RandomState(42)
    # Being explicit about dtypes in order to avoid
    # architecture-related differences. Also using 'f4' rather than
    # 'f8' for float arrays because 'f8' arrays generated by
    # rng.random.randn don't seem to be bit-identical on 32bit and
    # 64bit machines.
    to_hash_list = [
        rng.randint(-1000, high=1000, size=50).astype('<i8'),
        tuple(rng.randn(3).astype('<f4') for _ in range(5)),
        [rng.randn(3).astype('<f4') for _ in range(5)],
        {
            -3333:
            rng.randn(3, 5).astype('<f4'),
            0: [
                rng.randint(10, size=20).astype('<i8'),
                rng.randn(10).astype('<f4')
            ]
        },
        # Non regression cases for https://github.com/joblib/joblib/issues/308.
        # Generated with joblib 0.9.4.
        np.arange(100, dtype='<i8').reshape((10, 10)),
        # Fortran contiguous array
        np.asfortranarray(np.arange(100, dtype='<i8').reshape((10, 10))),
        # Non contiguous array
        np.arange(100, dtype='<i8').reshape((10, 10))[:, :2],
    ]

    # These expected results have been generated with joblib 0.9.0
    # (one digest per to_hash_list entry, per Python major version).
    expected_dict = {
        'py2': [
            '80f2387e7752abbda2658aafed49e086',
            '0d700f7f25ea670fd305e4cd93b0e8cd',
            '83a2bdf843e79e4b3e26521db73088b9',
            '63e0efd43c0a9ad92a07e8ce04338dd3',
            '03fef702946b602c852b8b4e60929914',
            '07074691e90d7098a85956367045c81e',
            'd264cf79f353aa7bbfa8349e3df72d8f'
        ],
        'py3': [
            '10a6afc379ca2708acfbaef0ab676eab',
            '988a7114f337f381393025911ebc823b',
            'c6809f4b97e35f2fa0ee8d653cbd025c',
            'b3ad17348e32728a7eb9cda1e7ede438',
            '927b3e6b0b6a037e8e035bda134e0b05',
            '108f6ee98e7db19ea2006ffd208f4bf1',
            'bd48ccaaff28e16e6badee81041b7180'
        ]
    }

    py_version_str = 'py3' if PY3_OR_LATER else 'py2'
    expected_list = expected_dict[py_version_str]

    for to_hash, expected in zip(to_hash_list, expected_list):
        assert hash(to_hash) == expected
示例#47
0
def test_trival_hash():
    """ Smoke test hash on various types.
    """
    samples = [1, 2, 1., 2., 1 + 1j, 2. + 1j,
               'a', 'b',
               (1, ), (1, 1, ), [1, ], [1, 1, ],
               {1: 1}, {1: 2}, {2: 1},
               None,
               gc.collect,
               [1, ].append,
              ]
    for left in samples:
        for right in samples:
            # Hashes coincide exactly when both names denote the very
            # same object.
            yield nose.tools.assert_equal, hash(left) == hash(right), \
                left is right
示例#48
0
def test_trival_hash():
    """ Smoke test hash on various types.
    """
    candidates = [1, 2, 1., 2., 1 + 1j, 2. + 1j,
                  'a', 'b',
                  (1, ), (1, 1, ), [1, ], [1, 1, ],
                  {1: 1}, {1: 2}, {2: 1},
                  None,
                  gc.collect,
                  [1, ].append,
                 ]
    for first in candidates:
        for second in candidates:
            # Two entries hash equal iff they are the same object.
            yield nose.tools.assert_equal, hash(first) == hash(second), \
                first is second
示例#49
0
def test_hashes_stay_the_same(to_hash, expected):
    """Regression digests (generated with joblib 0.9.2) must not drift
    between joblib versions, or user caches would be invalidated."""
    version_key = 'py3' if PY3_OR_LATER else 'py2'
    assert hash(to_hash) == expected[version_key]
示例#50
0
def test_set_hash():
    """Set hashing must not depend on iteration order."""
    cached = KlassWithCachedMethod()

    names = set(['#s12069__c_maps.nii.gz', '#s12158__c_maps.nii.gz',
                 '#s12258__c_maps.nii.gz', '#s12277__c_maps.nii.gz',
                 '#s12300__c_maps.nii.gz', '#s12401__c_maps.nii.gz',
                 '#s12430__c_maps.nii.gz', '#s13817__c_maps.nii.gz',
                 '#s13903__c_maps.nii.gz', '#s13916__c_maps.nii.gz',
                 '#s13981__c_maps.nii.gz', '#s13982__c_maps.nii.gz',
                 '#s13983__c_maps.nii.gz'])

    first = cached.f(names)
    second = cached.f(first)

    nose.tools.assert_equal(hash(first), hash(second))
示例#51
0
def test_set_hash(tmpdir):
    """Sets must hash consistently despite unguaranteed ordering."""
    cached = KlassWithCachedMethod(tmpdir.strpath)

    names = set(['#s12069__c_maps.nii.gz', '#s12158__c_maps.nii.gz',
                 '#s12258__c_maps.nii.gz', '#s12277__c_maps.nii.gz',
                 '#s12300__c_maps.nii.gz', '#s12401__c_maps.nii.gz',
                 '#s12430__c_maps.nii.gz', '#s13817__c_maps.nii.gz',
                 '#s13903__c_maps.nii.gz', '#s13916__c_maps.nii.gz',
                 '#s13981__c_maps.nii.gz', '#s13982__c_maps.nii.gz',
                 '#s13983__c_maps.nii.gz'])

    round_one = cached.f(names)
    round_two = cached.f(round_one)

    assert hash(round_one) == hash(round_two)
示例#52
0
def _fast_hash(obj):
    """
    Returns hash of an arbitrary Python object.

    Works for numpy arrays, pandas objects, custom classes and functions. If an
    object doesn't support hashing natively, use md5-based ``joblib.hashing.hash()``,
    otherwise use the standard ``hash()`` function for the sake of performance.
    """
    try:
        return hash(obj)
    except:
        return hashing.hash(obj)
示例#53
0
def test_memory_pickle_dump_load(tmpdir, memory_kwargs):
    """Memory, MemorizedFunc and MemorizedResult must survive a pickle
    round-trip with equal attributes (store_backend/timestamp excluded)
    and equal joblib hashes.
    """
    memory = Memory(location=tmpdir.strpath, **memory_kwargs)

    memory_reloaded = pickle.loads(pickle.dumps(memory))

    # Compare Memory instance before and after pickle roundtrip
    compare(memory.store_backend, memory_reloaded.store_backend)
    compare(memory,
            memory_reloaded,
            ignored_attrs=set(['store_backend', 'timestamp']))
    assert hash(memory) == hash(memory_reloaded)

    func_cached = memory.cache(f)

    func_cached_reloaded = pickle.loads(pickle.dumps(func_cached))

    # Compare MemorizedFunc instance before/after pickle roundtrip
    compare(func_cached.store_backend, func_cached_reloaded.store_backend)
    compare(func_cached,
            func_cached_reloaded,
            ignored_attrs=set(['store_backend', 'timestamp']))
    assert hash(func_cached) == hash(func_cached_reloaded)

    # Compare MemorizedResult instance before/after pickle roundtrip
    memorized_result = func_cached.call_and_shelve(1)
    memorized_result_reloaded = pickle.loads(pickle.dumps(memorized_result))

    compare(memorized_result.store_backend,
            memorized_result_reloaded.store_backend)
    compare(memorized_result,
            memorized_result_reloaded,
            ignored_attrs=set(['store_backend', 'timestamp']))
    assert hash(memorized_result) == hash(memorized_result_reloaded)
示例#54
0
def test_memory_pickle_dump_load(tmpdir, memory_kwargs):
    """Pickle round-trips of Memory, MemorizedFunc and MemorizedResult
    preserve their attributes (except store_backend/timestamp) and their
    joblib hashes.
    """
    memory = Memory(location=tmpdir.strpath, **memory_kwargs)

    memory_reloaded = pickle.loads(pickle.dumps(memory))

    # Compare Memory instance before and after pickle roundtrip
    compare(memory.store_backend, memory_reloaded.store_backend)
    compare(memory, memory_reloaded,
            ignored_attrs=set(['store_backend', 'timestamp']))
    assert hash(memory) == hash(memory_reloaded)

    func_cached = memory.cache(f)

    func_cached_reloaded = pickle.loads(pickle.dumps(func_cached))

    # Compare MemorizedFunc instance before/after pickle roundtrip
    compare(func_cached.store_backend, func_cached_reloaded.store_backend)
    compare(func_cached, func_cached_reloaded,
            ignored_attrs=set(['store_backend', 'timestamp']))
    assert hash(func_cached) == hash(func_cached_reloaded)

    # Compare MemorizedResult instance before/after pickle roundtrip
    memorized_result = func_cached.call_and_shelve(1)
    memorized_result_reloaded = pickle.loads(pickle.dumps(memorized_result))

    compare(memorized_result.store_backend,
            memorized_result_reloaded.store_backend)
    compare(memorized_result, memorized_result_reloaded,
            ignored_attrs=set(['store_backend', 'timestamp']))
    assert hash(memorized_result) == hash(memorized_result_reloaded)
示例#55
0
def test_hashes_stay_the_same_with_numpy_objects():
    """Numpy-object hashes must not drift between joblib versions."""
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    rng = np.random.RandomState(42)
    # Being explicit about dtypes in order to avoid
    # architecture-related differences. Also using 'f4' rather than
    # 'f8' for float arrays because 'f8' arrays generated by
    # rng.random.randn don't seem to be bit-identical on 32bit and
    # 64bit machines.
    to_hash_list = [
        rng.randint(-1000, high=1000, size=50).astype('<i8'),
        tuple(rng.randn(3).astype('<f4') for _ in range(5)),
        [rng.randn(3).astype('<f4') for _ in range(5)],
        {
            -3333: rng.randn(3, 5).astype('<f4'),
            0: [
                rng.randint(10, size=20).astype('<i8'),
                rng.randn(10).astype('<f4')
            ]
        },
        # Non regression cases for https://github.com/joblib/joblib/issues/308.
        # Generated with joblib 0.9.4.
        np.arange(100, dtype='<i8').reshape((10, 10)),
        # Fortran contiguous array
        np.asfortranarray(np.arange(100, dtype='<i8').reshape((10, 10))),
        # Non contiguous array
        np.arange(100, dtype='<i8').reshape((10, 10))[:, :2],
    ]

    # These expected results have been generated with joblib 0.9.0
    # (one digest per to_hash_list entry, keyed by Python major version).
    expected_dict = {'py2': ['80f2387e7752abbda2658aafed49e086',
                             '0d700f7f25ea670fd305e4cd93b0e8cd',
                             '83a2bdf843e79e4b3e26521db73088b9',
                             '63e0efd43c0a9ad92a07e8ce04338dd3',
                             '03fef702946b602c852b8b4e60929914',
                             '07074691e90d7098a85956367045c81e',
                             'd264cf79f353aa7bbfa8349e3df72d8f'],
                     'py3': ['10a6afc379ca2708acfbaef0ab676eab',
                             '988a7114f337f381393025911ebc823b',
                             'c6809f4b97e35f2fa0ee8d653cbd025c',
                             'b3ad17348e32728a7eb9cda1e7ede438',
                             '927b3e6b0b6a037e8e035bda134e0b05',
                             '108f6ee98e7db19ea2006ffd208f4bf1',
                             'bd48ccaaff28e16e6badee81041b7180']}

    py_version_str = 'py3' if PY3_OR_LATER else 'py2'
    expected_list = expected_dict[py_version_str]

    # nose-style generator test: one yielded assertion per hashed object.
    for to_hash, expected in zip(to_hash_list, expected_list):
        yield assert_equal, hash(to_hash), expected
示例#56
0
def test_trival_hash():
    """ Smoke test hash on various types.

    Yields one (assert_equal, got, want) check per ordered pair of
    candidate objects: two objects must hash equal iff they are the
    very same object.
    """
    candidates = [1, 2, 1., 2., 1 + 1j, 2. + 1j,
                  'a', 'b',
                  (1, ), (1, 1, ), [1, ], [1, 1, ],
                  {1: 1}, {1: 2}, {2: 1},
                  None,
                  gc.collect,
                  [1, ].append,
                  # The next two sets contain elements that cannot be
                  # ordered relative to each other under python 3.
                  set(('a', 1)),
                  set(('a', 1, ('a', 1))),
                  # The next two dicts mix key types that cannot be
                  # ordered relative to each other under python 3.
                  {'a': 1, 1: 2},
                  {'a': 1, 1: 2, 'd': {'a': 1}},
                  ]
    for left in candidates:
        for right in candidates:
            # Hashes may only collide when both sides are the exact
            # same object (identity, not mere equality).
            yield assert_equal, hash(left) == hash(right), left is right
示例#57
0
def test_hashes_stay_the_same_with_numpy_objects():
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    rng = np.random.RandomState(42)
    # Being explicit about dtypes in order to avoid
    # architecture-related differences. Also using 'f4' rather than
    # 'f8' for float arrays because 'f8' arrays generated by
    # rng.random.randn don't seem to be bit-identical on 32bit and
    # 64bit machines.

    # NOTE: the rng draws below must stay in this exact order so the
    # generated values (and therefore the digests) are reproducible.
    inputs = [
        rng.randint(-1000, high=1000, size=50).astype('<i8'),
        tuple(rng.randn(3).astype('<f4') for _ in range(5)),
        [rng.randn(3).astype('<f4') for _ in range(5)],
        {
            -3333:
            rng.randn(3, 5).astype('<f4'),
            0: [
                rng.randint(10, size=20).astype('<i8'),
                rng.randn(10).astype('<f4')
            ]
        },
        # Lambda hashing is only pinned for these interpreter versions;
        # None marks the case as skipped elsewhere.
        (lambda: 1) if sys.version[:3] in ('2.7', '3.4', '3.5') else None,
    ]

    # These expected results have been generated with joblib 0.9.0
    known_digests = {
        'py2': [
            '80f2387e7752abbda2658aafed49e086',
            '0d700f7f25ea670fd305e4cd93b0e8cd',
            '83a2bdf843e79e4b3e26521db73088b9',
            '63e0efd43c0a9ad92a07e8ce04338dd3',
            '019da8de01773a6eb314174b9cbb30ee'
        ],
        'py3': [
            '10a6afc379ca2708acfbaef0ab676eab',
            '988a7114f337f381393025911ebc823b',
            'c6809f4b97e35f2fa0ee8d653cbd025c',
            'b3ad17348e32728a7eb9cda1e7ede438',
            '660d98403f0771ee4093f3c781042181'
        ]
    }

    digests = known_digests['py3' if PY3 else 'py2']

    for obj, digest in zip(inputs, digests):
        if obj is None:
            # Case intentionally skipped on this interpreter version.
            continue
        yield assert_equal, hash(obj), digest
示例#58
0
def test_set_hash():
    # Check that sets hash consistently, even though their ordering
    # is not guaranteed.
    klass = KlassWithCachedMethod()

    filenames = {'#s12069__c_maps.nii.gz',
                 '#s12158__c_maps.nii.gz',
                 '#s12258__c_maps.nii.gz',
                 '#s12277__c_maps.nii.gz',
                 '#s12300__c_maps.nii.gz',
                 '#s12401__c_maps.nii.gz',
                 '#s12430__c_maps.nii.gz',
                 '#s13817__c_maps.nii.gz',
                 '#s13903__c_maps.nii.gz',
                 '#s13916__c_maps.nii.gz',
                 '#s13981__c_maps.nii.gz',
                 '#s13982__c_maps.nii.gz',
                 '#s13983__c_maps.nii.gz'}

    # Round-trip through the cached method: a set in, then the cached
    # result fed back in, must produce identical hashes.
    first = klass.f(filenames)
    second = klass.f(first)

    nose.tools.assert_equal(hash(first), hash(second))
示例#59
0
def test_set_hash(tmpdir_path):
    # Check that sets hash consistently, even though their ordering
    # is not guaranteed
    klass = KlassWithCachedMethod(tmpdir_path)

    filenames = {'#s12069__c_maps.nii.gz',
                 '#s12158__c_maps.nii.gz',
                 '#s12258__c_maps.nii.gz',
                 '#s12277__c_maps.nii.gz',
                 '#s12300__c_maps.nii.gz',
                 '#s12401__c_maps.nii.gz',
                 '#s12430__c_maps.nii.gz',
                 '#s13817__c_maps.nii.gz',
                 '#s13903__c_maps.nii.gz',
                 '#s13916__c_maps.nii.gz',
                 '#s13981__c_maps.nii.gz',
                 '#s13982__c_maps.nii.gz',
                 '#s13983__c_maps.nii.gz'}

    # Feed the set through the cached method, then feed the result
    # back in: both outputs must hash identically.
    first = klass.f(filenames)
    second = klass.f(first)

    assert hash(first) == hash(second)
示例#60
0
def test_dict_hash(tmpdir_path):
    # Check that dictionaries hash consistently, even though the ordering
    # of the keys is not guaranteed
    klass = KlassWithCachedMethod(tmpdir_path)

    # Build the mapping with a fresh [33] list per key, same keys and
    # insertion order as before.
    mapping = {name: [33] for name in ('#s12069__c_maps.nii.gz',
                                       '#s12158__c_maps.nii.gz',
                                       '#s12258__c_maps.nii.gz',
                                       '#s12277__c_maps.nii.gz',
                                       '#s12300__c_maps.nii.gz',
                                       '#s12401__c_maps.nii.gz',
                                       '#s12430__c_maps.nii.gz',
                                       '#s13817__c_maps.nii.gz',
                                       '#s13903__c_maps.nii.gz',
                                       '#s13916__c_maps.nii.gz',
                                       '#s13981__c_maps.nii.gz',
                                       '#s13982__c_maps.nii.gz',
                                       '#s13983__c_maps.nii.gz')}

    # Round-trip through the cached method: the dict and the cached
    # result must hash the same.
    first = klass.f(mapping)
    second = klass.f(first)

    assert hash(first) == hash(second)