# NOTE: these snippets assume the usual joblib test-suite imports, e.g.:
#   import collections, gc, io, itertools, os, pickle, random, sys, tempfile
#   import numpy as np
#   from decimal import Decimal
#   from joblib.hashing import hash
#   from joblib.func_inspect import filter_args


def test_hashes_are_different_between_c_and_fortran_contiguous_arrays():
    # We want to be sure that the C-contiguous and F-contiguous versions of
    # the same array produce two different hashes.
    rng = np.random.RandomState(0)
    arr_c = rng.random_sample((10, 10))
    arr_f = np.asfortranarray(arr_c)
    assert hash(arr_c) != hash(arr_f)

def test_hash_object_dtype():
    """ Make sure that ndarrays with dtype ``object`` hash correctly."""
    a = np.array([np.arange(i) for i in range(6)], dtype=object)
    b = np.array([np.arange(i) for i in range(6)], dtype=object)
    nose.tools.assert_equal(hash(a), hash(b))

def test_hash_object_dtype():
    """ Make sure that ndarrays with dtype ``object`` hash correctly."""
    a = np.array([np.arange(i) for i in range(6)], dtype=object)
    b = np.array([np.arange(i) for i in range(6)], dtype=object)
    assert hash(a) == hash(b)

def test_hash_numpy_noncontiguous():
    a = np.asarray(np.arange(6000).reshape((1000, 2, 3)),
                   order='F')[:, :1, :]

    b = np.ascontiguousarray(a)
    assert hash(a) != hash(b)

    c = np.asfortranarray(a)
    assert hash(a) != hash(c)

def test_hash_methods():
    # Check that hashing instance methods works
    # (`unicode` is assumed to come from a Python 2/3 compat shim).
    a = io.StringIO(unicode('a'))
    assert hash(a.flush) == hash(a.flush)
    a1 = collections.deque(range(10))
    a2 = collections.deque(range(9))
    assert hash(a1.extend) != hash(a2.extend)

def test_hash_numpy_noncontiguous():
    a = np.asarray(np.arange(6000).reshape((1000, 2, 3)),
                   order='F')[:, :1, :]

    b = np.ascontiguousarray(a)
    nose.tools.assert_not_equal(hash(a), hash(b))

    c = np.asfortranarray(a)
    nose.tools.assert_not_equal(hash(a), hash(c))

def test_set_decimal_hash():
    # Check that sets containing Decimals hash consistently, even though
    # ordering is not guaranteed
    nose.tools.assert_equal(hash(set([Decimal(0), Decimal('NaN')])),
                            hash(set([Decimal('NaN'), Decimal(0)])))

def test_trival_hash():
    """ Smoke test hash on various types. """
    obj_list = [1, 2, 1., 2., 1 + 1j, 2. + 1j,
                'a', 'b',
                (1, ), (1, 1, ), [1, ], [1, 1, ],
                {1: 1}, {1: 2}, {2: 1},
                None,
                gc.collect,
                [1, ].append,
                # Next 2 sets have unorderable elements in python 3.
                set(('a', 1)),
                set(('a', 1, ('a', 1))),
                # Next 2 dicts have unorderable type of keys in python 3.
                {'a': 1, 1: 2},
                {'a': 1, 1: 2, 'd': {'a': 1}},
                ]
    for obj1 in obj_list:
        for obj2 in obj_list:
            # Check that 2 objects have the same hash only if they are
            # the same.
            yield nose.tools.assert_equal, hash(obj1) == hash(obj2), \
                obj1 is obj2

def test_dict_hash():
    # Check that dictionaries hash consistently, even though the ordering
    # of the keys is not guaranteed
    k = KlassWithCachedMethod()
    d = {'#s12069__c_maps.nii.gz': [33],
         '#s12158__c_maps.nii.gz': [33],
         '#s12258__c_maps.nii.gz': [33],
         '#s12277__c_maps.nii.gz': [33],
         '#s12300__c_maps.nii.gz': [33],
         '#s12401__c_maps.nii.gz': [33],
         '#s12430__c_maps.nii.gz': [33],
         '#s13817__c_maps.nii.gz': [33],
         '#s13903__c_maps.nii.gz': [33],
         '#s13916__c_maps.nii.gz': [33],
         '#s13981__c_maps.nii.gz': [33],
         '#s13982__c_maps.nii.gz': [33],
         '#s13983__c_maps.nii.gz': [33]}
    a = k.f(d)
    b = k.f(a)
    nose.tools.assert_equal(hash(a), hash(b))

def test_hashing_pickling_error():
    def non_picklable():
        return 42

    # `raises` is assumed to be pytest.raises.
    with raises(pickle.PicklingError) as excinfo:
        hash(non_picklable)
    excinfo.match('PicklingError while hashing')

def test_dict_hash(tmpdir):
    # Check that dictionaries hash consistently, even though the ordering
    # of the keys is not guaranteed
    k = KlassWithCachedMethod(tmpdir.strpath)
    d = {'#s12069__c_maps.nii.gz': [33],
         '#s12158__c_maps.nii.gz': [33],
         '#s12258__c_maps.nii.gz': [33],
         '#s12277__c_maps.nii.gz': [33],
         '#s12300__c_maps.nii.gz': [33],
         '#s12401__c_maps.nii.gz': [33],
         '#s12430__c_maps.nii.gz': [33],
         '#s13817__c_maps.nii.gz': [33],
         '#s13903__c_maps.nii.gz': [33],
         '#s13916__c_maps.nii.gz': [33],
         '#s13981__c_maps.nii.gz': [33],
         '#s13982__c_maps.nii.gz': [33],
         '#s13983__c_maps.nii.gz': [33]}
    a = k.f(d)
    b = k.f(a)
    assert hash(a) == hash(b)

def test_hash_methods():
    # Check that hashing instance methods works
    # (`unicode` is assumed to come from a Python 2/3 compat shim).
    a = io.StringIO(unicode('a'))
    nose.tools.assert_equal(hash(a.flush), hash(a.flush))
    a1 = collections.deque(range(10))
    a2 = collections.deque(range(9))
    nose.tools.assert_not_equal(hash(a1.extend), hash(a2.extend))

def test_hash_memmap():
    """ Check that memmap and arrays hash identically if coerce_mmap is
    True.
    """
    filename = tempfile.mktemp(prefix='joblib_test_hash_memmap_')
    try:
        m = np.memmap(filename, shape=(10, 10), mode='w+')
        a = np.asarray(m)
        for coerce_mmap in (False, True):
            yield (assert_equal,
                   hash(a, coerce_mmap=coerce_mmap) ==
                   hash(m, coerce_mmap=coerce_mmap),
                   coerce_mmap)
    finally:
        if 'm' in locals():
            del m
            # Force a garbage-collection cycle, to be certain that the
            # object is deleted, and we don't run into a problem under
            # Windows with a file handle still open.
            gc.collect()
            try:
                os.unlink(filename)
            except OSError as e:
                # Under Windows, some files don't get erased.
                if not os.name == 'nt':
                    raise e

def test_hash_memmap():
    """ Check that memmap and arrays hash identically if coerce_mmap is
    True.
    """
    filename = tempfile.mktemp(prefix='joblib_test_hash_memmap_')
    try:
        m = np.memmap(filename, shape=(10, 10), mode='w+')
        a = np.asarray(m)
        for coerce_mmap in (False, True):
            yield (nose.tools.assert_equal,
                   hash(a, coerce_mmap=coerce_mmap) ==
                   hash(m, coerce_mmap=coerce_mmap),
                   coerce_mmap)
    finally:
        if 'm' in locals():
            del m
            # Force a garbage-collection cycle, to be certain that the
            # object is deleted, and we don't run into a problem under
            # Windows with a file handle still open.
            gc.collect()
            try:
                os.unlink(filename)
            except OSError as e:
                # Under Windows, some files don't get erased.
                if not os.name == 'nt':
                    raise e

def test_string():
    # Test that we obtain the same hash for objects owning several strings,
    # whatever the past of these strings (which are immutable in Python)
    string = 'foo'
    a = {string: 'bar'}
    b = {string: 'bar'}
    c = pickle.loads(pickle.dumps(b))
    assert hash([a, b]) == hash([a, c])

def test_bound_methods_hash():
    """ Make sure that calling the same method on two different instances
    of the same class does resolve to the same hashes.
    """
    a = Klass()
    b = Klass()
    assert (hash(filter_args(a.f, [], (1, ))) ==
            hash(filter_args(b.f, [], (1, ))))

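# `Klass` is referenced by test_bound_methods_hash but not shown in this
# section. A minimal sketch of what it plausibly looks like (hypothetical,
# following the conventions of the surrounding tests):
class Klass(object):

    def f(self, x):
        return x
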
def test_bound_cached_methods_hash():
    """ Make sure that calling the same _cached_ method on two different
    instances of the same class does resolve to the same hashes.
    """
    a = KlassWithCachedMethod()
    b = KlassWithCachedMethod()
    nose.tools.assert_equal(hash(filter_args(a.f.func, [], (1, ))),
                            hash(filter_args(b.f.func, [], (1, ))))

def test_bound_cached_methods_hash(tmpdir_path):
    """ Make sure that calling the same _cached_ method on two different
    instances of the same class does resolve to the same hashes.
    """
    a = KlassWithCachedMethod(tmpdir_path)
    b = KlassWithCachedMethod(tmpdir_path)
    assert (hash(filter_args(a.f.func, [], (1, ))) ==
            hash(filter_args(b.f.func, [], (1, ))))

def predict(self, X):
    # Mix the md5 digests of the (densified) test data and of the stored
    # training data into a single builtin hash.
    h1 = hashing.hash(X.todense())
    h1 = ''.join([str(ord(x)) for x in h1])
    h2 = hashing.hash(self.train_data.todense())
    h2 = ''.join([str(ord(x)) for x in h2])
    return hash(h1 + h2)

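# `predict` above is a method of a class that is not shown here. A
# hypothetical minimal context, assuming scipy.sparse inputs that support
# `.todense()`:
class SparseHashingEstimator(object):
    # Hypothetical name; the real enclosing class is not in this section.

    def fit(self, X):
        # Keep the sparse training data so `predict` can fold its hash
        # into the returned value.
        self.train_data = X
        return self
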
def test_bound_cached_methods_hash(tmpdir):
    """ Make sure that calling the same _cached_ method on two different
    instances of the same class does resolve to the same hashes.
    """
    a = KlassWithCachedMethod(tmpdir.strpath)
    b = KlassWithCachedMethod(tmpdir.strpath)
    assert (hash(filter_args(a.f.func, [], (1, ))) ==
            hash(filter_args(b.f.func, [], (1, ))))

def test_dtype():
    # Test that we obtain the same hash for objects owning several dtypes,
    # whatever the past of these dtypes. Cater for cache invalidation with
    # complex dtypes.
    a = np.dtype([('f1', np.uint), ('f2', np.int32)])
    b = a
    c = pickle.loads(pickle.dumps(a))
    assert hash([a, c]) == hash([a, b])

def test_hash_numpy_arrays(three_np_arrays):
    arr1, arr2, arr3 = three_np_arrays

    for obj1, obj2 in itertools.product(three_np_arrays, repeat=2):
        are_hashes_equal = hash(obj1) == hash(obj2)
        are_arrays_equal = np.all(obj1 == obj2)
        assert are_hashes_equal == are_arrays_equal

    assert hash(arr1) != hash(arr1.T)

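# `three_np_arrays` is a pytest fixture assumed by the tests around it; a
# plausible minimal definition (hypothetical, chosen so that arr1 and arr2
# are equal while arr3 differs, as the assertions require):
@pytest.fixture(scope='function')
def three_np_arrays():
    rng = np.random.RandomState(42)
    arr1 = rng.random_sample((10, 10))
    arr2 = arr1.copy()
    arr3 = arr2.copy()
    arr3[0] += 1
    return arr1, arr2, arr3
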
def test_hash_numpy_dict_of_arrays(three_np_arrays):
    arr1, arr2, arr3 = three_np_arrays

    d1 = {1: arr1, 2: arr2}
    d2 = {1: arr2, 2: arr1}
    d3 = {1: arr2, 2: arr3}

    assert hash(d1) == hash(d2)
    assert hash(d1) != hash(d3)

def test_numpy_datetime_array():
    # memoryview is not supported for some dtypes, e.g. datetime64;
    # see https://github.com/joblib/joblib/issues/188 for more details
    dtypes = ['datetime64[s]', 'timedelta64[D]']
    a_hash = hash(np.arange(10))
    arrays = (np.arange(0, 10, dtype=dtype) for dtype in dtypes)
    for array in arrays:
        nose.tools.assert_not_equal(hash(array), a_hash)

def test_numpy_datetime_array():
    # memoryview is not supported for some dtypes, e.g. datetime64;
    # see https://github.com/joblib/joblib/issues/188 for more details
    dtypes = ['datetime64[s]', 'timedelta64[D]']
    a_hash = hash(np.arange(10))
    arrays = (np.arange(0, 10, dtype=dtype) for dtype in dtypes)
    for array in arrays:
        assert hash(array) != a_hash

def test_hash_memmap(tmpdir, coerce_mmap):
    """Check that memmap and arrays hash identically if coerce_mmap is
    True.
    """
    filename = tmpdir.join('memmap_temp').strpath
    try:
        m = np.memmap(filename, shape=(10, 10), mode='w+')
        a = np.asarray(m)
        are_hashes_equal = (hash(a, coerce_mmap=coerce_mmap) ==
                            hash(m, coerce_mmap=coerce_mmap))
        assert are_hashes_equal == coerce_mmap
    finally:
        if 'm' in locals():
            del m
            # Force a garbage-collection cycle, to be certain that the
            # object is deleted, and we don't run into a problem under
            # Windows with a file handle still open.
            gc.collect()

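# `coerce_mmap` above is a test parameter; the decorator driving it is
# assumed to be along the lines of (hypothetical):
# @pytest.mark.parametrize('coerce_mmap', [True, False])
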
def test_hashes_stay_the_same():
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    rng = random.Random(42)
    to_hash_list = ['This is a string to hash',
                    u"C'est l\xe9t\xe9",
                    (123456, 54321, -98765),
                    [rng.random() for _ in range(5)],
                    [3, 'abc', None, MyClass(1, 2)],
                    {'abcde': 123, 'sadfas': [-9999, 2, 3]}]
    # These expected results have been generated with joblib 0.9.2
    expected_dict = {
        'py2': ['80436ada343b0d79a99bfd8883a96e45',
                '2ff3a25200eb6219f468de2640913c2d',
                '50d81c80af05061ac4dcdc2d5edee6d6',
                '536af09b66a087ed18b515acc17dc7fc',
                'b5547baee3f205fb763e8a97c130c054',
                'fc9314a39ff75b829498380850447047'],
        'py3': ['71b3f47df22cb19431d85d92d0b230b2',
                '2d8d189e9b2b0b2e384d93c868c0e576',
                'e205227dd82250871fa25aa0ec690aa3',
                '9e4e9bf9b91890c9734a6111a35e6633',
                '731fafc4405a6c192c0a85a58c9e7a93',
                'aeda150553d4bb5c69f0e69d51b0e2ef']}

    py_version_str = 'py3' if PY3 else 'py2'
    expected_list = expected_dict[py_version_str]

    for to_hash, expected in zip(to_hash_list, expected_list):
        yield assert_equal, hash(to_hash), expected

def test_hashes_stay_the_same():
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    rng = random.Random(42)
    to_hash_list = ['This is a string to hash',
                    u"C'est l\xe9t\xe9",
                    (123456, 54321, -98765),
                    [rng.random() for _ in range(5)],
                    [3, 'abc', None,
                     TransportableException('the message', ValueError)],
                    {'abcde': 123, 'sadfas': [-9999, 2, 3]}]
    # These expected results have been generated with joblib 0.9.2
    expected_dict = {
        'py2': ['80436ada343b0d79a99bfd8883a96e45',
                '2ff3a25200eb6219f468de2640913c2d',
                '50d81c80af05061ac4dcdc2d5edee6d6',
                '536af09b66a087ed18b515acc17dc7fc',
                '123ffc6f13480767167e171a8e1f6f4a',
                'fc9314a39ff75b829498380850447047'],
        'py3': ['71b3f47df22cb19431d85d92d0b230b2',
                '2d8d189e9b2b0b2e384d93c868c0e576',
                'e205227dd82250871fa25aa0ec690aa3',
                '9e4e9bf9b91890c9734a6111a35e6633',
                '6065a3c48e842ea5dee2cfd0d6820ad6',
                'aeda150553d4bb5c69f0e69d51b0e2ef']}

    py_version_str = 'py3' if PY3 else 'py2'
    expected_list = expected_dict[py_version_str]

    for to_hash, expected in zip(to_hash_list, expected_list):
        yield assert_equal, hash(to_hash), expected

def test_hashes_stay_the_same():
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    rng = random.Random(42)
    to_hash_list = ['This is a string to hash',
                    u"C'est l\xe9t\xe9",
                    (123456, 54321, -98765),
                    [rng.random() for _ in range(5)],
                    [3, 'abc', None,
                     TransportableException('the message', ValueError)],
                    {'abcde': 123, 'sadfas': [-9999, 2, 3]}]
    # These expected results have been generated with joblib 0.9.2
    expected_dict = {
        'py2': ['80436ada343b0d79a99bfd8883a96e45',
                '2ff3a25200eb6219f468de2640913c2d',
                '50d81c80af05061ac4dcdc2d5edee6d6',
                '536af09b66a087ed18b515acc17dc7fc',
                '123ffc6f13480767167e171a8e1f6f4a',
                'fc9314a39ff75b829498380850447047'],
        'py3': ['71b3f47df22cb19431d85d92d0b230b2',
                '2d8d189e9b2b0b2e384d93c868c0e576',
                'e205227dd82250871fa25aa0ec690aa3',
                '9e4e9bf9b91890c9734a6111a35e6633',
                '6065a3c48e842ea5dee2cfd0d6820ad6',
                'aeda150553d4bb5c69f0e69d51b0e2ef']}

    py_version_str = 'py3' if PY3_OR_LATER else 'py2'
    expected_list = expected_dict[py_version_str]

    for to_hash, expected in zip(to_hash_list, expected_list):
        yield assert_equal, hash(to_hash), expected

def test_hashes_stay_the_same(to_hash, expected):
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    # Expected results have been generated with joblib 0.9.2
    assert hash(to_hash) == expected

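# The parametrized variant above is assumed to be driven by the same
# inputs and joblib 0.9.2 digests listed in the generator-style versions
# earlier in this section, e.g. (hypothetical):
# @pytest.mark.parametrize('to_hash,expected',
#                          zip(to_hash_list, expected_list))
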
def test_hash_memmap(tmpdir, coerce_mmap):
    """Check that memmap and arrays hash identically if coerce_mmap is
    True.
    """
    filename = tmpdir.join('memmap_temp').strpath
    try:
        m = np.memmap(filename, shape=(10, 10), mode='w+')
        a = np.asarray(m)
        are_hashes_equal = (hash(a, coerce_mmap=coerce_mmap) ==
                            hash(m, coerce_mmap=coerce_mmap))
        assert are_hashes_equal == coerce_mmap
    finally:
        if 'm' in locals():
            del m
            # Force a garbage-collection cycle, to be certain that the
            # object is deleted, and we don't run into a problem under
            # Windows with a file handle still open.
            gc.collect()

def test_hashes_stay_the_same_with_numpy_objects():
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    rng = np.random.RandomState(42)
    # Being explicit about dtypes in order to avoid
    # architecture-related differences. Also using 'f4' rather than
    # 'f8' for float arrays because 'f8' arrays generated by
    # rng.randn don't seem to be bit-identical on 32bit and
    # 64bit machines.
    to_hash_list = [
        rng.randint(-1000, high=1000, size=50).astype('<i8'),
        tuple(rng.randn(3).astype('<f4') for _ in range(5)),
        [rng.randn(3).astype('<f4') for _ in range(5)],
        {-3333: rng.randn(3, 5).astype('<f4'),
         0: [rng.randint(10, size=20).astype('<i8'),
             rng.randn(10).astype('<f4')]},
        # Non regression cases for
        # https://github.com/joblib/joblib/issues/308.
        # Generated with joblib 0.9.4.
        np.arange(100, dtype='<i8').reshape((10, 10)),
        # Fortran contiguous array
        np.asfortranarray(np.arange(100, dtype='<i8').reshape((10, 10))),
        # Non contiguous array
        np.arange(100, dtype='<i8').reshape((10, 10))[:, :2],
    ]
    # These expected results have been generated with joblib 0.9.0
    expected_dict = {
        'py2': ['80f2387e7752abbda2658aafed49e086',
                '0d700f7f25ea670fd305e4cd93b0e8cd',
                '83a2bdf843e79e4b3e26521db73088b9',
                '63e0efd43c0a9ad92a07e8ce04338dd3',
                '03fef702946b602c852b8b4e60929914',
                '07074691e90d7098a85956367045c81e',
                'd264cf79f353aa7bbfa8349e3df72d8f'],
        'py3': ['10a6afc379ca2708acfbaef0ab676eab',
                '988a7114f337f381393025911ebc823b',
                'c6809f4b97e35f2fa0ee8d653cbd025c',
                'b3ad17348e32728a7eb9cda1e7ede438',
                '927b3e6b0b6a037e8e035bda134e0b05',
                '108f6ee98e7db19ea2006ffd208f4bf1',
                'bd48ccaaff28e16e6badee81041b7180']}

    py_version_str = 'py3' if PY3_OR_LATER else 'py2'
    expected_list = expected_dict[py_version_str]

    for to_hash, expected in zip(to_hash_list, expected_list):
        assert hash(to_hash) == expected

def test_trival_hash():
    """ Smoke test hash on various types. """
    obj_list = [1, 2, 1., 2., 1 + 1j, 2. + 1j,
                'a', 'b',
                (1, ), (1, 1, ), [1, ], [1, 1, ],
                {1: 1}, {1: 2}, {2: 1},
                None,
                gc.collect,
                [1, ].append,
                ]
    for obj1 in obj_list:
        for obj2 in obj_list:
            # Check that 2 objects have the same hash only if they are
            # the same.
            yield nose.tools.assert_equal, hash(obj1) == hash(obj2), \
                obj1 is obj2

def test_hashes_stay_the_same(to_hash, expected):
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    # Expected results have been generated with joblib 0.9.2
    py_version_str = 'py3' if PY3_OR_LATER else 'py2'
    assert hash(to_hash) == expected[py_version_str]

def test_set_hash():
    # Check that sets hash consistently, even though their ordering
    # is not guaranteed
    k = KlassWithCachedMethod()
    s = set(['#s12069__c_maps.nii.gz',
             '#s12158__c_maps.nii.gz',
             '#s12258__c_maps.nii.gz',
             '#s12277__c_maps.nii.gz',
             '#s12300__c_maps.nii.gz',
             '#s12401__c_maps.nii.gz',
             '#s12430__c_maps.nii.gz',
             '#s13817__c_maps.nii.gz',
             '#s13903__c_maps.nii.gz',
             '#s13916__c_maps.nii.gz',
             '#s13981__c_maps.nii.gz',
             '#s13982__c_maps.nii.gz',
             '#s13983__c_maps.nii.gz'])
    a = k.f(s)
    b = k.f(a)
    nose.tools.assert_equal(hash(a), hash(b))

def test_set_hash(tmpdir):
    # Check that sets hash consistently, even though their ordering
    # is not guaranteed
    k = KlassWithCachedMethod(tmpdir.strpath)
    s = set(['#s12069__c_maps.nii.gz',
             '#s12158__c_maps.nii.gz',
             '#s12258__c_maps.nii.gz',
             '#s12277__c_maps.nii.gz',
             '#s12300__c_maps.nii.gz',
             '#s12401__c_maps.nii.gz',
             '#s12430__c_maps.nii.gz',
             '#s13817__c_maps.nii.gz',
             '#s13903__c_maps.nii.gz',
             '#s13916__c_maps.nii.gz',
             '#s13981__c_maps.nii.gz',
             '#s13982__c_maps.nii.gz',
             '#s13983__c_maps.nii.gz'])
    a = k.f(s)
    b = k.f(a)
    assert hash(a) == hash(b)

def _fast_hash(obj):
    """Return a hash of an arbitrary Python object.

    Works for numpy arrays, pandas objects, custom classes and functions.
    If an object doesn't support hashing natively, fall back to the
    md5-based ``joblib.hashing.hash()``; otherwise use the standard
    ``hash()`` function for the sake of performance.
    """
    try:
        return hash(obj)
    except TypeError:
        # Unhashable objects (e.g. numpy arrays) raise TypeError.
        return hashing.hash(obj)

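# A minimal usage sketch for `_fast_hash`, assuming `hashing` is
# `joblib.hashing`: hashable builtins take the fast path, while unhashable
# objects such as numpy arrays fall back to the md5 digest.
#
#     _fast_hash((1, 'a'))       # builtin hash()
#     _fast_hash(np.arange(3))   # joblib.hashing.hash() fallback
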
def test_memory_pickle_dump_load(tmpdir, memory_kwargs):
    memory = Memory(location=tmpdir.strpath, **memory_kwargs)

    memory_reloaded = pickle.loads(pickle.dumps(memory))

    # Compare Memory instance before and after pickle roundtrip
    compare(memory.store_backend, memory_reloaded.store_backend)
    compare(memory, memory_reloaded,
            ignored_attrs=set(['store_backend', 'timestamp']))
    assert hash(memory) == hash(memory_reloaded)

    func_cached = memory.cache(f)

    func_cached_reloaded = pickle.loads(pickle.dumps(func_cached))

    # Compare MemorizedFunc instance before/after pickle roundtrip
    compare(func_cached.store_backend, func_cached_reloaded.store_backend)
    compare(func_cached, func_cached_reloaded,
            ignored_attrs=set(['store_backend', 'timestamp']))
    assert hash(func_cached) == hash(func_cached_reloaded)

    # Compare MemorizedResult instance before/after pickle roundtrip
    memorized_result = func_cached.call_and_shelve(1)
    memorized_result_reloaded = pickle.loads(pickle.dumps(memorized_result))

    compare(memorized_result.store_backend,
            memorized_result_reloaded.store_backend)
    compare(memorized_result, memorized_result_reloaded,
            ignored_attrs=set(['store_backend', 'timestamp']))
    assert hash(memorized_result) == hash(memorized_result_reloaded)

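# `memory_kwargs`, `f` and `compare` above come from the surrounding test
# module, which is not shown. A plausible sketch of the comparison helper
# (hypothetical):
def compare(first, second, ignored_attrs=None):
    # Compare instance attributes, skipping the ignored ones.
    ignored_attrs = ignored_attrs or set()
    for attr in vars(first):
        if attr not in ignored_attrs:
            assert getattr(first, attr) == getattr(second, attr)
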
def test_hashes_stay_the_same_with_numpy_objects():
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    rng = np.random.RandomState(42)
    # Being explicit about dtypes in order to avoid
    # architecture-related differences. Also using 'f4' rather than
    # 'f8' for float arrays because 'f8' arrays generated by
    # rng.randn don't seem to be bit-identical on 32bit and
    # 64bit machines.
    to_hash_list = [
        rng.randint(-1000, high=1000, size=50).astype('<i8'),
        tuple(rng.randn(3).astype('<f4') for _ in range(5)),
        [rng.randn(3).astype('<f4') for _ in range(5)],
        {-3333: rng.randn(3, 5).astype('<f4'),
         0: [rng.randint(10, size=20).astype('<i8'),
             rng.randn(10).astype('<f4')]},
        # Non regression cases for
        # https://github.com/joblib/joblib/issues/308.
        # Generated with joblib 0.9.4.
        np.arange(100, dtype='<i8').reshape((10, 10)),
        # Fortran contiguous array
        np.asfortranarray(np.arange(100, dtype='<i8').reshape((10, 10))),
        # Non contiguous array
        np.arange(100, dtype='<i8').reshape((10, 10))[:, :2],
    ]
    # These expected results have been generated with joblib 0.9.0
    expected_dict = {
        'py2': ['80f2387e7752abbda2658aafed49e086',
                '0d700f7f25ea670fd305e4cd93b0e8cd',
                '83a2bdf843e79e4b3e26521db73088b9',
                '63e0efd43c0a9ad92a07e8ce04338dd3',
                '03fef702946b602c852b8b4e60929914',
                '07074691e90d7098a85956367045c81e',
                'd264cf79f353aa7bbfa8349e3df72d8f'],
        'py3': ['10a6afc379ca2708acfbaef0ab676eab',
                '988a7114f337f381393025911ebc823b',
                'c6809f4b97e35f2fa0ee8d653cbd025c',
                'b3ad17348e32728a7eb9cda1e7ede438',
                '927b3e6b0b6a037e8e035bda134e0b05',
                '108f6ee98e7db19ea2006ffd208f4bf1',
                'bd48ccaaff28e16e6badee81041b7180']}

    py_version_str = 'py3' if PY3_OR_LATER else 'py2'
    expected_list = expected_dict[py_version_str]

    for to_hash, expected in zip(to_hash_list, expected_list):
        yield assert_equal, hash(to_hash), expected

def test_trival_hash():
    """ Smoke test hash on various types. """
    obj_list = [1, 2, 1., 2., 1 + 1j, 2. + 1j,
                'a', 'b',
                (1, ), (1, 1, ), [1, ], [1, 1, ],
                {1: 1}, {1: 2}, {2: 1},
                None,
                gc.collect,
                [1, ].append,
                # Next 2 sets have unorderable elements in python 3.
                set(('a', 1)),
                set(('a', 1, ('a', 1))),
                # Next 2 dicts have unorderable type of keys in python 3.
                {'a': 1, 1: 2},
                {'a': 1, 1: 2, 'd': {'a': 1}},
                ]
    for obj1 in obj_list:
        for obj2 in obj_list:
            # Check that 2 objects have the same hash only if they are
            # the same.
            yield assert_equal, hash(obj1) == hash(obj2), obj1 is obj2

def test_hashes_stay_the_same_with_numpy_objects():
    # We want to make sure that hashes don't change with joblib
    # version. For end users, that would mean that they have to
    # regenerate their cache from scratch, which potentially means
    # lengthy recomputations.
    rng = np.random.RandomState(42)
    # Being explicit about dtypes in order to avoid
    # architecture-related differences. Also using 'f4' rather than
    # 'f8' for float arrays because 'f8' arrays generated by
    # rng.randn don't seem to be bit-identical on 32bit and
    # 64bit machines.
    to_hash_list = [
        rng.randint(-1000, high=1000, size=50).astype('<i8'),
        tuple(rng.randn(3).astype('<f4') for _ in range(5)),
        [rng.randn(3).astype('<f4') for _ in range(5)],
        {-3333: rng.randn(3, 5).astype('<f4'),
         0: [rng.randint(10, size=20).astype('<i8'),
             rng.randn(10).astype('<f4')]},
        (lambda: 1) if sys.version[:3] in ('2.7', '3.4', '3.5') else None,
    ]
    # These expected results have been generated with joblib 0.9.0
    expected_dict = {
        'py2': ['80f2387e7752abbda2658aafed49e086',
                '0d700f7f25ea670fd305e4cd93b0e8cd',
                '83a2bdf843e79e4b3e26521db73088b9',
                '63e0efd43c0a9ad92a07e8ce04338dd3',
                '019da8de01773a6eb314174b9cbb30ee'],
        'py3': ['10a6afc379ca2708acfbaef0ab676eab',
                '988a7114f337f381393025911ebc823b',
                'c6809f4b97e35f2fa0ee8d653cbd025c',
                'b3ad17348e32728a7eb9cda1e7ede438',
                '660d98403f0771ee4093f3c781042181']}

    py_version_str = 'py3' if PY3 else 'py2'
    expected_list = expected_dict[py_version_str]

    for to_hash, expected in zip(to_hash_list, expected_list):
        if to_hash is not None:
            yield assert_equal, hash(to_hash), expected

def test_set_hash():
    # Check that sets hash consistently, even though their ordering
    # is not guaranteed
    k = KlassWithCachedMethod()
    s = set(['#s12069__c_maps.nii.gz',
             '#s12158__c_maps.nii.gz',
             '#s12258__c_maps.nii.gz',
             '#s12277__c_maps.nii.gz',
             '#s12300__c_maps.nii.gz',
             '#s12401__c_maps.nii.gz',
             '#s12430__c_maps.nii.gz',
             '#s13817__c_maps.nii.gz',
             '#s13903__c_maps.nii.gz',
             '#s13916__c_maps.nii.gz',
             '#s13981__c_maps.nii.gz',
             '#s13982__c_maps.nii.gz',
             '#s13983__c_maps.nii.gz'])
    a = k.f(s)
    b = k.f(a)
    nose.tools.assert_equal(hash(a), hash(b))

def test_set_hash(tmpdir_path):
    # Check that sets hash consistently, even though their ordering
    # is not guaranteed
    k = KlassWithCachedMethod(tmpdir_path)
    s = set(['#s12069__c_maps.nii.gz',
             '#s12158__c_maps.nii.gz',
             '#s12258__c_maps.nii.gz',
             '#s12277__c_maps.nii.gz',
             '#s12300__c_maps.nii.gz',
             '#s12401__c_maps.nii.gz',
             '#s12430__c_maps.nii.gz',
             '#s13817__c_maps.nii.gz',
             '#s13903__c_maps.nii.gz',
             '#s13916__c_maps.nii.gz',
             '#s13981__c_maps.nii.gz',
             '#s13982__c_maps.nii.gz',
             '#s13983__c_maps.nii.gz'])
    a = k.f(s)
    b = k.f(a)
    assert hash(a) == hash(b)

def test_dict_hash(tmpdir_path):
    # Check that dictionaries hash consistently, even though the ordering
    # of the keys is not guaranteed
    k = KlassWithCachedMethod(tmpdir_path)
    d = {'#s12069__c_maps.nii.gz': [33],
         '#s12158__c_maps.nii.gz': [33],
         '#s12258__c_maps.nii.gz': [33],
         '#s12277__c_maps.nii.gz': [33],
         '#s12300__c_maps.nii.gz': [33],
         '#s12401__c_maps.nii.gz': [33],
         '#s12430__c_maps.nii.gz': [33],
         '#s13817__c_maps.nii.gz': [33],
         '#s13903__c_maps.nii.gz': [33],
         '#s13916__c_maps.nii.gz': [33],
         '#s13981__c_maps.nii.gz': [33],
         '#s13982__c_maps.nii.gz': [33],
         '#s13983__c_maps.nii.gz': [33]}
    a = k.f(d)
    b = k.f(a)
    assert hash(a) == hash(b)

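# `KlassWithCachedMethod` is used throughout this section but never
# defined in it. A minimal sketch consistent with how the tests call it
# (hypothetical, assuming joblib.Memory; older variants apparently took
# no constructor argument):
class KlassWithCachedMethod(object):

    def __init__(self, cachedir):
        mem = Memory(location=cachedir)
        self.f = mem.cache(self.f)

    def f(self, x):
        return x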