def test_migrate_unhashed_name(self):
    """Check a dataset stored with unhashed names survives the switch to hashed names.

    The dataset is written with OMEGA_STORE_HASHEDNAMES disabled, read back
    with the flag enabled (before and after migrate_unhashed_datasets), and
    finally re-stored with the flag enabled so the collection recorded by the
    migration can be compared with the one a fresh hashed put produces.
    """
    store = OmegaStore(bucket='foo', prefix='foo/')
    df = pd.DataFrame({'x': range(100)})
    name = 'a' * 10
    # save as unhashed (old version)
    store.defaults.OMEGA_STORE_HASHEDNAMES = False
    meta_unhashed = store.put(df, name)
    # simulate upgrade, no migration: retrieval must still work
    store.defaults.OMEGA_STORE_HASHEDNAMES = True
    assert_frame_equal(df, store.get(name))
    # migrate with hashed names enabled (flag set again explicitly, as before)
    store.defaults.OMEGA_STORE_HASHEDNAMES = True
    migrate_unhashed_datasets(store)
    meta_migrated = store.metadata(name)
    # retrieval must still work after the migration
    assert_frame_equal(df, store.get(name))
    # store again, now hashed, to obtain the reference collection
    meta_hashed = store.put(df, name, append=False)
    # the migration moved the dataset away from the unhashed collection ...
    self.assertNotEqual(meta_unhashed.collection, meta_hashed.collection)
    # ... to the same collection a hashed put uses
    self.assertEqual(meta_migrated.collection, meta_hashed.collection)
def test_store_with_attributes(self):
    """Check that attributes passed to put() show up on the stored metadata.

    A first put without attributes must leave metadata.attributes empty;
    each following put with an attributes dict must be reflected by
    store.metadata('foo').attributes.
    """
    frame = pd.DataFrame({'a': list(range(1, 10)), 'b': list(range(1, 10))})
    store = OmegaStore()
    # store the object, no attributes
    store.put(frame, 'foo', append=False)
    self.assertEqual(store.metadata('foo').attributes, {})
    # update attributes twice; the second update changes 'foo' and adds a key
    updates = (
        {'foo': 'bar'},
        {'foo': 'bax', 'foobar': 'barbar'},
    )
    for expected in updates:
        # hand put() its own dict so the expectation is an independent object
        store.put(frame, 'foo', append=False, attributes=dict(expected))
        self.assertEqual(store.metadata('foo').attributes, expected)
def test_long_dataset_name_hdf(self):
    """Check gridfile naming of HDF datasets with long names, hashed and unhashed.

    With OMEGA_STORE_HASHEDNAMES enabled the gridfile name must differ from
    the (very long) dataset name; with the flag disabled it must equal
    store._get_obj_store_key(name, '.hdf').
    """
    store = OmegaStore(bucket='foo', prefix='foo/')
    df = pd.DataFrame({'xyz' * 100: range(100)})
    # limited by index key limit in MongoDB
    # see https://docs.mongodb.com/manual/reference/limits/#Index-Key-Limit
    long_name = 'a' * 990
    # hashed names
    store.defaults.OMEGA_STORE_HASHEDNAMES = True
    store.put(df, long_name, as_hdf=True)
    meta = store.metadata(long_name)
    self.assertNotEqual(meta.gridfile.name, long_name)
    # unhashed names
    store.defaults.OMEGA_STORE_HASHEDNAMES = False
    long_name = 'a' * 200
    store.put(df, long_name, as_hdf=True)
    meta = store.metadata(long_name)
    self.assertEqual(meta.gridfile.name,
                     store._get_obj_store_key(long_name, '.hdf'))
def test_store_dataframe_as_hdf(self):
    """Check storing a DataFrame as HDF and the failure mode when its file is gone.

    First part: put(as_hdf=True) reports kind 'pandas.hdf', the gridfile
    is listed by store.fs and get() returns an equal frame.
    Second part: after deleting the gridfs file of a second dataset, get()
    from a new OmegaStore must raise gridfs.errors.NoFile.
    """
    frame = pd.DataFrame({'a': list(range(1, 10)), 'b': list(range(1, 10))})
    store = OmegaStore()
    put_meta = store.put(frame, 'foo', as_hdf=True)
    self.assertEqual(put_meta.kind, 'pandas.hdf')
    # make sure the hdf file is actually there
    stored_meta = store.metadata('foo')
    self.assertIn(stored_meta.gridfile.name, store.fs.list())
    restored = store.get('foo')
    self.assertTrue(frame.equals(restored), "dataframes differ")
    # test for non-existent file raises exception
    doomed = 'foo_will_be_removed'
    store.put(restored, doomed, as_hdf=True)
    doomed_meta = store.metadata(doomed)
    last_version = store.fs.get_last_version(doomed_meta.gridfile.name)
    store.fs.delete(last_version._id)
    fresh_store = OmegaStore()
    with self.assertRaises(gridfs.errors.NoFile):
        fresh_store.get(doomed)
    # test hdf file is not there
    # NOTE(review): 'hdfdf.hdf' is never stored by this test, so this check
    # looks vacuous -- presumably the deleted gridfile name was meant; confirm.
    self.assertNotIn('hdfdf.hdf', fresh_store.fs.list())
def test_migrate_unhashed_name_hdf(self):
    """Check an HDF dataset stored unhashed stays readable and can be re-stored hashed.

    The dataset is written with OMEGA_STORE_HASHEDNAMES disabled, read back
    with the flag enabled, then replaced with the flag enabled; the gridfile
    names recorded before and after must differ.
    """
    store = OmegaStore(bucket='foo', prefix='foo/')
    df = pd.DataFrame({'x': range(100)})
    name = 'a' * 10
    # save as unhashed (old version)
    store.defaults.OMEGA_STORE_HASHEDNAMES = False
    store.put(df, name, as_hdf=True)
    meta_unhashed = store.metadata(name)
    # retrieve should still work
    store.defaults.OMEGA_STORE_HASHEDNAMES = True
    assert_frame_equal(df, store.get(name))
    # stored hashed
    store.put(df, name, replace=True, as_hdf=True)
    meta_hashed = store.metadata(name)
    assert_frame_equal(df, store.get(name))
    # check hashing actually worked
    self.assertNotEqual(meta_unhashed.gridfile.name,
                        meta_hashed.gridfile.name)
def test_store_metadata_notstrict(self):
    """
    ensure Metadata attributes are not strictly checked

    An extra field is written straight into the stored metadata document;
    loading that metadata must not raise FieldDoesNotExist. This is to
    allow metadata extensions between omegaml versions.
    """
    om = OmegaStore(prefix='')
    # dict
    payload = {'a': list(range(1, 10)), 'b': list(range(1, 10))}
    om.put(payload, 'data', attributes={'foo': 'bar'})
    # add a field to the raw document, bypassing om
    collection = om.mongodb['metadata']
    query = {'name': 'data'}
    entry = collection.find_one(query)
    entry['modified_extra'] = entry['modified']
    collection.replace_one(query, entry)
    # loading must tolerate the extra field
    not_raised = True
    try:
        om.metadata('data')
    except FieldDoesNotExist:
        not_raised = False
    self.assertTrue(not_raised)