def test_max_revision_from_head(self):
    """After one update, the pickled content of the head file equals 0."""
    variable = Data(name='my_variable', namespace='raw')
    variable.update(data=1)

    head_path = os.path.join('.vdata', 'raw', 'my_variable.head')
    with open(head_path, 'rb') as head_file:
        stored_revision = pickle.load(head_file)

    self.assertEqual(stored_revision, 0)
def test_update_file(self):
    """
    Update from a file path and expect `.vdata/raw/job.0.csv` to exist.
    """
    job = Data(name="job", namespace="raw")
    job.update(file_path='tests/tests.csv')

    stored_copy = os.path.join('.vdata', 'raw', 'job.0.csv')
    self.assertTrue(os.path.isfile(stored_copy))
def test_update_is_variable_created(self):
    """
    Update with a plain value and expect `my_variable.0.vdata` on disk.
    """
    variable = Data(name='my_variable', namespace='raw')
    variable.update(data=42)

    expected_file = os.path.join('.vdata', 'raw', 'my_variable.0.vdata')
    self.assertTrue(os.path.isfile(expected_file))
def test_get_retrieve_specific_version(self):
    """After three updates, `get(revision=1)` returns the second value."""
    variable = Data(name='my_variable', namespace='raw')
    # Three successive updates: 10, then 100, then 1000.
    for value in (10, 100, 1000):
        variable.update(data=value)

    second_value = variable.get(revision=1)
    self.assertEqual(second_value, 100)
def test_update_file_load_dataframe(self):
    """
    Save a CSV file and load it back into a dataframe.

    The value returned by `data.get()` is handed to `pd.read_csv`, and the
    first row's `a` column is expected to equal 42.
    """
    data = Data(name="job", namespace="raw")
    data.update(file_path='tests/tests.csv')

    df = pd.read_csv(data.get())
    a = df.iloc[0]['a']

    # assertEqual, not assertTrue: `assertTrue(a, 42)` treats 42 as the
    # failure *message* and passes for any truthy value of `a`.
    self.assertEqual(a, 42)
def test_get_variable(self):
    """
    Store a value with `update` and expect `get()` to return it.
    """
    variable = Data(name='my_variable', namespace='raw')
    variable.update(data=42)

    # No local copy of the value is kept; it is read back through `get()`.
    restored = variable.get()
    self.assertEqual(restored, 42)
def test_model_loading(self):
    """Fit a regressor on y = 2x, pass it through Data, predict for x = 1."""
    regressor = RandomForestRegressor(random_state=42)
    store = Data(name='my_model', namespace='model')

    # 1000 single-feature samples; each target is twice its sample.
    features = [[random.randint(0, 100)] for _ in range(1000)]
    targets = [np.multiply(sample, 2) for sample in features]
    regressor.fit(features, targets)

    store.update(regressor)
    # Drop the fitted instance so the prediction comes from `store.get()`.
    del regressor

    reloaded = store.get()
    prediction = reloaded.predict([[1]])
    self.assertEqual(prediction, [2])
def test_model_saving(self):
    """Update a model ten times and expect files for revisions 0 and 9."""
    regressor = RandomForestRegressor(random_state=42)
    store = Data(name='my_model', namespace='model')

    for _ in range(10):
        sample = random.randint(0, 1000)
        regressor.fit([[sample]], [sample * 2])
        store.update(regressor)

    # Check the first and the last of the ten expected revision files.
    for filename in ('my_model.0.vdata', 'my_model.9.vdata'):
        revision_file = os.path.join('.vdata', 'model', filename)
        self.assertTrue(os.path.isfile(revision_file))
def test_get_max_version(self):
    """
    After two updates, `get_max_version()` is expected to return 1.
    """
    variable = Data(name='my_variable', namespace='raw')
    for value in (10, 100):
        variable.update(data=value)

    highest = variable.get_max_version()
    self.assertEqual(highest, 1)
def test_get_versions(self):
    """
    After two updates, `get_versions()` is expected to return [0, 1].
    """
    variable = Data(name='my_variable', namespace='raw')
    for value in (10, 100):
        variable.update(data=value)

    known_versions = variable.get_versions()
    self.assertEqual(known_versions, [0, 1])
def test_save_file_ext_and_reload(self):
    """
    Save a CSV file, then read it back through a fresh Data instance.

    A second `Data` object is built with the same name and namespace; the
    value returned by its `get()` is handed to `pd.read_csv`, and the first
    row's `a` column is expected to equal 42.
    """
    data = Data(name="job", namespace="raw")
    data.update(file_path='tests/tests.csv')
    # Discard the instance that performed the update; the read below goes
    # through a newly constructed object.
    del data

    data = Data(name="job", namespace="raw")
    df = pd.read_csv(data.get())
    a = df.iloc[0]['a']

    # assertEqual, not assertTrue: `assertTrue(a, 42)` treats 42 as the
    # failure *message* and passes for any truthy value of `a`.
    self.assertEqual(a, 42)
# Micro-benchmark: time N sequential updates, then N reads by revision,
# and print the accumulated duration (in seconds) of each phase.
import time

from vdata import Data

N = 50_000
SUM_UPDATE = 0
SUM_GET = 0

# instantiate a virtual data
variable = Data(name='variable', namespace='raw')

# time.perf_counter() is the clock intended for measuring short durations:
# it is monotonic and uses the highest available resolution, whereas
# time.time() follows the wall clock and can jump if the clock is adjusted.
for i in range(N):
    t1 = time.perf_counter()
    variable.update(data=i)
    t2 = time.perf_counter()
    SUM_UPDATE += t2 - t1

for i in range(N):
    t1 = time.perf_counter()
    variable.get(revision=i)  # result is not needed, only the timing
    t2 = time.perf_counter()
    SUM_GET += t2 - t1

print(SUM_UPDATE)
print(SUM_GET)
# Example: store a dataset, a fitted model and a prediction in Data objects.
import random

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from vdata import Data

dataset_X = Data(name='dataset_X', namespace='raw')
dataset_y = Data(name='dataset_y', namespace='raw')
data_model = Data(name='my_model', namespace='models')
prediction = Data(name='prediction', namespace='metrics')

# 1000 random integers in [0, 100] as a single-feature column; y = 2 * x.
X = np.array([random.randint(0, 100) for _ in range(1000)]).reshape(-1, 1)
y = [row * 2 for row in X]

dataset_X.update(data=X)
dataset_y.update(data=y)

# Fit on the values read back from the Data objects, not on X and y directly.
model = RandomForestRegressor()
model.fit(dataset_X.get(), dataset_y.get())
data_model.update(data=model)

# Drop the fitted instance and continue with the one returned by get().
del model
model = data_model.get()

x = [[5]]
p = model.predict(x)
prediction.update({'x': x, 'predicted': p})
from vdata import Data

# Two plain Python values to store as successive revisions.
a = 42
b = 43

# instantiate a virtual data
meaning_of_life = Data(name='meaning_of_life', namespace='raw')

# First update: this will create a pickle in
# ./.vdata/raw/meaning_of_life.0.vdata
meaning_of_life.update(data=a)
del a

# The original variable is gone, but its value can still be retrieved.
assert meaning_of_life.get() == 42

# Second update with another value: this will create a pickle in
# ./.vdata/raw/meaning_of_life.1.vdata
meaning_of_life.update(data=b)

# With no argument, or with revision='latest', the newest value comes back.
assert meaning_of_life.get() == 43
assert meaning_of_life.get(revision='latest') == 43

# Each revision can also be requested by its number.
assert meaning_of_life.get(revision=1) == 43
assert meaning_of_life.get(revision=0) == 42
def test_max_revision_saving_head_file(self):
    """After one update, `.vdata/raw/my_variable.head` should exist."""
    variable = Data(name='my_variable', namespace='raw')
    variable.update(data=1)

    head_file = os.path.join('.vdata', 'raw', 'my_variable.head')
    self.assertTrue(os.path.isfile(head_file))
def test_get_head_path(self):
    """`_get_head_path()` should return `.vdata/raw/my_variable.head`."""
    variable = Data(name='my_variable', namespace='raw')
    variable.update(data=1)

    expected_path = os.path.join('.vdata', 'raw', 'my_variable.head')
    actual_path = variable._get_head_path()
    self.assertEqual(actual_path, expected_path)