示例#1
0
    def test_max_revision_from_head(self):
        """
        Unpickle the ``.head`` file written by a single update and expect 0.
        """
        variable = Data(name='my_variable', namespace='raw')
        variable.update(data=1)

        head_path = os.path.join('.vdata', 'raw', 'my_variable.head')
        with open(head_path, 'rb') as head_file:
            stored_revision = pickle.load(head_file)

        self.assertEqual(stored_revision, 0)
示例#2
0
    def test_update_file(self):
        """
        Update from a file path and expect ``job.0.csv`` in the store.
        """
        job = Data(name="job", namespace="raw")
        job.update(file_path='tests/tests.csv')

        stored_copy = os.path.join('.vdata', 'raw', 'job.0.csv')
        self.assertTrue(os.path.isfile(stored_copy))
示例#3
0
    def test_update_is_variable_created(self):
        """
        Update with an in-memory value and expect a ``.vdata`` file on disk.
        """
        value = 42
        variable = Data(name='my_variable', namespace='raw')
        variable.update(data=value)

        expected_file = os.path.join('.vdata', 'raw', 'my_variable.0.vdata')
        self.assertTrue(os.path.isfile(expected_file))
示例#4
0
    def test_get_retrieve_specific_version(self):
        """
        After three updates, asking for revision 1 returns the second value.
        """
        variable = Data(name='my_variable', namespace='raw')
        for value in (10, 100, 1000):
            variable.update(data=value)

        second_revision = variable.get(revision=1)
        self.assertEqual(second_revision, 100)
示例#5
0
    def test_update_file_load_dataframe(self):
        """
        Save a CSV file and load it back into a dataframe.

        The first row of column ``a`` is expected to equal 42.
        """
        data = Data(name="job", namespace="raw")

        data.update(file_path='tests/tests.csv')
        # The result of get() is handed straight to pandas, so it must be
        # something read_csv accepts (presumably the stored file's path --
        # TODO confirm against Data.get).
        df = pd.read_csv(data.get())

        a = df.iloc[0]['a']

        # assertTrue(a, 42) would use 42 as the failure message and pass for
        # any truthy value; compare the two values explicitly instead.
        self.assertEqual(a, 42)
示例#6
0
    def test_get_variable(self):
        """
        A value passed to update() is returned unchanged by get().
        """
        original = 42
        variable = Data(name='my_variable', namespace='raw')
        variable.update(data=original)

        # Drop the local binding so the value below can only come from get().
        del original

        restored = variable.get()
        self.assertEqual(restored, 42)
示例#7
0
    def test_model_loading(self):
        """
        Fit a regressor on y = 2x, push it through update()/get(), and check
        that the object returned by get() still predicts 2 for the input 1.
        """
        regressor = RandomForestRegressor(random_state=42)
        stored_model = Data(name='my_model', namespace='model')

        features = [[random.randint(0, 100)] for _ in range(1000)]
        targets = [np.multiply(row, 2) for row in features]
        regressor.fit(features, targets)

        stored_model.update(regressor)
        # Remove the fitted instance so the prediction must use the copy
        # handed back by get().
        del regressor

        reloaded = stored_model.get()
        prediction = reloaded.predict([[1]])

        self.assertEqual(prediction, [2])
示例#8
0
    def test_model_saving(self):
        """
        Update the same Data ten times with a refitted model and expect the
        files for revisions 0 and 9 to exist.
        """
        regressor = RandomForestRegressor(random_state=42)
        stored_model = Data(name='my_model', namespace='model')

        for _ in range(10):
            sample = random.randint(0, 1000)
            regressor.fit([[sample]], [sample * 2])
            stored_model.update(regressor)

        model_dir = os.path.join('.vdata', 'model')
        for filename in ('my_model.0.vdata', 'my_model.9.vdata'):
            self.assertTrue(os.path.isfile(os.path.join(model_dir, filename)))
示例#9
0
    def test_get_max_version(self):
        """
        After two updates get_max_version() is expected to return 1.
        """
        variable = Data(name='my_variable', namespace='raw')
        for value in (10, 100):
            variable.update(data=value)

        highest = variable.get_max_version()
        self.assertEqual(highest, 1)
示例#10
0
    def test_get_versions(self):
        """
        After two updates get_versions() is expected to return [0, 1].
        """
        variable = Data(name='my_variable', namespace='raw')
        for value in (10, 100):
            variable.update(data=value)

        known_versions = variable.get_versions()
        self.assertEqual(known_versions, [0, 1])
示例#11
0
    def test_save_file_ext_and_reload(self):
        """
        Save a CSV file, recreate the Data object, and read the file back.

        A second ``Data`` built with the same name and namespace must still
        give access to the stored CSV; the first row of column ``a`` is
        expected to equal 42.
        """
        data = Data(name="job", namespace="raw")

        data.update(file_path='tests/tests.csv')

        # Discard the instance that performed the update so the read below
        # goes through a freshly constructed object.
        del data

        data = Data(name="job", namespace="raw")
        df = pd.read_csv(data.get())

        a = df.iloc[0]['a']

        # assertTrue(a, 42) would use 42 as the failure message and pass for
        # any truthy value; compare the two values explicitly instead.
        self.assertEqual(a, 42)
示例#12
0
from vdata import Data
import time


# Micro-benchmark: total time spent in N update() calls, then in N get()
# calls (one per revision written).
N = 50_000
SUM_UPDATE = 0
SUM_GET = 0

# instantiate a virtual data
variable = Data(name='variable', namespace='raw')

# Intervals are measured with time.perf_counter() rather than time.time():
# it is monotonic and uses the highest-resolution clock available, so the
# accumulated totals cannot be skewed by wall-clock adjustments.
for i in range(N):
    t1 = time.perf_counter()
    variable.update(data=i)
    t2 = time.perf_counter()

    SUM_UPDATE += t2 - t1

for i in range(N):
    t1 = time.perf_counter()
    variable.get(revision=i)
    t2 = time.perf_counter()

    SUM_GET += t2 - t1


# Totals are in seconds.
print(SUM_UPDATE)
print(SUM_GET)
示例#13
0
import numpy as np
import random
from vdata import Data

from sklearn.ensemble import RandomForestRegressor

# One Data object per artefact of the pipeline: inputs, targets, the fitted
# model and the final prediction.
dataset_X = Data(name='dataset_X', namespace='raw')
dataset_y = Data(name='dataset_y', namespace='raw')
data_model = Data(name='my_model', namespace='models')
prediction = Data(name='prediction', namespace='metrics')

# 1000 random integers in [0, 100] as a single-column matrix; targets are 2x.
X = np.array([random.randint(0, 100) for _ in range(1000)]).reshape(-1, 1)
y = [row * 2 for row in X]

dataset_X.update(data=X)
dataset_y.update(data=y)

# Train on what get() returns rather than on the in-memory arrays.
model = RandomForestRegressor()
model.fit(dataset_X.get(), dataset_y.get())

data_model.update(data=model)

# Drop the fitted instance and continue with the one returned by get().
del model
model = data_model.get()

x = [[5]]
p = model.predict(x)

prediction.update({'x': x, 'predicted': p})
示例#14
0
from vdata import Data

# two plain values to play with
a = 42
b = 43

# the Data object the values are stored in
meaning_of_life = Data(name='meaning_of_life', namespace='raw')

# first update: creates a pickle in ./.vdata/raw/meaning_of_life.0.vdata
meaning_of_life.update(data=a)

# even with the local name gone, the previous value can be retrieved
del a
assert meaning_of_life.get() == 42

# second update: creates a pickle in ./.vdata/raw/meaning_of_life.1.vdata
meaning_of_life.update(data=43)

# the newest value is returned by default, by 'latest' and by its number;
# the first value stays reachable as revision 0
assert meaning_of_life.get() == 43
assert meaning_of_life.get(revision='latest') == 43
assert meaning_of_life.get(revision=1) == 43
assert meaning_of_life.get(revision=0) == 42
示例#15
0
    def test_max_revision_saving_head_file(self):
        """
        A single update is expected to leave a ``.head`` file in the store.
        """
        variable = Data(name='my_variable', namespace='raw')
        variable.update(data=1)

        head_path = os.path.join('.vdata', 'raw', 'my_variable.head')
        self.assertTrue(os.path.isfile(head_path))
示例#16
0
    def test_get_head_path(self):
        """
        _get_head_path() is expected to return ``.vdata/raw/my_variable.head``.
        """
        variable = Data(name='my_variable', namespace='raw')
        variable.update(data=1)

        expected_path = os.path.join('.vdata', 'raw', 'my_variable.head')
        self.assertEqual(variable._get_head_path(), expected_path)