Exemplo n.º 1
0
def test_accumulate_start_stop_2D(random):
    """Accumulate 2D training data between a start and a stop event.

    Two events ('accumulation_starts' and 'accumulation_stops') are sent
    10 seconds apart, 100 rows of 1 Hz jitter-free data are fed to the
    training port, and `_X_train` is expected to hold 10 rows afterwards.
    """
    pipeline = Pipeline(steps=dummy_classifier, buffer_size='5s')
    t0 = now()
    # One row per event: [label, data]
    rows = [[label, ''] for label in ('accumulation_starts', 'accumulation_stops')]
    timestamps = pd.date_range(start=t0, periods=2, freq='10s')
    pipeline.i_events.set(rows, timestamps, ['label', 'data'])
    source = DummyData(start_date=t0, rate=1, jitter=0)
    pipeline.i_training.data = source.next(100)
    pipeline.update()
    n_rows = len(pipeline._X_train)
    assert n_rows == 10
Exemplo n.º 2
0
def test_reindex_indices():
    """Check the first and last output timestamps for two consecutive chunks."""
    source = DummyData()
    reindex = Reindex(rate=10)
    # (first timestamp, last timestamp) expected for each successive chunk
    expected_bounds = (
        ("2017-12-31 23:59:59.998745401", "2018-01-01 00:00:00.898745401"),
        ("2018-01-01 00:00:00.998745401", "2018-01-01 00:00:01.898745401"),
    )
    for first, last in expected_bounds:
        reindex.i.data = source.next()
        reindex.update()
        assert reindex.o.data.index.values[0] == np.datetime64(first)
        assert reindex.o.data.index.values[-1] == np.datetime64(last)
Exemplo n.º 3
0
def test_receive_3D_unsupervised():
    """Receive one epoch on `i_0` in transform mode without a meta label.

    Expects the first element of `_X` to have shape (10, 5), `_y` to be
    unset and the node to report 3 dimensions.
    """
    node = Pipeline(steps=dummy_transformer, fit=False, mode='transform', meta_label=None)
    node.i_0.data = DummyData().next()
    node.update()
    assert node._X[0].shape == (10, 5)
    # Identity check: `== None` would be evaluated element-wise if `_y`
    # were ever an array, making the assertion ambiguous.
    assert node._y is None
    assert node._dimensions == 3
Exemplo n.º 4
0
def test_predict_3D_output():
    """Train on two labelled epochs, then predict on two new epochs.

    Expects two rows on `o_events` and the input metas collected under the
    'epochs' key of the output meta.
    """
    pipeline = Pipeline(steps=dummy_classifier, mode='predict', meta_label='target')
    source = DummyData(start_date=now())

    # Training epochs, labelled through the 'target' meta key
    pipeline.i_training_0.data = source.next(5)
    pipeline.i_training_1.data = source.next(5)
    for target, port in enumerate((pipeline.i_training_0, pipeline.i_training_1)):
        port.meta = {'target': target}
    pipeline.i_events.data = make_event('training_starts')

    # Keep updating until the node reports status 3
    while pipeline._status != 3:
        pipeline.update()

    # Epochs to predict
    pipeline.i_0.data = source.next(5)
    pipeline.i_1.data = source.next(5)
    for index, port in enumerate((pipeline.i_0, pipeline.i_1)):
        port.meta = {'index': index}
    pipeline.update()

    expected_meta = {'epochs': [{'index': 0}, {'index': 1}]}
    assert len(pipeline.o_events.data) == 2
    assert pipeline.o_events.meta == expected_meta
Exemplo n.º 5
0
def test_transform_2D_output(random):
    """fit_transform on a single 2D input keeps index, columns and meta."""
    pipeline = Pipeline(steps=dummy_transformer, mode='fit_transform')
    expected_columns = ['A', 'B', 'C', 'D', 'E']
    pipeline.i.data = DummyData(start_date=now()).next()
    pipeline.i.meta = {'foo': 'bar'}
    pipeline.i.data.columns = expected_columns
    pipeline.update()

    source, result = pipeline.i, pipeline.o
    assert np.array_equal(source.data.index.values, result.data.index.values)
    assert list(result.data.columns) == expected_columns
    assert result.meta == source.meta
Exemplo n.º 6
0
def test_trim_2D(random):
    """Trim a 20-row 2D training buffer to the 00:00:05 - 00:00:15 window.

    Expects 10 rows to remain in both `_X_train` and `_X_train_indices`.
    """
    pipeline = Pipeline(steps=dummy_classifier)
    frame = DummyData(rate=1).next(20)
    # Seed the internal training buffers directly instead of going through ports
    pipeline._X_train = frame.values
    pipeline._X_train_indices = np.array(frame.index.values, dtype=np.datetime64)
    window = (
        np.datetime64('2018-01-01T00:00:05'),
        np.datetime64('2018-01-01T00:00:15'),
    )
    pipeline._dimensions = 2
    pipeline._accumulate(*window)
    for buffer in (pipeline._X_train_indices, pipeline._X_train):
        assert len(buffer) == 10
Exemplo n.º 7
0
def test_trim_3D(random):
    """Trim four accumulated epochs down to those inside a time window.

    Four epochs starting 10 seconds apart, with targets 1 to 4, are
    accumulated first. Trimming to 00:00:05 - 00:00:25 is expected to keep
    two epochs, with targets 2 and 3.
    """
    pipeline = Pipeline(steps=dummy_classifier)
    start_dates = (
        '2018-01-01T00:00:00',
        '2018-01-01T00:00:10',
        '2018-01-01T00:00:20',
        '2018-01-01T00:00:30',
    )
    ports = (
        pipeline.i_training_0,
        pipeline.i_training_1,
        pipeline.i_training_2,
        pipeline.i_training_3,
    )
    for port, start_date in zip(ports, start_dates):
        port.data = DummyData(start_date=start_date).next()
    for target, port in enumerate(ports, start=1):
        port.meta = {'epoch': {'context': {'target': target}}}

    # First pass: accumulate everything within a wide window
    pipeline._accumulation_start = np.datetime64('2017-12-31T00:00:00')
    pipeline._accumulation_stop = np.datetime64('2018-01-01T00:01:00')
    pipeline._status = 1
    pipeline.update()

    pipeline._dimensions = 0 # Bypass accumulation
    trim_start = np.datetime64('2018-01-01T00:00:05')
    trim_stop = np.datetime64('2018-01-01T00:00:25')
    pipeline._accumulate(trim_start, trim_stop)

    assert len(pipeline._X_train_indices) == 2
    assert len(pipeline._X_train) == 2
    assert len(pipeline._y_train) == 2
    assert pipeline._y_train.tolist() == [2, 3]
Exemplo n.º 8
0
def test_transform_3D_output(random):
    """fit_transform on two epochs yields two outputs that keep index and meta."""
    steps = [
        {'module': 'test_ml', 'class': 'Vectorizer'},
        {'module': 'test_ml', 'class': 'DummyTransformer'},
        {'module': 'test_ml', 'class': 'Shaper', 'args': {'shape': (2, -1, 5)}},
    ]
    pipeline = Pipeline(steps=steps, mode='fit_transform', meta_label=None)
    names = ['A', 'B', 'C', 'D', 'E']
    source = DummyData(start_date=now())
    pipeline.i_0.data = source.next()
    pipeline.i_1.data = source.next()
    for port in (pipeline.i_0, pipeline.i_1):
        port.data.columns = names
    for index, port in enumerate((pipeline.i_0, pipeline.i_1)):
        port.meta = {'index': index}
    pipeline.update()

    outputs = list(pipeline.iterate('o_*'))
    assert len(outputs) == 2
    assert np.array_equal(pipeline.i_0.data.index.values, pipeline.o_0.data.index.values)
    # The column checks below are made on the input ports, as in the original test
    assert list(pipeline.i_0.data.columns) == names
    assert list(pipeline.i_1.data.columns) == names
    assert pipeline.o_0.meta == pipeline.i_0.meta
    assert pipeline.o_1.meta == pipeline.i_1.meta
Exemplo n.º 9
0
def test_passthrough():
    """With passthrough=True, every default input is forwarded to its output.

    Training and event ports are populated as well; the test expects exactly
    three output ports (`o`, `o_0`, `o_1`), each carrying the data of the
    matching input, and the meta of `i` to be forwarded to `o`.
    """
    node = Pipeline(steps=dummy_classifier, passthrough=True)
    streamer = DummyData()
    node.i_training.data = streamer.next()
    node.i_training_0.data = streamer.next()
    node.i_events.data = make_event('foobar')
    node.i.data = streamer.next()
    node.i_0.data = streamer.next()
    node.i_1.data = streamer.next()
    node.i.meta = {'foobar': 42}
    node.update()
    assert len(list(node.iterate('o*'))) == 3
    assert node.o.data.equals(node.i.data)
    assert node.o_0.data.equals(node.i_0.data)
    # The original repeated the o_0/i_0 check here, leaving o_1 unverified.
    assert node.o_1.data.equals(node.i_1.data)
    assert node.o.meta == node.i.meta
Exemplo n.º 10
0
def test_convert():
    """Write a fake recording to HDF5, convert it and read back the result.

    The MD5 digest of the converted file is computed, but the comparison
    against a reference digest is disabled (see the note below), so this
    test only verifies that the conversion runs and that the destination
    file can be read.
    """

    # Filenames
    src = os.path.join(tempfile.gettempdir(), "test.hdf")
    dst = os.path.join(tempfile.gettempdir(), "test.bdf")

    # Make fake signal
    rate = 100
    channels = ["ch1", "ch2", "ch3", "ch4", "ch5"]
    eeg = DummyData(rate=rate, round=2, cols=channels).next(300)

    # Make fake events
    timestamps = np.array(
        ["2018-01-01 00:00:00", "2018-01-01 00:00:01", "2018-01-01 00:00:02"],
        dtype='datetime64')
    cols = ["label", "data"]
    rows = [["start", "{'mood': 'happy'}"], ["something", None],
            ["stop", "{'mood': 42}"]]
    events = pd.DataFrame(rows, index=timestamps, columns=cols)

    # Save to HDF; the context manager closes the store even if a write fails
    with pd.HDFStore(src) as store:
        store.append("/eeg", eeg)
        store.get_node("/eeg")._v_attrs["meta"] = {"rate": rate}
        store.append("/events", events)

    # Convert
    convert(src)

    # Compute MD5, closing the file handle once it has been read
    md5 = hashlib.md5()
    with open(dst, "rb") as converted:
        md5.update(converted.read())

    # NOTE(review): the digest comparison was already disabled in this test;
    # the reason is not visible here. Reference value kept for when it is
    # re-enabled:
    #   md5.hexdigest() == "27a686606e589b67fc9888802afef57c"

    # Clean up both temporary files so repeated runs start from scratch
    os.unlink(src)
    os.unlink(dst)
from timeflux_ml.nodes.fit import Fit
from timeflux_ml.nodes.transform import Transform

# ------------------------------------------------------
# Use-case #1 : Unsupervised fit of a Pipeline on pandas data
# ------------------------------------------------------
# params:
#    has_targets = False
#
# Inputs: DataFrame
# Outputs: DataFrame
#
# eg: MinMax scaler calibrated on streaming data.

# Number of columns produced by the shared dummy source
num_cols = 5
# Shared dummy source: rate=10, jitter=.05, `num_cols` columns
data = DummyData(rate=10, jitter=.05, num_cols=num_cols)
# Shared Fit node with a single 'scaler' step (sklearn MinMaxScaler), no targets
node_fit = Fit(pipeline_steps={'scaler': 'sklearn.preprocessing.MinMaxScaler'},
               has_targets=False)


def test_fit_no_data():
    """Update the shared Fit node with an empty DataFrame; output meta stays empty."""
    empty_frame = pd.DataFrame()
    node_fit.i.data = empty_frame
    node_fit.update()
    output_meta = node_fit.o.meta
    assert output_meta == {}


def test_fit_data():
    """Fit receives some data and fits the pipeline in a thread."""
    calibration_size = 30
    # Starts out as an empty DataFrame
    output_data = pd.DataFrame()
Exemplo n.º 12
0
def test_3D_training(random):
    """Data on a numbered training port makes the node report 3 dimensions."""
    pipeline = Pipeline(steps=dummy_classifier)
    epoch = DummyData().next()
    pipeline.i_training_0.data = epoch
    pipeline.update()
    assert pipeline._dimensions == 3
Exemplo n.º 13
0
def generator(rate=10, jitter=.05):
    """Return a DummyData source built with the given rate and jitter."""
    return DummyData(rate=rate, jitter=jitter)
Exemplo n.º 14
0
"""Tests for axis.py"""
import pandas as pd

from timeflux.helpers.testing import DummyData
from timeflux.nodes.axis import AddSuffix, Rename, RenameColumns

# Shared dummy source with columns '0', '1' and '2'
pandas_data = DummyData(cols=['0', '1', '2'])


def test_add_suffix():
    """AddSuffix appends '_foo' to every column name."""
    pandas_data.reset()
    suffixer = AddSuffix(suffix='_foo')
    suffixer.i.data = pandas_data.next(20)
    suffixer.update()

    expected_columns = pd.Index(['0_foo', '1_foo', '2_foo'])
    pd.testing.assert_index_equal(suffixer.o.data.columns, expected_columns)


def test_rename():
    """Rename maps column '0' to 'new_0' and leaves the other columns alone."""
    pandas_data.reset()
    renamer = Rename(mapper={'0': 'new_0'}, axis=1)
    renamer.i.data = pandas_data.next(20)
    renamer.update()
    expected_columns = pd.Index(['new_0', '1', '2'])
    pd.testing.assert_index_equal(renamer.o.data.columns, expected_columns)


def test_rename_columns():
    """Build a RenameColumns node with a single name."""
    # wrong length of parameter `list`
    node = RenameColumns(names=['foo'])
Exemplo n.º 15
0
def test_2D_no_training(random):
    """Data on the default port in fit_transform mode is seen as 2-dimensional."""
    pipeline = Pipeline(steps=dummy_transformer, mode='fit_transform')
    chunk = DummyData().next()
    pipeline.i.data = chunk
    pipeline.update()
    assert pipeline._dimensions == 2
Exemplo n.º 16
0
"""Tests for accumulate.py"""
import pandas as pd
import xarray as xr
from timeflux.helpers.testing import DummyData, DummyXArray
from timeflux.nodes.accumulate import AppendDataFrame, AppendDataArray

# Shared dummy sources (xarray and pandas flavours), built with default settings
xarray_data = DummyXArray()
pandas_data = DummyData()


def test_append_dataframe():
    """Test node AppendDataFrame.

    While no gate-closing event is received, successive chunks are expected
    to be buffered in `node._data` without anything being emitted on `o`.
    """

    node = AppendDataFrame()
    pandas_data.reset()
    node.clear()
    # gate is not closed, data should be accumulated but not released
    # first chunk
    node.i.data = pandas_data.next(5)
    node.update()
    # assert no output (identity check: `== None` on a DataFrame would be
    # evaluated element-wise and make the assertion ambiguous)
    assert node.o.data is None
    # assert the data has been buffered
    pd.testing.assert_frame_equal(pandas_data._data.iloc[:5, :], node._data)
    # second chunk
    node.clear()
    node.i.data = pandas_data.next(10)
    node.update()
    # assert no output
    assert node.o.data is None
    # assert the buffer is the concatenation of the 2 accumulated chunks
Exemplo n.º 17
0
def test_3D_no_training(random):
    """Data on a numbered port in fit_predict mode is seen as 3-dimensional."""
    pipeline = Pipeline(steps=dummy_classifier, mode='fit_predict')
    epoch = DummyData().next()
    pipeline.i_0.data = epoch
    pipeline.update()
    assert pipeline._dimensions == 3
Exemplo n.º 18
0
def test_reindex_rate_constructor():
    """The rate passed to the constructor is available as `_rate` after an update."""
    # Instantiated as in the original test; it is never fed to the node
    _source = DummyData()
    reindex = Reindex(rate=10)
    reindex.update()
    stored_rate = reindex._rate
    assert stored_rate == 10
Exemplo n.º 19
0
def test_receive_2D():
    """A chunk on the default port is stored in `_X` as a (10, 5) 2D input."""
    pipeline = Pipeline(steps=dummy_transformer, fit=False, mode='transform')
    chunk = DummyData().next()
    pipeline.i.data = chunk
    pipeline.update()
    received_shape = pipeline._X.shape
    assert received_shape == (10, 5)
    assert pipeline._dimensions == 2
Exemplo n.º 20
0
import numpy as np
import pandas as pd
import pytest
import logging
from timeflux.core.exceptions import WorkerInterrupt
from timeflux.helpers.testing import DummyData, Looper
from timeflux.nodes.dejitter import Snap, Interpolate

# Rate shared by the dummy sources and the nodes under test
rate = 10

# Dummy source with jitter=.05
dummy_data_with_jitter = DummyData(rate=rate, jitter=.05)

num_cols = 5
# Dummy source without jitter, with columns named ch0 .. ch4
dummy_data_no_jitter = DummyData(rate=rate, jitter=0.0, cols=[f'ch{k}' for k in range(num_cols)])


def test_round_on_data_with_jitter():
    data = dummy_data_with_jitter
    data.reset()
    node = Snap(rate=rate)
    node.i.data = data.next(6)
    node.update()

    expected_data = pd.DataFrame(
        [
            [0.185133, 0.541901, 0.872946, 0.732225, 0.806561],
            [0.658783, 0.692277, 0.849196, 0.249668, 0.489425],
            [0.221209, 0.987668, 0.944059, 0.039427, 0.705575],
            [0.925248, 0.180575, 0.567945, 0.915488, 0.033946],
            [0.69742, 0.297349, 0.924396, 0.971058, 0.944266],
            [0.474214, 0.862043, 0.844549, 0.3191, 0.828915]
Exemplo n.º 21
0
def test_receive_3D_invalid_label(caplog):
    """Receive an epoch on `i_0` in fit_predict mode without setting any meta.

    Expects the first captured log record to carry the message
    'Invalid label' and `_X` to remain unset.
    """
    node = Pipeline(steps=dummy_classifier, mode='fit_predict')
    node.i_0.data = DummyData().next()
    node.update()
    assert caplog.record_tuples[0][2] == 'Invalid label'
    # Identity check: `== None` would be evaluated element-wise if `_X`
    # were ever an array, making the assertion ambiguous.
    assert node._X is None
Exemplo n.º 22
0
import numpy as np
import pandas as pd
import pytest
from timeflux.core.exceptions import WorkerInterrupt
from timeflux.helpers.testing import DummyData, Looper
from timeflux.nodes.dejitter import Snap, Interpolate

# Rate shared by the dummy sources and the nodes under test
rate = 10

# Dummy source with jitter=.05
dummy_data_with_jitter = DummyData(rate=rate, jitter=.05)

num_cols = 5
# Dummy source without jitter, with columns named ch0 .. ch4
dummy_data_no_jitter = DummyData(rate=rate, jitter=0.0, cols=[f'ch{k}' for k in range(num_cols)])

dummy_data_not_monotonic = DummyData(rate=rate, jitter=.05, num_rows=100)
# Reorder rows around positions 51 and 52 to get a DataFrame whose index is
# not monotonic.
# NOTE(review): the selection takes rows 0-49, then 52, 51, then 52-99, so
# row 50 is dropped and row 52 appears twice rather than being a pure swap.
# Left unchanged here because it determines the fixture's contents.
dummy_data_not_monotonic._data = dummy_data_not_monotonic._data.iloc[
    list(np.arange(0, 50)) + [52, 51] + list(np.arange(52, 100))]
# Use `not` rather than `~`: on a plain Python bool, `~` gives -2 or -1, both
# truthy, so the sanity check could never fail. `is_monotonic_increasing` is
# the non-deprecated spelling of `is_monotonic`.
assert not dummy_data_not_monotonic._data.index.is_monotonic_increasing


def test_round_on_data_with_jitter():
    data = dummy_data_with_jitter
    data.reset()
    node = Snap(rate=rate)
    node.i.data = data.next(6)
    node.update()

    expected_data = pd.DataFrame(
        [
            [0.185133, 0.541901, 0.872946, 0.732225, 0.806561],
Exemplo n.º 23
0
def test_transform():
    """In transform mode the node output `_out` equals the input values times 2."""
    pipeline = Pipeline(steps=dummy_transformer, fit=False, mode='transform', meta_label=None)
    pipeline.i.data = DummyData().next()
    pipeline.update()
    doubled = pipeline.i.data.values * 2
    assert np.array_equal(doubled, pipeline._out)
Exemplo n.º 24
0
"""Tests for nodes from timeflux_dsp.nodes.spectral"""

import numpy as np
import pandas as pd
import pytest
import xarray as xr
from timeflux.helpers.testing import DummyData

from timeflux_dsp.nodes.spectral import FFT

# Rate passed to both the dummy source and the FFT node
fs = 10

# Shared dummy source with jitter=0.05
data = DummyData(rate=fs, jitter=0.05)
# 50 rows pulled from the source at import time
all_data = data.next(50)


def test_welch():
    data.reset()

    node = FFT(fs=fs, return_onesided=False)
    node.i.data = data.next(5)

    node.update()
    expected_freqs = [0.0, 2.0, 4.0, -4.0, -2.0]
    expected_times = [pd.Timestamp("2018-01-01 00:00:00.396560186")]
    expected_data = np.array([
        [
            2.687793 + 0.0j,
            2.69977 + 0.0j,
            4.158542 + 0.0j,
            2.907866 + 0.0j,
Exemplo n.º 25
0
"""Tests for query nodes"""

import pandas as pd
import pytest
from timeflux.core.exceptions import WorkerInterrupt
from timeflux.helpers.testing import DummyData
from timeflux.nodes.query import LocQuery, SelectRange, XsQuery

# Rate of the shared dummy source
fs = 10
# Shared dummy source: jitter=.05, 6 columns
data = DummyData(rate=fs, jitter=.05, num_cols=6)
# Direct reference to the source's underlying `_data` attribute
all_data = data._data


def test_locquery():
    data.reset()
    data._data.columns = ['A', 'B', 'C', 'D', 'E', 'F']
    node = LocQuery(key=('A', 'E'), axis=1)
    node.i.data = data.next(3)
    node.update()
    expected_data = pd.DataFrame(
        [[0.185133, 0.806561], [0.692277, 0.221209], [0.944059, 0.180575]], [
            pd.Timestamp('2017-12-31 23:59:59.998745401'),
            pd.Timestamp('2018-01-01 00:00:00.104507143'),
            pd.Timestamp('2018-01-01 00:00:00.202319939'),
        ], ['A', 'E'])
    pd.testing.assert_frame_equal(node.o.data, expected_data)

    # test query with wrong key: "R" not in the input columns
    with pytest.raises(WorkerInterrupt):
        node = LocQuery(key=['R'], axis=1)
        node.i.data = data.next()