def test_idle_buffer_3D(random):
    """Feed three 3D training epochs to an idle node with a 5s buffer.

    The epochs start 10s before now, now, and 10s after now. After one
    update the test expects two epochs to be held, each with 10 rows and
    5 columns, and one index entry per held epoch.
    """
    node = Pipeline(steps=dummy_classifier, buffer_size='5s', meta_label=None)
    starts = (
        now() - pd.Timedelta('10s'),
        now(),
        now() + pd.Timedelta('10s'),
    )
    port_names = ('i_training_0', 'i_training_1', 'i_training_2')
    for port_name, start_date in zip(port_names, starts):
        getattr(node, port_name).data = DummyData(start_date=start_date).next(10)

    node.update()

    n_indices = len(node._X_train_indices)
    assert n_indices == 2
    assert n_indices == len(node._X_train)
    assert node._X_train.shape == (2, 10, 5)
def test_accumulate_y_train(caplog):
    """Collect labels from epoch metadata; an epoch with empty meta is skipped.

    Three training epochs are supplied and the middle one carries no label.
    The test expects the accumulated labels to be ``[True, False]`` and the
    first captured log message to be 'Invalid label'.
    """
    node = Pipeline(steps=dummy_classifier)
    stream = DummyData(start_date=now())
    port_names = ('i_training_0', 'i_training_1', 'i_training_2')
    metas = (
        {'epoch': {'context': {'target': True}}},
        {},  # no label at all
        {'epoch': {'context': {'target': False}}},
    )

    for port_name in port_names:
        getattr(node, port_name).data = stream.next()
    for port_name, meta in zip(port_names, metas):
        getattr(node, port_name).meta = meta

    node.update()

    assert node._y_train.tolist() == [True, False]
    first_record = caplog.record_tuples[0]
    assert first_record[2] == 'Invalid label'
def test_accumulation_boundaries():
    """Derive the accumulation window from start/stop events.

    Three events are sent one second apart (start, stop, start). The test
    expects the start boundary at the first event's timestamp and the stop
    boundary at the second event's timestamp.
    """
    node = Pipeline(steps=dummy_classifier)
    labels = ('accumulation_starts', 'accumulation_stops', 'accumulation_starts')
    events = [[label, ''] for label in labels]
    times = pd.date_range(start='2018-01-01', periods=3, freq='1s')
    node.i_events.set(events, times, ['label', 'data'])

    node.update()

    expected_start = np.datetime64('2018-01-01T00:00:00')
    expected_stop = np.datetime64('2018-01-01T00:00:01')
    assert node._accumulation_start == expected_start
    assert node._accumulation_stop == expected_stop
def test_accumulate_start_stop_2D(random):
    """Accumulate 2D training data between a start and a stop event.

    The start and stop events are 10s apart and 100 samples are streamed
    with ``rate=1`` and ``jitter=0`` from the same start date. The test
    expects 10 rows of training data after one update.
    """
    node = Pipeline(steps=dummy_classifier, buffer_size='5s')
    start = now()

    events = [[label, ''] for label in ('accumulation_starts', 'accumulation_stops')]
    times = pd.date_range(start=start, periods=2, freq='10s')
    node.i_events.set(events, times, ['label', 'data'])

    stream = DummyData(start_date=start, rate=1, jitter=0)
    node.i_training.data = stream.next(100)

    node.update()

    assert len(node._X_train) == 10
def test_fit_reset(caplog):
    """Start training, then send the configured reset event.

    The node is built with ``event_reset='reset'``. The test expects the
    first two log messages to be 'Start training' and 'Reset', and the
    node's status to be 0 afterwards.
    """
    caplog.set_level(logging.DEBUG)
    node = Pipeline(steps=dummy_classifier, event_reset='reset')
    node._status = -1  # bypass accumulation
    node._X_train = np.array([-1, 1, 1, 1])
    node._y_train = np.array([0, 1, 1, 1])

    # One update per event: first kick off training, then reset.
    for label in ('training_starts', 'reset'):
        node.i_events.data = make_event(label)
        node.update()

    first, second = caplog.record_tuples[0], caplog.record_tuples[1]
    assert first[2] == 'Start training'
    assert second[2] == 'Reset'
    assert node._status == 0
def test_fit_success(caplog):
    """Fit the dummy classifier and check that it ends up trained.

    Before training, the first pipeline step must not expose ``n_classes_``.
    The node is then updated until its status reaches 3, after which the
    step is expected to report two classes. The first two log messages are
    expected to be 'Start training' and one starting with 'Model fitted in'.
    """
    caplog.set_level(logging.DEBUG)
    node = Pipeline(steps=dummy_classifier)
    node._status = -1  # bypass accumulation
    # Use `not` rather than comparing the boolean result with `== False`.
    assert not hasattr(node._pipeline[0], 'n_classes_')
    node._X_train = np.array([-1, 1, 1, 1])
    node._y_train = np.array([0, 1, 1, 1])
    node.i_events.data = make_event('training_starts')
    # Keep updating until the node reports status 3
    # (presumably the "ready" state -- confirm against the node implementation).
    while node._status != 3:
        node.update()
    assert node._pipeline[0].n_classes_ == 2
    assert caplog.record_tuples[0][2] == 'Start training'
    assert caplog.record_tuples[1][2].startswith('Model fitted in')
def test_fit_error(caplog):
    """Training with an invalid classifier argument must interrupt the worker.

    The classifier is configured with ``strategy='foobar'``. The test expects
    ``WorkerInterrupt`` to be raised while updating, and the first log message
    to start with the fitting-error text.
    """
    bad_classifier = {
        'module': 'sklearn.dummy',
        'class': 'DummyClassifier',
        'args': {'strategy': 'foobar'},
    }
    node = Pipeline(steps=[bad_classifier])
    node.i_events.data = make_event('training_starts')

    with pytest.raises(WorkerInterrupt):
        while node._status != 3:
            node.update()

    # NOTE(review): the spelling 'occured' presumably mirrors the message
    # emitted by the node; do not correct it here without changing the source.
    first_message = caplog.record_tuples[0][2]
    assert first_message.startswith('An error occured while fitting')
def test_passthrough():
    """With ``passthrough=True``, regular inputs are copied to matching outputs.

    Training and event ports are populated alongside three regular inputs
    (``i``, ``i_0``, ``i_1``). The test expects exactly three output ports,
    each carrying the same data as its input counterpart, and the default
    output carrying the default input's meta.
    """
    node = Pipeline(steps=dummy_classifier, passthrough=True)
    streamer = DummyData()
    node.i_training.data = streamer.next()
    node.i_training_0.data = streamer.next()
    node.i_events.data = make_event('foobar')
    node.i.data = streamer.next()
    node.i_0.data = streamer.next()
    node.i_1.data = streamer.next()
    node.i.meta = {'foobar': 42}
    node.update()
    assert len(list(node.iterate('o*'))) == 3
    assert node.o.data.equals(node.i.data)
    assert node.o_0.data.equals(node.i_0.data)
    # Previously this line re-checked o_0, leaving the third output unverified.
    assert node.o_1.data.equals(node.i_1.data)
    assert node.o.meta == node.i.meta
def test_predict_3D_output():
    """Predict on two epochs and check the emitted events and their meta.

    The node is trained on two epochs labelled 0 and 1 through the 'target'
    meta key, then updated until its status reaches 3. Two new epochs are fed;
    the test expects two output events and an 'epochs' meta list holding each
    input epoch's meta, in order.
    """
    node = Pipeline(steps=dummy_classifier, mode='predict', meta_label='target')
    stream = DummyData(start_date=now())

    # Training epochs and their labels.
    node.i_training_0.data = stream.next(5)
    node.i_training_1.data = stream.next(5)
    node.i_training_0.meta = {'target': 0}
    node.i_training_1.meta = {'target': 1}
    node.i_events.data = make_event('training_starts')
    while True:
        if node._status == 3:
            break
        node.update()

    # Epochs to predict on.
    node.i_0.data = stream.next(5)
    node.i_1.data = stream.next(5)
    node.i_0.meta = {'index': 0}
    node.i_1.meta = {'index': 1}
    node.update()

    expected_meta = {'epochs': [{'index': 0}, {'index': 1}]}
    assert len(node.o_events.data) == 2
    assert node.o_events.meta == expected_meta
def test_predict():
    """Train on four labelled single-sample inputs, then predict four samples.

    The training labels are 0, 1, 1, 1. After the node's status reaches 3,
    four samples are fed and the test expects every prediction to be 1.
    """
    # Alternative pipeline kept for reference (not used by this test):
    # classifier = [
    #     {'module': 'test_node_ml', 'class': 'Flattener'},
    #     {'module': 'sklearn.dummy', 'class': 'DummyClassifier', 'args': {'strategy': 'most_frequent'}}
    # ]
    node = Pipeline(steps=dummy_classifier, mode='predict', meta_label='target')

    training_samples = ((-1, 0), (1, 1), (1, 1), (1, 1))  # (value, target)
    for index, (value, target) in enumerate(training_samples):
        getattr(node, 'i_training_' + str(index)).set([value], [now()], meta={'target': target})

    node.i_events.data = make_event('training_starts')
    while node._status != 3:
        node.update()

    for index, value in enumerate((-1, 1, 1, 1)):
        getattr(node, 'i_' + str(index)).set([value], [now()])
    node.update()

    assert list(node._out) == [1, 1, 1, 1]
def test_transform_3D_output(random):
    """Run a fit_transform pipeline on two epochs and inspect the outputs.

    The pipeline chains a Vectorizer, a DummyTransformer and a Shaper that
    reshapes to ``(2, -1, 5)``. The test expects two suffixed output ports,
    the first output's index to match the first input's index, and each
    output's meta to equal its input's meta.
    """
    steps = [
        {'module': 'test_ml', 'class': 'Vectorizer'},
        {'module': 'test_ml', 'class': 'DummyTransformer'},
        {'module': 'test_ml', 'class': 'Shaper', 'args': {'shape': (2, -1, 5)}},
    ]
    node = Pipeline(steps=steps, mode='fit_transform', meta_label=None)
    columns = ['A', 'B', 'C', 'D', 'E']
    stream = DummyData(start_date=now())
    port_names = ('i_0', 'i_1')

    for port_name in port_names:
        getattr(node, port_name).data = stream.next()
    for port_name in port_names:
        getattr(node, port_name).data.columns = columns
    for index, port_name in enumerate(port_names):
        getattr(node, port_name).meta = {'index': index}

    node.update()

    outputs = list(node.iterate('o_*'))
    assert len(outputs) == 2
    assert np.array_equal(node.i_0.data.index.values, node.o_0.data.index.values)
    # NOTE(review): these two checks inspect the *input* columns, which were
    # set just above; possibly the output columns were meant -- confirm.
    assert list(node.i_0.data.columns) == columns
    assert list(node.i_1.data.columns) == columns
    assert node.o_0.meta == node.i_0.meta
    assert node.o_1.meta == node.i_1.meta
def test_trim_3D(random):
    """Accumulate four labelled epochs, then re-accumulate on a narrower window.

    Epochs start at 0s, 10s, 20s and 30s past 2018-01-01T00:00:00 and carry
    targets 1 to 4. After a first update over a wide window, ``_accumulate``
    is called directly with a 00:00:05 - 00:00:25 window. The test expects
    only two epochs to remain, labelled 2 and 3.
    """
    node = Pipeline(steps=dummy_classifier)
    epoch_starts = (
        '2018-01-01T00:00:00',
        '2018-01-01T00:00:10',
        '2018-01-01T00:00:20',
        '2018-01-01T00:00:30',
    )
    for index, start_date in enumerate(epoch_starts):
        getattr(node, 'i_training_' + str(index)).data = DummyData(start_date=start_date).next()
    for index, target in enumerate((1, 2, 3, 4)):
        getattr(node, 'i_training_' + str(index)).meta = {'epoch': {'context': {'target': target}}}

    # First pass: a wide window that contains every epoch.
    node._accumulation_start = np.datetime64('2017-12-31T00:00:00')
    node._accumulation_stop = np.datetime64('2018-01-01T00:01:00')
    node._status = 1
    node.update()

    node._dimensions = 0  # Bypass accumulation
    node._accumulate(
        np.datetime64('2018-01-01T00:00:05'),
        np.datetime64('2018-01-01T00:00:25'),
    )

    for kept in (node._X_train_indices, node._X_train, node._y_train):
        assert len(kept) == 2
    assert node._y_train.tolist() == [2, 3]
def test_3D_training(random):
    """Data on a suffixed training port should make the node report 3 dimensions."""
    pipeline_node = Pipeline(steps=dummy_classifier)
    epoch = DummyData().next()
    pipeline_node.i_training_0.data = epoch

    pipeline_node.update()

    assert pipeline_node._dimensions == 3
def test_transform():
    """In transform mode without fitting, the output equals the input doubled.

    The expected result is computed from the input values after the update
    and compared with the node's ``_out`` attribute.
    """
    node = Pipeline(
        steps=dummy_transformer,
        fit=False,
        mode='transform',
        meta_label=None,
    )
    node.i.data = DummyData().next()

    node.update()

    doubled = node.i.data.values * 2
    assert np.array_equal(doubled, node._out)
def test_receive_3D_invalid_label(caplog):
    """An epoch without a usable label is reported and not stored.

    One epoch is fed in 'fit_predict' mode with no meta set. The test expects
    the first log message to be 'Invalid label' and ``_X`` to remain ``None``.
    """
    node = Pipeline(steps=dummy_classifier, mode='fit_predict')
    node.i_0.data = DummyData().next()
    node.update()
    assert caplog.record_tuples[0][2] == 'Invalid label'
    # Identity check: `== None` would be evaluated element-wise if `_X` were
    # ever a numpy array, making the assertion ambiguous instead of failing
    # cleanly.
    assert node._X is None
def test_receive_2D():
    """Data on the default input port is received as a 2D array.

    The test expects ``_X`` to have shape ``(10, 5)`` and the node to report
    2 dimensions after one update.
    """
    transformer_node = Pipeline(steps=dummy_transformer, fit=False, mode='transform')
    chunk = DummyData().next()
    transformer_node.i.data = chunk

    transformer_node.update()

    assert transformer_node._X.shape == (10, 5)
    assert transformer_node._dimensions == 2
def test_3D_no_training(random):
    """In 'fit_predict' mode, data on a suffixed input port means 3 dimensions."""
    pipeline_node = Pipeline(steps=dummy_classifier, mode='fit_predict')
    epoch = DummyData().next()
    pipeline_node.i_0.data = epoch

    pipeline_node.update()

    assert pipeline_node._dimensions == 3
def test_2D_no_training(random):
    """In 'fit_transform' mode, data on the default input port means 2 dimensions."""
    pipeline_node = Pipeline(steps=dummy_transformer, mode='fit_transform')
    chunk = DummyData().next()
    pipeline_node.i.data = chunk

    pipeline_node.update()

    assert pipeline_node._dimensions == 2