def test_fit_reset(caplog):
    """A 'reset' event received while fitting stops the task and returns to idle."""
    caplog.set_level(logging.DEBUG)
    node = Pipeline(steps=dummy_classifier, event_reset='reset')
    # Inject training data directly so the accumulation phase is bypassed
    node._status = -1
    node._X_train = np.array([-1, 1, 1, 1])
    node._y_train = np.array([0, 1, 1, 1])
    # Start training, then immediately reset
    for label in ('training_starts', 'reset'):
        node.i_events.data = make_event(label)
        node.update()
    messages = [record[2] for record in caplog.record_tuples]
    assert messages[0] == 'Start training'
    assert messages[1] == 'Reset'
    assert node._status == 0
def test_fit_success(caplog):
    """Training a dummy classifier completes and logs the start/fitted messages."""
    caplog.set_level(logging.DEBUG)
    node = Pipeline(steps=dummy_classifier)
    node._status = -1  # bypass accumulation
    # The estimator must not be fitted yet (idiomatic truthiness, not `== False`)
    assert not hasattr(node._pipeline[0], 'n_classes_')
    node._X_train = np.array([-1, 1, 1, 1])
    node._y_train = np.array([0, 1, 1, 1])
    node.i_events.data = make_event('training_starts')
    # Keep updating until the node reports READY (status 3)
    while node._status != 3:
        node.update()
    assert node._pipeline[0].n_classes_ == 2
    assert caplog.record_tuples[0][2] == 'Start training'
    assert caplog.record_tuples[1][2].startswith('Model fitted in')
def test_fit_error(caplog):
    """An invalid estimator argument surfaces as a WorkerInterrupt and is logged."""
    steps = [{
        'module': 'sklearn.dummy',
        'class': 'DummyClassifier',
        'args': {'strategy': 'foobar'},
    }]
    node = Pipeline(steps=steps)
    node.i_events.data = make_event('training_starts')
    with pytest.raises(WorkerInterrupt):
        while node._status != 3:
            node.update()
    # NOTE: 'occured' intentionally matches the node's (misspelled) log message
    assert caplog.record_tuples[0][2].startswith('An error occured while fitting')
def test_passthrough():
    """In passthrough mode, every input port is forwarded unchanged to its output."""
    node = Pipeline(steps=dummy_classifier, passthrough=True)
    streamer = DummyData()
    node.i_training.data = streamer.next()
    node.i_training_0.data = streamer.next()
    node.i_events.data = make_event('foobar')
    node.i.data = streamer.next()
    node.i_0.data = streamer.next()
    node.i_1.data = streamer.next()
    node.i.meta = {'foobar': 42}
    node.update()
    assert len(list(node.iterate('o*'))) == 3
    assert node.o.data.equals(node.i.data)
    assert node.o_0.data.equals(node.i_0.data)
    # Fixed: the original asserted o_0/i_0 twice and never checked o_1/i_1
    assert node.o_1.data.equals(node.i_1.data)
    assert node.o.meta == node.i.meta
def test_predict_3D_output():
    """Epoch-mode prediction emits one event per epoch, with meta echoed back."""
    node = Pipeline(steps=dummy_classifier, mode='predict', meta_label='target')
    stream = DummyData(start_date=now())
    # Two labelled training epochs of five samples each
    for port, target in ((node.i_training_0, 0), (node.i_training_1, 1)):
        port.data = stream.next(5)
        port.meta = {'target': target}
    node.i_events.data = make_event('training_starts')
    while node._status != 3:
        node.update()
    # Two unlabelled epochs to classify, each tagged with an index
    for port, index in ((node.i_0, 0), (node.i_1, 1)):
        port.data = stream.next(5)
        port.meta = {'index': index}
    node.update()
    assert len(node.o_events.data) == 2
    assert node.o_events.meta == {'epochs': [{'index': 0}, {'index': 1}]}
def test_predict():
    """Four single-sample epochs are all classified with the majority label."""
    node = Pipeline(steps=dummy_classifier, mode='predict', meta_label='target')
    values = [-1, 1, 1, 1]
    targets = [0, 1, 1, 1]
    # Feed one labelled training sample per epoch port
    for i, (value, target) in enumerate(zip(values, targets)):
        getattr(node, f'i_training_{i}').set([value], [now()], meta={'target': target})
    node.i_events.data = make_event('training_starts')
    while node._status != 3:
        node.update()
    # Feed the same values for prediction, without labels
    for i, value in enumerate(values):
        getattr(node, f'i_{i}').set([value], [now()])
    node.update()
    # DummyClassifier predicts the most frequent class for every epoch
    assert list(node._out) == [1, 1, 1, 1]
def update(self):
    """Advance the node's state machine by one scheduler tick.

    The node moves through statuses IDLE -> ACCUMULATING -> FITTING -> READY,
    driven by events on ``i_events``:

    * ``event_reset`` aborts a running fit task and resets the node.
    * ``event_start_accumulation`` / ``event_stop_accumulation`` bound the
      window of data accumulated into ``_X_train`` / ``_y_train``.
    * ``event_start_training`` launches the fit in a background ``Task``.

    Once the fitted pipeline is ready, incoming data is passed to the
    pipeline method named by ``self.mode`` and results are sent downstream.

    Raises:
        WorkerInterrupt: if the background fit task reports a failure.
    """
    # Let's get ready
    self._clear()
    # Reset: stop a running fit task (if any) and return to the initial state
    if self.event_reset:
        matches = match_events(self.i_events, self.event_reset)
        if matches is not None:
            self.logger.debug("Reset")
            if self._status == FITTING:
                self._task.stop()
            self._reset()
    # Are we dealing with continuous data (2D, default port) or epochs
    # (3D, numbered ports)? Decided once, from whichever port has data first.
    if self._dimensions is None:
        port_name = "i_training" if self.fit else "i"
        if getattr(self, port_name).ready():
            self._dimensions = 2
        elif len(list(self.iterate(port_name + "_*"))) > 0:
            self._dimensions = 3
    # Set the accumulation boundaries from the start/stop events
    if self._accumulation_start is None:
        matches = match_events(self.i_events, self.event_start_accumulation)
        if matches is not None:
            self._accumulation_start = matches.index.values[0]
            self._status = ACCUMULATING
            self.logger.debug("Start accumulation")
    if self._accumulation_stop is None:
        matches = match_events(self.i_events, self.event_stop_accumulation)
        if matches is not None:
            self._accumulation_stop = matches.index.values[0]
            self.logger.debug("Stop accumulation")
    # Always buffer a few seconds, in case the start event is coming late
    if self._status == IDLE:
        start = (now() - self._buffer_size).to_datetime64()
        stop = max_time()
        self._accumulate(start, stop)
    # Accumulate between boundaries (open-ended until the stop event arrives)
    if self._status == ACCUMULATING:
        start = self._accumulation_start
        stop = self._accumulation_stop if self._accumulation_stop else max_time()
        self._accumulate(start, stop)
    # Should we start fitting the model? The fit runs in a background Task
    # so update() stays non-blocking.
    if self._status < FITTING:
        if match_events(self.i_events, self.event_start_training) is not None:
            self._status = FITTING
            self.logger.debug("Start training")
            self._task = Task(
                self._pipeline, "fit", self._X_train, self._y_train
            ).start()
    # Is the model ready? Poll the background task; status() is falsy while
    # the fit is still running.
    if self._status == FITTING:
        status = self._task.status()
        if status:
            if status["success"]:
                # Adopt the fitted pipeline instance produced by the task
                self._pipeline = status["instance"]
                self._status = READY
                self.logger.debug(f"Model fitted in {status['time']} seconds")
                # TODO: this can potentially be overwritten in _send()
                self.o_events.data = make_event("ready")
            else:
                self.logger.error(
                    f"An error occured while fitting: {status['exception'].args[0]}"
                )
                self.logger.debug(
                    "\nTraceback (most recent call last):\n"
                    + "".join(status["traceback"])
                )
                raise WorkerInterrupt()
    # Run the pipeline on incoming data once the model is ready
    if self._status == READY:
        self._receive()
        if self._X is not None:
            args = [self._X]
            if self.mode.startswith("fit"):
                args.append(self._y)
            # TODO: optionally loop through epochs instead of sending them all at once
            self._out = getattr(self._pipeline, self.mode)(*args)
    # Set output streams
    self._send()
def test_make_event():
    """The label and the JSON-serialized payload land in the first row."""
    payload = {'foobar': 42}
    event = make_event('hello', payload)
    assert event['label'][0] == 'hello'
    assert event['data'][0] == '{"foobar": 42}'
def test_make_event_no_meta():
    """Omitting the payload serializes to an empty JSON object."""
    event = make_event('hello')
    assert event['label'][0] == 'hello'
    assert event['data'][0] == '{}'