示例#1
0
def test_fit_reset(caplog):
    caplog.set_level(logging.DEBUG)
    node = Pipeline(steps=dummy_classifier, event_reset='reset')
    node._status = -1 # bypass accumulation
    node._X_train = np.array([-1, 1, 1, 1])
    node._y_train = np.array([0, 1, 1, 1])
    node.i_events.data = make_event('training_starts')
    node.update()
    node.i_events.data = make_event('reset')
    node.update()
    assert caplog.record_tuples[0][2] == 'Start training'
    assert caplog.record_tuples[1][2] == 'Reset'
    assert node._status == 0
示例#2
0
def test_fit_success(caplog):
    caplog.set_level(logging.DEBUG)
    node = Pipeline(steps=dummy_classifier)
    node._status = -1 # bypass accumulation
    assert hasattr(node._pipeline[0], 'n_classes_') == False
    node._X_train = np.array([-1, 1, 1, 1])
    node._y_train = np.array([0, 1, 1, 1])
    node.i_events.data = make_event('training_starts')
    while node._status != 3:
        node.update()
    assert node._pipeline[0].n_classes_ == 2
    assert caplog.record_tuples[0][2] == 'Start training'
    assert caplog.record_tuples[1][2].startswith('Model fitted in')
示例#3
0
def test_fit_error(caplog):
    steps = [{
        'module': 'sklearn.dummy',
        'class': 'DummyClassifier',
        'args': {
            'strategy': 'foobar'
        }
    }]
    node = Pipeline(steps=steps)
    node.i_events.data = make_event('training_starts')
    with pytest.raises(WorkerInterrupt):
        while node._status != 3:
            node.update()
    assert caplog.record_tuples[0][2].startswith('An error occured while fitting')
示例#4
0
def test_passthrough():
    node = Pipeline(steps=dummy_classifier, passthrough=True)
    streamer = DummyData()
    node.i_training.data = streamer.next()
    node.i_training_0.data = streamer.next()
    node.i_events.data = make_event('foobar')
    node.i.data = streamer.next()
    node.i_0.data = streamer.next()
    node.i_1.data = streamer.next()
    node.i.meta = {'foobar': 42}
    node.update()
    assert len(list(node.iterate('o*'))) == 3
    assert node.o.data.equals(node.i.data)
    assert node.o_0.data.equals(node.i_0.data)
    assert node.o_0.data.equals(node.i_0.data)
    assert node.o.meta == node.i.meta
示例#5
0
def test_predict_3D_output():
    node = Pipeline(steps=dummy_classifier, mode='predict', meta_label='target')
    stream = DummyData(start_date=now())
    node.i_training_0.data = stream.next(5)
    node.i_training_1.data = stream.next(5)
    node.i_training_0.meta = { 'target': 0 }
    node.i_training_1.meta = { 'target': 1 }
    node.i_events.data = make_event('training_starts')
    while node._status != 3:
        node.update()
    node.i_0.data = stream.next(5)
    node.i_1.data = stream.next(5)
    node.i_0.meta = {'index': 0}
    node.i_1.meta = {'index': 1}
    node.update()
    assert len(node.o_events.data) == 2
    assert node.o_events.meta == {'epochs': [{'index': 0}, {'index': 1}]}
示例#6
0
def test_predict():
    # classifier = [
    #     {'module': 'test_node_ml', 'class': 'Flattener'},
    #     {'module': 'sklearn.dummy', 'class': 'DummyClassifier', 'args': {'strategy': 'most_frequent'}}
    # ]
    node = Pipeline(steps=dummy_classifier, mode='predict', meta_label='target')
    node.i_training_0.set([-1], [now()], meta={ 'target': 0 })
    node.i_training_1.set([1], [now()], meta={ 'target': 1 })
    node.i_training_2.set([1], [now()], meta={ 'target': 1 })
    node.i_training_3.set([1], [now()], meta={ 'target': 1 })
    node.i_events.data = make_event('training_starts')
    while node._status != 3:
        node.update()
    node.i_0.set([-1], [now()])
    node.i_1.set([1], [now()])
    node.i_2.set([1], [now()])
    node.i_3.set([1], [now()])
    node.update()
    assert list(node._out) == [1, 1, 1, 1]
示例#7
0
    def update(self):

        # Let's get ready
        self._clear()

        # Reset
        if self.event_reset:
            matches = match_events(self.i_events, self.event_reset)
            if matches is not None:
                self.logger.debug("Reset")
                if self._status == FITTING:
                    self._task.stop()
                self._reset()

        # Are we dealing with continuous data or epochs?
        if self._dimensions is None:
            port_name = "i_training" if self.fit else "i"
            if getattr(self, port_name).ready():
                self._dimensions = 2
            elif len(list(self.iterate(port_name + "_*"))) > 0:
                self._dimensions = 3

        # Set the accumulation boundaries
        if self._accumulation_start is None:
            matches = match_events(self.i_events,
                                   self.event_start_accumulation)
            if matches is not None:
                self._accumulation_start = matches.index.values[0]
                self._status = ACCUMULATING
                self.logger.debug("Start accumulation")
        if self._accumulation_stop is None:
            matches = match_events(self.i_events, self.event_stop_accumulation)
            if matches is not None:
                self._accumulation_stop = matches.index.values[0]
                self.logger.debug("Stop accumulation")

        # Always buffer a few seconds, in case the start event is coming late
        if self._status == IDLE:
            start = (now() - self._buffer_size).to_datetime64()
            stop = max_time()
            self._accumulate(start, stop)

        # Accumulate between boundaries
        if self._status == ACCUMULATING:
            start = self._accumulation_start
            stop = self._accumulation_stop if self._accumulation_stop else max_time(
            )
            self._accumulate(start, stop)

        # Should we start fitting the model?
        if self._status < FITTING:
            if match_events(self.i_events,
                            self.event_start_training) is not None:
                self._status = FITTING
                self.logger.debug("Start training")
                self._task = Task(self._pipeline, "fit", self._X_train,
                                  self._y_train).start()

        # Is the model ready?
        if self._status == FITTING:
            status = self._task.status()
            if status:
                if status["success"]:
                    self._pipeline = status["instance"]
                    self._status = READY
                    self.logger.debug(
                        f"Model fitted in {status['time']} seconds")
                    # TODO: this can potentially be overwritten in _send()
                    self.o_events.data = make_event("ready")
                else:
                    self.logger.error(
                        f"An error occured while fitting: {status['exception'].args[0]}"
                    )
                    self.logger.debug(
                        "\nTraceback (most recent call last):\n" +
                        "".join(status["traceback"]))
                    raise WorkerInterrupt()

        # Run the pipeline
        if self._status == READY:
            self._receive()
            if self._X is not None:
                args = [self._X]
                if self.mode.startswith("fit"):
                    args.append(self._y)
                # TODO: optionally loop through epochs instead of sending them all at once
                self._out = getattr(self._pipeline, self.mode)(*args)

        # Set output streams
        self._send()
示例#8
0
def test_make_event():
    event = make_event('hello', {'foobar': 42})
    assert event['label'][0] == 'hello'
    assert event['data'][0] == '{"foobar": 42}'
示例#9
0
def test_make_event_no_meta():
    event = make_event('hello')
    assert event['label'][0] == 'hello'
    assert event['data'][0] == '{}'