def test_recognize():
    """Drive a CloudSpeechRecognizer through a full lifecycle: activation,
    streaming, commit, final transcript, and the idle-timeout path."""
    ctx = SpeechContext()
    cloud_recognizer = CloudSpeechRecognizer()
    cloud_recognizer._client._socket = mock.MagicMock()

    partial_response = {
        "error": None,
        "final": False,
        "hypotheses": [{"confidence": 0.5, "transcript": "this is a test"}],
        "status": "ok",
    }
    cloud_recognizer._client._socket.recv.return_value = json.dumps(partial_response)

    audio_frame = np.random.rand(160).astype(np.int16)

    # activating the context drives _begin and the first _send
    ctx.is_active = True
    cloud_recognizer(ctx, audio_frame)

    # a second call exercises the path where the internal _is_active is True
    cloud_recognizer(ctx, audio_frame)

    # deactivating the context drives _commit
    ctx.is_active = False
    cloud_recognizer(ctx, audio_frame)

    # same payload with final=True; insertion order is preserved
    final_response = dict(partial_response, final=True)
    cloud_recognizer._client._socket.recv.return_value = json.dumps(final_response)

    # processing the final server frame drives _receive
    cloud_recognizer(ctx, audio_frame)

    # push enough idle frames through to trip the timeout
    cloud_recognizer._client._socket.max_idle_time = 500
    for _ in range(501):
        cloud_recognizer(ctx, audio_frame)

    assert not ctx.is_active
    assert not cloud_recognizer._client.is_connected
def test_recognize(*args):
    """Run a GoogleSpeechRecognizer over a short activation window and make
    sure reset/close complete cleanly."""
    ctx = SpeechContext()
    silence = np.zeros(160).astype(np.int16)
    google_recognizer = GoogleSpeechRecognizer(language="en-US", credentials="")

    # stay active for the first few frames, then deactivate mid-stream
    ctx.is_active = True
    for step in range(10):
        if step > 3:
            ctx.is_active = False
        google_recognizer(ctx, silence)

    google_recognizer.reset()
    google_recognizer.close()
# Example #3
def test_context():
    """Verify SpeechContext defaults, field assignment, and reset()."""
    ctx = SpeechContext()

    # is_speech defaults to falsy and is settable
    assert not ctx.is_speech
    ctx.is_speech = True
    assert ctx.is_speech

    # is_active defaults to falsy and is settable
    assert not ctx.is_active
    ctx.is_active = True
    assert ctx.is_active

    # transcript defaults to empty and accepts text
    assert not ctx.transcript
    ctx.transcript = "this is a test"
    assert ctx.transcript

    # confidence defaults to 0.0 and accepts a score
    assert ctx.confidence == 0.0
    ctx.confidence = 1.0
    assert ctx.confidence == 1.0

    # reset() must restore every field to its default
    ctx.reset()
    assert not ctx.is_speech
    assert not ctx.is_active
    assert not ctx.transcript
    assert ctx.confidence == 0.0
def test_reset():
    """reset() must clear the recognizer's internal activation flag and
    disconnect its client even mid-stream."""
    ctx = SpeechContext()
    cloud_recognizer = CloudSpeechRecognizer()
    cloud_recognizer._client._socket = mock.MagicMock()

    cloud_recognizer._client._socket.recv.return_value = json.dumps({
        "error": None,
        "final": False,
        "hypotheses": [{"confidence": 0.5, "transcript": "this is a test"}],
        "status": "ok",
    })

    audio_frame = np.random.rand(160).astype(np.int16)

    # activate to trigger _begin plus the first _send
    ctx.is_active = True
    cloud_recognizer(ctx, audio_frame)

    # second call triggers a plain _send
    cloud_recognizer(ctx, audio_frame)

    # neither _commit nor a final frame has occurred, so the context and the
    # recognizer's internal flag are both still active when reset() runs
    cloud_recognizer.reset()

    assert not cloud_recognizer._is_active
    assert not cloud_recognizer._client.is_connected
def test_response():
    """The final server response must land its transcript and confidence on
    the shared context."""
    ctx = SpeechContext()
    cloud_recognizer = CloudSpeechRecognizer()
    cloud_recognizer._client._socket = mock.MagicMock()

    partial_response = {
        "error": None,
        "final": False,
        "hypotheses": [{"confidence": 0.5, "transcript": "this is a test"}],
        "status": "ok",
    }
    cloud_recognizer._client._socket.recv.return_value = json.dumps(partial_response)

    audio_frame = np.random.rand(160).astype(np.int16)

    # walk through begin, send, and commit
    ctx.is_active = True
    cloud_recognizer(ctx, audio_frame)
    cloud_recognizer(ctx, audio_frame)
    ctx.is_active = False
    cloud_recognizer(ctx, audio_frame)

    # same payload marked final; insertion order is preserved
    final_response = dict(partial_response, final=True)
    cloud_recognizer._client._socket.recv.return_value = json.dumps(final_response)

    # process the final frame carrying the final transcript
    cloud_recognizer(ctx, audio_frame)

    assert ctx.transcript == "this is a test"
    assert ctx.confidence == 0.5

    cloud_recognizer.close()
# Example #6
def test_recognize(*args):
    """KeywordRecognizer should classify the buffered audio on deactivation
    and write the winning class to the transcript."""
    ctx = SpeechContext()
    keyword_recognizer = KeywordRecognizer(classes=["one", "two", "three"])

    sample = np.random.rand(160).astype(np.float32)

    # feed 20 frames while active (original: two calls per loop, ten loops)
    ctx.is_active = True
    for _ in range(20):
        keyword_recognizer(ctx, sample)

    # deactivation triggers classification of the accumulated audio
    ctx.is_active = False
    keyword_recognizer(ctx, sample)
    assert ctx.transcript == "one"

    keyword_recognizer.close()
# Example #7
def test_timeout(*args):
    """With the detect model pinned to all-zero posteriors, deactivation must
    produce no transcript."""
    ctx = SpeechContext()
    keyword_recognizer = KeywordRecognizer(classes=["one", "two", "three"])
    keyword_recognizer.detect_model.return_value = [[[0.0, 0.0, 0.0]]]

    sample = np.random.rand(160).astype(np.float32)

    # feed 20 frames while active (original: two calls per loop, ten loops)
    ctx.is_active = True
    for _ in range(20):
        keyword_recognizer(ctx, sample)

    # deactivation runs classification, but every class posterior is zero
    ctx.is_active = False
    keyword_recognizer(ctx, sample)
    assert not ctx.transcript

    keyword_recognizer.close()
# Example #8
def test_detect_manual_min_delay(_mock):
    """With the detect model forced to emit 1s, the wakeword trigger must
    keep the context active across consecutive frames."""
    ctx = SpeechContext()
    trigger = WakewordTrigger(model_dir="wakeword_model")
    trigger.detect_model.return_value[0][:] = 1

    ctx.is_active = True
    sample = np.random.rand(512).astype(np.float32)
    # three consecutive frames, as in the manual-activation scenario
    for _ in range(3):
        trigger(ctx, sample)

    assert ctx.is_active
# Example #9
    def _detect(self, context: SpeechContext) -> None:
        """Score the encoded window with the detect model and activate the
        context when the wakeword posterior clears the threshold.

        Args:
            context (SpeechContext): shared pipeline state to activate
        """
        # read the whole encode window and add the batch dimension the
        # detect model expects
        window = np.expand_dims(self.encode_window.read_all(), 0)
        # scalar probability that the window contains the wakeword
        posterior = self.detect_model(window)[0][0][0]

        # track the running maximum posterior (used only for logging below)
        if posterior > self._posterior_max:
            self._posterior_max = posterior
        if posterior > self._posterior_threshold:
            context.is_active = True
            _LOG.info(f"wake: {self._posterior_max}")
# Example #10
    def __call__(self, context: SpeechContext, frame: np.ndarray) -> None:
        """Activates speech context whenever speech is detected

        Args:
            context (SpeechContext): State based information that needs to be shared
            between pieces of the pipeline
            frame (np.ndarray): Single frame of PCM-16 audio from an input source

        """
        speech_now = context.is_speech
        # react only on a transition of the speech flag (edge trigger);
        # a rising edge activates the context
        if speech_now != self._is_speech:
            if speech_now:
                context.is_active = True
            self._is_speech = speech_now
def test_receive(*args):
    """Drive GoogleSpeechRecognizer's receive path with a mocked streaming
    response and make sure reset/close complete cleanly."""
    ctx = SpeechContext()
    silence = np.zeros(160).astype(np.int16)
    google_recognizer = GoogleSpeechRecognizer(language="en-US", credentials="")
    google_recognizer._queue.put([silence, silence, silence])

    # one streaming response holding a single high-confidence alternative
    alternative = mock.Mock(transcript="test", confidence=0.99)
    response = mock.Mock(results=[mock.Mock(alternatives=[alternative])])
    google_recognizer._client.streaming_recognize.return_value = [response]

    # stay active for the first few frames, then deactivate mid-stream
    ctx.is_active = True
    for step in range(10):
        if step > 3:
            ctx.is_active = False
        google_recognizer(ctx, silence)

    google_recognizer._thread = mock.Mock()
    google_recognizer.reset()
    google_recognizer.close()
def test_max_active():
    """The activation timeout must force-deactivate once max_active elapses."""
    max_active = 500
    min_active = 20
    ctx = SpeechContext()
    timeout = ActivationTimeout(min_active=min_active, max_active=max_active)

    ctx.is_active = True

    # each call represents one frame; one extra step crosses max_active
    for _ in range(max_active // 20 + 1):
        timeout(ctx)

    assert not ctx.is_active

    timeout.close()
def test_min_active():
    """The context must stay active until min_active has elapsed, and only
    deactivate once speech ends after that point."""
    max_active = 500
    min_active = 120
    ctx = SpeechContext()
    timeout = ActivationTimeout(min_active=min_active, max_active=max_active)

    ctx.is_active = True

    # one tick while speech is present
    ctx.is_speech = True
    timeout(ctx)

    # speech drops; the context must not deactivate yet
    ctx.is_speech = False
    timeout(ctx)
    assert ctx.is_active

    # vad fall should be True, with the context still active
    timeout(ctx)
    assert ctx.is_active

    # the context must remain active until min_active is reached
    for _ in range(min_active // 20):
        timeout(ctx)
        assert ctx.is_active

    # speech returns for one tick
    ctx.is_speech = True
    timeout(ctx)

    # speech drops again; min_active is now satisfied, so it deactivates
    ctx.is_speech = False
    timeout(ctx)
    assert not ctx.is_active

    timeout.close()
def test_timeout_vad_fall():
    """After a VAD fall, enough silent ticks past min_active must deactivate
    the context."""
    max_active = 500
    min_active = 20
    ctx = SpeechContext()
    timeout = ActivationTimeout(min_active=min_active, max_active=max_active)

    # start active with no speech detected
    ctx.is_active = True
    ctx.is_speech = False
    timeout(ctx)

    # speech rises; the context stays active
    ctx.is_speech = True
    timeout(ctx)
    assert ctx.is_active

    # speech falls; tick past min_active to trigger the timeout
    ctx.is_speech = False
    for _ in range(min_active // 20 + 2):
        timeout(ctx)
    assert not ctx.is_active

    timeout.close()
# Example #15
 def deactivate(self, context: SpeechContext) -> None:
     """Deactivate the speech pipeline.

     Calls self.reset() — presumably clearing internal recognizer state;
     confirm against the reset() implementation — before marking the shared
     context inactive.

     Args:
         context (SpeechContext): shared pipeline state to deactivate
     """
     # reset internal state first, then clear the shared activation flag
     self.reset()
     context.is_active = False