示例#1
0
class Translator:
    """Transcribes audio with Google Cloud Speech and manages the GCS
    bucket that stores the uploaded audio files."""

    def __init__(self):
        self.client = SpeechClient()
        gcs_client = storage.Client()
        self.bucket_name = 'cross-culture-audios'
        self.bucket = gcs_client.get_bucket(self.bucket_name)

    def translate_long(self, gs_uri):
        """Transcribe the FLAC audio at *gs_uri*; return one transcript
        line per result, joined with newlines."""
        audio = types.RecognitionAudio(uri=gs_uri)
        config = types.RecognitionConfig(
            encoding='FLAC',
            language_code='en-US',
            sample_rate_hertz=44100,
        )
        operation = self.client.long_running_recognize(config=config,
                                                       audio=audio)
        response = operation.result()
        lines = []
        for res in response.results:
            if res.alternatives:
                lines.append(res.alternatives[0].transcript.strip())
        return '\n'.join(lines)

    def translate_with_timestamps(self, gs_uri):
        """Transcribe *gs_uri*; return a list of [word, start, end] with
        times in seconds rounded to one decimal place."""
        audio = types.RecognitionAudio(uri=gs_uri)
        config = types.RecognitionConfig(
            encoding='FLAC',
            language_code='en-US',
            # sample_rate_hertz=44100,
            enable_word_time_offsets=True)
        operation = self.client.long_running_recognize(config=config,
                                                       audio=audio)

        def to_seconds(offset):
            # Whole seconds plus nanos folded in at 0.1 s resolution.
            return offset.seconds + round(offset.nanos * 1e-9, 1)

        words = []
        for res in operation.result().results:
            if not res.alternatives:
                continue
            for info in res.alternatives[0].words:
                words.append([info.word,
                              to_seconds(info.start_time),
                              to_seconds(info.end_time)])
        return words

    def upload_to_gcs(self, filepath):
        """Upload a local file under audios/ in the bucket; return its
        gs:// URI."""
        basename = ntpath.basename(filepath)
        gs_filepath = 'audios/%s' % basename
        self.bucket.blob(gs_filepath).upload_from_filename(filepath)
        return self.generate_uri(gs_filepath)

    def delete_from_gcs(self, filename):
        """Remove audios/<filename> from the bucket."""
        self.bucket.delete_blob('audios/%s' % filename)

    def generate_uri(self, filepath):
        """Build the gs:// URI for a path inside this bucket."""
        return 'gs://%s/%s' % (self.bucket_name, filepath)
示例#2
0
def recognize_audio_from_file(
    file: Union[str, os.PathLike],
    credential: Union[str, os.PathLike, None] = None,
    language_code: str = 'en-US',
    encoding: enums.RecognitionConfig.AudioEncoding = enums.RecognitionConfig.
    AudioEncoding.FLAC,
    sampling_rate_hertz: int = 44100,
) -> types.RecognizeResponse:
    """Synchronously recognize speech in a local audio file.

    Args:
        file (str, os.PathLike) : Path of the audio file to transcribe.
        credential (str, os.PathLike, None) : Service-account JSON file;
            when None, default application credentials are used.
        language_code (str) : BCP-47 language tag, e.g. 'en-US'.
        encoding (enums.RecognitionConfig.AudioEncoding) : Audio codec.
        sampling_rate_hertz (int) : Sample rate of the audio in hertz.

    Returns:
        types.RecognizeResponse
    """
    if credential is None:
        client = SpeechClient()
    else:
        credentials = Credentials.from_service_account_file(
            filename=credential)
        client = SpeechClient(credentials=credentials)

    # BUG FIX: the RecognitionConfig proto field is `sample_rate_hertz`;
    # passing the misspelled `sampling_rate_hertz` keyword raises a
    # ValueError inside protobuf before any request is made.
    config = types.RecognitionConfig(encoding=encoding,
                                     language_code=language_code,
                                     sample_rate_hertz=sampling_rate_hertz)
    with io.open(file, 'rb') as audio_file:
        content = audio_file.read()
    audio = types.RecognitionAudio(content=content)

    return client.recognize(config, audio)
示例#3
0
    def __init__(self, credential: Union[str, os.PathLike, None] = None):
        """Create the speech client, optionally from a service-account file.

        Args:
            credential (str, os.PathLike, None) : Path to a service-account
                JSON file; when None, default application credentials apply.
        """
        if credential is not None:
            creds = Credentials.from_service_account_file(
                filename=credential)
            self.client = SpeechClient(credentials=creds)
        else:
            self.client = SpeechClient()
示例#4
0
	def onStart(self):
		"""Start the ASR: export the credentials file and build the
		recognition and streaming configurations."""
		super().onStart()
		os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(self._credentialsFile)

		self._client = SpeechClient()
		# noinspection PyUnresolvedReferences
		recognitionConfig = types.RecognitionConfig(
			encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
			language_code=self.LanguageManager.getLanguageAndCountryCode(),
			sample_rate_hertz=self.AudioServer.SAMPLERATE
		)
		self._streamingConfig = types.StreamingRecognitionConfig(
			config=recognitionConfig,
			interim_results=True
		)
示例#5
0
    def transcribe_gcs(self, gcs_uri):
        """Asynchronously transcribes the audio file specified by the gcs_uri.
        args:
            gcs_uri - URI with format 'gs://<bucket>/<path_to_audio>'
        returns:
            trans - a list of transcribed sections
        """
        printmsg.begin('Initiating Google Cloud Speech operation')
        client = SpeechClient()

        audio = types.RecognitionAudio(uri=gcs_uri)
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
            sample_rate_hertz=44100,
            language_code='en-GB',
            enable_word_time_offsets=True)

        operation = client.long_running_recognize(config, audio)
        printmsg.end()

        # BUG FIX: this string is not %-formatted, so '%%' was printed
        # verbatim as '[0%%]'; a single '%' renders the intended '[0%]'.
        printmsg.begin('Waiting for operation to complete [0%]')
        while not operation.done():
            time.sleep(1)
            printmsg.begin('Waiting for operation to complete [%s%%]' %
                           operation.metadata.progress_percent)
        response = operation.result(timeout=10)
        printmsg.end()

        def get_ts(offset):
            """Split a protobuf Duration into min/sec/msec components."""
            return dict(min=offset.seconds // 60,
                        sec=offset.seconds % 60,
                        msec=offset.nanos // (10**6))

        # Each result is for a consecutive portion of the audio. Iterate through
        # them to get the transcripts for the entire audio file.
        trans = []
        for result in response.results:
            # The first alternative is the most likely one for this portion.
            best = result.alternatives[0]
            seg = dict(text=best.transcript,
                       confidence=best.confidence,
                       words=[])
            for word_info in best.words:
                # Only the start offset is recorded per word; the end offset
                # was previously computed but never used.
                word_obj = dict(word=word_info.word,
                                tstamp=get_ts(word_info.start_time))
                seg['words'].append(word_obj)
            trans.append(seg)

        return trans
示例#6
0
    def onStart(self):
        """Start the ASR: point Google at the credentials file and prepare
        the recognition and streaming configurations."""
        super().onStart()
        credentialsPath = Path(self.Commons.rootDir(),
                               'credentials/googlecredentials.json')
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(credentialsPath)

        self._client = SpeechClient()
        # noinspection PyUnresolvedReferences
        recognitionConfig = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=self.ConfigManager.getAliceConfigByName(
                'micSampleRate'),
            language_code=self.LanguageManager.activeLanguageAndCountryCode)

        self._streamingConfig = types.StreamingRecognitionConfig(
            config=recognitionConfig, interim_results=True)
示例#7
0
def VoiceRecognition(b_voice_data):
    """Recognize Telegram voice-message bytes with Google Cloud Speech.

    Returns the first transcript, the sentinel 'NDVR' when the API yields
    no results, or False when client setup or the API call raises.
    """
    print("VR: initialized")

    try:
        client = SpeechClient()
        print("VR: preparing recognition request")

        audio = types.RecognitionAudio(content=b_voice_data)
        # setup default Telegram format
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.OGG_OPUS,
            language_code='en-US',
            sample_rate_hertz=16000,
            max_alternatives=0)

        # Recognize speech content
        print("VR: call for Google Speech API")

        try:
            response = client.recognize(config, audio)
            print("VR: GCS API call finished")
            print(response)

            if not response.results:
                print("VR: GCS API returned NULL")
                return "NDVR"
            for result in response.results:
                # First result's top alternative is the answer.
                return result.alternatives[0].transcript

        except Exception as apiClientExpt:
            print(
                "VR: FATAL ERROR: unhandled exception when calling recognize API"
            )
            print(apiClientExpt)

            return False

    except Exception as speechClientExpt:
        print(
            "VR: FATAL ERROR: unhandled exception when initializing SpeechClient"
        )
        print(speechClientExpt)

        return False
    def __init__(self):
        """Initialize the streaming STT backend with service-account
        credentials taken from the module configuration.
        """
        # The Google imports are deferred to construction time and published
        # as module globals so other methods can use them — presumably so the
        # module can be imported without google-cloud-speech installed.
        global SpeechClient, types, enums, Credentials
        from google.cloud.speech import SpeechClient, types, enums
        from google.oauth2.service_account import Credentials

        super(GoogleCloudStreamingSTT, self).__init__()
        # override language with module specific language selection
        self.language = self.config.get('lang') or self.lang

        # Credentials are embedded as JSON in the module's credential config.
        credentials = Credentials.from_service_account_info(
            self.credential.get('json'))

        self.client = SpeechClient(credentials=credentials)
        # 16 kHz mono LINEAR16, single best alternative, tuned for short
        # command-style utterances.
        recognition_config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=16000,
            language_code=self.language,
            model='command_and_search',
            max_alternatives=1,
        )
        # interim_results + single_utterance: stream partials and stop after
        # the first detected utterance.
        self.streaming_config = types.StreamingRecognitionConfig(
            config=recognition_config,
            interim_results=True,
            single_utterance=True,
        )
示例#9
0
def proof_of_concept():
    """One-shot sanity check: synchronously transcribe a stereo FLAC file
    stored in Cloud Storage and print the raw response."""
    config = RecognitionConfig(encoding=RecognitionConfig.AudioEncoding.FLAC,
                               # BUG FIX: 'en-GB' is the valid BCP-47 tag for
                               # British English; 'en-UK' is not a supported
                               # language code and is rejected by the API.
                               language_code="en-GB",
                               audio_channel_count=2)
    audio = RecognitionAudio(uri='gs://general-rodderscode-co-uk/test.flac')
    response = SpeechClient().recognize(config=config, audio=audio)
    print(response)
示例#10
0
 def __transcribe_chunk(self, async_iter):
     """Transcribe one audio chunk with Google Cloud Speech.

     Args:
         async_iter: (frame_rate, encoding, file_path) tuple describing
             the chunk — TODO confirm producer's exact tuple shape.

     Returns:
         The recognition response for the chunk's '-accuracy' file.
     """
     # Unpack the chunk description produced by the async pipeline.
     frame_rate, encoding, file_path = async_iter
     # The recognizable audio lives in the '-accuracy' sibling of the chunk.
     accuracy_chunk_path = append_before_ext(file_path, '-accuracy')
     with open(accuracy_chunk_path, 'rb') as audio_content:
         content = audio_content.read()
     config = self.__get_config(encoding, frame_rate)
     audio = types.RecognitionAudio(content=content)
     return SpeechClient().recognize(config, audio)
def get_raw(file_name: str, client: speech.SpeechClient) -> str:
    """Run a long-running German (de-DE) transcription with word offsets
    and return the whole response serialized as JSON.

    :param file_name: GCS URI of the audio file (passed as the audio uri)
    :param client:    Google Cloud API Speech client

    :return: str JSON encoded response
    """
    recognition_audio = types.RecognitionAudio(uri=file_name)
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=44100,
        language_code="de-DE",
        enable_word_time_offsets=True)

    operation = client.long_running_recognize(recognition_config,
                                              recognition_audio)
    # Long audio can take a while; give the operation up to 15 minutes.
    return MessageToJson(operation.result(timeout=900))
示例#12
0
def recognize_stream(bytestream: Generator[ByteString, None,
                                           None], client: speech.SpeechClient,
                     recognition_config: types.RecognitionConfig, q: Queue):
    """Stream audio chunks to Google, pushing each final top alternative
    onto *q* as it arrives."""
    requests = (types.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in bytestream)
    responses = client.streaming_recognize(
        get_streaming_recognition_config(recognition_config), requests)
    for resp in responses:
        logger.debug("reading next response; resp.results is {}".format(
            resp.results))
        if not resp.results:
            continue
        finals = [r for r in resp.results if r.is_final]
        if finals:
            q.put(finals[0].alternatives[0])
    logger.info("no more responses!")
    logger.info("exit from recognize_stream!")
    return
示例#13
0
def recognize_audio_from_uri(
    uri: str,
    credential: Union[str, os.PathLike, None] = None,
    language_code: str = 'en-US',
    encoding: enums.RecognitionConfig.AudioEncoding = enums.RecognitionConfig.
    AudioEncoding.FLAC,
    sampling_rate_hertz: int = 44100,
) -> types.RecognizeResponse:
    """Recognize speech at a Cloud Storage URI, falling back to the
    long-running API when synchronous recognition is rejected.

    Args:
        uri (str) : Cloud Storage URI of the audio.
        credential (str, os.PathLike, None) : Service-account JSON file.
        language_code: BCP-47 language tag.
        encoding (enums.RecognitionConfig.AudioEncoding) : Audio codec.
        sampling_rate_hertz (int) : Sample rate in hertz.

    Returns:
        types.RecognizeResponse
    """
    if credential is not None:
        credentials = Credentials.from_service_account_file(
            filename=credential)
        client = SpeechClient(credentials=credentials)
    else:
        client = SpeechClient()

    config = types.RecognitionConfig(encoding=encoding,
                                     language_code=language_code,
                                     sample_rate_hertz=sampling_rate_hertz)
    audio = types.RecognitionAudio(uri=uri)

    try:
        return client.recognize(config=config, audio=audio)
    except exceptions.InvalidArgument:
        # Synchronous recognition rejects long audio; retry asynchronously.
        print(
            'cannot synchronize recognition. switched asynchronized recognition'
        )
        operation = client.long_running_recognize(config=config, audio=audio)
        return operation.result()
示例#14
0
from flask import Flask, request, render_template
from google.cloud.speech import enums, types, SpeechClient

import json
import os
import traceback

from parse_command import parse_command

# Flask application serving the audio-upload endpoint below.
app = Flask(__name__)

# Fall back to a local key.json unless credentials are already configured.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', 'key.json')

# Shared Speech client, created once at import time.
client = SpeechClient()


@app.route('/upload', methods=['POST'])
def upload():
    """Accept a POSTed audio file and transcribe it with Google Cloud Speech.

    NOTE(review): this view appears truncated — `user_agent` and
    `voice_command` are assigned but never used and no response is
    returned; confirm against the full source.
    """
    raw_audio = request.files['audio_data']
    user_agent = request.headers.get('User-Agent')
    content = raw_audio.read()
    audio = types.RecognitionAudio(content=content)
    # Two-channel 16-bit linear PCM; no explicit sample rate is set.
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        audio_channel_count=2,
        language_code='en-US',
    )
    response = client.recognize(config, audio)
    print(response)
    for result in response.results:
        voice_command = result.alternatives[0].transcript
from google.cloud import firestore
from google.cloud.vision import ImageAnnotatorClient
from google.cloud.speech import SpeechClient, RecognitionAudio, RecognitionConfig
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types

# All Google Cloud clients below share one service-account key file.
service_account_name = './service_account.json'
db = firestore.Client.from_service_account_json(service_account_name)
vision_client = ImageAnnotatorClient.from_service_account_json(service_account_name)
speech_client = SpeechClient.from_service_account_json(service_account_name)
language_client = language.LanguageServiceClient.from_service_account_json(service_account_name)

def max_window():
    """Return the fixed maximum window size (60)."""
    return 60
    
def database():
    """Return the shared Firestore client."""
    return db

def vision():
    """Return the shared Vision annotator client."""
    return vision_client

def speech():
    """Return the shared Speech client along with its audio/config types."""
    return speech_client, RecognitionAudio, RecognitionConfig

def language(text):
    """Build a plain-text NL document for *text*; return it with the client.

    NOTE(review): this function shadows the imported `language` module.
    """
    doc = types.Document(content=text,
                         type=enums.Document.Type.PLAIN_TEXT)
    return language_client, doc
示例#16
0
class SpeechToText:
    """Google Cloud Speech wrapper for synchronous recognition from a
    Cloud Storage URI or a local file."""

    def __init__(self, credential: Union[str, os.PathLike, None] = None):
        """
        Args:
            credential (str, os.PathLike, None) : Path to a service-account
                JSON file; when None, default application credentials apply.
        """
        if credential is None:
            self.client = SpeechClient()
        else:
            credentials = Credentials.from_service_account_file(
                filename=credential)
            self.client = SpeechClient(credentials=credentials)

    def recognize_from_uri(
            self,
            uri: str,
            encoding: enums.RecognitionConfig.AudioEncoding = enums.
        RecognitionConfig.AudioEncoding.FLAC,
            language_code: str = 'en-US',
            sampling_rate_hertz: int = 44100) -> types.RecognizeResponse:
        """Recognize speech in the audio stored at a Cloud Storage URI.

        Args:
            uri (str) : gs:// URI of the audio file.
            encoding (enums.RecognitionConfig.AudioEncoding) : Audio codec.
            language_code (str) : BCP-47 language tag.
            sampling_rate_hertz (int) : Sample rate of the audio in hertz.

        Returns:
            types.RecognizeResponse
        """
        # BUG FIX: the RecognitionConfig proto field is `sample_rate_hertz`;
        # the previous `sampling_rate_hertz` keyword raised a ValueError
        # inside protobuf before any request was made.
        config = types.RecognitionConfig(
            encoding=encoding,
            language_code=language_code,
            sample_rate_hertz=sampling_rate_hertz)
        audio = types.RecognitionAudio(uri=uri)

        return self.client.recognize(config, audio)

    def recognize_from_file(
            self,
            file: Union[str, os.PathLike],
            encoding: enums.RecognitionConfig.AudioEncoding = enums.
        RecognitionConfig.AudioEncoding.FLAC,
            language_code: str = 'en-US',
            sampling_rate_hertz: int = 44100) -> types.RecognizeResponse:
        """Recognize speech in a local audio file.

        Args:
            file (str, os.PathLike) : Path of the audio file.
            encoding (enums.RecognitionConfig.AudioEncoding) : Audio codec.
            language_code (str) : BCP-47 language tag.
            sampling_rate_hertz (int) : Sample rate of the audio in hertz.

        Returns:
            types.RecognizeResponse
        """
        # BUG FIX: same `sample_rate_hertz` field-name correction as above.
        config = types.RecognitionConfig(
            encoding=encoding,
            language_code=language_code,
            sample_rate_hertz=sampling_rate_hertz)
        with io.open(file, 'rb') as audio_file:
            content = audio_file.read()
        audio = types.RecognitionAudio(content=content)
        return self.client.recognize(config, audio)
示例#17
0
 def __init__(self):
     """Create the speech client and grab the audio bucket handle."""
     self.client = SpeechClient()
     gcs_client = storage.Client()
     self.bucket_name = 'cross-culture-audios'
     self.bucket = gcs_client.get_bucket(self.bucket_name)
示例#18
0
class GoogleAsr(Asr):
	"""Google Cloud streaming speech recognition for Alice, with
	internet-loss detection while decoding."""

	NAME = 'Google Asr'
	DEPENDENCIES = {
		'system': [],
		'pip'   : {
			'google-cloud-speech==1.3.1'
		}
	}


	def __init__(self):
		super().__init__()
		self._credentialsFile = Path(self.Commons.rootDir(), 'credentials/googlecredentials.json')
		self._capableOfArbitraryCapture = True
		self._isOnlineASR = True

		# Client and streaming config are created in onStart()
		self._client: Optional[SpeechClient] = None
		self._streamingConfig: Optional[types.StreamingRecognitionConfig] = None

		if self._credentialsFile.exists() and not self.ConfigManager.getAliceConfigByName('googleASRCredentials'):
			self.ConfigManager.updateAliceConfiguration(key='googleASRCredentials', value=self._credentialsFile.read_text(), doPreAndPostProcessing=False)

		self._internetLostFlag = Event()  # Set if internet goes down, cut the decoding
		self._lastResultCheck = 0  # The time the intermediate results were last checked. If actual time is greater than this value + 3, stop processing, internet issues

		self._previousCapture = ''  # The text that was last captured in the iteration


	def onStart(self):
		"""Export credentials and build the client and streaming config."""
		super().onStart()
		os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(self._credentialsFile)

		self._client = SpeechClient()
		# noinspection PyUnresolvedReferences
		config = types.RecognitionConfig(
			encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
			sample_rate_hertz=self.AudioServer.SAMPLERATE,
			language_code=self.LanguageManager.getLanguageAndCountryCode()
		)

		self._streamingConfig = types.StreamingRecognitionConfig(config=config, interim_results=True)


	def decodeStream(self, session: DialogSession) -> Optional[ASRResult]:
		"""Record the session audio, stream it to Google and return the
		decoded result, or None on failure."""
		super().decodeStream(session)

		recorder = Recorder(self._timeout, session.user, session.deviceUid)
		self.ASRManager.addRecorder(session.deviceUid, recorder)
		self._recorder = recorder
		result = None
		with Stopwatch() as processingTime:
			with recorder as stream:
				audioStream = stream.audioStream()
				# noinspection PyUnresolvedReferences
				try:
					requests = (types.StreamingRecognizeRequest(audio_content=content) for content in audioStream)
					responses = self._client.streaming_recognize(self._streamingConfig, requests)
					result = self._checkResponses(session, responses)
				except Exception as e:
					self._internetLostFlag.clear()
					self.logWarning(f'Failed ASR request: {e}')

			self.end()

		return ASRResult(
			text=result[0],
			session=session,
			likelihood=result[1],
			processingTime=processingTime.time
		) if result else None


	def onInternetLost(self):
		# Signaled by the core; checked on every streamed response below.
		self._internetLostFlag.set()


	def _checkResponses(self, session: DialogSession, responses: Generator) -> Optional[tuple]:
		"""Walk the streamed responses; return (transcript, confidence) of
		the final (or best salvageable) result, else None."""
		if responses is None:
			return None

		for response in responses:
			if self._internetLostFlag.is_set():
				self.logDebug('Internet connectivity lost during ASR decoding')

				if not response.results:
					raise Exception('Internet connectivity lost during decoding')

				# Salvage the partial result we already received.
				result = response.results[0]
				return result.alternatives[0].transcript, result.alternatives[0].confidence

			if not response.results:
				continue

			result = response.results[0]
			if not result.alternatives:
				continue

			if result.is_final:
				return result.alternatives[0].transcript, result.alternatives[0].confidence
			elif result.alternatives[0].transcript != self._previousCapture:
				self.partialTextCaptured(session=session, text=result.alternatives[0].transcript, likelihood=result.alternatives[0].confidence, seconds=0)
				self._previousCapture = result.alternatives[0].transcript
			elif result.alternatives[0].transcript == self._previousCapture:
				now = int(time())

				if self._lastResultCheck == 0:
					# BUG FIX: this used to re-assign 0, so the timestamp never
					# advanced and the 3-second connectivity timeout below could
					# never trigger. Record the current time instead.
					self._lastResultCheck = now
					continue

				if now > self._lastResultCheck + 3:
					self.logDebug(f'Stopping process as there seems to be connectivity issues')
					return result.alternatives[0].transcript, result.alternatives[0].confidence

				self._lastResultCheck = now

		return None
示例#19
0
                        help="connect to unity",
                        default=False)
    parser.add_argument("--lang_code",
                        type=str,
                        help="the language code of your language",
                        default="zh-tw")
    args = parser.parse_args()

    if args.connect:
        address = ('127.0.0.1', 5067)
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect(address)
    else:
        sock = None

    client = SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=args.lang_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    print("%s recognition started!" % args.lang_code)
    while True:
        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            try:
                responses = client.streaming_recognize(streaming_config,
示例#20
0
class GoogleAsr(Asr):
    """Google Cloud streaming speech recognition for Alice."""

    NAME = 'Google Asr'
    DEPENDENCIES = {'system': [], 'pip': {'google-cloud-speech==1.3.1'}}

    def __init__(self):
        super().__init__()
        self._capableOfArbitraryCapture = True
        self._isOnlineASR = True

        # Client and streaming config are created in onStart().
        self._client: Optional[SpeechClient] = None
        self._streamingConfig: Optional[
            types.StreamingRecognitionConfig] = None

        self._previousCapture = ''

    def onStart(self):
        """Export credentials and build the client and streaming config."""
        super().onStart()
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(
            Path(self.Commons.rootDir(), 'credentials/googlecredentials.json'))

        self._client = SpeechClient()
        # noinspection PyUnresolvedReferences
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=self.AudioServer.SAMPLERATE,
            language_code=self.LanguageManager.getLanguageAndCountryCode())

        self._streamingConfig = types.StreamingRecognitionConfig(
            config=config, interim_results=True)

    def decodeStream(self, session: DialogSession) -> Optional[ASRResult]:
        """Record the session audio, stream it to Google and return the
        decoded result, or None on failure."""
        super().decodeStream(session)

        recorder = Recorder(self._timeout, session.user, session.siteId)
        self.ASRManager.addRecorder(session.siteId, recorder)
        self._recorder = recorder
        # BUG FIX: `result` must exist even when the request raises, else the
        # ASRResult expression below dies with UnboundLocalError.
        result = None
        with Stopwatch() as processingTime:
            with recorder as stream:
                audioStream = stream.audioStream()
                # noinspection PyUnresolvedReferences
                try:
                    requests = (types.StreamingRecognizeRequest(
                        audio_content=content) for content in audioStream)
                    responses = self._client.streaming_recognize(
                        self._streamingConfig, requests)
                    result = self._checkResponses(session, responses)
                except Exception:
                    # Narrowed from a bare `except:` so KeyboardInterrupt and
                    # SystemExit still propagate.
                    self.logWarning('Failed ASR request')

            self.end()

        return ASRResult(
            text=result[0],
            session=session,
            likelihood=result[1],
            processingTime=processingTime.time) if result else None

    def _checkResponses(self, session: DialogSession,
                        responses: Generator) -> Optional[tuple]:
        """Walk the streamed responses; return (transcript, confidence) of
        the first final result, forwarding partials along the way."""
        if responses is None:
            return None

        for response in responses:
            if not response.results:
                continue

            result = response.results[0]
            if not result.alternatives:
                continue

            if result.is_final:
                return result.alternatives[0].transcript, result.alternatives[
                    0].confidence
            elif result.alternatives[0].transcript != self._previousCapture:
                self.partialTextCaptured(
                    session=session,
                    text=result.alternatives[0].transcript,
                    likelihood=result.alternatives[0].confidence,
                    seconds=0)
                self._previousCapture = result.alternatives[0].transcript

        return None
示例#21
0
class GoogleAsr(Asr):
    """Google Cloud streaming ASR with slow/lost-connectivity handling.

    Partial results are forwarded as they arrive; when Google keeps
    repeating the same partial transcript without confirming it, the last
    capture is returned after a ~3 second grace period.
    """

    NAME = 'Google Asr'
    DEPENDENCIES = {'system': [], 'pip': {'google-cloud-speech==1.3.1'}}

    def __init__(self):
        super().__init__()
        self._capableOfArbitraryCapture = True
        self._isOnlineASR = True

        # Client and streaming config are created in onStart().
        self._client: Optional[SpeechClient] = None
        self._streamingConfig: Optional[
            types.StreamingRecognitionConfig] = None

        self._internetLostFlag = Event(
        )  # Set if internet goes down, cut the decoding
        self._lastResultCheck = 0  # The time the intermediate results were last checked. If actual time is greater than this value + 3, stop processing, internet issues

        self._previousCapture = ''  # The text that was last captured in the iteration
        self._delayedGoogleConfirmation = False  # set whether slow internet is detected or not

    def onStart(self):
        """Export credentials and build the client and streaming config."""
        super().onStart()
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = str(
            Path(self.Commons.rootDir(), 'credentials/googlecredentials.json'))

        self._client = SpeechClient()
        # noinspection PyUnresolvedReferences
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=self.AudioServer.SAMPLERATE,
            language_code=self.LanguageManager.getLanguageAndCountryCode())

        self._streamingConfig = types.StreamingRecognitionConfig(
            config=config, interim_results=True)

    def decodeStream(self, session: DialogSession) -> Optional[ASRResult]:
        """Record the session audio, stream it to Google and return the
        decoded result, or None when nothing was decoded."""
        super().decodeStream(session)

        recorder = Recorder(self._timeout, session.user, session.siteId)
        self.ASRManager.addRecorder(session.siteId, recorder)
        self._recorder = recorder
        result = None
        with Stopwatch() as processingTime:
            with recorder as stream:
                audioStream = stream.audioStream()
                # noinspection PyUnresolvedReferences
                try:
                    requests = (types.StreamingRecognizeRequest(
                        audio_content=content) for content in audioStream)
                    responses = self._client.streaming_recognize(
                        self._streamingConfig, requests)
                    result = self._checkResponses(session, responses)
                except:
                    # NOTE(review): bare except also swallows KeyboardInterrupt
                    # and SystemExit — consider `except Exception`.
                    self._internetLostFlag.clear()
                    self.logWarning('Failed ASR request')

            self.end()

        return ASRResult(
            text=result[0],
            session=session,
            likelihood=result[1],
            processingTime=processingTime.time) if result else None

    def onInternetLost(self):
        # Signaled by the core; checked on every streamed response below.
        self._internetLostFlag.set()

    def _checkResponses(self, session: DialogSession,
                        responses: Generator) -> Optional[tuple]:
        """Walk the streamed responses; return (transcript, confidence) of
        the final (or best salvageable) result, else None."""
        if responses is None:
            return None

        for response in responses:
            if self._internetLostFlag.is_set():
                self.logDebug('Internet connectivity lost during ASR decoding')

                if not response.results:
                    raise Exception(
                        'Internet connectivity lost during decoding')

                # Salvage the partial result we already received.
                result = response.results[0]
                return result.alternatives[0].transcript, result.alternatives[
                    0].confidence

            if not response.results:
                continue

            result = response.results[0]
            if not result.alternatives:
                continue

            if result.is_final:
                # Reset the slow-connection bookkeeping on confirmation.
                self._lastResultCheck = 0
                self._delayedGoogleConfirmation = False
                # print(f'Text confirmed by Google')
                return result.alternatives[0].transcript, result.alternatives[
                    0].confidence
            elif result.alternatives[0].transcript != self._previousCapture:
                self.partialTextCaptured(
                    session=session,
                    text=result.alternatives[0].transcript,
                    likelihood=result.alternatives[0].confidence,
                    seconds=0)
                # below function captures the "potential" full utterance not just one word from it
                if len(self._previousCapture) <= len(
                        result.alternatives[0].transcript):
                    self._previousCapture = result.alternatives[0].transcript
            elif result.alternatives[0].transcript == self._previousCapture:

                # If we are here it's cause google hasn't responded yet with confirmation on captured text
                # Store the time in seconds since epoch
                now = int(time())
                # Set a reference to nows time plus 3 seconds
                self._lastResultCheck = now + 3
                # wait 3 seconds and see if google responds
                if not self._delayedGoogleConfirmation:
                    # print(f'Text of "{self._previousCapture}" captured but not confirmed by GoogleASR yet')
                    # NOTE(review): this loop busy-waits ~3 s at full CPU on
                    # this thread; a sleep-based wait would be gentler.
                    while now <= self._lastResultCheck:
                        now = int(time())
                        self._delayedGoogleConfirmation = True
                    # Give google the option to still process  the utterance
                    continue
                # During next iteration, If google hasn't responded in 3 seconds assume intent is correct
                if self._delayedGoogleConfirmation:
                    self.logDebug(
                        f'Stopping process as there seems to be connectivity issues'
                    )
                    self._lastResultCheck = 0
                    self._delayedGoogleConfirmation = False
                    return result.alternatives[
                        0].transcript, result.alternatives[0].confidence

        return None