def writeInitCode(self, buff):
    # Alert user if there's no eyetracker record component in this routine
    recorded = False
    for sibling in self.exp.routines[self.parentName]:
        if isinstance(sibling, EyetrackerRecordComponent):
            recorded = True
    if not recorded:
        alert(code=4550, strFields={"name": self.params['name']})
    # do we need units code?
    if self.params['units'].val == 'from exp settings':
        unitsStr = ""
    else:
        unitsStr = "units=%(units)s, " % self.params
    # do writing of init
    inits = getInitVals(self.params, 'PsychoPy')
    if self.params['shape'] == 'regular polygon...':
        inits['shape'] = self.params['nVertices']
    elif self.params['shape'] == 'custom polygon...':
        inits['shape'] = self.params['vertices']

    code = (
        "%(name)s = visual.ROI(win, name='%(name)s', tracker=eyetracker,\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(1, relative=True)
    code = (
        "debug=%(debug)s,\n"
        "shape=%(shape)s,\n"
        + unitsStr +
        "pos=%(pos)s, size=%(size)s, ori=0.0)\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(-1, relative=True)

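# For reference, a sketch of roughly what writeInitCode() above emits into the
# generated experiment script, assuming a hypothetical component named 'roi'
# with a rectangle shape and explicit units (actual values come from params):
#
#     roi = visual.ROI(win, name='roi', tracker=eyetracker,
#         debug=False,
#         shape='rectangle',
#         units='height', pos=(0, 0), size=(0.5, 0.5), ori=0.0)
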
def writeFrameCode(self, buff):
    """Write the code that will be called every frame
    """
    # Alert user if eyetracking isn't set up
    if self.exp.eyetracking == "None":
        alert(code=4505)
    inits = self.params
    buff.writeIndentedLines("# *%s* updates\n" % self.params['name'])
    # test for whether we're just starting to record
    # writes an if statement to determine whether to draw etc
    self.writeStartTestCode(buff)
    code = ("%(name)s.status = STARTED\n")
    buff.writeIndentedLines(code % self.params)
    buff.setIndentLevel(-1, relative=True)
    # test for stop (only if there was some setting for duration or stop)
    org_val = self.params['stopVal'].val
    if self.params['actionType'].val.find('Start Only') >= 0:
        self.params['stopVal'].val = 0
    if self.params['stopVal'].val not in ['', None, -1, 'None']:
        # writes an if statement to determine whether to draw etc
        self.writeStopTestCode(buff)
        code = ("%(name)s.status = FINISHED\n")
        buff.writeIndentedLines(code % self.params)
        # to get out of the if statement
        buff.setIndentLevel(-2, relative=True)
    self.params['stopVal'].val = org_val

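# Roughly what the frame code above produces for a component named 'roi' with
# a set duration. The start/stop conditionals come from the base class's
# writeStartTestCode()/writeStopTestCode() (not shown here), so this is only
# an approximation of the emitted block:
#
#     # *roi* updates
#     if roi.status == NOT_STARTED and tThisFlip >= 0.0-frameTolerance:
#         roi.status = STARTED
#     if roi.status == STARTED:
#         if tThisFlipGlobal > roi.tStartRefresh + 1.0-frameTolerance:
#             roi.status = FINISHED
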
def writeInitCode(self, buff):
    inits = getInitVals(self.params)
    # Substitute sample rate value for numeric equivalent
    inits['sampleRate'] = sampleRates[inits['sampleRate'].val]
    # Substitute channel value for numeric equivalent
    inits['channels'] = {
        'mono': 1,
        'stereo': 2,
        'auto': None
    }[self.params['channels'].val]
    # Substitute device name for device index, or default if not found
    if self.params['device'].val in devices:
        device = devices[self.params['device'].val]
        if hasattr(device, "deviceIndex"):
            inits['device'] = device.deviceIndex
        else:
            inits['device'] = None
    else:
        alert(4330, strFields={'device': self.params['device'].val})
        inits['device'] = None
    # Create Microphone object and clips dict
    code = (
        "%(name)s = sound.microphone.Microphone(\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(1, relative=True)
    code = (
        "device=%(device)s, channels=%(channels)s, \n"
        "sampleRateHz=%(sampleRate)s, maxRecordingSize=%(maxSize)s\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(-1, relative=True)
    code = (
        ")\n"
    )
    buff.writeIndentedLines(code % inits)

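# A sketch of the emitted initialisation, assuming a hypothetical component
# named 'mic' recording stereo audio, with numeric values already substituted
# via the sampleRates/channels lookups above (all values illustrative):
#
#     mic = sound.microphone.Microphone(
#         device=None, channels=2, 
#         sampleRateHz=48000, maxRecordingSize=24000
#     )
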
def writeInitCodeJS(self, buff):
    inits = getInitVals(self.params)
    inits['sampleRate'] = sampleRates[inits['sampleRate'].val]
    # Alert user if non-default value is selected for device
    if inits['device'].val != 'default':
        alert(5055, strFields={'name': inits['name'].val})
    # Write code
    code = (
        "%(name)s = new sound.Microphone({\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(1, relative=True)
    code = (
        "win : psychoJS.window, \n"
        "name:'%(name)s',\n"
        "sampleRateHz : %(sampleRate)s,\n"
        "channels : %(channels)s,\n"
        "maxRecordingSize : %(maxSize)s,\n"
        "loopback : true,\n"
        "policyWhenFull : 'ignore',\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(-1, relative=True)
    code = (
        "});\n"
    )
    buff.writeIndentedLines(code % inits)

def run(self):
    tracker = self.eyetracker.getIOHubDeviceClass(full=True)

    # Deliver any alerts as needed
    if tracker == 'eyetracker.hw.sr_research.eyelink.EyeTracker':
        if self.movementAnimation:
            # Alert user that their animation params aren't used
            alert(code=4520, strFields={"brand": "EyeLink"})
    elif tracker == 'eyetracker.hw.gazepoint.gp3.EyeTracker':
        if not self.progressMode == "time":
            # As GazePoint doesn't use auto-pace, alert user
            alert(4530, strFields={"brand": "GazePoint"})

    # Minimise PsychoPy window
    if self.win._isFullScr and sys.platform == 'win32':
        self.win.winHandle.set_fullscreen(False)
        self.win.winHandle.minimize()

    # Run
    self.last = self.eyetracker.runSetupProcedure(dict(self))

    # Bring back PsychoPy window
    if self.win._isFullScr and sys.platform == 'win32':
        self.win.winHandle.set_fullscreen(True)
        self.win.winHandle.maximize()
        # Not 100% sure activate is necessary, but does not seem to hurt.
        self.win.winHandle.activate()

    # SS: Flip, otherwise a black screen has been seen; not sure why this
    # started happening...
    self.win.flip()

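# The minimise/restore dance above generalises to any native dialog that can
# end up hidden behind a fullscreen window. A standalone sketch (hypothetical
# helper, not part of the source; assumes a pyglet-backed PsychoPy Window):
import sys

def runWithWindowMinimized(win, func, *args, **kwargs):
    wasFullScr = win._isFullScr and sys.platform == 'win32'
    if wasFullScr:  # drop out of fullscreen so the native window is visible
        win.winHandle.set_fullscreen(False)
        win.winHandle.minimize()
    try:
        return func(*args, **kwargs)
    finally:
        if wasFullScr:  # restore and refresh the PsychoPy window
            win.winHandle.set_fullscreen(True)
            win.winHandle.maximize()
            win.winHandle.activate()
            win.flip()
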
def writeRoutineEndCodeJS(self, buff):
    inits = getInitVals(self.params)
    inits['routine'] = self.parentName
    if inits['transcribeBackend'].val in allTranscribers:
        inits['transcribeBackend'].val = allTranscribers[self.params['transcribeBackend'].val]
    # Warn user if their transcriber won't work online
    if inits['transcribe'].val and inits['transcribeBackend'].val not in onlineTranscribers.values():
        default = list(onlineTranscribers.values())[0]
        alert(4605, strFields={"transcriber": inits['transcribeBackend'].val, "default": default})

    # Write base end routine code
    BaseComponent.writeRoutineEndCodeJS(self, buff)
    # Store recordings from this routine
    code = (
        "// stop the microphone (make the audio data ready for upload)\n"
        "await %(name)s.stop();\n"
        "// construct a filename for this recording\n"
        "thisFilename = 'recording_%(name)s_' + currentLoop.name + '_' + currentLoop.thisN\n"
        "// get the recording\n"
        "%(name)s.lastClip = await %(name)s.getRecording({\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(1, relative=True)
    code = (
        "tag: thisFilename + '_' + util.MonotonicClock.getDateStr(),\n"
        "flush: false\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(-1, relative=True)
    code = (
        "});\n"
        "psychoJS.experiment.addData('%(name)s.clip', thisFilename);\n"
        "// start the asynchronous upload to the server\n"
        "%(name)s.lastClip.upload();\n"
    )
    buff.writeIndentedLines(code % inits)
    if self.params['transcribe'].val:
        code = (
            "// transcribe the recording\n"
            "const transcription = await %(name)s.lastClip.transcribe({\n"
        )
        buff.writeIndentedLines(code % inits)
        buff.setIndentLevel(1, relative=True)
        code = (
            "languageCode: %(transcribeLang)s,\n"
            "engine: sound.AudioClip.Engine.%(transcribeBackend)s,\n"
            "wordList: %(transcribeWords)s\n"
        )
        buff.writeIndentedLines(code % inits)
        buff.setIndentLevel(-1, relative=True)
        code = (
            "});\n"
            "%(name)s.lastScript = transcription.transcript;\n"
            "%(name)s.lastConf = transcription.confidence;\n"
            "psychoJS.experiment.addData('%(name)s.transcript', %(name)s.lastScript);\n"
            "psychoJS.experiment.addData('%(name)s.confidence', %(name)s.lastConf);\n"
        )
        buff.writeIndentedLines(code % inits)

def writeMainCode(self, buff):
    # Alert user if eyetracking isn't set up
    if self.exp.eyetracking == "None":
        alert(code=4505)
    # Get inits (deep copy so the substitutions below don't mutate the
    # routine's own params, as in the validation routine's writeMainCode)
    inits = deepcopy(self.params)
    # Code-ify 'from exp settings'
    if self.params['units'].val == 'from exp settings':
        inits['units'].val = None
    # Synonymise expand dur and target dur
    if inits['progressMode'].val == 'time':
        inits['expandDur'] = inits['targetDur']
    if inits['progressMode'].val == 'space key':
        inits['targetDur'] = inits['expandDur']
    # Synonymise movement dur and target delay
    if inits['movementAnimation'].val:
        inits['targetDelay'] = inits['movementDur']
    else:
        inits['movementDur'] = inits['targetDelay']

    BaseStandaloneRoutine.writeMainCode(self, buff)

    # Make target
    code = (
        "# define target for %(name)s\n"
        "%(name)sTarget = visual.TargetStim(win, \n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(1, relative=True)
    code = (
        "name='%(name)sTarget',\n"
        "radius=%(outerRadius)s, fillColor=%(fillColor)s, borderColor=%(borderColor)s, lineWidth=%(borderWidth)s,\n"
        "innerRadius=%(innerRadius)s, innerFillColor=%(innerFillColor)s, innerBorderColor=%(innerBorderColor)s, innerLineWidth=%(innerBorderWidth)s,\n"
        "colorSpace=%(colorSpace)s, units=%(units)s\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(-1, relative=True)
    code = (
        ")"
    )
    buff.writeIndentedLines(code % inits)
    # Make config object
    code = (
        "# define parameters for %(name)s\n"
        "%(name)s = hardware.eyetracker.EyetrackerCalibration(win, \n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(1, relative=True)
    code = (
        "eyetracker, %(name)sTarget,\n"
        "units=%(units)s, colorSpace=%(colorSpace)s,\n"
        "progressMode=%(progressMode)s, targetDur=%(targetDur)s, expandScale=%(expandScale)s,\n"
        "targetLayout=%(targetLayout)s, randomisePos=%(randomisePos)s,\n"
        "movementAnimation=%(movementAnimation)s, targetDelay=%(targetDelay)s\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(-1, relative=True)
    code = (
        ")\n"
        "# run calibration\n"
        "%(name)s.run()\n"
        "# clear any keypresses from during %(name)s so they don't interfere with the experiment\n"
        "defaultKeyboard.clearEvents()\n"
    )
    buff.writeIndentedLines(code % inits)

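# Assembled, the templates above emit something like the following (all
# values are hypothetical; 'calibration' stands in for the routine name):
#
#     # define target for calibration
#     calibrationTarget = visual.TargetStim(win, 
#         name='calibrationTarget',
#         radius=0.05, fillColor='red', borderColor='white', lineWidth=2.0,
#         innerRadius=0.02, innerFillColor='green', innerBorderColor='black', innerLineWidth=2.0,
#         colorSpace='rgb', units=None
#     )
#     # define parameters for calibration
#     calibration = hardware.eyetracker.EyetrackerCalibration(win, 
#         eyetracker, calibrationTarget,
#         units=None, colorSpace='rgb',
#         progressMode='time', targetDur=1.5, expandScale=1.5,
#         targetLayout='NINE_POINTS', randomisePos=True,
#         movementAnimation=False, targetDelay=1.0
#     )
#     # run calibration
#     calibration.run()
#     # clear any keypresses from during calibration so they don't interfere with the experiment
#     defaultKeyboard.clearEvents()
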
def writeRoutineEndCode(self, buff):
    inits = getInitVals(self.params)
    # Alter inits
    transcribe = inits['transcribe'].val
    if inits['transcribe'].val == False:
        inits['transcribeBackend'].val = None
    if inits['outputType'].val == 'default':
        inits['outputType'].val = 'wav'
    # Construct the filename *after* the outputType substitution so the file
    # extension is never the literal 'default'
    if len(self.exp.flow._loopList):
        inits['loop'] = self.exp.flow._loopList[-1].params['name']
        inits['filename'] = f"'recording_{inits['name']}_{inits['loop']}_%s.{inits['outputType']}' % {inits['loop']}.thisTrialN"
    else:
        inits['loop'] = "thisExp"
        inits['filename'] = f"'recording_{inits['name']}'"
    # Warn user if their transcriber won't work locally
    if inits['transcribe'].val:
        if inits['transcribeBackend'].val in localTranscribers:
            inits['transcribeBackend'].val = localTranscribers[self.params['transcribeBackend'].val]
        else:
            default = list(localTranscribers.values())[0]
            alert(4610, strFields={"transcriber": inits['transcribeBackend'].val, "default": default})

    # Store recordings from this routine
    code = (
        "# tell mic to keep hold of current recording in %(name)s.clips and transcript (if applicable) in %(name)s.scripts\n"
        "# this will also update %(name)s.lastClip and %(name)s.lastScript\n"
        "%(name)s.stop()\n"
        "%(name)sClip, %(name)sScript = %(name)s.bank(\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(1, relative=True)
    code = (
        "tag='%(loop)s', transcribe='%(transcribeBackend)s',\n"
    )
    buff.writeIndentedLines(code % inits)
    if transcribe:
        code = (
            "language=%(transcribeLang)s, expectedWords=%(transcribeWords)s\n"
        )
    else:
        code = (
            "config=None\n"
        )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(-1, relative=True)
    code = (
        ")\n"
        "%(loop)s.addData('%(name)s.clip', os.path.join(%(name)sRecFolder, %(filename)s))\n"
    )
    buff.writeIndentedLines(code % inits)
    if transcribe:
        code = (
            "%(loop)s.addData('%(name)s.script', %(name)sScript)\n"
        )
        buff.writeIndentedLines(code % inits)
    # Write base end routine code
    BaseComponent.writeRoutineEndCode(self, buff)

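# To make the filename f-string above concrete: for a hypothetical component
# 'mic' inside a loop named 'trials' with wav output, inits['filename']
# becomes the literal source text
#
#     'recording_mic_trials_%s.wav' % trials.thisTrialN
#
# so the emitted addData line reads:
#
#     trials.addData('mic.clip', os.path.join(micRecFolder, 'recording_mic_trials_%s.wav' % trials.thisTrialN))
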
def writeRoutineStartCode(self, buff):
    # Give alert if in the same routine as a Keyboard component
    if self.params['editable'].val:
        routine = self.exp.routines[self.parentName]
        for sibling in routine:
            if isinstance(sibling, KeyboardComponent):
                alert(4405, strFields={
                    'textbox': self.params['name'],
                    'keyboard': sibling.params['name']
                })

    code = ("%(name)s.reset()")
    buff.writeIndentedLines(code % self.params)
    BaseVisualComponent.writeRoutineStartCode(self, buff)

def transcribe(audioClip, engine='sphinx', language='en-US',
               expectedWords=None, config=None):
    """Convert speech in audio to text.

    This feature passes the audio clip samples to a specified speech-to-text
    engine which will attempt to transcribe any speech within. The efficacy of
    the transcription depends on the engine selected, audio quality, and
    language support. By default, Pocket Sphinx is used which provides decent
    transcription capabilities offline for English and a few other languages.
    For more robust transcription capabilities with a greater range of
    language support, online providers such as Google may be used.

    Speech-to-text conversion blocks the main application thread when used on
    Python. Don't transcribe audio during time-sensitive parts of your
    experiment! This issue is known to the developers and will be fixed in a
    later release.

    Parameters
    ----------
    audioClip : :class:`~psychopy.sound.AudioClip` or tuple
        Audio clip containing speech to transcribe (e.g., recorded from a
        microphone). Can be either an :class:`~psychopy.sound.AudioClip`
        object or a tuple where the first value is an Nx1 or Nx2 array of
        audio samples (`ndarray`) and the second is the sample rate (`int`)
        in Hertz (e.g., ``(samples, 48000)``).
    engine : str
        Speech-to-text engine to use. Can be one of 'sphinx' for CMU Pocket
        Sphinx or 'google' for Google Cloud.
    language : str
        BCP-47 language code (e.g., 'en-US'). Note that supported languages
        vary between transcription engines.
    expectedWords : list or tuple
        List of strings representing expected words or phrases. This will
        constrain the possible output words to the ones specified. Note that
        not all engines support this feature (only Sphinx and Google Cloud do
        at this time). A warning will be logged if the engine selected does
        not support this feature. CMU PocketSphinx has an additional feature
        where the sensitivity can be specified for each expected word. You
        can indicate the sensitivity level to use by putting a ``:`` after
        each word in the list (see the Example below). Sensitivity levels
        range between 0 and 100. A higher number results in the engine being
        more conservative, resulting in a higher likelihood of false
        rejections. The default sensitivity is 80% for words/phrases without
        one specified.
    config : dict or None
        Additional configuration options for the specified engine. These are
        specified using a dictionary (e.g. `config={'pfilter': 1}` will
        enable the profanity filter when using the `'google'` engine).

    Returns
    -------
    :class:`~psychopy.sound.transcribe.TranscriptionResult`
        Transcription result.

    Notes
    -----
    * Online transcription services (e.g., Google) provide robust and
      accurate speech recognition capabilities with broader language support
      than offline solutions. However, these services may require a paid
      subscription to use, reliable broadband internet connections, and may
      not respect the privacy of your participants as their responses are
      being sent to a third-party. Also consider that a track of audio data
      being sent over the network can be large, so users on metered
      connections may incur additional costs to run your experiment.
    * If the audio clip has multiple channels, they will be combined prior to
      being passed to the transcription service if needed.
    Examples
    --------
    Use a voice command as a response to a task::

        # after doing microphone recording
        resp = mic.getRecording()

        transcribeResults = transcribe(resp)
        if transcribeResults.success:  # successful transcription
            words = transcribeResults.words
            if 'hello' in words:
                print('You said hello.')

    Specifying expected words with sensitivity levels when using CMU Pocket
    Sphinx::

        # expected words 90% sensitivity on the first two, default for the rest
        expectedWords = ['right:90', 'left:90', 'up', 'down']

        transcribeResults = transcribe(
            (resp.samples, resp.sampleRateHz),
            expectedWords=expectedWords)

        if transcribeResults.success:  # successful transcription
            # process results ...

    Specifying the API key to use Google's Cloud service for speech-to-text::

        # set the environment variable
        import os
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = \
            "C:\\path\\to\\my\\key.json"

        # you can now call the transcriber ...
        results = transcribe(
            myRecording,
            engine='google',
            expectedWords=['left', 'right'])

        if results.success:
            print("You said: {}".format(results.words[0]))

    """
    # check if the engine parameter is valid
    engine = engine.lower()  # make lower case

    # check if we have necessary keys
    if engine in ('google',):
        alert(4615, strFields={'engine': engine})

    # if we got a tuple, convert to audio clip object
    if isinstance(audioClip, (tuple, list,)):
        samples, sampleRateHz = audioClip
        audioClip = AudioClip(samples, sampleRateHz)

    # pass data over to the appropriate engine for transcription
    if engine in ('sphinx', 'built-in'):
        return recognizeSphinx(
            audioClip, language=language, expectedWords=expectedWords,
            config=config)
    elif engine == 'google':
        return recognizeGoogle(
            audioClip, language=language, expectedWords=expectedWords,
            config=config)
    else:
        raise ValueError(
            f'Parameter `engine` for `transcribe()` should be one of '
            f'"sphinx", "built-in" or "google" not "{engine}"')

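# A hypothetical sketch of branching on the returned TranscriptionResult
# (assumes the result exposes the success/words/requestFailed fields it is
# constructed with; `resp` is a recording as in the docstring examples):
result = transcribe(resp, engine='sphinx', expectedWords=['yes', 'no'])
if result.success:
    print('heard:', ' '.join(result.words))
elif result.requestFailed:
    print('the engine/API request failed - check keys and connectivity')
else:
    print('the engine could not make out any speech')
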
def writeMainCode(self, buff):
    # Alert user if eyetracking isn't set up
    if self.exp.eyetracking == "None":
        alert(code=4505)
    # Get inits
    inits = deepcopy(self.params)
    # Code-ify 'from exp settings'
    if inits['units'].val == 'from exp settings':
        inits['units'].val = None
    # Synonymise expand dur and target dur
    if inits['progressMode'].val == 'time':
        inits['expandDur'] = inits['targetDur']
    if inits['progressMode'].val == 'space key':
        inits['targetDur'] = inits['expandDur']
    # Synonymise movement dur and target delay
    if inits['movementAnimation'].val:
        inits['targetDelay'] = inits['movementDur']
    else:
        inits['movementDur'] = inits['targetDelay']
    # Convert progress mode to ioHub format
    if inits['progressMode'].val == 'space key':
        inits['progressKey'] = "' '"
    else:
        inits['progressKey'] = "None"
    # If positions are preset, override param value
    if inits['targetLayout'].val in positions:
        inits['targetPositions'].val = inits['targetLayout'].val
        inits['targetPositions'].valType = 'str'

    BaseStandaloneRoutine.writeMainCode(self, buff)

    # Make target
    code = (
        "# define target for %(name)s\n"
        "%(name)sTarget = visual.TargetStim(win, \n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(1, relative=True)
    code = (
        "name='%(name)sTarget',\n"
        "radius=%(outerRadius)s, fillColor=%(fillColor)s, borderColor=%(borderColor)s, lineWidth=%(borderWidth)s,\n"
        "innerRadius=%(innerRadius)s, innerFillColor=%(innerFillColor)s, innerBorderColor=%(innerBorderColor)s, innerLineWidth=%(innerBorderWidth)s,\n"
        "colorSpace=%(colorSpace)s, units=%(units)s\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(-1, relative=True)
    code = (
        ")"
    )
    buff.writeIndentedLines(code % inits)
    # Make validation object
    code = (
        "# define parameters for %(name)s\n"
        "%(name)s = iohub.ValidationProcedure(win,\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(1, relative=True)
    code = (
        "target=%(name)sTarget,\n"
        "gaze_cursor=%(cursorFillColor)s, \n"
        "positions=%(targetPositions)s, randomize_positions=%(randomisePos)s,\n"
        "expand_scale=%(expandScale)s, target_duration=%(targetDur)s,\n"
        "enable_position_animation=%(movementAnimation)s, target_delay=%(targetDelay)s,\n"
        "progress_on_key=%(progressKey)s,\n"
        "show_results_screen=%(showResults)s, save_results_screen=%(saveAsImg)s,\n"
        "color_space=%(colorSpace)s, unit_type=%(units)s\n"
    )
    buff.writeIndentedLines(code % inits)
    buff.setIndentLevel(-1, relative=True)
    code = (
        ")\n"
    )
    buff.writeIndentedLines(code % inits)
    # Run
    code = (
        "# run %(name)s\n"
        "%(name)s.run()\n"
        "# clear any keypresses from during %(name)s so they don't interfere with the experiment\n"
        "defaultKeyboard.clearEvents()\n"
    )
    buff.writeIndentedLines(code % inits)

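# Note that progressKey above holds a *source string*: for progressMode
# 'space key' the generated call contains progress_on_key=' ', otherwise
# progress_on_key=None. A hypothetical fragment of the emitted script:
#
#     validation = iohub.ValidationProcedure(win,
#         target=validationTarget,
#         gaze_cursor='green', 
#         positions='FIVE_POINTS', randomize_positions=True,
#         expand_scale=1.5, target_duration=1.5,
#         enable_position_animation=True, target_delay=1.0,
#         progress_on_key=' ',
#         show_results_screen=True, save_results_screen=False,
#         color_space='rgb', unit_type=None
#     )
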
def setColor(obj, color, colorSpace=None, operation='',
             colorAttrib='color',  # or 'fillColor' etc
             # legacy
             colorSpaceAttrib=None, rgbAttrib=None, log=True):
    """Sets the given color attribute of an object.

    Obsolete as of 2021.1.0. Colors are now handled by Color objects, so all
    of the necessary operations are performed when setting directly via
    `obj.color`, `obj.fillColor` or `obj.borderColor`.

    Parameters
    ----------
    obj : psychopy.visual object
        The object whose color you are changing.
    color : color
        The color to use - can be a valid color value (e.g. (1, 1, 1),
        '#ffffff', 'white') or a psychopy.colors.Color object.
    colorSpace : str
        The color space of the color value. Can be None for hex or named
        colors, otherwise must be specified.
    operation : str
        Can be '=', '+' or '-', or left blank for '='. '=' will set the
        color, '+' will add the color and '-' will subtract it.
    colorAttrib : str
        Name of the color attribute you are setting, e.g. 'color',
        'fillColor', 'borderColor'.

    Legacy
    ------
    colorSpaceAttrib : str
        PsychoPy used to have a color space for each attribute, but color
        spaces are now handled by Color objects, so this input is no longer
        used.
    rgbAttrib : str
        PsychoPy used to handle color by converting to RGB and storing in an
        rgb attribute; this conversion is now done within Color objects, so
        this input is no longer used.
    log : bool
        The log argument is deprecated and has no effect now. Logging should
        be done when setColor() is called.
    """
    if colorSpaceAttrib is not None:
        alert(8105, strFields={'colorSpaceAttrib': colorSpaceAttrib})
    if rgbAttrib is not None:
        alert(8110, strFields={'rgbAttrib': rgbAttrib})

    # Make a Color object using supplied values
    raw = color
    color = colors.Color(raw, colorSpace)
    assert color.valid, (
        f"Could not create valid Color object from value {raw} in space "
        f"{colorSpace}")

    # Apply new value
    if operation in ('=', '', None):
        # If no operation, just set color from object
        setattr(obj, colorAttrib, color)
    elif operation == '+':
        # If +, add to old color
        setattr(obj, colorAttrib, getattr(obj, "_" + colorAttrib) + color)
    elif operation == '-':
        # If -, subtract from old color
        setattr(obj, colorAttrib, getattr(obj, "_" + colorAttrib) - color)
    else:
        # Any other operation is not supported
        msg = ('Unsupported value "%s" for operation when '
               'setting %s in %s')
        vals = (operation, colorAttrib, obj.__class__.__name__)
        raise ValueError(msg % vals)

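# Hypothetical usage of the legacy helper above (assumes `stim` is an already
# constructed visual stimulus, e.g. a visual.Rect on an open window):
setColor(stim, (0.2, 0.2, 0.2), colorSpace='rgb', operation='+',
         colorAttrib='fillColor')  # additively brightens the current fill
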
def transcribe(samples, sampleRate, engine='sphinx', language='en-US',
               expectedWords=(), key=None, config=None):
    """Convert speech in audio to text.

    This feature passes the audio clip samples to a speech-to-text engine
    which will attempt to transcribe any speech within. The efficacy of the
    transcription depends on the engine selected, recording hardware and
    audio quality, and quality of the language support. By default, Pocket
    Sphinx is used which provides decent transcription capabilities offline
    for English and a few other languages. For more robust transcription
    capabilities with a greater range of language support, online providers
    such as Google may be used.

    If the audio clip has multiple channels, they will be combined prior to
    being passed to the transcription service.

    Speech-to-text conversion blocks the main application thread when used on
    Python. Don't transcribe audio during time-sensitive parts of your
    experiment! This issue is known to the developers and will be fixed in a
    later release.

    Parameters
    ----------
    samples : ArrayLike
        Audio clip containing speech to transcribe (e.g., recorded from a
        microphone) as an Nx1 or Nx2 array.
    sampleRate : int or float
        Sample rate at which `samples` was recorded, in Hertz (Hz).
    engine : str
        Speech-to-text engine to use. Can be one of 'sphinx', 'google',
        'googleCloud', or 'bing'.
    language : str
        BCP-47 language code (e.g., 'en-US'). Note that supported languages
        vary between transcription engines.
    expectedWords : list or tuple
        List of strings representing expected words or phrases. This will
        constrain the possible output words to the ones specified. Note that
        not all engines support this feature (only Sphinx and Google Cloud do
        at this time). A warning will be logged if the engine selected does
        not support this feature. CMU PocketSphinx has an additional feature
        where the sensitivity can be specified for each expected word. You
        can indicate the sensitivity level to use by putting a ``:`` after
        each word in the list (see the Example below). Sensitivity levels
        range between 50 and 100. A higher number results in the engine being
        more conservative, resulting in a higher likelihood of false
        rejections. The default sensitivity is 80% for words/phrases without
        one specified.
    key : str or None
        API key or credentials, format depends on the API in use. If `None`,
        the values will be obtained elsewhere (see Notes).
    config : dict or None
        Additional configuration options for the specified engine. These are
        specified using a dictionary (e.g. `config={'pfilter': 1}` will
        enable the profanity filter when using the `'google'` engine).

    Returns
    -------
    :class:`~psychopy.sound.transcribe.TranscriptionResult`
        Transcription result.

    Notes
    -----
    * Online transcription services (e.g., Google, Bing, etc.) provide robust
      and accurate speech recognition capabilities with broader language
      support than offline solutions. However, these services may require a
      paid subscription to use, reliable broadband internet connections, and
      may not respect the privacy of your participants as their responses are
      being sent to a third-party. Also consider that a track of audio data
      being sent over the network can be large, so users on metered
      connections may incur additional costs to run your experiment.
    * Some errors may be emitted by the `SpeechRecognition` API; check that
      project's documentation if you encounter such an error for more
      information.
    * If `key` is not specified (i.e. is `None`) then PsychoPy will look for
      the API key at other locations.
      By default, PsychoPy will look for an environment variable starting
      with `PSYCHOPY_TRANSCR_KEY_` first. If there is no appropriate API key
      for the given `engine`, then the preference *General -> transcrKeyXXX*
      is used. Keys can be specified as a file path; if so, the key data will
      be loaded from the file. System administrators can specify keys this
      way to use them across a site installation without needing the user to
      manage the keys directly.
    * Use `expectedWords` if supported by the engine. This will greatly speed
      up recognition. CMU Pocket Sphinx gives the option for sensitivity
      levels per phrase; higher levels make the engine more conservative,
      increasing the likelihood of false rejections.

    Examples
    --------
    Use a voice command as a response to a task::

        # after doing microphone recording
        resp = mic.getRecording()

        transcribeResults = transcribe(resp.samples, resp.sampleRateHz)
        if transcribeResults.success:  # successful transcription
            words = transcribeResults.words
            if 'hello' in words:
                print('You said hello.')

    Specifying expected words with sensitivity levels when using CMU Pocket
    Sphinx::

        # expected words 90% sensitivity on the first two, default for the rest
        expectedWords = ['right:90', 'left:90', 'up', 'down']

        transcribeResults = transcribe(
            resp.samples,
            resp.sampleRateHz,
            expectedWords=expectedWords)

        if transcribeResults.success:  # successful transcription
            # process results ...

    """
    # Bunch of checks to make sure the parameters specified are correct.
    if not _hasSpeechRecognition:  # don't have speech recognition
        raise ModuleNotFoundError(
            "Cannot use `.transcribe()`, missing required module "
            "`speech_recognition` from package `SpeechRecognition`.")

    # check if the engine parameter is valid
    if engine not in _recognizers.keys():
        raise ValueError(
            f'transcribe() `engine` should be one of '
            f'{list(_recognizers.keys())} not {engine}')

    # check if we have necessary keys
    if engine in _apiKeys:
        if not _apiKeys[engine]:
            alert(4615, strFields={'engine': engine})

    # engine configuration
    config = {} if config is None else config
    if not isinstance(config, dict):
        raise TypeError(
            "Invalid type for parameter `config` specified, must be `dict` "
            "or `None`.")

    if not isinstance(language, str):
        raise TypeError(
            "Invalid type for parameter `language`, must be type `str`.")

    # common engine configuration options
    config['language'] = language  # set language code
    config['show_all'] = False

    # API specific config
    expectedWordsNotSupported = requiresKey = False
    if engine in ('sphinx', 'built-in'):
        expectedWordsTemp = None
        # check valid language
        config['language'] = language.lower()  # sphinx uses en-us not en-US
        if config['language'] not in sphinxLangs:
            url = "https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/"
            raise ValueError(
                f"Language `{config['language']}` is not installed for "
                f"pocketsphinx. You can download languages here: {url} "
                f"Install them here: {pocketsphinx.get_model_path()}")
        # check expected words
        if expectedWords is not None:
            # sensitivity specified as `word:80`
            expectedWordsTemp = []
            for word in expectedWords:
                wordAndSense = word.split(':')
                if len(wordAndSense) == 2:  # specified as `word:80`
                    word, sensitivity = wordAndSense
                    sensitivity = int(sensitivity) / 100.
                else:
                    word = wordAndSense[0]
                    sensitivity = 0.8  # default is 80% sensitivity
                expectedWordsTemp.append((word, sensitivity))

            config['keyword_entries'] = expectedWordsTemp

    elif engine == 'googleCloud':
        config['preferred_phrases'] = expectedWords
        requiresKey = True
    elif engine == 'google':
        expectedWordsNotSupported = True
        requiresKey = True
    elif engine in ('bing', 'azure'):
        expectedWordsNotSupported = True
        requiresKey = True

    if expectedWordsNotSupported:
        logging.warning(
            f"Transcription engine '{engine}' does not allow for expected "
            f"phrases to be specified.")

    # API requires a key
    if requiresKey:
        try:
            if engine != 'googleCloud':
                config['key'] = _apiKeys[engine] if key is None else key
            else:
                config['credentials_json'] = \
                    _apiKeys[engine] if key is None else key
        except KeyError:
            raise ValueError(
                f"Selected speech-to-text engine '{engine}' requires an API "
                f"key but one cannot be found. Add key to PsychoPy prefs or "
                f"try specifying `key` directly.")

    # combine channels if needed
    samples = np.atleast_2d(samples)  # enforce 2D
    if samples.shape[1] > 1:
        samplesMixed = \
            np.sum(samples, axis=1, dtype=np.float32) / np.float32(2.)
    else:
        samplesMixed = samples

    # convert samples to WAV PCM format
    clipDataInt16 = np.asarray(
        samplesMixed * ((1 << 15) - 1), dtype=np.int16).tobytes()

    sampleWidth = 2  # two bytes per sample
    audio = sr.AudioData(clipDataInt16,
                         sample_rate=sampleRate,
                         sample_width=sampleWidth)

    # submit audio samples to the API
    respAPI = ''
    unknownValueError = requestError = False
    try:
        respAPI = _recognizers[engine](audio, **config)
    except KeyError:
        raise ValueError(
            f"`{engine}` is not a valid transcribe() engine. "
            f"Please use one of {list(_recognizers.keys())}")
    except sr.UnknownValueError:
        unknownValueError = True
    except sr.RequestError:
        requestError = True

    # remove empty words
    result = [word for word in respAPI.split(' ') if word != '']

    # object to return containing transcription data
    toReturn = TranscriptionResult(
        words=result,
        unknownValue=unknownValueError,
        requestFailed=requestError,
        engine=engine,
        language=language)

    return toReturn

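# Standalone sketch (not from the source) of the `word:sensitivity` parsing
# implemented above, runnable on its own:
expectedWords = ['right:90', 'left:90', 'up', 'down']
keywordEntries = []
for word in expectedWords:
    wordAndSense = word.split(':')
    if len(wordAndSense) == 2:  # sensitivity given as `word:90`
        word, sensitivity = wordAndSense
        sensitivity = int(sensitivity) / 100.
    else:  # no sensitivity given, use the 80% default
        word = wordAndSense[0]
        sensitivity = 0.8
    keywordEntries.append((word, sensitivity))
print(keywordEntries)  # [('right', 0.9), ('left', 0.9), ('up', 0.8), ('down', 0.8)]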