def rec_wav_file(asr, cfg, output_dir, wav_path):
    """
    Recognise the speech in one wav file and time the recognition.

    The whole recording is loaded, wrapped into a single frame and passed
    to the recogniser. Two durations are measured with ``time.time()``:
    the call feeding the audio in (``asr.rec_in``) and the call extracting
    the hypothesis (``asr.hyp_out``).

    Args:
        asr: recogniser object; must provide ``rec_in``, ``hyp_out``,
            ``get_last_lattice`` and ``flush``
        cfg (dict): Alex configuration, passed on to ``load_wav``
        output_dir: passed to ``save_lattice`` together with ``wav_path``
        wav_path (str): Path to the wave file which is recognised

    Returns:
        Tuple of decoded ASR hypothesis, time of decoding, time of
        hypothesis extraction
    """
    audio_frame = Frame(load_wav(cfg, wav_path))

    t_begin = time.time()
    asr.rec_in(audio_frame)
    t_decoded = time.time()
    hypothesis = asr.hyp_out()
    t_extracted = time.time()

    # Saving the lattice and flushing happen after the last timestamp,
    # so they are not included in either measured duration.
    save_lattice(asr.get_last_lattice(), output_dir, wav_path)
    asr.flush()

    decoding_time = t_decoded - t_begin
    extraction_time = t_extracted - t_decoded
    return hypothesis, decoding_time, extraction_time
def rec_wav_file(output_dir, wav_path):
    """
    Recognise the speech in one wav file and time the recognition.

    NOTE: ``asr`` and ``cfg`` are not parameters of this function; they are
    looked up in the enclosing scope, which is not visible from here.

    Two durations are measured with ``time.time()``: the call feeding the
    audio in (``asr.rec_in``) and the call extracting the hypothesis
    (``asr.hyp_out``).

    Args:
        output_dir: passed to ``save_lattice`` together with ``wav_path``
        wav_path (str): Path to the wave file which is recognised

    Returns:
        Tuple of decoded ASR hypothesis, time of decoding, time of
        hypothesis extraction
    """
    audio_frame = Frame(load_wav(cfg, wav_path))

    t_begin = time.time()
    asr.rec_in(audio_frame)
    t_decoded = time.time()
    hypothesis = asr.hyp_out()
    t_extracted = time.time()

    # Best effort: an AttributeError raised anywhere while fetching or
    # saving the lattice is ignored (presumably for recognisers without
    # ``get_last_lattice`` -- confirm against the available ASR classes).
    try:
        save_lattice(asr.get_last_lattice(), output_dir, wav_path)
    except AttributeError:
        pass

    asr.flush()

    decoding_time = t_decoded - t_begin
    extraction_time = t_extracted - t_decoded
    return hypothesis, decoding_time, extraction_time
def rec_wav_file(asr, cfg, wav_path):
    """
    Recognise the speech in one wav file and time the recognition.

    Args:
        asr: recogniser object; must provide ``rec_in``, ``hyp_out`` and
            ``flush``
        cfg: configuration passed on to ``load_wav``
        wav_path: path of the wave file to recognise

    Returns:
        Tuple of the ASR hypothesis, the duration of the ``rec_in`` call and
        the duration of the ``hyp_out`` call (``time.time()`` differences).
    """
    audio_frame = Frame(load_wav(cfg, wav_path))

    t_begin = time.time()
    asr.rec_in(audio_frame)
    t_decoded = time.time()
    hypothesis = asr.hyp_out()
    t_extracted = time.time()

    # Flushing happens after the last timestamp and is not timed.
    asr.flush()

    decoding_time = t_decoded - t_begin
    extraction_time = t_extracted - t_decoded
    return hypothesis, decoding_time, extraction_time
def send_wav(self, filename, stream=None):
    """Send the given wav file to the dialogue system as if it had been
    said through a microphone.

    The audio is loaded, cut into bins of ``2 * samples_per_frame`` and
    every bin is sent as a ``Frame`` over ``self.audio_record``. When
    ``stream`` is given, each bin is also written to it before being sent.
    Afterwards ten frames of zero bytes are sent.

    Args:
        filename: path of the wav file to send
        stream: optional object with a ``write`` method that receives a
            copy of every bin (default: None)
    """
    audio_data = load_wav(self.cfg, filename)
    # presumably 2 bytes per sample -- TODO confirm against load_wav
    bin_size = 2 * self.cfg['Audio']['samples_per_frame']
    bins = various.split_to_bins(audio_data, bin_size)

    mirror = stream is not None
    for chunk in bins:
        if mirror:
            stream.write(chunk)
        self.audio_record.send(Frame(chunk))

    # Trailing silence so that VAD recognizes the end of the recording.
    silence = b"\x00\x00" * self.cfg['Audio']['samples_per_frame']
    remaining = 10
    while remaining > 0:
        self.audio_record.send(Frame(silence))
        remaining -= 1
def get_tts_wav(self, voice, text):
    """Run flite from the command line and return the synthesized audio.

    The text is synthesized into a temporary wav file which is then read
    back with ``audio.load_wav`` (the original note on this method says the
    returned audio is in the re-sampled PCM audio format). The temporary
    file is always removed before returning.

    Args:
        voice: name of the flite voice; any value outside the supported
            list falls back to 'awb'
        text: the text to synthesize

    Returns:
        The audio as returned by ``audio.load_wav``.

    Raises:
        TTSException: if flite could not be run or its output could not be
            loaded.
    """
    import os

    if voice not in ('awb', 'rms', 'slt', 'kal', 'awb_time', 'kal16'):
        voice = 'awb'

    handle, wav_file_name = mkstemp('TmpSpeechFile.wav')
    # flite opens the output path on its own; close our descriptor right
    # away so that it is not leaked on every call.
    os.close(handle)

    try:
        # Pass the arguments as a list (no shell) so that quotes, `$` or
        # backticks in ``text`` are handed to flite verbatim instead of
        # being interpreted by a shell.
        with open(os.devnull, 'w') as devnull:
            subprocess.call(
                ['flite', '-voice', voice, '-t', text, '-o', wav_file_name],
                stderr=devnull)
        wav = audio.load_wav(self.cfg, wav_file_name)
    except Exception:
        # `Exception` rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not converted into a TTS error.
        raise TTSException("No data synthesized.")
    finally:
        # The audio has been loaded (or synthesis failed); either way the
        # temporary file is no longer needed.
        try:
            os.remove(wav_file_name)
        except OSError:
            pass

    return wav
""") parser.add_argument('-c', "--configs", nargs='+', help='additional configuration files') args = parser.parse_args() cfg = Config.load_configs(args.configs) session_logger = cfg['Logging']['session_logger'] system_logger = cfg['Logging']['system_logger'] ######################################################################### ######################################################################### system_logger.info("Test of the AudioIO component\n" + "=" * 120) wav = audio.load_wav(cfg, './resources/test16k-mono.wav') # split audio into frames wav = various.split_to_bins(wav, 2 * cfg['Audio']['samples_per_frame']) # remove the last frame aio_commands, aio_child_commands = multiprocessing.Pipe() # used to send aio_commands audio_record, child_audio_record = multiprocessing.Pipe() # I read from this connection recorded audio audio_play, child_audio_play = multiprocessing.Pipe() # I write in audio to be played close_event = multiprocessing.Event() aio = AudioIO(cfg, aio_child_commands, child_audio_record, child_audio_play, close_event) aio.start() count = 0 max_count = 2500
def main(dirname, outfname, cfg, skip=0, ignore_list_file=None):
    """
    Runs Julius on the WAVs found for `dirname' and appends one line
    '<wav id> => <repr of hypothesis>' per WAV to `outfname'.

    Each WAV is fed to Julius in slices of `frame_size' items, sleeping
    `sleep_time' after each slice.  If feeding fails with a connection reset
    (errno 104), Julius is restarted and the same WAV is fed again from its
    beginning.  If obtaining the hypothesis fails (ASRException or connection
    reset), the string 'None' is written in place of the hypothesis and a
    fresh Julius is started for the next WAV.

    Arguments:
        dirname -- the directory to search for WAVs
        outfname -- path towards the file to output to (opened in 'a+' mode,
            UTF-8 encoded)
        cfg -- a configuration dictionary (of the Config class)
        skip -- how many wavs to skip (default: 0); applied after the WAVs
            have been sorted by their ID
        ignore_list_file -- a file open for reading whose lines specify path
            globs for logs that should be ignored, or None if no such file
            should be used.  The format of this file is described in some
            alex/corpustools scripts.

    """
    # Fetch relevant config arguments.
    frame_size = cfg['corpustools']['get_jasr_confnets']['frame_size']
    rt_ratio = cfg['corpustools']['get_jasr_confnets']['rt_ratio']
    # NOTE(review): 32000 is presumably the number of bytes per second of
    # 16 kHz 16-bit mono audio -- confirm against load_wav.
    sleep_time = rt_ratio * frame_size / 32000.

    # get_wav_fnames yields (file name, wav ID) pairs; sort them by the ID.
    wavs = sorted(get_wav_fnames(dirname, ignore_list_file), key=itemgetter(1))

    jul = None
    try:
        with codecs.open(outfname, 'a+', encoding='UTF-8') as outfile:
            for wav_fname, wav_id in wavs[skip:]:
                # Load the wav.
                mywav = load_wav(cfg, wav_fname)
                # Start Julius (first iteration, or after the previous
                # hypothesis extraction failed and `jul' was reset to None).
                if jul is None:
                    jul, grep, errfile = start_julius(cfg, on_no_context)

                # Insist on feeding all the input data to Julius, regardless of
                # how many times it crashes.
                # `exception' doubles as the loop flag: it stays truthy until
                # one complete pass over the wav succeeds.
                exception = 1
                while exception:
                    try:
                        for startidx in xrange(0, len(mywav), frame_size):
                            jul.rec_in(Frame(
                                mywav[startidx:startidx + frame_size]))
                            sleep(sleep_time)
                            # sleep(rt_ratio * len(mywav) / 32000.)
                    except socket.error as e:
                        # Julius crashing results in
                        # error: [Errno 104] Connection reset by peer
                        # Catch only that one.
                        if e.errno != 104:
                            raise e
                        exception = e
                        traceback.print_exc()
                        print "get_jasr_confnets: Restarting Julius."
                        clean_up(jul, grep, errfile)
                        jul, grep, errfile = start_julius(cfg, on_no_context)
                    else:
                        # The whole wav was fed without an error; leave the
                        # retry loop.
                        exception = None

                exception = None
                try:
                    hyp = jul.hyp_out()
                except ASRException as e:
                    exception = e
                except socket.error as e:
                    # Julius crashing results in
                    # error: [Errno 104] Connection reset by peer
                    # Catch only that one.
                    if e.errno != 104:
                        raise e
                    exception = e
                if exception is not None:
                    # No retry here: report the error, discard this Julius
                    # instance and record a placeholder hypothesis.
                    traceback.print_exc()
                    clean_up(jul, grep, errfile)
                    jul = None
                    hyp = 'None'
                    exception = None

                outfile.write('{id_} => {hyp!r}\n'.format(id_=wav_id, hyp=hyp))
                # One dot on stderr per processed wav.
                sys.stderr.write('.')
                sys.stderr.flush()
    finally:
        # `jul' is None when no instance is currently running (never started,
        # or already cleaned up after a failed hypothesis extraction).
        if jul is not None:
            clean_up(jul, grep, errfile)
def rec_wav_file(self, wav_path):
    """Recognise the speech in the wav file at ``wav_path``.

    The file is loaded with ``load_wav`` using ``self.cfg``, wrapped into a
    single ``Frame`` and handed to ``self.rec_wave``; ``self.flush()`` is
    called afterwards.

    Args:
        wav_path: path of the wave file to recognise

    Returns:
        Whatever ``self.rec_wave`` returns for the frame.
    """
    audio_frame = Frame(load_wav(self.cfg, wav_path))
    result = self.rec_wave(audio_frame)
    self.flush()
    return result
'power_decision_frames': 25, 'power_decision_speech_threshold': 0.7, 'power_decision_non_speech_threshold': 0.2, }, 'Hub': { 'main_loop_sleep_time': 0.005, }, 'Logging': { 'output_dir': './tmp' } } print "Test of the AudioIO and VAD components:" print "=" * 120 wav = audio.load_wav(cfg, './resources/test16k-mono.wav') # split audio into frames wav = various.split_to_bins(wav, 2 * cfg['Audio']['samples_per_frame']) # remove the last frame aio_commands, aio_child_commands = multiprocessing.Pipe() # used to send commands to AudioIO audio_record, child_audio_record = multiprocessing.Pipe() # I read from this connection recorded audio audio_play, child_audio_play = multiprocessing.Pipe( ) # I write in audio to be played vad_commands, vad_child_commands = multiprocessing.Pipe() # used to send commands to VAD vad_audio_out, vad_child_audio_out = multiprocessing.Pipe()# used to read output audio from VAD close_event = multiprocessing.Event() aio = AudioIO(cfg, aio_child_commands, child_audio_record, child_audio_play, close_event) vad = VAD(cfg, vad_child_commands, audio_record, vad_child_audio_out, close_event)
def main(dirname, outfname, cfg, skip=0, ignore_list_file=None):
    """
    Runs Julius on the WAVs found for `dirname' and appends one line
    '<wav id> => <repr of hypothesis>' per WAV to `outfname'.

    Each WAV is fed to Julius in slices of `frame_size' items, sleeping
    `sleep_time' after each slice.  If feeding fails with a connection reset
    (errno 104), Julius is restarted and the same WAV is fed again from its
    beginning.  If obtaining the hypothesis fails (ASRException or connection
    reset), the string 'None' is written in place of the hypothesis and a
    fresh Julius is started for the next WAV.

    Arguments:
        dirname -- the directory to search for WAVs
        outfname -- path towards the file to output to (opened in 'a+' mode,
            UTF-8 encoded)
        cfg -- a configuration dictionary (of the Config class)
        skip -- how many wavs to skip (default: 0); applied after the WAVs
            have been sorted by their ID
        ignore_list_file -- a file open for reading whose lines specify path
            globs for logs that should be ignored, or None if no such file
            should be used.  The format of this file is described in some
            alex/corpustools scripts.

    """
    # Fetch relevant config arguments.
    frame_size = cfg['corpustools']['get_jasr_confnets']['frame_size']
    rt_ratio = cfg['corpustools']['get_jasr_confnets']['rt_ratio']
    # NOTE(review): 32000 is presumably the number of bytes per second of
    # 16 kHz 16-bit mono audio -- confirm against load_wav.
    sleep_time = rt_ratio * frame_size / 32000.

    # get_wav_fnames yields (file name, wav ID) pairs; sort them by the ID.
    wavs = sorted(get_wav_fnames(dirname, ignore_list_file), key=itemgetter(1))

    jul = None
    try:
        with codecs.open(outfname, 'a+', encoding='UTF-8') as outfile:
            for wav_fname, wav_id in wavs[skip:]:
                # Load the wav.
                mywav = load_wav(cfg, wav_fname)
                # Start Julius (first iteration, or after the previous
                # hypothesis extraction failed and `jul' was reset to None).
                if jul is None:
                    jul, grep, errfile = start_julius(cfg, on_no_context)

                # Insist on feeding all the input data to Julius, regardless of
                # how many times it crashes.
                # `exception' doubles as the loop flag: it stays truthy until
                # one complete pass over the wav succeeds.
                exception = 1
                while exception:
                    try:
                        for startidx in xrange(0, len(mywav), frame_size):
                            jul.rec_in(
                                Frame(mywav[startidx:startidx + frame_size]))
                            sleep(sleep_time)
                            # sleep(rt_ratio * len(mywav) / 32000.)
                    except socket.error as e:
                        # Julius crashing results in
                        # error: [Errno 104] Connection reset by peer
                        # Catch only that one.
                        if e.errno != 104:
                            raise e
                        exception = e
                        traceback.print_exc()
                        print "get_jasr_confnets: Restarting Julius."
                        clean_up(jul, grep, errfile)
                        jul, grep, errfile = start_julius(cfg, on_no_context)
                    else:
                        # The whole wav was fed without an error; leave the
                        # retry loop.
                        exception = None

                exception = None
                try:
                    hyp = jul.hyp_out()
                except ASRException as e:
                    exception = e
                except socket.error as e:
                    # Julius crashing results in
                    # error: [Errno 104] Connection reset by peer
                    # Catch only that one.
                    if e.errno != 104:
                        raise e
                    exception = e
                if exception is not None:
                    # No retry here: report the error, discard this Julius
                    # instance and record a placeholder hypothesis.
                    traceback.print_exc()
                    clean_up(jul, grep, errfile)
                    jul = None
                    hyp = 'None'
                    exception = None

                outfile.write('{id_} => {hyp!r}\n'.format(id_=wav_id, hyp=hyp))
                # One dot on stderr per processed wav.
                sys.stderr.write('.')
                sys.stderr.flush()
    finally:
        # `jul' is None when no instance is currently running (never started,
        # or already cleaned up after a failed hypothesis extraction).
        if jul is not None:
            clean_up(jul, grep, errfile)