def test_conversion_of_confnet_into_nblist(self): A1, A2, A3 = 0.90, 0.05, 0.05 B1, B2, B3 = 0.50, 0.35, 0.15 C1, C2, C3 = 0.60, 0.30, 0.10 correct_nblist = UtteranceNBList() correct_nblist.add(A1*B1*C1, Utterance("A1 B1 C1")) correct_nblist.add(A1*B2*C1, Utterance("A1 B2 C1")) correct_nblist.add(A1*B1*C2, Utterance("A1 B1 C2")) correct_nblist.add(A1*B2*C2, Utterance("A1 B2 C2")) correct_nblist.add(A1*B3*C1, Utterance("A1 B3 C1")) correct_nblist.add(A1*B1*C3, Utterance("A1 B1 C3")) correct_nblist.add(A1*B3*C2, Utterance("A1 B3 C2")) correct_nblist.add(A1*B2*C3, Utterance("A1 B2 C3")) correct_nblist.merge() correct_nblist.add_other() confnet = UtteranceConfusionNetwork() confnet.add([[A1, 'A1'], [A2, 'A2'], [A3, 'A3'],]) confnet.add([[B1, 'B1'], [B2, 'B2'], [B3, 'B3'],]) confnet.add([[C1, 'C1'], [C2, 'C2'], [C3, 'C3'],]) confnet.merge().sort() gen_nblist = confnet.get_utterance_nblist(10) s = [] s.append("") s.append("Confusion network:") s.append(unicode(confnet)) s.append("") s.append("Generated nblist:") s.append(unicode(gen_nblist)) s.append("") s.append("Correct nblist:") s.append(unicode(correct_nblist)) s.append("") print '\n'.join(s) self.assertEqual(unicode(gen_nblist), unicode(correct_nblist))
def read_audio_write_asr_hypotheses(self):
    """Process one item from the local audio input queue.

    Audio ``Frame`` items are fed to the ASR engine while recognition is
    on.  ``Command`` items toggle recognition: ``speech_start`` opens a
    segment, ``speech_end`` closes it, pulls the final hypothesis out of
    the recogniser and forwards it to the hub and the SLU component.

    Raises:
        ASRException: if the queued item is neither a Frame nor a Command.
    """
    # Read input audio.
    if self.local_audio_in:
        if len(self.local_audio_in) > 40:
            # The queue is falling behind real time; report the backlog.
            print "ASR unprocessed frames:", len(self.local_audio_in)

            if len(self.local_audio_in) > 200:
                print "ASR too many unprocessed frames:", len(
                    self.local_audio_in)
                print " skipping everything until the end of the segment:", len(
                    self.local_audio_in)

                # Drop audio frames until a non-Frame item (a Command
                # marking the segment boundary) reaches the queue head.
                while len(self.local_audio_in) > 2 and isinstance(
                        self.local_audio_in[0], Frame):
                    skip = self.local_audio_in.popleft()

        # read recorded audio
        data_rec = self.local_audio_in.popleft()

        if isinstance(data_rec, Frame):
            # Forward audio to the recogniser only inside a speech segment.
            if self.recognition_on:
                self.asr.rec_in(data_rec)
        elif isinstance(data_rec, Command):
            dr_speech_start = False
            fname = None

            if data_rec.parsed['__name__'] == "speech_start":
                # Check whether more than one speech segment is queued.
                segments = [
                    cmd for cmd in self.local_audio_in
                    if isinstance(cmd, Command) and
                    cmd.parsed['__name__'] == "speech_start"
                ]
                if len(segments):
                    # There are multiple unprocessed segments in the queue;
                    # remove all unprocessed segments except the last.
                    print "ASR too many unprocessed speech segments:", len(
                        segments)
                    print " removed all segments but the last"

                    removed_segments = 0
                    # Pop everything (frames included) up to and including
                    # the last queued speech_start command.
                    while removed_segments < len(segments):
                        data_rec = self.local_audio_in.popleft()
                        if isinstance(data_rec, Command) and data_rec.parsed[
                                '__name__'] == "speech_start":
                            removed_segments += 1

                dr_speech_start = "speech_start"
                fname = data_rec.parsed['fname']
            elif data_rec.parsed['__name__'] == "speech_end":
                dr_speech_start = "speech_end"
                fname = data_rec.parsed['fname']

            # Check consistency of the input command.
            if dr_speech_start:
                # speech_start is only valid while recognition is off and
                # speech_end only while it is on.
                if ((not self.recognition_on and
                     dr_speech_start != "speech_start") or
                        (self.recognition_on and
                         dr_speech_start != "speech_end")):
                    msg = ('Commands received by the ASR component are '
                           'inconsistent (recognition_on: {rec}; the new '
                           'command: {cmd}').format(
                               rec=self.recognition_on,
                               cmd=dr_speech_start)
                    self.system_logger.exception(msg)

            if dr_speech_start == "speech_start":
                # Announce the new segment to the hub and start recognising.
                self.commands.send(
                    Command('asr_start(fname="%s")' % fname, 'ASR', 'HUB'))
                self.recognition_on = True

                if self.cfg['ASR']['debug']:
                    self.system_logger.debug(
                        'ASR: speech_start(fname="%s")' % fname)
            elif dr_speech_start == "speech_end":
                self.recognition_on = False

                if self.cfg['ASR']['debug']:
                    self.system_logger.debug(
                        'ASR: speech_end(fname="%s")' % fname)

                try:
                    # Retrieve the final hypothesis for the closed segment.
                    asr_hyp = self.asr.hyp_out()

                    if self.cfg['ASR']['debug']:
                        msg = list()
                        msg.append("ASR Hypothesis")
                        msg.append("-" * 60)
                        msg.append(unicode(asr_hyp))
                        msg.append(u"")
                        msg = u'\n'.join(msg)
                        self.system_logger.debug(msg)
                except (ASRException, JuliusASRTimeoutException):
                    # The recogniser produced nothing in time; fall back to
                    # a one-word "_other_" confusion network.
                    self.system_logger.debug("Julius ASR Result Timeout.")

                    if self.cfg['ASR']['debug']:
                        msg = list()
                        msg.append("ASR Alternative hypothesis")
                        msg.append("-" * 60)
                        msg.append("sil")
                        msg.append("")
                        msg = u'\n'.join(msg)
                        self.system_logger.debug(msg)

                    asr_hyp = UtteranceConfusionNetwork()
                    asr_hyp.add([
                        [1.0, "_other_"],
                    ])

                # The ASR component can return either NBList or a confusion
                # network.
                if isinstance(asr_hyp, UtteranceNBList):
                    self.session_logger.asr("user", fname, asr_hyp, None)
                elif isinstance(asr_hyp, UtteranceConfusionNetwork):
                    self.session_logger.asr("user", fname,
                                            asr_hyp.get_utterance_nblist(),
                                            asr_hyp)
                else:
                    self.session_logger.asr("user", fname,
                                            [(-1, asr_hyp)], None)

                # Close the segment and forward the hypothesis downstream.
                self.commands.send(
                    Command('asr_end(fname="%s")' % fname, 'ASR', 'HUB'))
                self.commands.send(ASRHyp(asr_hyp, fname=fname))
                self.asr_hypotheses_out.send(ASRHyp(asr_hyp, fname=fname))
        else:
            raise ASRException('Unsupported input.')
def read_audio_write_asr_hypotheses(self):
    """Handle a single item popped from the local audio queue.

    ``Frame`` items are streamed into the ASR engine while recognition is
    active.  ``Command`` items delimit speech segments: ``speech_start``
    turns recognition on, ``speech_end`` turns it off, fetches the ASR
    hypothesis (with a fallback on timeout) and sends it on to SLU.

    Raises:
        ASRException: if the queued item is neither a Frame nor a Command.
    """
    # Read input audio.
    if self.local_audio_in:
        if len(self.local_audio_in) > 40:
            # Report a growing backlog of unprocessed input.
            print "ASR unprocessed frames:", len(self.local_audio_in)

            if len(self.local_audio_in) > 200:
                print "ASR too many unprocessed frames:", len(self.local_audio_in)
                print " skipping everything until the end of the segment:", len(self.local_audio_in)

                # Discard audio frames until a Command (segment boundary)
                # reaches the head of the queue.
                while len(self.local_audio_in) > 2 and isinstance(self.local_audio_in[0], Frame):
                    skip = self.local_audio_in.popleft()

        # read recorded audio
        data_rec = self.local_audio_in.popleft()

        if isinstance(data_rec, Frame):
            # Only feed the recogniser while inside a speech segment.
            if self.recognition_on:
                self.asr.rec_in(data_rec)
        elif isinstance(data_rec, Command):
            dr_speech_start = False
            fname = None

            if data_rec.parsed['__name__'] == "speech_start":
                # Check whether more than one speech segment is queued.
                segments = [
                    cmd for cmd in self.local_audio_in
                    if isinstance(cmd, Command) and
                    cmd.parsed['__name__'] == "speech_start"]
                if len(segments):
                    # There are multiple unprocessed segments in the queue;
                    # remove all unprocessed segments except the last.
                    print "ASR too many unprocessed speech segments:", len(segments)
                    print " removed all segments but the last"

                    removed_segments = 0
                    # Pop items (frames included) up to and including the
                    # last queued speech_start command.
                    while removed_segments < len(segments):
                        data_rec = self.local_audio_in.popleft()
                        if isinstance(data_rec, Command) and data_rec.parsed['__name__'] == "speech_start":
                            removed_segments += 1

                dr_speech_start = "speech_start"
                fname = data_rec.parsed['fname']
            elif data_rec.parsed['__name__'] == "speech_end":
                dr_speech_start = "speech_end"
                fname = data_rec.parsed['fname']

            # Check consistency of the input command.
            if dr_speech_start:
                # speech_start is only valid while recognition is off and
                # speech_end only while it is on.
                if ((not self.recognition_on and
                     dr_speech_start != "speech_start") or
                        (self.recognition_on and
                         dr_speech_start != "speech_end")):
                    msg = ('Commands received by the ASR component are '
                           'inconsistent (recognition_on: {rec}; the new '
                           'command: {cmd}').format(
                               rec=self.recognition_on,
                               cmd=dr_speech_start)
                    self.system_logger.exception(msg)

            if dr_speech_start == "speech_start":
                # Announce the new segment and start recognising.
                self.commands.send(Command('asr_start(fname="%s")' % fname,
                                           'ASR', 'HUB'))
                self.recognition_on = True

                if self.cfg['ASR']['debug']:
                    self.system_logger.debug('ASR: speech_start(fname="%s")' % fname)
            elif dr_speech_start == "speech_end":
                self.recognition_on = False

                if self.cfg['ASR']['debug']:
                    self.system_logger.debug('ASR: speech_end(fname="%s")' % fname)

                try:
                    # Retrieve the final hypothesis for the closed segment.
                    asr_hyp = self.asr.hyp_out()

                    if self.cfg['ASR']['debug']:
                        msg = list()
                        msg.append("ASR Hypothesis")
                        msg.append("-" * 60)
                        msg.append(unicode(asr_hyp))
                        msg.append(u"")
                        msg = u'\n'.join(msg)
                        self.system_logger.debug(msg)
                except (ASRException, JuliusASRTimeoutException):
                    # No result in time; fall back to a one-word "_other_"
                    # confusion network so downstream components get input.
                    self.system_logger.debug("Julius ASR Result Timeout.")

                    if self.cfg['ASR']['debug']:
                        msg = list()
                        msg.append("ASR Alternative hypothesis")
                        msg.append("-" * 60)
                        msg.append("sil")
                        msg.append("")
                        msg = u'\n'.join(msg)
                        self.system_logger.debug(msg)

                    asr_hyp = UtteranceConfusionNetwork()
                    asr_hyp.add([[1.0, "_other_"], ])

                # The ASR component can return either NBList or a confusion
                # network.
                if isinstance(asr_hyp, UtteranceNBList):
                    self.session_logger.asr("user", fname, asr_hyp, None)
                elif isinstance(asr_hyp, UtteranceConfusionNetwork):
                    self.session_logger.asr("user", fname,
                                            asr_hyp.get_utterance_nblist(),
                                            asr_hyp)
                else:
                    self.session_logger.asr("user", fname,
                                            [(-1, asr_hyp)], None)

                # Close the segment and forward the hypothesis to SLU.
                self.commands.send(Command('asr_end(fname="%s")' % fname,
                                           'ASR', 'HUB'))
                self.asr_hypotheses_out.send(ASRHyp(asr_hyp, fname=fname))
        else:
            raise ASRException('Unsupported input.')
def test_session_logger(self):
    """Drive the full SessionLogger API through three simulated calls."""
    cfg = Config.load_configs(config=CONFIG_DICT, use_default=False)
    sl = SessionLogger()

    # test 3 calls at once
    for call_idx in range(3):
        sess_dir = "./%d" % call_idx
        if not os.path.isdir(sess_dir):
            os.mkdir(sess_dir)
        sl.session_start(sess_dir)

        sl.config('config = ' + unicode(cfg))
        sl.header(cfg['Logging']["system_name"], cfg['Logging']["version"])
        sl.input_source("voip")

        # Whole-dialogue recordings: one per channel, then close them.
        for speaker, wav in ((None, "both_complete_dialogue.wav"),
                             ("system", "system_complete_dialogue.wav"),
                             ("user", "user_complete_dialogue.wav")):
            sl.dialogue_rec_start(speaker, wav)
        for wav in ("both_complete_dialogue.wav",
                    "system_complete_dialogue.wav",
                    "user_complete_dialogue.wav"):
            sl.dialogue_rec_end(wav)

        # Opening system turn.
        sl.turn("system")
        sl.dialogue_act("system", "hello()")
        sl.text("system", "Hello.")
        sl.rec_start("system", "system1.wav")
        sl.rec_end("system1.wav")

        # First user turn with ASR and SLU hypotheses attached.
        sl.turn("user")
        sl.rec_start("user", "user1.wav")
        sl.rec_end("user1.wav")

        asr_confnet = UtteranceConfusionNetwork()
        for alternatives in ([[0.90, "want"], [0.05, "has"], [0.05, 'ehm']],
                             [[0.70, "Chinese"], [0.20, "English"], [0.10, 'cheap']],
                             [[0.80, "restaurant"], [0.10, "pub"], [0.10, 'hotel']]):
            asr_confnet.add(alternatives)
        asr_confnet.merge()
        asr_confnet.normalise()
        asr_confnet.sort()
        asr_nblist = asr_confnet.get_utterance_nblist()
        sl.asr("user", "user1.wav", asr_nblist, asr_confnet)

        slu_confnet = DialogueActConfusionNetwork()
        for score, act_name in ((0.7, 'hello'), (0.6, 'thankyou'),
                                (0.4, 'restart'), (0.1, 'bye')):
            slu_confnet.add(score, DialogueActItem(act_name))
        slu_confnet.merge()
        slu_confnet.normalise()
        slu_confnet.sort()
        slu_nblist = slu_confnet.get_da_nblist()
        sl.slu("user", "user1.wav", slu_nblist, slu_confnet)

        # Second system turn; the user barges in over the TTS output.
        sl.turn("system")
        sl.dialogue_act("system", "thankyou()")
        sl.text("system", "Thank you.", cost=1.0)
        sl.rec_start("system", "system2.wav")
        sl.rec_end("system2.wav")
        sl.barge_in("system", tts_time=True)

        sl.turn("user")
        sl.rec_start("user", "user2.wav")
        sl.rec_end("user2.wav")

        sl.hangup("user")
def test_session_logger(self):
    """Run three consecutive simulated calls through one SessionLogger,
    touching every major logging entry point."""
    config = Config.load_configs(config=CONFIG_DICT, use_default=False)
    logger = SessionLogger()

    # test 3 calls at once
    for call in range(3):
        sess_dir = "./{0:d}".format(call)
        if not os.path.isdir(sess_dir):
            os.mkdir(sess_dir)
        logger.session_start(sess_dir)

        logger.config('config = ' + unicode(config))
        logger.header(config['Logging']["system_name"],
                      config['Logging']["version"])
        logger.input_source("voip")

        # Whole-dialogue recordings, one per channel.
        logger.dialogue_rec_start(None, "both_complete_dialogue.wav")
        logger.dialogue_rec_start("system", "system_complete_dialogue.wav")
        logger.dialogue_rec_start("user", "user_complete_dialogue.wav")
        logger.dialogue_rec_end("both_complete_dialogue.wav")
        logger.dialogue_rec_end("system_complete_dialogue.wav")
        logger.dialogue_rec_end("user_complete_dialogue.wav")

        # Opening system turn.
        logger.turn("system")
        logger.dialogue_act("system", "hello()")
        logger.text("system", "Hello.")
        logger.rec_start("system", "system1.wav")
        logger.rec_end("system1.wav")

        # First user turn: attach ASR and SLU hypotheses.
        logger.turn("user")
        logger.rec_start("user", "user1.wav")
        logger.rec_end("user1.wav")

        slot_one = [[0.90, "want"], [0.05, "has"], [0.05, 'ehm']]
        slot_two = [[0.70, "Chinese"], [0.20, "English"], [0.10, 'cheap']]
        slot_three = [[0.80, "restaurant"], [0.10, "pub"], [0.10, 'hotel']]
        asr_confnet = UtteranceConfusionNetwork()
        asr_confnet.add(slot_one)
        asr_confnet.add(slot_two)
        asr_confnet.add(slot_three)
        asr_confnet.merge()
        asr_confnet.normalise()
        asr_confnet.sort()
        asr_nblist = asr_confnet.get_utterance_nblist()
        logger.asr("user", "user1.wav", asr_nblist, asr_confnet)

        slu_confnet = DialogueActConfusionNetwork()
        slu_confnet.add(0.7, DialogueActItem('hello'))
        slu_confnet.add(0.6, DialogueActItem('thankyou'))
        slu_confnet.add(0.4, DialogueActItem('restart'))
        slu_confnet.add(0.1, DialogueActItem('bye'))
        slu_confnet.merge()
        slu_confnet.normalise()
        slu_confnet.sort()
        slu_nblist = slu_confnet.get_da_nblist()
        logger.slu("user", "user1.wav", slu_nblist, slu_confnet)

        # Second system turn; user barges in over the TTS output.
        logger.turn("system")
        logger.dialogue_act("system", "thankyou()")
        logger.text("system", "Thank you.", cost=1.0)
        logger.rec_start("system", "system2.wav")
        logger.rec_end("system2.wav")
        logger.barge_in("system", tts_time=True)

        logger.turn("user")
        logger.rec_start("user", "user2.wav")
        logger.rec_end("user2.wav")

        logger.hangup("user")