def test_empty_features(self):
    empty_feats_items = [('__empty__', 1.0)]

    cn = UtteranceConfusionNetwork()
    feats = UtteranceConfusionNetworkFeatures(confnet=cn)
    self.assertEqual(feats.features.items(), empty_feats_items)

    cn.add([(.9, '')])
    feats = UtteranceConfusionNetworkFeatures(confnet=cn)
    self.assertEqual(feats.features.items(), empty_feats_items)

    cn.add([(1., '')])
    feats = UtteranceConfusionNetworkFeatures(confnet=cn)
    self.assertEqual(feats.features.items(), empty_feats_items)
def test_repr_basic(self):
    A1, A2, A3 = 0.90, 0.05, 0.05
    B1, B2, B3 = 0.50, 0.35, 0.15
    C1, C2, C3 = 0.60, 0.30, 0.10

    confnet = UtteranceConfusionNetwork()
    confnet.add([[A1, 'A ("1\\")'], [A2, 'A2'], [A3, 'A3'], ])
    confnet.add([[B1, 'B1'], [B2, 'B2'], [B3, 'B3'], ])
    confnet.add([[C1, 'C1'], [C2, 'C2'], [C3, 'C3'], ])

    rep = repr(confnet)
    self.assertEqual(repr(eval(rep)), rep)
def test_repr_w_long_links(self):
    A1, A2, A3 = 0.90, 0.05, 0.05
    B1, B2, B3 = 0.70, 0.20, 0.10
    C1, C2, C3 = 0.80, 0.10, 0.10

    asr_confnet = UtteranceConfusionNetwork()
    asr_confnet.add([[A1, "want"], [A2, "has"], [A3, 'ehm']])
    asr_confnet.add([[B1, "Chinese"], [B2, "English"], [B3, 'cheap']])
    asr_confnet.add([[C1, "restaurant"], [C2, "pub"], [C3, 'hotel']])
    asr_confnet.merge().sort()

    confnet = asr_confnet.replace(("has", ), ("is", ))
    rep = repr(confnet)
    self.assertEqual(repr(eval(rep)), rep)

    confnet = asr_confnet.replace(("has", ), tuple())
    rep = repr(confnet)
    self.assertEqual(repr(eval(rep)), rep)

    confnet = asr_confnet.replace(("has", ), ("should", "have", ))
    rep = repr(confnet)
    self.assertEqual(repr(eval(rep)), rep)

    confnet.add([(0.5, 'want'), (0.5, 'pub')])
    rep = repr(confnet)
    self.assertEqual(repr(eval(rep)), rep)
def test_conversion_of_confnet_into_nblist(self):
    A1, A2, A3 = 0.90, 0.05, 0.05
    B1, B2, B3 = 0.50, 0.35, 0.15
    C1, C2, C3 = 0.60, 0.30, 0.10

    correct_nblist = UtteranceNBList()
    correct_nblist.add(A1 * B1 * C1, Utterance("A1 B1 C1"))
    correct_nblist.add(A1 * B2 * C1, Utterance("A1 B2 C1"))
    correct_nblist.add(A1 * B1 * C2, Utterance("A1 B1 C2"))
    correct_nblist.add(A1 * B2 * C2, Utterance("A1 B2 C2"))
    correct_nblist.add(A1 * B3 * C1, Utterance("A1 B3 C1"))
    correct_nblist.add(A1 * B1 * C3, Utterance("A1 B1 C3"))
    correct_nblist.add(A1 * B3 * C2, Utterance("A1 B3 C2"))
    correct_nblist.add(A1 * B2 * C3, Utterance("A1 B2 C3"))
    correct_nblist.merge()
    correct_nblist.add_other()

    confnet = UtteranceConfusionNetwork()
    confnet.add([[A1, 'A1'], [A2, 'A2'], [A3, 'A3'], ])
    confnet.add([[B1, 'B1'], [B2, 'B2'], [B3, 'B3'], ])
    confnet.add([[C1, 'C1'], [C2, 'C2'], [C3, 'C3'], ])
    confnet.merge().sort()

    gen_nblist = confnet.get_utterance_nblist(10)

    s = []
    s.append("")
    s.append("Confusion network:")
    s.append(unicode(confnet))
    s.append("")
    s.append("Generated nblist:")
    s.append(unicode(gen_nblist))
    s.append("")
    s.append("Correct nblist:")
    s.append(unicode(correct_nblist))
    s.append("")
    print '\n'.join(s)

    self.assertEqual(unicode(gen_nblist), unicode(correct_nblist))
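# A minimal sketch (not the library's get_utterance_nblist implementation) of
# how the expected n-best scores above arise: every path through the
# confusion network scores as the product of the link probabilities it
# traverses, and the n-best list keeps the highest-scoring paths.  Only the
# standard library is assumed.
from itertools import product

def enumerate_best_paths(positions, n_best=10):
    """positions is a list of word positions, each a list of
    (probability, word) alternatives; returns the n_best most probable
    (probability, word sequence) paths."""
    paths = []
    for combination in product(*positions):
        prob = 1.0
        words = []
        for link_prob, word in combination:
            prob *= link_prob
            words.append(word)
        paths.append((prob, words))
    paths.sort(reverse=True)
    return paths[:n_best]

# With the three positions used in the test above, the top hypothesis is
# (['A1', 'B1', 'C1'] with score 0.90 * 0.50 * 0.60 ≈ 0.27).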
def read_audio_write_asr_hypotheses(self):
    # Read input audio.
    if self.local_audio_in:
        if len(self.local_audio_in) > 40:
            print "ASR unprocessed frames:", len(self.local_audio_in)

        if len(self.local_audio_in) > 200:
            print "ASR too many unprocessed frames:", len(self.local_audio_in)
            print "  skipping everything until the end of the segment:", len(self.local_audio_in)

            while (len(self.local_audio_in) > 2
                   and isinstance(self.local_audio_in[0], Frame)):
                skip = self.local_audio_in.popleft()

        # read recorded audio
        data_rec = self.local_audio_in.popleft()

        if isinstance(data_rec, Frame):
            if self.recognition_on:
                self.asr.rec_in(data_rec)
        elif isinstance(data_rec, Command):
            dr_speech_start = False
            fname = None

            if data_rec.parsed['__name__'] == "speech_start":
                # Check whether there is more than one unprocessed speech
                # segment in the queue.
                segments = [cmd for cmd in self.local_audio_in
                            if isinstance(cmd, Command)
                            and cmd.parsed['__name__'] == "speech_start"]
                if len(segments):
                    # There are multiple unprocessed segments in the queue;
                    # remove all unprocessed segments except the last.
                    print "ASR too many unprocessed speech segments:", len(segments)
                    print "  removed all segments but the last"

                    removed_segments = 0
                    while removed_segments < len(segments):
                        data_rec = self.local_audio_in.popleft()
                        if (isinstance(data_rec, Command)
                                and data_rec.parsed['__name__'] == "speech_start"):
                            removed_segments += 1

                dr_speech_start = "speech_start"
                fname = data_rec.parsed['fname']
            elif data_rec.parsed['__name__'] == "speech_end":
                dr_speech_start = "speech_end"
                fname = data_rec.parsed['fname']

            # Check consistency of the input command.
            if dr_speech_start:
                if ((not self.recognition_on
                     and dr_speech_start != "speech_start")
                        or (self.recognition_on
                            and dr_speech_start != "speech_end")):
                    msg = ('Commands received by the ASR component are '
                           'inconsistent (recognition_on: {rec}; the new '
                           'command: {cmd}).').format(
                        rec=self.recognition_on, cmd=dr_speech_start)
                    self.system_logger.exception(msg)

            if dr_speech_start == "speech_start":
                self.commands.send(Command('asr_start(fname="%s")' % fname,
                                           'ASR', 'HUB'))
                self.recognition_on = True

                if self.cfg['ASR']['debug']:
                    self.system_logger.debug(
                        'ASR: speech_start(fname="%s")' % fname)
            elif dr_speech_start == "speech_end":
                self.recognition_on = False

                if self.cfg['ASR']['debug']:
                    self.system_logger.debug(
                        'ASR: speech_end(fname="%s")' % fname)

                try:
                    asr_hyp = self.asr.hyp_out()

                    if self.cfg['ASR']['debug']:
                        msg = list()
                        msg.append("ASR Hypothesis")
                        msg.append("-" * 60)
                        msg.append(unicode(asr_hyp))
                        msg.append(u"")
                        msg = u'\n'.join(msg)
                        self.system_logger.debug(msg)
                except (ASRException, JuliusASRTimeoutException):
                    self.system_logger.debug("Julius ASR Result Timeout.")

                    if self.cfg['ASR']['debug']:
                        msg = list()
                        msg.append("ASR Alternative hypothesis")
                        msg.append("-" * 60)
                        msg.append("sil")
                        msg.append("")
                        msg = u'\n'.join(msg)
                        self.system_logger.debug(msg)

                    asr_hyp = UtteranceConfusionNetwork()
                    asr_hyp.add([[1.0, "_other_"], ])

                # The ASR component can return either an n-best list or a
                # confusion network.
                if isinstance(asr_hyp, UtteranceNBList):
                    self.session_logger.asr("user", fname, asr_hyp, None)
                elif isinstance(asr_hyp, UtteranceConfusionNetwork):
                    self.session_logger.asr("user", fname,
                                            asr_hyp.get_utterance_nblist(),
                                            asr_hyp)
                else:
                    self.session_logger.asr("user", fname, [(-1, asr_hyp)],
                                            None)

                self.commands.send(Command('asr_end(fname="%s")' % fname,
                                           'ASR', 'HUB'))
                self.commands.send(ASRHyp(asr_hyp, fname=fname))
                self.asr_hypotheses_out.send(ASRHyp(asr_hyp, fname=fname))
        else:
            raise ASRException('Unsupported input.')
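# A minimal standalone sketch (illustrative only, not part of the component)
# of the consistency rule enforced above: a "speech_start" command is only
# valid while recognition is off, and a "speech_end" command only while
# recognition is on.
def segment_command_is_consistent(recognition_on, command):
    """Return True iff the incoming segment command matches the current
    recognition state."""
    if command == "speech_start":
        return not recognition_on
    if command == "speech_end":
        return recognition_on
    return False

# e.g. segment_command_is_consistent(False, "speech_start") -> True
#      segment_command_is_consistent(True, "speech_start")  -> False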
def test_replace(self):
    tolerance = 0.01

    # Create a simple confusion network.
    A1, A2, A3 = 0.90, 0.05, 0.05
    B1, B2, B3 = 0.70, 0.20, 0.10
    C1, C2, C3 = 0.80, 0.10, 0.10

    asr_confnet = UtteranceConfusionNetwork()
    asr_confnet.add([[A1, "want"], [A2, "has"], [A3, 'ehm']])
    asr_confnet.add([[B1, "Chinese"], [B2, "English"], [B3, 'cheap']])
    asr_confnet.add([[C1, "restaurant"], [C2, "pub"], [C3, 'hotel']])
    asr_confnet.merge().sort()

    replaced = asr_confnet.replace(("nothing", ), ("something", ))
    self.assertEqual(replaced, asr_confnet)

    replaced = asr_confnet.replace(("has", ), ("is", ))
    self.assertNotEqual(replaced, asr_confnet)
    self.assertEqual(list(replaced.cn[0][1]), [A2, "is"])

    replaced = asr_confnet.replace(("has", ), tuple())
    self.assertNotEqual(replaced, asr_confnet)
    self.assertAlmostEqual(sum(hyp[0] for hyp in asr_confnet.cn[0]), 1.)

    replaced = asr_confnet.replace(("has", ), ("should", "have", ))
    replaced.add([(0.5, 'want'), (0.5, 'pub')])

    unigrams = [['want'], ['ehm'], ['should'], ['have'],
                ['Chinese'], ['English'], ['cheap'],
                ['restaurant'], ['pub'], ['hotel'],
                ['want'], ['pub'], ]
    act_unigrams = list(hyp[1] for hyp in replaced.iter_ngrams(1))
    self.assertItemsEqual(unigrams, act_unigrams)

    bigrams = [
        ['want', 'Chinese'], ['want', 'English'], ['want', 'cheap'],
        ['ehm', 'Chinese'], ['ehm', 'English'], ['ehm', 'cheap'],
        ['should', 'have'],
        ['have', 'Chinese'], ['have', 'English'], ['have', 'cheap'],
        ['Chinese', 'restaurant'], ['Chinese', 'pub'], ['Chinese', 'hotel'],
        ['English', 'restaurant'], ['English', 'pub'], ['English', 'hotel'],
        ['cheap', 'restaurant'], ['cheap', 'pub'], ['cheap', 'hotel'],
        ['restaurant', 'want'], ['restaurant', 'pub'],
        ['pub', 'want'], ['pub', 'pub'],
        ['hotel', 'want'], ['hotel', 'pub'],
    ]
    act_bigrams = list(hyp[1] for hyp in replaced.iter_ngrams(2))
    self.assertItemsEqual(bigrams, act_bigrams)

    trigrams = [
        ['want', 'Chinese', 'restaurant'], ['want', 'Chinese', 'pub'],
        ['want', 'Chinese', 'hotel'],
        ['want', 'English', 'restaurant'], ['want', 'English', 'pub'],
        ['want', 'English', 'hotel'],
        ['want', 'cheap', 'restaurant'], ['want', 'cheap', 'pub'],
        ['want', 'cheap', 'hotel'],
        ['ehm', 'Chinese', 'restaurant'], ['ehm', 'Chinese', 'pub'],
        ['ehm', 'Chinese', 'hotel'],
        ['ehm', 'English', 'restaurant'], ['ehm', 'English', 'pub'],
        ['ehm', 'English', 'hotel'],
        ['ehm', 'cheap', 'restaurant'], ['ehm', 'cheap', 'pub'],
        ['ehm', 'cheap', 'hotel'],
        ['should', 'have', 'Chinese'], ['should', 'have', 'English'],
        ['should', 'have', 'cheap'],
        ['have', 'Chinese', 'restaurant'], ['have', 'Chinese', 'pub'],
        ['have', 'Chinese', 'hotel'],
        ['have', 'English', 'restaurant'], ['have', 'English', 'pub'],
        ['have', 'English', 'hotel'],
        ['have', 'cheap', 'restaurant'], ['have', 'cheap', 'pub'],
        ['have', 'cheap', 'hotel'],
        ['Chinese', 'restaurant', 'want'], ['Chinese', 'restaurant', 'pub'],
        ['Chinese', 'pub', 'want'], ['Chinese', 'pub', 'pub'],
        ['Chinese', 'hotel', 'want'], ['Chinese', 'hotel', 'pub'],
        ['English', 'restaurant', 'want'], ['English', 'restaurant', 'pub'],
        ['English', 'pub', 'want'], ['English', 'pub', 'pub'],
        ['English', 'hotel', 'want'], ['English', 'hotel', 'pub'],
        ['cheap', 'restaurant', 'want'], ['cheap', 'restaurant', 'pub'],
        ['cheap', 'pub', 'want'], ['cheap', 'pub', 'pub'],
        ['cheap', 'hotel', 'want'], ['cheap', 'hotel', 'pub'],
    ]
    act_trigrams = list(hyp[1] for hyp in replaced.iter_ngrams(3))
    self.assertItemsEqual(trigrams, act_trigrams)

    replaced2 = replaced.replace(('pub',), ('fast', 'food',))
    replaced2 = replaced2.replace(('want', 'English'),
                                  ('would', 'like', 'English'))
    bigrams = [
        ['<s>', 'ehm'], ['<s>', 'should'], ['<s>', 'would'],
        ['ehm', 'Chinese'], ['ehm', 'cheap'],
        ['should', 'have'],
        ['have', 'Chinese'], ['have', 'cheap'],
        ['would', 'like'], ['like', 'English'],
        ['English', 'restaurant'], ['English', 'hotel'], ['English', 'fast'],
        ['Chinese', 'restaurant'], ['Chinese', 'hotel'], ['Chinese', 'fast'],
        ['cheap', 'restaurant'], ['cheap', 'hotel'], ['cheap', 'fast'],
        ['restaurant', 'want'], ['restaurant', 'fast'],
        ['hotel', 'want'], ['hotel', 'fast'],
        ['fast', 'food'],
        ['food', 'want'], ['food', 'fast'],
        ['want', '</s>'],
        ['fast', 'food'], ['food', '</s>'],
    ]
    act_bigrams = list(hyp[1] for hyp in replaced2.iter_ngrams(2, True))
    self.assertItemsEqual(bigrams, act_bigrams)
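# A rough sketch of how n-grams over a confusion network can be enumerated
# (assuming independent word positions; this is not the library's iter_ngrams
# implementation, which also has to handle the multi-word links created by
# replace()): slide a window of n adjacent positions, take the cross product
# of their alternatives, and optionally pad with sentence boundary symbols.
from itertools import product

def iter_confnet_ngrams(positions, n, with_boundaries=False,
                        start='<s>', end='</s>'):
    """positions is a list of word positions, each a list of
    (probability, word) alternatives; yields (probability, ngram) pairs."""
    if with_boundaries:
        positions = [[(1.0, start)]] + list(positions) + [[(1.0, end)]]
    for first in xrange(len(positions) - n + 1):
        for combination in product(*positions[first:first + n]):
            prob = 1.0
            ngram = []
            for link_prob, word in combination:
                prob *= link_prob
                ngram.append(word)
            yield prob, ngram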
def test_ngram_iterator(self):
    tolerance = 0.01

    # Create a simple confusion network.
    A1, A2, A3 = 0.90, 0.05, 0.05
    B1, B2, B3 = 0.70, 0.20, 0.10
    C1, C2, C3 = 0.80, 0.10, 0.10

    asr_confnet = UtteranceConfusionNetwork()
    asr_confnet.add([[A1, "want"], [A2, "has"], [A3, 'ehm']])
    asr_confnet.add([[B1, "Chinese"], [B2, "English"], [B3, 'cheap']])
    asr_confnet.add([[C1, "restaurant"], [C2, "pub"], [C3, 'hotel']])
    asr_confnet.merge().sort()

    # Normal use case.
    trigram_hyps = [
        (0.504, ['want', 'Chinese', 'restaurant']),
        (0.063, ['want', 'Chinese', 'pub']),
        (0.063, ['want', 'Chinese', 'hotel']),
        (0.14400000000000004, ['want', 'English', 'restaurant']),
        (0.018000000000000006, ['want', 'English', 'pub']),
        (0.018000000000000006, ['want', 'English', 'hotel']),
        (0.07200000000000002, ['want', 'cheap', 'restaurant']),
        (0.009000000000000003, ['want', 'cheap', 'pub']),
        (0.009000000000000003, ['want', 'cheap', 'hotel']),
        (0.027999999999999997, ['has', 'Chinese', 'restaurant']),
        (0.0034999999999999996, ['has', 'Chinese', 'pub']),
        (0.0034999999999999996, ['has', 'Chinese', 'hotel']),
        (0.008000000000000002, ['has', 'English', 'restaurant']),
        (0.0010000000000000002, ['has', 'English', 'pub']),
        (0.0010000000000000002, ['has', 'English', 'hotel']),
        (0.004000000000000001, ['has', 'cheap', 'restaurant']),
        (0.0005000000000000001, ['has', 'cheap', 'pub']),
        (0.0005000000000000001, ['has', 'cheap', 'hotel']),
        (0.027999999999999997, ['ehm', 'Chinese', 'restaurant']),
        (0.0034999999999999996, ['ehm', 'Chinese', 'pub']),
        (0.0034999999999999996, ['ehm', 'Chinese', 'hotel']),
        (0.008000000000000002, ['ehm', 'English', 'restaurant']),
        (0.0010000000000000002, ['ehm', 'English', 'pub']),
        (0.0010000000000000002, ['ehm', 'English', 'hotel']),
        (0.004000000000000001, ['ehm', 'cheap', 'restaurant']),
        (0.0005000000000000001, ['ehm', 'cheap', 'pub']),
        (0.0005000000000000001, ['ehm', 'cheap', 'hotel'])]
    trigram_hyps_with_boundaries = [
        (0.63, [SENTENCE_START, 'want', 'Chinese']),
        (0.18000000000000002, [SENTENCE_START, 'want', 'English']),
        (0.09000000000000001, [SENTENCE_START, 'want', 'cheap']),
        (0.034999999999999996, [SENTENCE_START, 'has', 'Chinese']),
        (0.010000000000000002, [SENTENCE_START, 'has', 'English']),
        (0.005000000000000001, [SENTENCE_START, 'has', 'cheap']),
        (0.034999999999999996, [SENTENCE_START, 'ehm', 'Chinese']),
        (0.010000000000000002, [SENTENCE_START, 'ehm', 'English']),
        (0.005000000000000001, [SENTENCE_START, 'ehm', 'cheap']),
        (0.504, ['want', 'Chinese', 'restaurant']),
        (0.063, ['want', 'Chinese', 'pub']),
        (0.063, ['want', 'Chinese', 'hotel']),
        (0.14400000000000004, ['want', 'English', 'restaurant']),
        (0.018000000000000006, ['want', 'English', 'pub']),
        (0.018000000000000006, ['want', 'English', 'hotel']),
        (0.07200000000000002, ['want', 'cheap', 'restaurant']),
        (0.009000000000000003, ['want', 'cheap', 'pub']),
        (0.009000000000000003, ['want', 'cheap', 'hotel']),
        (0.027999999999999997, ['has', 'Chinese', 'restaurant']),
        (0.0034999999999999996, ['has', 'Chinese', 'pub']),
        (0.0034999999999999996, ['has', 'Chinese', 'hotel']),
        (0.008000000000000002, ['has', 'English', 'restaurant']),
        (0.0010000000000000002, ['has', 'English', 'pub']),
        (0.0010000000000000002, ['has', 'English', 'hotel']),
        (0.004000000000000001, ['has', 'cheap', 'restaurant']),
        (0.0005000000000000001, ['has', 'cheap', 'pub']),
        (0.0005000000000000001, ['has', 'cheap', 'hotel']),
        (0.027999999999999997, ['ehm', 'Chinese', 'restaurant']),
        (0.0034999999999999996, ['ehm', 'Chinese', 'pub']),
        (0.0034999999999999996, ['ehm', 'Chinese', 'hotel']),
        (0.008000000000000002, ['ehm', 'English', 'restaurant']),
        (0.0010000000000000002, ['ehm', 'English', 'pub']),
        (0.0010000000000000002, ['ehm', 'English', 'hotel']),
        (0.004000000000000001, ['ehm', 'cheap', 'restaurant']),
        (0.0005000000000000001, ['ehm', 'cheap', 'pub']),
        (0.0005000000000000001, ['ehm', 'cheap', 'hotel']),
        (0.5599999999999999, ['Chinese', 'restaurant', SENTENCE_END]),
        (0.06999999999999999, ['Chinese', 'pub', SENTENCE_END]),
        (0.06999999999999999, ['Chinese', 'hotel', SENTENCE_END]),
        (0.16000000000000003, ['English', 'restaurant', SENTENCE_END]),
        (0.020000000000000004, ['English', 'pub', SENTENCE_END]),
        (0.020000000000000004, ['English', 'hotel', SENTENCE_END]),
        (0.08000000000000002, ['cheap', 'restaurant', SENTENCE_END]),
        (0.010000000000000002, ['cheap', 'pub', SENTENCE_END]),
        (0.010000000000000002, ['cheap', 'hotel', SENTENCE_END])]

    act_trigram_hyps = list(asr_confnet.iter_ngrams(3))
    act_trigram_hyps_with_boundaries = list(
        asr_confnet.iter_ngrams(3, with_boundaries=True))

    # Compare the actual answer to the expected one.
    for hyps, act_hyps in (
            (trigram_hyps, act_trigram_hyps),
            (trigram_hyps_with_boundaries,
             act_trigram_hyps_with_boundaries)):
        self.assertItemsEqual([hyp[1] for hyp in hyps],
                              [ahyp[1] for ahyp in act_hyps])
        for hyp in hyps:
            corresponding = [act_hyp for act_hyp in act_hyps
                             if act_hyp[1] == hyp[1]]
            self.assertTrue(len(corresponding) == 1)
            act_hyp = corresponding[0]
            self.assertTrue(act_hyp[0] * (1 - tolerance)
                            <= hyp[0]
                            <= act_hyp[0] * (1 + tolerance))

    # Corner cases.
    self.assertItemsEqual(list(asr_confnet.iter_ngrams(4)), [])

    pentagram_hyps = [
        (0.504, [SENTENCE_START, 'want', 'Chinese', 'restaurant', SENTENCE_END]),
        (0.063, [SENTENCE_START, 'want', 'Chinese', 'pub', SENTENCE_END]),
        (0.063, [SENTENCE_START, 'want', 'Chinese', 'hotel', SENTENCE_END]),
        (0.14400000000000004, [SENTENCE_START, 'want', 'English', 'restaurant', SENTENCE_END]),
        (0.018000000000000006, [SENTENCE_START, 'want', 'English', 'pub', SENTENCE_END]),
        (0.018000000000000006, [SENTENCE_START, 'want', 'English', 'hotel', SENTENCE_END]),
        (0.07200000000000002, [SENTENCE_START, 'want', 'cheap', 'restaurant', SENTENCE_END]),
        (0.009000000000000003, [SENTENCE_START, 'want', 'cheap', 'pub', SENTENCE_END]),
        (0.009000000000000003, [SENTENCE_START, 'want', 'cheap', 'hotel', SENTENCE_END]),
        (0.027999999999999997, [SENTENCE_START, 'has', 'Chinese', 'restaurant', SENTENCE_END]),
        (0.0034999999999999996, [SENTENCE_START, 'has', 'Chinese', 'pub', SENTENCE_END]),
        (0.0034999999999999996, [SENTENCE_START, 'has', 'Chinese', 'hotel', SENTENCE_END]),
        (0.008000000000000002, [SENTENCE_START, 'has', 'English', 'restaurant', SENTENCE_END]),
        (0.0010000000000000002, [SENTENCE_START, 'has', 'English', 'pub', SENTENCE_END]),
        (0.0010000000000000002, [SENTENCE_START, 'has', 'English', 'hotel', SENTENCE_END]),
        (0.004000000000000001, [SENTENCE_START, 'has', 'cheap', 'restaurant', SENTENCE_END]),
        (0.0005000000000000001, [SENTENCE_START, 'has', 'cheap', 'pub', SENTENCE_END]),
        (0.0005000000000000001, [SENTENCE_START, 'has', 'cheap', 'hotel', SENTENCE_END]),
        (0.027999999999999997, [SENTENCE_START, 'ehm', 'Chinese', 'restaurant', SENTENCE_END]),
        (0.0034999999999999996, [SENTENCE_START, 'ehm', 'Chinese', 'pub', SENTENCE_END]),
        (0.0034999999999999996, [SENTENCE_START, 'ehm', 'Chinese', 'hotel', SENTENCE_END]),
        (0.008000000000000002, [SENTENCE_START, 'ehm', 'English', 'restaurant', SENTENCE_END]),
        (0.0010000000000000002, [SENTENCE_START, 'ehm', 'English', 'pub', SENTENCE_END]),
        (0.0010000000000000002, [SENTENCE_START, 'ehm', 'English', 'hotel', SENTENCE_END]),
        (0.004000000000000001, [SENTENCE_START, 'ehm', 'cheap', 'restaurant', SENTENCE_END]),
        (0.0005000000000000001, [SENTENCE_START, 'ehm', 'cheap', 'pub', SENTENCE_END]),
        (0.0005000000000000001, [SENTENCE_START, 'ehm', 'cheap', 'hotel', SENTENCE_END])]
    act_pentagram_hyps = list(
        asr_confnet.iter_ngrams(5, with_boundaries=True))
    self.assertItemsEqual([hyp[1] for hyp in pentagram_hyps],
                          [ahyp[1] for ahyp in act_pentagram_hyps])
    for hyp in pentagram_hyps:
        corresponding = [act_hyp for act_hyp in act_pentagram_hyps
                         if act_hyp[1] == hyp[1]]
        self.assertTrue(len(corresponding) == 1)
        act_hyp = corresponding[0]
        self.assertTrue(act_hyp[0] * (1 - tolerance)
                        <= hyp[0]
                        <= act_hyp[0] * (1 + tolerance))

    self.assertFalse(list(asr_confnet.iter_ngrams(6, with_boundaries=True)))
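# Where the expected scores above come from: with independent word positions,
# the probability of an n-gram is the product of its link probabilities, and
# the boundary links <s> and </s> are taken with probability 1.0.  A quick
# sanity check in plain Python (no library code assumed):
A1, B1, C1 = 0.90, 0.70, 0.80
assert abs(A1 * B1 * C1 - 0.504) < 1e-9   # ['want', 'Chinese', 'restaurant']
assert abs(1.0 * A1 * B1 - 0.63) < 1e-9   # ['<s>', 'want', 'Chinese']
assert abs(B1 * C1 * 1.0 - 0.56) < 1e-9   # ['Chinese', 'restaurant', '</s>']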
def test_session_logger(self):
    cfg = Config.load_configs(config=CONFIG_DICT, use_default=False)
    sl = SessionLogger()

    # test 3 calls at once
    for i in range(3):
        sess_dir = "./%d" % i
        if not os.path.isdir(sess_dir):
            os.mkdir(sess_dir)
        sl.session_start(sess_dir)
        sl.config('config = ' + unicode(cfg))
        sl.header(cfg['Logging']["system_name"], cfg['Logging']["version"])
        sl.input_source("voip")

        sl.dialogue_rec_start(None, "both_complete_dialogue.wav")
        sl.dialogue_rec_start("system", "system_complete_dialogue.wav")
        sl.dialogue_rec_start("user", "user_complete_dialogue.wav")
        sl.dialogue_rec_end("both_complete_dialogue.wav")
        sl.dialogue_rec_end("system_complete_dialogue.wav")
        sl.dialogue_rec_end("user_complete_dialogue.wav")

        sl.turn("system")
        sl.dialogue_act("system", "hello()")
        sl.text("system", "Hello.")
        sl.rec_start("system", "system1.wav")
        sl.rec_end("system1.wav")

        sl.turn("user")
        sl.rec_start("user", "user1.wav")
        sl.rec_end("user1.wav")

        A1, A2, A3 = 0.90, 0.05, 0.05
        B1, B2, B3 = 0.70, 0.20, 0.10
        C1, C2, C3 = 0.80, 0.10, 0.10

        asr_confnet = UtteranceConfusionNetwork()
        asr_confnet.add([[A1, "want"], [A2, "has"], [A3, 'ehm']])
        asr_confnet.add([[B1, "Chinese"], [B2, "English"], [B3, 'cheap']])
        asr_confnet.add([[C1, "restaurant"], [C2, "pub"], [C3, 'hotel']])
        asr_confnet.merge()
        asr_confnet.normalise()
        asr_confnet.sort()

        asr_nblist = asr_confnet.get_utterance_nblist()

        sl.asr("user", "user1.wav", asr_nblist, asr_confnet)

        slu_confnet = DialogueActConfusionNetwork()
        slu_confnet.add(0.7, DialogueActItem('hello'))
        slu_confnet.add(0.6, DialogueActItem('thankyou'))
        slu_confnet.add(0.4, DialogueActItem('restart'))
        slu_confnet.add(0.1, DialogueActItem('bye'))
        slu_confnet.merge()
        slu_confnet.normalise()
        slu_confnet.sort()

        slu_nblist = slu_confnet.get_da_nblist()

        sl.slu("user", "user1.wav", slu_nblist, slu_confnet)

        sl.turn("system")
        sl.dialogue_act("system", "thankyou()")
        sl.text("system", "Thank you.", cost=1.0)
        sl.rec_start("system", "system2.wav")
        sl.rec_end("system2.wav")
        sl.barge_in("system", tts_time=True)

        sl.turn("user")
        sl.rec_start("user", "user2.wav")
        sl.rec_end("user2.wav")

        sl.hangup("user")
def get_results(self, timeout=0.6):
    """
    Waits for the complete recognition results from the Julius ASR server.

    Timeout specifies how long it will wait for the end of message.
    """
    msg = ""

    # Get results from the server.
    time_slept = 0.0
    while time_slept < timeout:
        msg_part = self.read_server_message(self.msg_timeout)
        if not msg_part:
            # Wait and check whether there is a message.
            time.sleep(self.cfg['Hub']['main_loop_sleep_time'])
            time_slept += self.cfg['Hub']['main_loop_sleep_time']
            if self.debug >= 2:
                print "gr.time_slept:", time_slept
            continue

        msg += msg_part + '\n'

        if self.debug:
            print msg

        if '<CONFNET>' in msg:
            break
    else:
        raise JuliusASRTimeoutException(
            "Timeout when waiting for the Julius server results.")

    # Process the results.
    """ Typical result returned by the Julius ASR.

    <STARTPROC/>
    <INPUT STATUS="LISTEN" TIME="1343896296"/>
    <INPUT STATUS="STARTREC" TIME="1343896311"/>
    <STARTRECOG/>
    <INPUT STATUS="ENDREC" TIME="1343896312"/>
    <ENDRECOG/>
    <INPUTPARAM FRAMES="164" MSEC="1640"/>
    <RECOGOUT>
      <SHYPO RANK="1" SCORE="-7250.111328">
        <WHYPO WORD="" CLASSID="<s>" PHONE="sil" CM="0.887"/>
        <WHYPO WORD="I'M" CLASSID="I'M" PHONE="ah m" CM="0.705"/>
        <WHYPO WORD="LOOKING" CLASSID="LOOKING" PHONE="l uh k ih ng" CM="0.992"/>
        <WHYPO WORD="FOR" CLASSID="FOR" PHONE="f er" CM="0.757"/>
        <WHYPO WORD="A" CLASSID="A" PHONE="ah" CM="0.672"/>
        <WHYPO WORD="PUB" CLASSID="PUB" PHONE="p ah b" CM="0.409"/>
        <WHYPO WORD="" CLASSID="</s>" PHONE="sil" CM="1.000"/>
      </SHYPO>
    </RECOGOUT>
    <GRAPHOUT NODENUM="43" ARCNUM="70">
      <NODE GID="0" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="2"/>
      <NODE GID="1" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="3"/>
      <NODE GID="2" WORD="" CLASSID="<s>" PHONE="sil" BEGIN="0" END="4"/>
      <NODE GID="3" WORD="I" CLASSID="I" PHONE="ay" BEGIN="3" END="5"/>
      <NODE GID="4" WORD="NO" CLASSID="NO" PHONE="n ow" BEGIN="3" END="7"/>
      <NODE GID="5" WORD="I" CLASSID="I" PHONE="ay" BEGIN="4" END="6"/>
      <NODE GID="6" WORD="UH" CLASSID="UH" PHONE="ah" BEGIN="4" END="6"/>
      <NODE GID="7" WORD="I'M" CLASSID="I'M" PHONE="ay m" BEGIN="4" END="27"/>
      ...
      <NODE GID="38" WORD="PUB" CLASSID="PUB" PHONE="p ah b" BEGIN="79" END="104"/>
      <NODE GID="39" WORD="AH" CLASSID="AH" PHONE="aa" BEGIN="81" END="110"/>
      <NODE GID="40" WORD="LOT" CLASSID="LOT" PHONE="l aa t" BEGIN="81" END="110"/>
      <NODE GID="41" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="105" END="163"/>
      <NODE GID="42" WORD="" CLASSID="</s>" PHONE="sil" BEGIN="111" END="163"/>
      <ARC FROM="0" TO="4"/>
      <ARC FROM="0" TO="3"/>
      <ARC FROM="1" TO="7"/>
      <ARC FROM="1" TO="5"/>
      <ARC FROM="1" TO="6"/>
      ...
      <ARC FROM="38" TO="41"/>
      <ARC FROM="39" TO="42"/>
      <ARC FROM="40" TO="42"/>
    </GRAPHOUT>
    <CONFNET>
      <WORD>
        <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
      </WORD>
      <WORD>
        <ALTERNATIVE PROB="0.950">I</ALTERNATIVE>
        <ALTERNATIVE PROB="0.020">HI</ALTERNATIVE>
        <ALTERNATIVE PROB="0.013">NO</ALTERNATIVE>
        <ALTERNATIVE PROB="0.010"></ALTERNATIVE>
        <ALTERNATIVE PROB="0.006">UH</ALTERNATIVE>
      </WORD>
      <WORD>
        <ALTERNATIVE PROB="0.945">AM</ALTERNATIVE>
        <ALTERNATIVE PROB="0.055">I'M</ALTERNATIVE>
      </WORD>
      <WORD>
        <ALTERNATIVE PROB="1.000">LOOKING</ALTERNATIVE>
      </WORD>
      <WORD>
        <ALTERNATIVE PROB="1.000">FOR</ALTERNATIVE>
      </WORD>
      <WORD>
        <ALTERNATIVE PROB="1.000">A</ALTERNATIVE>
      </WORD>
      <WORD>
        <ALTERNATIVE PROB="0.963">PUB</ALTERNATIVE>
        <ALTERNATIVE PROB="0.016">AH</ALTERNATIVE>
        <ALTERNATIVE PROB="0.012">BAR</ALTERNATIVE>
        <ALTERNATIVE PROB="0.008">LOT</ALTERNATIVE>
      </WORD>
      <WORD>
        <ALTERNATIVE PROB="1.000"></ALTERNATIVE>
      </WORD>
    </CONFNET>
    <INPUT STATUS="LISTEN" TIME="1343896312"/>
    """

    msg = "<RESULTS>" + msg + "</RESULTS>"
    # Escape the bare <s> and </s> tokens so that the message is valid XML.
    msg = msg.replace("<s>", "&lt;s&gt;").replace("</s>", "&lt;/s&gt;")

    nblist = UtteranceNBList()

    doc = xml.dom.minidom.parseString(msg)
    recogout = doc.getElementsByTagName("RECOGOUT")
    for el in recogout:
        shypo = el.getElementsByTagName("SHYPO")
        for el in shypo:
            whypo = el.getElementsByTagName("WHYPO")
            utterance = ""
            cm = 1.0
            for el in whypo:
                word = el.getAttribute("WORD")
                utterance += " " + word
                if word:
                    cm *= float(el.getAttribute("CM"))
            nblist.add(cm, Utterance(utterance))

    nblist.merge()
    nblist.add_other()

    cn = UtteranceConfusionNetwork()
    confnet = doc.getElementsByTagName("CONFNET")
    for el in confnet:
        word = el.getElementsByTagName("WORD")
        for el in word:
            alternative = el.getElementsByTagName("ALTERNATIVE")
            word_list = []
            for el in alternative:
                prob = float(el.getAttribute("PROB"))
                text = get_text_from_xml_node(el)
                word_list.append([prob, text])

            # Filter out empty hypotheses.
            if len(word_list) == 0:
                continue
            if len(word_list) == 1 and len(word_list[0][1]) == 0:
                continue

            # Add the word into the confusion network.
            cn.add(word_list)

    cn.merge()
    cn.normalise()
    cn.prune()
    cn.normalise()
    cn.sort()

    return nblist, cn