Exemplo n.º 1
0
    def test_repr_w_long_links(self):
        """Check that ``repr`` of a confusion network survives ``eval``.

        A three-slot network is built, then the word "has" is replaced by
        a one-word, an empty and a two-word sequence in turn.  For every
        resulting network, evaluating its ``repr`` and taking ``repr`` of
        the result must give back the same string.  The last network is
        checked once more after an extra slot has been appended to it.
        """
        # (probabilities, words) for each slot of the source network.
        slots = (
            ((0.90, 0.05, 0.05), ("want", "has", "ehm")),
            ((0.70, 0.20, 0.10), ("Chinese", "English", "cheap")),
            ((0.80, 0.10, 0.10), ("restaurant", "pub", "hotel")),
        )

        asr_confnet = UtteranceConfusionNetwork()
        for probs, words in slots:
            asr_confnet.add([[prob, word] for prob, word in zip(probs, words)])
        asr_confnet.merge().sort()

        # Same-length, empty and longer replacements, in that order; the
        # network produced by the final one is reused below.
        confnet = None
        for replacement in (("is", ), tuple(), ("should", "have", )):
            confnet = asr_confnet.replace(("has", ), replacement)
            text = repr(confnet)
            self.assertEqual(repr(eval(text)), text)

        # Append one more slot after the two-word replacement and re-check.
        confnet.add([(0.5, 'want'), (0.5, 'pub')])
        text = repr(confnet)
        self.assertEqual(repr(eval(text)), text)
Exemplo n.º 2
0
    def test_replace(self):
        """Exercise ``UtteranceConfusionNetwork.replace`` and ``iter_ngrams``.

        Covers, in order:

        * replacing a phrase that does not occur (result equals the input),
        * replacing one word by one word,
        * replacing one word by the empty sequence,
        * replacing one word by two words and then enumerating the uni-,
          bi- and trigrams of the result after one more slot is appended,
        * chaining two further replacements and enumerating bigrams with
          the second ``iter_ngrams`` argument set to ``True``.
        """
        # Create a simple confusion network.
        A1, A2, A3 = 0.90, 0.05, 0.05
        B1, B2, B3 = 0.70, 0.20, 0.10
        C1, C2, C3 = 0.80, 0.10, 0.10

        asr_confnet = UtteranceConfusionNetwork()
        asr_confnet.add([[A1, "want"], [A2, "has"], [A3, 'ehm']])
        asr_confnet.add([[B1, "Chinese"],  [B2, "English"], [B3, 'cheap']])
        asr_confnet.add([[C1, "restaurant"],  [C2, "pub"],   [C3, 'hotel']])
        asr_confnet.merge().sort()

        # A phrase that is not in the network must leave it unchanged.
        replaced = asr_confnet.replace(("nothing", ), ("something", ))
        self.assertEqual(replaced, asr_confnet)

        # One-for-one replacement keeps the probability of the old word.
        replaced = asr_confnet.replace(("has", ), ("is", ))
        self.assertNotEqual(replaced, asr_confnet)
        self.assertEqual(list(replaced.cn[0][1]), [A2, "is"])

        # Replacement by the empty sequence.
        replaced = asr_confnet.replace(("has", ), tuple())
        self.assertNotEqual(replaced, asr_confnet)
        # NOTE(review): this sums the probabilities of the ORIGINAL network,
        # not of `replaced` -- confirm whether `replaced.cn[0]` was intended.
        self.assertAlmostEqual(sum(hyp[0] for hyp in asr_confnet.cn[0]), 1.)

        # One word replaced by two, followed by an additional slot.
        replaced = asr_confnet.replace(("has", ), ("should", "have", ))
        replaced.add([(0.5, 'want'), (0.5, 'pub')])
        unigrams = [['want'], ['ehm'], ['should'], ['have'], ['Chinese'],
                    ['English'], ['cheap'], ['restaurant'], ['pub'], ['hotel'],
                    ['want'], ['pub'], ]
        act_unigrams = [hyp[1] for hyp in replaced.iter_ngrams(1)]
        self.assertItemsEqual(unigrams, act_unigrams)
        bigrams = [
            ['want', 'Chinese'],
            ['want', 'English'],
            ['want', 'cheap'],
            ['ehm', 'Chinese'],
            ['ehm', 'English'],
            ['ehm', 'cheap'],
            ['should', 'have'],
            ['have', 'Chinese'],
            ['have', 'English'],
            ['have', 'cheap'],
            ['Chinese', 'restaurant'],
            ['Chinese', 'pub'],
            ['Chinese', 'hotel'],
            ['English', 'restaurant'],
            ['English', 'pub'],
            ['English', 'hotel'],
            ['cheap', 'restaurant'],
            ['cheap', 'pub'],
            ['cheap', 'hotel'],
            ['restaurant', 'want'],
            ['restaurant', 'pub'],
            ['pub', 'want'],
            ['pub', 'pub'],
            ['hotel', 'want'],
            ['hotel', 'pub'],
        ]
        act_bigrams = [hyp[1] for hyp in replaced.iter_ngrams(2)]
        self.assertItemsEqual(bigrams, act_bigrams)
        trigrams = [
            ['want', 'Chinese', 'restaurant'],
            ['want', 'Chinese', 'pub'],
            ['want', 'Chinese', 'hotel'],
            ['want', 'English', 'restaurant'],
            ['want', 'English', 'pub'],
            ['want', 'English', 'hotel'],
            ['want', 'cheap', 'restaurant'],
            ['want', 'cheap', 'pub'],
            ['want', 'cheap', 'hotel'],
            ['ehm', 'Chinese', 'restaurant'],
            ['ehm', 'Chinese', 'pub'],
            ['ehm', 'Chinese', 'hotel'],
            ['ehm', 'English', 'restaurant'],
            ['ehm', 'English', 'pub'],
            ['ehm', 'English', 'hotel'],
            ['ehm', 'cheap', 'restaurant'],
            ['ehm', 'cheap', 'pub'],
            ['ehm', 'cheap', 'hotel'],
            ['should', 'have', 'Chinese'],
            ['should', 'have', 'English'],
            ['should', 'have', 'cheap'],
            ['have', 'Chinese', 'restaurant'],
            ['have', 'Chinese', 'pub'],
            ['have', 'Chinese', 'hotel'],
            ['have', 'English', 'restaurant'],
            ['have', 'English', 'pub'],
            ['have', 'English', 'hotel'],
            ['have', 'cheap', 'restaurant'],
            ['have', 'cheap', 'pub'],
            ['have', 'cheap', 'hotel'],
            ['Chinese', 'restaurant', 'want'],
            ['Chinese', 'restaurant', 'pub'],
            ['Chinese', 'pub', 'want'],
            ['Chinese', 'pub', 'pub'],
            ['Chinese', 'hotel', 'want'],
            ['Chinese', 'hotel', 'pub'],
            ['English', 'restaurant', 'want'],
            ['English', 'restaurant', 'pub'],
            ['English', 'pub', 'want'],
            ['English', 'pub', 'pub'],
            ['English', 'hotel', 'want'],
            ['English', 'hotel', 'pub'],
            ['cheap', 'restaurant', 'want'],
            ['cheap', 'restaurant', 'pub'],
            ['cheap', 'pub', 'want'],
            ['cheap', 'pub', 'pub'],
            ['cheap', 'hotel', 'want'],
            ['cheap', 'hotel', 'pub'],
        ]
        act_trigrams = [hyp[1] for hyp in replaced.iter_ngrams(3)]
        self.assertItemsEqual(trigrams, act_trigrams)

        # Chain two more replacements: a single word that occurs in two
        # slots ('pub'), then a two-word phrase spanning adjacent slots.
        replaced2 = replaced.replace(('pub',), ('fast', 'food',))
        replaced2 = replaced2.replace(('want', 'English'), ('would', 'like', 'English'))
        # The expected bigrams contain '<s>' and '</s>' markers; presumably
        # the second argument of iter_ngrams turns sentence boundaries on
        # -- TODO confirm against its definition.
        bigrams = [
            ['<s>', 'ehm'],
            ['<s>', 'should'],
            ['<s>', 'would'],
            ['ehm', 'Chinese'],
            ['ehm', 'cheap'],
            ['should', 'have'],
            ['have', 'Chinese'],
            ['have', 'cheap'],
            ['would', 'like'],
            ['like', 'English'],
            ['English', 'restaurant'],
            ['English', 'hotel'],
            ['English', 'fast'],
            ['Chinese', 'restaurant'],
            ['Chinese', 'hotel'],
            ['Chinese', 'fast'],
            ['cheap', 'restaurant'],
            ['cheap', 'hotel'],
            ['cheap', 'fast'],
            ['restaurant', 'want'],
            ['restaurant', 'fast'],
            ['hotel', 'want'],
            ['hotel', 'fast'],
            ['fast', 'food'],
            ['food', 'want'],
            ['food', 'fast'],
            ['want', '</s>'],
            # Second ['fast', 'food'] entry: 'pub' was present in two slots
            # and assertItemsEqual compares element counts as well.
            ['fast', 'food'],
            ['food', '</s>']
        ]
        act_bigrams = [hyp[1] for hyp in replaced2.iter_ngrams(2, True)]
        self.assertItemsEqual(bigrams, act_bigrams)