def test_score_batch(self):
    """Check ``TextScorer.score_batch`` under three ``<unk>`` configurations.

    Every scenario scores a fresh 3x2 matrix of word IDs (``numpy.arange(6)``
    reshaped) together with identically built class IDs, all-ones float32
    membership probabilities and an all-ones mask. ``logprobs[k]`` is compared
    against column ``k`` of the word IDs, skipping the first row, with the
    expected value ``log(word_id / 5)``.
    """

    def score(scorer, unk_at=None):
        """Build the batch, optionally plant ``<unk>`` at ``unk_at``, score it."""
        ids = numpy.arange(6).reshape((3, 2))
        if unk_at is not None:
            ids[unk_at] = self.vocabulary.word_to_id['<unk>']
        classes = numpy.arange(6).reshape((3, 2))
        probs = numpy.ones_like(ids).astype('float32')
        ones = numpy.ones_like(ids)
        return ids, scorer.score_batch(ids, classes, probs, ones)

    def expected(ids, rows, column):
        """Expected log probability for the selected word IDs."""
        return numpy.log(ids[rows, column].astype('float32') / 5)

    from_second_row = slice(1, None)
    from_third_row = slice(2, None)

    # Default scorer: the network's own <unk> prediction is used, so both
    # columns are checked in full.
    ids, logprobs = score(TextScorer(self.dummy_network))
    assert_almost_equal(logprobs[0], expected(ids, from_second_row, 0))
    assert_almost_equal(logprobs[1], expected(ids, from_second_row, 1))

    # ignore_unk=True: the <unk> planted at row 1 of the second column is
    # dropped, leaving only the entry for row 2 in that column.
    ids, logprobs = score(
        TextScorer(self.dummy_network, ignore_unk=True), unk_at=(1, 1))
    assert_almost_equal(logprobs[0], expected(ids, from_second_row, 0))
    assert_almost_equal(logprobs[1], expected(ids, from_third_row, 1))

    # unk_penalty=-5: the <unk> entry is kept but scored with the constant
    # penalty instead of the network output.
    ids, logprobs = score(
        TextScorer(self.dummy_network, ignore_unk=False, unk_penalty=-5),
        unk_at=(1, 1))
    assert_almost_equal(logprobs[0], expected(ids, from_second_row, 0))
    assert_almost_equal(logprobs[1][0], -5)
    assert_almost_equal(logprobs[1][1], expected(ids, 2, 1))
def test_score_batch(self):
    """Verify ``TextScorer.score_batch`` for three ways of handling ``<unk>``.

    Each scenario rebuilds the same inputs: a 3x2 word ID matrix holding
    0..5, a class ID matrix built the same way, float32 membership
    probabilities of one, and a mask of ones. The expected log probability
    of a word is ``log(word_id / 5)``; the first row of the batch has no
    corresponding entry in the result.
    """
    batch_shape = (3, 2)
    unk_token = '<unk>'

    # Scenario 1: the scorer is left at its defaults, so the <unk>
    # probability comes from the network like any other word.
    text_scorer = TextScorer(self.dummy_network)
    words = numpy.arange(6).reshape(batch_shape)
    classes = numpy.arange(6).reshape(batch_shape)
    probs = numpy.ones_like(words).astype('float32')
    ones = numpy.ones_like(words)
    result = text_scorer.score_batch(words, classes, probs, ones)
    for column in range(2):
        predicted = words[1:, column].astype('float32')
        assert_almost_equal(result[column], numpy.log(predicted / 5))

    # Scenario 2: with ignore_unk=True the <unk> placed at [1, 1] is left
    # out of the result, so the second sequence only keeps the last word.
    text_scorer = TextScorer(self.dummy_network, ignore_unk=True)
    words = numpy.arange(6).reshape(batch_shape)
    words[1, 1] = self.vocabulary.word_to_id[unk_token]
    classes = numpy.arange(6).reshape(batch_shape)
    probs = numpy.ones_like(words).astype('float32')
    ones = numpy.ones_like(words)
    result = text_scorer.score_batch(words, classes, probs, ones)
    first_expected = numpy.log(words[1:, 0].astype('float32') / 5)
    second_expected = numpy.log(words[2:, 1].astype('float32') / 5)
    assert_almost_equal(result[0], first_expected)
    assert_almost_equal(result[1], second_expected)

    # Scenario 3: with unk_penalty=-5 the <unk> stays in the result and is
    # given that constant log probability.
    text_scorer = TextScorer(self.dummy_network,
                             ignore_unk=False,
                             unk_penalty=-5)
    words = numpy.arange(6).reshape(batch_shape)
    words[1, 1] = self.vocabulary.word_to_id[unk_token]
    classes = numpy.arange(6).reshape(batch_shape)
    probs = numpy.ones_like(words).astype('float32')
    ones = numpy.ones_like(words)
    result = text_scorer.score_batch(words, classes, probs, ones)
    first_expected = numpy.log(words[1:, 0].astype('float32') / 5)
    last_word_expected = numpy.log(words[2, 1].astype('float32') / 5)
    assert_almost_equal(result[0], first_expected)
    assert_almost_equal(result[1][0], -5)
    assert_almost_equal(result[1][1], last_word_expected)
def test_score_batch(self):
    """Exercise ``TextScorer.score_batch`` with and without a shortlist.

    All scenarios score the same batch: word IDs 0..14 arranged as a 5x3
    matrix (``numpy.arange(15).reshape((3, 5)).T``), class IDs obtained from
    ``self.vocabulary.get_class_memberships``, float32 membership
    probabilities of one, and a mask of ones. The first two sequences and
    the first entry of the third are expected to score ``log(word_id / 100)``
    in every scenario; only the remaining three entries of the third
    sequence depend on the scorer options.
    """

    def score_and_check_common(scorer):
        """Score the batch and assert the scenario-independent expectations."""
        ids = numpy.arange(15).reshape((3, 5)).T
        classes, _ = self.vocabulary.get_class_memberships(ids)
        probs = numpy.ones_like(ids).astype('float32')
        ones = numpy.ones_like(ids)
        scores = scorer.score_batch(ids, classes, probs, ones)
        for sequence in (0, 1):
            assert_almost_equal(
                scores[sequence],
                numpy.log(ids[1:, sequence].astype('float32') / 100.0))
        self.assertAlmostEqual(scores[2][0],
                               numpy.log(11.0 / 100.0),
                               places=5)  # </s>
        return scores

    # use_shortlist=False: the network predicts the <unk> probability, and
    # the <unk> entry and the two entries after it all receive that same
    # value. Class IDs come from get_class_memberships().
    logprobs = score_and_check_common(
        TextScorer(self.dummy_network, use_shortlist=False))
    unk_logprob = numpy.log(12.0 / 100.0)
    for position in (1, 2, 3):  # position 1 is <unk>
        self.assertAlmostEqual(logprobs[2][position], unk_logprob, places=5)

    # use_shortlist=True: the predicted <unk> probability is distributed
    # among out-of-shortlist words according to word frequency (shares of
    # 0.3 and 0.7 here), while <unk> itself yields None.
    logprobs = score_and_check_common(
        TextScorer(self.dummy_network, use_shortlist=True))
    self.assertIsNone(logprobs[2][1])  # <unk>
    for position, share in ((2, 0.3), (3, 0.7)):
        self.assertAlmostEqual(logprobs[2][position],
                               numpy.log(12.0 / 100.0 * share),
                               places=5)

    # exclude_unk=True: OOV and OOS words are replaced with None.
    logprobs = score_and_check_common(
        TextScorer(self.dummy_network, use_shortlist=False, exclude_unk=True))
    for position in (1, 2, 3):  # position 1 is <unk>
        self.assertIsNone(logprobs[2][position])