def test_pad_word_sequence_handles_words_and_characters(self):
    """Check padding of a word sequence whose words are lists of character indices.

    Two words are padded to three words of four characters each. The expected
    result has one all-zero word added at the front and zeros appended to the
    end of each existing word.
    """
    target_lengths = {'word_sequence_length': 3, 'word_character_length': 4}
    instance = IndexedTrueFalseInstance([[1, 2], [3, 1, 2]], True)

    padded = instance.pad_word_sequence(instance.word_indices, target_lengths)

    expected = [
        [0, 0, 0, 0],
        [1, 2, 0, 0],
        [3, 1, 2, 0],
    ]
    assert padded == expected
def test_as_training_data_produces_correct_numpy_arrays(self):
    """Check the arrays returned by ``as_training_data`` for both label values.

    With a ``True`` label the expected label array is ``[0, 1]``; after the
    label is switched to ``False`` it is ``[1, 0]``. The inputs are expected
    to equal the original word indices.
    """
    instance = IndexedTrueFalseInstance([1, 2, 3, 4], True)

    inputs, true_label = instance.as_training_data()
    expected_true_label = numpy.asarray([0, 1])
    assert numpy.all(true_label == expected_true_label)
    expected_inputs = numpy.asarray([1, 2, 3, 4])
    assert numpy.all(inputs == expected_inputs)

    # Flip the label on the same instance and only inspect the label output.
    instance.label = False
    _ignored_inputs, false_label = instance.as_training_data()
    expected_false_label = numpy.asarray([1, 0])
    assert numpy.all(false_label == expected_false_label)
def test_pad_word_sequence_handles_words_and_characters_greater(self):
    """Check padding when the target word count exceeds the input by more than one.

    Two words are padded to five words of four characters each. The expected
    result has three all-zero words at the front, followed by the original
    words with zeros appended to reach four characters.
    """
    # NOTE(review): these length keys differ from the 'word_sequence_length' /
    # 'word_character_length' names used by other tests in this file - confirm
    # which naming the code under test expects.
    target_lengths = {'num_sentence_words': 5, 'num_word_characters': 4}
    instance = IndexedTrueFalseInstance([[1, 2], [3, 1, 2]], True)

    padded = instance.pad_word_sequence(instance.word_indices, target_lengths)

    expected = [
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [1, 2, 0, 0],
        [3, 1, 2, 0],
    ]
    assert padded == expected
def setUp(self):
    """Create the fixtures shared by the tests in this class.

    Sets ``base_instance`` (a true/false instance labelled ``True``),
    ``background_instances`` (two unlabelled true/false instances) and
    ``qa_instance`` (a question-answer instance).
    """
    base = IndexedTrueFalseInstance([1, 2], True)
    self.base_instance = base

    # Background instances are constructed with a label of None.
    background_word_indices = ([2, 3, 4], [4, 5])
    self.background_instances = [
        IndexedTrueFalseInstance(indices, None)
        for indices in background_word_indices
    ]

    question = [1, 2, 3]
    answer_options = [[2, 3], [4], [5, 6]]
    # NOTE(review): the final argument is presumably the index of the correct
    # answer option - confirm against IndexedQuestionAnswerInstance.
    self.qa_instance = IndexedQuestionAnswerInstance(question, answer_options, 1)
def setUp(self):
    """Create the ``IndexedMultipleTrueFalseInstance`` fixture used by the tests."""
    # Most tests only need plain IndexedTrueFalseInstances as the underlying
    # options, which keeps the fixture simple.
    options = [
        IndexedTrueFalseInstance([1], False),
        IndexedTrueFalseInstance([2, 3, 4], False),
        IndexedTrueFalseInstance([5, 6], True),
        IndexedTrueFalseInstance([7, 8], False),
    ]
    # NOTE(review): the label 2 presumably points at the option constructed
    # with True (position 2 in the list) - confirm against the class.
    self.instance = IndexedMultipleTrueFalseInstance(options, 2)
def test_get_lengths_works_with_words_and_characters(self):
    """Check ``get_lengths`` on an instance whose words are character-index lists.

    For words ``[1, 2]`` and ``[3, 1, 2]`` the expected lengths are two words
    and a character length of three (the length of the longer word).
    """
    instance = IndexedTrueFalseInstance([[1, 2], [3, 1, 2]], True)

    lengths = instance.get_lengths()

    expected = {'word_sequence_length': 2, 'word_character_length': 3}
    assert lengths == expected
def test_as_training_data_produces_correct_numpy_arrays_with_background_instances(self):
    """Check ``as_training_data`` when every option carries background instances.

    Four options, each wrapped in an ``IndexedBackgroundInstance`` with two
    background instances, are combined with label 3. The expected outputs are
    a label array of ``[0, 0, 0, 1]``, the stacked option word indices, and
    the stacked background word indices.
    """
    # Each row: (option word indices, option label, background word indices).
    option_specs = [
        ([1, 2], False, ([2], [3])),
        ([3, 4], False, ([5], [6])),
        ([5, 6], False, ([8], [9])),
        ([7, 8], True, ([11], [12])),
    ]
    options = []
    for word_indices, is_true, background_indices in option_specs:
        option = IndexedTrueFalseInstance(word_indices, is_true)
        background = [
            IndexedTrueFalseInstance(indices, None)
            for indices in background_indices
        ]
        options.append(IndexedBackgroundInstance(option, background))
    instance = IndexedMultipleTrueFalseInstance(options, 3)

    (word_arrays, background_arrays), label = instance.as_training_data()

    expected_label = numpy.asarray([0, 0, 0, 1])
    assert numpy.all(label == expected_label)

    expected_words = numpy.asarray([[1, 2], [3, 4], [5, 6], [7, 8]])
    assert numpy.all(word_arrays == expected_words)

    expected_background = numpy.asarray([
        [[2], [3]],
        [[5], [6]],
        [[8], [9]],
        [[11], [12]],
    ])
    assert numpy.all(background_arrays == expected_background)
def test_pad_adds_zeros_on_left(self):
    """Check that padding four indices to length five puts a zero at the front."""
    instance = IndexedTrueFalseInstance([1, 2, 3, 4], True)
    target_lengths = {'word_sequence_length': 5}

    # pad() is expected to update word_indices on the instance itself.
    instance.pad(target_lengths)

    expected = [0, 1, 2, 3, 4]
    assert instance.word_indices == expected
def test_get_lengths_returns_length_of_word_indices(self):
    """Check that ``get_lengths`` reports a word sequence length of four for four indices."""
    instance = IndexedTrueFalseInstance([1, 2, 3, 4], True)

    lengths = instance.get_lengths()

    expected = {'word_sequence_length': 4}
    assert lengths == expected
def test_pad_truncates_from_right(self):
    """Check truncation when the target length is shorter than the sequence.

    Padding four indices to length three is expected to leave ``[2, 3, 4]``,
    i.e. the rightmost three indices are the ones kept.
    """
    instance = IndexedTrueFalseInstance([1, 2, 3, 4], True)
    target_lengths = {'word_sequence_length': 3}

    # pad() is expected to update word_indices on the instance itself.
    instance.pad(target_lengths)

    expected = [2, 3, 4]
    assert instance.word_indices == expected