def test_raw_tokens_on_sentence_wit_mixed_element(self):
    """raw_tokens() yields multiwords plus the words outside any multiword.

    The sentence mixes multiwords, words and empty nodes; the words that
    sit inside a multiword range and all empty nodes are expected to be
    absent from the generated sequence.
    """
    leading_multiword = Multiword(first_index=1, last_index=2)
    standalone_word = Word(index=3)
    trailing_multiword = Multiword(first_index=4, last_index=6)
    closing_word = Word(index=7)
    expected = [
        leading_multiword,
        standalone_word,
        trailing_multiword,
        closing_word,
    ]

    elements = [
        leading_multiword,  # covers words 1-2
        Word(index=1),
        Word(index=2),
        standalone_word,  # word 3
        EmptyNode(main_index=3, sub_index=1),
        EmptyNode(main_index=3, sub_index=2),
        trailing_multiword,  # covers words 4-6
        Word(index=4),
        EmptyNode(main_index=4, sub_index=1),
        EmptyNode(main_index=4, sub_index=2),
        Word(index=5),
        Word(index=6),
        closing_word,  # word 7
    ]
    sentence = Sentence(elements)

    result = sentence.raw_tokens()

    # The type check runs before the generator is consumed.
    self.assertIsInstance(result, Generator)
    self.assertEqual(expected, list(result))
def test_is_valid_false_if_consecutive_multiwords_overlap(self):
    """Two back-to-back multiwords over the same 1-2 range are invalid."""
    elements = [
        Multiword(first_index=1, last_index=2),
        Multiword(first_index=1, last_index=2),  # same range repeated
        Word(index=1),
        Word(index=2),
    ]
    overlapping = Sentence(elements)

    self.assertFalse(overlapping.is_valid())
def test_to_conllu_of_sentence_with_all_attributes(self):
    """to_conllu() of a multiword with indexes, form and misc all set.

    The expected line carries the ``1-2`` ID range, the form, seven ``_``
    placeholder columns and the misc value, separated by tabs.
    """
    expected_line = '1-2\tForm\t_\t_\t_\t_\t_\t_\t_\tMisc'
    token = Multiword(
        first_index=1,
        last_index=2,
        form='Form',
        misc='Misc',
    )

    self.assertEqual(expected_line, token.to_conllu())
def test_is_valid_false_if_multiwords_are_placed_incorrectly(self):
    """A multiword that is not placed right before its first word is invalid."""
    misplaced = Multiword(first_index=3, last_index=4)
    elements = [
        Multiword(first_index=1, last_index=2),
        misplaced,  # belongs immediately before Word(index=3)
        Word(index=1),
        Word(index=2),
        Word(index=3),
        Word(index=4),
    ]
    sentence = Sentence(elements)

    self.assertFalse(sentence.is_valid())
def test_is_valid_false_on_sentence_with_invalid_elements(self):
    """A sentence holding an invalid element is itself reported invalid."""
    # first_index == last_index makes the multiword itself invalid.
    degenerate_multiword = Multiword(first_index=1, last_index=1)
    elements = [
        degenerate_multiword,
        Word(index=1),
        Word(index=2),
    ]
    sentence = Sentence(elements)

    self.assertFalse(sentence.is_valid())
def test_words_on_sentence_wit_mixed_element(self):
    """words() yields only the Word elements, in sentence order.

    Multiwords and empty nodes interleaved with the words are expected to
    be skipped.
    """
    word_1 = Word(index=1)
    word_2 = Word(index=2)
    word_3 = Word(index=3)
    word_4 = Word(index=4)
    expected = [word_1, word_2, word_3, word_4]

    elements = [
        Multiword(first_index=1, last_index=2),
        word_1,
        word_2,
        EmptyNode(main_index=2, sub_index=1),
        EmptyNode(main_index=2, sub_index=2),
        Multiword(first_index=3, last_index=4),
        word_3,
        word_4,
    ]
    sentence = Sentence(elements)

    result = sentence.words()

    # The type check runs before the generator is consumed.
    self.assertIsInstance(result, Generator)
    self.assertEqual(expected, list(result))
def test_is_valid_false_if_multiword_last_index_is_too_big(self):
    """A multiword whose range ends past the last word is invalid."""
    # The range ends at 4, but the sentence only has words 1 to 3.
    overreaching = Multiword(first_index=2, last_index=4)
    elements = [
        Word(index=1),
        overreaching,
        Word(index=2),
        Word(index=3),
    ]
    sentence = Sentence(elements)

    self.assertFalse(sentence.is_valid())
def test_is_valid_true_if_first_element_is_multiword_with_index_1(self):
    """A sentence may open with a multiword whose range starts at 1."""
    opening_multiword = Multiword(first_index=1, last_index=2)
    # The covered words are supplied too, so no other validation rule
    # can make this sentence fail.
    covered_words = [Word(index=1), Word(index=2)]
    sentence = Sentence([opening_multiword, *covered_words])

    self.assertTrue(sentence.is_valid())
def test_is_valid_true_if_multiword_index_range_is_within_sentence_bounds(
        self):
    """A multiword covering words that all exist in the sentence is valid."""
    elements = [
        Word(index=1),
        Multiword(first_index=2, last_index=3),  # words 2 and 3 follow
        Word(index=2),
        Word(index=3),
    ]
    sentence = Sentence(elements)

    self.assertTrue(sentence.is_valid())
def test_is_valid_false_if_multiword_index_is_skipped(self):
    """A multiword that starts after a gap in the word indexes is invalid."""
    elements = [
        Word(index=1),
        # No element with index 2: the multiword jumps straight to 3.
        Multiword(first_index=3, last_index=4),
        Word(index=3),
        Word(index=4),
    ]
    sentence = Sentence(elements)

    self.assertFalse(sentence.is_valid())
def test_words_on_sentence_without_word_elements(self):
    """words() is an empty generator when the sentence has no Word at all."""
    no_words = [
        EmptyNode(main_index=0, sub_index=1),
        Multiword(first_index=1, last_index=2),
    ]
    sentence = Sentence(no_words)

    result = sentence.words()

    # The type check runs before the generator is consumed.
    self.assertIsInstance(result, Generator)
    self.assertEqual([], list(result))
def p_wordline_multiword(prod: YaccProduction) -> None:
    'wordline : RANGE_ID TAB FORM TAB LEMMA TAB UPOS TAB XPOS TAB FEATS ' \
        'TAB HEAD TAB DEPREL TAB DEPS TAB MISC NEWLINE'
    # NOTE(review): the leading string is the grammar rule for this
    # production (presumably read by the parser generator through
    # ``__doc__``), so it must stay the first statement, byte-for-byte.
    #
    # Builds a Multiword from a range-ID word line and stores it in prod[0].
    # Raises IllegalMultiwordError when LEMMA is anything but '_' or when
    # any of UPOS, XPOS, FEATS, HEAD, DEPREL or DEPS is not None.

    # Grammar symbols sit at odd positions (TABs occupy the even ones):
    # 7=UPOS, 9=XPOS, 11=FEATS, 13=HEAD, 15=DEPREL, 17=DEPS.
    must_be_unset = (7, 9, 11, 13, 15, 17)

    lemma_given = prod[5] != '_'
    if lemma_given or not all(prod[pos] is None for pos in must_be_unset):
        raise IllegalMultiwordError(prod)

    # prod[1] holds the parsed RANGE_ID; its items 0 and 1 are used as the
    # first and last index of the range.
    first, last = prod[1][0], prod[1][1]
    prod[0] = Multiword(
        first_index=first,
        last_index=last,
        form=prod[3],
        misc=prod[19],
    )
def test_to_conllu_with_many_elements_and_no_comments(self):
    """to_conllu() emits one line per element followed by a blank line."""
    expected_conllu = (
        '1-2\tFoobar\t_\t_\t_\t_\t_\t_\t_\t_\n'
        '1\tFoo\t_\t_\t_\t_\t_\t_\t_\t_\n'
        '2\tbar\t_\t_\t_\t_\t_\t_\t_\t_\n'
        '2.1\tBaz\t_\t_\t_\t_\t_\t_\t_\t_\n'
        '\n'
    )
    parts = [
        Multiword(first_index=1, last_index=2, form="Foobar"),
        Word(index=1, form='Foo'),
        Word(index=2, form='bar'),
        EmptyNode(main_index=2, sub_index=1, form='Baz'),
    ]
    sentence = Sentence(elements=parts)

    self.assertEqual(expected_conllu, sentence.to_conllu())
def test_init_form(self):
    """The ``form`` constructor argument is readable as ``.form``."""
    multiword = Multiword(form='Foo')
    stored_form = multiword.form

    self.assertEqual('Foo', stored_form)
def test_init_misc(self):
    """The ``misc`` constructor argument is readable as ``.misc``."""
    multiword = Multiword(misc='Foo')
    stored_misc = multiword.misc

    self.assertEqual('Foo', stored_misc)
def test_is_valid_false_with_last_index_equal_to_first_index(self):
    """A multiword whose first and last index are both 42 is invalid."""
    same_index = 42
    multiword = Multiword(first_index=same_index, last_index=same_index)

    self.assertFalse(multiword.is_valid())
def test_is_valid_false_with_no_values_set(self):
    """A multiword built without any argument is invalid."""
    blank_multiword = Multiword()
    validity = blank_multiword.is_valid()

    self.assertFalse(validity)
def test_init_last_index(self):
    """The ``last_index`` constructor argument is readable as ``.last_index``."""
    multiword = Multiword(last_index=42)
    stored_last_index = multiword.last_index

    self.assertEqual(42, stored_last_index)
def test_is_valid_false_with_last_index_is_not_set(self):
    """A multiword given only ``first_index`` is invalid."""
    open_ended = Multiword(first_index=42)
    validity = open_ended.is_valid()

    self.assertFalse(validity)
def test_to_conllu_of_invalid_sentence_with_no_attributes(self):
    """to_conllu() of an argument-less multiword still renders a line.

    The expected ID column is ``None-None`` and the nine remaining columns
    are ``_``.
    """
    expected_line = 'None-None\t_\t_\t_\t_\t_\t_\t_\t_\t_'
    blank_multiword = Multiword()

    self.assertEqual(expected_line, blank_multiword.to_conllu())
def test_is_valid_true_with_all_values_set(self):
    """A multiword with range 1-2, a form and a misc value is valid."""
    complete = Multiword(
        first_index=1,
        last_index=2,
        form='Form',
        misc='Misc',
    )

    self.assertTrue(complete.is_valid())
def test_is_valid_true_with_first_index_greater_than_zero(self):
    """A multiword ranging from 1 to 42 is valid."""
    multiword = Multiword(first_index=1, last_index=42)
    validity = multiword.is_valid()

    self.assertTrue(validity)
def test_is_valid_false_if_first_element_is_multiword_with_index_not_1(
        self):
    """A sentence opening with a multiword that starts at 2 is invalid."""
    late_start = Multiword(first_index=2, last_index=5)
    sentence = Sentence([late_start])

    self.assertFalse(sentence.is_valid())
def test_is_valid_false_with_first_index_less_than_zero(self):
    """A multiword whose first index is -1 is invalid."""
    negative_start = Multiword(first_index=-1, last_index=42)
    validity = negative_start.is_valid()

    self.assertFalse(validity)
def test_is_valid_false_on_sentence_without_word_elements(self):
    """A sentence made only of an empty node and a multiword is invalid."""
    no_words = [
        EmptyNode(main_index=0, sub_index=1),
        Multiword(first_index=1, last_index=2),
    ]
    sentence = Sentence(no_words)

    self.assertFalse(sentence.is_valid())
def test_init_first_index(self):
    """The ``first_index`` constructor argument is readable as ``.first_index``."""
    multiword = Multiword(first_index=42)
    stored_first_index = multiword.first_index

    self.assertEqual(42, stored_first_index)