def test_empty_set_format_error():
    """
    Check that serializing a token fails with FormatError when one of its
    feature attributes is left with an empty collection of values.
    """
    line = (
        '33 cintre cintre NOUN _ Gender=Fem|Number=Sing '
        '30 nmod 2:nsubj|4:root SpaceAfter=No'
    )
    tok = Token(line)

    # 'Gender' is parsed with the single value 'Fem'; popping it leaves the
    # attribute present but without any value to output.
    tok.feats['Gender'].pop()

    with pytest.raises(FormatError):
        tok.conll()
def test_all_empty_deps_component_error():
    """
    Check that serializing a token fails with FormatError when every
    component of one of its deps values is None.
    """
    line = (
        '33 cintre cintre NOUN _ Gender=Fem|Number=Sing '
        '30 nmod 2:nsubj|4:root SpaceAfter=No'
    )
    tok = Token(line)

    # Blank out the leading component of the value for head '2' while
    # keeping the remaining components exactly as they were parsed.
    blanked = [None, *tok.deps['2'][1:]]
    tok.deps['2'] = blanked

    with pytest.raises(FormatError):
        tok.conll()
def test_to_string():
    """
    Check that a token built from a line serializes back to that same line.
    """
    line = (
        '26 surmonté surmonter VERB _ '
        'Gender=Masc|Number=Sing|Tense=Past|VerbForm=Part 22 acl _ _'
    )

    rendered = Token(line).conll()

    assert rendered == line
def test_feats_keep_case_insensitive_order():
    """
    Check that feature attributes already in case insensitive order are
    output in that same order.
    """
    line = (
        '10 gave give VERB _ gender=Fem|Number=Sing '
        '0 root _ SpaceAfter=No'
    )

    rendered = Token(line).conll()

    # 'gender' (lowercase) must stay ahead of 'Number' in the output.
    assert rendered == line
def test_deps_parsing():
    """
    Check that the deps column is parsed into per-head values and that the
    token still serializes back to the original line.
    """
    line = (
        '33 cintre cintre NOUN _ Gender=Masc|Number=Sing '
        '30 nmod 2:nsubj|4:nmod SpaceAfter=No'
    )
    tok = Token(line)

    # Each head maps to a 4-tuple; only the first slot is filled here.
    expected_by_head = (
        ('2', ('nsubj', None, None, None)),
        ('4', ('nmod', None, None, None)),
    )
    for head, parsed_value in expected_by_head:
        assert tok.deps[head] == parsed_value

    assert tok.conll() == line
def test_feats_induce_case_insensitive_order():
    """
    Check that feature attributes given out of order are output sorted
    case insensitively.
    """
    unsorted_line = (
        '10 gave give VERB _ Number=Sing|gender=Fem '
        '0 root _ SpaceAfter=No'
    )
    sorted_line = (
        '10 gave give VERB _ gender=Fem|Number=Sing '
        '0 root _ SpaceAfter=No'
    )

    rendered = Token(unsorted_line).conll()

    assert rendered == sorted_line
def test_deps_sort_order_decimal():
    """
    Check that enhanced dependencies with decimal head ids are output in
    sorted order.
    """
    unsorted_line = (
        '10 gave give VERB _ Number=Sing|Gender=Fem '
        '0 root 10.2:nsubj|2:nmod|10.1:nsubj SpaceAfter=No'
    )
    # Expected output: features reordered, and heads ordered 2, 10.1, 10.2.
    sorted_line = (
        '10 gave give VERB _ Gender=Fem|Number=Sing '
        '0 root 2:nmod|10.1:nsubj|10.2:nsubj SpaceAfter=No'
    )

    rendered = Token(unsorted_line).conll()

    assert rendered == sorted_line
def test_deps_sort_order_double_digits():
    """
    Check that enhanced dependencies are ordered by the numeric value of the
    head id rather than by its string form.
    """
    unsorted_line = (
        '10 gave give VERB _ Number=Sing|Gender=Fem '
        '0 root 10:nsubj|2:nmod SpaceAfter=No'
    )
    # A string sort would put '10' before '2'; the expected output does not.
    sorted_line = (
        '10 gave give VERB _ Gender=Fem|Number=Sing '
        '0 root 2:nmod|10:nsubj SpaceAfter=No'
    )

    rendered = Token(unsorted_line).conll()

    assert rendered == sorted_line
def test_deps_sort_order():
    """
    Check that enhanced dependencies given out of order are output sorted.
    """
    unsorted_line = (
        '10 gave give VERB _ Number=Sing|Gender=Fem '
        '0 root 4:nsubj|2:nmod SpaceAfter=No'
    )
    sorted_line = (
        '10 gave give VERB _ Gender=Fem|Number=Sing '
        '0 root 2:nmod|4:nsubj SpaceAfter=No'
    )

    rendered = Token(unsorted_line).conll()

    assert rendered == sorted_line
def test_remove_feature_to_string():
    """
    Check a token's string representation once a feature attribute has been
    deleted entirely.
    """
    original_line = (
        '33 cintre cintre NOUN _ Gender=Masc|Number=Sing '
        '30 nmod _ SpaceAfter=No'
    )
    line_without_gender = (
        '33 cintre cintre NOUN _ '
        'Number=Sing 30 nmod _ SpaceAfter=No'
    )
    tok = Token(original_line)

    del tok.feats['Gender']

    assert tok.conll() == line_without_gender
def test_modify_dict_field_to_string():
    """
    Check a token's string representation once an extra value has been added
    to an existing feature attribute.
    """
    original_line = (
        '33 cintre cintre NOUN _ Gender=Masc|Number=Sing '
        '30 nmod _ SpaceAfter=No'
    )
    # Both Gender values are expected, comma separated, with 'Fem' first.
    line_with_both_genders = (
        '33 cintre cintre NOUN _ '
        'Gender=Fem,Masc|Number=Sing 30 nmod _ SpaceAfter=No'
    )
    tok = Token(original_line)

    tok.feats['Gender'].add('Fem')

    assert tok.conll() == line_with_both_genders
def test_modify_unit_field_to_string():
    """
    Check a token's string representation once one of its single-valued
    fields has been reassigned.
    """
    original_line = (
        '33 cintre cintre NOUN _ Gender=Masc|Number=Sing '
        '30 nmod _ SpaceAfter=No'
    )
    line_with_new_lemma = (
        '33 cintre pain NOUN _ '
        'Gender=Masc|Number=Sing 30 nmod _ SpaceAfter=No'
    )
    tok = Token(original_line)

    tok.lemma = 'pain'

    assert tok.conll() == line_with_new_lemma
def test_del_values():
    """
    Check that entries can be deleted from both the feats and the misc
    columns, and that the output reflects both deletions.
    """
    original_line = (
        '33 cintre cintre NOUN _ Gender=Fem|Number=Sing '
        '30 nmod 2:nsubj|4:root SpaceAfter=No'
    )
    # With its only entry removed, the misc column is expected to be '_'.
    trimmed_line = (
        '33 cintre cintre NOUN _ Number=Sing '
        '30 nmod 2:nsubj|4:root _'
    )
    tok = Token(original_line)

    del tok.feats['Gender']
    del tok.misc['SpaceAfter']

    assert trimmed_line == tok.conll()
def test_misc_parsing_output():
    """
    Check that the misc column is output correctly in CoNLL-U format after
    adding a valueless key, an extra value and a new multi-valued key.
    """
    original_line = (
        '33 cintre cintre NOUN _ Gender=Fem|Number=Sing '
        '30 nmod 2:nsubj|4:root SpaceAfter=No'
    )
    # Expected misc column: keys and values in sorted order, and the
    # None-valued key written without an '=' part.
    expected_line = (
        '33 cintre cintre NOUN _ Gender=Fem|Number=Sing '
        '30 nmod 2:nsubj|4:root Independent|OtherTest=X,Y,Z|SpaceAfter=No,Yes'
    )
    tok = Token(original_line)

    tok.misc['Independent'] = None
    tok.misc['SpaceAfter'].add('Yes')

    # Values are deliberately inserted out of alphabetical order.
    tok.misc['OtherTest'] = set()
    for value in ('X', 'Z', 'Y'):
        tok.misc['OtherTest'].add(value)

    assert expected_line == tok.conll()