def parse_incr(in_file, fields=None, field_parsers=None, metadata_parsers=None):
    """Lazily parse ``in_file``, yielding one ``TokenList`` per sentence.

    Args:
        in_file: Input handed to ``parse_conllu_plus_fields`` and
            ``parse_sentences``.
        fields: Column names forwarded to ``parse_token_and_metadata``. When
            falsy (``None`` or empty) they are obtained from
            ``parse_conllu_plus_fields(in_file, ...)`` instead.
        field_parsers: Forwarded unchanged to ``parse_token_and_metadata``.
        metadata_parsers: Forwarded unchanged to ``parse_conllu_plus_fields``
            and ``parse_token_and_metadata``.

    Yields:
        ``TokenList`` built from the unpacked result of
        ``parse_token_and_metadata`` for each sentence.
    """
    # Field discovery happens once, before any sentence is consumed.
    fields = fields or parse_conllu_plus_fields(
        in_file, metadata_parsers=metadata_parsers)

    # The same keyword arguments are reused for every sentence.
    shared_options = {
        "fields": fields,
        "field_parsers": field_parsers,
        "metadata_parsers": metadata_parsers,
    }

    for raw_sentence in parse_sentences(in_file):
        parsed = parse_token_and_metadata(raw_sentence, **shared_options)
        yield TokenList(*parsed)
def test_simple(self):
    # Two three-token sentences separated by a blank line should come back
    # as two sentence strings with the trailing newline removed.
    raw_text = dedent("""\
        1\thej
        2\tdå
        3\thej

        1\thej
        2\tdå
        3\thej
    """)
    expected = ['1\thej\n2\tdå\n3\thej'] * 2

    actual = list(parse_sentences(string_to_file(raw_text)))

    self.assertEqual(actual, expected)
def parse_incr(in_file, fields=None, field_parsers=None, metadata_parsers=None):
    """Lazily parse an opened file, yielding one ``TokenList`` per sentence.

    Args:
        in_file: Object exposing a ``read`` attribute; it is handed to
            ``parse_conllu_plus_fields`` and ``parse_sentences``.
        fields: Column names forwarded to ``parse_token_and_metadata``. When
            falsy (``None`` or empty) they are obtained from
            ``parse_conllu_plus_fields(in_file, ...)`` instead.
        field_parsers: Forwarded unchanged to ``parse_token_and_metadata``.
        metadata_parsers: Forwarded unchanged to ``parse_conllu_plus_fields``
            and ``parse_token_and_metadata``.

    Yields:
        ``TokenList`` built from the unpacked result of
        ``parse_token_and_metadata`` for each sentence.

    Raises:
        FileNotFoundError: If ``in_file`` has no ``read`` attribute. Because
            this is a generator, the error surfaces on the first iteration,
            not at call time.
    """
    looks_like_file = hasattr(in_file, 'read')
    if not looks_like_file:
        raise FileNotFoundError(
            "Invalid file, 'parse_incr' needs an opened file as input")

    # Field discovery happens once, before any sentence is consumed.
    fields = fields or parse_conllu_plus_fields(
        in_file, metadata_parsers=metadata_parsers)

    # The same keyword arguments are reused for every sentence.
    shared_options = {
        "fields": fields,
        "field_parsers": field_parsers,
        "metadata_parsers": metadata_parsers,
    }

    for raw_sentence in parse_sentences(in_file):
        parsed = parse_token_and_metadata(raw_sentence, **shared_options)
        yield TokenList(*parsed)
def parse_incr(
    in_file: T.TextIO,
    fields: T.Optional[T.Sequence[str]] = None,
    field_parsers: T.Optional[T.Dict[str, _FieldParserType]] = None,
    metadata_parsers: T.Optional[T.Dict[str, _MetadataParserType]] = None
) -> T.Iterator[TokenList]:
    """Lazily parse an opened file, yielding one parsed sentence at a time.

    Args:
        in_file: Object exposing a ``read`` attribute; it is handed to
            ``parse_conllu_plus_fields`` and ``parse_sentences``.
        fields: Column names forwarded to ``parse_token_and_metadata``. When
            falsy (``None`` or empty) they are obtained from
            ``parse_conllu_plus_fields(in_file, ...)`` instead.
        field_parsers: Forwarded unchanged to ``parse_token_and_metadata``.
            ``None`` is the default, hence the explicit ``Optional``.
        metadata_parsers: Forwarded unchanged to ``parse_conllu_plus_fields``
            and ``parse_token_and_metadata``.

    Yields:
        The value returned by ``parse_token_and_metadata`` for each sentence
        (annotated as ``TokenList``).

    Raises:
        FileNotFoundError: If ``in_file`` has no ``read`` attribute. Because
            this is a generator, the error surfaces on the first iteration,
            not at call time.
    """
    if not hasattr(in_file, 'read'):
        raise FileNotFoundError(
            "Invalid file, 'parse_incr' needs an opened file as input")

    # Field discovery happens once, before any sentence is consumed.
    if not fields:
        fields = parse_conllu_plus_fields(in_file, metadata_parsers=metadata_parsers)

    for sentence in parse_sentences(in_file):
        yield parse_token_and_metadata(
            sentence,
            fields=fields,
            field_parsers=field_parsers,
            metadata_parsers=metadata_parsers,
        )
def test_multiple_newlines(self):
    # Runs of several blank lines between sentences must not produce empty
    # sentences: exactly three two-token sentences are expected.
    # NOTE(review): the exact number of blank lines in each gap is a
    # reconstruction — confirm against the original fixture.
    raw_text = dedent("""\
        1\thej
        2\tdå


        1\thej
        2\tdå



        1\thej
        2\tdå
    """)
    expected = ['1\thej\n2\tdå'] * 3

    actual = list(parse_sentences(string_to_file(raw_text)))

    self.assertEqual(actual, expected)
def test_ends_without_newline(self):
    # The input stops right after the last token line, with no trailing
    # newline; the single sentence must still be returned intact.
    source = string_to_file("1\thej\n2\tdå")

    actual = list(parse_sentences(source))

    self.assertEqual(actual, ['1\thej\n2\tdå'])
def test_empty(self):
    # Both an empty string and None, wrapped by string_to_file, must yield
    # no sentences at all.
    for empty_input in ("", None):
        produced = list(parse_sentences(string_to_file(empty_input)))
        self.assertEqual(produced, [])
def test_empty(self):
    # A StringIO built from "" or from None must yield no sentences at all.
    for initial_value in ("", None):
        produced = list(parse_sentences(StringIO(initial_value)))
        self.assertEqual(produced, [])
def parse_incr(in_file, fields=None, field_parsers=None):
    """Lazily parse ``in_file``, yielding one ``TokenList`` per sentence.

    Args:
        in_file: Input handed to ``parse_sentences``.
        fields: Forwarded unchanged to ``parse_token_and_metadata``.
        field_parsers: Forwarded unchanged to ``parse_token_and_metadata``.

    Yields:
        ``TokenList`` built from the unpacked result of
        ``parse_token_and_metadata`` for each sentence.
    """
    for raw_sentence in parse_sentences(in_file):
        parsed = parse_token_and_metadata(
            raw_sentence, fields=fields, field_parsers=field_parsers)
        yield TokenList(*parsed)