def test_parser_variant_lands_in_comment(caplog):
    """Check that a variant ends up in the comment when there is no 'variants' column.

    The "variants" column is removed from the FormTable before the parser is
    built. Parsing a cell that contains a ``~``-marked variant is then
    expected to leave an informational message in the log and to put the
    variant text into the "Comment" field of the parsed form.
    """
    caplog.set_level(logging.INFO)
    dataset = pycldf.Dataset.from_metadata(
        Path(__file__).parent / "data/cldf/smallmawetiguarani/cldf-metadata.json"
    )
    dataset.remove_columns("FormTable", "variants")
    parser = c.CellParser(
        dataset=dataset,
        element_semantics=[
            ("/", "/", "phonemic", True),
            ("[", "]", "phonetic", True),
            ("<", ">", "orthographic", True),
            ("{", "}", "source", False),
            ("(", ")", "comment", False),
        ],
    )

    form = parser.parse_form(" {2} [dʒi'tɨka] ~[ʒi'tɨka] {2}", "language")

    # Two separate assertions instead of one ``assert a and b``: a failure then
    # names the condition that broke, and pytest can show a dict diff for the
    # parsed form.
    assert re.search(
        "No 'variants' column found .* will be added to #comment.*", caplog.text
    )
    assert form == {
        "Language_ID": "language",
        "Value": " {2} [dʒi'tɨka] ~[ʒi'tɨka] {2}",
        "phonetic": "dʒi'tɨka",
        "Comment": "~[ʒi'tɨka]\t2",
        "Source": {"language_s2"},
        "Form": "dʒi'tɨka",
    }
def test_fields_of_formtable_no_transcription(no_dialect):
    """Constructing a CellParser without any transcription element must fail.

    All FormTable columns used here are added to the dataset, but the element
    semantics passed to the parser contain no transcription entry, so the
    constructor is expected to raise an ``AssertionError`` whose message
    matches the pattern below.
    """
    dataset = no_dialect
    for column in ("value", "form", "languageReference", "comment", "source"):
        dataset.add_columns("FormTable", column)

    expected_message = (
        r"Your metadata json file and your cell parser don’t match.*transcriptions \(at least one of "
        r"'orthographic', 'phonemic', and 'phonetic'\) to derive a #form.*"
    )
    # The transcription entries are left commented out on purpose: their
    # absence is what this test exercises.
    semantics_without_transcription = [
        # ("[", "]", "phonetic", True),
        ("<", ">", "form", False),
        # ("/", "/", "phonemic", True),
        ("(", ")", "comment", False),
        ("{", "}", "source", False),
    ]

    with pytest.raises(AssertionError, match=expected_message):
        c.CellParser(
            dataset=dataset,
            element_semantics=semantics_without_transcription,
        )
def test_fields_of_formtable_no_source(no_dialect):
    """Constructing a CellParser on a FormTable lacking #source must fail.

    Every column added here except "source" is present, and the parser is
    built with its default element semantics; the constructor is expected to
    raise a ``ValueError`` mentioning ``#source``.
    """
    dataset = no_dialect
    # "source" is deliberately not among the added columns.
    for column in ("value", "form", "languageReference", "comment"):
        dataset.add_columns("FormTable", column)

    expected_message = (
        "Your metadata json file and your cell parser don’t match.*#source.*"
    )
    with pytest.raises(ValueError, match=expected_message):
        c.CellParser(dataset=dataset)
def parser():
    """Return a CellParser for the bundled "smallmawetiguarani" CLDF dataset.

    The dataset is loaded from the metadata file next to this test module,
    and the parser is configured with five bracket pairs: three
    transcription kinds plus source and comment.
    """
    # NOTE(review): presumably meant to be used as a pytest fixture; no
    # decorator is visible here -- confirm against the complete file.
    metadata_path = (
        Path(__file__).parent / "data/cldf/smallmawetiguarani/cldf-metadata.json"
    )
    bracket_semantics = [
        ("/", "/", "phonemic", True),
        ("[", "]", "phonetic", True),
        ("<", ">", "orthographic", True),
        ("{", "}", "source", False),
        ("(", ")", "comment", False),
    ]
    return c.CellParser(
        pycldf.Dataset.from_metadata(metadata_path),
        element_semantics=bracket_semantics,
    )