def test_GraphTransliterator_productions():
    """Check the productions exposed by a transliterator with two rules."""
    easyreading = {
        "tokens": {"ab": ["class_ab"], " ": ["wb"]},
        "rules": {"ab": "AB", " ": "_"},
        "whitespace": {"default": " ", "token_class": "wb", "consolidate": True},
    }
    transliterator = GraphTransliterator.from_easyreading_dict(easyreading)
    # Compared as sets, so the order of productions is not checked.
    assert set(transliterator.productions) == {"AB", "_"}
def test_GraphTransliterator_graph():
    """Check that the built transliterator carries a graph whose node 0 is "Start"."""
    easyreading = {
        "tokens": {"ab": ["class_ab"], " ": ["wb"]},
        "rules": {"ab": "AB", " ": "_"},
        "whitespace": {"default": " ", "token_class": "wb", "consolidate": True},
    }
    gt = GraphTransliterator.from_easyreading_dict(easyreading)
    graph = gt._graph
    assert graph
    first_node = graph.node[0]
    assert first_node["type"] == "Start"  # test for Start
    assert gt
def test_GraphTransliterator(tmpdir):
    """Test GraphTransliterator construction and a basic transliteration.

    One YAML definition is loaded three ways: as a dict (via
    ``yaml.safe_load``), as a YAML string, and as a YAML file written
    under ``tmpdir``. The test checks the attributes exposed by the
    resulting transliterator and that ``"ab"`` transliterates to
    ``"A,B"``, which involves the first on-match rule.

    Args:
        tmpdir: directory object providing ``join()``; the YAML file is
            created inside it.
    """
    # Raw string: Python leaves the \N{...} sequences as literal text here
    # instead of expanding them to Unicode characters.
    yaml_str = r"""
    tokens:
      a: [token, class1]
      b: [token, class2]
      u: [token]
      ' ': [wb]
    rules:
      a: A
      b: B
      <wb> u: \N{DEVANAGARI LETTER U}
    onmatch_rules:
      - <class1> + <class2>: ","
      - <class1> + <token>: \N{DEVANAGARI SIGN VIRAMA}
    whitespace:
      default: ' '
      token_class: 'wb'
      consolidate: true
    metadata:
      author: Author
    """
    input_dict = yaml.safe_load(yaml_str)
    assert "a" in GraphTransliterator.from_easyreading_dict(input_dict).tokens.keys()

    # Attributes exposed by a transliterator built from the parsed dict.
    gt = GraphTransliterator.from_easyreading_dict(input_dict)
    assert gt.onmatch_rules[0].production == ","
    assert gt.tokens
    assert gt.rules
    assert gt.whitespace
    assert gt.whitespace.default
    assert gt.whitespace.token_class
    assert gt.whitespace.consolidate
    assert gt.metadata["author"] == "Author"
    # isinstance() is the idiomatic type check (exact type() comparison is
    # flagged by linters as E721).
    assert isinstance(gt.graph, DirectedGraph)

    # Round-trip the YAML through a file so from_yaml_file() can be exercised.
    yaml_file = tmpdir.join("yaml_test.yaml")
    yaml_filename = str(yaml_file)
    yaml_file.write(yaml_str)
    assert yaml_file.read() == yaml_str

    assert GraphTransliterator.from_yaml_file(yaml_filename)
    assert len(set(GraphTransliterator.from_easyreading_dict(input_dict).tokens)) == 4

    # Each constructor should yield the same transliteration of "ab".
    assert GraphTransliterator.from_yaml(yaml_str).transliterate("ab") == "A,B"
    assert (
        GraphTransliterator.from_yaml_file(yaml_filename).transliterate("ab") == "A,B"
    )
    assert (
        GraphTransliterator.from_easyreading_dict(
            {
                "tokens": {"a": ["class_a"], "b": ["class_b"], " ": ["wb"]},
                "onmatch_rules": [{"<class_a> + <class_b>": ","}],
                "whitespace": {
                    "default": " ",
                    "token_class": "wb",
                    "consolidate": True,
                },
                "rules": {"a": "A", "b": "B"},
            }
        ).transliterate("ab")
        == "A,B"
    )