Exemplo n.º 1
0
    def testFilledExporter(self):
        """Writes FSTs to two FAR files and verifies what each one holds.

        The `outputs` flag is set to map the exporter names `a` and `b` to two
        FAR files in the test temp directory. `multi_grm.run` is expected to
        finish by raising `SystemExit`; afterwards each FAR is read back and
        checked for its number of entries and for the expected FST keys.
        """
        far_a = os.path.join(FLAGS.test_tmpdir, 'test_a.far')
        far_b = os.path.join(FLAGS.test_tmpdir, 'test_b.far')
        FLAGS.outputs = ','.join(('a=' + far_a, 'b=' + far_b))

        # The run must end via SystemExit; the FARs are inspected afterwards.
        with self.assertRaises(SystemExit):
            multi_grm.run(generator_method)

        # Each FAR path paired with the FST keys it should contain.
        expected_keys_by_far = (
            (far_a, ('FST1',)),
            (far_b, ('FST2', 'FST3')),
        )
        for far_path, fst_keys in expected_keys_by_far:
            stored_fsts = _read_fst_map(far_path)
            self.assertLen(stored_fsts, len(fst_keys))
            for fst_key in fst_keys:
                self.assertTrue(stored_fsts[fst_key])
Exemplo n.º 2
0
    cluster_or_vowel_with_coda = pynini.union(
        independent_vowel,
        cluster_with_vowel) + (uf.QuesSafe(vowel_length_sign) +
                               uf.QuesSafe(coda) + uf.QuesSafe(dead_consonant))
    akshara = pynini.union(cluster_or_vowel_with_coda, cluster_and_virama)
    return uf.StarSafe(pynini.union(akshara, standalone, accept)).optimize()


def generator_main(exporter_map: multi_grm.ExporterMapping):
    """Exports one well-formedness acceptor per Brahmic script and token type.

    For each of the 'byte' and 'utf8' token types, every script listed in
    `u.SCRIPTS` gets an FST built by `accept_well_formed` from the data files
    in that script's directory. The FST is stored in the exporter for that
    token type under the upper-cased script name.

    Args:
      exporter_map: Indexed by token type ('byte' or 'utf8') to obtain the
        exporter that receives the FSTs built under that token type.
    """
    # File names inside each script directory, kept in the exact order in
    # which they are passed positionally to accept_well_formed.
    data_files = (
        'script_config.textproto',
        'consonant.tsv',
        'dead_consonant.tsv',
        'subjoined_consonant.tsv',
        'vowel_sign.tsv',
        'vowel.tsv',
        'vowel_length_sign.tsv',
        'coda.tsv',
        'standalone.tsv',
        'virama.tsv',
        'accept.tsv',
        'preserve.tsv',
    )
    for token_type in ('byte', 'utf8'):
        with pynini.default_token_type(token_type):
            exporter = exporter_map[token_type]
            for script in u.SCRIPTS:
                script_dir = u.SCRIPT_DIR / script
                paths = [script_dir / file_name for file_name in data_files]
                exporter[script.upper()] = accept_well_formed(*paths)


# Script entry point: when executed directly (not imported), hand
# generator_main to multi_grm.run.
if __name__ == '__main__':
    multi_grm.run(generator_main)