示例#1
0
 def test_as_is(self):
     """Check how ``as_is=True`` changes the conversion of 'aa'.

     With default settings the test expects 'c'; with ``as_is=True`` it
     expects 'bb'.
     """
     def fresh_rules():
         # A new list per Mapping, so the two mappings never share rule dicts.
         return [{'in': 'a', 'out': 'b'}, {'in': 'aa', 'out': 'c'}]

     default_mapping = Mapping(fresh_rules())
     written_order_mapping = Mapping(fresh_rules(), as_is=True)
     convert_default = Transducer(default_mapping)
     convert_written = Transducer(written_order_mapping)
     self.assertEqual(convert_default('aa'), 'c')
     self.assertEqual(convert_written('aa'), 'bb')
示例#2
0
 def test_mapping(self):
     """Check the rules produced by ``align_to_dummy_fallback``.

     Three cases are compared against literal expectations: the 'in' side
     and the 'out' side of a mapping whose out_lang is 'test-out', and the
     'out' side of a mapping whose out_lang is 'test-ipa'.
     """
     def expected_rules(pairs):
         # Every expected rule has empty contexts and a match pattern
         # compiled from its own input string.
         return [{'in': source,
                  'out': target,
                  'context_before': '',
                  'context_after': '',
                  'match_pattern': re.compile(source)}
                 for source, target in pairs]

     mapping = Mapping(
         [
             {'in': 'a', 'out': 'æ'},
             {'in': 'e', 'out': 'ɐ'},
             {'in': 'i', 'out': 'ɑ̃'},
             {'in': 'b', 'out': 'β'},
             {'in': 'g', 'out': 'ɡ'},
             {'in': 'g', 'out': 'g'},
             {'in': 'i', 'out': 'ةُ'},
         ],
         in_lang='test',
         out_lang='test-out')
     ipa_mapping = Mapping(
         [
             {'in': 'a', 'out': 'æ'},
             {'in': 'e', 'out': 'ɐ'},
             {'in': 'i', 'out': 'ɑ̃'},
             {'in': 'b', 'out': 'β'},
             {'in': 'g', 'out': 'ɡ'},
         ],
         in_lang='test',
         out_lang='test-ipa')

     # Input side of the non-IPA mapping.
     test_in = align_to_dummy_fallback(mapping)
     self.assertEqual(
         test_in.mapping,
         expected_rules([
             ('a', 'ɑ'),
             ('e', 'i'),
             ('i', 'i'),
             ('b', 't'),
             ('g', 't'),
             ('g', 't'),
             ('i', 'i'),
         ]))

     # Output side of the non-IPA mapping.
     test_out = align_to_dummy_fallback(mapping, 'out')
     self.assertEqual(
         test_out.mapping,
         expected_rules([
             ('æ', 'ɑi'),
             ('ɐ', 'ɑ'),
             ('ɑ̃', 'ɑ'),
             ('β', 't'),
             ('ɡ', 't'),
             ('g', 't'),
             ('ةُ', 'ɑu'),
         ]))

     # Output side of the mapping whose out_lang is 'test-ipa'.
     test_ipa = align_to_dummy_fallback(ipa_mapping, 'out')
     self.assertEqual(
         test_ipa.mapping,
         expected_rules([
             ('æ', 'ɑ'),
             ('ɐ', 'ɑ'),
             ('ɑ̃', 'ɑ'),
             ('β', 's'),
             ('ɡ', 't'),
         ]))
示例#3
0
def generate_mapping(in_lang, dummy, ipa, list_dummy, out_dir):
    ''' For specified IN_LANG, generate a mapping from IN_LANG-ipa to eng-ipa,
        or from IN_LANG-ipa to a dummy minimalist phone inventory.

        If you just modified or wrote the IN_LANG to IN_LANG-ipa mapping, don't forget
        to call "g2p update" first so "g2p generate-mapping" sees the latest version.

        Call "g2p update" again after calling "g2p generate-mapping" to make the new
        IN_LANG-ipa to eng-ipa mapping available.
    '''
    # NOTE(review): the docstring above is kept verbatim because it looks like
    # it is shown to users as command help text — confirm before rewording.
    #
    # Parameters:
    #   in_lang    -- language code; the source mapping is in_lang -> f'{in_lang}-ipa'.
    #   dummy      -- truthy to write a dummy fallback mapping.
    #   ipa        -- truthy to write an English IPA mapping.
    #   list_dummy -- truthy to print DUMMY_INVENTORY.
    #   out_dir    -- output directory passed to the writers; falsy means default.
    if not ipa and not dummy and not list_dummy:
        click.echo('You have to choose to generate either an IPA-based mapping or a dummy fallback mapping. Check the docs for more information.')
        # Nothing was requested, so there is nothing left to do.
        return
    # Refuse to write into a directory that already holds a mapping config.
    # The original tested 'config.yaml' twice; the second test is taken to
    # have been meant for the '.yml' spelling.
    if out_dir and any(
            os.path.exists(os.path.join(out_dir, config_name))
            for config_name in ('config.yaml', 'config.yml')):
        click.echo(
            f'There is already a mapping config file in \'{out_dir}\' \nPlease choose another path.')
        return
    if list_dummy:
        print("Dummy phone inventory: {}".format(DUMMY_INVENTORY))
    if ipa:
        check_ipa_known_segs([f'{in_lang}-ipa'])
        eng_ipa = Mapping(in_lang='eng-ipa', out_lang='eng-arpabet')
        new_mapping = Mapping(in_lang=in_lang, out_lang=f'{in_lang}-ipa')
        click.echo(f"Writing English IPA mapping for {in_lang} to file")
        create_mapping(new_mapping, eng_ipa,
                       write_to_file=True, out_dir=out_dir)
    if dummy:
        new_mapping = Mapping(in_lang=in_lang, out_lang=f'{in_lang}-ipa')
        click.echo(f"Writing dummy fallback mapping for {in_lang} to file")
        # Only the file-writing side effect is wanted. The result is not
        # unpacked, so this works whatever shape the return value has.
        align_to_dummy_fallback(
            new_mapping, write_to_file=True, out_dir=out_dir)
示例#4
0
 def test_case_sensitive(self):
     """Check conversion of "a" and "A" with and without ``case_sensitive=False``."""
     relaxed_mapping = Mapping([{"in": "A", "out": "b"}], case_sensitive=False)
     default_mapping = Mapping([{"in": "A", "out": "b"}])
     relaxed = Transducer(relaxed_mapping)
     default = Transducer(default_mapping)
     # (transducer, input, expected output), in the order the checks run.
     checks = (
         (relaxed, "a", "b"),
         (default, "a", "a"),
         (relaxed, "A", "b"),
     )
     for convert, text, expected in checks:
         self.assertEqual(convert(text).output_string, expected)
示例#5
0
 def test_case_sensitive(self):
     """Compare a ``case_sensitive=False`` mapping with a default one.

     The rule maps 'A' to 'b'. The test expects the case-insensitive
     transducer to convert both 'a' and 'A', and the default transducer to
     leave 'a' unchanged.
     """
     insensitive_mapping = Mapping([{'in': 'A', 'out': 'b'}],
                                   case_sensitive=False)
     sensitive_mapping = Mapping([{'in': 'A', 'out': 'b'}])
     insensitive = Transducer(insensitive_mapping)
     sensitive = Transducer(sensitive_mapping)

     lower_insensitive = insensitive('a').output_string
     self.assertEqual(lower_insensitive, 'b')
     lower_sensitive = sensitive('a').output_string
     self.assertEqual(lower_sensitive, 'a')
     upper_insensitive = insensitive('A').output_string
     self.assertEqual(upper_insensitive, 'b')
示例#6
0
 def test_reverse(self):
     """Check a one-rule mapping in forward and ``reverse=True`` form.

     Forward, the test expects 'a' -> 'b' and 'b' unchanged; reversed, it
     expects 'a' unchanged and 'b' -> 'a'.
     """
     forward_mapping = Mapping([{'in': 'a', 'out': 'b'}])
     backward_mapping = Mapping([{'in': 'a', 'out': 'b'}], reverse=True)
     forward = Transducer(forward_mapping)
     backward = Transducer(backward_mapping)
     # (transducer, input, expected output), in the order the checks run.
     for convert, text, expected in ((forward, 'a', 'b'),
                                     (forward, 'b', 'b'),
                                     (backward, 'a', 'a'),
                                     (backward, 'b', 'a')):
         self.assertEqual(convert(text).output_string, expected)
示例#7
0
 def test_escape_special(self):
     """Check a backslash-d rule with and without ``escape_special=True``.

     Without escaping, the test expects the rule to convert the digit '1'
     and to leave the two-character text backslash-d unchanged. With
     ``escape_special=True`` it expects the opposite.
     """
     # Raw strings keep the backslash literal without relying on Python's
     # handling of the invalid escape sequence '\d' in a normal string.
     mapping = Mapping([{'in': r'\d', "out": 'digit'}])
     mapping_escaped = Mapping([{'in': r'\d', "out": 'b'}], escape_special=True)
     transducer = Transducer(mapping)
     transducer_escaped = Transducer(mapping_escaped)
     self.assertEqual(transducer('1'), 'digit')
     self.assertEqual(transducer(r'\d'), r'\d')
     self.assertEqual(transducer_escaped('1'), '1')
     self.assertEqual(transducer_escaped(r'\d'), 'b')
示例#8
0
 def test_escape_special(self):
     """Compare a backslash-d rule with and without ``escape_special=True``.

     Unescaped, the test expects "1" -> "digit" and the literal
     backslash-d text unchanged; escaped, it expects "1" unchanged and the
     literal backslash-d text -> "b".
     """
     literal = r"\d"
     pattern_mapping = Mapping([{"in": r"\d", "out": "digit"}])
     literal_mapping = Mapping([{"in": r"\d", "out": "b"}], escape_special=True)
     as_pattern = Transducer(pattern_mapping)
     as_literal = Transducer(literal_mapping)
     # (transducer, input, expected output), in the order the checks run.
     checks = [
         (as_pattern, "1", "digit"),
         (as_pattern, literal, literal),
         (as_literal, "1", "1"),
         (as_literal, literal, "b"),
     ]
     for convert, text, expected in checks:
         self.assertEqual(convert(text).output_string, expected)
示例#9
0
 def test_reverse(self):
     """Check that ``reverse=True`` swaps the direction of an a/b rule."""
     plain_mapping = Mapping([{"in": "a", "out": "b"}])
     flipped_mapping = Mapping([{"in": "a", "out": "b"}], reverse=True)
     plain = Transducer(plain_mapping)
     flipped = Transducer(flipped_mapping)

     # Forward direction: "a" becomes "b", "b" is left alone.
     from_a = plain("a").output_string
     self.assertEqual(from_a, "b")
     from_b = plain("b").output_string
     self.assertEqual(from_b, "b")

     # Reversed direction: "a" is left alone, "b" becomes "a".
     flipped_from_a = flipped("a").output_string
     self.assertEqual(flipped_from_a, "a")
     flipped_from_b = flipped("b").output_string
     self.assertEqual(flipped_from_b, "a")
示例#10
0
    def test_as_is(self):
        """
        Test deprecated config: as_is.

        Checks that passing ``as_is`` explicitly writes a deprecation notice
        to stderr naming the equivalent rule_ordering setting, that
        ``wants_rules_sorted()`` reflects the setting, and that the default
        mapping behaves like the as-written one when converting 'aa'.
        """

        # explicitly set as_is=False
        log_output = io.StringIO()
        with redirect_stderr(log_output):
            mapping_sorted = Mapping([{
                'in': 'a',
                "out": 'b'
            }, {
                'in': 'aa',
                'out': 'c'
            }],
                                     as_is=False)
        self.assertTrue(mapping_sorted.wants_rules_sorted())
        self.assertIn("deprecated", log_output.getvalue(),
                      "it should warn that the feature is deprecated")
        self.assertIn("apply-longest-first", log_output.getvalue(),
                      "it should show the equivalent rule_ordering setting")

        # explicitly set as_is=True
        log_output = io.StringIO()
        with redirect_stderr(log_output):
            mapping = Mapping([{
                'in': 'a',
                "out": 'b'
            }, {
                'in': 'aa',
                'out': 'c'
            }],
                              as_is=True)
        self.assertFalse(mapping.wants_rules_sorted())
        self.assertIn("deprecated", log_output.getvalue(),
                      "it should warn that the feature is deprecated")
        self.assertIn("as-written", log_output.getvalue(),
                      "it should show the equivalent rule_ordering setting")

        # test the default (rule_ordering="as-written")
        mapping_as_is = Mapping([{
            'in': 'a',
            "out": 'b'
        }, {
            'in': 'aa',
            'out': 'c'
        }])
        # Assert on the mapping just built with defaults; the original
        # re-checked the as_is=True mapping, leaving the default untested.
        self.assertFalse(mapping_as_is.wants_rules_sorted())

        # test the alternative (rule_ordering="apply-longest-first")
        transducer = Transducer(mapping_sorted)
        transducer_as_is = Transducer(mapping_as_is)
        self.assertEqual(transducer('aa').output_string, 'c')
        self.assertEqual(transducer_as_is('aa').output_string, 'bb')
示例#11
0
 def test_extend_and_deduplicate(self):
     """Check ``Mapping.extend`` followed by ``Mapping.deduplicate``.

     After extending, the rules are expected to equal those of a mapping
     built from both rule lists in order. After deduplicating, the second
     "c:d" is expected to be gone.
     """
     mapping1 = Mapping(rules_from_strings("a:b", "c:d", "g:h"))
     mapping2 = Mapping(rules_from_strings("a:x", "c:d", "e:f"))
     extend_ref = Mapping(
         rules_from_strings("a:b", "c:d", "g:h", "a:x", "c:d", "e:f")
     )
     mapping1.extend(mapping2)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(mapping1.mapping, extend_ref.mapping)
     dedup_ref = Mapping(rules_from_strings("a:b", "c:d", "g:h", "a:x", "e:f"))
     mapping1.deduplicate()
     self.assertEqual(mapping1.mapping, dedup_ref.mapping)
示例#12
0
    def return_js_template(self, t_name_or_path: str) -> str:
        '''Given a transducer, create JavaScript string of that transducer.

        If "composite" occurs in ``t_name_or_path``, the argument is opened
        as a JSON file and its content is embedded as the list of transducers
        to apply in sequence. Otherwise a Mapping is loaded and its in/out
        pairs are embedded as a lookup table, together with the inputs sorted
        longest first.

        Args:
            :param str t_name_or_path: name of transducer or path to transducer.

        Returns:
            JavaScript source that registers the transducer on
            ``mtd.transducers`` under the name given by
            ``self.return_transducer_name``.
        '''
        name = self.return_transducer_name(t_name_or_path)
        transducer_js_template = '''\n\nmtd.transducers["{name}"] = (function() {{
                                        var correspondences = {cors};
                                        var keys = {keys};
                                        var regex = new RegExp("(" + keys.join("|") + ")", "g");
                                        return function(str) {{
                                            return str.replace(regex, function(a,b) {{
                                                return correspondences[a];
                                            }});
                                        }};
                                    }})();'''

        composite_js_template = u'''\n\nmtd.transducers["{name}"] = (function(){{
                                        var orths = {composite_transducers};
                                        return function(str) {{
                                            for (var i = 0; i < orths.length; i++) {{
                                                transducer = mtd.transducers[orths[i]];
                                                str = transducer(str);
                                            }}
                                            return str;
                                        }};
                                    }})();'''

        if "composite" in t_name_or_path:
            with open(t_name_or_path, encoding='utf8') as f:
                composite_transducers = json.load(f)
            return composite_js_template.format(
                name=name, composite_transducers=composite_transducers)

        path = self.return_transducer_path(t_name_or_path)
        if not path and t_name_or_path in self.available_transducers:
            # Bind to `cors`, the name every later line reads. The original
            # bound `mapping` here, so this branch failed with an unbound
            # local as soon as `cors.mapping` was used.
            cors = Mapping(**self.available_transducers[t_name_or_path])
        elif path.endswith('yaml'):
            cors = Mapping(path)
        else:
            cors = Mapping(load_from_file(path))
        # Inputs sorted longest first for the JavaScript alternation regex.
        keys = sorted([cor['in'] for cor in cors.mapping],
                      key=len,
                      reverse=True)
        # ChainMap resolves duplicate inputs in favour of the earliest rule.
        js_cors = [{cor['in']: cor['out']} for cor in cors.mapping]
        js_cors = dict(ChainMap(*js_cors))
        return transducer_js_template.format(name=name,
                                             cors=js_cors,
                                             keys=keys)
示例#13
0
def generate_mapping(in_lang, dummy, ipa):
    ''' Generate English mapping
    '''
    # NOTE(review): the docstring is left untouched in case it is shown to
    # users as command help text — confirm before rewording.
    #
    # in_lang -- language code; the source mapping is in_lang -> f'{in_lang}-ipa'.
    # dummy   -- truthy to write a dummy fallback mapping.
    # ipa     -- truthy to write an English IPA mapping.
    if not ipa and not dummy:
        click.echo('You have to choose to generate either an IPA-based mapping or a dummy fallback mapping. Check the docs for more information.')
        # Nothing was requested, so stop here.
        return
    if ipa:
        eng_ipa = Mapping(in_lang='eng-ipa', out_lang='eng-arpabet')
        new_mapping = Mapping(in_lang=in_lang, out_lang=f'{in_lang}-ipa')
        click.echo(f"Writing English IPA mapping for {in_lang} to file")
        create_mapping(new_mapping, eng_ipa, write_to_file=True)
    if dummy:
        new_mapping = Mapping(in_lang=in_lang, out_lang=f'{in_lang}-ipa')
        click.echo(f"Writing dummy fallback mapping for {in_lang} to file")
        # Only the file-writing side effect is wanted. The result is not
        # unpacked, so this works whatever shape the return value has.
        align_to_dummy_fallback(new_mapping, write_to_file=True)
示例#14
0
 def setUp(self):
     """Build the fixtures: two small mappings and the parsed JSON map.

     ``test_mapping_no_norm`` holds a precomposed and a decomposed rule and
     is created with ``norm_form='none'``; ``test_mapping_norm`` uses the
     defaults. ``json_map`` is loaded from 'git_to_ipa.json' in the
     directory containing ``public_data``.
     """
     precomposed_rule = {'in': '\u00e1', 'out': '\u00e1'}
     decomposed_rule = {'in': '\u0061\u0301', 'out': '\u0061\u0301'}
     self.test_mapping_no_norm = Mapping(
         [precomposed_rule, decomposed_rule], norm_form='none')
     self.test_mapping_norm = Mapping([{'in': '\u00e1', 'out': '\u00e1'}])

     data_dir = os.path.dirname(public_data)
     json_path = os.path.join(data_dir, 'git_to_ipa.json')
     with open(json_path, encoding='utf8') as handle:
         self.json_map = json.load(handle)
示例#15
0
 def setUp(self):
     """Create the mappings and load the JSON map used by the tests."""
     # Two spellings of the same accented letter, built with norm_form="none".
     unnormalized_rules = [
         {"in": "\u00e1", "out": "\u00e1"},
         {"in": "\u0061\u0301", "out": "\u0061\u0301"},
     ]
     self.test_mapping_no_norm = Mapping(unnormalized_rules, norm_form="none")

     # Same precomposed rule, built with the default settings.
     normalized_rules = [{"in": "\u00e1", "out": "\u00e1"}]
     self.test_mapping_norm = Mapping(normalized_rules)

     fixture = os.path.join(os.path.dirname(public_data), "git_to_ipa.json")
     with open(fixture, encoding="utf8") as stream:
         self.json_map = json.load(stream)
示例#16
0
文件: __init__.py 项目: dhdaines/g2p
def change_table(message):
    """Emit a 'table response' describing the mapping the message selects.

    If either language in ``message`` is 'custom', an empty mapping is used;
    otherwise the mapping for the in_lang/out_lang pair is loaded. The
    response carries the plain mapping, the expanded abbreviations and the
    mapping's kwargs.
    """
    is_custom = (message['in_lang'] == 'custom'
                 or message['out_lang'] == 'custom')
    if is_custom:
        table = Mapping(return_empty_mappings())
    else:
        table = Mapping(in_lang=message['in_lang'],
                        out_lang=message['out_lang'])

    payload = {
        'mappings': table.plain_mapping(),
        'abbs': expand_abbreviations(table.abbreviations),
        'kwargs': table.kwargs,
    }
    emit('table response', payload)
示例#17
0
    def test_norm_form(self):
        """Check how ``norm_form`` affects decomposed and precomposed input.

        The rule input is "a" plus U+0301. With the default and with "NFD"
        the test expects both spellings to convert to "a"; with
        ``norm_form=False`` it expects only the decomposed spelling to
        convert and the precomposed one to pass through.
        """
        decomposed = "a\u0301"
        precomposed = "\u00E1"

        default_mapping = Mapping([{"in": "a\u0301", "out": "a"}])  # Defaults to NFC
        nfd_mapping = Mapping([{"in": "a\u0301", "out": "a"}], norm_form="NFD")
        raw_mapping = Mapping([{"in": "a\u0301", "out": "a"}], norm_form=False)

        with_default = Transducer(default_mapping)
        with_nfd = Transducer(nfd_mapping)
        without_norm = Transducer(raw_mapping)

        # (transducer, input, expected output), in the order the checks run.
        checks = (
            (with_default, decomposed, "a"),
            (with_default, precomposed, "a"),
            (with_nfd, decomposed, "a"),
            (with_nfd, precomposed, "a"),
            (without_norm, decomposed, "a"),
            (without_norm, precomposed, "\u00E1"),
        )
        for convert, text, expected in checks:
            self.assertEqual(convert(text).output_string, expected)
示例#18
0
    def test_norm_form(self):
        """Compare transducer results for three ``norm_form`` settings.

        Each mapping has one rule whose input is 'a' plus U+0301. The
        transducer result is compared directly with the expected string.
        """
        nfc_mapping = Mapping([{'in': 'a\u0301', 'out': 'a'}])  # Defaults to NFC
        nfd_mapping = Mapping([{'in': 'a\u0301', 'out': 'a'}], norm_form='NFD')
        unnormalized_mapping = Mapping([{'in': 'a\u0301', 'out': 'a'}],
                                       norm_form=False)

        nfc = Transducer(nfc_mapping)
        nfd = Transducer(nfd_mapping)
        unnormalized = Transducer(unnormalized_mapping)

        # Default and NFD: both spellings are expected to give 'a'.
        for convert in (nfc, nfd):
            self.assertEqual(convert('a\u0301'), 'a')
            self.assertEqual(convert('\u00E1'), 'a')

        # norm_form=False: only the decomposed spelling is expected to match.
        self.assertEqual(unnormalized('a\u0301'), 'a')
        self.assertEqual(unnormalized('\u00E1'), '\u00E1')
示例#19
0
    def create_transducer_mapping(self,
                                  t_name_or_path: str) -> Callable[[str], str]:
        """ Build a G2PTransducer for a named or file-based transducer.

        Names found in ``self.available_transducers`` are built from their
        stored keyword arguments. Otherwise the resolved path is used: paths
        ending in 'yaml' go straight to ``Mapping``, anything else goes
        through ``load_from_file`` first.

        :param t_name_or_path: <string> path to transducer or default transducer
        """
        resolved = self.return_transducer_path(t_name_or_path)

        if t_name_or_path in self.available_transducers:
            settings = self.available_transducers[t_name_or_path]
            return G2PTransducer(Mapping(**settings))

        if resolved.endswith('yaml'):
            source = self.return_transducer_path(t_name_or_path)
        else:
            source = load_from_file(
                self.return_transducer_path(t_name_or_path))
        return G2PTransducer(Mapping(source))
示例#20
0
    def create_transducer_function(
            self, t_name_or_path: str) -> Callable[[str], str]:
        """ Return a function that converts a string with the given transducer.

        The mapping is chosen as follows: a name in
        ``self.available_transducers`` uses its stored keyword arguments, a
        path ending in 'yaml' is given to ``Mapping`` directly, and any other
        path is read with ``load_from_file``. The returned function yields
        the ``output_string`` of the transducer's result.

        :param t_name_or_path: <string> path to transducer or default transducer
        """
        resolved = self.return_transducer_path(t_name_or_path)

        if t_name_or_path in self.available_transducers:
            chosen = Mapping(**self.available_transducers[t_name_or_path])
        else:
            location = self.return_transducer_path(t_name_or_path)
            if resolved.endswith('yaml'):
                chosen = Mapping(location)
            else:
                chosen = Mapping(load_from_file(location))

        engine = G2PTransducer(chosen)

        def convert(x):
            return engine(x).output_string

        return convert
示例#21
0
 def test_json_map(self):
     """Build a Mapping from the JSON fixture and check its size and metadata.

     The "map" entry supplies the rules; every other entry is passed as a
     keyword argument.
     """
     options = dict(self.json_map)
     rules = options.pop("map")
     loaded = Mapping(rules, **options)
     self.assertEqual(len(loaded), 34)
     self.assertTrue(loaded.kwargs["in_metadata"]["case_insensitive"])
示例#22
0
 def create_transducer(mapping):
     """Build a Transducer from a rule list or from a mapping file.

     :param mapping: a list of rules, the path of a YAML config
         ('.yml' or '.yaml'), or the path of another existing file that
         ``load_from_file`` can read.
     :raises exceptions.MissingFileError: if ``mapping`` is falsy, or is
         neither a list, a YAML path nor an existing file.
     """
     if not mapping:
         mapping = str(mapping)
         raise exceptions.MissingFileError(mapping)

     if isinstance(mapping, list):
         mapping_obj = Mapping(mapping)
     # The dot is escaped and the 'a' is optional, so only a real '.yml' or
     # '.yaml' extension takes the YAML branch. The earlier pattern
     # '.y(a)*ml' accepted any character before the 'y' and spellings such
     # as 'yaaml'.
     elif isinstance(mapping, str) and re.search(r'\.ya?ml\b', mapping):
         mapping_obj = Mapping(mapping)
     elif os.path.isfile(mapping):
         mapping_data = load_from_file(mapping)
         mapping_obj = Mapping(mapping_data)
     else:
         raise exceptions.MissingFileError(mapping)
     return Transducer(mapping_obj)
示例#23
0
 def test_unidecode_mapping(self):
     """Check a ``type="unidecode"`` mapping.

     The mapping is expected to have no rules and to keep its type in
     ``kwargs``. Transducing the sample text is expected to give the ASCII
     string asserted below.
     """
     unidecode_mapping = Mapping(type="unidecode")
     self.assertEqual(unidecode_mapping.mapping, [])
     self.assertEqual(unidecode_mapping.kwargs["type"], "unidecode")

     to_ascii = Transducer(unidecode_mapping)
     result = to_ascii("été Nunavut ᓄᓇᕗᑦ")
     self.assertEqual(result.output_string, "ete Nunavut nonafot")
示例#24
0
 def test_basic_composition(self):
     """Check output and edges for "abba" and compose the edges with themselves.

     Each of the four characters is expected to map to the same position,
     and ``compose_indices`` is expected to return those edges unchanged
     when they are composed with themselves.
     """
     a_to_b = Transducer(Mapping([{"in": "a", "out": "b"}]))
     graph = a_to_b("abba")
     self.assertEqual(graph.output_string, "bbbb")

     one_to_one = [(position, position) for position in range(4)]
     self.assertEqual(graph.edges, one_to_one)
     self.assertEqual(graph.edges, compose_indices(graph.edges, graph.edges))
示例#25
0
def convert(message):
    """Run the requested mappings over the input string and emit the result.

    One Transducer is built per entry of ``message['data']['mappings']`` and
    the transducers are chained in a CompositeTransducer. When
    ``message['data']['index']`` is truthy the 'conversion response' also
    carries the chart data and links from ``return_echart_data``; otherwise
    it carries only the output string.
    """
    def build_transducer(spec):
        # One entry of the message -> one Transducer.
        rules = Mapping(hot_to_mappings(spec['mapping']),
                        abbreviations=flatten_abbreviations(
                            spec['abbreviations']),
                        **spec['kwargs'])
        return Transducer(rules)

    pipeline = CompositeTransducer(
        [build_transducer(spec) for spec in message['data']['mappings']])

    if message['data']['index']:
        graph = pipeline(message['data']['input_string'])
        chart_data, chart_links = return_echart_data(graph)
        response = {
            'output_string': graph.output_string,
            'index_data': chart_data,
            'index_links': chart_links
        }
    else:
        converted = pipeline(message['data']['input_string']).output_string
        response = {'output_string': converted}
    emit('conversion response', response)
示例#26
0
def create_mapping(mapping_1: Mapping,
                   mapping_2: Mapping,
                   mapping_1_io: str = 'out',
                   mapping_2_io: str = 'in',
                   write_to_file: bool = False) -> Mapping:
    """Align one side of ``mapping_1`` with one side of ``mapping_2``.

    The chosen inventories are passed to ``align_inventories`` together with
    flags saying whether each language name is X-SAMPA. A warning is logged
    for a language name that is neither IPA nor X-SAMPA. A config is
    generated for the pair and, if ``write_to_file`` is true, written out
    with the aligned rules.

    :param mapping_1: mapping supplying the first inventory
    :param mapping_2: mapping supplying the second inventory
    :param mapping_1_io: side of ``mapping_1`` to use ('in' or 'out')
    :param mapping_2_io: side of ``mapping_2`` to use ('in' or 'out')
    :param write_to_file: whether to write the generated mapping to file
    :return: a Mapping built from the aligned rules and the generated config
    """
    first_lang = mapping_1.kwargs[f'{mapping_1_io}_lang']
    second_lang = mapping_2.kwargs[f'{mapping_2_io}_lang']

    if not (is_ipa(first_lang) or is_xsampa(first_lang)):
        LOGGER.warning(
            "Unsupported orthography of inventory 1: %s (must be ipa or x-sampa)",
            first_lang)
    if not (is_ipa(second_lang) or is_xsampa(second_lang)):
        LOGGER.warning(
            "Unsupported orthography of inventory 2: %s (must be ipa or x-sampa)",
            second_lang)

    first_is_xsampa = is_xsampa(first_lang)
    second_is_xsampa = is_xsampa(second_lang)
    aligned_rules = align_inventories(mapping_1.inventory(mapping_1_io),
                                      mapping_2.inventory(mapping_2_io),
                                      first_is_xsampa, second_is_xsampa)

    missing_name = 'No Language display name in Config'
    first_display = mapping_1.kwargs.get('language_name', missing_name)
    second_display = mapping_2.kwargs.get('language_name', missing_name)

    config = generate_config(first_lang, second_lang, first_display,
                             second_display)

    if write_to_file:
        write_generated_mapping_to_file(config, aligned_rules)

    # Everything in the config except its own 'mapping' entry becomes a kwarg.
    settings = {}
    for key, value in config.items():
        if key != 'mapping':
            settings[key] = value
    return Mapping(aligned_rules, **settings)
示例#27
0
 def test_json_map(self):
     """Load the JSON fixture into a Mapping and check rule count and metadata."""
     # Everything except the rules themselves is forwarded as a keyword argument.
     rules = self.json_map['map']
     settings = {}
     for key, value in self.json_map.items():
         if key != 'map':
             settings[key] = value
     from_json = Mapping(rules, **settings)
     self.assertEqual(len(from_json), 34)
     self.assertTrue(from_json.kwargs['in_metadata']['case_insensitive'])
示例#28
0
 def test_basic_composition(self):
     """Indices mapped through a two-step basic composition"""
     single_rule = Mapping([{"in": "a", "out": "b"}])
     convert = Transducer(single_rule)
     result = convert("abba")
     # Every "a" becomes "b" and each character keeps its position.
     expected_edges = list(zip(range(4), range(4)))
     self.assertEqual(result.output_string, "bbbb")
     self.assertEqual(result.edges, expected_edges)
def align_to_dummy_fallback(mapping: Mapping, io: str = 'in', write_to_file: bool = False, out_dir: str = ''):
    """Build a mapping from one side of ``mapping`` to the dummy inventory.

    If the language on the chosen side is IPA, its inventory is aligned
    directly with DUMMY_INVENTORY. Otherwise each symbol is first turned into
    a rough IPA guess (unidecode, lower-cased, run through the 'und' to
    'und-ipa' converter) and the guesses are aligned with DUMMY_INVENTORY. A
    symbol whose guess has no alignment is mapped to 't' and a warning is
    logged.

    :param mapping: source mapping
    :param io: side of ``mapping`` to use, 'in' or 'out'
    :param write_to_file: whether to write the resulting config and mapping
    :param out_dir: directory to write to; empty, or not a directory, means
        the default location of ``config_to_file`` / ``mapping_to_file``
    :return: the new Mapping, whose out_lang is 'dummy'
    """
    config = {'in_lang': mapping.kwargs[f'{io}_lang'], 'out_lang': 'dummy'}
    default_char = 't'
    if is_ipa(mapping.kwargs[f'{io}_lang']):
        rules = align_inventories(mapping.inventory(io), DUMMY_INVENTORY)
    else:
        und_g2p = make_g2p('und', 'und-ipa')
        rules = [{"in": unicode_escape(x), "out": und_g2p(unidecode(x).lower()).output_string} for x in mapping.inventory(io)]
        dummy_list = align_inventories([x['out'] for x in rules], DUMMY_INVENTORY)
        # Lookup from guessed IPA to dummy phone; empty inputs are skipped.
        dummy_dict = {x['in']: x['out'] for x in dummy_list if x['in']}

        for x in rules:
            try:
                x['out'] = dummy_dict[x['out']]
            except KeyError:
                LOGGER.warning(f"We couldn't guess at what {x['in']} means, so it's being replaced with '{default_char}' instead.")
                x['out'] = default_char

    config['mapping'] = rules
    mapping = Mapping(**config)
    if write_to_file:
        if out_dir and os.path.isdir(out_dir):
            mapping.config_to_file(out_dir)
            mapping.mapping_to_file(out_dir)
        else:
            if out_dir:
                LOGGER.warning(f'{out_dir} is not a directory. Writing to default instead.')
            # Covers both "no out_dir given" and the fallback announced by
            # the warning above, which previously wrote nothing at all.
            mapping.config_to_file()
            mapping.mapping_to_file()
    return mapping
示例#30
0
 def test_distance_errors(self):
     """Check that unsupported distance names raise ValueError.

     Both ``create_mapping`` and ``create_multi_mapping`` are called, first
     with a name that is not in DISTANCE_METRICS and then with a name that
     is temporarily added to DISTANCE_METRICS.
     """
     src_mappings = [{"in": "ᐃ", "out": "i"}]
     src_mapping = Mapping(src_mappings, in_lang="crj", out_lang="crj-ipa")
     # Exercise looking up distances in the known list
     with self.assertRaises(ValueError):
         create_mapping(src_mapping,
                        self.target_mapping,
                        distance="not_a_distance")
     with self.assertRaises(ValueError):
         create_multi_mapping(
             [(src_mapping, "out")],
             [(self.target_mapping, "in")],
             distance="not_a_distance",
         )
     # White box testing: monkey-patch an invalid distance to validate the
     # second way we make sure distances are supported
     DISTANCE_METRICS.append("not_a_real_distance")
     try:
         with self.assertRaises(ValueError):
             create_mapping(src_mapping,
                            self.target_mapping,
                            distance="not_a_real_distance")
         with self.assertRaises(ValueError):
             create_multi_mapping(
                 [(src_mapping, "out")],
                 [(self.target_mapping, "in")],
                 distance="not_a_real_distance",
             )
     finally:
         # Always undo the monkey-patch so a failing assertion cannot leak
         # the fake metric into other tests.
         DISTANCE_METRICS.pop()