Пример #1
0
 def test_split_using_dictionary_key_one_single(self):
     # Every key is a single character; the splitter is called with a
     # maximum key length of 1 and single_char_parsing=True.
     lookup = {u"a": 1, u"b": 2, u"c": 4}
     # (input, expected split) pairs. u"z" is not a key of ``lookup`` but
     # is still expected to come back as its own one-character piece.
     cases = (
         (None, None),
         (u"", []),
         (u"aza", [u"a", u"z", u"a"]),
         (u"aaba", [u"a", u"a", u"b", u"a"]),
         (u"acaba", [u"a", u"c", u"a", u"b", u"a"]),
     )
     for text, expected in cases:
         actual = split_using_dictionary(
             text, lookup, 1, single_char_parsing=True)
         self.assertEqual(actual, expected)
Пример #2
0
 def test_split_using_dictionary_key_one_single(self):
     # Single-character keys only, split with max key length 1 and
     # single_char_parsing enabled.
     single_char_keys = {
         u"a": 1,
         u"b": 2,
         u"c": 4,
     }
     # Each entry pairs an input with the split the test expects for it;
     # None is expected to pass through as None, u"" to yield [].
     expectations = [
         (None, None),
         (u"", []),
         (u"aza", [u"a", u"z", u"a"]),
         (u"aaba", [u"a", u"a", u"b", u"a"]),
         (u"acaba", [u"a", u"c", u"a", u"b", u"a"]),
     ]
     for source, wanted in expectations:
         got = split_using_dictionary(
             source, single_char_keys, 1, single_char_parsing=True)
         self.assertEqual(got, wanted)
Пример #3
0
 def test_split_using_dictionary(self):
     # Keys of length one and two; the splitter is called with a maximum
     # key length of 2 and single_char_parsing=False.
     lookup = {
         u"a": 1,
         u"ba": 2,
         u"b": 3,
         u"c": 4,
         u"ca": 5,
     }
     # (input, expected split) pairs. The last two cases expect the
     # two-character keys u"ba" and u"ca" to come back as single pieces.
     cases = (
         (None, None),
         (u"", []),
         (u"aza", [u"a", u"z", u"a"]),
         (u"aaba", [u"a", u"a", u"ba"]),
         (u"acaba", [u"a", u"ca", u"ba"]),
     )
     for text, expected in cases:
         actual = split_using_dictionary(
             text, lookup, 2, single_char_parsing=False)
         self.assertEqual(actual, expected)
Пример #4
0
    def can_map_ipa_string(self, ipa_string):
        """
        Tell whether every piece of the given IPA string has an entry
        in this mapper's table.

        Each character contributes a one-element tuple holding its
        ``canonical_representation``; that list is split with
        ``split_using_dictionary`` (this mapper is passed as the dictionary,
        ``self.max_key_length`` as the longest key) and every resulting piece
        is checked against ``self.ipa_canonical_representation_to_mapped_str``.

        :param IPAString ipa_string: the IPAString to be checked
        :rtype: bool
        """
        canonical_keys = [
            (char.canonical_representation, ) for char in ipa_string
        ]
        pieces = split_using_dictionary(
            canonical_keys, self, self.max_key_length,
            single_char_parsing=False)
        # Short-circuits on the first piece without a mapping; an empty
        # split yields True.
        return all(
            piece in self.ipa_canonical_representation_to_mapped_str
            for piece in pieces)
Пример #5
0
 def test_split_using_dictionary(self):
     # Mixed one- and two-character keys, split with max key length 2 and
     # single_char_parsing disabled.
     mixed_keys = {u"a": 1, u"ba": 2, u"b": 3, u"c": 4, u"ca": 5}
     # Each entry pairs an input with the split the test expects for it;
     # u"z" is not a key but is still expected as its own piece.
     expectations = [
         (None, None),
         (u"", []),
         (u"aza", [u"a", u"z", u"a"]),
         (u"aaba", [u"a", u"a", u"ba"]),
         (u"acaba", [u"a", u"ca", u"ba"]),
     ]
     for source, wanted in expectations:
         got = split_using_dictionary(
             source, mixed_keys, 2, single_char_parsing=False)
         self.assertEqual(got, wanted)
Пример #6
0
    def map_ipa_string(self,
                       ipa_string,
                       ignore=False,
                       return_as_list=False,
                       return_can_map=False):
        """
        Map the given IPAString through this mapper's table and return
        the mapped strings, either concatenated or as a list.

        Each character contributes a one-element tuple holding its
        ``canonical_representation``; that list is split with
        ``split_using_dictionary`` (this mapper is passed as the dictionary,
        ``self.max_key_length`` as the longest key) and every resulting piece
        is looked up in ``self.ipa_canonical_representation_to_mapped_str``.

        :param IPAString ipa_string: the IPAString to be mapped
        :param bool ignore: if ``True``, pieces without a mapping are skipped
                            instead of raising
        :param bool return_as_list: if ``True``, return the list of mapped strings,
                                    one per piece, instead of their concatenation
        :param bool return_can_map: if ``True``, return a pair ``(can_map, mapped)``;
                                    ``can_map`` is ``False`` only when at least one
                                    piece was skipped because ``ignore`` is ``True``
        :rtype: str or list or (bool, str) or (bool, list)
        :raises ValueError: if a piece has no mapping and ``ignore`` is ``False``
                            (this also applies when ``return_can_map`` is ``True``)
        """
        mapped_pieces = []
        every_piece_mapped = True
        canonical_keys = [
            (char.canonical_representation, ) for char in ipa_string
        ]
        pieces = split_using_dictionary(
            canonical_keys, self, self.max_key_length,
            single_char_parsing=False)
        for piece in pieces:
            try:
                replacement = self.ipa_canonical_representation_to_mapped_str[piece]
            except KeyError:
                if not ignore:
                    # NOTE(review): ``piece`` is passed directly as the
                    # right-hand operand of ``%``; if it can be a tuple with
                    # more than one element this formatting would fail --
                    # confirm against split_using_dictionary.
                    raise ValueError(
                        "The IPA string contains an IPA character that is not mapped: %s"
                        % piece)
                # Unmapped piece tolerated: remember it and move on.
                every_piece_mapped = False
                continue
            mapped_pieces.append(replacement)
        if return_as_list:
            result = mapped_pieces
        else:
            result = u"".join(mapped_pieces)
        if return_can_map:
            return (every_piece_mapped, result)
        return result