Пример #1
0
def get_transcriptor(lang="es_ES", alphabet="IPA",
                     syllabic_separator=u".", stress_mark=u"'",
                     word_separator=u"|"):
    """
    Return a `Transcriptor` object for the requested language.

    Only Spanish is handled here: the codes ``es`` and ``es_ES``, as well
    as the IETF spelling ``es-ES``, are accepted case-insensitively.

    :param lang: string with the ISO 639-1 code or IETF language tag of the
                 language to transcribe
    :param alphabet: string with the name of the phonetic alphabet to use;
                     it is lower-cased before being passed to the transcriptor
    :param syllabic_separator: string with the syllabic separator character;
                               a falsy value falls back to ``u"."``
    :param stress_mark: string to mark the stress in words
    :param word_separator: string with the word separator character
    :return: a `Transcriptor` object
    :raises NotLanguageSupported: if `lang` is not one of the accepted codes
    """
    if not syllabic_separator:
        syllabic_separator = u"."
    alphabet = alphabet.lower()
    # Language identification. IETF tags join subtags with "-" whereas
    # locale-style names use "_", so both spellings are folded together.
    language_code = lang.lower().replace("-", "_")
    if language_code not in ("es", "es_es"):
        # Report the value exactly as the caller supplied it.
        raise NotLanguageSupported(lang)

    # Every accepted spelling maps to the same canonical locale name.
    lang = "es_ES"
    hyphenator = get_hyphenator(lang)
    # The language module is resolved by dotted name at call time.
    module = import_module("stevens.languages.es.castillian")
    return module.Transcriptor(
        hyphenator=hyphenator,
        syllabic_separator=syllabic_separator,
        word_separator=word_separator,
        alphabet=alphabet,
        stress_mark=stress_mark,
    )
Пример #2
0
 def __init__(self, *args, **kwargs):
     """
     Initialise the transcriptor.

     All positional and keyword arguments are forwarded unchanged to the
     parent initialiser. Afterwards a default ``es_ES`` hyphenator is
     installed if none is set, and the regular expressions, digraph table
     and character-class strings are stored on the instance.
     """
     super(Transcriptor, self).__init__(*args, **kwargs)

     # Use the es_ES hyphenator when the parent left a falsy one.
     if not self._hyphenator:
         self._hyphenator = get_hyphenator("es_ES")

     # --- Compiled patterns ---
     # Runs of spaces, newlines, tabs and the listed punctuation marks.
     self._punctuation = re.compile(r"[ \.,\?\!¡¿\n\t]+")
     # Unaccented vowels plus "n" and "s".
     # NOTE(review): presumably the endings of penultimate-stress words;
     # confirm against the code that uses this pattern.
     self._grave = re.compile(u'[aeiouns]')
     # Vowels carrying an acute accent.
     self._irregular = re.compile(u'[áéíóú]')

     # --- Two-letter sequences and their one-character replacements ---
     digraphs = {
         u'rr': u'R',
         u'll': u'ʎ',
         u'ch': u'ʧ',
         u'gu': u'g',
         u'qu': u'q',
     }
     self._double_consonants = digraphs
     # Set of the two-letter keys of the table above.
     self._double_consonants_set = set(digraphs)

     # --- Character classes, each stored as a string of characters ---
     self._nasals = u'mnñ'
     self._laterals = u'l'
     self._vowels = u'aeiouáéíóú'
     self._voiced = u'aeioubdglmnrRvw'
     self._voiced_consonants = u'bdglmnrRv'
     self._labiodentals = u'fv'
     self._coronals = u'dlrnstzʧ'
     self._palatals = u'yʎ'
     self._bilabials = u'bmp'
     self._velars = u'gjq'

     # Double vertical bar stored as the pause symbol.
     self._pause = u'‖'