Python extract_diacritic示例，greek_accentuation.characters.extract_diacritic Python示例

示例#1

0

显示文件

文件： transcription.py 项目： kylepjohnson/cltkv1

    def __init__(self, ipa_ch):
        """Build a phone from one IPA character and record its features.

        ``ipa_ch`` is stored NFC-normalized in ``self.ipa``. ``self.bare``
        holds the same text after the tone-clearing helper has been applied,
        and ``self.tone`` holds whatever the tone-collecting helper returns
        for ``ipa_ch``. Every phonological feature is a boolean: whether
        ``self.bare`` is a member of the matching ``IPA`` entry.
        """
        # Addition to greek_accentuation.characters for use in this class:
        # ˆ, the IPA tonal notation for ῀
        ipa_circumflex = "\u0302"
        tonal_marks = (chars.ACUTE, ipa_circumflex)
        # Callable that collects IPA tonal diacritics
        pick_tone = chars.extract_diacritic(*tonal_marks)
        # Callable that clears IPA tonal diacritics
        strip_tone = chars.remove_diacritic(*tonal_marks)

        # Form that is eventually exported to the output string
        self.ipa = unicodedata.normalize("NFC", ipa_ch)
        # Same phone without IPA tonal diacritics
        toneless = unicodedata.normalize("NFC", strip_tone(ipa_ch))
        self.bare = toneless
        # The IPA tonal diacritics selected from the input
        self.tone = pick_tone(ipa_ch)
        # Pre-context and post-context of this phone; per the original
        # notes, each is assigned once in Word.
        self.left = ""
        self.right = ""

        # Feature bundle: one boolean per IPA feature class.
        self.vce = toneless in IPA["voiced"]
        self.lab = toneless in IPA["labial"]
        self.cor = toneless in IPA["coronal"]
        self.vel = toneless in IPA["velar"]
        self.nas = toneless in IPA["nasal"]
        self.app = toneless in IPA["approximant"]
        self.cont = toneless in IPA["continuant"]
        self.vow = toneless in IPA["vowel"]
        self.hi = toneless in IPA["high"]
        self.lo = toneless in IPA["low"]
        self.fr = toneless in IPA["front"]
        self.bk = toneless in IPA["back"]
        self.bound = toneless in IPA["boundary"]

示例#2

0

显示文件

文件： transcription.py 项目： TylerKirby/cltk

    def __init__(self, ipa_ch):
        """Build a phone from one IPA character and record its features.

        ``ipa_ch`` is stored NFC-normalized in ``self.ipa``. ``self.bare``
        holds the same text after the tone-clearing helper has been applied,
        and ``self.tone`` holds whatever the tone-collecting helper returns
        for ``ipa_ch``. Every phonological feature is a boolean: whether
        ``self.bare`` is a member of the matching ``IPA`` entry.
        """
        # Addition to greek_accentuation.characters for use in this class:
        # ˆ, the IPA tonal notation for ῀
        ipa_circumflex = "\u0302"
        tonal_marks = (chars.ACUTE, ipa_circumflex)
        # Callable that collects IPA tonal diacritics
        pick_tone = chars.extract_diacritic(*tonal_marks)
        # Callable that clears IPA tonal diacritics
        strip_tone = chars.remove_diacritic(*tonal_marks)

        # Form that is eventually exported to the output string
        self.ipa = unicodedata.normalize('NFC', ipa_ch)
        # Same phone without IPA tonal diacritics
        toneless = unicodedata.normalize('NFC', strip_tone(ipa_ch))
        self.bare = toneless
        # The IPA tonal diacritics selected from the input
        self.tone = pick_tone(ipa_ch)
        # Pre-context and post-context of this phone; per the original
        # notes, each is assigned once in Word.
        self.left = ""
        self.right = ""

        # Feature bundle: one boolean per IPA feature class.
        self.vce = toneless in IPA['voiced']
        self.lab = toneless in IPA['labial']
        self.cor = toneless in IPA['coronal']
        self.vel = toneless in IPA['velar']
        self.nas = toneless in IPA['nasal']
        self.app = toneless in IPA['approximant']
        self.cont = toneless in IPA['continuant']
        self.vow = toneless in IPA['vowel']
        self.hi = toneless in IPA['high']
        self.lo = toneless in IPA['low']
        self.fr = toneless in IPA['front']
        self.bk = toneless in IPA['back']
        self.bound = toneless in IPA['boundary']

示例#3

0

显示文件

文件： transcription.py 项目： kylepjohnson/cltkv1

    def _parse_diacritics(self, ch):
        """Return ``ch`` as its base followed by separated, ordered diacritics.

        The result is laid out for easier access later.
        EG: input with base α -> α/ACCENT/ETC/
        (where ETC includes diaeresis, iota subscripts, and macrons)

        When a rough breathing is found and the lowercased base is not rho,
        ``h///`` is inserted before the base. ('Aspirated' rhos can be
        ignored here and dealt with separately.)
        """
        # Additions to greek_accentuation.characters for use here:
        marked_breathing = chars.extract_diacritic(chars.ROUGH)
        # (Don't need SMOOTH for these purposes)
        marked_accents = chars.extract_diacritic(chars.ACUTE, chars.CIRCUMFLEX)
        # (Don't need GRAVE for these purposes)
        marked_length = chars.extract_diacritic(chars.LONG)
        # (Don't need SHORT for these purposes)

        # NOTE(review): each extractor is treated as returning the matching
        # diacritic or None -- confirm against greek_accentuation.characters.
        breathing = marked_breathing(ch)
        accent = marked_accents(ch)
        others = (
            chars.diaeresis(ch),
            chars.iota_subscript(ch),
            marked_length(ch),
        )

        base = chars.base(ch).lower()

        # Any rough breathing on a non-rho base puts an h/// before the base.
        prefix = "h///" if breathing is not None and base != "ρ" else ""
        # The accent, if any, sits between the 1st and 2nd boundary.
        accent_field = accent if accent is not None else ""
        # Any other diacritics sit between the 2nd and final boundary.
        other_field = "".join(mark for mark in others if mark is not None)

        return prefix + base + "/" + accent_field + "/" + other_field + "/"

示例#4

0

显示文件

文件： transcription.py 项目： TylerKirby/cltk

    def _parse_diacritics(self, ch):
        """Return ``ch`` as its base followed by separated, ordered diacritics.

        The result is laid out for easier access later.
        EG: input with base α -> α/ACCENT/ETC/
        (where ETC includes diaeresis, iota subscripts, and macrons)

        When a rough breathing is found and the lowercased base is not rho,
        ``h///`` is inserted before the base. ('Aspirated' rhos can be
        ignored here and dealt with separately.)
        """
        # Additions to greek_accentuation.characters for use here:
        marked_breathing = chars.extract_diacritic(chars.ROUGH)
        # (Don't need SMOOTH for these purposes)
        marked_accents = chars.extract_diacritic(
            chars.ACUTE, chars.CIRCUMFLEX
        )
        # (Don't need GRAVE for these purposes)
        marked_length = chars.extract_diacritic(chars.LONG)
        # (Don't need SHORT for these purposes)

        # NOTE(review): each extractor is treated as returning the matching
        # diacritic or None -- confirm against greek_accentuation.characters.
        breathing = marked_breathing(ch)
        accent = marked_accents(ch)
        others = (
            chars.diaeresis(ch),
            chars.iota_subscript(ch),
            marked_length(ch),
        )

        base = chars.base(ch).lower()

        # Any rough breathing on a non-rho base puts an h/// before the base.
        prefix = "h///" if breathing is not None and base != "ρ" else ""
        # The accent, if any, sits between the 1st and 2nd boundary.
        accent_field = accent if accent is not None else ""
        # Any other diacritics sit between the 2nd and final boundary.
        other_field = "".join(mark for mark in others if mark is not None)

        return prefix + base + "/" + accent_field + "/" + other_field + "/"