Exemplo n.º 1
0
    def get_sourcestr_representation(self):
        """
                DCharacterLAT.get_sourcestr_representation

                Build and return the source string describing <self>.

                Unknown character : return UNKNOWN_CHAR_SYMBOL if the option
                "anonymize the unknown characters" is set to 'yes', otherwise
                return <self.base_char> unchanged.

                Any other character : concatenate, in this order,
                    (1) the base character (punctuation symbols are emitted
                        as is, letters go through SYMB_UPPER_CASE or
                        SYMB_LOWER_CASE),
                    (2) the stress symbol,
                    (3) the length symbol ('short' or 'long' only),
                    (4) the diaeresis symbol,
                then compose the result : first with unicodedata.normalize
                (NFC), then by applying every (src, dest) replacement listed
                in COMPLETE_NORMALIZE_NFC.
        """
        # unknown character : nothing to analyse, the options decide what is returned.
        if self.unknown_char:
            anonymize = self.dstring_object.options["anonymize the unknown characters"]
            return UNKNOWN_CHAR_SYMBOL if anonymize == 'yes' else self.base_char

        # known character : the symbols are collected piece by piece.
        pieces = []

        base = self.base_char
        if base is not None:
            if self.punctuation:
                # a punctuation symbol is kept unchanged.
                pieces.append(base)
            elif self.capital_letter:
                pieces.append(SYMB_UPPER_CASE.get_default_symbol(base))
            else:
                pieces.append(SYMB_LOWER_CASE.get_default_symbol(base))

        if self.stress:
            pieces.append(DEFAULTSYMB__STRESS)

        # any other value of <self.length> adds no diacritic.
        if self.length in ('short', 'long'):
            pieces.append(SYMB_DIACRITICS.get_default_symbol(self.length))

        if self.diaeresis:
            pieces.append(DEFAULTSYMB__DIAERESIS)

        # composition, step 1 : the standard NFC normalization.
        composed = unicodedata.normalize('NFC', "".join(pieces))

        # composition, step 2 : the extra replacements from COMPLETE_NORMALIZE_NFC.
        for before, after in COMPLETE_NORMALIZE_NFC:
            composed = composed.replace(before, after)

        return composed