Пример #1
0
    def calc_offset(self, syllables_spaces: List[str]) -> Dict[int, int]:
        """
        Map syllable indices to the position, within the flattened line, of a vowel
        found in that syllable.

        Each syllable is scanned with ``self.syllable_matcher``. When
        ``string_utils.starts_with_qu`` flags the syllable, its first match is ignored.
        Every remaining match whose text is found in VOWELS + ACCENTED_VOWELS records an
        offset for the syllable, so the last such match in a syllable is the one kept.

        :param syllables_spaces: the syllables of one line, spacing included
        :return: dictionary of syllable index -> offset into the flattened line; syllables
            without a qualifying match have no entry
        """
        flat_line = string_utils.flatten(syllables_spaces)
        offsets = {}  # type: Dict[int, int]
        for syl_index, syllable in enumerate(syllables_spaces):
            begins_with_qu = string_utils.starts_with_qu(syllable)
            candidates = list(self.syllable_matcher.finditer(syllable))
            if begins_with_qu:
                # The first match of a "qu" syllable is never considered.
                candidates = candidates[1:]
            for candidate in candidates:
                begin, finish = candidate.span()
                all_vowels = self.constants.VOWELS + self.constants.ACCENTED_VOWELS
                if syllable[begin:finish] not in all_vowels:
                    continue
                # Length of everything before this syllable, measured on the flat line.
                preceding = "".join(syllables_spaces[:syl_index])
                location = len(flat_line[:len(preceding)]) + begin
                if flat_line[location] not in all_vowels:
                    # Sanity check only: the offset is still recorded below.
                    LOG.error("Problem at line {} offset {}".format(
                        flat_line, location))
                offsets[syl_index] = location
        return offsets
Пример #2
0
    def produce_scansion(self, stresses: list, syllables_wspaces: List[str],
                         offset_map: Dict[int, int]) -> str:
        """
        Render a scansion line: blanks as long as the flattened syllables, with a mark
        written at each offset recorded for a stressed or unstressed syllable.

        :param stresses: syllable indices that receive the STRESSED mark
        :param syllables_wspaces: the syllables of the line, spacing included
        :param offset_map: syllable index -> position in the line where that syllable's
            mark is written; syllables missing from the map get no mark
        :return: the scansion string. If placing a mark raises, the error is logged and
            whatever was marked up to that point is returned.
        """
        marks = [" "] * len(string_utils.flatten(syllables_wspaces))
        syllable_count = len(syllables_wspaces)
        unstressed = string_utils.get_unstresses(stresses, syllable_count)
        try:
            for syllable_idx in unstressed:
                spot = offset_map.get(syllable_idx)
                if spot is None:
                    continue
                marks[spot] = self.constants.UNSTRESSED
            # Stressed marks are written second, so they win if an offset is shared.
            for syllable_idx in stresses:
                spot = offset_map.get(syllable_idx)
                if spot is None:
                    continue
                marks[spot] = self.constants.STRESSED
        except Exception as e:
            message = "problem with syllables; check syllabification {}, {}".format(
                syllables_wspaces, e)
            LOG.error(message)
        return "".join(marks)
Пример #3
0
    def produce_scansion(self, stresses: list, syllables_wspaces: List[str],
                         offset_map: Dict[int, int]) -> str:
        """
        Build the scansion string for a line: one character per character of the flattened
        syllables, blank except where a stress or unstress mark is placed.

        :param stresses: indices of the syllables to be marked STRESSED
        :param syllables_wspaces: the line's syllables, spacing included
        :param offset_map: for a syllable index, the position in the line at which its mark
            goes; a syllable with no entry is skipped
        :return: the assembled scansion string. Any exception raised while marking is logged
            rather than propagated, and the partially marked string is returned.
        """
        blank_line = string_utils.flatten(syllables_wspaces)
        cells = [" "] * len(blank_line)
        unstressed_ids = string_utils.get_unstresses(stresses, len(syllables_wspaces))
        try:
            for syl_id in unstressed_ids:
                target = offset_map.get(syl_id)
                if target is None:
                    continue
                cells[target] = self.constants.UNSTRESSED
            # Second pass: a stressed mark replaces anything already at the same offset.
            for syl_id in stresses:
                target = offset_map.get(syl_id)
                if target is None:
                    continue
                cells[target] = self.constants.STRESSED
        except Exception as e:
            LOG.error(
                "problem with syllables; check syllabification {}, {}".format(
                    syllables_wspaces, e))
        return "".join(cells)
Пример #4
0
    def calc_offset(self, syllables_spaces: List[str]) -> Dict[int, int]:
        """
        Compute, per syllable, the offset in the flattened line of a vowel in that syllable.

        ``self.syllable_matcher`` supplies the candidate matches for each syllable. The
        first candidate is dropped when ``string_utils.starts_with_qu`` reports true for
        the syllable. Each remaining candidate whose text is found in
        VOWELS + ACCENTED_VOWELS overwrites the syllable's entry, so the final qualifying
        candidate determines the stored offset.

        :param syllables_spaces: the syllables of one line, spacing included
        :return: dictionary of syllable index -> offset into the flattened line; a syllable
            with no qualifying candidate is absent from the dictionary
        """
        whole_line = string_utils.flatten(syllables_spaces)
        accent_offsets = {}  # type: Dict[int, int]
        for number, current in enumerate(syllables_spaces):
            drop_first = string_utils.starts_with_qu(current)
            found = list(self.syllable_matcher.finditer(current))
            # A leading "qu" means the first match must not be considered.
            usable = found[1:] if drop_first else found
            for hit in usable:
                lo, hi = hit.span()
                vowel_set = self.constants.VOWELS + self.constants.ACCENTED_VOWELS
                if current[lo:hi] not in vowel_set:
                    continue
                # Characters occupied by the syllables before this one.
                before = whole_line[:len("".join(syllables_spaces[:number]))]
                where = len(before) + lo
                if whole_line[where] not in vowel_set:
                    # Reported but not fatal: the offset is stored regardless.
                    LOG.error("Problem at line {} offset {}".format(whole_line, where))
                accent_offsets[number] = where
        return accent_offsets
Пример #5
0
 def paras(self, fileids=None):
     """
     Yield paragraphs from the parent reader, leaving out any paragraph whose flattened
     form contains one of ``self.skip_keywords``.

     :param fileids: passed through unchanged to the parent reader's ``paras``
     :return: a generator of paragraphs, each yielded exactly as the parent produced it
     """
     for paragraph in super().paras(fileids):
         flattened = flatten(paragraph)
         # An empty or unset keyword collection disables filtering altogether.
         if self.skip_keywords and any(
                 term in flattened for term in self.skip_keywords):
             continue
         yield paragraph
Пример #6
0
 def words(self, fileids=None):
     """
     Yield the corpus one item at a time, taken from the flattened form of each paragraph
     returned by ``self.paras``; a paragraph whose flattened form contains one of
     ``self.skip_keywords`` contributes nothing.

     :param fileids: passed through unchanged to ``self.paras``
     :return: a generator over the items of every retained, flattened paragraph
     """
     for paragraph in self.paras(fileids):
         tokens = flatten(paragraph)
         # An empty or unset keyword collection disables filtering altogether.
         if self.skip_keywords and any(
                 term in tokens for term in self.skip_keywords):
             continue
         for token in tokens:
             yield token
Пример #7
0
 def paras(self, fileids=None) -> Generator[str, str, None]:
     """
     Yield paragraphs from the parent reader, leaving out any paragraph whose flattened
     form contains one of ``self.skip_keywords``.

     :param fileids: files to read; when falsy, ``self.fileids()`` is used instead
     :return: a generator of paragraphs, each yielded exactly as the parent produced it
     """
     # NOTE(review): the annotation declares str items, but paragraphs are passed through
     # untouched from the parent reader - confirm the element type.
     chosen = fileids or self.fileids()
     for paragraph in super().paras(chosen):
         flattened = flatten(paragraph)
         # An empty or unset keyword collection disables filtering altogether.
         if self.skip_keywords and any(
                 term in flattened for term in self.skip_keywords):
             continue
         yield paragraph
Пример #8
0
 def paras(self, fileids=None) -> Generator[str, str, None]:
     """
     Provide the parent reader's paragraphs, omitting every paragraph whose flattened
     form contains any entry of ``self.skip_keywords``.

     :param fileids: files to read; a falsy value falls back to ``self.fileids()``
     :return: a generator of the retained paragraphs, unmodified
     """
     source_ids = fileids if fileids else self.fileids()
     for block in super().paras(source_ids):
         flat_block = flatten(block)
         # Filtering only applies when some skip keywords are configured.
         flagged = bool(self.skip_keywords) and any(
             word in flat_block for word in self.skip_keywords)
         if flagged:
             continue
         yield block
Пример #9
0
 def words(self, fileids=None) -> Generator[str, str, None]:
     """
     Yield the corpus one item at a time, taken from the flattened form of each paragraph
     returned by ``self.paras``; a paragraph whose flattened form contains one of
     ``self.skip_keywords`` contributes nothing.

     :param fileids: files to read; when falsy, ``self.fileids()`` is used instead
     :return: a generator over the items of every retained, flattened paragraph
     """
     chosen = fileids or self.fileids()
     for paragraph in self.paras(chosen):
         tokens = flatten(paragraph)
         # An empty or unset keyword collection disables filtering altogether.
         if self.skip_keywords and any(
                 term in tokens for term in self.skip_keywords):
             continue
         for token in tokens:
             yield token