def calc_offset(self, syllables_spaces: List[str]) -> Dict[int, int]:
    """
    Calculate a dictionary of accent positions from a list of syllables with spaces.

    Every match of ``self.syllable_matcher`` inside a syllable is inspected.
    When the matched text is found in ``VOWELS + ACCENTED_VOWELS``, its
    position in the flattened line is stored under the syllable's index.
    A later vowel match in the same syllable overwrites an earlier one, so
    the last vowel match of a syllable is the one that is kept. When
    ``string_utils.starts_with_qu`` is true for a syllable, the first match
    of that syllable is skipped.

    :param syllables_spaces: the syllables (spacing included) that make up
        the line when concatenated
    :return: mapping of syllable index to the offset of its vowel in the
        flattened line; syllables without a vowel match have no entry
    """
    line = string_utils.flatten(syllables_spaces)
    # Loop invariant: build the vowel inventory once instead of per match.
    vowels = self.constants.VOWELS + self.constants.ACCENTED_VOWELS
    accent_offsets = {}  # type: Dict[int, int]
    # Running length of all syllables before the current one; this replaces
    # re-joining the whole prefix of the list for every vowel match.
    syllable_start = 0
    for idx, syllable in enumerate(syllables_spaces):
        skip_qu = string_utils.starts_with_qu(syllable)
        for match in self.syllable_matcher.finditer(syllable):
            if skip_qu:
                # Only the first match of a "qu" syllable is ignored.
                skip_qu = False
                continue
            start, end = match.span()
            if syllable[start:end] in vowels:
                offset = syllable_start + start
                # Sanity check: the flattened line should hold a vowel at
                # the computed position; a mismatch is logged, not raised.
                if line[offset] not in vowels:
                    LOG.error("Problem at line {} offset {}".format(line, offset))
                accent_offsets[idx] = offset
        syllable_start += len(syllable)
    return accent_offsets
def produce_scansion(self, stresses: list, syllables_wspaces: List[str], offset_map: Dict[int, int]) -> str:
    """
    Build the scansion string for a line: blanks everywhere, except for a
    stressed or unstressed mark at each syllable offset found in the map.

    Unstressed marks are written first and stressed marks second, so a
    stressed mark wins if both land on the same offset. Syllable positions
    missing from ``offset_map`` are left blank. Any exception raised while
    placing marks is logged and the string built so far is returned.

    :param stresses: list of stressed syllable positions
    :param syllables_wspaces: list of syllables with spaces escaped for
        punctuation or elision
    :param offset_map: dictionary of syllable positions to the offset in the
        line at which the mark for that syllable is written
    :return: the scansion string, as long as the flattened syllables
    """
    line_length = len(string_utils.flatten(syllables_wspaces))
    marks = [" "] * line_length
    unstressed_positions = string_utils.get_unstresses(stresses, len(syllables_wspaces))
    try:
        for syllable_idx in unstressed_positions:
            spot = offset_map.get(syllable_idx)
            if spot is None:
                continue
            marks[spot] = self.constants.UNSTRESSED
        for syllable_idx in stresses:
            spot = offset_map.get(syllable_idx)
            if spot is None:
                continue
            marks[spot] = self.constants.STRESSED
    except Exception as err:
        # Best effort: report the problem and return what was marked so far.
        message = "problem with syllables; check syllabification {}, {}".format(
            syllables_wspaces, err)
        LOG.error(message)
    return "".join(marks)
def produce_scansion(self, stresses: list, syllables_wspaces: List[str], offset_map: Dict[int, int]) -> str:
    """
    Render stress information as a string aligned with the original line.

    The result starts as one blank per element of the flattened syllables.
    For each unstressed position, and afterwards for each stressed position,
    the offset is looked up in ``offset_map`` and, when present, the matching
    constant is written at that offset. Because stressed positions are
    handled last they overwrite an unstressed mark on the same offset.
    Exceptions raised while writing marks are logged rather than propagated.

    :param stresses: list of stressed syllable positions
    :param syllables_wspaces: list of syllables with spaces escaped for
        punctuation or elision
    :param offset_map: dictionary of syllable positions to the offset in the
        line where the mark for that syllable belongs
    :return: the scansion string
    """
    scansion_chars = [" "] * len(string_utils.flatten(syllables_wspaces))
    unstressed = string_utils.get_unstresses(stresses, len(syllables_wspaces))
    # Order matters: unstressed marks first, stressed marks second.
    passes = ((unstressed, "UNSTRESSED"), (stresses, "STRESSED"))
    try:
        for positions, constant_name in passes:
            for position in positions:
                target = offset_map.get(position)
                if target is not None:
                    # The constant is looked up only when a mark is written.
                    scansion_chars[target] = getattr(self.constants, constant_name)
    except Exception as problem:
        LOG.error("problem with syllables; check syllabification {}, {}".format(
            syllables_wspaces, problem))
    return "".join(scansion_chars)
def calc_offset(self, syllables_spaces: List[str]) -> Dict[int, int]:
    """
    Calculate a dictionary of accent positions from a list of syllables with spaces.

    Each syllable is scanned with ``self.syllable_matcher``. A match whose
    text occurs in ``VOWELS + ACCENTED_VOWELS`` is translated into an offset
    within the flattened line and stored under the syllable's index; if a
    syllable has several such matches the last one is kept. For a syllable
    where ``string_utils.starts_with_qu`` is true, the first match is
    skipped.

    :param syllables_spaces: the syllables (spacing included) that make up
        the line when concatenated
    :return: mapping of syllable index to the offset of its vowel in the
        flattened line; syllables without a vowel match have no entry
    """
    line = string_utils.flatten(syllables_spaces)
    # Computed once: the concatenation does not change between matches.
    vowels = self.constants.VOWELS + self.constants.ACCENTED_VOWELS
    accent_offsets = {}  # type: Dict[int, int]
    # Combined length of the syllables already visited, kept as a running
    # total so the preceding syllables are not re-joined for every match.
    preceding_length = 0
    for idx, syllable in enumerate(syllables_spaces):
        skip_qu = string_utils.starts_with_qu(syllable)
        for match in self.syllable_matcher.finditer(syllable):
            if skip_qu:
                # Ignore just the first match of a "qu" syllable.
                skip_qu = False
                continue
            start, end = match.span()
            if syllable[start:end] in vowels:
                offset = preceding_length + start
                # The flattened line is expected to hold a vowel here; a
                # mismatch is reported but the offset is still recorded.
                if line[offset] not in vowels:
                    LOG.error("Problem at line {} offset {}".format(line, offset))
                accent_offsets[idx] = offset
        preceding_length += len(syllable)
    return accent_offsets
def paras(self, fileids=None):
    """
    Provide the paragraphs of the corpus, leaving out every paragraph that
    contains one of ``self.skip_keywords``.

    :param fileids: file identifiers, passed unchanged to the parent class
    :return: a generator of the paragraphs that were not skipped
    """
    for para in super().paras(fileids):
        flat_para = flatten(para)
        keywords = self.skip_keywords
        # any() stops at the first keyword found instead of testing them all.
        if keywords and any(keyword in flat_para for keyword in keywords):
            continue
        yield para
def words(self, fileids=None):
    """
    Provide the words of the corpus; skipping any paragraphs flagged by
    keywords to the main class constructor.

    :param fileids: file identifiers, passed on to ``self.paras``
    :return: words, including punctuation, one by one
    """
    for para in self.paras(fileids):
        flat_para = flatten(para)
        keywords = self.skip_keywords
        # NOTE(review): if self.paras already drops paragraphs containing a
        # skip keyword, this check never fires -- confirm before removing.
        # any() stops at the first keyword found instead of testing them all.
        if keywords and any(keyword in flat_para for keyword in keywords):
            continue
        # Explicit loop (not ``yield from``) so values sent into the
        # generator keep being ignored rather than forwarded.
        for word in flat_para:
            yield word
def paras(self, fileids=None) -> Generator[str, str, None]:
    """
    Provide paragraphs, if possible, leaving out every paragraph that
    contains one of ``self.skip_keywords``.

    :param fileids: file identifiers to read; when empty or None, all of
        ``self.fileids()`` are used
    :return: a generator of paragraphs
    """
    # NOTE(review): nothing is ever sent into this generator, so the declared
    # send type (and possibly the yield type) looks inaccurate -- confirm.
    if not fileids:
        fileids = self.fileids()
    for para in super().paras(fileids):
        flat_para = flatten(para)
        keywords = self.skip_keywords
        # any() stops at the first keyword found instead of testing them all.
        if keywords and any(keyword in flat_para for keyword in keywords):
            continue
        yield para
def words(self, fileids=None) -> Generator[str, str, None]:
    """
    Provide the words of the corpus; skipping any paragraphs flagged by
    keywords to the main class constructor.

    :param fileids: file identifiers to read; when empty or None, all of
        ``self.fileids()`` are used
    :return: words, including punctuation, one by one
    """
    # NOTE(review): nothing is ever sent into this generator, so the declared
    # send type looks like it should be None -- confirm.
    if not fileids:
        fileids = self.fileids()
    for para in self.paras(fileids):
        flat_para = flatten(para)
        keywords = self.skip_keywords
        # NOTE(review): if self.paras already drops paragraphs containing a
        # skip keyword, this check never fires -- confirm before removing.
        # any() stops at the first keyword found instead of testing them all.
        if keywords and any(keyword in flat_para for keyword in keywords):
            continue
        # Explicit loop (not ``yield from``) so values sent into the
        # generator keep being ignored rather than forwarded.
        for word in flat_para:
            yield word