def produce_scansion(self, stresses: list, syllables_wspaces: list, offset_map: dict) -> str:
    """Create a scansion string that has stressed and unstressed syllable positions in
    locations that correspond with the original text's syllable vowels.

    :param stresses: list of syllable positions
    :param syllables_wspaces: list of syllables with spaces escaped for punctuation or elision
    :param offset_map: dictionary of syllable positions, and an offset amount which is the
        number of spaces to skip in the original line before inserting the accent.
    """
    scansion = list(" " * len(StringUtils.flatten(syllables_wspaces)))
    unstresses = StringUtils.get_unstresses(stresses, len(syllables_wspaces))
    try:
        for idx in unstresses:
            location = offset_map[idx]
            if location is not None:
                scansion[location] = self.constants.UNSTRESSED
        for idx in stresses:
            location = offset_map[idx]
            if location is not None:
                scansion[location] = self.constants.STRESSED
    except Exception as e:
        print("problem with syllables; check syllabification %s %s" %
              (syllables_wspaces, e))
    return "".join(scansion)
def __init__(self, constants=ScansionConstants(), syllabifier=Syllabifier(),
             optional_transform: bool = False, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.constants = constants
    self.remove_punct_map = StringUtils.remove_punctuation_dict()
    self.punctuation_substitutions = StringUtils.punctuation_for_spaces_dict()
    self.metrical_validator = MetricalValidator(constants)
    self.formatter = ScansionFormatter(constants)
    self.syllabifier = syllabifier
    self.optional_transform = optional_transform
    self.inverted_amphibrach_re = re.compile(
        r"{}\s*{}\s*{}".format(self.constants.STRESSED,
                               self.constants.UNSTRESSED,
                               self.constants.STRESSED))
    self.syllable_matcher = re.compile(
        r"[{}]".format(self.constants.VOWELS
                       + self.constants.ACCENTED_VOWELS
                       + self.constants.LIQUIDS
                       + self.constants.MUTES))
    self.SPONDAIC_PENTAMETER = self.constants.SPONDEE + self.constants.SPONDEE + \
                               self.constants.STRESSED + self.constants.DACTYL + \
                               self.constants.DACTYL + self.constants.OPTIONAL_ENDING
    self.DACTYLIC_PENTAMETER = self.constants.DACTYL + self.constants.DACTYL + \
                               self.constants.STRESSED + self.constants.DACTYL + \
                               self.constants.DACTYL + self.constants.OPTIONAL_ENDING
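# A minimal sketch, not part of the original source: assuming the default ScansionConstants
# use "-" for STRESSED, "U" for UNSTRESSED, "--" for SPONDEE, "-UU" for DACTYL, and "X" for
# OPTIONAL_ENDING (the final anceps), the two pentameter templates built in __init__ above
# would expand as follows; the expected strings agree with the make_spondaic and make_dactyls
# doctests later in this module.
SPONDEE, STRESSED, DACTYL, OPTIONAL_ENDING = "--", "-", "-UU", "X"

spondaic_pentameter = SPONDEE + SPONDEE + STRESSED + DACTYL + DACTYL + OPTIONAL_ENDING
dactylic_pentameter = DACTYL + DACTYL + STRESSED + DACTYL + DACTYL + OPTIONAL_ENDING

print(spondaic_pentameter)   # ------UU-UUX   (12 positions: two spondees, a long, two dactyls, anceps)
print(dactylic_pentameter)   # -UU-UU--UU-UUX (14 positions: two dactyls, a long, two dactyls, anceps)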
def _move_consonant(self, letters: list, positions: list) -> list:
    """Given a list of consonant positions, move the consonants according to certain
    consonant syllable behavioral rules for gathering and grouping."""
    for pos in positions:
        previous_letter = letters[pos - 1]
        consonant = letters[pos]
        next_letter = letters[pos + 1]
        if self._contains_vowels(next_letter) and self._starts_with_vowel(next_letter):
            return StringUtils.move_consonant_right(letters, [pos])
        if self._contains_vowels(previous_letter) and self._ends_with_vowel(
                previous_letter) and len(previous_letter) == 1:
            return StringUtils.move_consonant_left(letters, [pos])
        if previous_letter + consonant in self.constants.ASPIRATES:
            return StringUtils.move_consonant_left(letters, [pos])
        if consonant + next_letter in self.constants.ASPIRATES:
            return StringUtils.move_consonant_right(letters, [pos])
        if next_letter[0] == consonant:
            return StringUtils.move_consonant_left(letters, [pos])
        if consonant in self.constants.MUTES and next_letter[0] in self.constants.LIQUIDS:
            return StringUtils.move_consonant_right(letters, [pos])
        if consonant in ['k', 'K'] and next_letter[0] in ['w', 'W']:
            return StringUtils.move_consonant_right(letters, [pos])
        if self._contains_consonants(next_letter[0]) and self._starts_with_vowel(
                previous_letter[-1]):
            return StringUtils.move_consonant_left(letters, [pos])
        # fall through case
        if self._contains_consonants(next_letter[0]):
            return StringUtils.move_consonant_right(letters, [pos])
    return letters
def _setup(self, word) -> list:
    """Prepares a word for syllable processing.
    If the word starts with a prefix, process it separately."""
    if len(word) == 1:
        return [word]
    for prefix in self.constants.PREFIXES:
        if word.startswith(prefix):
            (first, rest) = StringUtils.split_on(word, prefix)
            if self._contains_vowels(rest):
                return StringUtils.remove_blank_spaces(
                    self._process(first) + self._process(rest))
            # a word like pror can happen from elision
            return StringUtils.remove_blank_spaces(self._process(word))
    return StringUtils.remove_blank_spaces(self._process(word))
def __init__(self, constants=ScansionConstants(), syllabifier=Syllabifier(), **kwargs):
    self.constants = constants
    self.remove_punct_map = StringUtils.remove_punctuation_dict()
    self.punctuation_substitutions = StringUtils.punctuation_for_spaces_dict()
    self.metrical_validator = MetricalValidator(constants)
    self.formatter = ScansionFormatter(constants)
    self.syllabifier = syllabifier
    self.inverted_amphibrach_re = re.compile(
        r"{}\s*{}\s*{}".format(self.constants.STRESSED,
                               self.constants.UNSTRESSED,
                               self.constants.STRESSED))
    self.syllable_matcher = re.compile(
        r"[{}]".format(self.constants.VOWELS
                       + self.constants.ACCENTED_VOWELS
                       + self.constants.LIQUIDS
                       + self.constants.MUTES))
def get_syllable_count(self, syllables: list) -> int:
    """Counts the number of syllable groups that would occur after elision.

    Often we will want to preserve the position and separation of syllables so that they
    can be used to reconstitute a line, and apply stresses to the original word positions.
    However, we also want to be able to count the number of syllables accurately.

    >>> syllabifier = Syllabifier()
    >>> print(syllabifier.get_syllable_count([
    ...     'Jām', 'tūm', 'c', 'au', 'sus', 'es', 'u', 'nus', 'I', 'ta', 'lo', 'rum']))
    11
    """
    tmp_syllables = copy.deepcopy(syllables)
    return len(StringUtils.remove_blank_spaces(
        StringUtils.move_consonant_right(tmp_syllables,
                                         self._find_solo_consonant(tmp_syllables))))
def correct_first_two_dactyls(self, scansion: str) -> str:
    """If a hexameter or pentameter starts with a spondee, an unstressed syllable in the
    third position must actually be stressed, so we will convert it: - - | U   ->   - - | -
    And/or if the starting pattern is spondee + trochee + stressed, then the unstressed
    trochee can be corrected: - - | - u | -   ->   - - | - - | -

    :param scansion:
    :return:

    >>> print(VerseScanner().correct_first_two_dactyls(
    ...     " - - U U - - U U U U U U - -")) # doctest: +NORMALIZE_WHITESPACE
    - - - - - - U U U U U U - -
    """
    mark_list = StringUtils.mark_list(scansion)
    new_line = self.correct_invalid_start(scansion)
    raw_scansion = new_line.replace(" ", "")
    if raw_scansion.startswith(self.constants.SPONDEE + self.constants.TROCHEE
                               + self.constants.STRESSED):
        new_scansion = list(self.constants.SPONDEE + self.constants.SPONDEE
                            + self.constants.STRESSED + raw_scansion[5:])
        corrected = "".join(new_scansion)
        new_sequence = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_sequence[mark_list[idx]] = car
        return "".join(new_sequence)
    return new_line
def accent_by_position(self, verse: str) -> str:
    """:param verse: a line of unaccented hexameter verse
    :return: the same line with vowels accented by position

    >>> print(HexameterScanner().accent_by_position(
    ...     "Arma virumque cano, Troiae qui primus ab oris").lstrip())
    Ārma virūmque canō Trojae quī primus ab oris
    """
    line = verse.translate(self.punctuation_substitutions)
    line = self.transform_i_to_j(line)
    marks = list(line)
    # Vowels followed by 2 consonants
    # The digraphs ch, ph, th, qu and sometimes gu and su count as single consonants.
    # see http://people.virginia.edu/~jdk3t/epicintrog/scansion.htm
    marks = StringUtils.overwrite(marks,
                                  "[{}][{}][{}]".format(self.constants.VOWELS,
                                                        self.constants.CONSONANTS,
                                                        self.constants.CONSONANTS_WO_H),
                                  self.constants.STRESSED)
    # one space (or more, for 'dropped' punctuation) may intervene
    marks = StringUtils.overwrite(marks,
                                  r"[{}][{}]\s*[{}]".format(self.constants.VOWELS,
                                                            self.constants.CONSONANTS,
                                                            self.constants.CONSONANTS_WO_H),
                                  self.constants.STRESSED)
    # ... if both consonants are in the next word, the vowel may be long
    # ... but it could be short if the vowel is not on the thesis/emphatic part of the foot
    # ... see Gildersleeve and Lodge p.446
    marks = StringUtils.overwrite(marks,
                                  r"[{}]\s*[{}][{}]".format(self.constants.VOWELS,
                                                            self.constants.CONSONANTS,
                                                            self.constants.CONSONANTS_WO_H),
                                  self.constants.STRESSED)
    # x is considered as two letters
    marks = StringUtils.overwrite(marks, "[{}][xX]".format(self.constants.VOWELS),
                                  self.constants.STRESSED)
    # z is considered as two letters
    marks = StringUtils.overwrite(marks, r"[{}][zZ]".format(self.constants.VOWELS),
                                  self.constants.STRESSED)
    original_verse = list(line)
    for idx, word in enumerate(original_verse):
        if marks[idx] == self.constants.STRESSED:
            original_verse[idx] = self.constants.VOWELS_TO_ACCENTS[original_verse[idx]]
    return "".join(original_verse)
def __init__(self, constants=ScansionConstants(), syllabifier=Syllabifier()):
    self.constants = constants
    self.remove_punct_map = StringUtils.remove_punctuation_dict()
    self.punctuation_substitutions = StringUtils.punctuation_for_spaces_dict()
    self.metrical_validator = MetricalValidator(constants)
    self.formatter = ScansionFormatter(constants)
    self.syllabifier = syllabifier
    self.inverted_amphibrach_re = re.compile(
        r"{}\s*{}\s*{}".format(self.constants.STRESSED,
                               self.constants.UNSTRESSED,
                               self.constants.STRESSED))
    self.syllable_matcher = re.compile(
        r"[{}]".format(self.constants.VOWELS
                       + self.constants.ACCENTED_VOWELS
                       + self.constants.LIQUIDS
                       + self.constants.MUTES))
def flag_dipthongs(self, syllables: list) -> list:
    """Return a list of positions of syllables that contain a diphthong."""
    long_positions = []
    for idx, syl in enumerate(syllables):
        for dipthong in self.constants.DIPTHONGS:
            if dipthong in syllables[idx]:
                if not StringUtils.starts_with_qu(syllables[idx]):
                    long_positions.append(idx)
    return long_positions
def transform_i_to_j_optional(self, line: str) -> str:
    """Sometimes for the demands of meter a more permissive i to j transformation is warranted.

    :param line:
    :return:

    >>> print(HexameterScanner().transform_i_to_j_optional("Italiam"))
    Italjam
    >>> print(HexameterScanner().transform_i_to_j_optional("Lāvīniaque"))
    Lāvīnjaque
    >>> print(HexameterScanner().transform_i_to_j_optional("omnium"))
    omnjum
    """
    words = line.split(" ")
    space_list = StringUtils.space_list(line)
    corrected_words = []
    for word in words:
        found = False
        for prefix in self.constants.PREFIXES:
            if word.startswith(prefix) and word != prefix:
                corrected_words.append(self.syllabifier.convert_consonantal_i(prefix))
                corrected_words.append(
                    self.syllabifier.convert_consonantal_i(word[len(prefix):]))
                found = True
                break
        if not found:
            corrected_words.append(self.syllabifier.convert_consonantal_i(word))
    new_line = StringUtils.join_syllables_spaces(corrected_words, space_list)
    # the following two may be tunable and subject to improvement
    char_list = StringUtils.overwrite(
        list(new_line),
        "[bcdfgjkmpqrstvwxzBCDFGHJKMPQRSTVWXZ][i][{}]".format(self.constants.VOWELS_WO_I),
        "j", 1)
    char_list = StringUtils.overwrite(
        char_list,
        "[{}][iI][{}]".format(self.constants.LIQUIDS, self.constants.VOWELS_WO_I),
        "j", 1)
    return "".join(char_list)
def _setup(self, word) -> list:
    """Prepares a word for syllable processing.
    If the word starts with a prefix, process it separately."""
    if len(word) == 1:
        return [word]
    for prefix in self.constants.PREFIXES:
        if word.startswith(prefix):
            (first, rest) = StringUtils.split_on(word, prefix)
            if self._contains_vowels(rest):
                return StringUtils.remove_blank_spaces(
                    self._process(first) + self._process(rest))
            # a word like pror can happen from elision
            return StringUtils.remove_blank_spaces(self._process(word))
    if word in self.constants.UI_EXCEPTIONS.keys():
        return self.constants.UI_EXCEPTIONS[word]
    return StringUtils.remove_blank_spaces(self._process(word))
def __init__(self, constants=ScansionConstants()):
    self.constants = constants
    self.consonant_matcher = re.compile("[{}]".format(constants.CONSONANTS))
    self.vowel_matcher = re.compile(
        "[{}]".format(constants.VOWELS + constants.ACCENTED_VOWELS))
    self.consonantal_i_matcher = re.compile(
        r"\b[iIīĪ][{}]".format(constants.VOWELS + constants.ACCENTED_VOWELS))
    self.remove_punct_map = StringUtils.remove_punctuation_dict()
    self.kw_matcher = re.compile("[kK][w]")
    self.ACCEPTABLE_CHARS = constants.ACCENTED_VOWELS + constants.VOWELS + ' ' \
                            + constants.CONSONANTS
def __init__(self, constants=ScansionConstants()):
    self.constants = constants
    self.consonant_matcher = re.compile("[{}]".format(constants.CONSONANTS))
    self.vowel_matcher = re.compile(
        "[{}]".format(constants.VOWELS + constants.ACCENTED_VOWELS))
    self.consonantal_i_matcher = re.compile(
        r"\b[iIīĪ][{}]".format(constants.VOWELS + constants.ACCENTED_VOWELS))
    self.remove_punct_map = StringUtils.remove_punctuation_dict()
    self.kw_matcher = re.compile("[kK][w]")
    self.ACCEPTABLE_CHARS = constants.ACCENTED_VOWELS + constants.VOWELS + ' ' \
                            + constants.CONSONANTS
    self.diphthongs = [d for d in constants.DIPTHONGS if d not in ["ui", "Ui", "uī"]]
def calc_offset(self, syllables_spaces: list) -> dict:
    """Calculate a dictionary of accent positions from a list of syllables with spaces."""
    line = StringUtils.flatten(syllables_spaces)
    mydict = defaultdict(lambda: None)
    for idx, syl in enumerate(syllables_spaces):
        target_syllable = syllables_spaces[idx]
        skip_qu = StringUtils.starts_with_qu(target_syllable)
        matches = list(self.syllable_matcher.finditer(target_syllable))
        for position, possible in enumerate(matches):
            if skip_qu:
                skip_qu = False
                continue
            (start, end) = possible.span()
            if target_syllable[start:end] in \
                    self.constants.VOWELS + self.constants.ACCENTED_VOWELS:
                part = line[:len("".join(syllables_spaces[:idx]))]
                offset = len(part) + start
                if line[offset] not in self.constants.VOWELS + self.constants.ACCENTED_VOWELS:
                    print("Problem at line %s offset %s" % (line, offset))
                mydict[idx] = offset
    return mydict
def calc_offset(self, syllables_spaces: list) -> dict:
    """Calculate a dictionary of accent positions from a list of syllables with spaces."""
    line = StringUtils.flatten(syllables_spaces)
    mydict = defaultdict(lambda: None)
    for idx, syl in enumerate(syllables_spaces):
        target_syllable = syllables_spaces[idx]
        skip_qu = StringUtils.starts_with_qu(target_syllable)
        matches = list(self.syllable_matcher.finditer(target_syllable))
        for position, possible in enumerate(matches):
            if skip_qu:
                skip_qu = False
                continue
            (start, end) = possible.span()
            if target_syllable[start:end] in \
                    self.constants.VOWELS + self.constants.ACCENTED_VOWELS:
                part = line[:len("".join(syllables_spaces[:idx]))]
                offset = len(part) + start
                if line[offset] not in self.constants.VOWELS + self.constants.ACCENTED_VOWELS:
                    LOG.error("Problem at line {} offset {}".format(line, offset))
                mydict[idx] = offset
    return mydict
def transform_i_to_j(self, line: str) -> str:
    """Transform instances of consonantal i to j

    :param line:
    :return:

    >>> print(HexameterScanner().transform_i_to_j("iactātus"))
    jactātus
    >>> print(HexameterScanner().transform_i_to_j("bracchia"))
    bracchia
    """
    words = line.split(" ")
    space_list = StringUtils.space_list(line)
    corrected_words = []
    for word in words:
        found = False
        for prefix in self.constants.PREFIXES:
            if word.startswith(prefix) and word != prefix:
                corrected_words.append(self.syllabifier.convert_consonantal_i(prefix))
                corrected_words.append(
                    self.syllabifier.convert_consonantal_i(word[len(prefix):]))
                found = True
                break
        if not found:
            corrected_words.append(self.syllabifier.convert_consonantal_i(word))
    new_line = StringUtils.join_syllables_spaces(corrected_words, space_list)
    char_list = StringUtils.overwrite(
        list(new_line),
        r"\b[iī][{}]".format(self.constants.VOWELS + self.constants.ACCENTED_VOWELS),
        "j")
    char_list = StringUtils.overwrite(
        char_list, r"\b[I][{}]".format(self.constants.VOWELS_WO_I), "J")
    char_list = StringUtils.overwrite(
        char_list,
        r"[{}][i][{}]".format(self.constants.VOWELS_WO_I, self.constants.VOWELS),
        "j", 1)
    return "".join(char_list)
def _process(self, word: str) -> list:
    """Process a word into a list of strings representing the syllables of the word.

    This method describes rules for consonant grouping behaviors and then iteratively
    applies those rules to the list of letters that comprise the word, until all the
    letters are grouped into appropriate syllable groups."""
    # if a blank arrives from splitting, just return an empty list
    if len(word.strip()) == 0:
        return []
    word = self.convert_consonantal_i(word)
    my_word = " " + word + " "
    letters = list(my_word)
    positions = []
    for dipth in self.diphthongs:
        if dipth in my_word:
            dipth_matcher = re.compile("{}".format(dipth))
            matches = dipth_matcher.finditer(my_word)
            for match in matches:
                (start, end) = match.span()
                positions.append(start)
    matches = self.kw_matcher.finditer(my_word)
    for match in matches:
        (start, end) = match.span()
        positions.append(start)
    letters = StringUtils.merge_next(letters, positions)
    letters = StringUtils.remove_blanks(letters)
    positions.clear()
    if not self._contains_vowels("".join(letters)):
        return ["".join(letters).strip()]  # occurs when only 'qu' appears by elision
    positions = self._starting_consonants_only(letters)
    while len(positions) > 0:
        letters = StringUtils.move_consonant_right(letters, positions)
        letters = StringUtils.remove_blanks(letters)
        positions = self._starting_consonants_only(letters)
    positions = self._ending_consonants_only(letters)
    while len(positions) > 0:
        letters = StringUtils.move_consonant_left(letters, positions)
        letters = StringUtils.remove_blanks(letters)
        positions = self._ending_consonants_only(letters)
    positions = self._find_solo_consonant(letters)
    while len(positions) > 0:
        letters = self._move_consonant(letters, positions)
        letters = StringUtils.remove_blanks(letters)
        positions = self._find_solo_consonant(letters)
    positions = self._find_consonant_cluster(letters)
    while len(positions) > 0:
        letters = self._move_consonant(letters, positions)
        letters = StringUtils.remove_blanks(letters)
        positions = self._find_consonant_cluster(letters)
    return letters
def _process(self, word: str) -> list:
    """Process a word into a list of strings representing the syllables of the word.

    This method describes rules for consonant grouping behaviors and then iteratively
    applies those rules to the list of letters that comprise the word, until all the
    letters are grouped into appropriate syllable groups."""
    # if a blank arrives from splitting, just return an empty list
    if len(word.strip()) == 0:
        return []
    my_word = " " + word + " "
    letters = list(my_word)
    positions = []
    for dipth in self.diphthongs:
        if dipth in my_word:
            dipth_matcher = re.compile("{}".format(dipth))
            matches = dipth_matcher.finditer(my_word)
            for match in matches:
                (start, end) = match.span()
                positions.append(start)
    matches = self.kw_matcher.finditer(my_word)
    for match in matches:
        (start, end) = match.span()
        positions.append(start)
    letters = StringUtils.merge_next(letters, positions)
    letters = StringUtils.remove_blanks(letters)
    positions.clear()
    if not self._contains_vowels("".join(letters)):
        return ["".join(letters).strip()]  # occurs when only 'qu' appears by elision
    positions = self._starting_consonants_only(letters)
    while len(positions) > 0:
        letters = StringUtils.move_consonant_right(letters, positions)
        letters = StringUtils.remove_blanks(letters)
        positions = self._starting_consonants_only(letters)
    positions = self._ending_consonants_only(letters)
    while len(positions) > 0:
        letters = StringUtils.move_consonant_left(letters, positions)
        letters = StringUtils.remove_blanks(letters)
        positions = self._ending_consonants_only(letters)
    positions = self._find_solo_consonant(letters)
    while len(positions) > 0:
        letters = self._move_consonant(letters, positions)
        letters = StringUtils.remove_blanks(letters)
        positions = self._find_solo_consonant(letters)
    positions = self._find_consonant_cluster(letters)
    while len(positions) > 0:
        letters = self._move_consonant(letters, positions)
        letters = StringUtils.remove_blanks(letters)
        positions = self._find_consonant_cluster(letters)
    return letters
def make_dactyls(self, scansion: str) -> str:
    """If a pentameter line has 14 syllables, it starts and ends with double dactyls.

    >>> print(PentameterScanner().make_dactyls("U U U U U U U U U U U U U U"))
    - U U - U U - - U U - U U U
    """
    mark_list = StringUtils.mark_list(scansion)
    vals = list(scansion.replace(" ", ""))
    new_vals = self.DACTYLIC_PENTAMETER[:-1] + vals[-1]
    corrected = "".join(new_vals)
    new_line = list(" " * len(scansion))
    for idx, car in enumerate(corrected):
        new_line[mark_list[idx]] = car
    return "".join(new_line)
def make_spondaic(self, scansion: str) -> str:
    """If a pentameter line has 12 syllables, then it must start with double spondees.

    >>> print(PentameterScanner().make_spondaic("U U U U U U U U U U U U"))
    - - - - - - U U - U U U
    """
    mark_list = StringUtils.mark_list(scansion)
    vals = list(scansion.replace(" ", ""))
    new_vals = self.SPONDAIC_PENTAMETER[:-1] + vals[-1]
    corrected = "".join(new_vals)
    new_line = list(" " * len(scansion))
    for idx, car in enumerate(corrected):
        new_line[mark_list[idx]] = car
    return "".join(new_line)
def produce_scansion(self, stresses: list, syllables_wspaces: list, offset_map: dict) -> str:
    """Create a scansion string that has stressed and unstressed syllable positions in
    locations that correspond with the original text's syllable vowels.

    :param stresses: list of syllable positions
    :param syllables_wspaces: list of syllables with spaces escaped for punctuation or elision
    :param offset_map: dictionary of syllable positions, and an offset amount which is the
        number of spaces to skip in the original line before inserting the accent.
    """
    scansion = list(" " * len(StringUtils.flatten(syllables_wspaces)))
    unstresses = StringUtils.get_unstresses(stresses, len(syllables_wspaces))
    try:
        for idx in unstresses:
            location = offset_map[idx]
            if location is not None:
                scansion[location] = self.constants.UNSTRESSED
        for idx in stresses:
            location = offset_map[idx]
            if location is not None:
                scansion[location] = self.constants.STRESSED
    except Exception as e:
        LOG.error("problem with syllables; check syllabification {}, {}".format(
            syllables_wspaces, e))
    return "".join(scansion)
def transform_i_to_j_optional(self, line: str) -> str:
    """Sometimes for the demands of meter a more permissive i to j transformation is warranted.

    :param line:
    :return:

    >>> print(VerseScanner().transform_i_to_j_optional("Italiam"))
    Italjam
    >>> print(VerseScanner().transform_i_to_j_optional("Lāvīniaque"))
    Lāvīnjaque
    >>> print(VerseScanner().transform_i_to_j_optional("omnium"))
    omnjum
    """
    words = line.split(" ")
    space_list = StringUtils.space_list(line)
    corrected_words = []
    for word in words:
        found = False
        for prefix in self.constants.PREFIXES:
            if word.startswith(prefix) and word != prefix:
                corrected_words.append(self.syllabifier.convert_consonantal_i(prefix))
                corrected_words.append(
                    self.syllabifier.convert_consonantal_i(word[len(prefix):]))
                found = True
                break
        if not found:
            corrected_words.append(self.syllabifier.convert_consonantal_i(word))
    new_line = StringUtils.join_syllables_spaces(corrected_words, space_list)
    # the following two may be tunable and subject to improvement
    char_list = StringUtils.overwrite(
        list(new_line),
        "[bcdfgjkmpqrstvwxzBCDFGHJKMPQRSTVWXZ][i][{}]".format(self.constants.VOWELS_WO_I),
        "j", 1)
    char_list = StringUtils.overwrite(
        char_list,
        "[{}][iI][{}]".format(self.constants.LIQUIDS, self.constants.VOWELS_WO_I),
        "j", 1)
    return "".join(char_list)
def transform_i_to_j(self, line: str) -> str:
    """Transform instances of consonantal i to j

    :param line:
    :return:

    >>> print(VerseScanner().transform_i_to_j("iactātus"))
    jactātus
    >>> print(VerseScanner().transform_i_to_j("bracchia"))
    bracchia
    """
    words = line.split(" ")
    space_list = StringUtils.space_list(line)
    corrected_words = []
    for word in words:
        found = False
        for prefix in self.constants.PREFIXES:
            if word.startswith(prefix) and word != prefix:
                corrected_words.append(self.syllabifier.convert_consonantal_i(prefix))
                corrected_words.append(
                    self.syllabifier.convert_consonantal_i(word[len(prefix):]))
                found = True
                break
        if not found:
            corrected_words.append(self.syllabifier.convert_consonantal_i(word))
    new_line = StringUtils.join_syllables_spaces(corrected_words, space_list)
    char_list = StringUtils.overwrite(
        list(new_line),
        r"\b[iī][{}]".format(self.constants.VOWELS + self.constants.ACCENTED_VOWELS),
        "j")
    char_list = StringUtils.overwrite(
        char_list, r"\b[I][{}]".format(self.constants.VOWELS_WO_I), "J")
    char_list = StringUtils.overwrite(
        char_list,
        r"[{}][i][{}]".format(self.constants.VOWELS_WO_I, self.constants.VOWELS),
        "j", 1)
    return "".join(char_list)
def correct_invalid_start(self, scansion: str) -> str:
    """The third syllable of a hendecasyllabic line is long, so we will convert it.

    :param scansion:
    :return: scansion string with corrected start

    >>> print(HendecasyllableScanner().correct_invalid_start(
    ...     "- U U U U - U - U - U").strip())
    - U - U U - U - U - U
    """
    mark_list = StringUtils.mark_list(scansion)
    vals = list(scansion.replace(" ", ""))
    corrected = vals[:2] + [self.constants.STRESSED] + vals[3:]
    new_line = list(" " * len(scansion))
    for idx, car in enumerate(corrected):
        new_line[mark_list[idx]] = car
    return "".join(new_line)
def correct_penultimate_dactyl_chain(self, scansion: str) -> str:
    """For pentameter the last two feet of the verse are predictable dactyls,
    and do not regularly allow substitutions.

    :param scansion: scansion line thus far
    :return: corrected line of scansion

    >>> print(PentameterScanner().correct_penultimate_dactyl_chain(
    ...     "U U U U U U U U U U U U U U"))
    U U U U U U U - U U - U U U
    """
    mark_list = StringUtils.mark_list(scansion)
    vals = list(scansion.replace(" ", ""))
    n_vals = vals[:-7] + [self.constants.DACTYL + self.constants.DACTYL] + [vals[-1]]
    corrected = "".join(n_vals)
    new_line = list(" " * len(scansion))
    for idx, car in enumerate(corrected):
        new_line[mark_list[idx]] = car
    return "".join(new_line)
def elide_all(self, line: str) -> str:
    """Given a string of space separated syllables, erase with spaces the syllable portions
    that would disappear according to the rules of elision."""
    marks = list(line.translate(self.remove_punct_map))
    all_vowels = self.constants.VOWELS + self.constants.ACCENTED_VOWELS
    tmp = "".join(marks)
    # Elision rules are compound but not cumulative: we place all elision edits into a list
    # of candidates, and then merge, taking the least of each section of the line.
    candidates = [tmp,
                  self.elide(tmp, r"[{}][{}]\s+[{}]".format(self.constants.CONSONANTS,
                                                            all_vowels, all_vowels), 1, 1),
                  self.elide(tmp, r"[{}][{}]\s+[hH]".format(self.constants.CONSONANTS,
                                                            all_vowels), 1, 1),
                  self.elide(tmp, r"[aāuū]m\s+[{}]".format(all_vowels), 2),
                  self.elide(tmp, r"ae\s+[{}]".format(all_vowels), 2),
                  self.elide(tmp, r"[{}]\s+[{}]".format(all_vowels, all_vowels), 1),
                  self.elide(tmp, r"[uū]m\s+h", 2)]
    results = StringUtils.merge_elisions(candidates)
    return results
def correct_invalid_start(self, scansion: str) -> str:
    """If a hexameter, hendecasyllable, or pentameter scansion starts with a spondee,
    an unstressed syllable in the third position must actually be stressed,
    so we will convert it: - - | U   ->   - - | -

    :param scansion:
    :return:

    >>> print(VerseScanner().correct_invalid_start(
    ...     " - - U U - - U U U U U U - -").strip())
    - - - - - - U U U U U U - -
    """
    mark_list = StringUtils.mark_list(scansion)
    raw_scansion = scansion.replace(" ", "")
    if raw_scansion.startswith(self.constants.SPONDEE + self.constants.UNSTRESSED):
        new_scansion = list(self.constants.SPONDEE + self.constants.SPONDEE + raw_scansion[4:])
        corrected = "".join(new_scansion)
        new_sequence = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_sequence[mark_list[idx]] = car
        return "".join(new_sequence)
    return scansion
def correct_antepenult_chain(self, scansion: str) -> str:
    """For hendecasyllables the last three feet of the verse are predictable
    and do not regularly allow substitutions.

    :param scansion: scansion line thus far
    :return: corrected line of scansion

    >>> print(HendecasyllableScanner().correct_antepenult_chain(
    ...     "-U -UU UU UU UX").strip())
    -U -UU -U -U -X
    """
    mark_list = StringUtils.mark_list(scansion)
    vals = list(scansion.replace(" ", ""))
    new_vals = vals[:len(vals) - 6] + [self.constants.TROCHEE
                                       + self.constants.TROCHEE
                                       + self.constants.STRESSED] + vals[-1:]
    corrected = "".join(new_vals)
    new_line = list(" " * len(scansion))
    for idx, car in enumerate(corrected):
        new_line[mark_list[idx]] = car
    return "".join(new_line)
def scan(self, original_line: str, optional_transform: bool = False) -> Verse:
    """Scan a line of Latin pentameter and produce a scansion pattern, and other data.

    >>> scanner = PentameterScanner()
    >>> print(scanner.scan('ex hoc ingrato gaudia amore tibi.'))
    Verse(original='ex hoc ingrato gaudia amore tibi.', scansion='- - - - - - U U - U U U ', meter='pentameter', valid=True, syllable_count=12, accented='ēx hōc īngrātō gaudia amōre tibi.', scansion_notes=['Spondaic pentameter'], syllables = ['ēx', 'hoc', 'īn', 'gra', 'to', 'gau', 'di', 'a', 'mo', 're', 'ti', 'bi'])
    >>> print(scanner.scan(
    ...     "in vento et rapida scribere oportet aqua.").scansion) # doctest: +NORMALIZE_WHITESPACE
    - - - U U - - U U - U U U
    """
    verse = Verse(original_line, meter='pentameter')
    # replace punctuation with spaces
    line = original_line.translate(self.punctuation_substitutions)
    # conservative i to j
    line = self.transform_i_to_j(line)
    working_line = self.elide_all(line)
    working_line = self.accent_by_position(working_line)
    syllables = self.syllabifier.syllabify(working_line)
    if optional_transform:
        working_line = self.transform_i_to_j_optional(line)
        working_line = self.elide_all(working_line)
        working_line = self.accent_by_position(working_line)
        syllables = self.syllabifier.syllabify(working_line)
        verse.scansion_notes += [self.constants.NOTE_MAP["optional i to j"]]
    verse.working_line = working_line
    verse.syllable_count = self.syllabifier.get_syllable_count(syllables)
    verse.syllables = syllables
    if verse.syllable_count < 12:
        verse.valid = False
        verse.scansion_notes += [self.constants.NOTE_MAP["< 12p"]]
        return verse
    stresses = self.flag_dipthongs(syllables)
    syllables_wspaces = StringUtils.to_syllables_with_trailing_spaces(working_line, syllables)
    offset_map = self.calc_offset(syllables_wspaces)
    for idx, syl in enumerate(syllables):
        for accented in self.constants.ACCENTED_VOWELS:
            if accented in syl:
                stresses.append(idx)
    # first syllable is always long in pentameter
    stresses.append(0)
    # second to last syllable is always long
    stresses.append(verse.syllable_count - 2)
    verse.scansion = self.produce_scansion(stresses, syllables_wspaces, offset_map)
    if len(StringUtils.stress_positions(self.constants.STRESSED, verse.scansion)) != \
            len(set(stresses)):
        verse.valid = False
        verse.scansion_notes += [self.constants.NOTE_MAP["invalid syllables"]]
        return verse
    if self.metrical_validator.is_valid_pentameter(verse.scansion):
        verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
        return self.assign_candidate(verse, verse.scansion)
    # identify some obvious and probable choices based on number of syllables
    if verse.syllable_count == 12:  # produce spondees where possible
        candidate = self.make_spondaic(verse.scansion)
        verse.scansion_notes += [self.constants.NOTE_MAP["12p"]]
        return self.assign_candidate(verse, candidate)
    if verse.syllable_count == 14:  # produce dactyls where possible
        candidate = self.make_dactyls(verse.scansion)
        verse.scansion_notes += [self.constants.NOTE_MAP["14p"]]
        return self.assign_candidate(verse, candidate)
    if verse.syllable_count > 14:
        verse.valid = False
        verse.scansion_notes += [self.constants.NOTE_MAP["> 14"]]
        return verse
    smoothed = self.correct_first_two_dactyls(verse.scansion)
    if distance(verse.scansion, smoothed) > 0:
        verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
        verse.scansion = smoothed
        stresses += StringUtils.differences(verse.scansion, smoothed)
    if self.metrical_validator.is_valid_pentameter(verse.scansion):
        return self.assign_candidate(verse, verse.scansion)
    smoothed = self.correct_penultimate_dactyl_chain(verse.scansion)
    if distance(verse.scansion, smoothed) > 0:
        verse.scansion_notes += [self.constants.NOTE_MAP["penultimate dactyl chain"]]
        verse.scansion = smoothed
        stresses += StringUtils.differences(verse.scansion, smoothed)
    if self.metrical_validator.is_valid_pentameter(verse.scansion):
        return self.assign_candidate(verse, verse.scansion)
    candidates = self.metrical_validator.closest_pentameter_patterns(verse.scansion)
    if candidates is not None:
        if len(candidates) == 1 \
                and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                and len(StringUtils.differences(verse.scansion, candidates[0])) == 1:
            tmp_scansion = self.produce_scansion(
                StringUtils.differences(verse.scansion, candidates[0]),
                syllables_wspaces, offset_map)
            if self.metrical_validator.is_valid_pentameter(tmp_scansion):
                verse.scansion_notes += [self.constants.NOTE_MAP["closest match"]]
                return self.assign_candidate(verse, tmp_scansion)
    # if the line doesn't scan "as is", it may scan if the optional i to j transformations
    # are made, so here we set them and try again.
    if self.optional_transform and not verse.valid:
        return self.scan(original_line, optional_transform=True)
    verse.accented = self.formatter.merge_line_scansion(verse.original, verse.scansion)
    return verse
def correct_dactyl_chain(self, scansion: str) -> str:
    """Three or more unstressed accents in a row is a broken dactyl chain,
    best detected and processed backwards.

    Since this method takes a Procrustean approach to modifying the scansion pattern,
    it is not used by default in the scan method; however, it is available as an optional
    keyword parameter, and users looking to further automate the generation of scansion
    candidates should consider using this as a fall back.

    :param scansion: scansion with broken dactyl chain; inverted amphibrachs not allowed
    :return: corrected line of scansion

    >>> print(HexameterScanner().correct_dactyl_chain(
    ...     "- U U - - U U - - - U U - x").strip())
    - - - - - U U - - - U U - x
    >>> print(HexameterScanner().correct_dactyl_chain(
    ...     "- U U U U - - - - - U U - U").strip())
    - - - U U - - - - - U U - U
    """
    mark_list = StringUtils.mark_list(scansion)
    vals = list(scansion.replace(" ", ""))
    # ignore last two positions, save them
    feet = [vals.pop(), vals.pop()]
    length = len(vals)
    idx = length - 1
    while idx > 0:
        one = vals[idx]
        two = vals[idx - 1]
        if idx > 1:
            three = vals[idx - 2]
        else:
            three = ""
        # Dactyl foot is okay, no corrections
        if one == self.constants.UNSTRESSED and \
                two == self.constants.UNSTRESSED and \
                three == self.constants.STRESSED:
            feet += [one]
            feet += [two]
            feet += [three]
            idx -= 3
            continue
        # Spondee foot is okay, no corrections
        if one == self.constants.STRESSED and \
                two == self.constants.STRESSED:
            feet += [one]
            feet += [two]
            idx -= 2
            continue
        # handle "U U U" foot as "- U U"
        if one == self.constants.UNSTRESSED and \
                two == self.constants.UNSTRESSED and \
                three == self.constants.UNSTRESSED:
            feet += [one]
            feet += [two]
            feet += [self.constants.STRESSED]
            idx -= 3
            continue
        # handle "U U -" foot as "- -"
        if one == self.constants.STRESSED and \
                two == self.constants.UNSTRESSED and \
                three == self.constants.UNSTRESSED:
            feet += [self.constants.STRESSED]
            feet += [self.constants.STRESSED]
            idx -= 2
            continue
        # handle "- U" foot as "- -"
        if one == self.constants.UNSTRESSED and \
                two == self.constants.STRESSED:
            feet += [self.constants.STRESSED]
            feet += [two]
            idx -= 2
            continue
    corrected = "".join(feet[::-1])
    new_line = list(" " * len(scansion))
    for idx, car in enumerate(corrected):
        new_line[mark_list[idx]] = car
    return "".join(new_line)
def scan(self, original_line: str, optional_transform: bool = False,
         dactyl_smoothing: bool = False) -> Verse:
    """Scan a line of Latin hexameter and produce a scansion pattern, and other data.

    >>> scanner = HexameterScanner()
    >>> print(scanner.scan("impulerit. Tantaene animis caelestibus irae?"))
    Verse(original='impulerit. Tantaene animis caelestibus irae?', scansion='- U U - - - U U - - - U U - - ', meter='hexameter', valid=True, syllable_count=15, accented='īmpulerīt. Tāntaene animīs caelēstibus īrae?', scansion_notes=['Valid by positional stresses.'], syllables = ['īm', 'pu', 'le', 'rīt', 'Tān', 'taen', 'a', 'ni', 'mīs', 'cae', 'lēs', 'ti', 'bus', 'i', 'rae'])
    >>> print(scanner.scan(
    ...     "Arma virumque cano, Troiae qui prīmus ab ōrīs").scansion) # doctest: +NORMALIZE_WHITESPACE
    - U U - U U - - - - - U U - -
    >>> # some hexameters need the optional transformations:
    >>> optional_transform_scanner = HexameterScanner(optional_transform=True)
    >>> print(optional_transform_scanner.scan(
    ...     "Ītaliam, fāto profugus, Lāvīniaque vēnit").scansion) # doctest: +NORMALIZE_WHITESPACE
    - - - - - U U - - - U U - U
    >>> print(HexameterScanner().scan(
    ...     "lītora, multum ille et terrīs iactātus et alto").scansion) # doctest: +NORMALIZE_WHITESPACE
    - U U - - - - - - - U U - U
    >>> print(HexameterScanner().scan(
    ...     "vī superum saevae memorem Iūnōnis ob īram;").scansion) # doctest: +NORMALIZE_WHITESPACE
    - U U - - - U U - - - U U - U
    >>> # handle multiple elisions
    >>> print(scanner.scan("monstrum horrendum, informe, ingens, cui lumen ademptum").scansion) # doctest: +NORMALIZE_WHITESPACE
    - - - - - - - - - U U - U
    >>> # if we have 17 syllables, create a chain of all dactyls
    >>> print(scanner.scan("quadrupedante putrem sonitu quatit ungula campum"
    ...     ).scansion) # doctest: +NORMALIZE_WHITESPACE
    - U U - U U - U U - U U - U U - U
    >>> # if we have 13 syllables exactly, we'll create a spondaic hexameter
    >>> print(HexameterScanner().scan(
    ...     "illi inter sese multa vi bracchia tollunt").scansion)  # doctest: +NORMALIZE_WHITESPACE
    - - - - - - - - - UU - -
    >>> print(HexameterScanner().scan(
    ...     "dat latus; insequitur cumulo praeruptus aquae mons").scansion) # doctest: +NORMALIZE_WHITESPACE
    - U U - U U - U U - - - U U - -
    >>> print(optional_transform_scanner.scan(
    ...     "Non quivis videt inmodulata poëmata iudex").scansion) # doctest: +NORMALIZE_WHITESPACE
    - - - U U - U U - U U- U U - -
    >>> print(HexameterScanner().scan(
    ...     "certabant urbem Romam Remoramne vocarent").scansion) # doctest: +NORMALIZE_WHITESPACE
    - - - - - - - U U - U U - -
    >>> # advanced smoothing is available via keyword flags: dactyl_smoothing
    >>> # print(HexameterScanner().scan(
    #...     "his verbis: 'o gnata, tibi sunt ante ferendae",
    #...     dactyl_smoothing=True).scansion) # doctest: +NORMALIZE_WHITESPACE
    #- - - - - U U - - - U U - -
    """
    verse = Verse(original_line, meter='hexameter')
    # replace punctuation with spaces
    line = original_line.translate(self.punctuation_substitutions)
    # conservative i to j
    line = self.transform_i_to_j(line)
    working_line = self.elide_all(line)
    working_line = self.accent_by_position(working_line)
    syllables = self.syllabifier.syllabify(working_line)
    if optional_transform:
        working_line = self.transform_i_to_j_optional(line)
        working_line = self.elide_all(working_line)
        working_line = self.accent_by_position(working_line)
        syllables = self.syllabifier.syllabify(working_line)
        verse.scansion_notes += [self.constants.NOTE_MAP["optional i to j"]]
    verse.working_line = working_line
    verse.syllable_count = self.syllabifier.get_syllable_count(syllables)
    verse.syllables = syllables
    if verse.syllable_count < 12:
        verse.valid = False
        verse.scansion_notes += [self.constants.NOTE_MAP["< 12"]]
        return verse
    stresses = self.flag_dipthongs(syllables)
    syllables_wspaces = StringUtils.to_syllables_with_trailing_spaces(working_line, syllables)
    offset_map = self.calc_offset(syllables_wspaces)
    for idx, syl in enumerate(syllables):
        for accented in self.constants.ACCENTED_VOWELS:
            if accented in syl:
                stresses.append(idx)
    # first syllable is always long in hexameter
    stresses.append(0)
    # second to last syllable is always long
    stresses.append(verse.syllable_count - 2)
    verse.scansion = self.produce_scansion(stresses, syllables_wspaces, offset_map)
    if len(StringUtils.stress_positions(self.constants.STRESSED, verse.scansion)) != \
            len(set(stresses)):
        verse.valid = False
        verse.scansion_notes += [self.constants.NOTE_MAP["invalid syllables"]]
        return verse
    if self.metrical_validator.is_valid_hexameter(verse.scansion):
        verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
        return self.assign_candidate(verse, verse.scansion)
    # identify some obvious and probable choices based on number of syllables
    if verse.syllable_count == 17:  # produce all dactyls
        candidate = self.produce_scansion(
            self.metrical_validator.hexameter_known_stresses(),
            syllables_wspaces, offset_map)
        verse.scansion_notes += [self.constants.NOTE_MAP["17"]]
        if self.metrical_validator.is_valid_hexameter(candidate):
            return self.assign_candidate(verse, candidate)
    if verse.syllable_count == 12:  # create all spondee hexameter
        candidate = self.produce_scansion(list(range(12)), syllables_wspaces, offset_map)
        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            verse.scansion_notes += [self.constants.NOTE_MAP["12"]]
            return self.assign_candidate(verse, candidate)
    if verse.syllable_count == 13:  # create spondee hexameter with a dactyl at 5th foot
        known_unaccents = [9, 10]
        last_syllable_accented = False
        for vowel in self.constants.ACCENTED_VOWELS:
            if vowel in verse.syllables[12]:
                last_syllable_accented = True
        if not last_syllable_accented:
            known_unaccents.append(12)
        if set(known_unaccents) - set(stresses) != len(known_unaccents):
            verse.scansion = self.produce_scansion(
                [x for x in range(13) if x not in known_unaccents],
                syllables_wspaces, offset_map)
            verse.scansion_notes += [self.constants.NOTE_MAP["5th dactyl"]]
            if self.metrical_validator.is_valid_hexameter(verse.scansion):
                return self.assign_candidate(verse, verse.scansion)
    if verse.syllable_count > 17:
        verse.valid = False
        verse.scansion_notes += [self.constants.NOTE_MAP["> 17"]]
        return verse
    smoothed = self.correct_inverted_amphibrachs(verse.scansion)
    if distance(verse.scansion, smoothed) > 0:
        verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
        verse.scansion = smoothed
        stresses += StringUtils.differences(verse.scansion, smoothed)
    if self.metrical_validator.is_valid_hexameter(verse.scansion):
        return self.assign_candidate(verse, verse.scansion)
    smoothed = self.correct_first_two_dactyls(verse.scansion)
    if distance(verse.scansion, smoothed) > 0:
        verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
        verse.scansion = smoothed
        stresses += StringUtils.differences(verse.scansion, smoothed)
    if self.metrical_validator.is_valid_hexameter(verse.scansion):
        return self.assign_candidate(verse, verse.scansion)
    smoothed = self.correct_invalid_fifth_foot(verse.scansion)
    if distance(verse.scansion, smoothed) > 0:
        verse.scansion_notes += [self.constants.NOTE_MAP["invalid 5th"]]
        verse.scansion = smoothed
        stresses += StringUtils.differences(verse.scansion, smoothed)
    if self.metrical_validator.is_valid_hexameter(verse.scansion):
        return self.assign_candidate(verse, verse.scansion)
    feet = self.metrical_validator.hexameter_feet(verse.scansion.replace(" ", ""))
    if feet:
        # Normal good citizens are unwelcome in the house of hexameter
        invalid_feet_in_hexameter = [self.constants.IAMB, self.constants.TROCHEE]
        current_foot = 0
        ending = feet.pop()  # don't process the ending, a possible trochee, add it back after
        scanned_line = ""
        for foot in feet:
            if foot.replace(" ", "") in invalid_feet_in_hexameter:
                scanned_line = self.invalid_foot_to_spondee(feet, foot, current_foot)
                scanned_line = scanned_line + ending
            current_foot += 1
        smoothed = self.produce_scansion(
            stresses + StringUtils.stress_positions(self.constants.STRESSED, scanned_line),
            syllables_wspaces, offset_map)
        if self.metrical_validator.is_valid_hexameter(smoothed):
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid foot"]]
            return self.assign_candidate(verse, smoothed)
    # need to do this again, since the scansion has changed
    smoothed = self.correct_inverted_amphibrachs(verse.scansion)
    if distance(verse.scansion, smoothed) > 0:
        verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
        verse.scansion = smoothed
        stresses += StringUtils.differences(verse.scansion, smoothed)
    if self.metrical_validator.is_valid_hexameter(verse.scansion):
        return self.assign_candidate(verse, verse.scansion)
    candidates = self.metrical_validator.closest_hexameter_patterns(verse.scansion)
    if candidates is not None:
        if len(candidates) == 1 \
                and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                and len(StringUtils.differences(verse.scansion, candidates[0])) == 1:
            tmp_scansion = self.produce_scansion(
                StringUtils.differences(verse.scansion, candidates[0]),
                syllables_wspaces, offset_map)
            if self.metrical_validator.is_valid_hexameter(tmp_scansion):
                verse.scansion_notes += [self.constants.NOTE_MAP["closest match"]]
                return self.assign_candidate(verse, tmp_scansion)
    # need to do this again, since the scansion has changed
    smoothed = self.correct_inverted_amphibrachs(smoothed)
    if self.metrical_validator.is_valid_hexameter(smoothed):
        verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
        return self.assign_candidate(verse, smoothed)
    if dactyl_smoothing:
        smoothed = self.correct_dactyl_chain(smoothed)
        if distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["dactyl smoothing"]]
            verse.scansion = smoothed
        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)
    # if the line doesn't scan "as is", it may scan if the optional i to j transformations
    # are made, so here we set them and try again.
    if self.optional_transform and not verse.valid:
        return self.scan(original_line, optional_transform=True, dactyl_smoothing=True)
    return verse
def correct_dactyl_chain(self, scansion: str) -> str:
    """Three or more unstressed accents in a row is a broken dactyl chain,
    best detected and processed backwards.

    Since this method takes a Procrustean approach to modifying the scansion pattern,
    it is not used by default in the scan method; however, it is available as an optional
    keyword parameter, and users looking to further automate the generation of scansion
    candidates should consider using this as a fall back.

    :param scansion: scansion with broken dactyl chain; inverted amphibrachs not allowed
    :return: corrected line of scansion

    >>> print(HexameterScanner().correct_dactyl_chain(
    ...     "- U U - - U U - - - U U - x"))
    - - - - - U U - - - U U - x
    >>> print(HexameterScanner().correct_dactyl_chain(
    ...     "- U U U U - - - - - U U - U")) # doctest: +NORMALIZE_WHITESPACE
    - - - U U - - - - - U U - U
    """
    mark_list = StringUtils.mark_list(scansion)
    vals = list(scansion.replace(" ", ""))
    # ignore last two positions, save them
    feet = [vals.pop(), vals.pop()]
    length = len(vals)
    idx = length - 1
    while idx > 0:
        one = vals[idx]
        two = vals[idx - 1]
        if idx > 1:
            three = vals[idx - 2]
        else:
            three = ""
        # Dactyl foot is okay, no corrections
        if one == self.constants.UNSTRESSED and \
                two == self.constants.UNSTRESSED and \
                three == self.constants.STRESSED:
            feet += [one]
            feet += [two]
            feet += [three]
            idx -= 3
            continue
        # Spondee foot is okay, no corrections
        if one == self.constants.STRESSED and \
                two == self.constants.STRESSED:
            feet += [one]
            feet += [two]
            idx -= 2
            continue
        # handle "U U U" foot as "- U U"
        if one == self.constants.UNSTRESSED and \
                two == self.constants.UNSTRESSED and \
                three == self.constants.UNSTRESSED:
            feet += [one]
            feet += [two]
            feet += [self.constants.STRESSED]
            idx -= 3
            continue
        # handle "U U -" foot as "- -"
        if one == self.constants.STRESSED and \
                two == self.constants.UNSTRESSED and \
                three == self.constants.UNSTRESSED:
            feet += [self.constants.STRESSED]
            feet += [self.constants.STRESSED]
            idx -= 2
            continue
        # handle "- U" foot as "- -"
        if one == self.constants.UNSTRESSED and \
                two == self.constants.STRESSED:
            feet += [self.constants.STRESSED]
            feet += [two]
            idx -= 2
            continue
    corrected = "".join(feet[::-1])
    new_line = list(" " * len(scansion))
    for idx, car in enumerate(corrected):
        new_line[mark_list[idx]] = car
    return "".join(new_line)
def syllabify(self, words: str) -> list:
    """Parse a Latin word into a list of syllable strings.

    :param words: a string containing one Latin word or many words separated by spaces.
    :return: list of strings, each representing a syllable.

    >>> syllabifier = Syllabifier()
    >>> print(syllabifier.syllabify("fuit"))
    ['fu', 'it']
    >>> print(syllabifier.syllabify("libri"))
    ['li', 'bri']
    >>> print(syllabifier.syllabify("contra"))
    ['con', 'tra']
    >>> print(syllabifier.syllabify("iaculum"))
    ['ja', 'cu', 'lum']
    >>> print(syllabifier.syllabify("amo"))
    ['a', 'mo']
    >>> print(syllabifier.syllabify("bracchia"))
    ['brac', 'chi', 'a']
    >>> print(syllabifier.syllabify("deinde"))
    ['dein', 'de']
    >>> print(syllabifier.syllabify("certabant"))
    ['cer', 'ta', 'bant']
    >>> print(syllabifier.syllabify("aere"))
    ['ae', 're']
    >>> print(syllabifier.syllabify("adiungere"))
    ['ad', 'jun', 'ge', 're']
    >>> print(syllabifier.syllabify("mōns"))
    ['mōns']
    >>> print(syllabifier.syllabify("domus"))
    ['do', 'mus']
    >>> print(syllabifier.syllabify("lixa"))
    ['li', 'xa']
    >>> print(syllabifier.syllabify("asper"))
    ['as', 'per']
    >>> # handle doubles
    >>> print(syllabifier.syllabify("siccus"))
    ['sic', 'cus']
    >>> # handle liquid + liquid
    >>> print(syllabifier.syllabify("almus"))
    ['al', 'mus']
    >>> # handle liquid + mute
    >>> print(syllabifier.syllabify("ambo"))
    ['am', 'bo']
    >>> print(syllabifier.syllabify("anguis"))
    ['an', 'guis']
    >>> print(syllabifier.syllabify("arbor"))
    ['ar', 'bor']
    >>> print(syllabifier.syllabify("pulcher"))
    ['pul', 'cher']
    >>> print(syllabifier.syllabify("ruptus"))
    ['ru', 'ptus']
    >>> print(syllabifier.syllabify("Bīthÿnus"))
    ['Bī', 'thÿ', 'nus']
    >>> print(syllabifier.syllabify("sanguen"))
    ['san', 'guen']
    >>> print(syllabifier.syllabify("unguentum"))
    ['un', 'guen', 'tum']
    >>> print(syllabifier.syllabify("lingua"))
    ['lin', 'gua']
    >>> print(syllabifier.syllabify("linguā"))
    ['lin', 'guā']
    >>> print(syllabifier.syllabify("languidus"))
    ['lan', 'gui', 'dus']
    >>> print(syllabifier.syllabify("suis"))
    ['su', 'is']
    >>> print(syllabifier.syllabify("habui"))
    ['ha', 'bu', 'i']
    >>> print(syllabifier.syllabify("habuit"))
    ['ha', 'bu', 'it']
    >>> print(syllabifier.syllabify("qui"))
    ['qui']
    >>> print(syllabifier.syllabify("quibus"))
    ['qui', 'bus']
    >>> print(syllabifier.syllabify("hui"))
    ['hui']
    >>> print(syllabifier.syllabify("cui"))
    ['cui']
    >>> print(syllabifier.syllabify("huic"))
    ['huic']
    """
    cleaned = words.translate(self.remove_punct_map)
    cleaned = cleaned.replace("qu", "kw")
    cleaned = cleaned.replace("Qu", "Kw")
    cleaned = cleaned.replace("gua", "gwa")
    cleaned = cleaned.replace("Gua", "Gwa")
    cleaned = cleaned.replace("gue", "gwe")
    cleaned = cleaned.replace("Gue", "Gwe")
    cleaned = cleaned.replace("gui", "gwi")
    cleaned = cleaned.replace("Gui", "Gwi")
    cleaned = cleaned.replace("guo", "gwo")
    cleaned = cleaned.replace("Guo", "Gwo")
    cleaned = cleaned.replace("guu", "gwu")
    cleaned = cleaned.replace("Guu", "Gwu")
    cleaned = cleaned.replace("guā", "gwā")
    cleaned = cleaned.replace("Guā", "Gwā")
    cleaned = cleaned.replace("guē", "gwē")
    cleaned = cleaned.replace("Guē", "Gwē")
    cleaned = cleaned.replace("guī", "gwī")
    cleaned = cleaned.replace("Guī", "Gwī")
    cleaned = cleaned.replace("guō", "gwō")
    cleaned = cleaned.replace("Guō", "Gwō")
    cleaned = cleaned.replace("guū", "gwū")
    cleaned = cleaned.replace("Guū", "Gwū")
    items = cleaned.strip().split(" ")
    for char in cleaned:
        if char not in self.ACCEPTABLE_CHARS:
            LOG.error("Unsupported character found in %s " % cleaned)
            return items
    syllables: list = []
    for item in items:
        syllables += self._setup(item)
    for idx, syl in enumerate(syllables):
        if "kw" in syl:
            syl = syl.replace("kw", "qu")
            syllables[idx] = syl
        if "Kw" in syl:
            syl = syl.replace("Kw", "Qu")
            syllables[idx] = syl
        if "gw" in syl:
            syl = syl.replace("gw", "gu")
            syllables[idx] = syl
        if "Gw" in syl:
            syl = syl.replace("Gw", "Gu")
            syllables[idx] = syl
    return StringUtils.remove_blank_spaces(syllables)
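# A brief usage sketch (an illustration, not part of the original module), building only on
# the doctests above: the syllabifier keeps elision fragments such as the lone 'c' below as
# separate items, and get_syllable_count then folds them back in when counting.
syllabifier = Syllabifier()
syllables = ['Jām', 'tūm', 'c', 'au', 'sus', 'es', 'u', 'nus', 'I', 'ta', 'lo', 'rum']
print(len(syllables))                              # 12 raw items
print(syllabifier.get_syllable_count(syllables))   # 11 metrical syllables, per the doctest above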
def scan(self, original_line: str, optional_transform: bool = False,
         dactyl_smoothing: bool = False) -> Hexameter:
    """Scan a line of Latin hexameter and produce a scansion pattern and other data.

    :return: a Hexameter object

    >>> scanner = HexameterScanner()
    >>> print(scanner.scan("impulerit. Tantaene animis caelestibus irae?"))
    Hexameter( original='impulerit. Tantaene animis caelestibus irae?', scansion='- U U - - - U U - - - U U - - ', valid=True, syllable_count=15, accented='īmpulerīt. Tāntaene animīs caelēstibus īrae?', scansion_notes=['Valid by positional stresses.'], syllables = ['īm, pu, le, rīt, Tān, taen, a, ni, mīs, cae, lēs, ti, bus, i, rae'])
    >>> # Note: possible doctest quirk with leading whitespace; so we strip responses:
    >>> print(scanner.scan(
    ... "Arma virumque cano, Troiae qui prīmus ab ōrīs").scansion.strip())
    - U U - U U - - - - - U U - -
    >>> print(scanner.scan(
    ... "Ītaliam, fāto profugus, Lāvīniaque vēnit").scansion.strip())
    - - - - - U U - - - U U - U
    >>> print(HexameterScanner().scan(
    ... "lītora, multum ille et terrīs iactātus et alto").scansion.strip())
    - U U - - - - - - - U U - U
    >>> print(HexameterScanner().scan(
    ... "vī superum saevae memorem Iūnōnis ob īram;").scansion.strip())
    - U U - - - U U - - - U U - U
    >>> # handle multiple elisions
    >>> print(scanner.scan(
    ... "monstrum horrendum, informe, ingens, cui lumen ademptum"
    ... ).scansion.strip())
    - - - - - - - - - U U - U
    >>> # if we have 17 syllables, create a chain of all dactyls
    >>> print(scanner.scan("quadrupedante putrem sonitu quatit ungula campum"
    ... ).scansion.strip())
    - U U - U U - U U - U U - U U - U
    >>> print(HexameterScanner().scan(
    ... "illi inter sese multa vi bracchia tollunt").scansion.strip())
    - - - - - - - - - UU - -
    >>> print(HexameterScanner().scan(
    ... "dat latus; insequitur cumulo praeruptus aquae mons").scansion.strip())
    - U U - U U - U U - - - U U - -
    >>> print(HexameterScanner().scan(
    ... "Non quivis videt inmodulata poëmata iudex").scansion.strip())
    - - - U U - U U - U U- U U - -
    >>> print(HexameterScanner().scan(
    ... "certabant urbem Romam Remoramne vocarent").scansion.strip())
    - - - - - - - U U - U U - -
    >>> # advanced smoothing is available via keyword flags
    >>> print(HexameterScanner().scan(
    ... "his verbis: 'o gnata, tibi sunt ante ferendae",
    ... dactyl_smoothing=True).scansion.strip())
    - - - - - U U - - - U U - -
    """
    hexameter = Hexameter(original_line)
    # replace punctuation with spaces
    line = original_line.translate(self.punctuation_substitutions)
    # conservative i to j
    line = self.transform_i_to_j(line)
    working_line = self.elide_all(line)
    working_line = self.accent_by_position(working_line)
    syllables = self.syllabifier.syllabify(working_line)
    if optional_transform:
        working_line = self.transform_i_to_j_optional(line)
        working_line = self.elide_all(working_line)
        working_line = self.accent_by_position(working_line)
        syllables = self.syllabifier.syllabify(working_line)
        hexameter.scansion_notes += [self.constants.NOTE_MAP["optional i to j"]]
    hexameter.working_line = working_line
    hexameter.syllable_count = len(syllables)
    hexameter.syllables = syllables
    stresses = self.flag_dipthongs(syllables)
    syllables_wspaces = StringUtils.to_syllables_with_trailing_spaces(working_line, syllables)
    offset_map = self.calc_offset(syllables_wspaces)
    for idx, syl in enumerate(syllables):
        for accented in self.constants.ACCENTED_VOWELS:
            if accented in syl:
                stresses.append(idx)
    # first syllable is always long
    stresses.append(0)
    # second to last syllable is always long
    stresses.append(hexameter.syllable_count - 2)

    def validate(scansion: str) -> bool:
        """Helper closure for validation."""
        if self.metrical_validator.is_valid_hexameter(scansion):
            hexameter.scansion = scansion
            hexameter.valid = True
            hexameter.accented = self.formatter.merge_line_scansion(
                hexameter.original, hexameter.scansion)
            return True
        return False

    hexameter.scansion = self.produce_scansion(stresses, syllables_wspaces, offset_map)
    if len(StringUtils.stress_positions(self.constants.STRESSED, hexameter.scansion)) != \
            len(set(stresses)):
        hexameter.valid = False
        hexameter.scansion_notes += [self.constants.NOTE_MAP["invalid syllables"]]
        return hexameter
    if validate(hexameter.scansion):
        hexameter.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
        return hexameter
    smoothed = self.correct_inverted_amphibrachs(hexameter.scansion)
    if distance(hexameter.scansion, smoothed) > 0:
        hexameter.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
        hexameter.scansion = smoothed
        stresses += StringUtils.differences(hexameter.scansion, smoothed)
    if validate(hexameter.scansion):
        return hexameter
    smoothed = self.correct_invalid_start(hexameter.scansion)
    if distance(hexameter.scansion, smoothed) > 0:
        hexameter.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
        hexameter.scansion = smoothed
        stresses += StringUtils.differences(hexameter.scansion, smoothed)
    if validate(hexameter.scansion):
        return hexameter
    smoothed = self.correct_invalid_fifth_foot(hexameter.scansion)
    if distance(hexameter.scansion, smoothed) > 0:
        hexameter.scansion_notes += [self.constants.NOTE_MAP["invalid 5th"]]
        hexameter.scansion = smoothed
        stresses += StringUtils.differences(hexameter.scansion, smoothed)
    if validate(hexameter.scansion):
        return hexameter
    feet = self.metrical_validator.hexameter_feet(hexameter.scansion.replace(" ", ""))
    if feet:
        # Normal good citizens are unwelcome in the house of hexameter
        invalid_feet_in_hexameter = [self.constants.IAMB, self.constants.TROCHEE]
        current_foot = 0
        ending = feet.pop()  # don't process the ending, a possible trochee; add it back after
        scanned_line = ""
        for foot in feet:
            if foot.replace(" ", "") in invalid_feet_in_hexameter:
                scanned_line = self.invalid_foot_to_spondee(feet, foot, current_foot)
                scanned_line = scanned_line + ending
            current_foot += 1
        smoothed = self.produce_scansion(
            stresses + StringUtils.stress_positions(self.constants.STRESSED, scanned_line),
            syllables_wspaces, offset_map)
        if validate(smoothed):
            hexameter.scansion_notes += [self.constants.NOTE_MAP["invalid foot"]]
            return hexameter
    # need to do this again, since the scansion has changed
    smoothed = self.correct_inverted_amphibrachs(hexameter.scansion)
    if distance(hexameter.scansion, smoothed) > 0:
        hexameter.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
        hexameter.scansion = smoothed
        stresses += StringUtils.differences(hexameter.scansion, smoothed)
    if validate(hexameter.scansion):
        return hexameter
    candidates = self.metrical_validator.closest_hexameter_patterns(hexameter.scansion)
    if candidates is not None:
        if len(candidates) == 1 \
                and len(hexameter.scansion.replace(" ", "")) == len(candidates[0]) \
                and len(StringUtils.differences(hexameter.scansion, candidates[0])) == 1:
            tmp_scansion = self.produce_scansion(
                StringUtils.differences(hexameter.scansion, candidates[0]),
                syllables_wspaces, offset_map)
            if validate(tmp_scansion):
                hexameter.scansion = tmp_scansion
                hexameter.scansion_notes += [self.constants.NOTE_MAP["closest match"]]
                return hexameter
    # identify some obvious and probable choices based on number of syllables
    if hexameter.syllable_count == 17:  # produce all dactyls
        candidate = self.produce_scansion(
            self.metrical_validator.hexameter_known_stresses(),
            syllables_wspaces, offset_map)
        hexameter.scansion_notes += [self.constants.NOTE_MAP["17"]]
        if validate(candidate):
            return hexameter
    if hexameter.syllable_count == 12:  # create all spondee hexameter
        if validate(self.produce_scansion(list(range(12)), syllables_wspaces, offset_map)):
            hexameter.scansion_notes += [self.constants.NOTE_MAP["12"]]
            return hexameter
    if hexameter.syllable_count < 12:
        hexameter.valid = False
        hexameter.scansion_notes += [self.constants.NOTE_MAP["< 12"]]
        return hexameter
    if hexameter.syllable_count == 13:  # create spondee hexameter with a dactyl at 5th foot
        known_unaccents = [9, 10, 12]
        # only propose this pattern if none of these positions has already been flagged long
        if len(set(known_unaccents) - set(stresses)) == len(known_unaccents):
            hexameter.scansion = self.produce_scansion(
                [x for x in range(13) if x not in known_unaccents],
                syllables_wspaces, offset_map)
            hexameter.scansion_notes += [self.constants.NOTE_MAP["5th dactyl"]]
            if validate(hexameter.scansion):
                return hexameter
    if hexameter.syllable_count > 17:
        hexameter.valid = False
        hexameter.scansion_notes += [self.constants.NOTE_MAP["> 17"]]
        return hexameter
    # need to do this again, since the scansion has changed
    smoothed = self.correct_inverted_amphibrachs(smoothed)
    if validate(smoothed):
        hexameter.scansion = smoothed
        hexameter.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
        return hexameter
    if dactyl_smoothing:
        smoothed = self.correct_dactyl_chain(smoothed)
        if distance(hexameter.scansion, smoothed) > 0:
            hexameter.scansion_notes += [self.constants.NOTE_MAP["dactyl smoothing"]]
            hexameter.scansion = smoothed
        if validate(hexameter.scansion):
            return hexameter
    # if the line doesn't scan "as is", it may scan if the optional i to j transformations
    # are made, so here we set them and try again.
    if not optional_transform and not hexameter.valid:
        return self.scan(original_line, optional_transform=True, dactyl_smoothing=True)
    return hexameter
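# Usage sketch (illustrative; assumes HexameterScanner is importable in this
# namespace): scan() returns a Hexameter whose fields can be read directly, and
# scansion_notes records which rule finally produced a valid pattern. Values
# mirror the first doctest above.
def _demo_hexameter_scan():
    scanner = HexameterScanner()
    result = scanner.scan("impulerit. Tantaene animis caelestibus irae?")
    print(result.valid)           # True
    print(result.syllable_count)  # 15
    print(result.scansion_notes)  # ['Valid by positional stresses.']
    return result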
def scan(self, original_line: str, optional_transform: bool = False) -> Verse:
    """Scan a line of Latin hendecasyllables and produce a scansion pattern and other data.

    :return: a Verse object

    >>> scanner = HendecasyllableScanner()
    >>> print(scanner.scan("Cui dono lepidum novum libellum"))
    Verse(original='Cui dono lepidum novum libellum', scansion=' - U - U U - U - U - U ', meter='hendecasyllable', valid=True, syllable_count=11, accented='Cui donō lepidūm novūm libēllum', scansion_notes=['Corrected invalid start.'], syllables = ['Cui', 'do', 'no', 'le', 'pi', 'dūm', 'no', 'vūm', 'li', 'bēl', 'lum'])
    >>> print(scanner.scan(
    ... "ārida modo pumice expolitum?").scansion)  # doctest: +NORMALIZE_WHITESPACE
    - U - U U - U - U - U
    """
    verse = Verse(original_line, meter='hendecasyllable')
    # replace punctuation with spaces
    line = original_line.translate(self.punctuation_substitutions)
    # conservative i to j
    line = self.transform_i_to_j(line)
    working_line = self.elide_all(line)
    working_line = self.accent_by_position(working_line)
    syllables = self.syllabifier.syllabify(working_line)
    if optional_transform:
        working_line = self.transform_i_to_j_optional(line)
        working_line = self.elide_all(working_line)
        working_line = self.accent_by_position(working_line)
        syllables = self.syllabifier.syllabify(working_line)
        verse.scansion_notes += [self.constants.NOTE_MAP["optional i to j"]]
    verse.working_line = working_line
    verse.syllable_count = self.syllabifier.get_syllable_count(syllables)
    verse.syllables = syllables
    # identify some obvious and probable choices based on number of syllables
    if verse.syllable_count > 11:
        verse.valid = False
        verse.scansion_notes += [self.constants.NOTE_MAP["> 11"]]
        return verse
    if verse.syllable_count < 11:
        verse.valid = False
        verse.scansion_notes += [self.constants.NOTE_MAP["< 11"]]
        return verse
    stresses = self.flag_dipthongs(syllables)
    syllables_wspaces = StringUtils.to_syllables_with_trailing_spaces(working_line, syllables)
    offset_map = self.calc_offset(syllables_wspaces)
    for idx, syl in enumerate(syllables):
        for accented in self.constants.ACCENTED_VOWELS:
            if accented in syl:
                stresses.append(idx)
    # second to last syllable is always long
    stresses.append(verse.syllable_count - 2)
    verse.scansion = self.produce_scansion(stresses, syllables_wspaces, offset_map)
    if len(StringUtils.stress_positions(self.constants.STRESSED, verse.scansion)) != \
            len(set(stresses)):
        verse.valid = False
        verse.scansion_notes += [self.constants.NOTE_MAP["invalid syllables"]]
        return verse
    if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
        verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
        return self.assign_candidate(verse, verse.scansion)
    smoothed = self.correct_invalid_start(verse.scansion)
    if distance(verse.scansion, smoothed) > 0:
        verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
        verse.scansion = smoothed
        stresses += StringUtils.differences(verse.scansion, smoothed)
    if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
        return self.assign_candidate(verse, verse.scansion)
    smoothed = self.correct_antepenult_chain(verse.scansion)
    if distance(verse.scansion, smoothed) > 0:
        verse.scansion_notes += [self.constants.NOTE_MAP["antepenult chain"]]
        verse.scansion = smoothed
        stresses += StringUtils.differences(verse.scansion, smoothed)
    if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
        return self.assign_candidate(verse, verse.scansion)
    candidates = self.metrical_validator.closest_hendecasyllable_patterns(verse.scansion)
    if candidates is not None:
        if len(candidates) == 1 \
                and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                and len(StringUtils.differences(verse.scansion, candidates[0])) == 1:
            tmp_scansion = self.produce_scansion(
                StringUtils.differences(verse.scansion, candidates[0]),
                syllables_wspaces, offset_map)
            if self.metrical_validator.is_valid_hendecasyllables(tmp_scansion):
                verse.scansion_notes += [self.constants.NOTE_MAP["closest match"]]
                return self.assign_candidate(verse, tmp_scansion)
    # if the line doesn't scan "as is", it may scan if the optional i to j transformations
    # are made, so here we set them and try again.
    if self.optional_transform and not verse.valid:
        return self.scan(original_line, optional_transform=True)
    verse.accented = self.formatter.merge_line_scansion(verse.original, verse.scansion)
    return verse
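# Usage sketch (illustrative; assumes HendecasyllableScanner is importable in this
# namespace): the hendecasyllable scanner follows the same pipeline shape as the
# hexameter scanner but returns a generic Verse with meter='hendecasyllable'.
# Values mirror the doctest above.
def _demo_hendecasyllable_scan():
    scanner = HendecasyllableScanner()
    verse = scanner.scan("Cui dono lepidum novum libellum")
    print(verse.valid)           # True
    print(verse.syllable_count)  # 11
    print(verse.scansion_notes)  # ['Corrected invalid start.']
    return verse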
def accent_by_position(self, verse_line: str) -> str:
    """Accent vowels according to the rules of scansion.

    :param verse_line: a line of unaccented verse
    :return: the same line with vowels accented by position

    >>> print(VerseScanner().accent_by_position(
    ... "Arma virumque cano, Troiae qui primus ab oris").lstrip())
    Ārma virūmque canō Trojae qui primus ab oris
    """
    line = verse_line.translate(self.punctuation_substitutions)
    line = self.transform_i_to_j(line)
    marks = list(line)
    # locate and save diphthong positions since we don't want them being accented
    dipthong_positions = []
    for dipth in self.constants.DIPTHONGS:
        if dipth in line:
            dipthong_positions.append(line.find(dipth))
    # Vowels followed by 2 consonants
    # The digraphs ch, ph, th, qu and sometimes gu and su count as single consonants.
    # see http://people.virginia.edu/~jdk3t/epicintrog/scansion.htm
    marks = StringUtils.overwrite(marks,
                                  "[{}][{}][{}]".format(self.constants.VOWELS,
                                                        self.constants.CONSONANTS,
                                                        self.constants.CONSONANTS_WO_H),
                                  self.constants.STRESSED)
    # one space (or more, for 'dropped' punctuation) may intervene
    marks = StringUtils.overwrite(marks,
                                  r"[{}][{}]\s*[{}]".format(self.constants.VOWELS,
                                                            self.constants.CONSONANTS,
                                                            self.constants.CONSONANTS_WO_H),
                                  self.constants.STRESSED)
    # if both consonants are in the next word, the vowel may be long,
    # but it could be short if the vowel is not on the thesis/emphatic part of the foot;
    # see Gildersleeve and Lodge p. 446
    marks = StringUtils.overwrite(marks,
                                  r"[{}]\s*[{}][{}]".format(self.constants.VOWELS,
                                                            self.constants.CONSONANTS,
                                                            self.constants.CONSONANTS_WO_H),
                                  self.constants.STRESSED)
    # x is considered as two letters
    marks = StringUtils.overwrite(marks,
                                  "[{}][xX]".format(self.constants.VOWELS),
                                  self.constants.STRESSED)
    # z is considered as two letters
    marks = StringUtils.overwrite(marks,
                                  r"[{}][zZ]".format(self.constants.VOWELS),
                                  self.constants.STRESSED)
    original_verse = list(line)
    for idx, word in enumerate(original_verse):
        if marks[idx] == self.constants.STRESSED:
            original_verse[idx] = self.constants.VOWELS_TO_ACCENTS[original_verse[idx]]
    # make sure diphthongs aren't accented
    for idx in dipthong_positions:
        if original_verse[idx + 1] in self.constants.ACCENTS_TO_VOWELS:
            original_verse[idx + 1] = self.constants.ACCENTS_TO_VOWELS[original_verse[idx + 1]]
    return "".join(original_verse)
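# Usage sketch (illustrative; assumes VerseScanner is importable in this
# namespace): positional accenting marks a vowel long when two consonants (or
# x/z) follow, while the diphthong bookkeeping leaves diphthongs unaccented.
# The expected string mirrors the doctest above.
def _demo_accent_by_position():
    scanner = VerseScanner()
    accented = scanner.accent_by_position(
        "Arma virumque cano, Troiae qui primus ab oris")
    print(accented.lstrip())  # Ārma virūmque canō Trojae qui primus ab oris
    return accented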