def test_syllabifier(self):
    syllabifier = Syllabifier()
    # break a word into syllables
    self.assertEqual(syllabifier.syllabify("Bīthÿnus"), ['Bī', 'thÿ', 'nus'])
    # break a group of words into a group of syllables:
    self.assertEqual(syllabifier.syllabify("arbor pulcher ruptus"),
                     ['ar', 'bor', 'pul', 'cher', 'ru', 'ptus'])
    # do not process character sets that have not been specified by the
    # ScansionConstants class that is injected into the constructor;
    # the whole group is rejected when this occurs
    self.assertEqual(syllabifier.syllabify("Platonis Ψυχη"), ['Platonis', 'Ψυχη'])
class VerseScanner:
    """
    The scansion symbols used can be configured by passing a suitable
    constants class to the constructor.
    """

    def __init__(self, constants=None, syllabifier=None, **kwargs):
        """
        :param constants: None or a class that implements ScansionConstants
        :param syllabifier: None or a class that implements Syllabifier methods
        :param kwargs:
        """
        self.constants = ScansionConstants() if constants is None else constants
        self.syllabifier = Syllabifier() if syllabifier is None else syllabifier
        self.remove_punct_map = string_utils.remove_punctuation_dict()
        self.punctuation_substitutions = string_utils.punctuation_for_spaces_dict()
        self.metrical_validator = MetricalValidator(self.constants)
        self.formatter = ScansionFormatter(self.constants)
        self.inverted_amphibrach_re = re.compile(
            r"{}\s*{}\s*{}".format(self.constants.STRESSED,
                                   self.constants.UNSTRESSED,
                                   self.constants.STRESSED))
        self.syllable_matcher = re.compile(
            r"[{}]".format(self.constants.VOWELS +
                           self.constants.ACCENTED_VOWELS +
                           self.constants.LIQUIDS +
                           self.constants.MUTES))

    def transform_i_to_j(self, line: str) -> str:
        """
        Transform instances of consonantal i to j.

        :param line:
        :return:

        >>> print(VerseScanner().transform_i_to_j("iactātus"))
        jactātus
        >>> print(VerseScanner().transform_i_to_j("bracchia"))
        bracchia
        """
        words = line.split(" ")
        space_list = string_utils.space_list(line)
        corrected_words = []
        for word in words:
            found = False
            for prefix in self.constants.PREFIXES:
                if word.startswith(prefix) and word != prefix:
                    corrected_words.append(
                        self.syllabifier.convert_consonantal_i(prefix))
                    corrected_words.append(
                        self.syllabifier.convert_consonantal_i(word[len(prefix):]))
                    found = True
                    break
            if not found:
                corrected_words.append(self.syllabifier.convert_consonantal_i(word))
        new_line = string_utils.join_syllables_spaces(corrected_words, space_list)
        char_list = string_utils.overwrite(
            list(new_line),
            r"\b[iī][{}]".format(self.constants.VOWELS + self.constants.ACCENTED_VOWELS),
            "j")
        char_list = string_utils.overwrite(
            char_list, r"\b[I][{}]".format(self.constants.VOWELS_WO_I), "J")
        char_list = string_utils.overwrite(
            char_list,
            r"[{}][i][{}]".format(self.constants.VOWELS_WO_I, self.constants.VOWELS),
            "j", 1)
        return "".join(char_list)

    def transform_i_to_j_optional(self, line: str) -> str:
        """
        Sometimes, for the demands of meter, a more permissive i to j
        transformation is warranted.

        :param line:
        :return:

        >>> print(VerseScanner().transform_i_to_j_optional("Italiam"))
        Italjam
        >>> print(VerseScanner().transform_i_to_j_optional("Lāvīniaque"))
        Lāvīnjaque
        >>> print(VerseScanner().transform_i_to_j_optional("omnium"))
        omnjum
        """
        words = line.split(" ")
        space_list = string_utils.space_list(line)
        corrected_words = []
        for word in words:
            found = False
            for prefix in self.constants.PREFIXES:
                if word.startswith(prefix) and word != prefix:
                    corrected_words.append(
                        self.syllabifier.convert_consonantal_i(prefix))
                    corrected_words.append(
                        self.syllabifier.convert_consonantal_i(word[len(prefix):]))
                    found = True
                    break
            if not found:
                corrected_words.append(self.syllabifier.convert_consonantal_i(word))
        new_line = string_utils.join_syllables_spaces(corrected_words, space_list)
        # the following two may be tunable and subject to improvement
        char_list = string_utils.overwrite(
            list(new_line),
            "[bcdfgjkmpqrstvwxzBCDFGHJKMPQRSTVWXZ][i][{}]".format(
                self.constants.VOWELS_WO_I),
            "j", 1)
        char_list = string_utils.overwrite(
            char_list,
            "[{}][iI][{}]".format(self.constants.LIQUIDS, self.constants.VOWELS_WO_I),
            "j", 1)
        return "".join(char_list)

    def accent_by_position(self, verse_line: str) -> str:
        """
        Accent vowels according to the rules of scansion.

        :param verse_line: a line of unaccented verse
        :return: the same line with vowels accented by position

        >>> print(VerseScanner().accent_by_position(
        ... "Arma virumque cano, Troiae qui primus ab oris").lstrip())
        Ārma virūmque canō  Trojae qui primus ab oris
        """
        line = verse_line.translate(self.punctuation_substitutions)
        line = self.transform_i_to_j(line)
        marks = list(line)
        # locate and save diphthong positions since we don't want them being accented
        dipthong_positions = []
        for dipth in self.constants.DIPTHONGS:
            if dipth in line:
                dipthong_positions.append(line.find(dipth))
        # Vowels followed by 2 consonants.
        # The digraphs ch, ph, th, qu and sometimes gu and su count as single consonants.
        # see http://people.virginia.edu/~jdk3t/epicintrog/scansion.htm
        marks = string_utils.overwrite(
            marks,
            "[{}][{}][{}]".format(self.constants.VOWELS,
                                  self.constants.CONSONANTS,
                                  self.constants.CONSONANTS_WO_H),
            self.constants.STRESSED)
        # one space (or more, for 'dropped' punctuation) may intervene
        marks = string_utils.overwrite(
            marks,
            r"[{}][{}]\s*[{}]".format(self.constants.VOWELS,
                                      self.constants.CONSONANTS,
                                      self.constants.CONSONANTS_WO_H),
            self.constants.STRESSED)
        # ... if both consonants are in the next word, the vowel may be long,
        # ... but it could be short if the vowel is not on the thesis/emphatic
        # ... part of the foot; see Gildersleeve and Lodge p. 446
        marks = string_utils.overwrite(
            marks,
            r"[{}]\s*[{}][{}]".format(self.constants.VOWELS,
                                      self.constants.CONSONANTS,
                                      self.constants.CONSONANTS_WO_H),
            self.constants.STRESSED)
        # x is considered as two letters
        marks = string_utils.overwrite(
            marks, "[{}][xX]".format(self.constants.VOWELS), self.constants.STRESSED)
        # z is considered as two letters
        marks = string_utils.overwrite(
            marks, r"[{}][zZ]".format(self.constants.VOWELS), self.constants.STRESSED)
        original_verse = list(line)
        for idx, word in enumerate(original_verse):
            if marks[idx] == self.constants.STRESSED:
                original_verse[idx] = self.constants.VOWELS_TO_ACCENTS[original_verse[idx]]
        # make sure diphthongs aren't accented
        for idx in dipthong_positions:
            if original_verse[idx + 1] in self.constants.ACCENTS_TO_VOWELS:
                original_verse[idx + 1] = self.constants.ACCENTS_TO_VOWELS[
                    original_verse[idx + 1]]
        return "".join(original_verse)

    def elide_all(self, line: str) -> str:
        """
        Given a string of space-separated syllables, erase with spaces the
        syllable portions that would disappear according to the rules of elision.

        :param line:
        :return:
        """
        marks = list(line.translate(self.remove_punct_map))
        all_vowels = self.constants.VOWELS + self.constants.ACCENTED_VOWELS
        tmp = "".join(marks)
        # Elision rules are compound but not cumulative: we place all elision
        # edits into a list of candidates, and then merge, taking the least of
        # each section of the line.
        candidates = [
            tmp,
            self.elide(tmp, r"[{}][{}]\s+[{}]".format(self.constants.CONSONANTS,
                                                      all_vowels, all_vowels), 1, 1),
            self.elide(tmp, r"[{}][{}]\s+[hH]".format(self.constants.CONSONANTS,
                                                      all_vowels), 1, 1),
            self.elide(tmp, r"[aāuū]m\s+[{}]".format(all_vowels), 2),
            self.elide(tmp, r"ae\s+[{}]".format(all_vowels), 2),
            self.elide(tmp, r"[{}]\s+[{}]".format(all_vowels, all_vowels), 1),
            self.elide(tmp, r"[uū]m\s+h", 2)
        ]
        results = string_utils.merge_elisions(candidates)
        return results

    def calc_offset(self, syllables_spaces: List[str]) -> Dict[int, int]:
        """
        Calculate a dictionary of accent positions from a list of syllables
        with spaces.

        :param syllables_spaces:
        :return:
        """
        line = string_utils.flatten(syllables_spaces)
        mydict = {}  # type: Dict[int, int]
        for idx, syl in enumerate(syllables_spaces):
            target_syllable = syllables_spaces[idx]
            skip_qu = string_utils.starts_with_qu(target_syllable)
            matches = list(self.syllable_matcher.finditer(target_syllable))
            for position, possible in enumerate(matches):
                if skip_qu:
                    skip_qu = False
                    continue
                (start, end) = possible.span()
                if target_syllable[start:end] in \
                        self.constants.VOWELS + self.constants.ACCENTED_VOWELS:
                    part = line[:len("".join(syllables_spaces[:idx]))]
                    offset = len(part) + start
                    if line[offset] not in \
                            self.constants.VOWELS + self.constants.ACCENTED_VOWELS:
                        LOG.error("Problem at line {} offset {}".format(line, offset))
                    mydict[idx] = offset
        return mydict

    def produce_scansion(self, stresses: list, syllables_wspaces: List[str],
                         offset_map: Dict[int, int]) -> str:
        """
        Create a scansion string that has stressed and unstressed syllable
        positions in locations that correspond with the original text's
        syllable vowels.

        :param stresses: list of syllable positions
        :param syllables_wspaces: list of syllables with spaces escaped for
            punctuation or elision
        :param offset_map: dictionary of syllable positions, and an offset
            amount which is the number of spaces to skip in the original line
            before inserting the accent.
        """
        scansion = list(" " * len(string_utils.flatten(syllables_wspaces)))
        unstresses = string_utils.get_unstresses(stresses, len(syllables_wspaces))
        try:
            for idx in unstresses:
                location = offset_map.get(idx)
                if location is not None:
                    scansion[location] = self.constants.UNSTRESSED
            for idx in stresses:
                location = offset_map.get(idx)
                if location is not None:
                    scansion[location] = self.constants.STRESSED
        except Exception as e:
            LOG.error("problem with syllables; check syllabification {}, {}".format(
                syllables_wspaces, e))
        return "".join(scansion)

    def flag_dipthongs(self, syllables: List[str]) -> List[int]:
        """
        Return a list of syllables that contain a diphthong.

        :param syllables:
        :return:
        """
        long_positions = []
        for idx, syl in enumerate(syllables):
            for dipthong in self.constants.DIPTHONGS:
                if dipthong in syllables[idx]:
                    if not string_utils.starts_with_qu(syllables[idx]):
                        long_positions.append(idx)
        return long_positions

    def elide(self, line: str, regexp: str, quantity: int = 1, offset: int = 0) -> str:
        r"""
        Erase a section of a line, matching on a regex, pushing in a quantity
        of blank spaces, and jumping forward with an offset if necessary.
        If the elided vowel was strong, the vowel merged with takes on the stress.

        :param line:
        :param regexp:
        :param quantity:
        :param offset:
        :return:

        >>> print(VerseScanner().elide("uvae avaritia", r"[e]\s*[a]"))
        uv   āvaritia
        >>> print(VerseScanner().elide("mare avaritia", r"[e]\s*[a]"))
        mar  avaritia
        """
        matcher = re.compile(regexp)
        positions = matcher.finditer(line)
        new_line = line
        for match in positions:
            (start, end) = match.span()  # pylint: disable=unused-variable
            if (start > 0) and new_line[start - 1:start + 1] in self.constants.DIPTHONGS:
                vowel_to_coerce = new_line[end - 1]
                new_line = (new_line[:(start - 1) + offset] +
                            (" " * (quantity + 2)) +
                            self.constants.stress_accent_dict[vowel_to_coerce] +
                            new_line[end:])
            else:
                new_line = (new_line[:start + offset] +
                            (" " * quantity) +
                            new_line[start + quantity + offset:])
        return new_line

    def correct_invalid_start(self, scansion: str) -> str:
        """
        If a hexameter, hendecasyllable, or pentameter scansion starts with a
        spondee, an unstressed syllable in the third position must actually be
        stressed, so we will convert it: - - | U  ->  - - | -

        :param scansion:
        :return:

        >>> print(VerseScanner().correct_invalid_start(
        ... " - - U U - - U U U U U U - -").strip())
        - - - - - - U U U U U U - -
        """
        mark_list = string_utils.mark_list(scansion)
        raw_scansion = scansion.replace(" ", "")
        if raw_scansion.startswith(self.constants.SPONDEE + self.constants.UNSTRESSED):
            new_scansion = list(self.constants.SPONDEE + self.constants.SPONDEE +
                                raw_scansion[4:])
            corrected = "".join(new_scansion)
            new_sequence = list(" " * len(scansion))
            for idx, car in enumerate(corrected):
                new_sequence[mark_list[idx]] = car
            return "".join(new_sequence)
        return scansion

    def correct_first_two_dactyls(self, scansion: str) -> str:
        """
        If a hexameter or pentameter starts with a spondee, an unstressed
        syllable in the third position must actually be stressed, so we will
        convert it: - - | U  ->  - - | -
        And/or if the starting pattern is spondee + trochee + stressed, then
        the unstressed trochee can be corrected:
        - - | - U | -  ->  - - | - - | -

        :param scansion:
        :return:

        >>> print(VerseScanner().correct_first_two_dactyls(
        ... " - - U U - - U U U U U U - -")) # doctest: +NORMALIZE_WHITESPACE
        - - - - - - U U U U U U - -
        """
        mark_list = string_utils.mark_list(scansion)
        new_line = self.correct_invalid_start(scansion)
        raw_scansion = new_line.replace(" ", "")
        if raw_scansion.startswith(self.constants.SPONDEE + self.constants.TROCHEE +
                                   self.constants.STRESSED):
            new_scansion = list(self.constants.SPONDEE + self.constants.SPONDEE +
                                self.constants.STRESSED + raw_scansion[5:])
            corrected = "".join(new_scansion)
            new_sequence = list(" " * len(scansion))
            for idx, car in enumerate(corrected):
                new_sequence[mark_list[idx]] = car
            return "".join(new_sequence)
        return new_line

    def assign_candidate(self, verse: Verse, candidate: str) -> Verse:
        """
        Helper method; make sure that the verse object is properly packaged.

        :param verse:
        :param candidate:
        :return:
        """
        verse.scansion = candidate
        verse.valid = True
        verse.accented = self.formatter.merge_line_scansion(
            verse.original, verse.scansion)
        return verse
def ProcessLine(self, givenLine, df):
    syllabifier = Syllabifier()
    words = givenLine.find_all('word')
    line = givenLine['name']
    for word in words:
        myWord = word.string
        mySyllables = syllabifier.syllabify(myWord.lower())
        # We now want to split every syllable to match its scansion:
        # the 'sy' attribute holds a two-character code per syllable.
        item = word['sy']
        n = 2
        myScansions = [item[i:i + n] for i in range(0, len(item), n)]
        # 'wb' (word boundary) and 'mf' (metrical feature) are optional attributes
        myWb = word.get('wb', '')
        myMf = word.get('mf', '')
        for i in range(len(mySyllables)):
            mySyllable = mySyllables[i]
            # remove punctuation
            mySyllable = mySyllable.translate(
                str.maketrans('', '', string.punctuation))
            try:
                myScansion = myScansions[i]
                foot = myScansion[0]
                feet_pos = myScansion[1]
                myMf2 = ''  # no metrical feature, so leave the field empty
            except IndexError:
                myScansion = ''
                foot = feet_pos = ''
                myMf2 = myMf  # record the reason for this emptiness
            if feet_pos == 'A' or feet_pos == 'T':  # long positions
                length = 1
            elif feet_pos == 'b' or feet_pos == 'c':  # short positions
                length = 0
            elif feet_pos == '':
                length = -1
            else:
                print('Error occurred determining feet_pos of syllable')
                length = -1
            # Now fill the dataframe. TODO: split length into foot and length.
            # An extended row could also carry 'word_boundary': myWb and
            # 'metrical_feature': myMf2.
            newLine = {'author': self.author, 'text': self.title, 'line': line,
                       'syllable': mySyllable, 'foot': foot,
                       'feet_pos': feet_pos, 'length': length}
            # note: DataFrame.append was removed in pandas 2.0; use pd.concat there
            df = df.append(newLine, ignore_index=True)
    return df
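# A sketch of the input ProcessLine expects, inferred from the attribute access
# above: a BeautifulSoup <line> element whose <word> children carry a 'sy'
# attribute of two-character codes (foot digit + position letter) per syllable.
# The XML snippet, the 'reader' instance, and the column list below are
# illustrative assumptions, not part of the original code.
#
#   from bs4 import BeautifulSoup
#   import pandas as pd
#
#   xml = ('<line name="1"><word sy="1A1b">arma</word>'
#          '<word sy="1c2A2b">virumque</word></line>')
#   soup_line = BeautifulSoup(xml, 'xml').find('line')
#   df = pd.DataFrame(columns=['author', 'text', 'line', 'syllable',
#                              'foot', 'feet_pos', 'length'])
#   df = reader.ProcessLine(soup_line, df)  # one row per syllable of each word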
class Scansion:
    """Preprocesses Latin text for prose rhythm analysis."""

    SHORT_VOWELS = ["a", "e", "i", "o", "u", "y"]
    LONG_VOWELS = ["ā", "ē", "ī", "ō", "ū"]
    VOWELS = SHORT_VOWELS + LONG_VOWELS
    DIPHTHONGS = ["ae", "au", "ei", "oe", "ui"]
    SINGLE_CONSONANTS = ["b", "c", "d", "g", "k", "l", "m", "n", "p", "q",
                         "r", "s", "t", "v", "f", "j"]
    DOUBLE_CONSONANTS = ["x", "z"]
    CONSONANTS = SINGLE_CONSONANTS + DOUBLE_CONSONANTS
    DIGRAPHS = ["ch", "ph", "th", "qu"]
    LIQUIDS = ["r", "l"]
    MUTES = ["b", "p", "d", "t", "c", "g"]
    MUTE_LIQUID_EXCEPTIONS = ["gl", "bl"]
    NASALS = ["m", "n"]
    SESTS = ["sc", "sm", "sp", "st", "z"]

    def __init__(self, punctuation=None, clausula_length=13, elide=True):
        if punctuation is None:
            self.punctuation = [".", "?", "!", ";", ":"]
        else:
            self.punctuation = punctuation
        self.clausula_length = clausula_length
        self.elide = elide
        self.syllabifier = Syllabifier()

    def _tokenize_syllables(self, word: str) -> List[Dict[str, Any]]:
        """
        Tokenize syllables for word.

        "mihi" -> [{"syllable": "mi", index: 0, ... } ... ]

        Syllable properties:
            syllable: string -> syllable
            index: int -> position in word
            long_by_nature: bool -> is syllable long by nature
            accented: bool -> does receive accent
            long_by_position: bool -> is syllable long by position

        :param word: string
        :return: list

        >>> Scansion()._tokenize_syllables("mihi")
        [{'syllable': 'mi', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'hi', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("ivi")
        [{'syllable': 'i', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'vi', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("audītū")
        [{'syllable': 'au', 'index': 0, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': False}, {'syllable': 'dī', 'index': 1, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'tū', 'index': 2, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("ā")
        [{'syllable': 'ā', 'index': 0, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': True}]
        >>> Scansion()._tokenize_syllables("conjiciō")
        [{'syllable': 'con', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': False}, {'syllable': 'ji', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'ci', 'index': 2, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}, {'syllable': 'ō', 'index': 3, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("lingua")
        [{'syllable': 'lin', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}, {'syllable': 'gua', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("abrante")
        [{'syllable': 'ab', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, 'mute+liquid'), 'accented': False}, {'syllable': 'ran', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}, {'syllable': 'te', 'index': 2, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("redemptor")
        [{'syllable': 'red', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}, {'syllable': 'em', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}, {'syllable': 'ptor', 'index': 2, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}]
        >>> Scansion()._tokenize_syllables("nagrante")
        [{'syllable': 'na', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, 'mute+liquid'), 'accented': False}, {'syllable': 'gran', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}, {'syllable': 'te', 'index': 2, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}]
        """
        syllable_tokens = []
        syllables = self.syllabifier.syllabify(word)
        longs = self.LONG_VOWELS + self.DIPHTHONGS
        for i, _ in enumerate(syllables):
            # basic properties
            syllable_dict = {"syllable": syllables[i], "index": i,
                             "elide": (False, None)}
            # is long by nature
            if any(long in syllables[i] for long in longs):
                if syllables[i][:3] != "qui":
                    syllable_dict["long_by_nature"] = True
                else:
                    syllable_dict["long_by_nature"] = False
            else:
                syllable_dict["long_by_nature"] = False
            # long by position intra word
            if i < len(syllables) - 1 and \
                    syllable_dict["syllable"][-1] in self.CONSONANTS:
                if syllable_dict["syllable"][-1] in self.MUTES and \
                        syllables[i + 1][0] in self.LIQUIDS and \
                        syllable_dict["syllable"][-1] + syllables[i + 1][0] \
                        not in self.MUTE_LIQUID_EXCEPTIONS:
                    syllable_dict["long_by_position"] = (False, "mute+liquid")
                elif syllable_dict["syllable"][-1] in self.DOUBLE_CONSONANTS or \
                        syllables[i + 1][0] in self.CONSONANTS:
                    syllable_dict["long_by_position"] = (True, None)
                else:
                    syllable_dict["long_by_position"] = (False, None)
            elif i < len(syllables) - 1 and \
                    syllable_dict["syllable"][-1] in self.VOWELS and \
                    len(syllables[i + 1]) > 1:
                if syllables[i + 1][0] in self.MUTES and \
                        syllables[i + 1][1] in self.LIQUIDS and \
                        syllables[i + 1][0] + syllables[i + 1][1] \
                        not in self.MUTE_LIQUID_EXCEPTIONS:
                    syllable_dict["long_by_position"] = (False, "mute+liquid")
                elif syllables[i + 1][0] in self.CONSONANTS and \
                        syllables[i + 1][1] in self.CONSONANTS or \
                        syllables[i + 1][0] in self.DOUBLE_CONSONANTS:
                    syllable_dict["long_by_position"] = (True, None)
                else:
                    syllable_dict["long_by_position"] = (False, None)
            elif len(syllable_dict["syllable"]) > 2 and \
                    syllable_dict["syllable"][-1] in self.CONSONANTS and \
                    syllable_dict["syllable"][-2] in self.CONSONANTS and \
                    syllable_dict["syllable"][-3] in self.VOWELS:
                syllable_dict["long_by_position"] = (True, None)
            else:
                syllable_dict["long_by_position"] = (False, None)
            syllable_tokens.append(syllable_dict)
            # is accented
            if len(syllables) > 2 and i == len(syllables) - 2:
                if syllable_dict["long_by_nature"] or \
                        syllable_dict["long_by_position"][0]:
                    syllable_dict["accented"] = True
                else:
                    syllable_tokens[i - 1]["accented"] = True
            elif len(syllables) == 2 and i == 0 or len(syllables) == 1:
                syllable_dict["accented"] = True
            syllable_dict["accented"] = \
                False if "accented" not in syllable_dict else True
        return syllable_tokens

    def _tokenize_words(self, sentence: str) -> List[Dict[str, Any]]:
        """
        Tokenize words for sentence.

        "Puella bona est" -> [{word: puella, index: 0, ... }, ... ]

        Word properties:
            word: string -> word
            index: int -> position in sentence
            syllables: list -> list of syllable objects
            syllables_count: int -> number of syllables in word

        :param sentence: string
        :return: list

        >>> Scansion()._tokenize_words('dedērunt te miror antōnī quorum.')
        [{'word': 'dedērunt', 'index': 0, 'syllables': [{'syllable': 'de', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}, {'syllable': 'dē', 'index': 1, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'runt', 'index': 2, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': False}], 'syllables_count': 3}, {'word': 'te', 'index': 1, 'syllables': [{'syllable': 'te', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}], 'syllables_count': 1}, {'word': 'miror', 'index': 2, 'syllables': [{'syllable': 'mi', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'ror', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 2}, {'word': 'antōnī', 'index': 3, 'syllables': [{'syllable': 'an', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': False}, {'syllable': 'tō', 'index': 1, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'nī', 'index': 2, 'elide': (False, None), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 3}, {'word': 'quorum.', 'index': 4, 'syllables': [{'syllable': 'quo', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'rum', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 2}]
        >>> Scansion()._tokenize_words('a spes co i no xe cta.')
        [{'word': 'a', 'index': 0, 'syllables': [{'syllable': 'a', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, 'sest'), 'accented': True}], 'syllables_count': 1}, {'word': 'spes', 'index': 1, 'syllables': [{'syllable': 'spes', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}], 'syllables_count': 1}, {'word': 'co', 'index': 2, 'syllables': [{'syllable': 'co', 'index': 0, 'elide': (True, 'weak'), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}], 'syllables_count': 1}, {'word': 'i', 'index': 3, 'syllables': [{'syllable': 'i', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}], 'syllables_count': 1}, {'word': 'no', 'index': 4, 'syllables': [{'syllable': 'no', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}], 'syllables_count': 1}, {'word': 'xe', 'index': 5, 'syllables': [{'syllable': 'xe', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}], 'syllables_count': 1}, {'word': 'cta.', 'index': 6, 'syllables': [{'syllable': 'cta', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}], 'syllables_count': 1}]
        >>> Scansion()._tokenize_words('x')
        []
        >>> Scansion()._tokenize_words('atae amo.')
        [{'word': 'atae', 'index': 0, 'syllables': [{'syllable': 'a', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'tae', 'index': 1, 'elide': (True, 'strong'), 'long_by_nature': True, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 2}, {'word': 'amo.', 'index': 1, 'syllables': [{'syllable': 'a', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'mo', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 2}]
        >>> Scansion()._tokenize_words('bar rid.')
        [{'word': 'bar', 'index': 0, 'syllables': [{'syllable': 'bar', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}], 'syllables_count': 1}, {'word': 'rid.', 'index': 1, 'syllables': [{'syllable': 'rid', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}], 'syllables_count': 1}]
        >>> Scansion()._tokenize_words('ba brid.')
        [{'word': 'ba', 'index': 0, 'syllables': [{'syllable': 'ba', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, 'mute+liquid'), 'accented': True}], 'syllables_count': 1}, {'word': 'brid.', 'index': 1, 'syllables': [{'syllable': 'brid', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}], 'syllables_count': 1}]
        """
        tokens = []
        split_sent = [word for word in sentence.split(" ") if word != '']
        for i, word in enumerate(split_sent):
            if len(word) == 1 and word not in self.VOWELS:
                break
            # basic properties
            word_dict = {"word": split_sent[i], "index": i}
            # syllables and syllables count
            word_dict["syllables"] = self._tokenize_syllables(split_sent[i])
            word_dict["syllables_count"] = len(word_dict["syllables"])
            if i != 0 and word_dict["syllables"][0]["syllable"][0] in self.VOWELS or \
                    i != 0 and word_dict["syllables"][0]["syllable"][0] == "h":
                last_syll_prev_word = tokens[i - 1]["syllables"][-1]
                if last_syll_prev_word["syllable"][-1] in self.LONG_VOWELS or \
                        last_syll_prev_word["syllable"][-1] == "m":
                    last_syll_prev_word["elide"] = (True, "strong")
                elif len(last_syll_prev_word["syllable"]) > 1 and \
                        last_syll_prev_word["syllable"][-2:] in self.DIPHTHONGS:
                    last_syll_prev_word["elide"] = (True, "strong")
                elif last_syll_prev_word["syllable"][-1] in self.SHORT_VOWELS:
                    last_syll_prev_word["elide"] = (True, "weak")
            # long by position inter word
            if i > 0 and tokens[i - 1]["syllables"][-1]["syllable"][-1] in \
                    self.CONSONANTS and \
                    word_dict["syllables"][0]["syllable"][0] in self.CONSONANTS:
                # previous word ends in consonant and current word begins with
                # consonant
                tokens[i - 1]["syllables"][-1]["long_by_position"] = (True, None)
            elif i > 0 and tokens[i - 1]["syllables"][-1]["syllable"][-1] in \
                    self.VOWELS and \
                    word_dict["syllables"][0]["syllable"][0] in self.CONSONANTS:
                # previous word ends in vowel and current word begins in
                # consonant
                if any(sest in word_dict["syllables"][0]["syllable"]
                       for sest in self.SESTS):
                    # current word begins with sest
                    tokens[i - 1]["syllables"][-1]["long_by_position"] = \
                        (False, "sest")
                elif word_dict["syllables"][0]["syllable"][0] in self.MUTES and \
                        word_dict["syllables"][0]["syllable"][1] in self.LIQUIDS:
                    # current word begins with mute + liquid
                    tokens[i - 1]["syllables"][-1]["long_by_position"] = \
                        (False, "mute+liquid")
                elif word_dict["syllables"][0]["syllable"][0] in \
                        self.DOUBLE_CONSONANTS or \
                        word_dict["syllables"][0]["syllable"][1] in self.CONSONANTS:
                    # current word begins with 2 consonants
                    tokens[i - 1]["syllables"][-1]["long_by_position"] = (True, None)
            tokens.append(word_dict)
        return tokens

    def tokenize(self, text: str) -> List[Dict[str, Any]]:
        """
        Tokenize text on supplied characters.

        "Puella bona est. Puer malus est." ->
        [ [{word: puella, syllables: [...], index: 0}, ... ], ... ]

        :return: list

        >>> Scansion().tokenize('puella bona est. puer malus est.')
        [{'plain_text_sentence': 'puella bona est', 'structured_sentence': [{'word': 'puella', 'index': 0, 'syllables': [{'syllable': 'pu', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}, {'syllable': 'el', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}, {'syllable': 'la', 'index': 2, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 3}, {'word': 'bona', 'index': 1, 'syllables': [{'syllable': 'bo', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'na', 'index': 1, 'elide': (True, 'weak'), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 2}, {'word': 'est', 'index': 2, 'syllables': [{'syllable': 'est', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}], 'syllables_count': 1}]}, {'plain_text_sentence': ' puer malus est', 'structured_sentence': [{'word': 'puer', 'index': 0, 'syllables': [{'syllable': 'pu', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'er', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': False}], 'syllables_count': 2}, {'word': 'malus', 'index': 1, 'syllables': [{'syllable': 'ma', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': True}, {'syllable': 'lus', 'index': 1, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (False, None), 'accented': False}], 'syllables_count': 2}, {'word': 'est', 'index': 2, 'syllables': [{'syllable': 'est', 'index': 0, 'elide': (False, None), 'long_by_nature': False, 'long_by_position': (True, None), 'accented': True}], 'syllables_count': 1}]}, {'plain_text_sentence': '', 'structured_sentence': []}]
        """
        tokenized_sentences = text.split('.')
        tokenized_text = []
        for sentence in tokenized_sentences:
            sentence_dict = {}  # type: ignore
            sentence_dict["plain_text_sentence"] = sentence
            sentence_dict["structured_sentence"] = \
                self._tokenize_words(sentence)  # type: ignore
            tokenized_text.append(sentence_dict)
        return tokenized_text

    def scan_text(self, text: str) -> List[str]:
        """
        Return a flat list of rhythms. The desired clausula length is passed
        as a parameter. Clausulae shorter than the specified length can be
        excluded.

        :return:

        >>> Scansion().scan_text('dedērunt te miror antōnī quorum. sī quid est in mē ingenī jūdicēs quod sentiō.')
        ['u--uuu---ux', 'u---u--u---ux']
        """
        tokens = self.tokenize(text)
        clausulae = []
        for sentence in tokens:
            sentence_clausula = []  # type: List[str]
            syllables = [word['syllables']
                         for word in sentence['structured_sentence']]
            flat_syllables = [syllable for word in syllables for syllable in word]
            if self.elide:
                flat_syllables = [syll for syll in flat_syllables
                                  if not syll['elide'][0]][:-1][::-1]
            for syllable in flat_syllables:
                if len(sentence_clausula) < self.clausula_length - 1:
                    if syllable['long_by_nature'] or \
                            syllable['long_by_position'][0]:
                        sentence_clausula.append('-')
                    else:
                        sentence_clausula.append('u')
            sentence_clausula = sentence_clausula[::-1]
            sentence_clausula.append('x')
            clausulae.append(''.join(sentence_clausula))
        clausulae = clausulae[:-1]
        return clausulae
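# A minimal usage sketch of the Scansion prose-rhythm class above; the input
# sentences and the expected clausulae are taken from the scan_text doctest.
if __name__ == "__main__":
    prose = Scansion()
    text = ('dedērunt te miror antōnī quorum. '
            'sī quid est in mē ingenī jūdicēs quod sentiō.')
    print(prose.scan_text(text))  # ['u--uuu---ux', 'u---u--u---ux']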
class PentameterScanner(VerseScanner):
    """The scansion symbols used can be configured by passing a suitable
    constants class to the constructor."""

    def __init__(self, constants=None, syllabifier=None,
                 optional_transform: bool = False, *args, **kwargs) -> None:
        """
        :param constants: None or a class that implements ScansionConstants
        :param syllabifier: None or a class that implements Syllabifier methods
        :param optional_transform: boolean, whether or not to apply aggressive
            verse transformations.
        :param kwargs:
        """
        super().__init__(*args, **kwargs)
        self.constants = ScansionConstants() if constants is None else constants
        self.syllabifier = Syllabifier() if syllabifier is None else syllabifier
        self.remove_punct_map = string_utils.remove_punctuation_dict()
        self.punctuation_substitutions = string_utils.punctuation_for_spaces_dict()
        self.metrical_validator = MetricalValidator(self.constants)
        self.formatter = ScansionFormatter(self.constants)
        self.optional_transform = optional_transform
        self.inverted_amphibrach_re = re.compile(
            r"{}\s*{}\s*{}".format(self.constants.STRESSED,
                                   self.constants.UNSTRESSED,
                                   self.constants.STRESSED))
        self.syllable_matcher = re.compile(
            r"[{}]".format(self.constants.VOWELS +
                           self.constants.ACCENTED_VOWELS +
                           self.constants.LIQUIDS +
                           self.constants.MUTES))
        self.SPONDAIC_PENTAMETER = (self.constants.SPONDEE + self.constants.SPONDEE +
                                    self.constants.STRESSED + self.constants.DACTYL +
                                    self.constants.DACTYL +
                                    self.constants.OPTIONAL_ENDING)
        self.DACTYLIC_PENTAMETER = (self.constants.DACTYL + self.constants.DACTYL +
                                    self.constants.STRESSED + self.constants.DACTYL +
                                    self.constants.DACTYL +
                                    self.constants.OPTIONAL_ENDING)

    def scan(self, original_line: str, optional_transform: bool = False) -> Verse:
        """
        Scan a line of Latin pentameter and produce a scansion pattern, and
        other data.

        :param original_line: the original line of Latin verse
        :param optional_transform: whether or not to perform the i to j
            transform for syllabification
        :return: a Verse object

        >>> scanner = PentameterScanner()
        >>> print(scanner.scan('ex hoc ingrato gaudia amore tibi.'))
        Verse(original='ex hoc ingrato gaudia amore tibi.', scansion='- - - - - - U U - U U U ', meter='pentameter', valid=True, syllable_count=12, accented='ēx hōc īngrātō gaudia amōre tibi.', scansion_notes=['Spondaic pentameter'], syllables = ['ēx', 'hoc', 'īn', 'gra', 'to', 'gau', 'di', 'a', 'mo', 're', 'ti', 'bi'])
        >>> print(scanner.scan(
        ... "in vento et rapida scribere oportet aqua.").scansion) # doctest: +NORMALIZE_WHITESPACE
        - - - U U - - U U - U U U
        """
        verse = Verse(original_line, meter='pentameter')
        # replace punctuation with spaces
        line = original_line.translate(self.punctuation_substitutions)
        if optional_transform:
            working_line = self.transform_i_to_j_optional(line)
            verse.scansion_notes += [self.constants.NOTE_MAP["optional i to j"]]
        else:
            working_line = self.transform_i_to_j(line)  # conservative i to j
        working_line = self.elide_all(working_line)
        verse.working_line = self.accent_by_position(working_line)
        verse.syllables = self.syllabifier.syllabify(verse.working_line)
        verse.syllable_count = self.syllabifier.get_syllable_count(verse.syllables)
        if verse.syllable_count < 12:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["< 12p"]]
            return verse
        stresses = self.flag_dipthongs(verse.syllables)
        syllables_wspaces = string_utils.to_syllables_with_trailing_spaces(
            verse.working_line, verse.syllables)
        offset_map = self.calc_offset(syllables_wspaces)
        for idx, syl in enumerate(verse.syllables):
            for accented in self.constants.ACCENTED_VOWELS:
                if accented in syl:
                    stresses.append(idx)
        # first syllable is always long in pentameter
        stresses.append(0)
        # second to last syllable is always long
        stresses.append(verse.syllable_count - 2)
        verse.scansion = self.produce_scansion(stresses, syllables_wspaces, offset_map)
        if len(string_utils.stress_positions(self.constants.STRESSED,
                                             verse.scansion)) != len(set(stresses)):
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid syllables"]]
            return verse
        if self.metrical_validator.is_valid_pentameter(verse.scansion):
            verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
            return self.assign_candidate(verse, verse.scansion)
        # identify some obvious and probable choices based on number of syllables
        if verse.syllable_count == 12:  # produce spondees where possible
            candidate = self.make_spondaic(verse.scansion)
            verse.scansion_notes += [self.constants.NOTE_MAP["12p"]]
            return self.assign_candidate(verse, candidate)
        if verse.syllable_count == 14:  # produce dactyls where possible
            candidate = self.make_dactyls(verse.scansion)
            verse.scansion_notes += [self.constants.NOTE_MAP["14p"]]
            return self.assign_candidate(verse, candidate)
        if verse.syllable_count > 14:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["> 14"]]
            return verse
        smoothed = self.correct_first_two_dactyls(verse.scansion)
        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)
        if self.metrical_validator.is_valid_pentameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)
        smoothed = self.correct_penultimate_dactyl_chain(verse.scansion)
        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [
                self.constants.NOTE_MAP["penultimate dactyl chain"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)
        if self.metrical_validator.is_valid_pentameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)
        candidates = self.metrical_validator.closest_pentameter_patterns(verse.scansion)
        if candidates is not None:
            if len(candidates) == 1 \
                    and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                    and len(string_utils.differences(verse.scansion,
                                                     candidates[0])) == 1:
                tmp_scansion = self.produce_scansion(
                    string_utils.differences(verse.scansion, candidates[0]),
                    syllables_wspaces, offset_map)
                if self.metrical_validator.is_valid_pentameter(tmp_scansion):
                    verse.scansion_notes += [self.constants.NOTE_MAP["closest match"]]
                    return self.assign_candidate(verse, tmp_scansion)
        # if the line doesn't scan "as is", it may scan if the optional i to j
        # transformations are made, so here we set them and try again.
        if self.optional_transform and not optional_transform and not verse.valid:
            return self.scan(original_line, optional_transform=True)
        verse.accented = self.formatter.merge_line_scansion(
            verse.original, verse.scansion)
        return verse

    def make_spondaic(self, scansion: str) -> str:
        """
        If a pentameter line has 12 syllables, then it must start with double
        spondees.

        :param scansion: a string of scansion patterns
        :return: a scansion pattern string starting with two spondees

        >>> print(PentameterScanner().make_spondaic("U U U U U U U U U U U U"))
        - - - - - - U U - U U U
        """
        mark_list = string_utils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        new_vals = self.SPONDAIC_PENTAMETER[:-1] + vals[-1]
        corrected = "".join(new_vals)
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)

    def make_dactyls(self, scansion: str) -> str:
        """
        If a pentameter line has 14 syllables, it starts and ends with double
        dactyls.

        :param scansion: a string of scansion patterns
        :return: a scansion pattern string starting and ending with double dactyls

        >>> print(PentameterScanner().make_dactyls("U U U U U U U U U U U U U U"))
        - U U - U U - - U U - U U U
        """
        mark_list = string_utils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        new_vals = self.DACTYLIC_PENTAMETER[:-1] + vals[-1]
        corrected = "".join(new_vals)
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)

    def correct_penultimate_dactyl_chain(self, scansion: str) -> str:
        """
        For pentameter the last two feet of the verse are predictable dactyls,
        and do not regularly allow substitutions.

        :param scansion: scansion line thus far
        :return: corrected line of scansion

        >>> print(PentameterScanner().correct_penultimate_dactyl_chain(
        ... "U U U U U U U U U U U U U U"))
        U U U U U U U - U U - U U U
        """
        mark_list = string_utils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        n_vals = vals[:-7] + [self.constants.DACTYL + self.constants.DACTYL] + \
            [vals[-1]]
        corrected = "".join(n_vals)
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)
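# A minimal usage sketch of PentameterScanner; the line and the resulting notes
# come from the scan doctest above.
if __name__ == "__main__":
    scanner = PentameterScanner()
    verse = scanner.scan('ex hoc ingrato gaudia amore tibi.')
    print(verse.valid)           # True
    print(verse.scansion_notes)  # ['Spondaic pentameter']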
class HendecasyllableScanner(VerseScanner):
    """The scansion symbols used can be configured by passing a suitable
    constants class to the constructor."""

    def __init__(self, constants=None, syllabifier=None,
                 optional_transform: bool = False, *args, **kwargs):
        """
        :param constants: None or a class that implements ScansionConstants
        :param syllabifier: None or a class that implements Syllabifier methods
        :param optional_transform: boolean, whether or not to apply aggressive
            verse transformations.
        :param kwargs:
        """
        super().__init__(*args, **kwargs)
        self.constants = ScansionConstants() if constants is None else constants
        self.syllabifier = Syllabifier() if syllabifier is None else syllabifier
        self.remove_punct_map = string_utils.remove_punctuation_dict()
        self.punctuation_substitutions = string_utils.punctuation_for_spaces_dict()
        self.metrical_validator = MetricalValidator(self.constants)
        self.formatter = ScansionFormatter(self.constants)
        self.inverted_amphibrach_re = re.compile(
            r"{}\s*{}\s*{}".format(self.constants.STRESSED,
                                   self.constants.UNSTRESSED,
                                   self.constants.STRESSED))
        self.syllable_matcher = re.compile(
            r"[{}]".format(self.constants.VOWELS +
                           self.constants.ACCENTED_VOWELS +
                           self.constants.LIQUIDS +
                           self.constants.MUTES))
        self.optional_transform = optional_transform

    def scan(self, original_line: str, optional_transform: bool = False) -> Verse:
        """
        Scan a line of Latin hendecasyllables and produce a scansion pattern,
        and other data.

        :param original_line: the original line of Latin verse
        :param optional_transform: whether or not to perform the i to j
            transform for syllabification
        :return: a Verse object

        >>> scanner = HendecasyllableScanner()
        >>> print(scanner.scan("Cui dono lepidum novum libellum"))
        Verse(original='Cui dono lepidum novum libellum', scansion=' - U - U U - U - U - U ', meter='hendecasyllable', valid=True, syllable_count=11, accented='Cui donō lepidūm novūm libēllum', scansion_notes=['Corrected invalid start.'], syllables = ['Cui', 'do', 'no', 'le', 'pi', 'dūm', 'no', 'vūm', 'li', 'bēl', 'lum'])
        >>> print(scanner.scan(
        ... "ārida modo pumice expolitum?").scansion) # doctest: +NORMALIZE_WHITESPACE
        - U - U U - U - U - U
        """
        verse = Verse(original_line, meter='hendecasyllable')
        # replace punctuation with spaces
        line = original_line.translate(self.punctuation_substitutions)
        if optional_transform:
            working_line = self.transform_i_to_j_optional(line)
            verse.scansion_notes += [self.constants.NOTE_MAP["optional i to j"]]
        else:
            working_line = self.transform_i_to_j(line)  # conservative i to j
        working_line = self.elide_all(working_line)
        verse.working_line = self.accent_by_position(working_line)
        verse.syllables = self.syllabifier.syllabify(verse.working_line)
        verse.syllable_count = self.syllabifier.get_syllable_count(verse.syllables)
        # identify some obvious and probable choices based on number of syllables
        if verse.syllable_count > 11:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["> 11"]]
            return verse
        if verse.syllable_count < 11:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["< 11"]]
            return verse
        stresses = self.flag_dipthongs(verse.syllables)
        syllables_wspaces = string_utils.to_syllables_with_trailing_spaces(
            verse.working_line, verse.syllables)
        offset_map = self.calc_offset(syllables_wspaces)
        for idx, syl in enumerate(verse.syllables):
            for accented in self.constants.ACCENTED_VOWELS:
                if accented in syl:
                    stresses.append(idx)
        # second to last syllable is always long
        stresses.append(verse.syllable_count - 2)
        verse.scansion = self.produce_scansion(stresses, syllables_wspaces, offset_map)
        if len(string_utils.stress_positions(self.constants.STRESSED,
                                             verse.scansion)) != len(set(stresses)):
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid syllables"]]
            return verse
        if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
            verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
            return self.assign_candidate(verse, verse.scansion)
        smoothed = self.correct_invalid_start(verse.scansion)
        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)
        if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)
        smoothed = self.correct_antepenult_chain(verse.scansion)
        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["antepenult chain"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)
        if self.metrical_validator.is_valid_hendecasyllables(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)
        candidates = self.metrical_validator.closest_hendecasyllable_patterns(
            verse.scansion)
        if candidates is not None:
            if len(candidates) == 1 \
                    and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                    and len(string_utils.differences(verse.scansion,
                                                     candidates[0])) == 1:
                tmp_scansion = self.produce_scansion(
                    string_utils.differences(verse.scansion, candidates[0]),
                    syllables_wspaces, offset_map)
                if self.metrical_validator.is_valid_hendecasyllables(tmp_scansion):
                    verse.scansion_notes += [self.constants.NOTE_MAP["closest match"]]
                    return self.assign_candidate(verse, tmp_scansion)
        # if the line doesn't scan "as is", it may scan if the optional i to j
        # transformations are made, so here we set them and try again.
        if self.optional_transform and not optional_transform and not verse.valid:
            return self.scan(original_line, optional_transform=True)
        verse.accented = self.formatter.merge_line_scansion(
            verse.original, verse.scansion)
        return verse

    def correct_invalid_start(self, scansion: str) -> str:
        """
        The third syllable of a hendecasyllabic line is long, so we will
        convert it.

        :param scansion: scansion string
        :return: scansion string with corrected start

        >>> print(HendecasyllableScanner().correct_invalid_start(
        ... "- U U U U - U - U - U").strip())
        - U - U U - U - U - U
        """
        mark_list = string_utils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        corrected = vals[:2] + [self.constants.STRESSED] + vals[3:]
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)

    def correct_antepenult_chain(self, scansion: str) -> str:
        """
        For hendecasyllables the last three feet of the verse are predictable
        and do not regularly allow substitutions.

        :param scansion: scansion line thus far
        :return: corrected line of scansion

        >>> print(HendecasyllableScanner().correct_antepenult_chain(
        ... "-U -UU UU UU UX").strip())
        -U -UU -U -U -X
        """
        mark_list = string_utils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        new_vals = vals[:len(vals) - 6] + [
            self.constants.TROCHEE + self.constants.TROCHEE +
            self.constants.STRESSED
        ] + vals[-1:]
        corrected = "".join(new_vals)
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)
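# A minimal usage sketch of HendecasyllableScanner; the Catullus line and its
# expected pattern come from the scan doctest above.
if __name__ == "__main__":
    scanner = HendecasyllableScanner()
    verse = scanner.scan("Cui dono lepidum novum libellum")
    print(verse.scansion.strip())  # - U - U U - U - U - U
    print(verse.scansion_notes)    # ['Corrected invalid start.']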
class HexameterScanner(VerseScanner): """The scansion symbols used can be configured by passing a suitable constants class to the constructor.""" def __init__(self, constants=None, syllabifier=None, optional_transform: bool = False, *args, **kwargs): """ :param constants: None or a class that implements ScansionConstants :param syllabifier: None or a class that implements Syllabifier methods :param optional_tranform: boolean, whether or not to apply aggresive verse transformations. :param kwargs: """ super().__init__(*args, **kwargs) self.constants = ScansionConstants( ) if constants is None else constants self.syllabifier = Syllabifier( ) if syllabifier is None else syllabifier self.remove_punct_map = string_utils.remove_punctuation_dict() self.punctuation_substitutions = string_utils.punctuation_for_spaces_dict( ) self.metrical_validator = MetricalValidator(self.constants) self.formatter = ScansionFormatter(self.constants) self.inverted_amphibrach_re = re.compile(r"{}\s*{}\s*{}".format( self.constants.STRESSED, self.constants.UNSTRESSED, self.constants.STRESSED)) self.syllable_matcher = re.compile( r"[{}]".format(self.constants.VOWELS + self.constants.ACCENTED_VOWELS + self.constants.LIQUIDS + self.constants.MUTES)) self.optional_transform = optional_transform def scan(self, original_line: str, optional_transform: bool = False, dactyl_smoothing: bool = False) -> Verse: """ Scan a line of Latin hexameter and produce a scansion pattern, and other data. :param original_line: the original line of Latin verse :param optional_transform: whether or not to perform i to j transform for syllabification :param dactyl_smoothing: whether or not to perform dactyl smoothing :return: a Verse object >>> scanner = HexameterScanner() >>> print(HexameterScanner().scan( ... "ēxiguām sedēm pariturae tērra negavit").scansion) # doctest: +NORMALIZE_WHITESPACE - - - - - U U - - - U U - U >>> print(scanner.scan("impulerit. Tantaene animis caelestibus irae?")) Verse(original='impulerit. Tantaene animis caelestibus irae?', scansion='- U U - - - U U - - - U U - - ', meter='hexameter', valid=True, syllable_count=15, accented='īmpulerīt. Tāntaene animīs caelēstibus īrae?', scansion_notes=['Valid by positional stresses.'], syllables = ['īm', 'pu', 'le', 'rīt', 'Tān', 'taen', 'a', 'ni', 'mīs', 'cae', 'lēs', 'ti', 'bus', 'i', 'rae']) >>> print(scanner.scan( ... "Arma virumque cano, Troiae qui prīmus ab ōrīs").scansion) # doctest: +NORMALIZE_WHITESPACE - U U - U U - - - - - U U - - >>> # some hexameters need the optional transformations: >>> optional_transform_scanner = HexameterScanner(optional_transform=True) >>> print(optional_transform_scanner.scan( ... "Ītaliam, fāto profugus, Lāvīniaque vēnit").scansion) # doctest: +NORMALIZE_WHITESPACE - - - - - U U - - - U U - U >>> print(HexameterScanner().scan( ... "lītora, multum ille et terrīs iactātus et alto").scansion) # doctest: +NORMALIZE_WHITESPACE - U U - - - - - - - U U - U >>> print(HexameterScanner().scan( ... "vī superum saevae memorem Iūnōnis ob īram;").scansion) # doctest: +NORMALIZE_WHITESPACE - U U - - - U U - - - U U - U >>> # handle multiple elisions >>> print(scanner.scan("monstrum horrendum, informe, ingens, cui lumen ademptum").scansion) # doctest: +NORMALIZE_WHITESPACE - - - - - - - - - U U - U >>> # if we have 17 syllables, create a chain of all dactyls >>> print(scanner.scan("quadrupedante putrem sonitu quatit ungula campum" ... 

    def scan(self, original_line: str, optional_transform: bool = False,
             dactyl_smoothing: bool = False) -> Verse:
        """
        Scan a line of Latin hexameter and produce a scansion pattern, and other data.

        :param original_line: the original line of Latin verse
        :param optional_transform: whether or not to perform i to j transform for syllabification
        :param dactyl_smoothing: whether or not to perform dactyl smoothing
        :return: a Verse object

        >>> scanner = HexameterScanner()
        >>> print(HexameterScanner().scan(
        ... "ēxiguām sedēm pariturae tērra negavit").scansion) # doctest: +NORMALIZE_WHITESPACE
        - - - - - U U - - - U U - U
        >>> print(scanner.scan("impulerit. Tantaene animis caelestibus irae?"))
        Verse(original='impulerit. Tantaene animis caelestibus irae?', scansion='- U U - - - U U - - - U U - - ', meter='hexameter', valid=True, syllable_count=15, accented='īmpulerīt. Tāntaene animīs caelēstibus īrae?', scansion_notes=['Valid by positional stresses.'], syllables = ['īm', 'pu', 'le', 'rīt', 'Tān', 'taen', 'a', 'ni', 'mīs', 'cae', 'lēs', 'ti', 'bus', 'i', 'rae'])
        >>> print(scanner.scan(
        ... "Arma virumque cano, Troiae qui prīmus ab ōrīs").scansion) # doctest: +NORMALIZE_WHITESPACE
        - U U - U U - - - - - U U - -
        >>> # some hexameters need the optional transformations:
        >>> optional_transform_scanner = HexameterScanner(optional_transform=True)
        >>> print(optional_transform_scanner.scan(
        ... "Ītaliam, fāto profugus, Lāvīniaque vēnit").scansion) # doctest: +NORMALIZE_WHITESPACE
        - - - - - U U - - - U U - U
        >>> print(HexameterScanner().scan(
        ... "lītora, multum ille et terrīs iactātus et alto").scansion) # doctest: +NORMALIZE_WHITESPACE
        - U U - - - - - - - U U - U
        >>> print(HexameterScanner().scan(
        ... "vī superum saevae memorem Iūnōnis ob īram;").scansion) # doctest: +NORMALIZE_WHITESPACE
        - U U - - - U U - - - U U - U
        >>> # handle multiple elisions
        >>> print(scanner.scan("monstrum horrendum, informe, ingens, cui lumen ademptum").scansion) # doctest: +NORMALIZE_WHITESPACE
        - - - - - - - - - U U - U
        >>> # if we have 17 syllables, create a chain of all dactyls
        >>> print(scanner.scan("quadrupedante putrem sonitu quatit ungula campum"
        ... ).scansion) # doctest: +NORMALIZE_WHITESPACE
        - U U - U U - U U - U U - U U - U
        >>> # if we have 13 syllables exactly, we'll create a spondaic hexameter
        >>> print(HexameterScanner().scan(
        ... "illi inter sese multa vi bracchia tollunt").scansion) # doctest: +NORMALIZE_WHITESPACE
        - - - - - - - - - UU - -
        >>> print(HexameterScanner().scan(
        ... "dat latus; insequitur cumulo praeruptus aquae mons").scansion) # doctest: +NORMALIZE_WHITESPACE
        - U U - U U - U U - - - U U - -
        >>> print(optional_transform_scanner.scan(
        ... "Non quivis videt inmodulata poëmata iudex").scansion) # doctest: +NORMALIZE_WHITESPACE
        - - - U U - U U - U U- U U - -
        >>> print(HexameterScanner().scan(
        ... "certabant urbem Romam Remoramne vocarent").scansion) # doctest: +NORMALIZE_WHITESPACE
        - - - - - - - U U - U U - -
        >>> # advanced smoothing is available via keyword flags: dactyl_smoothing
        >>> print(HexameterScanner().scan(
        ... "his verbis: 'o gnata, tibi sunt ante ferendae",
        ... dactyl_smoothing=True).scansion) # doctest: +NORMALIZE_WHITESPACE
        - - - - - U U - - - U U - -
        >>> HexameterScanner().scan('Italiam non sponte sequor.')
        Verse(original='Italiam non sponte sequor.', scansion='', meter='hexameter', valid=False, syllable_count=9, accented='', scansion_notes=['Incomplete hexameter; not enough syllables.'], syllables = ['I', 'ta', 'li', 'ām', 'nōn', 'spōn', 'te', 'se', 'quor'])
        >>> HexameterScanner().scan('Phaselus ille, quem videtis, hospites')
        Verse(original='Phaselus ille, quem videtis, hospites', scansion=' - U U - - - U U U - - U ', meter='hexameter', valid=False, syllable_count=12, accented='', scansion_notes=['Inverted amphibrachs corrected.'], syllables = ['Pha', 'se', 'lus', 'īl', 'le', 'quēm', 'vi', 'de', 'tis', 'hōs', 'pi', 'tes'])
        """
        verse = Verse(original_line, meter='hexameter')
        # replace punctuation with spaces
        line = original_line.translate(self.punctuation_substitutions)
        if optional_transform:
            working_line = self.transform_i_to_j_optional(line)
            verse.scansion_notes += [self.constants.NOTE_MAP["optional i to j"]]
        else:
            working_line = self.transform_i_to_j(line)  # conservative i to j
        working_line = self.elide_all(working_line)
        verse.working_line = self.accent_by_position(working_line)
        verse.syllables = self.syllabifier.syllabify(verse.working_line)
        verse.syllable_count = self.syllabifier.get_syllable_count(verse.syllables)
        if verse.syllable_count < 12:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["< 12"]]
            return verse
        stresses = self.flag_dipthongs(verse.syllables)
        syllables_wspaces = string_utils.to_syllables_with_trailing_spaces(
            verse.working_line, verse.syllables)
        offset_map = self.calc_offset(syllables_wspaces)
        for idx, syl in enumerate(verse.syllables):
            for accented in self.constants.ACCENTED_VOWELS:
                if accented in syl:
                    stresses.append(idx)
        # first syllable is always long in hexameter
        stresses.append(0)
        # second to last syllable is always long
        stresses.append(verse.syllable_count - 2)
        verse.scansion = self.produce_scansion(stresses,
                                               syllables_wspaces, offset_map)
        if len(string_utils.stress_positions(self.constants.STRESSED, verse.scansion)) != \
                len(set(stresses)):
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid syllables"]]
            return verse
        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            verse.scansion_notes += [self.constants.NOTE_MAP["positionally"]]
            return self.assign_candidate(verse, verse.scansion)
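        # (explanatory note) the count-based heuristics below follow from the
        # arithmetic of the meter, assuming the usual two-syllable final foot:
        #   17 syllables -> only five dactyls fit: 5 * 3 + 2 = 17
        #   12 syllables -> only five spondees fit: 5 * 2 + 2 = 12
        #   13 syllables -> four spondees plus one dactyl: 4 * 2 + 3 + 2 = 13,
        #                   with the dactyl conventionally placed in the 5th foot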
        # identify some obvious and probable choices based on the number of syllables
        if verse.syllable_count == 17:  # produce all dactyls
            candidate = self.produce_scansion(
                self.metrical_validator.hexameter_known_stresses(),
                syllables_wspaces, offset_map)
            verse.scansion_notes += [self.constants.NOTE_MAP["17"]]
            if self.metrical_validator.is_valid_hexameter(candidate):
                return self.assign_candidate(verse, candidate)
        if verse.syllable_count == 12:  # create all-spondee hexameter
            candidate = self.produce_scansion(list(range(12)),
                                              syllables_wspaces, offset_map)
            # validate the all-spondee candidate itself
            if self.metrical_validator.is_valid_hexameter(candidate):
                verse.scansion_notes += [self.constants.NOTE_MAP["12"]]
                return self.assign_candidate(verse, candidate)
        if verse.syllable_count == 13:  # create spondee hexameter with a dactyl at 5th foot
            known_unaccents = [9, 10]
            last_syllable_accented = False
            for vowel in self.constants.ACCENTED_VOWELS:
                if vowel in verse.syllables[12]:
                    last_syllable_accented = True
            if not last_syllable_accented:
                known_unaccents.append(12)
            # proceed only if none of the presumed unaccented positions has
            # already been detected as stressed
            if len(set(known_unaccents) - set(stresses)) == len(known_unaccents):
                verse.scansion = self.produce_scansion(
                    [x for x in range(13) if x not in known_unaccents],
                    syllables_wspaces, offset_map)
                verse.scansion_notes += [self.constants.NOTE_MAP["5th dactyl"]]
                if self.metrical_validator.is_valid_hexameter(verse.scansion):
                    return self.assign_candidate(verse, verse.scansion)
        if verse.syllable_count > 17:
            verse.valid = False
            verse.scansion_notes += [self.constants.NOTE_MAP["> 17"]]
            return verse
        smoothed = self.correct_inverted_amphibrachs(verse.scansion)
        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)
        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)
        smoothed = self.correct_first_two_dactyls(verse.scansion)
        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid start"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)
        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)
        smoothed = self.correct_invalid_fifth_foot(verse.scansion)
        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["invalid 5th"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)
        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)
        feet = self.metrical_validator.hexameter_feet(verse.scansion.replace(" ", ""))
        if feet:
            # Normal good citizens are unwelcome in the house of hexameter
            invalid_feet_in_hexameter = [self.constants.IAMB, self.constants.TROCHEE]
            current_foot = 0
            ending = feet.pop()  # don't process the ending, a possible trochee, add it back after
            scanned_line = ""
            for foot in feet:
                if foot.replace(" ", "") in invalid_feet_in_hexameter:
                    scanned_line = self.invalid_foot_to_spondee(feet, foot, current_foot)
                    scanned_line = scanned_line + ending
                current_foot += 1
            smoothed = self.produce_scansion(
                stresses + string_utils.stress_positions(
                    self.constants.STRESSED, scanned_line),
                syllables_wspaces, offset_map)
            if self.metrical_validator.is_valid_hexameter(smoothed):
                verse.scansion_notes += [self.constants.NOTE_MAP["invalid foot"]]
                return self.assign_candidate(verse, smoothed)
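        # (explanatory note) the foot-level substitution above may introduce
        # fresh "- U -" windows into the pattern, which is why the
        # inverted-amphibrach pass runs a second time below before trying the
        # closest-pattern lookup.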
        # need to do this again, since the scansion has changed
        smoothed = self.correct_inverted_amphibrachs(verse.scansion)
        if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
            verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            verse.scansion = smoothed
            stresses += string_utils.differences(verse.scansion, smoothed)
        if self.metrical_validator.is_valid_hexameter(verse.scansion):
            return self.assign_candidate(verse, verse.scansion)
        candidates = self.metrical_validator.closest_hexameter_patterns(verse.scansion)
        if candidates is not None:
            if len(candidates) == 1 \
                    and len(verse.scansion.replace(" ", "")) == len(candidates[0]) \
                    and len(string_utils.differences(verse.scansion, candidates[0])) == 1:
                tmp_scansion = self.produce_scansion(
                    string_utils.differences(verse.scansion, candidates[0]),
                    syllables_wspaces, offset_map)
                if self.metrical_validator.is_valid_hexameter(tmp_scansion):
                    verse.scansion_notes += [self.constants.NOTE_MAP["closest match"]]
                    return self.assign_candidate(verse, tmp_scansion)
        # need to do this again, since the scansion has changed
        smoothed = self.correct_inverted_amphibrachs(smoothed)
        if self.metrical_validator.is_valid_hexameter(smoothed):
            verse.scansion_notes += [self.constants.NOTE_MAP["inverted"]]
            return self.assign_candidate(verse, smoothed)
        if dactyl_smoothing:
            smoothed = self.correct_dactyl_chain(smoothed)
            if Levenshtein.levenshtein_distance(verse.scansion, smoothed) > 0:
                verse.scansion_notes += [self.constants.NOTE_MAP["dactyl smoothing"]]
                verse.scansion = smoothed
            if self.metrical_validator.is_valid_hexameter(verse.scansion):
                return self.assign_candidate(verse, verse.scansion)
        # if the line doesn't scan "as is", it may scan if the optional i to j
        # transformations are made, so here we set them and try again
        if self.optional_transform and not optional_transform and not verse.valid:
            return self.scan(original_line, optional_transform=True,
                             dactyl_smoothing=True)
        return verse

    def correct_invalid_fifth_foot(self, scansion: str) -> str:
        """
        An invalid fifth foot, a dactyl followed by an iamb before the final
        (optional-length) syllable, is not allowed in hexameters, so here we
        coerce the offending unstressed syllable to stressed when the pattern
        occurs at the end of a line.

        :param scansion: the scansion pattern
        :return corrected scansion: the corrected scansion pattern

        >>> print(HexameterScanner().correct_invalid_fifth_foot(
        ... " - - - U U - U U U - - U U U - x")) # doctest: +NORMALIZE_WHITESPACE
        - - - U U - U U U - - - U U - x
        """
        scansion_wo_spaces = scansion.replace(" ", "")[:-1] + self.constants.OPTIONAL_ENDING
        if scansion_wo_spaces.endswith(self.constants.DACTYL +
                                       self.constants.IAMB +
                                       self.constants.OPTIONAL_ENDING):
            matches = list(re.compile(r"{}\s*{}\s*{}\s*{}\s*{}".format(
                self.constants.STRESSED,
                self.constants.UNSTRESSED,
                self.constants.UNSTRESSED,
                self.constants.UNSTRESSED,
                self.constants.STRESSED)).finditer(scansion))
            (start, end) = matches[len(matches) - 1].span()
            unstressed_idx = scansion.index(self.constants.UNSTRESSED, start)
            new_line = scansion[:unstressed_idx] + self.constants.STRESSED \
                + scansion[unstressed_idx + 1:]
            return new_line
        return scansion
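
    # Worked example for correct_invalid_fifth_foot (illustrative, assuming
    # the default constants where the dactyl is "-UU", the iamb is "U-" and
    # the line ends in a single anceps mark): a spaceless tail "-UUU-x"
    # matches dactyl + iamb + ending, so the first unstressed mark of the
    # last "- U U U -" window is coerced to stressed and the tail becomes
    # "--UU-x": a dactylic fifth foot followed by the two-mark ending.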

    def invalid_foot_to_spondee(self, feet: list, foot: str, idx: int) -> str:
        """
        In hexameters, a single foot that scans as an unstressed-stressed
        (or stressed-unstressed) syllable pattern is often just a spondee
        with a mismarked syllable, so here we coerce it to stressed.

        :param feet: list of string representations of metrical feet
        :param foot: the bad foot to correct
        :param idx: the index of the foot to correct
        :return: corrected scansion

        >>> print(HexameterScanner().invalid_foot_to_spondee(
        ... ['-UU', '--', '-U', 'U-', '--', '-UU'], '-U', 2)) # doctest: +NORMALIZE_WHITESPACE
        -UU----U----UU
        """
        new_foot = foot.replace(self.constants.UNSTRESSED, self.constants.STRESSED)
        feet[idx] = new_foot
        return "".join(feet)

    def correct_dactyl_chain(self, scansion: str) -> str:
        """
        Three or more unstressed accents in a row is a broken dactyl chain,
        best detected and processed backwards.

        Since this method takes a Procrustean approach to modifying the
        scansion pattern, it is not used by default in the scan method;
        however, it is available as an optional keyword parameter, and users
        looking to further automate the generation of scansion candidates
        should consider using this as a fallback.

        :param scansion: scansion with broken dactyl chain; inverted amphibrachs not allowed
        :return: corrected line of scansion

        >>> print(HexameterScanner().correct_dactyl_chain(
        ... "- U U - - U U - - - U U - x"))
        - - - - - U U - - - U U - x
        >>> print(HexameterScanner().correct_dactyl_chain(
        ... "- U U U U - - - - - U U - U")) # doctest: +NORMALIZE_WHITESPACE
        - - - U U - - - - - U U - U
        """
        mark_list = string_utils.mark_list(scansion)
        vals = list(scansion.replace(" ", ""))
        # ignore last two positions, save them
        feet = [vals.pop(), vals.pop()]
        length = len(vals)
        idx = length - 1
        while idx > 0:
            one = vals[idx]
            two = vals[idx - 1]
            if idx > 1:
                three = vals[idx - 2]
            else:
                three = ""
            # Dactyl foot is okay, no corrections
            if one == self.constants.UNSTRESSED and \
                    two == self.constants.UNSTRESSED and \
                    three == self.constants.STRESSED:
                feet += [one]
                feet += [two]
                feet += [three]
                idx -= 3
                continue
            # Spondee foot is okay, no corrections
            if one == self.constants.STRESSED and \
                    two == self.constants.STRESSED:
                feet += [one]
                feet += [two]
                idx -= 2
                continue
            # handle "U U U" foot as "- U U"
            if one == self.constants.UNSTRESSED and \
                    two == self.constants.UNSTRESSED and \
                    three == self.constants.UNSTRESSED:
                feet += [one]
                feet += [two]
                feet += [self.constants.STRESSED]
                idx -= 3
                continue
            # handle "U U -" foot as "- -"
            if one == self.constants.STRESSED and \
                    two == self.constants.UNSTRESSED and \
                    three == self.constants.UNSTRESSED:
                feet += [self.constants.STRESSED]
                feet += [self.constants.STRESSED]
                idx -= 2
                continue
            # handle "- U" foot as "- -"
            if one == self.constants.UNSTRESSED and \
                    two == self.constants.STRESSED:
                feet += [self.constants.STRESSED]
                feet += [two]
                idx -= 2
                continue
        corrected = "".join(feet[::-1])
        new_line = list(" " * len(scansion))
        for idx, car in enumerate(corrected):
            new_line[mark_list[idx]] = car
        return "".join(new_line)
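
    # Note on correct_dactyl_chain (explanatory sketch): the loop walks the
    # marks right to left because the final two positions of a hexameter are
    # fixed and can be set aside; each remaining window of two or three marks
    # then resolves locally to a spondee or dactyl without lookahead. In the
    # first doctest above, "- U U - - U U - - - U U - x", the two well-formed
    # dactyls are kept while the broken opening "- U U - -" is flattened to
    # spondees, yielding "- - - - - U U - - - U U - x".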
"- UU- U - U - - U U U U- U")) # doctest: +NORMALIZE_WHITESPACE - UU- - - - - - U U U U- U """ new_line = scansion while list(self.inverted_amphibrach_re.finditer(new_line)): matches = list(self.inverted_amphibrach_re.finditer(new_line)) for match in matches: (start, end) = match.span() # pylint: disable=unused-variable unstressed_idx = new_line.index(self.constants.UNSTRESSED, start) new_line = new_line[:unstressed_idx] + \ self.constants.STRESSED + new_line[unstressed_idx + 1:] return new_line