def __look_up(self):
    """
    Look up self.word in the dictionaries.
    :return: a set of WordScansion objects
    """
    key = multireplace(self.word, {"v": "u", "j": "i"})
    scansions = {WordScansion(x, False) for x in Word.MORPHEUS_DICT[key]}
    hasMorpheusEntries = len(scansions) != 0
    mqdq_entry = Word.MQDQ_DICT.look_up(key)
    for scansion in mqdq_entry.keys():
        if any(Scansion(scansion).matches(Scansion(x.scansion))
               for x in scansions):
            continue  # do not consider scansion options that already exist
        # TODO check how well this works
        if hasMorpheusEntries and (
                len(mqdq_entry[scansion].keys()) < Word.AUTHOR_COUNT
                or sum(mqdq_entry[scansion].values()) < Word.TOTAL_COUNT
                or "*" in scansion):
            continue  # do not consider infrequent scansions
        # TODO should (or "*" in scansion) be added here
        if not hasMorpheusEntries:
            # make the final syllable unknown
            scansion = re.sub(r"[\^_]([^\^_*[\]()]*)$", r"*\1", scansion)
        scansions.add(WordScansion(scansion, True))
    return scansions
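# Illustrative sketch (not part of the original module): the substitution at the
# end of __look_up replaces the quantity mark of the final syllable with "*"
# (unknown) when a word has no Morpheus entry.  The sample scansion strings and
# the pattern name below are hypothetical placeholders, assuming "^" marks a
# short vowel and "_" a long one, as suggested by the character classes used in
# this module.
import re

FINAL_QUANTITY = re.compile(r"[\^_]([^\^_*[\]()]*)$")

for sample in ["arma_", "pu^ella_", "vi_ru^m"]:
    print(sample, "->", FINAL_QUANTITY.sub(r"*\1", sample))
# arma_    -> arma*
# pu^ella_ -> pu^ella*
# vi_ru^m  -> vi_ru*m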
def update_flags(self, scansion):
    """
    Given that the chosen scansion corresponds to a particular way the words
    are macronized, record any irregularities in word macronization in
    self.flags
    :param scansion: a scansion (word macronizations separated by " ")
    :return: None
    """
    for i, macrons in enumerate(scansion.scansion.strip(" ").split(" ")):
        if self.words[i].is_mqdq_only(Scansion(macrons)):
            self.flags.append("Mqdq only scansion: " + macrons)
        elif self.words[i].is_morpheus_only(Scansion(macrons)):
            self.flags.append("Morpheus only scansion: " + macrons)
        if self.words[i].is_new:
            self.flags.append("A previously unencountered word: " + macrons)
def is_mqdq_only(self, scansion):
    """
    Return True if no Morpheus-derived (non-MqDq) scansion option of this word
    matches the given scansion; newly encountered words always return False.
    :param scansion: a Scansion object describing one macronization of the word
    :return: bool
    """
    if self.is_new:
        return False
    for option in self.scansions:
        if (Scansion(option.scansion + self.postfix).matches(scansion)
                and not option.isMqDq):
            return False
    return True
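# Illustrative sketch (not part of the original module): the predicate above
# answers "does this scansion match only MqDq-derived options?".  The stand-in
# Option tuple and the equality-based matcher below are hypothetical
# simplifications of WordScansion and Scansion.matches.
from collections import namedtuple

Option = namedtuple("Option", "scansion isMqDq")

def mqdq_only(options, chosen):
    # True if no non-MqDq option matches the chosen scansion
    return not any(o.scansion == chosen and not o.isMqDq for o in options)

options = [Option("arma_", False), Option("arma^", True)]
print(mqdq_only(options, "arma^"))  # True: only the MqDq option matches
print(mqdq_only(options, "arma_"))  # False: a Morpheus option matches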
def __macronize(self):
    """
    Populate self.macronizations with all possible ways the line can be
    macronized
    :return: None
    """
    self.macronizations = [Scansion("")]
    for word in self.words:
        new_macrons = []
        for exist in self.macronizations:
            for macrons in word.macronize():
                new_macrons.append(exist + macrons)
        self.macronizations = new_macrons
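# Illustrative sketch (not part of the original module): the nested loops in
# __macronize build the Cartesian product of the per-word macronization
# options.  Plain strings stand in for Scansion objects here, and the word
# options are hypothetical.
from itertools import product

word_options = [["arma_", "arma^"], ["vi^ru^m"], ["que^", "que_"]]
macronizations = [" ".join(combo) for combo in product(*word_options)]
print(macronizations)
# ['arma_ vi^ru^m que^', 'arma_ vi^ru^m que_',
#  'arma^ vi^ru^m que^', 'arma^ vi^ru^m que_']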
def compare_scansions(self, scansion1, scansion2):
    """
    Compare two ways of scanning a word by calculating how many mqdq entries
    match the given scansions
    :param scansion1: the first candidate scansion of the word
    :param scansion2: the second candidate scansion of the word
    :return: a pair of weights for (scansion1, scansion2) that sum to 1
    """
    if (len(self.postfix) != 0) and ("(" not in self.postfix):
        scansion1 = Scansion(scansion1).pattern[:-1]
        scansion2 = Scansion(scansion2).pattern[:-1]
    key = multireplace(self.word, {"v": "u", "j": "i"})
    mqdq_entries = Word.MQDQ_DICT.look_up(key)
    s1_count, s2_count = 0, 0
    for entry in mqdq_entries.keys():
        mqdq_scansion = WordScansion(entry, True)
        self.__process(mqdq_scansion, self.next_word_prefix)
        matches1 = Scansion(mqdq_scansion.scansion).matches(Scansion(scansion1))
        matches2 = Scansion(mqdq_scansion.scansion).matches(Scansion(scansion2))
        if matches1 and not matches2:
            s1_count += sum(mqdq_entries[entry].values())
        elif not matches1 and matches2:
            s2_count += sum(mqdq_entries[entry].values())
    if s1_count + s2_count == 0:
        return 0.5, 0.5
    if s1_count == 0:
        s1_count = 1
        s2_count += 1
    elif s2_count == 0:
        s2_count = 1
        s1_count += 1
    return s1_count / (s1_count + s2_count), s2_count / (s1_count + s2_count)
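# Illustrative sketch (not part of the original module): the final lines of
# compare_scansions turn raw match counts into a pair of weights that sum to 1,
# with add-one smoothing so that neither weight collapses to exactly 0 or 1.
# The helper name to_weights is hypothetical.
def to_weights(s1_count, s2_count):
    if s1_count + s2_count == 0:
        return 0.5, 0.5
    if s1_count == 0:
        s1_count, s2_count = 1, s2_count + 1
    elif s2_count == 0:
        s2_count, s1_count = 1, s1_count + 1
    total = s1_count + s2_count
    return s1_count / total, s2_count / total

print(to_weights(0, 0))  # (0.5, 0.5)
print(to_weights(0, 9))  # roughly (0.09, 0.91)
print(to_weights(3, 1))  # (0.75, 0.25)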
def read_manual_file(filename):
    """
    Populate Verse.DICT with manually made scansions
    :param filename: the name of the file from which to extract the manual
    scansions
    :return: None
    """
    if not filename:
        return
    print("Loading manual scansions...")
    with open(filename, "r") as file:
        lines = file.readlines()
    # each line holds the scanned verse and, optionally, a tab-separated comment
    lines = [line.rstrip("\n").split("\t") for line in lines]
    for line in lines:
        verse_key = Verse.get_verse_key(line[0])
        # keep only lowercase letters and scansion marks, then normalize spaces
        scansion = re.sub(r"([^a-z_\^*\[\]()])", " ", line[0].lower()).strip(" ")
        scansion = Scansion(re.sub(" +", " ", scansion))
        if len(line) == 1:
            Verse.DICT[verse_key] = {"scansion": scansion, "comment": ""}
        else:
            Verse.DICT[verse_key] = {"scansion": scansion, "comment": line[1]}
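# Illustrative sketch (not part of the original module): the cleaning step in
# read_manual_file keeps only lowercase letters and scansion marks and
# collapses whitespace.  The sample line and its quantity marks are arbitrary
# placeholders, not real data.
import re

line = "Lo_rem i^psum do^lor si^t a^met, co_nsectetur!"
cleaned = re.sub(r"([^a-z_\^*\[\]()])", " ", line.lower()).strip(" ")
cleaned = re.sub(" +", " ", cleaned)
print(cleaned)
# lo_rem i^psum do^lor si^t a^met co_nsectetur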
def analyse(data):
    """
    Analyze the scanned data and record various statistics such as elision
    occurrence
    :param data: a dictionary of scanned verses; each value has at least a
    "meter" and a "scansion" entry
    :return: a dictionary with various statistical measurements
    """
    print("Analysis in progress...")
    stats = defaultdict(dict)
    dummy_func = lambda *args: None  # used if the lookup of a function fails
    for verse in tqdm(data.values()):
        meter = Meter.METERS[verse["meter"]]
        decomposition = meter.decompose(Scansion(verse["scansion"]),
                                        turn_off_assertions=True)
        if len(decomposition) != 1:
            warnings.warn("Multiple ways to decompose a scansion!")
        decomposition = decomposition[0]
        # record global statistics
        record_global(decomposition, verse, stats["global"])
        # record meter-specific statistics, if a record_<meter> function exists
        globals().get("record_" + meter.name, dummy_func)(
            decomposition, verse, stats[meter.name])
    for key in stats:
        globals().get("finalize_" + key, dummy_func)(stats[key])
    return stats
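# Illustrative sketch (not part of the original module): analyse dispatches to
# per-meter statistics functions by name via globals() and silently skips
# meters for which no such function exists.  The meter names and the toy
# record_hexameter below are hypothetical and use a simplified signature.
dummy_func = lambda *args: None

def record_hexameter(stats):
    stats["verses"] = stats.get("verses", 0) + 1

for meter_name in ["hexameter", "pentameter"]:
    stats = {}
    globals().get("record_" + meter_name, dummy_func)(stats)
    print(meter_name, stats)
# hexameter {'verses': 1}
# pentameter {}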
def macronize(self):
    """Return all possible macronizations of this word, with self.postfix appended to each."""
    return [Scansion(x.scansion + self.postfix) for x in self.scansions]