Пример #1
0
 def __look_up(self):
     """
     Look up self.word in the dictionaries. Return a set of WordScansion objects

     The lookup key is self.word passed through multireplace with the mapping
     v -> u and j -> i. Every entry of Word.MORPHEUS_DICT for that key is
     wrapped as WordScansion(x, False). Entries of Word.MQDQ_DICT are wrapped as
     WordScansion(scansion, True) and added unless:
       * they match a scansion that has already been collected, or
       * Morpheus entries exist and the MQDQ entry has fewer than
         Word.AUTHOR_COUNT keys (presumably authors - confirm), a total
         count below Word.TOTAL_COUNT, or contains "*".
     When there are no Morpheus entries at all, the last "^" or "_" marker of
     each MQDQ scansion is replaced by "*" before it is added.
     :return:    a set of WordScansion objects
     """
     key = multireplace(self.word, {"v": "u", "j": "i"})
     scansions = {WordScansion(x, False) for x in Word.MORPHEUS_DICT[key]}
     has_morpheus_entries = bool(scansions)
     mqdq_entry = Word.MQDQ_DICT.look_up(key)
     for scansion in mqdq_entry.keys():
         # any() stops at the first match instead of building a full list
         already_known = any(
             Scansion(scansion).matches(Scansion(x.scansion))
             for x in scansions)
         if already_known:
             continue  # do not consider scansion options that already exist
             # TODO check how well this works
         if has_morpheus_entries and (
                 len(mqdq_entry[scansion].keys()) < Word.AUTHOR_COUNT
                 or sum(mqdq_entry[scansion].values()) < Word.TOTAL_COUNT
                 or "*" in scansion):
             continue  # do not consider infrequent scansions
             # TODO should (or "*" in scansion) be added here
         if not has_morpheus_entries:
             # make final syllable unknown
             # (raw string: "\^" and "\]" are invalid escapes in a plain literal)
             scansion = re.sub(r"[\^_]([^\^_*[\]()]*)$", r"*\1", scansion)
         scansions.add(WordScansion(scansion, True))
     return scansions
Пример #2
0
 def update_flags(self, scansion):
     """
     Record in self.flags the irregularities implied by the chosen scansion.

     The text of the scansion is stripped of surrounding spaces and split on
     single spaces; the piece at position i is checked against self.words[i].
     A piece is flagged as "Mqdq only" or, failing that, as "Morpheus only";
     independently, a word whose is_new attribute is set gets its own flag.
     :param scansion:    a scansion (word macronizations separated by " ")
     :return:            None
     """
     pieces = scansion.scansion.strip(" ").split(" ")
     for index, piece in enumerate(pieces):
         word = self.words[index]
         if word.is_mqdq_only(Scansion(piece)):
             self.flags.append("Mqdq only scansion: " + piece)
         elif word.is_morpheus_only(Scansion(piece)):
             self.flags.append("Morpheus only scansion: " + piece)
         if word.is_new:
             self.flags.append("A previuosly unencountered word: " + piece)
Пример #3
0
 def is_mqdq_only(self, scansion):
     """
     Tell whether no non-MqDq option of this word matches the given scansion.

     :param scansion:    the Scansion object to test against
     :return:            False when self.is_new is set, or when some option
                         whose scansion (with self.postfix appended) matches
                         the argument has isMqDq unset; True otherwise
     """
     if self.is_new:
         return False
     return not any(
         Scansion(option.scansion + self.postfix).matches(scansion)
         and not option.isMqDq
         for option in self.scansions)
Пример #4
0
 def __macronize(self):
     """
     Populate self.macronizations, with all possible ways the line can be macronized

     Starts from a single empty Scansion and, word by word, extends every
     macronization built so far with every option returned by
     word.macronize(), keeping the order: existing prefixes first, then the
     options of the word.
     :return:    None
     """
     self.macronizations = [Scansion("")]
     for word in self.words:
         # The options of a word do not depend on the prefix built so far, so
         # compute them once per word instead of once per existing prefix.
         options = word.macronize()
         self.macronizations = [
             exist + macrons
             for exist in self.macronizations
             for macrons in options
         ]
Пример #5
0
 def compare_scansions(self, scansion1, scansion2):
     """
     Weigh two ways of scanning this word against each other using the mqdq entries.

     Every mqdq entry for the word is processed together with
     self.next_word_prefix; an entry that matches exactly one of the two
     candidates adds the sum of its counts to that candidate's tally. Entries
     matching both or neither are ignored.
     :param scansion1:   the first candidate scansion
     :param scansion2:   the second candidate scansion
     :return:            a pair (share1, share2) of the two tallies divided by
                         their sum; (0.5, 0.5) when both tallies are zero. If
                         only one tally is zero it is set to 1 and the other
                         is incremented by 1 before dividing.
     """
     if not (len(self.postfix) == 0 or "(" in self.postfix):
         # a postfix without "(" is present: compare without the last
         # element of each pattern
         scansion1 = Scansion(scansion1).pattern[:-1]
         scansion2 = Scansion(scansion2).pattern[:-1]
     lookup_key = multireplace(self.word, {"v": "u", "j": "i"})
     entries = Word.MQDQ_DICT.look_up(lookup_key)
     first_total = 0
     second_total = 0
     for entry in entries.keys():
         candidate = WordScansion(entry, True)
         self.__process(candidate, self.next_word_prefix)
         fits_first = bool(
             Scansion(candidate.scansion).matches(Scansion(scansion1)))
         fits_second = bool(
             Scansion(candidate.scansion).matches(Scansion(scansion2)))
         if fits_first == fits_second:
             continue  # the entry does not discriminate between the two
         weight = sum(entries[entry].values())
         if fits_first:
             first_total += weight
         else:
             second_total += weight
     if first_total + second_total == 0:
         return 0.5, 0.5
     if first_total == 0:
         first_total = 1
         second_total += 1
     elif second_total == 0:
         second_total = 1
         first_total += 1
     grand_total = first_total + second_total
     return first_total / grand_total, second_total / grand_total
Пример #6
0
 def read_manual_file(filename):
     """
     Load manually made scansions from a file into Verse.DICT.

     Each line is split on tabs: the first field is the verse, the second (if
     present) is stored as its comment. The verse is lower-cased, every
     character other than a-z, "_", "^", "*", "[", "]", "(" and ")" becomes a
     space, and runs of spaces are collapsed before building the Scansion.
     :param filename:    the name of the file from which to extract the manual
                         scansions; nothing happens when it is empty or None
     :return:            None
     """
     if not filename:
         return
     print("Loading manual scansions...")
     with open(filename, "r") as file:
         rows = [raw.rstrip("\n").split("\t") for raw in file]
     for fields in rows:
         verse_text = fields[0]
         verse_key = Verse.get_verse_key(verse_text)
         cleaned = re.sub(r"([^a-z_\^*\[\]()])", " ", verse_text.lower()).strip(" ")
         scansion = Scansion(re.sub(" +", " ", cleaned))
         comment = "" if len(fields) == 1 else fields[1]
         Verse.DICT[verse_key] = {"scansion": scansion, "comment": comment}
Пример #7
0
def analyse(data):
    """
    Analyze the scanned data and record various statistics such as elision occurrence

    For every verse the scansion is decomposed according to the verse's meter.
    The first decomposition is passed to record_global and, when a
    module-level function named "record_<meter name>" exists, to that function
    as well. Once all verses are processed, a module-level function named
    "finalize_<key>", if defined, is called on each statistics group.

    A verse whose scansion cannot be decomposed at all is skipped with a
    warning; a verse with several decompositions triggers a warning and the
    first decomposition is used.
    :param data:    a mapping whose values are verses; each verse is indexed
                    with the keys "meter" and "scansion"
    :return: a dictionary with various statistical measurements
    """
    print("Analysis in progress...")
    stats = defaultdict(dict)

    def dummy_func(*args):
        # function used if look up of a function fails
        return None

    for verse in tqdm(data.values()):
        meter = Meter.METERS[verse["meter"]]
        decomposition = meter.decompose(Scansion(verse["scansion"]),
                                        turn_off_assertions=True)
        if len(decomposition) == 0:
            # Indexing an empty result below would raise IndexError, and the
            # "multiple ways" message would be misleading for this case.
            warnings.warn("No way to decompose a scansion!")
            continue
        if len(decomposition) > 1:
            warnings.warn("Multiple ways to decompose a scansion!")
        decomposition = decomposition[0]
        record_global(decomposition, verse,
                      stats["global"])  # record global statistics
        # stats[meter.name] is evaluated even for dummy_func, so the group
        # exists for the finalize pass below
        globals().get("record_" + meter.name, dummy_func)(decomposition, verse,
                                                          stats[meter.name])
    for key in stats:
        globals().get("finalize_" + key, dummy_func)(stats[key])
    return stats
Пример #8
0
 def macronize(self):
     """
     List every scansion option of this word with self.postfix appended.

     :return:    a list with one Scansion per entry of self.scansions, in
                 iteration order
     """
     result = []
     for option in self.scansions:
         result.append(Scansion(option.scansion + self.postfix))
     return result