Exemplo n.º 1
0
def load_coded_dictionary(file_or_stream):
    """
    Load a dictionary and split it into one sub-dictionary per code.

    file_or_stream is either a filename (str or unicode), which is opened
    here and closed again before returning, or an already-open stream,
    which is handed to iter_entries() as-is and left open for the caller.

    Returns a defaultdict mapping each code to a dict of word -> entry.
    Entries sharing the same word are merged into the first entry seen for
    that word (via its update() method), so every sub-dictionary refers to
    the same merged entry object.
    """
    # Only close the stream if this function was the one that opened it.
    opened_here = isinstance(file_or_stream, (str, unicode))
    if opened_here:
        file_or_stream = open(file_or_stream)

    try:
        word_to_entry = {}
        coded_dict = defaultdict(dict)
        for entry in iter_entries(file_or_stream):
            word = entry.word

            # Merge any word entries with the same graphical form.
            if word in word_to_entry:
                merged = word_to_entry[word]
                merged.update(entry)
                entry = merged
            else:
                word_to_entry[word] = entry

            # Collect the codes from every sense of the (merged) entry.
            codes = set()
            for sense in entry.senses:
                codes.update(get_codes(sense))

            # Create a subset of the dictionary for each code.
            for code in codes:
                # Note that overwriting is ok, since we already merged any
                # coincidental forms.
                coded_dict[code][word] = entry
    finally:
        if opened_here:
            file_or_stream.close()

    return coded_dict
Exemplo n.º 2
0
def iter_coded_entries(filename):
    """
    Iterate over the dictionary entries produced by iter_entries(filename),
    yielding (entry, codes) pairs, where codes is the set of every code
    that get_codes() reports across the entry's senses.
    """
    for dict_entry in iter_entries(filename):
        sense_codes = set(
            code
            for sense in dict_entry.senses
            for code in get_codes(sense)
        )
        yield dict_entry, sense_codes