Пример #1
0
 def load(self):
     """
     Open the index file and the data file the first time this is called.

     Once C{self._loaded} is true, further calls return immediately
     without touching any file.
     """
     if self._loaded:
         return

     self.indexFile = IndexFile(self.pos, self._filenameroot)
     data_path = nltk_data.find('wordnet/data.%s' % self._filenameroot)
     self.dataFile = open(data_path, FILE_OPEN_MODE)
     # Set only after both files opened, so a failed attempt is retried.
     self._loaded = True
Пример #2
0
def _morphy(form, pos=NOUN):
    """
    Generate candidate base forms of C{form} for a part of speech.

    Candidates are yielded in this order:
      1. every form listed for C{form} in the exception file
         C{wordnet/<pos>.exc}, when the file has an entry for it;
      2. every form reachable through the suffix rules in
         C{MORPHOLOGICAL_SUBSTITUTIONS[pos]} that is a key of the
         dictionary returned by C{dictionaryFor(pos)}.  Each rule is
         applied at most once along any one chain of rewrites.

    A noun ending in 'ful' has that suffix stripped before step 2, and
    the suffix is appended again to every form step 2 yields.

    @param form: The word form to reduce.
    @param pos: The part of speech; it is passed through C{normalizePOS}.
    @return: A generator over the candidate forms.
    @raise KeyError: If the normalized C{pos} is not one of NOUN, VERB,
        ADJECTIVE or ADVERB.
    """
    pos = normalizePOS(pos)
    # Identity mapping: its only effect is to reject any other pos value.
    section = {NOUN: NOUN, VERB: VERB, ADJECTIVE: ADJECTIVE, ADVERB: ADVERB}[pos]
    substitutions = MORPHOLOGICAL_SUBSTITUTIONS[pos]
    dictionary = dictionaryFor(pos)

    def trySubstitutions(form,                # reduced form
                         substitutions):      # remaining substitutions
        if dictionary.has_key(form):
            yield form
        for n, (old, new) in enumerate(substitutions):
            if form.endswith(old):
                new_form = form[:-len(old)] + new
                # Recurse without rule n so a rule cannot fire twice.
                remaining = substitutions[:n] + substitutions[n+1:]
                for f in trySubstitutions(new_form, remaining):
                    yield f

    # The exception file is needed for this one lookup only, so close it
    # right away rather than holding it open while the generator is alive.
    excfile = open(nltk_data.find('wordnet/%s.exc' % section))
    try:
        exceptions = binarySearchFile(excfile, form)
    finally:
        excfile.close()

    if exceptions:
        # Skip everything up to the first space (the looked-up key) and
        # the final character (presumably the line terminator).
        forms = exceptions[exceptions.find(' ')+1:-1].split()
        for f in forms:
            yield f

    if pos == NOUN and form.endswith('ful'):
        suffix = 'ful'
        form = form[:-3]
    else:
        suffix = ''
    for f in trySubstitutions(form, substitutions):
        yield f + suffix
Пример #3
0
    def __init__(self, pos, filenameroot):
        """
        Open the index file for one part of speech and move to its start.

        @type  pos: {string}
        @param pos: Part of speech covered by this index file, e.g. 'noun'.
        @type  filenameroot: {string}
        @param filenameroot: Base name of the index file; the file located
            is C{wordnet/index.<filenameroot>}.
        """
        self.pos = pos
        self.file = open(
            nltk_data.find("wordnet/index.%s" % filenameroot),
            FILE_OPEN_MODE)

        # Cache mapping (pathname, offset) to (line, nextOffset).
        self.offsetLineCache = {}

        self.rewind()
Пример #4
0
def _morphy(form, pos=NOUN):
    """
    Generate candidate base forms of C{form} for a part of speech.

    Candidates are yielded in this order:
      1. every form listed for C{form} in the exception file
         C{wordnet/<pos>.exc}, when the file has an entry for it;
      2. every form reachable through the suffix rules in
         C{MORPHOLOGICAL_SUBSTITUTIONS[pos]} that is a key of the
         dictionary returned by C{dictionaryFor(pos)}.  Each rule is
         applied at most once along any one chain of rewrites.

    A noun ending in 'ful' has that suffix stripped before step 2, and
    the suffix is appended again to every form step 2 yields.

    @param form: The word form to reduce.
    @param pos: The part of speech; it is passed through C{normalizePOS}.
    @return: A generator over the candidate forms.
    @raise KeyError: If the normalized C{pos} is not one of NOUN, VERB,
        ADJECTIVE or ADVERB.
    """
    pos = normalizePOS(pos)
    # Identity mapping: its only effect is to reject any other pos value.
    section = {
        NOUN: NOUN,
        VERB: VERB,
        ADJECTIVE: ADJECTIVE,
        ADVERB: ADVERB
    }[pos]
    substitutions = MORPHOLOGICAL_SUBSTITUTIONS[pos]
    dictionary = dictionaryFor(pos)

    def trySubstitutions(
            form,  # reduced form
            substitutions):  # remaining substitutions
        if dictionary.has_key(form):
            yield form
        for n, (old, new) in enumerate(substitutions):
            if form.endswith(old):
                new_form = form[:-len(old)] + new
                # Recurse without rule n so a rule cannot fire twice.
                remaining = substitutions[:n] + substitutions[n + 1:]
                for f in trySubstitutions(new_form, remaining):
                    yield f

    # The exception file is needed for this one lookup only, so close it
    # right away rather than holding it open while the generator is alive.
    excfile = open(nltk_data.find('wordnet/%s.exc' % section))
    try:
        exceptions = binarySearchFile(excfile, form)
    finally:
        excfile.close()

    if exceptions:
        # Skip everything up to the first space (the looked-up key) and
        # the final character (presumably the line terminator).
        forms = exceptions[exceptions.find(' ') + 1:-1].split()
        for f in forms:
            yield f

    if pos == NOUN and form.endswith('ful'):
        suffix = 'ful'
        form = form[:-3]
    else:
        suffix = ''
    for f in trySubstitutions(form, substitutions):
        yield f + suffix
Пример #5
0
 def load(self):
     """
     Open the index file and the data file the first time this is called.

     Once C{self._loaded} is true, further calls return immediately
     without touching any file.
     """
     if self._loaded:
         return

     self.indexFile = IndexFile(self.pos, self._filenameroot)
     data_path = nltk_data.find('wordnet/data.%s' % self._filenameroot)
     self.dataFile = open(data_path, FILE_OPEN_MODE)
     # Set only after both files opened, so a failed attempt is retried.
     self._loaded = True
Пример #6
0

try:
    from collections import defaultdict
except ImportError:

    # Fallback used only when collections.defaultdict cannot be imported.
    class defaultdict(dict):
        """A dict that creates and stores a default value for missing keys."""

        def __init__(self, default):
            # Zero-argument callable that produces the value for a new key.
            self.default = default

        def __getitem__(self, key):
            if key not in self:
                # First access: build the default, store it, and return it.
                return self.setdefault(key, self.default())
            return self.get(key)

        def __copy__(self):
            duplicate = defaultdict(self.default)
            duplicate.update(self)
            return duplicate


# Smoke test: check that the noun index and data files can be located
# and opened.
if __name__ == "__main__":
    indexFile = IndexFile("noun", "noun")
    path = nltk_data.find("wordnet/data.noun")
    dataFile = open(path, FILE_OPEN_MODE)
    # Opening the file was the whole check, so release the handle at once.
    dataFile.close()
    loaded = True
    # Parenthesized form prints the same text on Python 2 and Python 3.
    print("OK")