def load(self):
    """
    Open the index and data files for this object the first time it
    is called.

    When C{self._loaded} is already true the call does nothing.
    Otherwise it stores an C{IndexFile} in C{self.indexFile}, opens
    C{wordnet/data.<filenameroot>} (located through C{nltk_data.find})
    into C{self.dataFile}, and sets C{self._loaded}.
    """
    # Guard clause: nothing to do once the files have been opened.
    if self._loaded:
        return

    root = self._filenameroot
    self.indexFile = IndexFile(self.pos, root)
    self.dataFile = open(nltk_data.find('wordnet/data.%s' % root),
                         FILE_OPEN_MODE)
    self._loaded = True
def _morphy(form, pos=NOUN):
    """
    Generate candidate base forms of C{form} for a part of speech.

    Forms listed for C{form} in the exception file
    C{wordnet/<pos>.exc} are yielded first.  After that, every form
    that is present in the dictionary for C{pos} and can be reached
    from C{form} by applying the suffix rules in
    C{MORPHOLOGICAL_SUBSTITUTIONS[pos]} (each rule at most once along
    one derivation) is yielded.  Duplicates are not filtered out.

    For nouns ending in C{'ful'}, the suffix is stripped before the
    substitution rules are tried and re-appended to every result.

    @param form: The word form to reduce.
    @param pos: The part of speech; it is passed through
        C{normalizePOS} before use.
    @raise KeyError: If the normalized part of speech is not one of
        NOUN, VERB, ADJECTIVE or ADVERB.
    """
    pos = normalizePOS(pos)
    # Each supported part of speech maps to itself, so this lookup mainly
    # serves to reject an unsupported part of speech with a KeyError.
    section = {NOUN: NOUN, VERB: VERB, ADJECTIVE: ADJECTIVE, ADVERB: ADVERB}[pos]
    excpath = nltk_data.find('wordnet/%s.exc' % section)
    substitutions = MORPHOLOGICAL_SUBSTITUTIONS[pos]
    dictionary = dictionaryFor(pos)

    def trySubstitutions(form,            # reduced form
                         substitutions):  # remaining substitutions
        if dictionary.has_key(form):
            yield form
        for n, (old, new) in enumerate(substitutions):
            if form.endswith(old):
                new_form = form[:-len(old)] + new
                # Recurse without rule n so no rule is applied twice.
                for f in trySubstitutions(new_form,
                                          substitutions[:n] + substitutions[n+1:]):
                    yield f

    # The exception file is needed for this one lookup only; close it
    # straight away instead of leaking the handle for the generator's life.
    excfile = open(excpath)
    try:
        exceptions = binarySearchFile(excfile, form)
    finally:
        excfile.close()

    if exceptions:
        # Drop the key (text up to the first space) and the final character
        # of the line; what remains is a space-separated list of forms.
        forms = exceptions[exceptions.find(' ')+1:-1].split()
        for f in forms:
            yield f

    if pos == NOUN and form.endswith('ful'):
        suffix = 'ful'
        form = form[:-3]
    else:
        suffix = ''

    for f in trySubstitutions(form, substitutions):
        yield f + suffix
def __init__(self, pos, filenameroot):
    """
    Set up a reader over the index file for one part of speech.

    @type  pos: {string}
    @param pos: The part of speech of this index file e.g. 'noun'
    @type  filenameroot: {string}
    @param filenameroot: The base filename of the index file; the file
        opened is C{wordnet/index.<filenameroot>}, located through
        C{nltk_data.find}.
    """
    self.pos = pos
    self.file = open(nltk_data.find("wordnet/index.%s" % filenameroot),
                     FILE_OPEN_MODE)

    # Cache keyed by (pathname, offset) holding (line, nextOffset);
    # it starts out empty.
    self.offsetLineCache = {}

    self.rewind()
def _morphy(form, pos=NOUN):
    """
    Generate candidate base forms of C{form} for a part of speech.

    Forms listed for C{form} in the exception file
    C{wordnet/<pos>.exc} are yielded first.  After that, every form
    that is present in the dictionary for C{pos} and can be reached
    from C{form} by applying the suffix rules in
    C{MORPHOLOGICAL_SUBSTITUTIONS[pos]} (each rule at most once along
    one derivation) is yielded.  Duplicates are not filtered out.

    For nouns ending in C{'ful'}, the suffix is stripped before the
    substitution rules are tried and re-appended to every result.

    @param form: The word form to reduce.
    @param pos: The part of speech; it is passed through
        C{normalizePOS} before use.
    @raise KeyError: If the normalized part of speech is not one of
        NOUN, VERB, ADJECTIVE or ADVERB.
    """
    pos = normalizePOS(pos)
    # Each supported part of speech maps to itself, so this lookup mainly
    # serves to reject an unsupported part of speech with a KeyError.
    section = {
        NOUN: NOUN,
        VERB: VERB,
        ADJECTIVE: ADJECTIVE,
        ADVERB: ADVERB
    }[pos]
    excpath = nltk_data.find('wordnet/%s.exc' % section)
    substitutions = MORPHOLOGICAL_SUBSTITUTIONS[pos]
    dictionary = dictionaryFor(pos)

    def trySubstitutions(
            form,  # reduced form
            substitutions):  # remaining substitutions
        if dictionary.has_key(form):
            yield form
        for n, (old, new) in enumerate(substitutions):
            if form.endswith(old):
                new_form = form[:-len(old)] + new
                # Recurse without rule n so no rule is applied twice.
                for f in trySubstitutions(
                        new_form,
                        substitutions[:n] + substitutions[n + 1:]):
                    yield f

    # The exception file is needed for this one lookup only; close it
    # straight away instead of leaking the handle for the generator's life.
    excfile = open(excpath)
    try:
        exceptions = binarySearchFile(excfile, form)
    finally:
        excfile.close()

    if exceptions:
        # Drop the key (text up to the first space) and the final character
        # of the line; what remains is a space-separated list of forms.
        forms = exceptions[exceptions.find(' ') + 1:-1].split()
        for f in forms:
            yield f

    if pos == NOUN and form.endswith('ful'):
        suffix = 'ful'
        form = form[:-3]
    else:
        suffix = ''

    for f in trySubstitutions(form, substitutions):
        yield f + suffix
# Use the standard-library defaultdict when it exists; otherwise fall back
# to a minimal local implementation offering the same basic behaviour.
try:
    from collections import defaultdict
except ImportError:
    class defaultdict(dict):
        """Dictionary with a default value for unknown keys."""

        def __init__(self, default):
            # ``default`` is a zero-argument callable producing the value
            # stored for a key the first time that key is looked up.
            self.default = default

        def __getitem__(self, key):
            """Return the value for ``key``, creating and storing it if absent."""
            if key in self:
                return self.get(key)
            return self.setdefault(key, self.default())

        def __copy__(self):
            """Return a shallow copy that shares the same default factory."""
            copy = defaultdict(self.default)
            copy.update(self)
            return copy


# Just a test to see if things are working...
if __name__ == "__main__":
    indexFile = IndexFile("noun", "noun")
    path = nltk_data.find("wordnet/data.noun")
    dataFile = open(path, FILE_OPEN_MODE)
    try:
        loaded = True
        # Parenthesized so the module parses on both Python 2 and Python 3;
        # on Python 2 this still prints the bare string OK.
        print("OK")
    finally:
        # Do not leave the data file open once the smoke test is over.
        dataFile.close()