def lookup(self, term):
    """Return the set of labels of the documents that contain the search term.

    The term is normalized with ourStrip before being looked up.  The
    result is always a newly built set, so callers may modify it freely
    without disturbing the index; a term that is not indexed yields an
    empty set.
    """
    cleaned = ourStrip(term)
    if cleaned not in self._hasWord:
        return set()
    # wrap in set() so the caller receives a copy, never the stored collection
    return set(self._hasWord[cleaned])
def getContext(self, word, maxOccur=10):
    """Return a string showing where the word occurs, with surrounding lines.

    word        the query term; it is normalized with ourStrip first
    maxOccur    upper bound on how many occurrences are reported (default 10)

    Each reported occurrence consists of a separator of 40 dashes followed
    by the line before the hit, the hit itself and the line after it,
    clipped to the bounds of the document.  All pieces are joined with
    newlines.  An empty string is returned when the word is not indexed.
    """
    word = ourStrip(word)
    if word not in self._wordAt:
        return ''

    separator = '-' * 40
    numLines = len(self._lines)
    pieces = []
    # only the first maxOccur recorded line numbers are reported
    for hit in self._wordAt[word][:maxOccur]:
        first = max(hit - 1, 0)            # previous line, unless hit is line 0
        last = min(hit + 2, numLines)      # exclusive bound, clipped at the end
        pieces.append(separator)
        pieces.extend(self._lines[first:last])
    return '\n'.join(pieces)
def __init__(self, contents, sourceLabel):
    """Build a new index over a single document.

    contents      one string holding the complete text of the document
    sourceLabel   a string identifying the source of the contents
    """
    self._lines = contents.split('\n')
    self._label = sourceLabel

    # Map every apparent word of the document to an ascending list of the
    # line numbers on which it appears; a line is recorded at most once
    # per word, however many times the word is repeated on it.
    self._wordAt = {}
    for lineNum, text in enumerate(self._lines):
        for token in text.split():
            key = ourStrip(token)
            if not key:
                continue                   # stripping left nothing to index
            seen = self._wordAt.setdefault(key, [])
            # lines are visited in order, so checking the last entry is
            # enough to detect a repeat on the current line
            if not seen or seen[-1] != lineNum:
                seen.append(lineNum)