示例#1
0
 def lookup(self, term):
     """Find the labels of the documents in which a search term appears.

     The term is normalized with ourStrip before being used as a key into
     self._hasWord.  The result is always a newly built set, so the caller
     may modify it without affecting the index; it is empty when the
     normalized term has no entry.
     """
     key = ourStrip(term)
     if key not in self._hasWord:
         return set()                   # unknown term: nothing matches
     # Build a fresh set rather than handing out the stored collection.
     return set(self._hasWord[key])
示例#2
0
  def getContext(self, word, maxOccur=10):
    """Build a printable excerpt showing the word in its surrounding lines.

    The word is normalized with ourStrip and looked up in self._wordAt.
    For each of the first maxOccur recorded line numbers (default 10),
    the excerpt holds a row of 40 dashes followed by the lines from one
    before the match through one after it, clipped to the bounds of the
    document.  All pieces are joined with newlines.

    Returns the empty string when the word is not in the index.
    """
    key = ourStrip(word)
    if key not in self._wordAt:
      return ''

    separator = '-' * 40
    lineCount = len(self._lines)
    pieces = []
    for hit in self._wordAt[key][:maxOccur]:      # cap the number of excerpts
      first = max(hit - 1, 0)                     # preceding line, if there is one
      last = min(hit + 2, lineCount)              # exclusive bound: one line past the match
      pieces.append(separator)
      pieces.extend(self._lines[first:last])
    return '\n'.join(pieces)
示例#3
0
  def __init__(self, contents, sourceLabel):
    """Build an index over the text of one document.

    contents         the full text of the document as one string
    sourceLabel      a string naming where the contents came from

    The text is split on newline characters and kept in self._lines.
    self._wordAt maps each word (whitespace-separated and then cleaned by
    ourStrip) to the increasing list of line indices on which it occurs,
    with each line index recorded at most once per word.
    """
    self._lines = contents.split('\n')
    self._label = sourceLabel

    self._wordAt = {}
    for lineIndex, text in enumerate(self._lines):
      for token in text.split():
        cleaned = ourStrip(token)
        if not cleaned:                           # nothing left after cleaning
          continue
        seenOn = self._wordAt.setdefault(cleaned, [])
        # Lines are visited in order, so a repeat on the current line can
        # only show up as the last entry already recorded for this word.
        if not seenOn or seenOn[-1] != lineIndex:
          seenOn.append(lineIndex)