示例#1
0
    def postings(self, fieldname, text, exclude_docs=frozenset(), scorer=None):
        """Build a posting reader/matcher for the term ``fieldname:text``.

        The field is first checked with ``self._test_field``. Documents listed
        in ``self.segment.deleted`` are excluded, combined with any
        caller-supplied ``exclude_docs``.

        :param fieldname: name of the field the term belongs to.
        :param text: text of the term.
        :param exclude_docs: additional set of document numbers to exclude.
        :param scorer: optional scorer passed on to the reader/matcher.
        :returns: the reader/matcher, wrapped in an ``ExcludeMatcher`` when
            there is anything to exclude.
        :raises TermNotFound: if the term is missing from ``self.termsindex``.
        """
        self._test_field(fieldname)
        fmt = self.format(fieldname)
        key = (fieldname, text)
        try:
            offset = self.termsindex[key][1]
        except KeyError:
            raise TermNotFound("%s:%r" % (fieldname, text))

        # Fold the segment's deleted documents into the exclusion set.
        deleted = self.segment.deleted
        if deleted:
            exclude_docs = (deleted | exclude_docs) if exclude_docs else deleted

        if not isinstance(offset, (int, long)):
            # The index entry holds the postings inline as three sequences.
            docids, weights, values = offset
            reader = ListMatcher(docids, weights, values, fmt, scorer)
        else:
            # The index entry is an integer offset handed to the file reader.
            reader = FilePostingReader(self.postfile, offset, fmt,
                                       scorer=scorer, fieldname=fieldname,
                                       text=text)

        if not exclude_docs:
            return reader
        return ExcludeMatcher(reader, exclude_docs)
示例#2
0
    def matcher(self, fieldname, text, format_, scorer=None):
        """Return a matcher over the postings of the term ``fieldname:text``.

        Deleted documents are NOT filtered out here; a higher level is
        expected to wrap the returned matcher to eliminate them.

        :param fieldname: name of the field the term belongs to.
        :param text: text of the term.
        :param format_: posting format object passed to the matcher.
        :param scorer: optional scorer passed to the matcher.
        :raises TermNotFound: if looking the term up on ``self`` raises
            ``KeyError``.
        """
        postfile = self.postfile
        term = (fieldname, text)
        try:
            terminfo = self[term]
        except KeyError:
            raise TermNotFound("No term %s:%r" % (fieldname, text))

        stored = terminfo.postings
        if not isinstance(stored, integer_types):
            # Inlined postings: a tuple of (ids, weights, values).
            docids, weights, values = stored
            return ListMatcher(docids, weights, values, format_,
                               scorer=scorer, term=term, terminfo=terminfo)

        # Otherwise the value is an offset into the posting file.
        return PostingMatcher(postfile, stored, format_, scorer=scorer,
                              term=term)
示例#3
0
    def postings(self, fieldname, text, scorer=None):
        """Return a matcher for the term ``fieldname:text`` that skips the
        segment's deleted documents.

        :param fieldname: name of the field the term belongs to.
        :param text: text of the term.
        :param scorer: optional scorer passed to the matcher.
        :raises TermNotFound: if the term is missing from ``self.termsindex``.
        """
        term = (fieldname, text)
        try:
            terminfo = self.termsindex[term]
        except KeyError:
            raise TermNotFound("%s:%r" % (fieldname, text))

        fmt = self.schema[fieldname].format
        stored = terminfo.postings
        if not isinstance(stored, integer_types):
            # Postings are stored inline as three parallel sequences.
            docids, weights, values = stored
            matcher = ListMatcher(docids, weights, values, fmt,
                                  scorer=scorer, term=term,
                                  terminfo=terminfo)
        else:
            # Postings value is an integer handed to the file reader.
            matcher = FilePostingReader(self.postfile, stored, fmt,
                                        scorer=scorer, term=term)

        deleted = self.segment.deleted
        if not deleted:
            return matcher
        return FilterMatcher(matcher, deleted, exclude=True)
示例#4
0
    def postings(self, fieldname, text, scorer=None):
        """Return a matcher for the term ``fieldname:text`` that skips the
        segment's deleted documents.

        The second item of the terms-index entry is either an integer (passed
        to ``FilePostingReader`` as the offset) or a 5-tuple of
        ``(docids, weights, values, maxwol, minlength)`` stored inline.

        :param fieldname: name of the field the term belongs to.
        :param text: text of the term.
        :param scorer: optional scorer passed to the matcher.
        :raises TermNotFound: if the term is missing from ``self.termsindex``.
        """
        key = (fieldname, text)
        try:
            offset = self.termsindex[key][1]
        except KeyError:
            raise TermNotFound("%s:%r" % (fieldname, text))

        fmt = self.schema[fieldname].format
        if not isinstance(offset, (int, long)):
            docids, weights, values, maxwol, minlength = offset
            matcher = ListMatcher(docids, weights, values, fmt, scorer,
                                  maxwol=maxwol, minlength=minlength)
        else:
            matcher = FilePostingReader(self.postfile, offset, fmt,
                                        scorer=scorer, fieldname=fieldname,
                                        text=text)

        deleted = self.segment.deleted
        if not deleted:
            return matcher
        return FilterMatcher(matcher, deleted, exclude=True)
示例#5
0
 def terms_from(self, fieldname, prefix):
     """Yield ``(fieldname, term)`` pairs in sorted order, beginning with the
     first term that sorts at or after ``prefix``.

     This is a generator, so the unknown-field check only runs once iteration
     starts.

     :param fieldname: name of the field whose terms are listed.
     :param prefix: value at which to start in the sorted term list.
     :raises TermNotFound: if ``fieldname`` is not in ``self._invindex``.
     """
     if fieldname not in self._invindex:
         raise TermNotFound("Unknown field %r" % (fieldname, ))
     terms = sorted(self._invindex[fieldname])
     # bisect_left gives the leftmost index whose term is >= prefix; an empty
     # term list simply yields nothing.
     for term in terms[bisect_left(terms, prefix):]:
         yield (fieldname, term)
示例#6
0
    def vector(self, docnum, fieldname):
        """Return a matcher over the stored vector of ``fieldname`` in
        document ``docnum``.

        :param docnum: document number.
        :param fieldname: name of the field.
        :raises TermNotFound: if ``fieldname`` is not in the schema.
        :raises Exception: if the field's ``vector`` attribute is falsy.
        """
        schema = self.schema
        if fieldname not in schema:
            raise TermNotFound("No  field %r" % fieldname)

        vformat = schema[fieldname].vector
        if vformat:
            self._open_vectors()
            return self._vectors.matcher(docnum, fieldname, vformat)
        raise Exception("No vectors are stored for field %r" % fieldname)
示例#7
0
 def postings(self, fieldname, text, scorer=None):
     """Return a matcher for the term ``fieldname:text``, wrapped to exclude
     the segment's deleted documents when there are any.

     :param fieldname: name of the field the term belongs to.
     :param text: text of the term.
     :param scorer: optional scorer passed to ``self._terms.matcher``.
     :raises TermNotFound: if ``fieldname`` is not in the schema.
     """
     schema = self.schema
     if fieldname not in schema:
         raise TermNotFound("No  field %r" % fieldname)

     result = self._terms.matcher(fieldname, text, schema[fieldname].format,
                                  scorer=scorer)
     removed = self.segment.deleted
     if not removed:
         return result
     return FilterMatcher(result, removed, exclude=True)
示例#8
0
    def vector(self, docnum, fieldname):
        """Return a ``ListMatcher`` over the stored vector of ``fieldname``
        in document ``docnum``.

        The entry at ``self.vectors[docnum, fieldname]`` is transposed into
        three parallel sequences (ids, weights, values) for the matcher.

        :param docnum: document number.
        :param fieldname: name of the field.
        :raises TermNotFound: if ``fieldname`` is not in the schema.
        :raises Exception: if the field's ``vector`` attribute is falsy.
        """
        if fieldname not in self.schema:
            raise TermNotFound("No  field %r" % fieldname)
        vformat = self.schema[fieldname].vector
        if not vformat:
            raise Exception("No vectors are stored for field %r" % fieldname)

        # vformat is already bound above; no need to look it up a second time.
        ids, weights, values = zip_(*self.vectors[docnum, fieldname])
        return ListMatcher(ids, weights, values, format=vformat)
示例#9
0
 def first_id(self, fieldname, text):
     """Return the first document number in the term's posting list that is
     not in ``self.deleted``, or ``None`` if every posting is deleted.

     :param fieldname: name of the field the term belongs to.
     :param text: text of the term.
     :raises TermNotFound: if the field or term is missing from
         ``self.invindex``.
     """
     try:
         plist = self.invindex[fieldname][text]
     except KeyError:
         raise TermNotFound((fieldname, text))

     removed = self.deleted
     # The first item of each posting is the document number.
     live = (posting[0] for posting in plist if posting[0] not in removed)
     return next(live, None)
0
    def postings(self, fieldname, text, scorer=None):
        """Return a matcher over the non-deleted postings of the term
        ``fieldname:text``.

        :param fieldname: name of the field the term belongs to.
        :param text: text of the term.
        :param scorer: optional scorer; forwarded to the ``ListMatcher`` so it
            is not silently dropped.
        :returns: a ``ListMatcher`` over the surviving postings, or a
            ``NullMatcher`` when there are none.
        :raises TermNotFound: if the field or term is missing from
            ``self.invindex``.
        """
        try:
            postings = self.invindex[fieldname][text]
        except KeyError:
            raise TermNotFound((fieldname, text))

        excludeset = self.deleted
        fmt = self.schema[fieldname].format
        if excludeset:
            # The first item of each posting is the document number.
            postings = [x for x in postings if x[0] not in excludeset]
        # Guard unconditionally: unpacking zip(*[]) into three names would
        # raise ValueError even when nothing was filtered out.
        if not postings:
            return NullMatcher()
        ids, weights, values = zip(*postings)
        return ListMatcher(ids, weights, values, format=fmt, scorer=scorer)
示例#11
0
 def first_id(self, fieldname, text):
     """Return the first document number in the term's posting list that is
     not in ``self.deleted``, or ``None`` if every posting is deleted.

     Overridden so that no posting reader is constructed: the document
     number is pulled straight out of the in-memory list.

     :param fieldname: name of the field the term belongs to.
     :param text: text of the term.
     :raises TermNotFound: if the field or term is missing from
         ``self.invindex``.
     """
     self._test_field(fieldname)
     try:
         plist = self.invindex[fieldname][text]
     except KeyError:
         raise TermNotFound((fieldname, text))

     removed = self.deleted
     # The first item of each posting is the document number.
     live = (posting[0] for posting in plist if posting[0] not in removed)
     return next(live, None)
示例#12
0
    def vector(self, docnum, fieldname):
        """Return a ``FilePostingReader`` over the stored vector of
        ``fieldname`` in document ``docnum``.

        :param docnum: document number.
        :param fieldname: name of the field.
        :raises TermNotFound: if ``fieldname`` is not in the schema.
        :raises Exception: if the field's ``vector`` attribute is falsy, or
            if ``self.vectorindex`` has no entry for this document and field.
        """
        schema = self.schema
        if fieldname not in schema:
            raise TermNotFound("No  field %r" % fieldname)
        vformat = schema[fieldname].vector
        if not vformat:
            raise Exception("No vectors are stored for field %r" % fieldname)

        self._open_vectors()
        key = (docnum, fieldname)
        offset = self.vectorindex.get(key)
        if offset is not None:
            return FilePostingReader(self.vpostfile, offset, vformat,
                                     stringids=True)
        raise Exception("No vector found for document"
                        " %s field %r" % (docnum, fieldname))
示例#13
0
    def matcher(self, fieldname, btext, format_, scorer=None):
        """Collect the term's postings into lists and return a ``ListMatcher``
        over them.

        After ``self._find_term`` succeeds, ``self._find_line(3, "POST")`` is
        called repeatedly until it returns ``None``; each returned mapping
        contributes its ``"dn"``, ``"w"`` and ``"v"`` entries.

        :param fieldname: name of the field the term belongs to.
        :param btext: the term (presumably as bytes -- confirm with callers).
        :param format_: posting format object passed to the matcher.
        :param scorer: optional scorer passed to the matcher.
        :raises TermNotFound: if ``self._find_term`` returns a falsy value.
        """
        if not self._find_term(fieldname, btext):
            raise TermNotFound((fieldname, btext))

        docnums, weights, values = [], [], []
        while True:
            line = self._find_line(3, "POST")
            if line is None:
                break
            docnums.append(line["dn"])
            weights.append(line["w"])
            values.append(line["v"])

        return ListMatcher(docnums, weights, values, format_, scorer=scorer)
示例#14
0
    def postings(self, fieldname, text, scorer=None):
        """Return a matcher over the non-deleted postings of the term
        ``fieldname:text``.

        :param fieldname: name of the field the term belongs to.
        :param text: text of the term.
        :param scorer: optional scorer passed to the matcher.
        :returns: a ``ListMatcher`` over the surviving postings, or a
            ``NullMatcher`` when there are none.
        :raises TermNotFound: if ``self.term_info`` raises ``KeyError``.
        """
        self._test_field(fieldname)
        try:
            terminfo = self.term_info(fieldname, text)
        except KeyError:
            raise TermNotFound((fieldname, text))

        fmt = self.schema[fieldname].format
        postings = self.invindex[fieldname][text]
        excludeset = self.deleted
        if excludeset:
            # The first item of each posting is the document number.
            postings = [x for x in postings if x[0] not in excludeset]
        # Guard unconditionally: unpacking zip_(*[]) into three names would
        # raise ValueError even when nothing was filtered out.
        if not postings:
            return NullMatcher()
        ids, weights, values = zip_(*postings)
        return ListMatcher(ids, weights, values, format=fmt, scorer=scorer,
                           term=(fieldname, text), terminfo=terminfo)
示例#15
0
    def postings(self, fieldid, text, exclude_docs=None):
        """Return a posting reader for the term ``fieldid:text``.

        Documents in ``self.segment.deleted`` are excluded, combined with any
        caller-supplied ``exclude_docs``.

        :param fieldid: field identifier, converted with
            ``self.schema.to_number``.
        :param text: text of the term.
        :param exclude_docs: optional set of document numbers to exclude.
        :returns: a ``FilePostingReader``, wrapped in ``Exclude`` when there
            is anything to exclude.
        :raises TermNotFound: if the term is missing from ``self.termtable``.
        """
        schema = self.schema
        fieldnum = schema.to_number(fieldid)
        fmt = schema[fieldnum].format

        try:
            entry = self.termtable[(fieldnum, text)]
        except KeyError:
            raise TermNotFound("%s:%r" % (fieldid, text))
        # The entry is a 3-tuple; only the middle item (the offset) is used.
        _totalfreq, offset, _postcount = entry

        deleted = self.segment.deleted
        if deleted:
            exclude_docs = (deleted | exclude_docs) if exclude_docs else deleted

        reader = FilePostingReader(self.postfile, offset, fmt)
        if not exclude_docs:
            return reader
        return Exclude(reader, exclude_docs)
示例#16
0
    def postings(self, fieldid, text, exclude_docs=frozenset()):
        """Return a posting reader for the term ``fieldid:text``.

        Documents in ``self.segment.deleted`` are excluded, combined with any
        caller-supplied ``exclude_docs``. If ``self.postfile`` is falsy it is
        opened from storage first and kept on the instance.

        :param fieldid: field identifier, converted with
            ``self.schema.to_number``.
        :param text: text of the term.
        :param exclude_docs: additional set of document numbers to exclude.
        :returns: a ``FilePostingReader``, wrapped in ``Exclude`` when there
            is anything to exclude.
        :raises TermNotFound: if the term is missing from ``self.termtable``.
        """
        schema = self.schema
        fieldnum = schema.to_number(fieldid)
        fmt = schema[fieldnum].format

        try:
            entry = self.termtable[(fieldnum, text)]
        except KeyError:
            raise TermNotFound("%s:%r" % (fieldid, text))
        # The entry is a 3-tuple; only the middle item (the offset) is used.
        _totalfreq, offset, _postcount = entry

        deleted = self.segment.deleted
        if deleted:
            exclude_docs = (deleted | exclude_docs) if exclude_docs else deleted

        # Open the posting file on first use.
        if not self.postfile:
            posts_name = self.segment.posts_filename
            self.postfile = self.storage.open_file(posts_name, mapped=False)

        reader = FilePostingReader(self.postfile, offset, fmt)
        if not exclude_docs:
            return reader
        return Exclude(reader, exclude_docs)
示例#17
0
    def postings(self, fieldname, text, exclude_docs=frozenset(), scorer=None):
        """Return a posting reader for the term ``fieldname:text``.

        The field is first checked with ``self._test_field``. Documents in
        ``self.segment.deleted`` are excluded, combined with any
        caller-supplied ``exclude_docs``.

        :param fieldname: name of the field the term belongs to.
        :param text: text of the term.
        :param exclude_docs: additional set of document numbers to exclude.
        :param scorer: optional scorer passed to the reader.
        :returns: a ``FilePostingReader``, wrapped in ``ExcludeMatcher`` when
            there is anything to exclude.
        :raises TermNotFound: if the term is missing from ``self.termsindex``.
        """
        self._test_field(fieldname)
        fmt = self.format(fieldname)
        key = (fieldname, text)
        try:
            offset = self.termsindex[key][1]
        except KeyError:
            raise TermNotFound("%s:%r" % (fieldname, text))

        # Fold the segment's deleted documents into the exclusion set.
        deleted = self.segment.deleted
        if deleted:
            exclude_docs = (deleted | exclude_docs) if exclude_docs else deleted

        reader = FilePostingReader(self.postfile, offset, fmt, scorer=scorer,
                                   fieldname=fieldname, text=text)
        if not exclude_docs:
            return reader
        return ExcludeMatcher(reader, exclude_docs)
示例#18
0
 def term_info(self, fieldname, tbytes):
     """Look up the term in ``self._tindex`` and decode the stored bytes
     with ``W3TermInfo.from_bytes``.

     :param fieldname: name of the field the term belongs to.
     :param tbytes: the term (presumably as bytes -- confirm with callers).
     :raises TermNotFound: if a ``KeyError`` is raised while looking up or
         decoding the entry.
     """
     key = self._keycoder(fieldname, tbytes)
     try:
         encoded = self._tindex[key]
         terminfo = W3TermInfo.from_bytes(encoded)
     except KeyError:
         raise TermNotFound("No term %s:%r" % (fieldname, tbytes))
     return terminfo
示例#19
0
 def _term_info(self, fieldnum, text):
     """Return the ``self.termtable`` entry for ``(fieldnum, text)``.

     :param fieldnum: field number of the term.
     :param text: text of the term.
     :raises TermNotFound: if the key is not in ``self.termtable``.
     """
     key = (fieldnum, text)
     try:
         info = self.termtable[key]
     except KeyError:
         raise TermNotFound("%s:%r" % (fieldnum, text))
     return info
示例#20
0
 def _test_field(self, fieldname):
     """Check that ``fieldname`` is in the schema and has a format.

     :param fieldname: name of the field to check.
     :raises TermNotFound: if the field is not in ``self.schema``, or if its
         ``format`` attribute is ``None``.
     """
     schema = self.schema
     if fieldname not in schema:
         raise TermNotFound("No field %r" % fieldname)

     field_format = schema[fieldname].format
     if field_format is None:
         raise TermNotFound("Field %r is not indexed" % fieldname)
示例#21
0
 def term_info(self, fieldname, btext):
     """Return the result of ``self._find_terminfo()`` once
     ``self._find_term`` has located the term.

     :param fieldname: name of the field the term belongs to.
     :param btext: the term (presumably as bytes -- confirm with callers).
     :raises TermNotFound: if ``self._find_term`` returns a falsy value.
     """
     located = self._find_term(fieldname, btext)
     if located:
         return self._find_terminfo()
     raise TermNotFound((fieldname, btext))
示例#22
0
 def _find_field(self, fieldname):
     """Call ``self._find_root("TERMS")``, then look for the ``TERMFIELD``
     line whose ``fn`` matches ``fieldname``.

     :param fieldname: name of the field to locate.
     :raises TermNotFound: if ``self._find_line`` returns ``None``.
     """
     self._find_root("TERMS")
     line = self._find_line(1, "TERMFIELD", fn=fieldname)
     if line is None:
         raise TermNotFound("No field %r" % fieldname)
示例#23
0
 def _term_info(self, fieldname, text):
     """Return the ``self.termsindex`` entry for ``(fieldname, text)``.

     The field is checked with ``self._test_field`` before the lookup.

     :param fieldname: name of the field the term belongs to.
     :param text: text of the term.
     :raises TermNotFound: if the key is not in ``self.termsindex``.
     """
     self._test_field(fieldname)
     key = (fieldname, text)
     try:
         info = self.termsindex[key]
     except KeyError:
         raise TermNotFound("%s:%r" % (fieldname, text))
     return info