Пример #1
0
    def postings(self, fieldname, text, scorer=None):
        """Return a matcher over the postings of the term ``fieldname:text``.

        The term is looked up in ``self.termsindex``. When the ``postings``
        attribute of the resulting term info is an integer, it is handed to a
        ``FilePostingReader`` together with ``self.postfile``; otherwise it is
        unpacked as a ``(docids, weights, values)`` triple and wrapped in a
        ``ListMatcher``. If ``self.segment.deleted`` is truthy, the matcher is
        additionally wrapped in a ``FilterMatcher`` that excludes it.

        :param fieldname: name of the field the term belongs to.
        :param text: the term text.
        :param scorer: optional scorer forwarded to the matcher.
        :raises TermNotFound: if the term is not in the terms index.
        """
        term = (fieldname, text)
        try:
            terminfo = self.termsindex[term]
        except KeyError:
            raise TermNotFound("%s:%r" % term)

        fmt = self.schema[fieldname].format
        stored = terminfo.postings
        if not isinstance(stored, integer_types):
            # Inlined postings: the three parallel sequences travel with the
            # term info itself.
            docids, weights, values = stored
            matcher = ListMatcher(docids, weights, values, fmt,
                                  scorer=scorer, term=term,
                                  terminfo=terminfo)
        else:
            # Integer: passed on to the reader that works on the posting file.
            matcher = FilePostingReader(self.postfile, stored, fmt,
                                        scorer=scorer, term=term)

        deleted = self.segment.deleted
        if not deleted:
            return matcher
        return FilterMatcher(matcher, deleted, exclude=True)
Пример #2
0
 def postings_reader(self, dbfile, terminfo, format_, term=None,
                     scorer=None):
     """Build the matcher for a term described by ``terminfo``.

     :param dbfile: file object handed to ``W3LeafMatcher`` when the
         postings are not inlined.
     :param terminfo: term info object; must provide ``is_inlined()`` and
         either ``inlined_postings()`` or ``extent()``.
     :param format_: posting format forwarded to the matcher.
     :param term: optional term forwarded to the matcher.
     :param scorer: optional scorer forwarded to the matcher.
     :returns: a ``ListMatcher`` for inlined postings, otherwise a
         ``W3LeafMatcher`` over the ``(offset, length)`` extent.
     """
     if not terminfo.is_inlined():
         # Postings are stored separately: read them from their extent.
         offset, length = terminfo.extent()
         return W3LeafMatcher(dbfile, offset, length, format_,
                              term=term, scorer=scorer)

     # Postings were inlined into the terminfo object; wrap the raw
     # sequences in a ListMatcher to expose the Matcher interface.
     ids, weights, values = terminfo.inlined_postings()
     return ListMatcher(ids, weights, values, format_,
                        scorer=scorer, term=term, terminfo=terminfo)
Пример #3
0
    def postings(self, fieldname, text, scorer=None):
        """Return a matcher over the postings of the term ``fieldname:text``.

        Element ``[1]`` of the term's entry in ``self.termsindex`` is either
        an integer, which is passed to a ``FilePostingReader`` along with
        ``self.postfile``, or a five-item tuple
        ``(docids, weights, values, maxwol, minlength)`` that is wrapped in a
        ``ListMatcher``. If ``self.segment.deleted`` is truthy, the matcher is
        wrapped in a ``FilterMatcher`` that excludes it.

        :param fieldname: name of the field the term belongs to.
        :param text: the term text.
        :param scorer: optional scorer forwarded to the matcher.
        :raises TermNotFound: if the term is not in the terms index.
        """
        try:
            entry = self.termsindex[fieldname, text][1]
        except KeyError:
            raise TermNotFound("%s:%r" % (fieldname, text))

        fmt = self.schema[fieldname].format
        # NOTE: ``long`` is the Python 2 arbitrary-precision integer type.
        if not isinstance(entry, (int, long)):
            docids, weights, values, maxwol, minlength = entry
            matcher = ListMatcher(docids, weights, values, fmt, scorer,
                                  maxwol=maxwol, minlength=minlength)
        else:
            matcher = FilePostingReader(self.postfile, entry, fmt,
                                        scorer=scorer, fieldname=fieldname,
                                        text=text)

        deleted = self.segment.deleted
        if not deleted:
            return matcher
        return FilterMatcher(matcher, deleted, exclude=True)
Пример #4
0
    def matcher(self, fieldname, text, format_, scorer=None):
        """Return a matcher for the term ``fieldname:text``.

        Deleted documents are NOT filtered out here; a higher level is
        expected to wrap the returned matcher to eliminate them.

        :param fieldname: name of the field the term belongs to.
        :param text: the term text.
        :param format_: posting format forwarded to the matcher.
        :param scorer: optional scorer forwarded to the matcher.
        :raises TermNotFound: if ``self`` has no entry for the term.
        """
        postfile = self.postfile
        term = (fieldname, text)
        try:
            terminfo = self[term]
        except KeyError:
            raise TermNotFound("No term %s:%r" % (fieldname, text))

        stored = terminfo.postings
        if not isinstance(stored, integer_types):
            # terminfo.postings is an inlined tuple of (ids, weights, values)
            docids, weights, values = stored
            return ListMatcher(docids, weights, values, format_,
                               scorer=scorer, term=term, terminfo=terminfo)

        # terminfo.postings is an offset into the posting file
        return PostingMatcher(postfile, stored, format_, scorer=scorer,
                              term=term)
Пример #5
0
    def postings(self, fieldname, text, exclude_docs=frozenset(), scorer=None):
        """Return a matcher over the postings of the term ``fieldname:text``.

        Documents in ``self.segment.deleted`` and in ``exclude_docs`` are
        removed from the result by wrapping the matcher in an
        ``ExcludeMatcher`` whenever the combined set is non-empty.

        :param fieldname: name of the field the term belongs to.
        :param text: the term text.
        :param exclude_docs: extra set of documents to leave out.
        :param scorer: optional scorer forwarded to the matcher.
        :raises TermNotFound: if the term is not in the terms index.
        """
        self._test_field(fieldname)
        fmt = self.format(fieldname)
        try:
            entry = self.termsindex[(fieldname, text)][1]
        except KeyError:
            raise TermNotFound("%s:%r" % (fieldname, text))

        # Merge the segment's deletions into the caller-supplied exclusions.
        deleted = self.segment.deleted
        if deleted:
            exclude_docs = (deleted | exclude_docs) if exclude_docs else deleted

        # NOTE: ``long`` is the Python 2 arbitrary-precision integer type.
        if not isinstance(entry, (int, long)):
            docids, weights, values = entry
            matcher = ListMatcher(docids, weights, values, fmt, scorer)
        else:
            matcher = FilePostingReader(self.postfile, entry, fmt,
                                        scorer=scorer, fieldname=fieldname,
                                        text=text)

        if not exclude_docs:
            return matcher
        return ExcludeMatcher(matcher, exclude_docs)
Пример #6
0
    def vector(self, docnum, fieldname):
        """Return a matcher over the stored vector of one document field.

        The entry ``self.vectors[docnum, fieldname]`` is transposed into
        three parallel sequences (ids, weights, values) and wrapped in a
        ``ListMatcher`` that uses the field's vector format.

        :param docnum: document number used as part of the vector key.
        :param fieldname: name of the field whose vector is wanted.
        :raises TermNotFound: if ``fieldname`` is not in the schema.
        :raises Exception: if the field has no vector format configured.
        """
        if fieldname not in self.schema:
            raise TermNotFound("No  field %r" % fieldname)
        vformat = self.schema[fieldname].vector
        if not vformat:
            raise Exception("No vectors are stored for field %r" % fieldname)

        # The vector format fetched above is reused; it does not need to be
        # looked up in the schema a second time.
        ids, weights, values = zip_(*self.vectors[docnum, fieldname])
        return ListMatcher(ids, weights, values, format=vformat)
Пример #7
0
    def postings(self, fieldname, text, scorer=None):
        """Return a matcher over the postings of the term ``fieldname:text``.

        Postings come from ``self.invindex[fieldname][text]``. Entries whose
        first item is in ``self.deleted`` are dropped; if nothing is left a
        ``NullMatcher`` is returned. The remaining postings are transposed
        into (ids, weights, values) and wrapped in a ``ListMatcher``.

        :param fieldname: name of the field the term belongs to.
        :param text: the term text.
        :param scorer: optional scorer forwarded to the ``ListMatcher``.
        :raises TermNotFound: if the field or term is missing from the
            inverted index.
        """
        try:
            postings = self.invindex[fieldname][text]
        except KeyError:
            raise TermNotFound((fieldname, text))

        excludeset = self.deleted
        fmt = self.schema[fieldname].format
        if excludeset:
            postings = [p for p in postings if p[0] not in excludeset]
            if not postings:
                return NullMatcher()
        ids, weights, values = zip(*postings)
        # Forward the scorer: it was previously accepted but never passed on,
        # so the caller's scoring was silently ignored.
        return ListMatcher(ids, weights, values, format=fmt, scorer=scorer)
Пример #8
0
    def matcher(self, fieldname, btext, format_, scorer=None):
        """Return a ``ListMatcher`` built from the term's "POST" lines.

        After locating the term with ``self._find_term``, every line returned
        by ``self._find_line(3, "POST")`` contributes its ``"dn"``, ``"w"``
        and ``"v"`` entries to the ids, weights and values lists. Reading
        stops at the first ``None``.

        :param fieldname: name of the field the term belongs to.
        :param btext: the term text.
        :param format_: posting format forwarded to the matcher.
        :param scorer: optional scorer forwarded to the matcher.
        :raises TermNotFound: if ``self._find_term`` reports no such term.
        """
        if not self._find_term(fieldname, btext):
            raise TermNotFound((fieldname, btext))

        ids, weights, values = [], [], []
        while True:
            line = self._find_line(3, "POST")
            if line is None:
                break
            ids.append(line["dn"])
            weights.append(line["w"])
            values.append(line["v"])

        return ListMatcher(ids, weights, values, format_, scorer=scorer)
Пример #9
0
    def vector(self, docnum, fieldname, format_):
        """Return a ``ListMatcher`` built from a document's "VPOST" lines.

        The document is located with ``self._find_doc`` and its vector with
        ``self._find_line(2, "VECTOR")``. Every following line returned by
        ``self._find_line(3, "VPOST")`` contributes its ``"t"``, ``"w"`` and
        ``"v"`` entries to the ids, weights and values lists. Reading stops
        at the first ``None``.

        :param docnum: number of the document to look up.
        :param fieldname: name of the requested field. NOTE(review): it is
            only used in the error message, not to select the "VECTOR" line.
            Confirm whether a document can hold more than one vector.
        :param format_: format forwarded to the matcher.
        :raises Exception: if the document or its vector cannot be found.
        """
        # Same exception type as before, but with a message that says what
        # was missing instead of an empty, undiagnosable error.
        if not self._find_doc(docnum):
            raise Exception("Document %r not found" % (docnum,))
        if not self._find_line(2, "VECTOR"):
            raise Exception("No vector found for document %r (field %r)"
                            % (docnum, fieldname))

        ids = []
        weights = []
        values = []
        c = self._find_line(3, "VPOST")
        while c is not None:
            ids.append(c["t"])
            weights.append(c["w"])
            values.append(c["v"])
            c = self._find_line(3, "VPOST")

        return ListMatcher(ids, weights, values, format_)
Пример #10
0
    def postings(self, fieldname, text, scorer=None):
        """Return a matcher over the postings of the term ``fieldname:text``.

        Postings come from ``self.invindex[fieldname][text]``. Entries whose
        first item is in ``self.deleted`` are dropped; if nothing is left a
        ``NullMatcher`` is returned. The remaining postings are transposed
        into (ids, weights, values) and wrapped in a ``ListMatcher``.

        :param fieldname: name of the field the term belongs to.
        :param text: the term text.
        :param scorer: optional scorer forwarded to the matcher.
        :raises TermNotFound: if ``self.term_info`` raises ``KeyError``.
        """
        self._test_field(fieldname)
        term = (fieldname, text)
        try:
            terminfo = self.term_info(fieldname, text)
        except KeyError:
            raise TermNotFound(term)

        fmt = self.schema[fieldname].format
        entries = self.invindex[fieldname][text]
        deleted = self.deleted
        if deleted:
            # Keep only postings whose leading item is not deleted.
            entries = [entry for entry in entries if entry[0] not in deleted]
            if not entries:
                return NullMatcher()

        ids, weights, values = zip_(*entries)
        return ListMatcher(ids, weights, values, format=fmt, scorer=scorer,
                           term=term, terminfo=terminfo)
Пример #11
0
 def matcher(self, fieldname, btext, format_, scorer=None):
     """Return a ``ListMatcher`` over ``self._invindex[fieldname][btext]``.

     The stored items are transposed into three parallel sequences
     (ids, weights, values) before being handed to the matcher.

     :param fieldname: name of the field the term belongs to.
     :param btext: the term text.
     :param format_: posting format forwarded to the matcher.
     :param scorer: optional scorer forwarded to the matcher.
     """
     stored = self._invindex[fieldname][btext]
     columns = list(zip(*stored))
     ids, weights, values = columns
     return ListMatcher(ids, weights, values, format_, scorer=scorer)
Пример #12
0
 def vector(self, docnum, fieldname, format_):
     """Return a ``ListMatcher`` over one document field's stored vector.

     The items in ``self._segment._vectors[docnum][fieldname]`` are
     transposed into three parallel sequences (ids, weights, values) before
     being handed to the matcher.

     :param docnum: document number to look up.
     :param fieldname: name of the field whose vector is wanted.
     :param format_: format forwarded to the matcher.
     """
     stored = self._segment._vectors[docnum][fieldname]
     columns = list(zip(*stored))
     ids, weights, values = columns
     return ListMatcher(ids, weights, values, format_)
Пример #13
0
 def vector(self, docnum, fieldname):
     """Return a ``ListMatcher`` over one document field's stored vector.

     The entry ``self.vectors[docnum, fieldname]`` is transposed into three
     parallel sequences (ids, weights, values) and wrapped in a matcher
     that uses the ``vector`` format of the field's schema entry.

     :param docnum: document number used as part of the vector key.
     :param fieldname: name of the field whose vector is wanted.
     """
     vector_format = self.schema[fieldname].vector
     stored = self.vectors[docnum, fieldname]
     ids, weights, values = zip(*stored)
     return ListMatcher(ids, weights, values, format=vector_format)