def postings(self, fieldname, text, scorer=None):
    try:
        terminfo = self.termsindex[fieldname, text]
    except KeyError:
        raise TermNotFound("%s:%r" % (fieldname, text))

    format = self.schema[fieldname].format
    postings = terminfo.postings
    if isinstance(postings, integer_types):
        postreader = FilePostingReader(self.postfile, postings, format,
                                       scorer=scorer, term=(fieldname, text))
    else:
        docids, weights, values = postings
        postreader = ListMatcher(docids, weights, values, format,
                                 scorer=scorer, term=(fieldname, text),
                                 terminfo=terminfo)

    deleted = self.segment.deleted
    if deleted:
        postreader = FilterMatcher(postreader, deleted, exclude=True)

    return postreader
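# --- Usage sketch (not part of the original code) ---
# A minimal illustration, assuming a Whoosh-style IndexReader, of how the
# matcher returned by postings() is typically consumed. The index directory,
# field name, and term below are hypothetical.
from whoosh import index

ix = index.open_dir("indexdir")               # hypothetical index location
reader = ix.reader()
try:
    m = reader.postings("content", "search")  # matcher over the term's postings
    while m.is_active():
        print(m.id(), m.weight())             # document number and term weight
        m.next()
finally:
    reader.close()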
def postings_reader(self, dbfile, terminfo, format_, term=None, scorer=None):
    if terminfo.is_inlined():
        # If the postings were inlined into the terminfo object, pull them
        # out and use a ListMatcher to wrap them in a Matcher interface
        ids, weights, values = terminfo.inlined_postings()
        m = ListMatcher(ids, weights, values, format_, scorer=scorer,
                        term=term, terminfo=terminfo)
    else:
        offset, length = terminfo.extent()
        m = W3LeafMatcher(dbfile, offset, length, format_, term=term,
                          scorer=scorer)
    return m
def postings(self, fieldname, text, scorer=None):
    try:
        offset = self.termsindex[fieldname, text][1]
    except KeyError:
        raise TermNotFound("%s:%r" % (fieldname, text))

    format = self.schema[fieldname].format
    if isinstance(offset, (int, long)):
        postreader = FilePostingReader(self.postfile, offset, format,
                                       scorer=scorer, fieldname=fieldname,
                                       text=text)
    else:
        docids, weights, values, maxwol, minlength = offset
        postreader = ListMatcher(docids, weights, values, format, scorer,
                                 maxwol=maxwol, minlength=minlength)

    deleted = self.segment.deleted
    if deleted:
        postreader = FilterMatcher(postreader, deleted, exclude=True)

    return postreader
def matcher(self, fieldname, text, format_, scorer=None):
    # Note this does not filter out deleted documents; a higher level is
    # expected to wrap this matcher to eliminate deleted docs
    pf = self.postfile
    term = (fieldname, text)
    try:
        terminfo = self[term]
    except KeyError:
        raise TermNotFound("No term %s:%r" % (fieldname, text))

    p = terminfo.postings
    if isinstance(p, integer_types):
        # terminfo.postings is an offset into the posting file
        pr = PostingMatcher(pf, p, format_, scorer=scorer, term=term)
    else:
        # terminfo.postings is an inlined tuple of (ids, weights, values)
        docids, weights, values = p
        pr = ListMatcher(docids, weights, values, format_, scorer=scorer,
                         term=term, terminfo=terminfo)
    return pr
def postings(self, fieldname, text, exclude_docs=frozenset(), scorer=None):
    self._test_field(fieldname)
    format = self.format(fieldname)
    try:
        offset = self.termsindex[(fieldname, text)][1]
    except KeyError:
        raise TermNotFound("%s:%r" % (fieldname, text))

    if self.segment.deleted and exclude_docs:
        exclude_docs = self.segment.deleted | exclude_docs
    elif self.segment.deleted:
        exclude_docs = self.segment.deleted

    if isinstance(offset, (int, long)):
        postreader = FilePostingReader(self.postfile, offset, format,
                                       scorer=scorer, fieldname=fieldname,
                                       text=text)
    else:
        docids, weights, values = offset
        postreader = ListMatcher(docids, weights, values, format, scorer)

    if exclude_docs:
        postreader = ExcludeMatcher(postreader, exclude_docs)

    return postreader
def vector(self, docnum, fieldname):
    if fieldname not in self.schema:
        raise TermNotFound("No field %r" % fieldname)
    vformat = self.schema[fieldname].vector
    if not vformat:
        raise Exception("No vectors are stored for field %r" % fieldname)

    ids, weights, values = zip_(*self.vectors[docnum, fieldname])
    return ListMatcher(ids, weights, values, format=vformat)
def postings(self, fieldname, text, scorer=None):
    try:
        postings = self.invindex[fieldname][text]
    except KeyError:
        raise TermNotFound((fieldname, text))

    excludeset = self.deleted
    format = self.schema[fieldname].format
    if excludeset:
        postings = [x for x in postings if x[0] not in excludeset]
        if not postings:
            return NullMatcher()
    ids, weights, values = zip(*postings)
    return ListMatcher(ids, weights, values, format=format)
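# --- Illustration (not part of the original code) ---
# The zip(*postings) idiom used above transposes a list of
# (docnum, weight, value) tuples into the three parallel sequences that
# ListMatcher expects. The sample postings below are made up.
postings = [(0, 1.0, b"v0"), (3, 2.0, b"v3"), (7, 1.5, b"v7")]
ids, weights, values = zip(*postings)
assert ids == (0, 3, 7)
assert weights == (1.0, 2.0, 1.5)
assert values == (b"v0", b"v3", b"v7")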
def matcher(self, fieldname, btext, format_, scorer=None):
    if not self._find_term(fieldname, btext):
        raise TermNotFound((fieldname, btext))

    ids = []
    weights = []
    values = []
    c = self._find_line(3, "POST")
    while c is not None:
        ids.append(c["dn"])
        weights.append(c["w"])
        values.append(c["v"])
        c = self._find_line(3, "POST")

    return ListMatcher(ids, weights, values, format_, scorer=scorer)
def vector(self, docnum, fieldname, format_):
    if not self._find_doc(docnum):
        raise Exception
    if not self._find_line(2, "VECTOR"):
        raise Exception

    ids = []
    weights = []
    values = []
    c = self._find_line(3, "VPOST")
    while c is not None:
        ids.append(c["t"])
        weights.append(c["w"])
        values.append(c["v"])
        c = self._find_line(3, "VPOST")

    return ListMatcher(ids, weights, values, format_)
def postings(self, fieldname, text, scorer=None):
    self._test_field(fieldname)
    try:
        terminfo = self.term_info(fieldname, text)
    except KeyError:
        raise TermNotFound((fieldname, text))

    format = self.schema[fieldname].format
    postings = self.invindex[fieldname][text]
    excludeset = self.deleted
    if excludeset:
        postings = [x for x in postings if x[0] not in excludeset]
        if not postings:
            return NullMatcher()
    ids, weights, values = zip_(*postings)
    lm = ListMatcher(ids, weights, values, format=format, scorer=scorer,
                     term=(fieldname, text), terminfo=terminfo)
    return lm
def matcher(self, fieldname, btext, format_, scorer=None):
    items = self._invindex[fieldname][btext]
    ids, weights, values = list(zip(*items))
    return ListMatcher(ids, weights, values, format_, scorer=scorer)
def vector(self, docnum, fieldname, format_):
    items = self._segment._vectors[docnum][fieldname]
    ids, weights, values = list(zip(*items))
    return ListMatcher(ids, weights, values, format_)
def vector(self, docnum, fieldname):
    vformat = self.schema[fieldname].vector
    ids, weights, values = zip(*self.vectors[docnum, fieldname])
    return ListMatcher(ids, weights, values, format=vformat)
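# --- Usage sketch (not part of the original code) ---
# A hedged example, assuming a Whoosh-style IndexReader whose vector() takes
# (docnum, fieldname) as above; the document number, field name, and index
# directory are hypothetical. The returned ListMatcher iterates the vectored
# terms of that document.
from whoosh import index

ix = index.open_dir("indexdir")       # hypothetical index location
reader = ix.reader()
try:
    v = reader.vector(0, "content")   # matcher over terms in doc 0's vector
    while v.is_active():
        print(v.id(), v.weight())     # term text and its weight in the document
        v.next()
finally:
    reader.close()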