def get_one_document(idx, by_id=None):
    """Fetch a single document by uuid, plus up to 3 related documents.

    Related documents come from ``more_like_this`` on the ``keywords``
    field; when that turns up nothing, we fall back to a much broader
    term pool drawn from ``content`` so there are (hopefully) always
    3 related articles.

    :param idx: a whoosh index object
    :param by_id: uuid string of the document to look up
    :raises ValueError: if ``by_id`` is None (the original code would
        have raised an opaque error from the query parser instead)
    :returns: tuple of ``(document, related)`` results, each passed
        through ``clean_results``
    """
    if by_id is None:
        raise ValueError('cannot fetch a document with a null uuid')
    with idx.searcher() as search:
        q = QueryParser('uuid', idx.schema).parse(by_id)
        results = search.search(q)
        related = results[0].more_like_this('keywords', top=3, numterms=10)
        if not related.top_n:
            # if we can't find anything with related keywords, we want
            # to extend our search into the content with lots of possible
            # terms. the goal is to have 3 related articles...
            related = results[0].more_like_this('content', top=3, numterms=80)
        return clean_results(idx, results), clean_results(idx, related)
def generic(idx, qs=None, q=None, limit=5, parser=None, page=1):
    """Run a generic, paginated search against the index.

    :param idx: a whoosh index object
    :param qs: raw query string; parsed with ``parser`` when non-empty
    :param q: a pre-built whoosh ``Query`` object, used when ``qs`` is
        empty or None
    :param limit: maximum results per page
    :param parser: optional query parser; defaults to a
        ``MultifieldParser`` over title/keywords/summary/content/author
        with OR grouping
    :param page: 1-based page number
    :raises ValueError: when neither a usable query string nor a query
        object is supplied
    :returns: cleaned results annotated with pagination attributes
    """
    # Guard against having nothing to search with. Note: a plain
    # `qs is q is None` check would let qs='' through, and then
    # query=None would be handed to whoosh below.
    if not qs and q is None:
        raise ValueError('cannot have a null querystring and query')

    if parser is None:
        parser = MultifieldParser(
            ['title', 'keywords', 'summary', 'content', 'author'],
            idx.schema, group=OrGroup)
        # add better date parsing support to the default parser
        # NOTE(review): plugin setup is scoped to the default parser so a
        # caller-supplied parser is never mutated — confirm against callers.
        parser.add_plugin(DateParserPlugin())
        parser.remove_plugin_class(WildcardPlugin)

    with idx.searcher() as search:
        # generate the Query object
        if qs:
            query = parser.parse(qs)
        else:
            query = q

        # rank by score first, then most-recently-modified, then title
        facet = MultiFacet()
        facet.add_score()
        facet.add_field('modified', reverse=True)
        facet.add_field('title')

        results = search.search_page(
            query, pagenum=page, sortedby=facet, pagelen=limit)

        res = clean_results(idx, results, query)
        # pagination attributes on `search_page` method
        res.page_number = results.pagenum   # current page number
        res.page_total = results.pagecount  # total pages in results
        res.offset = results.offset         # first result of current page
        res.pagelen = results.pagelen       # the number of max results per page
        return res
def get(self, old_slug):
    """Resolve a legacy article slug to its new location.

    There are still cached sites from google hitting the server that
    don't match the new routing scheme. To compensate, this handler
    parses the previous slug format and runs a broad OR query over the
    title field with its words. Since old content is basically being
    ported 1-to-1, ANY search hit is redirected to permanently;
    otherwise a "missing" message is written out.
    """
    keywords = [piece.strip() for piece in old_slug.split('-')]
    idx = self.meta.search_index
    with idx.searcher() as searcher:
        query = QueryParser('title', idx.schema).parse(' OR '.join(keywords))
        found = clean_results(idx, searcher.search(query, limit=1))
        if found.get('count', 0) > 0:
            # rebuild the url, and redirect PERMANENTLY
            # to the correct endpoint
            self.redirect(document_slug(found.results[0]), permanent=True)
        else:
            # we couldn't find a "missing" old article given the slug
            # in the new data store, so return some kind of
            # "Missing" page.
            self.write('That content appears to be missing.')
def get_all_documents(idx, limit=None):
    """Return every document in the index, cleaned for presentation.

    :param idx: a whoosh index object
    :param limit: optional cap on the number of documents returned
    """
    with idx.searcher() as searcher:
        everything = searcher.search(Every(), limit=limit)
        return clean_results(idx, everything)