def _load_solrdata(self):
    if self.type == "edition":
        return {
            'ebook_count': int(bool(self.document.ocaid)),
            'edition_count': 1,
            'work_count': 1,
            'last_update': self.document.last_modified,
        }
    else:
        q = self.get_solr_query_term()
        if q:
            solr = get_works_solr()
            result = solr.select(q, fields=["edition_count", "ebook_count_i"])
            last_update_i = [doc['last_update_i'] for doc in result.docs if 'last_update_i' in doc]
            if last_update_i:
                # Use the most recent timestamp; _inttime_to_datetime expects
                # a single int time, not a list.
                last_update = self._inttime_to_datetime(max(last_update_i))
            else:
                # If last_update is not present in solr, consider last_modified
                # of that document as last_update.
                if self.type in ['work', 'author']:
                    last_update = self.document.last_modified
                else:
                    last_update = None
            return {
                'ebook_count': sum(doc.get('ebook_count_i', 0) for doc in result.docs),
                'edition_count': sum(doc.get('edition_count', 0) for doc in result.docs),
                'work_count': 0,
                'last_update': last_update,
            }
        return {}
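# --- Illustrative sketch, not part of the original module ---
# Shows how _load_solrdata aggregates counts across solr result docs in the
# non-edition branch. The docs below are fabricated stand-ins for solr
# response docs; missing counts default to 0.
def _demo_aggregate_counts(docs):
    return {
        'ebook_count': sum(doc.get('ebook_count_i', 0) for doc in docs),
        'edition_count': sum(doc.get('edition_count', 0) for doc in docs),
    }

assert _demo_aggregate_counts([
    {'edition_count': 3, 'ebook_count_i': 1},
    {'edition_count': 2},  # no ebook_count_i, counts as 0
]) == {'ebook_count': 1, 'edition_count': 5}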
def random_ebooks(limit=2000):
    solr = search.get_works_solr()
    sort = "edition_count desc"
    result = solr.select(
        query='has_fulltext:true -public_scan_b:false',
        rows=limit,
        sort=sort,
        fields=[
            'has_fulltext',
            'key',
            'ia',
            'title',
            'cover_edition_key',
            'author_key',
            'author_name',
        ])

    def process_doc(doc):
        d = {}
        d['url'] = "/works/" + doc['key']
        d['title'] = doc.get('title', '')
        if 'author_key' in doc and 'author_name' in doc:
            d['authors'] = [{"key": key, "name": name}
                            for key, name in zip(doc['author_key'], doc['author_name'])]
        if 'cover_edition_key' in doc:
            d['cover_url'] = h.get_coverstore_url() + "/b/olid/%s-M.jpg" % doc['cover_edition_key']
        d['read_url'] = "//archive.org/stream/" + doc['ia'][0]
        return d

    return [process_doc(doc) for doc in result['docs'] if doc.get('ia')]
def random_ebooks(limit=2000):
    solr = search.get_works_solr()
    sort = "edition_count desc"
    result = solr.select(
        query="has_fulltext:true -public_scan_b:false",
        rows=limit,
        sort=sort,
        fields=[
            "has_fulltext",
            "key",
            "ia",
            "title",
            "cover_edition_key",
            "author_key",
            "author_name",
        ],
    )

    def process_doc(doc):
        d = {}
        key = doc["key"]
        # New solr stores the key as /works/OLxxxW
        if not key.startswith("/works/"):
            key = "/works/" + key
        d["url"] = key
        d["title"] = doc.get("title", "")
        if "author_key" in doc and "author_name" in doc:
            d["authors"] = [{"key": key, "name": name}
                            for key, name in zip(doc["author_key"], doc["author_name"])]
        if "cover_edition_key" in doc:
            d["cover_url"] = h.get_coverstore_url() + "/b/olid/%s-M.jpg" % doc["cover_edition_key"]
        d["read_url"] = "//archive.org/stream/" + doc["ia"][0]
        return d

    return [process_doc(doc) for doc in result["docs"] if doc.get("ia")]
def random_ebooks(limit=2000):
    solr = search.get_works_solr()
    sort = "edition_count desc"
    result = solr.select(
        query='has_fulltext:true -public_scan_b:false',
        rows=limit,
        sort=sort,
        fields=[
            'has_fulltext',
            'key',
            'ia',
            'title',
            'cover_edition_key',
            'author_key',
            'author_name',
        ])

    def process_doc(doc):
        d = {}
        d['url'] = "/works/" + doc['key']
        d['title'] = doc.get('title', '')
        if 'author_key' in doc and 'author_name' in doc:
            d['authors'] = [{"key": key, "name": name}
                            for key, name in zip(doc['author_key'], doc['author_name'])]
        if 'cover_edition_key' in doc:
            d['cover_url'] = h.get_coverstore_url() + "/b/olid/%s-M.jpg" % doc['cover_edition_key']
        d['read_url'] = "http://www.archive.org/stream/" + doc['ia'][0]
        return d

    return [process_doc(doc) for doc in result['docs'] if doc.get('ia')]
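# --- Illustrative sketch, not part of the original module ---
# What process_doc (in the random_ebooks variants above) yields for one solr
# doc. The doc and identifiers below are fabricated for illustration, and
# h.get_coverstore_url() is assumed to return the coverstore base URL.
_fake_solr_doc = {
    'key': 'OL000W',
    'title': 'Example Title',
    'author_key': ['OL000A'],
    'author_name': ['Example Author'],
    'cover_edition_key': 'OL000M',
    'ia': ['exampletitle00auth'],
}
# process_doc(_fake_solr_doc) would produce roughly:
# {
#     'url': '/works/OL000W',
#     'title': 'Example Title',
#     'authors': [{'key': 'OL000A', 'name': 'Example Author'}],
#     'cover_url': '<coverstore>/b/olid/OL000M-M.jpg',
#     'read_url': '//archive.org/stream/exampletitle00auth',
# }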
def _get_edition_keys_from_solr(self, query_terms):
    if not query_terms:
        return
    q = " OR ".join(query_terms)
    solr = get_works_solr()
    result = solr.select(q, fields=["edition_key"], rows=10000)
    for doc in result['docs']:
        for k in doc['edition_key']:
            yield "/books/" + k
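# --- Illustrative sketch, not part of the original module ---
# _get_edition_keys_from_solr flattens the edition_key lists of all result
# docs into /books/ paths; the result dict below is fabricated.
_fake_result = {'docs': [{'edition_key': ['OL1M', 'OL2M']}, {'edition_key': ['OL3M']}]}
assert ["/books/" + k
        for doc in _fake_result['docs']
        for k in doc['edition_key']] == ['/books/OL1M', '/books/OL2M', '/books/OL3M']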
def get_solr_query_term(self):
    if self.type == 'edition':
        return "edition_key:" + self._get_document_basekey()
    elif self.type == 'work':
        return 'key:/works/' + self._get_document_basekey()
    elif self.type == 'author':
        return "author_key:" + self._get_document_basekey()
    elif self.type == 'subject':
        type, value = self.key.split(":", 1)
        # Escape the value as it can have special chars like ':' etc.
        value = get_works_solr().escape(value)
        return "%s_key:%s" % (type, value)
def get_solr_query_term(self):
    if self.type == 'edition':
        return "edition_key:" + self._get_document_basekey()
    elif self.type == 'work':
        return 'key:' + self._get_document_basekey()
    elif self.type == 'author':
        return "author_key:" + self._get_document_basekey()
    elif self.type == 'subject':
        type, value = self.key.split(":", 1)
        # Escape the value as it can have special chars like ':' etc.
        value = get_works_solr().escape(value)
        return "%s_key:%s" % (type, value)
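# --- Illustrative sketch, not part of the original module ---
# Sample query terms the two get_solr_query_term variants above would build
# for hypothetical seeds (all keys below are made up; the subject value is
# solr-escaped before being interpolated):
_example_query_terms = {
    'edition': "edition_key:OL1M",
    'work (first variant)': "key:/works/OL1W",
    'work (second variant)': "key:OL1W",
    'author': "author_key:OL1A",
    'subject (key "place:san_francisco")': "place_key:san_francisco",
}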
def _get_all_subjects(self):
    solr = get_works_solr()
    q = self._get_solr_query_for_subjects()

    # Solr has a maxBooleanClauses constraint; with too many seeds, the
    # query would exceed it, so skip the solr query in that case.
    if len(self.seeds) > 500:
        logger.warn("More than 500 seeds. skipping solr query for finding subjects.")
        return []

    facet_names = ['subject_facet', 'place_facet', 'person_facet', 'time_facet']
    try:
        result = solr.select(q, fields=[], facets=facet_names, facet_limit=20, facet_mincount=1)
    except IOError:
        logger.error("Error in finding subjects of list %s", self.key, exc_info=True)
        return []

    def get_subject_prefix(facet_name):
        name = facet_name.replace("_facet", "")
        if name == 'subject':
            return ''
        else:
            return name + ":"

    def process_subject(facet_name, title, count):
        prefix = get_subject_prefix(facet_name)
        key = prefix + title.lower().replace(" ", "_")
        url = "/subjects/" + key
        return web.storage({
            "title": title,
            "name": title,
            "count": count,
            "key": key,
            "url": url,
        })

    def process_all():
        facets = result['facets']
        for k in facet_names:
            for f in facets.get(k, []):
                yield process_subject(f.name, f.value, f.count)

    return sorted(process_all(), reverse=True, key=lambda s: s["count"])
def _get_all_subjects(self):
    solr = get_works_solr()
    q = self._get_solr_query_for_subjects()

    # Solr has a maxBooleanClauses constraint; with too many seeds, the
    # query would exceed it, so skip the solr query in that case.
    if len(self.seeds) > 500:
        logger.warn("More than 500 seeds. skipping solr query for finding subjects.")
        return []

    facet_names = ['subject_facet', 'place_facet', 'person_facet', 'time_facet']
    try:
        result = solr.select(q, fields=[], facets=facet_names, facet_limit=20, facet_mincount=1)
    except IOError:
        logger.error("Error in finding subjects of list %s", self.key, exc_info=True)
        return []

    def get_subject_prefix(facet_name):
        name = facet_name.replace("_facet", "")
        if name == 'subject':
            return ''
        else:
            return name + ":"

    def process_subject(facet_name, title, count):
        prefix = get_subject_prefix(facet_name)
        key = prefix + title.lower().replace(" ", "_")
        url = "/subjects/" + key
        return web.storage({
            "title": title,
            "name": title,
            "count": count,
            "key": key,
            "url": url,
        })

    def process_all():
        facets = result['facets']
        for k in facet_names:
            for f in facets.get(k, []):
                yield process_subject(f.name, f.value, f.count)

    return sorted(process_all(), reverse=True, key=lambda s: s["count"])
def GET(self):
    from openlibrary.plugins.worksearch.search import get_works_solr
    result = get_works_solr().select(query='borrowed_b:false', fields=['key', 'lending_edition_s'], limit=100)

    def make_doc(d):
        # Makes a store doc from a solr doc.
        return {
            "_key": "ebooks/books/" + d['lending_edition_s'],
            "_rev": None,  # Don't worry about consistency
            "type": "ebook",
            "book_key": "/books/" + d['lending_edition_s'],
            "borrowed": "false",
        }

    docs = [make_doc(d) for d in result['docs']]
    docdict = dict((d['_key'], d) for d in docs)
    web.ctx.site.store.update(docdict)
    return delegate.RawText("ok\n")
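# --- Illustrative sketch, not part of the original module ---
# The store doc make_doc above builds for one solr row (edition key
# fabricated):
_example_store_doc = {
    '_key': 'ebooks/books/OL123M',
    '_rev': None,
    'type': 'ebook',
    'book_key': '/books/OL123M',
    'borrowed': 'false',
}
# web.ctx.site.store.update writes docs keyed by '_key', so repeat runs
# overwrite the same store entries rather than duplicating them.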
def _get_all_subjects(self):
    solr = get_works_solr()
    q = self._get_solr_query_for_subjects()

    facet_names = ['subject_facet', 'place_facet', 'person_facet', 'time_facet']
    result = solr.select(q, fields=[], facets=facet_names, facet_limit=20, facet_mincount=1)

    def get_subject_prefix(facet_name):
        name = facet_name.replace("_facet", "")
        if name == 'subject':
            return ''
        else:
            return name + ":"

    def process_subject(facet_name, title, count):
        prefix = get_subject_prefix(facet_name)
        key = prefix + title.lower().replace(" ", "_")
        url = "/subjects/" + key
        return web.storage({
            "title": title,
            "name": title,
            "count": count,
            "key": key,
            "url": url,
        })

    def process_all():
        facets = result['facets']
        for k in facet_names:
            for f in facets.get(k, []):
                yield process_subject(f.name, f.value, f.count)

    return sorted(process_all(), reverse=True, key=lambda s: s["count"])
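# --- Illustrative sketch, not part of the original module ---
# The subject-key derivation used by process_subject in the _get_all_subjects
# variants above: the facet name determines the prefix, and the title is
# slugified by lowercasing and replacing spaces with underscores.
def _demo_subject_key(facet_name, title):
    name = facet_name.replace("_facet", "")
    prefix = '' if name == 'subject' else name + ':'
    return prefix + title.lower().replace(" ", "_")

assert _demo_subject_key('subject_facet', 'Science Fiction') == 'science_fiction'
assert _demo_subject_key('place_facet', 'San Francisco') == 'place:san_francisco'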