def get_meta_xml(itemid):
    """Returns the contents of meta_xml as JSON.
    """
    itemid = itemid.strip()
    url = 'http://www.archive.org/download/%s/%s_meta.xml' % (itemid, itemid)
    try:
        stats.begin("archive.org", url=url)
        metaxml = urllib2.urlopen(url).read()
        stats.end()
    except IOError:
        stats.end()
        return web.storage()

    # archive.org returns html on internal errors.
    # Checking for valid xml before trying to parse it.
    if not metaxml.strip().startswith("<?xml"):
        return web.storage()

    try:
        defaults = {"collection": [], "external-identifier": []}
        return web.storage(xml2dict(metaxml, **defaults))
    except Exception, e:
        print >> web.debug, "Failed to parse metaxml for %s: %s" % (itemid, str(e))
        return web.storage()
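# The snippets in this file share one instrumentation pattern: stats.begin()
# is called right before an external request (archive.org, Solr, memcache,
# CouchDB, infobase) and stats.end() is called on every exit path so the
# timing entry is always closed.  A minimal sketch of that pattern, assuming
# only the module-level `stats` and `urllib2` used by the surrounding code;
# `timed_urlopen` and the label argument are hypothetical names introduced
# here for illustration, not part of the codebase:
def timed_urlopen(label, url):
    """Fetch `url` and record the call under `label`, closing stats on every path."""
    stats.begin(label, url=url)
    try:
        return urllib2.urlopen(url).read()
    finally:
        # try/finally guarantees stats.end() runs on success and on error alike.
        stats.end()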
def get_many(self, sitename, data):
    keys = simplejson.loads(data['keys'])

    stats.begin("memcache.get_multi")
    result = self.memcache.get_multi(keys)
    stats.end(found=len(result))

    keys2 = [k for k in keys if k not in result]
    if keys2:
        data['keys'] = simplejson.dumps(keys2)
        result2 = ConnectionMiddleware.get_many(self, sitename, data)
        result2 = simplejson.loads(result2)

        # Memcache expects dict with (key, json) mapping and we have (key, doc) mapping.
        # Converting the docs to json before passing to memcache.
        self.mc_set_multi(dict((key, simplejson.dumps(doc)) for key, doc in result2.items()))

        result.update(result2)

    #@@ too many JSON conversions
    for k in result:
        if isinstance(result[k], six.string_types):
            result[k] = simplejson.loads(result[k])

    return simplejson.dumps(result)
def works_by_author(akey, sort='editions', page=1, rows=100):
    q = 'author_key:' + akey
    offset = rows * (page - 1)
    fields = ['key', 'author_name', 'author_key', 'title', 'subtitle',
              'edition_count', 'ia', 'cover_edition_key', 'has_fulltext',
              'first_publish_year', 'public_scan_b', 'lending_edition_s',
              'overdrive_s', 'ia_collection_s']
    fl = ','.join(fields)
    solr_select = solr_select_url + "?q.op=AND&q=%s&fq=&start=%d&rows=%d&fl=%s&wt=json" % (q, offset, rows, fl)
    facet_fields = ["author_facet", "language", "publish_year", "publisher_facet",
                    "subject_facet", "person_facet", "place_facet", "time_facet"]
    if sort == 'editions':
        solr_select += '&sort=edition_count+desc'
    elif sort.startswith('old'):
        solr_select += '&sort=first_publish_year+asc'
    elif sort.startswith('new'):
        solr_select += '&sort=first_publish_year+desc'
    elif sort.startswith('title'):
        solr_select += '&sort=title+asc'
    solr_select += "&facet=true&facet.mincount=1&f.author_facet.facet.sort=count&f.publish_year.facet.limit=-1&facet.limit=25&" + '&'.join("facet.field=" + f for f in facet_fields)
    stats.begin("solr", url=solr_select)
    reply = json.load(urllib.urlopen(solr_select))
    stats.end()
    facets = reply['facet_counts']['facet_fields']
    works = [work_object(w) for w in reply['response']['docs']]

    def get_facet(f, limit=None):
        return list(web.group(facets[f][:limit * 2] if limit else facets[f], 2))

    return web.storage(
        num_found = int(reply['response']['numFound']),
        works = works,
        years = [(int(k), v) for k, v in get_facet('publish_year')],
        get_facet = get_facet,
        sort = sort,
    )
def work_search(query, limit=20, offset=0, **kw):
    """Search for works."""
    kw.setdefault("doc_wrapper", work_wrapper)
    fields = [
        "key",
        "author_name",
        "author_key",
        "title",
        "edition_count",
        "ia",
        "cover_edition_key",
        "has_fulltext",
        "subject",
        "ia_collection_s",
        "public_scan_b",
        "overdrive_s",
        "lending_edition_s",
    ]
    kw.setdefault("fields", fields)

    if config.get('single_core_solr'):
        kw.setdefault("fq", "type:work")

    query = process_work_query(query)
    solr = get_works_solr()
    stats.begin("solr", query=query, start=offset, rows=limit, kw=kw)
    try:
        result = solr.select(query, start=offset, rows=limit, **kw)
    finally:
        stats.end()
    return result
def get_results(q, offset=0, limit=100):
    q = escape_bracket(q)
    solr_select = solr_edition_select_url + "?q.op=AND&q=%s&fq=&start=%d&rows=%d&fl=*&qt=standard&wt=json" % (web.urlquote(q), offset, limit)
    stats.begin("solr", url=solr_select)
    json_data = urllib.urlopen(solr_select).read()
    stats.end()
    return json.loads(json_data)
def get_results(q, offset=0, limit=100, snippets=3, fragsize=200, hl_phrase=False):
    m = re_bad_fields.match(q)
    if m:
        return { 'error': m.group(1) + ' search not supported' }
    q = escape_q(q)
    solr_params = [
        ('fl', 'ia,body_length,page_count'),
        ('hl', 'true'),
        ('hl.fl', 'body'),
        ('hl.snippets', snippets),
        ('hl.mergeContiguous', 'true'),
        ('hl.usePhraseHighlighter', 'true' if hl_phrase else 'false'),
        ('hl.simple.pre', '{{{'),
        ('hl.simple.post', '}}}'),
        ('hl.fragsize', fragsize),
        ('q.op', 'AND'),
        ('q', web.urlquote(q)),
        ('start', offset),
        ('rows', limit),
        ('qf', 'body'),
        ('qt', 'standard'),
        ('hl.maxAnalyzedChars', '-1'),
        ('wt', 'json'),
    ]
    solr_select = solr_select_url + '?' + '&'.join("%s=%s" % (k, unicode(v)) for k, v in solr_params)
    stats.begin("solr", url=solr_select)
    json_data = urllib.urlopen(solr_select).read()
    stats.end()
    try:
        return simplejson.loads(json_data)
    except:
        m = re_query_parser_error.search(json_data)
        return { 'error': web.htmlunquote(m.group(1)) }
def _editions_view(self, seeds, **kw):
    reverse = str(kw.pop("reverse", "")).lower()
    if 'sort' in kw and reverse == "true":
        # sort=\field is the couchdb-lucene's way of telling ORDER BY field DESC
        kw['sort'] = '\\' + kw['sort']

    view_url = config.get("lists", {}).get("editions_view")
    if not view_url:
        return {}

    def escape(value):
        special_chars = '+-&|!(){}[]^"~*?:\\'
        pattern = "([%s])" % re.escape(special_chars)
        quote = '"'
        return quote + web.re_compile(pattern).sub(r'\\\1', value) + quote

    q = " OR ".join("seed:" + escape(seed.encode('utf-8')) for seed in seeds)
    url = view_url + "?" + urllib.urlencode(dict(kw, q=q))

    stats.begin("couchdb", url=url)
    try:
        json = urllib2.urlopen(url).read()
    finally:
        stats.end()
    return simplejson.loads(json)
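# A small sketch of what the Lucene escaping in _editions_view produces: each
# seed is wrapped in double quotes with Lucene metacharacters backslash-escaped,
# and the seeds are OR-ed into a single `q` parameter.  This demo uses the same
# special-character set as above but plain `re` instead of web.re_compile; the
# seed values and the `_escape_seed_demo` name are made up for illustration.
def _escape_seed_demo():
    import re
    special_chars = '+-&|!(){}[]^"~*?:\\'
    pattern = "([%s])" % re.escape(special_chars)

    def escape(value):
        # Quote the seed and backslash-escape Lucene metacharacters inside it.
        return '"' + re.sub(pattern, r'\\\1', value) + '"'

    seeds = ['/books/OL1M', '/subjects/love+war']
    # -> 'seed:"/books/OL1M" OR seed:"/subjects/love\+war"'
    return " OR ".join("seed:" + escape(seed) for seed in seeds)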
def work_search(query, limit=20, offset=0, **kw):
    """Search for works."""
    kw.setdefault("doc_wrapper", work_wrapper)
    fields = [
        "key",
        "author_name",
        "author_key",
        "title",
        "edition_count",
        "ia",
        "cover_edition_key",
        "has_fulltext",
        "subject",
        "ia_collection_s",
        "public_scan_b",
        "overdrive_s",
        "lending_edition_s",
    ]
    kw.setdefault("fields", fields)

    query = process_work_query(query)
    solr = get_works_solr()
    stats.begin("solr", query=query, start=offset, rows=limit, kw=kw)
    try:
        result = solr.select(query, start=offset, rows=limit, **kw)
    finally:
        stats.end()
    return result
def add(self, key, value, expires=0):
    key = web.safestr(key)
    value = simplejson.dumps(value)

    stats.begin("memcache.add", key=key)
    value = self.memcache.add(key, value, expires)
    stats.end()
    return value
def _get_solr_data(self):
    if config.get("single_core_solr"):
        key = self.key
    else:
        key = self.get_olid()

    fields = [
        "cover_edition_key", "cover_id", "edition_key", "first_publish_year",
        "has_fulltext", "lending_edition", "checked_out", "public_scan_b", "ia"]

    solr = get_works_solr()
    stats.begin("solr", query={"key": key}, fields=fields)
    try:
        d = solr.select({"key": key}, fields=fields)
    finally:
        stats.end()

    if d.num_found > 0:
        w = d.docs[0]
    else:
        w = None

    # Replace _solr_data property with the attribute
    self.__dict__['_solr_data'] = w
    return w
def _couchdb_view(self, db, viewname, **kw):
    stats.begin("couchdb", db=db.name, view=viewname, kw=kw)
    try:
        result = db.view(viewname, **kw)
    finally:
        stats.end()
    return result
def set(self, key, value, expires=0):
    key = web.safestr(key)
    value = simplejson.dumps(value)

    stats.begin("memcache.set", key=key)
    value = self.memcache.set(key, value, expires)
    stats.end()
    return value
def _get_solr_data(self):
    if config.get("single_core_solr"):
        key = self.key
    else:
        key = self.get_olid()

    fields = [
        "cover_edition_key", "cover_id", "edition_key", "first_publish_year",
        "has_fulltext", "lending_edition_s", "checked_out", "public_scan_b", "ia"
    ]

    solr = get_works_solr()
    stats.begin("solr", query={"key": key}, fields=fields)
    try:
        d = solr.select({"key": key}, fields=fields)
    except Exception as e:
        logging.getLogger("openlibrary").exception("Failed to get solr data")
        return None
    finally:
        stats.end()

    if d.num_found > 0:
        w = d.docs[0]
    else:
        w = None

    # Replace _solr_data property with the attribute
    self.__dict__['_solr_data'] = w
    return w
def get_ia_meta_fields(self):
    # Check for cached value
    # $$$ we haven't assigned _ia_meta_fields the first time around but there's apparently
    #     some magic that lets us check this way (and breaks using hasattr to check if defined)
    if self._ia_meta_fields:
        return self._ia_meta_fields

    if not self.get('ocaid', None):
        return {}

    ia = self.ocaid
    url = 'http://www.archive.org/download/%s/%s_meta.xml' % (ia, ia)
    reply = dict([(set_name, set()) for set_name in ia_meta_sets])  # create empty sets

    try:
        stats.begin("archive.org", url=url)
        f = urllib2.urlopen(url)
        stats.end()
    except:
        stats.end()
        return reply

    for line in f:
        m = re_meta_field.search(line)
        if not m:
            continue
        k = m.group(1).lower()
        v = m.group(2)
        if k == 'collection':
            reply[k].add(v.lower())
        elif k in ia_meta_sets:
            reply[k].add(v)
        else:
            if k in ia_meta_fields:
                reply[k] = v

    self._ia_meta_fields = reply
    return self._ia_meta_fields
def get_ia_meta_fields(self):
    if not self.get('ocaid', None):
        return {}

    ia = self.ocaid
    url = 'http://www.archive.org/download/%s/%s_meta.xml' % (ia, ia)
    reply = {'collection': set()}
    try:
        stats.begin("archive.org", url=url)
        f = urllib2.urlopen(url)
        stats.end()
    except:
        stats.end()
        return reply

    for line in f:
        m = re_meta_field.search(line)
        if not m:
            continue
        k = m.group(1).lower()
        v = m.group(2)
        if k == 'collection':
            reply[k].add(v.lower())
        else:
            assert k == 'contributor'
            reply[k] = v
    return reply
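# The _meta.xml parsing above depends on a module-level regex `re_meta_field`
# that is not shown in this listing.  A plausible stand-in, assuming _meta.xml
# lines of the form `<collection>printdisabled</collection>`; the exact pattern
# in the codebase may differ, and both names below are hypothetical:
import re

re_meta_field_demo = re.compile(r'<(collection|contributor)>([^<]+)</\1>')

def _parse_meta_line_demo(line):
    """Return (field, value) for a recognised _meta.xml line, or None."""
    m = re_meta_field_demo.search(line)
    return (m.group(1).lower(), m.group(2)) if m else None

# _parse_meta_line_demo('  <collection>printdisabled</collection>')
# -> ('collection', 'printdisabled')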
def add(self, key, value, expires=0):
    key = web.safestr(key)
    value = json.dumps(value)

    stats.begin("memcache.add", key=key)
    value = self.memcache.add(key, value, expires)
    stats.end()
    return value
def get_couchdb_docs(self, db, keys):
    try:
        stats.begin(name="_all_docs", keys=keys, include_docs=True)
        docs = dict((row.id, row.doc) for row in db.view("_all_docs", keys=keys, include_docs=True))
    finally:
        stats.end()
    return docs
def read_from_archive(ia):
    meta_xml = 'http://archive.org/download/' + ia + '/' + ia + '_meta.xml'
    stats.begin("archive.org", url=meta_xml)
    xml_data = urllib.urlopen(meta_xml)
    item = {}
    try:
        tree = etree.parse(xml_data)
    except etree.XMLSyntaxError:
        return {}
    finally:
        stats.end()
    root = tree.getroot()

    fields = ['title', 'creator', 'publisher', 'date', 'language']

    for k in 'title', 'date', 'publisher':
        v = root.find(k)
        if v is not None:
            item[k] = v.text

    for k in 'creator', 'language', 'collection':
        v = root.findall(k)
        if len(v):
            item[k] = [i.text for i in v if i.text]
    return item
def _old_get_meta_xml(itemid):
    """Returns the contents of meta_xml as JSON.
    """
    itemid = web.safestr(itemid.strip())
    url = 'http://www.archive.org/download/%s/%s_meta.xml' % (itemid, itemid)
    try:
        stats.begin('archive.org', url=url)
        metaxml = urllib2.urlopen(url).read()
        stats.end()
    except IOError:
        logger.error("Failed to download _meta.xml for %s", itemid, exc_info=True)
        stats.end()
        return web.storage()

    # archive.org returns html on internal errors.
    # Checking for valid xml before trying to parse it.
    if not metaxml.strip().startswith("<?xml"):
        return web.storage()

    try:
        defaults = {"collection": [], "external-identifier": []}
        return web.storage(xml2dict(metaxml, **defaults))
    except Exception as e:
        logger.error("Failed to parse metaxml for %s", itemid, exc_info=True)
        return web.storage()
def get_many(self, sitename, data):
    keys = simplejson.loads(data['keys'])

    stats.begin("memcache.get_multi")
    result = self.memcache.get_multi(keys)
    stats.end(found=len(result))

    keys2 = [k for k in keys if k not in result]
    if keys2:
        data['keys'] = simplejson.dumps(keys2)
        result2 = ConnectionMiddleware.get_many(self, sitename, data)
        result2 = simplejson.loads(result2)

        # Memcache expects dict with (key, json) mapping and we have (key, doc) mapping.
        # Converting the docs to json before passing to memcache.
        self.mc_set_multi(dict((key, simplejson.dumps(doc)) for key, doc in result2.items()))

        result.update(result2)

    #@@ too many JSON conversions
    for k in result:
        if isinstance(result[k], basestring):
            result[k] = simplejson.loads(result[k])

    return simplejson.dumps(result)
def read_from_archive(ia):
    meta_xml = 'http://www.archive.org/download/' + ia + '/' + ia + '_meta.xml'
    stats.begin("archive.org", url=meta_xml)
    xml_data = urllib.urlopen(meta_xml)
    item = {}
    try:
        tree = etree.parse(xml_data)
    except etree.XMLSyntaxError:
        return {}
    finally:
        stats.end()
    root = tree.getroot()

    fields = ['title', 'creator', 'publisher', 'date', 'language']

    for k in 'title', 'date', 'publisher':
        v = root.find(k)
        if v is not None:
            item[k] = v.text

    for k in 'creator', 'language', 'collection':
        v = root.findall(k)
        if len(v):
            item[k] = [i.text for i in v if i.text]
    return item
def memcache_set(self, args, kw, value, time):
    """Adds value and time to memcache. Key is computed from the arguments."""
    key = self.compute_key(args, kw)
    json_data = self.json_encode([value, time])

    stats.begin("memcache.set", key=key)
    self.memcache.set(key, json_data)
    stats.end()
def get_results(q, offset=0, limit=100, snippets=3, fragsize=200, hl_phrase=False):
    m = re_bad_fields.match(q)
    if m:
        return {'error': m.group(1) + ' search not supported'}
    q = escape_q(q)
    solr_params = [
        ('fl', 'ia,body_length,page_count'),
        ('hl', 'true'),
        ('hl.fl', 'body'),
        ('hl.snippets', snippets),
        ('hl.mergeContiguous', 'true'),
        ('hl.usePhraseHighlighter', 'true' if hl_phrase else 'false'),
        ('hl.simple.pre', '{{{'),
        ('hl.simple.post', '}}}'),
        ('hl.fragsize', fragsize),
        ('q.op', 'AND'),
        ('q', web.urlquote(q)),
        ('start', offset),
        ('rows', limit),
        ('qf', 'body'),
        ('qt', 'standard'),
        ('hl.maxAnalyzedChars', '-1'),
        ('wt', 'json'),
    ]
    solr_select = solr_select_url + '?' + '&'.join("%s=%s" % (k, unicode(v)) for k, v in solr_params)
    stats.begin("solr", url=solr_select)
    json_data = urllib.urlopen(solr_select).read()
    stats.end()
    try:
        results = simplejson.loads(json_data)
    except:
        m = re_query_parser_error.search(json_data)
        return {'error': web.htmlunquote(m.group(1))}

    ekey_doc = {}
    for doc in results['response']['docs']:
        ia = doc['ia']
        q = {'type': '/type/edition', 'ocaid': ia}
        ekeys = web.ctx.site.things(q)
        if not ekeys:
            del q['ocaid']
            q['source_records'] = 'ia:' + ia
            ekeys = web.ctx.site.things(q)
        if ekeys:
            ekey_doc[ekeys[0]] = doc

    editions = web.ctx.site.get_many(ekey_doc.keys())
    for e in editions:
        ekey_doc[e['key']]['edition'] = e
    return results
def get_results(q, offset=0, limit=100):
    valid_fields = ['key', 'name', 'alternate_names', 'birth_date', 'death_date', 'date', 'work_count']
    q = escape_colon(escape_bracket(q), valid_fields)
    solr_select = solr_author_select_url + "?q.op=AND&q=%s&fq=&start=%d&rows=%d&fl=*&qt=standard&wt=json" % (web.urlquote(q), offset, limit)
    solr_select += '&sort=work_count+desc'
    stats.begin("solr", url=solr_select)
    json_data = urllib.urlopen(solr_select).read()
    stats.end()
    return json.loads(json_data)
def memcache_set(self, args, kw, value, time):
    """Adds value and time to memcache. Key is computed from the arguments.
    """
    key = self.compute_key(args, kw)
    json = self.json_encode([value, time])

    stats.begin("memcache.set", key=key)
    self.memcache.set(key, json)
    stats.end()
def simple_search(q, offset=0, rows=20, sort=None):
    solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&fq=&start=%d&rows=%d&fl=*%%2Cscore&qt=standard&wt=json" % (web.urlquote(q), offset, rows)
    if sort:
        solr_select += "&sort=" + web.urlquote(sort)

    stats.begin("solr", url=solr_select)
    json_data = urllib.urlopen(solr_select)
    stats.end()
    return json.load(json_data)
def get_results(q, offset=0, limit=100):
    valid_fields = ['key', 'name', 'type', 'count']
    q = escape_colon(escape_bracket(q), valid_fields)
    solr_select = solr_subject_select_url + "?q.op=AND&q=%s&fq=&start=%d&rows=%d&fl=name,type,count&qt=standard&wt=json" % (web.urlquote(q), offset, limit)
    solr_select += '&sort=count+desc'
    stats.begin("solr", url=solr_select)
    json_data = urllib.urlopen(solr_select).read()
    stats.end()
    return json.loads(json_data)
def _couchdb_view(self, db, viewname, **kw):
    stats.begin("couchdb", db=db.name, view=viewname, kw=kw)
    try:
        result = db.view(viewname, **kw)
        # force fetching the results
        result.rows
    finally:
        stats.end()
    return result
def execute_solr_query(url):
    stats.begin("solr", url=url)
    try:
        solr_result = urllib2.urlopen(url, timeout=3)
    except Exception as e:
        logger.exception("Failed solr query")
        return None
    finally:
        stats.end()
    return solr_result
def top_books_from_author(akey, rows=5, offset=0):
    q = 'author_key:(' + akey + ')'
    solr_select = solr_select_url + "?q=%s&start=%d&rows=%d&fl=key,title,edition_count,first_publish_year&wt=json&sort=edition_count+desc" % (q, offset, rows)
    stats.begin("solr", url=solr_select)
    response = json.load(urllib.urlopen(solr_select))['response']
    stats.end()
    return {
        'books': [web.storage(doc) for doc in response['docs']],
        'total': response['numFound'],
    }
def get_results(q, offset=0, limit=100, snippets=3, fragsize=200):
    q = escape_bracket(q)
    solr_select = solr_select_url + "?fl=ia,body_length,page_count&hl=true&hl.fl=body&hl.snippets=%d&hl.mergeContiguous=true&hl.usePhraseHighlighter=false&hl.simple.pre={{{&hl.simple.post=}}}&hl.fragsize=%d&q.op=AND&q=%s&start=%d&rows=%d&qf=body&qt=standard&hl.maxAnalyzedChars=1000000&wt=json" % (snippets, fragsize, web.urlquote(q), offset, limit)
    stats.begin("solr", url=solr_select)
    json_data = urllib.urlopen(solr_select).read()
    stats.end()
    try:
        return simplejson.loads(json_data)
    except:
        m = re_query_parser_error.search(json_data)
        return { 'error': web.htmlunquote(m.group(1)) }
def run_solr_query(param={}, rows=100, page=1, sort=None, spellcheck_count=None):
    # called by do_search
    if spellcheck_count is None:
        spellcheck_count = default_spellcheck_count
    offset = rows * (page - 1)
    (q_list, use_dismax) = build_q_list(param)
    fields = ['key', 'author_name', 'author_key', 'title', 'subtitle',
              'edition_count', 'ia', 'has_fulltext', 'first_publish_year',
              'cover_edition_key', 'public_scan_b', 'lending_edition_s',
              'overdrive_s', 'ia_collection_s']
    fl = ','.join(fields)
    if use_dismax:
        q = web.urlquote(' '.join(q_list))
        solr_select = solr_select_url + "?defType=dismax&q.op=AND&q=%s&qf=text+title^5+author_name^5&bf=sqrt(edition_count)^10&start=%d&rows=%d&fl=%s&wt=standard" % (q, offset, rows, fl)
    else:
        q = web.urlquote(' '.join(q_list + ['_val_:"sqrt(edition_count)"^10']))
        solr_select = solr_select_url + "?q.op=AND&q=%s&start=%d&rows=%d&fl=%s&wt=standard" % (q, offset, rows, fl)
    solr_select += '&spellcheck=true&spellcheck.count=%d' % spellcheck_count
    solr_select += "&facet=true&" + '&'.join("facet.field=" + f for f in facet_fields)

    if 'public_scan' in param:
        v = param.pop('public_scan').lower()
        if v in ('true', 'false'):
            if v == 'false':
                # also constrain on print disabled since the index may not be in sync
                param.setdefault('print_disabled', 'false')
            solr_select += '&fq=public_scan_b:%s' % v

    if 'print_disabled' in param:
        v = param.pop('print_disabled').lower()
        if v in ('true', 'false'):
            solr_select += '&fq=%ssubject_key:protected_daisy' % ('-' if v == 'false' else '')

    k = 'has_fulltext'
    if k in param:
        v = param[k].lower()
        if v not in ('true', 'false'):
            # unrecognised value: drop the filter entirely
            del param[k]
        else:
            param[k] = v
            solr_select += '&fq=%s:%s' % (k, v)

    for k in facet_list_fields:
        if k == 'author_facet':
            k = 'author_key'
        if k not in param:
            continue
        v = param[k]
        solr_select += ''.join('&fq=%s:"%s"' % (k, url_quote(l)) for l in v if l)

    if sort:
        solr_select += "&sort=" + url_quote(sort)
    stats.begin("solr", url=solr_select)
    reply = urllib.urlopen(solr_select).read()
    stats.end()
    return (reply, solr_select, q_list)
def memcache_get(self, args, kw):
    """Reads the value from memcache. Key is computed from the arguments.

    Returns (value, time) when the value is available, None otherwise.
    """
    key = self.compute_key(args, kw)

    stats.begin("memcache.get", key=key)
    json = self.memcache.get(key)
    stats.end(hit=bool(json))

    return json and self.json_decode(json)
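# How memcache_get and memcache_set above are typically paired inside a
# memoized call path.  A minimal sketch only: `_cached_call` is a hypothetical
# helper, `cache` stands for the object providing memcache_get/memcache_set,
# and the assumption that the stored `time` is the computation timestamp is
# mine, not confirmed by this listing.
import time

def _cached_call(cache, func, args, kw):
    # Try the cache first; memcache_get returns (value, time) or a falsy value.
    cached = cache.memcache_get(args, kw)
    if cached:
        value, t = cached
        return value
    # Miss: compute, then store the value along with when it was computed.
    value = func(*args, **kw)
    cache.memcache_set(args, kw, value, time.time())
    return value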
def get_item_manifest(item_id, item_server, item_path):
    url = 'https://%s/BookReader/BookReaderJSON.php' % item_server
    url += '?itemPath=%s&itemId=%s&server=%s' % (item_path, item_id, item_server)
    try:
        stats.begin('archive.org', url=url)
        manifest = requests.get(url)
        stats.end()
        return manifest.json()
    except IOError:
        stats.end()
        return {}
def get_item_json(itemid):
    itemid = web.safestr(itemid.strip())
    url = 'http://archive.org/metadata/%s' % itemid
    try:
        stats.begin('archive.org', url=url)
        metadata_json = urllib2.urlopen(url).read()
        stats.end()
        return simplejson.loads(metadata_json)
    except IOError:
        stats.end()
        return {}
def search_inside_result_count(q):
    q = escape_q(q)
    solr_select = solr_select_url + "?fl=ia&q.op=AND&wt=json&q=" + web.urlquote(q)
    stats.begin("solr", url=solr_select)
    json_data = urllib.urlopen(solr_select).read()
    stats.end()
    try:
        results = simplejson.loads(json_data)
    except:
        return None
    return results['response']['numFound']
def _get_metadata(itemid):
    """Returns metadata by querying the archive.org metadata API.
    """
    url = "http://www.archive.org/metadata/%s" % itemid
    try:
        stats.begin("archive.org", url=url)
        text = urllib2.urlopen(url).read()
        stats.end()
        return simplejson.loads(text)
    except (IOError, ValueError):
        return None
def get_item_manifest(item_id, item_server, item_path):
    url = 'https://%s/BookReader/BookReaderJSON.php' % item_server
    url += "?itemPath=%s&itemId=%s&server=%s" % (item_path, item_id, item_server)
    try:
        stats.begin("archive.org", url=url)
        manifest_json = urllib2.urlopen(url).read()
        stats.end()
        return simplejson.loads(manifest_json)
    except IOError:
        stats.end()
        return {}
def sorted_work_editions(wkey, json_data=None):
    q = 'key:' + wkey
    if not json_data:  # json_data is passed in for testing
        solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=edition_key&qt=standard&wt=json" % q
        stats.begin("solr", url=solr_select)
        json_data = urllib.urlopen(solr_select).read()
        stats.end()
    reply = json.loads(json_data)

    if reply['response']['numFound'] == 0:
        return []
    return reply["response"]['docs'][0].get('edition_key', [])
def _get_metadata(itemid):
    """Returns metadata by querying the archive.org metadata API.
    """
    itemid = web.safestr(itemid.strip())
    url = '%s/metadata/%s' % (IA_BASE_URL, itemid)
    try:
        stats.begin('archive.org', url=url)
        metadata = requests.get(url)
        stats.end()
        return metadata.json()
    except IOError:
        stats.end()
        return {}
def execute_solr_query(
    solr_path: str, params: Union[dict, list[tuple[str, Any]]]
) -> Optional[Response]:
    stats.begin("solr", url=f'{solr_path}?{urlencode(params)}')
    try:
        response = requests.get(solr_path, params=params, timeout=10)
        response.raise_for_status()
    except requests.HTTPError:
        logger.exception("Failed solr query")
        return None
    finally:
        stats.end()
    return response
def request(self, sitename, path, method='GET', data=None):
    path = "/" + sitename + path
    web.ctx.infobase_auth_token = self.get_auth_token()
    try:
        stats.begin("infobase", path=path, method=method, data=data)
        out = server.request(path, method, data)
        stats.end()

        if 'infobase_auth_token' in web.ctx:
            self.set_auth_token(web.ctx.infobase_auth_token)
    except common.InfobaseException as e:
        stats.end(error=True)
        self.handle_error(e.status, str(e))
    return out
def _solr_data(self):
    fields = [
        "cover_edition_key", "cover_id", "edition_key", "first_publish_year",
        "has_fulltext", "lending_edition_s", "public_scan_b", "ia"]

    solr = get_solr()
    stats.begin("solr", get=self.key, fields=fields)
    try:
        return solr.get(self.key, fields=fields)
    except Exception as e:
        logging.getLogger("openlibrary").exception("Failed to get solr data")
        return None
    finally:
        stats.end()
def get_work_iaids(wkey):
    # wid = wkey.split('/')[2]
    solr_select_url = get_works_solr_select_url()
    filter = 'ia'
    q = 'key:' + wkey
    stats.begin('solr', url=wkey)
    solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=%s&qt=standard&wt=json&fq=type:work" % (q, filter)
    json_data = urllib.urlopen(solr_select).read()
    stats.end()
    print json_data
    reply = simplejson.loads(json_data)
    if reply['response']['numFound'] == 0:
        return []
    return reply["response"]['docs'][0].get(filter, [])
def get_work_iaids(wkey):
    # wid = wkey.split('/')[2]
    solr_select_url = get_solr_select_url()
    filter = 'ia'
    q = 'key:' + wkey
    stats.begin('solr', url=wkey)
    solr_select = solr_select_url + "?version=2.2&q.op=AND&q=%s&rows=10&fl=%s&qt=standard&wt=json&fq=type:work" % (q, filter)
    reply = requests.get(solr_select).json()
    stats.end()
    print(reply)
    if reply['response']['numFound'] == 0:
        return []
    return reply["response"]['docs'][0].get(filter, [])
def execute_solr_query(url):
    """
    Returns a requests.Response or None
    """
    stats.begin("solr", url=url)
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.HTTPError:
        logger.exception("Failed solr query")
        return None
    finally:
        stats.end()
    return response
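# A hedged usage sketch for execute_solr_query above: callers build the full
# select URL themselves and must handle the None returned on HTTP errors.
# The helper name, base URL argument and query string below are illustrative
# only, not taken from the codebase.
def _count_fulltext_docs_demo(solr_base_url):
    url = solr_base_url + '/select?q=has_fulltext:true&rows=0&wt=json'
    response = execute_solr_query(url)
    if response is None:
        # The failed query has already been logged; treat it as zero results.
        return 0
    return response.json()['response']['numFound']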
def _get_count_docs(ndays):
    """Returns the count docs from admin couchdb database.

    This function is memoized to avoid accessing couchdb for every request.
    """
    admin_db = couchdb.Database(config.admin.counts_db)
    end = datetime.datetime.now().strftime("counts-%Y-%m-%d")
    start = (datetime.datetime.now() - datetime.timedelta(days=ndays)).strftime("counts-%Y-%m-%d")

    stats.begin("couchdb")
    docs = [x.doc for x in admin_db.view("_all_docs",
                                         startkey_docid=start,
                                         endkey_docid=end,
                                         include_docs=True)]
    stats.end()
    return docs