def GET(self, query):
    # Serve HTML search results against the archive.org Solr index.
    qs = web.ctx.query
    if qs.startswith('?'):
        qs = qs[1:]
    params = cgi.parse_qs(qs)

    if 'start' not in params:
        start = 0
    else:
        start = params['start'][0]
        # XXX hack for .html ending -- remove once fixed
        if start.endswith('.html'):
            start = start[:-5]
        start = int(start)

    q = params['q'][0]
    qq = urllib.quote(q)
    solrUrl = ('http://se.us.archive.org:8983/solr/select?q=' + qq
               + '+AND+' + pubInfo['query_base']
               + '&fl=identifier,title,creator,oai_updatedate,date,contributor,publisher,subject,language,format'
               + '&rows=' + str(numRows)
               + '&start=' + str(start * numRows)
               + '&wt=json')
    titleFragment = 'search results for ' + q
    urn = pubInfo['urnroot'] + ':search:%s:%d' % (qq, start)
    ingestor = catalog.ingest.SolrToCatalog(pubInfo, solrUrl, urn,
                                            start=start, numRows=numRows,
                                            # XXX adding .html to end...
                                            urlBase='/search?q=%s&start=' % (qq),
                                            titleFragment=titleFragment)
    c = ingestor.getCatalog()

    web.header('Content-Type', 'text/html')
    r = output.ArchiveCatalogToHtml(c, device=getDevice())
    return r.toString()
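# The hand-concatenated Solr URL above is easy to get wrong. A sketch of the
# same request built with urllib.urlencode, which handles the escaping.
# buildSolrSearchUrl is a hypothetical helper, and it assumes
# pubInfo['query_base'] holds an unescaped Solr clause (the handler above
# concatenates it already escaped):
def buildSolrSearchUrl(q, start, rows):
    fields = ('identifier,title,creator,oai_updatedate,date,contributor,'
              'publisher,subject,language,format')
    params = urllib.urlencode({'q': q + ' AND ' + pubInfo['query_base'],
                               'fl': fields,
                               'rows': rows,
                               'start': start * rows,
                               'wt': 'json'})
    return 'http://se.us.archive.org:8983/solr/select?' + params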
def GET(self, letter, start):
    # Page through titles beginning with the given letter; a trailing
    # ".html" on the start segment selects HTML output instead of Atom.
    mode = 'xml'
    if not start:
        start = 0
    else:
        if start.endswith('.html'):
            start = start[:-5]
            mode = 'html'
        start = int(start)

    #TODO: add Image PDFs to this query
    solrUrl = (pubInfo['solr_base'] + '&q=firstTitle%3A' + letter.upper()
               + '&sort=titleSorter+asc&rows=' + str(numRows)
               + '&start=' + str(start * numRows))
    titleFragment = 'books starting with "%s"' % (letter.upper())
    urn = pubInfo['urnroot'] + ':%s:%d' % (letter, start)
    ingestor = catalog.ingest.SolrToCatalog(pubInfo, solrUrl, urn,
                                            start=start, numRows=numRows,
                                            urlBase='%s/alpha/%s/' % (pubInfo['url_base'], letter),
                                            titleFragment=titleFragment)
    c = ingestor.getCatalog()

    if 'html' == mode:
        web.header('Content-Type', 'text/html')
        r = output.ArchiveCatalogToHtml(c, device=getDevice())
    else:
        web.header('Content-Type', pubInfo['mimetype'])
        r = output.CatalogToAtom(c, fabricateContentElement=True)
    return r.toString()
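# The "optional start segment, with a trailing .html switching the output
# mode" parsing above recurs in several handlers in this module. A sketch of
# a helper that consolidates it (parseStartParam is hypothetical, not part of
# this module):
def parseStartParam(start):
    """E.g. '' -> (0, 'xml'); '3.html' -> (3, 'html'); '2' -> (2, 'xml')."""
    mode = 'xml'
    if not start:
        return 0, mode
    if start.endswith('.html'):
        start = start[:-5]
        mode = 'html'
    return int(start), mode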
def GET(self, start, extension):
    # Crawlable feed: large pages intended for search-engine crawlers.
    if extension == '.html':
        extension = 'html'
    else:
        extension = 'xml'

    if not start:
        start = 0
    else:
        if start.endswith('.html'):
            extension = 'html'
            start = start[:-5]
        start = int(start)

    crawlNumRows = 1000
    solrUrl = (pubInfo['solr_base'] + '&q=' + pubInfo['query_base']
               + '&rows=' + str(crawlNumRows)
               + '&start=' + str(start * crawlNumRows))
    titleFragment = '- crawlable feed'
    urn = pubInfo['urnroot'] + ':crawl:%d' % (start)
    ingestor = catalog.ingest.IASolrToCatalog(pubInfo, solrUrl, urn,
                                              start=start, numRows=crawlNumRows,
                                              urlBase='/catalog/crawlable/',
                                              titleFragment=titleFragment)
    c = ingestor.getCatalog()

    if 'html' == extension:
        web.header('Content-Type', 'text/html')
        r = output.ArchiveCatalogToHtml(c, device=getDevice())
    else:
        web.header('Content-Type', pubInfo['mimetype'])
        r = output.CatalogToAtom(c, fabricateContentElement=True)
    return r.toString()
def GET(self, mode='xml'):
    datestr = catalog.getCurrentDate()
    c = catalog.Catalog(title=pubInfo['name'] + ' Aggregator',
                        urn=pubInfo['urnroot'],
                        url=pubInfo['opdsroot'],
                        datestr=datestr,
                        author=pubInfo['name'],
                        authorUri=pubInfo['uri'])

    l = catalog.Link(url='alpha.' + mode, type=bookserver.catalog.Link.opds)
    e = catalog.Entry({'title': 'Alphabetical By Title',
                       'urn': pubInfo['urnroot'] + ':titles:all',
                       'updated': datestr,
                       'content': 'Alphabetical list of all titles.'},
                      links=[l])
    c.addEntry(e)

    l = catalog.Link(url='providers.' + mode, type=bookserver.catalog.Link.opds)
    e = catalog.Entry({'title': 'By Provider',
                       'urn': pubInfo['urnroot'] + ':providers:all',
                       'updated': datestr,
                       'content': 'Listing of all publishers and sellers.'},
                      links=[l])
    c.addEntry(e)

    #l = catalog.Link(url='devices.' + mode, type=types[mode])
    #e = catalog.Entry({'title': 'By Device',
    #                   'urn': pubInfo['urnroot'] + ':devices',
    #                   'updated': datestr,
    #                   'content': 'Filter by books compatible with your e-book reading device.'},
    #                  links=[l])
    #c.addEntry(e)

    osDescriptionDoc = 'http://bookserver.archive.org/aggregator/opensearch.xml'
    o = catalog.OpenSearch(osDescriptionDoc)
    c.addOpenSearch(o)

    if 'html' == mode:
        web.header('Content-Type', 'text/html')
        r = output.ArchiveCatalogToHtml(c, device=getDevice())
    else:
        web.header('Content-Type', pubInfo['mimetype'])
        r = output.CatalogToAtom(c)
    return r.toString()
def GET(self, extension):
    # IA is continuously scanning books. Since this OPDS file is constructed
    # from search engine results, let's change the updated date every midnight.
    # TODO: create a version of /alpha.xml with the correct updated dates,
    # and cache it for an hour to ease load on solr
    if 'html' == extension:
        linkType = 'text/html'
    elif 'xml' == extension:
        linkType = 'application/atom+xml'
    else:
        raise ValueError('Unsupported extension %s' % extension)

    datestr = getDateString()
    c = catalog.Catalog(title='Internet Archive - All Titles',
                        urn=pubInfo['urnroot'] + ':titles:all',
                        url=pubInfo['opdsroot'] + '/alpha.xml',
                        datestr=datestr,
                        author='Internet Archive',
                        authorUri='http://www.archive.org',
                        crawlableUrl=pubInfo['opdsroot'] + '/crawlable')

    for letter in string.ascii_uppercase:
        lower = letter.lower()
        l = catalog.Link(url=self.alphaURL(extension, lower, 0), type=linkType)
        e = catalog.Entry({'title': 'Titles: ' + letter,
                           'urn': pubInfo['urnroot'] + ':titles:' + lower,
                           'updated': datestr,
                           'content': 'Titles starting with ' + letter},
                          links=(l,))
        c.addEntry(e)

    osDescriptionDoc = 'http://bookserver.archive.org/catalog/opensearch.xml'
    o = catalog.OpenSearch(osDescriptionDoc)
    c.addOpenSearch(o)

    if 'xml' == extension:
        web.header('Content-Type', pubInfo['mimetype'])
        r = output.CatalogToAtom(c)
    else:
        web.header('Content-Type', 'text/html')
        r = output.ArchiveCatalogToHtml(c, device=getDevice())
    return r.toString()
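# alphaURL() is defined elsewhere on this class. Judging from how its result
# is used here and from the /alpha/<letter>/<start> handler above, it likely
# looks something like this sketch (hypothetical, for illustration only):
def alphaURL(self, extension, letter, start):
    url = 'alpha/%s/%d' % (letter, start)
    if 'html' == extension:
        url += '.html'
    return url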
def GET(self, extension):
    solrUrl = (pubInfo['solr_base'] + '&q=' + pubInfo['query_base']
               + '&sort=month+desc&rows=' + str(numRows))
    titleFragment = 'Most Downloaded Books in the last Month'
    urn = pubInfo['urnroot'] + ':downloads'
    ingestor = catalog.ingest.IASolrToCatalog(pubInfo, solrUrl, urn,
                                              titleFragment=titleFragment)
    c = ingestor.getCatalog()

    if 'xml' == extension:
        web.header('Content-Type', pubInfo['mimetype'])
        r = output.CatalogToAtom(c, fabricateContentElement=True)
        return r.toString()
    elif 'html' == extension:
        web.header('Content-Type', 'text/html')
        r = output.ArchiveCatalogToHtml(c, device=getDevice())
        return r.toString()
    else:
        web.seeother('/')
def GET(self, extension):
    # IA is continuously scanning books. Since this OPDS file is constructed
    # from search engine results, let's change the updated date every midnight.
    # TODO: create a version of /alpha.xml with the correct updated dates,
    # and cache it for an hour to ease load on solr
    datestr = catalog.getCurrentDate()
    c = catalog.Catalog(title=pubInfo['name'] + ' Aggregator - All Titles',
                        urn=pubInfo['urnroot'] + ':titles:all',
                        url=pubInfo['opdsroot'] + '/alpha.xml',
                        datestr=datestr,
                        author=pubInfo['name'],
                        authorUri=pubInfo['uri'])

    for letter in string.ascii_uppercase:
        lower = letter.lower()
        l = catalog.Link(url=self.alphaURL(extension, lower, 0),
                         type=bookserver.catalog.Link.opds)
        e = catalog.Entry({'title': 'Titles: ' + letter,
                           'urn': pubInfo['urnroot'] + ':titles:' + lower,
                           'updated': datestr,
                           'content': 'Titles starting with ' + letter},
                          links=[l])
        c.addEntry(e)

    osDescriptionDoc = 'http://bookserver.archive.org/aggregator/opensearch.xml'
    o = catalog.OpenSearch(osDescriptionDoc)
    c.addOpenSearch(o)

    web.header('Content-Type', types[extension])
    if 'xml' == extension:
        r = output.CatalogToAtom(c)
    else:
        r = output.ArchiveCatalogToHtml(c, device=getDevice())
    return r.toString()
def GET(self, mode):
    #TODO: get correct updated dates
    datestr = catalog.getCurrentDate()
    c = catalog.Catalog(title=pubInfo['name'] + ' Aggregator - All Providers',
                        urn=pubInfo['urnroot'] + ':providers:all',
                        url=pubInfo['opdsroot'] + '/providers.' + mode,
                        datestr=datestr,
                        author=pubInfo['name'],
                        authorUri=pubInfo['uri'])

    if 'html' == mode:
        ext = '.html'  # $$$ should do URL mapping in output side?
    else:
        ext = ''

    for provider in providers:
        l = catalog.Link(url='provider/' + provider + '/0' + ext,
                         type=bookserver.catalog.Link.opds)
        e = catalog.Entry({'title': providers[provider],
                           'urn': pubInfo['urnroot'] + ':providers:' + provider,
                           'updated': datestr,
                           'content': 'All Titles for provider ' + provider},
                          links=[l])
        c.addEntry(e)

    osDescriptionDoc = 'http://bookserver.archive.org/aggregator/opensearch.xml'
    o = catalog.OpenSearch(osDescriptionDoc)
    c.addOpenSearch(o)

    web.header('Content-Type', types[mode])
    if 'xml' == mode:
        r = output.CatalogToAtom(c)
    else:
        r = output.ArchiveCatalogToHtml(c, device=getDevice())
    return r.toString()
def GET(self, domain, start):
    # Page through all titles for a single provider domain.
    mode = 'xml'
    if not start:
        start = 0
    else:
        if start.endswith('.html'):
            start = start[:-5]
            mode = 'html'
        start = int(start)

    #TODO: add Image PDFs to this query
    solrUrl = (pubInfo['solr_base'] + '&q=provider%3A' + domain
               + '&sort=titleSorter+asc&rows=' + str(numRows)
               + '&start=' + str(start * numRows))
    titleFragment = 'books for provider ' + providers[domain]
    urn = pubInfo['urnroot'] + ':provider:%s:%d' % (domain, start)
    ingestor = catalog.ingest.SolrToCatalog(pubInfo, solrUrl, urn,
                                            start=start, numRows=numRows,
                                            urlBase='%s/provider/%s/' % (pubInfo['url_base'], domain),
                                            titleFragment=titleFragment)
    c = ingestor.getCatalog()

    web.header('Content-Type', types[mode])
    if 'xml' == mode:
        r = output.CatalogToAtom(c, fabricateContentElement=True)
    else:
        r = output.ArchiveCatalogToHtml(c, device=getDevice(), provider=domain)
    return r.toString()
def GET(self, query):
    # Serve HTML search results, optionally narrowed by provider and device.
    qs = web.ctx.query
    if qs.startswith('?'):
        qs = qs[1:]
    params = cgi.parse_qs(qs)

    if 'start' not in params:
        start = 0
    else:
        start = params['start'][0]
        # XXX hack for .html ending -- remove once fixed
        if start.endswith('.html'):
            start = start[:-5]
        start = int(start)

    if 'q' in params:
        q = params['q'][0]
    else:
        q = ''

    # Provider-specific search
    if 'provider' in params:
        providerMatch = re.search(r'(\w+)$', params['provider'][0])
        if providerMatch:
            provider = providerMatch.group(0)
            if not re.search('provider:', q):
                if len(q) > 0:
                    q += ' AND '
                q += 'provider:%s' % provider
        else:
            provider = None
    else:
        provider = None

    # Device-specific search
    # $$$ extend to other devices
    if 'device' in params:
        deviceStr = params['device'][0]
        if re.search('Kindle', deviceStr):
            formatStr = 'format:mobi'
            if not re.search(formatStr, q):  # XXX brittle
                if len(q) > 0:
                    q += ' AND '
                q += formatStr

    qq = urllib.quote(q)
    solrUrl = (pubInfo['solr_base'] + '&q=' + qq
               + '&sort=titleSorter+asc&rows=' + str(numRows)
               + '&start=' + str(start * numRows))
    #solrUrl = pubInfo['solr_base'] + '?q='+qq+'+AND+mediatype%3Atexts+AND+format%3A(LuraTech+PDF)&fl=identifier,title,creator,oai_updatedate,date,contributor,publisher,subject,language,format&rows='+str(numRows)+'&start='+str(start*numRows)+'&wt=json'
    titleFragment = 'search results for ' + q
    urn = pubInfo['urnroot'] + ':search:%s:%d' % (qq, start)
    ingestor = catalog.ingest.SolrToCatalog(pubInfo, solrUrl, urn,
                                            start=start, numRows=numRows,
                                            # XXX assuming calling from archive.org/bookserver/catalog
                                            # XXX HTML output is adding .html to end...
                                            urlBase='/bookserver/catalog/search?q=%s&start=' % (qq),
                                            titleFragment=titleFragment)
    c = ingestor.getCatalog()

    web.header('Content-Type', 'text/html')
    r = output.ArchiveCatalogToHtml(c, device=getDevice(), query=q,
                                    provider=provider)
    return r.toString()
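# Both filter blocks above follow the same pattern: append a clause with
# " AND " unless it already appears in the query. A sketch of that pattern as
# a helper, keeping the same brittle substring check flagged by the XXX above
# (addFilter is hypothetical, not part of this module):
def addFilter(q, clause):
    """addFilter('dickens', 'format:mobi') -> 'dickens AND format:mobi'"""
    if re.search(clause, q):
        return q
    if len(q) > 0:
        q += ' AND '
    return q + clause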
def GET(self, url):
    # Root navigation feed for the catalog.
    mode = 'xml'
    if url and url.endswith('.html'):
        mode = 'html'

    datestr = getDateString()
    c = catalog.Catalog(title='Internet Archive Catalog',
                        urn=pubInfo['urnroot'],
                        url=pubInfo['opdsroot'] + '/',
                        datestr=datestr,
                        author='Internet Archive',
                        authorUri='http://www.archive.org',
                        crawlableUrl=pubInfo['opdsroot'] + '/crawlable')

    if 'html' == mode:
        links = {'alpha': 'alpha.html',
                 'downloads': 'downloads.html',
                 'new': 'new.html'}
        linkType = 'text/html'
    else:
        links = {'alpha': 'alpha.xml',
                 'downloads': 'downloads.xml',
                 'new': 'new'}
        linkType = 'application/atom+xml'

    l = catalog.Link(url=links['alpha'], type=linkType)
    e = catalog.Entry({'title': 'Alphabetical By Title',
                       'urn': pubInfo['urnroot'] + ':titles:all',
                       'updated': datestr,
                       'content': 'Alphabetical list of all titles.'},
                      links=(l,))
    c.addEntry(e)

    l = catalog.Link(url=links['downloads'], type=linkType)
    e = catalog.Entry({'title': 'Most Downloaded Books',
                       'urn': pubInfo['urnroot'] + ':downloads',
                       'updated': datestr,
                       'content': 'The most downloaded books from the Internet Archive in the last month.'},
                      links=(l,))
    c.addEntry(e)

    l = catalog.Link(url=links['new'], type=linkType)
    e = catalog.Entry({'title': 'Recent Scans',
                       'urn': pubInfo['urnroot'] + ':new',
                       'updated': datestr,
                       'content': 'Books most recently scanned by the Internet Archive.'},
                      links=(l,))
    c.addEntry(e)

    osDescriptionDoc = 'http://bookserver.archive.org/catalog/opensearch.xml'
    o = catalog.OpenSearch(osDescriptionDoc)
    c.addOpenSearch(o)

    if 'html' == mode:
        web.header('Content-Type', 'text/html')
        r = output.ArchiveCatalogToHtml(c, device=getDevice())
    else:
        web.header('Content-Type', pubInfo['mimetype'])
        r = output.CatalogToAtom(c)
    return r.toString()
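# These GET methods belong to web.py handler classes; the URL map that binds
# paths to them lives elsewhere in the application. A minimal sketch of how
# web.py wires such routes, assuming web.py 0.3-style application objects --
# the patterns and class names below are assumptions for illustration, not
# this app's actual map:
#
#     import web
#
#     urls = ('/catalog/alpha/([a-z])/(.*)', 'alpha',   # hypothetical class name
#             '/catalog/search(.*)',         'search')  # hypothetical class name
#     app = web.application(urls, globals())
#
#     if __name__ == '__main__':
#         app.run()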