def search(self, environ, start_response):
    """WSGI method, called by the wsgi app for requests that matches
    ``searchendpoint``.

    Parses the query string, runs the fulltext query (boosting hits of
    type "sfs"), then renders facets, the hit list and a pager through
    the ``xsl/search.xsl`` template.
    """
    queryparams = self._search_parse_query(environ['QUERY_STRING'])
    # massage queryparams['issued'] if present (the query layer wants a
    # datetime range, not a bare year), then restore it afterwards so
    # pager/facet links keep the plain year form
    y = None
    if 'issued' in queryparams:
        y = int(queryparams['issued'])
        queryparams['issued'] = Between(datetime(y, 1, 1),
                                        datetime(y, 12, 31, 23, 59, 59))
    boost_types = [("sfs", 10)]
    res, pager = self._search_run_query(queryparams,
                                        boost_types=boost_types)
    if y:
        queryparams['issued'] = str(y)
    # page title, e.g. "3 träffar för 'needle'"
    if pager['totalresults'] == 1:
        title = "1 träff"
    else:
        title = "%s träffar" % pager['totalresults']
    title += " för '%s'" % queryparams.get("q")
    body = html.Body()
    # only Elasticsearch-style results carry aggregations; render the
    # facet sidebar when present
    if hasattr(res, 'aggregations'):
        body.append(
            self._search_render_facets(res.aggregations, queryparams,
                                       environ))
    for r in res:
        if 'label' not in r:
            label = r['uri']
        elif isinstance(r['label'], list):
            label = str(r['label'])  # flattens any nested element
                                     # structure, eg
                                     # <p><strong><em>foo</em></strong></p>
                                     # -> foo
        else:
            label = r['label']
        # each hit: a bold link, optional highlighted text, optional
        # inner (sub-document) hits
        rendered_hit = html.Div(
            [html.B([elements.Link(label, uri=r['uri'])],
                    **{'class': 'lead'})], **{'class': 'hit'})
        if r.get('text'):
            rendered_hit.append(html.P([r.get('text', '')]))
        if 'innerhits' in r:
            for innerhit in r['innerhits']:
                rendered_hit.append(self._search_render_innerhit(innerhit))
        body.append(rendered_hit)
    pagerelem = self._search_render_pager(pager, queryparams,
                                          environ['PATH_INFO'])
    body.append(
        html.Div([
            html.P(["Träff %(firstresult)s-%(lastresult)s "
                    "av %(totalresults)s" % pager]), pagerelem
        ], **{'class': 'pager'}))
    data = self._transform(title, body, environ,
                           template="xsl/search.xsl")
    return self._return_response(data, start_response)
def exception(self, environ, start_response):
    """WSGI method: render the currently-handled exception as a
    500 error page with traceback and environment diagnostics."""
    import traceback
    from pprint import pformat
    etype, evalue, etb = sys.exc_info()
    formatted = traceback.format_exception(etype, evalue, etb)
    # the last formatted line ("ExceptionType: message") doubles as the
    # page title
    page_title = formatted[-1]
    sections = [
        html.H1(self.exception_heading),
        html.P([self.exception_description]),
        html.H2("Traceback"),
        html.Pre(["\n".join(formatted)]),
        html.H2("Variables"),
        html.Pre(["request_uri: %s\nos.getcwd(): %s" %
                  (request_uri(environ), os.getcwd())]),
        html.H2("environ"),
        html.Pre([pformat(environ)]),
        html.H2("sys.path"),
        html.Pre([pformat(sys.path)]),
        html.H2("os.environ"),
        html.Pre([pformat(dict(os.environ))]),
    ]
    body = html.Body([html.Div(sections)])
    msg = self._transform(page_title, body, environ)
    return self._return_response(msg, start_response,
                                 status="500 Internal Server Error",
                                 contenttype="text/html")
def search(self, environ, start_response):
    """WSGI method, called by the wsgi app for requests that matches
    ``searchendpoint``.

    Runs the fulltext query and renders the hit list plus a pager
    through the ``xsl/search.xsl`` template.
    """
    queryparams = self._search_parse_query(environ['QUERY_STRING'])
    res, pager = self._search_run_query(queryparams)
    # page title, e.g. "3 matches for 'needle'"
    if pager['totalresults'] == 1:
        title = "1 match"
    else:
        title = "%s matches" % pager['totalresults']
    title += " for '%s'" % queryparams.get("q")
    body = html.Body()
    for r in res:
        # fall back to the document URI when no title is available,
        # and prefix with the identifier when one exists
        if 'dcterms_title' not in r or r['dcterms_title'] is None:
            r['dcterms_title'] = r['uri']
        if r.get('dcterms_identifier', False):
            r['dcterms_title'] = r['dcterms_identifier'] + ": " + r[
                'dcterms_title']
        body.append(
            html.Div([
                html.H2([elements.Link(r['dcterms_title'], uri=r['uri'])]),
                r.get('text', '')
            ], **{'class': 'hit'}))
    pagerelem = self._search_render_pager(pager, queryparams,
                                          environ['PATH_INFO'])
    body.append(
        html.Div([
            html.P(["Results %(firstresult)s-%(lastresult)s "
                    "of %(totalresults)s" % pager]), pagerelem
        ], **{'class': 'pager'}))
    data = self._transform(title, body, environ,
                           template="xsl/search.xsl")
    return self._return_response(data, start_response)
def test_highlighted_snippet(self):
    # A single hit whose text contains two highlighted ("match") spans;
    # verify the rendered search page reproduces the highlighted markup
    # inside the hit section.
    res = ([{'title': 'Example',
             'uri': 'http://example.org/base/123/b1',
             'text': html.P(['sollicitudin justo ',
                             html.Strong(['needle'], **{'class': 'match'}),
                             ' tempor ut eu enim ... himenaeos. ',
                             html.Strong(['Needle'], **{'class': 'match'}),
                             ' id tincidunt orci'])}],
           {'pagenum': 1,
            'pagecount': 1,
            'firstresult': 1,
            'lastresult': 1,
            'totalresults': 1})
    self.builder.query_string = "q=needle"
    # stub out the fulltext index so query() returns our canned result
    config = {'connect.return_value': Mock(**{'query.return_value': res})}
    with patch('ferenda.wsgiapp.FulltextIndex', **config):
        status, headers, content = self.call_wsgi()
    self.assertResponse("200 OK",
                        {'Content-Type': 'text/html; charset=utf-8'},
                        None, status, headers, None)
    t = etree.fromstring(content)
    docs = t.findall(".//section[@class='hit']")
    # docs[0][1] is the hit text element (docs[0][0] is the heading/link)
    self.assertEqualXML(res[0][0]['text'].as_xhtml(), docs[0][1],
                        namespace_aware=False)
def _search_render_facets(self, facets, queryparams, environ):
    """Render the facet sidebar for a search result page.

    For each known facet dimension (type, creator, issued): if the
    user has already selected a value, show that selection with a
    remove-link; otherwise list every bucket with a link that adds the
    bucket's value to the current query.  Returns a ``html.Div`` with
    class ``facets``.
    """
    facetgroups = []
    commondata = self.repos[0].commondata
    searchurl = request_uri(environ, include_query=False)
    for facetresult in ('type', 'creator', 'issued'):
        if facetresult in facets:
            if facetresult in queryparams:
                # the user has selected a value for this
                # particular facet, we should not display all
                # buckets (but offer a link to reset the value)
                qpcopy = dict(queryparams)
                del qpcopy[facetresult]
                href = "%s?%s" % (searchurl, urlencode(qpcopy))
                val = queryparams[facetresult]
                # map raw values to human-readable labels
                if facetresult == "creator":
                    val = self.repos[0].lookup_label(val)
                elif facetresult == "type":
                    val = self.repolabels.get(val, val)
                lbl = "%s: %s" % (self.facetlabels.get(
                    facetresult, facetresult), val)
                facetgroups.append(
                    html.LI([
                        lbl,
                        html.A(
                            "\xa0", **{
                                'href': href,
                                'class': 'glyphicon glyphicon-remove'
                            })
                    ]))
            else:
                facetgroup = []
                for bucket in facets[facetresult]['buckets']:
                    if facetresult == 'type':
                        lbl = self.repolabels.get(bucket['key'],
                                                  bucket['key'])
                        key = bucket['key']
                    elif facetresult == 'creator':
                        # prefer skos:altLabel over foaf:name when the
                        # creator resource has one
                        k = URIRef(bucket['key'])
                        pred = SKOS.altLabel if commondata.value(
                            k, SKOS.altLabel) else FOAF.name
                        lbl = commondata.value(k, pred)
                        key = bucket['key']
                    elif facetresult == "issued":
                        lbl = bucket["key_as_string"]
                        key = lbl
                    # link that narrows the current query to this bucket
                    qpcopy = dict(queryparams)
                    qpcopy[facetresult] = key
                    href = "%s?%s" % (searchurl, urlencode(qpcopy))
                    facetgroup.append(
                        html.LI([
                            html.A("%s" % (lbl), **{'href': href}),
                            html.Span([str(bucket['doc_count'])],
                                      **{'class': 'badge pull-right'})
                        ]))
                lbl = self.facetlabels.get(facetresult, facetresult)
                facetgroups.append(
                    html.LI([html.P([lbl]), html.UL(facetgroup)]))
    return html.Div(facetgroups, **{'class': 'facets'})
def _search_render_innerhit(self, innerhit):
    """Render one inner (sub-document) hit as a paragraph.

    The paragraph starts with a link to the hit (labelled
    '(beteckning saknas)' when no label exists), followed by ": " and
    any highlighted text snippets.  The content list is built fresh so
    the caller's ``innerhit`` dict is not mutated (the original
    implementation inserted into ``innerhit['text']`` in place).
    """
    content = [
        elements.LinkMarkup(innerhit.get('label', ['(beteckning saknas)']),
                            uri=innerhit['uri']),
        ": ",
    ]
    content.extend(innerhit.get('text', []))
    return html.P(content, **{'class': 'innerhit'})
def stream(self, environ, start_response):
    """WSGI method, called by the wsgi app for requests that indicate
    the need for a streaming response.

    Asks each repo's requesthandler whether it supports the request;
    the first one that does streams the response itself.  Refusal
    reasons are collected and shown on the 404 page when no repo
    handles the path.
    """
    path = environ['PATH_INFO']
    if not isinstance(path, str):
        path = path.decode("utf-8")
    fullpath = self.config.documentroot + path
    # Static resources (/rsrc, /robots.txt) that exist on disk are
    # never delegated to a repo; everything else is offered to each
    # repo in turn.  (The unused `fp = None` from the non-streaming
    # sibling method has been dropped.)
    reasons = OrderedDict()
    if not ((path.startswith("/rsrc") or path == "/robots.txt") and
            os.path.exists(fullpath)):
        for repo in self.repos:
            supports = repo.requesthandler.supports(environ)
            if supports:
                # the repo takes over the response entirely
                return repo.requesthandler.stream(environ, start_response)
            elif hasattr(supports, 'reason'):
                reasons[repo.alias] = supports.reason
            else:
                reasons[repo.alias] = '(unknown reason)'
    # if we reach this, no repo handled the path: render a 404 page
    # listing why each repo declined
    mimetype = "text/html"
    reasonmsg = "\n".join(["%s: %s" % (k, reasons[k]) for k in reasons])
    msgbody = html.Body([
        html.H1("Document not found"),
        html.P(["The path %s was not found at %s" % (path, fullpath)]),
        html.P(["Examined %s repos" % (len(self.repos))]),
        html.Pre([reasonmsg])
    ])
    iterdata = self._transform("404 Not found", msgbody, environ)
    status = "404 Not Found"
    length = None
    return self._return_response(iterdata, start_response, status,
                                 mimetype, length)
def test_html(self):
    # test 2: use element.html elements only, to make a similar
    # document (although without metadata about
    # sections/subsections and classes). Uses some HTML5 elements
    # (summary, section) that are converted to divs when rendering
    # as XHTML 1.1
    body = html.Body([
        html.H1(['Toplevel heading']),
        html.Summary(['Introductory preamble']),
        html.Section([
            html.H2(['First section']),
            html.P(['Some text']),
            html.Section([
                html.H3(['First subsection']),
                html.P(['More text'])])
        ]),
        html.Section([
            html.H2(['Second section']),
            html.P(['Even more text'])])
    ])
    want = """
<body xmlns="http://www.w3.org/1999/xhtml"
      about="http://localhost:8000/res/base/basefile">
  <h1>Toplevel heading</h1>
  <div class="summary">Introductory preamble</div>
  <div class="section">
    <h2>First section</h2>
    <p>Some text</p>
    <div class="section">
      <h3>First subsection</h3>
      <p>More text</p>
    </div>
  </div>
  <div class="section">
    <h2>Second section</h2>
    <p>Even more text</p>
  </div>
</body>
"""
    self._test_asxhtml(want, body)
def mkres(page=1, pagesize=10, total=25):
    """Build a fake ``(hits, pager)`` search-result tuple for *page*.

    Generates ``pagesize`` dummy hit dicts (fewer on the last page),
    each pointing at a distinct section URI, plus a pager dict of the
    shape the wsgi search code expects.
    """
    hits = []
    for i in range((page - 1) * pagesize, min(page * pagesize, total)):
        hits.append(
            {'title': '',
             'uri': 'http://example.org/base/123/c#S%d' % ((i * 2) - 1),
             'text': html.P(['This is a needle document'])})
    # ceiling division; the previous int(total / pagesize) + 1
    # overcounted by one whenever total was an exact multiple of
    # pagesize (e.g. total=20, pagesize=10 gave 3 pages, not 2)
    pagecount = (total + pagesize - 1) // pagesize
    return (hits, {'pagenum': page,
                   'pagecount': pagecount,
                   'firstresult': (page - 1) * pagesize + 1,
                   'lastresult': (page - 1) * pagesize + len(hits),
                   'totalresults': total})
def test_elements_from_soup(self):
    # Parse markup containing elements that have no counterpart in the
    # element class hierarchy (<marquee>, <center>) plus a comment;
    # the expected result shows these are all absent from the converted
    # tree, while head/title/body/p survive.
    soup = BeautifulSoup(
        """<html>
<head>
  <title>Example doc</title>
</head>
<body>
  <marquee>Hello world</marquee>
  <!-- Hello world -->
  <center>Hello world</center>
  <p>That's enough of this nonsense</p>
</body>""", "lxml")
    got = html.elements_from_soup(soup.html)
    self.assertEqual(
        html.HTML([
            html.Head([html.Title(["Example doc"])]),
            html.Body([html.P(["That's enough of this nonsense"])])
        ]), got)
def test_elements_from_soup(self):
    # Round-trip a small document through elements_from_soup and
    # compare the serialized element tree against a hand-built
    # equivalent (attributes like class/id/src/href must carry over).
    from ferenda.elements import html
    soup = BeautifulSoup(
        """<body>
<h1>Sample</h1>
<div class="main">
  <img src="xyz.png"/>
  <p>Some <b>text</b></p>
  <dl>
    <dt>Term 1</dt>
    <dd>Definition 1</dd>
  </dl>
</div>
<div id="foot">
  <hr/>
  <a href="/">home</a> - <a href="/about">about</a>
</div>
</body>""", "lxml")
    body = html.elements_from_soup(soup.body)
    # print("Body: \n%s" % serialize(body))
    result = html.Body([
        html.H1(["Sample"]),
        html.Div([
            html.Img(src="xyz.png"),
            html.P(["Some ", html.B(["text"])]),
            html.DL([html.DT(["Term 1"]), html.DD(["Definition 1"])])
        ], **{"class": "main"}),
        html.Div([
            html.HR(),
            html.A(["home"], href="/"),
            " - ",
            html.A(["about"], href="/about")
        ], id="foot")
    ])
    self.maxDiff = 4096
    self.assertEqual(serialize(body), serialize(result))
def handle_search(self, request, **values):
    """Request handler for search: runs the fulltext query and renders
    the hit list plus a pager as an HTML Response.

    ``request`` is a Werkzeug-style request object (has ``args``,
    ``path`` and ``environ``); ``values`` is accepted for routing
    compatibility but unused.
    """
    res, pager = self._search_run_query(request.args)
    # page title, e.g. "3 matches for 'needle'"
    if pager['totalresults'] == 1:
        title = "1 match"
    else:
        title = "%s matches" % pager['totalresults']
    title += " for '%s'" % request.args.get("q")
    body = html.Body()
    for r in res:
        # fall back to the document URI when no title is available,
        # and prefix with the identifier when one exists
        if 'dcterms_title' not in r or r['dcterms_title'] is None:
            r['dcterms_title'] = r['uri']
        if r.get('dcterms_identifier', False):
            r['dcterms_title'] = r['dcterms_identifier'] + ": " + r[
                'dcterms_title']
        body.append(
            html.Div([
                html.H2([elements.Link(r['dcterms_title'], uri=r['uri'])]),
                r.get('text', '')
            ], **{'class': 'hit'}))
    pagerelem = self._search_render_pager(pager, dict(request.args),
                                          request.path)
    body.append(
        html.Div([
            html.P(["Results %(firstresult)s-%(lastresult)s "
                    "of %(totalresults)s" % pager]), pagerelem
        ], **{'class': 'pager'}))
    data = self._transform(title, body, request.environ,
                           template="xsl/search.xsl")
    return Response(data, mimetype="text/html")
def test_search_multiple(self):
    # Three hits for "q=part": one with title+identifier, one with no
    # title at all, one with title only.  Exercises the title-fallback
    # logic in the search view as well as highlighted-snippet rendering.
    self.env['QUERY_STRING'] = "q=part"
    res = ([
        {'dcterms_title': 'Introduction',
         'dcterms_identifier': '123/a¶1',
         'uri': 'http://example.org/base/123/a#S1',
         'text': html.P([
             'This is ',
             html.Strong(['part'], **{'class': 'match'}),
             ' of document-',
             html.Strong(['part'], **{'class': 'match'}),
             ' section 1</p>'
         ])},
        {
            # 'title':'Definitions and Abbreviations',
            'uri': 'http://example.org/base/123/a#S2',
            'text': html.P([
                'second main document ',
                html.Strong(['part'], **{'class': 'match'})
            ])},
        {'dcterms_title': 'Example',
         'uri': 'http://example.org/base/123/a',
         'text': html.P([
             'This is ',
             html.Strong(['part'], **{'class': 'match'}),
             ' of the main document'
         ])}
    ], {
        'pagenum': 1,
        'pagecount': 1,
        'firstresult': 1,
        'lastresult': 3,
        'totalresults': 3
    })
    # stub out the fulltext index so query() returns our canned result
    config = {'connect.return_value': Mock(**{'query.return_value': res})}
    with patch('ferenda.wsgiapp.FulltextIndex', **config):
        status, headers, content = self.call_wsgi(self.env)
    self.assertResponse("200 OK",
                        {'Content-Type': 'text/html; charset=utf-8'},
                        None, status, headers, None)
    t = etree.parse(BytesIO(content))
    css = t.findall("head/link[@rel='stylesheet']")
    self.assertEqual(len(css), 3)  # bootstrap, bootstrap-theme, ferenda and sfs (?!)
    self.assertEqual(
        'https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css',
        css[0].get('href'))
    js = t.findall("body/script")
    self.assertEqual(len(js), 4)  # jquery, bootstrap, ferenda, typeahead
    resulthead = t.find(".//article/h1").text
    self.assertEqual(resulthead, "3 matches for 'part'")
    docs = t.findall(".//section[@class='hit']")
    self.assertEqual(len(docs), 3)
    self.assertEqual(docs[0][0].tag, 'h2')
    expect = res[0]
    self.assertIn(expect[0]['dcterms_title'], docs[0][0][0].text)
    self.assertEqual(expect[0]['uri'], docs[0][0][0].get('href'))
    self.assertEqualXML(expect[0]['text'].as_xhtml(), docs[0][1],
                        namespace_aware=False)
    # NOTE(review): expect[1] has no 'dcterms_title' key as written
    # above; this lookup presumably relies on the search view mutating
    # the hit dicts in place (setting dcterms_title = uri) during
    # call_wsgi — confirm against the wsgiapp search implementation
    self.assertIn(expect[1]['dcterms_title'], docs[1][0][0].text)
    self.assertEqual(expect[1]['uri'], docs[1][0][0].get('href'))
    self.assertEqualXML(expect[1]['text'].as_xhtml(), docs[1][1],
                        namespace_aware=False)
    self.assertIn(expect[2]['dcterms_title'], docs[2][0][0].text)
    self.assertEqual(expect[2]['uri'], docs[2][0][0].get('href'))
    self.assertEqualXML(expect[2]['text'].as_xhtml(), docs[2][1],
                        namespace_aware=False)
def test_meta(self):
    # test 3: use a mix of our own elements and html elements,
    # with meta + uri attached to some nodes.  The attached Graphs
    # should be rendered as RDFa (span elements) on the corresponding
    # divs; triples about unrelated subjects must not leak in.
    g1 = Graph().parse(format='n3', data="""
@prefix bibo: <http://purl.org/ontology/bibo/> .
@prefix dcterms: <http://purl.org/dc/terms/> .

<http://localhost:8000/res/base/basefile#S1> a bibo:DocumentPart;
    dcterms:title "First section";
    bibo:chapter "1" .
""")
    g2 = Graph().parse(format='n3', data="""
@prefix bibo: <http://purl.org/ontology/bibo/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://localhost:8000/res/base/basefile#S2> a bibo:DocumentPart;
    dcterms:title "Second section";
    bibo:chapter "2";
    dcterms:creator "Fred Bloggs"@en-GB;
    dcterms:issued "2013-05-10"^^xsd:date;
    owl:sameAs <http://example.org/s2> .

<http://example.org/s2> dcterms:title "Same same but different" .

<http://localhost:8000/res/base/unlrelated> dcterms:title "Unrelated document" .
""")
    body = el.Body([
        el.Heading(['Toplevel heading'], level=1),
        html.P(['Introductory preamble']),
        html.Div([
            html.P(['Some text']),
            el.Subsection([el.Paragraph(['More text'])],
                          ordinal='1.1',
                          title="First subsection")
        ], uri='http://localhost:8000/res/base/basefile#S1', meta=g1),
        el.Section([el.Paragraph(['Even more text'])],
                   uri='http://localhost:8000/res/base/basefile#S2',
                   meta=g2)
    ])
    want = """
<body xmlns="http://www.w3.org/1999/xhtml"
      about="http://localhost:8000/res/base/basefile">
  <h1>Toplevel heading</h1>
  <p>Introductory preamble</p>
  <div about="http://localhost:8000/res/base/basefile#S1"
       content="First section"
       property="dcterms:title"
       typeof="bibo:DocumentPart">
    <span href="http://localhost:8000/res/base/basefile"
          rel="dcterms:isPartOf"/>
    <span content="1" property="bibo:chapter" xml:lang=""/>
    <p>Some text</p>
    <div about="http://localhost:8000/res/base/basefile#S1.1"
         content="First subsection"
         property="dcterms:title"
         typeof="bibo:DocumentPart"
         class="subsection">
      <span href="http://localhost:8000/res/base/basefile#S1"
            rel="dcterms:isPartOf"/>
      <span about="http://localhost:8000/res/base/basefile#S1.1"
            content="1.1"
            property="bibo:chapter"/>
      <p>More text</p>
    </div>
  </div>
  <div about="http://localhost:8000/res/base/basefile#S2"
       class="section"
       content="Second section"
       property="dcterms:title"
       typeof="bibo:DocumentPart">
    <span href="http://localhost:8000/res/base/basefile"
          rel="dcterms:isPartOf"/>
    <span href="http://example.org/s2" rel="owl:sameAs">
      <span content="Same same but different"
            property="dcterms:title"
            xml:lang=""/>
    </span>
    <span content="2" property="bibo:chapter" xml:lang=""/>
    <span content="2013-05-10" property="dcterms:issued"
          datatype="xsd:date"/>
    <span content="Fred Bloggs" property="dcterms:creator"
          xml:lang="en-GB"/>
    <p>Even more text</p>
  </div>
</body>"""
    self._test_asxhtml(want, body)
def static(self, environ, start_response):
    """WSGI method, called by the wsgi app for all other requests not
    handled by :py:func:`~ferenda.Manager.search` or
    :py:func:`~ferenda.Manager.api`
    """
    path = environ['PATH_INFO']
    if not isinstance(path, str):
        path = path.decode("utf-8")
    fullpath = self.config.documentroot + path
    # we start by asking all repos "do you handle this path"?
    # default impl is to say yes if 1st seg == self.alias and the
    # rest can be treated as basefile yielding a existing
    # generated file. a yes answer contains a FileWrapper around
    # the repo-selected file and optionally length (but not
    # status, always 200, or mimetype, always text/html). None
    # means no.
    fp = None
    reasons = OrderedDict()
    if not ((path.startswith("/rsrc") or path == "/robots.txt") and
            os.path.exists(fullpath)):
        for repo in self.repos:
            supports = repo.requesthandler.supports(environ)
            if supports:
                fp, length, status, mimetype = repo.requesthandler.handle(
                    environ)
            elif hasattr(supports, 'reason'):
                reasons[repo.alias] = supports.reason
            else:
                reasons[repo.alias] = '(unknown reason)'
            if fp:
                # map the numeric status from the handler to a WSGI
                # status line
                status = {
                    200: "200 OK",
                    404: "404 Not found",
                    406: "406 Not Acceptable",
                    500: "500 Server error"
                }[status]
                iterdata = FileWrapper(fp)
                break
    # no repo handled the path: try to serve a file from documentroot
    if not fp:
        if self.config.legacyapi:
            # rewrite the path to some resources. FIXME:
            # shouldn't hardcode the "rsrc" path of the path
            if path == "/json-ld/context.json":
                fullpath = self.config.documentroot + "/rsrc/api/context.json"
            elif path == "/var/terms":
                fullpath = self.config.documentroot + "/rsrc/api/terms.json"
            elif path == "/var/common":
                fullpath = self.config.documentroot + "/rsrc/api/common.json"
        if os.path.isdir(fullpath):
            fullpath = fullpath + "index.html"
        if os.path.exists(fullpath):
            ext = os.path.splitext(fullpath)[1]
            # if not mimetypes.inited:
            #     mimetypes.init()
            mimetype = mimetypes.types_map.get(ext, 'text/plain')
            status = "200 OK"
            length = os.path.getsize(fullpath)
            fp = open(fullpath, "rb")
            iterdata = FileWrapper(fp)
        else:
            # nothing on disk either: render a 404 page listing why
            # each repo declined the request
            mimetype = "text/html"
            reasonmsg = "\n".join(
                ["%s: %s" % (k, reasons[k]) for k in reasons])
            msgbody = html.Body([
                html.H1("Document not found"),
                html.P([
                    "The path %s was not found at %s" % (path, fullpath)
                ]),
                html.P(["Examined %s repos" % (len(self.repos))]),
                html.Pre([reasonmsg])
            ])
            iterdata = self._transform("404 Not found", msgbody, environ)
            status = "404 Not Found"
            length = None
    return self._return_response(iterdata, start_response, status,
                                 mimetype, length)