def test_sesame_select(self, mock_get):
    """Exercise TripleStore.select() against a mocked SESAME HTTP
    endpoint in all three result formats (raw xml, json, python),
    and verify that an HTTP error surfaces as TriplestoreError."""
    store = TripleStore.connect("SESAME", "", "")
    rf = util.readfile
    # default format: raw sparql-results XML is passed through as bytes
    want = rf("test/files/triplestore/select-results.xml").encode()
    got = store.select("the-query")
    self.assertEqual(want, got)
    self.assertEqual(mock_get.call_count, 1)
    # json format: compare parsed structures, not raw bytes
    want = rf("test/files/triplestore/select-results.json")
    got = store.select("the-query", format="json").decode()
    self.assertEqual(json.loads(want), json.loads(got))
    self.assertEqual(mock_get.call_count, 2)
    # python format: date strings in the "issued" field are revived
    # as date objects by the object hook
    want = json.loads(
        rf("test/files/triplestore/select-results-python.json"),
        object_hook=util.make_json_date_object_hook("issued"))
    got = store.select("the-query", format="python")
    self.assertEqual(want, got)
    self.assertEqual(mock_get.call_count, 3)
    # a requests HTTPError should be wrapped in TriplestoreError
    with self.assertRaises(errors.TriplestoreError):
        mockresponse = Mock()
        mockresponse.text = "This is the actual error text"
        mock_get.side_effect = requests.exceptions.HTTPError(
            "Server error", response=mockresponse)
        got = store.select("the-query", format="python")
def test_toc(self):
    """Verify the orchestration done by the main toc() method; every
    helper it calls is mocked out."""
    # tests the main TOC method, not the helper methods (they are
    # tested separately)
    self.repo.facets = MagicMock()
    self.repo.facet_select = MagicMock()
    self.repo.facet_query = MagicMock()
    self.repo.faceted_data = MagicMock()
    self.repo.log = Mock()
    self.repo.toc_pagesets = Mock()
    self.repo.toc_select_for_pages = Mock()
    self.repo.toc_generate_pages = Mock()
    self.repo.toc_generate_first_page = Mock()
    with patch('json.dump'):
        self.repo.toc()
    # assert facet_query was properly called, error and info msg
    # was printed
    self.assertEqual("http://localhost:8000/dataset/base",
                     self.repo.facet_query.call_args[0][0])
    self.assertTrue(self.repo.log.error.called)
    self.assertTrue(self.repo.log.info.called)
    # and that the rest of the methods were NOT called
    self.assertFalse(self.repo.toc_pagesets.called)
    self.assertFalse(self.repo.toc_select_for_pages.called)
    self.assertFalse(self.repo.toc_generate_pages.called)

    # test2: facet_select returns something
    self.repo.faceted_data.return_value = ["fake", "data"]
    with patch('json.load'):
        self.repo.toc()
    # Now all other methods should be called
    self.assertTrue(self.repo.toc_pagesets.called)
    self.assertTrue(self.repo.toc_select_for_pages.called)
    self.assertTrue(self.repo.toc_generate_pages.called)
def test_ifneeded_parse(self):
    """The @ifneeded('parse') decorator should skip the wrapped function
    when the outfile is up to date, unless config.force is set."""
    @ifneeded("parse")
    def testfunc(repo, basefile):
        repo.called = True

    # mockdoc = Mock()
    # mockdoc.basefile="1234"
    mockbasefile = "1234"
    mockrepo = Mock()
    # use the real needed() implementation, bound to a fake datadir
    mockrepo.store.needed = DocumentStore(datadir='fake').needed
    mockrepo.called = False
    mockrepo.config.force = False
    # test 1: Outfile is newer - the ifneeded decorator should
    # make sure the actual testfunc code is never reached
    with patch('ferenda.util.outfile_is_newer', return_value=True):
        testfunc(mockrepo, mockbasefile)
    self.assertFalse(mockrepo.called)
    mockrepo.called = False
    # test 2: Outfile is older
    with patch('ferenda.util.outfile_is_newer', return_value=False):
        testfunc(mockrepo, mockbasefile)
    self.assertTrue(mockrepo.called)
    mockrepo.called = False
    # test 3: Outfile is newer, but the global force option was set
    mockrepo.config.force = True
    with patch('ferenda.util.outfile_is_newer', return_value=True):
        testfunc(mockrepo, mockbasefile)
    self.assertTrue(mockrepo.called)
    mockrepo.config.force = None
    mockrepo.called = False
def test_render(self, mock_graph):
    """Verify that the @render decorator calls render_xhtml and
    create_external_resources, and warns when doc.meta disagrees
    with the distilled graph."""
    @render
    def testfunc(repo, doc):
        pass
    mockdoc = Mock()
    mockrepo = Mock()
    mockrepo.store.parsed_path.return_value = "parsed_path.xhtml"
    # write a minimal XHTML+RDFa document for the decorator to distill
    with open("parsed_path.xhtml", "w") as fp:
        fp.write("""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:dct="http://purl.org/dc/terms/">
  <head about="http://example.org/doc">
    <title property="dct:title">Document title</title>
  </head>
  <body>
    <h1>Hello!</h1>
  </body>
</html>""")
    mockrepo.store.distilled_path.return_value = "distilled_path.xhtml"
    mockrepo.get_globals.return_value = {'symbol table': 'fake'}
    mockdoc.meta = MagicMock()  # need Magicmock which supports magic funcs like __iter__
    bodypart = MagicMock()
    bodypart.meta = MagicMock()
    mockdoc.body = [bodypart]
    mockdoc.meta.__iter__.return_value = []
    mockdoc.uri = "http://example.org/doc"
    with patch('ferenda.util.ensure_dir', return_value=True):
        testfunc(mockrepo, mockdoc)

    # 1 ensure that DocumentRepository.render_xhtml is called with
    # four arguments
    mockrepo.render_xhtml.assert_called_with(mockdoc, "parsed_path.xhtml")

    # 2 ensure that DocumentRepository.create_external_resources
    # is called with 1 argument
    mockrepo.create_external_resources.assert_called_with(mockdoc)

    # 3 ensure that a Graph object is created, its parse and
    # serialize methods called

    # FIXME: Why doesn't the patching work?!
    # self.assertTrue(mock_graph().parse.called)
    # self.assertTrue(mock_graph().serialize.called)

    # (4. ensure that a warning gets printed if doc.meta and
    # distilled_graph do not agree)
    mock_graph().__iter__.return_value = ['a', 'b']
    mockdoc.meta.__iter__.return_value = ['a', 'b', 'c']
    mockdoc.meta.serialize.return_value = b"<c>"
    with patch('ferenda.util.ensure_dir', return_value=True):
        testfunc(mockrepo, mockdoc)
    self.assertTrue(mockrepo.log.warning.called)
    os.remove("parsed_path.xhtml")
    os.remove("distilled_path.xhtml")
def test_handleerror(self):
    """@handleerror should log-and-swallow expected document errors,
    respect config.fatalexceptions, and always re-raise
    KeyboardInterrupt."""
    @handleerror
    def testfunc(repo, doc):
        if doc.exception:
            raise doc.exception
        else:
            return True

    mockrepo = Mock()
    mockrepo.config = MagicMock()  # must support __in__()
    mockdoc = Mock()
    # 1. should not raise an exception (but should call log.info
    # and util.robust_remove, and return false)
    with patch('ferenda.util.robust_remove') as robust_remove:
        mockdoc.exception = DocumentRemovedError
        self.assertFalse(testfunc(mockrepo, mockdoc))
        self.assertTrue(mockrepo.log.info.called)
        self.assertTrue(robust_remove.called)
    # 2. should raise the same exception
    mockdoc.exception = KeyboardInterrupt
    with self.assertRaises(KeyboardInterrupt):
        testfunc(mockrepo, mockdoc)
    # 3.1 Should raise the same exception
    mockdoc.exception = ParseError
    mockrepo.config.fatalexceptions = True
    with self.assertRaises(ParseError):
        testfunc(mockrepo, mockdoc)
    mockrepo.config.fatalexceptions = None
    # 3.2 Should not raise an exception (but should call log.error
    # and return false)
    mockdoc.exception = ParseError
    self.assertFalse(testfunc(mockrepo, mockdoc))
    self.assertTrue(mockrepo.log.error.called)
    # 4.1 Should raise the same exception
    mockdoc.exception = Exception
    mockrepo.config.fatalexceptions = True
    with self.assertRaises(Exception):
        testfunc(mockrepo, mockdoc)
    mockrepo.config.fatalexceptions = None
    # 4.2 Should not raise an exception
    mockdoc.exception = Exception
    self.assertFalse(testfunc(mockrepo, mockdoc))
    self.assertTrue(mockrepo.log.error.called)
    # 5. No exceptions - everything should go fine
    mockdoc.exception = None
    self.assertTrue(testfunc(mockrepo, mockdoc))
def test_dumpstore(self):
    """dumpstore() should fetch a serialized dump from the triple store
    and print it -- all real database access is mocked away."""
    devel = Devel()
    devel.config = Mock()
    # only test that TripleStore is called correctly, mock any
    # calls to any real database
    fake_store = Mock(**{'get_serialized.return_value':
                         b'[fake store content]'})
    mock_print = MagicMock()
    with patch('ferenda.devel.TripleStore',
               **{'connect.return_value': fake_store}):
        with patch('builtins.print', mock_print):
            devel.dumpstore(format="trix")
    mock_print.assert_has_calls([call("[fake store content]")])
def makeresponse(*args, **kwargs):
    """Serve the next canned HTTP response (a Mock) from the enclosing
    ``responses`` list; ``returned`` records one entry per call.

    Each element of ``responses`` is a (status_code, responsefile)
    pair, where responsefile may be None (no body) or a filename
    relative to test/files/triplestore/.

    Raises IndexError with a descriptive message once the canned
    responses are exhausted.
    """
    # FIX: the guard previously used '>', which could never be true
    # before the plain (message-less) IndexError raised by indexing
    # ``responses`` below; '>=' makes the descriptive error actually
    # fire at the point of exhaustion.
    if len(returned) >= len(responses):
        raise IndexError("Ran out of canned responses after %s calls" %
                         len(returned))
    resp = Mock()
    resp.status_code = responses[len(returned)][0]
    responsefile = responses[len(returned)][1]
    if responsefile:
        responsefile = "test/files/triplestore/" + responsefile
        resp.content = util.readfile(responsefile, "rb")
        resp.text = util.readfile(responsefile)
        # JSON fixtures additionally get a working .json() method
        if responsefile.endswith(".json"):
            data = json.loads(util.readfile(responsefile))
            resp.json = Mock(return_value=data)
    returned.append(True)
    return resp
def test_incomplete_entries(self): self.repo.faceted_data = Mock(return_value=self.faceted_data) # make our entries incomplete in various ways entry = DocumentEntry(self.repo.store.documententry_path("1")) entry.published = None entry.save() # try very hard to remove title from everywhere entry = DocumentEntry(self.repo.store.documententry_path("2")) del entry.title entry.save() g = rdflib.Graph().parse(self.repo.store.distilled_path("2")) g.remove((rdflib.URIRef("http://localhost:8000/res/base/2"), self.repo.ns['dcterms'].title, rdflib.Literal("Doc #2"))) with open(self.repo.store.distilled_path("2"), "wb") as fp: g.serialize(fp, format="pretty-xml") os.unlink(self.repo.store.distilled_path("3")) # entries w/o published date and w/o distilled file should not # be published, but w/o title is OK with silence(): # avoid warnings about stale entry files # since the downloaded and intermediate file # is missing, which would exist in a real # scenario self.assertEqual(len(list(self.repo.news_entries())), 23) # also make sure that corresponding faceted_entries do not # show these non-published entries self.assertEqual(len(self.repo.news_facet_entries()), 23)
def test_highlighted_snippet(self):
    """Search hits should render their highlighted snippet (with
    <strong class='match'> markers) inside the hit's section."""
    res = ([{'title': 'Example',
             'uri': 'http://example.org/base/123/b1',
             'text': html.P(['sollicitudin justo ',
                             html.Strong(['needle'], **{'class': 'match'}),
                             ' tempor ut eu enim ... himenaeos. ',
                             html.Strong(['Needle'], **{'class': 'match'}),
                             ' id tincidunt orci'])}],
           {'pagenum': 1,
            'pagecount': 1,
            'firstresult': 1,
            'lastresult': 1,
            'totalresults': 1})
    self.builder.query_string = "q=needle"
    config = {'connect.return_value': Mock(**{'query.return_value': res})}
    with patch('ferenda.wsgiapp.FulltextIndex', **config):
        status, headers, content = self.call_wsgi()
    self.assertResponse("200 OK",
                        {'Content-Type': 'text/html; charset=utf-8'},
                        None,
                        status, headers, None)
    t = etree.fromstring(content)
    docs = t.findall(".//section[@class='hit']")
    # the snippet markup must survive rendering unchanged
    self.assertEqualXML(res[0][0]['text'].as_xhtml(),
                        docs[0][1],
                        namespace_aware=False)
def test_write_atom_inline(self):
    """With inline content, news_write_atom should embed the parsed
    XHTML directly in <content type="xhtml"> (no src/hash attrs)."""
    self.repo.faceted_data = Mock(return_value=self.faceted_data)
    for basefile in range(25):
        de = DocumentEntry(
            self.repo.store.documententry_path(str(basefile)))
        util.writefile(self.repo.store.parsed_path(str(basefile)),
                       "<html><p>Document #%s</p></html>" % basefile)
        de.set_content(self.repo.store.parsed_path(str(basefile)),
                       self.repo.canonical_uri(str(basefile)),
                       inline=True)
        de.save()

    unsorted_entries = self.repo.news_facet_entries()
    # newest entry first
    entries = sorted(list(unsorted_entries),
                     key=itemgetter('updated'), reverse=True)
    self.repo.news_write_atom(entries,
                              'New and updated documents',
                              'main',
                              archivesize=6)
    tree = etree.parse('%s/base/feed/main.atom' % self.datadir)
    NS = "{http://www.w3.org/2005/Atom}"
    content = tree.find(".//" + NS + "content")
    self.assertIsNotNone(content)
    # inline content means no external reference attributes
    self.assertIsNone(content.get("src"))
    self.assertIsNone(content.get("hash"))
    self.assertEqual(content.get("type"), "xhtml")
    self.assertEqualXML(
        etree.tostring(content[0]),
        '<html xmlns="http://www.w3.org/2005/Atom" xmlns:le="http://purl.org/atompub/link-extensions/1.0"><p>Document #24</p></html>'
    )
def test_parameters_legacy(self):
    """Legacy-api query parameter names (type/title/issued) should be
    translated to qname form before reaching FulltextIndex.query()."""
    # legacy api
    res = ([], {'firstresult': 1, 'totalresults': 0})
    # FIXME: we leave out free=true (should map to schema_free=True)
    self.builder.query_string = "type=Standard&title=Hello+World&issued=2014-06-30&schema_free=true"
    self.app.config.legacyapi = True
    config = {'connect.return_value':
              Mock(**{'query.return_value': res,
                      'schema.return_value': {
                          'dcterms_issued': fulltextindex.Datetime(),
                          'schema_free': fulltextindex.Boolean(),
                          'dcterms_title': None,
                          'rdf_type': None}})}
    want = {'q': None,
            'dcterms_title': "Hello World",
            'dcterms_issued': datetime.datetime(2014, 6, 30, 0, 0, 0),
            'schema_free': True,
            # should be bibo:Standard or even
            # http://purl.org/ontology/bibo/Standard, but requires
            # proper context handling to work
            'rdf_type': '*Standard',
            'pagenum': 1,
            'pagelen': 10,
            'ac_query': False,
            'boost_repos': None,
            'exclude_repos': None,
            'include_fragments': None}
    with patch('ferenda.wsgiapp.FulltextIndex', **config):
        status, headers, content = self.call_wsgi()
    config['connect.return_value'].query.assert_called_once_with(**want)
def test_parameters(self):
    """Normal-api query parameters should be passed (typed, with
    defaults filled in) to FulltextIndex.query()."""
    # normal api
    res = ([], {'firstresult': 1, 'totalresults': 0})
    self.env[
        'QUERY_STRING'] = "rdf_type=bibo:Standard&dcterms_title=Hello+World&dcterms_issued=2014-06-30&schema_free=true"
    config = {
        'connect.return_value': Mock(
            **{
                'query.return_value': res,
                'schema.return_value': {
                    'dcterms_issued': fulltextindex.Datetime(),
                    'schema_free': fulltextindex.Boolean()
                }
            })
    }
    want = {
        'q': None,
        'dcterms_title': "Hello World",
        # issued is parsed into a datetime per the mocked schema
        'dcterms_issued': datetime.datetime(2014, 6, 30, 0, 0, 0),
        'schema_free': True,
        # FIXME: should be http://purl.org/ontology/bibo/Standard --
        # but requires that self.repos in wsgiapp is set up
        'rdf_type': 'bibo:Standard',
        'pagenum': 1,
        'pagelen': 10,
        'ac_query': False,
        'boost_types': None,
        'exclude_types': None
    }
    with patch('ferenda.wsgiapp.FulltextIndex', **config):
        status, headers, content = self.call_wsgi(self.env)
    config['connect.return_value'].query.assert_called_once_with(
        **want)
def test_runserver(self):
    """manager.run(['all', 'runserver']) should construct a WSGI server
    via make_server and call serve_forever() on it."""
    self._enable_repos()
    server = Mock()
    with patch('ferenda.manager.make_server',
               return_value=server) as factory:
        manager.run(["all", "runserver"])
    self.assertTrue(factory.called)
    self.assertTrue(server.serve_forever.called)
def test_preflight(self): log = Mock() # test 1: python too old with patch('ferenda.manager.sys') as sysmock: sysmock.version_info = (2, 5, 6, 'final', 0) sysmock.version = sys.version self.assertFalse(manager._preflight_check(log, verbose=True)) self.assertTrue(log.error.called) log.error.reset_mock() # test 2: modules are old / or missing with patch('importlib.import_module') as importmock: setattr(importmock.return_value, '__version__', '0.0.1') self.assertFalse(manager._preflight_check(log, verbose=True)) self.assertTrue(log.error.called) log.error.reset_mock() importmock.side_effect = ImportError self.assertFalse(manager._preflight_check(log, verbose=True)) self.assertTrue(log.error.called) log.error.reset_mock() # test 3: binaries are nonexistent or errors with patch('ferenda.manager.subprocess.call') as callmock: callmock.return_value = 127 self.assertFalse(manager._preflight_check(log, verbose=True)) self.assertTrue(log.error.called) log.error.reset_mock() callmock.side_effect = OSError self.assertFalse(manager._preflight_check(log, verbose=True)) self.assertTrue(log.error.called) log.error.reset_mock()
def test_queryindex(self):
    """Devel.queryindex() should print 'label (uri): text' for each hit
    returned by the (mocked) fulltext index."""
    res = [{'label': 'Doc #1',
            'uri': 'http://example.org/doc1',
            'text': 'matching doc 1'},
           {'label': 'Doc #2',
            'uri': 'http://example.org/doc2',
            'text': 'matching doc 2'}]
    pager = None
    config = {'connect.return_value':
              Mock(**{'query.return_value': (res, pager)})}
    printmock = MagicMock()
    with patch('ferenda.devel.FulltextIndex', **config):
        with patch('builtins.print', printmock):
            d = Devel()
            d.config = LayeredConfig(
                Defaults({
                    'indextype': 'a',
                    'indexlocation': 'b'
                }))
            d.queryindex("doc")
    want = """
Doc #1 (http://example.org/doc1): matching doc 1
Doc #2 (http://example.org/doc2): matching doc 2
""".strip()
    # reassemble everything that was handed to print()
    got = "\n".join([x[1][0] for x in printmock.mock_calls])
    self.maxDiff = None
    self.assertEqual(want, got)
def test_stats(self):
    """The ;stats endpoint should return JSON matching the stored
    fixture file."""
    self.env['PATH_INFO'] += ";stats"
    self.app.repos[0].faceted_data = Mock(return_value=self.fakedata)
    status, headers, content = self.call_wsgi(self.env)
    actual = json.loads(content.decode("utf-8"))
    with open("test/files/api/basicapi-stats.json") as fp:
        expected = json.load(fp)
    self.assertEqual(expected, actual)
def test_select(self):
    """Devel.select() should expand %(uri)s in the template file, print
    the commented query followed by the store's raw result, and report
    elapsed time (masked via mask_time for comparison)."""
    uri = "http://example.org/doc"
    with open("testselecttemplate.rq", "wb") as fp:
        fp.write("""PREFIX dcterms: <http://purl.org/dc/terms/>

SELECT ?p ?o
WHERE { <%(uri)s> ?p ?o . }
""".encode())

    result = """
[
    {
        "p": "http://purl.org/dc/terms/title",
        "o": "Document title"
    },
    {
        "p": "http://purl.org/dc/terms/identifier",
        "o": "Document ID"
    }
]""".lstrip().encode("utf-8")

    config = {
        'connect.return_value': Mock(**{'select.return_value': result})
    }
    printmock = MagicMock()
    with patch('ferenda.devel.TripleStore', **config):
        with patch('builtins.print', printmock):
            d = Devel()
            d.config = LayeredConfig(
                Defaults({
                    'storetype': 'a',
                    'storelocation': 'b',
                    'storerepository': 'c'
                }))
            d.select("testselecttemplate.rq", uri)
    want = """
# Constructing the following from b, repository c, type a
# PREFIX dcterms: <http://purl.org/dc/terms/>
#
# SELECT ?p ?o
# WHERE { <http://example.org/doc> ?p ?o . }
#
[
    {
        "p": "http://purl.org/dc/terms/title",
        "o": "Document title"
    },
    {
        "p": "http://purl.org/dc/terms/identifier",
        "o": "Document ID"
    }
]
# Selected in 0.001s
""".strip()
    got = "\n".join([x[1][0] for x in printmock.mock_calls])
    self.maxDiff = None
    self.assertEqual(self.mask_time(want), self.mask_time(got))
    os.unlink("testselecttemplate.rq")
def _myget(self, url, **kwargs): res = Mock() res.headers = collections.defaultdict(lambda: None) res.headers['Content-type'] = "text/html" res.status_code = 200 res.encoding = 'utf-8' if url == "http://example.org/": res.content = b"<p><a href='doc/a_.html'>ID: a</a></p>" elif url == "http://example.org/doc/a_.html": res.content = b"<p>This is doc A</p>" else: raise ValueError("Unknown url %s" % url) res.text = res.content.decode() return res
def test_timed(self):
    """A @timed-wrapped method should log elapsed time through its
    repo's logger with a two-element format-args tuple."""
    # Test that a wrapped method...
    @timed
    def testfunc(repo, doc):
        pass
    # ...passed a particular docrepo and doc
    repo = Mock()
    doc = Mock()
    doc.basefile = "1234"
    # ...has its instance's logger called...
    testfunc(repo, doc)
    logged = repo.log.info.call_args
    # ...with the correct method and arguments
    self.assertEqual(2, len(logged[0]))
    self.assertEqual('parse OK (%.3f sec)', logged[0][0])
def test_stats_legacy(self):
    """The ;stats endpoint in legacyapi mode should return JSON matching
    the legacy fixture file."""
    self.builder.path += ";stats"
    self.app.config.legacyapi = True
    # This used to be commented out -- was there a good reason for that?
    self.app.repos[0].faceted_data = Mock(return_value=self.fakedata)
    status, headers, content = self.call_wsgi()
    actual = json.loads(content)
    with open("test/files/api/basicapi-stats.legacy.json") as fp:
        expected = json.load(fp)
    self.assertEqual(expected, actual)
def test_timed(self):
    """A @timed-wrapped method should log '%s: parse OK (%.3f sec)' with
    the basefile as the first format argument."""
    # Test that a wrapped method...
    @timed
    def testfunc(repo, doc):
        pass
    # ...passed a particular docrepo and doc
    repo = Mock()
    doc = Mock()
    doc.basefile = "1234"
    # ...has its instance's logger called...
    testfunc(repo, doc)
    logged = repo.log.info.call_args[0]
    # ...with the correct method and arguments
    self.assertEqual(3, len(logged))
    self.assertEqual('%s: parse OK (%.3f sec)', logged[0])
    self.assertEqual("1234", logged[1])
def test_news(self):
    """Smoke-test the main news() method: with every helper mocked out,
    verify that news() invokes each of them."""
    # should test the main method, not the helpers. That'll
    # require mocking most methods.
    self.repo.news_facet_entries = MagicMock()
    self.repo.facets = MagicMock()
    self.repo.news_feedsets = Mock()
    self.repo.news_select_for_feeds = Mock()
    self.repo.news_generate_feeds = Mock()
    # this isn't really a good test -- it only verifies the
    # internal implementation of the main method, not the
    # behaviour. But other tests verify behaviour of individual
    # methods.
    self.repo.news()
    # FIX: use unittest assertions instead of bare ``assert``
    # statements -- bare asserts are silently stripped when run under
    # ``python -O`` and produce poorer failure messages.
    self.assertTrue(self.repo.news_facet_entries.called)
    self.assertTrue(self.repo.facets.called)
    self.assertTrue(self.repo.news_feedsets.called)
    self.assertTrue(self.repo.news_select_for_feeds.called)
    self.assertTrue(self.repo.news_generate_feeds.called)
def call_wsgi(self, environ):
    """Run self.app as a WSGI callable and return the tuple
    (status, headers, body-bytes) that it produced."""
    start_response = Mock()
    body = BytesIO()
    result = self.app(environ, start_response)
    for chunk in result:
        body.write(chunk)
    # per PEP 3333, close() must be called on the iterable if present
    if hasattr(result, 'close'):
        result.close()
    # start_response was called as (status, headers[, exc_info])
    status, headers = start_response.mock_calls[0][1][:2]
    return status, headers, body.getvalue()
def test_select_fulltextindex(self): log = Mock() # first manipulate requests.get to give the impression that # elasticsearch either is or isn't available with patch('ferenda.manager.requests.get') as mock_get: r = manager._select_fulltextindex(log, "mysite", verbose=True) self.assertEqual("ELASTICSEARCH", r[0]) self.assertEqual("http://localhost:9200/mysite/", r[1]) mock_get.side_effect = requests.exceptions.HTTPError r = manager._select_fulltextindex(log, "mysite", verbose=True) self.assertEqual("WHOOSH", r[0])
def test_recordlastdownload(self):
    """@recordlastdownload should stamp config.lastdownload with a
    datetime and persist it through LayeredConfig.write."""
    @recordlastdownload
    def testfunc(repo):
        pass
    repo = Mock()
    with patch('ferenda.decorators.LayeredConfig.write') as write_mock:
        testfunc(repo)
        # check that config.lastdownload has been set to a datetime
        self.assertIsInstance(repo.config.lastdownload,
                              datetime.datetime)
        # and that LayeredConfig.write has been called
        self.assertTrue(write_mock.called)
def _myget(self, url, **kwargs): res = Mock() res.headers = collections.defaultdict(lambda:None) res.headers['Content-type'] = "text/html" res.status_code = 200 res.encoding = 'utf-8' if url == "http://example.org/": res.content = b"<p><a href='doc/a_.html'>ID: a</a></p>" elif url == "http://example.org/doc/a_.html": res.content = b"<p>This is doc A</p>" else: raise ValueError("Unknown url %s" % url) res.text = res.content.decode() return res
def test_ifneeded_relate(self):
    """@ifneeded('relate') should consult DocumentEntry timestamps on
    real files (not outfile_is_newer) to decide whether the wrapped
    function runs, passing a 'needed' status object through."""
    @ifneeded("relate")
    def testfunc(repo, basefile, needed):
        repo.called = True
        repo.needed = needed

    try:
        datadir = tempfile.mkdtemp()
        mockbasefile = "1234"
        mockrepo = Mock()
        mockrepo.store = DocumentStore(datadir=datadir)
        mockrepo.called = False
        mockrepo.config.force = False
        # create some docentry file in a good place
        de = DocumentEntry(mockrepo.store.documententry_path("1234"))
        now = datetime.datetime.now()
        # ts (triples) is in the future, ft (fulltext) and dep
        # (dependencies) are in the past
        de.indexed_ts = now + datetime.timedelta(seconds=3600)
        de.indexed_ft = now + datetime.timedelta(seconds=-3600)
        de.indexed_dep = now + datetime.timedelta(seconds=-3600)
        de.save()

        # test 1: Outfile is newer - the ifneeded decorator should
        # make sure the actual testfunc code is never reached

        # NOTE: the "relate" branch of DocumentStore.needed
        # doesn't use outfile_is_newer, so we can't patch that, we
        # have to create actual files
        parsedpath = mockrepo.store.parsed_path("1234")
        util.writefile(parsedpath, "dummy")
        # os.utime takes (atime, mtime); backdate mtime two hours
        os.utime(parsedpath, (now.timestamp(), now.timestamp() - 7200))
        testfunc(mockrepo, mockbasefile)
        self.assertFalse(mockrepo.called)
        mockrepo.called = False

        # test 2: Outfile is older than the information in the
        # documententry file
        os.utime(parsedpath, (now.timestamp(), now.timestamp()))
        testfunc(mockrepo, mockbasefile)
        self.assertTrue(mockrepo.called)
        self.assertTrue(mockrepo.needed)
        # only fulltext indexing is stale (indexed_ft is in the past);
        # triples and dependencies are up to date
        self.assertFalse(mockrepo.needed.triples)
        self.assertFalse(mockrepo.needed.dependencies)
        self.assertTrue(mockrepo.needed.fulltext)
        mockrepo.called = False

        # test 3: Outfile is newer, but the global force option was set
        os.utime(parsedpath, (now.timestamp(), now.timestamp() - 7200))
        mockrepo.config.force = True
        testfunc(mockrepo, mockbasefile)
        self.assertTrue(mockrepo.called)
        mockrepo.config.force = None
        mockrepo.called = False
    finally:
        if os.path.exists(datadir):
            shutil.rmtree(datadir)
def my_get(url, **kwargs): urlspec = spec[url] if isinstance(urlspec, str): urlspec = {'file': urlspec} if 'charset' not in urlspec: urlspec['charset'] = 'utf-8' url_location = os.path.join(os.path.dirname(specfile), urlspec['file']) res = Mock() # load the .content property with open(url_location, "rb") as fp: res.content = fp.read() # but only load .text if a charset is present (note # default value of 'utf-8' above -- set 'charset': null in # the json file for binary files if urlspec['charset']: with codecs.open(url_location, "r", encoding=urlspec['charset']) as fp: res.text = fp.read() # FIXME: Using a defaultdict ensures that we'll never trip # over the non-existance of certain headers. WE should # specify only the most basic headers to make sure calling # code doesn't rely on eg. the etag header always being # there, because it won't res.headers = collections.defaultdict(lambda: None) res.headers['X-These-Headers-Are'] = 'Faked' res.status_code = 200 return res
def test_construct(self):
    """Devel.construct() should expand %(uri)s in the template, print
    the commented query followed by the constructed graph in turtle,
    and report triple count / elapsed time (masked for comparison)."""
    uri = "http://example.org/doc"
    with open("testconstructtemplate.rq", "wb") as fp:
        fp.write("""PREFIX dcterms: <http://purl.org/dc/terms/>

CONSTRUCT { ?s ?p ?o . }
WHERE { ?s ?p ?o .
        <%(uri)s> ?p ?o . }
""".encode())

    g = Graph()
    g.bind("dcterms", str(DCTERMS))
    g.add((URIRef(uri), DCTERMS.title, Literal("Document title")))
    config = {
        'connect.return_value': Mock(**{'construct.return_value': g})
    }
    printmock = MagicMock()
    with patch('ferenda.devel.TripleStore', **config):
        with patch('builtins.print', printmock):
            d = Devel()
            d.config = LayeredConfig(
                Defaults({
                    'storetype': 'a',
                    'storelocation': 'b',
                    'storerepository': 'c'
                }))
            d.construct("testconstructtemplate.rq", uri)
    want = """
# Constructing the following from b, repository c, type a
# PREFIX dcterms: <http://purl.org/dc/terms/>
#
# CONSTRUCT { ?s ?p ?o . }
# WHERE { ?s ?p ?o .
#         <http://example.org/doc> ?p ?o . }
#
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.org/doc> dcterms:title "Document title" .

# 1 triples constructed in 0.001s
""".strip()
    got = "\n".join([x[1][0] for x in printmock.mock_calls])
    self.maxDiff = None
    self.assertEqual(self.mask_time(want), self.mask_time(got))
    os.unlink("testconstructtemplate.rq")
def test_news_facet_entries(self):
    """news_facet_entries() should merge DocumentEntry data with the
    faceted data, newest entry first."""
    # setup makes sure that a bunch of DocumentEntry objects
    # exists, and that repo.faceted_data returns a list of dict
    # that corresponds with this
    self.repo.faceted_data = Mock(return_value=self.faceted_data)
    entries = self.repo.news_facet_entries()
    first, last = entries[0], entries[-1]
    self.assertEqual("Doc #24", first['title'])
    self.assertEqual("Doc #0", last['title'])
    self.assertEqual("http://localhost:8000/res/base/0", last['uri'])
    self.assertEqual("Doc #0", last['dcterms_title'])
    self.assertEqual("http://xmlns.com/foaf/0.1/Document",
                     last['rdf_type'])
    self.assertEqual(datetime(2013, 1, 1, 12, 40), last['updated'])
def test_ifneeded_generate(self):
    """The @ifneeded('generate') decorator should skip the wrapped
    function when the outfile is up to date, unless config.force is
    set."""
    @ifneeded("generate")
    def testfunc(repo, basefile):
        repo.called = True

    mockbasefile = "1234"
    mockrepo = Mock()
    # use the real needed() implementation, bound to a fake datadir
    mockrepo.store.needed = DocumentStore(datadir='fake').needed
    mockrepo.called = False
    mockrepo.config.force = False
    # test 1: Outfile is newer - the ifneeded decorator should
    # make sure the actual testfunc code is never reached
    with patch('ferenda.util.outfile_is_newer', return_value=True):
        testfunc(mockrepo, mockbasefile)
    self.assertFalse(mockrepo.called)
    mockrepo.called = False
    # test 2: Outfile is older than source file
    with patch('ferenda.util.outfile_is_newer', return_value=False):
        testfunc(mockrepo, mockbasefile)
    self.assertTrue(mockrepo.called)
    mockrepo.called = False
    # FIXME: we could add more tests, eg create a dependency file
    # and make sure an arbitrary file named in that depfile is
    # newer then outfile. but the tests in testDocStore.Needed
    # should cover that pretty well.
    # test 3: Outfile is newer, but the global force option was set
    mockrepo.config.force = True
    with patch('ferenda.util.outfile_is_newer', return_value=True):
        testfunc(mockrepo, mockbasefile)
    self.assertTrue(mockrepo.called)
    mockrepo.config.force = None
    mockrepo.called = False
def test_sesame_select(self, mock_get):
    """Test TripleStore.select() against a mocked SESAME backend:
    xml (default), json and python result formats, plus error
    wrapping of HTTP failures into TriplestoreError."""
    store = TripleStore.connect("SESAME", "", "")
    rf = util.readfile
    # xml (default): raw result bytes pass straight through
    want = rf("test/files/triplestore/select-results.xml").encode()
    got = store.select("the-query")
    self.assertEqual(want, got)
    self.assertEqual(mock_get.call_count, 1)
    # json: compare parsed structures rather than raw text
    want = rf("test/files/triplestore/select-results.json")
    got = store.select("the-query", format="json").decode()
    self.assertEqual(json.loads(want), json.loads(got))
    self.assertEqual(mock_get.call_count, 2)
    # python: "issued" date strings become date objects via the hook
    want = json.loads(rf("test/files/triplestore/select-results-python.json"),
                      object_hook=util.make_json_date_object_hook("issued"))
    got = store.select("the-query", format="python")
    self.assertEqual(want, got)
    self.assertEqual(mock_get.call_count, 3)
    # HTTP errors from requests must surface as TriplestoreError
    with self.assertRaises(errors.TriplestoreError):
        mockresponse = Mock()
        mockresponse.text = "This is the actual error text"
        mock_get.side_effect = requests.exceptions.HTTPError(
            "Server error", response=mockresponse)
        got = store.select("the-query", format="python")
def test_search_single(self):
    """A single-hit query should render "1 match for ..." as the
    result page heading."""
    self.builder.query_string = "q=subsection"
    hits = [{'dcterms_title': 'Result #1',
             'uri': 'http://example.org',
             'text': 'Text that contains the subsection term'}]
    pager = {'pagenum': 1, 'pagecount': 1, 'firstresult': 1,
             'lastresult': 1, 'totalresults': 1}
    mockconfig = {'connect.return_value':
                  Mock(**{'query.return_value': (hits, pager)})}
    with patch('ferenda.wsgiapp.FulltextIndex', **mockconfig):
        status, headers, content = self.call_wsgi()
    tree = etree.fromstring(content)
    heading = tree.find(".//article/h1").text
    self.assertEqual("1 match for 'subsection'", heading)
def test_select_triplestore(self): log = Mock() # first manipulate requests.get to give the impression that # fuseki or sesame either is or isn't available with patch('ferenda.manager.requests.get') as mock_get: r = manager._select_triplestore("sitename", log, verbose=True) self.assertEqual("FUSEKI", r[0]) mock_get.side_effect = requests.exceptions.HTTPError r = manager._select_triplestore("sitename", log, verbose=True) self.assertNotEqual("FUSEKI", r[0]) def get_sesame(url): if not 'openrdf-sesame' in url: raise requests.exceptions.HTTPError resp = Mock() resp.text = "ok" return resp mock_get.side_effect = get_sesame r = manager._select_triplestore("sitename", log, verbose=True) self.assertEqual("SESAME", r[0]) mock_get.side_effect = requests.exceptions.HTTPError r = manager._select_triplestore("sitename", log, verbose=True) self.assertNotEqual("SESAME", r[0]) # all request.get calls still raises HTTP error with patch('ferenda.manager.TripleStore.connect') as mock_connect: r = manager._select_triplestore("sitename", log, verbose=True) self.assertEqual("SQLITE", r[0]) def connectfail(storetype, location, repository): if storetype == "SQLITE": raise ImportError("BOOM") mock_connect.side_effect = connectfail r = manager._select_triplestore("sitename", log, verbose=True) self.assertNotEqual("SQLITE", r[0]) r = manager._select_triplestore("sitename", log, verbose=True) self.assertEqual("SLEEPYCAT", r[0]) mock_connect.side_effect = ImportError r = manager._select_triplestore("sitename", log, verbose=True) self.assertEqual(None, r[0])
def test_parseifneeded(self):
    """The @parseifneeded decorator should skip the wrapped function
    when the outfile is up to date, unless config.force or
    config.parseforce is set."""
    @parseifneeded
    def testfunc(repo, doc):
        repo.called = True

    mockdoc = Mock()
    mockrepo = Mock()
    mockrepo.called = False
    mockrepo.config.force = False
    # test 1: Outfile is newer - the parseifneeded decorator
    # should make sure the actual testfunc code is never reached
    with patch('ferenda.util.outfile_is_newer', return_value=True):
        testfunc(mockrepo, mockdoc)
    self.assertFalse(mockrepo.called)
    mockrepo.called = False
    # test 2: Outfile is older
    with patch('ferenda.util.outfile_is_newer', return_value=False):
        testfunc(mockrepo, mockdoc)
    self.assertTrue(mockrepo.called)
    mockrepo.called = False
    # test 3: Outfile is newer, but the global force option was set
    mockrepo.config.force = True
    with patch('ferenda.util.outfile_is_newer', return_value=True):
        testfunc(mockrepo, mockdoc)
    self.assertTrue(mockrepo.called)
    mockrepo.config.force = None
    mockrepo.called = False
    # test 4: Outfile is newer, but the module parseforce option was set
    mockrepo.config.parseforce = True
    with patch('ferenda.util.outfile_is_newer', return_value=True):
        testfunc(mockrepo, mockdoc)
    self.assertTrue(mockrepo.called)
    mockrepo.called = False
# -*- coding: utf-8 -*- from __future__ import unicode_literals from ferenda.compat import Mock from ferenda.elements.html import elements_from_soup from bs4 import BeautifulSoup doc = Mock() doc.body = elements_from_soup(BeautifulSoup("""<html> <body> URLs often appear like http://example.org/foo, in running text </body> </html>""").body) # begin from ferenda import CitationParser from ferenda import URIFormatter import ferenda.citationpatterns import ferenda.uriformats # CitationParser is initialized with a list of pyparsing # ParserElements (or any other object that has a scanString method # that returns a generator of (tokens,start,end) tuples, where start # and end are integer string indicies and tokens are dict-like # objects) citparser = CitationParser(ferenda.citationpatterns.url) # URIFormatter is initialized with a list of tuples, where each # tuple is a string (identifying a named ParseResult) and a function # (that takes as a single argument a dict-like object and returns a # URI string (possibly relative) citparser.set_formatter(URIFormatter(("URLRef", ferenda.uriformats.url)))
# -*- coding: utf-8 -*- from __future__ import unicode_literals from ferenda.compat import Mock from ferenda.elements.html import elements_from_soup from bs4 import BeautifulSoup doc = Mock() filedir = os.path.dirname(__file__) doc.body = elements_from_soup(BeautifulSoup(open(filedir+"/../doc/examples/citationparsing-before.xhtml").read()).body) # begin from pyparsing import Word, nums from ferenda import CitationParser from ferenda import URIFormatter import ferenda.citationpatterns import ferenda.uriformats # Create two ParserElements for IETF document references and internal # references rfc_citation = "RFC" + Word(nums).setResultsName("RFCRef") bcp_citation = "BCP" + Word(nums).setResultsName("BCPRef") std_citation = "STD" + Word(nums).setResultsName("STDRef") ietf_doc_citation = (rfc_citation | bcp_citation | std_citation).setResultsName("IETFRef") endnote_citation = ("[" + Word(nums).setResultsName("EndnoteID") + "]").setResultsName("EndnoteRef") # Create a URI formatter for IETF documents (URI formatter for endnotes # is so simple that we just use a lambda function below def rfc_uri_formatter(parts):
def get_sesame(url):
    """Fake requests.get for a Sesame triplestore endpoint.

    Returns a mock response whose .text is "ok" for URLs addressing an
    openrdf-sesame endpoint; raises requests.exceptions.HTTPError for
    any other URL, mimicking a failed HTTP request.
    """
    # idiomatic membership test ('x' not in y, not: not 'x' in y)
    if 'openrdf-sesame' not in url:
        raise requests.exceptions.HTTPError
    resp = Mock()
    resp.text = "ok"
    return resp
def test_search_multiple(self):
    """End-to-end WSGI search: three hits for 'part' rendered as HTML.

    Patches ferenda.wsgiapp.FulltextIndex so that .query() returns a
    canned (hits, pager) tuple, then checks the rendered result page:
    stylesheets, scripts, headline and the three hit sections.
    """
    self.env['QUERY_STRING'] = "q=part"
    # Canned search result: (list of hit dicts, pager dict)
    res = ([
        {'dcterms_title': 'Introduction',
         'dcterms_identifier': '123/a¶1',
         'uri': 'http://example.org/base/123/a#S1',
         # NOTE(review): the literal "</p>" inside this P element is
         # text content, not markup -- confirm it is intentional
         'text': html.P(['This is ',
                         html.Strong(['part'], **{'class': 'match'}),
                         ' of document-',
                         html.Strong(['part'], **{'class': 'match'}),
                         ' section 1</p>'])},
        {  # 'title':'Definitions and Abbreviations',
         'uri': 'http://example.org/base/123/a#S2',
         'text': html.P(['second main document ',
                         html.Strong(['part'], **{'class': 'match'})])},
        {'dcterms_title': 'Example',
         'uri': 'http://example.org/base/123/a',
         'text': html.P(['This is ',
                         html.Strong(['part'], **{'class': 'match'}),
                         ' of the main document'])}],
        {'pagenum': 1,
         'pagecount': 1,
         'firstresult': 1,
         'lastresult': 3,
         'totalresults': 3})
    config = {'connect.return_value': Mock(**{'query.return_value': res})}
    with patch('ferenda.wsgiapp.FulltextIndex', **config):
        status, headers, content = self.call_wsgi(self.env)
    self.assertResponse("200 OK",
                        {'Content-Type': 'text/html; charset=utf-8'},
                        None,
                        status, headers, None)

    t = etree.parse(BytesIO(content))
    css = t.findall("head/link[@rel='stylesheet']")
    self.assertEqual(len(css), 3)  # bootstrap, bootstrap-theme, ferenda and sfs (?!)
    self.assertEqual(
        'https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css',
        css[0].get('href'))
    js = t.findall("body/script")
    self.assertEqual(len(js), 4)  # jquery, bootstrap, ferenda, typeahead
    resulthead = t.find(".//article/h1").text
    self.assertEqual(resulthead, "3 matches for 'part'")
    # one <section class="hit"> per canned hit, each starting with an
    # <h2> containing the linked title
    docs = t.findall(".//section[@class='hit']")
    self.assertEqual(len(docs), 3)
    self.assertEqual(docs[0][0].tag, 'h2')
    expect = res[0]
    self.assertIn(expect[0]['dcterms_title'], docs[0][0][0].text)
    self.assertEqual(expect[0]['uri'], docs[0][0][0].get('href'))
    self.assertEqualXML(expect[0]['text'].as_xhtml(), docs[0][1],
                        namespace_aware=False)
    # NOTE(review): the second hit dict above has no 'dcterms_title'
    # key (it is commented out), so this lookup looks like it would
    # raise KeyError -- confirm against the fixture/actual behavior.
    self.assertIn(expect[1]['dcterms_title'], docs[1][0][0].text)
    self.assertEqual(expect[1]['uri'], docs[1][0][0].get('href'))
    self.assertEqualXML(expect[1]['text'].as_xhtml(), docs[1][1],
                        namespace_aware=False)
    self.assertIn(expect[2]['dcterms_title'], docs[2][0][0].text)
    self.assertEqual(expect[2]['uri'], docs[2][0][0].get('href'))
    self.assertEqualXML(expect[2]['text'].as_xhtml(), docs[2][1],
                        namespace_aware=False)
def test_paged(self):
    """Search with paging: verify pager label and pagination links.

    FIXME: This should check overflow, ie when results are so
    many that the pager cannot show all pages and uses »
    characters.
    """
    def mkres(page=1, pagesize=10, total=25):
        # Build a fake (hits, pager) tuple for the given result page.
        # NOTE(review): pagecount = int(total / pagesize) + 1
        # over-counts by one when total is an exact multiple of
        # pagesize -- harmless for total=25, confirm before reusing.
        hits = []
        for i in range((page - 1) * pagesize,
                       min(page * pagesize, total)):
            hits.append(
                {'title': '',
                 'uri': 'http://example.org/base/123/c#S%d' % ((i * 2) - 1),
                 'text': html.P(['This is a needle document'])})
        return (hits,
                {'pagenum': page,
                 'pagecount': int(total / pagesize) + 1,
                 'firstresult': (page - 1) * pagesize + 1,
                 'lastresult': (page - 1) * pagesize + len(hits),
                 'totalresults': total})

    # page 1: results 1-10 of 25
    self.env['QUERY_STRING'] = "q=needle"
    res = mkres()
    config = {'connect.return_value': Mock(**{'query.return_value': res})}
    with patch('ferenda.wsgiapp.FulltextIndex', **config):
        status, headers, content = self.call_wsgi(self.env)
    self.assertResponse("200 OK",
                        {'Content-Type': 'text/html; charset=utf-8'},
                        None,
                        status, headers, None)

    t = etree.fromstring(content)
    docs = t.findall(".//section[@class='hit']")
    self.assertEqual(10, len(docs))  # default page size (too small?)
    # assert that pager looks smth like this:
    # <div class="pager">
    #   <p class="label">Results 1-10 of 25</p>
    #   <ul class="pagination">
    #     <li class="active"><a href="/mysearch/?q=needle&p=1">1</a>
    #     <li><a href="/mysearch/?q=needle&p=2">2</a>
    #     <li><a href="/mysearch/?q=needle&p=3">3</a>
    #   </ul>
    # </div>
    pager = t.find(".//div[@class='pager']")
    pagination = pager.find("ul[@class='pagination']")
    self.assertEqual('p', pager[0].tag)
    self.assertEqual('Results 1-10 of 25', pager[0].text)
    self.assertEqual(3, len(pagination))
    self.assertEqual('li', pagination[0].tag)
    self.assertEqual('a', pagination[1][0].tag)
    self.assertEqual('/mysearch/?q=needle&p=2',
                     pagination[1][0].get('href'))

    # page 2: results 11-20, first pagination entry links back to p=1
    self.env['QUERY_STRING'] = "q=needle&p=2"
    res = mkres(page=2)
    config = {'connect.return_value': Mock(**{'query.return_value': res})}
    with patch('ferenda.wsgiapp.FulltextIndex', **config):
        status, headers, content = self.call_wsgi(self.env)
    t = etree.fromstring(content)
    docs = t.findall(".//section[@class='hit']")
    self.assertEqual(10, len(docs))
    pager = t.find(".//div[@class='pager']")
    pagination = pager.find("ul[@class='pagination']")
    self.assertEqual('Results 11-20 of 25', pager[0].text)
    self.assertEqual(3, len(pagination))
    self.assertEqual('/mysearch/?q=needle&p=1',
                     pagination[0][0].get('href'))

    # page 3: only the 5 remaining documents
    self.env['QUERY_STRING'] = "q=needle&p=3"
    res = mkres(page=3)
    config = {'connect.return_value': Mock(**{'query.return_value': res})}
    with patch('ferenda.wsgiapp.FulltextIndex', **config):
        status, headers, content = self.call_wsgi(self.env)
    t = etree.fromstring(content)
    docs = t.findall(".//section[@class='hit']")
    self.assertEqual(5, len(docs))  # only 5 remaining docs
    pager = t.find(".//div[@class='pager']")
    pagination = pager.find("ul[@class='pagination']")
    self.assertEqual(3, len(pagination))
    self.assertEqual('Results 21-25 of 25', pager[0].text)
# -*- coding: utf-8 -*- from __future__ import unicode_literals from ferenda.compat import Mock from ferenda.elements.html import elements_from_soup from bs4 import BeautifulSoup doc = Mock() filedir = os.path.dirname(__file__) with open(filedir+"/../doc/examples/citationparsing-before.xhtml") as fp: doc.body = elements_from_soup(BeautifulSoup(fp.read(), "lxml").body) # begin from pyparsing import Word, nums from ferenda import CitationParser from ferenda import URIFormatter import ferenda.citationpatterns import ferenda.uriformats # Create two ParserElements for IETF document references and internal # references rfc_citation = "RFC" + Word(nums).setResultsName("RFCRef") bcp_citation = "BCP" + Word(nums).setResultsName("BCPRef") std_citation = "STD" + Word(nums).setResultsName("STDRef") ietf_doc_citation = (rfc_citation | bcp_citation | std_citation).setResultsName("IETFRef") endnote_citation = ("[" + Word(nums).setResultsName("EndnoteID") + "]").setResultsName("EndnoteRef") # Create a URI formatter for IETF documents (URI formatter for endnotes # is so simple that we just use a lambda function below