def test_iter(self):
    """
    Iterating over a SearchQuery should paginate automatically, driven by
    ``rows`` and ``max_pages``.

    The mock solr server is queried one row per page. The test checks the
    bibcodes that come back, the advancing ``start`` offset, and the
    StopIteration message raised first when the page limit is reached and
    then when the result set is exhausted.
    """
    sq = SearchQuery(q="unittest", rows=1, max_pages=20)
    with MockSolrResponse(sq.HTTP_ENDPOINT):
        # Nothing has been fetched yet, so the offset is still zero.
        self.assertEqual(sq._query['start'], 0)
        self.assertEqual(next(sq).bibcode, '1971Sci...174..142S')
        self.assertEqual(len(sq.articles), 1)
        # With rows=1 the offset has moved on by one after the first fetch.
        self.assertEqual(sq._query['start'], 1)
        self.assertEqual(next(sq).bibcode, '2012GCN..13229...1S')
        self.assertEqual(len(list(sq)), 19)  # 2 already returned

        # six.assertRaisesRegex runs on both Python 2 and 3; the
        # assertRaisesRegexp alias was removed from unittest in Python 3.12.
        with six.assertRaisesRegex(
                self, StopIteration, "Maximum number of pages queried"):
            next(sq)

        # Raising the page limit lets iteration drain what is left
        # (presumably the mock serves 28 records in total).
        sq.max_pages = 500
        self.assertEqual(len(list(sq)), 28 - 19 - 2)
        with six.assertRaisesRegex(
                self, StopIteration, "All records found"):
            next(sq)
def test_init(self):
    """
    Constructing a SearchQuery should produce a properly formatted
    ``query`` attribute.

    Covers the assertion raised for an empty query, a plain ``q`` string,
    and fielded keyword arguments being folded into ``q``.
    """
    # six.assertRaisesRegex runs on both Python 2 and 3; the
    # assertRaisesRegexp alias was removed from unittest in Python 3.12.
    with six.assertRaisesRegex(
            self, AssertionError, "q must not be empty"):
        SearchQuery()

    sq = SearchQuery(q="star")
    self.assertIn("star", sq.query['q'])

    # In Python 3, assertItemsEqual is named assertCountEqual
    if sys.version_info[0] == 2:
        assertItemsEqual = self.assertItemsEqual
    else:
        assertItemsEqual = self.assertCountEqual

    # The whitespace-separated tokens of q are compared as an unordered
    # collection, so the order of the fielded terms is not asserted.
    sq = SearchQuery(title="t", author="ln, fn")
    assertItemsEqual(
        sq.query['q'].split(),
        'title:"t" author:"ln, fn"'.split(),
    )

    sq = SearchQuery(q="star", aff="institute")
    assertItemsEqual(
        sq.query['q'].split(),
        'aff:"institute" star'.split(),
    )
def test_no_highlights(self):
    """
    Asking for the highlights of an article that has none should give
    back an empty dict.
    """
    query = SearchQuery(q='star', fl=['bibcode'], hl=['abstract'])
    with MockSolrResponse(SEARCH_URL):
        articles = list(query)
        # The second article of the response is the one under test.
        found = query.highlights(articles[1])
    self.assertEqual(found, {})
def test_get_highlight(self):
    """
    Highlights should be retrievable for an article returned by a query
    that requested them.
    """
    # Without ``hl`` no highlight parameters are placed in the query.
    plain = SearchQuery(q='star')
    for key in ('hl', 'hl.fl'):
        self.assertNotIn(key, plain.query)

    # With ``hl`` the highlight switch and the field list are both set.
    query = SearchQuery(q='star', fl=['bibcode'], hl=['abstract'])
    self.assertEqual(query.query['hl'], 'true')
    self.assertEqual(query.query['hl.fl'], ['abstract'])

    with MockSolrResponse(SEARCH_URL):
        articles = list(query)
        found = query.highlights(articles[0])
    self.assertEqual(found, {'abstract': 'astronomy abstract'})
def test_print_methods(self):
    """
    ``__str__`` and ``__unicode__`` should return a user-friendly
    formatted identifier for the article.
    """
    populated = '<Sudilovsky, V. et al. 2013, 2013A&A...552A.143S>'
    self.assertEqual(populated, self.article.__str__())
    self.assertEqual(self.article.__unicode__(), self.article.__str__())

    # With the identifying fields blanked out, placeholders are printed.
    for field in ('first_author', 'author', 'bibcode', 'year'):
        setattr(self.article, field, None)
    blank = "<Unknown author Unknown year, Unknown bibcode>"
    self.assertEqual(self.article.__str__(), blank)

    # accessing print methods should use lazy loaded attributes: issue#98
    query = SearchQuery(q="unittest", rows=1, fl=["bibcode"])
    with MockSolrResponse(query.HTTP_ENDPOINT):
        first = list(query)[0]
        self.assertEqual(
            first.__unicode__(),
            '<Sudilovsky, Oscar et al. 1971, 1971Sci...174..142S>',
        )
def test_incorrect_highlight_fl(self):
    """
    Passing incorrect highlight fields to SearchQuery should raise an
    exception.
    """
    requested_highlights = ['foo', 'bar', 'abstract', 'abstract']
    # Callable form of assertRaises: the constructor call itself must raise.
    self.assertRaises(
        Exception,
        SearchQuery,
        q='star',
        fl=['bibcode'],
        hl=requested_highlights,
    )
def test_iter(self):
    """
    Iterating over a SearchQuery should paginate automatically, driven by
    ``rows`` and ``max_pages``.

    Three scenarios run against the mock solr server: explicit
    ``start``-based paging one row at a time, a query without
    ``max_pages``, and the default cursorMark-based paging.
    """
    # --- start-based paging, one row per page -------------------------
    paged = SearchQuery(q="unittest", rows=1, max_pages=20, start=0)
    with MockSolrResponse(paged.HTTP_ENDPOINT):
        self.assertEqual(paged._query['start'], 0)

        first = next(paged)
        self.assertEqual(first.bibcode, '1971Sci...174..142S')
        self.assertEqual(len(paged.articles), 1)
        self.assertEqual(paged._query['start'], 1)

        second = next(paged)
        self.assertEqual(second.bibcode, '2012GCN..13229...1S')

        # 2 already returned
        until_page_limit = list(paged)
        self.assertEqual(len(until_page_limit), 18)
        six.assertRaisesRegex(
            self, StopIteration, "Maximum number of pages queried",
            next, paged,
        )

        # Lifting the page limit lets iteration reach the end of the data.
        paged.max_pages = 500
        until_exhausted = list(paged)
        self.assertEqual(len(until_exhausted), 28 - 18 - 2)
        six.assertRaisesRegex(
            self, StopIteration, "All records found",
            next, paged,
        )

    # not setting max_pages should return the exact number of rows requests
    limited = SearchQuery(q="unittest", rows=3)
    with MockSolrResponse(limited.HTTP_ENDPOINT):
        fetched = list(limited)
        self.assertEqual(len(fetched), 3)

    # Default should use cursorMark
    cursored = SearchQuery(q="unittest", sort="date")
    with MockSolrResponse(cursored.HTTP_ENDPOINT):
        head = next(cursored)
        self.assertEqual(head.bibcode, '1971Sci...174..142S')
        self.assertEqual(
            cursored.query['cursorMark'],
            cursored.response.json['nextCursorMark'],
        )
def test_iter(self):
    """
    Iterating over a SearchQuery should paginate automatically, driven by
    ``rows`` and ``max_pages``.

    The mock solr server is queried row by row with explicit ``start``
    paging, then with ``rows`` only, and finally with the default
    cursorMark paging; each scenario checks that the data comes back as
    expected.
    """
    sq = SearchQuery(q="unittest", rows=1, max_pages=20, start=0)
    with MockSolrResponse(sq.HTTP_ENDPOINT):
        # Nothing has been fetched yet, so the offset is still zero.
        self.assertEqual(sq._query['start'], 0)
        self.assertEqual(next(sq).bibcode, '1971Sci...174..142S')
        self.assertEqual(len(sq.articles), 1)
        # With rows=1 the offset has moved on by one after the first fetch.
        self.assertEqual(sq._query['start'], 1)
        self.assertEqual(next(sq).bibcode, '2012GCN..13229...1S')
        self.assertEqual(len(list(sq)), 18)  # 2 already returned

        # six.assertRaisesRegex runs on both Python 2 and 3; the
        # assertRaisesRegexp alias was removed from unittest in Python 3.12.
        with six.assertRaisesRegex(
                self, StopIteration, "Maximum number of pages queried"):
            next(sq)

        # Raising the page limit lets iteration drain what is left
        # (presumably the mock serves 28 records in total).
        sq.max_pages = 500
        self.assertEqual(len(list(sq)), 28 - 18 - 2)
        with six.assertRaisesRegex(
                self, StopIteration, "All records found"):
            next(sq)

    # not setting max_pages should return the exact number of rows requests
    sq = SearchQuery(q="unittest", rows=3)
    with MockSolrResponse(sq.HTTP_ENDPOINT):
        self.assertEqual(len(list(sq)), 3)

    # Default should use cursorMark
    sq = SearchQuery(q="unittest", sort="date")
    with MockSolrResponse(sq.HTTP_ENDPOINT):
        self.assertEqual(next(sq).bibcode, '1971Sci...174..142S')
        self.assertEqual(
            sq.query['cursorMark'],
            sq.response.json['nextCursorMark']
        )
def test_get_field_bibtex(self):
    """
    Loading ``bibtex`` through an article should emit a warning but
    otherwise work as expected, both with fl="bibtex" and with fl=None.

    The warning text is looked up among all warnings recorded during the
    access instead of at a fixed position in the list, so unrelated
    warnings emitted before or after it cannot break the check.
    """
    expected_warning = (
        "bibtex should be queried with ads.ExportQuery(); "
        "You will hit API ratelimits very quickly otherwise."
    )
    export_url = "{base}bibtex".format(base=EXPORT_URL)

    for fl in [None, "bibtex"]:
        sq = SearchQuery(q="*", fl=fl)
        with warnings.catch_warnings(record=True) as caught:
            # Record every warning regardless of the filters the test
            # runner installed, as the warnings documentation recommends
            # for tests.
            warnings.simplefilter("always")
            with MockSolrResponse(SEARCH_URL), \
                    MockExportResponse(export_url):
                article = next(sq)
                # Accessed only for its side effect: the lookup is
                # expected to emit the warning checked below.
                article.bibtex

                # The warning text is only verified on Python 3.
                if six.PY3:
                    messages = [
                        item.message.args[0]
                        for item in caught
                        if item.message.args
                    ]
                    self.assertIn(expected_warning, messages)

                self.assertTrue(
                    article.bibtex.startswith("@ARTICLE{2013A&A...552A.143S")
                )
def test_rows_rewrite(self):
    """
    If the responseHeader "rows" differs from the query's "rows", the
    query's "rows" should be rewritten and a warning should be emitted.

    The warning text is read from ``args[0]`` on both Python 2 and 3 and
    is looked up among all recorded warnings, so the check does not
    depend on it being the last warning emitted.
    """
    expected_warning = (
        "Response rows did not match input rows. "
        "Setting this query's rows to 300"
    )

    sq = SearchQuery(q="unittest", rows=10e6)
    with MockSolrResponse(sq.HTTP_ENDPOINT):
        # The oversized request is kept as-is until a response arrives.
        self.assertEqual(sq.query['rows'], 10e6)

        with warnings.catch_warnings(record=True) as caught:
            # Record every warning regardless of the filters the test
            # runner installed, as the warnings documentation recommends
            # for tests.
            warnings.simplefilter("always")
            next(sq)

        messages = [
            item.message.args[0]
            for item in caught
            if item.message.args
        ]
        self.assertIn(expected_warning, messages)
        self.assertEqual(sq.query['rows'], 300)
def test_init(self):
    """
    Constructing a SearchQuery should produce a properly formatted
    ``query`` attribute.

    Exercises the empty-query assertion, fielded keyword arguments, the
    token, the cursorMark/start paging modes with their sort strings,
    a dict passed positionally, unicode input, and the filtering of
    ``fl``.
    """
    # An empty query is rejected outright.
    six.assertRaisesRegex(
        self, AssertionError, "q must not be empty", SearchQuery
    )

    search = SearchQuery(q="star")
    self.assertIn("star", search.query['q'])

    # In Python 3, assertItemsEqual is named assertCountEqual
    if sys.version_info[0] == 2:
        assert_same_items = self.assertItemsEqual
    else:
        assert_same_items = self.assertCountEqual

    # Fielded keywords end up in q; token order is not asserted.
    search = SearchQuery(title="t", author="ln, fn")
    assert_same_items(
        search.query['q'].split(),
        'title:"t" author:"ln, fn"'.split(),
    )

    search = SearchQuery(q="star", aff="institute")
    assert_same_items(
        search.query['q'].split(),
        'aff:"institute" star'.split(),
    )

    # The token is exposed through both the public and private attribute.
    search = SearchQuery(q="star", token="test-token")
    for token_value in (search.token, search._token):
        self.assertEqual(token_value, "test-token")

    # Without ``start`` the query pages by cursorMark and the sort gets
    # an ``id desc`` suffix.
    search = SearchQuery(q="star", sort="date")
    self.assertEqual(search.query['cursorMark'], '*')
    self.assertNotIn('start', search.query)
    self.assertEqual(search.query['sort'], 'date desc,id desc')

    # With ``start`` no cursorMark is set and the sort has no suffix.
    search = SearchQuery(q="star", start=0)
    self.assertEqual(search.query['start'], 0)
    self.assertNotIn('cursorMark', search.query)
    self.assertEqual(search.query['sort'], "score desc")

    search = SearchQuery(q="star", start=0, sort="date")
    self.assertEqual(search.query['start'], 0)
    self.assertEqual(search.query['sort'], "date desc")

    # A dict passed positionally gets the cursorMark defaults.
    search = SearchQuery({"q": "star"})
    self.assertEqual(search.query['cursorMark'], "*")
    self.assertEqual(search.query['sort'], "score desc,id desc")
    self.assertNotIn('start', search.query)

    # test unicode
    search = SearchQuery(author=u'é')
    self.assertIn(u'é', search.query['q'])

    # ``start`` and ``cursorMark`` cannot be combined.
    six.assertRaisesRegex(
        self, AssertionError, ".+mutually exclusive.+",
        SearchQuery, q="start", start=0, cursorMark="*",
    )

    # test that bibtex/metrics is excluded from sq.query['fl']
    requested_fields = ["f1", "bibtex", "f2", "metrics", "f3"]
    search = SearchQuery(q="star", fl=requested_fields)
    self.assertEqual(search.query['fl'], ["id", "f1", "f2", "f3"])