Пример #1
0
    def test_iter(self):
        """
        Iterating a SearchQuery should handle pagination automatically,
        based on ``rows`` and ``max_pages``. The mock solr server is queried
        row by row and the returned data is checked at every step.
        """
        # ``assertRaisesRegexp`` is a deprecated alias that was removed in
        # Python 3.12, while Python 2.7 only offers the old spelling; use
        # whichever name this TestCase actually provides.
        assert_raises_regex = (
            getattr(self, 'assertRaisesRegex', None)
            or self.assertRaisesRegexp
        )

        sq = SearchQuery(q="unittest", rows=1, max_pages=20)
        with MockSolrResponse(sq.HTTP_ENDPOINT):
            self.assertEqual(sq._query['start'], 0)
            self.assertEqual(next(sq).bibcode, '1971Sci...174..142S')
            self.assertEqual(len(sq.articles), 1)
            self.assertEqual(sq._query['start'], 1)
            self.assertEqual(next(sq).bibcode, '2012GCN..13229...1S')
            self.assertEqual(len(list(sq)), 19)  # 2 already returned
            # once max_pages is reached, further iteration must stop
            with assert_raises_regex(
                    StopIteration,
                    "Maximum number of pages queried"):
                next(sq)
            # lifting the page limit lets iteration continue until the
            # records are exhausted
            sq.max_pages = 500
            self.assertEqual(len(list(sq)), 28-19-2)
            with assert_raises_regex(
                    StopIteration,
                    "All records found"):
                next(sq)
Пример #2
0
    def test_init(self):
        """
        init should result in a properly formatted query attribute
        """
        # ``assertRaisesRegexp`` is a deprecated alias that was removed in
        # Python 3.12, while Python 2.7 only offers the old spelling; use
        # whichever name this TestCase actually provides.
        assert_raises_regex = (
            getattr(self, 'assertRaisesRegex', None)
            or self.assertRaisesRegexp
        )

        # constructing a query without any search terms is rejected
        with assert_raises_regex(AssertionError, "q must not be empty"):
            SearchQuery()

        sq = SearchQuery(q="star")
        self.assertIn("star", sq.query['q'])

        sq = SearchQuery(title="t", author="ln, fn")

        # In Python 3, assertItemsEqual is named assertCountEqual
        assertItemsEqual = self.assertItemsEqual if sys.version_info[0] == 2 \
            else self.assertCountEqual

        # compare whitespace-separated tokens irrespective of their order
        assertItemsEqual(
            sq.query['q'].split(),
            'title:"t" author:"ln, fn"'.split(),
        )

        sq = SearchQuery(q="star", aff="institute")
        assertItemsEqual(
            sq.query['q'].split(),
            'aff:"institute" star'.split(),
        )
Пример #3
0
    def test_no_highlights(self):
        """
        An article that has no highlights should yield an empty dict
        """
        query = SearchQuery(q='star', fl=['bibcode'], hl=['abstract'])
        with MockSolrResponse(SEARCH_URL):
            # the second article of the mocked response is the one examined
            second_article = list(query)[1]

        self.assertEqual(query.highlights(second_article), {})
Пример #4
0
    def test_no_highlights(self):
        """
        Asking for the highlights of an article that has none should give
        back an empty dict
        """
        search = SearchQuery(q='star', fl=['bibcode'], hl=['abstract'])
        with MockSolrResponse(SEARCH_URL):
            # pick the second article returned by the mocked response
            article = list(search)[1]

        found = search.highlights(article)
        self.assertEqual(found, {})
Пример #5
0
    def test_get_highlight(self):
        """
        Highlights requested through ``hl`` can be retrieved for an article
        returned by the query
        """
        # without an ``hl`` argument no highlight parameters are sent
        plain = SearchQuery(q='star')
        for key in ('hl', 'hl.fl'):
            self.assertNotIn(key, plain.query)

        highlighted = SearchQuery(q='star', fl=['bibcode'], hl=['abstract'])
        self.assertEqual(highlighted.query['hl'], 'true')
        self.assertEqual(highlighted.query['hl.fl'], ['abstract'])
        with MockSolrResponse(SEARCH_URL):
            first_article = list(highlighted)[0]

        expected = {'abstract': 'astronomy abstract'}
        self.assertEqual(highlighted.highlights(first_article), expected)
Пример #6
0
    def test_print_methods(self):
        """
        __str__ and __unicode__ should both return a user-friendly formatted
        identifier for the article
        """
        rendered = self.article.__str__()
        self.assertEqual(
            '<Sudilovsky, V. et al. 2013, 2013A&A...552A.143S>',
            rendered
        )
        self.assertEqual(self.article.__unicode__(), self.article.__str__())

        # with the identifying attributes cleared, placeholders are expected
        for attribute in ('first_author', 'author', 'bibcode', 'year'):
            setattr(self.article, attribute, None)

        placeholder = "<Unknown author Unknown year, Unknown bibcode>"
        self.assertEqual(self.article.__str__(), placeholder)

        # accessing print methods should use lazy loaded attributes: issue#98
        query = SearchQuery(q="unittest", rows=1, fl=["bibcode"])
        with MockSolrResponse(query.HTTP_ENDPOINT):
            first_article = list(query)[0]
            expected = '<Sudilovsky, Oscar et al. 1971, 1971Sci...174..142S>'
            self.assertEqual(first_article.__unicode__(), expected)
Пример #7
0
    def test_get_highlight(self):
        """
        A highlight can be looked up for an article that was returned by a
        query which asked for highlighting
        """
        # a query built without ``hl`` carries no highlight parameters
        no_hl_params = SearchQuery(q='star').query
        self.assertNotIn('hl', no_hl_params)
        self.assertNotIn('hl.fl', no_hl_params)

        search = SearchQuery(q='star', fl=['bibcode'], hl=['abstract'])
        self.assertEqual(search.query['hl'], 'true')
        self.assertEqual(search.query['hl.fl'], ['abstract'])
        with MockSolrResponse(SEARCH_URL):
            article = list(search)[0]

        found = search.highlights(article)
        self.assertEqual(found, {'abstract': 'astronomy abstract'})
Пример #8
0
 def test_incorrect_highlight_fl(self):
     """
     Building a query with incorrect highlight fields should raise an
     exception
     """
     requested_highlights = ['foo', 'bar', 'abstract', 'abstract']
     # callable form: the constructor is invoked by assertRaises itself
     self.assertRaises(
         Exception,
         SearchQuery,
         q='star',
         fl=['bibcode'],
         hl=requested_highlights
     )
Пример #9
0
    def test_iter(self):
        """
        Iteration should page through results on its own, governed by
        ``rows`` and ``max_pages``; the mock solr server is queried one row
        at a time and every step is checked
        """

        paged = SearchQuery(q="unittest", rows=1, max_pages=20, start=0)
        with MockSolrResponse(paged.HTTP_ENDPOINT):
            self.assertEqual(paged._query['start'], 0)
            first = next(paged)
            self.assertEqual(first.bibcode, '1971Sci...174..142S')
            self.assertEqual(len(paged.articles), 1)
            self.assertEqual(paged._query['start'], 1)
            second = next(paged)
            self.assertEqual(second.bibcode, '2012GCN..13229...1S')
            remainder = list(paged)
            self.assertEqual(len(remainder), 18)  # 2 already returned
            # the page limit has been hit, so another step must stop
            with six.assertRaisesRegex(
                    self, StopIteration, "Maximum number of pages queried"):
                next(paged)
            # a higher page limit allows the iteration to run to the end
            paged.max_pages = 500
            leftover = list(paged)
            self.assertEqual(len(leftover), 28 - 18 - 2)
            with six.assertRaisesRegex(
                    self, StopIteration, "All records found"):
                next(paged)

        # without max_pages, exactly the requested number of rows comes back
        limited = SearchQuery(q="unittest", rows=3)
        with MockSolrResponse(limited.HTTP_ENDPOINT):
            fetched = list(limited)
            self.assertEqual(len(fetched), 3)

        # Default should use cursorMark
        cursored = SearchQuery(q="unittest", sort="date")
        with MockSolrResponse(cursored.HTTP_ENDPOINT):
            self.assertEqual(next(cursored).bibcode, '1971Sci...174..142S')
            self.assertEqual(
                cursored.query['cursorMark'],
                cursored.response.json['nextCursorMark']
            )
Пример #10
0
    def test_iter(self):
        """
        Iterating a SearchQuery should handle pagination automatically,
        based on ``rows`` and ``max_pages``. The mock solr server is queried
        row by row and the returned data is checked at every step.
        """
        # ``assertRaisesRegexp`` is a deprecated alias that was removed in
        # Python 3.12, while Python 2.7 only offers the old spelling; use
        # whichever name this TestCase actually provides.
        assert_raises_regex = (
            getattr(self, 'assertRaisesRegex', None)
            or self.assertRaisesRegexp
        )

        sq = SearchQuery(q="unittest", rows=1, max_pages=20, start=0)
        with MockSolrResponse(sq.HTTP_ENDPOINT):
            self.assertEqual(sq._query['start'], 0)
            self.assertEqual(next(sq).bibcode, '1971Sci...174..142S')
            self.assertEqual(len(sq.articles), 1)
            self.assertEqual(sq._query['start'], 1)
            self.assertEqual(next(sq).bibcode, '2012GCN..13229...1S')
            self.assertEqual(len(list(sq)), 18)  # 2 already returned
            # once max_pages is reached, further iteration must stop
            with assert_raises_regex(
                    StopIteration,
                    "Maximum number of pages queried"):
                next(sq)
            # lifting the page limit lets iteration continue until the
            # records are exhausted
            sq.max_pages = 500
            self.assertEqual(len(list(sq)), 28-18-2)
            with assert_raises_regex(
                    StopIteration,
                    "All records found"):
                next(sq)

        # not setting max_pages should return exactly the number of rows
        # requested
        sq = SearchQuery(q="unittest", rows=3)
        with MockSolrResponse(sq.HTTP_ENDPOINT):
            self.assertEqual(len(list(sq)), 3)

        # Default should use cursorMark
        sq = SearchQuery(q="unittest", sort="date")
        with MockSolrResponse(sq.HTTP_ENDPOINT):
            self.assertEqual(next(sq).bibcode, '1971Sci...174..142S')
            self.assertEqual(
                sq.query['cursorMark'], sq.response.json['nextCursorMark']
            )
Пример #11
0
 def test_get_field_bibtex(self):
     """
     Reading ``bibtex`` from an article should emit a warning but still
     return the expected value, both for fl="bibtex" and for fl=None
     """
     export_endpoint = "{base}bibtex".format(base=EXPORT_URL)
     expected_warning = (
         "bibtex should be queried with ads.ExportQuery(); "
         "You will hit API ratelimits very quickly otherwise."
     )
     for requested_fields in (None, "bibtex"):
         query = SearchQuery(q="*", fl=requested_fields)
         with warnings.catch_warnings(record=True) as caught:
             with MockSolrResponse(SEARCH_URL), \
                     MockExportResponse(export_endpoint):
                 article = next(query)
                 # bare attribute access while the mocks are active;
                 # presumably this is what triggers the lookup and warning
                 article.bibtex
             # the recorded message is only inspected on Python 3
             if six.PY3:
                 self.assertEqual(
                     caught[1].message.args[0], expected_warning)
             self.assertTrue(
                 article.bibtex.startswith("@ARTICLE{2013A&A...552A.143S"))
Пример #12
0
 def test_rows_rewrite(self):
     """
     When the response reports a different "rows" value than the query
     asked for, the query's "rows" should be overwritten and a warning
     should be emitted
     """
     requested_rows = 10e6
     expected_warning = (
         "Response rows did not match input rows. Setting this query's rows to 300"
     )
     query = SearchQuery(q="unittest", rows=requested_rows)
     with MockSolrResponse(query.HTTP_ENDPOINT):
         # before any request the query still carries the requested value
         self.assertEqual(query.query['rows'], requested_rows)
         with warnings.catch_warnings(record=True) as caught:
             next(query)
             # the warning text is stored differently on Python 2 and 3
             if six.PY3:
                 text = caught[-1].message.args[0]
             elif six.PY2:
                 text = caught[-1].message.message
             self.assertEqual(text, expected_warning)
         self.assertEqual(query.query['rows'], 300)
Пример #13
0
    def test_init(self):
        """
        Constructing a SearchQuery should produce a correctly formatted
        query attribute
        """

        with six.assertRaisesRegex(
                self, AssertionError, "q must not be empty"):
            SearchQuery()

        plain = SearchQuery(q="star")
        self.assertIn("star", plain.query['q'])

        # In Python 3, assertItemsEqual is named assertCountEqual
        if sys.version_info[0] == 2:
            assert_same_items = self.assertItemsEqual
        else:
            assert_same_items = self.assertCountEqual

        fielded = SearchQuery(title="t", author="ln, fn")
        assert_same_items(
            fielded.query['q'].split(),
            'title:"t" author:"ln, fn"'.split(),
        )

        mixed = SearchQuery(q="star", aff="institute")
        assert_same_items(
            mixed.query['q'].split(),
            'aff:"institute" star'.split(),
        )

        # the token is exposed both publicly and privately
        with_token = SearchQuery(q="star", token="test-token")
        for attribute in ('token', '_token'):
            self.assertEqual(getattr(with_token, attribute), "test-token")

        # sort only: cursorMark is used instead of start
        sorted_only = SearchQuery(q="star", sort="date")
        self.assertEqual(sorted_only.query['cursorMark'], '*')
        self.assertNotIn('start', sorted_only.query)
        self.assertEqual(sorted_only.query['sort'], 'date desc,id desc')

        # explicit start: no cursorMark
        start_only = SearchQuery(q="star", start=0)
        self.assertEqual(start_only.query['start'], 0)
        self.assertNotIn('cursorMark', start_only.query)
        self.assertEqual(start_only.query['sort'], "score desc")

        start_and_sort = SearchQuery(q="star", start=0, sort="date")
        self.assertEqual(start_and_sort.query['start'], 0)
        self.assertEqual(start_and_sort.query['sort'], "date desc")

        # query passed as a dict
        from_dict = SearchQuery({"q": "star"})
        self.assertEqual(from_dict.query['cursorMark'], "*")
        self.assertEqual(from_dict.query['sort'], "score desc,id desc")
        self.assertNotIn('start', from_dict.query)

        # test unicode
        accented = SearchQuery(author=u'é')
        self.assertIn(u'é', accented.query['q'])

        with six.assertRaisesRegex(
                self, AssertionError, ".+mutually exclusive.+"):
            SearchQuery(q="start", start=0, cursorMark="*")

        # test that bibtex/metrics is excluded from sq.query['fl']
        requested_fields = ["f1", "bibtex", "f2", "metrics", "f3"]
        filtered = SearchQuery(q="star", fl=requested_fields)
        self.assertEqual(filtered.query['fl'], ["id", "f1", "f2", "f3"])