def test_query_only(self):
    """A bare query (value) with no searchtype must be rejected.

    This should never happen through the regular form; it implies that
    somebody tampered with the request.
    """
    params = MultiDict([('query', 'someone monkeyed with the request')])
    is_valid = SimpleSearchForm(params).validate()
    self.assertFalse(is_valid, "Form should be invalid")
def test_searchtype_only(self):
    """A searchtype (field) submitted without any query must be rejected."""
    params = MultiDict([('searchtype', 'title')])
    is_valid = SimpleSearchForm(params).validate()
    self.assertFalse(is_valid, "Form should be invalid")
def test_querystring_has_unbalanced_quotes(self):
    """A query with an odd number of quote characters must be rejected.

    Balanced and unbalanced variants are checked alternately, re-using the
    same request parameters and replacing only the ``query`` value.
    """
    params = MultiDict({"searchtype": "title"})
    # (query string, whether the form is expected to validate)
    cases = [
        ('"rhubarb', False),
        ('"rhubarb"', True),
        ('"rhubarb" "pie', False),
        ('"rhubarb" "pie"', True),
    ]
    for querystring, should_validate in cases:
        params["query"] = querystring
        outcome = SimpleSearchForm(params).validate()
        if should_validate:
            self.assertTrue(outcome, "Form should be valid")
        else:
            self.assertFalse(outcome, "Form should be invalid")
def test_querystring_has_wildcard_at_start(self):
    """A query that begins with a wildcard character must be rejected."""
    params = MultiDict([('searchtype', 'title'), ('query', '*foo title')])
    is_valid = SimpleSearchForm(params).validate()
    self.assertFalse(is_valid, "Form should be invalid")
def test_query_and_searchtype(self):
    """A searchtype (field) together with a query (value) is accepted."""
    params = MultiDict([('searchtype', 'title'), ('query', 'foo title')])
    is_valid = SimpleSearchForm(params).validate()
    self.assertTrue(is_valid, "Form should be valid")
def test_input_whitespace_is_stripped(self):
    """Whitespace padding around the query is removed by the form."""
    params = MultiDict([('searchtype', 'title'), ('query', ' foo title ')])
    search_form = SimpleSearchForm(params)
    # Validate first; the cleaned value is inspected afterwards.
    self.assertTrue(search_form.validate(), "Form should be valid.")
    cleaned_query = search_form.query.data
    self.assertEqual(cleaned_query, 'foo title')
def search(request_params: MultiDict,
           archives: Optional[List[str]] = None) -> Response:
    """
    Perform a simple search.

    This supports requests from both the form-based view (provided here) AND
    from the mini search widget displayed on all arXiv.org pages.

    At a minimum, expects the parameter ``query`` in the GET request. This may
    be a match value for a search query, or an arXiv ID. If it parses as an
    arXiv ID, the response is a redirect to the corresponding abstract page.

    Parameters
    ----------
    request_params : :class:`.MultiDict`
        Request parameters. An immutable instance is copied before use; a
        mutable instance may have its ``query`` value rewritten in place when
        classic author syntax (``surname_f``) is detected.
    archives : list
        A list of archives within which the search should be performed.

    Returns
    -------
    dict
        Search result response data.
    int
        HTTP status code.
    dict
        Headers to add to the response.

    Raises
    ------
    :class:`.NotFound`
        Raised when ``archives`` is an empty list.
    :class:`.BadRequest`
        Raised when the ``order`` or ``size`` parameters are invalid, or when
        the requested result range is outside of what the index allows.
    :class:`.InternalServerError`
        Raised when there is a problem communicating with ES, or there was an
        unexpected problem executing the query.

    """
    # Imported locally so that the block does not depend on a change to the
    # module's import section; only the classic redirects below need it.
    from urllib.parse import quote_plus

    if archives is not None and len(archives) == 0:
        raise NotFound("No such archive")

    # We may need to intervene on the request parameters, so we'll
    # reinstantiate as a mutable MultiDict.
    if isinstance(request_params, ImmutableMultiDict):
        request_params = MultiDict(request_params.items(multi=True))

    logger.debug("simple search form")
    response_data: Dict[str, Any] = {}

    logger.debug("simple search request")
    arxiv_id: Optional[str] = None
    if "query" in request_params:
        try:
            # First check whether the query is itself an arXiv ID.
            arxiv_id = identifier.parse_arxiv_id(request_params["query"])
        except ValueError:
            logger.debug("No arXiv ID detected; fall back to form")
        else:
            logger.debug("got arXiv ID: %s", arxiv_id)

    # If the query was an arXiv ID, redirect straight to the abstract page.
    if arxiv_id:
        headers = {"Location": url_for("abs_by_id", paper_id=arxiv_id)}
        return {}, HTTPStatus.MOVED_PERMANENTLY, headers

    # Here we intervene on the user's query to look for holdouts from the
    # classic search system's author indexing syntax (surname_f). We
    # rewrite with a comma, and show a warning to the user about the
    # change.
    response_data["has_classic_format"] = False
    if "searchtype" in request_params and "query" in request_params:
        if request_params["searchtype"] in ["author", "all"]:
            _query, _classic = catch_underscore_syntax(request_params["query"])
            response_data["has_classic_format"] = _classic
            request_params["query"] = _query

    # Fall back to form-based search.
    form = SimpleSearchForm(request_params)

    if form.query.data:
        # The query is user-supplied, so it must be percent-encoded before it
        # is placed in a redirect URL; otherwise characters such as ``&``,
        # ``#`` or whitespace corrupt the target URL or inject parameters.
        encoded_query = quote_plus(form.query.data)

        # Temporary workaround to support classic help search
        if form.searchtype.data == "help":
            return (
                {},
                HTTPStatus.MOVED_PERMANENTLY,
                {"Location": f"/help/search?q={encoded_query}"},
            )

        # Support classic "experimental" search
        elif form.searchtype.data == "full_text":
            return (
                {},
                HTTPStatus.MOVED_PERMANENTLY,
                {
                    "Location": "http://search.arxiv.org:8081/"
                                f"?in=&query={encoded_query}"
                },
            )

    q: Optional[Query]
    if form.validate():
        logger.debug("form is valid")
        q = _query_from_form(form)

        if archives is not None:
            q = _update_with_archives(q, archives)

        # Pagination is handled outside of the form.
        q = paginate(q, request_params)

        try:
            # Execute the search. We'll use the results directly in
            # template rendering, so they get added directly to the
            # response content.
            response_data.update(SearchSession.search(q))  # type: ignore
        except index.IndexConnectionError as ex:
            # There was a (hopefully transient) connection problem. Either
            # this will clear up relatively quickly (next request), or
            # there is a more serious outage.
            logger.error("IndexConnectionError: %s", ex)
            # NOTE(review): the contact address in the messages below looks
            # like a redacted placeholder -- confirm the intended address.
            raise InternalServerError(
                "There was a problem connecting to the search index. This is "
                "quite likely a transient issue, so please try your search "
                "again. If this problem persists, please report it to "
                "[email protected].") from ex
        except index.QueryError as ex:
            # Base exception routers should pick this up and show bug page.
            logger.error("QueryError: %s", ex)
            raise InternalServerError(
                "There was a problem executing your query. Please try your "
                "search again. If this problem persists, please report it to "
                "[email protected].") from ex
        except index.OutsideAllowedRange as ex:
            raise BadRequest(
                "Hello clever friend. You can't get results in that range"
                " right now.") from ex
        except Exception as ex:
            # Log anything unexpected, then let it propagate unchanged.
            logger.error("Unhandled exception: %s", str(ex))
            raise
    else:
        logger.debug("form is invalid: %s", str(form.errors))
        if "order" in form.errors or "size" in form.errors:
            # It's likely that the user tried to set these parameters manually,
            # or that the search originated from somewhere else (and was
            # configured incorrectly).
            simple_url = url_for("ui.search")
            raise BadRequest(
                f"It looks like there's something odd about your search"
                f" request. Please try <a href='{simple_url}'>starting"
                f" over</a>.")
        # Any other validation error: render the form again with its errors
        # and no query.
        q = None

    response_data["query"] = q
    response_data["form"] = form
    return response_data, HTTPStatus.OK, {}