def search(self, query):
    """Do a Yahoo! BOSS web search for *query*.

    The query is wrapped in double quotes before being sent. No result
    count is passed to the service; the original documentation for this
    method states that no more than fifty URLs come back, ranked by
    relevance (as determined by Yahoo).

    Returns a list of URLs taken from the ``url`` field of each result, or
    an empty list when the decoded response has no
    ``bossresponse -> web -> results`` entry.

    Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` if the response
    status is not ``"200"`` or the body is not valid JSON.
    """
    base_url = "http://yboss.yahooapis.com/ysearch/web"
    # str.join() takes exactly one iterable argument, so the former
    # ``query.join('"', '"')`` raised TypeError on every call. Wrapping the
    # query in double quotes is done with plain concatenation instead.
    quoted = quote_plus('"' + query + '"')
    # NOTE(review): urlencode() percent-encodes this already-encoded value a
    # second time. Kept as-is; confirm whether the service expects that.
    params = {"q": quoted, "type": "html,text", "format": "json"}
    url = "{0}?{1}".format(base_url, urlencode(params))

    consumer = oauth.Consumer(key=self.cred["key"],
                              secret=self.cred["secret"])
    client = oauth.Client(consumer)
    headers, body = client.request(url, "GET")

    # The status header is compared as a string, not an integer.
    if headers["status"] != "200":
        e = "Yahoo! BOSS Error: got response code '{0}':\n{1}'"
        raise SearchQueryError(e.format(headers["status"], body))

    try:
        res = loads(body)
    except ValueError:
        e = "Yahoo! BOSS Error: JSON could not be decoded"
        raise SearchQueryError(e)

    try:
        results = res["bossresponse"]["web"]["results"]
    except KeyError:
        # A response without a results section is treated as "no hits".
        return []
    return [result["url"] for result in results]
def search(self, query):
    """Do a Yandex web search for *query*.

    Every character of the query other than ASCII letters, digits and
    spaces is dropped, and the remainder is sent wrapped in double quotes.
    The host defaults to ``yandex.com`` unless the credentials carry a
    ``proxy`` entry.

    Returns a list with the text of every ``url`` element in the XML
    response, ranked by relevance (as determined by Yandex).

    Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` when lxml
    reports an error while handling the response.
    """
    host = self.cred.get("proxy", "yandex.com")
    endpoint = "https://{0}/search/xml?".format(host)

    cleaned = re_sub(r"[^a-zA-Z0-9 ]", "", query).encode("utf8")
    phrase = '"' + cleaned + '"'

    params = {
        "user": self.cred["user"],
        "key": self.cred["key"],
        "query": phrase,
        "l10n": "en",
        "filter": "none",
        "maxpassages": "1",
        "groupby": "mode=flat.groups-on-page={0}".format(self.count)
    }

    payload = self._open(endpoint + urlencode(params))
    try:
        # Parsing and the XPath lookup share one handler, so any lxml
        # error from either step is reported the same way.
        tree = lxml.etree.fromstring(payload)
        urls = []
        for node in tree.xpath(".//url"):
            urls.append(node.text)
        return urls
    except lxml.etree.Error as exc:
        raise SearchQueryError("Yandex XML parse error: " + str(exc))
def search(self, query):
    """Do a Google web search for *query*.

    Double quotes inside the query are removed, and the remainder is sent
    wrapped in double quotes. The host defaults to ``www.googleapis.com``
    unless the credentials carry a ``proxy`` entry.

    Returns a list of the ``link`` values from the response's ``items``,
    ranked by relevance (as determined by Google). An empty list is
    returned if ``items`` or any ``link`` key is missing.

    Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` when the
    response body is not valid JSON.
    """
    host = self.cred.get("proxy", "www.googleapis.com")
    endpoint = "https://{0}/customsearch/v1?".format(host)
    phrase = '"' + query.replace('"', "").encode("utf8") + '"'

    params = {
        "cx": self.cred["id"],
        "key": self.cred["key"],
        "q": phrase,
        "alt": "json",
        "num": str(self.count),
        "safe": "off",
        "fields": "items(link)"
    }

    body = self._open(endpoint + urlencode(params))
    try:
        decoded = loads(body)
    except ValueError:
        raise SearchQueryError("Google Error: JSON could not be decoded")

    links = []
    try:
        # A missing "items" key and a missing "link" key on any item both
        # collapse to an empty result.
        for item in decoded["items"]:
            links.append(item["link"])
    except KeyError:
        return []
    return links
def search(self, query):
    """Do a Bing web search for *query*.

    Double quotes inside the query are removed, and the remainder is sent
    wrapped in double quotes inside the single quotes used for the other
    string parameters. The service path is ``SearchWeb`` when the
    credentials' ``type`` is ``"searchweb"`` and ``Search`` otherwise.

    Returns a list of the ``Url`` values from ``d -> results`` in the
    response, ranked by relevance (as determined by Bing), or an empty
    list if that section is missing.

    Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` when the
    response body is not valid JSON.
    """
    if self.cred["type"] == "searchweb":
        service = "SearchWeb"
    else:
        service = "Search"
    endpoint = "https://api.datamarket.azure.com/Bing/{0}/Web?".format(
        service)

    phrase = "'\"" + query.replace('"', "").encode("utf8") + "\"'"
    params = {
        "$format": "json",
        "$top": str(self.count),
        "Query": phrase,
        "Market": "'en-US'",
        "Adult": "'Off'",
        "Options": "'DisableLocationDetection'",
        "WebSearchOptions": "'DisableHostCollapsing+DisableQueryAlterations'"
    }

    body = self._open(endpoint + urlencode(params))
    try:
        decoded = loads(body)
    except ValueError:
        raise SearchQueryError("Bing Error: JSON could not be decoded")

    try:
        hits = decoded["d"]["results"]
    except KeyError:
        return []
    # Only the lookup above is guarded; a hit without "Url" still raises.
    return [hit["Url"] for hit in hits]
def search(self, query):
    """Do a Yahoo! BOSS web search for *query*.

    The query is sent wrapped in double quotes as part of an OAuth request
    signed with HMAC-SHA1, with ``count`` fixed at ``"5"``.

    Returns a list of the ``url`` values from
    ``bossresponse -> web -> results``, no more than five, ranked by
    relevance (as determined by Yahoo), or an empty list if that section
    is missing.

    Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` when the
    request fails, the response code is not 200, or the body is not valid
    JSON.
    """
    consumer = oauth.Consumer(key=self.cred["key"],
                              secret=self.cred["secret"])
    endpoint = "http://yboss.yahooapis.com/ysearch/web"
    params = {
        "oauth_version": oauth.OAUTH_VERSION,
        "oauth_nonce": oauth.generate_nonce(),
        "oauth_timestamp": oauth.Request.make_timestamp(),
        "oauth_consumer_key": consumer.key,
        "q": '"' + query.encode("utf8") + '"',
        "count": "5",
        "type": "html,text,pdf",
        "format": "json",
    }

    request = oauth.Request(method="GET", url=endpoint, parameters=params)
    # Signed with the consumer only; no access token is supplied.
    request.sign_request(oauth.SignatureMethod_HMAC_SHA1(), consumer, None)

    try:
        response = self.opener.open(self._build_url(endpoint, request))
        payload = response.read()
    except (URLError, error) as exc:
        raise SearchQueryError("Yahoo! BOSS Error: " + str(exc))

    # Decompress the body when the response says it is gzip-encoded.
    if response.headers.get("Content-Encoding") == "gzip":
        payload = GzipFile(fileobj=StringIO(payload)).read()

    status = response.getcode()
    if status != 200:
        template = "Yahoo! BOSS Error: got response code '{0}':\n{1}'"
        raise SearchQueryError(template.format(status, payload))

    try:
        decoded = loads(payload)
    except ValueError:
        raise SearchQueryError(
            "Yahoo! BOSS Error: JSON could not be decoded")

    try:
        hits = decoded["bossresponse"]["web"]["results"]
    except KeyError:
        return []
    return [hit["url"] for hit in hits]
def _open(self, *args):
    """Open a URL through ``self.opener`` and return the response body.

    All positional arguments are forwarded unchanged to
    ``self.opener.open``. A body whose ``Content-Encoding`` header is
    ``gzip`` is decompressed before being returned.

    Raises ``SearchQueryError``, prefixed with ``self.name``, when opening
    or reading fails with ``URLError`` or ``error``, or when the response
    code is not 200.
    """
    try:
        reply = self.opener.open(*args)
        payload = reply.read()
    except (URLError, error) as exc:
        raise SearchQueryError("{0} Error: {1}".format(self.name, exc))

    encoding = reply.headers.get("Content-Encoding")
    if encoding == "gzip":
        payload = GzipFile(fileobj=StringIO(payload)).read()

    status = reply.getcode()
    if status == 200:
        return payload

    # Any other status is reported together with the (decoded) body.
    template = "{0} Error: got response code '{1}':\n{2}'"
    raise SearchQueryError(template.format(self.name, status, payload))
def search(self, query):
    """Do a Yahoo! BOSS web search for *query*.

    The query is sent wrapped in double quotes as part of an OAuth request
    signed with HMAC-SHA1, asking for ``self.count`` results. The signed
    URL is fetched through ``self._open``.

    Returns a list of the ``url`` values from
    ``bossresponse -> web -> results``, ranked by relevance (as determined
    by Yahoo), or an empty list if that section is missing.

    Raises :py:exc:`~earwigbot.exceptions.SearchQueryError` when the
    response body is not valid JSON.
    """
    consumer = oauth.Consumer(key=self.cred["key"],
                              secret=self.cred["secret"])
    endpoint = "http://yboss.yahooapis.com/ysearch/web"
    params = {
        "oauth_version": oauth.OAUTH_VERSION,
        "oauth_nonce": oauth.generate_nonce(),
        "oauth_timestamp": oauth.Request.make_timestamp(),
        "oauth_consumer_key": consumer.key,
        "q": '"' + query.encode("utf8") + '"',
        "count": str(self.count),
        "type": "html,text,pdf",
        "format": "json",
    }

    request = oauth.Request(method="GET", url=endpoint, parameters=params)
    # Signed with the consumer only; no access token is supplied.
    request.sign_request(oauth.SignatureMethod_HMAC_SHA1(), consumer, None)

    payload = self._open(self._build_url(endpoint, request))
    try:
        decoded = loads(payload)
    except ValueError:
        raise SearchQueryError(
            "Yahoo! BOSS Error: JSON could not be decoded")

    try:
        hits = decoded["bossresponse"]["web"]["results"]
    except KeyError:
        return []
    return [hit["url"] for hit in hits]