def test_bytes(self):
    for secret_key in ['secret', b'secret', 1]:
        if secret_key == 1:
            with self.assertRaises(TypeError):
                webutils.new_hmac(secret_key, b'http://example.com')
            continue
        res = webutils.new_hmac(secret_key, b'http://example.com')
        self.assertEqual(
            res,
            '23e2baa2404012a5cc8e4a18b4aabf0dde4cb9b56f679ddc0fd6d7c24339d819')
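A minimal sketch of a new_hmac implementation consistent with this test: it must accept str or bytes secrets, raise TypeError for anything else, and return a hex SHA-256 digest. This is inferred from the test's assertions, not necessarily the library's exact code.

# Sketch only: behaviour inferred from test_bytes above, not the canonical code.
import hashlib
import hmac

def new_hmac(secret_key, url):
    # Accept str (encoded to UTF-8) or bytes; reject everything else.
    if isinstance(secret_key, str):
        secret_key = secret_key.encode('utf-8')
    elif not isinstance(secret_key, bytes):
        raise TypeError('secret_key must be str or bytes')
    return hmac.new(secret_key, url, hashlib.sha256).hexdigest()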
def image_proxify(url):
    if url.startswith('//'):
        url = 'https:' + url

    if not request.preferences.get_value('image_proxy'):
        return url

    if url.startswith('data:image/'):
        # 50 is an arbitrary number to get only the beginning of the image.
        partial_base64 = url[len('data:image/'):50].split(';')
        if len(partial_base64) == 2 \
           and partial_base64[0] in ['gif', 'png', 'jpeg', 'pjpeg', 'webp', 'tiff', 'bmp'] \
           and partial_base64[1].startswith('base64,'):
            return url
        else:
            return None

    if settings.get('result_proxy'):
        return proxify(url)

    h = new_hmac(settings['server']['secret_key'], url.encode())

    return '{0}?{1}'.format(url_for('image_proxy'),
                            urlencode(dict(url=url.encode(), h=h)))
def image_proxify(url): if url.startswith("//"): url = "https:" + url if not request.preferences.get_value("image_proxy"): return url if url.startswith("data:image/"): # 50 is an arbitrary number to get only the beginning of the image. partial_base64 = url[len("data:image/"):50].split(";") if (len(partial_base64) == 2 and partial_base64[0] in ["gif", "png", "jpeg", "pjpeg", "webp", "tiff", "bmp"] and partial_base64[1].startswith("base64,")): return url else: return None if settings.get("result_proxy"): return proxify(url) h = new_hmac(settings["server"]["secret_key"], url.encode()) return "{0}?{1}".format(url_for("image_proxy"), urlencode(dict(url=url.encode(), h=h)))
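The sign/verify pair behind image_proxify and the image_proxy handlers below can be exercised in isolation. The helpers here are a hypothetical standalone sketch (sign_image_url and verify_image_url are illustrative names, not part of the codebase), assuming the same new_hmac digest is computed on both sides.

# Hypothetical round trip of the scheme above; helper names are illustrative.
from urllib.parse import parse_qs, urlencode

def sign_image_url(secret_key, url):
    # Mirrors image_proxify: attach the HMAC of the URL as the 'h' parameter.
    return '/image_proxy?' + urlencode({'url': url, 'h': new_hmac(secret_key, url.encode())})

def verify_image_url(secret_key, query_string):
    # Mirrors the check in image_proxy: recompute the digest and compare.
    args = parse_qs(query_string)
    url, h = args['url'][0], args['h'][0]
    return h == new_hmac(secret_key, url.encode())

signed = sign_image_url('secret', 'https://example.com/a.png')
assert verify_image_url('secret', signed.split('?', 1)[1])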
def image_proxy():
    # Read the parameter before encoding: calling .encode() on a missing
    # (None) parameter would raise AttributeError before the check runs.
    url = request.args.get("url")
    if not url:
        return "", 400
    url = url.encode()

    h = new_hmac(settings["server"]["secret_key"], url)

    if h != request.args.get("h"):
        return "", 400

    headers = dict_subset(request.headers, {"If-Modified-Since", "If-None-Match"})
    headers["User-Agent"] = gen_useragent()

    resp = requests.get(
        url,
        stream=True,
        timeout=settings["outgoing"]["request_timeout"],
        headers=headers,
        proxies=get_global_proxies(),
    )

    if resp.status_code == 304:
        return "", resp.status_code

    if resp.status_code != 200:
        logger.debug("image-proxy: wrong response code: {0}".format(
            resp.status_code))
        if resp.status_code >= 400:
            return "", resp.status_code
        return "", 400

    if not resp.headers.get("content-type", "").startswith("image/"):
        logger.debug("image-proxy: wrong content-type: {0}".format(
            resp.headers.get("content-type")))
        return "", 400

    img = b""
    chunk_counter = 0

    for chunk in resp.iter_content(1024 * 1024):
        chunk_counter += 1
        if chunk_counter > 5:
            return "", 502  # Bad gateway - file is too big (>5M)
        img += chunk

    headers = dict_subset(
        resp.headers,
        {"Content-Length", "Length", "Date", "Last-Modified", "Expires", "Etag"},
    )

    return Response(img, mimetype=resp.headers["content-type"], headers=headers)
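dict_subset is used throughout these handlers to copy a whitelisted set of headers between the client request and the upstream response. A minimal sketch of such a helper, assuming its behaviour is simply to keep the listed keys that are actually present:

# Minimal sketch of the dict_subset helper used above (assumed behaviour:
# keep only the requested keys that exist in the source mapping).
def dict_subset(d, properties):
    return {k: d[k] for k in properties if k in d}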
def image_proxy():
    url = request.args.get('url')
    if not url:
        return '', 400

    h = new_hmac(settings['server']['secret_key'], url.encode())

    if h != request.args.get('h'):
        return '', 400

    maximum_size = 5 * 1024 * 1024

    try:
        headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
        headers['User-Agent'] = gen_useragent()
        stream = http_stream(
            method='GET',
            url=url,
            headers=headers,
            timeout=settings['outgoing']['request_timeout'],
            allow_redirects=True,
            max_redirects=20)
        resp = next(stream)

        content_length = resp.headers.get('Content-Length')
        if content_length and content_length.isdigit() and int(content_length) > maximum_size:
            return 'Max size', 400

        if resp.status_code == 304:
            return '', resp.status_code

        if resp.status_code != 200:
            logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
            if resp.status_code >= 400:
                return '', resp.status_code
            return '', 400

        if not resp.headers.get('content-type', '').startswith('image/'):
            logger.debug('image-proxy: wrong content-type: {0}'.format(
                resp.headers.get('content-type')))
            return '', 400

        headers = dict_subset(
            resp.headers,
            {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'})

        total_length = 0

        def forward_chunk():
            nonlocal total_length
            for chunk in stream:
                total_length += len(chunk)
                if total_length > maximum_size:
                    break
                yield chunk

        return Response(forward_chunk(), mimetype=resp.headers['Content-Type'], headers=headers)
    except httpx.HTTPError:
        return '', 400
def image_proxy():
    # Read the parameter before encoding: calling .encode() on a missing
    # (None) parameter would raise AttributeError before the check runs.
    url = request.args.get('url')
    if not url:
        return '', 400
    url = url.encode()

    h = new_hmac(settings['server']['secret_key'], url)

    if h != request.args.get('h'):
        return '', 400

    headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'})
    headers['User-Agent'] = gen_useragent()

    resp = requests.get(url,
                        stream=True,
                        timeout=settings['outgoing']['request_timeout'],
                        headers=headers,
                        proxies=outgoing_proxies)

    if resp.status_code == 304:
        return '', resp.status_code

    if resp.status_code != 200:
        logger.debug('image-proxy: wrong response code: {0}'.format(
            resp.status_code))
        if resp.status_code >= 400:
            return '', resp.status_code
        return '', 400

    if not resp.headers.get('content-type', '').startswith('image/'):
        logger.debug('image-proxy: wrong content-type: {0}'.format(
            resp.headers.get('content-type')))
        return '', 400

    img = b''
    chunk_counter = 0

    for chunk in resp.iter_content(1024 * 1024):
        chunk_counter += 1
        if chunk_counter > 5:
            return '', 502  # Bad gateway - file is too big (>5M)
        img += chunk

    headers = dict_subset(resp.headers, {
        'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'
    })

    return Response(img, mimetype=resp.headers['content-type'], headers=headers)
def response(resp):
    results = []
    dom = html.fromstring(resp.text)

    try:
        # The result count is phrased like "...约100,000,000个" ("about N results");
        # split on U+7EA6 (约) and U+4E2A (个) to isolate the number.
        results.append({'number_of_results': int(
            dom.xpath('//span[@class="nums_text"]/text()')[0]
            .split(u'\u7ea6')[1].split(u'\u4e2a')[0].replace(',', ''))})
    except Exception as e:
        logger.debug('result error :\n%s', e)

    # parse results
    for result in dom.xpath('//div[@class="result c-container new-pmd"]'):
        title = extract_text(result.xpath('.//h3/a')[0])

        # when search query is Chinese words
        try:
            url = result.xpath('.//h3[@class="t"]/a')[0].attrib.get('href')
            # To generate miji url with baidu url
            url = get_baidu_link_location(url)
            content = extract_text(result.xpath('.//div[@class="c-abstract"]')
                                   or result.xpath('.//div[@class="c-abstract c-abstract-en"]'))
            # append result
            results.append({'url': url, 'title': title, 'content': content})
        # when search query is English words
        except Exception:
            try:
                url = result.xpath('.//h3[@class="t"]/a')[0].attrib.get('href')
                content = extract_text(result.xpath('.//div[@class="c-span18 c-span-last"]')[0])
                # To generate miji url with baidu url
                url = settings['result_proxy'].get('server_name') + "/url_proxy?proxyurl=" \
                    + url + "&token=" + new_hmac(settings['result_proxy']['key'], url.encode("utf-8"))
                # append result
                results.append({'url': url, 'title': title, 'content': content})
            except Exception as e:
                logger.debug('result error :\n%s', e)

    # return results
    return results
def image_proxy():
    # pylint: disable=too-many-return-statements, too-many-branches

    url = request.args.get('url')
    if not url:
        return '', 400

    h = new_hmac(settings['server']['secret_key'], url.encode())

    if h != request.args.get('h'):
        return '', 400

    maximum_size = 5 * 1024 * 1024
    forward_resp = False
    resp = None

    try:
        request_headers = {
            'User-Agent': gen_useragent(),
            'Accept': 'image/webp,*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Sec-GPC': '1',
            'DNT': '1',
        }
        set_context_network_name('image_proxy')
        stream = http_stream(method='GET',
                             url=url,
                             headers=request_headers,
                             timeout=settings['outgoing']['request_timeout'],
                             follow_redirects=True,
                             max_redirects=20)
        resp = next(stream)

        content_length = resp.headers.get('Content-Length')
        if content_length and content_length.isdigit() and int(content_length) > maximum_size:
            return 'Max size', 400

        if resp.status_code != 200:
            logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code))
            if resp.status_code >= 400:
                return '', resp.status_code
            return '', 400

        if not resp.headers.get('Content-Type', '').startswith('image/'):
            logger.debug('image-proxy: wrong content-type: %s',
                         resp.headers.get('Content-Type', ''))
            return '', 400

        forward_resp = True
    except httpx.HTTPError:
        logger.exception('HTTP error')
        return '', 400
    finally:
        if resp and not forward_resp:
            # the code is about to return an HTTP 400 error to the browser
            # we make sure to close the response between searxng and the HTTP server
            try:
                resp.close()
            except httpx.HTTPError:
                logger.exception('HTTP error on closing')

    try:
        headers = dict_subset(
            resp.headers,
            {'Content-Type', 'Content-Encoding', 'Content-Length', 'Length'})

        def forward_chunk():
            total_length = 0
            for chunk in stream:
                total_length += len(chunk)
                if total_length > maximum_size:
                    break
                yield chunk

        return Response(forward_chunk(), mimetype=resp.headers['Content-Type'], headers=headers)
    except httpx.HTTPError:
        return '', 400
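The size cap inside forward_chunk() can be factored out and tested on its own. Below is a minimal standalone sketch of that pattern (the capped name and the 5 MiB default are illustrative, not part of the codebase): it stops forwarding as soon as the accumulated length exceeds the limit, instead of buffering the whole body as the older variants above do.

# Standalone sketch of the size-cap pattern used by forward_chunk() above;
# 'capped' is an illustrative name, not part of the codebase.
def capped(chunks, maximum_size=5 * 1024 * 1024):
    total_length = 0
    for chunk in chunks:
        total_length += len(chunk)
        if total_length > maximum_size:
            break  # stop mid-stream instead of buffering an oversized body
        yield chunk

# The generator truncates at the first chunk that pushes the total over the cap:
assert b''.join(capped([b'ab', b'cd'], maximum_size=3)) == b'ab'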