def response(resp):
    """Build searx results from a generic xpath-engine response.

    Extracts url/title/content for every node matched by ``results_xpath``
    and, when ``suggestion_xpath`` is configured, appends suggestion items.
    """
    results = []
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        try:
            url = parse_url(extract_url(result.xpath(url_xpath), search_url))
            title = extract_text(result.xpath(title_xpath)[0])
        except Exception:
            # malformed entry: skip it (a bare ``except:`` would also have
            # swallowed SystemExit/KeyboardInterrupt)
            continue

        # a result without a snippet must not abort the whole parse
        # (previously an unguarded [0] raised IndexError here)
        content_nodes = result.xpath(content_xpath)
        content = extract_text(content_nodes[0]) if content_nodes else ''

        # append result
        results.append({'url': url, 'title': title, 'content': content})

    # if no suggestion xpath is configured, return results
    if not suggestion_xpath:
        return results

    # parse suggestions
    for suggestion in dom.xpath(suggestion_xpath):
        # append suggestion
        results.append({'suggestion': extract_text(suggestion)})

    # return results
    return results
def response(resp):
    """Parse a google response.

    Skips links back to google news, expands image results via
    ``parse_images`` and collects normal web results plus suggestions.
    """
    results = []
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        try:
            # title extraction is inside the try: a result without a title
            # previously raised IndexError and aborted the whole page
            title = extract_text(result.xpath(title_xpath)[0])
            url = parse_url(extract_url(result.xpath(url_xpath), search_url))
            parsed_url = urlparse(url)

            if parsed_url.netloc == google_hostname and parsed_url.path == search_path:
                # remove the link to google news
                continue

            if parsed_url.netloc == google_hostname and parsed_url.path == images_path:
                # images result
                results = results + parse_images(result)
            else:
                # normal result
                content = extract_text(result.xpath(content_xpath)[0])
                # append result
                results.append({'url': url, 'title': title, 'content': content})
        except Exception:
            # malformed entry: skip it and keep parsing the rest
            continue

    # parse suggestion
    for suggestion in dom.xpath(suggestion_xpath):
        # append suggestion
        results.append({'suggestion': extract_text(suggestion)})

    # return results
    return results
def response(resp):
    """Build searx results from an xpath-engine response, plus suggestions.

    Like the generic xpath parser but evaluates ``suggestion_xpath``
    unconditionally and short-circuits when it matches nothing.
    """
    results = []
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        try:
            url = parse_url(extract_url(result.xpath(url_xpath), search_url))
            title = extract_text(result.xpath(title_xpath)[0])
        except Exception:
            # malformed entry: skip it instead of aborting the page
            continue

        # a result without a snippet must not abort the whole parse
        # (previously an unguarded [0] raised IndexError here)
        content_nodes = result.xpath(content_xpath)
        content = extract_text(content_nodes[0]) if content_nodes else ''

        # append result
        results.append({'url': url, 'title': title, 'content': content})

    # if no suggestion found, return results
    suggestions = dom.xpath(suggestion_xpath)
    if not suggestions:
        return results

    # parse suggestion
    for suggestion in suggestions:
        # append suggestion
        results.append({'suggestion': extract_text(suggestion)})

    # return results
    return results
def response(resp):
    """Parse a news response.

    Converts relative publication dates ("N minutes ago", "N days ago",
    "N hours, M minutes ago") to datetimes; anything else goes through
    ``parser.parse`` with a fallback to "now".
    """
    results = []
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        urls = result.xpath(url_xpath)
        if len(urls) != 1:
            # ambiguous or missing link: skip the entry
            continue
        url = sanitize_url(parse_url(extract_url(urls, search_url)))
        title = extract_text(result.xpath(title_xpath)[0])
        content = extract_text(result.xpath(content_xpath)[0])

        # parse publishedDate
        publishedDate = extract_text(result.xpath(publishedDate_xpath)[0])

        # still useful ?
        if re.match(r"^[0-9]+ minute(s|) ago$", publishedDate):
            publishedDate = datetime.now() - timedelta(
                minutes=int(re.match(r'\d+', publishedDate).group()))
        elif re.match(r"^[0-9]+ days? ago$", publishedDate):
            publishedDate = datetime.now() - timedelta(
                days=int(re.match(r'\d+', publishedDate).group()))
        elif re.match(r"^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", publishedDate):
            timeNumbers = re.findall(r'\d+', publishedDate)
            publishedDate = datetime.now()\
                - timedelta(hours=int(timeNumbers[0]))\
                - timedelta(minutes=int(timeNumbers[1]))
        else:
            try:
                publishedDate = parser.parse(publishedDate)
            except Exception:
                # unparsable date string: fall back to "now" rather than
                # dropping the result (was a bare ``except:``)
                publishedDate = datetime.now()

        # a year of 1900 means the source omitted the year: assume current
        if publishedDate.year == 1900:
            publishedDate = publishedDate.replace(year=datetime.now().year)

        # append result
        results.append({
            'url': url,
            'title': title,
            'content': content,
            'publishedDate': publishedDate
        })

    # return results
    return results
def response(resp):
    """Collect url/title/content results, then optional suggestions."""
    dom = html.fromstring(resp.text)
    collected = []

    # one dict per matched result node
    for node in dom.xpath(results_xpath):
        link = parse_url(extract_url(node.xpath(url_xpath), search_url))
        heading = extract_text(node.xpath(title_xpath)[0])
        snippet = extract_text(node.xpath(content_xpath)[0])
        collected.append({'url': link, 'title': heading, 'content': snippet})

    # suggestions are only gathered when an xpath is configured
    if suggestion_xpath:
        collected.extend({'suggestion': extract_text(node)}
                         for node in dom.xpath(suggestion_xpath))

    return collected
def response(resp):
    """Parse a news response.

    Converts relative publication dates ("N minutes ago", "N days ago",
    "N hours, M minutes ago") to datetimes; anything else goes through
    ``parser.parse`` with a fallback to "now".
    """
    results = []
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        urls = result.xpath(url_xpath)
        if len(urls) != 1:
            # ambiguous or missing link: skip the entry
            continue
        url = sanitize_url(parse_url(extract_url(urls, search_url)))
        title = extract_text(result.xpath(title_xpath)[0])
        content = extract_text(result.xpath(content_xpath)[0])

        # parse publishedDate
        publishedDate = extract_text(result.xpath(publishedDate_xpath)[0])

        # still useful ?
        if re.match(r"^[0-9]+ minute(s|) ago$", publishedDate):
            publishedDate = datetime.now() - timedelta(
                minutes=int(re.match(r'\d+', publishedDate).group()))
        elif re.match(r"^[0-9]+ days? ago$", publishedDate):
            publishedDate = datetime.now() - timedelta(
                days=int(re.match(r'\d+', publishedDate).group()))
        elif re.match(r"^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", publishedDate):
            timeNumbers = re.findall(r'\d+', publishedDate)
            publishedDate = datetime.now()\
                - timedelta(hours=int(timeNumbers[0]))\
                - timedelta(minutes=int(timeNumbers[1]))
        else:
            try:
                publishedDate = parser.parse(publishedDate)
            except Exception:
                # unparsable date string: fall back to "now" rather than
                # dropping the result (was a bare ``except:``)
                publishedDate = datetime.now()

        # a year of 1900 means the source omitted the year: assume current
        if publishedDate.year == 1900:
            publishedDate = publishedDate.replace(year=datetime.now().year)

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'publishedDate': publishedDate})

    # return results
    return results
def response(resp):
    """Parse a google response.

    Skips links back to google news and image results (only thumbnails are
    provided), and collects normal web results plus suggestions.
    """
    results = []
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        try:
            # title extraction is inside the try: a result without a title
            # previously raised IndexError and aborted the whole page
            title = extract_text(result.xpath(title_xpath)[0])
            url = parse_url(extract_url(result.xpath(url_xpath), search_url))
            parsed_url = urlparse(url)

            if (parsed_url.netloc == google_hostname
                    and parsed_url.path == search_path):
                # remove the link to google news
                continue

            # images result
            if (parsed_url.netloc == google_hostname
                    and parsed_url.path == images_path):
                # only thumbnail image provided,
                # so skipping image results
                # results = results + parse_images(result)
                pass
            else:
                # normal result
                content = extract_text(result.xpath(content_xpath)[0])
                # append result
                results.append({
                    'url': url,
                    'title': title,
                    'content': content
                })
        except Exception:
            # malformed entry: skip it and keep parsing the rest
            continue

    # parse suggestion
    for suggestion in dom.xpath(suggestion_xpath):
        # append suggestion
        results.append({'suggestion': extract_text(suggestion)})

    # return results
    return results
def response(resp):
    """Parse an ahmia (onion search) response.

    Trims the full result list to the requested page, strips ahmia's
    redirect wrapper from each url, and collects spelling corrections and
    the total result count.
    """
    results = []
    dom = fromstring(resp.text)

    # trim results so there's not way too many at once
    first_result_index = page_size * (resp.search_params.get('pageno', 1) - 1)
    all_results = eval_xpath_list(dom, results_xpath)
    trimmed_results = all_results[first_result_index:first_result_index + page_size]

    # get results
    for result in trimmed_results:
        # remove ahmia url and extract the actual url for the result
        raw_url = extract_url(eval_xpath_list(result, url_xpath, min_len=1),
                              search_url)
        cleaned_url = parse_qs(urlparse(raw_url).query).get('redirect_url', [''])[0]

        title = extract_text(eval_xpath(result, title_xpath))
        content = extract_text(eval_xpath(result, content_xpath))

        results.append({
            'url': cleaned_url,
            'title': title,
            'content': content,
            'is_onion': True
        })

    # get spelling corrections
    for correction in eval_xpath_list(dom, correction_xpath):
        results.append({'correction': extract_text(correction)})

    # get number of results
    number_of_results = eval_xpath(dom, number_of_results_xpath)
    if number_of_results:
        try:
            results.append(
                {'number_of_results': int(extract_text(number_of_results))})
        except (ValueError, TypeError):
            # count text missing or not numeric: omit the entry
            # (was a bare ``except:``)
            pass

    return results
def response(resp):
    """Parse a yahoo-style response: result count, web results, suggestions."""
    results = []
    dom = html.fromstring(resp.text)

    # parse the total number of results, if the pagination block is present
    try:
        results_num = int(
            eval_xpath(dom, '//div[@class="compPagination"]/span[last()]/text()')
            [0].split()[0].replace(',', ''))
        results.append({'number_of_results': results_num})
    except (IndexError, ValueError):
        # block missing or not numeric: omit the count (was a bare except)
        pass

    # parse results
    for result in eval_xpath(dom, results_xpath):
        try:
            url = parse_url(
                extract_url(eval_xpath(result, url_xpath), search_url))
            title = extract_text(eval_xpath(result, title_xpath)[0])
        except Exception:
            # malformed entry: skip it instead of aborting the page
            continue

        # a result without a snippet must not abort the whole parse
        # (previously an unguarded [0] raised IndexError here)
        content_nodes = eval_xpath(result, content_xpath)
        content = extract_text(content_nodes[0]) if content_nodes else ''

        # append result
        results.append({'url': url, 'title': title, 'content': content})

    # if no suggestion found, return results
    suggestions = eval_xpath(dom, suggestion_xpath)
    if not suggestions:
        return results

    # parse suggestion
    for suggestion in suggestions:
        # append suggestion
        results.append({'suggestion': extract_text(suggestion)})

    # return results
    return results
def response(resp):
    """Parse a yahoo-style response: result count, web results, suggestions."""
    results = []
    dom = html.fromstring(resp.text)

    # parse the total number of results, if the pagination block is present
    try:
        results_num = int(dom.xpath('//div[@class="compPagination"]/span[last()]/text()')[0]
                          .split()[0].replace(',', ''))
        results.append({'number_of_results': results_num})
    except (IndexError, ValueError):
        # block missing or not numeric: omit the count (was a bare except)
        pass

    # parse results
    for result in dom.xpath(results_xpath):
        try:
            url = parse_url(extract_url(result.xpath(url_xpath), search_url))
            title = extract_text(result.xpath(title_xpath)[0])
        except Exception:
            # malformed entry: skip it instead of aborting the page
            continue

        # a result without a snippet must not abort the whole parse
        # (previously an unguarded [0] raised IndexError here)
        content_nodes = result.xpath(content_xpath)
        content = extract_text(content_nodes[0]) if content_nodes else ''

        # append result
        results.append({'url': url, 'title': title, 'content': content})

    # if no suggestion found, return results
    suggestions = dom.xpath(suggestion_xpath)
    if not suggestions:
        return results

    # parse suggestion
    for suggestion in suggestions:
        # append suggestion
        results.append({'suggestion': extract_text(suggestion)})

    # return results
    return results
def response(resp):
    """Parse a news response, converting relative dates to datetimes."""
    results = []
    dom = html.fromstring(resp.text)

    for result in dom.xpath(results_xpath):
        url = parse_url(extract_url(result.xpath(url_xpath), search_url))
        title = extract_text(result.xpath(title_xpath)[0])
        content = extract_text(result.xpath(content_xpath)[0])
        publishedDate = extract_text(result.xpath(publishedDate_xpath)[0])

        if re.match(r"^[0-9]+ minute(s|) ago$", publishedDate):
            publishedDate = datetime.now() - timedelta(minutes=int(re.match(r'\d+', publishedDate).group()))  # noqa
        elif re.match(r"^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", publishedDate):
            timeNumbers = re.findall(r'\d+', publishedDate)
            publishedDate = datetime.now()\
                - timedelta(hours=int(timeNumbers[0]))\
                - timedelta(minutes=int(timeNumbers[1]))
        else:
            # TODO year in string possible?
            # NOTE(review): "%H:%M%p" mixes 24-hour %H with am/pm %p;
            # %I is probably intended -- confirm against live data
            try:
                publishedDate = datetime.strptime(publishedDate, "%b %d %H:%M%p")
            except ValueError:
                # unexpected date format: fall back to "now" instead of
                # letting one bad entry abort the whole result page
                publishedDate = datetime.now()

        # a year of 1900 means the source omitted the year: assume current
        if publishedDate.year == 1900:
            publishedDate = publishedDate.replace(year=datetime.now().year)

        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'publishedDate': publishedDate})

    if not suggestion_xpath:
        return results

    for suggestion in dom.xpath(suggestion_xpath):
        results.append({'suggestion': extract_text(suggestion)})

    return results
def response(resp):
    """Parse a news response, converting relative dates to datetimes."""
    results = []
    dom = html.fromstring(resp.text)

    for result in dom.xpath(results_xpath):
        url = parse_url(extract_url(result.xpath(url_xpath), search_url))
        title = extract_text(result.xpath(title_xpath)[0])
        content = extract_text(result.xpath(content_xpath)[0])
        publishedDate = extract_text(result.xpath(publishedDate_xpath)[0])

        if re.match(r"^[0-9]+ minute(s|) ago$", publishedDate):
            publishedDate = datetime.now() - timedelta(minutes=int(re.match(r'\d+', publishedDate).group()))  # noqa
        elif re.match(r"^[0-9]+ hour(s|), [0-9]+ minute(s|) ago$", publishedDate):
            timeNumbers = re.findall(r'\d+', publishedDate)
            publishedDate = datetime.now()\
                - timedelta(hours=int(timeNumbers[0]))\
                - timedelta(minutes=int(timeNumbers[1]))
        else:
            try:
                publishedDate = parser.parse(publishedDate)
            except Exception:
                # unparsable date string: fall back to "now" instead of
                # letting one bad entry abort the whole result page
                publishedDate = datetime.now()

        # a year of 1900 means the source omitted the year: assume current
        if publishedDate.year == 1900:
            publishedDate = publishedDate.replace(year=datetime.now().year)

        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'publishedDate': publishedDate})

    if not suggestion_xpath:
        return results

    for suggestion in dom.xpath(suggestion_xpath):
        results.append({'suggestion': extract_text(suggestion)})

    return results
def response(resp):
    """Parse a google web response.

    Detects "sorry"/captcha pages, extracts an instant answer, the result
    count, normal web results, suggestions and spelling corrections.
    Google-internal links (maps, news, images) are skipped entirely.
    """
    results = []

    # detect google sorry
    resp_url = urlparse(resp.url)
    if resp_url.netloc == 'sorry.google.com' or resp_url.path == '/sorry/IndexRedirect':
        raise RuntimeWarning('sorry.google.com')

    if resp_url.path.startswith('/sorry'):
        raise RuntimeWarning(gettext('CAPTCHA required'))

    # which hostname ?
    google_hostname = resp.search_params.get('google_hostname')
    google_url = "https://" + google_hostname

    # convert the text to dom
    dom = html.fromstring(resp.text)

    instant_answer = dom.xpath('//div[@id="_vBb"]//text()')
    if instant_answer:
        results.append({'answer': u' '.join(instant_answer)})

    try:
        results_num = int(dom.xpath('//div[@id="resultStats"]//text()')[0]
                          .split()[1].replace(',', ''))
        results.append({'number_of_results': results_num})
    except (IndexError, ValueError):
        # result-count block missing or unparsable: omit the entry
        # (was a bare ``except:``)
        pass

    # parse results
    for result in dom.xpath(results_xpath):
        try:
            title = extract_text(result.xpath(title_xpath)[0])
            url = parse_url(extract_url(result.xpath(url_xpath), google_url),
                            google_hostname)
            parsed_url = urlparse(url, google_hostname)

            # google-internal result (maps, news, images, ...):
            # skipped entirely for now -- TODO fix inside links
            if parsed_url.netloc == google_hostname:
                continue

            # normal result
            content = extract_text_from_dom(result, content_xpath)
            if content is None:
                continue
            content_misc = extract_text_from_dom(result, content_misc_xpath)
            if content_misc is not None:
                content = content_misc + "<br />" + content

            # append result
            results.append({'url': url,
                            'title': title,
                            'content': content})
        except Exception:
            # keep parsing the remaining results, but log the offender
            logger.debug('result parse error in:\n%s',
                         etree.tostring(result, pretty_print=True))
            continue

    # parse suggestion
    for suggestion in dom.xpath(suggestion_xpath):
        # append suggestion
        results.append({'suggestion': extract_text(suggestion)})

    for correction in dom.xpath(spelling_suggestion_xpath):
        results.append({'correction': extract_text(correction)})

    # return results
    return results
def response(resp):
    """Parse a google web response.

    Detects "sorry" pages, collects normal web results and (escaped)
    suggestions. Google-internal links (maps, news, images) are skipped.
    """
    results = []

    # detect google sorry
    resp_url = urlparse(resp.url)
    if resp_url.netloc == 'sorry.google.com' or resp_url.path == '/sorry/IndexRedirect':
        raise RuntimeWarning('sorry.google.com')

    # which hostname ?
    google_hostname = resp.search_params.get('google_hostname')
    google_url = "https://" + google_hostname

    # convert the text to dom
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        try:
            title = extract_text(result.xpath(title_xpath)[0])
            url = parse_url(extract_url(result.xpath(url_xpath), google_url),
                            google_hostname)
            parsed_url = urlparse(url, google_hostname)

            # google-internal result (maps, news, images, ...):
            # skipped entirely for now -- TODO fix inside links
            if parsed_url.netloc == google_hostname:
                continue

            # normal result
            content = extract_text_from_dom(result, content_xpath)
            if content is None:
                continue
            content_misc = extract_text_from_dom(result, content_misc_xpath)
            if content_misc is not None:
                content = content_misc + "<br />" + content

            # append result
            results.append({'url': url,
                            'title': title,
                            'content': content})
        except Exception:
            # keep parsing the remaining results, but log the offender
            logger.debug('result parse error in:\n%s',
                         etree.tostring(result, pretty_print=True))
            continue

    # parse suggestion
    for suggestion in dom.xpath(suggestion_xpath):
        # append suggestion
        results.append({'suggestion': escape(extract_text(suggestion))})

    # return results
    return results
def response(resp):
    """Parse a google web response.

    Detects "sorry"/captcha pages, extracts an instant-answer box (with its
    redirect hrefs rewritten to direct links), the result count, normal web
    results, suggestions and spelling corrections. Google-internal links
    (maps, news, images) are skipped entirely.
    """
    results = []

    # detect google sorry
    resp_url = urlparse(resp.url)
    if resp_url.netloc == 'sorry.google.com' or resp_url.path == '/sorry/IndexRedirect':
        raise RuntimeWarning('sorry.google.com')

    if resp_url.path.startswith('/sorry'):
        raise RuntimeWarning(gettext('CAPTCHA required'))

    # which hostname ?
    google_hostname = resp.search_params.get('google_hostname')
    google_url = "https://" + google_hostname

    # convert the text to dom
    dom = html.fromstring(resp.text)

    instant_answer = dom.xpath('//div[contains(@id, "ires")]//div[contains(@class, "hp-xpdbox")]')
    if instant_answer:
        # rewrite google's /url?q=... redirect hrefs into direct links
        answer_re = r'(?P<prefix><a\s+href=")\/url\?q=(?P<url>[^"]+?)\&\;[^"]*(?P<suffix>"\s*>)'
        answer_subst = "\\g<prefix>\\g<url>\\g<suffix>"
        answer_html = ['<br>']
        for element in instant_answer:
            # tostring() returns bytes; decode so the join below stays text
            # (joining bytes into u' ' raised TypeError on python 3)
            answer_html.append(
                etree.tostring(element, method="html").decode('utf-8'))
        answer_str = u' '.join(answer_html)
        answer_fixed = re.sub(answer_re, answer_subst, answer_str, 0, re.MULTILINE)
        results.append({'answer': answer_fixed})

    try:
        results_num = int(dom.xpath('//div[@id="resultStats"]//text()')[0]
                          .split()[1].replace(',', ''))
        results.append({'number_of_results': results_num})
    except (IndexError, ValueError):
        # result-count block missing or unparsable: omit the entry
        # (was a bare ``except:``)
        pass

    # parse results
    for result in dom.xpath(results_xpath):
        try:
            title = extract_text(result.xpath(title_xpath)[0])
            url = parse_url(extract_url(result.xpath(url_xpath), google_url),
                            google_hostname)
            parsed_url = urlparse(url, google_hostname)

            # google-internal result (maps, news, images, ...):
            # skipped entirely for now -- TODO fix inside links
            if parsed_url.netloc == google_hostname:
                continue

            # normal result
            content = extract_text_from_dom(result, content_xpath)
            if content is None:
                continue
            content_misc = extract_text_from_dom(result, content_misc_xpath)
            if content_misc is not None:
                content = content_misc + "<br />" + content

            # append result
            results.append({'url': url,
                            'title': title,
                            'content': content})
        except Exception:
            # keep parsing the remaining results, but log the offender
            logger.debug('result parse error in:\n%s',
                         etree.tostring(result, pretty_print=True))
            continue

    # parse suggestion
    for suggestion in dom.xpath(suggestion_xpath):
        # append suggestion
        results.append({'suggestion': extract_text(suggestion)})

    for correction in dom.xpath(spelling_suggestion_xpath):
        results.append({'correction': extract_text(correction)})

    # return results
    return results
def response(resp):
    """Parse a google web response.

    Detects "sorry" pages, collects normal web results and suggestions.
    Google-internal links (maps, news, images) are skipped entirely.
    """
    results = []

    # detect google sorry
    resp_url = urlparse(resp.url)
    if resp_url.netloc == "sorry.google.com" or resp_url.path == "/sorry/IndexRedirect":
        raise RuntimeWarning("sorry.google.com")

    # which hostname ?
    google_hostname = resp.search_params.get("google_hostname")
    google_url = "https://" + google_hostname

    # convert the text to dom
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        try:
            title = extract_text(result.xpath(title_xpath)[0])
            url = parse_url(extract_url(result.xpath(url_xpath), google_url),
                            google_hostname)
            parsed_url = urlparse(url, google_hostname)

            # google-internal result (maps, news, images, ...):
            # skipped entirely for now -- TODO fix inside links
            if parsed_url.netloc == google_hostname:
                continue

            # normal result
            content = extract_text_from_dom(result, content_xpath)
            if content is None:
                continue
            content_misc = extract_text_from_dom(result, content_misc_xpath)
            if content_misc is not None:
                content = content_misc + "<br />" + content

            # append result
            results.append({"url": url, "title": title, "content": content})
        except Exception:
            # keep parsing the remaining results, but log the offender
            logger.debug("result parse error in:\n%s",
                         etree.tostring(result, pretty_print=True))
            continue

    # parse suggestion
    for suggestion in dom.xpath(suggestion_xpath):
        # append suggestion
        results.append({"suggestion": extract_text(suggestion)})

    # return results
    return results
def response(resp):
    """Parse a google web response.

    Dispatches map results to ``parse_map_near``/``parse_map_detail``,
    skips news and image results, and collects normal web results plus
    (escaped) suggestions.
    """
    results = []

    # detect google sorry
    resp_url = urlparse(resp.url)
    if resp_url.netloc == 'sorry.google.com' or resp_url.path == '/sorry/IndexRedirect':
        raise RuntimeWarning('sorry.google.com')

    # which hostname ?
    google_hostname = resp.search_params.get('google_hostname')
    google_url = "https://" + google_hostname

    # convert the text to dom
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        try:
            # title extraction is inside the try: a result without a title
            # previously raised IndexError and aborted the whole page
            title = extract_text(result.xpath(title_xpath)[0])
            url = parse_url(extract_url(result.xpath(url_xpath), google_url),
                            google_hostname)
            parsed_url = urlparse(url, google_hostname)

            # map result
            if ((parsed_url.netloc == google_hostname
                 and parsed_url.path.startswith(maps_path))
                    or (parsed_url.netloc.startswith(map_hostname_start))):
                x = result.xpath(map_near)
                if len(x) > 0:
                    # map : near the location
                    results = results + parse_map_near(parsed_url, x, google_hostname)
                else:
                    # map : detail about a location
                    results = results + parse_map_detail(parsed_url, result, google_hostname)

            # google news
            elif (parsed_url.netloc == google_hostname
                  and parsed_url.path == search_path):
                # skipping news results
                pass

            # images result
            elif (parsed_url.netloc == google_hostname
                  and parsed_url.path == images_path):
                # only thumbnail image provided,
                # so skipping image results
                # results = results + parse_images(result, google_hostname)
                pass

            else:
                # normal result
                content = extract_text_from_dom(result, content_xpath)
                if content is None:
                    continue
                content_misc = extract_text_from_dom(result, content_misc_xpath)
                if content_misc is not None:
                    content = content_misc + "<br />" + content

                # append result
                results.append({'url': url,
                                'title': title,
                                'content': content})
        except Exception:
            # malformed entry: skip it and keep parsing the rest
            continue

    # parse suggestion
    for suggestion in dom.xpath(suggestion_xpath):
        # append suggestion
        results.append({'suggestion': escape(extract_text(suggestion))})

    # return results
    return results
def response(resp):
    """Parse a google web response.

    Dispatches map results to ``parse_map_near``/``parse_map_detail``,
    skips news and image results, and collects normal web results plus
    suggestions.
    """
    results = []

    # detect google sorry
    resp_url = urlparse(resp.url)
    if resp_url.netloc == 'sorry.google.com' or resp_url.path == '/sorry/IndexRedirect':
        raise RuntimeWarning('sorry.google.com')

    # which hostname ?
    google_hostname = resp.search_params.get('google_hostname')
    google_url = "https://" + google_hostname

    # convert the text to dom
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        try:
            # title extraction is inside the try: a result without a title
            # previously raised IndexError and aborted the whole page
            title = extract_text(result.xpath(title_xpath)[0])
            url = parse_url(extract_url(result.xpath(url_xpath), google_url),
                            google_hostname)
            parsed_url = urlparse(url, google_hostname)

            # map result
            if ((parsed_url.netloc == google_hostname
                 and parsed_url.path.startswith(maps_path))
                    or (parsed_url.netloc.startswith(map_hostname_start))):
                x = result.xpath(map_near)
                if len(x) > 0:
                    # map : near the location
                    results = results + parse_map_near(parsed_url, x, google_hostname)
                else:
                    # map : detail about a location
                    results = results + parse_map_detail(
                        parsed_url, result, google_hostname)

            # google news
            elif (parsed_url.netloc == google_hostname
                  and parsed_url.path == search_path):
                # skipping news results
                pass

            # images result
            elif (parsed_url.netloc == google_hostname
                  and parsed_url.path == images_path):
                # only thumbnail image provided,
                # so skipping image results
                # results = results + parse_images(result, google_hostname)
                pass

            else:
                # normal result
                content = extract_text_from_dom(result, content_xpath)
                if content is None:
                    continue
                content_misc = extract_text_from_dom(result, content_misc_xpath)
                if content_misc is not None:
                    content = content_misc + "<br />" + content

                # append result
                results.append({
                    'url': url,
                    'title': title,
                    'content': content
                })
        except Exception:
            # malformed entry: skip it and keep parsing the rest
            continue

    # parse suggestion
    for suggestion in dom.xpath(suggestion_xpath):
        # append suggestion
        results.append({'suggestion': extract_text(suggestion)})

    # return results
    return results
def response(resp):
    """Parse a google web response.

    Detects "sorry"/captcha pages, extracts an instant answer, the result
    count, normal web results, suggestions and spelling corrections.
    Google-internal links (maps, news, images) are skipped entirely.
    """
    results = []

    # detect google sorry
    resp_url = urlparse(resp.url)
    if resp_url.netloc == 'sorry.google.com' or resp_url.path == '/sorry/IndexRedirect':
        raise RuntimeWarning('sorry.google.com')

    if resp_url.path.startswith('/sorry'):
        raise RuntimeWarning(gettext('CAPTCHA required'))

    # which hostname ?
    google_hostname = resp.search_params.get('google_hostname')
    google_url = "https://" + google_hostname

    # convert the text to dom
    dom = html.fromstring(resp.text)

    instant_answer = eval_xpath(dom, '//div[@id="_vBb"]//text()')
    if instant_answer:
        results.append({'answer': u' '.join(instant_answer)})

    try:
        results_num = int(
            eval_xpath(dom, '//div[@id="resultStats"]//text()')[0]
            .split()[1].replace(',', ''))
        results.append({'number_of_results': results_num})
    except (IndexError, ValueError):
        # result-count block missing or unparsable: omit the entry
        # (was a bare ``except:``)
        pass

    # parse results
    for result in eval_xpath(dom, results_xpath):
        try:
            title = extract_text(eval_xpath(result, title_xpath)[0])
            url = parse_url(
                extract_url(eval_xpath(result, url_xpath), google_url),
                google_hostname)
            parsed_url = urlparse(url, google_hostname)

            # google-internal result (maps, news, images, ...):
            # skipped entirely for now -- TODO fix inside links
            if parsed_url.netloc == google_hostname:
                continue

            # normal result
            content = extract_text_from_dom(result, content_xpath)
            if content is None:
                continue

            # append result
            results.append({'url': url,
                            'title': title,
                            'content': content})
        except Exception:
            # keep parsing the remaining results, but log the offender
            logger.debug('result parse error in:\n%s',
                         etree.tostring(result, pretty_print=True))
            continue

    # parse suggestion
    for suggestion in eval_xpath(dom, suggestion_xpath):
        # append suggestion
        results.append({'suggestion': extract_text(suggestion)})

    for correction in eval_xpath(dom, spelling_suggestion_xpath):
        results.append({'correction': extract_text(correction)})

    # return results
    return results