def get_reviews_url_asin(self, goods_html):
    """Pull the product id out of the "customer reviews" link on a goods page.

    The page is first queried with an XPath that selects the ``href`` of a
    link whose text contains "customer reviews", located under the element
    whose text is exactly "Customer Reviews". The first href returned is
    then searched for ``product-reviews/<10 alphanumeric chars>/ref`` and
    the captured 10-character id (the ASIN, going by the method name) is
    returned.

    Both lookups are delegated to ``ReviewsParser.get_new_data``; this
    method only relies on that helper returning a sized, indexable result.
    NOTE(review): assumes the helper yields the regex capture group for
    ``pattern_list`` queries -- confirm against its implementation.

    :param goods_html: HTML source of the product page.
    :return: the first extracted id, or ``''`` when either lookup comes
        back empty.

    Side effect: the result (possibly empty) is printed to stdout.
    """
    href_xpaths = [
        '//*[text()="Customer Reviews"]/../..//a[contains(text(), "customer reviews")]/@href',
    ]
    asin_patterns = [
        re.compile('product-reviews/([A-Za-z0-9]{10,10})/ref'),
    ]

    asin = ''
    review_hrefs = ReviewsParser.get_new_data(
        xpath_list=href_xpaths, html_code=goods_html)
    if len(review_hrefs) > 0:
        # Only the first matching link is inspected for the id.
        asin_matches = ReviewsParser.get_new_data(
            pattern_list=asin_patterns, html_code=review_hrefs[0])
        if len(asin_matches) > 0:
            asin = asin_matches[0]

    print('get_reviews_url.result: ', asin)
    return asin
def looking_something(self, html_code):
    """Tell whether the page contains one of the known "not found" phrases.

    Two literal phrases are compiled (with ``re.S``) and handed to
    ``ReviewsParser.get_new_data`` together with the page source; the page
    counts as "not found" when that helper returns a non-empty result.

    :param html_code: HTML source of the fetched page.
    :return: ``True`` if the helper reported at least one hit, else
        ``False``.
    """
    marker_phrases = (
        'The Web address you entered is not a functioning',
        'Looking for something',
    )
    marker_patterns = [re.compile(phrase, re.S) for phrase in marker_phrases]
    hits = ReviewsParser.get_new_data(
        pattern_list=marker_patterns, html_code=html_code)
    return len(hits) > 0