def parse_search_result(self, element, block_xpath, sample):
    """Build a ``SEARCH_RESULT`` component from ``element`` using sample XPaths.

    For each of ``page_url``, ``title``, ``snippet`` and ``view_url`` the
    matching ``sample.<field>.xpath`` is passed through ``extract_xpath``,
    its leading ``len(extract_xpath(block_xpath))`` items are dropped, and the
    remainder is recombined with ``combine_xpath(..., True)`` and evaluated
    against ``element``. The query result is handed to ``get_attr`` together
    with ``sample.<field>.attr`` and stored on the component.

    NOTE(review): the prefix is removed by length only; this presumably
    assumes every sample XPath starts with the block XPath -- confirm
    against callers.

    :param element: node exposing an ``xpath()`` method (one result block).
    :param block_xpath: XPath of the result block, as accepted by
        ``extract_xpath``.
    :param sample: object with ``page_url``, ``title``, ``snippet`` and
        ``view_url`` attributes, each exposing ``xpath`` and ``attr``.
    :return: the populated ``Component``.
    """
    result = Component()
    result.type = "SEARCH_RESULT"
    result.alignment = "LEFT"

    block_xpath = self.extract_xpath(block_xpath)
    prefix_len = len(block_xpath)

    # Same extraction recipe for every field, in the original order.
    for field in ("page_url", "title", "snippet", "view_url"):
        spec = getattr(sample, field)
        relative = self.extract_xpath(spec.xpath)[prefix_len:]
        nodes = element.xpath(self.combine_xpath(relative, True))
        setattr(result, field, self.get_attr(nodes, spec.attr))

    return result
def parse_component(self, element):
    """Build a ``WIKI`` component from one JSON search-result record.

    Reads ``$.pageid``, ``$.title`` and ``$.snippet`` from ``element`` and
    stores the first match of each on the component as ``page_url``,
    ``title`` and ``snippet`` respectively.

    A key that is absent from ``element`` leaves the corresponding field as
    ``None`` instead of raising: ``jsonpath.jsonpath`` signals "no match"
    with ``False`` rather than an empty list, so indexing its result
    directly would fail with ``TypeError``.

    :param element: decoded JSON object for a single result.
    :return: the populated ``Component``.
    """

    def first_match(expression):
        # A falsy result covers the ``False`` "no match" return value.
        found = jsonpath.jsonpath(element, expression)
        return found[0] if found else None

    component = Component()
    component.type = "WIKI"
    component.alignment = "JSON"
    component.page_url = first_match("$.pageid")
    component.title = first_match("$.title")
    component.snippet = first_match("$.snippet")
    return component
def parse_search_result(self, element):
    """Build a ``SEARCH_RESULT`` component from an organic result node.

    Each field is obtained via ``self.get_from_page(element, xpath, kind)``
    with a fixed XPath relative to ``element``. The ``kind`` argument
    (``"href"``, ``"string"``, ``"strings"``) is interpreted by
    ``get_from_page``; its exact semantics are not visible here.

    :param element: node for a single search result.
    :return: the populated ``Component``.
    """
    # (component attribute, relative XPath, kind passed to get_from_page)
    field_specs = (
        ("page_url", "./h3/a", "href"),
        ("title", "./h3/a", "string"),
        ("snippet", "./div/span", "strings"),
        ("view_url", "./div/div/cite", "string"),
    )

    result = Component()
    result.type = "SEARCH_RESULT"
    result.alignment = "LEFT"
    for attribute, xpath, kind in field_specs:
        setattr(result, attribute, self.get_from_page(element, xpath, kind))
    return result
def parse_adv(self, element):
    """Build an ``ADV`` component from an advertisement node.

    The link and title both come from ``./h3/a``, the snippet from the
    second ``div`` child and the displayed URL from ``./div/cite``. All
    values are read through ``self.get_from_page``.

    :param element: node for a single advertisement block.
    :return: the populated ``Component``.
    """

    def read(xpath, kind):
        # Thin wrapper so each field below is a single short call.
        return self.get_from_page(element, xpath, kind)

    heading_link = "./h3/a"

    ad = Component()
    ad.type = "ADV"
    ad.alignment = "LEFT"
    ad.page_url = read(heading_link, "href")
    ad.title = read(heading_link, "string")
    ad.snippet = read("./div[2]", "string")
    ad.view_url = read("./div/cite", "string")
    return ad
def parse_adv(self, element):
    """Build an ``ADV`` component from an advertisement node (``h2`` layout).

    The link and title come from ``./h2/a``; the snippet and displayed URL
    are located through class-qualified ``div`` XPaths. All values are read
    through ``self.get_from_page``.

    :param element: node for a single advertisement block.
    :return: the populated ``Component``.
    """
    heading_link = "./h2/a"
    snippet_xpath = "./div[@class='organic__content-wrapper clearfix']/div[1]"
    view_url_xpath = (
        "./div[@class='organic__subtitle typo typo_type_greenurl']/div[1]/a"
    )

    ad = Component()
    ad.type = "ADV"
    ad.alignment = "LEFT"
    ad.page_url = self.get_from_page(element, heading_link, "href")
    ad.title = self.get_from_page(element, heading_link, "string")
    ad.snippet = self.get_from_page(element, snippet_xpath, "string")
    ad.view_url = self.get_from_page(element, view_url_xpath, "string")
    return ad
def parse_actor(self, element):
    """Build an ``Actor`` component from an actor result node.

    The link and title come from ``./div[2]/p/a``, the snippet from
    ``./div[2]/span[2]`` and the image from the ``src`` of ``./p/a/img``.
    All values are read through ``self.get_from_page``.

    :param element: node for a single actor entry.
    :return: the populated ``Component``.
    """
    name_link = "./div[2]/p/a"

    actor = Component()
    actor.type = "Actor"
    actor.alignment = "LEFT"
    actor.page_url = self.get_from_page(element, name_link, "href")
    actor.title = self.get_from_page(element, name_link, "string")
    actor.snippet = self.get_from_page(element, "./div[2]/span[2]", "string")
    actor.image = self.get_from_page(element, "./p/a/img", "src")
    return actor
def parse_evaluated_cinema(self, element):
    """Build an ``EvaluatedCinema`` component from a rated-film node.

    Populates ``page_url``, ``title``, ``snippet``, ``actors``, ``year``,
    ``value`` and ``image`` by calling ``self.get_from_page`` with a fixed
    XPath (relative to ``element``) and kind for each field.

    :param element: node for a single film entry.
    :return: the populated ``Component``.
    """
    # (component attribute, relative XPath, kind passed to get_from_page)
    field_specs = (
        ("page_url", "./div[2]/p/a", "href"),
        ("title", "./div[2]/p/a", "string"),
        ("snippet", "./div[2]/span[2]", "string"),
        ("actors", "./div[2]/span[3]", "string"),
        ("year", "./div[2]/p/span", "string"),
        ("value", "./div[1]/div", "string"),
        ("image", "./p/a/img", "src"),
    )

    film = Component()
    film.type = "EvaluatedCinema"
    film.alignment = "LEFT"
    for attribute, xpath, kind in field_specs:
        setattr(film, attribute, self.get_from_page(element, xpath, kind))
    return film