def extract_search_result(self, element):
     """Describe where the fields of one search result live in the page.

     Each field is an ``HTMLPath`` built from the path returned by
     ``GoogleParser_v2.get_path(element)`` plus a relative suffix, paired
     with the key naming what to read there ("href", "string", "strings").

     :param element: node of the result block; handed to ``get_path``.
     :return: the populated ``SearchMarkupSearchResult``.
     """
     result = SearchMarkupSearchResult()
     result.alignment = "LEFT"

     # Resolve the element path once; every field hangs off it.
     base = GoogleParser_v2.get_path(element)
     # The same heading anchor supplies both the target URL and the title.
     heading_link = base + "/h3/a"

     result.page_url = HTMLPath(heading_link, "href")
     result.title = HTMLPath(heading_link, "string")
     result.snippet = HTMLPath(base + "/div/span", "strings")
     result.view_url = HTMLPath(base + "/div/div/cite", "string")
     return result
Exemplo n.º 2
0
 def extract_search_result(self, element):
     """Describe where the fields of one search result live in the page.

     Each field is an ``HTMLPath`` built from the path returned by
     ``YandexParser.get_path(element)`` plus a relative suffix, paired
     with the key naming what to read there ("href" or "string").

     :param element: node of the result block; handed to ``get_path``.
     :return: the populated ``SearchMarkupSearchResult``.
     """
     result = SearchMarkupSearchResult()
     result.alignment = "LEFT"

     # Resolve the element path once; every field hangs off it.
     base = YandexParser.get_path(element)
     # The same heading anchor supplies both the target URL and the title.
     heading_link = base + "/h2/a"

     result.page_url = HTMLPath(heading_link, "href")
     result.title = HTMLPath(heading_link, "string")
     result.snippet = HTMLPath(base + "/div[2]/div[1]", "string")
     # The displayed URL is taken from the last anchor of the first inner div.
     result.view_url = HTMLPath(base + "/div/div[1]/a[last()]", "href")
     return result
Exemplo n.º 3
0
 def extract_adv(self, element):
     """Describe where the fields of one advertisement block live.

     Each field is an ``HTMLPath`` built from the path returned by
     ``YandexParser_v3.get_path(element)`` plus a relative suffix, paired
     with the key naming what to read there ("href" or "string").

     :param element: node of the advertisement block; handed to ``get_path``.
     :return: the populated ``SearchMarkupAdv``.
     """
     markup = SearchMarkupAdv()
     markup.alignment = "LEFT"

     # Resolve the element path once; every field hangs off it.
     base = YandexParser_v3.get_path(element)
     # The same heading anchor supplies both the target URL and the title.
     heading_link = base + "/h2/a"
     # These two suffixes select divs by their exact class attribute value.
     snippet_path = base + "/div[@class='organic__content-wrapper clearfix']/div[1]"
     green_url_path = base + "/div[@class='organic__subtitle typo typo_type_greenurl']/div[1]/a"

     markup.page_url = HTMLPath(heading_link, "href")
     markup.title = HTMLPath(heading_link, "string")
     markup.snippet = HTMLPath(snippet_path, "string")
     markup.view_url = HTMLPath(green_url_path, "string")
     return markup
 def extract_wizard_image(self, element):
     """Describe an image wizard: its pictures, link and title.

     One ``HTMLPath`` with key "src" is appended to ``media_links`` for
     every node matched by ``./div/a/img`` under ``element``. The link and
     title both point at the ``/h3/a`` anchor below the element's path.

     :param element: node of the wizard block; must provide ``xpath``.
     :return: the populated ``SearchMarkupWizardImage``.
     """
     markup = SearchMarkupWizardImage()
     markup.alignment = "LEFT"

     # Collect one media link per image found inside the block.
     for image_node in element.xpath("./div/a/img"):
         image_path = GoogleParser_v2.get_path(image_node)
         markup.media_links.append(HTMLPath(image_path, "src"))

     # The same heading anchor supplies both the target URL and the title.
     heading_link = GoogleParser_v2.get_path(element) + "/h3/a"
     markup.page_url = HTMLPath(heading_link, "href")
     markup.title = HTMLPath(heading_link, "string")
     return markup
Exemplo n.º 5
0
 def extract_wizard_image(self, element):
     """Describe an image wizard: its pictures, link and title.

     One ``HTMLPath`` with key "style" is appended to ``media_links`` for
     every anchor matched by ``./div[2]/div/div/div/a`` under ``element``.
     The link and title both point at the ``/div[1]/h2/a`` anchor below
     the element's path.

     :param element: node of the wizard block; must provide ``xpath``.
     :return: the populated ``SearchMarkupWizardImage``.
     """
     markup = SearchMarkupWizardImage()
     markup.alignment = "LEFT"

     # Collect one media link per thumbnail anchor found inside the block.
     for anchor in element.xpath("./div[2]/div/div/div/a"):
         # NOTE(review): the picture is read through the "style" key of a
         # nested div - presumably a CSS background image; confirm.
         thumb_path = YandexParser.get_path(anchor) + "/div[1]/div[1]"
         markup.media_links.append(HTMLPath(thumb_path, "style"))

     # The same heading anchor supplies both the target URL and the title.
     heading_link = YandexParser.get_path(element) + "/div[1]/h2/a"
     markup.page_url = HTMLPath(heading_link, "href")
     markup.title = HTMLPath(heading_link, "string")
     return markup
Exemplo n.º 6
0
 def extract_actor(self, element):
     """Describe where the fields of one actor entry live in the page.

     Each field is an ``HTMLPath`` built from the path returned by
     ``KinopoiskParser.get_path(element)`` plus a relative suffix, paired
     with the key naming what to read there ("href", "string" or "src").

     :param element: node of the actor entry; handed to ``get_path``.
     :return: the populated ``ActorComponent``.
     """
     actor = ActorComponent()

     # Resolve the element path once; every field hangs off it.
     base = KinopoiskParser.get_path(element)
     # The same anchor supplies both the target URL and the title.
     name_link = base + "/div[2]/p/a"

     actor.page_url = HTMLPath(name_link, "href")
     actor.title = HTMLPath(name_link, "string")
     actor.snippet = HTMLPath(base + "/div[2]/span[2]", "string")
     actor.image = HTMLPath(base + "/p/a/img", "src")
     return actor
 def extract_adv(self, element):
     """Describe where the fields of one advertisement block live.

     Each field is an ``HTMLPath`` built from the path returned by
     ``GoogleParser_v3.get_path(element)`` plus a relative suffix, paired
     with the key naming what to read there ("href" or "string").

     :param element: node of the advertisement block; handed to ``get_path``.
     :return: the populated ``SearchMarkupAdv``.
     """
     markup = SearchMarkupAdv()
     markup.alignment = "LEFT"

     # Resolve the element path once; every field hangs off it.
     base = GoogleParser_v3.get_path(element)
     # The same heading anchor supplies both the target URL and the title.
     heading_link = base + "/h3/a"

     markup.page_url = HTMLPath(heading_link, "href")
     markup.title = HTMLPath(heading_link, "string")
     markup.snippet = HTMLPath(base + "/div[2]", "string")
     markup.view_url = HTMLPath(base + "/div/cite", "string")
     return markup
Exemplo n.º 8
0
 def extract_evaluated_cinema(self, element):
     """Describe where the fields of one rated film entry live in the page.

     Each field is an ``HTMLPath`` built from the path returned by
     ``KinopoiskParser.get_path(element)`` plus a relative suffix, paired
     with the key naming what to read there ("href", "string" or "src").

     :param element: node of the film entry; handed to ``get_path``.
     :return: the populated ``EvaluatedCinemaComponent``.
     """
     film = EvaluatedCinemaComponent()

     # Resolve the element path once; every field hangs off it.
     base = KinopoiskParser.get_path(element)
     # Most text fields sit under the second div of the entry.
     info = base + "/div[2]"
     # The same anchor supplies both the target URL and the title.
     title_link = info + "/p/a"

     film.page_url = HTMLPath(title_link, "href")
     film.title = HTMLPath(title_link, "string")
     film.snippet = HTMLPath(info + "/span[2]", "string")
     film.actors = HTMLPath(info + "/span[3]", "string")
     film.year = HTMLPath(info + "/p/span", "string")
     film.value = HTMLPath(base + "/div[1]/div", "string")
     film.image = HTMLPath(base + "/p/a/img", "src")
     return film
 def extract_wizard_news(self, element):
     """Describe a news wizard entry: its link and title.

     Both fields are ``HTMLPath`` objects pointing at the element's own
     path, as returned by ``GoogleParser_v2.get_path(element)``, with no
     suffix added; they differ only in the key ("href" vs "string").

     :param element: node of the news entry; handed to ``get_path``.
     :return: the populated ``SearchMarkupWizardNews``.
     """
     markup = SearchMarkupWizardNews()
     markup.alignment = "LEFT"

     # The element itself is the target, so its path is used unchanged.
     own_path = GoogleParser_v2.get_path(element)
     markup.page_url = HTMLPath(own_path, "href")
     markup.title = HTMLPath(own_path, "string")
     return markup
Exemplo n.º 10
0
 def extract_wizard_news(self, element):
     """Describe a news wizard entry: its link and title.

     Both fields are ``HTMLPath`` objects pointing at the second anchor of
     the heading (``/div[1]/h2/a[2]``) below the path returned by
     ``YandexParser.get_path(element)``; they differ only in the key
     ("href" vs "string").

     :param element: node of the news block; handed to ``get_path``.
     :return: the populated ``SearchMarkupWizardNews``.
     """
     markup = SearchMarkupWizardNews()
     markup.alignment = "LEFT"

     # The same heading anchor supplies both the target URL and the title.
     heading_link = YandexParser.get_path(element) + "/div[1]/h2/a[2]"
     markup.page_url = HTMLPath(heading_link, "href")
     markup.title = HTMLPath(heading_link, "string")
     return markup