示例#1
0
 class Meta:
     # Two-step chain: a Form step for path '/' with `q` bound to the XPath of
     # the URL input, followed by a Values step mapping `pagerank` to an XPath.
     # NOTE(review): presumably Form submits the query through that input and
     # Values reads one value per key -- confirm against their definitions.
     scraping = Chain(
         Form('/', q='//input[@class="urlfield"]'),
         Values(pagerank='//div[@id="pagerank"]//div[@class="smprbutton"]'),
     )
     # Session settings; the relative Form path is presumably resolved against
     # this base URL -- TODO confirm.
     session_init = {'base_url': 'http://www.pageranking.org/'}
示例#2
0
 class Meta:
     # Two-step chain: a Form step for path '/' with `q` bound to the XPath of
     # the search input, followed by an Items step describing each result.
     scraping = Chain(
         Form('/', q='//input[@name="q"]'),
         Items(
             # NOTE(review): `_li` / `_next` look like the per-result and
             # next-page selectors -- confirm against Items.
             _li='//div[@data-elm]/div[@class="modCont result cr"]/div/h3/a',
             _next='//div[@class="next"]/a',
             # Field extractors: one-argument callables that read the href
             # attribute / the text of the node and decode it from UTF-8.
             url=lambda link: link.get_attr('href').decode('utf-8'),
             title=lambda link: link.text().decode('utf-8'),
         ),
     )
     # Session settings; the relative Form path is presumably resolved against
     # this base URL -- TODO confirm.
     session_init = {'base_url': 'http://search.seznam.cz/'}
示例#3
0
 class Meta:
     # Two-step chain: a Form step for path '/' with `q` bound to the XPath of
     # the input named "p", followed by an Items step describing each result.
     scraping = Chain(
         Form('/', q='//input[@name="p"]'),
         Items(
             # NOTE(review): `_li` / `_next` look like the per-result and
             # next-page selectors -- confirm against Items.
             _li='//ol/li//h3/a',
             _next='//div[@id="pg"]/a[@id="pg-next"]',
             # Field extractors: one-argument callables that read the href
             # attribute / the text of the node and decode it from UTF-8.
             url=lambda link: link.get_attr('href').decode('utf-8'),
             title=lambda link: link.text().decode('utf-8'),
         ),
     )
     # Session settings; the relative Form path is presumably resolved against
     # this base URL -- TODO confirm.
     session_init = {'base_url': 'http://search.yahoo.com/'}
示例#4
0
 class Meta:
     # Two-step chain: a Form step for path '/' with `q` bound to the XPath of
     # the search input, followed by an Items step describing each result.
     scraping = Chain(
         Form('/', q='//input[@name="q"]'),
         Items(
             # NOTE(review): `_li` / `_next` look like the per-result and
             # next-page selectors -- confirm against Items.
             _li='//ol[@id="b_results"]/li/h2/a',
             _next='//nav//a[@class="sb_pagN"]',
             # Field extractors: one-argument callables that read the href
             # attribute / the text of the node and decode it from UTF-8.
             url=lambda link: link.get_attr('href').decode('utf-8'),
             title=lambda link: link.text().decode('utf-8'),
         ),
     )
     # Session settings; the relative Form path is presumably resolved against
     # this base URL -- TODO confirm.
     session_init = {'base_url': 'http://www.bing.com/'}
示例#5
0
 class Meta:
     # Two-step chain: a Form step for path '/?q=' with `q` bound to the XPath
     # of the search input, followed by an Items step describing each result.
     scraping = Chain(
         Form('/?q=', q='//input[@name="q"]'),
         Items(
             # NOTE(review): `_li` / `_next` look like the per-result and
             # next-page selectors -- confirm against Items.
             _li='//div[@class="g"]//h3[@class="r"]/a',
             _next='//a[@id="pnnext"]',
             # Field extractors: one-argument callables that read the href
             # attribute / the text of the node and decode it from UTF-8.
             url=lambda link: link.get_attr('href').decode('utf-8'),
             title=lambda link: link.text().decode('utf-8'),
         ),
     )
     # Session settings; the relative Form path is presumably resolved against
     # this base URL -- TODO confirm.
     session_init = {'base_url': 'https://www.google.com/'}
示例#6
0
 class Meta:
     # Two-step chain: a Form step for path '/' with `q` bound to the XPath of
     # the input with id "q", followed by a Values step mapping three field
     # names to XPaths.
     # NOTE(review): presumably Values reads one value per key from the page
     # reached after the form step -- confirm against its definition.
     scraping = Chain(
         Form('/', q='//input[@id="q"]'),
         Values(
             # Both traffic figures come from row 2 of the "widget" table.
             monthly_users='//table[@class="widget"]//tr[2]/td[2]',
             monthly_pageviews='//table[@class="widget"]//tr[2]/td[3]',
             summary='//div[@id="content"]/p',
         ),
     )
     # Session settings; the relative Form path is presumably resolved against
     # this base URL -- TODO confirm.
     session_init = {'base_url': 'http://websitetrafficspy.com/'}
示例#7
0
 class Meta:
     # Two-step chain: a Form step for path '/' with `q` bound to the XPath of
     # the search input, followed by an Items step describing each result.
     scraping = Chain(
         Form('/', q='//input[@name="q"]'),
         Items(
             # NOTE(review): `_li` / `_next` look like the per-result and
             # next-page selectors -- confirm against Items.
             _li='//div[@id="resultframe"]/ol/li[@class="list"]/div/a',
             _next='//div[@class="resultpages"]/span[@class="nn"]/a',
             # Field extractors: one-argument callables that read the href
             # attribute / the text of the node and decode it from UTF-8.
             url=lambda link: link.get_attr('href').decode('utf-8'),
             title=lambda link: link.text().decode('utf-8'),
         ),
     )
     # Session settings; the relative Form path is presumably resolved against
     # this base URL -- TODO confirm.
     session_init = {'base_url': 'http://www.vinden.nl/'}
示例#8
0
 class Meta:
     # Two-step chain: a Form step for path '/?o=1&l=dir' with `q` bound to the
     # XPath of the search input, followed by an Items step describing each
     # result.
     scraping = Chain(
         Form('/?o=1&l=dir', q='//input[@name="q"]'),
         Items(
             # NOTE(review): `_li` / `_next` look like the per-result and
             # next-page selectors -- confirm against Items.
             _li='//div[@id="lindm"]/div/div/div/a',
             _next='//div[@id="paging"]/div[2]/a',
             # Field extractors: one-argument callables that read the href
             # attribute / the text of the node and decode it from UTF-8.
             url=lambda link: link.get_attr('href').decode('utf-8'),
             title=lambda link: link.text().decode('utf-8'),
         ),
     )
     # Session settings; the relative Form path is presumably resolved against
     # this base URL -- TODO confirm.
     session_init = {'base_url': 'http://www.ask.com/'}
示例#9
0
 class Meta:
     # Two-step chain: a Form step for path '/' with `q` bound to the XPath of
     # the input named "website", followed by a Values step mapping four field
     # names to XPaths.
     # NOTE(review): presumably Values reads one value per key from the page
     # reached after the form step -- confirm against its definition.
     scraping = Chain(
         Form('/', q='//input[@name="website"]'),
         Values(
             # Rows 2-4 of the "hreview" table, second cell of each row.
             daily_pageview='//table[@class="hreview"]//tr[2]/td[2]',
             daily_adds_revenue='//table[@class="hreview"]//tr[3]/td[2]',
             rating='//table[@class="hreview"]//tr[4]/td[2]',
             summary='//div[@class="wid"]',
         ),
     )
     # Session settings; the relative Form path is presumably resolved against
     # this base URL -- TODO confirm.
     session_init = {'base_url': 'http://www.websiteoutlook.com/'}
示例#10
0
 class Meta:
     # Two-step chain: a Form step for path '/' with `q` bound to the XPath of
     # the search input, followed by a Values step mapping four field names to
     # XPaths.
     # NOTE(review): presumably Values reads one value per key from the page
     # reached after the form step -- confirm against its definition.
     scraping = Chain(
         Form('/', q='//input[@name="q"]'),
         Values(
             global_rank='//span[@data-cat="globalRank"]/div/strong',
             pageviews_per_visitor=(
                 '//span[@data-cat="pageviews_per_visitor"]/div/strong'
             ),
             time_on_site='//span[@data-cat="time_on_site"]/div/strong',
             links_in='//div[@id="linksin_div"]//div[@class="box-2"]/span',
         ),
     )
     # Session settings; the relative Form path is presumably resolved against
     # this base URL -- TODO confirm.
     session_init = {'base_url': 'http://alexa.com/'}
示例#11
0
 class Meta:
     # Two-step chain: a Form step for path '/' with `q` bound to the XPath of
     # the search input, followed by an Items step describing each result.
     scraping = Chain(
         Form('/', q='//input[@name="q"]'),
         Items(
             # NOTE(review): `_li` / `_next` look like the per-result and
             # next-page selectors -- confirm against Items.
             _li='//div[@class="srsa"]/a',
             # Alternative selector noted by the original author:
             # '//div[@class="moreInfo"]/a'
             _next='//span[@class="prevnext"][last()]/a',
             # Field extractors: one-argument callables that read the href
             # attribute / the text of the node and decode it from UTF-8.
             url=lambda link: link.get_attr('href').decode('utf-8'),
             title=lambda link: link.text().decode('utf-8'),
         ),
     )
     # Session settings; the relative Form path is presumably resolved against
     # this base URL -- TODO confirm.
     session_init = {'base_url': 'http://search.delta-search.com/'}
示例#12
0
 class Meta:
     # Two-step chain: a Form step for path '/' with `q` bound to the XPath of
     # the input named "qt", followed by an Items step describing each result.
     scraping = Chain(
         Form('/', q='//input[@name="qt"]'),
         Items(
             # NOTE(review): `_li` / `_next` look like the per-result and
             # next-page selectors -- confirm against Items.
             _li='//div[@class="boxResult"]/div/div/div[@class="link"]/a',
             # Alternative selector noted by the original author:
             # '//div[@class="paginate"]/a[@class="button nextActive"]'
             _next='//div[@class="boxMore"]/div[@class="moreInfo"]/a',
             # Field extractors: one-argument callables that read the href
             # attribute / the text of the node and decode it from UTF-8.
             url=lambda link: link.get_attr('href').decode('utf-8'),
             title=lambda link: link.text().decode('utf-8'),
         ),
     )
     # Session settings; the relative Form path is presumably resolved against
     # this base URL -- TODO confirm.
     session_init = {'base_url': 'http://szukaj.onet.pl/'}
示例#13
0
 class Meta:
     # Two-step chain: a Form step for path '/' with `q` bound to the XPath of
     # the search input, followed by an Items step describing each result.
     scraping = Chain(
         Form('/', q='//input[@name="q"]'),
         Items(
             # NOTE(review): `_li` / `_next` look like the per-result and
             # next-page selectors -- confirm against Items.
             _li=(
                 '//ul[@class="results-list"]/li/div[@class="entry-wrap"]/h3/a'
             ),
             _next=(
                 '//ul[@class="pagination"]/li[@class="pageArrow nextPage"]/a'
             ),
             # Field extractors: one-argument callables that read the href
             # attribute / the text of the node and decode it from UTF-8.
             url=lambda link: link.get_attr('href').decode('utf-8'),
             title=lambda link: link.text().decode('utf-8'),
         ),
     )
     # Session settings; the relative Form path is presumably resolved against
     # this base URL -- TODO confirm.
     session_init = {'base_url': 'http://search.centrum.cz/'}