class Meta:
    # Scraping configuration for http://www.pageranking.org/.
    # NOTE(review): Chain, Form and Values are defined outside this view;
    # the comments here describe only the arguments handed to them.

    # Two chained steps: a Form built with path '/' whose `q` keyword is the
    # XPath of the input with class "urlfield", then a Values step mapping
    # `pagerank` to the "smprbutton" div nested under div#pagerank.
    scraping = Chain(
        Form('/', q='//input[@class="urlfield"]'),
        Values(
            pagerank='//div[@id="pagerank"]//div[@class="smprbutton"]',
        ),
    )

    # Session settings: only the base URL is configured.
    session_init = {
        'base_url': 'http://www.pageranking.org/',
    }
class Meta:
    # Scraping configuration for http://search.seznam.cz/.
    # NOTE(review): Chain, Form and Items are defined outside this view;
    # the comments here describe only the arguments handed to them.

    # Form step: path '/', with `q` bound to the XPath of input[name="q"].
    # Items step:
    #   _li   - XPath of the anchors inside each "modCont result cr" block
    #   _next - XPath of the anchor inside div.next
    #           (presumably result nodes and next-page link - confirm)
    #   url   - callable: UTF-8-decodes the node's `href` attribute
    #   title - callable: UTF-8-decodes the node's text
    scraping = Chain(
        Form('/', q='//input[@name="q"]'),
        Items(
            _li='//div[@data-elm]/div[@class="modCont result cr"]/div/h3/a',
            _next='//div[@class="next"]/a',
            url=lambda a: a.get_attr('href').decode('utf-8'),
            title=lambda a: a.text().decode('utf-8'),
        ))

    # Session settings: only the base URL is configured.
    session_init = {
        'base_url': 'http://search.seznam.cz/',
    }
class Meta:
    # Scraping configuration for http://search.yahoo.com/.
    # NOTE(review): Chain, Form and Items are defined outside this view;
    # the comments here describe only the arguments handed to them.

    # Form step: path '/', with `q` bound to the XPath of input[name="p"]
    # (the keyword is `q` even though the matched field is named "p").
    # Items step:
    #   _li   - XPath of the h3 anchors under each ordered-list item
    #   _next - XPath of a#pg-next inside div#pg
    #           (presumably result nodes and next-page link - confirm)
    #   url   - callable: UTF-8-decodes the node's `href` attribute
    #   title - callable: UTF-8-decodes the node's text
    scraping = Chain(
        Form('/', q='//input[@name="p"]'),
        Items(
            _li='//ol/li//h3/a',
            _next='//div[@id="pg"]/a[@id="pg-next"]',
            url=lambda a: a.get_attr('href').decode('utf-8'),
            title=lambda a: a.text().decode('utf-8'),
        ))

    # Session settings: only the base URL is configured.
    session_init = {
        'base_url': 'http://search.yahoo.com/',
    }
class Meta:
    # Scraping configuration for http://www.bing.com/.
    # NOTE(review): Chain, Form and Items are defined outside this view;
    # the comments here describe only the arguments handed to them.

    # Form step: path '/', with `q` bound to the XPath of input[name="q"].
    # Items step:
    #   _li   - XPath of the h2 anchors in ol#b_results list items
    #   _next - XPath of the "sb_pagN" anchor inside <nav>
    #           (presumably result nodes and next-page link - confirm)
    #   url   - callable: UTF-8-decodes the node's `href` attribute
    #   title - callable: UTF-8-decodes the node's text
    scraping = Chain(
        Form('/', q='//input[@name="q"]'),
        Items(
            _li='//ol[@id="b_results"]/li/h2/a',
            _next='//nav//a[@class="sb_pagN"]',
            url=lambda a: a.get_attr('href').decode('utf-8'),
            title=lambda a: a.text().decode('utf-8'),
        ))

    # Session settings: only the base URL is configured.
    session_init = {
        'base_url': 'http://www.bing.com/',
    }
class Meta:
    # Scraping configuration for https://www.google.com/.
    # NOTE(review): Chain, Form and Items are defined outside this view;
    # the comments here describe only the arguments handed to them.

    # Form step: path '/?q=' (note the empty `q` query string in the path),
    # with `q` bound to the XPath of input[name="q"].
    # Items step:
    #   _li   - XPath of the h3.r anchors inside div.g
    #   _next - XPath of a#pnnext
    #           (presumably result nodes and next-page link - confirm)
    #   url   - callable: UTF-8-decodes the node's `href` attribute
    #   title - callable: UTF-8-decodes the node's text
    scraping = Chain(
        Form('/?q=', q='//input[@name="q"]'),
        Items(
            _li='//div[@class="g"]//h3[@class="r"]/a',
            _next='//a[@id="pnnext"]',
            url=lambda a: a.get_attr('href').decode('utf-8'),
            title=lambda a: a.text().decode('utf-8'),
        ))

    # Session settings: only the base URL is configured.
    session_init = {
        'base_url': 'https://www.google.com/',
    }
class Meta:
    # Scraping configuration for http://websitetrafficspy.com/.
    # NOTE(review): Chain, Form and Values are defined outside this view;
    # the comments here describe only the arguments handed to them.

    # Form step: path '/', with `q` bound to the XPath of input#q.
    # Values step: three named XPaths -
    #   monthly_users     - row 2, cell 2 of table.widget
    #   monthly_pageviews - row 2, cell 3 of table.widget
    #   summary           - paragraph(s) directly under div#content
    scraping = Chain(
        Form('/', q='//input[@id="q"]'),
        Values(
            monthly_users='//table[@class="widget"]//tr[2]/td[2]',
            monthly_pageviews='//table[@class="widget"]//tr[2]/td[3]',
            summary='//div[@id="content"]/p',
        ),
    )

    # Session settings: only the base URL is configured.
    session_init = {
        'base_url': 'http://websitetrafficspy.com/',
    }
class Meta:
    # Scraping configuration for http://www.vinden.nl/.
    # NOTE(review): Chain, Form and Items are defined outside this view;
    # the comments here describe only the arguments handed to them.

    # Form step: path '/', with `q` bound to the XPath of input[name="q"].
    # Items step:
    #   _li   - XPath of the anchors in li.list entries of div#resultframe
    #   _next - XPath of the anchor in span.nn inside div.resultpages
    #           (presumably result nodes and next-page link - confirm)
    #   url   - callable: UTF-8-decodes the node's `href` attribute
    #   title - callable: UTF-8-decodes the node's text
    scraping = Chain(
        Form('/', q='//input[@name="q"]'),
        Items(
            _li='//div[@id="resultframe"]/ol/li[@class="list"]/div/a',
            _next='//div[@class="resultpages"]/span[@class="nn"]/a',
            url=lambda a: a.get_attr('href').decode('utf-8'),
            title=lambda a: a.text().decode('utf-8'),
        ))

    # Session settings: only the base URL is configured.
    session_init = {
        'base_url': 'http://www.vinden.nl/',
    }
class Meta:
    # Scraping configuration for http://www.ask.com/.
    # NOTE(review): Chain, Form and Items are defined outside this view;
    # the comments here describe only the arguments handed to them.

    # Form step: path '/?o=1&l=dir' (fixed query parameters are part of the
    # path), with `q` bound to the XPath of input[name="q"].
    # Items step:
    #   _li   - XPath of the anchors nested three divs below div#lindm
    #   _next - XPath of the anchor in the second div of div#paging
    #           (presumably result nodes and next-page link - confirm)
    #   url   - callable: UTF-8-decodes the node's `href` attribute
    #   title - callable: UTF-8-decodes the node's text
    scraping = Chain(
        Form('/?o=1&l=dir', q='//input[@name="q"]'),
        Items(
            _li='//div[@id="lindm"]/div/div/div/a',
            _next='//div[@id="paging"]/div[2]/a',
            url=lambda a: a.get_attr('href').decode('utf-8'),
            title=lambda a: a.text().decode('utf-8'),
        ))

    # Session settings: only the base URL is configured.
    session_init = {
        'base_url': 'http://www.ask.com/',
    }
class Meta:
    # Scraping configuration for http://www.websiteoutlook.com/.
    # NOTE(review): Chain, Form and Values are defined outside this view;
    # the comments here describe only the arguments handed to them.

    # Form step: path '/', with `q` bound to the XPath of
    # input[name="website"].
    # Values step: four named XPaths -
    #   daily_pageview     - row 2, cell 2 of table.hreview
    #   daily_adds_revenue - row 3, cell 2 of table.hreview
    #   rating             - row 4, cell 2 of table.hreview
    #   summary            - div.wid
    scraping = Chain(
        Form('/', q='//input[@name="website"]'),
        Values(
            daily_pageview='//table[@class="hreview"]//tr[2]/td[2]',
            daily_adds_revenue='//table[@class="hreview"]//tr[3]/td[2]',
            rating='//table[@class="hreview"]//tr[4]/td[2]',
            summary='//div[@class="wid"]',
        ),
    )

    # Session settings: only the base URL is configured.
    session_init = {
        'base_url': 'http://www.websiteoutlook.com/',
    }
class Meta:
    # Scraping configuration for http://alexa.com/.
    # NOTE(review): Chain, Form and Values are defined outside this view;
    # the comments here describe only the arguments handed to them.

    # Form step: path '/', with `q` bound to the XPath of input[name="q"].
    # Values step: four named XPaths. The first three select the <strong>
    # inside the span whose `data-cat` attribute names the metric; `links_in`
    # selects the span in div.box-2 under div#linksin_div.
    scraping = Chain(
        Form('/', q='//input[@name="q"]'),
        Values(
            global_rank='//span[@data-cat="globalRank"]/div/strong',
            pageviews_per_visitor='//span[@data-cat="pageviews_per_visitor"]/div/strong',
            time_on_site='//span[@data-cat="time_on_site"]/div/strong',
            links_in='//div[@id="linksin_div"]//div[@class="box-2"]/span'),
    )

    # Session settings: only the base URL is configured.
    session_init = {
        'base_url': 'http://alexa.com/',
    }
class Meta:
    # Scraping configuration for http://search.delta-search.com/.
    # NOTE(review): Chain, Form and Items are defined outside this view;
    # the comments here describe only the arguments handed to them.

    # Form step: path '/', with `q` bound to the XPath of input[name="q"].
    # Items step:
    #   _li   - XPath of the anchors directly under div.srsa
    #   _next - XPath of the anchor in the last span.prevnext
    #           (presumably result nodes and next-page link - confirm)
    #   url   - callable: UTF-8-decodes the node's `href` attribute
    #   title - callable: UTF-8-decodes the node's text
    scraping = Chain(
        Form('/', q='//input[@name="q"]'),
        Items(
            _li='//div[@class="srsa"]/a',
            _next='//span[@class="prevnext"][last()]/a',
            # Alternate `_next` selector kept from the original source:
            # '//div[@class="moreInfo"]/a'
            url=lambda a: a.get_attr('href').decode('utf-8'),
            title=lambda a: a.text().decode('utf-8'),
        ))

    # Session settings: only the base URL is configured.
    session_init = {
        'base_url': 'http://search.delta-search.com/',
    }
class Meta:
    # Scraping configuration for http://szukaj.onet.pl/.
    # NOTE(review): Chain, Form and Items are defined outside this view;
    # the comments here describe only the arguments handed to them.

    # Form step: path '/', with `q` bound to the XPath of input[name="qt"].
    # Items step:
    #   _li   - XPath of the anchors in div.link inside div.boxResult
    #   _next - XPath of the anchor in div.moreInfo inside div.boxMore
    #           (presumably result nodes and next-page link - confirm)
    #   url   - callable: UTF-8-decodes the node's `href` attribute
    #   title - callable: UTF-8-decodes the node's text
    scraping = Chain(
        Form('/', q='//input[@name="qt"]'),
        Items(
            _li='//div[@class="boxResult"]/div/div/div[@class="link"]/a',
            _next='//div[@class="boxMore"]/div[@class="moreInfo"]/a',
            # Alternate `_next` selector kept from the original source:
            # '//div[@class="paginate"]/a[@class="button nextActive"]'
            url=lambda a: a.get_attr('href').decode('utf-8'),
            title=lambda a: a.text().decode('utf-8'),
        ))

    # Session settings: only the base URL is configured.
    session_init = {
        'base_url': 'http://szukaj.onet.pl/',
    }
class Meta:
    # Scraping configuration for http://search.centrum.cz/.
    # NOTE(review): Chain, Form and Items are defined outside this view;
    # the comments here describe only the arguments handed to them.

    # Form step: path '/', with `q` bound to the XPath of input[name="q"].
    # Items step:
    #   _li   - XPath of the h3 anchors in div.entry-wrap of each
    #           ul.results-list item
    #   _next - XPath of the anchor in the "pageArrow nextPage" item of
    #           ul.pagination
    #           (presumably result nodes and next-page link - confirm)
    #   url   - callable: UTF-8-decodes the node's `href` attribute
    #   title - callable: UTF-8-decodes the node's text
    scraping = Chain(
        Form('/', q='//input[@name="q"]'),
        Items(
            _li='//ul[@class="results-list"]/li/div[@class="entry-wrap"]/h3/a',
            _next='//ul[@class="pagination"]/li[@class="pageArrow nextPage"]/a',
            url=lambda a: a.get_attr('href').decode('utf-8'),
            title=lambda a: a.text().decode('utf-8'),
        ))

    # Session settings: only the base URL is configured.
    session_init = {
        'base_url': 'http://search.centrum.cz/',
    }